I have the following code using the scheduler and multiprocessing module:
def computation():
def function1(q):
while True:
daydate = datetime.now()
number = random.randrange(1, 215)
print('Sent to function2: ({}, {})'.format(daydate, number))
q.put((daydate, number))
time.sleep(2)
def function2(q):
while True:
date, number = q.get()
print("Recevied values from function1: ({}, {})".format(date, number))
time.sleep(2)
if __name__ == "__main__":
q = Queue()
a = Process(target=function1, args=(q,))
a.start()
b = Process(target=function2, args=(q,))
b.start()
a.join()
b.join()
schedule.every().monday.at("08:45").do(computation)
schedule.every().tuesday.at("08:45").do(computation)
while True:
schedule.run_pending()
time.sleep(1)
However while executing the code gives the following error:
AttributeError: Can't pickle local object 'computation..
function1
And:
OSError: [WinError 87] The parameter is incorrect
How does one solve this problem? I've tried to solve this by define a function at the top level of a module as stated in the documents (https://docs.python.org/2/library/pickle.html#what-can-be-pickled-and-unpickled) however it still gives the same error.
Nested functions are not functions defined at the top-level so that's why you get the error. You need to relocate the definition of function1 and function2 outside
of computation.
How you wrote it, your processes would start immediately instead of on the date you scheduled them to run. That probably does what you intended:
import os
import time
import random
from multiprocessing import Process, Queue
from threading import Thread
from datetime import datetime
import schedule
def function1(q):
while True:
daydate = datetime.now()
number = random.randrange(1, 215)
fmt = '(pid: {}) Sent to function2: ({}, {})'
print(fmt.format(os.getpid(), daydate, number))
q.put((daydate, number))
time.sleep(2)
def function2(q):
while True:
date, number = q.get()
fmt = "(pid: {}) Received values from function1: ({}, {})"
print(fmt.format(os.getpid(), date, number))
# time.sleep(2) no need to sleep here because q.get will block until
# new items are available
def computation():
q = Queue()
a = Process(target=function1, args=(q,))
a.start()
b = Process(target=function2, args=(q,))
b.start()
a.join()
b.join()
if __name__ == "__main__":
# We are spawning new threads as a launching platform for
# computation. Without it, the next job couldn't start before the last
# one has finished. If your jobs always end before the next one should
# start, you don't need this construct and you can just pass
# ...do(computation)
schedule.every().friday.at("01:02").do(
Thread(target=computation).start
)
schedule.every().friday.at("01:03").do(
Thread(target=computation).start
)
while True:
schedule.run_pending()
time.sleep(1)
As it is now, your processes would run forever after started once. If that's not what you want, you have to think about implementing some stop condition.
Related
I'm having trouble writing a benchmark code in python using threading. I was able to get my threading to work, but I can't get my object to return a value. I want to take the values and add them to a list so I can calculate the flops.
create class to carry out threading
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
return ex_time
def run(self):
t = threading.Thread(target = self.calculation)
t.start()
function to create threads
def make_threads(num):
times=[]
calcs=[]
for i in range(num):
print('start thread', i+1)
thread1=myThread()
t=thread1.start()
times.append(t)
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print(times)
#average out the times,add all the calculations to get the final numbers
#to calculate flops
time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()
Two ways to do this:
1. Using static variables (hacky, but efficient and quick)
Define some global variable that you then manipulate in the thread. I.e.:
import threading
import time
class myThread(threading.Thread):
def calculation(self):
n=0
start=time.time()
ex_time=0
print("Running....")
while ex_time < 30:
n+=1
end=time.time()
ex_time=end-start
self.myThreadValues[self.idValue] = ex_time
print(self.myThreadValues)
return ex_time
def setup(self,myThreadValues=None,idValue=None):
self.myThreadValues = myThreadValues
self.idValue = idValue
def run(self):
self.calculation()
#t = threading.Thread(target = self.calculation)
#t.start()
def make_threads(num):
threads=[]
calcs=[]
myThreadValues = {}
for i in range(num):
print('start thread', i+1)
myThreadValues[i] = 0
thread1=myThread()
thread1.setup(myThreadValues,i)
thread1.start()
#times.append(t)
threads.append(thread1)
# Now we need to wait for all the threads to finish. There are a couple ways to do this, but the best is joining.
print("joining all threads...")
for thread in threads:
thread.join()
#calcs.append(n)
#when trying to get a return value it comes back as none as seen
print("Final thread values: " + str(myThreadValues))
print("Done")
#average out the times,add all the calculations to get the final numbers
#to calculate flops
#time.sleep(32) #stop the menu from printing until calc finish
def main():
answer=1
while answer != 0:
answer=int(input("Please indicate how many threads to use: (Enter 0 to exit)"))
print("\n\nBenchmark test with ", answer, "threads")
make_threads(answer)
main()
2. The proper way to do this is with Processes
Processes are designed for passing information back and forth, versus threads which are commonly used for async work. See explanation here: https://docs.python.org/3/library/multiprocessing.html
See this answer: How can I recover the return value of a function passed to multiprocessing.Process?
import multiprocessing
from os import getpid
def worker(procnum):
print 'I am number %d in process %d' % (procnum, getpid())
return getpid()
if __name__ == '__main__':
pool = multiprocessing.Pool(processes = 3)
print pool.map(worker, range(5))
I'm writing Python 2.7 that need to run sub-process and terminate it if the subprocess user time exceeded max time.
The definiton for user time can be found in this answer to What do 'real', 'user' and 'sys' mean in the output of time(1)?
So far I have this:
Code
from multiprocessing import Process
import time
def do_something():
pass
def run_process_limited_time(max_seconds):
# init Process
p = Process(target=do_something, args=())
# start process
p.start()
run_time = 0;
sleep_time = 1;
while 1:
# sleep for sleep_time seconds
time.sleep(sleep_time)
# increase run time
run_time += sleep_time
# if process is not alive, we can break the loop
if not p.is_alive():
break
# if process is still alive after max_seconds, kill it!
# TBD - condition is not sufficient
elif run_time > max_seconds:
p.terminate()
break
def main():
run_process_limited_time(10)
if __name__ == "__main__":
main()
Question
the condition elif run_time > max_seconds is not good enough. I'd like to check that max seconds does not exceed user time only. Not sure I can do it using Process module but I'm open to new ideas.
An alternative using the resource module. Handling the timeout can be achieved using the signal module. Returning results can be handled in a variety of ways, some of which are described in the multiprocessing documentation.
from multiprocessing import Process
import time
import resource
import signal
def do_something():
import resource
resource.setrlimit(resource.RLIMIT_CPU, (1,1))
def f(signo, stackframe):
raise Exception('stop')
signal.signal(signal.SIGXCPU, f)
try:
while 1:
print 'hello'
except:
print 'aborted'
def run_process_limited_time(function):
# init Process
p = Process(target=function, args=())
# start process
p.start()
p.join(2)
p.terminate()
if __name__ == "__main__":
run_process_limited_time(do_something)
This is because you're using an elif statement where you should be using an if statement:
from multiprocessing import Process
import time
def do_something():
while True:
time.sleep(100)
def run_process_limited_time(max_seconds):
# init Process
p = Process(target=do_something, args=())
# start process
p.start()
run_time = 0
sleep_time = 1
while True:
time.sleep(sleep_time)
run_time += sleep_time
if not p.is_alive():
break
if run_time > max_seconds:
print "Process was terminated, due to exceeding max time"
p.terminate()
break
def main():
run_process_limited_time(10)
if __name__ == "__main__":
main()
Try the above, the process should be terminated.
I don't know why I'm having such a problem with this, basically, I want to have a Queue that is constantly running during the program called "Worker" this then works, however, every 10 seconds or so.. Another method called "Process" comes in and processes the data. Let's assume the following, data is captured every 10 seconds.. (0, 1, 2, 3, ..... n) and then the "Proces" function receives this, processes the data, ends, and then the "Worker" goes back to work and does their job until the program has ended.
I have the following code:
import multiprocessing as mp
import time
DELAY_SIZE = 10
def Worker(q):
print "I'm working..."
def Process(q):
print "I'm processing.."
queue = mp.Queue(maxsize=DELAY_SIZE)
p = mp.Process(target=Worker, args=(queue,))
p.start()
while True:
d = queue.get()
time.sleep(10)
Process()
In this example, it would look like the following:
I'm working...
I'm working...
I'm working...
...
...
...
I'm working...
I'm processing...
I'm processing...
I'm processing...
...
...
I'm working..
I'm working..
Any ideas?
Here is an alternative way using threads:
import threading
import Queue
import time
class Worker(threading.Thread):
def __init__(self, q):
threading.Thread.__init__(self)
self._q = q
def run(self):
# here, worker does its job
# results are pushed to the shared queue
while True:
print 'I am working'
time.sleep(1)
result = time.time() # just an example
self._q.put(result)
def process(q):
while True:
if q.empty():
time.sleep(10)
print 'I am processing'
worker_result = q.get()
# do whatever you want with the result...
print " ", worker_result
if __name__ == '__main__':
shared_queue = Queue.Queue()
worker = Worker(shared_queue)
worker.start()
process(shared_queue)
I have a simple example script constructed that defines three separate processes using multiprocessing in python. My objective is to have one parent thread that spawns two smaller threads that will collect and process data.
Currently, my implementation looks like this:
from Queue import Queue,Empty
from multiprocessing import Process
import time
import hashlib
class FillQueue(Process):
def __init__(self,q):
Process.__init__(self)
self.q = q
def run(self):
i = 0
while i is not 5:
print 'putting'
self.q.put('foo')
i+=1
self.q.put('|STOP|')
class ConsumeQueue(Process):
def __init__(self,q):
Process.__init__(self)
self.q = q
def run(self):
print 'Consume'
while True:
try:
value = self.q.get(False)
print value
if value == '|STOP|':
print 'done'
break;
except Empty:
print 'Nothing to process atm'
class Ripper(Process):
q = Queue()
def __init__(self):
self.fq = FillQueue(self.q)
self.cq = ConsumeQueue(self.q)
self.fq.daemon = True
self.cq.daemon = True
def run(self):
try:
self.fq.start()
self.cq.start()
except KeyboardInterrupt:
print 'exit'
if __name__ == '__main__':
r = Ripper()
r.start()
As it runs presently, the output from the script on CLI looks like this:
putting
putting
putting
putting
putting
Consume
foo
foo
foo
foo
foo
|STOP|
done
Obviously, the way I am starting my two threads is blocking, since the consumer doesn't even begin to process the items in the queue until the filler finishes adding items.
How should I rewrite this to make both threads begin immediately and not block, so the consumer will simply pass to the Empty except block while there is no work to process, but will exit completely when it receives the stop message?
EDIT: typo, had the start and run methods mixed up
You seem to be starting multiple processes using multiprocessing.Process.
However, you are using Queue.Queue which is only threadsafe, and not designed to be used by multiple processes.
shevek's answer is valid as well, but as a start, you should replace Queue.Queue with multiprocessing.Queue.
try this:
from Queue import Empty
from multiprocessing import Process, Queue
import time
import hashlib
class FillQueue(object):
def __init__(self, q):
self.q = q
def run(self):
i = 0
while i < 5:
print 'putting'
self.q.put('foo %d' % i )
i+=1
time.sleep(.5)
self.q.put('|STOP|')
class ConsumeQueue(object):
def __init__(self, q):
self.q = q
def run(self):
while True:
try:
value = self.q.get(False)
print value
if value == '|STOP|':
print 'done'
break;
except Empty:
print 'Nothing to process atm'
time.sleep(.2)
if __name__ == '__main__':
q = Queue()
f = FillQueue(q)
c = ConsumeQueue(q)
p1 = Process(target=f.run)
p1.start()
p2 = Process(target=c.run)
p2.start()
p1.join()
p2.join()
I think your program works fine. The CPU processes only one thing at a time, for a short time. However, the time required to put all your stuff in the queue is very short. So there is no reason that the filler cannot do this in one time slice.
If you add some delays in the filler, I think you should see that it actually works as you expect.
I have a script and I want one function to run at the same time as the other.
The example code I have looked at:
import threading
def MyThread (threading.thread):
# doing something........
def MyThread2 (threading.thread):
# doing something........
MyThread().start()
MyThread2().start()
I am having trouble getting this working. I would prefer to get this going using a threaded function rather than a class.
This is the working script:
from threading import Thread
class myClass():
def help(self):
os.system('./ssh.py')
def nope(self):
a = [1,2,3,4,5,6,67,78]
for i in a:
print i
sleep(1)
if __name__ == "__main__":
Yep = myClass()
thread = Thread(target = Yep.help)
thread2 = Thread(target = Yep.nope)
thread.start()
thread2.start()
thread.join()
print 'Finished'
You don't need to use a subclass of Thread to make this work - take a look at the simple example I'm posting below to see how:
from threading import Thread
from time import sleep
def threaded_function(arg):
for i in range(arg):
print("running")
sleep(1)
if __name__ == "__main__":
thread = Thread(target = threaded_function, args = (10, ))
thread.start()
thread.join()
print("thread finished...exiting")
Here I show how to use the threading module to create a thread which invokes a normal function as its target. You can see how I can pass whatever arguments I need to it in the thread constructor.
There are a few problems with your code:
def MyThread ( threading.thread ):
You can't subclass with a function; only with a class
If you were going to use a subclass you'd want threading.Thread, not threading.thread
If you really want to do this with only functions, you have two options:
With threading:
import threading
def MyThread1():
pass
def MyThread2():
pass
t1 = threading.Thread(target=MyThread1, args=[])
t2 = threading.Thread(target=MyThread2, args=[])
t1.start()
t2.start()
With thread:
import thread
def MyThread1():
pass
def MyThread2():
pass
thread.start_new_thread(MyThread1, ())
thread.start_new_thread(MyThread2, ())
Doc for thread.start_new_thread
I tried to add another join(), and it seems worked. Here is code
from threading import Thread
from time import sleep
def function01(arg,name):
for i in range(arg):
print(name,'i---->',i,'\n')
print (name,"arg---->",arg,'\n')
sleep(1)
def test01():
thread1 = Thread(target = function01, args = (10,'thread1', ))
thread1.start()
thread2 = Thread(target = function01, args = (10,'thread2', ))
thread2.start()
thread1.join()
thread2.join()
print ("thread finished...exiting")
test01()
Python 3 has the facility of Launching parallel tasks. This makes our work easier.
It has for thread pooling and Process pooling.
The following gives an insight:
ThreadPoolExecutor Example
import concurrent.futures
import urllib.request
URLS = ['http://www.foxnews.com/',
'http://www.cnn.com/',
'http://europe.wsj.com/',
'http://www.bbc.co.uk/',
'http://some-made-up-domain.com/']
# Retrieve a single page and report the URL and contents
def load_url(url, timeout):
with urllib.request.urlopen(url, timeout=timeout) as conn:
return conn.read()
# We can use a with statement to ensure threads are cleaned up promptly
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
# Start the load operations and mark each future with its URL
future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
for future in concurrent.futures.as_completed(future_to_url):
url = future_to_url[future]
try:
data = future.result()
except Exception as exc:
print('%r generated an exception: %s' % (url, exc))
else:
print('%r page is %d bytes' % (url, len(data)))
Another Example
import concurrent.futures
import math
PRIMES = [
112272535095293,
112582705942171,
112272535095293,
115280095190773,
115797848077099,
1099726899285419]
def is_prime(n):
if n % 2 == 0:
return False
sqrt_n = int(math.floor(math.sqrt(n)))
for i in range(3, sqrt_n + 1, 2):
if n % i == 0:
return False
return True
def main():
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
for number, prime in zip(PRIMES, executor.map(is_prime, PRIMES)):
print('%d is prime: %s' % (number, prime))
if __name__ == '__main__':
main()
You can use the target argument in the Thread constructor to directly pass in a function that gets called instead of run.
Did you override the run() method? If you overrided __init__, did you make sure to call the base threading.Thread.__init__()?
After starting the two threads, does the main thread continue to do work indefinitely/block/join on the child threads so that main thread execution does not end before the child threads complete their tasks?
And finally, are you getting any unhandled exceptions?
the simple way to implement multithread process using threading
code snippet for the same
import threading
#function which takes some time to process
def say(i):
time.sleep(1)
print(i)
threads = []
for i in range(10):
thread = threading.Thread(target=say, args=(i,))
thread.start()
threads.append(thread)
#wait for all threads to complete before main program exits
for thread in threads:
thread.join()