Could some explain to me why this hangs sometimes? (This is just for learning purposes, I wouldn't sum a list like that)
import multiprocessing as mp
q = mp.JoinableQueue()
def worker():
S = 0
while not q.empty():
S += q.get()
q.task_done()
print(S)
procs = []
for i in range(1000):
q.put(i)
for i in range(2):
t = mp.Process(target=worker)
t.start()
procs.append(t)
q.join()
for t in procs:
t.join()
Related
I am trying to create 3 threads within each of 2 processes and share a queue of the type multiprocessing.JoinableQueue among all threads. The worker_func function simply creates the threads while the thread_func function prints out the values it gets from the queue. The program gets stuck somewhere in the time.sleep or in the get() method of queue. What am I doing wrong? I am running on a Windows computer.
import threading
from multiprocessing import Pool, Manager, JoinableQueue
import multiprocessing
from threading import Thread
import time
def thread_func(q, disp_lock):
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name ,
' reporting for duty')
while True:
time.sleep(0.1)
try:
val = q.get_nowait()
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name , ' got value: ',val)
q.task_done()
except:
with disp_lock:
print('queue is empty: ', q.qsize())
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target= thread_func, args=( q, disp_lock,))
thread.daemon = True
thread.start()
if __name__ == "__main__":
manager = Manager()
lock = manager.Lock()
q1 = JoinableQueue()#manager.Queue()
q1_length = 20
for i in range(q1_length):
q1.put(i)
processes = []
num_processes = 2 # 2 processes
num_threads = 3
for _ in range(num_processes):
p = multiprocessing.Process(target=worker_func, args=( num_threads, q1, lock, )) # create a new Process
p.daemon = True
p.start()
processes.append(p)
q1.join()
You are not allowing the threads to complete their work. Either set them as non-daemon, or explicitly wait for them to join:
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target=thread_func, args=(q, disp_lock,))
thread.daemon = True
thread.start()
threads.append(thread)
# Wait for them to finish
for thread in threads:
thread.join()
Python 2.7 here.
In the Queue example, the threads run indefinitely:
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = Queue()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
Is there an "official" or recommended way to tell the threads to exit after q.join() without using a global variable or subclassing Thread?
Currently, I am doing something like:
class MyThread(Thread, object):
...
def run(self):
...
while True:
try:
item = self.q.get_nowait()
except Queue.Empty:
if self.is_queue_empty:
break
else:
...
...
threads = [MyThread(q, target) for _ in range(num_threads)]
...
q.join()
for thread in threads:
thread.is_queue_empty = True
It works, but it seems kind of hacky. I would also like to avoid making a list of threads, if possible.
I have this code:
import multiprocessing
def worker():
print 'Worker'
return
if __name__ == '__main__':
jobs = []
for i in range(5):
p = multiprocessing.Process(target=worker)
jobs.append(p)
p.start()
For whatever reason it prints 'Worker' two times and stops. Does anyone know why? What am I doing wrong?
Starting multiprocessing tasks in Python on a defined value of "cores", you better choose to create a Pool and start the Process inside of that Pool.
pool = multiprocessing.Pool()
for i in range(5):
pool.apply_async(worker)
pool.close()
But if you like to do it on your way, I think you have to add a p.join():
import multiprocessing
def worker():
print 'Worker'
return
if __name__ == '__main__':
jobs = []
for i in range(5):
p = multiprocessing.Process(target=worker)
jobs.append(p)
p.start()
p.join()
I can't figure out what's wrong with the following python multiprocessing code. It does not terminate. Any suggestions will be highly appreciated.
from multiprocessing import Process, Queue, Lock
def hello(num, myqueue):
while True:
item = myqueue.get()
print 'Thread ', num, 'got', item
return
def put_on_queue(myqueue):
for i in range(10):
myqueue.put(i)
return
if __name__ == '__main__':
processes = []
myqueue = Queue()
for i in range(4):
proc = Process(target = hello, args = (i, myqueue))
proc.start()
processes.append(proc)
put_on_queue(myqueue)
for proc in processes:
proc.join()
-------------- EDIT -----------------
OK, so based on the comments I received, and some online help I revised my code as below. Still no luck :-(
def hello(num, myqueue):
while not exit_flag:
item = myqueue.get(False,5)
print 'Thread ', num, 'got', item
return
def put_on_queue(myqueue):
global exit_flag
for i in range(10):
myqueue.put(i)
while not myqueue.empty():
pass
exit_flag = 1
return
if __name__ == '__main__':
mylock = Lock()
processes = []
myqueue = Queue()
exit_flag = 0
for i in range(4):
proc = Process(target = hello, args = (i, myqueue))
#proc.daemon = True
proc.start()
processes.append(proc)
put_on_queue(myqueue)
for proc in processes:
proc.join()
There's an infinite loop inside your hello function.
You have to put a sentinent as a last value into the queue, check for it and break out of the loop.
I'm trying to use a queue with the multiprocessing library in Python. After executing the code below (the print statements work), but the processes do not quit after I call join on the Queue and there are still alive. How can I terminate the remaining processes?
Thanks!
def MultiprocessTest(self):
print "Starting multiprocess."
print "Number of CPUs",multiprocessing.cpu_count()
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = multiprocessing.JoinableQueue()
for i in range(num_procs):
p = multiprocessing.Process(target=worker)
p.daemon = True
p.start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
print "q close"
q.join()
#q.close()
print "Finished everything...."
print "num active children:",multiprocessing.active_children()
try this:
import multiprocessing
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
for item in iter( q.get, None ):
do_work(item)
q.task_done()
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
procs.append( multiprocessing.Process(target=worker) )
procs[-1].daemon = True
procs[-1].start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
q.join()
for p in procs:
q.put( None )
q.join()
for p in procs:
p.join()
print "Finished everything...."
print "num active children:", multiprocessing.active_children()
Your workers need a sentinel to terminate, or they will just sit on the blocking reads. Note that using sleep on the Q instead of join on the P lets you display status information etc.
My preferred template is:
def worker(q,nameStr):
print 'Worker %s started' %nameStr
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q.task_done()
print 'Worker %s Finished' % nameStr
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
nameStr = 'Worker_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
p.daemon = True
p.start()
procs.append(p)
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
for i in range(num_procs):
q.put(None) # send termination sentinel, one for each process
while not q.empty(): # wait for processing to finish
sleep(1) # manage timeouts and status updates etc.
Here is a sentinel-free method for the relatively simple case where you put a number of tasks on a JoinableQueue, then launch worker processes that consume the tasks and exit once they read the queue "dry". The trick is to use JoinableQueue.get_nowait() instead of get(). get_nowait(), as the name implies, tries to get a value from the queue in a non-blocking manner and if there's nothing to be gotten then a queue.Empty exception is raised. The worker handles this exception by exiting.
Rudimentary code to illustrate the principle:
import multiprocessing as mp
from queue import Empty
def worker(q):
while True:
try:
work = q.get_nowait()
# ... do something with `work`
q.task_done()
except Empty:
break # completely done
# main
worknum = 4
jq = mp.JoinableQueue()
# fill up the task queue
# let's assume `tasks` contains some sort of data
# that your workers know how to process
for task in tasks:
jq.put(task)
procs = [ mp.Process(target=worker, args=(jq,)) for _ in range(worknum) ]
for p in procs:
p.start()
for p in procs:
p.join()
The advantage is that you do not need to put the "poison pills" on the queue so the code is a bit shorter.
IMPORTANT : in more complex situations where producers and consumers use the same queue in an "interleaved" manner and the workers may have to wait for new tasks to come along, the "poison pill" approach should be used. My suggestion above is for simple cases where the workers "know" that if the task queue is empty, then there's no point hanging around any more.
You have to clear the queue before joining the process, but q.empty() is unreliable.
The best way to clear the queue is to count the number of successful gets or loop until you receive a sentinel value, just like a socket with a reliable network.
The code below may not be very relevant but I post it for your comments/feedbacks so we can learn together. Thank you!
import multiprocessing
def boss(q,nameStr):
source = range(1024)
for item in source:
q.put(nameStr+' '+str(item))
q.put(None) # send termination sentinel, one for each process
def worker(q,nameStr):
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q = multiprocessing.Queue()
procs = []
num_procs = 4
for i in range(num_procs):
nameStr = 'ID_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
procs.append(p)
p = multiprocessing.Process(target=boss, args=(q,nameStr))
procs.append(p)
for j in procs:
j.start()
for j in procs:
j.join()