I'm trying to use a queue with the multiprocessing library in Python. After executing the code below, the print statements work, but the processes do not quit after I call join on the Queue and they are still alive. How can I terminate the remaining processes?
Thanks!
import multiprocessing

def MultiprocessTest(self):
    print "Starting multiprocess."
    print "Number of CPUs", multiprocessing.cpu_count()

    num_procs = 4

    def do_work(message):
        print "work", message, "completed"

    def worker():
        while True:
            item = q.get()
            do_work(item)
            q.task_done()

    q = multiprocessing.JoinableQueue()

    for i in range(num_procs):
        p = multiprocessing.Process(target=worker)
        p.daemon = True
        p.start()

    source = ['hi', 'there', 'how', 'are', 'you', 'doing']
    for item in source:
        q.put(item)

    print "q close"
    q.join()
    #q.close()

    print "Finished everything...."
    print "num active children:", multiprocessing.active_children()
Try this:
import multiprocessing

num_procs = 4

def do_work(message):
    print "work", message, "completed"

def worker():
    for item in iter(q.get, None):
        do_work(item)
        q.task_done()
    q.task_done()  # account for the sentinel consumed by iter()

q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
    procs.append(multiprocessing.Process(target=worker))
    procs[-1].daemon = True
    procs[-1].start()

source = ['hi', 'there', 'how', 'are', 'you', 'doing']
for item in source:
    q.put(item)

q.join()

for p in procs:
    q.put(None)

q.join()

for p in procs:
    p.join()

print "Finished everything...."
print "num active children:", multiprocessing.active_children()
Your workers need a sentinel to terminate, or they will just sit blocked on q.get(). Note that polling the queue with a short sleep, instead of joining the processes, lets you display status information, handle timeouts, etc.
My preferred template is:
import multiprocessing
from time import sleep

num_procs = 4

def worker(q, nameStr):
    print 'Worker %s started' % nameStr
    while True:
        item = q.get()
        if item is None:  # detect sentinel
            break
        print '%s processed %s' % (nameStr, item)  # do something useful
        q.task_done()
    print 'Worker %s Finished' % nameStr
    q.task_done()  # task_done for the sentinel itself

q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
    nameStr = 'Worker_' + str(i)
    p = multiprocessing.Process(target=worker, args=(q, nameStr))
    p.daemon = True
    p.start()
    procs.append(p)

source = ['hi', 'there', 'how', 'are', 'you', 'doing']
for item in source:
    q.put(item)

for i in range(num_procs):
    q.put(None)  # send termination sentinel, one for each process

while not q.empty():  # wait for processing to finish
    sleep(1)          # manage timeouts and status updates etc.
Here is a sentinel-free method for the relatively simple case where you put a number of tasks on a JoinableQueue, then launch worker processes that consume the tasks and exit once they read the queue "dry". The trick is to use JoinableQueue.get_nowait() instead of get(). get_nowait(), as the name implies, tries to get a value from the queue in a non-blocking manner and if there's nothing to be gotten then a queue.Empty exception is raised. The worker handles this exception by exiting.
Rudimentary code to illustrate the principle:
import multiprocessing as mp
from queue import Empty

def worker(q):
    while True:
        try:
            work = q.get_nowait()
            # ... do something with `work`
            q.task_done()
        except Empty:
            break  # completely done

# main
worknum = 4
jq = mp.JoinableQueue()

# fill up the task queue
# let's assume `tasks` contains some sort of data
# that your workers know how to process
for task in tasks:
    jq.put(task)

procs = [mp.Process(target=worker, args=(jq,)) for _ in range(worknum)]

for p in procs:
    p.start()

for p in procs:
    p.join()
The advantage is that you do not need to put the "poison pills" on the queue so the code is a bit shorter.
IMPORTANT: in more complex situations where producers and consumers use the same queue in an "interleaved" manner and the workers may have to wait for new tasks to come along, the "poison pill" approach should be used. My suggestion above is for simple cases where the workers "know" that if the task queue is empty, then there's no point hanging around any more.
You have to clear the queue before joining the processes, but q.empty() is unreliable.
The best way to clear the queue is to count the number of successful gets, or to loop until you receive a sentinel value, much like reading from a socket over a reliable network (see the counting sketch after the code below).
The code below may not be very relevant, but I post it for your comments/feedback so we can learn together. Thank you!
import multiprocessing

def boss(q, nameStr, num_workers):
    source = range(1024)
    for item in source:
        q.put(nameStr + ' ' + str(item))
    for _ in range(num_workers):
        q.put(None)  # send termination sentinel, one for each worker

def worker(q, nameStr):
    while True:
        item = q.get()
        if item is None:  # detect sentinel
            break
        print '%s processed %s' % (nameStr, item)  # do something useful

q = multiprocessing.Queue()
procs = []
num_procs = 4

for i in range(num_procs):
    nameStr = 'ID_' + str(i)
    p = multiprocessing.Process(target=worker, args=(q, nameStr))
    procs.append(p)

p = multiprocessing.Process(target=boss, args=(q, nameStr, num_procs))
procs.append(p)

for j in procs:
    j.start()
for j in procs:
    j.join()
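For the "count the successful gets" alternative mentioned above, here is a minimal sketch; the task data, the `item * 2` "work", and the separate results queue are placeholders assumed for illustration. Because the main process knows exactly how many results to expect, it simply blocks until it has received that many, instead of trusting q.empty():

import multiprocessing

def worker(task_q, result_q):
    while True:
        item = task_q.get()
        if item is None:            # sentinel: no more tasks for this worker
            break
        result_q.put(item * 2)      # placeholder "work"

if __name__ == '__main__':
    task_q = multiprocessing.Queue()
    result_q = multiprocessing.Queue()
    tasks = range(100)              # placeholder task data
    num_procs = 4

    procs = [multiprocessing.Process(target=worker, args=(task_q, result_q))
             for _ in range(num_procs)]
    for p in procs:
        p.start()

    for t in tasks:
        task_q.put(t)
    for _ in range(num_procs):
        task_q.put(None)            # one sentinel per worker

    # Count successful gets instead of checking result_q.empty():
    # we know exactly how many results to expect, so block until
    # that many have been received.
    results = [result_q.get() for _ in range(len(tasks))]

    for p in procs:
        p.join()

    print('collected %d results' % len(results))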
I am trying to create 3 threads within each of 2 processes and share a queue of type multiprocessing.JoinableQueue among all the threads. The worker_func function simply creates the threads, while the thread_func function prints out the values it gets from the queue. The program gets stuck somewhere in time.sleep() or in the queue's get() method. What am I doing wrong? I am running on a Windows computer.
import threading
from multiprocessing import Pool, Manager, JoinableQueue
import multiprocessing
from threading import Thread
import time

def thread_func(q, disp_lock):
    with disp_lock:
        print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name,
              ' reporting for duty')
    while True:
        time.sleep(0.1)
        try:
            val = q.get_nowait()
            with disp_lock:
                print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name, ' got value: ', val)
            q.task_done()
        except:
            with disp_lock:
                print('queue is empty: ', q.qsize())

def worker_func(num_threads, q, disp_lock):
    threads = []
    for i in range(num_threads):
        thread = Thread(target=thread_func, args=(q, disp_lock,))
        thread.daemon = True
        thread.start()

if __name__ == "__main__":
    manager = Manager()
    lock = manager.Lock()

    q1 = JoinableQueue()  # manager.Queue()
    q1_length = 20
    for i in range(q1_length):
        q1.put(i)

    processes = []
    num_processes = 2  # 2 processes
    num_threads = 3
    for _ in range(num_processes):
        p = multiprocessing.Process(target=worker_func, args=(num_threads, q1, lock,))  # create a new Process
        p.daemon = True
        p.start()
        processes.append(p)

    q1.join()
You are not allowing the threads to complete their work. Either set them as non-daemon, or explicitly wait for them to join:
def worker_func(num_threads, q, disp_lock):
    threads = []
    for i in range(num_threads):
        thread = Thread(target=thread_func, args=(q, disp_lock,))
        thread.daemon = True
        thread.start()
        threads.append(thread)

    # Wait for them to finish
    for thread in threads:
        thread.join()
I want to kill a certain process, or all processes, in the middle of execution. My sample code is below. How can I do that? Here, I want to kill the processes based on the current time: if the time is divisible by 2, I want to kill them, otherwise not.
import time
from multiprocessing import Process

def runTests(a, b):
    time.sleep(10)
    return a + b

def main(kill_processes):
    print(kill_processes)
    processes = []
    for i in range(3):
        print(i)
        proc = Process(target=runTests, args=(2, 4,))
        processes.append(proc)
        proc.start()
    for proc in processes:
        proc.join()
        if kill_processes:
            print("killing")
            proc.terminate()

if __name__ == "__main__":
    if round(time.time()) % 2 == 0:
        main(True)
    else:
        main(False)
This illustrates how to do it using multiple threads, in a manner similar to the answer to the question multiprocessing in stoppable multithreading that I suggested you have a look at.
Basically, all the join() calls are done in separate threads, so they won't block the main thread of the main process, which makes it possible for it to kill the worker processes.
import threading
import time
from multiprocessing import Process

def runTests(a, b):
    time.sleep(10)
    return a + b

def create_process(lock, i):
    proc = Process(target=runTests, args=(2, 4,))
    print(f'{proc.name} created')
    proc.start()
    with lock:
        processes.append(proc)
    proc.join()

def main(kill_processes):
    global processes

    N = 3
    lock = threading.RLock()
    processes = []
    print(f'main({kill_processes=})')

    for i in range(N):
        thread = threading.Thread(target=create_process, args=(lock, i))
        thread.start()

    while True:  # Wait for all processes to have been created.
        with lock:
            if len(processes) == N:
                break
            else:
                time.sleep(.001)

    if kill_processes:
        print("Killing the processes")
        for proc in processes:
            proc.terminate()
            print(f'process {proc} terminated')

if __name__ == "__main__":
    main(True)
    # if round(time.time()) % 2 == 0:
    #     main(True)
    # else:
    #     main(False)
You can use multiprocessing.Event to signal a termination condition to your child processes. Don't join the child processes in the main process. Instead, let the main process and the child processes run in their own loops. Check the termination condition in the main loop, and signal it to the child processes via the multiprocessing.Event.
The Event object is passed as an argument to each child process. Each child continuously checks whether the event is set, and stops its work if so. The main loop checks the termination condition and sets the Event when the condition is met (in the example below, the main loop waits for Ctrl+C).
import multiprocessing as mp
import os
import time

def do_work(a, b, stop_event):
    while not stop_event.is_set():
        try:
            time.sleep(2)
            print(f"worker {os.getpid()}: working ...", a + b)
            a += 1
            b += 1
        except KeyboardInterrupt:
            print(f"worker {os.getpid()}: received SIGINT. ignore.")
            pass
    print(f"worker {os.getpid()}: stop_event is set. exit.")

if __name__ == "__main__":
    stop_event = mp.Event()

    procs = []
    for i in range(3):
        # p = mp.Process(target=do_work, args=(1, 2, stop_event), daemon=True)
        p = mp.Process(target=do_work, args=(1, 2, stop_event))
        p.start()
        procs.append(p)

    while True:
        try:
            print("main: waiting for termination signal")
            time.sleep(1)
        except KeyboardInterrupt:
            print("main: received termination signal")
            stop_event.set()

            # wait for the processes to stop
            for p in procs:
                p.join()
            for p in procs:
                print(f"worker {p.pid} is terminated: {not p.is_alive()}")

            # exit the main loop
            break

    print("main: bye")
If you want to terminate based on time, use the timeout parameter of join. One way is to set a stop time and, as each process is joined, use the time remaining as its timeout.
import time
from multiprocessing import Process

def runTests(a, b):
    time.sleep(10)
    return a + b

def main(kill_processes):
    print(kill_processes)
    processes = []
    end = time.time() + 10  # wait 10 seconds
    for i in range(3):
        print(i)
        proc = Process(target=runTests, args=(2, 4,))
        processes.append(proc)
        proc.start()
    for proc in processes:
        if kill_processes:
            delta = end - time.time()
        else:
            delta = None
        proc.join(delta)
        if kill_processes:
            print("killing")
            proc.terminate()
            proc.join(1)
            if proc.is_alive():
                proc.kill()

if __name__ == "__main__":
    if round(time.time()) % 2 == 0:
        main(True)
    else:
        main(False)
Python 2.7 here.
In the Queue example, the threads run indefinitely:
def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

q = Queue()
for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.daemon = True
    t.start()

for item in source():
    q.put(item)

q.join()  # block until all tasks are done
Is there an "official" or recommended way to tell the threads to exit after q.join() without using a global variable or subclassing Thread?
Currently, I am doing something like:
class MyThread(Thread, object):
    ...
    def run(self):
        ...
        while True:
            try:
                item = self.q.get_nowait()
            except Queue.Empty:
                if self.is_queue_empty:
                    break
            else:
                ...
        ...

threads = [MyThread(q, target) for _ in range(num_threads)]
...
q.join()
for thread in threads:
    thread.is_queue_empty = True
It works, but it seems kind of hacky. I would also like to avoid making a list of threads, if possible.
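One way that avoids both the flag attribute and the Thread subclass is the same sentinel trick used in the answers above: after q.join() returns, put one None per thread and let each worker break when it receives it. A rough sketch, with placeholder definitions standing in for the do_work, source and num_worker_threads from your snippet:

from threading import Thread
from Queue import Queue           # Python 2; on Python 3: from queue import Queue

num_worker_threads = 4            # placeholder values, assumed for illustration
def do_work(item):
    print('working on %s' % item)
def source():
    return ['hi', 'there', 'how', 'are', 'you', 'doing']

def worker():
    while True:
        item = q.get()
        if item is None:          # sentinel: time to exit
            q.task_done()
            break
        do_work(item)
        q.task_done()

q = Queue()
for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.start()                     # no daemon flag and no thread list needed

for item in source():
    q.put(item)
q.join()                          # block until all real tasks are done

for i in range(num_worker_threads):
    q.put(None)                   # one sentinel per thread
q.join()                          # block until every sentinel has been consumed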
Could someone explain to me why this hangs sometimes? (This is just for learning purposes; I wouldn't sum a list like this in practice.)
import multiprocessing as mp

q = mp.JoinableQueue()

def worker():
    S = 0
    while not q.empty():
        S += q.get()
        q.task_done()
    print(S)

procs = []
for i in range(1000):
    q.put(i)
for i in range(2):
    t = mp.Process(target=worker)
    t.start()
    procs.append(t)
q.join()
for t in procs:
    t.join()
I'm experimenting with the multiprocessing module and I'm getting some strange behavior. I have a list of tasks, which I first enqueue to a task_queue. Then I start all processes, which have access to both the task queue and the result queue. After the processes have started, I actively check for new content in the result queue while there are active processes.
The weird behavior is that when I start the script below, 4 of the processes exit immediately, and all the work is done by one process. No except Exception clauses are entered.
from multiprocessing import Process, Queue
from time import sleep, time
from queue import Empty

def function_doing_heavy_computation(task):
    return task + 1

def service_function(func, tasks_q, result_q):
    """
    :param func: user passed function that takes one argument - the task and returns the result of processing the task
    :param tasks_q:
    :param result_q:
    """
    while True:
        try:
            task = tasks_q.get_nowait()
        except Empty:
            result_q.close()  # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue.close
            result_q.join_thread()
            print("Worker finished ")
            break
        except Exception as ex:
            print(str(ex))

        try:
            res = func(task)
        except Exception as ex:
            print(str(ex))

        result_q.put_nowait(res)

def multiproc(func, all_tasks, num_procs):
    result = []
    task_queue, result_queue = Queue(), Queue()

    # add the tasks to the task queue
    start_put_tasks = time()
    for x in all_tasks:
        task_queue.put_nowait(x)
    print("Finished adding tasks in %.2f" % (time() - start_put_tasks))

    # create the processes and pass them the task and result queue
    start_create_procs = time()
    procs = []
    for _ in range(num_procs):
        p = Process(target=service_function, args=(func, task_queue, result_queue))
        procs.append(p)

    for p in procs:
        p.start()
    print("Started %i workers in %.2f" % (len(procs), time() - start_create_procs))

    # collect the results in a list, and return it
    start_drain_queue = time()
    liveprocs = list(procs)
    while liveprocs:
        # drain the current contents of the result_queue
        while True:
            try:
                result.append(result_queue.get_nowait())
            except Empty:
                break

        # set the currently active procs. while loop will exit if all procs have terminated
        liveprocs = [p for p in procs if p.is_alive()]

        # no process has put a result/all ready results have been drained. wait for new results to arrive
        sleep(.1)

    print("Finished draining result queue in %.2f" % (time() - start_drain_queue))

    if len(result) != len(all_tasks):
        raise RuntimeError("Only %i/%i tasks processed" % (len(result), len(all_tasks)))
    return result

if __name__ == '__main__':
    start = time()

    # a task is just a number here.
    # normally more tasks than workers. i
    tasks = range(100000)

    result = multiproc(func=function_doing_heavy_computation, all_tasks=tasks, num_procs=5)
    print("Done in %s seconds" % str(time() - start))
    assert len(tasks) == len(result)
    print("Processed %i tasks" % len(result))