I'm experimenting with the multiprocessing module and I'm getting some strange behavior. I have a list of tasks, which I first enqueue to a task_queue. Then I start all processes, which have access to both the task queue and the result queue. After the processes have started, I actively check for new content in the result queue while there are active processes.
The weird behavior is that when I start the script below, 4 of the processes exit immediately, and all the work is done by one process. No except Exception clauses are entered.
from multiprocessing import Process, Queue
from time import sleep, time
from queue import Empty
def function_doing_heavy_computation(task):
return task + 1
def service_function(func, tasks_q, result_q):
"""
:param func: user passed function that takes one argument - the task and returns the result of processing the task
:param tasks_q:
:param result_q:
"""
while True:
try:
task = tasks_q.get_nowait()
except Empty:
result_q.close() # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue.close
result_q.join_thread()
print("Worker finished ")
break
except Exception as ex:
print(str(ex))
try:
res = func(task)
except Exception as ex:
print(str(ex))
result_q.put_nowait(res)
def multiproc(func, all_tasks, num_procs):
result = []
task_queue, result_queue = Queue(), Queue()
# add the tasks to the task queue
start_put_tasks = time()
for x in all_tasks:
task_queue.put_nowait(x)
print("Finished adding tasks in %.2f" % (time() - start_put_tasks))
# create the processes and pass them the task and result queue
start_create_procs = time()
procs = []
for _ in range(num_procs):
p = Process(target=service_function, args=(func, task_queue, result_queue))
procs.append(p)
for p in procs:
p.start()
print("Started %i workers in %.2f" % (len(procs), time() - start_create_procs))
# collect the results in a list, and return it
start_drain_queue = time()
liveprocs = list(procs)
while liveprocs:
# drain the current contents of the result_queue
while True:
try:
result.append(result_queue.get_nowait())
except Empty:
break
# set the currently active procs. while loop will exit if all procs have terminated
liveprocs = [p for p in procs if p.is_alive()]
# no process has put a result/all ready results have been drained. wait for new results to arrive
sleep(.1)
print("Finished draining result queue in %.2f" % (time() - start_drain_queue))
if len(result) != len(all_tasks):
raise RuntimeError("Only %i/%i tasks processed" % (len(result), len(all_tasks)))
return result
if __name__ == '__main__':
start = time()
# a task is just a number here.
# normally more tasks than workers. i
tasks = range(100000)
result = multiproc(func=function_doing_heavy_computation, all_tasks=tasks, num_procs=5)
print("Done in %s seconds" % str(time() - start))
assert len(tasks) == len(result)
print("Processed %i tasks" % len(result))
Related
I want to kill a certain process or all processes in the middle of its execution. My sample code is as follows. How can I do that? Here, I want to kill the processes based on the current time. If the time is divisible by 2, I want to kill the processes, otherwise not.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
proc.join()
if kill_processes:
print("killing")
proc.terminate()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)
This illustrates how to do it by using multiple threads in a manner similar to what's in the answer to the question multiprocessing in stoppable multithreading that I suggested you have a look at.
Basically all that is going on is all the join() calls are done in separate threads, so they won't block the main thread in the main process — which makes it possible for it to kill them.
import threading
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def create_process(lock, i):
proc = Process(target=runTests, args=(2, 4,))
print(f'{proc.name} created')
proc.start()
with lock:
processes.append(proc)
proc.join()
def main(kill_processes):
global processes
N = 3
lock = threading.RLock()
processes = []
print(f'main({kill_processes=})')
for i in range(N):
thread = threading.Thread(target=create_process, args=(lock, i))
thread.start()
while True: # Wait for all processes to have been created.
with lock:
if len(processes) == N:
break
else:
time.sleep(.001)
if kill_processes:
print("Killing the processes")
for proc in processes:
proc.terminate()
print(f'process {proc} terminated')
if __name__ == "__main__":
main(True)
# if round(time.time()) % 2 == 0:
# main(True)
# else:
# main(False)
You can use multiprocessing.Event to signal termination condition to your child processes. Don't join child processes in the main process. Instead let main process and child processes run in their own loop. Check the termination condition in the main loop, and signal it using the multiprocessing.Event to the child processes.
The Event object is passed as an argument to child process. Child process continuously checks if the event is set, and stops its work if so. Main loop checks the termination condition and sets the Event if condition is met (in the below example main loop waits for Ctrl+c).
import multiprocessing as mp
import os
import time
def do_work(a, b, stop_event):
while not stop_event.is_set():
try:
time.sleep(2)
print(f"worker {os.getpid()}: working ...", a + b)
a += 1
b += 1
except KeyboardInterrupt:
print(f"worker {os.getpid()}: received SIGINT. ignore.")
pass
print(f"worker {os.getpid()}: stop_event is set. exit.")
if __name__ == "__main__":
stop_event = mp.Event()
procs = []
for i in range(3):
# p = mp.Process(target=do_work, args=(1, 2, stop_event), daemon=True)
p = mp.Process(target=do_work, args=(1, 2, stop_event))
p.start()
procs.append(p)
while True:
try:
print("main: waiting for termination signal")
time.sleep(1)
except KeyboardInterrupt:
print("main: received termination signal")
stop_event.set()
# wait for the processes to stop
for p in procs:
p.join()
for p in procs:
print(f"worker {p.pid} is terminated: {not p.is_alive()}")
# exit the main loop
break
print("main: bye")
If you want to terminate based on time, use the timeout paramter of join. One way is to set a stop time and as each process is joined, use time remaining as its timeout.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
end = time.time() + 10 # wait 10 seconds
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
if kill_processes:
delta = end - time.time()
else:
delta = None
proc.join(delta)
if kill_processes:
print("killing")
proc.terminate()
proc.join(1)
if proc.is_alive():
proc.kill()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)
I run some jobs in parallel, which can sometime take a long time, so I want the main thread to report on the progress. For example, each hour.
Below is the simplified version of what I came up with. The code will run test_function in 2 threads with arguments from input_arguments. Every 5 seconds it will print % of the jobs finished.
import threading
import queue
import time
def test_function(x):
time.sleep(4)
print("Finished ", x)
num_processes = 2
input_arguments = range(10)
# Define a worker which will continuously execute function taking input parameters from the queue
def worker():
while True:
x = q.get()
if x is None:
break
test_function(x)
q.task_done()
# Initialize queue and the threads
q = queue.Queue()
threads = []
for i in range(num_processes):
t = threading.Thread(target=worker)
t.start()
threads.append(t)
# Create a queue of input parameters for function
for item in input_arguments:
q.put(item)
# Report progress every 5 seconds
report_progress(q)
# stop workers
for i in range(num_processes):
q.put(None)
for t in threads:
t.join()
Where report_progress is defined as following
def report_progress(q):
qsize_init = q.qsize()
while not q.empty():
time.sleep(5)
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
However, I want to report the progress every hour instead of 5 seconds, and if all jobs are finished, the program might just be idle for many minutes.
Another possibility is to define report_progress differently:
def report_progress(q):
qsize_init = q.qsize()
time_start = time.time()
while not q.empty():
current_time = time.time()
if current_time - time_start > 5:
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
time_start = time.time()
I am worried that constantly checking this condition will drain CPU resources, small portion, but on a scale of hours it could be a lot.
Is there a standard way of handling this?
Python: 3.6
For now I will use a simple solution, suggested in the comments by #Andriy Maletsky.
Main thread will check every few seconds if the q is not empty yet, and it will print a progress message if it has past more than 1 hour since the last report.
time_between_reports = 3600
time_between_checks = 5
def report_progress_until_finished(q):
qsize_init = q.qsize()
last_report_time = time.time()
while not q.empty():
time_elapsed = time.time() - last_report_time
if time_elapsed > time_between_reports:
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
last_report_time = time.time()
time.sleep(time_between_checks)
I have a Python multiprocessing scenario which I have simplified for my question here. There are x number of jobs to be processed in 2 parts. In my code, the 2 job parts are actually HTTP requests where part 2 is dependent on the results of part 1. Finally, there is a 3rd part that simply reports on how long parts 1 and 2 took and calculates a running average of time taken across all jobs.
Taking advantage of multiprocessing, I set up 2x process workers for Job Part 1 and also 2x workers for Job Part 2 and only 1x Reporter worker. To communicate the time taken for each job part, I am using Queues.
The code I have works fine for a complete workthrough of the specified x number of jobs BUT, I would like to add a timeout/cancel event that should stop all the workers and terminates gracefully.
In my code, I use the Report Worker to check for this event and when it happens I thought it would simply be a case of consuming the remaining jobs in all the queues and adding a poison pill to signal them to terminate.
The sub-processes do terminate BUT it seems control is NOT passed back to the "main" program and I can still see the main process hanging until I do a ctrl-c cancel in the command prompt.
Please help see my code and tell me where I am going wrong:
#!/usr/bin/python3
import sys, time, datetime, os, math, multiprocessing, time, random
TYPE_JOB_PART_1 = '1'
TYPE_JOB_PART_2 = '2'
class Worker(multiprocessing.Process):
def __init__(self, task_queue, result_queue, task_type):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.task_type = task_type
def run(self):
while True:
next_task = self.task_queue.get()
if next_task is None:
#= Poison pill means shutdown
print('%s: is exiting: %s ===============' % (self.name, self.task_type))
self.task_queue.task_done()
break
job_response = next_task()
self.task_queue.task_done()
if self.task_type == TYPE_JOB_PART_1:
self.result_queue.put(do_jobPart_2(job_response))
elif self.task_type == TYPE_JOB_PART_2:
self.result_queue.put(do_reporting(job_response))
return
class Reporter(multiprocessing.Process):
def __init__(self, task_queue, result_queue, num_tasks, num_workers_jobPart1, num_workers_jobPart2, jobPart1_queue, jobPart2_queue):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.jobPart1_queue = jobPart1_queue
self.jobPart2_queue = jobPart2_queue
self.num_tasks = num_tasks
self.num_workers_jobPart1 = num_workers_jobPart1
self.num_workers_jobPart2 = num_workers_jobPart2
self.time_start = datetime.datetime.now() #= Start the timer
self.time_elapsed = 0
self.time_wait_to_terminate = 3 #= Define the timeout to terminate all jobs
def run(self):
while True:
next_task = self.task_queue.get()
if next_task is None:
#= Poison pill means shutdown
self.task_queue.task_done()
print('==================================================')
print('============ END OF PROCESSING ===================')
print('==================================================')
break
job_response = next_task()
self.task_queue.task_done()
self.result_queue.put(job_response)
queueSize = self.result_queue.qsize()
#= TERMINATTION time
print("================>i:%s" % (queueSize))
self.time_elapsed = (datetime.datetime.now() - self.time_start).total_seconds()
if self.time_elapsed > self.time_wait_to_terminate:
print("TIME IS UP. %s elapsed!" % self.time_wait_to_terminate)
#= Empty the JobPart_1 queue to relieve the workers
while not self.jobPart1_queue.empty():
self.jobPart1_queue.get()
self.jobPart1_queue.task_done()
#= And add poison pills again
for i in range(self.num_workers_jobPart1):
self.jobPart1_queue.put(None)
#= Empty the JobPart_2 queue to relieve the workers
while not self.jobPart2_queue.empty():
self.jobPart2_queue.get()
self.jobPart2_queue.task_done()
#= And add poison pills again
for i in range(self.num_workers_jobPart2):
self.jobPart2_queue.put(None)
#= Empty the report queue to relieve the reporter itself
while not self.task_queue.empty():
self.task_queue.get()
self.task_queue.task_done()
print("TIME IS UP: workers stopped, Reporter shutting itself down....")
break
#= Results queue is filled up when count = num_tasks, so give Poison pill to shutdown JobPart_2 workers
if queueSize == self.num_tasks:
for i in range(self.num_workers_jobPart2):
self.jobPart2_queue.put(None)
print("JobPart_2 workers will be poisoned")
return
class do_reporting(object):
def __init__(self, info):
self.info = info
def __call__(self):
try:
print("%s:do_reporting - is RUNNING " % (self.info['jobPart1_results']['i']))
randtime = 0.5 * random.random()
time.sleep(randtime)
print( 'jobPart1_time:%s, jobPart2_time:%s, report_time;%s' % ( self.info["jobPart1_results"]["jobPart1_time"], self.info["jobPart2_time"], randtime ) )
return {'results':self.info,'report_time':randtime}
except:
print("error:do_reporting")
class do_jobPart_1(object):
def __init__(self, i, t0):
self.t0 = t0
self.i = i
def __call__(self):
try:
print("%s:do_jobPart_1 - is RUNNING " % self.i)
randtime = 0.5 * random.random()
time.sleep(randtime)
time_elapsed = (datetime.datetime.now() - self.t0).total_seconds()
return {'i':self.i, 't0':self.t0, 'time_elapsed_job1':time_elapsed, 'jobPart1_time':randtime}
except:
print("error:do_jobPart_1")
class do_jobPart_2(object):
def __init__(self, info):
self.info = info
def __call__(self):
try:
print("%s:do_jobPart_2 - is RUNNING " % (self.info['i']))
randtime = 0.5 * random.random()
time.sleep(randtime)
return {"jobPart1_results":self.info,'jobPart2_time':randtime}
except:
print("error:do_jobPart_2")
if __name__ == '__main__':
print('==================================================')
print('============ START PROCESSING ====================')
print('==================================================')
#===============================================
#= Establish communication queues
q_jobPart_1 = multiprocessing.JoinableQueue()
q_jobPart_2 = multiprocessing.JoinableQueue()
q_reportTasks = multiprocessing.JoinableQueue()
q_results = multiprocessing.Queue()
#===============================================
#= Start workersReporter !!! Should always be just 1 worker !!!
numJobs = 90
numWorkers_jobPart1 = 2
numWorkers_jobPart2 = 2
workersJobPart_1 = [ Worker(q_jobPart_1, q_jobPart_2, TYPE_JOB_PART_1) for i in range(numWorkers_jobPart1) ]
workersJobPart_2 = [ Worker(q_jobPart_2, q_reportTasks, TYPE_JOB_PART_2) for i in range(numWorkers_jobPart2) ]
workerJobReporter = Reporter(q_reportTasks, q_results, numJobs, numWorkers_jobPart1, numWorkers_jobPart2, q_jobPart_1, q_jobPart_2)
#===============================================
#= Start the workers
print("Main PID:%s" % os.getpid())
for w in workersJobPart_1:
w.start()
print("JobPart_1 PID=%s" % w.pid)
for w in workersJobPart_2:
w.start()
print("JobPart_2 PID=%s" % w.pid)
workerJobReporter.start()
print("JobReporter PID=%s" % workerJobReporter.pid)
#= Start the timer and add tasks to the queues
time_start = datetime.datetime.now()
for i in range(numJobs):
q_jobPart_1.put(do_jobPart_1(i, time_start))
#= Add poison pill for each jobPart_1 workers
for i in range(numWorkers_jobPart1):
q_jobPart_1.put(None)
q_jobPart_1.join()
print("JobPart_1 workers terminated")
q_jobPart_2.join()
print("JobPart_2 workers terminated")
q_reportTasks.put(None)
q_reportTasks.join()
workerJobReporter.terminate()
print("Reporter terminated")
print("FINISHED")
Here's an example. I have one producer and several consumers.
#!/usr/bin/env python2
from multiprocessing import Process, Queue
import time
def counter(low, high):
current = low
while current <= high:
yield current
current += 1
def put_tasks(q):
for c in counter(0, 9):
q.put(c)
time.sleep(.1)
print('put_tasks: no more tasks')
def work(id, q):
while True:
task = q.get()
print('process %d: %s' % (id, task))
time.sleep(.3)
print('process %d: done' % id)
if __name__ == '__main__':
q = Queue(2)
task_gen = Process(target=put_tasks, args=(q,))
processes = [Process(target=work, args=(id, q)) for id in range(0, 3)]
task_gen.start()
for p in processes:
p.start()
for p in processes:
p.join()
counter is just a number generator for put_tasks. Typically, I would have several thousands of tasks instead of just 10 like in this example. The point of this code is to feed the queue with tasks incrementally.
The problem is that consumers cannot know in advance how many tasks they will have to process but the put_tasks function does know when it's done (it then prints no more tasks).
Sample output:
process 2: 0
process 0: 1
process 1: 2
process 2: 3
process 0: 4
process 1: 5
process 2: 6
process 0: 7
process 1: 8
process 2: 9
put_tasks: no more tasks
All tasks get processed but the program then hangs (each process gets stuck on q.get(). I would like it to terminate when all tasks have been processed without sacrificing speed or safety (no ugly timeouts).
Any ideas?
I suggest to put a sentinel value to put on the end of the queue
def put_tasks(q):
...
print('put_tasks: no more tasks')
q.put(end_of_queue)
def work(id, q):
while True:
task = q.get()
if task == end_of_queue:
q.put(task)
print("DONE")
return
print('process %d: %s' % (id, task))
time.sleep(.1)
print('process %d: done' % id)
class Sentinel:
def __init__(self, id):
self.id = id
def __eq__(self, other):
if isinstance(other, Sentinel):
return self.id == other.id
return NotImplemented
if __name__ == '__main__':
q = Queue(2)
end_of_queue = Sentinel("end of queue")
task_gen = Process(target=put_tasks, args=(q,))
processes = [Process(target=work, args=(id, q)) for id in range(0, 3)]
...
I don't seem to be able to use object() as the sentinel because the threads seem to have access different instances, so they don't compare equal.
If you ever wish to generate random sentinels you can use the uuid module to generate random ids:
import uuid
class Sentinel:
def __init__(self):
self.id = uuid.uuid4()
def __eq__(self, other):
if isinstance(other, Sentinel):
return self.id == other.id
return NotImplemented
Finally, zch used None for a sentinel which is perfectly adequate as long as the queue cannot have None in. The sentinel method will work for mostly-arbitrary arguments.
The simplest way is to add to the queue something that tells consumers all work is done.
number_of_consumers = 3
def put_tasks(q):
for c in counter(0, 9):
q.put(c)
time.sleep(.1)
print('put_tasks: no more tasks')
for i in range(number_of_consumers):
q.put(None)
def work(id, q):
while True:
task = q.get()
if task is None:
break
print('process %d: %s' % (id, task))
time.sleep(.3)
print('process %d: done' % id)
I recently looked into the same question and found an alternate answer to the above, in the Python documentation
It looks like the "correct" way to do this is with the Queue.task_done() method, i.e.:
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = Queue()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
I'm trying to use a queue with the multiprocessing library in Python. After executing the code below (the print statements work), but the processes do not quit after I call join on the Queue and there are still alive. How can I terminate the remaining processes?
Thanks!
def MultiprocessTest(self):
print "Starting multiprocess."
print "Number of CPUs",multiprocessing.cpu_count()
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = multiprocessing.JoinableQueue()
for i in range(num_procs):
p = multiprocessing.Process(target=worker)
p.daemon = True
p.start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
print "q close"
q.join()
#q.close()
print "Finished everything...."
print "num active children:",multiprocessing.active_children()
try this:
import multiprocessing
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
for item in iter( q.get, None ):
do_work(item)
q.task_done()
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
procs.append( multiprocessing.Process(target=worker) )
procs[-1].daemon = True
procs[-1].start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
q.join()
for p in procs:
q.put( None )
q.join()
for p in procs:
p.join()
print "Finished everything...."
print "num active children:", multiprocessing.active_children()
Your workers need a sentinel to terminate, or they will just sit on the blocking reads. Note that using sleep on the Q instead of join on the P lets you display status information etc.
My preferred template is:
def worker(q,nameStr):
print 'Worker %s started' %nameStr
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q.task_done()
print 'Worker %s Finished' % nameStr
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
nameStr = 'Worker_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
p.daemon = True
p.start()
procs.append(p)
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
for i in range(num_procs):
q.put(None) # send termination sentinel, one for each process
while not q.empty(): # wait for processing to finish
sleep(1) # manage timeouts and status updates etc.
Here is a sentinel-free method for the relatively simple case where you put a number of tasks on a JoinableQueue, then launch worker processes that consume the tasks and exit once they read the queue "dry". The trick is to use JoinableQueue.get_nowait() instead of get(). get_nowait(), as the name implies, tries to get a value from the queue in a non-blocking manner and if there's nothing to be gotten then a queue.Empty exception is raised. The worker handles this exception by exiting.
Rudimentary code to illustrate the principle:
import multiprocessing as mp
from queue import Empty
def worker(q):
while True:
try:
work = q.get_nowait()
# ... do something with `work`
q.task_done()
except Empty:
break # completely done
# main
worknum = 4
jq = mp.JoinableQueue()
# fill up the task queue
# let's assume `tasks` contains some sort of data
# that your workers know how to process
for task in tasks:
jq.put(task)
procs = [ mp.Process(target=worker, args=(jq,)) for _ in range(worknum) ]
for p in procs:
p.start()
for p in procs:
p.join()
The advantage is that you do not need to put the "poison pills" on the queue so the code is a bit shorter.
IMPORTANT : in more complex situations where producers and consumers use the same queue in an "interleaved" manner and the workers may have to wait for new tasks to come along, the "poison pill" approach should be used. My suggestion above is for simple cases where the workers "know" that if the task queue is empty, then there's no point hanging around any more.
You have to clear the queue before joining the process, but q.empty() is unreliable.
The best way to clear the queue is to count the number of successful gets or loop until you receive a sentinel value, just like a socket with a reliable network.
The code below may not be very relevant but I post it for your comments/feedbacks so we can learn together. Thank you!
import multiprocessing
def boss(q,nameStr):
source = range(1024)
for item in source:
q.put(nameStr+' '+str(item))
q.put(None) # send termination sentinel, one for each process
def worker(q,nameStr):
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q = multiprocessing.Queue()
procs = []
num_procs = 4
for i in range(num_procs):
nameStr = 'ID_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
procs.append(p)
p = multiprocessing.Process(target=boss, args=(q,nameStr))
procs.append(p)
for j in procs:
j.start()
for j in procs:
j.join()