Terminating all sub-processes after an event - python

I have a Python multiprocessing scenario which I have simplified for my question here. There are x jobs to be processed in 2 parts. In my code, the 2 job parts are actually HTTP requests, where part 2 is dependent on the results of part 1. Finally, there is a 3rd part that simply reports how long parts 1 and 2 took and calculates a running average of the time taken across all jobs.
Taking advantage of multiprocessing, I set up 2 process workers for Job Part 1, another 2 workers for Job Part 2, and only 1 Reporter worker. To communicate the time taken for each job part, I am using Queues.
The code I have works fine for a complete run-through of the specified x jobs, BUT I would like to add a timeout/cancel event that stops all the workers and terminates gracefully.
In my code, I use the Reporter worker to check for this event, and when it happens I thought it would simply be a case of consuming the remaining jobs in all the queues and adding poison pills to signal the workers to terminate.
The sub-processes do terminate, BUT it seems control is NOT passed back to the "main" program, and I can still see the main process hanging until I do a Ctrl-C in the command prompt.
Please see my code and tell me where I am going wrong:
#!/usr/bin/python3
import sys, time, datetime, os, math, multiprocessing, random

TYPE_JOB_PART_1 = '1'
TYPE_JOB_PART_2 = '2'


class Worker(multiprocessing.Process):

    def __init__(self, task_queue, result_queue, task_type):
        multiprocessing.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.task_type = task_type

    def run(self):
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                #= Poison pill means shutdown
                print('%s: is exiting: %s ===============' % (self.name, self.task_type))
                self.task_queue.task_done()
                break
            job_response = next_task()
            self.task_queue.task_done()
            if self.task_type == TYPE_JOB_PART_1:
                self.result_queue.put(do_jobPart_2(job_response))
            elif self.task_type == TYPE_JOB_PART_2:
                self.result_queue.put(do_reporting(job_response))
        return
class Reporter(multiprocessing.Process):

    def __init__(self, task_queue, result_queue, num_tasks, num_workers_jobPart1, num_workers_jobPart2, jobPart1_queue, jobPart2_queue):
        multiprocessing.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.jobPart1_queue = jobPart1_queue
        self.jobPart2_queue = jobPart2_queue
        self.num_tasks = num_tasks
        self.num_workers_jobPart1 = num_workers_jobPart1
        self.num_workers_jobPart2 = num_workers_jobPart2
        self.time_start = datetime.datetime.now()  #= Start the timer
        self.time_elapsed = 0
        self.time_wait_to_terminate = 3  #= Define the timeout to terminate all jobs

    def run(self):
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                #= Poison pill means shutdown
                self.task_queue.task_done()
                print('==================================================')
                print('============ END OF PROCESSING ===================')
                print('==================================================')
                break
            job_response = next_task()
            self.task_queue.task_done()
            self.result_queue.put(job_response)
            queueSize = self.result_queue.qsize()

            #= TERMINATION time
            print("================>i:%s" % (queueSize))
            self.time_elapsed = (datetime.datetime.now() - self.time_start).total_seconds()
            if self.time_elapsed > self.time_wait_to_terminate:
                print("TIME IS UP. %s elapsed!" % self.time_wait_to_terminate)
                #= Empty the JobPart_1 queue to relieve the workers
                while not self.jobPart1_queue.empty():
                    self.jobPart1_queue.get()
                    self.jobPart1_queue.task_done()
                #= And add poison pills again
                for i in range(self.num_workers_jobPart1):
                    self.jobPart1_queue.put(None)
                #= Empty the JobPart_2 queue to relieve the workers
                while not self.jobPart2_queue.empty():
                    self.jobPart2_queue.get()
                    self.jobPart2_queue.task_done()
                #= And add poison pills again
                for i in range(self.num_workers_jobPart2):
                    self.jobPart2_queue.put(None)
                #= Empty the report queue to relieve the reporter itself
                while not self.task_queue.empty():
                    self.task_queue.get()
                    self.task_queue.task_done()
                print("TIME IS UP: workers stopped, Reporter shutting itself down....")
                break

            #= Results queue is filled up when count = num_tasks, so give Poison pill to shutdown JobPart_2 workers
            if queueSize == self.num_tasks:
                for i in range(self.num_workers_jobPart2):
                    self.jobPart2_queue.put(None)
                print("JobPart_2 workers will be poisoned")
        return
class do_reporting(object):

    def __init__(self, info):
        self.info = info

    def __call__(self):
        try:
            print("%s:do_reporting - is RUNNING " % (self.info['jobPart1_results']['i']))
            randtime = 0.5 * random.random()
            time.sleep(randtime)
            print('jobPart1_time:%s, jobPart2_time:%s, report_time:%s' % (self.info["jobPart1_results"]["jobPart1_time"], self.info["jobPart2_time"], randtime))
            return {'results': self.info, 'report_time': randtime}
        except:
            print("error:do_reporting")


class do_jobPart_1(object):

    def __init__(self, i, t0):
        self.t0 = t0
        self.i = i

    def __call__(self):
        try:
            print("%s:do_jobPart_1 - is RUNNING " % self.i)
            randtime = 0.5 * random.random()
            time.sleep(randtime)
            time_elapsed = (datetime.datetime.now() - self.t0).total_seconds()
            return {'i': self.i, 't0': self.t0, 'time_elapsed_job1': time_elapsed, 'jobPart1_time': randtime}
        except:
            print("error:do_jobPart_1")


class do_jobPart_2(object):

    def __init__(self, info):
        self.info = info

    def __call__(self):
        try:
            print("%s:do_jobPart_2 - is RUNNING " % (self.info['i']))
            randtime = 0.5 * random.random()
            time.sleep(randtime)
            return {"jobPart1_results": self.info, 'jobPart2_time': randtime}
        except:
            print("error:do_jobPart_2")
if __name__ == '__main__':
    print('==================================================')
    print('============ START PROCESSING ====================')
    print('==================================================')

    #===============================================
    #= Establish communication queues
    q_jobPart_1 = multiprocessing.JoinableQueue()
    q_jobPart_2 = multiprocessing.JoinableQueue()
    q_reportTasks = multiprocessing.JoinableQueue()
    q_results = multiprocessing.Queue()

    #===============================================
    #= Start workersReporter !!! Should always be just 1 worker !!!
    numJobs = 90
    numWorkers_jobPart1 = 2
    numWorkers_jobPart2 = 2
    workersJobPart_1 = [Worker(q_jobPart_1, q_jobPart_2, TYPE_JOB_PART_1) for i in range(numWorkers_jobPart1)]
    workersJobPart_2 = [Worker(q_jobPart_2, q_reportTasks, TYPE_JOB_PART_2) for i in range(numWorkers_jobPart2)]
    workerJobReporter = Reporter(q_reportTasks, q_results, numJobs, numWorkers_jobPart1, numWorkers_jobPart2, q_jobPart_1, q_jobPart_2)

    #===============================================
    #= Start the workers
    print("Main PID:%s" % os.getpid())
    for w in workersJobPart_1:
        w.start()
        print("JobPart_1 PID=%s" % w.pid)
    for w in workersJobPart_2:
        w.start()
        print("JobPart_2 PID=%s" % w.pid)
    workerJobReporter.start()
    print("JobReporter PID=%s" % workerJobReporter.pid)

    #= Start the timer and add tasks to the queues
    time_start = datetime.datetime.now()
    for i in range(numJobs):
        q_jobPart_1.put(do_jobPart_1(i, time_start))
    #= Add a poison pill for each jobPart_1 worker
    for i in range(numWorkers_jobPart1):
        q_jobPart_1.put(None)

    q_jobPart_1.join()
    print("JobPart_1 workers terminated")
    q_jobPart_2.join()
    print("JobPart_2 workers terminated")

    q_reportTasks.put(None)
    q_reportTasks.join()
    workerJobReporter.terminate()
    print("Reporter terminated")
    print("FINISHED")

Related

Python multiprocessing task queue

I'm experimenting with the multiprocessing module and I'm getting some strange behavior. I have a list of tasks, which I first enqueue to a task_queue. Then I start all processes, which have access to both the task queue and the result queue. After the processes have started, I actively check for new content in the result queue while there are active processes.
The weird behavior is that when I start the script below, 4 of the processes exit immediately, and all the work is done by one process. No except Exception clauses are entered.
from multiprocessing import Process, Queue
from time import sleep, time
from queue import Empty


def function_doing_heavy_computation(task):
    return task + 1


def service_function(func, tasks_q, result_q):
    """
    :param func: user passed function that takes one argument - the task and returns the result of processing the task
    :param tasks_q:
    :param result_q:
    """
    while True:
        try:
            task = tasks_q.get_nowait()
        except Empty:
            result_q.close()  # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue.close
            result_q.join_thread()
            print("Worker finished ")
            break
        except Exception as ex:
            print(str(ex))

        try:
            res = func(task)
        except Exception as ex:
            print(str(ex))

        result_q.put_nowait(res)


def multiproc(func, all_tasks, num_procs):
    result = []
    task_queue, result_queue = Queue(), Queue()

    # add the tasks to the task queue
    start_put_tasks = time()
    for x in all_tasks:
        task_queue.put_nowait(x)
    print("Finished adding tasks in %.2f" % (time() - start_put_tasks))

    # create the processes and pass them the task and result queue
    start_create_procs = time()
    procs = []
    for _ in range(num_procs):
        p = Process(target=service_function, args=(func, task_queue, result_queue))
        procs.append(p)
    for p in procs:
        p.start()
    print("Started %i workers in %.2f" % (len(procs), time() - start_create_procs))

    # collect the results in a list, and return it
    start_drain_queue = time()
    liveprocs = list(procs)
    while liveprocs:
        # drain the current contents of the result_queue
        while True:
            try:
                result.append(result_queue.get_nowait())
            except Empty:
                break
        # set the currently active procs. while loop will exit if all procs have terminated
        liveprocs = [p for p in procs if p.is_alive()]
        # no process has put a result/all ready results have been drained. wait for new results to arrive
        sleep(.1)
    print("Finished draining result queue in %.2f" % (time() - start_drain_queue))

    if len(result) != len(all_tasks):
        raise RuntimeError("Only %i/%i tasks processed" % (len(result), len(all_tasks)))
    return result


if __name__ == '__main__':
    start = time()
    # a task is just a number here.
    # normally more tasks than workers. i
    tasks = range(100000)
    result = multiproc(func=function_doing_heavy_computation, all_tasks=tasks, num_procs=5)
    print("Done in %s seconds" % str(time() - start))
    assert len(tasks) == len(result)
    print("Processed %i tasks" % len(result))

Handle multiprocess in python

My code processes some parallel Perforce tasks while showing a progress bar and letting the user terminate the job whenever they want. The problem is that when the user clicks the close button, the thread function is not killed; the lock is released and the main UI thread is unblocked.
The p4.run_sync() call does not terminate when the Cancel button is clicked.
def P4SyncLibrary(args, que):
    syncType = args[0]
    view = args[1]
    p4 = P4CreateConnection(disable_tmp_cleanup=True)
    try:
        p4.run_sync(view)
    except P4Exception:
        for e in p4.errors:
            print "SyncError: - %s" % e
    p4.disconnect()
    que.put(None)


class CreateJob(QtGui.QDialog):
    def __init__(self, thread, args):
        QtGui.QDialog.__init__(self)
        self.ui = Ui_ProgressBar()
        self.ui.setupUi(self)
        self.ui.cancel.clicked.connect(self.closeEvent)
        self.ui.cancel.setIcon(QtGui.QIcon(QtGui.QPixmap("%s/delete.xpm" % resources)))
        self.threadControl = ThreadControl(thread=thread, args=args)
        self.connect(self.threadControl, QtCore.SIGNAL("__updateProgressBar(int)"), self.__updateProgressBar)
        self.threadControl.finished.connect(self.closeEvent)
        self.threadControl.start()

    #QtCore.pyqtSlot(int)
    def __updateProgressBar(self, val):
        self.ui.progressBar.setValue(val)
        self.setWindowTitle("Processing: {0}%".format(val))

    def closeEvent(self, QCloseEvent=None):
        if self.threadControl.isRunning():
            self.threadControl.stop()
            self.threadControl.wait()
        if QCloseEvent: QtGui.QDialog.closeEvent(self, QCloseEvent)
        else: self.close()

    def getResults(self):
        return self.threadControl.resultDict


class ThreadControl(QtCore.QThread):
    stopFlag = 0

    def __init__(self, thread=None, args=None):
        super(ThreadControl, self).__init__()
        self.args = args
        self.thread = thread
        self.resultDict = []

    def run(self):
        threads = {}
        queue = multiprocessing.Queue()
        for arg in self.args:
            process = multiprocessing.Process(target=self.thread, args=(arg, queue))
            process.start()
            threads[process] = 1  ## ACTIVE thread

        # WAIT TILL ALL PROCESSES COMPLETE
        completedThreads = 0
        total = len(threads.keys())
        while completedThreads != total:
            if self.stopFlag:
                for t in threads.keys():
                    if threads[t] == 1:
                        t.terminate()
                        t.join()
                        threads[t] = 0
                        completedThreads += 1
            else:
                for t in threads.keys():
                    if self.stopFlag: break  ## Process threads termination
                    elif threads[t] == 1 and not t.is_alive():
                        threads[t] = 0
                        completedThreads += 1
                        self.resultDict.append(queue.get())
            self.emit(QtCore.SIGNAL('__updateProgressBar(int)'), (completedThreads*100)/total)
            sleep(0.5)  ## Prevent CPU from overloading

    def stop(self):
        self.stopFlag = 1
A job is created using an instance of CreateJob:
CreateJob(thread=P4SyncLibrary, args=P4Libraries).exec_()
The only solution I could come up with is to pass the p4 object to the calling thread as an argument, so that the p4 server connection can be disconnected when the user wants to cancel the job.
def P4SyncLibrary(p4, args, que):
    syncType = args[0]
    view = args[1]
    try:
        p4.run_sync(view)
    except P4Exception:
        for e in p4.errors:
            print "SyncError: - %s" % e
    que.put(None)


class ThreadControl(QtCore.QThread):
    ...

    def run(self):
        threads = {}
        queue = multiprocessing.Queue()
        for arg in self.args:
            connection = P4CreateConnection(disable_tmp_cleanup=True)
            if connection.connected():
                process = multiprocessing.Process(target=self.thread, args=(connection, arg, queue))
                process.start()
                threads[process] = {
                    'isAlive': True,
                    'connection': connection
                }

        # WAIT TILL ALL PROCESSES COMPLETE
        completedThreads = 0
        total = len(threads.keys())
        while completedThreads != total:
            if self._stop:
                for t in threads.keys():
                    if threads[t]['isAlive']:
                        threads[t]['connection'].disconnect()
                        t.terminate()
                        t.join()
                        threads[t]['isAlive'] = False
                        completedThreads += 1
            else:
                for t in threads.keys():
                    if self._stop: break  ## Process threads termination
                    elif threads[t]['isAlive'] and not t.is_alive():
                        threads[t]['connection'].disconnect()
                        threads[t]['isAlive'] = False
                        completedThreads += 1
                        self.results.append(queue.get())
            self.emit(QtCore.SIGNAL('__updateProgressBar(int)'), (completedThreads*100)/total)
            sleep(0.5)  ## Prevent CPU from overloading

Delete Objects in a List as Passed to Multiprocessing

I need to pass each object in a large list to a function. After the function completes I no longer need the object passed to the function and would like to delete the object to save memory. If I were working with a single process I would do the following:
result = []
while len(mylist) > 0:
    result.append(myfunc(mylist.pop()))
As I loop over mylist I pop off each object in the list such that the object is no longer stored in mylist after it's passed to my function. How do I achieve this same effect in parallel using multiprocessing?
A simple consumer example (credits go here):
import multiprocessing
import time
import random


class Consumer(multiprocessing.Process):
    def __init__(self, task_queue, result_queue):
        multiprocessing.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            task = self.task_queue.get()
            if task is None:
                # Poison pill means shutdown
                self.task_queue.task_done()
                break
            answer = task.process()
            self.task_queue.task_done()
            self.result_queue.put(answer)
        return


class Task(object):
    def process(self):
        time.sleep(0.1)  # pretend to take some time to do the work
        return random.randint(0, 100)


if __name__ == '__main__':
    # Establish communication queues
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers
    num_consumers = multiprocessing.cpu_count() * 2
    consumers = [Consumer(tasks, results) for i in xrange(num_consumers)]
    for consumer in consumers:
        consumer.start()

    # Enqueue jobs
    num_jobs = 10
    for _ in xrange(num_jobs):
        tasks.put(Task())

    # Add a poison pill for each consumer
    for _ in xrange(num_consumers):
        tasks.put(None)

    # Wait for all tasks to finish
    tasks.join()

    # Start printing results
    while num_jobs:
        result = results.get()
        print 'Result:', result
        num_jobs -= 1
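
To get the pop-as-you-go behaviour asked about above, the enqueue step can be fed by popping from the source list, so the parent drops its reference to each object as soon as it has been handed over. A sketch against the consumer setup above, assuming mylist holds the task objects; note that multiprocessing pickles each object into the queue, so this only frees memory in the parent process:

# Hand each object to the consumers and drop the parent's reference right away
while mylist:
    tasks.put(mylist.pop())

# One poison pill per consumer, then wait for the queue to drain
for _ in range(num_consumers):
    tasks.put(None)
tasks.join()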

Process vs. Thread with regards to using Queue()/deque() and class variable for communication and "poison pill"

I would like to create either a Thread or a Process which runs forever in a while True loop.
I need to send and receive data to the worker in the form of queues, either a multiprocessing.Queue() or a collections.deque(). I prefer to use collections.deque() as it is significantly faster.
I also need to be able to kill the worker eventually (as it runs in a while True loop). Here is some test code I've put together to try to understand the differences between Threads, Processes, Queues, and deques:
import time
from multiprocessing import Process, Queue
from threading import Thread
from collections import deque


class ThreadingTest(Thread):
    def __init__(self, q):
        super(ThreadingTest, self).__init__()
        self.q = q
        self.toRun = False

    def run(self):
        print("Started Thread")
        self.toRun = True
        while self.toRun:
            if type(self.q) == type(deque()):
                if self.q:
                    i = self.q.popleft()
                    print("Thread deque: " + str(i))
            elif type(self.q) == type(Queue()):
                if not self.q.empty():
                    i = self.q.get_nowait()
                    print("Thread Queue: " + str(i))

    def stop(self):
        print("Trying to stop Thread")
        self.toRun = False
        while self.isAlive():
            time.sleep(0.1)
        print("Stopped Thread")


class ProcessTest(Process):
    def __init__(self, q):
        super(ProcessTest, self).__init__()
        self.q = q
        self.toRun = False
        self.ctr = 0

    def run(self):
        print("Started Process")
        self.toRun = True
        while self.toRun:
            if type(self.q) == type(deque()):
                if self.q:
                    i = self.q.popleft()
                    print("Process deque: " + str(i))
            elif type(self.q) == type(Queue()):
                if not self.q.empty():
                    i = self.q.get_nowait()
                    print("Process Queue: " + str(i))

    def stop(self):
        print("Trying to stop Process")
        self.toRun = False
        while self.is_alive():
            time.sleep(0.1)
        print("Stopped Process")


if __name__ == '__main__':
    q = Queue()
    t1 = ProcessTest(q)
    t1.start()

    for i in range(10):
        if type(q) == type(deque()):
            q.append(i)
        elif type(q) == type(Queue()):
            q.put_nowait(i)

    time.sleep(1)
    t1.stop()
    t1.join()

    if type(q) == type(deque()):
        print(q)
    elif type(q) == type(Queue()):
        while q.qsize() > 0:
            print(str(q.get_nowait()))
As you can see, t1 can be either ThreadingTest or ProcessTest. Also, the queue passed to it can be either a multiprocessing.Queue or a collections.deque.
ThreadingTest works with either a Queue or a deque(). It also exits run() properly when the stop() method is called.
Started Thread
Thread deque: 0
Thread deque: 1
Thread deque: 2
Thread deque: 3
Thread deque: 4
Thread deque: 5
Thread deque: 6
Thread deque: 7
Thread deque: 8
Thread deque: 9
Trying to stop Thread
Stopped Thread
deque([])
ProcessTest is only able to read from the queue if it is of type multiprocessing.Queue. It doesn't work with collections.deque. Furthermore, I am unable to kill the process using stop().
Process Queue: 0
Process Queue: 1
Process Queue: 2
Process Queue: 3
Process Queue: 4
Process Queue: 5
Process Queue: 6
Process Queue: 7
Process Queue: 8
Process Queue: 9
Trying to stop Process
I'm trying to figure out why. Also, what would be the best way to use a deque with a process? And how would I go about killing the process using some sort of stop() method?
You can't use a collections.deque to pass data between two multiprocessing.Process instances, because collections.deque is not process-aware. multiprocessing.Queue writes its contents to a multiprocessing.Pipe internally, which means that data in it can be enqueued in one process and retrieved in another. collections.deque doesn't have that kind of plumbing, so it won't work. When you write to the deque in one process, the deque instance in the other process won't be affected at all; they're completely separate instances.
A similar issue is happening to your stop() method. You're changing the value of toRun in the main process, but this won't affect the child at all. They're completely separate instances. The best way to end the child would be to send some sentinel to the Queue. When you get the sentinel in the child, break out of the infinite loop:
def run(self):
    print("Started Process")
    self.toRun = True
    while self.toRun:
        if type(self.q) == type(deque()):
            if self.q:
                i = self.q.popleft()
                print("Process deque: " + str(i))
        elif type(self.q) == type(Queue()):
            if not self.q.empty():
                i = self.q.get_nowait()
                if i is None:
                    break  # Got sentinel, so break
                print("Process Queue: " + str(i))

def stop(self):
    print("Trying to stop Process")
    self.q.put(None)  # Send sentinel
    while self.is_alive():
        time.sleep(0.1)
    print("Stopped Process")
Edit:
If you actually do need deque semantics between two processes, you can use a custom multiprocessing.Manager() to create a shared deque in a Manager process, and each of your Process instances will get a proxy to it:
import time
from multiprocessing import Process
from multiprocessing.managers import SyncManager
from collections import deque

SyncManager.register('deque', deque)

def Manager():
    m = SyncManager()
    m.start()
    return m

class ProcessTest(Process):
    def __init__(self, q):
        super(ProcessTest, self).__init__()
        self.q = q
        self.ctr = 0

    def run(self):
        print("Started Process")
        self.toRun = True
        while self.toRun:
            if self.q._getvalue():
                i = self.q.popleft()
                if i is None:
                    break
                print("Process deque: " + str(i))

    def stop(self):
        print("Trying to stop Process")
        self.q.append(None)
        while self.is_alive():
            time.sleep(0.1)
        print("Stopped Process")

if __name__ == '__main__':
    m = Manager()
    q = m.deque()
    t1 = ProcessTest(q)
    t1.start()

    for i in range(10):
        q.append(i)

    time.sleep(1)
    t1.stop()
    t1.join()
    print(q)
Note that this probably isn't going to be faster than a multiprocessing.Queue, though, since there's an IPC cost for every time you access the deque. It's also a much less natural data structure for passing messages the way you are.
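A rough way to see that cost is to time the per-item overhead of both structures from a single process. This is only an illustrative sketch (absolute numbers will vary by machine):

import time
from collections import deque
from multiprocessing import Queue
from multiprocessing.managers import SyncManager

SyncManager.register('deque', deque)

if __name__ == '__main__':
    n = 10000

    # Plain multiprocessing.Queue: put and then get n small items.
    q = Queue()
    t0 = time.time()
    for i in range(n):
        q.put(i)
    for i in range(n):
        q.get()
    print("multiprocessing.Queue: %.3fs for %d items" % (time.time() - t0, n))

    # Manager-backed deque: every append/popleft is a round trip to the manager process.
    m = SyncManager()
    m.start()
    d = m.deque()
    t0 = time.time()
    for i in range(n):
        d.append(i)
    for i in range(n):
        d.popleft()
    print("managed deque:         %.3fs for %d items" % (time.time() - t0, n))
    m.shutdown()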

python can't start a new thread

I am building a multi-threaded application.
I have set up a thread pool.
[ A Queue of size N and N Workers that get data from the queue ]
When all tasks are done I use
tasks.join()
where tasks is the queue.
The application seems to run smoothly until suddenly, at some point (after 20 minutes, for example), it terminates with the error
thread.error: can't start new thread
Any ideas?
Edit: The threads are daemon threads and the code is like this:
while True:
    t0 = time.time()
    keyword_statuses = DBSession.query(KeywordStatus).filter(KeywordStatus.status==0).options(joinedload(KeywordStatus.keyword)).with_lockmode("update").limit(100)
    if keyword_statuses.count() == 0:
        DBSession.commit()
        break

    for kw_status in keyword_statuses:
        kw_status.status = 1
    DBSession.commit()

    t0 = time.time()
    w = SWorker(threads_no=32, network_server='http://192.168.1.242:8180/', keywords=keyword_statuses, cities=cities, saver=MySqlRawSave(DBSession), loglevel='debug')
    w.work()
print 'finished'
When are the daemon threads killed?
When the application finishes or when work() finishes?
Look at the thread pool and the worker (it's from a recipe):
from Queue import Queue
from threading import Thread, Event, current_thread
import time

event = Event()

class Worker(Thread):
    """Thread executing tasks from a given tasks queue"""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        '''Start processing tasks from the queue'''
        while True:
            event.wait()
            #time.sleep(0.1)
            try:
                func, args, callback = self.tasks.get()
            except Exception, e:
                print str(e)
                return
            else:
                if callback is None:
                    func(args)
                else:
                    callback(func(args))
                self.tasks.task_done()

class ThreadPool:
    """Pool of threads consuming tasks from a queue"""
    def __init__(self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads): Worker(self.tasks)

    def add_task(self, func, args=None, callback=None):
        '''Add a task to the queue'''
        self.tasks.put((func, args, callback))

    def wait_completion(self):
        '''Wait for completion of all the tasks in the queue'''
        self.tasks.join()

    def broadcast_block_event(self):
        '''blocks running threads'''
        event.clear()

    def broadcast_unblock_event(self):
        '''unblocks running threads'''
        event.set()

    def get_event(self):
        '''returns the event object'''
        return event
Also, maybe the problem is that I create SWorker objects in a loop?
What happens to the old SWorker (garbage collection?)?
There is still not enough code to localize the problem, but I'm sure this is because you don't reuse the threads and start too many of them. Did you see the canonical example from the Queue documentation, http://docs.python.org/library/queue.html (bottom of the page)?
I can reproduce your problem with the following code:
import threading
import Queue

q = Queue.Queue()

def worker():
    item = q.get(block=True)  # sleeps forever for now
    do_work(item)
    q.task_done()

# create an infinite number of worker threads; fails
# after some time with "error: can't start new thread"
while True:
    t = threading.Thread(target=worker)
    t.start()

q.join()  # never reached
Instead you must create a pool of threads with a known number of threads and put your data into the queue, like this:
q = Queue()

def worker():
    while True:
        item = q.get()
        do_work(item)
        q.task_done()

for i in range(num_worker_threads):
    t = Thread(target=worker)
    t.daemon = True
    t.start()

for item in source():
    q.put(item)

q.join()  # block until all tasks are done
UPD: In case you need to stop a thread, you can add a flag to it or send a special marker meaning "stop" to break out of the while loop:
class Worker(Thread):
    break_msg = object()  # just a unique marker object

    def __init__(self):
        Thread.__init__(self)
        self.keep_running = True  # 'continue' is a reserved word, so use another name

    def run(self):
        while self.keep_running:  # can stop and destroy the thread (variant 1)
            msg = queue.get(block=True)
            if msg == self.break_msg:
                return  # will stop and destroy the thread (variant 2)
            do_work()
            queue.task_done()

workers = [Worker() for _ in xrange(num_workers)]
for w in workers:
    w.start()

for task in tasks:
    queue.put(task)

for _ in xrange(num_workers):
    queue.put(Worker.break_msg)  # stop threads after all tasks are done; need as many marks as you have threads

OR

queue.join()  # wait until all tasks are done
for w in workers:
    w.keep_running = False
    queue.put(None)  # wake up any worker blocked in get()
