create threads within processes and share a queue between the threads - python

I am trying to create 3 threads within each of 2 processes and share a queue of the type multiprocessing.JoinableQueue among all threads. The worker_func function simply creates the threads while the thread_func function prints out the values it gets from the queue. The program gets stuck somewhere in the time.sleep or in the get() method of queue. What am I doing wrong? I am running on a Windows computer.
import threading
from multiprocessing import Pool, Manager, JoinableQueue
import multiprocessing
from threading import Thread
import time
def thread_func(q, disp_lock):
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name ,
' reporting for duty')
while True:
time.sleep(0.1)
try:
val = q.get_nowait()
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name , ' got value: ',val)
q.task_done()
except:
with disp_lock:
print('queue is empty: ', q.qsize())
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target= thread_func, args=( q, disp_lock,))
thread.daemon = True
thread.start()
if __name__ == "__main__":
manager = Manager()
lock = manager.Lock()
q1 = JoinableQueue()#manager.Queue()
q1_length = 20
for i in range(q1_length):
q1.put(i)
processes = []
num_processes = 2 # 2 processes
num_threads = 3
for _ in range(num_processes):
p = multiprocessing.Process(target=worker_func, args=( num_threads, q1, lock, )) # create a new Process
p.daemon = True
p.start()
processes.append(p)
q1.join()

You are not allowing the threads to complete their work. Either set them as non-daemon, or explicitly wait for them to join:
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target=thread_func, args=(q, disp_lock,))
thread.daemon = True
thread.start()
threads.append(thread)
# Wait for them to finish
for thread in threads:
thread.join()

Related

How to kill a Process in the middle of execution after using join()?

I want to kill a certain process or all processes in the middle of its execution. My sample code is as follows. How can I do that? Here, I want to kill the processes based on the current time. If the time is divisible by 2, I want to kill the processes, otherwise not.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
proc.join()
if kill_processes:
print("killing")
proc.terminate()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)
This illustrates how to do it by using multiple threads in a manner similar to what's in the answer to the question multiprocessing in stoppable multithreading that I suggested you have a look at.
Basically all that is going on is all the join() calls are done in separate threads, so they won't block the main thread in the main process — which makes it possible for it to kill them.
import threading
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def create_process(lock, i):
proc = Process(target=runTests, args=(2, 4,))
print(f'{proc.name} created')
proc.start()
with lock:
processes.append(proc)
proc.join()
def main(kill_processes):
global processes
N = 3
lock = threading.RLock()
processes = []
print(f'main({kill_processes=})')
for i in range(N):
thread = threading.Thread(target=create_process, args=(lock, i))
thread.start()
while True: # Wait for all processes to have been created.
with lock:
if len(processes) == N:
break
else:
time.sleep(.001)
if kill_processes:
print("Killing the processes")
for proc in processes:
proc.terminate()
print(f'process {proc} terminated')
if __name__ == "__main__":
main(True)
# if round(time.time()) % 2 == 0:
# main(True)
# else:
# main(False)
You can use multiprocessing.Event to signal termination condition to your child processes. Don't join child processes in the main process. Instead let main process and child processes run in their own loop. Check the termination condition in the main loop, and signal it using the multiprocessing.Event to the child processes.
The Event object is passed as an argument to child process. Child process continuously checks if the event is set, and stops its work if so. Main loop checks the termination condition and sets the Event if condition is met (in the below example main loop waits for Ctrl+c).
import multiprocessing as mp
import os
import time
def do_work(a, b, stop_event):
while not stop_event.is_set():
try:
time.sleep(2)
print(f"worker {os.getpid()}: working ...", a + b)
a += 1
b += 1
except KeyboardInterrupt:
print(f"worker {os.getpid()}: received SIGINT. ignore.")
pass
print(f"worker {os.getpid()}: stop_event is set. exit.")
if __name__ == "__main__":
stop_event = mp.Event()
procs = []
for i in range(3):
# p = mp.Process(target=do_work, args=(1, 2, stop_event), daemon=True)
p = mp.Process(target=do_work, args=(1, 2, stop_event))
p.start()
procs.append(p)
while True:
try:
print("main: waiting for termination signal")
time.sleep(1)
except KeyboardInterrupt:
print("main: received termination signal")
stop_event.set()
# wait for the processes to stop
for p in procs:
p.join()
for p in procs:
print(f"worker {p.pid} is terminated: {not p.is_alive()}")
# exit the main loop
break
print("main: bye")
If you want to terminate based on time, use the timeout paramter of join. One way is to set a stop time and as each process is joined, use time remaining as its timeout.
import time
from multiprocessing import Process
def runTests(a, b):
time.sleep(10)
return a + b
def main(kill_processes):
print(kill_processes)
processes = []
end = time.time() + 10 # wait 10 seconds
for i in range(3):
print(i)
proc = Process(target=runTests, args=(2, 4,))
processes.append(proc)
proc.start()
for proc in processes:
if kill_processes:
delta = end - time.time()
else:
delta = None
proc.join(delta)
if kill_processes:
print("killing")
proc.terminate()
proc.join(1)
if proc.is_alive():
proc.kill()
if __name__ == "__main__":
if round(time.time()) % 2 == 0:
main(True)
else:
main(False)

How to kill old threads in python

My multi-threading script raising this error:
thread.error : can't start new thread
when it reached 460 threads:
threading.active_count() = 460
I assume the old threads keeps stack up, since the script didn't kill them. This my code:
import threading
import Queue
import time
import os
import csv
def main(worker):
#Do Work
print worker
return
def threader():
while True:
worker = q.get()
main(worker)
q.task_done()
def main_threader(workers):
global q
global city
q = Queue.Queue()
for x in range(20):
t = threading.Thread(target=threader)
t.daemon = True
print "\n\nthreading.active_count() = " + str(threading.active_count()) + "\n\n"
t.start()
for worker in workers:
q.put(worker)
q.join()
How do I kill the old threads when their job is done? (Is the function returning not enough?)
Python threading API doesn't have any function to kill a thread (nothing like threading.kill(PID)).
That said, you should code some thread-stopping algorithm yourself. For example, your thread should somehow decide that is should terminate (e.g. check some global variable or check whether some signal has been sent) and simply return.
For example:
import threading
nthreads = 7
you_should_stop = [0 for _ in range(nthreads)]
def Athread(number):
while True:
if you_should_stop[number]:
print "Thread {} stopping...".format(number)
return
print "Running..."
for x in range(nthreads):
threading.Thread(target = Athread, args = (x, )).start()
for x in range(nthreads):
you_should_stop[x] = 1
print "\nStopped all threads!"

How to share data between Python processes?

I'm using multiprocessing to create a sub-process to my Python app.
I would like to share data between my parent process and the child process.
it's important to mention that I need to share this asynchronously, means that the child process and the parent process will update the data during the code running.
What would be the best way to perform that?
This is one simple example from python documentation -
from multiprocessing import Process, Queue
def f(q):
q.put([42, None, 'hello'])
if __name__ == '__main__':
q = Queue()
p = Process(target=f, args=(q,))
p.start()
print q.get() # prints "[42, None, 'hello']"
p.join()
You can use pipe as well,
Refer for more details - https://docs.python.org/2/library/multiprocessing.html
Here's an example of multiprocess-multithread and sharing a couple variables:
from multiprocessing import Process, Queue, Value, Manager
from ctypes import c_bool
from threading import Thread
ps = []
def yourFunc(pause, budget):
while True:
print(budget.value, pause.value)
##set value
pause.value = True
....
def multiProcess(threads, pause, budget):
for _ in range(threads):
t = Thread(target=yourFunc(), args=(pause, budget,))
t.start()
ts.append(t)
time.sleep(3)
if __name__ == '__main__':
pause = Value(c_bool, False)
budget = Value('i', 5000)
for i in range(2):
p = Process(target=multiProcess, args=(2, pause, budget))
p.start()
ps.append(p)

python can't start a new thread

I am building a multi threading application.
I have setup a threadPool.
[ A Queue of size N and N Workers that get data from the queue]
When all tasks are done I use
tasks.join()
where tasks is the queue .
The application seems to run smoothly until suddently at some point (after 20 minutes in example) it terminates with the error
thread.error: can't start new thread
Any ideas?
Edit: The threads are daemon Threads and the code is like:
while True:
t0 = time.time()
keyword_statuses = DBSession.query(KeywordStatus).filter(KeywordStatus.status==0).options(joinedload(KeywordStatus.keyword)).with_lockmode("update").limit(100)
if keyword_statuses.count() == 0:
DBSession.commit()
break
for kw_status in keyword_statuses:
kw_status.status = 1
DBSession.commit()
t0 = time.time()
w = SWorker(threads_no=32, network_server='http://192.168.1.242:8180/', keywords=keyword_statuses, cities=cities, saver=MySqlRawSave(DBSession), loglevel='debug')
w.work()
print 'finished'
When the daemon threads are killed?
When the application finishes or when the work() finishes?
Look at the thread pool and the worker (it's from a recipe )
from Queue import Queue
from threading import Thread, Event, current_thread
import time
event = Event()
class Worker(Thread):
"""Thread executing tasks from a given tasks queue"""
def __init__(self, tasks):
Thread.__init__(self)
self.tasks = tasks
self.daemon = True
self.start()
def run(self):
'''Start processing tasks from the queue'''
while True:
event.wait()
#time.sleep(0.1)
try:
func, args, callback = self.tasks.get()
except Exception, e:
print str(e)
return
else:
if callback is None:
func(args)
else:
callback(func(args))
self.tasks.task_done()
class ThreadPool:
"""Pool of threads consuming tasks from a queue"""
def __init__(self, num_threads):
self.tasks = Queue(num_threads)
for _ in range(num_threads): Worker(self.tasks)
def add_task(self, func, args=None, callback=None):
''''Add a task to the queue'''
self.tasks.put((func, args, callback))
def wait_completion(self):
'''Wait for completion of all the tasks in the queue'''
self.tasks.join()
def broadcast_block_event(self):
'''blocks running threads'''
event.clear()
def broadcast_unblock_event(self):
'''unblocks running threads'''
event.set()
def get_event(self):
'''returns the event object'''
return event
ALSo maybe the problem it's because I create SWorker objects in a loop?
What happens with the old SWorker (garbage collection ?) ?
There is still not enough code for localize the problem, but I'm sure that this is because you don't utilize the threads and start too much of them. Did you see canonical example from Queue python documentation http://docs.python.org/library/queue.html (bottom of the page)?
I can reproduce your problem with the following code:
import threading
import Queue
q = Queue.Queue()
def worker():
item = q.get(block=True) # sleeps forever for now
do_work(item)
q.task_done()
# create infinite number of workers threads and fails
# after some time with "error: can't start new thread"
while True:
t = threading.Thread(target=worker)
t.start()
q.join() # newer reached this
Instead you must create the poll of threads with known number of threads and put your data to queue like:
q = Queue()
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
for i in range(num_worker_threads):
t = Thread(target=worker)
t.daemon = True
t.start()
for item in source():
q.put(item)
q.join() # block until all tasks are done
UPD: In case you need to stop some thread, you can add a flag to it or send a special mark means "stop" for break while loop:
class Worker(Thread):
break_msg = object() # just uniq mark sign
def __init__(self):
self.continue = True
def run():
while self.continue: # can stop and destroy thread, (var 1)
msg = queue.get(block=True)
if msg == self.break_msg:
return # will stop and destroy thread (var 2)
do_work()
queue.task_done()
workers = [Worker() for _ in xrange(num_workers)]
for w in workers:
w.start()
for task in tasks:
queue.put(task)
for _ in xrange(num_workers):
queue.put(Worker.break_msg) # stop thread after all tasks done. Need as many messages as many threads you have
OR
queue.join() # wait until all tasks done
for w in workers:
w.continue = False
w.put(None)

Multiprocessing Queue in Python

I'm trying to use a queue with the multiprocessing library in Python. After executing the code below (the print statements work), but the processes do not quit after I call join on the Queue and there are still alive. How can I terminate the remaining processes?
Thanks!
def MultiprocessTest(self):
print "Starting multiprocess."
print "Number of CPUs",multiprocessing.cpu_count()
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
while True:
item = q.get()
do_work(item)
q.task_done()
q = multiprocessing.JoinableQueue()
for i in range(num_procs):
p = multiprocessing.Process(target=worker)
p.daemon = True
p.start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
print "q close"
q.join()
#q.close()
print "Finished everything...."
print "num active children:",multiprocessing.active_children()
try this:
import multiprocessing
num_procs = 4
def do_work(message):
print "work",message ,"completed"
def worker():
for item in iter( q.get, None ):
do_work(item)
q.task_done()
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
procs.append( multiprocessing.Process(target=worker) )
procs[-1].daemon = True
procs[-1].start()
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
q.join()
for p in procs:
q.put( None )
q.join()
for p in procs:
p.join()
print "Finished everything...."
print "num active children:", multiprocessing.active_children()
Your workers need a sentinel to terminate, or they will just sit on the blocking reads. Note that using sleep on the Q instead of join on the P lets you display status information etc.
My preferred template is:
def worker(q,nameStr):
print 'Worker %s started' %nameStr
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q.task_done()
print 'Worker %s Finished' % nameStr
q.task_done()
q = multiprocessing.JoinableQueue()
procs = []
for i in range(num_procs):
nameStr = 'Worker_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
p.daemon = True
p.start()
procs.append(p)
source = ['hi','there','how','are','you','doing']
for item in source:
q.put(item)
for i in range(num_procs):
q.put(None) # send termination sentinel, one for each process
while not q.empty(): # wait for processing to finish
sleep(1) # manage timeouts and status updates etc.
Here is a sentinel-free method for the relatively simple case where you put a number of tasks on a JoinableQueue, then launch worker processes that consume the tasks and exit once they read the queue "dry". The trick is to use JoinableQueue.get_nowait() instead of get(). get_nowait(), as the name implies, tries to get a value from the queue in a non-blocking manner and if there's nothing to be gotten then a queue.Empty exception is raised. The worker handles this exception by exiting.
Rudimentary code to illustrate the principle:
import multiprocessing as mp
from queue import Empty
def worker(q):
while True:
try:
work = q.get_nowait()
# ... do something with `work`
q.task_done()
except Empty:
break # completely done
# main
worknum = 4
jq = mp.JoinableQueue()
# fill up the task queue
# let's assume `tasks` contains some sort of data
# that your workers know how to process
for task in tasks:
jq.put(task)
procs = [ mp.Process(target=worker, args=(jq,)) for _ in range(worknum) ]
for p in procs:
p.start()
for p in procs:
p.join()
The advantage is that you do not need to put the "poison pills" on the queue so the code is a bit shorter.
IMPORTANT : in more complex situations where producers and consumers use the same queue in an "interleaved" manner and the workers may have to wait for new tasks to come along, the "poison pill" approach should be used. My suggestion above is for simple cases where the workers "know" that if the task queue is empty, then there's no point hanging around any more.
You have to clear the queue before joining the process, but q.empty() is unreliable.
The best way to clear the queue is to count the number of successful gets or loop until you receive a sentinel value, just like a socket with a reliable network.
The code below may not be very relevant but I post it for your comments/feedbacks so we can learn together. Thank you!
import multiprocessing
def boss(q,nameStr):
source = range(1024)
for item in source:
q.put(nameStr+' '+str(item))
q.put(None) # send termination sentinel, one for each process
def worker(q,nameStr):
while True:
item = q.get()
if item is None: # detect sentinel
break
print '%s processed %s' % (nameStr,item) # do something useful
q = multiprocessing.Queue()
procs = []
num_procs = 4
for i in range(num_procs):
nameStr = 'ID_'+str(i)
p = multiprocessing.Process(target=worker, args=(q,nameStr))
procs.append(p)
p = multiprocessing.Process(target=boss, args=(q,nameStr))
procs.append(p)
for j in procs:
j.start()
for j in procs:
j.join()

Categories

Resources