Quit signal when waiting for blocking read from queue.Queue - python

In many cases I have a worker thread which pops data from a Queue and acts on it. When a certain event occurs, I want my worker thread to stop. The simple solution is to add a timeout to the get call and check the Event/flag every time the get times out. This, however, has two problems:

- It causes an unnecessary context switch.
- It delays the shutdown until a timeout occurs.

Is there any better way to listen for both a stop event and new data in the Queue? Is it possible to listen to two Queues at the same time and block until there's data in one of them? (In that case, one could use a second Queue just to trigger the shutdown.)
The solution I'm currently using:
from queue import Queue, Empty
from threading import Event, Thread
from time import sleep

def worker(exit_event, queue):
    print("Worker started.")
    while not exit_event.is_set():
        try:
            data = queue.get(timeout=10)
            print("got {}".format(data))
        except Empty:
            pass
    print("Worker quit.")

if __name__ == "__main__":
    exit_event = Event()
    queue = Queue()
    th = Thread(target=worker, args=(exit_event, queue))
    th.start()

    queue.put("Testing")
    queue.put("Hello!")

    sleep(2)
    print("Asking worker to quit")
    exit_event.set()
    th.join()
    print("All done..")

I guess you may easily reduce the timeout to 0.1–0.01 sec. A slightly different solution is to use the queue itself to send both data and control commands to the thread:
import queue
import threading
import time

THREADSTOP = 0

class ThreadControl:
    def __init__(self, command):
        self.command = command

def worker(q):
    print("Worker started.")
    while True:
        data = q.get()
        if isinstance(data, ThreadControl):
            if data.command == THREADSTOP:
                break
        print("got {}".format(data))
    print("Worker quit.")

if __name__ == '__main__':
    q = queue.Queue()
    th = threading.Thread(target=worker, args=(q,))
    th.start()

    q.put("Testing")
    q.put("Hello!")

    time.sleep(2)
    print("Asking worker to quit")
    q.put(ThreadControl(command=THREADSTOP))  # sending command
    th.join()
    print("All done..")
Another option is to use sockets instead of queues.
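That last suggestion is terse, so here is a minimal sketch of what it could look like, assuming one socket.socketpair() per channel and select() to block on both at once (the names data_r, quit_w, etc. are illustrative, not from the original answer):

import select
import socket
from threading import Thread

# One connected socket pair per channel; select() can wait on several at once.
data_r, data_w = socket.socketpair()   # data channel
quit_r, quit_w = socket.socketpair()   # shutdown channel

def worker():
    print("Worker started.")
    while True:
        # Block until at least one of the two sockets is readable.
        readable, _, _ = select.select([data_r, quit_r], [], [])
        if quit_r in readable:
            break
        print("got {}".format(data_r.recv(1024).decode()))
    print("Worker quit.")

th = Thread(target=worker)
th.start()
data_w.sendall(b"Testing")
data_w.sendall(b"Hello!")
quit_w.sendall(b"\0")  # any byte wakes the worker for shutdown
th.join()

Note that stream sockets do not preserve message boundaries, so the two sends above may arrive in a single recv(); real code would need some framing (for example a length prefix) on the data channel.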

Related

Python Pool - How do I keep Python Process running when I get a timeout exception?

I am using the Python multiprocessing library. Whenever one of the processes throws a timeout error, my application ends itself. I want to keep the processes up.
I have a function that subscribes to a queue and listens to incoming messages:
def process_msg(i):
    # get a new message from the queue
    # process it
    import time
    time.sleep(10)
    return True
I have created a Pool that creates 6 processes and executes the process_msg() function above.
When the function times out, I want the Pool to call the function again and wait for new messages instead of exiting:
if __name__ == "__main__":
    from multiprocessing import Pool, TimeoutError
    pool = Pool(processes=6)
    collection = range(6)
    val = pool.map_async(process_msg, collection)
    try:
        res = val.get(5)
    except TimeoutError:
        print('timeout here')
    pool.close()
    pool.terminate()
    pool.join()
The code runs and when I get a timeout, the application terminates itself.
What I want it to do is print that the timeout has occurred and call the same function again.
What's the right approach?
Here's a skeleton for a program that works. The main issue you had is the use of pool.terminate, which "Stops the worker processes immediately without completing outstanding work" (see the documentation).
from multiprocessing import Pool, TimeoutError
import time

def process_msg(i):
    # get a new message from the queue
    # process it
    print(f"Starting to sleep, process # {i}")
    time.sleep(10)
    return True

def main():
    print("in main")
    pool = Pool(processes=6)
    collection = range(6)
    print("About to spawn sub processes")
    val = pool.map_async(process_msg, collection)
    while True:
        try:
            print("Waiting for results")
            res = val.get(3)
            print(f"Res is {res}")
            break
        except TimeoutError:
            print("Timeout here")
    print("Closing pool")
    pool.close()
    # pool.terminate()  # do not terminate - it kills the child processes
    print("Joining pool")
    pool.join()
    print("exiting main")

if __name__ == "__main__":
    main()
The output of this code is:
in main
About to spawn sub processes
Waiting for results
Starting to sleep, process # 0
Starting to sleep, process # 1
Starting to sleep, process # 2
Starting to sleep, process # 3
Starting to sleep, process # 4
Starting to sleep, process # 5
Timeout here
Waiting for results
Timeout here
Waiting for results
Timeout here
Waiting for results
Res is [True, True, True, True, True, True]
Closing pool
Joining pool
exiting main

Should you join() a completed daemon Thread?

I have some worker threads consuming data from a pre-populated input queue, and putting results into another queue.
import queue
import threading

worker_count = 8
input_queue = queue.Queue()
output_queue = queue.Queue()

threads = []
for _ in range(worker_count):
    thread = threading.Thread(target=perform_work, args=(input_queue, output_queue))
    thread.daemon = True
    thread.start()
    threads.append(thread)
I am processing the results in the main thread, and I want to make sure I process all of the results.
while True:
    try:
        result = output_queue.get(True, 0.1)
    except queue.Empty:
        pass
    else:
        process_result(result)
    if not any(t.is_alive() for t in threads) and output_queue.empty():
        # All results have been processed, stop.
        break
Is it safe to just use .is_alive() in this case? Or is there a particular reason to use .join() instead?
NOTE: I'm making my threads daemon = True because it makes it easier to debug and terminate the program.
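For illustration, a common way to sidestep both the 0.1 s polling and the is_alive() check is to have each worker put a sentinel on output_queue just before it exits; the main thread then blocks on get() and stops after seeing one sentinel per worker. This is only a sketch: perform_work here is a stand-in for the real worker function, and doubling the item stands in for the real work.

import queue
import threading

SENTINEL = object()
worker_count = 8

def perform_work(input_queue, output_queue):
    while True:
        try:
            item = input_queue.get_nowait()
        except queue.Empty:
            break                        # pre-populated input is exhausted
        output_queue.put(item * 2)       # placeholder for the real work
    output_queue.put(SENTINEL)           # tell the main thread this worker is done

input_queue = queue.Queue()
output_queue = queue.Queue()
for i in range(100):
    input_queue.put(i)

threads = [threading.Thread(target=perform_work,
                            args=(input_queue, output_queue))
           for _ in range(worker_count)]
for t in threads:
    t.start()

finished = 0
while finished < worker_count:
    result = output_queue.get()          # blocks; no timeout polling needed
    if result is SENTINEL:
        finished += 1
    else:
        print(result)                    # i.e. process_result(result)

for t in threads:
    t.join()                             # workers have already exited, so this returns immediately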

Python: prevent signals to propagate to child threads

import threading
import time

def worker(i):
    while True:
        try:
            print(i)
            time.sleep(10)
            break
        except Exception as msg:
            print(msg)

threads = []
for i in range(10):
    t1 = threading.Thread(target=worker, args=(i,))
    threads.append(t1)

for t in threads:
    t.start()

print("started all threads... waiting to be finished")

for t in threads:
    t.join()
If I press ^C while the threads are running, do the threads get the SIGINT?
If so, what can I do from the calling thread to stop SIGINT from propagating to the running threads?
Would a signal handler in the calling thread prevent it?
Or do I need a signal handler for each thread?
If I press ^C while the threads are running, do the threads get the SIGINT?
No. As it says in the documentation:
Python signal handlers are always executed in the main Python thread of the main interpreter, even if the signal was received in another thread.
You can see that this is true with a simple test:
import threading
import time

def worker():
    while True:
        print('Worker working')
        time.sleep(0.5)

worker_thread = threading.Thread(target=worker)
worker_thread.start()

while True:
    print('Parent parenting')
    time.sleep(0.5)
After you send SIGINT with ^C, you will see that the main thread is killed (no more 'Parent parenting' logs) and the child thread continues to run.
In your example, your child threads exit because you break out of their while loops after 10 seconds.
As described in Python's docs, you should use the daemon attribute:

daemon: A boolean value indicating whether this thread is a daemon thread (True) or not (False). This must be set before start() is called, otherwise RuntimeError is raised. Its initial value is inherited from the creating thread; the main thread is not a daemon thread and therefore all threads created in the main thread default to daemon = False.

The entire Python program exits when no alive non-daemon threads are left.

New in version 2.6.

To control the CTRL+C signal, you should capture it by installing a handler with the signal.signal(signal_number, handler) function. Signal handlers are process-wide, so the handler you install in the main thread is in effect no matter which threads are running.
import threading
import time
import signal

def worker(i):
    while True:
        try:
            print(i)
            time.sleep(10)
            break
        except Exception as msg:
            print(msg)

def signal_handler(signal, frame):
    print('You pressed Ctrl+C!')
    print("I will wait for all threads... waiting to be finished")
    for t in threads:
        t.join()

signal.signal(signal.SIGINT, signal_handler)

threads = []
for i in range(10):
    t1 = threading.Thread(target=worker, args=(i,))
    threads.append(t1)

for t in threads:
    t.start()

print("started all threads... waiting to be finished")

for t in threads:
    t.join()

How to make sure queue is empty before exiting main thread

I have a program that has two threads, the main thread and one additional that works on handling jobs from a FIFO queue.
Something like this:
import queue
import threading

q = queue.Queue()

def _worker():
    while True:
        msg = q.get(block=True)
        print(msg)
        q.task_done()

t = threading.Thread(target=_worker)
# t.daemon = True
t.start()

q.put('asdf-1')
q.put('asdf-2')
q.put('asdf-4')
q.put('asdf-4')
What I want to accomplish is basically to make sure the queue is emptied before the main thread exits.
If I set t.daemon to True, the program exits before the queue is emptied; if it's set to False, the program never exits. Is there some way to make sure the thread running the _worker() method clears the queue on main-thread exit?
The comments touch on using .join(), but depending on your use case, using a join may make threading pointless.
I assume that your main thread will be doing things other than adding items to the queue, and may be shut down at any point; you just want to ensure that the queue is empty before the shutdown completes.
At the end of your main thread, you could add a simple empty check in a loop:

import time

while not q.empty():
    time.sleep(1)

If you don't set t.daemon = True then the thread will never finish. Setting the thread as a daemon thread means that it does not keep your program running when the main thread finishes.
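For completeness, here is a minimal sketch of the join-based route the comments touch on, assuming the worker calls task_done() for every item: q.join() blocks until every put() item has been processed, after which the daemon worker can safely be left blocked in get().

import queue
import threading

q = queue.Queue()

def _worker():
    while True:
        msg = q.get()       # block until an item is available
        print(msg)
        q.task_done()       # mark this item as fully processed

t = threading.Thread(target=_worker, daemon=True)
t.start()

for msg in ('asdf-1', 'asdf-2', 'asdf-4', 'asdf-4'):
    q.put(msg)

q.join()  # returns once every put() has been matched by a task_done()
# The daemon worker is still blocked in q.get(), but the queue is empty,
# so the program can exit here without losing any messages.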
Put a special item (e.g. None) in the queue that signals the worker thread to stop.
import queue
import threading

q = queue.Queue()

def _worker():
    while True:
        msg = q.get(block=True)
        if msg is None:
            return
        print(msg)  # do your stuff here

t = threading.Thread(target=_worker)
# t.daemon = True
t.start()

q.put('asdf-1')
q.put('asdf-2')
q.put('asdf-4')
q.put('asdf-4')
q.put(None)
t.join()

In Producer/Consumer pattern, how could I kill the consumer thread?

I run the consumer in another worker thread; the code is as follows:
def Consumer(self):
    while True:
        condition.acquire()
        if not queue:
            condition.wait()
        json = queue.pop()
        clients[0].write_message(json)
        condition.notify()
        condition.release()

t = threading.Thread(target=self.Consumer)
t.start()
However, I find that I cannot kill this worker thread; it sits in wait() forever after the job is done...
I would like to send a signal from the Producer to the Consumer whenever the producer finishes its work; if the consumer receives the signal, the worker thread should exit. Is it possible to do that?
My standard way to notify a consumer thread that it should stop its work is to send a fake message (I rewrote the example to make it runnable):
import threading

condition = threading.Condition()
queue = []

class Client():
    def write_message(self, msg):
        print(msg)

clients = [Client()]
jobdone = object()

def Consumer():
    while True:
        condition.acquire()
        try:
            if not queue:
                condition.wait()
            json = queue.pop()
            if json is jobdone:
                break
            clients[0].write_message(json)
        finally:
            condition.release()

t = threading.Thread(target=Consumer)
t.start()

import time
time.sleep(2)

condition.acquire()
queue.append(jobdone)
condition.notify()
condition.release()
Anyway, consider using queue.Queue, which is standard and makes synchronization simple. Here is how my example becomes:
import threading
import queue
import time

queue = queue.Queue()

class Client():
    def write_message(self, msg):
        print(msg)

clients = [Client()]
jobdone = object()

def Consumer():
    while True:
        json = queue.get()
        if json is jobdone:
            break
        clients[0].write_message(json)

t = threading.Thread(target=Consumer)
t.start()

queue.put("Hello")
queue.put("Word")
time.sleep(2)
queue.put(jobdone)
t.join()
# You could also use queue.join(), provided the consumer calls queue.task_done()
print("Job Done")
