Python Multiprocessing and Queues - python

I have a code snippet taken from another stackoverflow post
Python Workers and Queues
from multiprocessing import Process
from Queue import Queue
class Worker(Process):
def __init__(self, queue):
super(Worker, self).__init__()
self.queue= queue
def run(self):
print 'Worker started'
# do some initialization here
print 'Computing things!'
for data in iter( self.queue.get, None ):
print(data)
if __name__ == '__main__':
request_queue = Queue()
for i in range(4):
Worker( request_queue ).start()
for data in range(100):
request_queue.put( data )
# Sentinel objects to allow clean shutdown: 1 per worker.
for i in range(4):
request_queue.put( None )
Why does this process hang and not process the queue contents?

Found my error. I did not know there are two Queues.
changing
from multiprocessing import Process
from Queue import Queue
to
from multiprocessing import Process,Queue
Now I have the expected behavior

Related

Killing child process/task without killing main in Python using Pool Executor

I am trying to implement a method to force stop the child that have been started with ThreadPoolExecutor / ProcessPoolExecutor. I would like a cross platform implementation (Windows and Linux).
When the signal is triggered from main, the main process exits and I do NOT want that, only the child.
What is the correct way to force the child to quit? I do NOT want Events because in the following example I can have a while loop that never gets to event.is_set() again
eg:
while not event.is_set():
# Do stuff
while waiting_for_something:
# Here is blocked
Here is the code I am using but I miss something and I don't know what:
import os
import signal
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import time
def handler(signum, frame):
print(signum, os.getpid())
os.kill(os.getpid(), signal.SIGINT)
class asd:
def __init__(self):
pass
def run(self):
signal.signal(signal.SIGBREAK, handler)
while True:
print('running thread', os.getpid())
time.sleep(1)
while True:
print('running 2 ', os.getpid())
time.sleep(1)
print("after while")
if __name__ == "__main__":
t1 = asd()
pool = ProcessPoolExecutor(max_workers=4)
# pool = ThreadPoolExecutor(max_workers=4)
pool.submit(t1.run)
print('running main', os.getpid())
time.sleep(3)
signal.raise_signal(signal.SIGBREAK)
while True:
print("after killing process")
time.sleep(1)
Thank you!
you are sending the signal to your main python process not to the children.
in order to send signals to your children you need their PID, which is not available using the concurrent module, instead you should use multiprocess.Pool, then you can get the PID of the children and send the signal to them using os.kill
just remember to eventually use pool.terminate() to guarantee resources cleanup.
import os
import signal
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import time
import psutil
import multiprocessing
def handler(signum, frame):
print(signum, os.getpid())
os.kill(os.getpid(), signal.SIGINT)
class asd:
def __init__(self):
pass
def run(self):
signal.signal(signal.SIGBREAK, handler)
while True:
print('running thread', os.getpid())
time.sleep(1)
while True:
print('running 2 ', os.getpid())
time.sleep(1)
print("after while")
if __name__ == "__main__":
t1 = asd()
pool = multiprocessing.Pool(4)
children = multiprocessing.active_children()
res = pool.apply_async(t1.run)
print('running main', os.getpid())
time.sleep(3)
for child in children:
os.kill(child.pid,signal.SIGBREAK)
while True:
print("after killing process")
time.sleep(1)
with result
running main 16860
running thread 14212
running 2 14212
running 2 14212
after killing process
after killing process
after killing process
You can take a look at pebble which has been designed to solve this problem transparently for the User.
It provides concurrent.futures compatible APIs and allows to end a processing job either by cancelling the returned Future object or by setting a computing timeout.
import time
from pebble import ProcessPool
from concurrent.futures import TimeoutError
TIMEOUT = 5
def function(sleep):
while True:
time.sleep(sleep)
with ProcessPool() as pool:
future = pool.submit(function, TIMEOUT, 1)
assert isinstance(future.exception(), TimeoutError)
Note that you cannot stop executing threads in Python so only process pools can support such functionality.

How to manage the exit of a process without blocking its thread in Python?

I'm trying to code a kind of task manager in Python. It's based on a job queue, the main thread is in charge of adding jobs to this queue. I have made this class to handle the jobs queued, able to limit the number of concurrent processes and handle the output of the finished processes.
Here comes the problem, the _check_jobs function I don't get updated the returncode value of each process, independently of its status (running, finished...) job.returncode is always None, therefore I can't run if statement and remove jobs from the processing job list.
I know it can be done with process.communicate() or process.wait() but I don't want to block the thread that launches the processes. Is there any other way to do it, maybe using a ProcessPoolExecutor? The queue can be hit by processes at any time and I need to be able to handle them.
Thank you all for your time and support :)
from queue import Queue
import subprocess
from threading import Thread
from time import sleep
class JobQueueManager(Queue):
def __init__(self, maxsize: int):
super().__init__(maxsize)
self.processing_jobs = []
self.process = None
self.jobs_launcher=Thread(target=self._worker_job)
self.processing_jobs_checker=Thread(target=self._check_jobs_status)
self.jobs_launcher.start()
self.processing_jobs_checker.start()
def _worker_job(self):
while True:
# Run at max 3 jobs concurrently
if self.not_empty and len(self.processing_jobs) < 3:
# Get job from queue
job = self.get()
# Execute a task without blocking the thread
self.process = subprocess.Popen(job)
self.processing_jobs.append(self.process)
# util if queue.join() is used to block the queue
self.task_done()
else:
print("Waiting 4s for jobs")
sleep(4)
def _check_jobs_status(self):
while True:
# Check if jobs are finished
for job in self.processing_jobs:
# Sucessfully completed
if job.returncode == 0:
self.processing_jobs.remove(job)
# Wait 4 seconds and repeat
sleep(4)
def main():
q = JobQueueManager(100)
task = ["stress", "--cpu", "1", "--timeout", "20"]
for i in range(10): #put 10 tasks in the queue
q.put(task)
q.join() #block until all tasks are done
if __name__ == "__main__":
main()
I answer myself, I have come up with a working solution. The JobExecutor class handles in a custom way the Pool of processes. The watch_completed_tasks function tries to watch and handle the output of the tasks when they are done. This way everything is done with only two threads and the main thread is not blocked when submitting processes.
import subprocess
from threading import Timer
from concurrent.futures import ProcessPoolExecutor, as_completed
import logging
def launch_job(job):
process = subprocess.Popen(job, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f"launching {process.pid}")
return [process.pid, process.stdout.read(), process.stderr.read()]
class JobExecutor(ProcessPoolExecutor):
def __init__(self, max_workers: int):
super().__init__(max_workers)
self.futures = []
self.watch_completed_tasks()
def submit(self, command):
future = super().submit(launch_job, command)
self.futures.append(future)
return future
def watch_completed_tasks(self):
# Manage tasks completion
for completed_task in as_completed(self.futures):
print(f"FINISHED task with PID {completed_task.result()[0]}")
self.futures.remove(completed_task)
# call this function evevery 5 seconds
timer_thread = Timer(5.0, self.watch_completed_tasks)
timer_thread.setName("TasksWatcher")
timer_thread.start()
def main():
executor = JobExecutor(max_workers=5)
for i in range(10):
task = ["stress",
"--cpu", "1",
"--timeout", str(i+5)]
executor.submit(task)

python multithreading queues not running or exiting cleanly

I'm learning python multithreading and queues. The following creates a bunch of threads that pass data through a queue to another thread for printing:
import time
import threading
import Queue
queue = Queue.Queue()
def add(data):
return ["%sX" % x for x in data]
class PrintThread(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
data = self.queue.get()
print data
self.queue.task_done()
class MyThread(threading.Thread):
def __init__(self, queue, data):
threading.Thread.__init__(self)
self.queue = queue
self.data = data
def run(self):
self.queue.put(add(self.data))
if __name__ == "__main__":
a = MyThread(queue, ["a","b","c"])
a.start()
b = MyThread(queue, ["d","e","f"])
b.start()
c = MyThread(queue, ["g","h","i"])
c.start()
printme = PrintThread(queue)
printme.start()
queue.join()
However, I see only the data from the first thread print out:
['aX', 'bX', 'cX']
Then nothing else, but the program doesn't exit. I have to kill the process to have it exit.
Ideally, after each MyThread does it data processing and puts the result to the queue, that thread should exit? Simultaneously the PrintThread should take whatever is on the queue and print it.
After all MyThread threads have finished and the PrintThread thread has finished processing everything on the queue, the program should exit cleanly.
What have I done wrong?
EDIT:
If each MyThread thread takes a while to process, is there a way to guarantee that the PrintThread thread will wait for all the MyThread threads to finish before it will exit itself?
That way the print thread will definitely have processed every possible data on the queue because all the other threads have already exited.
For example,
class MyThread(threading.Thread):
def __init__(self, queue, data):
threading.Thread.__init__(self)
self.queue = queue
self.data = data
def run(self):
time.sleep(10)
self.queue.put(add(self.data))
The above modification will wait for 10 seconds before putting anything on the queue. The print thread will run, but I think it's exiting too early since there is not data on the queue yet, so the program prints out nothing.
Your PrintThread does not loop but instead only prints out a single queue item and then stops running.
Therefore, the queue will never be empty and the queue.join() statement will prevent the main program from terminating
Change the run() method of your PrintThread into the following code in order to have all queue items processed:
try:
while True:
data = self.queue.get_nowait()
print data
self.queue.task_done()
except queue.Empty:
# All items have been taken off the queue
pass

Need help on producer and consumer thread in python

I wanted to create the consumer and producer thread in python simultaneously, where producer thread will append the queue and consumer thread retrieves the item which stored in the queue. And I need to start the consumer thread along with producer. Consumer thread should wait till the queue gets an item. And it should terminate when there is no item in queue. I am new to python, please help on this.
Requirements:
If there is a list of 10 numbers, producer thread should insert the queue with one item, and consumer thread should retrieve the number. Both thread should start simultaneously .
from queue import Queue
import threading
import time
class producer(threading.Thread):
def __init__(self, list_of_numbers):
threading.Thread.__init__(self)
self.list_items = list_of_numbers
def run(self):
for i in self.list_items:
queue.put(str(i))
class consumer(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
while queue.not_empty:
queue_ret = queue.get()
print("Retrieved", queue_ret)
queue = Queue()
producers = producer([10,20,5,4,3,2,1])
consumers = consumer()
producers.start()
consumers.start()
producers.join()
consumers.join()
Just put a special item once you are done:
_im_done = object()
class producer(threading.Thread):
def run(self):
'''feed the consumer until you are done'''
queue.put(_im_done)
class consumer(threading.Thread):
def run(self):
while True:
queue_ret = queue.get()
if queue_ret is _im_done:
break
'''normal execution'''
If there are multiple consumers, then you have to put the item back before you stop:
class consumer(threading.Thread):
def run(self):
while True:
queue_ret = queue.get()
if queue_ret is _im_done:
queue.put(_im_done)
break
'''normal execution'''
You can use the queue module directly. The documentation contains an example for your use case. As a side note, the module is named Queue in Python 2.
However threading in Python won't get your program any faster if it is CPU bound, in this case you may use multiprocessing module instead (in IO bound cases threading may be more viable since threads are often cheaper). Mutiprocessing module also provides a safe queue implementation named multiprocessing.Queue.
queue.get() is blocking. If there are no items in queue it will just get stuck there. You should use while True: queue.get(block=False) and handle Empty exception and exit.
Ok full code to clear confusion
from Queue import Queue, Empty
import threading
import time
started = False
running = True
class producer(threading.Thread):
def __init__(self, list_of_numbers):
threading.Thread.__init__(self)
self.list_items = list_of_numbers
def run(self):
started = True
for i in self.list_items:
queue.put(str(i))
running = False
class consumer(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
while not started:
sleep(0)
while running:
try:
queue_ret = queue.get(block=False)
except Empty:
sleep(0)
continue
print("Retrieved", queue_ret)
queue = Queue()
producers = producer([10,20,5,4,3,2,1])
consumers = consumer()
producers.start()
consumers.start()
producers.join()
consumers.join()

Sending messages to other QThread

I'm trying to figure out how to implement the concept of having my main thread spawn a new thread that processes data concurrently as messages are passed to it.
From what I figured so far the simplest way of doing this would be something like:
from PySide.QtCore import QCoreApplication, QObject, Signal, QThread, QTimer
class Foo(QObject):
do_foo = Signal(str)
def __init__(self, parent=None):
super(Foo, self).__init__(parent)
self.do_foo.connect(self._do_foo)
def foo(self, string):
self.do_foo.emit(string)
def _do_foo(self, string):
# Process string
print "Process thread:", self.thread().currentThreadId()
class App(QCoreApplication):
def run(self):
print "Main thread:", self.thread().currentThreadId()
thread = QThread()
foo = Foo()
foo.moveToThread(thread)
thread.start()
# Obviously the following should be done with the event-loop
# or at doing processing of events.
running = True
while running:
try:
string = raw_input()
foo.foo(string)
except EOFError:
running = False
thread.exit()
thread.wait()
self.exit()
if __name__ == '__main__':
import sys
app = App(sys.argv)
QTimer.singleShot(0, app.run)
sys.exit(app.exec_())
But if this would be the way of doing it I can not see what the use of Slots would be.
Or you can use the design patter "Provider-Consumer". How it works? Well you have to implement a queue. The spwaned thread will get the data from this queue while your main thread will feed the queue with new data.
Your spawned threads blocks while the queue is empty. This way you can even process data in more that one thread, and you don't have to worry about two threads trying to read the same data.
Here is some seudo-code for consumer threads.
class MyThread:
def __init__(self, queue):
self.queue = queue
self.event = Event() # I generally use threading.Event for stopping threads. You don't need it here.
def run():
while not self.event.isSet():
data = self.queue.get() # This stop the thread until new data be available.
do_something_with_data(data)
Then in your main thread:
import Queue
queue = Queue.Queue()
mthread = MyThread(queue)
mthread.start()
# And now you can send data to threads by:
queue.put(data)

Categories

Resources