Join timeout in multiprocessing

Join timeout in multiprocessing - python

I have a dummy example, I want to apply multiprocessing in it. Consider a scenario where you have a stream of numbers(which I call frame) incoming one by one. And I want to assign it to any single process that is available currently. So I am creating 4 processes that are running a while loop, seeing if any element in queue, than apply function on it.
The problem is that when I join it, it gets stuck in any while loop, even though I close the while loop before it. But somehow it gets stuck inside it.
Code:
# step 1, 4 processes
import multiprocessing as mp
import os
import time
class MpListOperations:
def __init__(self):
self.results_queue = mp.Manager().Queue()
self.frames_queue = mp.Manager().Queue()
self.flag = mp.Manager().Value(typecode='b',value=True)
self.list_nums = list(range(0,5000))
def process_list(self):
print(f"Process id {os.getpid()} started")
while self.flag.value:
# print(self.flag.value)
if self.frames_queue.qsize():
self.results_queue.put(self.frames_queue.get()**2)
def create_processes(self, no_of_processes = mp.cpu_count()):
print("Creating Processes")
self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]
def start_processes(self):
print(f"starting processes")
for process in self.processes:
process.start()
def join_process(self):
print("Joining Processes")
while True:
if not self.frames_queue.qsize():
self.flag.value=False
print("JOININNG HERE")
for process in self.processes:
exit_code = process.join()
print(exit_code)
print("BREAKING DONE")
break
def stream_frames(self):
print("Streaming Frames")
for frame in self.list_nums:
self.frames_queue.put(frame)
if __name__=="__main__":
start = time.time()
mp_ops = MpListOperations()
mp_ops.create_processes()
mp_ops.start_processes()
mp_ops.stream_frames()
mp_ops.join_process()
print(time.time()-start)
Now if I add a timeout parameter in join, even 0, i.e exit_code = process.join(0) it works. I want to understand in this scenario, if this code is correct, what should be the value of timeout? Why is it working with timeout and not without it? What is the proper way to implement multiprocessing with it?

If you look at the documentation for a managed queue you will see that the qsize method only returns an approximate size. I would therefore not use it for testing when all the items have been taken of the frames queue. Presumably you want to let the processes run until all frames have been processed. The simplest way I know would be to put N sentinel items on the frames queue after the actual frames have been put where N is the number of processes getting from the queue. A sentinel item is a special value that cannot be mistaken for an actual frame and signals to the process that there are no more items for it to get from the queue (i.e. a quasi end-of-file item). In this case we can use None as the sentinel items. Each process then just continues to do get operations on the queue until it sees a sentinel item and then terminates. There is therefore no need for the self.flag attribute.
Here is the updated and simplified code. I have made some other minor changes that have been commented:
import multiprocessing as mp
import os
import time
class MpListOperations:
def __init__(self):
# Only create one manager process:
manager = mp.Manager()
self.results_queue = manager.Queue()
self.frames_queue = manager.Queue()
# No need to convert range to a list:
self.list_nums = range(0, 5000)
def process_list(self):
print(f"Process id {os.getpid()} started")
while True:
frame = self.frames_queue.get()
if frame is None: # Sentinel?
# Yes, we are done:
break
self.results_queue.put(frame ** 2)
def create_processes(self, no_of_processes = mp.cpu_count()):
print("Creating Processes")
self.no_of_processes = no_of_processes
self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]
def start_processes(self):
print("Starting Processes")
for process in self.processes:
process.start()
def join_processes(self):
print("Joining Processes")
for process in self.processes:
# join returns None:
process.join()
def stream_frames(self):
print("Streaming Frames")
for frame in self.list_nums:
self.frames_queue.put(frame)
# Put sentinels:
for _ in range(self.no_of_processes):
self.frames_queue.put(None)
if __name__== "__main__":
start = time.time()
mp_ops = MpListOperations()
mp_ops.create_processes()
mp_ops.start_processes()
mp_ops.stream_frames()
mp_ops.join_processes()
print(time.time()-start)
Prints:
Creating Processes
Starting Processes
Process id 28 started
Process id 29 started
Streaming Frames
Process id 33 started
Process id 31 started
Process id 38 started
Process id 44 started
Process id 42 started
Process id 45 started
Joining Processes
2.3660173416137695
Note for Windows
I have modified method start_processes to temporarily set attribute self.processes to None:
def start_processes(self):
print("Starting Processes")
processes = self.processes
# Don't try to pickle list of processes:
self.processes = None
for process in processes:
process.start()
# Restore attribute:
self.processes = processes
Otherwise under Windows we get a pickle error trying to serialize/deserialize a list of processes containing two or more multiprocessing.Process instances. The error is "TypeError: cannot pickle 'weakref' object." This can be demonstrated with the following code where we first try to pickle a list of 1 process and then a list of 2 processes:
import multiprocessing as mp
import os
class Foo:
def __init__(self, number_of_processes):
self.processes = [mp.Process(target=self.worker) for _ in range(number_of_processes)]
self.start_processes()
self.join_processes()
def start_processes(self):
processes = self.processes
for process in self.processes:
process.start()
def join_processes(self):
for process in self.processes:
process.join()
def worker(self):
print(f"Process id {os.getpid()} started")
print(f"Process id {os.getpid()} ended")
if __name__== "__main__":
foo = Foo(1)
foo = Foo(2)
Prints:
Process id 7540 started
Process id 7540 ended
Traceback (most recent call last):
File "C:\Booboo\test\test.py", line 26, in <module>
foo = Foo(2)
File "C:\Booboo\test\test.py", line 7, in __init__
self.start_processes()
File "C:\Booboo\test\test.py", line 13, in start_processes
process.start()
File "C:\Program Files\Python38\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python38\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Process id 18152 started
Process id 18152 ended
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input

The target loop is stuck in the get() method of your loop. This is because multiple processes could see that the queue wasn't empty, but only 1 of them was able to get the last item. The remaining processes are waiting for the next item to be available from the queue.
You might need to add a Lock when you are reading the size of the Queue object And getting the object of that queue.
Or alternatively, you avoid reading the size of the queue by simply using the queue.get() method with a timeout that allows us to check the flag regularly
import queue
TIMEOUT = 1 # seconds
class MpListOperations:
#[...]
def process_list(self):
print(f"Process id {os.getpid()} started")
previous = self.flag.value
while self.flag.value:
try:
got = self.frames_queue.get(timeout=TIMEOUT)
except queue.Empty:
pass
else:
print(f"Gotten {got}")
self.results_queue.put(got**2)
_next = self.flag.value
if previous != _next:
print(f"Flag change: {_next}")
$ python ./test_mp.py
Creating Processes
starting processes
Process id 36566 started
Streaming Frames
Process id 36565 started
Process id 36564 started
Process id 36570 started
Process id 36567 started
Gotten 0
Process id 36572 started
Gotten 1
Gotten 2
Gotten 3
Process id 36579 started
Gotten 4
Gotten 5
Gotten 6
Process id 36583 started
Gotten 7
# [...]
Gotten 4997
Joining Processes
Gotten 4998
Gotten 4999
JOININNG HERE
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
BREAKING DONE
1.4375360012054443
Alternatively, using a multiprocessing.Pool object:
def my_func(arg):
time.sleep(0.002)
return arg**2
def get_input():
for i in range(5000):
yield i
time.sleep(0.001)
if __name__=="__main__":
start = time.time()
mp_pool = mp.Pool()
result = mp_pool.map(my_func, get_input())
mp_pool.close()
mp_pool.join()
print(len(result))
print(f"Duration: {time.time()-start}")
Giving:
$ python ./test_mp.py
5000
Duration: 6.847279787063599

Related

Multi-processed file reading in python

I want a file to be read in a multi-processed way. Each process will be a class instance doing some specific task, but the class where the file is being opened will be a singleton class.
We don't want to batch the entire file into a number of processes instead we want each process to asynchronously read a batch of lines from the text file where no two processes will have the same line.
I know this is quite a task to execute and I have tried multiple ways to achieve this task but was unsuccessful.
Below is the code snippet where I tried to achieve the above mentioned
class ListFromTextFile():
def __init__(self, config: str):
self.file_pointer = open(file_path, "r")
self.batch_size = config["batch_size"]
self.__lock = Lock()
self.__lock_aquire = False
def get_list_of_pids(self):
function_name = "get_list_of_pids"
pids = []
try:
for line in self.file_pointer:
if self.__lock_aquire:
pid = line.strip("\n")
pids.append(pid)
else:
self.__lock.acquire()
log_message(function_name, "Lock aquired !!!")
self.__lock_aquire = True
if len(pids) == self.batch_size:
self.__lock.release()
log_message(function_name, "Lock released !!!")
self.__lock_aquire = False
yield pids
pids = []
if len(pids):
self.__lock.release()
log_message(function_name, "Lock released !!!")
yield pids
except Exception as e:
err = str(e)
trace_back = traceback.format_exc()
log_exception()(self.function_name, err, trace_back)
Here ListFromTextFile is a singleton class and, processes will call get_list_of_pids() which will yield a list of pids. The task here is every batch generated and yielded by all processes must be unique.
Any suggestions to improve the above code or new ideas are welcome.

What you want is not necessarily a singleton but rather a class instance that can be sharable among multiple processes. The most straightforward way I know of doing this (other people might have other ideas) is to create a managed class from ListTextFile. I would first modify its definition so that method get_list_of_pids is not a generator function (these cannot be pickled). Instead you can just call it repeatedly to return the next batch of pids until an empty list is returned signifying end of file on the input file. As a managed object running within a manager's process and method calls are handled by threads, it is sufficent to to run get_list_of_pids as a critical section using a thread.Lock.
For demo purposes my input file, test.txt, consists of 7 lines:
1
2
3
4
5
6
7
The Demo
from threading import Lock
from multiprocessing import Process, current_process
from multiprocessing.managers import BaseManager
file_path = 'test.txt'
class ListFromTextFile():
def __init__(self, config: dict):
self.file_pointer = open(file_path, "r")
self.batch_size = config["batch_size"]
self.eof = False
self.__lock = Lock()
def get_list_of_pids(self):
function_name = "get_list_of_pids"
pids = []
if self.eof:
# Returning an empty list signifies end of file:
return pids;
with self.__lock:
while True:
line = self.file_pointer.readline()
if line == '':
self.eof = True
# Return what we have if anything.
return pids
pids.append(line.strip('\n'))
if len(pids) == self.batch_size:
return pids
class MyManager(BaseManager):
pass
def worker(reader):
import time
while True:
pids = reader.get_list_of_pids()
if not pids:
# Empty list returned -> end of file
return
print(f'Current pid: {current_process().pid}, pids: {pids}', flush=True)
time.sleep(.2) # Give other process a chance to run
def demo():
MyManager.register('ListFromTextFile', ListFromTextFile)
with MyManager() as manager:
config = {'batch_size': 3}
reader = manager.ListFromTextFile(config)
p1 = Process(target=worker, args=(reader,))
p2 = Process(target=worker, args=(reader,))
p1.start()
p2.start()
p1.join()
p2.join()
if __name__ == '__main__':
demo()
Prints:
Current pid: 1492, pids: ['1', '2', '3']
Current pid: 29072, pids: ['4', '5', '6']
Current pid: 1492, pids: ['7']
But Is There an Alternate Approach?
If each process does the processing for a single batch and then terminates (those details have not been provided), the main process can be doing all the batching and providing each batch to a multiprocessing pool:
from multiprocessing import Pool, current_process
file_path = 'test.txt'
def create_batch(config):
batch_size = config["batch_size"]
pids = []
with open(file_path, "r") as f:
for line in f:
pids.append(line.strip('\n'))
if len(pids) == batch_size:
yield pids
pids = []
if pids:
yield pids
def worker(pids):
import time
print(f'Current pid: {current_process().pid}, pids: {pids}', flush=True)
time.sleep(.2) # Give other process a chance to run
def demo():
pool = Pool(2)
# lazily evaluate the iterable using imap_unordered
# as a memory-savings technique. Specify a chunksize if
# the number of batches are huge:
config = {'batch_size': 3}
pool.imap_unordered(worker, create_batch(config))
# Wait for all tasks to complete:
pool.close()
pool.join()
if __name__ == '__main__':
demo()
Prints:
Current pid: 4316, pids: ['1', '2', '3']
Current pid: 13424, pids: ['4', '5', '6']
Current pid: 4316, pids: ['7']

processing very large text files in parallel using multiprocessing and threading

I have found several other questions that touch on this topic but none that are quite like my situation.
I have several very large text files (3+ gigabytes in size).
I would like to process them (say 2 documents) in parallel using multiprocessing. As part of my processing (within a single process) I need to make an API call and because of this would like to have each process have it's own threads to run asynchronously.
I have came up with a simplified example ( I have commented the code to try to explain what I think it should be doing):
import multiprocessing
from threading import Thread
import threading
from queue import Queue
import time
def process_huge_file(*, file_, batch_size=250, num_threads=4):
# create APICaller instance for each process that has it's own Queue
api_call = APICaller()
batch = []
# create threads that will run asynchronously to make API calls
# I expect these to immediately block since there is nothing in the Queue (which is was
# the api_call.run depends on to make a call
threads = []
for i in range(num_threads):
thread = Thread(target=api_call.run)
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
####
# start processing the file line by line
for line in file_:
# if we are at our batch size, add the batch to the api_call to to let the threads do
# their api calling
if i % batch_size == 0:
api_call.queue.put(batch)
else:
# add fake line to batch
batch.append(fake_line)
class APICaller:
def __init__(self):
# thread safe queue to feed the threads which point at instances
of these APICaller objects
self.queue = Queue()
def run(self):
print("waiting for something to do")
self.queue.get()
print("processing item in queue")
time.sleep(0.1)
print("finished processing item in queue")
if __name__ == "__main__":
# fake docs
fake_line = "this is a fake line of some text"
# two fake docs with line length == 1000
fake_docs = [[fake_line] * 1000 for i in range(2)]
####
num_processes = 2
procs = []
for idx, doc in enumerate(fake_docs):
proc = multiprocessing.Process(target=process_huge_file, kwargs=dict(file_=doc))
proc.start()
procs.append(proc)
for proc in procs:
proc.join()
As the code is now, "waiting for something to do" prints 8 times (makes sense 4 threads per process) and then it stops or "deadlocks" which is not what I expect - I expect it to start sharing time with the threads as soon as I start putting items in the Queue but the code does not appear to make it this far. I ordinarily would step through to find a hang up but I still don't have a solid understanding of how to best debug using Threads (another topic for another day).
In the meantime, can someone help me figure out why my code is not doing what it should be doing?

I have made a few adjustments and additions and the code appears to do what it is supposed to now. The main adjustments are: adding a CloseableQueue class (from Brett Slatkins Effective Python Item 55), and ensuring that I call close and join on the queue so that the threads properly exit. Full code with these changes below:
import multiprocessing
from threading import Thread
import threading
from queue import Queue
import time
from concurrency_utils import CloseableQueue
def sync_process_huge_file(*, file_, batch_size=250):
batch = []
for idx, line in enumerate(file_):
# do processing on the text
if idx % batch_size == 0:
time.sleep(0.1)
batch = []
# api_call.queue.put(batch)
else:
computation = 0
for i in range(100000):
computation += i
batch.append(line)
def process_huge_file(*, file_, batch_size=250, num_threads=4):
api_call = APICaller()
batch = []
# api call threads
threads = []
for i in range(num_threads):
thread = Thread(target=api_call.run)
threads.append(thread)
thread.start()
for idx, line in enumerate(file_):
# do processing on the text
if idx % batch_size == 0:
api_call.queue.put(batch)
else:
computation = 0
for i in range(100000):
computation += i
batch.append(line)
for _ in threads:
api_call.queue.close()
api_call.queue.join()
for thread in threads:
thread.join()
class APICaller:
def __init__(self):
self.queue = CloseableQueue()
def run(self):
for item in self.queue:
print("waiting for something to do")
pass
print("processing item in queue")
time.sleep(0.1)
print("finished processing item in queue")
print("exiting run")
if __name__ == "__main__":
# fake docs
fake_line = "this is a fake line of some text"
# two fake docs with line length == 1000
fake_docs = [[fake_line] * 10000 for i in range(2)]
####
time_s = time.time()
num_processes = 2
procs = []
for idx, doc in enumerate(fake_docs):
proc = multiprocessing.Process(target=process_huge_file, kwargs=dict(file_=doc))
proc.start()
procs.append(proc)
for proc in procs:
proc.join()
time_e = time.time()
print(f"took {time_e-time_s} ")
class CloseableQueue(Queue):
SENTINEL = object()
def __init__(self, **kwargs):
super().__init__(**kwargs)
def close(self):
self.put(self.SENTINEL)
def __iter__(self):
while True:
item = self.get()
try:
if item is self.SENTINEL:
return # exit thread
yield item
finally:
self.task_done()
As expected this is a great speedup from running synchronously - 120 seconds vs 50 seconds.

python multiprocessing process pool fails to find asynced function

Pretty simple multiprocessing example. Goals:
Create a pool of process workers using mp.Pool
Do some sort of transformation (here a simple string operation on line)
Push the transformed line to mp.Queue
Further process data from that mp.Queue in the main program afterwards
So lets do this:
import multiprocessing as mp
Init async processes with a mp.queue
def process_pool_init_per_process(q):
global mp_queue
mp_queue = q
Really init the mp_pool
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))
This is getting called for every line to be proccesed async
def process_async_main(line):
print(line)
q.put(line + '_asynced')
And now let´s start it using apply_async
line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line))
mp_resp = handler.get()
And read from the queue
while not q.empty():
print(q.get()) # This should be the inital line
Fails wih:
python3 mp_process_example.py
Process ForkPoolWorker-1:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/queues.py", line 337, in get
return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'process_async_main' on <module '__main__' from 'mp_process_example.py'>
The question is: Why is multiprocessing not finding the main class?
Complete code to reproduce:
import multiprocessing as mp
##### Init async processes
def process_pool_init_per_process(q):
global mp_queue
mp_queue = q
# Really init the mp_pool
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))
#This is getting called for every line to be proccesed async
def process_async_main(line):
print(line)
q.put(line + '_asynced')
line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line))
mp_resp = handler.get()
while not q.empty():
print(q.get()) # This should be the inital line

Ok... I´ve got it... For some strange reason multiprocessing is not able to have the function to be asynced in the same file as the synchronized code.
Writing the code like this:
asynced.py
##### Init async processes
def process_pool_init_per_process(q):
global mp_queue
mp_queue = q
##### Function to be asycned
def process_async_main(line):
print(line)
mp_queue.put(line + '_asynced')
And than mp_process_example.py:
import multiprocessing as mp
from asynced import process_async_main, process_pool_init_per_process
# Really init the mp_pool
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))
line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line,))
mp_resp = handler.get()
while not q.empty():
print(q.get()) # This should be the inital line + "_asynced"
Works as expected:
$ python3 mp_process_example.py
Hi, this is a test to test mp_queues with mp process pools
Hi, this is a test to test mp_queues with mp process pools_asynced

Processes not joining even after flushing their queues

I wrote a program using the module multiprocessing which globally executes as follows:
both a simulation and an ui processes are started.
the simulation process feeds a queue with new simulation states. If the queue is full, the simulation loop isn't blocked so it can handle possible incoming messages.
the ui process consumes the simulation queue.
after around 1 second of execution time, the ui process sends a quit event to the main process then exits the loop. Upon exiting it sends a stopped event to the main process through the _create_process()'s inner wrapper() function.
the main process receives both events in whichever order. The quit event results in the main process sending stop signals to all the child processes, while the stopped event increments a counter in the main loop which will cause it to exit after having received as many stopped events as there are processes.
the simulation process receives the stop event and exits the loop, sending in turn a stopped event to the main process.
the main process has now received 2 stopped events in total and concludes that all child processes are—on their way to be—stopped. As a result, the main loop is exited
the run() function flushes the queues which have been written by the child processes.
the child processes are being joined.
The problem is that quite often (but not always) the program will hang upon trying to join the simulation process, as per the log below.
[...]
[INFO/ui] process exiting with exitcode 0
[DEBUG/MainProcess] starting thread to feed data to pipe
[DEBUG/MainProcess] ... done self._thread.start()
[DEBUG/simulation] Queue._start_thread()
[DEBUG/simulation] doing self._thread.start()
[DEBUG/simulation] starting thread to feed data to pipe
[DEBUG/simulation] ... done self._thread.start()
[DEBUG/simulation] telling queue thread to quit
[DEBUG/MainProcess] all child processes (2) should have been stopped!
[INFO/simulation] process shutting down
[DEBUG/simulation] running all "atexit" finalizers with priority >= 0
[DEBUG/simulation] telling queue thread to quit
[DEBUG/simulation] running the remaining "atexit" finalizers
[DEBUG/simulation] joining queue thread
[DEBUG/MainProcess] joining process <Process(simulation, started)>
[DEBUG/simulation] feeder thread got sentinel -- exiting
[DEBUG/simulation] ... queue thread joined
[DEBUG/simulation] joining queue thread
Stopping the execution through a Ctrl + C in the shell results in these mangled tracebacks:
Process simulation:
Traceback (most recent call last):
Traceback (most recent call last):
File "./debug.py", line 224, in <module>
run()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/process.py", line 257, in _bootstrap
util._exit_function()
File "./debug.py", line 92, in run
process.join() #< This doesn't work.
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/util.py", line 312, in _exit_function
_run_finalizers()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/process.py", line 121, in join
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/util.py", line 252, in _run_finalizers
finalizer()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/util.py", line 185, in __call__
res = self._callback(*self._args, **self._kwargs)
res = self._popen.wait(timeout)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/popen_fork.py", line 54, in wait
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/queues.py", line 196, in _finalize_join
thread.join()
return self.poll(os.WNOHANG if timeout == 0.0 else 0)
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/multiprocessing/popen_fork.py", line 30, in poll
pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/threading.py", line 1060, in join
self._wait_for_tstate_lock()
File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/threading.py", line 1076, in _wait_for_tstate_lock
elif lock.acquire(block, timeout):
KeyboardInterrupt
As for the code, here is a stripped down version of it (hence why it often seems incomplete):
#!/usr/bin/env python3
import logging
import multiprocessing
import pickle
import queue
import time
from collections import namedtuple
_LOGGER = multiprocessing.log_to_stderr()
_LOGGER.setLevel(logging.DEBUG)
_BUFFER_SIZE = 4
_DATA_LENGTH = 2 ** 12
_STATUS_SUCCESS = 0
_STATUS_FAILURE = 1
_EVENT_ERROR = 0
_EVENT_QUIT = 1
_EVENT_STOPPED = 2
_MESSAGE_STOP = 0
_MESSAGE_EVENT = 1
_MESSAGE_SIMULATION_UPDATE = 2
_Message = namedtuple('_Message', ('type', 'value',))
_StopMessage = namedtuple('_StopMessage', ())
_EventMessage = namedtuple('_EventMessage', ('type', 'value',))
_SimulationUpdateMessage = namedtuple('_SimulationUpdateMessage', ('state',))
_MESSAGE_STRUCTS = {
_MESSAGE_STOP: _StopMessage,
_MESSAGE_EVENT: _EventMessage,
_MESSAGE_SIMULATION_UPDATE: _SimulationUpdateMessage
}
def run():
# Messages from the main process to the child ones.
downward_queue = multiprocessing.Queue()
# Messages from the child processes to the main one.
upward_queue = multiprocessing.Queue()
# Messages from the simulation process to the UI one.
simulation_to_ui_queue = multiprocessing.Queue(maxsize=_BUFFER_SIZE)
# Regroup all the queues that can be written by child processes.
child_process_queues = (upward_queue, simulation_to_ui_queue,)
processes = (
_create_process(
_simulation,
upward_queue,
name='simulation',
args=(
simulation_to_ui_queue,
downward_queue
)
),
_create_process(
_ui,
upward_queue,
name='ui',
args=(
upward_queue,
simulation_to_ui_queue,
downward_queue
)
)
)
try:
for process in processes:
process.start()
_main(downward_queue, upward_queue, len(processes))
finally:
# while True:
# alive_processes = tuple(process for process in processes
# if process.is_alive())
# if not alive_processes:
# break
# _LOGGER.debug("processes still alive: %s" % (alive_processes,))
for q in child_process_queues:
_flush_queue(q)
for process in processes:
_LOGGER.debug("joining process %s" % process)
# process.terminate() #< This works!
process.join() #< This doesn't work.
def _main(downward_queue, upward_queue, process_count):
try:
stopped_count = 0
while True:
message = _receive_message(upward_queue, False)
if message is not None and message.type == _MESSAGE_EVENT:
event_type = message.value.type
if event_type in (_EVENT_QUIT, _EVENT_ERROR):
break
elif event_type == _EVENT_STOPPED:
stopped_count += 1
if stopped_count >= process_count:
break
finally:
# Whatever happens, make sure that all child processes have stopped.
if stopped_count >= process_count:
return
# Send a 'stop' signal to all the child processes.
for _ in range(process_count):
_send_message(downward_queue, True, _MESSAGE_STOP)
while True:
message = _receive_message(upward_queue, False)
if (message is not None
and message.type == _MESSAGE_EVENT
and message.value.type == _EVENT_STOPPED):
stopped_count += 1
if stopped_count >= process_count:
_LOGGER.debug(
"all child processes (%d) should have been stopped!"
% stopped_count
)
break
def _simulation(simulation_to_ui_queue, downward_queue):
simulation_state = [i * 0.123 for i in range(_DATA_LENGTH)]
# When the queue is full (possibly form reaching _BUFFER_SIZE), the next
# solve is computed and kept around until the queue is being consumed.
next_solve_message = None
while True:
message = _receive_message(downward_queue, False)
if message is not None and message.type == _MESSAGE_STOP:
break
if next_solve_message is None:
# _step(simulation_state)
# Somehow the copy (pickle) seems to increase the chances for
# the issue to happen.
next_solve_message = _SimulationUpdateMessage(
state=pickle.dumps(simulation_state)
)
status = _send_message(simulation_to_ui_queue, False,
_MESSAGE_SIMULATION_UPDATE,
**next_solve_message._asdict())
if status == _STATUS_SUCCESS:
next_solve_message = None
def _ui(upward_queue, simulation_to_ui_queue, downward_queue):
time_start = -1.0
previous_time = 0.0
while True:
message = _receive_message(downward_queue, False)
if message is not None and message.type == _MESSAGE_STOP:
break
if time_start < 0:
current_time = 0.0
time_start = time.perf_counter()
else:
current_time = time.perf_counter() - time_start
message = _receive_message(simulation_to_ui_queue, False)
if current_time > 1.0:
_LOGGER.debug("asking to quit")
_send_message(upward_queue, True, _MESSAGE_EVENT,
type=_EVENT_QUIT, value=None)
break
previous_time = current_time
def _create_process(target, upward_queue, name='', args=None):
def wrapper(function, upward_queue, *args, **kwargs):
try:
function(*args, **kwargs)
except Exception:
_send_message(upward_queue, True, _MESSAGE_EVENT,
type=_EVENT_ERROR, value=None)
finally:
_send_message(upward_queue, True, _MESSAGE_EVENT,
type=_EVENT_STOPPED, value=None)
upward_queue.close()
process = multiprocessing.Process(
target=wrapper,
name=name,
args=(target, upward_queue) + args,
kwargs={}
)
return process
def _receive_message(q, block):
try:
message = q.get(block=block)
except queue.Empty:
return None
return message
def _send_message(q, block, message_type, **kwargs):
message_value = _MESSAGE_STRUCTS[message_type](**kwargs)
try:
q.put(_Message(type=message_type, value=message_value), block=block)
except queue.Full:
return _STATUS_FAILURE
return _STATUS_SUCCESS
def _flush_queue(q):
try:
while True:
q.get(block=False)
except queue.Empty:
pass
if __name__ == '__main__':
run()
Related questions on StackOverflow and hints in Python's doc basically boil down to needing to flush the queues before joining the processes, which I believe I've been trying to do here. I realize that the simulation queue could still be trying to push the (potentially large) buffered data onto the pipe by the time the program would try to flush them upon exiting, and thus ending up with still non-empty queues. This is why I tried to ensure that all the child processes were stopped before reaching this point. Now, looking at the log above and at the additional log outputted after uncommenting the while True loop checking for alive processes, it appears that the simulation process simply doesn't want to completely shut down even though its target function definitely exited. Could this be the reason of my problem?
If so, how am I suppsoed to deal with it cleanly? Otherwise, what am I missing here?
Tested with Python 3.4 on Mac OS X 10.9.5.
PS: I'm wondering if this couldn't be related to this bug ?

Sounds like the issue was indeed due to some delay in pushing the data through the queue, causing the flushes to be ineffective because fired too early.
A simple while process.is_alive(): flush_the_queues() seems to do the trick!

Lately I have run into a similar use case like yours: multiple processes (up to 11), one input queue, one output queue. But very heavy output queue.
I was getting an overhead of up to 5 seconds (!) using your suggestion to perform while process.is_alive(): flush_the_queues() before the process.join().
I've reduced that overhead down to 0.7 seconds by relying on a multiprocessing.Manager.list instead of a multiprocessing.Queue for the output queue. The multiprocessing.Manager.list doesn't need any flushing. I might consider also finding an alternative to the input queue if I can..
Full example here:
import multiprocessing
import queue
import time
PROCESSES = multiprocessing.cpu_count() - 1
processes = []
def run():
start = time.time()
input_queue = multiprocessing.Queue()
feed_input_queue(input_queue)
with multiprocessing.Manager() as manager:
output_list = manager.list()
for _ in range(PROCESSES):
p = multiprocessing.Process(target=_execute, args=(input_queue, output_list))
processes.append(p)
p.start()
print(f"Time to process = {time.time() - start:.10f}")
start = time.time()
for p in processes:
while p.is_alive(): # in principle we could get rid of this if we find an alternative to the output queue
_flush_queue(input_queue)
p.join()
print(f"Time to join = {time.time() - start:.10f}")
# from here you can do something with the output_list
def _feed_input_queue(input_queue):
for i in range(10000):
input_queue.put(i)
def _execute(input_queue: multiprocessing.Queue, output_list: list):
while not input_queue.empty():
input_item = input_queue.get()
output_list.append(do_and_return_something_heavy(input_item))
return True
def _flush_queue(q):
try:
while True:
q.get(block=False)
except queue.Empty:
pass
def do_and_return_something_heavy(input_item):
return str(input_item) * 100000
if __name__ == '__main__':
run()
Output
Time to process = 0.1855618954
Time to join = 0.6889970303
Tested on Python 3.6.

Anything wrong in my Producer-Consumer implementation in Python using condition objects?

Can someone let me know what is wrong in my code below that implements the producer-consumer problem in python. I am using Python 3.4
import threading
from threading import Thread
from collections import deque
import time
maxSize = 4 # maximum size of the queue
q = deque([])
cur = 0 # current value to push into the queue
lk = threading.Lock()
cvP = threading.Condition(lk) # condition object for consumer
cvC = threading.Condition(lk) # condition object for producer
class Producer:
def run(self):
global maxSize, q, cur
while True:
with cvP:
while len(q) >= maxSize:
print( "Queue is full and size = ", len(q) )
cvC.notify() # notify the Consumer
cvP.wait() # put Producer to wait
q.append(cur)
print("Produced ", cur)
cur = cur + 1
cvC.notify() # notify the Consumer
class Consumer:
def run(self):
global maxSize, q, cur
while True:
with cvC:
while len(q) == 0:
print( "Queue is empty and size = ", len(q) )
cvP.notify() # notify the Producer
cvC.wait() # put Consumer to wait
x = q.popleft()
print("Consumed ", x)
time.sleep(1)
cvP.notify() # notify the Producer
p = Producer()
c = Consumer()
pThread = Thread( target=p.run(), args=())
cThread = Thread( target=c.run(), args=())
pThread.start()
cThread.start()
pThread.join()
cThread.join()
The program output:
Produced 0
Produced 1
Produced 2
Produced 3
Queue is full and size = 4
Then it got stuck. When terminating the program, I got:
Traceback (most recent call last):
File "path/t.py", line 47, in <module>
pThread = Thread( target=p.run(), args=())
File "path/t.py", line 22, in run
cvP.wait()
File "/usr/lib/python3.4/threading.py", line 289, in wait
waiter.acquire()
KeyboardInterrupt
The Producer seemed not "nofity" the consumer. Can someone let me know why?
Many thanks in advance!

The locking and unlocking are fine, but you probably want to specify 'run' as the target and not 'run()'
pThread = Thread( target=p.run, args=())
cThread = Thread( target=c.run, args=())
:-)
Explanation: lets simplify
def foo():
# ..
# Foo will run on a new thread
Thread(target=foo)
# foo() is run before the thread is created, and its return
# value is the target for the Thread call.
Thread(target=foo())
You can see in the stack trace that it never went beyond line 47, which is
pThread = Thread( target=p.run(), args=())
Which is the same as
x = p.run()
pThread = Thread(x, args=())

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Join timeout in multiprocessing - python

Related

Multi-processed file reading in python

processing very large text files in parallel using multiprocessing and threading

python multiprocessing process pool fails to find asynced function

Processes not joining even after flushing their queues

Anything wrong in my Producer-Consumer implementation in Python using condition objects?

Categories

Resources