I am writing some code where I have 3 processes (spawned from the main process). The first process uses asyncio to create 3 coroutines and switch between them. The last two processes run independently and generate two outputs that are used in one of the coroutines of the first process.
Communication is managed with multiprocessing.Queue(): the main process puts the input data into queue_source_position_hrir_calculator and queue_source_position_cutoff_calculator, and these two queues are emptied by p2_hrir_computation_process and p3_cutoff_computation_process. These two processes put their computation results into two output queues, queue_computed_hrirs and queue_computed_cutoff.
Finally, these two queues are consumed by the asyncio process, specifically inside the input_parameters_coroutine function.
The full code is the following (I will highlight the key parts in later snippets):
import asyncio
import multiprocessing
import numpy as np
import time
from classes.HRIR_interpreter_min_phase_linear_interpolation import HRIR_interpreter_min_phase_linear_interpolation
from classes.object_renderer import ObjectRenderer
#Useful resources: https://bbc.github.io/cloudfit-public-docs/asyncio/asyncio-part-2
#https://realpython.com/async-io-python/
Fs = 44100
# region Async_IO functions
async def audio_input_coroutine(overlay):
    for i in range(0, 100):
        print('Executing audio input coroutine')
        print(overlay)
        await asyncio.sleep(1/(Fs*4))

async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        #print(overlay)
        current_hrir = queue_computed_hrirs.get()
        print('got current hrir')
        current_cutoff = queue_computed_cutoff.get()
        print('got current cutoff')
        await asyncio.sleep(0.5)

async def audio_output_coroutine(overlay):
    for i in range(0, 10):
        print('Executing audio_output coroutine')
        #print(overlay)
        await asyncio.sleep(0.5)

async def main_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    await asyncio.gather(audio_input_coroutine(overlay),
                         input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff),
                         audio_output_coroutine(overlay))

def async_IO_main_process(queue_computed_hrirs, queue_computed_cutoff):
    overlay = 10
    asyncio.run(main_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff))
# endregion
# region HRIR_computation_process
def compute_hrir(queue_source_position, queue_computed_hrirs):
    print('computing hrir')
    SOFA_filename = '../HRTF_data/HUTUBS_min_phase.sofa'
    # loading the simulated dataset using the support class HRIRInterpreter
    HRIRInterpreter = HRIR_interpreter_min_phase_linear_interpolation(SOFA_filename=SOFA_filename)
    # variable to check if I have other positions in my input queue
    eof_source_position = False
    while not eof_source_position:
        time.sleep(1)
        position = queue_source_position.get()
        if position is None:
            eof_source_position = True  # end of messages indicator
        else:
            required_IR = HRIRInterpreter.get_interpolated_IR(position[0], position[1], 1)
            queue_computed_hrirs.put(required_IR)
    print('completed hrir computation, adding None to queue')
    queue_computed_hrirs.put(None)  # end of messages indicator
    print('completed hrir process')
# endregion
# region cutoff_computation_process
def compute_cutoff(queue_source_position, queue_computed_cutoff):
    print('computing cutoff')
    cutoff = 20000
    object_renderer = ObjectRenderer()
    object_positions = np.array([(20, 0), (40, 0), (100, 0), (225, 0)])
    eof_source_position = False
    while not eof_source_position:
        time.sleep(1)
        object_renderer.update_object_position(object_positions)
        print('inside source position update')
        source_position = queue_source_position.get()
        if source_position is None:  # end of messages indicator
            eof_source_position = True
        else:
            cutoff = object_renderer.get_cutoff(azimuth=source_position[0], elevation=source_position[1])
            queue_computed_cutoff.put(cutoff)
    queue_computed_cutoff.put(None)  # end of messages indicator
# endregion
if __name__ == "__main__":
    queue_source_position_hrir_calculator = multiprocessing.Queue()
    queue_source_position_cutoff_calculator = multiprocessing.Queue()
    queue_computed_hrirs = multiprocessing.Queue()
    queue_computed_cutoff = multiprocessing.Queue()

    i = 0.0
    # Basically here I am writing a sequence of positions into the queues,
    # then I add a None value to detect when I am done with the simulation so the processes can end
    for _ in range(10):
        source_position = np.array([i, 0.0])
        queue_source_position_hrir_calculator.put(source_position)
        queue_source_position_cutoff_calculator.put(source_position)
        i += 10
    queue_source_position_hrir_calculator.put(None)  # "end of messages" indicator
    queue_source_position_cutoff_calculator.put(None)  # "end of messages" indicator

    p1_async_IO_process = multiprocessing.Process(target=async_IO_main_process, args=(queue_computed_hrirs, queue_computed_cutoff))  # process that manages the ASYNC_IO coroutines between DMAs
    p2_hrir_computation_process = multiprocessing.Process(target=compute_hrir, args=(queue_source_position_hrir_calculator, queue_computed_hrirs))
    p3_cutoff_computation_process = multiprocessing.Process(target=compute_cutoff, args=(queue_source_position_cutoff_calculator, queue_computed_cutoff))

    p1_async_IO_process.start()
    p2_hrir_computation_process.start()
    p3_cutoff_computation_process.start()

    print('joining async_IO process')
    p1_async_IO_process.join()
    print('joined async_IO process')

    # NB: to join a process, its queues must be empty. So before calling join on p2,
    # I should get the values from the queue_computed_hrirs queue.
    print('joining hrir computation process')
    p2_hrir_computation_process.join()
    print('joined hrir computation process')

    print('joining cutoff computation process')
    p3_cutoff_computation_process.join()
    print('joined cutoff computation process')

    print("completed main")
The important part of the code is:
async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        #print(overlay)
        current_hrir = queue_computed_hrirs.get()
        print('got current hrir')
        current_cutoff = queue_computed_cutoff.get()
        print('got current cutoff')
        await asyncio.sleep(0.5)
This coroutine receives three arguments: overlay (a dummy variable I am keeping for future development) and the two multiprocessing.Queue() objects, queue_computed_hrirs and queue_computed_cutoff.
At the moment my input_parameters_coroutine gets "stuck" while executing current_hrir = queue_computed_hrirs.get() and current_cutoff = queue_computed_cutoff.get(). I say "stuck" because the code works fine and completes its execution; the problem is that those two calls are blocking, so my coroutine stops until it has something to get from the queue.
What I would like to achieve is: try to execute current_hrir = queue_computed_hrirs.get(); if it is not possible at that moment, switch to another coroutine and let it do its work, then come back and check whether queue_computed_hrirs.get() can now proceed; if yes, go on, if not, switch to another coroutine again.
I saw that there are some known problems in making asyncio and multiprocessing communicate (see "What kind of problems (if any) would there be combining asyncio with multiprocessing?" and "Can I somehow share an asynchronous queue with a subprocess?"), but I wasn't able to find a clean solution to my problem.
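The closest I have come to what I want is the following sketch (my untested idea, not a confirmed solution): offload the blocking get() to a thread with run_in_executor, so that awaiting it lets the event loop switch to the other coroutines:
import asyncio

async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    loop = asyncio.get_running_loop()
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        # run_in_executor moves the blocking Queue.get() to a worker thread;
        # while it waits, the other coroutines keep running
        current_hrir = await loop.run_in_executor(None, queue_computed_hrirs.get)
        print('got current hrir')
        current_cutoff = await loop.run_in_executor(None, queue_computed_cutoff.get)
        print('got current cutoff')
        await asyncio.sleep(0.5)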
I am using pafy to stream in a set of YouTube videos with the aim of combining them (split-screen style) and displaying them as one video. It works, but the frame rate is very slow above two videos because a frame is fetched from each stream sequentially; when I try 9 videos (for a 3x3 stitch), fetching the frames takes 0.1725 s (too slow).
I figured the best way to reduce this was to fetch the streams in parallel with multiprocessing.
I tried using Pipes and multiprocessing, but I am getting an EOFError: Ran out of input.
See the code below; comment the frames = ... line in/out to switch between the working-but-slow method and my multiprocessing attempt.
import multiprocessing
import cv2
import numpy as np
import pafy
import typing
import timeit
urls = [
"https://www.youtube.com/watch?v=tT0ob3cHPmE",
"https://www.youtube.com/watch?v=XmjKODQYYfg",
"https://www.youtube.com/watch?v=E2zrqzvtWio",
"https://www.youtube.com/watch?v=6cQLNXELdtw",
"https://www.youtube.com/watch?v=s_rmsH0wQ3g",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE"
]
width = np.math.ceil(np.sqrt(len(urls)))
dim = 1920, 1080
def main():
    streams = [pafy.new(url).getbest() for url in urls]
    videos = [cv2.VideoCapture() for _ in streams]
    [video.open(best.url) for video, best in zip(videos, streams)]
    cv2.namedWindow('Video', cv2.WINDOW_FREERATIO)
    cv2.setWindowProperty('Video', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    while True:
        start_time = timeit.default_timer()
        # frames = [cv2.resize(video.read()[-1], (dim[0] // width, dim[1] // width)) for video in videos]
        frames = get_frames(videos)
        print(timeit.default_timer() - start_time)

        start_time = timeit.default_timer()
        dst = merge_frames(frames)
        print(timeit.default_timer() - start_time)

        start_time = timeit.default_timer()
        cv2.imshow('Video', dst)
        if cv2.waitKey(1) & 0xFF == ord('e'):
            break
        print(timeit.default_timer() - start_time)

    [video.release() for video in videos]
    cv2.destroyAllWindows()

def get_frames(videos):
    # frames = [video.read()[-1] for video in videos]
    jobs = []
    pipe_list = []
    for video in videos:
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=get_frame, args=(video, send_end))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    for proc in jobs:
        proc.join()
    frames = [x.recv() for x in pipe_list]
    return frames

def get_frame(video, send_end):
    send_end.send(video.read()[1])
    # send_end.send(cv2.resize(video.read()[1], (dim[0] // width, dim[1] // width)))

def merge_frames(frames: typing.List[np.ndarray]):
    width = np.math.ceil(np.sqrt(len(frames)))
    rows = []
    for row in range(width):
        i1, i2 = width * row, width * row + width
        rows.append(np.hstack(frames[i1: i2]))
    return np.vstack(rows)

if __name__ == '__main__':
    main()
Interesting application! Regarding the error: I ran your code and the message is that it can't pickle the VideoCapture object (see the links below); that's probably why the receiving pipe is empty. There are two errors from two processes: first the pickle error, then the EOF.
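(My addition for illustration, assuming opencv-python is installed: the failure can be reproduced with pickle directly, which is what multiprocessing uses to send arguments to a spawned child.)
import pickle
import cv2

cap = cv2.VideoCapture()
try:
    # multiprocessing pickles the Process args; cv2.VideoCapture wraps native
    # resources and does not support being pickled
    pickle.dumps(cap)
except TypeError as e:
    print(e)  # cannot pickle 'cv2.VideoCapture' object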
EDIT #2: I managed to run it with one process per video, etc.:
Regarding performance: I first ran it without merging the images (I had to fix some details) to check that frames were being received. For 3 and 4 streams, displayed in separate windows fed by the receiving pipes, it played very fast, faster than real time. I think the merging and resizing for display are the slow part: the stitched picture is 2560x1440 for 4 streams (4 x 1280x720), and in my case it is resized to fit the screen.
Thanks for sharing that question and that library, etc.!
(BTW, I tried with a lock as well initially, but it turned out not to be necessary. The code still needs cleaning of some experiments. Also, the current implementation might not be synchronized per frame across streams, because it doesn't join per frame like your original example, which created new processes to grab one frame from each stream and then merged them.)
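(Another addition of mine, a sketch under my own assumptions, to illustrate the merging/resizing cost mentioned above: resize each frame down to its grid cell before stacking, so the mosaic is built at display size rather than at full resolution and shrunk afterwards.)
import cv2

def merge_frames_scaled(frames, grid, out_w=1920, out_h=1080):
    # Resize each frame to its grid cell first, so hconcat/vconcat work on
    # small images instead of building a huge mosaic and shrinking it later.
    cell = (out_w // grid, out_h // grid)
    small = [cv2.resize(f, cell) for f in frames]
    rows = [cv2.hconcat(small[r * grid:(r + 1) * grid]) for r in range(grid)]
    return cv2.vconcat(rows)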
The CPU load is mainly in the main process
(a 4-core CPU, thus max=25% per instance):
Some times:
0.06684677699999853
0.030737616999999773
1.2829999995744856e-06
LEN(FRAMES)= 9
0.06703700200000284
0.030708104000002123
6.409999997458726e-07
LEN(FRAMES)= 9
The waitKey in the main loop can be tweaked.
Code
https://github.com/Twenkid/Twenkid-FX-Studio/blob/master/Py/YoutubeAggregatorPafy/y6.py
# Merging Youtube streams with pafy, opencv and multithreading
# Base code by Fraser Langton - Thanks!
# Refactored and debugged by Twenkid
# version y6 - more cleaning of unused code, properly close VideoCapture in the processes
import multiprocessing
from multiprocessing import Lock  # not actually needed, kept from experiments
import cv2
import numpy as np
import pafy
import typing
import timeit
import time
urls = [
"https://www.youtube.com/watch?v=tT0ob3cHPmE",
"https://www.youtube.com/watch?v=XmjKODQYYfg",
"https://www.youtube.com/watch?v=E2zrqzvtWio",
"https://www.youtube.com/watch?v=6cQLNXELdtw",
"https://www.youtube.com/watch?v=s_rmsH0wQ3g",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE"
]
# Merging seems to require equal number of sides, so 2x2, 3x3 etc. The resolutions should be the same.
'''
[
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
]
'''
width = np.math.ceil(np.sqrt(len(urls)))
dim = 1920, 1080
def main():
    streams = [pafy.new(url).getbest() for url in urls]
    print(streams)

    bestURLS = [best.url for best in streams]
    print(bestURLS)

    cv2.namedWindow('Video', cv2.WINDOW_FREERATIO)
    cv2.setWindowProperty('Video', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    LOCK = Lock()
    jobs, pipes = get_framesULJ(bestURLS, LOCK)
    print("JOBS, PIPES", jobs, pipes)

    while True:
        start_time = timeit.default_timer()
        frames = [x.recv() for x in pipes]
        print("LEN(FRAMES)=", len(frames))
        dst = merge_frames(frames)
        print(timeit.default_timer() - start_time)

        start_time = timeit.default_timer()
        try:
            cv2.imshow('Video', dst)
        except:
            print("Skip")
        if cv2.waitKey(20) & 0xFF == ord('e'):
            break
        print(timeit.default_timer() - start_time)

    for job in jobs:
        job.join()
    cv2.destroyAllWindows()
def get_framesULJ(videosURL, L):  # start one process per stream; frames are read in main
    print("get_framesULJ:", videosURL)
    jobs = []
    pipe_list = []
    for videoURL in videosURL:
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    return jobs, pipe_list
def get_frame2L(videoURL, send_end, L):
    # One process per stream: the VideoCapture is opened here, in the child,
    # so only the URL string crosses the process boundary (nothing to pickle).
    v = cv2.VideoCapture()
    v.open(videoURL)
    print("get_frame2L", videoURL, v, send_end)
    while True:
        ret, frame = v.read()
        if ret:
            send_end.send(frame)
        else:
            print("NOT READ!")
            break
    v.release()  # properly close the VideoCapture in the process (see version note)
def get_framesUL(videosURL, L):
    # Variant that joins the processes and reads one batch of frames (unused in main)
    jobs = []
    pipe_list = []
    for videoURL in videosURL:
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    for proc in jobs:
        proc.join()
    frames = [x.recv() for x in pipe_list]
    return frames
def get_frames(videos, L):
    # Original per-frame version: fails, because cv2.VideoCapture cannot be pickled
    jobs = []
    pipe_list = []
    for video in videos:
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=get_frame, args=(video, send_end, L))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    for proc in jobs:
        proc.join()
    frames = [x.recv() for x in pipe_list]
    return frames
def get_frame(video, send_end, L):
    L.acquire()
    print("get_frame", video, send_end)
    send_end.send(video.read()[1])
    L.release()
    # send_end.send(cv2.resize(video.read()[1], (dim[0] // width, dim[1] // width)))

def get_frame2(videoURL, send_end):
    v = cv2.VideoCapture()
    v.open(videoURL)
    while True:
        ret, frame = v.read()
        if ret: send_end.send(frame)
        else: break
def merge_frames(frames: typing.List[np.ndarray]):
    width = np.math.ceil(np.sqrt(len(frames)))
    rows = []
    for row in range(width):
        i1, i2 = width * row, width * row + width
        rows.append(np.hstack(frames[i1: i2]))
    return np.vstack(rows)

if __name__ == '__main__':
    main()
EDIT #1 IDEA: Create one process per video stream and read it in a loop (pumping frames into a pipe), instead of a new process for each frame; open the videos/VideoCapture objects inside each process, passing only a videoURL through, instead of sending a VideoCapture object. (I don't know if it has the same pickle issue in this form.)
...
in main:

    bestURLS = [best.url for best in streams]
    proc, pipes = get_framesULJ(bestURLS, LOCK)

def get_frame2(videoURL, send_end):
    v = cv2.VideoCapture()
    v.open(videoURL)
    while True:
        ret, frame = v.read()
        if ret: send_end.send(frame)
        else: break
def get_framesULJ(videosURL, L):  # return the processes; join in main and read the frames there
    print("get_framesULJ:", videosURL)
    jobs = []
    pipe_list = []
    for videoURL in videosURL:
        recv_end, send_end = multiprocessing.Pipe(False)
        p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
        jobs.append(p)
        pipe_list.append(recv_end)
        p.start()
    return jobs, pipe_list
Original answer:
<multiprocessing.connection.PipeConnection object at 0x000000000D3C7D90> <multiprocessing.connection.PipeConnection object at 0x000000000D3BD2E0>
Traceback (most recent call last):
  File "y.py", line 104, in <module>
    main()
  File "y.py", line 48, in main
    frames = get_frames(videos)
  File "y.py", line 80, in get_frames
    p.start()
  File "C:\Program Files\Python38\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)
  File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 326, in _Popen
    return Popen(process_obj)
  File "C:\Program Files\Python38\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Program Files\Python38\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'cv2.VideoCapture' object

Z:\>Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
It fails inside p.start, while the process arguments are being pickled for the child.
The instances are created and the structures seem OK:
VIDEOS: [<VideoCapture 000000000D418710>, <VideoCapture 000000000D4186F0>, <VideoCapture 000000000D418B70>]
<multiprocessing.connection.PipeConnection object at 0x000000000D3C3D90> <multiprocessing.connection.PipeConnection object at 0x000000000D3B62E0>
P = <Process name='Process-1' parent=8892 initial>
JOBS, len [<Process name='Process-1' parent=8892 initial>] 1
RECV_END <multiprocessing.connection.PipeConnection object at 0x000000000D3C3D90>
See the module pickle:
https://docs.python.org/3/library/pickle.html
It seems not everything can be "pickled".
What can be pickled and unpickled?
The following types can be pickled:
None, True, and False
integers, floating point numbers, complex numbers
strings, bytes, bytearrays
tuples, lists, sets, and dictionaries containing only picklable objects
functions defined at the top level of a module (using def, not lambda)
built-in functions defined at the top level of a module
classes that are defined at the top level of a module
instances of such classes whose __dict__ or the result of calling __getstate__() is picklable (see section Pickling Class Instances for details).
Besides, it seems there was a bug in OpenCV causing this. One of the solutions given is to turn multiprocessing off...
Python multiprocess can't pickle opencv videocapture object
https://github.com/MVIG-SJTU/AlphaPose/issues/164
Fang-Haoshu commented on 17 Oct 2018
This bug is due to multi-processing in opencv. --sp disable
multi-processing. BTW, can you tell me the version of opencv that you
are using?
I guess it is related to locked memory or something similar.
A workaround I would try is to first dump the pixels as plain/raw data, maybe with a small header about the size etc. (a rough sketch of this follows at the end of this answer).
Also, in general, for smoother play I think some buffering needs to be added.
BTW, what version is your openCV? Mine is 4.2.0
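(My addition: a rough sketch of the raw-pixel workaround mentioned above, under my own assumptions; the shape/dtype go first as a small header, then the pixels as raw bytes over the multiprocessing Pipe, so nothing OpenCV-specific is pickled.)
import numpy as np

def send_frame_raw(conn, frame):
    # Header first (shape and dtype), so the receiver can rebuild the array,
    # then the raw pixel bytes.
    conn.send((frame.shape, str(frame.dtype)))
    conn.send_bytes(frame.tobytes())

def recv_frame_raw(conn):
    shape, dtype = conn.recv()
    data = conn.recv_bytes()
    return np.frombuffer(data, dtype=dtype).reshape(shape)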
I create 3 processes: I want the function write1 to write the values 'A', 'B', 'C' to queue1, the function read1 to read each value from queue1 and put it into queue2, and, at the same time, the function read2 to read values from queue2. But the values B and C can't be read from queue2 in time, and the processes finish first.
from multiprocessing import Process, Queue, Manager, Pool, Lock
import os, time, random

# write data into queue 1
def write1(q1, lock):
    lock.acquire()
    for value in ['A', 'B', 'C']:
        print('Put %s to queue111...%s' % (value, str(os.getpid())))
        q1.put(value)
        time.sleep(1)
    lock.release()

# read data from queue 1 and write it into queue 2
def read1(q1, q2, lock):
    lock.acquire()
    while True:
        time.sleep(1)
        value = q1.get()
        # if value is None: break
        print('Get %s from queue111.%s' % (value, str(os.getpid())))
        q2.put(value)
        print('Put %s to queue222...%s' % (value, str(os.getpid())))
    lock.release()

def read2(q2, lock):
    lock.acquire()
    while True:
        # if not q2.empty() or not q1.empty():
        time.sleep(2)
        value = q2.get(True)
        print('Get %s from queue222.%s' % (value, os.getpid()))
    lock.release()

if __name__ == '__main__':
    manager = Manager()
    # the parent process creates the Queues and passes them to the child processes
    q1 = manager.Queue()
    q2 = manager.Queue()
    lock1 = manager.Lock()
    lock2 = manager.Lock()
    lock3 = manager.Lock()
    start = time.time()
    p = Pool()
    # pw = p.apply_async(write1, args=(q1, lock1,))
    pw = Process(target=write1, args=(q1, lock1,))
    # pr = p.apply_async(read1, args=(q1, q2, lock2,))
    # pr2 = p.apply_async(read2, args=(q2, lock3))
    pr = Process(target=read1, args=(q1, q2, lock2,))
    pr2 = Process(target=read2, args=(q2, lock3,))
    pw.start()
    pr.start()
    pr2.start()
    pw.join()
    pr.terminate()
    pr2.terminate()
    end = time.time()
    print('finished!!')
    print(end - start)
the output is:
Put A to queue111...77678
Put B to queue111...77678
Get A from queue111.77680
Put A to queue222...77680
Put C to queue111...77678
Get A from queue222.77681
Get B from queue111.77680
Put B to queue222...77680
Get C from queue111.77680
Put C to queue222...77680
finished!!
3.025238275527954
You can’t use terminate to control a system like this: it races with completing the actual work. Instead, make your loops not be infinite, probably by using a sentinel value in each Queue (as in one commented-out line).
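(For illustration, a minimal sketch of the sentinel approach applied to this code, under my own reading and with the locks left out for brevity: write1 ends with a None, read1 forwards it downstream and exits, read2 exits when it sees it, and main can then join() all three processes instead of terminating them.)
def write1(q1, lock):
    for value in ['A', 'B', 'C']:
        q1.put(value)
        time.sleep(1)
    q1.put(None)  # sentinel: no more data

def read1(q1, q2, lock):
    while True:
        value = q1.get()
        if value is None:
            q2.put(None)  # forward the sentinel downstream
            break
        q2.put(value)

def read2(q2, lock):
    while True:
        value = q2.get()
        if value is None:
            break
        print('Get %s from queue222.%s' % (value, os.getpid()))

# in main: pw.join(); pr.join(); pr2.join() instead of terminate()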
I have developed a Python multi-threaded program: one producer thread which acquires frames (512 x 640, uint16) at a high rate (around 75 fps), and two consumer threads, one for real-time visualization and one for saving as 16-bit TIFF. I use a separate queue for each consumer.
Visualizing in real time works fine, but saving lags far behind acquisition and keeps running after the video is stopped (even 20 seconds of extra writing for a 2-minute recording). For saving, I used the tifffile library or cv2, with similar performance.
UPDATED info
The images are gray-scale 16-bit numpy arrays placed directly in the queue, with no compression, saved using tifffile.imsave. The second queue, used for visualization, keeps up in real time, so saving must be the slowest step. I need to save each image independently; saving a 3D stack is not an option for the time being. My worry is that using different threads for saving may ruin my acquisition order.
Is there any way, in Python and/or the OS (Windows 10), to accelerate the process, given that I need to save the frames in the same order they were recorded? I have an SSD 970 EVO drive.
# (excerpt: assumes cv2, numpy as np, pandas as pd, struct, os, time, Thread and
#  tifffile's imsave are imported; `camera` is the acquisition API)
class VideoGet():
    def __init__(self, input_dict, folder, record):
        self.handle = input_dict['handle']
        self.frame_t = input_dict['frametype']
        self.frameSize = input_dict['frameSize']
        self.record = record
        self.save = False
        self.done = False
        self.counter = 0
        self.folder = folder

    def displayer(self, q2):
        while self.record is True:
            if q2.empty() is True:
                pass
            else:
                framedisplay = q2.get()
                cv2.namedWindow("Video", cv2.WINDOW_NORMAL)
                cv2.imshow("Video", framedisplay[:-1, :])
                cv2.waitKey(1)
                q2.task_done()
        cv2.destroyAllWindows()

    def consumer(self, q):
        data = []
        while True:
            if q.empty():
                time.sleep(0.002)
            else:
                frame_get = q.get()
                if frame_get is None:
                    print('gone')
                    break
                imsave(os.path.join(self.folder, (str(self.counter).zfill(5) + '.tiff')), frame_get[:-1, :])
                if self.counter == 0:
                    TS = 1.0e-3 * struct.unpack('Q', (frame_get[-1, 6:10]).tobytes())[0]
                entr = [str(self.counter).zfill(5), str(round(1.0e3 * (1.0e-3 * struct.unpack('Q', (frame_get[-1, 6:10]).tobytes())[0] - TS)))]
                data.append(entr)
                self.counter = self.counter + 1
                q.task_done()
        if data:
            df = pd.DataFrame(data, columns=['Picture', 'Timestamp'])
            df.to_csv(os.path.join(self.folder, 'timestamps.txt'), header=False, index=False, sep=' ')
        print('done')
        self.done = True

    def producer(self, buffer, q, q2):
        while self.record is True:
            buffer = np.empty_like(buffer)
            if camera.properties.get_frame(self.handle, self.frame_t, 4, buffer, self.frameSize) == 0:
                frame = buffer
                q2.put(frame)
                if self.save is True:
                    q.put(frame)
                del frame
        print('None')
        q.put(None)

    def run(self, buffer, q, q2):
        prod_thread = Thread(target=self.producer, args=(buffer, q, q2,))
        display_thread = Thread(target=self.displayer, args=(q2,))
        con_thread = Thread(target=self.consumer, args=(q,))
        prod_thread.start()
        display_thread.start()
        con_thread.start()
It's hard to say what's going wrong when you don't show your code. Also, it is not clear to me why the acquisition order would change if you have multiple writers.
Here is a script to generate synthetic frames the same size as your images and save them as TIFF files, in order. It scales up in speed pretty linearly with more writer threads:
NFRAMES NWRITERS TIME(s)
1000 1 1.48
1000 2 0.78
1000 4 0.48
#!/usr/bin/env python3
import time
import numpy as np
import threading, queue
from tifffile import imsave

def writer(q):
    print('[WRITER] Started')
    total = 0
    while True:
        (frameNum, im) = q.get()
        if frameNum < 0:
            break
        # Save as TIFF
        imsave(f'frame-{frameNum}.tif', im)
        total += 1
    print(f'[WRITER] Complete: wrote {total} frames')

if __name__ == "__main__":
    # Edit these to suit
    NFRAMES = 1000
    NWRITERS = 4

    # Create dummy image of correct size
    h, w = 640, 512
    im = np.random.randint(0, 65536, (h, w), dtype=np.uint16)

    # Create a queue to pass frames to writer(s)
    q = queue.Queue(16)

    print('[MAIN] Started')
    start = time.time()

    # Create and start writer thread(s)
    threads = []
    for _ in range(NWRITERS):
        t = threading.Thread(target=writer, args=(q,))
        t.start()
        threads.append(t)

    # Generate a large number of frames to store
    for frameNum in range(NFRAMES):
        # Put a tuple of frameNum and image in queue
        q.put((frameNum, im))

    # Sentinel to tell each writer to exit
    for _ in range(NWRITERS):
        q.put((-1, -1))

    # Wait for our writer thread(s) to exit
    for thread in threads:
        thread.join()

    elapsed = time.time() - start
    print(f'[MAIN] Complete: {NFRAMES} frames, with {NWRITERS} writers in {elapsed} seconds')
Sample Output
[MAIN] Started
[WRITER] Started
[WRITER] Started
[WRITER] Started
[WRITER] Started
[WRITER] Complete: wrote 250 frames
[WRITER] Complete: wrote 250 frames
[WRITER] Complete: wrote 250 frames
[WRITER] Complete: wrote 250 frames
[MAIN] Complete: 1000 frames, with 4 writers in 0.4869719505310059 seconds
One thing I noticed is that it runs about 10% faster if you replace tifffile.imsave() with:
np.save(f'frame-{frameNum}.npy', im)
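(A note of my own, not from the original answer: if .npy files are acceptable during acquisition, they can be converted to TIFF offline afterwards, with order preserved by the frame numbers in the filenames; a sketch assuming tifffile is installed.)
import glob
import numpy as np
from tifffile import imsave

# Offline pass: convert the numbered .npy frames to TIFF after recording ends,
# sorting numerically because the frame numbers are not zero-padded.
for path in sorted(glob.glob('frame-*.npy'),
                   key=lambda p: int(p.split('-')[1].split('.')[0])):
    imsave(path.replace('.npy', '.tif'), np.load(path))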
I am a little bit confused testing the multiprocessing module.
Let's simulate a digital timer. The code would look like:
from datetime import datetime
import time

start = datetime.now()
while True:
    now = datetime.now()
    delta = now - start
    s = delta.seconds + delta.microseconds / 1E6
    print(s)
    time.sleep(1)
Which returns correctly:
8e-06
1.001072
2.00221
3.003353
4.004416
...
Now I want to read the clock from my virtual external digital clock device using a pipe:
from multiprocessing import Pipe, Process

def ask_timer(conn):
    start = datetime.now()
    while True:
        now = datetime.now()
        delta = now - start
        s = delta.seconds + delta.microseconds / 1E6
        conn.send(s)

parent_conn, child_conn = Pipe()
p = Process(target=ask_timer, args=(child_conn,))
p.start()
while True:
    print(parent_conn.recv())
    time.sleep(1)
It returns:
2.9e-05
6.7e-05
7.7e-05
8.3e-05
8.9e-05
9.4e-05
0.0001
...
Here the timer doesn't seem to run permanently in the background. The implementation with a Queue looks like:
from multiprocessing import Process, Queue

def ask_timer(q):
    start = datetime.now()
    while True:
        now = datetime.now()
        delta = now - start
        s = delta.seconds + delta.microseconds / 1E6
        q.put(s)

q = Queue()
p = Process(target=ask_timer, args=(q,))
p.start()
while True:
    print(q.get())
    time.sleep(1)
which behaves the same as the pipe version. Is this just a misconception of mine about Python multiprocessing? How can I read a value in real time from a running parallel process?
Everything is working correctly. The child process executes the ask_timer() function completely independently from your main process. You don't have any time.sleep() in this function, so it just prints or puts deltas into the queue in an infinite loop, at an interval of something like 10 ms.
Once a second your main process asks the child process for data and gets it. The data is one of those small intervals.
The problem is that you're putting much more data into the pipe/queue than you're taking out of it, so when you ask, you're getting old data. To test that, you can print the queue size in the loop (this won't work on OS X):
def ask_timer(q):
    start = datetime.now()
    while True:
        now = datetime.now()
        delta = now - start
        s = delta.seconds + delta.microseconds / 1E6
        q.put(s)

q = Queue()
p = Process(target=ask_timer, args=(q,))
p.start()
while True:
    print(q.get())
    print(q.qsize())
    time.sleep(1)
The queue size will grow really fast.
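(A small alternative I would add here, my own suggestion rather than part of the original answer: drain the queue on each read and keep only the newest value, so the consumer always sees the most recent time without shared memory.)
from queue import Empty  # multiprocessing.Queue raises queue.Empty when drained

def latest(q, default=None):
    # Drain the queue without blocking and keep only the most recent value.
    value = default
    while True:
        try:
            value = q.get_nowait()
        except Empty:
            return value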
Apparently you can also use shared memory to read the current value from the child process:
from multiprocessing import Process, Value
from datetime import datetime
import time
from ctypes import c_double

def ask_timer(v):
    start = datetime.now()
    while True:
        now = datetime.now()
        delta = now - start
        s = delta.seconds + delta.microseconds / 1E6
        v.value = s

val = Value(c_double, 0.0)
p = Process(target=ask_timer, args=(val,))
p.start()
while True:
    print(val.value)
    time.sleep(1)