Use multiprocessing to read multiple video streams? - python
I am using pafy to stream in a set of youtube videos with the aim of combining them (splitscreen style) and displaying as one video. It's working but the frame rate is very slow when going above two videos because a frame is fetched from each stream, when I try 9 videos (for a 3x3 stitch) the fetching of the frames takes 0.1725 secs (too slow).
I figured the best way to reduce this was to fetch the streams in a parallel/multiprocess way.
I tried using Pipes and mulitprocessing but I am getting an EOFError: Ran out of input
See code below comment out/in the frames = line to change between the working but slow method and my attempt at mulitprocessing
import multiprocessing
import cv2
import numpy as np
import pafy
import typing
import timeit
urls = [
"https://www.youtube.com/watch?v=tT0ob3cHPmE",
"https://www.youtube.com/watch?v=XmjKODQYYfg",
"https://www.youtube.com/watch?v=E2zrqzvtWio",
"https://www.youtube.com/watch?v=6cQLNXELdtw",
"https://www.youtube.com/watch?v=s_rmsH0wQ3g",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE"
]
width = np.math.ceil(np.sqrt(len(urls)))
dim = 1920, 1080
def main():
streams = [pafy.new(url).getbest() for url in urls]
videos = [cv2.VideoCapture() for streams in streams]
[video.open(best.url) for video, best in zip(videos, streams)]
cv2.namedWindow('Video', cv2.WINDOW_FREERATIO)
cv2.setWindowProperty('Video', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
while True:
start_time = timeit.default_timer()
# frames = [cv2.resize(video.read()[-1], (dim[0] // width, dim[1] // width)) for video in videos]
frames = get_frames(videos)
print(timeit.default_timer() - start_time)
start_time = timeit.default_timer()
dst = merge_frames(frames)
print(timeit.default_timer() - start_time)
start_time = timeit.default_timer()
cv2.imshow('Video', dst)
if cv2.waitKey(1) & 0xFF == ord('e'):
break
print(timeit.default_timer() - start_time)
continue
[video.release() for video in videos]
cv2.destroyAllWindows()
def get_frames(videos):
# frames = [video.read()[-1] for video in videos]
jobs = []
pipe_list = []
for video in videos:
recv_end, send_end = multiprocessing.Pipe(False)
p = multiprocessing.Process(target=get_frame, args=(video, send_end))
jobs.append(p)
pipe_list.append(recv_end)
p.start()
for proc in jobs:
proc.join()
frames = [x.recv() for x in pipe_list]
return frames
def get_frame(video, send_end):
send_end.send(video.read()[1])
# send_end.send(cv2.resize(video.read()[1], (dim[0] // width, dim[1] // width)))
def merge_frames(frames: typing.List[np.ndarray]):
width = np.math.ceil(np.sqrt(len(frames)))
rows = []
for row in range(width):
i1, i2 = width * row, width * row + width
rows.append(np.hstack(frames[i1: i2]))
return np.vstack(rows)
if __name__ == '__main__':
main()
Interesting application! Re the error, I run your code and the message is that it can't pickle VideoCapture object, see the links below, that's probably why the receiving pipe is empty. There are two errors from two threads: the first is the pickle, then comes the EOF.
EDIT #2: I managed to run it with one process per video etc.:
Regarding performance, I first did it without merging the images (I had to fix some details) to see whether it received, and for 3 and 4 frames, displayed in separated windows from the receiving threads it played very fast, faster than real time (tested with 3-4 streams). I think the merging and resizing for display are slow, the picture gets 2560x1440 for 4 streams (4x1280x720). In my case it's resized to fit the screen.
Thanks for sharing that question and that library etc!
(BTW, I tried with a lock as well, initially but it happened that it is not necessary. The code needs cleaning of some experiments. Also, the current implementation might not be synchronized per frame for each stream, because it doesn't join per frame as your original example, which created new processes to grab one frame from each and then merge them.)
The CPU load is mainly in the main process
(a 4-core CPU, thus max=25% per instance):
Some times:
0.06684677699999853
0.030737616999999773
1.2829999995744856e-06
LEN(FRAMES)= 9
0.06703700200000284
0.030708104000002123
6.409999997458726e-07
LEN(FRAMES)= 9
The waitKey in the main loop can be tweaked.
Code
https://github.com/Twenkid/Twenkid-FX-Studio/blob/master/Py/YoutubeAggregatorPafy/y6.py
# Merging Youtube streams with pafy, opencv and multithreading
# Base code by Fraser Langton - Thanks!
# Refactored and debugged by Twenkid
# version y6 - more cleaning of unused code, properly close VideoCapture in the processes
import multiprocessing #Process, Lock
from multiprocessing import Lock # Not needed
import cv2
import numpy as np
import pafy
import typing
import timeit
import time
urls = [
"https://www.youtube.com/watch?v=tT0ob3cHPmE",
"https://www.youtube.com/watch?v=XmjKODQYYfg",
"https://www.youtube.com/watch?v=E2zrqzvtWio",
"https://www.youtube.com/watch?v=6cQLNXELdtw",
"https://www.youtube.com/watch?v=s_rmsH0wQ3g",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE"
]
# Merging seems to require equal number of sides, so 2x2, 3x3 etc. The resolutions should be the same.
'''
[
"https://www.youtube.com/watch?v=C_9x0P0ebNc",
"https://www.youtube.com/watch?v=Ger6gU_9v9A",
"https://www.youtube.com/watch?v=39dZ5WhDlLE",
"https://www.youtube.com/watch?v=QfhpNe6pOqU",
]
'''
width = np.math.ceil(np.sqrt(len(urls)))
dim = 1920, 1080
streams = []
#bestStreams = []
def main():
global bestStreams
streams = [pafy.new(url).getbest() for url in urls]
print(streams)
#[bestStreams for best in streams]
#print(bestStreams)
cv2.waitKey(0)
videos = [cv2.VideoCapture() for streams in streams]
bestURLS = []
#[video.open(best.url) for video, best in zip(videos, streams)] # Opened per process
[bestURLS.append(best.url) for best in streams]
#[ for video, best in zip(videos, streams)]
print(bestURLS)
cv2.waitKey(0)
cv2.namedWindow('Video', cv2.WINDOW_FREERATIO)
cv2.setWindowProperty('Video', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
LOCK = Lock()
#proc = get_framesUL(bestStreams, LOCK)
#proc, pipes = get_framesULJ(bestStreams, LOCK)
proc, pipes = get_framesULJ(bestURLS, LOCK)
print("PROC, PIPES", proc, pipes)
#cv2.waitKey(0)
frames = []
numStreams = len(streams)
while True:
start_time = timeit.default_timer()
# frames = [cv2.resize(video.read()[-1], (dim[0] // width, dim[1] // width)) for video in videos]
#frames = get_frames(videos, LOCK)
#frames = get_framesUL(streams, LOCK)
print(timeit.default_timer() - start_time)
start_time = timeit.default_timer()
frames = [x.recv() for x in pipes]
lf = len(frames)
print("LEN(FRAMES)=", lf);
#if lf<3: time.sleep(3); print("LEN(FRAMES)=", lf); #continue #Else merge and show
#proc.join()
#elif lf==3: frames = [x.recv() for x in pipes]
dst = merge_frames(frames)
print(timeit.default_timer() - start_time)
start_time = timeit.default_timer()
#if cv2!=None:
try:
cv2.imshow('Video', dst)
except: print("Skip")
#cv2.waitKey(1)
if cv2.waitKey(20) & 0xFF == ord('e'):
break
print(timeit.default_timer() - start_time)
continue
for proc in jobs:
proc.join()
# [video.release() for video in videos] # Per process
cv2.destroyAllWindows()
def get_framesULJ(videosURL, L): #return the processes, join in main and read the frames there
# frames = [video.read()[-1] for video in videos]
print("get_framesULJ:",videosURL)
jobs = []
pipe_list = []
#print("VIDEOS:",videosURL)
#for video in videos:
for videoURL in videosURL: #urls:
recv_end, send_end = multiprocessing.Pipe(False)
print(recv_end, send_end)
p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
#p = multiprocessing.Process(target=get_frame, args=(video, send_end, L))
#if (p==None): continue
print("P = ", p)
#time.sleep(0.001)
jobs.append(p)
print("JOBS, len", jobs, len(jobs))
pipe_list.append(recv_end)
print("pipe_list", pipe_list)
p.start()
#cv2.waitKey(0)
#for proc in jobs:
# proc.join()
#frames = [x.recv() for x in pipe_list]
#return frames
#cv2.waitKey(0)
return jobs, pipe_list
def get_frame2L(videoURL, send_end, L):
v = cv2.VideoCapture()
#[video.open(best.url)
#L.acquire()
v.open(videoURL)
print("get_frame2", videoURL, v, send_end)
#cv2.waitKey(0)
while True:
ret, frame = v.read()
if ret: send_end.send(frame); #cv2.imshow("FRAME", frame); cv2.waitKey(1)
else: print("NOT READ!"); break
#send_end.send(v.read()[1])
#L.release()
def get_framesUL(videosURL, L):
# frames = [video.read()[-1] for video in videos]
jobs = []
pipe_list = []
print("VIDEOS:",videosURL)
#for video in videos:
for videoURL in videosURL: #urls:
recv_end, send_end = multiprocessing.Pipe(False)
print(recv_end, send_end)
p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
#p = multiprocessing.Process(target=get_frame, args=(video, send_end, L))
#if (p==None): continue
print("P = ", p)
#time.sleep(0.001)
jobs.append(p)
print("JOBS, len", jobs, len(jobs))
pipe_list.append(recv_end)
print("pipe_list", pipe_list)
p.start()
for proc in jobs:
proc.join()
frames = [x.recv() for x in pipe_list]
return frames
def get_frames(videos, L):
# frames = [video.read()[-1] for video in videos]
jobs = []
pipe_list = []
print("VIDEOS:",videos)
for video in videos:
recv_end, send_end = multiprocessing.Pipe(False)
print(recv_end, send_end)
p = multiprocessing.Process(target=get_frame, args=(video, send_end, L))
#p = multiprocessing.Process(target=get_frame, args=(video, send_end, L))
#if (p==None): continue
print("P = ", p)
#time.sleep(0.001)
jobs.append(p)
print("JOBS, len", jobs, len(jobs))
pipe_list.append(recv_end)
print("pipe_list", pipe_list)
p.start()
for proc in jobs:
proc.join()
frames = [x.recv() for x in pipe_list]
return frames
def get_frame(video, send_end, L):
L.acquire()
print("get_frame", video, send_end)
send_end.send(video.read()[1])
L.release()
# send_end.send(cv2.resize(video.read()[1], (dim[0] // width, dim[1] // width)))
def get_frame2(videoURL, send_end):
v = video.open(videoURL)
while True:
ret, frame = v.read()
if ret: send_end.send(frame)
else: break
def merge_frames(frames: typing.List[np.ndarray]):
#cv2.imshow("FRAME0", frames[0]) ########## not images/small
#cv2.imshow("FRAME1", frames[1]) ##########
#cv2.imshow("FRAME2", frames[2]) ##########
#cv2.imshow("FRAME3", frames[3]) ##########
#cv2.waitKey(1)
width = np.math.ceil(np.sqrt(len(frames)))
rows = []
for row in range(width):
i1, i2 = width * row, width * row + width
rows.append(np.hstack(frames[i1: i2]))
return np.vstack(rows)
if __name__ == '__main__':
main()
EDIT #1 IDEA: Creating one process per video stream and reading it in a loop (pumping in a pipe), instead of a new process for each frame, and/thus opening the videos/VideoCapture objects with a videoURL through the pipe, instead of sending a VideoCapture object. (I don't know if it has the same pickle issue in this form)
...
in main:
bestURLS = []
proc, pipes = get_framesULJ(bestURLS, LOCK)
[bestURLS.append(best.url) for best in streams]
def get_frame2(videoURL, send_end):
v = video.open(videoURL)
while True:
ret, frame = v.read()
if ret: send_end.send(video)
else: break
def get_framesULJ(videosURL, L): #return the processes, join in main and read the frames there
print("get_framesULJ:",videosURL)
jobs = []
pipe_list = []
for videoURL in videosURL:
recv_end, send_end = multiprocessing.Pipe(False)
print(recv_end, send_end)
p = multiprocessing.Process(target=get_frame2L, args=(videoURL, send_end, L))
print("P = ", p)
jobs.append(p)
print("JOBS, len", jobs, len(jobs))
pipe_list.append(recv_end)
print("pipe_list", pipe_list)
p.start()
return jobs, pipe_list
Original answer:
<multiprocessing.connection.PipeConnection object at 0x000000000D3C7D90> <multip
rocessing.connection.PipeConnection object at 0x000000000D3BD2E0>
Traceback (most recent call last):
File "y.py", line 104, in <module>
main()
File "y.py", line 48, in main
frames = get_frames(videos)
File "y.py", line 80, in get_frames
p.start()
File "C:\Program Files\Python38\lib\multiprocessing\process.py", line 121, in
start
self._popen = self._Popen(self)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 224, in
_Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 326, in
_Popen
return Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\popen_spawn_win32.py", lin
e 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python38\lib\multiprocessing\reduction.py", line 60, in
dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'cv2.VideoCapture' object
Z:\>Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 116, in sp
awn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 126, in _m
ain
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
It fails before p.start.
The instances are created and the structures seems OK:
VIDEOS: [<VideoCapture 000000000D418710>, <VideoCapture 000000000D4186F0>, <Vide
oCapture 000000000D418B70>]
<multiprocessing.connection.PipeConnection object at 0x000000000D3C3D90> <multip
rocessing.connection.PipeConnection object at 0x000000000D3B62E0>
P = <Process name='Process-1' parent=8892 initial>
JOBS, len [<Process name='Process-1' parent=8892 initial>] 1
RECV_END <multiprocessing.connection.PipeConnection object at 0x000000000D3C3D90
>
See the module pickle:
https://docs.python.org/3/library/pickle.html
It seems not everything can be "pickled".
What can be pickled and unpickled?
The following types can be pickled:
None, True, and False
integers, floating point numbers, complex numbers
strings, bytes, bytearrays
tuples, lists, sets, and dictionaries containing only picklable objects
functions defined at the top level of a module (using def, not lambda)
built-in functions defined at the top level of a module
classes that are defined at the top level of a module
instances of such classes whose __dict__ or the result of calling __getstate__() is picklable (see section Pickling Class Instances for details).
Besides it seems there was a bug in opencv causing that. One of the solution given is to turn the multiprocessing off...
Python multiprocess can't pickle opencv videocapture object
https://github.com/MVIG-SJTU/AlphaPose/issues/164
Fang-Haoshu commented on 17 Oct 2018
This bug is due to multi-processing in opencv. --sp disable
multi-processing. BTW, can you tell me the version of opencv that you
are using?
I guess something about locked memory or something.
A workaround I would try would be to first dump the pixels of the object as plain data or raw something, maybe with a header about the size etc.
Also, in general, for smoother play I think some buffering needs to be added.
BTW, what version is your openCV? Mine is 4.2.0
Related
Python: Why reading file in multiple parallel processes is slower than in 1 single process/thread?
What could be the reason that Python multiprocessing is slower that a single thread while reading binary files? def getBinaryData(procnum, filename, pointer_from, pointer_to): binary_values = [] start = time.time() with open(filename, 'rb') as fileobject: # read file byte by byte fileobject.seek(pointer_from) data = fileobject.read(1) while data != b'' or pointer_position < pointer_to: #binary_values.append(ord(data)) data = fileobject.read(1) pointer_position = fileobject.tell() end = time.time() print("proc ", procnum, " finished in: ", end - start) return binary_values def worker(procnum, last_proc_num, file_path, bytes_chunk, return_dict): """worker function""" print(str(procnum) + " represent!") if procnum == 0: greyscale_data = getBinaryData(procnum, file_path, 0, bytes_chunk) elif procnum == last_proc_num: greyscale_data = getBinaryData(procnum, file_path, procnum * bytes_chunk, os.stat(file_path).st_size) else: greyscale_data = getBinaryData(procnum, file_path, procnum * bytes_chunk, (procnum+1) * bytes_chunk) size = get_size(len(greyscale_data)) return_dict[procnum] = procnum def main(): cpu_cores = 10 file_path = r"test_binary_file.exe" file_stats = os.stat(file_path) file_size = file_stats.st_size manager = multiprocessing.Manager() return_dict = manager.dict() jobs = [] for i in range(cpu_cores): p = multiprocessing.Process(target=worker, args=(i, cpu_cores-1, file_path, int(file_size/cpu_cores), return_dict)) jobs.append(p) p.start() for proc in jobs: proc.join() print(return_dict.values()) While single-threaded process finishes to read 10mb file in ~30seconds - the multiprocesses solution gets it done way slower. Python log output: 10 processes 1 process Ruled-out issues: IO bottleneck (NVMe SSD) CPU/RAM bottleneck (16 cores, 4.4 GHz / 64GB 3200GHz RAM)
Processes are heavy and it takes a lot of time to creates and ends a process so in my opinion reading the file is really fast but most of the time it takes to create and terminate the process. To reads a lot of file it's enought to use multithreading because thread are light and GIL works like True multithreading for i/o operations. It's recommended to use multiprocessing when you need to execute heavy operations. Source of picture: https://youtu.be/kRy_UwUhBpo?t=763 He told that the img is from fastpython.com.
cannot start a python multiprocessing.Process twice
Below given is my code. I am trying to scan barcodes and display it using OpenCV. The program works well but there is a huge lag in fps when grabbing frames from drone camera as RTMP stream. Due to the same I am trying to use multi processing method. import pandas as pd import cv2 import numpy as np from pyzbar.pyzbar import decode from pyzbar.pyzbar import ZBarSymbol import time import multiprocessing global frame def barcode(frame): for barcode in decode(frame, symbols=[ZBarSymbol.CODE128]): myData = barcode.data.decode('utf-8') pts = np.array([barcode.polygon],np.int32) pts = pts.reshape((-1,1,2)) cv2.polylines(frame, [pts], True, (255,0,255),5) pts2 = barcode.rect akash = [] akash.append(myData) cv2.putText(frame, myData, (pts2[0], pts2[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,99,71), 2) p1 = multiprocessing.Process(target = barcode) cv2.namedWindow("Result", cv2.WINDOW_NORMAL) vid = cv2.VideoCapture(0) if __name__ == '__main__': while(True): ret, frame = vid.read() if frame is not None: p1.start() cv2.imshow('Result',frame) if cv2.waitKey(1) & 0xFF == ord('q'): break vid.release() vid.destroyAllWindows() and the error is AssertionError Traceback (most recent call last) <ipython-input-1-df50d7c70cda> in <module> 27 ret, frame = vid.read() 28 if frame is not None: ---> 29 p1.start() 30 cv2.imshow('Result',frame) 31 if cv2.waitKey(1) & 0xFF == ord('q'): C:\ProgramData\Anaconda3\lib\multiprocessing\process.py in start(self) 113 ''' 114 self._check_closed() --> 115 assert self._popen is None, 'cannot start a process twice' 116 assert self._parent_pid == os.getpid(), \ 117 'can only start a process object created by current process' AssertionError: cannot start a process twice
Try not to create processes inside loops. The best way to use processes is to create n processes outside and then, with the help of Queues, access and push data. In the following code, I created 5 processes which would run infinitely and try to fetch data from inQ queue. Then do all the processing that you were doing. After that, I'm pushing it to outQ queue, which we'll use later to show the results. In the main, I am simply reading the data from the opencv vid object and the pushing to the inQ which our Processes will use to fetch frame. Next, I'm just fetching the results. This way appears better to me as we don't have to create processes in every iteration as well as we have multiple processes ready to process the data at all times. You can also set the buffer limit for the queue if you want. Also, with live streams, try to have a skipFrame parameter to skip a few frames. That would boost up the fps. import cv2 import numpy as np from pyzbar.pyzbar import decode from pyzbar.pyzbar import ZBarSymbol import time from multiprocessing import Process, Queue inQ = Queue() outQ = Queue() def barcode(): global inQ global outQ try: print("Solving..") frame = inQ.get() for barcode in decode(frame, symbols=[ZBarSymbol.CODE128]): myData = barcode.data.decode('utf-8') pts = np.array([barcode.polygon],np.int32) pts = pts.reshape((-1,1,2)) cv2.polylines(frame, [pts], True, (255,0,255),5) pts2 = barcode.rect akash = [] akash.append(myData) cv2.putText(frame, myData, (pts2[0], pts2[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,99,71), 2) outQ.put(frame) except Exception as e: print(e) for _ in range(5): # configure yourself Process(target = barcode).start() cv2.namedWindow("Result", cv2.WINDOW_NORMAL) if __name__ == '__main__': print("Inside main") vid = cv2.VideoCapture(0) while vid.isOpened(): print("While...") ret, frame = vid.read() if ret: try: inQ.put(frame) except Exception as e: print(e) try: output = outQ.get() cv2.imshow("Result", output) except Exception as e: print(e) vid.release() vid.destroyAllWindows()
Saving images faster on disk
I have developed a Python multi-thread program, one producer thread which acquires frames (512 x 640, 16uint) at high fps (around 75 fps), and two consumer-threads, one for real-time visualization and other for saving as 16-bit tiff. For each one of the consumers, I use a different queue. Visualizing at real-time works fine, but saving takes so much time after the video is stoped ( even 20 seconds for a 2-minute recording). For saving, I used tifffile library or cv2, with similar performances. UPDATED info The images are gray-scale 16-bit numpy arrays directly placed in the queue, not compression, saved using tifffile.imsave. The second queue for visualization works properly on real-time, so saving must be the slowest process. I need to save each image independently, saving 3D is not an option for the time being. Using different threads for saving may ruin my acquisition order. Is there any way, both in Python and/or OS (windows10) to accelerate the process, taking into account I need to save them in the same order they were recorded? I have a SSD 970 EVO disk drive class VideoGet(): def __init__(self,input_dict,folder,record): self.handle=input_dict['handle'] self.frame_t=input_dict['frametype'] self.frameSize= input_dict['frameSize'] # self.buffer = np.zeros(shape=(513,640), dtype=np.uint16) self.record = record self.save = False self.done=False self.counter=0 self.folder = folder def displayer(self,q2): while self.record is True: if q2.empty() is True: pass else: framedisplay = q2.get() cv2.namedWindow("Video", cv2.WINDOW_NORMAL) cv2.imshow("Video", framedisplay[:-1,:]) cv2.waitKey(1) q2.task_done() cv2.destroyAllWindows() def consumer(self,q): data=[] while True: if q.empty(): time.sleep(0.002) pass else: frame_get = q.get() if frame_get is None: print('gone') break imsave(os.path.join(self.folder,(str(self.counter).zfill(5)+'.tiff')), frame_get[:-1,:]) if self.counter==0: TS=1.0e-3 *struct.unpack('Q',(frame_get[-1,6:10]).tobytes())[0] entr=[str(self.counter).zfill(5),str(round(1.0e3*(1.0e-3 *struct.unpack('Q',(frame_get[-1,6:10]).tobytes())[0]-TS)))] data.append(entr) self.counter=self.counter+1 q.task_done() if data: df = pd.DataFrame(data, columns = ['Picture', 'Timestamp']) df.to_csv(os.path.join(self.folder,'timestamps.txt'), header=False, index=False, sep=' ') print('done') self.done=True def producer(self,buffer,q,q2): while self.record is True: buffer=np.empty_like(buffer) if camera.properties.get_frame(self.handle,self.frame_t,4,buffer,self.frameSize)==0: frame=buffer q2.put(frame) if self.save is True: q.put(frame) del frame print('None') q.put(None) def run(self,buffer,q,q2): prod_thread=Thread(target=self.producer,args=(buffer,q,q2,)) display_thread=Thread(target=self.displayer,args=(q2,)) con_thread= Thread(target=self.consumer, args=(q,)) prod_thread.start() display_thread.start() con_thread.start()
It's hard to say what's going wrong when you don't show your code. Also, it is not clear to me why the acquisition order would change if you have multiple writers. Here is a script to generate synthetic frames the same size as your images and save them as TIFF files, in order. It scales up in speed pretty linearly with more writer threads: NFRAMES NWRITERS TIME(s) 1000 1 1.48 1000 2 0.78 1000 4 0.48 #!/usr/bin/env python3 import time import numpy as np import threading, queue from tifffile import imsave def writer(q): print('[WRITER] Started') total = 0 while True: (frameNum, im) = q.get() if frameNum < 0: break # Save as TIFF imsave(f'frame-{frameNum}.tif', im) total += 1 print(f'[WRITER] Complete: wrote {total} frames') if __name__ == "__main__": # Edit these to suit NFRAMES = 1000 NWRITERS= 4 # Create dummy image of correct size h, w = 640, 512 im = np.random.randint(0, 65536, (h,w), dtype=np.uint16) # Create a queue to pass frames to writer(s) q = queue.Queue(16) print('[MAIN] Started') start = time.time() # Create and start writer thread(s) threads = [] for _ in range(NWRITERS): t = threading.Thread(target=writer, args=(q,)) t.start() threads.append(t) # Generate a large number of frames to store for frameNum in range(NFRAMES): # Put a tuple of frameNum and image in queue q.put((frameNum, im)) # Sentinel to tell each writer to exit for _ in range(NWRITERS): q.put((-1,-1)) # Wait for our writer thread(s) to exit for thread in threads: thread.join() elapsed = time.time() - start; print(f'[MAIN] Complete: {NFRAMES} frames, with {NWRITERS} writers in {elapsed} seconds') Sample Output [MAIN] Started [WRITER] Started [WRITER] Started [WRITER] Started [WRITER] Started [WRITER] Complete: wrote 250 frames [WRITER] Complete: wrote 250 frames [WRITER] Complete: wrote 250 frames [WRITER] Complete: wrote 250 frames [MAIN] Complete: 1000 frames, with 4 writers in 0.4869719505310059 seconds One thing I noticed, is that it runs about 10% faster if you replace tifffile.imsave() with: np.save(f'frame-{frameNum}.npy', im)
Python multiprocessing hangs at join
I am reading a video file such that out for every 20 frames I'm storing first frames in Input Queue. Once I get all the required frames in Input Queue, then I run multiple processes to perform some operation on these frames and store the results in output queue. But the code always stuck at join, I tried different solutions proposed for such problems but none of them seems to work. import numpy as np import cv2 import timeit import face_recognition from multiprocessing import Process, Queue, Pool import multiprocessing import os s = timeit.default_timer() def alternative_process_target_func(input_queue, output_queue): while not output_queue.full(): frame_no, small_frame, face_loc = input_queue.get() print('Frame_no: ', frame_no, 'Process ID: ', os.getpid(), '----', multiprocessing.current_process()) #canny_frame(frame_no, small_frame, face_loc) #I am just storing frame no for now but will perform something else later output_queue.put((frame_no, frame_no)) if output_queue.full(): print('Its Full ---------------------------------------------------------------------------------------') else: print('Not Full') print(timeit.default_timer() - s, ' seconds.') print('I m not reading anymore. . .', os.getpid()) def alternative_process(file_name): start = timeit.default_timer() cap = cv2.VideoCapture(file_name) frame_no = 1 fps = cap.get(cv2.CAP_PROP_FPS) length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) print('Frames Per Second: ', fps) print('Total Number of frames: ', length) print('Duration of file: ', int(length / fps)) processed_frames = 1 not_processed = 1 frames = [] process_this_frame = True frame_no = 1 Input_Queue = Queue() while (cap.isOpened()): ret, frame = cap.read() if not ret: print('Size of input Queue: ', Input_Queue.qsize()) print('Total no of frames read: ', frame_no) end1 = timeit.default_timer() print('Time taken to fetch useful frames: ', end1 - start) threadn = cv2.getNumberOfCPUs() Output_Queue = Queue(maxsize=Input_Queue.qsize()) process_list = [] #quit = multiprocessing.Event() #foundit = multiprocessing.Event() for x in range((threadn - 1)): # print('Process No : ', x) p = Process(target=alternative_process_target_func, args=(Input_Queue, Output_Queue))#, quit, foundit #p.daemon = True p.start() process_list.append(p) #p.join() # for proc in process_list: # print('---------------------------------------------------------------', proc.p) i = 1 for proc in process_list: print('I am hanged here') proc.join() print('I am done') i += 1 end = timeit.default_timer() print('Time taken by face verification: ', end - start) break if process_this_frame: print(frame_no) small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25) rgb_small_frame = small_frame[:, :, ::-1] face_locations = face_recognition.face_locations(rgb_small_frame) # frames.append((rgb_small_frame, face_locations)) Input_Queue.put((frame_no, rgb_small_frame, face_locations)) frame_no += 1 if processed_frames < 5: processed_frames += 1 not_processed = 1 else: if not_processed < 15: process_this_frame = False not_processed += 1 else: processed_frames = 1 process_this_frame = True print('-----------------------------------------------------------------------------------------------') cap.release() cv2.destroyAllWindows() alternative_process('user_verification_2.avi')
As the documentation on Process.join() says, hanging (or "blocking") is exactly what is expected to happen: Block the calling thread until the process whose join() method is called terminates or until the optional timeout occurs. join() stops current thread until the target process finishes. Target process is calling alternative_process_target_func, so the problem is obviously in that function. It never finishes. There may be more than one reason for that. Problem 1 alternative_process_target_func runs until output_queue.full(). What if it is never full? It never ends? It is really better to determine the end some other way, e.g. run until the input queue is empty. Problem 2 input_queue.get() will block if the input queue is empty. As the documentation says: Remove and return an item from the queue. If optional args block is true and timeout is None (the default), block if necessary until an item is available. You are running multiple processes, so do not expect that there is something in input just because output_queue.full() was False a moment ago, and because input size is the same as output size. A lot could have happened in the meantime. What you want to do is: try: input_queue.get(False) # or input_queue.get_nowait() except Empty: break # stop when there is nothing more to read from the input Problem 3 output_queue.put((frame_no, frame_no)) will block if there is no room in the output to store the data. Again, you are assuming that there is room in output, just because you checked output_queue.full() a few moments ago, and because input size is equal to output size. Never rely on such things. You want to do the same thing as for input: try: output_queue.put((frame_no, frame_no), False) # or output_queue.put_nowait((frame_no, frame_no)) except Empty: # deal with this somehow, e.g. raise Exception("There is no room in the output queue to write to.")
reading video frames in parallel with opencv and Python
I have to read frames from a video in parallel with multiprocessing & queues using opencv in Python and I'm getting an error with my code. This is my code, and I don't know where my problem is. #! /usr/bin/python import numpy as np import cv2 import multiprocessing as mp import time def read_frames(q1,q2): while True : frame = q1.get() if frame=='Done': break R=frame[:,:,0] G=frame[:,:,1] B=frame[:,:,2] y = (np.uint8)((0.299 * R)+ (0.587 *G) +( 0.114 *B)) q2.put(y) if __name__ == '__main__': q1 = mp.Queue() q2 = mp.Queue() processes =[mp.Process(target=read_frames, args= (q1,q2)) for i in rang$ for p in processes: p.start() # feed the processes # read input file and send to the processes the frames: cap = cv2.VideoCapture('gou.avi') lines = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) cols = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) fps = int(cap.get(cv2.CAP_PROP_FPS)) fourcc_ver = int(cap.get(cv2.CAP_PROP_FOURCC)) out = cv2.VideoWriter('output.avi',fourcc_ver, fps, (cols,lines),False) # y = np.empty(shape=(lines,cols),dtype=np.uint8) while(cap.isOpened()): ret, frame = cap.read() # as long as new frames are there if ret==True: q1.put(frame) if cv2.waitKey(1) & 0xFF == ord('q'): break else: break q1.put('Done') for p in processes: p.join() for p in processes: result=[q2.get()] # result.sort() # results = [r[1] for r in results] for i in result: out.write(i) # Release everything if job is finished cap.release() out.release() cv2.destroyAllWindows()
What you could do is add all the frames to a list[] and then use list references such as frame = list[] and then address frames by location on the list: frame[0] or frame[1:4]