How can I reduce the frame rate in YOLOv5 while using the camera as a source? - python

I am working on a project to predict pedestrian paths using trajectory history, following this paper, and I am trying to run it and implement the missing annotation step:
https://github.com/JunweiLiang/Multiverse/blob/master/SimAug
I used YOLOv5 to produce those annotations, but the camera runs at 30 FPS and it takes a long time to process all these frames. I want to decrease the effective frame rate so that, for example, only 2 frames per second go through the pipeline:
(detection --> tracking) --> segmentation --> prediction and so on.
So basically, here is what I did:
import numpy as np
import cv2
import datetime
import queue
from threading import Thread

# global variables
stop_thread = False  # controls thread execution

def start_capture_thread(cap, queue):
    # global stop_thread
    i = 0
    # continuously read frames from the camera
    while True:
        _, img = cap.read()
        queue.put(img)
        # cv2.imwrite('Images/frame{:d}.jpg'.format(i), img)
        i = i + 1
        # if (stop_thread):
        #     break

def main():
    global stop_thread

    # create display window
    cv2.namedWindow("webcam", cv2.WINDOW_NORMAL)

    # initialize webcam capture object
    cap = cv2.VideoCapture(0)
    #cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)

    # retrieve properties of the capture object
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    print('* Capture width:', cap_width)
    print('* Capture height:', cap_height)
    print('* Capture FPS:', cap_fps)

    # create a queue
    frames_queue = queue.Queue(maxsize=0)

    # start the capture thread: reads frames from the camera (non-stop) and stores them in the queue
    t = Thread(target=start_capture_thread, args=(cap, frames_queue,), daemon=True)  # a daemon thread is killed when the application exits
    t.start()

    # initialize time and frame count variables
    last_time = datetime.datetime.now()
    frames = 0
    cur_fps = 0
    i = 0

    while True:
        if frames_queue.empty():
            continue

        # keep only every 5th frame; discard the rest
        if i % 5 != 0:
            _ = frames_queue.get()
            i += 1
            continue

        # blocks until the entire frame is read
        frames += 1

        # # measure runtime: current_time - last_time
        # delta_time = datetime.datetime.now() - last_time
        # elapsed_time = delta_time.total_seconds()
        # # compute fps but avoid division by zero
        # if (elapsed_time != 0):
        #     cur_fps = np.around(frames / elapsed_time, 1)

        # retrieve an image from the queue
        img = frames_queue.get()
        cv2.imwrite('Images/frame{:d}.jpg'.format(i), img)
        i += 1

        # for i in range(10):
        #     _ = frames_queue.get()
        #     i += 1

        # TODO: process the image here if needed

        # draw FPS text and display image
        # if (img is not None):
        #     cv2.putText(img, 'FPS: ' + str(cur_fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
        #     cv2.imshow("webcam", img)

        # wait 1ms for ESC to be pressed
        key = cv2.waitKey(1)
        if key == 27:
            stop_thread = True
            break

    # release resources
    cv2.destroyAllWindows()
    cap.release()

if __name__ == "__main__":
    main()
It works, so I want to use this in track.py in YOLOv5 instead of
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
but I can't simply modify it, because the dataset is consumed in the code like this:
for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):
and I don't fully understand that line. So I wanted to modify the LoadStreams class in https://github.com/ultralytics/yolov5/blob/master/utils/datasets.py but I am kind of stuck here.
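For what it's worth, the loop only consumes the (path, img, im0s, vid_cap, s) tuples that the dataset yields, so one way to throttle the pipeline without touching LoadStreams at all is a small generator that drops all but every Nth tuple (as far as I can tell, LoadStreams captures frames in a background thread, so dropped tuples are simply never processed). This is only a sketch; skip_frames and keep_every are names made up here:

def skip_frames(dataset, keep_every=15):
    # Yield only every `keep_every`-th item from `dataset`.
    # With a 30 FPS camera, keep_every=15 gives roughly 2 processed FPS.
    for frame_idx, item in enumerate(dataset):
        if frame_idx % keep_every == 0:
            yield item

# usage inside track.py, replacing the plain enumerate(dataset):
# dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
# for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(skip_frames(dataset, keep_every=15)):
#     ...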

Related

Failing to save video after applying saliency

I am attempting to save the output of the code proposed here: https://www.programmersought.com/article/34317272452/
The output video file is created, but when I try to play it back I get the message 'an error occurred, could not demultiplex stream'. I have also tried several other ways to save the video; for example, I used the shape of the frames, but that wasn't recognized either. Could anyone help me figure out what I am doing wrong?
def read_video(video):
    # Saliency detection algorithm
    saliency_algorithm = "BinWangApr2014"
    start_frame = 0
    if saliency_algorithm is None or video is None:
        print('Please set saliency_algorithm and video')
        return

    cap = cv2.VideoCapture(video)
    video = cv2.VideoWriter('out_j.avi', cv2.VideoWriter_fourcc(*'XVID'), 5, (360, 360))

    # Set the video start frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    _, frame = cap.read()
    if frame is None:
        print('Please set saliency_algorithm and video')
        return

    image = frame.copy()
    if saliency_algorithm.find("BinWangApr2014") == 0:
        saliency_algorithm = cv2.saliency.MotionSaliencyBinWangApr2014_create()
        # set the size of the data structure
        saliency_algorithm.setImagesize(image.shape[1], image.shape[0])
        # Initialization
        saliency_algorithm.init()

        paused = False
        while True:
            if not paused:
                _, frame = cap.read()
                if frame is None:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                start = cv2.getTickCount()
                success, saliencyMap = saliency_algorithm.computeSaliency(frame)
                duration = (cv2.getCPUTickCount() - start) / \
                           cv2.getTickFrequency()
                #print("computeBinaryMap cost time is: {} ms".format(duration * 1000))
                video.write(frame)
                #cv2.imshow('image', frame)
                cv2.imshow('saliencyMap', saliencyMap * 255)

            c = cv2.waitKey(2)
            c = chr(c) if c != -1 else 0
            if c == 'q':
                break
            if c == 'p':
                paused = not paused
            #video.write(frame)

        cv2.destroyAllWindows()
        video.release()
        #camera.stop()
    return
Thanks in advance
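For what it's worth, one likely cause of the 'could not demultiplex stream' error is that the VideoWriter parameters do not match the frames actually written: the writer above is opened for 360x360 colour frames, while frame has been converted to single-channel grayscale at the camera's native resolution, so nothing decodable ends up in the container. Below is a minimal sketch of a writer that matches its frames (the input path is a placeholder):

import cv2

cap = cv2.VideoCapture('input.avi')  # hypothetical input path
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# open the writer with the real frame size, and keep writing 3-channel
# frames so the default isColor=True matches what the codec expects
out = cv2.VideoWriter('out_j.avi', cv2.VideoWriter_fourcc(*'XVID'), 5, (w, h))

while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)     # processing runs on gray
    out.write(cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR))  # convert back before writing

cap.release()
out.release()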

How to perform real-time YOLOv3 object detection from 4 cameras streaming asynchronously

I have implemented a Python app that performs real-time car-model and license-plate recognition from a camera using YOLOv3 / OpenCV-GPU. Frames are captured in a thread, with a decent FPS.
I am trying to extend it to stream from 4 cameras, showing the frames in a PySimpleGUI window. However, I can't figure out how to make the camera streams run in parallel. Each stream seems to wait for the previous one to finish before delivering a frame, so the overall FPS drops to a quarter.
if __name__ == '__main__':
    Results.num_cam = 2
    # initialize the video streams and allow them to warm up
    img_size_x, img_size_y = 1280, 800
    window = create_window(img_size_x, img_size_y, Results.num_cam)
    print("[INFO] starting cameras...")
    prev_frame_time = 0
    if Results.num_cam == 2:
        webcam1 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
        webcam2 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
    else:
        webcam1 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
        webcam2 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
        webcam3 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
        webcam4 = VideoStream(src='rtsp://admin:VVZOZR#192.168.1.11:554/11').start()
    time.sleep(2.0)
    Results.car_m, Results.car_l, _ = yolo_cars()
    Results.pl_m, Results.pl_l, _ = yolo_plate()
    # initialize the motion detectors, along with the total number of frames read
    results = Results()
    total = 0
    while True:
        # initialize image element objects
        if Results.num_cam == 2:
            cam_view = window['-FRAME1-']
            cam1_view = window['-FRAME2-']
        else:
            cam_view = window['-FRAME1-']
            cam1_view = window['-FRAME2-']
            cam2_view = window['-FRAME3-']
            cam3_view = window['-FRAME4-']
        # initialize the list of frames that have been processed
        frames = []
        event, ts = window.read(timeout=0.05)
        t1 = time.perf_counter()
        if event == sg.WINDOW_CLOSED:
            break
        # loop over the frames and their respective motion detectors
        if Results.num_cam == 2:
            res_all = (results, results)
            web_all = (webcam1, webcam2)
        else:
            res_all = (results, results, results, results)
            web_all = (webcam1, webcam2, webcam3, webcam4)
        for (stream, res) in zip(web_all, res_all):
            # read the next frame from the video stream and resize
            frame = stream.read()
            res_inst = res.process_frame(frame, True)
            if total < 32:
                frames.append(res_inst.frame)
                continue
            # update the frames list
            frames.append(res_inst.frame)
        # increment the total number of frames read and grab the current timestamp
        total += 1
        if Results.num_cam == 2:
            for (frame, name) in zip(frames, (cam_view, cam1_view)):
                name.update(frame)
        else:
            for (frame, name) in zip(frames, (cam_view, cam1_view, cam2_view, cam3_view)):
                name.update(frame)
        new_frame_time = time.time()
        fps = 1 / (new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time
        fps = int(fps)
        window['-FR-'].update(str(fps))
    # do a bit of cleanup
    print("[INFO] cleaning up...")
    for web in web_all:
        web.stop()
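A general pattern that can help here is to dispatch the per-camera read-and-process calls to a thread pool, so the four cameras are serviced concurrently instead of one after another. The sketch below uses assumed RTSP URLs and a stand-in process() function rather than the asker's Results class. Two caveats: if all inference goes through a single GPU model, the model itself may still serialize internally (only capture, decoding and pre/post-processing overlap for sure), and sharing one results object across threads would need its own locking.

from concurrent.futures import ThreadPoolExecutor
import cv2

urls = ['rtsp://camera1/11', 'rtsp://camera2/11',
        'rtsp://camera3/11', 'rtsp://camera4/11']   # hypothetical sources
caps = [cv2.VideoCapture(u) for u in urls]

def process(frame):
    # stand-in for res.process_frame(frame, True); per-frame work goes here
    return frame

def grab_and_process(cap):
    ok, frame = cap.read()
    return process(frame) if ok else None

with ThreadPoolExecutor(max_workers=len(caps)) as pool:
    # one iteration of the display loop: all four cameras serviced in parallel
    frames = list(pool.map(grab_and_process, caps))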

How to prompt an input box when an object is detected in real time and have the output printed in a text file? (TensorFlow Object Detection)

I'm using the TensorFlow Object Detection API with OpenCV to detect objects in real time using a webcam.
NOTE: I'm using TensorFlow 2.4.1.
How can I display an input box when an object is detected in real time and then, once the user has entered an answer, save the class name and the user's answer in a text file (.txt)?
For example: once the webcam detects the class 'banana', an input box prompts the user, "How many bananas do you want to input?". The user enters 2, and the text file output will be: banana = 2.
Here's the code for real-time detection for reference:
import cv2
import numpy as np

category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH + '/label_map.pbtxt')
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

while True:
    ret, frame = cap.read()
    image_np = np.array(frame)
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    label_id_offset = 1
    image_np_with_detections = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'] + label_id_offset,
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=3,
        min_score_thresh=.9,
        agnostic_mode=False)
    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break
Thank you!
By directly running this simplified example code:
import cv2
import numpy as np
import keyboard

#category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

while True:
    ret, frame = cap.read()
    image_np = np.array(frame)
    #input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    #detections = detect_fn(input_tensor)
    #num_detections = int(detections.pop('num_detections'))
    #detections = {key: value[0, :num_detections].numpy()
    #              for key, value in detections.items()}
    #detections['num_detections'] = num_detections
    # detection_classes should be ints.
    #detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    #label_id_offset = 1
    image_np_with_detections = image_np.copy()
    #viz_utils.visualize_boxes_and_labels_on_image_array(
    #    image_np_with_detections,
    #    detections['detection_boxes'],
    #    detections['detection_classes']+label_id_offset,
    #    detections['detection_scores'],
    #    max_boxes_to_draw=3,
    #    min_score_thresh=.9,
    #    agnostic_mode=False)
    # simulate a detection with 5% probability per frame
    if np.random.random() < 0.05:
        if np.random.random() > 0.5:
            detected_item = 'Banana'
        else:
            detected_item = 'Carrot'
        print(detected_item + " found!")
        cv2.imshow(detected_item, image_np_with_detections)
        #cv2.moveWindow('object detection', 100, 50)
        #cv2.moveWindow('something', 100 + 660, 50)
        cv2.waitKey(0)
        # get the user input:
        user_reaction = keyboard.read_key()
        print(user_reaction)
        # and save it to a text file:
        tiedostonimi = "jotain" + str(np.random.randint(10000, 20000)) + ".txt"
        tekstitiedosto = open(tiedostonimi, "w")
        teksti = "We found " + detected_item + " and ate those " + user_reaction + " ...brp!"
        tekstitiedosto.write(teksti)
        tekstitiedosto.close()
        print("...text file saved!")
    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break
...you will see directly how, every time you get a new detection, a new window appears, the system waits for your response, and the result is saved in .txt format. By adapting this example appropriately you can find a solution to your problem.
The stand-alone code attached below shows how to build an "assistant window" for advanced interactive operations alongside a usual cv2 window. That kind of approach is useful when the basic functionality of a cv2 window is not enough.
To begin monitoring the cv2 input, simply start the program and you will see the "assistant window". First press "Start": the cv2 window appears, and detections are drawn on it as they are made. When you enter the user input (numbers, text, or whatever) and press "OK", the file is saved. Then press "Continue search" and the search for new detections continues.
It is easy to modify this "CV2 assistant" idea for different kinds of processes alongside ordinary CV2 image processing.
import cv2
import numpy as np
import keyboard
# let's add some tkinter functionality for advanced inputs
import tkinter
from tkinter import *
import matplotlib.pyplot as plt

# let's make some global variables
global image_np_with_detections
global detected_item
global detection_box_xy
global ikkuna_sijainti
global cv_ikkuna_sijainti
global cap

# initialize the location of the cv image screen...
cv_ikkuna_sijainti = [500, 200]

# helpful function...
def change_position(root_variable, x, y):
    root_variable.geometry("+{}+{}".format(x, y))
    root_variable.update()

# let's make a tkinter window...
ikkuna = tkinter.Tk()
ikkuna.title('CV2 assistant window...')
ikkuna_sijainti = [cv_ikkuna_sijainti[0], cv_ikkuna_sijainti[1] - 150]
change_position(ikkuna, ikkuna_sijainti[0], ikkuna_sijainti[1])

#category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

def pollaus():
    global image_np_with_detections
    global detected_item
    global detection_box
    global ikkuna_sijainti
    global cv_ikkuna_sijainti
    global cap
    cap = cv2.VideoCapture(0)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    painike_1['text'] = 'Search'
    while True:
        ret, frame = cap.read()
        image_np = np.array(frame)
        # let's ask the assistant window to vibrate during search...
        ikkuna_sijainti = [cv_ikkuna_sijainti[0] + np.random.randint(0, 10), cv_ikkuna_sijainti[1] - 150]
        change_position(ikkuna, ikkuna_sijainti[0], ikkuna_sijainti[1])
        #input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
        #detections = detect_fn(input_tensor)
        #num_detections = int(detections.pop('num_detections'))
        #detections = {key: value[0, :num_detections].numpy()
        #              for key, value in detections.items()}
        #detections['num_detections'] = num_detections
        # detection_classes should be ints.
        #detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
        #label_id_offset = 1
        image_np_with_detections = image_np.copy()
        #viz_utils.visualize_boxes_and_labels_on_image_array(
        #    image_np_with_detections,
        #    detections['detection_boxes'],
        #    detections['detection_classes']+label_id_offset,
        #    detections['detection_scores'],
        #    max_boxes_to_draw=3,
        #    min_score_thresh=.9,
        #    agnostic_mode=False)
        # simulate a detection with 5% probability per frame
        if np.random.random() < 0.05:
            if np.random.random() > 0.5:
                detected_item = 'Banana'
            else:
                detected_item = 'Carrot'
            print(detected_item + " found!")
            detection_box = [np.random.randint(100, 200), np.random.randint(100, 200),
                             np.random.randint(300, 400), np.random.randint(300, 400)]
            alkunurkka = (detection_box[0], detection_box[1])
            loppunurkka = (detection_box[2], detection_box[3])
            image_np = cv2.rectangle(image_np, alkunurkka, loppunurkka, (0, 255, 0), 2)
            #paikka = (alkunurkka, loppunurkka)
            vari = (255, 0, 0)
            fontti = cv2.FONT_HERSHEY_SIMPLEX
            fontScale = 1
            image_np = cv2.putText(image_np, detected_item, alkunurkka, fontti, fontScale, vari, 2, cv2.LINE_AA)
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            painike_1['text'] = 'Continue search'
            cap.release()
            break
        else:
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            cv2.moveWindow('object detection', cv_ikkuna_sijainti[0], cv_ikkuna_sijainti[1])
            cv2.waitKey(1)
    # after the while loop, let's ask the user for input...
    kysymys()

def kysymys():
    global image_np_with_detections
    global detected_item
    global detection_box
    global ikkuna_sijainti
    global cv_ikkuna_sijainti
    # let's set the assistant window to the correct position according to the detected item...
    apu_x = cv_ikkuna_sijainti[0] + detection_box[0] - 70
    apu_y = cv_ikkuna_sijainti[1] + detection_box[1] - 120
    change_position(ikkuna, apu_x, apu_y)
    # and let's put the assistant window on top...
    ikkuna.lift()

def tallennatiedot():
    print("Begin to save a file...")
    user_reaction = syote.get()
    # and save it to a text file:
    tiedostonimi = "jotain" + str(np.random.randint(10000, 20000)) + ".txt"
    tekstitiedosto = open(tiedostonimi, "w")
    teksti = "We found " + detected_item + " and ate those " + user_reaction + " ...brp!"
    tekstitiedosto.write(teksti)
    tekstitiedosto.close()
    print("...text file saved!")
    painike_1['text'] = 'Search'
    return

def lopeta():
    global cap
    cap.release()
    print("Search stopped!")

painike_1 = tkinter.Button(ikkuna, text="Start", command=pollaus, height=5, width=20)
painike_1.grid(row=0, column=0, pady=10, padx=10)
painike_1.config(height=2, width=20)
otsikko = tkinter.Label(ikkuna, text="Enter amount and press OK")
otsikko.grid(row=0, column=1, pady=5, padx=10)
painike_2 = tkinter.Button(ikkuna, text="OK", command=tallennatiedot, height=5, width=20)
painike_2.grid(row=0, column=2, pady=10, padx=10)
painike_2.config(height=2, width=20)
painike_3 = tkinter.Button(ikkuna, text="Stop", command=lopeta, height=5, width=20)
painike_3.grid(row=1, column=0, pady=10, padx=10)
painike_3.config(height=2, width=20)
syote = tkinter.Entry(ikkuna, width=20)
syote.grid(row=1, column=1, pady=10, padx=5)

ikkuna.mainloop()

Unable to save image from Raspi camera -> cv2.imwrite (Using Jetson Nano)

I am trying to save images from the Raspi cameras connected to my Jetson Nano. My code is below. The code reports that it is saving the files, but no matter which method I try I cannot find the images. Thanks for your help. I've included a smaller snippet of just the while loop so it is easier to refer to.
While loop:
while True:
    _, left_image = left_camera.read()
    _, right_image = right_camera.read()
    camera_images = np.hstack((left_image, right_image))
    cv2.imshow("CSI Cameras", camera_images)
    t1 = datetime.now()
    cntdwn_timer = countdown - int((t1 - t2).total_seconds())
    # If countdown is zero - let's record the next image
    if cntdwn_timer == -1:
        counter += 1
        filename = './scenes/scene_' + str(counter) + 'x' + '_' + '.png'
        #img = cv2.imread(camera_images)
        #cv2.imwrite(os.path.join(os.path.expanduser('~'), 'CSI-Camera', filename), camera_images)
        cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
        print(' monkey' + filename)
        t2 = datetime.now()
        time.sleep(1)
        cntdwn_timer = 0  # To avoid "-1" timer display
    # This also acts as
    keyCode = cv2.waitKey(30) & 0xFF
    # Stop the program on the ESC key
    if keyCode == 27:
        break

left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
import cv2
import threading
import numpy as np
import time
from datetime import datetime

# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen

left_camera = None
right_camera = None

# PiCam
# Photo session settings
total_photos = 30                # Number of images to take
countdown = 5                    # Interval for count-down timer, seconds
font = cv2.FONT_HERSHEY_SIMPLEX  # Countdown timer font

class CSI_Camera:
    def __init__(self):
        # Initialize instance variables
        # OpenCV video capture element
        self.video_capture = None
        # The last captured image from the camera
        self.frame = None
        self.grabbed = False
        # The thread where the video capture runs
        self.read_thread = None
        self.read_lock = threading.Lock()
        self.running = False

    def open(self, gstreamer_pipeline_string):
        try:
            self.video_capture = cv2.VideoCapture(
                gstreamer_pipeline_string, cv2.CAP_GSTREAMER
            )
        except RuntimeError:
            self.video_capture = None
            print("Unable to open camera")
            print("Pipeline: " + gstreamer_pipeline_string)
            return
        # Grab the first frame to start the video capturing
        self.grabbed, self.frame = self.video_capture.read()

    def start(self):
        if self.running:
            print('Video capturing is already running')
            return None
        # create a thread to read the camera image
        if self.video_capture != None:
            self.running = True
            self.read_thread = threading.Thread(target=self.updateCamera)
            self.read_thread.start()
        return self

    def stop(self):
        self.running = False
        self.read_thread.join()

    def updateCamera(self):
        # This is the thread to read images from the camera
        while self.running:
            try:
                grabbed, frame = self.video_capture.read()
                with self.read_lock:
                    self.grabbed = grabbed
                    self.frame = frame
            except RuntimeError:
                print("Could not read image from camera")
        # FIX ME - stop and cleanup thread
        # Something bad happened

    def read(self):
        with self.read_lock:
            frame = self.frame.copy()
            grabbed = self.grabbed
        return grabbed, frame

    def release(self):
        if self.video_capture != None:
            self.video_capture.release()
            self.video_capture = None
        # Now kill the thread
        if self.read_thread != None:
            self.read_thread.join()

# Currently we set the frame rate of the CSI camera on the Nano through gstreamer
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
    sensor_id=0,
    sensor_mode=3,
    capture_width=1280,
    capture_height=720,
    display_width=1280,
    display_height=720,
    framerate=30,
    flip_method=0,
):
    return (
        "nvarguscamerasrc sensor-id=%d sensor-mode=%d ! "
        "video/x-raw(memory:NVMM), "
        "width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1 ! "
        "nvvidconv flip-method=%d ! "
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
        "videoconvert ! "
        "video/x-raw, format=(string)BGR ! appsink"
        % (
            sensor_id,
            sensor_mode,
            capture_width,
            capture_height,
            framerate,
            flip_method,
            display_width,
            display_height,
        )
    )

def start_cameras():
    left_camera = CSI_Camera()
    left_camera.open(
        gstreamer_pipeline(
            sensor_id=0,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    left_camera.start()

    right_camera = CSI_Camera()
    right_camera.open(
        gstreamer_pipeline(
            sensor_id=1,
            sensor_mode=3,
            flip_method=0,
            display_height=540,
            display_width=960,
        )
    )
    right_camera.start()

    cv2.namedWindow("CSI-AV Cameras", cv2.WINDOW_AUTOSIZE)

    if (
        not left_camera.video_capture.isOpened()
        or not right_camera.video_capture.isOpened()
    ):
        # Cameras did not open, or no camera attached
        print("Unable to open any cameras")
        # TODO: Proper Cleanup
        SystemExit(0)

    counter = 0
    t2 = datetime.now()

    # Main stuff here
    while True:
        _, left_image = left_camera.read()
        _, right_image = right_camera.read()
        camera_images = np.hstack((left_image, right_image))
        cv2.imshow("CSI Cameras", camera_images)
        t1 = datetime.now()
        cntdwn_timer = countdown - int((t1 - t2).total_seconds())
        # If countdown is zero - let's record the next image
        if cntdwn_timer == -1:
            counter += 1
            filename = './scenes/scene_' + str(counter) + 'x' + '_' + '.png'
            #img = cv2.imread(camera_images)
            #cv2.imwrite(os.path.join(os.path.expanduser('~'), 'CSI-Camera', filename), camera_images)
            cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
            print(' monkey' + filename)
            t2 = datetime.now()
            time.sleep(1)
            cntdwn_timer = 0  # To avoid "-1" timer display
        keyCode = cv2.waitKey(30) & 0xFF
        # Stop the program on the ESC key
        if keyCode == 27:
            break

    left_camera.stop()
    left_camera.release()
    right_camera.stop()
    right_camera.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    start_cameras()
Put a breakpoint on the line where you save the image, and inspect the image you want to save:
Does it have data inside?
Does it have a camera_images.shape?
What is the return value of the cv2.imwrite function?
Does the path you are trying to write to really exist?
Did you append .png or .jpg?
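For instance, in the code above filename already starts with './scenes/', so the final path becomes '/home/aryan/CSI-Camera/./scenes/scene_1x_.png'; if that scenes directory does not exist, cv2.imwrite fails silently and returns False. A small sketch of these checks (paths are illustrative):

import os
import cv2
import numpy as np

filename = 'scene_1x_.png'                          # illustrative name
out_dir = '/home/aryan/CSI-Camera/scenes'           # illustrative path
os.makedirs(out_dir, exist_ok=True)                 # make sure the directory exists

camera_images = np.zeros((540, 1920, 3), np.uint8)  # stand-in for the real frame
print(camera_images.shape)                          # does the image have data?

ok = cv2.imwrite(os.path.join(out_dir, filename), camera_images)
print('imwrite returned:', ok)                      # False means nothing was written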

Speed up the creation of a circular ROI on a 4K 25 fps video in Python OpenCV

I am working with a 2-hour-long 4K MP4 video shot at 25 fps, using OpenCV in Python 3. From the video, I need to extract 3 circular ROIs.
Because of the large number of frames (212831), processing the whole video frame by frame with the code below takes over 24 h on a 64 GB RAM workstation. Is there a way to speed up the processing, or a workaround that does something slightly different to similar effect?
Here is the code.
import numpy as np
import cv2
import sys
import time

# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'

# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188  # in pixels
y = 1204  # in pixels
radius = 75

# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0  # output vs input video speed (?)

## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
    sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')

## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec)  # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
# (frame shape is given as height,width, so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width * scaling), int(o_height * scaling))
out = cv2.VideoWriter(filename=output_vidpath, fourcc=0x7634706d, fps=fps, frameSize=output_framesize, isColor=True)

## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()  # 'return' value (T/F) and frame
    this = cap.get(1)  # get 'CV_CAP_PROP_POS_FRAMES'
    if ret == True:
        #frame = cv2.resize(frame, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_LINEAR)  # no need to resize in this case
        # Apply mask to area of interest
        mask = np.zeros((o_height, o_width), np.uint8)
        mask = cv2.circle(mask, (x, y), radius, 255, thickness=-1)  # image, centre coordinates, radius, color, thickness
        frame[mask == 0] = 0
        out.write(frame)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
    f += 1
    if f % 1000 == 0:
        print(f)
    if last == this:
        break
    last = this

## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" % duration)
This is a common mistake. You shouldn't call waitKey(1) when you want to process frames as fast as possible. That function basically adds a short sleep after processing each frame, and that sleep time is much longer than the processing time.
You just need to remove it; you can still kill the process if you want to stop in the middle.
import numpy as np
import cv2
import sys
import time

# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'

# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188  # in pixels
y = 1204  # in pixels
radius = 75

# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0  # output vs input video speed (?)

## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
    sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')

## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec)  # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
# (frame shape is given as height,width, so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width * scaling), int(o_height * scaling))
out = cv2.VideoWriter(filename=output_vidpath, fourcc=0x7634706d, fps=fps, frameSize=output_framesize, isColor=True)

## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()  # 'return' value (T/F) and frame
    this = cap.get(1)  # get 'CV_CAP_PROP_POS_FRAMES'
    if ret == True:
        #frame = cv2.resize(frame, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_LINEAR)  # no need to resize in this case
        # Apply mask to area of interest
        mask = np.zeros((o_height, o_width), np.uint8)
        mask = cv2.circle(mask, (x, y), radius, 255, thickness=-1)  # image, centre coordinates, radius, color, thickness
        frame[mask == 0] = 0
        out.write(frame)
    f += 1
    if f % 1000 == 0:
        print(f)
    if last == this:
        break
    last = this

## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" % duration)
