Make OpenCV VideoCapture faster - Python

So I created a neural network (CNN) that can predict a person's gender in real time using OpenCV. Everything works, but when I run the code OpenCV lags badly, and my webcam is not that bad. Here is my code:
'''
Real-time face gender recognition using a convolutional neural network (CNN) and cv2.
Loads the saved, trained model and predicts on each frame.
'''
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import cv2
import os
import cvlib as cv
import imutils

# load the model
model = load_model('gender_detection.model')

# open the webcam and initiate the camera
webcam = cv2.VideoCapture(0, cv2.CAP_DSHOW)
classes = ['hombre', 'mujer']  # model's class labels (Spanish for 'man', 'woman')

# loop through frames
while webcam.isOpened():
    # read frame from webcam
    status, frame = webcam.read()
    #webcam.set(cv2.CAP_PROP_FPS, 1000)
    frame = cv2.flip(frame, 1)

    # apply face detection; cvlib only finds faces, the CNN below classifies gender
    face, confidence = cv.detect_face(frame)

    # loop through detected faces
    for idx, f in enumerate(face):
        # get corner points of the face rectangle returned by cvlib
        startX, startY = f[0], f[1]
        endX, endY = f[2], f[3]

        # draw the rectangle over the face
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # crop the detected face region
        face_crop = np.copy(frame[startY:endY, startX:endX])
        if face_crop.shape[0] < 10 or face_crop.shape[1] < 10:
            continue

        # preprocessing for the gender detection model
        face_crop = cv2.resize(face_crop, (96, 96))
        face_crop = face_crop.astype("float") / 255.0
        face_crop = img_to_array(face_crop)
        face_crop = np.expand_dims(face_crop, axis=0)

        # apply gender detection on the face with the model
        conf = model.predict(face_crop)[0]

        # get the label with the highest confidence
        idx = np.argmax(conf)
        label = classes[idx]
        label = "{}: {:.2f}".format(label, conf[idx] * 100)
        Y = startY - 10 if startY - 10 > 10 else startY + 10

        # write label and confidence above the face rectangle
        cv2.putText(frame, label, (startX, Y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (0, 255, 0), 2)

    # display output
    cv2.imshow("Gender Detection", frame)

    # press "Q" to stop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release resources
webcam.release()
cv2.destroyAllWindows()
I also tried to use cv2.CAP_PROP_FPS, but that only helps a little bit, not much.

I've had this same problem using OpenCV video capture with text detection. It's not the webcam quality, but the fact that OpenCV can only show you frames as fast as you can process them in your gender detection. The solution that worked for me is multi-threading.
You can create a thread for the OpenCV video capture, then another thread for your image processing. The caveat: you can't magically make your image processing happen quicker without changing the image processing itself; it takes as long as it takes. What you can do is let OpenCV work on its own and send the frames to an exchange class, then let the image processing grab a frame and work at its own pace while cv2 continues as normal.
Here is a (shortened) version of my class for OCR image processing. You can see that in start() I'm creating a thread pointed at the ocr() process. This is where your gender identification process can go.
from threading import Thread  # needed for the capture/processing threads

class OCR:
    # def __init__(self, exchange: VideoStream, language=None):
    def __init__(self):
        self.exchange = None
        self.stopped = False  # controls the ocr() loop below
        # init stuff for OCR not relevant to this example, but note that it
        # takes a VideoStream class called exchange, which is where this class
        # grabs frames to process

    def start(self):
        Thread(target=self.ocr, args=()).start()
        return self

    def set_exchange(self, video_stream):
        self.exchange = video_stream

    def ocr(self):
        while not self.stopped:
            if self.exchange is not None:
                frame = self.exchange.frame

                # # # OCR stuff goes here
Now here is the VideoStream class that grabs frames at its own pace in a different thread. The image processing class (OCR) can then take these frames at its own pace, and the two don't affect each other's performance.
class VideoStream:
    """Class for CV2 video capture. The start() method will create a new
    thread to read the video stream"""
    def __init__(self, src=0):
        self.stream = cv2.VideoCapture(src)
        (self.grabbed, self.frame) = self.stream.read()
        # self._boxes = None
        self.stopped = False

    def start(self):
        Thread(target=self.get, args=()).start()
        return self

    def get(self):
        while not self.stopped:
            (self.grabbed, self.frame) = self.stream.read()

    def get_video_dimensions(self):
        width = self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)
        return int(width), int(height)

    def stop_process(self):
        self.stopped = True
Then you can do your CV2 imshow loop as you normally would.
exchange = VideoStream(0).start()
ocr = OCR().start()
ocr.set_exchange(exchange)

while True:  # begins a loop for the real-time OCR display
    pressed_key = cv2.waitKey(1) & 0xFF
    if pressed_key == ord('q'):
        stop_stream_ocr(exchange, ocr)
        break

    frame = exchange.frame
    cv2.imshow("Video Get Frame", frame)
    cps1.increment()
Note that you can't really control which thread cv2 decides to use in its internal workings, but this approach will let you display your webcam at its natural fps while the image processing happens in the background.
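Neither stop_stream_ocr() nor cps1 is defined in the snippets above; they come from parts of the original program that were cut. A minimal reconstruction, assuming cps1 is a simple counts-per-second tracker (both are hypothetical, not the original code), might look like this:

import time

class CountsPerSec:
    # Hypothetical helper: counts loop iterations per second.
    def __init__(self):
        self._start = time.time()
        self._count = 0

    def increment(self):
        self._count += 1

    def cps(self):
        elapsed = time.time() - self._start
        return self._count / elapsed if elapsed > 0 else 0

cps1 = CountsPerSec()

def stop_stream_ocr(stream, ocr):
    # Signal both threads to exit and release the camera.
    stream.stop_process()
    ocr.stopped = True
    stream.stream.release()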

Related

Limit MaxResult of Object Detection

# Import packages
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util

# Define VideoStream class to handle streaming of video from webcam in separate processing thread
# Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
class VideoStream:
    """Camera object that controls video streaming from the Picamera"""
    def __init__(self, resolution=(640, 480), framerate=30):
        # Initialize the PiCamera and the camera image stream
        self.stream = cv2.VideoCapture(0)
        ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        ret = self.stream.set(3, resolution[0])
        ret = self.stream.set(4, resolution[1])

        # Read first frame from the stream
        (self.grabbed, self.frame) = self.stream.read()

        # Variable to control when the camera is stopped
        self.stopped = False

    def start(self):
        # Start the thread that reads frames from the video stream
        Thread(target=self.update, args=()).start()
        return self

    def update(self):
        # Keep looping indefinitely until the thread is stopped
        while True:
            # If the camera is stopped, stop the thread
            if self.stopped:
                # Close camera resources
                self.stream.release()
                return
            # Otherwise, grab the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

    def read(self):
        # Return the most recent frame
        return self.frame

    def stop(self):
        # Indicate that the camera and thread should be stopped
        self.stopped = True

# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
                    default='/home/pi/TensorFlow')
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                    default='edgetpu.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                    default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    default=0.4)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
                    default='640x480')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                    default=True,
                    #required=True,
                    action='store_true')
args = parser.parse_args()

MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu

# Import TensorFlow libraries
# If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate
        GRAPH_NAME = 'edgetpu.tflite'

# Get path to current working directory
CWD_PATH = os.getcwd()

# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
    del(labels[0])

# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

# Check output layer name to determine if this model was created with TF2 or TF1,
# because outputs are ordered differently for TF2 and TF1 models
outname = output_details[0]['name']
if ('StatefulPartitionedCall' in outname):  # This is a TF2 model
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:  # This is a TF1 model
    boxes_idx, classes_idx, scores_idx = 0, 1, 2

# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()

# Initialize video stream
videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
time.sleep(1)

#for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
while True:
    # Start timer (for calculating frame rate)
    t1 = cv2.getTickCount()

    # Grab frame from video stream
    frame1 = videostream.read()
    frame1 = cv2.flip(frame1, 0)

    # Acquire frame and resize to expected shape [1xHxWx3]
    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]  # Confidence of detected objects

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            # Draw label
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

            # Draw circle in center
            xcenter = xmin + (int(round((xmax - xmin) / 2)))
            ycenter = ymin + (int(round((ymax - ymin) / 2)))
            cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1)

            # Print info
            print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')

    # Draw framerate in corner of frame
    cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

    # All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

# Clean up
cv2.destroyAllWindows()
videostream.stop()
I would like to try something in this code, but I can't limit the detection to one at a time.
The object detection and the model itself work well, but I want to limit the results to one instead of multiple.
This is the whole code; I got it from the internet.
I also tried to add an argument (parser.add_argument('--maxResult')), but I don't know how to wire it into the tflite_runtime.
I am new to Python and to using the RPi.
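One way to get a single detection, sketched here as a suggestion rather than taken from the original script, is to drop the for loop and keep only the highest-scoring result above the threshold (the variable names match the script above):

# Keep only the single highest-scoring detection
best = int(np.argmax(scores))
if (scores[best] > min_conf_threshold) and (scores[best] <= 1.0):
    ymin = int(max(1, (boxes[best][0] * imH)))
    xmin = int(max(1, (boxes[best][1] * imW)))
    ymax = int(min(imH, (boxes[best][2] * imH)))
    xmax = int(min(imW, (boxes[best][3] * imW)))
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
    object_name = labels[int(classes[best])]
    label = '%s: %d%%' % (object_name, int(scores[best] * 100))
    cv2.putText(frame, label, (xmin, max(ymin, 25) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

The rest of the drawing code (label box, center point) can be reused with i replaced by best.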

Avoid the foreground becoming background with OpenCV's background subtractor

I want to make a hand detector using OpenCV. I've created a background subtractor using the following code:
import cv2
import numpy as np

global fgMask

camera = cv2.VideoCapture(0)
backSub = cv2.createBackgroundSubtractorMOG2(detectShadows=False)
firstImage = True
crop_width = 300
crop_height = 300
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1024)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)

while True:
    try:
        ret, image = camera.read()
        image = cv2.flip(image, 1)
        roi = image[0:crop_height, 0:crop_width]
        if firstImage:
            fgMask = backSub.apply(roi)
            firstImage = False
        else:
            fgMask = backSub.apply(roi, None, 0)
        cv2.imshow("Original", image)
        cv2.imshow("Mask", fgMask)
        cv2.imshow("Roi", roi)
        k = cv2.waitKey(10)
        if k == 27:  # press ESC to exit
            camera.release()
            cv2.destroyAllWindows()
            break
    except Exception as ex:
        print(ex)
I feed frames for a few seconds using the apply method so the model learns the background, and the mask generated by the model is black (everything is OK at this point).
When I put my hand in, the mask is OK,
but after a while the hand begins to disappear.
I have read that you can set the learningRate parameter to 0 so the model stops training on new frames, but I get the same result (the hand disappears after a while). I've tried different learning rates, but the result is always the same.
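In the code above the model effectively learns the background from a single frame before the learning rate is set to 0. A pattern worth trying, sketched here as a suggestion rather than a confirmed fix, is an explicit warm-up phase of several seconds before freezing the model for every later frame:

import time

WARMUP_SECONDS = 3  # hypothetical warm-up length
start = time.time()
while True:
    ret, image = camera.read()
    if not ret:
        break
    roi = cv2.flip(image, 1)[0:crop_height, 0:crop_width]
    if time.time() - start < WARMUP_SECONDS:
        fgMask = backSub.apply(roi)  # default learning rate: model adapts
    else:
        fgMask = backSub.apply(roi, learningRate=0)  # model frozen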

Python 3: process and display a webcam stream at the webcam's fps

How can I read from a camera and display the images at the camera's frame rate?
I want to continuously read images from my webcam (do some fast preprocessing) and then display the images in a window. This should run at the frame rate my webcam provides (29 fps).
It seems like the OpenCV GUI and the Tkinter GUI are too slow to display images at such a frame rate. They are clearly the bottlenecks in my experiments. Even without the preprocessing, the images are not displayed fast enough. I am on a MacBook Pro 2018.
Here is what I tried. The webcam is always read with OpenCV:
Everything happens in the main thread, the images are displayed with OpenCV: 12 fps
Read camera and do preprocessing in separate threads, show image with OpenCV in the main thread: 20 fps
multithreaded like above, but do not show the image: 29 fps
Multithreaded like above, but showing the images with Tkinter: I don't know the exact fps, but it feels like <10 fps.
Here is the code:
Single loop, OpenCV GUI:
import cv2
import time

def main():
    cap = cv2.VideoCapture(0)
    window_name = "FPS Single Loop"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    start_time = time.time()
    frames = 0
    seconds_to_measure = 10
    while start_time + seconds_to_measure > time.time():
        success, img = cap.read()
        img = img[:, ::-1]  # mirror
        time.sleep(0.01)  # simulate some processing time
        cv2.imshow(window_name, img)
        cv2.waitKey(1)
        frames = frames + 1

    cv2.destroyAllWindows()
    print(
        f"Captured {frames} in {seconds_to_measure} seconds. FPS: {frames/seconds_to_measure}"
    )

if __name__ == "__main__":
    main()
Captured 121 in 10 seconds. FPS: 12.1
Multithreaded, OpenCV GUI:
import logging
import time
from queue import Full, Queue
from threading import Thread, Event

import cv2

logger = logging.getLogger("VideoStream")

def setup_webcam_stream(src=0):
    cap = cv2.VideoCapture(src)
    width, height = (
        cap.get(cv2.CAP_PROP_FRAME_WIDTH),
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT),
    )
    logger.info(f"Camera dimensions: {width, height}")
    logger.info(f"Camera FPS: {cap.get(cv2.CAP_PROP_FPS)}")
    grabbed, frame = cap.read()  # Read once to init
    if not grabbed:
        raise IOError("Cannot read video stream.")
    return cap

def video_stream_loop(video_stream: cv2.VideoCapture, queue: Queue, stop_event: Event):
    while not stop_event.is_set():
        try:
            success, img = video_stream.read()
            # We need a timeout here to not get stuck when no images are retrieved from the queue
            queue.put(img, timeout=1)
        except Full:
            pass  # try again with a newer frame

def processing_loop(input_queue: Queue, output_queue: Queue, stop_event: Event):
    while not stop_event.is_set():
        try:
            img = input_queue.get()
            img = img[:, ::-1]  # mirror
            time.sleep(0.01)  # simulate some processing time
            # We need a timeout here to not get stuck when no images are retrieved from the queue
            output_queue.put(img, timeout=1)
        except Full:
            pass  # try again with a newer frame

def main():
    stream = setup_webcam_stream(0)
    webcam_queue = Queue()
    processed_queue = Queue()
    stop_event = Event()
    window_name = "FPS Multi Threading"
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    start_time = time.time()
    frames = 0
    seconds_to_measure = 10
    try:
        Thread(
            target=video_stream_loop, args=[stream, webcam_queue, stop_event]
        ).start()
        Thread(
            target=processing_loop, args=[webcam_queue, processed_queue, stop_event]
        ).start()
        while start_time + seconds_to_measure > time.time():
            img = processed_queue.get()
            cv2.imshow(window_name, img)
            cv2.waitKey(1)
            frames = frames + 1
    finally:
        stop_event.set()
    cv2.destroyAllWindows()
    print(
        f"Captured {frames} frames in {seconds_to_measure} seconds. FPS: {frames/seconds_to_measure}"
    )
    print(f"Webcam queue: {webcam_queue.qsize()}")
    print(f"Processed queue: {processed_queue.qsize()}")

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    main()
INFO:VideoStream:Camera dimensions: (1280.0, 720.0)
INFO:VideoStream:Camera FPS: 29.000049
Captured 209 frames in 10 seconds. FPS: 20.9
Webcam queue: 0
Processed queue: 82
Here you can see that there are images remaining in the second queue, where they are fetched for display.
When I comment out these two lines:
cv2.imshow(window_name, img)
cv2.waitKey(1)
then the output is:
INFO:VideoStream:Camera dimensions: (1280.0, 720.0)
INFO:VideoStream:Camera FPS: 29.000049
Captured 291 frames in 10 seconds. FPS: 29.1
Webcam queue: 0
Processed queue: 0
So it is able to process all frames at the webcam's speed when no GUI is displaying them.
Multithreaded, Tkinter GUI:
import logging
import time
import tkinter
from queue import Full, Queue, Empty
from threading import Thread, Event

import PIL
from PIL import ImageTk
import cv2

logger = logging.getLogger("VideoStream")

def setup_webcam_stream(src=0):
    cap = cv2.VideoCapture(src)
    width, height = cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    logger.info(f"Camera dimensions: {width, height}")
    logger.info(f"Camera FPS: {cap.get(cv2.CAP_PROP_FPS)}")
    grabbed, frame = cap.read()  # Read once to init
    if not grabbed:
        raise IOError("Cannot read video stream.")
    return cap, width, height

def video_stream_loop(video_stream: cv2.VideoCapture, queue: Queue, stop_event: Event):
    while not stop_event.is_set():
        try:
            success, img = video_stream.read()
            # We need a timeout here to not get stuck when no images are retrieved from the queue
            queue.put(img, timeout=1)
        except Full:
            pass  # try again with a newer frame

def processing_loop(input_queue: Queue, output_queue: Queue, stop_event: Event):
    while not stop_event.is_set():
        try:
            img = input_queue.get()
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img[:, ::-1]  # mirror
            time.sleep(0.01)  # simulate some processing time
            # We need a timeout here to not get stuck when no images are retrieved from the queue
            output_queue.put(img, timeout=1)
        except Full:
            pass  # try again with a newer frame

class App:
    def __init__(self, window, window_title, image_queue: Queue, image_dimensions: tuple):
        self.window = window
        self.window.title(window_title)
        self.image_queue = image_queue

        # Create a canvas that can fit the above video source size
        self.canvas = tkinter.Canvas(window, width=image_dimensions[0], height=image_dimensions[1])
        self.canvas.pack()

        # After it is called once, the update method will be automatically called every delay milliseconds
        self.delay = 1
        self.update()
        self.window.mainloop()

    def update(self):
        try:
            frame = self.image_queue.get(timeout=0.1)  # Timeout to not block this method forever
            self.photo = ImageTk.PhotoImage(image=PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image=self.photo, anchor=tkinter.NW)
            self.window.after(self.delay, self.update)
        except Empty:
            pass  # try again next time

def main():
    stream, width, height = setup_webcam_stream(0)
    webcam_queue = Queue()
    processed_queue = Queue()
    stop_event = Event()
    window_name = "FPS Multi Threading"
    try:
        Thread(target=video_stream_loop, args=[stream, webcam_queue, stop_event]).start()
        Thread(target=processing_loop, args=[webcam_queue, processed_queue, stop_event]).start()
        App(tkinter.Tk(), window_name, processed_queue, (width, height))
    finally:
        stop_event.set()
    print(f"Webcam queue: {webcam_queue.qsize()}")
    print(f"Processed queue: {processed_queue.qsize()}")

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    main()
INFO:VideoStream:Camera dimensions: (1280.0, 720.0)
INFO:VideoStream:Camera FPS: 29.000049
Webcam queue: 0
Processed queue: 968
In this answer I share some considerations on camera FPS vs. display FPS, along with code examples that demonstrate:
The basics of FPS calculation;
How to increase the display FPS from 29 fps to 300+ fps;
How to use threading and a queue efficiently to capture at the closest maximum fps supported by the camera.
For anyone going through this issue, here are a couple of important questions that need to be answered first:
What's the size of the images being captured?
How many FPS does your webcam support? (camera FPS)
How fast can you grab a frame from the webcam and display it in a window? (display FPS)
Camera FPS VS Display FPS
The camera fps refers to what the camera hardware is capable of. For instance, ffmpeg reports that at 640x480 my camera can return 15 fps minimum and 30 fps maximum, among other formats:
ffmpeg -list_devices true -f dshow -i dummy
ffmpeg -f dshow -list_options true -i video="HP HD Camera"
[dshow @ 00000220181cc600] vcodec=mjpeg min s=640x480 fps=15 max s=640x480 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=320x180 fps=15 max s=320x180 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=320x240 fps=15 max s=320x240 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=424x240 fps=15 max s=424x240 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=640x360 fps=15 max s=640x360 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=848x480 fps=15 max s=848x480 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=960x540 fps=15 max s=960x540 fps=30
[dshow @ 00000220181cc600] vcodec=mjpeg min s=1280x720 fps=15 max s=1280x720 fps=30
The important realization here is that despite being able to capture 30 fps internally, there is NO guarantee that an application will be able to pull those 30 frames from the camera in a second. The reasons behind this are clarified in the following sections.
The display fps refers to how many images can be drawn in a window per second. This number is not limited by the camera at all, and it's usually much higher than the camera fps. As you'll see later, it's possible to create an application that pulls 29 images per second from the camera and draws them more than 300 times per second. That means the same image from the camera is drawn multiple times in a window before the next frame is pulled from the camera.
How many FPS can my webcam capture?
The following application simply demonstrates how to print the default settings used by the camera (size, fps), retrieve frames from it, display them in a window, and compute the FPS being rendered:
import numpy as np
import cv2
import datetime

def main():
    # create display window
    cv2.namedWindow("webcam", cv2.WINDOW_NORMAL)

    # initialize webcam capture object
    cap = cv2.VideoCapture(0)

    # retrieve properties of the capture object
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    fps_sleep = int(1000 / cap_fps)
    print('* Capture width:', cap_width)
    print('* Capture height:', cap_height)
    print('* Capture FPS:', cap_fps, 'ideal wait time between frames:', fps_sleep, 'ms')

    # initialize time and frame count variables
    last_time = datetime.datetime.now()
    frames = 0

    # main loop: retrieves and displays a frame from the camera
    while (True):
        # blocks until the entire frame is read
        success, img = cap.read()
        frames += 1

        # compute fps: current_time - last_time
        delta_time = datetime.datetime.now() - last_time
        elapsed_time = delta_time.total_seconds()
        cur_fps = np.around(frames / elapsed_time, 1)

        # draw FPS text and display image
        cv2.putText(img, 'FPS: ' + str(cur_fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow("webcam", img)

        # wait 1ms for ESC to be pressed
        key = cv2.waitKey(1)
        if (key == 27):
            break

    # release resources
    cv2.destroyAllWindows()
    cap.release()

if __name__ == "__main__":
    main()
Output:
* Capture width: 640.0
* Capture height: 480.0
* Capture FPS: 30.0 ideal wait time between frames: 33 ms
As mentioned earlier, my camera is able to capture 640x480 images at 30 fps by default, and even though the loop above is pretty simple, my display FPS is lower: I'm only able to retrieve frames and display them at 28 or 29 fps, and that's without performing any custom image processing in between. What's going on?
The reality is that even though the loop looks pretty simple, there are things happening under the hood that cost just enough processing time to make it difficult for one iteration of the loop to happen in less than 33 ms:
cap.read() executes I/O calls to the camera driver in order to pull the new data. This function blocks execution of your application until the data has been transferred completely;
a numpy array needs to be set up with the new pixels;
other calls are required to display a window and draw the pixels in it, namely cv2.imshow(), which is usually a slow operation;
there's also a 1 ms delay from cv2.waitKey(1), which is required to keep the window open;
All of these operations, as small as they are, make it incredibly difficult for an application to call cap.read(), get a new frame and display it at precisely 30 fps.
There are a number of things you can try to speed up the application so it can display more frames than the camera driver allows, and this post covers them well. Just remember this: you won't be able to capture more frames from the camera than what the driver says it supports. You will, however, be able to display more frames.
How to increase the display FPS to 300+? A threading example.
One approach used to increase the number of images displayed per second relies on the threading package to create a separate thread that continuously pulls frames from the camera. This works because the main loop of the application is no longer blocked on cap.read() waiting for a new frame, which increases the number of frames that can be displayed (or drawn) per second.
Note: this approach renders the same image multiple times in a window until the next image from the camera is retrieved. Keep in mind that it might even draw an image while its contents are still being updated with new data from the camera.
The following application is just an academic example, not something I recommend as production code, to increase the number of frames per second displayed in a window:
import numpy as np
import cv2
import datetime
from threading import Thread

# global variables
stop_thread = False  # controls thread execution
img = None  # stores the image retrieved by the camera

def start_capture_thread(cap):
    global img, stop_thread

    # continuously read frames from the camera
    while True:
        _, img = cap.read()

        if (stop_thread):
            break

def main():
    global img, stop_thread

    # create display window
    cv2.namedWindow("webcam", cv2.WINDOW_NORMAL)

    # initialize webcam capture object
    cap = cv2.VideoCapture(0)

    # retrieve properties of the capture object
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    fps_sleep = int(1000 / cap_fps)
    print('* Capture width:', cap_width)
    print('* Capture height:', cap_height)
    print('* Capture FPS:', cap_fps, 'wait time between frames:', fps_sleep)

    # start the capture thread: reads frames from the camera (non-stop) and stores the result in img
    t = Thread(target=start_capture_thread, args=(cap,), daemon=True)  # a daemon thread is killed when the application exits
    t.start()

    # initialize time and frame count variables
    last_time = datetime.datetime.now()
    frames = 0
    cur_fps = 0

    while (True):
        # blocks until the entire frame is read
        frames += 1

        # measure runtime: current_time - last_time
        delta_time = datetime.datetime.now() - last_time
        elapsed_time = delta_time.total_seconds()

        # compute fps but avoid division by zero
        if (elapsed_time != 0):
            cur_fps = np.around(frames / elapsed_time, 1)

        # TODO: make a copy of the image and process it here if needed

        # draw FPS text and display image
        if (img is not None):
            cv2.putText(img, 'FPS: ' + str(cur_fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow("webcam", img)

        # wait 1ms for ESC to be pressed
        key = cv2.waitKey(1)
        if (key == 27):
            stop_thread = True
            break

    # release resources
    cv2.destroyAllWindows()
    cap.release()

if __name__ == "__main__":
    main()
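One detail worth adding to the example above (a hedged mitigation, not part of the original): because the capture thread writes into img while the main loop reads it, you can snapshot the shared buffer before annotating it, so the FPS text is drawn on a private copy:

# inside the display loop, instead of drawing directly on the shared img:
if (img is not None):
    frame = img.copy()  # snapshot the shared buffer first
    cv2.putText(frame, 'FPS: ' + str(cur_fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow("webcam", frame)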
How to capture at the closest maximum fps supported by the camera? A threading and queue example.
The problem with using a queue is that, performance-wise, what you get depends on how many frames per second the application can pull from the camera. If the camera supports 30 fps, then that's what your application might get, as long as the image processing operations are fast. Otherwise, there will be a drop in the number of frames displayed per second, and the size of the queue will slowly grow until all your RAM runs out. To avoid that problem, make sure to set the queue's maxsize to a number that prevents it from growing beyond what your OS can handle.
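The implementation below keeps the queue unbounded (maxsize=0). A bounded, drop-oldest variant, sketched here as a suggestion, keeps memory flat when the consumer falls behind:

import queue

frames_queue = queue.Queue(maxsize=2)  # small bound keeps latency and RAM in check

def put_latest(q, img):
    # Drop the oldest frame instead of blocking when the queue is full.
    try:
        q.put_nowait(img)
    except queue.Full:
        try:
            q.get_nowait()
        except queue.Empty:
            pass
        q.put_nowait(img)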
The following code is a naive implementation that creates a dedicated thread to grab frames from the camera and puts them in a queue that is later used by the main loop of the application:
import numpy as np
import cv2
import datetime
import queue
from threading import Thread

# global variables
stop_thread = False  # controls thread execution

def start_capture_thread(cap, queue):
    global stop_thread

    # continuously read frames from the camera
    while True:
        _, img = cap.read()
        queue.put(img)

        if (stop_thread):
            break

def main():
    global stop_thread

    # create display window
    cv2.namedWindow("webcam", cv2.WINDOW_NORMAL)

    # initialize webcam capture object
    cap = cv2.VideoCapture(0)
    #cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)

    # retrieve properties of the capture object
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    print('* Capture width:', cap_width)
    print('* Capture height:', cap_height)
    print('* Capture FPS:', cap_fps)

    # create a queue
    frames_queue = queue.Queue(maxsize=0)

    # start the capture thread: reads frames from the camera (non-stop) and stores them in the queue
    t = Thread(target=start_capture_thread, args=(cap, frames_queue,), daemon=True)  # a daemon thread is killed when the application exits
    t.start()

    # initialize time and frame count variables
    last_time = datetime.datetime.now()
    frames = 0
    cur_fps = 0

    while (True):
        if (frames_queue.empty()):
            continue

        # blocks until the entire frame is read
        frames += 1

        # measure runtime: current_time - last_time
        delta_time = datetime.datetime.now() - last_time
        elapsed_time = delta_time.total_seconds()

        # compute fps but avoid division by zero
        if (elapsed_time != 0):
            cur_fps = np.around(frames / elapsed_time, 1)

        # retrieve an image from the queue
        img = frames_queue.get()

        # TODO: process the image here if needed

        # draw FPS text and display image
        if (img is not None):
            cv2.putText(img, 'FPS: ' + str(cur_fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow("webcam", img)

        # wait 1ms for ESC to be pressed
        key = cv2.waitKey(1)
        if (key == 27):
            stop_thread = True
            break

    # release resources
    cv2.destroyAllWindows()
    cap.release()

if __name__ == "__main__":
    main()
Earlier I said "might" and here is what I meant: even when I use a dedicated thread to pull frames from the camera and a queue to store them, the displayed fps is still capped at 29.3 when it should have been 30 fps. In this case, I assume the camera driver or the backend implementation used by VideoCapture can be blamed for the issue. On Windows, the backend used by default is MSMF.
It is possible to force VideoCapture to use a different backend by passing the right arguments on the constructor:
cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
My experience with DShow was terrible: the CAP_PROP_FPS returned from the camera was 0 and the displayed FPS got stuck around 14. This is just an example to illustrate how the backend capture driver can interfere negatively with the camera capture.
But that's something you can explore. Maybe using a different backend on your OS can provide better results. OpenCV's documentation has a nice high-level overview of the Video I/O module that lists the supported backends.
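For example, recent OpenCV 4.x builds expose the registered backends through the videoio_registry module, so you can list what your build supports and then request one explicitly (a sketch, assuming an OpenCV 4.x build):

import cv2

# list the camera capture backends compiled into this OpenCV build
for be in cv2.videoio_registry.getCameraBackends():
    print(cv2.videoio_registry.getBackendName(be))

# then request one explicitly, e.g. Media Foundation on Windows
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)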
Update
In one of the comments on this answer, the OP upgraded OpenCV from 4.1 to 4.3 on Mac OS and observed a noticeable improvement in FPS rendering. It looks like it was a performance issue related to cv2.imshow().

OpenCV upscales webcam's feed instead of using full resolution

I have a 1080p webcam (Logitech V-U0032) and I am creating a program that saves a still from its feed every X seconds. However, the images have black bars on the sides and the resolution is seemingly lower than a still taken with the Windows built-in camera app; both output files are 1920x1080 pixels, but the difference in actual resolution is very visible.
I've searched for ways to change the resolution, but OpenCV seems to always upscale to the new resolution.
import cv2
import numpy as np
import datetime
import time
import os

cv2.namedWindow("Unregistered Hypercam")
vc = cv2.VideoCapture(0)
vc.set(3, 1920)  # Set the horizontal resolution
vc.set(4, 1080)  # Set the vertical resolution
vc.set(5, 10)    # Set the framerate

if vc.isOpened():
    rval, frame = vc.read()
else:
    rval = False

lastSave = time.time()

def mfold(name):
    try:
        os.mkdir(name)
    except OSError:
        pass
    else:
        print("Successfully created the directory %s " % name)

while rval:
    cv2.imshow("Unregistered Hypercam", frame)
    rval, frame = vc.read()
    now = datetime.datetime.now()
    if time.time() > lastSave + 10:
        lastSave = time.time()
        fold = '\\snapshots\\'  # This will create folder C:\snapshots
        mfold(fold)
        cv2.imwrite(fold + str(now.year) + '.' + str(now.month) + '.' + str(now.day) + ' '
                    + str(now.hour) + 'h_' + str(now.minute) + 'm_' + str(now.second) + 's.png', frame)
    key = cv2.waitKey(20)
    if key == 27:
        break

cv2.destroyWindow("Unregistered Hypercam")
The images are blurry and unsharp, with black bars, and are nowhere near the quality of images taken with the Windows camera app.
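One thing worth trying, as an assumption about this particular camera rather than a confirmed fix: many UVC webcams only deliver 1920x1080 in the compressed MJPG format, while the default uncompressed mode tops out at a lower resolution that then gets upscaled. Requesting MJPG before setting the resolution may unlock the full sensor size:

import cv2

vc = cv2.VideoCapture(0)
# ask for MJPG first; 1080p is often only available in this format
vc.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
vc.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
vc.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
# verify what the camera actually negotiated
print(vc.get(cv2.CAP_PROP_FRAME_WIDTH), vc.get(cv2.CAP_PROP_FRAME_HEIGHT))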
Here's an example widget to save webcam/streams into a video file or screenshot images. It keeps the original resolution of the stream. I used one of my IP camera streams instead of a webcam, but it should work the same with a webcam. Currently it opens a stream and saves it as a video, but you can modify it to take periodic screenshots. Also, to modify the resolution, you can manually resize the frame while keeping the aspect ratio with this function (it currently is not used in the widget).
Adjust resolution of frame while maintaining aspect ratio
# Resizes an image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # Grab the image size and initialize dimensions
    dim = None
    (h, w) = image.shape[:2]

    # Return original image if no need to resize
    if width is None and height is None:
        return image

    # We are resizing height if width is none
    if width is None:
        # Calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # We are resizing width if height is none
    else:
        # Calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # Return the resized image
    return cv2.resize(image, dim, interpolation=inter)
You can use it like this to adjust width or height
rval, frame = vc.read()
resize_width = maintain_aspect_ratio_resize(frame, width=500)
resize_height = maintain_aspect_ratio_resize(frame, height=500)
Stream and save video widget
from threading import Thread
import cv2

class VideoWriterObject(object):
    def __init__(self, src=0):
        # Create a VideoCapture object
        self.capture = cv2.VideoCapture(src)

        # Default resolutions of the frame are obtained (system dependent)
        self.frame_width = int(self.capture.get(3))
        self.frame_height = int(self.capture.get(4))

        # Set up codec and output video settings
        self.codec = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        self.output_video = cv2.VideoWriter('output.avi', self.codec, 30, (self.frame_width, self.frame_height))

        # Start the thread to read frames from the video stream
        self.thread = Thread(target=self.update, args=())
        self.thread.daemon = True
        self.thread.start()

    def update(self):
        # Read the next frame from the stream in a different thread
        while True:
            if self.capture.isOpened():
                (self.status, self.frame) = self.capture.read()

    def show_frame(self):
        # Display frames in main program
        if self.status:
            cv2.imshow('frame', self.frame)

        # Press Q on keyboard to stop recording
        key = cv2.waitKey(1)
        if key == ord('q'):
            self.capture.release()
            self.output_video.release()
            cv2.destroyAllWindows()
            exit(1)

    def save_frame(self):
        # Save obtained frame into video output file
        self.output_video.write(self.frame)

if __name__ == '__main__':
    video_stream_widget = VideoWriterObject(0)
    while True:
        try:
            video_stream_widget.show_frame()
            video_stream_widget.save_frame()
        except AttributeError:
            pass

Multiprocessing or multithreading with Python and OpenCV to detect a face

Recently I have been working on a Raspberry Pi 3 (Raspbian OS) with OpenCV and Python to detect faces on a live camera, and the detection is very slow, about 4-5 FPS. So I wonder: can I use multithreading or multiprocessing to speed up the FPS, and if so, how would I do that?
Please help me; any idea will be appreciated. Thanks.
Yes, you can multithread the process.
Use the threading library. (In Python 2 the low-level module was called thread; in Python 3 it was renamed _thread, but the higher-level threading module is what you want in both.)
Here is a simple example.
Besides your main thread,
have one thread only operating the camera and constantly updating the latest frame, which is global in this case.
import cv2

latest_frame = None  # global frame, shared with the other threads

def camera_thread():
    global latest_frame
    cam = cv2.VideoCapture(1)
    while True:
        _ret, latest_frame = cam.read()
        cv2.imshow('camera', latest_frame)
        cv2.waitKey(1)  # give HighGUI time to draw the window
A second thread can run the inference from the model using the latest frame.
An optional third could draw a bounding box over the face, or perform other operations.
You might need mutex locks between the 2nd and 3rd threads in this case, as you can only begin drawing boxes after you obtain an output from the model. This also allows your 2nd thread to begin inference on the next frame without waiting for the other threads.
The above example will result in smooth video output with the inference lagging slightly behind. If you're not sure how multithreading works, I suggest reading up on the basics first.
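A minimal sketch of that locking idea, assuming latest_frame is the global updated by the camera thread above; run_model and draw_boxes are hypothetical placeholders for your own inference and drawing calls:

from threading import Lock

output_lock = Lock()
latest_output = None  # shared model result

def inference_thread():
    global latest_output
    while True:
        result = run_model(latest_frame)  # placeholder: your CNN inference
        with output_lock:
            latest_output = result

def drawing_thread():
    while True:
        with output_lock:
            boxes = latest_output  # take a consistent snapshot of the result
        if boxes is not None:
            draw_boxes(boxes)  # placeholder: your drawing code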
Update: I used a separate thread to capture images from the camera, and I don't see any FPS speedup compared to the serial version... Please see my code.
import threading
import time
import cv2
import numpy as np

class myThread(threading.Thread):
    def __init__(self, src):
        print("thread -------------init-------------")
        threading.Thread.__init__(self)
        self.cap = cv2.VideoCapture(src)
        self.stop = False

    def run(self):
        while (self.stop == False):
            self.ret, self.frame = self.cap.read()

    def Stop(self):
        self.cap.release()
        self.stop = True

    def read(self):
        return self.ret, self.frame

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)

thread = myThread(0)
thread.start()
time.sleep(1)

start = time.time()
frames = 0
font = cv2.FONT_HERSHEY_SIMPLEX
cap = cv2.VideoCapture(0)  # note: this second capture is opened but never used

while (True):
    ret, frame = thread.read()
    frame = cv2.resize(frame, (640, 480))
    frames += 1
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray, 1.2, 5)
    for (x, y, w, h) in faces:
        # Create rectangle around the face
        cv2.rectangle(frame, (x - 20, y - 20), (x + w + 20, y + h + 20), (0, 255, 0), 4)
        Id, con = recognizer.predict(gray[y:y + h, x:x + w])
        print(Id, con)
        # Check if the ID exists
        if (con < 60):
            if (Id == 1):
                Id = "HUY"
            if (Id == 2):
                Id = "HOA"
        # If it does not exist, then it is Unknown
        else:
            #print(Id)
            Id = "Unknow"
        # Put text describing who is in the picture
        cv2.rectangle(frame, (x - 22, y - 90), (x + w + 22, y - 22), (0, 255, 0), -1)
        cv2.putText(frame, str(Id), (x, y - 40), font, 2, (255, 255, 255), 3)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        thread.Stop()
        break
    cv2.imshow("frame", frame)

end = time.time()
second = end - start
print("second:", second)
print(frames / second)
cv2.destroyAllWindows()
