I am trying to run an OpenCV program on my GPU. I am using pre-trained models to detect faces in a playing video. The code works fine; my question is: how is my CPU performing better than my GPU? Am I missing some attribute that should be set to use the full potential of the GPU, or is it simply that my CPU is faster than my GPU? Thanks in advance.
model download links:
https://github.com/haroonshakeel/opencv_face_detection
PC specs:
OS: Windows 11
GPU: nvidia rtx 3060 mobile
CPU: core i7-12700H
import numpy as np
import cv2
from imutils.video import FPS

class Detector:
    def __init__(self, use_cuda=False):
        self.faceModel = cv2.dnn.readNetFromCaffe("models/res10_300x300_ssd_iter_140000.prototxt",
                                                  caffeModel="models/res10_300x300_ssd_iter_140000.caffemodel")
        if use_cuda:
            self.faceModel.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            self.faceModel.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            print("running on GPU")
        else:
            print("running on CPU")

    def processVideo(self, videoName):
        cap = cv2.VideoCapture(videoName)
        if not cap.isOpened():
            print("error opening video feed")
            return
        (success, self.img) = cap.read()
        (self.height, self.width) = self.img.shape[:2]
        fps = FPS().start()
        while success:
            self.processFrame()
            cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
            cv2.imshow("Output", self.img)
            cv2.resizeWindow("Output", 900, 900)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            fps.update()
            (success, self.img) = cap.read()
        fps.stop()
        print("Elapsed time: {:.2f}".format(fps.elapsed()))
        print("FPS: {:.2f}".format(fps.fps()))
        cap.release()
        cv2.destroyAllWindows()

    def processFrame(self):
        blob = cv2.dnn.blobFromImage(self.img, 1.0, (300, 300), (104.0, 177.0, 123.0), swapRB=False, crop=False)
        self.faceModel.setInput(blob)
        predictions = self.faceModel.forward()
        for i in range(0, predictions.shape[2]):
            if predictions[0, 0, i, 2] > 0.5:
                bbox = predictions[0, 0, i, 3:7] * np.array([self.width, self.height, self.width, self.height])
                (xmin, ymin, xmax, ymax) = bbox.astype("int")
                cv2.rectangle(self.img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)

detector = Detector(use_cuda=True)
detector.processVideo("test.mp4")
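For what it's worth, a common pitfall in this comparison is timing the whole loop (decoding, drawing, imshow) instead of inference alone, and the first CUDA forward() includes one-time initialization cost. Below is a minimal sketch, assuming the same model files as above and a hypothetical test frame "test.jpg", that warms each backend up and times only forward():

import time
import cv2

def time_backend(use_cuda, img, iters=100):
    net = cv2.dnn.readNetFromCaffe("models/res10_300x300_ssd_iter_140000.prototxt",
                                   "models/res10_300x300_ssd_iter_140000.caffemodel")
    if use_cuda:
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    blob = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (104.0, 177.0, 123.0))
    for _ in range(5):          # warm-up: the first CUDA passes pay startup costs
        net.setInput(blob)
        net.forward()
    start = time.time()
    for _ in range(iters):      # time inference only, nothing else
        net.setInput(blob)
        net.forward()
    return iters / (time.time() - start)

print("CUDA devices visible to OpenCV:", cv2.cuda.getCudaEnabledDeviceCount())
img = cv2.imread("test.jpg")    # hypothetical sample frame
print("CPU  inference FPS: {:.1f}".format(time_backend(False, img)))
print("CUDA inference FPS: {:.1f}".format(time_backend(True, img)))

If the GPU still loses with only forward() timed, the per-frame host-to-device transfers may simply dominate for a network as small as 300x300; if it wins, the bottleneck in the original script is elsewhere (video decoding or imshow).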
I am implementing an object detection model using a YOLO algorithm with PyTorch and OpenCV. Running my model on a single video works fine, but whenever I try to use multiprocessing to test more videos at once it freezes. Can you please explain what is wrong with this code?
import torch
import cv2
import time
from multiprocessing import Process

model = torch.hub.load('ultralytics/yolov5', 'custom', path='runs/best.pt', force_reload=True)

def detectObject(video, name):
    cap = cv2.VideoCapture(video)
    while cap.isOpened():
        pTime = time.time()
        ret, img = cap.read()
        cTime = time.time()
        fps = str(int(1 / (cTime - pTime)))
        if img is None:
            break
        else:
            results = model(img)
            labels = results.xyxyn[0][:, -1].cpu().numpy()
            cord = results.xyxyn[0][:, :-1].cpu().numpy()
            n = len(labels)
            x_shape, y_shape = img.shape[1], img.shape[0]
            for i in range(n):
                row = cord[i]
                # If score is less than 0.3 we avoid making a prediction.
                if row[4] < 0.3:
                    continue
                x1 = int(row[0] * x_shape)
                y1 = int(row[1] * y_shape)
                x2 = int(row[2] * x_shape)
                y2 = int(row[3] * y_shape)
                bgr = (0, 255, 0)  # color of the box
                classes = model.names  # Get the name of label index
                label_font = cv2.FONT_HERSHEY_COMPLEX  # Font for the label.
                cv2.rectangle(img, (x1, y1), (x2, y2), bgr, 2)  # Plot the boxes
                cv2.putText(img, classes[int(labels[i])], (x1, y1), label_font, 2, bgr, 2)
                cv2.putText(img, f'FPS={fps}', (8, 70), label_font, 3, (100, 255, 0), 3, cv2.LINE_AA)
            img = cv2.resize(img, (700, 700))
            cv2.imshow(name, img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    cap.release()

Videos = ['../Dataset/Test1.mp4', '../Dataset/Test2.mp4']
for i in Videos:
    process = Process(target=detectObject, args=(i, str(i)))
    process.start()
Every time I run that code it freezes.
Here is the output:
Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to /home/com/.cache/torch/hub/master.zip
YOLOv5 🚀 2022-6-27 Python-3.9.9 torch-1.11.0+cu102 CPU
Fusing layers...
YOLOv5s summary: 213 layers, 7023610 parameters, 0 gradients
Adding AutoShape...
I got it to work by adding torch multiprocessing code and forcing the 'spawn' start method:
from torch.multiprocessing import Pool, Process, set_start_method

try:
    set_start_method('spawn', force=True)
except RuntimeError:
    pass

videos = ['videos/video1.mp4', 'videos/video2.mp4']
for i in videos:
    process = Process(target=detectObject, args=(i, str(i)))
    process.start()
I was able to run multiple videos at once this way.
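Presumably the default fork start method was the issue: forked workers inherit the parent's already-initialized model and CUDA/OpenCV state, which can deadlock, while spawn gives each worker a fresh interpreter. For completeness, here is a sketch of the launcher that also keeps a handle to every worker and waits for them (the video paths are placeholders; detectObject is the function defined above):

from torch.multiprocessing import Process, set_start_method

if __name__ == "__main__":
    # 'spawn' starts each worker in a clean interpreter instead of a forked copy.
    set_start_method('spawn', force=True)
    videos = ['videos/video1.mp4', 'videos/video2.mp4']  # placeholder paths
    workers = [Process(target=detectObject, args=(v, str(v))) for v in videos]
    for w in workers:
        w.start()
    for w in workers:
        w.join()  # block until every worker finishes instead of exiting early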
Description:
I want to create a people counter using a DNN. The model I'm using is MobileNetSSD. The camera is a Hikvision IPCam; Python communicates with it over the RTSP protocol.
The program I made works well and has no bugs when running on a sample video, but when I replaced the video with the IPCam an unknown error appeared.
Error:
Sometimes the error is:
[h264 @ 000001949f7adfc0] error while decoding MB 13 4, bytestream -6
[h264 @ 000001949f825ac0] left block unavailable for requested intra4x4 mode -1
[h264 @ 000001949f825ac0] error while decoding MB 0 17, bytestream 762
Sometimes the error does not appear and the program is killed.
Update Error
After revising the code, I caught the error. The error found is:
[h264 @ 0000019289b3fa80] error while decoding MB 4 5, bytestream -25
Now I don't know what to do, because I can't find this error anywhere on Google.
Source Code:
Old Code
This is my earliest code, before getting suggestions in the comments.
import time
import cv2
import numpy as np
import math
import threading

print("Load MobileNetSSD model")

prototxt = "MobileNetSSD_deploy.prototxt"
model = "MobileNetSSD_deploy.caffemodel"

CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

net = cv2.dnn.readNetFromCaffe(prototxt, model)

pos_line = 0
offset = 50
car = 0
detected = False
check = 0
prev_frame_time = 0

def detect():
    global check, car, detected
    check = 0
    if detected == False:
        car += 1
        detected = True

def center_object(x, y, w, h):
    cx = x + int(w / 2)
    cy = y + int(h / 2)
    return cx, cy

def process_frame_MobileNetSSD(next_frame):
    global car, check, detected
    rgb = cv2.cvtColor(next_frame, cv2.COLOR_BGR2RGB)
    (H, W) = next_frame.shape[:2]
    blob = cv2.dnn.blobFromImage(next_frame, size=(300, 300), ddepth=cv2.CV_8U)
    net.setInput(blob, scalefactor=1.0/127.5, mean=[127.5, 127.5, 127.5])
    detections = net.forward()
    for i in np.arange(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            idx = int(detections[0, 0, i, 1])
            if CLASSES[idx] != "person":
                continue
            label = CLASSES[idx]
            box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
            (startX, startY, endX, endY) = box.astype("int")
            center_ob = center_object(startX, startY, endX - startX, endY - startY)
            cv2.circle(next_frame, center_ob, 4, (0, 0, 255), -1)
            if center_ob[0] < (pos_line + offset) and center_ob[0] > (pos_line - offset):
                # car += 1
                detect()
            else:
                check += 1
                if check >= 5:
                    detected = False
            cv2.putText(next_frame, label + ' ' + str(round(confidence, 2)),
                        (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.rectangle(next_frame, (startX, startY),
                          (endX, endY), (0, 255, 0), 3)
    return next_frame

def PersonDetection_UsingMobileNetSSD():
    cap = cv2.VideoCapture()
    cap.open("rtsp://admin:Admin12345@192.168.100.20:554/Streaming/channels/2/")
    global car, pos_line, prev_frame_time
    frame_count = 0
    while True:
        try:
            time.sleep(0.1)
            new_frame_time = time.time()
            fps = int(1 / (new_frame_time - prev_frame_time))
            prev_frame_time = new_frame_time
            ret, next_frame = cap.read()
            w_video = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
            h_video = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
            pos_line = int(h_video / 2) - 50
            if ret == False: break
            frame_count += 1
            cv2.line(next_frame, (int(h_video / 2), 0),
                     (int(h_video / 2), int(h_video)), (255, 127, 0), 3)
            next_frame = process_frame_MobileNetSSD(next_frame)
            cv2.rectangle(next_frame, (248, 22), (342, 8), (0, 0, 0), -1)
            cv2.putText(next_frame, "Counter : " + str(car), (250, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.putText(next_frame, "FPS : " + str(fps), (0, int(h_video) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.imshow("Video Original", next_frame)
            # print(car)
        except Exception as e:
            print(str(e))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("/MobileNetSSD Person Detector")
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    t1 = threading.Thread(PersonDetection_UsingMobileNetSSD())
    t1.start()
New Code
I have revised my code and the program still stops grabbing frames. I only revised the PersonDetection_UsingMobileNetSSD() function, and I removed the multithreading I was using. The code ran for about 30 minutes, but after a broken frame it never re-executes the program block under if ret:.
def PersonDetection_UsingMobileNetSSD():
    cap = cv2.VideoCapture()
    cap.open("rtsp://admin:Admin12345@192.168.100.20:554/Streaming/channels/2/")
    global car, pos_line, prev_frame_time
    frame_count = 0
    while True:
        try:
            if cap.isOpened():
                ret, next_frame = cap.read()
                if ret:
                    new_frame_time = time.time()
                    fps = int(1 / (new_frame_time - prev_frame_time))
                    prev_frame_time = new_frame_time
                    w_video = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
                    h_video = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
                    pos_line = int(h_video / 2) - 50
                    # next_frame = cv2.resize(next_frame, (720, 480), fx=0, fy=0, interpolation=cv2.INTER_CUBIC)
                    if ret == False: break
                    frame_count += 1
                    cv2.line(next_frame, (int(h_video / 2), 0),
                             (int(h_video / 2), int(h_video)), (255, 127, 0), 3)
                    next_frame = process_frame_MobileNetSSD(next_frame)
                    cv2.rectangle(next_frame, (248, 22), (342, 8), (0, 0, 0), -1)
                    cv2.putText(next_frame, "Counter : " + str(car), (250, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                    cv2.putText(next_frame, "FPS : " + str(fps), (0, int(h_video) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                    cv2.imshow("Video Original", next_frame)
                    # print(car)
                else:
                    print("Crashed Frame")
            else:
                print("Cap is not open")
        except Exception as e:
            print(str(e))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("/MobileNetSSD Person Detector")
    cap.release()
    cv2.destroyAllWindows()
Requirements:
Hardware: Intel i5-1035G1, 8 GB RAM, NVIDIA GeForce MX330
Software: Python 3.6.2, OpenCV 4.5.1, NumPy 1.16.0
Question:
What should I do to fix this error?
What causes this to happen?
Best regards, and thanks!
The main problem here is that an RTSP stream almost always contains some corrupted frames. The solution is to run video capture on one thread and frame processing on a second thread.
As an example:
import cv2
import threading
import queue

q = queue.Queue()

def this_receive(q):
    cap = cv2.VideoCapture("rtsp://admin:Admin12345@192.168.10.20:554/Streaming/channels/2/")
    cap.set(cv2.CAP_PROP_FPS, 5)
    ret, next_frame = cap.read()
    q.put(next_frame)
    while ret:
        ret, next_frame = cap.read()
        w_video = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        h_video = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        q.put(next_frame)

def main_program(q):
    while True:
        try:
            if q.empty() != True:
                next_frame = q.get()
        except Exception as e:
            print(str(e))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == "__main__":
    print("Main Program")
    p2 = threading.Thread(target=this_receive, args=((q),))
    p2.start()
    p1 = threading.Thread(target=main_program, args=((q),))
    p1.start()
This example should work for the case you are experiencing: damage to a frame will not affect the quality of the processing. The trade-off is that this method can introduce a processing delay; the time shown in the video can lag real time by up to 10 minutes. Want to know what kind of delay? Just try it!
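If that lag matters, a common variation (a sketch, not tested against this camera; the URL is a placeholder) is to keep only the newest frame: give the queue maxsize=1 and have the reader drop the stale frame before putting the fresh one, so the processing thread always works on a near-live image at the cost of skipping frames:

import queue
import cv2

q = queue.Queue(maxsize=1)  # holds only the most recent frame

def this_receive(q):
    # Placeholder URL -- substitute your camera's RTSP address.
    cap = cv2.VideoCapture("rtsp://user:password@192.168.10.20:554/Streaming/channels/2/")
    ret, frame = cap.read()
    while ret:
        if q.full():
            try:
                q.get_nowait()  # discard the stale frame
            except queue.Empty:
                pass
        q.put(frame)
        ret, frame = cap.read()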
I'm making a pose estimation script for my game. However, it runs at 20-30 FPS and does not use the whole CPU, even with no FPS limit. It does not use the whole GPU either. Can someone help me?
Here is resource usage while playing a dance video: https://imgur.com/a/6yI2TWg
Here is my code:
import cv2
import mediapipe as mp
import time

inFile = '/dev/video0'
capture = cv2.VideoCapture(inFile)
FramesVideo = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # Number of frames inside the video
FrameCount = 0  # Currently playing frame
prevTime = 0

# some objects for mediapipe
mpPose = mp.solutions.pose
mpDraw = mp.solutions.drawing_utils
pose = mpPose.Pose()

while True:
    FrameCount += 1
    # read image and convert to rgb
    success, img = capture.read()
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # process image
    results = pose.process(imgRGB)
    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
        # get landmark positions
        landmarks = []
        for id, lm in enumerate(results.pose_landmarks.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x * w), int(lm.y * h)
            cv2.putText(img, str(id), (cx, cy), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1)
            landmarks.append((cx, cy))
    # calculate and print fps
    frameTime = time.time()
    fps = 1 / (frameTime - prevTime)
    prevTime = frameTime
    cv2.putText(img, str(int(fps)), (30, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)
    # show image
    cv2.imshow('Video', img)
    cv2.waitKey(1)
    if FrameCount == FramesVideo - 1:
        capture.release()
        cv2.destroyAllWindows()
        break
Set the model_complexity of mp.Pose to 0.
As the documentation states:
MODEL_COMPLEXITY
Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as inference latency generally go up with the model complexity. Default to 1.
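Applied to the script above, the change is a single argument when constructing the Pose object (a minimal sketch; all other defaults are left as they were):

import mediapipe as mp

mpPose = mp.solutions.pose
# model_complexity=0 selects the lightest, fastest landmark model.
pose = mpPose.Pose(model_complexity=0)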
This is the best solution I've found, also use this.
So I wanted to test the performance difference between the pip-installed opencv-python package and the newest OpenCV compiled from source, on a Raspberry Pi 4 board. After this change, cv2.resize() stopped working in my script and only outputs the maximum resolution from my webcam. I also tried the imutils library, but without success. I tried using both:
cv2.CAP_PROP_FRAME_WIDTH
cv2.CAP_PROP_FRAME_HEIGHT
but I only get a resized window, not a resized frame.
Additionally, I get this warning:
GStreamer warning: Cannot query video position: status=0, value=-1, duration=-1
What have I missed?
Update:
Minimal code
import cv2
from imutils.video import FPS

cap = cv2.VideoCapture(0)
#cap.set(cv2.CAP_PROP_FRAME_WIDTH, 960)
#cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 540)
fps = FPS().start()
font = cv2.FONT_HERSHEY_DUPLEX

while cap.isOpened():
    ret, frame = cap.read()
    small_frame = cv2.resize(frame, (0, 0), fx=0.75, fy=0.75)
    fps.update()
    fps.stop()
    cv2.putText(small_frame, "FPS {:.1f}".format(fps.fps()),
                (10, 30), font, 1.0, (255, 255, 255), 1)
    cv2.imshow("Frame", small_frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
    if key == ord('p'):
        cv2.waitKey(-1)

cap.release()
cv2.destroyAllWindows()
You are using a scale factor and (0, 0) dimensions together.
Try this:
import cv2
img = cv2.imread('/home/img/python.png', cv2.IMREAD_UNCHANGED)
print('Original Dimensions : ',img.shape)
scale_percent = 60 # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
# resize image
resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
print('Resized Dimensions : ',resized.shape)
cv2.imshow("Resized image", resized)
cv2.waitKey(0)
cv2.destroyAllWindows()
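Applied to the webcam loop from the question, the same percentage-based resize would look roughly like this (a sketch; note the guard against empty frames, since cv2.resize() raises an error when cap.read() returns None):

import cv2

cap = cv2.VideoCapture(0)
scale_percent = 75  # percent of the original frame size

while cap.isOpened():
    ret, frame = cap.read()
    if not ret or frame is None:
        break  # resize would fail on an empty frame
    width = int(frame.shape[1] * scale_percent / 100)
    height = int(frame.shape[0] * scale_percent / 100)
    small_frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
    cv2.imshow("Frame", small_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()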
In the script below I am experimenting with OpenCV, calculating the distance of a face detected with Haar cascades from my laptop webcam. I am using a Windows 10 laptop with a webcam, Python 3.6, and OpenCV 3.4.
I am having an issue with cv2.putText when displaying this calculated value on the video stream:
text = "Inches{}".format(np.int(inches))
cv2.putText(gray, text, (roi[0] - 10, roi[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
The code runs if this is commented out. Any tips on what I am doing wrong would be greatly appreciated!
import numpy as np
import imutils
import cv2
from imutils.video import VideoStream
from imutils.video import FPS
import time

def distance_to_camera(knownWidth, focalLength, perWidth):
    # compute and return the distance from the marker to the camera
    return (knownWidth * focalLength) / perWidth

face_cascade = cv2.CascadeClassifier('C:/Users/Haar/frontalFace10/haarcascade_frontalface_alt.xml')

# Calculated from a different script
focalLength = 709.0909090909091
# average human head width
knownWidth = 7

# Initialize multithreading the video stream.
camera = VideoStream(src=0).start()
# Allow the camera to warm up.
time.sleep(2.0)

# start FPS
fps = FPS().start()
roi = None

while True:
    image = camera.read()
    image = imutils.resize(image, width=500)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    for (x, y, w, h) in faces:
        cv2.rectangle(gray, (x, y), (x + w, y + h), (255, 255, 255), 2)
        roi = gray[y:y + h, x:x + w]
    if roi is None:
        pass
    else:
        inches = distance_to_camera(knownWidth, focalLength, roi.shape[1])
        print(inches)
        text = "Inches{:.2f}".format(np.int(inches))
        cv2.putText(gray, text, (roi[0] - 10, roi[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    cv2.imshow("gray", gray)
    key = cv2.waitKey(1) & 0xFF
    fps.update()
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
camera.stop()
cv2.destroyAllWindows()
This is the full traceback of the error:
Traceback (most recent call last):
File "C:\Users\distance-to-camera\selectHaar3.py", line 53, in <module>
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
TypeError: only size-1 arrays can be converted to Python scalars
roi is a 2-D image crop, so roi[0] and roi[1] are whole rows of pixels (arrays), not coordinates.
I think what you are trying to do is:
cv2.putText(gray, text, (x - 10, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
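To see why the original call fails: indexing a 2-D crop with a single index returns a whole row. A small demonstration (assuming a hypothetical 40x40 grayscale crop):

import numpy as np

roi = np.zeros((40, 40), dtype=np.uint8)  # stand-in for a grayscale face crop
print(roi[0].shape)  # (40,) -- a full row of pixels, not an x-coordinate

try:
    int(roi[0] - 10)  # what putText effectively attempts with the bad origin
except TypeError as e:
    print(e)  # only size-1 arrays can be converted to Python scalars

Using the loop variables x and y, which are scalars, fixes it.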