OpenCV Python: Using multithreading with VideoCapture and VideoWriter - python

I am labeling some recordings with quite a few objects. I notice that when I do this in the naive OpenCV manner, the video export takes roughly real time (an hour-long video means an hour-long export). I'd like to accelerate this, so I looked at some forums and code examples about using a separate thread to read the frames while the main thread does the frame processing. When I try this, however, the exported video is way too fast, and my added labels/annotations do not sync with the actual recording. Furthermore, even though I set the export frame rate to match the input video (30 fps), it seems that only my added annotations follow it. Here's my code; the threaded capture class was adapted from https://github.com/gilbertfrancois/video-capture-async. I took their thread-lock idea, but it did not do anything for me.
class Threaded_VidCapture(object):
    def __init__(self, videoSrc):
        # Constructor parameters
        self.VidIn = cv2.VideoCapture(videoSrc)
        self.Terminate = False
        self.Ret, self.Frame = self.VidIn.read()
        # Video parameters
        self.FPS = int(self.VidIn.get(cv2.CAP_PROP_FPS))
        self.Resolution = (
            int(self.VidIn.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(self.VidIn.get(cv2.CAP_PROP_FRAME_HEIGHT))
        )
        self.FrameCount = int(self.VidIn.get(cv2.CAP_PROP_FRAME_COUNT))
        self.ThreadLock = threading.Lock()

    def startProcess(self):
        self.ThreadStart = Thread(target=self.retrieveFrames, args=())
        self.ThreadStart.daemon = True
        self.ThreadStart.start()
        return(self)

    def retrieveFrames(self):
        while self.Terminate is False:
            Ret, Frame = self.VidIn.read()
            with self.ThreadLock:
                self.Ret = Ret
                self.Frame = Frame

    def read(self):
        with self.ThreadLock:
            return(self.Frame.copy(), self.Ret)

    def TerminateProgram(self):
        self.Terminate = True
        self.ThreadStart.join()
        self.VidIn.release()


def annotateVideo_wrapper(RotationalCounts, Coords_FromLabel, Coords_ToLabel,
                          videoFile, videoExport, **kwargs):
    GetFrames = Threaded_VidCapture(videoSrc=videoFile).startProcess()
    Write = cv2.VideoWriter_fourcc(*"XVID")
    Export = cv2.VideoWriter(videoExport, Write, GetFrames.FPS, GetFrames.Resolution)

    def labelAnnotation(Frame, Text, Position, Color_3ChannelRGB, **kwargs):
        ScaleKwargs = dict([(key, vals) for key, vals in kwargs.items()])
        # Base parameters from the CV2 website
        Font = cv2.FONT_HERSHEY_PLAIN
        if len(ScaleKwargs) == 0:
            Scale = 2
        else:
            Scale = ScaleKwargs["Scale"]
        Color = (0, 0, 0)  # Black
        Thickness = cv2.FILLED
        Margin = 5
        TextSize = cv2.getTextSize(text=str(Text), fontFace=Font, fontScale=Scale, thickness=Thickness)
        EndX = Position[0] + TextSize[0][0] + Margin
        EndY = Position[1] - TextSize[0][1] - Margin
        cv2.rectangle(Frame, Position, (EndX, EndY), Color_3ChannelRGB, Thickness)
        cv2.putText(Frame, str(Text), Position, Font, Scale, Color, 1, cv2.LINE_AA)

    if kwargs:
        HashMap = {keys: Val for keys, Val in kwargs.items()}
        try:
            Graphs = Graph.Live_movementTrace(
                DataFrame=HashMap["DataFrame"],
                Label=HashMap["Label"],
                RotationMetaData=HashMap["RotationMetaData"],
                IndexLimit=HashMap["IndexLimit"]
            )
        except KeyError:
            raise(KeyError("Missing arguments, re-run the program with the correct input or leave this function blank"))
        fig = mp.figure()
        fig.suptitle("Live Movement Plot", fontweight="bold", fontsize=14)
        Hash = {"CW_x": [], "CW_y": [], "CCW_x": [], "CCW_y": []}
        line1, = mp.plot(Hash["CW_x"], Hash["CW_y"], color="blue")
        line2, = mp.plot(Hash["CCW_x"], Hash["CCW_y"], color="red")
        mp.xlim(0, GetFrames.Resolution[0])
        mp.ylim(0, GetFrames.Resolution[1])
        ax = mp.gca()
        ax.invert_yaxis()
    else:
        Graphs = []

    Ind = 0
    ProgressBar = tqdm(total=len(Coords_FromLabel))
    while(Ind < GetFrames.FrameCount):
        Frame = GetFrames.read()[0]
        ####################
        # Add coordinate system
        ####################
        cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_FromLabel[Ind][0]), 0), (0, 0, 0), 5, 8, 0)  # MinY
        cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_FromLabel[Ind][0]), GetFrames.Resolution[1]), (255, 0, 0), 5, 8, 0)  # MaxY
        cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (0, int(Coords_FromLabel[Ind][1])), (0, 255, 0), 5, 8, 0)  # MinX
        cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (GetFrames.Resolution[0], int(Coords_FromLabel[Ind][1])), (255, 255, 255), 5, 8, 0)  # MaxX
        ####################
        # Add counters
        ####################
        labelAnnotation(Frame, f"CW: {round(RotationalCounts[0][Ind], 3)}", (20, 100), (255, 255, 255))
        labelAnnotation(Frame, f"CCW: {round(RotationalCounts[1][Ind], 3)}", (20, 130), (0, 0, 255))
        if Ind < 10000:
            labelAnnotation(Frame, f"Frame Num: {Ind}", (20, 160), (100, 100, 100))
        elif Ind >= 10000:
            labelAnnotation(Frame, f"Frame Num: {Ind}", (20, 160), (100, 100, 100), Scale=1.5)
        ####################
        # Add vectors
        ####################
        cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_ToLabel[Ind][0]), int(Coords_ToLabel[Ind][1])), (0, 0, 255), 5, 8, 0)
        if len(Graphs) != 0:
            if ((Ind in Graphs[3]) and (Ind != 0)):
                for keys in Hash:
                    Hash[keys].clear()
            Hash["CW_x"].append(Graphs[0][f'{HashMap["Label"]}_x'][Ind])
            Hash["CW_y"].append(Graphs[0][f'{HashMap["Label"]}_y'][Ind])
            line1.set_xdata(Hash["CW_x"])
            line1.set_ydata(Hash["CW_y"])
            Hash["CCW_x"].append(Graphs[1][Ind])
            Hash["CCW_y"].append(Graphs[2][Ind])
            line2.set_xdata(Hash["CCW_x"])
            line2.set_ydata(Hash["CCW_y"])
            fig.canvas.draw()
            img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
            img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            # Needs to be a variable offset
            x_offset = GetFrames.Resolution[0] - 450
            y_offset = GetFrames.Resolution[1] - 340
            # print(Frame.shape)
            x_end = x_offset + img.shape[1]
            y_end = y_offset + img.shape[0]
            Frame[y_offset:y_end, x_offset:x_end] = img
        Export.write(Frame)
        # if HashMap["ShowFrame"] is True:
        #     cv2.imshow("Frame", Frame)
        cv2.waitKey(1) & 0xFF
        # if ((cv2.waitKey(1) & 0xFF == ord("q")) or (GetFrames.Terminate is True)):
        #     GetFrames.TerminateProgram()
        #     break
        ProgressBar.update(1)
        Ind += 1
    Export.release()
    cv2.destroyAllWindows()
    GetFrames.TerminateProgram()
There's a lot that I'm adding to each frame, and again this works fine with the naive approach; I get about 23 it/s as measured with tqdm, which is close to the FPS mark. I'd like to be above 30 it/s, but I don't know if that's possible.
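One idea I'm considering is handing frames from the reader thread to the main loop through a bounded queue, so that every frame is consumed exactly once and in order instead of whatever happens to be in self.Frame at the time; a rough, untested sketch (the class name and queue size are placeholders, not part of the code above):

import queue
import threading

import cv2


class QueuedVidCapture:
    """Read frames on a worker thread and hand them over through a bounded queue."""

    def __init__(self, videoSrc, maxsize=64):
        self.vid = cv2.VideoCapture(videoSrc)
        self.frames = queue.Queue(maxsize=maxsize)  # back-pressure keeps the reader in step
        self.fps = self.vid.get(cv2.CAP_PROP_FPS)

    def start(self):
        threading.Thread(target=self._reader, daemon=True).start()
        return self

    def _reader(self):
        while True:
            ret, frame = self.vid.read()
            self.frames.put((ret, frame))  # blocks when the queue is full
            if not ret:
                break

    def read(self, timeout=5):
        return self.frames.get(timeout=timeout)  # (ret, frame), in capture order

The processing loop would then call ret, frame = cap.read() and stop once ret is False, so the annotation index stays in step with the frames that actually get written.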

Related

How do I clock the user action?

I followed a simple head pose estimation tutorial in Python and tried to make some modifications to the code, but I've been stuck for days now. I just want to know how long the user has been looking to the left or right. If the user is detected looking to the left or right for a long time, say 2-3 minutes, then the program should give a warning or print a simple message saying how long they have been looking that way. How do I achieve this? Any ideas how to do this?
Sorry for my bad English; any help will be appreciated :)
Here's my code:
import cv2
import mediapipe as mp
import numpy as np
import time
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
success, image = cap.read()
image = cv2.resize(image, (780, 350))
image = cv2.flip(image, 1)
#start = time.time()
# Flip the image horizontally for a later selfie-view display
# Also convert the color space from BGR to RGB
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance
image.flags.writeable = False
# Get the result
results = face_mesh.process(image)
# To improve performance
image.flags.writeable = True
# Convert the color space from RGB to BGR
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
img_h, img_w, img_c = image.shape
face_3d = []
face_2d = []
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
for idx, lm in enumerate(face_landmarks.landmark):
if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
if idx == 1:
nose_2d = (lm.x * img_w, lm.y * img_h)
nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
x, y = int(lm.x * img_w), int(lm.y * img_h)
# Get the 2D Coordinates
face_2d.append([x, y])
# Get the 3D Coordinates
face_3d.append([x, y, lm.z])
# Convert it to the NumPy array
face_2d = np.array(face_2d, dtype=np.float64)
# Convert it to the NumPy array
face_3d = np.array(face_3d, dtype=np.float64)
# The camera matrix
focal_length = 1 * img_w
cam_matrix = np.array([ [focal_length, 0, img_h / 2],
[0, focal_length, img_w / 2],
[0, 0, 1]])
# The distortion parameters
dist_matrix = np.zeros((4, 1), dtype=np.float64)
# Solve PnP
success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
# Get rotational matrix
rmat, jac = cv2.Rodrigues(rot_vec)
# Get angles
angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
# Get the y rotation degree
x = angles[0] * 360
y = angles[1] * 360
z = angles[2] * 360
# See where the user's head tilting
if y < -10:
# i want to know how long this guy been looking to his left or right or up or down
text = "Looking Left"
elif y > 10:
text = "Looking Right"
elif x < -10:
text = "Looking Down"
elif x > 10:
text = "Looking Up"
else:
text = "Forward"
# Display the nose direction
nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
p1 = (int(nose_2d[0]), int(nose_2d[1]))
p2 = (int(nose_2d[0] + y * 10) , int(nose_2d[1] - x * 10))
cv2.line(image, p1, p2, (255, 0, 0), 3)
# Add the text on the image
cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
cv2.putText(image, "x: " + str(np.round(x,2)), (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
cv2.putText(image, "y: " + str(np.round(y,2)), (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
cv2.putText(image, "z: " + str(np.round(z,2)), (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
# need some fix here
# end = time.time()
# totalTime = end - start
# fps = 1 / totalTime
#print("FPS: ", fps)
#cv2.putText(image, f'FPS: {int(fps)}', (20,450), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
cv2.imshow('Head Pose Estimation', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
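A minimal sketch of one possible way to do the timing, assuming the text label set inside the loop above: remember when the current direction started and compare it to the clock every frame (the 120-second threshold and all names here are illustrative):

import time

class DirectionTimer:
    """Track how long the same head-pose label has been reported."""

    def __init__(self, warn_after=120):           # seconds; "2-3 mins" would be 120-180
        self.warn_after = warn_after
        self.direction = None
        self.started = time.time()

    def update(self, text):
        """Call once per frame with the current label; returns seconds held."""
        if text != self.direction:                # direction changed, restart the timer
            self.direction = text
            self.started = time.time()
        held = time.time() - self.started
        if text in ("Looking Left", "Looking Right") and held > self.warn_after:
            print("User has been %s for %.0f seconds" % (text.lower(), held))
        return held

You would create timer = DirectionTimer() before the while loop, call held = timer.update(text) right after the if/elif chain that sets text, and optionally draw held on the image with cv2.putText.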

Python Hand tracking script crashing with a KeyError: 2 error

I'm testing out a hand-tracking volume control script. It runs, but when I bring my hand into frame it instantly crashes. I get this error message:
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
KeyError: 2
If I comment out area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100, the script runs as normal but doesn't give me the volume-controlling function.
Here is the complete code:
import cv2
import time
import numpy as np
import HandTrackingModule as htm
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
################################
wCam, hCam = 640, 480
################################
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
pTime = 0
detector = htm.HandDetector(detectionCon=0.7, maxHands=1)
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# volume.GetMute()
# volume.GetMasterVolumeLevel()
volRange = volume.GetVolumeRange()
minVol = volRange[0]
maxVol = volRange[1]
vol = 0
volBar = 400
volPer = 0
area = 0
colorVol = (255, 0, 0)
while True:
success, img = cap.read()
# Find hands
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img, draw=True)
if len(lmList) != 0:
# Filter based on size
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
#print(area)
if 250 < area < 1000:
#print("Yes")
# Find distance between index and thumb
length, img, lineInfo = detector.findDistance(4, 8, img)
#print(length)
# Convert volume
volBar = np.interp(length, [50, 200], [400, 150])
volPer = np.interp(length, [50, 200], [0, 100])
# Reduce resolution to make it smoother
smoothness = 10
volPer = smoothness * round(volPer / smoothness)
# Check fingers for up
fingers = detector.fingersUp()
print(fingers)
# if pinky is down set volume
if not fingers[4]:
volume.SetMasterVolumeLevelScalar(volPer / 100, None)
cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 225, 0), cv2.FILLED)
colorVol = (225, 0 ,0)
else:
colorVol = (0, 255, 0)
# drawings
cv2.rectangle(img, (50, 150), (85, 400), (225, 0, 0), 3)
cv2.rectangle(img, (50, int(volBar)), (85, 400), (225, 0, 0), cv2.FILLED)
cv2.putText(img, f' {int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (225, 0, 0), 3)
cVol = int(volume.GetMasterVolumeLevelScalar() * 100)
cv2.putText(img, f'Vol Set: {int(cVol)}', (400, 50), cv2.FONT_HERSHEY_COMPLEX, 1, colorVol, 3)
# Frame rate
cTime = time.time()
fps = 1/(cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3)
cv2.imshow("image", img)
cv2.waitKey(1)
I've looked everywhere but came up empty-handed.
If you know how to fix this, please let me know.
I assume HandTrackingModule is this, and that it has been updated since you downloaded it, because it no longer has a HandDetector class. It does, however, have a FindHands class with a getPosition() method, which I assume is equivalent to your detector.findPosition().
That method returns a list of hands, so when you index into it you're selecting which hand you want, and indexing with 2 will fail unless there are at least 3 hands. If it detects multiple hands, the list will have length > 1.
Rename what you've currently called bbox to bboxes, since it's plural, then do:
areas = [
    (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
    for bbox in bboxes
]
for area in areas:
    print(area)
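If only one hand is ever tracked (the question sets maxHands=1), taking the first entry should be enough; this assumes, as above, that each entry is a box indexable like (x1, y1, x2, y2):

if bboxes:                       # one entry per detected hand
    bbox = bboxes[0]             # with maxHands=1 there is at most one
    area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100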

How to solve Python webcam crash. I'm using webcam to count the number of Person/Car/Truck that passed an area

I want to make a Person/Car/Truck counting program using Python and PyCharm, with a webcam (private link) as the video source.
When the 'Person' button (on the left side) is clicked, then from that moment on, every time a person walks across the red line (the green line is the starting parameter), the program counts 1 person and keeps counting until the same 'Person' button is clicked again. The same goes for the 'Car' and 'Truck' buttons. The total counts for Car and Truck are also shown in the window.
I already have working code for this, but whenever I run the program (py file), for an unknown reason it crashes/stops a few moments later, around 15 to 30 seconds after it starts. The error is:
Traceback (most recent call last):
File "C:/Users/admin/Desktop/Fadhli/License Plate Recognition/Mark1/mark5_1.py", line 110, in <module>
frame = frame2.copy()
AttributeError: 'NoneType' object has no attribute 'copy'
Process finished with exit code 1
I tested the same code with my own webcam and it works fine and doesn't crash.
I suspect the problem is that some filter on the private webcam, or something similar, causes the program to get stuck and then crash.
My question is, how do I either:
A) add a buffer or something like that, so that the program doesn't hang, or
B) altogether bypass the filter from that private camera so that the program doesn't crash?
My code:
import cv2
import numpy as np
# Resolution Pixel Size
# 640 x 480 - Standard Definition(SD)
# 1280 x 720 - High Definition(HD)
# 1920 x 1080 - Ultra High Definiion(HD+)
# 3840 x 2160 - Ultra High Definition(UHD or 2K)
cyanColor = (255, 255, 0)
pinkColor = (255, 0, 255)
yellowColor = (0, 255, 255)
greenColor = (0, 255, 0)
blueColor = (255, 0, 0)
redColor = (0,0,255)
path_vid = "Resources/video/license_plate.mp4"
path_main_ent= 'rtsp://admin:~~~Streaming/Channels/101'
path_parking_Lot = 'rtsp://admin:~~~/Streaming/Channels/101'
button_person = False
button_car = False
button_truck = False
counter = 0
nmsThreshold = 0.3
confThreshold = 0.4
def rescale_frame(image, percentage):
width = int(image.shape[1] * percentage / 100)
height = int(image.shape[0] * percentage / 100)
new_frame = (width, height)
return cv2.resize(image, new_frame, interpolation=cv2.INTER_AREA)
def click_button(event,x,y,flags,params):
global button_person
global button_car
global button_truck
if event == cv2.EVENT_LBUTTONDOWN:
print(x,y)
# ------ Person Button Clicking ---------
polygon_person = np.array([[(20, 160), (200, 160), (200, 230), (20, 230)]])
is_inside_person_button = cv2.pointPolygonTest(polygon_person,(x,y),False)
if is_inside_person_button>0:
print("You've clicked a button",x,y)
if button_person is False:
button_person = True
else:
button_person = False
print("Now Person Button is: ",button_person)
# ------ Car Button Clicking ---------
polygon_car = np.array([[(20, 250), (200, 250), (200, 320), (20, 320)]])
is_inside_car_button = cv2.pointPolygonTest(polygon_car, (x, y), False)
if is_inside_car_button > 0:
print("You've clicked a button", x, y)
if button_car is False:
button_car = True
else:
button_car = False
print("Now Car Button is: ", button_car)
# ------ Truck Button Clicking ---------
polygon_truck = np.array([[(20, 340), (200, 340), (200, 410), (20, 410)]])
is_inside_truck_button = cv2.pointPolygonTest(polygon_truck, (x, y), False)
if is_inside_truck_button > 0:
print("You've clicked a button", x, y)
if button_truck is False:
button_truck = True
else:
button_truck = False
print("Now Truck Button is: ", button_truck)
# net = cv2.dnn.readNet("dnn_model/yolov3.weights", "dnn_model/yolov3.cfg")
# net = cv2.dnn.readNet("dnn_model/yolov3-spp.weights", "dnn_model/yolov3-spp.cfg")
# net = cv2.dnn.readNet("dnn_model/yolov3-tiny.weights", "dnn_model/yolov3-tiny.cfg")
# net = cv2.dnn.readNet("dnn_model/yolov4.weights", "dnn_model/yolov4.cfg")
net = cv2.dnn.readNet("dnn_model/yolov4-tiny.weights", "dnn_model/yolov4-tiny.cfg")
model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(640,480), scale=1/255)
classes = []
with open("dnn_model/classes.txt","r") as file_object:
for class_name in file_object.readlines():
class_name = class_name.strip()
classes.append(class_name)
# print("- Object List -")
# print(classes[0])
cap = cv2.VideoCapture(path_main_ent)
# cap = cv2.VideoCapture(path_parking_Lot)
cv2.namedWindow("Frame") # The name should be same with cv2.imshow("_Name_")
cv2.setMouseCallback("Frame",click_button)
ret,frame1 = cap.read()
while cap.isOpened():
ret,frame2 = cap.read()
frame = frame2.copy()
# print(type(frame))
if not ret:
break
img_frame_90 = rescale_frame(frame, 90)
line_frame_above = cv2.line(img_frame_90, (390, 430), (1220, 470), yellowColor, 2)
line_frame = cv2.line(img_frame_90, (380, 440), (1220, 480), blueColor, 4)
line_frame_bottom = cv2.line(img_frame_90, (370, 450), (1220, 490), yellowColor, 2)
polygon_person = np.array([[(20, 160), (200, 160), (200, 230), (20, 230)]])
cv2.fillPoly(img_frame_90, polygon_person, greenColor)
cv2.putText(img_frame_90, "Person", (50, 200), cv2.FONT_HERSHEY_PLAIN, 2, blueColor, 2)
polygon_car = np.array([[(20, 250), (200, 250), (200, 320), (20, 320)]])
cv2.fillPoly(img_frame_90, polygon_car, greenColor)
cv2.putText(img_frame_90, "Car", (50, 290), cv2.FONT_HERSHEY_PLAIN, 2, blueColor, 2)
polygon_truck = np.array([[(20, 340), (200, 340), (200, 410), (20, 410)]])
cv2.fillPoly(img_frame_90, polygon_truck, greenColor)
cv2.putText(img_frame_90, "Truck", (50, 380), cv2.FONT_HERSHEY_PLAIN, 2, blueColor, 2)
(class_ids, scores, bboxes) = model.detect(img_frame_90,nmsThreshold=nmsThreshold, confThreshold=confThreshold)
if len(class_ids) != 0:
for class_id, score, bbox in zip(class_ids,scores,bboxes):
(x,y,w,h) = bbox
class_name = classes[class_id]
xmid = int((x + (x + w)) / 2)
ymid = int((y + (y + h)) / 2)
if class_name == "person" and button_person is True:
cv2.rectangle(img_frame_90, (x, y), (x + w, y + h), pinkColor, 2)
cv2.circle(img_frame_90, (xmid, ymid), 3, redColor, -1)
# cv2.putText(img_frame_90,str(class_name),(x,y-10),cv2.FONT_HERSHEY_SIMPLEX,1,yellowColor,2)
if ymid > 431 and ymid < 507 and xmid > 370 and xmid <490:
line_frame = cv2.line(img_frame_90, (350, 440), (1220, 480), greenColor, 4)
counter += 1
if class_name == "car" and button_car is True:
cv2.rectangle(img_frame_90, (x, y), (x + w, y + h), greenColor, 2)
cv2.circle(img_frame_90, (xmid, ymid), 3, redColor, -1)
# cv2.putText(img_frame_90,str(class_name),(x,y-10),cv2.FONT_HERSHEY_SIMPLEX,1,yellowColor,2)
if ymid > 431 and ymid < 507:
line_frame = cv2.line(img_frame_90, (350, 440), (1220, 480), greenColor, 4)
counter += 1
if class_name == "truck" and button_truck is True:
cv2.rectangle(img_frame_90, (x, y), (x + w, y + h), cyanColor, 2)
cv2.circle(img_frame_90, (xmid, ymid), 3, redColor, -1)
# cv2.putText(img_frame_90,str(class_name),(x,y-10),cv2.FONT_HERSHEY_SIMPLEX,1,yellowColor,2)
if ymid > 431 and ymid < 507:
line_frame = cv2.line(img_frame_90, (350, 440), (1220, 480), greenColor, 4)
counter += 1
cv2.putText(img_frame_90, 'Total Vehicles : {}'.format(counter), (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, yellowColor,2)
frame1 = frame2
cv2.imshow("Frame",img_frame_90)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
@KenY-N, as per your suggestion, I modified the code (from line 169) to:
cv2.putText(img_frame_90, 'Total Vehicles : {}'.format(counter), (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, yellowColor,2)
frame1 = frame2
if not frame2:
continue
cv2.imshow("Frame",img_frame_90)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
print(ret)
cap.release()
cv2.destroyAllWindows()
The result is still a crash, with this error:
True
''
''
''
True
Traceback (most recent call last):
File "C:/Users/admin/Desktop/Fadhli/License Plate Recognition/Mark1/mark5_1.py", line 172, in <module>
if not frame2:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Process finished with exit code 1
Here's the problem:
ret, frame2 = cap.read()
frame = frame2.copy()
# print(type(frame))
if not ret:
    break
This code tries to copy frame2 before checking whether the capture succeeded. Just rearrange the operations to put the check first:
ret, frame2 = cap.read()
# print(type(frame))
if not ret:
    break
frame = frame2.copy()
Of course, break will exit the loop on the first capture failure, so you might want to use continue instead.
Try this solution for the buffer issue; it always uses the latest frame.
And to solve the error, try using:
ret, frame2 = cap.read()
if not ret:
    continue
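The "latest frame" approach mentioned above generally looks something like the sketch below: a daemon thread keeps draining the stream so the driver buffer never backs up, and the main loop always works on the most recent frame (class and method names here are illustrative, not from any particular library):

import threading

import cv2


class LatestFrameReader:
    """Keep reading a stream on a background thread and expose only the newest frame."""

    def __init__(self, src):
        self.cap = cv2.VideoCapture(src)
        self.lock = threading.Lock()
        self.ret, self.frame = self.cap.read()
        threading.Thread(target=self._update, daemon=True).start()

    def _update(self):
        while True:
            ret, frame = self.cap.read()
            with self.lock:
                self.ret, self.frame = ret, frame

    def read(self):
        with self.lock:
            # copy so the caller can draw on it while the thread keeps updating
            frame = None if self.frame is None else self.frame.copy()
            return self.ret, frame

In the main loop you would then call ret, frame2 = reader.read() instead of cap.read(), and continue when ret is False or frame2 is None.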

OpenCV Memory and CPU Usage

I'm making a USV (Unmanned Surface Vehicle) for my bachelor project. The track it needs to keep is marked by coloured buoys on the left/right side of the track, with more buoys marking obstacles.
So I need to track the depth of these objects and pass all that information on to my navigation program.
I have written Python code using ROS and OpenCV to track these buoys with a ZED2 camera, but I'm having CPU and memory issues where the Ubuntu desktop starts to lag.
I'm using an Nvidia Jetson Xavier NX, and the program uses 85% of the CPU and 5.5+ of 7.59 GB of memory.
Is anyone interested in looking over my code to see if I'm doing something stupid that would explain my issues?
from __future__ import print_function
import roslib
import sys
import rospy
import cv2
from main.msg import VarRed, VarGreen, VarYellow, RedHSV, GreenHSV, YellowHSV, MidPoint
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
import imutils
import time
from collections import deque
import math
class image_converter:
def __init__(self):
self.image_subd = rospy.Subscriber("/zed2/zed_node/depth/depth_registered",Image,self.callbackDepth)
self.image_sub = rospy.Subscriber("/zed2/zed_node/rgb_raw/image_raw_color",Image,self.callbackVideo)
self.image_pub = rospy.Publisher("/Tracking/RG_image", Image, queue_size = 1)
self.RedHSV_sub = rospy.Subscriber("/Tracking/Red_HSV", RedHSV, self.redHSV)
self.GreenHSV_sub = rospy.Subscriber("/Tracking/Green_HSV", GreenHSV, self.greenHSV)
self.YellowHSV_sub = rospy.Subscriber("/Tracking/Yellow_HSV", YellowHSV, self.yellowHSV)
self.MidPoint_pub = rospy.Publisher("/Tracking/MidPoint", MidPoint, queue_size = 1)
self.red_bridge = CvBridge()
self.red_publisher = rospy.Publisher("/Tracking/red", VarRed, queue_size = 1)
self.green_bridge = CvBridge()
self.green_publisher = rospy.Publisher("/Tracking/green", VarGreen, queue_size = 1)
self.yellow_bridge = CvBridge()
self.yellow_publisher = rospy.Publisher("/Tracking/yellow", VarYellow, queue_size = 1)
self.RedLower = (0, 101, 68) # Declaring the red-specter
self.RedUpper = (15, 255, 255)
self.GreenLower = (75, 145, 48) # Declaring the green-specter
self.GreenUpper = (96, 255, 75)
self.YellowLower = (28, 56, 91) # Declaring the yellow-specter
self.YellowUpper = (51, 152, 150)
self.red_pts = deque(maxlen=14)
self.currentDepthImg=0
self.red_counter = 0
self.red_x = 0
self.red_y = 0
self.red_radius = 30
self.green_pts = deque(maxlen=14)
self.green_currentDepthImg=0
self.green_counter = 0
self.green_x = 0
self.green_y = 0
self.green_radius = 30
self.yellow_pts = deque(maxlen=14)
self.yellow_currentDepthImg=0
self.yellow_counter = 0
self.yellow_x = 0
self.yellow_y = 0
self.yellow_radius = 30
def redHSV(self,msg):
self.RedLower = (msg.r_h_low-10, msg.r_s_low-10, msg.r_v_low-10)
self.RedUpper = (msg.r_h_high+10, msg.r_s_high+10, msg.r_v_high+10)
def greenHSV(self,msg):
self.GreenLower = (msg.g_h_low-10, msg.g_s_low-10, msg.g_v_low-10)
self.GreenUpper = (msg.g_h_high+10, msg.g_s_high+10, msg.g_v_high+10)
def yellowHSV(self,msg):
self.YellowLower = (msg.y_h_low-10, msg.y_s_low-10, msg.y_v_low-10)
self.YellowUpper = (msg.y_h_high+10, msg.y_s_high+10, msg.y_v_high+10)
def callbackDepth(self,msg_depth):
try:
cv_image_depth = self.red_bridge.imgmsg_to_cv2(msg_depth, "32FC1") # CV_Bridge Depth
except CvBridgeError as e:
print(e)
return
self.currentDepthImg=cv_image_depth
try:
a=1
except CvBridgeError as e:
print(e)
return
def callbackVideo(self,data):
try:
cv_image = self.red_bridge.imgmsg_to_cv2(data, "bgr8") # CV_Bridge Video
except CvBridgeError as e:
print(e)
return
(rows,cols,channels) = cv_image.shape
frame = cv_image
blurred = cv2.GaussianBlur(frame, (21, 21), 0) # resize the frame, blur it, and convert it to the HSV (11,11), 0
hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV) # color space.
red_mask = cv2.inRange(hsv, self.RedLower, self.RedUpper) # Construct a mask for the color "red", then perform
red_mask = cv2.erode(red_mask, None, iterations=2) # a series of dilations and erosions to remove any small
red_mask = cv2.dilate(red_mask, None, iterations=2) # blobs thats left in the mask.
green_mask = cv2.inRange(hsv, self.GreenLower, self.GreenUpper) # construct a mask for the color "green", then perform
green_mask = cv2.erode(green_mask, None, iterations=2) # a series of dilations and erosions to remove any small
green_mask = cv2.dilate(green_mask, None, iterations=2) # blobs thats left in the mask.
yellow_mask = cv2.inRange(hsv, self.YellowLower, self.YellowUpper) # construct a mask for the color "yellow", then perform
yellow_mask = cv2.erode(yellow_mask, None, iterations=2) # a series of dilations and erosions to remove any small
yellow_mask = cv2.dilate(yellow_mask, None, iterations=2) # blobs thats left in the mask.
red_cnts = cv2.findContours(red_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
red_cnts = imutils.grab_contours(red_cnts)
red_center = None
self.red_radius = 0
green_cnts = cv2.findContours(green_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
green_cnts = imutils.grab_contours(green_cnts)
green_center = None
self.green_radius = 0
yellow_cnts = cv2.findContours(yellow_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
yellow_cnts = imutils.grab_contours(yellow_cnts)
yellow_center = None
self.yellow_radius = 0
cv_imaged=self.currentDepthImg
#-----------------------------------------RED_START------------------------------------------------------
if len(red_cnts) > 0: # only proceed if at least one contour was found
red_c = max(red_cnts, key=cv2.contourArea) # find the largest contour in the red_mask, then use
((self.red_x, self.red_y), self.red_radius) = cv2.minEnclosingCircle(red_c) # it to compute the minimum enclosing circle and
red_M = cv2.moments(red_c) # centroid
red_center = (int(red_M["m10"] / red_M["m00"]), int(red_M["m01"] / red_M["m00"]))
if self.red_radius > 5: # only proceed if the radius meets a minimum size
cv2.circle(frame, (int(self.red_x), int(self.red_y)), int(self.red_radius), (0, 255, 255), 2) # draw the circle and centroid on the red_frame,
cv2.circle(frame, red_center, 5, (0, 255, 255), -1) # then update the list of tracked points
msg = VarRed()
msg.r_visible = True
#if self.red_y == self.red_y and self.red_x == self.red_x:
r_length = cv_imaged[int(self.red_y),int(self.red_x)] # length to object
msg.r_x = self.red_x
msg.r_y = self.red_y
msg.r_rad = self.red_radius
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
# Printing pixel values
cv2.rectangle(frame, (0, 0), (200, 190), (0,0,0), -1)
cv2.putText(frame, str("L: %.3f" %r_length), ((int(self.red_x)),int(self.red_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,255), 2)
cv2.putText(frame, str("RX: %.1f" %msg.r_x +" px"), (10,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("RY: %.1f" %msg.r_y + " px"), (10,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For X-direction
red_l_cm = (r_length*100) # Converting to Centimeters
start_x_r = 960/(math.tan((55*ToRad))) # finding start x-length in px
ang_x_r = (math.atan((self.red_x-960)/start_x_r))*ToDeg # finding horizontal angle
red_x_cm = (red_l_cm*math.sin((ang_x_r)*ToRad))
cv2.putText(frame, str("RXC: %.1f" %red_x_cm + " cm"), (10,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("X Ang: %.1f" %ang_x_r), (10,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For Y-direction
start_y_r = 540/(math.tan((35*ToRad))) # finding start y-length in px
ang_y_r = ((math.atan((self.red_y-540)/start_y_r))*ToDeg)*-1 # finding vertical angle
red_y_cm = (red_l_cm/math.tan((ang_y_r*ToRad)+(math.pi/2)))*-1 # finding the y-length
cv2.putText(frame, str("RYC: %.1f" %red_y_cm + " cm"), (10,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("Y Ang: %.1f" %ang_y_r), (10,180), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
red_z = (math.cos(abs(ang_x_r)*ToRad))*red_l_cm
self.red_pts.appendleft(red_center)
msg.r_length = red_l_cm
msg.r_xc = red_x_cm
msg.r_yc = red_y_cm
msg.r_angle = ang_x_r # update the points queue
msg.r_z = red_z
self.red_publisher.publish(msg)
for i in range(1, len(self.red_pts)): # loop over the set of points
if self.red_pts[i - 1] is None or self.red_pts[i] is None: # if either of the tracked points
continue # are None, ignore them.
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5) # otherwise, compute the thickness of the line and
cv2.line(frame, self.red_pts[i - 1], self.red_pts[i], (0, 255, 255), thickness) # draw the connecting lines
if self.red_radius < 5:
msg = VarRed()
msg.r_visible = False
self.red_publisher.publish(msg)
#-----------------------------------------RED_END------------------------------------------------------
#-----------------------------------------GREEN_START------------------------------------------------------
if len(green_cnts) > 0: # same as in red, but for green
green_c = max(green_cnts, key=cv2.contourArea)
((self.green_x, self.green_y), self.green_radius) = cv2.minEnclosingCircle(green_c)
green_M = cv2.moments(green_c)
green_center = (int(green_M["m10"] / green_M["m00"]), int(green_M["m01"] / green_M["m00"]))
if self.green_radius > 5:
cv2.circle(frame, (int(self.green_x), int(self.green_y)), int(self.green_radius), (0, 255, 255), 2)
cv2.circle(frame, green_center, 5, (0, 255, 255), -1)
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
msg1 = VarGreen()
msg1.g_visible = True
g_length = cv_imaged[int(self.green_y),int(self.green_x)]
msg1.g_x = self.green_x
msg1.g_y = self.green_y
msg1.g_rad = self.green_radius
# Printing pixel values
cv2.rectangle(frame, (1740, 0), (1920, 200), (0,0,0), -1)
cv2.putText(frame, str("L: %.3f" %g_length), ((int(self.green_x)),int(self.green_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,255), 2)
cv2.putText(frame, str("GX: %.1f" %msg1.g_x +"px"), (1740,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("GY: %.1f" %msg1.g_y + "px"), (1740,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For X-direction
green_l_cm = (g_length*100)
start_x_g = 960/(math.tan((55*2*np.pi/360)))
ang_x_g = (math.atan((self.green_x-960)/start_x_g))*57.295779513
green_x_cm = (green_l_cm*math.sin((ang_x_g)*ToRad))
# For Y-direction
start_y_g = 540/(math.tan((35*2*np.pi/360)))
ang_y_g = ((math.atan((self.green_y-540)/start_y_g))*57.295779513)*-1
green_y_cm = green_l_cm/math.tan(ang_y_g*ToRad+(math.pi/2))*-1
cv2.putText(frame, str("GXC: %.1f" %green_x_cm + "cm"), (1740,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("X Ang: %.1f" %ang_x_g), (1740,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("GYC: %.1f" %green_y_cm + "cm"), (1740,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("Y Ang: %.1f" %ang_y_g), (1740,180), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
green_z = (math.cos(abs(ang_x_g)*ToRad))*green_l_cm
self.green_pts.appendleft(green_center)
msg1.g_length = green_l_cm
msg1.g_xc = green_x_cm
msg1.g_yc = green_y_cm
msg1.g_angle = ang_x_g
msg1.g_z = green_z
self.green_publisher.publish(msg1)
for i in range(1, len(self.green_pts)):
if self.green_pts[i - 1] is None or self.green_pts[i] is None:
continue
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5)
cv2.line(frame, self.green_pts[i - 1], self.green_pts[i], (0, 255, 255), thickness)
if self.green_radius < 5:
msg1 = VarGreen()
msg1.g_visible = False
self.green_publisher.publish(msg1)
#-----------------------------------------GREEN_END------------------------------------------------------
#-----------------------------------------YELLOW_START------------------------------------------------------
if len(yellow_cnts) > 0: # only proceed if at least one contour was found
yellow_c = max(yellow_cnts, key=cv2.contourArea) # find the largest contour in the yellow_mask, then use
((self.yellow_x, self.yellow_y), self.yellow_radius) = cv2.minEnclosingCircle(yellow_c) # it to compute the minimum enclosing circle and
yellow_M = cv2.moments(yellow_c) # centroid
yellow_center = (int(yellow_M["m10"] / yellow_M["m00"]), int(yellow_M["m01"] / yellow_M["m00"]))
if self.yellow_radius > 5: # only proceed if the radius meets a minimum size
cv2.circle(frame, (int(self.yellow_x), int(self.yellow_y)), int(self.yellow_radius), (0, 0, 200), 2) # draw the circle and centroid on the yellow_frame,
cv2.circle(frame, yellow_center, 5, (0, 0, 200), -1) # then update the list of tracked points
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
msg2 = VarYellow()
msg2.y_visible = True
y_length = cv_imaged[int(self.yellow_y),int(self.yellow_x)] # length to object
msg2.y_x = self.yellow_x
msg2.y_y = self.yellow_y
msg2.y_rad = self.yellow_radius
cv2.putText(frame, str("L: %.3f" %y_length), ((int(self.yellow_x)),int(self.yellow_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,0,200), 2)
# For X-direction
yellow_l_cm = y_length*100 # Converting to Centimeters
start_x_y = 960/(math.tan((55*2*np.pi/360))) # finding start x-length in px
ang_x_y = (math.atan((self.yellow_x-960)/start_x_y))*57.295779513 # finding horizontal angle
#yellow_x = yellow_l_cm/math.tan((ang_x_y/57.295779513)) # finding the x-length
yellow_x_cm = (yellow_l_cm*math.sin((ang_x_y)*ToRad))
# For Y-direction
start_y_y = 540/(math.tan((35*2*np.pi/360))) # finding start y-length in px
ang_y_y = ((math.atan((self.yellow_y-540)/start_y_y))*57.295779513)*-1 # finding vertical angle
#yellow_y = yellow_l_cm/math.tan((ang_y_y/57.295779513)) # finding the y-length
yellow_y_cm = yellow_l_cm/math.tan(ang_y_y*ToRad+(math.pi/2))*-1
yellow_z = (math.cos(abs(ang_x_y)*ToRad))*yellow_l_cm
self.yellow_pts.appendleft(yellow_center)
msg2.y_length = yellow_l_cm
msg2.y_xc = yellow_x_cm
msg2.y_yc = yellow_y_cm
msg2.y_angle = ang_x_y # update the points queue
msg2.y_z = yellow_z
self.yellow_publisher.publish(msg2)
for i in range(1, len(self.yellow_pts)): # loop over the set of points
if self.yellow_pts[i - 1] is None or self.yellow_pts[i] is None: # if either of the tracked points
continue # are None, ignore them.
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5) # otherwise, compute the thickness of the line and
cv2.line(frame, self.yellow_pts[i - 1], self.yellow_pts[i], (0, 0, 255), thickness) # draw the connecting lines
if self.yellow_radius < 5:
msg2 = VarYellow()
msg2.y_visible = False
self.yellow_publisher.publish(msg2)
#-----------------------------------------YELLOW_END------------------------------------------------------
try:
if (self.green_radius > 5) & (self.red_radius > 5): # if you can see both colors, proceed
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
red_z = (math.cos(abs(ang_x_r)*ToRad))*red_l_cm
green_z = (math.cos(abs(ang_x_g)*ToRad))*green_l_cm
Delta_z = abs(red_z-green_z)
Tot_x = abs(green_x_cm) + abs(red_x_cm)
if Delta_z == Delta_z and Tot_x == Tot_x:
red_green_angle = (math.atan(Delta_z/Tot_x))*ToDeg
normal_angle = red_green_angle
if green_l_cm >= red_l_cm:
normal_angle = red_green_angle*-1
if green_l_cm < red_l_cm:
normal_angle = red_green_angle
MidPoint_data = MidPoint()
MidPoint_data.angle = normal_angle
self.MidPoint_pub.publish(MidPoint_data)
length_between_x = math.sqrt((Tot_x*Tot_x)+(Delta_z*Delta_z))
Delta_y = abs(red_y_cm-green_y_cm)
length_between = math.sqrt((length_between_x*length_between_x)+(Delta_y*Delta_y))
#dx = green_x_cm - red_x_cm # Finding the space between the colors in x-direction
#dy = green_y_cm - red_y_cm # Finding the space between the colors in y-direction
# Calculating the direct length between the colors in cm
#cv2.rectangle(frame, (500, 0), (680, 160), (0,0,0), -1)
#cv2.putText(frame, str("Dist: %.1f" %length_between + " cm"), (500,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#Middle_x = dx
#Middle_y = dy
MP_X = (msg1.g_x + msg.r_x)/2
MP_Y = (msg1.g_y + msg.r_y)/2
#Middle_Point_Angle = (math.atan((MP_X-960)/start_x_g))*57.295779513
#Middle_Point_Angle = ang_x_g - ang_x_r
#Middle_Point_Length =(red_x_cm-abs(Middle_x))/(math.sin((math.pi/2)-(Middle_Point_Angle*((2*math.pi)/720))))
#Middle_Point_Length =((red_x_cm-abs(Middle_x))/(math.cos(Middle_Point_Angle*((2*math.pi)/720))))
#cv2.putText(frame, str("MX: %.1f" %Middle_x + " cm"), (500,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#cv2.putText(frame, str("MY: %.1f" %Middle_y + " cm"), (500,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
if MP_X == MP_X and MP_Y == MP_Y:
cv2.circle(frame, (int(MP_X), int(MP_Y)), 8, (0, 0, 255), -1)
#cv2.putText(frame, str("M_L: %.1f" %Middle_Point_Length + " cm"), (500,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#cv2.putText(frame, str("M_ang: %.1f" %Middle_Point_Angle), (500,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.line(frame, (int(self.red_x),int(self.red_y)), (int(self.green_x),int(self.green_y)), (0, 0, 0), 2)
#MidPoint_data = MidPoint()
#MidPoint_data.z = Middle_Point_Length
#self.MidPoint_pub.publish(MidPoint_data)
cv2.line(frame, (960, 1280), (960, 0), (0, 255, 0), 1)
cv2.line(frame, (0, 540), (1920, 540), (0, 255, 0), 1)
self.image_pub.publish(self.red_bridge.cv2_to_imgmsg(frame, "bgr8"))
except CvBridgeError as e:
print(e)
return
def main(args):
ic = image_converter()
rospy.init_node('Color_Tracker', anonymous=True)
try:
rospy.spin()
except KeyboardInterrupt:
print("Shutting down")
cv2.destroyAllWindows()
if __name__ == '__main__':
main(sys.argv)
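One common way to cut per-frame CPU cost in a pipeline like this is to downscale the image before the blur/HSV/mask steps and rescale the detected circle back to full resolution afterwards; a rough, untested sketch of the idea (the 0.5 factor and the function name are arbitrary):

import cv2
import imutils

SCALE = 0.5  # process at half resolution, then map results back up

def find_largest_blob(frame_bgr, lower, upper):
    """Return ((x, y), radius) of the largest blob in the given HSV range, in full-resolution pixels."""
    small = cv2.resize(frame_bgr, None, fx=SCALE, fy=SCALE, interpolation=cv2.INTER_AREA)
    hsv = cv2.cvtColor(cv2.GaussianBlur(small, (11, 11), 0), cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.dilate(cv2.erode(mask, None, iterations=2), None, iterations=2)
    cnts = imutils.grab_contours(cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE))
    if not cnts:
        return None, 0.0
    c = max(cnts, key=cv2.contourArea)
    (x, y), r = cv2.minEnclosingCircle(c)
    return (x / SCALE, y / SCALE), r / SCALE

callbackVideo could then call something like this once per colour instead of running the full-resolution blur, erode and dilate passes three times per frame.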

Centroid tracking by using background subtraction in Python

So I have been following this tutorial for centroid tracking
https://www.pyimagesearch.com/2018/07/23/simple-object-tracking-with-opencv/
and have built the centroid tracking class as it is described in the tutorial.
Now when I try to use background subtraction for the detection instead of the CNN that he is using, it does not work and gives me this error from CentroidTracker.py:
for i in range(0, inputCentroids):
TypeError: only integer scalar arrays can be converted to a scalar index
Here is the code that I am using:
for i in range(0, num_frames):
rects = []
#Get the very first image from the video
if (first_iteration == 1):
ret, frame = cap.read()
frame = cv2.resize(frame, (imageHight,imageWidth))
first_frame = copy.deepcopy(frame)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
height, width = frame.shape[:2]
print("shape:", height,width)
first_iteration = 0
else:
ret, frame = cap.read()
frame = cv2.resize(frame, (imageHight,imageWidth))
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
forgroundMask = backgroundSub.apply(frame)
#Get contor for each person
_, contours, _ = cv2.findContours(forgroundMask.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
contours = filter(lambda cont: cv2.contourArea(cont) > 20, contours)
#Get bbox from the controus
for c in contours:
(x, y, w, h) = cv2.boundingRect(c)
rectangle = [x, y, (x + w), (y + h)]
rects.append(rectangle)
cv2.rectangle(frame, (rectangle[0], rectangle[1]), (rectangle[2], rectangle[3]),
(0, 255, 0), 2)
objects = ct.update(rects)
for (objectID, centroid) in objects.items():
text = "ID:{}".format(objectID)
cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
'''Display Windows'''
cv2.imshow('FGMask', forgroundMask)
frame1 = frame.copy()
cv2.imshow('MOG', frame1)
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
The code breaks at the objects = ct.update(rects) line.
Here is the implementation of the CentroidTracker from the Tutorial:
from scipy.spatial import distance as dist
from collections import OrderedDict
import numpy as np
#Makes a the next unique object ID with
#2 ordered dictionaries
class CentroidTracker():
def __init__(self, maxDisappeared = 50):
self.nextObjectID = 0
self.objects = OrderedDict()
self.disappeared = OrderedDict()
self.maxDisappeared = maxDisappeared
def register(self, centroid):
self.objects[self.nextObjectID] = centroid
self.disappeared[self.nextObjectID] = 0
self.nextObjectID += 1
def deregister(self, objectID):
del self.objects[objectID]
del self.disappeared[objectID]
def update(self, rects):
if len(rects) == 0:
for objectID in self.disappeared.keys():
self.disappeared[objectID] += 1
if self.disappeared[objectID] > self.maxDisappeared:
self.deregister(objectID)
return self.objects
inputCentroids = np.zeros((len(rects), 2), dtype="int")
for (i, (startX, startY, endX, endY)) in enumerate(rects):
cX = int((startX + endX) / 2.0)
cY = int((startY + endY) / 2.0)
inputCentroids[i] = (cX, cY)
if len(self.objects) == 0:
for i in range(0, inputCentroids):
self.register(inputCentroids[i])
else:
objectIDs = list(self.objects.keys())
objectCentroids = list(self.objects.values())
D = dist.cdist(np.array(objectCentroids), inputCentroids)
rows = D.min(axis=1).argsort()
cols = D.argmin(axis=1)[rows]
usedRows = set()
usedCols = set()
for (row, col) in zip(rows, cols):
if row in usedRows or col in usedCols:
continue
objectID = objectIDs[row]
self.objects[objectID] = inputCentroids[col]
self.disappeared[objectID] = 0
usedRows.add(row)
usedCols.add(col)
# compute both the row and column index we have NOT yet
# examined
unusedRows = set(range(0, D.shape[0])).difference(usedRows)
unusedCols = set(range(0, D.shape[1])).difference(usedCols)
if D.shape[0] >= D.shape[1]:
# loop over the unused row indexes
for row in unusedRows:
# grab the object ID for the corresponding row
# index and increment the disappeared counter
objectID = objectIDs[row]
self.disappeared[objectID] += 1
# check to see if the number of consecutive
# frames the object has been marked "disappeared"
# for warrants deregistering the object
if self.disappeared[objectID] > self.maxDisappeared:
self.deregister(objectID)
else:
for col in unusedCols:
self.register(inputCentroids[col])
# return the set of trackable objects
return self.objects
I am kind of lost as to what I am doing wrong here. All I should have to do is pass a bounding box (x, y, x+w, y+h) into the rects[] list, correct, and that should give similar results? Or am I wrong and don't understand how this works? Any help will be appreciated.
You have forgotten the len function: for i in range(0, len(inputCentroids)):
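In context, the corrected block inside update() would then read (only the range() call changes):

if len(self.objects) == 0:
    for i in range(0, len(inputCentroids)):
        self.register(inputCentroids[i])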
By doing what Axel Puig said and then adding these lines to the main method:
objects = ct.update(rects)
if objects is not None:
    for (objectID, centroid) in objects.items():
        text = "ID:{}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
that fixed the issue. What I think was happening is that the first frame didn't initialize the tracker, so I needed to make sure it was not None; after that it worked.
