find the biggest ball - detect object - python

I am a newbie in Python and I am researching object detection. I'm looking for the biggest ball in the frame, but I have no idea how to make it work. Can anybody give me some pointers? Here is what I have so far.
My code (from https://youtu.be/Vg9rrOFmwHo):
import cv2
import serial
from time import sleep

classNames = []
classFile = "/home/pi/Desktop/Object_Detection_Files/coco.names"
with open(classFile, "rt") as f:
    classNames = f.read().rstrip("\n").split("\n")

configPath = "/home/pi/Desktop/Object_Detection_Files/ssd_mobilenet.pbtxt"
weightsPath = "/home/pi/Desktop/Object_Detection_Files/frozen_inference_graph.pb"

net = cv2.dnn_DetectionModel(weightsPath, configPath)
net.setInputSize(320, 320)
net.setInputScale(1 / 127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True)

def getObjects(img, thres, nms, draw=True, objects=[]):
    classIds, confs, bbox = net.detect(img, confThreshold=thres, nmsThreshold=nms)
    if len(objects) == 0:
        objects = classNames
    objectInfo = []
    if len(classIds) != 0:
        for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
            className = classNames[classId - 1]
            if className in objects:
                objectInfo.append([box, className])
                if draw:
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, 'sports ball', (box[0] + 10, box[1] + 30),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
                    x = int(box[0] + box[2] / 2)
                    y = int(box[1] + box[3] / 2)
                    cv2.circle(img, (x, y), 5, (0, 255, 0), 1)
    return img, objectInfo

if __name__ == "__main__":
    cap = cv2.VideoCapture(0)
    cap.set(3, 640)
    cap.set(4, 480)
    while True:
        success, img = cap.read()
        height, width, _ = img.shape
        result, objectInfo = getObjects(img, 0.29, 0.2, objects=['sports ball'])
        # print(objectInfo)
        cv2.imshow("Output", img)
        cv2.waitKey(1)
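Since getObjects() already returns objectInfo as [box, className] pairs, the biggest ball is simply the entry with the largest bounding-box area. A minimal sketch, assuming the code above (box is [x, y, w, h]; this would go inside the while loop, right after the getObjects() call):

largest = None
largest_area = 0
for box, className in objectInfo:
    area = box[2] * box[3]          # w * h of the detection box
    if area > largest_area:
        largest_area = area
        largest = box

if largest is not None:
    x, y, w, h = largest
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 3)  # highlight the biggest ball
    cv2.putText(img, 'biggest ball', (x + 10, y - 10),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)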


yolov7 deepSORT object tracking IDs are different in all video frames

I am trying to detect objects in a certain area using YOLOv7 and the DeepSORT algorithm, but in the results the track IDs are always changing. I attached three frames so you can see the problem: the IDs are different in every frame.
# class-based virtual zone tracking
import random
import torch
import numpy as np
from models.experimental import attempt_load
from utils.torch_utils import TracedModel
from utils.datasets import letterbox
from utils.plots import plot_one_box, plot_one_box_center
from utils.general import check_img_size, non_max_suppression, scale_coords
import cv2
import time
from google.colab.patches import cv2_imshow
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# deep sort
import os
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
from tensorflow.compat.v1 import ConfigProto
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection
import matplotlib.pyplot as plt
from deep_sort import preprocessing, nn_matching
from tracking_helpers import read_class_names, create_box_encoder
from detection_helpers import *

class YOLOv7:
    def __init__(self, weights: str, image_size: int, device: str):
        self.device = device
        self.weights = weights
        self.model = attempt_load(self.weights, map_location=self.device)  # load FP32 model
        self.stride = int(self.model.stride.max())
        self.image_size = check_img_size(image_size, self.stride)

        if self.device != 'cpu':
            self.half = True
        else:
            self.half = False

        if self.half:
            self.model.half()  # FP16

        self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
        color_values = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(self.names))]
        self.colors = {i: color_values[i] for i in range(len(self.names))}

    def detect(self, raw_image: np.ndarray, conf_thresh=0.45, iou_thresh=0.45, classes=[0]):  # default class: person
        # Run inference
        if self.device != 'cpu':
            self.model(torch.zeros(1, 3, self.image_size, self.image_size).to(self.device).type_as(next(self.model.parameters())))
        with torch.no_grad():
            image = letterbox(raw_image, self.image_size, stride=self.stride)[0]
            image = image[:, :, ::-1].transpose(2, 0, 1)
            image = np.ascontiguousarray(image)
            image = torch.from_numpy(image).to(self.device)
            image = image.half() if self.half else image.float()
            image /= 255.0
            if image.ndimension() == 3:
                image = image.unsqueeze(0)

            # Inference
            detections = self.model(image, augment=False)[0]

            # Apply NMS
            detections = non_max_suppression(detections, conf_thresh, iou_thresh, classes=classes, agnostic=False)[0]

            # Rescale boxes from img_size to raw image size
            detections[:, :4] = scale_coords(image.shape[2:], detections[:, :4], raw_image.shape).round()
        return detections

    def tracking(self, video_frame, yolo_dets, inside_poly=True, count_objects: bool = False, verbose=False,
                 reID_model_path="./deep_sort/model_weights/mars-small128.pb", nms_max_overlap: float = 1.0,
                 max_cosine_distance: float = 0.4, nn_budget: float = None):
        class_names = read_class_names()
        encoder = create_box_encoder(reID_model_path, batch_size=1)
        nms_max_overlap = nms_max_overlap
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        *xyxy, conf, cls = yolo_dets
        frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)

        if yolo_dets is None:
            bboxes = []
            scores = []
            classes = []
            num_objects = 0
        else:
            bboxes = yolo_dets[:, :4]
            bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]  # convert from xyxy to xywh
            bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
            scores = yolo_dets[:, 4]
            classes = yolo_dets[:, -1]
            num_objects = bboxes.shape[0]

        # how many objects you track
        names = []
        for i in range(num_objects):  # loop through objects and use class index to get class name
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            names.append(class_name)
        names = np.array(names)
        count = len(names)
        if count_objects:
            cv2.putText(frame, "both inside and outside the polygon detection: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 2)

        # DeepSORT tracker work starts here
        features = encoder(frame, bboxes)  # encode detections and feed to tracker. [No of BB / detections per frame, embed_size]
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]  # [No of BB per frame] deep_sort.detection.Detection objects

        cmap = plt.get_cmap('tab20b')  # initialize color map
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        boxs = np.array([d.tlwh for d in detections])  # run non-maxima suppression below
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()  # call the tracker
        tracker.update(detections)  # update using Kalman gain

        for track in tracker.tracks:  # update new findings AKA tracks
            # if not track.is_confirmed() or track.time_since_update > 1:
            #     continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]  # draw bbox on screen
            color = [i * 255 for i in color]

            # drawing poly
            # pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
            # frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
            # creating poly
            # poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
            # center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))  # center point ((x1 + x2) / 2, (y1 + y2) / 2)
            # point = Point(center)

            if inside_poly:
                # drawing poly
                pts = np.array([[6, 449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
                frame = cv2.polylines(frame, [pts], True, (0, 0, 255), 5)

                # creating poly
                poli = Polygon([(6, 449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
                center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))  # center point ((x1 + x2) / 2, (y1 + y2) / 2)
                point = Point(center)

                if poli.contains(point):
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                                  (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
                    cv2.putText(frame, class_name + " : " + str(track.track_id), (int(bbox[0]), int(bbox[1] - 11)),
                                0, 0.6, (255, 255, 255), 1, lineType=cv2.LINE_AA)
                    cv2.putText(frame, "0", center, 0, 0.6, (255, 255, 255), 1, lineType=cv2.LINE_AA)
            else:
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                              (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + " : " + str(track.track_id), (int(bbox[0]), int(bbox[1] - 11)),
                            0, 0.6, (255, 255, 255), 1, lineType=cv2.LINE_AA)

            if verbose == 2:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        return result

if __name__ == '__main__':
    yolov7 = YOLOv7(weights='yolov7x.pt', device='cpu', image_size=800)
    cap = cv2.VideoCapture('street5sn.mp4')
    torch.cuda.empty_cache()

    # writer
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # by default VideoCapture returns float instead of int
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*"DIVX")
    out = cv2.VideoWriter("./output/video_out_track5sn-d2.mp4", codec, fps, (width, height))

    while True:
        t1 = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        detections = yolov7.detect(frame)
        vir = yolov7.tracking(frame, detections, count_objects=True, inside_poly=False)
        out.write(vir)
        cv2_imshow(vir)  # Colab imshow
        print("add frame ...")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    cap.release()
    cv2.destroyAllWindows()
I use this repo. I did not make any changes to the other files.
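One thing stands out in the code above: tracking() builds a brand-new encoder and Tracker(metric) on every frame, and a fresh Tracker starts its ID counter from scratch, which would produce exactly the symptom described. A minimal sketch of keeping the DeepSORT state alive across frames, reusing the same deep_sort and tracking_helpers modules as above (PersistentDeepSort is a hypothetical wrapper name):

from deep_sort import nn_matching
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection
from tracking_helpers import create_box_encoder

class PersistentDeepSort:
    # Hypothetical wrapper: the encoder, metric, and Tracker are created once
    # and reused, so track IDs survive from one frame to the next.
    def __init__(self, reID_model_path="./deep_sort/model_weights/mars-small128.pb",
                 max_cosine_distance=0.4, nn_budget=None):
        self.encoder = create_box_encoder(reID_model_path, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)  # lives for the whole video

    def update(self, frame, bboxes_tlwh, scores, names):
        # bboxes_tlwh: this frame's detections as (top-left x, top-left y, w, h)
        features = self.encoder(frame, bboxes_tlwh)
        detections = [Detection(bbox, score, name, feature)
                      for bbox, score, name, feature in zip(bboxes_tlwh, scores, names, features)]
        self.tracker.predict()
        self.tracker.update(detections)
        return self.tracker.tracks  # track.track_id is now stable across frames

With something like this, tracking() would only convert boxes and draw; every call to update() continues the same Kalman tracks instead of numbering them from 1 again.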

python Opencv template match error with win32api

I have been trying to use this code to get a screenshot of a window; with this screenshot I will use OpenCV to match a template. But every time I try to match the template, I get the same error. I have tried a few other approaches, like ImageGrab from Pillow and pyautogui, and I am now trying the Windows API. I will say in advance this code is not mine.
Can anyone help me with this?
The error:
test = cv.matchTemplate(screenshot, target, cv.TM_CCORR_NORMED)
cv2.error: OpenCV(4.6.0) :-1: error: (-5:Bad argument) in function 'matchTemplate'
> Overload resolution failed:
> - templ is not a numpy array, neither a scalar
> - Expected Ptr<cv::UMat> for argument 'templ'
The source code:
import cv2 as cv
import numpy as np
import pyautogui
from time import time
import win32gui, win32ui, win32con
from PIL import ImageGrab

game = 'LOST ARK (64-bit, DX9) v.2.5.3.1'
target = 'box.png'

def list_window_names():
    def winEnumHandler(hwnd, ctx):
        if win32gui.IsWindowVisible(hwnd):
            print(hex(hwnd), win32gui.GetWindowText(hwnd))
    win32gui.EnumWindows(winEnumHandler, None)

def windowcapture():
    hwnd = win32gui.FindWindow(None, game)
    rect = win32gui.GetWindowRect(hwnd)
    w = rect[2] - rect[0]
    h = rect[3] - rect[1]
    #hwnd = win32gui.FindWindow(None, windowname)
    wDc = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDc)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.fromstring(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDc)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    return img

list_window_names()
loop_time = time()
while True:
    #screenshot = ImageGrab.grab(bbox = (w,h,x,y))
    #screenshot = pyautogui.screenshot()
    #screenshot = np.array(screenshot)
    #screenshot = cv.cvtColor(screenshot, cv.COLOR_RGB2BGR)
    #print('top:{} , left:{}, w:{} ,h: {}'.format(w,h,x,y))
    #screenshot.show()
    #screenshot = np.array(screenshot)
    #screenshot = screenshot[:, :, ::-1].copy()
    #screenshot = screenshot[..., :3].copy()
    screenshot = windowcapture()
    cv.imshow("computer vision", screenshot)

    method = cv.TM_CCORR
    result = cv.matchTemplate(screenshot, target, method)

    # debug the loop rate
    print('FPS {}'.format(1 / (time() - loop_time)))
    loop_time = time()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break

print('Done.')
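The error message is the clue here: "templ is not a numpy array" because target is still the string 'box.png', not an image. A minimal sketch of the likely fix, reusing the windowcapture() above (the alpha-channel handling is an assumption based on the 4-channel bitmap that function returns):

import cv2 as cv
import numpy as np

# Load the template once as an image; cv.matchTemplate needs two arrays.
target_img = cv.imread('box.png')          # BGR, 3 channels
if target_img is None:
    raise FileNotFoundError("could not read box.png")

screenshot = windowcapture()
screenshot = np.ascontiguousarray(screenshot[..., :3])  # drop the alpha channel so depths match

result = cv.matchTemplate(screenshot, target_img, cv.TM_CCORR_NORMED)
min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)
print('best match at', max_loc, 'score', max_val)

Unrelated to the crash: np.fromstring is deprecated in recent NumPy; np.frombuffer(signedIntsArray, dtype='uint8') is the drop-in replacement inside windowcapture().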

Reading images from files simultaneously

I have multiple folders whose images share the same naming scheme (a.png, etc.). I want to modify the code below to read these same-named files from the different directories and show their OpenCV/YOLO output at the same time. To be more specific, I have 10 folders containing images exported in different ways: one folder contains the RGB files, another contains the gray files, and so on. To compare the outputs, I want to show the images that share a name but live in different folders. I know it should not be that hard, but I am pretty confused. Thanks in advance!
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import tkinter
from tkinter import filedialog

def cal_alpB(minMax):
    minD = minMax[0]
    maxD = minMax[1]
    alpha = 255 / (maxD - minD)
    beta = -alpha * minD
    return [alpha, beta]

def getMinMax(path):
    with open(path + '/config') as f:
        minMax = f.read().splitlines()
    minMax = minMax[0].split(',')
    minMax = [eval(x) for x in minMax]
    return minMax

def normalizeData(minMax, img):
    alpB = cal_alpB(minMax)
    img[img > minMax[1]] = minMax[1]
    img[img < 0] = 0
    return alpB

def boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img):
    for output in layerOutput:
        for detection in output:
            score = detection[5:]
            class_id = np.argmax(score)
            confidence = score[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.8, 0.7)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confi = str(round(confidences[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 1)
            cv2.putText(img, label + " " + confi, (x, y + 20), font, 1, (255, 255, 255), 1)

def algorythmYolo():
    tkinter.Tk().withdraw()
    folder = filedialog.askdirectory()
    minMax = getMinMax(folder)
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), -1)
        if img is not None:
            alpB = normalizeData(minMax, img)
            img = cv2.convertScaleAbs(img, alpha=alpB[0], beta=alpB[1])
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            frameHeight, frameWidth, channels = img.shape
            blob = cv2.dnn.blobFromImage(img, 1 / 255, (frameWidth, frameHeight), (0, 0, 0), swapRB=True, crop=False)
            yolo.setInput(blob)
            layerOutput = yolo.forward(outputLayers)

            boxes = []
            confidences = []
            class_ids = []
            boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img)
            cv2.imshow("window", img)
            cv2.setWindowTitle('window', folder)
            cv2.waitKey(1)
        else:
            break
    cv2.destroyAllWindows()

yolo = cv2.dnn.readNet("./yolov3.weights", "./yolov3.cfg")
with open("./coco.names", "r") as f:
    classes = f.read().splitlines()
layers_names = yolo.getLayerNames()
outputLayers = [layers_names[i - 1] for i in yolo.getUnconnectedOutLayers()]
cv2.namedWindow("window", cv2.WINDOW_NORMAL)
algorythmYolo()
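One way to do it, sketched under the assumption that every folder has its own config file and the same filenames: pick the folders once, then drive the loop by the filenames of the first folder and look the same name up in each of the others, giving each folder its own window. This reuses getMinMax(), normalizeData(), boxDrawing(), yolo, and outputLayers from the script above; algorythmYoloMulti is a hypothetical name.

import os
import cv2
import tkinter
from tkinter import filedialog

def algorythmYoloMulti(num_folders=2):
    tkinter.Tk().withdraw()
    # Select each folder to compare (e.g. the rgb folder, then the gray folder).
    folders = [filedialog.askdirectory() for _ in range(num_folders)]
    minMaxes = {folder: getMinMax(folder) for folder in folders}

    # Drive the loop with the first folder's filenames; look the same
    # name up in every other folder.
    for filename in sorted(os.listdir(folders[0])):
        if filename == 'config':
            continue
        for folder in folders:
            img = cv2.imread(os.path.join(folder, filename), -1)
            if img is None:          # this folder has no file with that name
                continue
            alpB = normalizeData(minMaxes[folder], img)
            img = cv2.convertScaleAbs(img, alpha=alpB[0], beta=alpB[1])
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            frameHeight, frameWidth, _ = img.shape
            blob = cv2.dnn.blobFromImage(img, 1 / 255, (frameWidth, frameHeight),
                                         (0, 0, 0), swapRB=True, crop=False)
            yolo.setInput(blob)
            layerOutput = yolo.forward(outputLayers)
            boxes, confidences, class_ids = [], [], []
            boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img)
            cv2.imshow(folder, img)  # one window per folder, same filename shown in each
        if cv2.waitKey(0) & 0xFF == ord('q'):  # any key for the next image, q to quit
            break
    cv2.destroyAllWindows()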

not able to scale the roi in opencv python

import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(0)
while True:
    _, im0 = cap.read()
    showCrosshair = False
    fromCenter = False
    r = cv2.selectROI("Image", im0, fromCenter, showCrosshair)
    break

mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
                      max_num_hands=2,
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils

pTime = 0
cTime = 0
while True:
    _, img = cap.read()
    img = cv2.rectangle(img, (r[0], r[1]), (r[2], r[3]), (0, 255, 0), 5)
    #imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(img)
    print(results.multi_hand_landmarks)
    if results.multi_hand_landmarks:
        for handLms in results.multi_hand_landmarks:
            for id, lm in enumerate(handLms.landmark):
                print(id, lm)
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                cv2.circle(img, (cx, cy), 3, (255, 0, 255), cv2.FILLED)
            mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)

    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime

    if cv2.waitKey(1) & 0xFF == 27:
        break

    cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
    cv2.imshow("ThumbsDown", img)
    cv2.waitKey(1)
I am trying to build a program that detects hand movements in a selected region of interest, but the rectangular selection I perform doesn't work, or it ends up unscaled. The hand detection also only starts working at a few random points. Any help would be appreciated.
The question was solved. This is the code:
import cv2
import mediapipe as mp
import time
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

cap = cv2.VideoCapture(0)
while True:
    _, im0 = cap.read()
    showCrosshair = False
    fromCenter = False
    r = cv2.selectROI("ThumbsDown", im0, fromCenter, showCrosshair)
    break

mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
                      max_num_hands=2,
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils

x = int(r[0])
y = int(r[1])
w = int(r[2])
h = int(r[3])
a = (x, y)
b = (x, y + h)
c = (x + w, y + h)
d = (x + w, y)
points_cord = (a, b, c, d)
points = Polygon(points_cord)

pTime = 0
cTime = 0
while True:
    _, img = cap.read()
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    rect_img = imgRGB[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])]
    results = hands.process(rect_img)
    print(results.multi_hand_landmarks)
    if results.multi_hand_landmarks:
        for handLms in results.multi_hand_landmarks:
            for id, lm in enumerate(handLms.landmark):
                print(id, lm)
                h, w, c = rect_img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                cv2.circle(rect_img, (cx, cy), 3, (255, 0, 255), cv2.FILLED)
                cv2.putText(img, str("Hands-Detected"), (120, 70), cv2.FONT_HERSHEY_PLAIN, 3, (252, 0, 0), 3)
                cv2.rectangle(img, (int(r[0]), int(r[1] + r[3])), (int(r[0] + r[2]), int(r[1])), 255, 3)
                cv2.rectangle(img, b, d, (25, 255, 231), 3)
                if (cx or cy) != 0:
                    cp = Point(cx, cy)
                    if points.contains(cp):
                        cv2.putText(img, str("TEST"), (300, 200), cv2.FONT_HERSHEY_PLAIN, 3, (25, 255, 231), 3)
            mpDraw.draw_landmarks(rect_img, handLms, mpHands.HAND_CONNECTIONS)

    img[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])] = rect_img
    cv2.rectangle(img, b, d, (25, 255, 231), 3)

    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime

    if cv2.waitKey(1) & 0xFF == 27:
        break

    cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (25, 255, 231), 3)
    cv2.namedWindow("ThumbsDown", cv2.WINDOW_NORMAL)
    cv2.imshow("ThumbsDown", img)
    cv2.waitKey(1)
Firstly, I was not sending the correct inputs to the built-in cv2.rectangle function in the previous code:
x = int(r[0])
y = int(r[1])
w = int(r[2])
h = int(r[3])
This is the part where I rearranged the coordinates according to what cv2.rectangle expects. In the line
rect_img = imgRGB[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])]
we need not only x and y, but also the width and height.
Secondly, I was not using the correct frame to construct the rectangle on.
rect_img = imgRGB[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])]
This is the selected area (ROI), and
img[int(r[1]):int(r[1] + r[3]), int(r[0]):int(r[0] + r[2])] = rect_img
merges the selected region back into the original output frame.
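The conventions being untangled here are easy to mix up, so a standalone sketch may help (assuming a webcam at index 0): cv2.selectROI returns (x, y, w, h), cv2.rectangle wants two opposite corners, and NumPy slicing takes rows first.

import cv2

cap = cv2.VideoCapture(0)
_, frame = cap.read()
x, y, w, h = cv2.selectROI("select", frame, False, False)     # returns (x, y, w, h)

roi = frame[y:y + h, x:x + w]                                 # slice: rows (y) first, then columns (x)
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)  # two corners, not width/height
frame[y:y + h, x:x + w] = roi                                 # paste the (possibly processed) ROI back

cv2.imshow("select", frame)
cv2.waitKey(0)
cap.release()
cv2.destroyAllWindows()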

Unknown problem with module using CV2 and mediapipe

I have some trouble running the module on Python 3.10.1. This is my code:
import mediapipe as mp
import cv2
import time

class handDetector:
    def __init__(self, mode=False, maxHands=2, complexity=1, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.complexity = complexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpDraw = mp.solutions.drawing_utils

    def FindHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(imgRGB)
        landmarks = results.multi_hand_landmarks
        #print(results.multi_hand_landmarks)
        if landmarks:
            for handLms in landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img,
                                               handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img
        #for id, lm in enumerate(handLms.landmark):
        #    print(id,lm)
        #    height, width, c = img.shape
        #    cx, cy = int(lm.x*width), int(lm.y*height)
        #    print(id, ", x=",cx, ", y=",cy)
        #    if id%10 == 0:
        #        cv2.circle(img, (cx,cy), 8, (255,0,255), cv2.FILLED)

def main():
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(1)
    detector = handDetector()
    while True:
        success, img = cap.read()
        img = detector.FindHands(img)
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70),
                    cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)

if __name__ == '__main__':
    main()
It returns the following traceback:
Traceback (most recent call last):
File "C:\Users\Eduardo.PC\Documents\UNAM 2020-24\COMPU\HandTracker\HandTrackModule.py", line 65, in <module>
main()
File "C:\Users\Eduardo.PC\Documents\UNAM 2020-24\COMPU\HandTracker\HandTrackModule.py", line 55, in main
img = detector.FindHands(img)
File "C:\Users\Eduardo.PC\Documents\UNAM 2020-24\COMPU\HandTracker\HandTrackModule.py", line 21, in FindHands
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
cv2.error: OpenCV(4.5.4) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
I'm following a YouTube course on computer vision, linked here. At around the 30-minute mark, the module presented above is added. I pretty much copied it, and it doesn't work. What's happening?
Thanks, and sorry for the long post.
Try this
import mediapipe as mp
import cv2
import time

class handDetector:
    def __init__(self, mode=False, maxHands=2, complexity=1, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.complexity = complexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # FindHands below uses self.hands and self.mpHands; without these two
        # lines they are never defined.
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.complexity,
                                        self.detectionCon, self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def FindHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.hands.process(imgRGB)
        landmarks = results.multi_hand_landmarks
        #print(results.multi_hand_landmarks)
        if landmarks:
            for handLms in landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img,
                                               handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img

def main():
    pTime = 0
    cTime = 0
    cap = cv2.VideoCapture(1)
    detector = handDetector()
    while True:
        success, img = cap.read()
        if not success:   # stop instead of passing an empty frame to cvtColor
            break
        img = detector.FindHands(img)
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70),
                    cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
        cv2.imshow("Image", img)
        cv2.waitKey(1)

if __name__ == '__main__':
    main()
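Worth noting: the "!_src.empty()" assertion in the traceback means cap.read() returned an empty frame, typically because camera index 1 does not exist on the machine. A small sketch for probing indices before starting the loop (the index list is an assumption):

import cv2

# Try the common camera indices and keep the first one that opens.
cap = None
for index in (0, 1, 2):
    candidate = cv2.VideoCapture(index)
    if candidate.isOpened():
        print("using camera index", index)
        cap = candidate
        break
    candidate.release()

if cap is None:
    raise RuntimeError("no working camera found")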
