I have a 7-minute video. From this video I extract RGB data from a set ROI. I'm using MediaPipe FaceMesh to track the face and place the ROI.
However, this evaluation takes several minutes. What can I do to speed this up? Or what am I doing wrong?
Could it be that, because of the FaceMesh initialization, the face has to be re-identified in every frame, and that this is the reason for the long runtime? How else should I solve this?
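For reference, the FaceMesh Python solution defaults to static_image_mode=False, i.e. it runs the full face detector only until a face is found and then tracks it across frames, so per-frame re-detection is unlikely to be the main cost. Making that explicit is a small sketch like the following (constructor parameters as documented by MediaPipe); the code in question follows below.
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,      # video mode: detect once, then track
    max_num_faces=1,              # a single face is enough for the ROI
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)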
cap = cv2.VideoCapture("Video.mp4")
red, image = cap.read()
total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# print toal number of frames
print("total number of Frames: ",total)
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
while red:
red, image = cap.read()
height, width, _ = image.shape
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
image.flags.writeable = False
if image is None:
continue
processed_img = face_mesh.process(image)
# Draw the face mesh annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # convert the RGB image to BGR.
if processed_img.multi_face_landmarks:
for face_landmarks in processed_img.multi_face_landmarks:
landmark_points = []
for i in range(0, 468):
x = int(face_landmarks.landmark[i].x * width)
y = int(face_landmarks.landmark[i].y * height)
p = [x, y]
landmark_points.append([x, y])
forehead = np.array((
landmark_points[9], landmark_points[107], landmark_points[66], landmark_points[105],
landmark_points[104], landmark_points[103],
landmark_points[67], landmark_points[109], landmark_points[10],
landmark_points[338], landmark_points[297], landmark_points[332],
landmark_points[333], landmark_points[334], landmark_points[296],
landmark_points[336]))
left_cheek = np.array((landmark_points[266], landmark_points[426], landmark_points[436],
landmark_points[416], landmark_points[376],
landmark_points[352], landmark_points[347], landmark_points[330]))
right_cheek = np.array((landmark_points[36], landmark_points[206], landmark_points[216],
landmark_points[192], landmark_points[147],
landmark_points[123], landmark_points[117], landmark_points[118],
landmark_points[101]))
forehead_New = np.array((landmark_points[109],landmark_points[10],landmark_points[338],landmark_points[337],landmark_points[336],landmark_points[285],landmark_points[417],
landmark_points[168],landmark_points[193],landmark_points[55],landmark_points[107],landmark_points[108]))
rightCheek_New = np.array((landmark_points[355],landmark_points[329],landmark_points[348],landmark_points[347],landmark_points[346],landmark_points[345],
landmark_points[352],landmark_points[280],landmark_points[266],landmark_points[371]))
leftCheek_New = np.array((landmark_points[116],landmark_points[117],landmark_points[118],landmark_points[119],landmark_points[100],landmark_points[126],
landmark_points[142],landmark_points[36],landmark_points[50],landmark_points[123]))
cv2.polylines(image, [forehead_New], True, (0, 255, 255), 2)
cv2.polylines(image, [leftCheek_New], True, (0, 255, 255), 2)
cv2.polylines(image, [rightCheek_New], True, (0, 255, 255), 2)
mask = np.zeros((height, width), dtype=np.uint8)
cv2.fillPoly(mask, [forehead_New, leftCheek_New, rightCheek_New], (255))
crop_img = cv2.bitwise_and(image, image, mask=mask)
b, g, r = cv2.split(crop_img)
indices_list = np.where(np.any(crop_img != [0, 0, 0], axis=-1))
roi_pixel_img =crop_img[indices_list]
r = (roi_pixel_img == [0,255,255]).all(axis = -1)
roi_pixel_img = roi_pixel_img[~r]
b_plot.append(roi_pixel_img[:, 0].mean())
g_plot.append(roi_pixel_img[:, 1].mean()) # -//- ... green-channel
r_plot.append(roi_pixel_img[:, 2].mean()) # -//- ... red-channel
frame_count += 1
print("Frame_progress:", frame_count, "of: ", total)
t_plot.append(round(time_count))
time_count += (1000 / fps)
# Draw the face mesh on the image
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
cap.release()
mean_rgb = np.vstack((r_plot, g_plot, b_plot)).T  # stack the per-frame channel means
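Most of the runtime here is likely the per-pixel NumPy masking, the channel split and the drawing done for every frame rather than FaceMesh itself. A sketch of a lighter inner loop, assuming the same mask built from the three polygons above and that the yellow outlines and the tessellation are only needed for debugging:
# inside the per-frame loop, right after cv2.fillPoly(mask, ...):
# skip cv2.polylines and mp_drawing.draw_landmarks, then let OpenCV
# average the ROI directly instead of splitting and boolean-indexing.
mean_b, mean_g, mean_r, _ = cv2.mean(image, mask=mask)   # image is BGR at this point
b_plot.append(mean_b)
g_plot.append(mean_g)
r_plot.append(mean_r)
t_plot.append(round(time_count))
time_count += (1000 / fps)
frame_count += 1
cv2.split, np.where and the boolean filtering allocate several full-frame arrays per frame; cv2.mean with the mask avoids all of that, and not drawing the outlines means the yellow pixels no longer have to be filtered out.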
Related
I would like to realize a project for a gun club. The goal is to detect and measure shots on a target and count the points. My thoughts about this project are as follows:
apply a region of interest to focus onto the target
apply filters on the camera stream to get a sharp boundary around the black center
define the diameter since it is known
get the center of the boundary and store it as a reference point
detect shots and get the angle and distance relative to the reference point and the known diameter, and hence the value of the shot
show the last shots with a circle and their values on the screen
What I got so far is this:
(image: edge detection and center point)
Explanation of radial and distance:
(image: how the radial and distance are measured)
Screen with circles and values:
(image: the goal)
import cv2
import numpy as np
import imutils
# declare variables
framewidth = 1920
frameheight = 1080
RTSP_URL = 'rtsp://xxxxxx:xxxxxxxx#192.168.1.64:554/Streaming/channels/1'
cap = cv2.VideoCapture(RTSP_URL, cv2.CAP_FFMPEG)
cap.set(3, framewidth)
cap.set(4, frameheight)
if not cap.isOpened():
print('Cannot open RTSP stream')
exit(-1)
# pseudo function
def empty(a):
pass
# slider
cv2.namedWindow("Parameters")
cv2.resizeWindow("Parameters", 640,240)
cv2.createTrackbar("Threshold1","Parameters",16,255,empty)
cv2.createTrackbar("Threshold2","Parameters",192,255,empty)
cv2.createTrackbar("Threshold3","Parameters",243,255,empty)
cv2.createTrackbar("Threshold4","Parameters",255,255,empty)
# imagestack
def stackImages(scale,imgArray):
rows = len(imgArray)
cols = len(imgArray[0])
rowsAvailable = isinstance(imgArray[0], list)
width = imgArray[0][0].shape[1]
height = imgArray[0][0].shape[0]
if rowsAvailable:
for x in range ( 0, rows):
for y in range(0, cols):
if imgArray[x][y].shape[:2] == imgArray[0][0].shape [:2]:
imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
else:
imgArray[x][y] = cv2.resize(imgArray[x][y], (imgArray[0][0].shape[1], imgArray[0][0].shape[0]), None, scale, scale)
if len(imgArray[x][y].shape) == 2: imgArray[x][y]= cv2.cvtColor( imgArray[x][y], cv2.COLOR_GRAY2BGR)
imageBlank = np.zeros((height, width, 3), np.uint8)
hor = [imageBlank]*rows
hor_con = [imageBlank]*rows
for x in range(0, rows):
hor[x] = np.hstack(imgArray[x])
ver = np.vstack(hor)
else:
for x in range(0, rows):
if imgArray[x].shape[:2] == imgArray[0].shape[:2]:
imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
else:
imgArray[x] = cv2.resize(imgArray[x], (imgArray[0].shape[1], imgArray[0].shape[0]), None,scale, scale)
if len(imgArray[x].shape) == 2: imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
hor= np.hstack(imgArray)
ver = hor
return ver
def getContours(imgDil,imgContour):
contours, hierarchy = cv2.findContours(imgDil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
area = cv2.contourArea(cnt)
# compute the center of the contour
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            # skip degenerate contours to avoid division by zero
            continue
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
# draw the contour and center of the shape on the image
if area > 5000:
cv2.drawContours(imgContour, cnt, -1, (255, 0 ,255),3)
cv2.circle(imgContour, (cX, cY), 7, (255, 0, 255), -1)
cv2.putText(imgContour, "center", (cX - 20, cY - 20),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
while(True):
success, img = cap.read()
imgContour = img.copy()
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold1 = cv2.getTrackbarPos("Threshold1", "Parameters")
threshold2 = cv2.getTrackbarPos("Threshold2", "Parameters")
threshold3 = cv2.getTrackbarPos("Threshold3", "Parameters")
threshold4 = cv2.getTrackbarPos("Threshold4", "Parameters")
ret, thresh = cv2.threshold(imgGray,threshold1,threshold2,1)
imgCanny = cv2.Canny(imgGray,threshold3,threshold4)
kernel = np.ones((3,3))
imgDil = cv2.dilate(thresh, kernel, iterations=1)
getContours(imgDil,imgContour)
imgStack = stackImages(0.4,([img,imgGray,thresh],[imgCanny,img,imgContour]))
cv2.imshow('Result',imgStack)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
I really appreciate any suggestions on best practices and of course any help. Should I rather go for a stereo camera with depth perception like the OAK-D, since I think detecting a shot inside the black target area could be challenging?
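Regarding the scoring itself: once the centre point and the pixel scale from the known diameter are available, the ring value can be derived from the shot's distance to the centre. A rough sketch; the ring spacing and maximum ring are placeholder values to be replaced with the real target dimensions:
import numpy as np

def score_shot(shot_xy, center_xy, px_per_mm, ring_width_mm=8.0, max_ring=10):
    # distance of the shot from the target centre, converted to millimetres
    dx = shot_xy[0] - center_xy[0]
    dy = shot_xy[1] - center_xy[1]
    dist_mm = np.hypot(dx, dy) / px_per_mm
    angle_deg = np.degrees(np.arctan2(dy, dx))        # radial direction, for display
    ring = max_ring - int(dist_mm // ring_width_mm)   # one ring lost per ring width
    return max(ring, 0), dist_mm, angle_deg

# px_per_mm comes from the known diameter of the black centre:
# px_per_mm = detected_black_diameter_px / known_black_diameter_mm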
I'm failing miserably trying to draw 3D objects on an ArUco marker.
As of now I've done the camera calibration and I'm able to place images on the designated ArUco markers, but I'm having trouble with 3D objects. I think it might come from the image points; I think I'm missing something there.
Also, I'm doing this in real time. Is there a way to use already existing 3D objects?
Below is my code:
import cv2.aruco as aruco
import cv2
import time
import sys
import argparse
import numpy as np
#import imutils
def load_coefficients(path):
'''Loads camera matrix and distortion coefficients.'''
# FILE_STORAGE_READ
cv_file = cv2.FileStorage(path, cv2.FILE_STORAGE_READ)
# note we also have to specify the type to retrieve, otherwise we only get a
# FileNode object back instead of a matrix
camera_matrix = cv_file.getNode('K').mat()
dist_matrix = cv_file.getNode('D').mat()
cv_file.release()
return [camera_matrix, dist_matrix]
def drawQ(img, corners, imgpts):
imgpts = np.int32(imgpts).reshape(-1,2)
# draw ground floor in green
img = cv2.drawContours(img, [imgpts[:4]],-1,(0,255,0),-3)
# draw pillars in blue color
for i,j in zip(range(4),range(4,8)):
img = cv2.line(img, tuple(imgpts[i]), tuple(imgpts[j]),(255),3)
# draw top layer in red color
img = cv2.drawContours(img, [imgpts[4:]],-1,(0,0,255),3)
return img
mtx, dist = load_coefficients('camera_parameters.txt')
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-t", "--type", type=str,
default="DICT_ARUCO_ORIGINAL",
help="type of ArUCo tag to detect")
args = vars(ap.parse_args())
source0 = cv2.imread('00000019.jpg')
source1 = cv2.imread('00000042.jpg')
source2 = cv2.imread('00000054.jpg')
source3 = cv2.imread('00000111.png')
source4 = cv2.imread('00000156.jpg')
source5 = cv2.imread('00000000.png')
source6 = cv2.imread('00000041.jpg')
#source = cv2.imread('Kratos_Pose.usdz')
#source = '/toothless/source/toothless.obj'
Aruco_Dict = {
"DICT_4X4_50": aruco.DICT_4X4_50,
"DICT_4X4_100": aruco.DICT_4X4_100,
"DICT_4X4_250": aruco.DICT_4X4_250,
"DICT_4X4_1000": aruco.DICT_4X4_1000,
"DICT_5X5_50": aruco.DICT_5X5_50,
"DICT_5X5_100": aruco.DICT_5X5_100,
"DICT_5X5_250": aruco.DICT_5X5_250,
"DICT_5X5_1000": aruco.DICT_5X5_1000,
"DICT_6X6_50": aruco.DICT_6X6_50,
"DICT_6X6_100": aruco.DICT_6X6_100,
"DICT_6X6_250": aruco.DICT_6X6_250,
"DICT_6X6_1000": aruco.DICT_6X6_1000,
"DICT_7X7_50": aruco.DICT_7X7_50,
"DICT_7X7_100": aruco.DICT_7X7_100,
"DICT_7X7_250": aruco.DICT_7X7_250,
"DICT_7X7_1000": aruco.DICT_7X7_1000,
"DICT_ARUCO_ORIGINAL": aruco.DICT_ARUCO_ORIGINAL,
"DICT_APRILTAG_16h5": aruco.DICT_APRILTAG_16h5,
"DICT_APRILTAG_25h9": aruco.DICT_APRILTAG_25h9,
"DICT_APRILTAG_36h10": aruco.DICT_APRILTAG_36h10,
"DICT_APRILTAG_36h11": aruco.DICT_APRILTAG_36h11
}
image_dict = {
"0": source0,
"1": source1,
"2": source2,
"3": source3,
"4": source4,
"5": source5,
"6": source6
}
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)
axis = np.float32([[3,0,0], [0,3,0], [0,0,-3]]).reshape(-1,3)
# verify that the supplied ArUCo tag exists and is supported by
# OpenCV
if Aruco_Dict.get(args["type"], None) is None:
print("[INFO] ArUCo tag of '{}' is not supported".format(
args["type"]))
sys.exit(0)
# load the ArUCo dictionary and grab the ArUCo parameters
#print("[INFO] detecting '{}' tags...".format(args["type"]))
#arucoDict = cv2.aruco.Dictionary_get(Aruco_Dict[args["type"]])
arucoDict = cv2.aruco.Dictionary_get(Aruco_Dict["DICT_4X4_50"])
arucoParams = cv2.aruco.DetectorParameters_create()
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
cap = cv2.VideoCapture(0)
#time.sleep(1.0)
if not cap.isOpened():
raise IOError("Cannot open webcam")
while True:
ret, frame = cap.read()
#frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
undistorted = cv2.undistort(frame, mtx, dist, None, None)
undistortedG = cv2.cvtColor(undistorted,cv2.COLOR_BGR2GRAY)
(imgH, imgW) = undistorted.shape[:2]
#cv2.imshow('Input', frame)
cv2.imshow('undistorted image', undistorted)
# detect ArUco markers in the input frame
(corners, ids, rejected) = cv2.aruco.detectMarkers(undistorted,
arucoDict, parameters=arucoParams)
MarkerLength = 0.04
# verify *at least* one ArUco marker was detected
if len(corners) > 0:
print('corners length: ', len(corners))
# flatten the ArUco IDs list
ids = ids.flatten()
refpoints = []
# loop over the detected ArUCo corners
for (markerCorner, markerID) in zip(corners, ids):
# extract the marker corners (which are always returned
# in top-left, top-right, bottom-right, and bottom-left
# order)
rvec, tvec, markerPoints = aruco.estimatePoseSingleMarkers(markerCorner, MarkerLength, mtx, dist)
(rvec - tvec).any()
corners = markerCorner.reshape((4, 2))
(topLeft, topRight, bottomRight, bottomLeft) = corners
# convert each of the (x, y)-coordinate pairs to integers
topRight = (int(topRight[0]), int(topRight[1]))
bottomRight = (int(bottomRight[0]), int(bottomRight[1]))
bottomLeft = (int(bottomLeft[0]), int(bottomLeft[1]))
topLeft = (int(topLeft[0]), int(topLeft[1]))
# upper coordinates
a = (3,3)
topRight2 = topRight[0] - 20, topRight[1] - 20
bottomRight2 = bottomRight[0] -20, bottomRight[1] -20
bottomLeft2 = bottomLeft[0] -20, bottomLeft[1] -20
topLeft2 = topLeft[0] -20, topLeft[1] -20
# testing
#corners2 = cv2.cornerSubPix(undistortedG,corners,(11,11),(-1,-1),criteria)
imgpoints, jac = cv2.projectPoints(objp, rvec, tvec, mtx, dist)
#imgpoints = np.float32(imgpoints)
srcrvec, srctvec, srcinliners = cv2.solvePnP(markerPoints, corners, mtx, dist)
undistortedQ = drawQ(undistorted, corners, imgpoints)
# testing
# testing
dstMat = [topLeft, topRight, bottomRight, bottomLeft]
dstMat = np.array(dstMat)
#(srcH, srcW) = source.shape[:2]
source = image_dict[f'{markerID}']
(srcH, srcW) = source.shape[:2]
srcMat = np.array([[0, 0], [srcW, 0], [srcW, srcH], [0, srcH]])
(H, _) = cv2.findHomography(srcMat, dstMat)
warped = cv2.warpPerspective(source, H, (imgW, imgH))
cv2.imshow('warped image', warped)
mask = np.zeros((imgH, imgW), dtype="uint8")
cv2.fillConvexPoly(mask, dstMat.astype("int32"), (255, 255, 255),
cv2.LINE_AA)
rect = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
mask = cv2.dilate(mask, rect, iterations=2)
maskScaled = mask.copy() / 255.0
maskScaled = np.dstack([maskScaled] * 3)
warpedMultiplied = cv2.multiply(warped.astype("float"), maskScaled)
imageMultiplied = cv2.multiply(undistorted.astype(float), 1.0 - maskScaled)
undistorted = cv2.add(warpedMultiplied, imageMultiplied)
undistorted = undistorted.astype("uint8")
#cv2.imshow("Input", undistorted)
#cv2.imshow("Source", source)
#cv2.imshow("OpenCV AR Output", output)
cv2.imshow('draw 3d cube ', undistortedQ)
#cv2.waitKey(0)
# testing
# draw the bounding box of the ArUCo detection
cv2.line(undistorted, topLeft, topRight, (0, 255, 0), 2)
cv2.line(undistorted, topRight, bottomRight, (0, 255, 0), 2)
cv2.line(undistorted, bottomRight, bottomLeft, (0, 255, 0), 2)
cv2.line(undistorted, bottomLeft, topLeft, (0, 255, 0), 2)
# draw rest of cube
cv2.line(undistorted, topLeft2, topRight2, (0, 255, 0), 2)
cv2.line(undistorted, topRight2, bottomRight2, (0, 255, 0), 2)
cv2.line(undistorted, bottomRight2, bottomLeft2, (0, 255, 0), 2)
cv2.line(undistorted, bottomLeft2, topLeft2, (0, 255, 0), 2)
# draw the axis from the estimated pose
aruco.drawAxis(undistorted, mtx, dist, rvec, tvec, 0.01)
# compute and draw the center (x, y)-coordinates of the
# ArUco marker
cX = int((topLeft[0] + bottomRight[0]) / 2.0)
cY = int((topLeft[1] + bottomRight[1]) / 2.0)
cv2.circle(undistorted, (cX, cY), 4, (0, 0, 255), -1)
# draw the ArUco marker ID on the frame
cv2.putText(undistorted, str(markerID),
(topLeft[0], topLeft[1] - 15),
cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 255, 0), 2)
# show the output frame
cv2.imshow("ArUco Markers Detector", undistorted)
key = cv2.waitKey(1) & 0xFF
c = cv2.waitKey(1)
if c == 27:
break
cap.release()
cv2.destroyAllWindows()
Here is an image of what is happening:
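One thing that stands out in the code above is that objp and axis are the 7x6 chessboard points from the OpenCV calibration/pose tutorial, so the points being projected have nothing to do with the marker's geometry. A sketch of projecting a cube sized to the marker instead, reusing the rvec/tvec already returned by estimatePoseSingleMarkers (flip the sign of the z values if the cube appears to sink behind the marker):
import numpy as np
import cv2

MarkerLength = 0.04            # same marker size as above, in metres
half = MarkerLength / 2.0
# 8 cube corners in the marker coordinate system:
# base on the marker plane (z = 0), top one marker length above it
cube = np.float32([
    [-half, -half, 0],            [half, -half, 0],
    [ half,  half, 0],            [-half,  half, 0],
    [-half, -half, MarkerLength], [half, -half, MarkerLength],
    [ half,  half, MarkerLength], [-half,  half, MarkerLength]])

# rvec, tvec taken straight from aruco.estimatePoseSingleMarkers(...)
imgpts, _ = cv2.projectPoints(cube, rvec, tvec, mtx, dist)
undistortedQ = drawQ(undistorted, corners, imgpts)   # drawQ as defined above
As for already existing 3D objects: OpenCV itself only projects points, so the usual route is to parse the mesh (for example an .obj file) into an array of vertices, project those with cv2.projectPoints, and rasterize the faces yourself or hand the estimated pose to a real renderer such as OpenGL.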
UPD: Added working MWE.
I am trying to parse the amount of HP in the game. The idea is that I know the width of the image, so I just get the width of the filled part of the HP bar and then calculate the percentage.
Previously it worked well, but recently the game got an update and the color changed. I know, just a color.
Here is my fully working MWE code; you can try it with the source files attached at the end of the post:
import cv2
import numpy as np
def parse_hp(hp_area):
width = int(hp_area.shape[1] * 5)
height = int(hp_area.shape[0] * 5)
dim = (width, height)
# resize image
resized = cv2.resize(hp_area, dim, interpolation=cv2.INTER_AREA)
# Color segmentation
hsv = cv2.cvtColor(resized, cv2.COLOR_BGR2HSV)
lower_red = np.array([0, 50, 50])
upper_red = np.array([5, 255, 255])
mask = cv2.inRange(hsv, lower_red, upper_red)
res = cv2.bitwise_and(resized, resized, mask=mask)
# Contour extraction
imgray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(imgray, (5, 5), 0)
ret, thresholded = cv2.threshold(blurred, 50, 255, 0)
contours, h = cv2.findContours(thresholded, 1, 2)
if contours:
cnt = contours[0]
approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
if cv2.contourArea(cnt) > 25: # to discard noise from the color segmentation
contour_poly = cv2.approxPolyDP(cnt, 3, True)
center, radius = cv2.minEnclosingCircle(contour_poly)
cv2.circle(resized, (int(center[0]), int(center[1])), int(radius), (0, 255, 0), 2)
cv2.imshow("Found limits", resized)
cv2.waitKey(0)
resized_width = int(resized.shape[1])
hp_width = radius * 2
return int(hp_width * 100 / resized_width)
else:
return -1
if __name__ == "__main__":
hp_area = cv2.imread("/Users/vetalll/Documents/Cv2Working.png")
result = parse_hp(hp_area)
print(result)
I tried to use these values, but it does not work; OpenCV does not recognize them:
lower_red = np.array([355, 44, 45])
upper_red = np.array([356, 41, 43])
And now the color is a little bit purple. I know that it uses the HSV color space, but I'm really not able to figure out how to adjust it to make it work.
Working image:
Not working image:
Source images can be grabbed here:
https://drive.google.com/file/d/1dJ4ePw_7oJov_OU5n6IO6fwdm_N3W5k2/view?usp=sharing
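For reference, OpenCV's 8-bit HSV representation uses H in [0, 179] and S, V in [0, 255], while most color pickers report H in 0-360 and S, V in 0-100, so picker values such as [355, 44, 45] have to be rescaled before cv2.inRange can match anything. A small conversion sketch (the purple bounds are example values, not measured ones):
import numpy as np

def picker_to_opencv_hsv(h, s, v):
    # picker-style HSV (H 0-360, S/V 0-100) -> OpenCV 8-bit HSV (H 0-179, S/V 0-255)
    return np.array([round(h / 2), round(s * 255 / 100), round(v * 255 / 100)], dtype=np.uint8)

lower = picker_to_opencv_hsv(280, 40, 40)
upper = picker_to_opencv_hsv(330, 100, 100)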
After a bit of guessing, I came up with these values. Hope they work:
import cv2
import numpy as np
def parse_hp(hp_area):
width = int(hp_area.shape[1] * 5)
height = int(hp_area.shape[0] * 5)
dim = (width, height)
# resize image
resized = cv2.resize(hp_area, dim, interpolation=cv2.INTER_AREA)
# Color segmentation
hsv = cv2.cvtColor(resized, cv2.COLOR_RGB2HSV)
lower_red = np.array([120, 170, 0])
upper_red = np.array([245, 255, 255])
mask = cv2.inRange(hsv, lower_red, upper_red)
res = cv2.bitwise_and(resized, resized, mask=mask)
# Contour extraction
imgray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(imgray, (5, 5), 0)
ret, thresholded = cv2.threshold(blurred, 50, 255, 0)
contours, h = cv2.findContours(thresholded, 1, 2)
if contours:
cnt = contours[0]
approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
if cv2.contourArea(cnt) > 25: # to discard noise from the color segmentation
contour_poly = cv2.approxPolyDP(cnt, 3, True)
center, radius = cv2.minEnclosingCircle(contour_poly)
cv2.circle(resized, (int(center[0]), int(center[1])), int(radius), (0, 255, 0), 2)
cv2.imshow("Found limits", resized)
cv2.waitKey(0)
resized_width = int(resized.shape[1])
hp_width = radius * 2
return int(hp_width * 100 / resized_width)
else:
return -1
if __name__ == "__main__":
hp_area = cv2.imread("Cv2NotWorking.png")
result = parse_hp(hp_area)
print(result)
I'm building a neural network model, and the detection.py file is really slow: the output video advances literally frame by frame.
I tried adjusting a few things, but the result is still the same, a slow video. What can the problem be? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *
seed = 11
rnd.seed(seed)
np.random.seed(seed)
videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)
model = make_model()
model.load_weights("weights/weights_best.h5")
lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize=30
while True:
ret, frame = cap.read()
if ret == False:
print("Done")
break
#convert image to HSV from BGR
img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
#find the pixels that correspond to the road
img_out = cv2.inRange(img_hsv, lower, upper)
#clean from noisy pixels and keep the largest connected segment
img_out = post_process(img_out)
image_masked = frame.copy()
#get masked image
image_masked[img_out==0] = (0,0,0)
s=0.25
#resize images for computational efficiency
frame = cv2.resize(frame, None, fx=s, fy=s)
image_masked = cv2.resize(image_masked, None, fx=s, fy=s)
#run the sliding window detection process
bbox_list, totalWindows, correct, score = detectionProcess(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50, depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)
#draw the detections
drawBoxes(frame, bbox_list)
#draw detections and road masks
cv2.imshow("video", sidebyside(frame, image_masked))
k = cv2.waitKey(3)
if(k & 0xFF == ord('q')):
cv2.destroyWindow("video")
break
cap.release()
cv2.destroyAllWindows()
Here is the utils.py file where I wrote post_process and detectionProcess:
import numpy as np
import cv2
def scale_to_image(x, a=0, b=255):
#min-max scaling for grayscale images
ma=(np.max(x))
if(ma==0):
return x.astype(np.uint8)
mi=(np.min(x))
normalized_data = ((x.astype(float)-float(mi))/float(ma)) #normalize 0-1
normalized_data = (normalized_data*b + a*(1-normalized_data)) #scale values
return normalized_data.astype(np.uint8)
def nothing(x):
pass
def channels3(x):
#stack grayscale images together to increase the color channels to 3
return np.dstack((x,x,x))
def sidebyside(x,y):
#concatenate images side by side (horizontally)
return np.concatenate((x,y), axis=1)
def updown(x,y):
#concatenate images up and down (vertically)
return np.concatenate((x,y), axis=0)
def extractLargerSegment(maskROAD):
contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, contours=None, hierarchy=None)
maxA = 0
maskTemp = np.zeros_like(maskROAD)
if(len(contours) > 0):
for h, cnt in enumerate(contours):
if(cv2.contourArea(cnt) > maxA):
cntMax = cnt
maxA = cv2.contourArea(cnt)
mask = np.zeros(maskROAD.shape, np.uint8)
cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
return maskROAD
def post_process(img):
kernel = np.ones((5,5), np.uint8)
img_out = cv2.erode(img, kernel, iterations=3)
kernel = np.ones((20,20), np.uint8)
img_out = cv2.dilate(img_out, kernel, iterations=5)
img_out = extractLargerSegment(img_out)
return img_out
def display(img_init, img_hsv, img_out2, img_out):
mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv), sidebyside(channels3(img_out), channels3(img_out2))))
def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
index=0
totalWindows=0
correct=0
bbox_list = []
score = []
for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
scale = frame.shape[0]/resized.shape[0]
for (x,y,window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
if window.shape[0] != winH or window.shape[1] != winW:
continue
if(depth==1):
window=cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
window=np.expand_dims(window,3)
window = window[None, :, :, :]
totalWindows += 1
class_out = model.predict((window.astype(np.float32))/255., batch_size=1)[0]
if(class_out<thres_score):
bbox_list.append(((int(x*scale)), int(y*scale), int((x+winW)*scale), int((y+winH)*scale)))
score.append(class_out)
correct+=1
index+=1
return bbox_list, totalWindows, correct, score
def sliding_window(image, stepSize, windowSize):
#slide a window across the image
for y in range(0, image.shape[0], stepSize):
for x in range(0, image.shape[1], stepSize):
yield(x, y, image[y:y + windowSize[1], x:x + windowSize[0]])
def pyramid(image, scale=1.5, minSize=(30,30), nb_images=3):
yield image
count=0
#keep looping over the pyramid
while True:
#compute new dimensions of an image and resize it
w = int(image.shape[1]/scale)
h = int(image.shape[0]/scale)
image = cv2.resize(image, (w,h))
count += 1
scale = np.power((1/scale), count)
if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
break
yield image
def drawBoxes(frame, bbox_list):
for i in range(len(bbox_list)):
box = bbox_list[i]
cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
return frame
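One likely reason for the frame-by-frame speed is that detectionProcess calls model.predict once per sliding window with batch_size=1, which pays the Keras call overhead hundreds of times per frame. A sketch of batching all windows of a frame into a single forward pass, reusing the pyramid and sliding_window helpers above and assuming the model outputs a single score per window as in the original code:
import numpy as np
import cv2

def detectionProcess_batched(frame, model, winH=32, winW=32, depth=1,
                             nb_images=2, scale=1.2, stepSize=10, thres_score=0):
    bbox_list, score = [], []
    windows, coords = [], []
    # collect every window first instead of predicting them one by one
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        s = frame.shape[0] / resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            if depth == 1:
                window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)[..., None]
            windows.append(window.astype(np.float32) / 255.0)
            coords.append((x, y, s))
    if not windows:
        return bbox_list, 0, 0, score
    # one forward pass for the whole frame
    preds = model.predict(np.stack(windows), batch_size=64)[:, 0]
    for (x, y, s), p in zip(coords, preds):
        if p < thres_score:   # same acceptance rule as the original code
            bbox_list.append((int(x * s), int(y * s), int((x + winW) * s), int((y + winH) * s)))
            score.append(p)
    return bbox_list, len(windows), len(bbox_list), score
Larger stepSize, a smaller resize factor and lighter morphology in post_process (a 20x20 dilation with 5 iterations is expensive) also help; playback speed is otherwise limited by how long each frame takes to process, not by cv2.waitKey(3).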
I am making an object detection project.
I have my code, written by following a tutorial. In the tutorial, the author drew a rectangle in OpenCV around every detected object.
But I want to change the rectangle to a triangle or an arrow.
Let me explain with code:
In my function, I detect objects.
And here I draw a rectangle for the detected objects:
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to change this rectangle to a triangle (and I want to place the triangle above the object).
Just like in these images:
This is the object detection with a triangle:
(image)
This is what I want to make instead of the rectangle:
(image)
How can I draw a triangle/arrow at the position of each detected object?
All of my code is here:
from os.path import sep
import cv2 as cv2
import numpy as np
import json
# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam = cap_cam.read()
hey = 0
print(cv2. __version__)
whT = 320
confThreshold =0.5
nmsThreshold= 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames = f.read().rstrip('\n').split('\n')
print(classNames)
## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
def findObjects(outputs,img):
global hey
global previousHey
hT, wT, cT = img.shape
bbox = []
classIds = []
confs = []
for output in outputs:
for det in output:
scores = det[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > confThreshold:
w,h = int(det[2]*wT) , int(det[3]*hT)
x,y = int((det[0]*wT)-w/2) , int((det[1]*hT)-h/2)
bbox.append([x,y,w,h])
classIds.append(classId)
confs.append(float(confidence))
global indicates
indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
hey = 0
for i in indices:
i = i[0]
box = bbox[i]
x, y, w, h = box[0], box[1], box[2], box[3]
# print(x,y,w,h)
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
#cv2.line(img, (350,400), (x, y), (255,0,0), 4)
#cv2.line(img, (400,400), (x + 50 , y), (255,0,0), 4)
#cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
#(x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
print('success')
hey = 1
video_frame_counter = 0
while cap_cam.isOpened():
img = cv2.imread('photos' + sep + 'lutfen.jpg')
# the arrow video will be played here
# to do: try 'if not detection' here tomorrow
blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
net.setInput(blob)
layersNames = net.getLayerNames()
outputNames = [(layersNames[i[0] - 1]) for i in net.getUnconnectedOutLayers()]
outputs = net.forward(outputNames)
findObjects(outputs,img)
cv2.imshow('Image', img)
# Video feed
if hey == 1:
filename = 'photos' + sep + 'Baslksz-3.mp4'
cap_vid = cv2.VideoCapture(filename)
if hey == 0:
filename = 'photos' + sep + 'vid2.mp4'
cap_vid = cv2.VideoCapture(filename)
print(hey)
ret, frame_vid = cap_vid.read()
#cap_cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
#cap_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
# Resize the camera frame to the size of the video
height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))
# Capture the next frame from camera
ret, frame_cam = cap_cam.read()
video_frame_counter += 1
if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
video_frame_counter = 0
cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
frame_cam = cv2.resize(frame_cam, (width, height), interpolation = cv2.INTER_AREA)
#ret = cap_vid.set(cv2.CAP_PROP_POS_MSEC, time_passed)
ret, frame_vid = cap_vid.read()
if not ret:
print('Cannot read from video stream')
break
# Blend the two images and show the result
tr = 0.4 # transparency between 0-1, show camera if 0
frame = ((1-tr) * frame_cam.astype(float) + tr * frame_vid.astype(float)).astype(np.uint8)
cv2.imshow('Transparent result', frame)
if cv2.waitKey(1) == 27: # ESC is pressed
break
cap_cam.release()
cap_vid.release()
cv2.destroyAllWindows()
The easy way
You can use the cv2.arrowedLine() function, which will draw something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
which should give a result similar to the image below. Take a look at the OpenCV documentation for the description of the parameters of the function. You can change its size, thickness, color, etc.
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv2.drawContours() to render it. For example:
# define the arrow shape
shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
[10,-50],[10,-25],[25,-25]]])
# move it to the desired position
cx = x + w // 2
cy = y - 5
shape[:,:,0] += cx
shape[:,:,1] += cy
# draw it
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet will give you the image below. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.