How to map a coordinate to another image(Open-CV) - python

Hey guys im working on a simple eye tracking project i am planning on doing this by the following method:
1.find both pupils using Dlib and opencv thresholding which is working perfectly and providing the pupil center
I drew a rectangle by using the landmark coordinates of the eyes to draw a rectangle surrounding the eye such that the pupil is within this rectangle
I averaged the coordinates of the rectangles and pupils of both the eyes to get a rectangle at the centre of the nose or forehead region
Image here
Now i need to map this averaged rectangle with the pupil coordinate the one in the green to the screen so that the boundaries of the rectangle acts as the screen boundaries and the pupil coordinates vaguely represent where i am looking at
Any idea how Thanks in Advance
heres how i tried to map it
import cv2
import numpy as np
from gaze_tracking import GazeTracking
from scipy.spatial import distance as dist
gaze = GazeTracking()
webcam = cv2.VideoCapture(2)
def map_to_smaller_bbox(sm_bbox_w, sm_bbox_h, lg_win_w, lg_win_h, avg_p_x, avg_p_y):
sm_height_delta = lg_win_h/sm_bbox_h
sm_width_delta = lg_win_w/sm_bbox_w
sm_scaled_x = round(avg_p_x/sm_width_delta)
sm_scaled_y = round(avg_p_y/sm_height_delta)
return [sm_scaled_x, sm_scaled_y]
def map_to_larger_bbox(sm_bbox_w, sm_bbox_h, win_w, win_h, sc_x, sc_y):
sc_height_delta = sm_bbox_h/win_h
sc_width_delta = sm_bbox_w/win_w
sc_win_x = round(sc_x/sc_width_delta)
sc_win_y = round(sc_y/sc_height_delta)
return [sc_win_x, sc_win_y]
def cam_frame_size(frame_cam):
cam_w, cam_h = gaze.get_frame_size()
return [cam_w, cam_h]
def screen_frame_size(frame_screen):
screen_w, screen_h = frame_screen.shape[:2]
return [screen_w, screen_h]
def get_gaze_coords(cam_frame):
left_pupil = gaze.pupil_left_coords()
right_pupil = gaze.pupil_right_coords()
coord_coll = gaze.eye_roi_bb()
if len(coord_coll) == 2:
left_coords = coord_coll[0]
right_coords = coord_coll[1]
left_x1 = left_coords[0]
left_x2 = left_coords[1]
left_y1 = left_coords[2]
left_y2 = left_coords[3]
right_x1 = right_coords[0]
right_x2 = right_coords[1]
right_y1 = right_coords[2]
right_y2 = right_coords[3]
avg_x1 = int((left_x1 + right_x1)/2)
avg_x2 = int((left_x2 + right_x2)/2)
avg_y1 = int((left_y1 + right_y1)/2 - 5)
avg_y2 = int((left_y2 + right_y2)/2 + 5)
avg_bbox_w = int(dist.euclidean((avg_x1,avg_y1), (avg_x2, avg_y1)))
avg_bbox_h = int(dist.euclidean((avg_x1,avg_y1), (avg_x1, avg_y2)))
# avg_bbox_w_c = int((avg_x1+avg_x2)/2, (avg_y1+avg_y2)/2)
# avg_bbox_h_c = int((avg))
cam_size = cam_frame_size(cam_frame)
cam_height_delta = cam_size[1]/avg_bbox_h
cam_width_delta = cam_size[0]/avg_bbox_w
screen_size = screen_frame_size(screen_frame)
if left_pupil or right_pupil is not None:
avg_pupil_x = int((right_pupil[0] + left_pupil[0])/2)
avg_pupil_y = int((right_pupil[1] + left_pupil[1])/2)
cv2.rectangle(cam_frame, (avg_x1,avg_y1), (avg_x2,avg_y2), (0, 255, 0), 1)
cv2.rectangle(cam_frame, (right_x1,right_y1-5), (right_x2,right_y2+5), (0, 0, 255), 1)
cv2.rectangle(cam_frame, (left_x1,left_y1-5), (left_x2,left_y2+5), (0, 0, 255), 1)
# sm_sc_coords = map_to_smaller_bbox(avg_bbox_w, avg_bbox_h,
# cam_size[0], cam_size[1], avg_pupil_x,avg_pupil_y)
# lg_sc_coords = map_to_larger_bbox(avg_bbox_w, avg_bbox_h,
# screen_size[0], screen_size[1], sm_sc_coords[0], sm_sc_coords[1])
return (avg_pupil_x, avg_pupil_y)
while True:
# We get a new frame from the webcam
ret, cam_frame = webcam.read()
if not ret:
print("Couldn't get Video Source")
break
# We send this frame to GazeTracking to analyze it
gaze.refresh(cam_frame)
cam_frame = gaze.annotated_frame()
screen_frame = np.zeros((1366, 768, 3), np.uint8)
# text = ""
# if gaze.is_blinking():
# text = "Blinking"
# elif gaze.is_right():
# text = "Looking right"
# elif gaze.is_left():
# text = "Looking left"
# elif gaze.is_center():
# text = "Looking center"
# cv2.putText(frame, text, (90, 60), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)
# left_pupil = gaze.pupil_left_coords()
# right_pupil = gaze.pupil_right_coords()
# if (left_pupil or right_pupil) != None :
# avg_center_coords = (int((left_pupil[0] + right_pupil[0])/2), int((left_pupil[1] + right_pupil[1])/2))
# cv2.putText(frame, "Avg: " + str(avg_center_coords), (90, 200), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)
# cv2.putText(frame, "Left pupil: " + str(left_pupil), (90, 130), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)
# cv2.putText(frame, "Right pupil: " + str(right_pupil), (90, 165), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1)
# coord_coll = gaze.eye_roi_bb()
# if len(coord_coll) == 2:
# left_coords = coord_coll[0]
# right_coords = coord_coll[1]
# left_x1 = left_coords[0]
# left_x2 = left_coords[1]
# left_y1 = left_coords[2]
# left_y2 = left_coords[3]
# right_x1 = right_coords[0]
# right_x2 = right_coords[1]
# right_y1 = right_coords[2]
# right_y2 = right_coords[3]
# avg_x1 = int((left_x1 + right_x1)/2)
# avg_x2 = int((left_x2 + right_x2)/2)
# avg_y1 = int((left_y1 + right_y1)/2 - 5)
# avg_y2 = int((left_y2 + right_y2)/2 + 5)
# cv2.rectangle(frame, (avg_x1,avg_y1), (avg_x2,avg_y2), (0, 255, 0), 1)
# cv2.rectangle(frame, (right_x1,right_y1-5), (right_x2,right_y2+5), (0, 0, 255), 1)
# left_bbox_width = int((left_x2 - left_x1) + 1)
# left_bbox_height = int((left_y2 - left_y1) + 1)
# right_bbox_width = int((right_x2 - right_x1) + 1)
# right_bbox_height = int((right_y2 - right_y1) + 1)
# print((left_bbox_width, left_bbox_height), (right_bbox_width, right_bbox_height))
# left_bbox_w = int(dist.euclidean((left_x1,left_y1),(left_x2,left_y2)))
# left_bbox_h = int(dist.euclidean((left_x1,left_y2),(left_x1,left_y1)))
# right_bbox_w = int(dist.euclidean((right_x1,right_y1),(right_x2,right_y2)))
# right_bbox_w = int(dist.euclidean((right_x1,right_y2),(right_x1,right_y1)))
# avg_bbox_w = int(dist.euclidean((avg_x1,avg_y1), (avg_x2, avg_y1)))
# avg_bbox_h = int(dist.euclidean((avg_x1,avg_y1), (avg_x1, avg_y2)))
# print(avg_bbox_w, avg_bbox_h)
# width, height = gaze.get_frame_size()
# width, height = width, height
# height_delta = height/avg_bbox_h
# width_delta = width/avg_bbox_w
# if right_pupil is not None:
# scaled_x1 = round(right_pupil[0]*height_delta)
# print(width_delta, height_delta)
# cv2.rectangle(frame, (scaled_x1, scaled_x2), (scaled_y1, scaled_y2), (0,0,255), 1)
# print(scaled_x1, scaled_x2, scaled_y1, scaled_y2)
# if left_pupil or right_pupil is not None:
# avg_pupil_x = int((right_pupil[0] + left_pupil[0])/2)
# avg_pupil_y = int((right_pupil[1] + left_pupil[1])/2)
# sm_sc_coords = map_to_smaller_bbox(avg_bbox_w, avg_bbox_h,
# width, height, avg_pupil_x,avg_pupil_y)
# # print(sm_sc_coords[0], sm_sc_coords[1])
# # cv2.rectangle(frame, (sm_sc_coords[2], sm_sc_coords[4]), (sm_sc_coords[3], sm_sc_coords[5]), (0,0,255), 1)
# lg_sc_coords = map_to_larger_bbox(avg_bbox_w, avg_bbox_h,
# width, height, sm_sc_coords[0], sm_sc_coords[1])
# cv2.rectangle(frame, (lg_sc_coords[2], lg_sc_coords[4]), (lg_sc_coords[3], lg_sc_coords[5]), (0,0,255), 1)
# print(lg_sc_coords[0], lg_sc_coords[1])
# scaled_x = round(avg_pupil_x*width_delta)
# scaled_y = round(avg_pupil_y*height_delta)
# print(avg_pupil_x, avg_pupil_y, scaled_x, scaled_y)
p_xy = get_gaze_coords(cam_frame)
print(p_xy)
# if p_xy is not None:
# # print(p_xy)
# screen_frame = cv2.circle(screen_frame, (p_xy[0], p_xy[1]), 10, (0, 255, 0), 1)
# cv2.line(frame, (left_pupil[0], left_pupil[1]), (lg_sc_coords[0], lg_sc_coords[1]), (0,0,255), 1)
# cv2.line(frame, (right_pupil[0], right_pupil[1]), (lg_sc_coords[0], lg_sc_coords[1]), (0,0,255), 1)
# print(lg_sc_coords[2], lg_sc_coords[4], lg_sc_coords[3], lg_sc_coords[5])
# cv2.namedWindow('Demo',cv2.WINDOW_NORMAL)
# cv2.namedWindow('gaze',cv2.WINDOW_NORMAL)
cv2.imshow("Demo", cam_frame)
# cv2.imshow("gaze", screen_frame)
if cv2.waitKey(1) == 27:
break

Related

How do I clock the user action?

I follow a simple head pose estimation tutorial in python, I try to make some modification in the code but I got stuck for days now, I just want to know how long the user been looking to the left or right, if the user is detected to be looking to the left or right for a long time let say 2-3 mins, then the program should give a warning or print a simple message saying how long his/her been looking to the left or right. How do I achieve this? any ideas how to do this?
sorry for my bad english
any help will be appreciated :)
here's my code:
import cv2
import mediapipe as mp
import numpy as np
import time
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
success, image = cap.read()
image = cv2.resize(image, (780, 350))
image = cv2.flip(image, 1)
#start = time.time()
# Flip the image horizontally for a later selfie-view display
# Also convert the color space from BGR to RGB
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance
image.flags.writeable = False
# Get the result
results = face_mesh.process(image)
# To improve performance
image.flags.writeable = True
# Convert the color space from RGB to BGR
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
img_h, img_w, img_c = image.shape
face_3d = []
face_2d = []
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
for idx, lm in enumerate(face_landmarks.landmark):
if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
if idx == 1:
nose_2d = (lm.x * img_w, lm.y * img_h)
nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
x, y = int(lm.x * img_w), int(lm.y * img_h)
# Get the 2D Coordinates
face_2d.append([x, y])
# Get the 3D Coordinates
face_3d.append([x, y, lm.z])
# Convert it to the NumPy array
face_2d = np.array(face_2d, dtype=np.float64)
# Convert it to the NumPy array
face_3d = np.array(face_3d, dtype=np.float64)
# The camera matrix
focal_length = 1 * img_w
cam_matrix = np.array([ [focal_length, 0, img_h / 2],
[0, focal_length, img_w / 2],
[0, 0, 1]])
# The distortion parameters
dist_matrix = np.zeros((4, 1), dtype=np.float64)
# Solve PnP
success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
# Get rotational matrix
rmat, jac = cv2.Rodrigues(rot_vec)
# Get angles
angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
# Get the y rotation degree
x = angles[0] * 360
y = angles[1] * 360
z = angles[2] * 360
# See where the user's head tilting
if y < -10:
# i want to know how long this guy been looking to his left or right or up or down
text = "Looking Left"
elif y > 10:
text = "Looking Right"
elif x < -10:
text = "Looking Down"
elif x > 10:
text = "Looking Up"
else:
text = "Forward"
# Display the nose direction
nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
p1 = (int(nose_2d[0]), int(nose_2d[1]))
p2 = (int(nose_2d[0] + y * 10) , int(nose_2d[1] - x * 10))
cv2.line(image, p1, p2, (255, 0, 0), 3)
# Add the text on the image
cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
cv2.putText(image, "x: " + str(np.round(x,2)), (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
cv2.putText(image, "y: " + str(np.round(y,2)), (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
cv2.putText(image, "z: " + str(np.round(z,2)), (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
# need some fix here
# end = time.time()
# totalTime = end - start
# fps = 1 / totalTime
#print("FPS: ", fps)
#cv2.putText(image, f'FPS: {int(fps)}', (20,450), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
cv2.imshow('Head Pose Estimation', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()

yolov7 deepSORT object tracking IDs are different in all video frames

I am trying to detect objects in a certain area using yolov7 and deepSORT algorithm, but in the results I get, I see that the IDs are always changing. I leave 3 photos for you to understand.
As you can see the IDs are different in all frames.
`
#class base virtual zone tracking
import random
import torch
import numpy as np
from models.experimental import attempt_load
from utils.torch_utils import TracedModel
from utils.datasets import letterbox
from utils.plots import plot_one_box, plot_one_box_center
from utils.general import check_img_size, non_max_suppression, scale_coords
import cv2
import time
from google.colab.patches import cv2_imshow
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
#deep sort
import os
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
from tensorflow.compat.v1 import ConfigProto
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection
import matplotlib.pyplot as plt
from deep_sort import preprocessing, nn_matching
from tracking_helpers import read_class_names, create_box_encoder
from detection_helpers import *
class YOLOv7:
def __init__(self, weights: str, image_size:int,device:str):
self.device = device
self.weights = weights
self.model = attempt_load(self.weights, map_location=self.device) # Model Load FP32
self.stride = int(self.model.stride.max())
self.image_size = check_img_size(image_size, self.stride)
if self.device != 'cpu':
self.half = True
else:
self.half = False
if self.half:
self.model.half() # FP16
self.names = self.model.module.names if hasattr(self.model , 'module') else self.model.names
color_values = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(self.names))]
self.colors = {i:color_values[i] for i in range(len(self.names))}
def detect(self, raw_image: np.ndarray, conf_thresh =0.45, iou_thresh =0.45, classes = [0]): #default class people
# Run inference
if self.device != 'cpu':
self.model(torch.zeros(1, 3, self.image_size, self.image_size).to(self.device).type_as(next(self.model.parameters())))
with torch.no_grad():
image = letterbox(raw_image, self.image_size, stride=self.stride)[0]
image = image[:, :, ::-1].transpose(2, 0, 1)
image = np.ascontiguousarray(image)
image = torch.from_numpy(image).to(self.device)
image = image.half() if self.half else image.float()
image /= 255.0
if image.ndimension() == 3:
image = image.unsqueeze(0)
# Inference
detections = self.model(image, augment=False)[0]
# Apply NMS
detections = non_max_suppression(detections, conf_thresh, iou_thresh, classes=classes, agnostic=False)[0]
# Rescale boxes from img_size to raw image size
detections[:, :4] = scale_coords(image.shape[2:], detections[:, :4], raw_image.shape).round()
return detections
def tracking(self, video_frame, yolo_dets, inside_poly = True, count_objects:bool=False,verbose=False, reID_model_path = "./deep_sort/model_weights/mars-small128.pb", nms_max_overlap:float=1.0, max_cosine_distance:float=0.4, nn_budget:float=None):
class_names = read_class_names()
encoder = create_box_encoder(reID_model_path, batch_size=1)
nms_max_overlap = nms_max_overlap
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric)
*xyxy, conf, cls = yolo_dets
frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
if yolo_dets is None:
bboxes = []
scores = []
classes = []
num_objects = 0
else:
bboxes = yolo_dets[:,:4]
bboxes[:,2] = bboxes[:,2] - bboxes[:,0] # convert from xyxy to xywh
bboxes[:,3] = bboxes[:,3] - bboxes[:,1]
scores = yolo_dets[:,4]
classes = yolo_dets[:,-1]
num_objects = bboxes.shape[0]
#how many object you track
names = []
for i in range(num_objects): # loop through objects and use class index to get class name
class_indx = int(classes[i])
class_name = class_names[class_indx]
names.append(class_name)
names = np.array(names)
count = len(names)
if count_objects:
cv2.putText(frame, "both inside and outside the polygon detection: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 2)
# DeepSORT tacker work starts here
features = encoder(frame, bboxes) # encode detections and feed to tracker. [No of BB / detections per frame, embed_size]
detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)] # [No of BB per frame] deep_sort.detection.Detection object
cmap = plt.get_cmap('tab20b') #initialize color map
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
boxs = np.array([d.tlwh for d in detections]) # run non-maxima supression below
scores = np.array([d.confidence for d in detections])
classes = np.array([d.class_name for d in detections])
indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
detections = [detections[i] for i in indices]
tracker.predict() # Call the tracker
tracker.update(detections) # updtate using Kalman Gain
for track in tracker.tracks: # update new findings AKA tracks
#if not track.is_confirmed() or track.time_since_update > 1:
#continue
bbox = track.to_tlbr()
class_name = track.get_class()
color = colors[int(track.track_id) % len(colors)] # draw bbox on screen
color = [i * 255 for i in color]
#drawing poly
#pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
#frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
#creating poly
#poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
#center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
#point = Point(center)
if inside_poly:
#drawing poly
pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
#creating poly
poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
point = Point(center)
if poli.contains(point):
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
cv2.putText(frame, class_name + " : " + str(track.track_id),(int(bbox[0]), int(bbox[1]-11)),0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
cv2.putText(frame, "0", center,0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
else:
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
cv2.putText(frame, class_name + " : " + str(track.track_id),(int(bbox[0]), int(bbox[1]-11)),0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
if verbose == 2:
print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
result = np.asarray(frame)
result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
return result
if __name__=='__main__':
yolov7=YOLOv7(weights='yolov7x.pt', device='cpu', image_size=800)
cap = cv2.VideoCapture('street5sn.mp4')
torch.cuda.empty_cache()
#writer
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # by default VideoCapture returns float instead of int
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*"DIVX")
out = cv2.VideoWriter("./output/video_out_track5sn-d2.mp4", codec, fps, (width, height))
while True:
t1 = time.time()
ret, frame = cap.read()
if not ret:
break
detections=yolov7.detect(frame)
vir = yolov7.tracking(frame, detections, count_objects = True, inside_poly = False)
out.write(vir)
cv2_imshow(vir) #colab imshow kodu
print("add frame ...")
if cv2.waitKey(1) & 0xFF == ord('q'):
break
out.release()
cap.release()
cv2.destroyAllWindows()
`
I use this repo repo
I did not make any changes to other files.

OpenCV Python: Using multithreading with VideoCapture and VideoWriter

I am labeling some recordings with quite a few objects, I notice that when I do this in the naieve OpenCV manner, the video export takes ~real-time (hour long video = hour long export). I'd like to accelerate this process, and I was looking at some forums and some code examples about how to use a separate thread to read the frames and then use the main thread to do the frame processing. When I try this, however, the exported video is way too fast, and my added labels/annotations do not sync with the actual recording. Furthermore, even though I set the export frame rate the same as the input video (30fps), it seems that only my added annotations follow it. Here's my code, the multiprocessing class was adapted from (https://github.com/gilbertfrancois/video-capture-async), I took their threadlock idea, but it did not do anything for me.
class Threaded_VidCapture(object):
def __init__(self, videoSrc):
#Constructor parameters
self.VidIn = cv2.VideoCapture(videoSrc)
self.Terminate = False
self.Ret, self.Frame = self.VidIn.read()
#Video Parameters
self.FPS = int(self.VidIn.get(cv2.CAP_PROP_FPS))
self.Resolution = (
int(self.VidIn.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(self.VidIn.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
self.FrameCount = int(self.VidIn.get(cv2.CAP_PROP_FRAME_COUNT))
self.ThreadLock = threading.Lock()
def startProcess(self):
self.ThreadStart = Thread(target=self.retrieveFrames, args=())
self.ThreadStart.daemon = True
self.ThreadStart.start()
return(self)
def retrieveFrames(self):
while self.Terminate is False:
Ret, Frame = self.VidIn.read()
with self.ThreadLock:
self.Ret = Ret
self.Frame = Frame
def read(self):
with self.ThreadLock:
return(self.Frame.copy(), self.Ret)
def TerminateProgram(self):
self.Terminate = True
self.ThreadStart.join()
self.VidIn.release()
def annotateVideo_wrapper(RotationalCounts, Coords_FromLabel, Coords_ToLabel,
videoFile, videoExport, **kwargs):
GetFrames = Threaded_VidCapture(videoSrc=videoFile).startProcess()
Write = cv2.VideoWriter_fourcc(*"XVID")
Export = cv2.VideoWriter(videoExport, Write, GetFrames.FPS, GetFrames.Resolution)
def labelAnnotation(Frame, Text, Position, Color_3ChannelRGB, **kwargs):
ScaleKwargs = dict([(key, vals) for key, vals in kwargs.items()])
#Base parameters from CV2 website
Font = cv2.FONT_HERSHEY_PLAIN
if len(ScaleKwargs) == 0:
Scale = 2
else:
Scale = ScaleKwargs["Scale"]
Color = (0,0,0)#Black
Thickness = cv2.FILLED
Margin = 5
TextSize = cv2.getTextSize(text=str(Text), fontFace=Font, fontScale=Scale, thickness=Thickness)
EndX = Position[0] + TextSize[0][0] + Margin
EndY = Position[1] - TextSize[0][1] - Margin
cv2.rectangle(Frame, Position, (EndX, EndY), Color_3ChannelRGB, Thickness)
cv2.putText(Frame, str(Text), Position, Font, Scale, Color, 1, cv2.LINE_AA)
if kwargs:
HashMap = {keys: Val for keys, Val in kwargs.items()}
try:
Graphs = Graph.Live_movementTrace(
DataFrame=HashMap["DataFrame"],
Label=HashMap["Label"],
RotationMetaData=HashMap["RotationMetaData"],
IndexLimit=HashMap["IndexLimit"]
)
except KeyError:
raise(KeyError("Missing arguments, re-run the program with the correct input or leave this function blank"))
fig = mp.figure()
fig.suptitle("Live Movement Plot", fontweight="bold", fontsize=14)
Hash = {"CW_x":[], "CW_y":[], "CCW_x":[], "CCW_y":[]}
line1, = mp.plot(Hash["CW_x"], Hash["CW_y"], color = "blue")
line2, = mp.plot(Hash["CCW_x"], Hash["CCW_y"], color = "red")
mp.xlim(0, GetFrames.Resolution[0])
mp.ylim(0, GetFrames.Resolution[1])
ax = mp.gca()
ax.invert_yaxis()
else:
Graphs = []
Ind=0
ProgressBar = tqdm(total = len(Coords_FromLabel))
while(Ind < GetFrames.FrameCount):
Frame = GetFrames.read()[0]
####################
#Add Coordinate System
####################
cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_FromLabel[Ind][0]), 0), (0, 0, 0), 5, 8, 0)#MinY
cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_FromLabel[Ind][0]), GetFrames.Resolution[1]), (255, 0, 0), 5, 8, 0)#MaxY
cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (0, int(Coords_FromLabel[Ind][1])), (0, 255, 0), 5, 8, 0)#MinX
cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (GetFrames.Resolution[0], int(Coords_FromLabel[Ind][1])), (255, 255, 255), 5, 8, 0)#MaxX
####################
#Add Coutners
####################
labelAnnotation(Frame, f"CW: {round(RotationalCounts[0][Ind], 3)}", (20, 100), (255, 255, 255))
labelAnnotation(Frame, f"CCW: {round(RotationalCounts[1][Ind], 3)}", (20, 130), (0, 0, 255))
if Ind < 10000:
labelAnnotation(Frame, f"Frame Num: {Ind}", (20, 160), (100, 100, 100))
elif Ind >= 10000:
labelAnnotation(Frame, f"Frame Num: {Ind}", (20, 160), (100, 100, 100), Scale = 1.5)
####################
#Add Vectors
####################
cv2.line(Frame, (int(Coords_FromLabel[Ind][0]), int(Coords_FromLabel[Ind][1])), (int(Coords_ToLabel[Ind][0]), int(Coords_ToLabel[Ind][1])), (0, 0, 255), 5, 8, 0)
if len(Graphs) != 0:
if ((Ind in Graphs[3]) and (Ind != 0)):
for keys in Hash:
Hash[keys].clear()
Hash["CW_x"].append(Graphs[0][f'{HashMap["Label"]}_x'][Ind])
Hash["CW_y"].append(Graphs[0][f'{HashMap["Label"]}_y'][Ind])
line1.set_xdata(Hash["CW_x"])
line1.set_ydata(Hash["CW_y"])
Hash["CCW_x"].append(Graphs[1][Ind])
Hash["CCW_y"].append(Graphs[2][Ind])
line2.set_xdata(Hash["CCW_x"])
line2.set_ydata(Hash["CCW_y"])
fig.canvas.draw()
img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
#Needs to be a variable offset
x_offset = GetFrames.Resolution[0] - 450
y_offset = GetFrames.Resolution[1] - 340
# print(Frame.shape)
x_end = x_offset + img.shape[1]
y_end = y_offset + img.shape[0]
Frame[y_offset:y_end, x_offset:x_end] = img
Export.write(Frame)
#if HashMap["ShowFrame"] is True:
#cv2.imshow("Frame", Frame)
cv2.waitKey(1) & 0xFF
# if ((cv2.waitKey(1) & 0xFF == ord("q")) or (GetFrames.Terminate is True)):
# GetFrames.TerminateProgram()
# break
ProgressBar.update(1)
Ind += 1
Export.release()
cv2.destroyAllWindows()
GetFrames.TerminateProgram()
There's a lot that I'm adding to each frame, and again this works fine in just the naive approach, I get about 23 it/s as measured with tqdm, which is close to FPS mark. I'd like to be above 30 it/s, but I don't know if that's possible.

better way to find bullet holes in a target

Hi I'm making a project about detecting bullet holes in target circles. My original idea was to use Hough circle algorithms to detect both targets which works quite alright for photos that are straight in front of it and bullet holes that are not as good. Sooo I was wandering if anyone could tip me with some better solution on finding them or helping me improve this code.
import cv2 as cv
import numpy as np
import math
import sys
from PIL import Image
import matplotlib.pyplot as plt
MAX_POINTS = 10
def main(argv):
default_file = 'tarczamala.jpg'
default_size = 600, 600
im = Image.open(default_file)
im = im.resize(default_size, Image.ANTIALIAS)
im.save('600' + default_file)
filename = argv[0] if len(argv) > 0 else '600' + default_file
# Loads an image
src = cv.imread(cv.samples.findFile(filename), cv.IMREAD_COLOR)
# Check if image is loaded fine
if src is None:
print ('Error opening image!')
print ('Usage: hough_circle.py [image_name -- default ' + default_file + '] \n')
return -1
# skala szarości
gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
cv.imshow('gray', gray)
# Bilateral
bilateral = cv.bilateralFilter(gray, 7, 15, 10)
cv.imshow('bilateral', bilateral)
blank = np.zeros(bilateral.shape[:2], dtype='uint8')
cv.imshow('blank', blank)
# mask = cv.circle(blank, (bilateral.shape[1] // 2, bilateral.shape[0] // 2), 320, 255, -1)
# cv.imshow('Mask', mask)
#
# masked = cv.bitwise_and(bilateral, bilateral, mask=mask)
# cv.imshow('masked', masked)
# Edge Cascade
canny = cv.Canny(bilateral, 50, 175)
cv.imshow('canny1', canny)
# ret, tresh = cv.threshold(gray, 125, 255, cv.THRESH_BINARY)
# cv.imshow('tresch', tresh)
contours, hierarchies = cv.findContours(canny, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
print(f'{len(contours)} contour(s) found')
# cv.drawContours(blank, contours, -1, (255,0,0), 1)
# cv.imshow('contours drawn', blank)
rows = canny.shape[0]
# Target
circles = cv.HoughCircles(canny, cv.HOUGH_GRADIENT, 1, 0.01,
param1=100, param2=50,
minRadius=7, maxRadius=300)
# print(f'{circles}"')
biggestCircle = findBiggestCircle(circles)
# print(f'{biggestCircle} biggest circle')
mask = cv.circle(blank, (math.floor(biggestCircle[0]), math.floor(biggestCircle[1])), math.floor(biggestCircle[2]), 255, -1)
cv.imshow('rysowanie granicy', mask)
masked = cv.bitwise_and(bilateral, bilateral, mask=mask)
cv.imshow('granice', masked)
# Edge Cascade
canny = cv.Canny(masked, 50, 175)
cv.imshow('canny2', canny)
if biggestCircle is not None:
circles = np.uint16(np.around(circles))
# print(f'{biggestCircle} biggest circle')
delta_r = biggestCircle[2] / 10
biggest_circle_center = [biggestCircle[0], biggestCircle[1]]
center = (math.floor(biggestCircle[0]), math.floor(biggestCircle[1]))
# print(f'{center} center')
# circle center
cv.circle(src, center, 1, (255, 0, 0), 3)
# circle outline
radius = math.floor(biggestCircle[2])
cv.circle(src, center, radius, (0, 0, 255), 3)
# bullet holes
hits = cv.HoughCircles(canny, cv.HOUGH_GRADIENT, 1, 10,
param1=300, param2=10,
minRadius=7, maxRadius=10)
# print(f'{hits}"')
score = countHitScore(hits.tolist(), delta_r, biggest_circle_center)
print(f'The score is: {score}"')
if hits is not None:
hits = np.uint16(np.around(hits))
for i in hits[0, :]:
# print(f'promien trafienia {i[2]}"')
center = (i[0], i[1])
# circle center
cv.circle(src, center, 1, (0, 100, 100), 3)
# circle outline
radius = i[2]
cv.circle(src, center, radius, (255, 0, 255), 3)
cv.imshow("detected circles", src)
cv.waitKey(0)
return 0
def findBiggestCircle(circles):
# print(f'{circles}')
listOfCircles = circles[0]
biggestCircle = listOfCircles[0]
for circle in listOfCircles:
# print(f'{circle} circle')
# print(f'2 {circle}')
# print(f'3 {biggestCircle}')
if circle[2] > biggestCircle[2]:
# print('4')
biggestCircle = circle
print(biggestCircle)
return biggestCircle.tolist()
def countHitScore(hits, delta_r, target_center):
score = 0
print(f'{hits} hits')
for hit in hits[0]:
# print(f'{hit} hit')
# print(f'{(target_center)} center')
x_dist = hit[0] - target_center[0] if hit[0] > target_center[0] else target_center[0] - hit[0]
y_dist = hit[1] - target_center[1] if hit[1] > target_center[1] else target_center[1] - hit[1]
total_dist = math.hypot(x_dist, y_dist) - hit[2]
punkty = math.ceil(total_dist / delta_r)
if punkty < 1:
punkty = 1
score += 11 - punkty
# print(f'{total_dist / delta_r} math')
# print(f'{total_dist / delta_r} total_dist / delta_r')
print(f'{11 - punkty} zdobyte punkty')
# print(f'{x_dist} x {y_dist} y')
return score
if __name__ == "__main__":
main(sys.argv[1:])

OpenCV Memory and CPU Usage

I'm making a USV (Unmanned Surface Vehicle) for my bachelor project. The track it needs to keep is made by having coloured buoys on the left/right side of the track and for obstacles.
So I need to track the depth of these objects and give all that information on to my navigation program.
And I have made a Python code using ROS and OpenCV to track these buoys with a ZED2 camera. But I'm having CPU and memory issues. Where the ubuntu desktop starts to lag.
Using a Nvidia Jetson Xavier NX and I’m using 85% of the CPU and 5,5+/7.59Gb Memory.
Anyone interested in looking over my code and see if I'm doing something stupid. That would explain my issues.
from __future__ import print_function
import roslib
import sys
import rospy
import cv2
from main.msg import VarRed, VarGreen, VarYellow, RedHSV, GreenHSV, YellowHSV, MidPoint
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
import imutils
import time
from collections import deque
import math
class image_converter:
def __init__(self):
self.image_subd = rospy.Subscriber("/zed2/zed_node/depth/depth_registered",Image,self.callbackDepth)
self.image_sub = rospy.Subscriber("/zed2/zed_node/rgb_raw/image_raw_color",Image,self.callbackVideo)
self.image_pub = rospy.Publisher("/Tracking/RG_image", Image, queue_size = 1)
self.RedHSV_sub = rospy.Subscriber("/Tracking/Red_HSV", RedHSV, self.redHSV)
self.GreenHSV_sub = rospy.Subscriber("/Tracking/Green_HSV", GreenHSV, self.greenHSV)
self.YellowHSV_sub = rospy.Subscriber("/Tracking/Yellow_HSV", YellowHSV, self.yellowHSV)
self.MidPoint_pub = rospy.Publisher("/Tracking/MidPoint", MidPoint, queue_size = 1)
self.red_bridge = CvBridge()
self.red_publisher = rospy.Publisher("/Tracking/red", VarRed, queue_size = 1)
self.green_bridge = CvBridge()
self.green_publisher = rospy.Publisher("/Tracking/green", VarGreen, queue_size = 1)
self.yellow_bridge = CvBridge()
self.yellow_publisher = rospy.Publisher("/Tracking/yellow", VarYellow, queue_size = 1)
self.RedLower = (0, 101, 68) # Declaring the red-specter
self.RedUpper = (15, 255, 255)
self.GreenLower = (75, 145, 48) # Declaring the green-specter
self.GreenUpper = (96, 255, 75)
self.YellowLower = (28, 56, 91) # Declaring the yellow-specter
self.YellowUpper = (51, 152, 150)
self.red_pts = deque(maxlen=14)
self.currentDepthImg=0
self.red_counter = 0
self.red_x = 0
self.red_y = 0
self.red_radius = 30
self.green_pts = deque(maxlen=14)
self.green_currentDepthImg=0
self.green_counter = 0
self.green_x = 0
self.green_y = 0
self.green_radius = 30
self.yellow_pts = deque(maxlen=14)
self.yellow_currentDepthImg=0
self.yellow_counter = 0
self.yellow_x = 0
self.yellow_y = 0
self.yellow_radius = 30
def redHSV(self,msg):
self.RedLower = (msg.r_h_low-10, msg.r_s_low-10, msg.r_v_low-10)
self.RedUpper = (msg.r_h_high+10, msg.r_s_high+10, msg.r_v_high+10)
def greenHSV(self,msg):
self.GreenLower = (msg.g_h_low-10, msg.g_s_low-10, msg.g_v_low-10)
self.GreenUpper = (msg.g_h_high+10, msg.g_s_high+10, msg.g_v_high+10)
def yellowHSV(self,msg):
self.YellowLower = (msg.y_h_low-10, msg.y_s_low-10, msg.y_v_low-10)
self.YellowUpper = (msg.y_h_high+10, msg.y_s_high+10, msg.y_v_high+10)
def callbackDepth(self,msg_depth):
try:
cv_image_depth = self.red_bridge.imgmsg_to_cv2(msg_depth, "32FC1") # CV_Bridge Depth
except CvBridgeError as e:
print(e)
return
self.currentDepthImg=cv_image_depth
try:
a=1
except CvBridgeError as e:
print(e)
return
def callbackVideo(self,data):
try:
cv_image = self.red_bridge.imgmsg_to_cv2(data, "bgr8") # CV_Bridge Video
except CvBridgeError as e:
print(e)
return
(rows,cols,channels) = cv_image.shape
frame = cv_image
blurred = cv2.GaussianBlur(frame, (21, 21), 0) # resize the frame, blur it, and convert it to the HSV (11,11), 0
hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV) # color space.
red_mask = cv2.inRange(hsv, self.RedLower, self.RedUpper) # Construct a mask for the color "red", then perform
red_mask = cv2.erode(red_mask, None, iterations=2) # a series of dilations and erosions to remove any small
red_mask = cv2.dilate(red_mask, None, iterations=2) # blobs thats left in the mask.
green_mask = cv2.inRange(hsv, self.GreenLower, self.GreenUpper) # construct a mask for the color "green", then perform
green_mask = cv2.erode(green_mask, None, iterations=2) # a series of dilations and erosions to remove any small
green_mask = cv2.dilate(green_mask, None, iterations=2) # blobs thats left in the mask.
yellow_mask = cv2.inRange(hsv, self.YellowLower, self.YellowUpper) # construct a mask for the color "yellow", then perform
yellow_mask = cv2.erode(yellow_mask, None, iterations=2) # a series of dilations and erosions to remove any small
yellow_mask = cv2.dilate(yellow_mask, None, iterations=2) # blobs thats left in the mask.
red_cnts = cv2.findContours(red_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
red_cnts = imutils.grab_contours(red_cnts)
red_center = None
self.red_radius = 0
green_cnts = cv2.findContours(green_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
green_cnts = imutils.grab_contours(green_cnts)
green_center = None
self.green_radius = 0
yellow_cnts = cv2.findContours(yellow_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # find contours in the mask and initialize the current
yellow_cnts = imutils.grab_contours(yellow_cnts)
yellow_center = None
self.yellow_radius = 0
cv_imaged=self.currentDepthImg
#-----------------------------------------RED_START------------------------------------------------------
if len(red_cnts) > 0: # only proceed if at least one contour was found
red_c = max(red_cnts, key=cv2.contourArea) # find the largest contour in the red_mask, then use
((self.red_x, self.red_y), self.red_radius) = cv2.minEnclosingCircle(red_c) # it to compute the minimum enclosing circle and
red_M = cv2.moments(red_c) # centroid
red_center = (int(red_M["m10"] / red_M["m00"]), int(red_M["m01"] / red_M["m00"]))
if self.red_radius > 5: # only proceed if the radius meets a minimum size
cv2.circle(frame, (int(self.red_x), int(self.red_y)), int(self.red_radius), (0, 255, 255), 2) # draw the circle and centroid on the red_frame,
cv2.circle(frame, red_center, 5, (0, 255, 255), -1) # then update the list of tracked points
msg = VarRed()
msg.r_visible = True
#if self.red_y == self.red_y and self.red_x == self.red_x:
r_length = cv_imaged[int(self.red_y),int(self.red_x)] # length to object
msg.r_x = self.red_x
msg.r_y = self.red_y
msg.r_rad = self.red_radius
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
# Printing pixel values
cv2.rectangle(frame, (0, 0), (200, 190), (0,0,0), -1)
cv2.putText(frame, str("L: %.3f" %r_length), ((int(self.red_x)),int(self.red_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,255), 2)
cv2.putText(frame, str("RX: %.1f" %msg.r_x +" px"), (10,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("RY: %.1f" %msg.r_y + " px"), (10,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For X-direction
red_l_cm = (r_length*100) # Converting to Centimeters
start_x_r = 960/(math.tan((55*ToRad))) # finding start x-length in px
ang_x_r = (math.atan((self.red_x-960)/start_x_r))*ToDeg # finding horizontal angle
red_x_cm = (red_l_cm*math.sin((ang_x_r)*ToRad))
cv2.putText(frame, str("RXC: %.1f" %red_x_cm + " cm"), (10,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("X Ang: %.1f" %ang_x_r), (10,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For Y-direction
start_y_r = 540/(math.tan((35*ToRad))) # finding start y-length in px
ang_y_r = ((math.atan((self.red_y-540)/start_y_r))*ToDeg)*-1 # finding vertical angle
red_y_cm = (red_l_cm/math.tan((ang_y_r*ToRad)+(math.pi/2)))*-1 # finding the y-length
cv2.putText(frame, str("RYC: %.1f" %red_y_cm + " cm"), (10,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("Y Ang: %.1f" %ang_y_r), (10,180), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
red_z = (math.cos(abs(ang_x_r)*ToRad))*red_l_cm
self.red_pts.appendleft(red_center)
msg.r_length = red_l_cm
msg.r_xc = red_x_cm
msg.r_yc = red_y_cm
msg.r_angle = ang_x_r # update the points queue
msg.r_z = red_z
self.red_publisher.publish(msg)
for i in range(1, len(self.red_pts)): # loop over the set of points
if self.red_pts[i - 1] is None or self.red_pts[i] is None: # if either of the tracked points
continue # are None, ignore them.
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5) # otherwise, compute the thickness of the line and
cv2.line(frame, self.red_pts[i - 1], self.red_pts[i], (0, 255, 255), thickness) # draw the connecting lines
if self.red_radius < 5:
msg = VarRed()
msg.r_visible = False
self.red_publisher.publish(msg)
#-----------------------------------------RED_END------------------------------------------------------
#-----------------------------------------GREEN_START------------------------------------------------------
if len(green_cnts) > 0: # same as in red, but for green
green_c = max(green_cnts, key=cv2.contourArea)
((self.green_x, self.green_y), self.green_radius) = cv2.minEnclosingCircle(green_c)
green_M = cv2.moments(green_c)
green_center = (int(green_M["m10"] / green_M["m00"]), int(green_M["m01"] / green_M["m00"]))
if self.green_radius > 5:
cv2.circle(frame, (int(self.green_x), int(self.green_y)), int(self.green_radius), (0, 255, 255), 2)
cv2.circle(frame, green_center, 5, (0, 255, 255), -1)
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
msg1 = VarGreen()
msg1.g_visible = True
g_length = cv_imaged[int(self.green_y),int(self.green_x)]
msg1.g_x = self.green_x
msg1.g_y = self.green_y
msg1.g_rad = self.green_radius
# Printing pixel values
cv2.rectangle(frame, (1740, 0), (1920, 200), (0,0,0), -1)
cv2.putText(frame, str("L: %.3f" %g_length), ((int(self.green_x)),int(self.green_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,255), 2)
cv2.putText(frame, str("GX: %.1f" %msg1.g_x +"px"), (1740,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("GY: %.1f" %msg1.g_y + "px"), (1740,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
# For X-direction
green_l_cm = (g_length*100)
start_x_g = 960/(math.tan((55*2*np.pi/360)))
ang_x_g = (math.atan((self.green_x-960)/start_x_g))*57.295779513
green_x_cm = (green_l_cm*math.sin((ang_x_g)*ToRad))
# For Y-direction
start_y_g = 540/(math.tan((35*2*np.pi/360)))
ang_y_g = ((math.atan((self.green_y-540)/start_y_g))*57.295779513)*-1
green_y_cm = green_l_cm/math.tan(ang_y_g*ToRad+(math.pi/2))*-1
cv2.putText(frame, str("GXC: %.1f" %green_x_cm + "cm"), (1740,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("X Ang: %.1f" %ang_x_g), (1740,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("GYC: %.1f" %green_y_cm + "cm"), (1740,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.putText(frame, str("Y Ang: %.1f" %ang_y_g), (1740,180), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
green_z = (math.cos(abs(ang_x_g)*ToRad))*green_l_cm
self.green_pts.appendleft(green_center)
msg1.g_length = green_l_cm
msg1.g_xc = green_x_cm
msg1.g_yc = green_y_cm
msg1.g_angle = ang_x_g
msg1.g_z = green_z
self.green_publisher.publish(msg1)
for i in range(1, len(self.green_pts)):
if self.green_pts[i - 1] is None or self.green_pts[i] is None:
continue
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5)
cv2.line(frame, self.green_pts[i - 1], self.green_pts[i], (0, 255, 255), thickness)
if self.green_radius < 5:
msg1 = VarGreen()
msg1.g_visible = False
self.green_publisher.publish(msg1)
#-----------------------------------------GREEN_END------------------------------------------------------
#-----------------------------------------YELLOW_START------------------------------------------------------
if len(yellow_cnts) > 0: # only proceed if at least one contour was found
yellow_c = max(yellow_cnts, key=cv2.contourArea) # find the largest contour in the yellow_mask, then use
((self.yellow_x, self.yellow_y), self.yellow_radius) = cv2.minEnclosingCircle(yellow_c) # it to compute the minimum enclosing circle and
yellow_M = cv2.moments(yellow_c) # centroid
yellow_center = (int(yellow_M["m10"] / yellow_M["m00"]), int(yellow_M["m01"] / yellow_M["m00"]))
if self.yellow_radius > 5: # only proceed if the radius meets a minimum size
cv2.circle(frame, (int(self.yellow_x), int(self.yellow_y)), int(self.yellow_radius), (0, 0, 200), 2) # draw the circle and centroid on the yellow_frame,
cv2.circle(frame, yellow_center, 5, (0, 0, 200), -1) # then update the list of tracked points
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
msg2 = VarYellow()
msg2.y_visible = True
y_length = cv_imaged[int(self.yellow_y),int(self.yellow_x)] # length to object
msg2.y_x = self.yellow_x
msg2.y_y = self.yellow_y
msg2.y_rad = self.yellow_radius
cv2.putText(frame, str("L: %.3f" %y_length), ((int(self.yellow_x)),int(self.yellow_y)), cv2.FONT_HERSHEY_COMPLEX, 1, (0,0,200), 2)
# For X-direction
yellow_l_cm = y_length*100 # Converting to Centimeters
start_x_y = 960/(math.tan((55*2*np.pi/360))) # finding start x-length in px
ang_x_y = (math.atan((self.yellow_x-960)/start_x_y))*57.295779513 # finding horizontal angle
#yellow_x = yellow_l_cm/math.tan((ang_x_y/57.295779513)) # finding the x-length
yellow_x_cm = (yellow_l_cm*math.sin((ang_x_y)*ToRad))
# For Y-direction
start_y_y = 540/(math.tan((35*2*np.pi/360))) # finding start y-length in px
ang_y_y = ((math.atan((self.yellow_y-540)/start_y_y))*57.295779513)*-1 # finding vertical angle
#yellow_y = yellow_l_cm/math.tan((ang_y_y/57.295779513)) # finding the y-length
yellow_y_cm = yellow_l_cm/math.tan(ang_y_y*ToRad+(math.pi/2))*-1
yellow_z = (math.cos(abs(ang_x_y)*ToRad))*yellow_l_cm
self.yellow_pts.appendleft(yellow_center)
msg2.y_length = yellow_l_cm
msg2.y_xc = yellow_x_cm
msg2.y_yc = yellow_y_cm
msg2.y_angle = ang_x_y # update the points queue
msg2.y_z = yellow_z
self.yellow_publisher.publish(msg2)
for i in range(1, len(self.yellow_pts)): # loop over the set of points
if self.yellow_pts[i - 1] is None or self.yellow_pts[i] is None: # if either of the tracked points
continue # are None, ignore them.
thickness = int(np.sqrt(64 / float(i + 1)) * 2.5) # otherwise, compute the thickness of the line and
cv2.line(frame, self.yellow_pts[i - 1], self.yellow_pts[i], (0, 0, 255), thickness) # draw the connecting lines
if self.yellow_radius < 5:
msg2 = VarYellow()
msg2.y_visible = False
self.yellow_publisher.publish(msg2)
#-----------------------------------------YELLOW_END------------------------------------------------------
try:
if (self.green_radius > 5) & (self.red_radius > 5): # if you can see both colors, proceed
ToRad = 2*np.pi/360 # = 0.01745329252
ToDeg = 360/(2*np.pi) # = 57.29577951308
red_z = (math.cos(abs(ang_x_r)*ToRad))*red_l_cm
green_z = (math.cos(abs(ang_x_g)*ToRad))*green_l_cm
Delta_z = abs(red_z-green_z)
Tot_x = abs(green_x_cm) + abs(red_x_cm)
if Delta_z == Delta_z and Tot_x == Tot_x:
red_green_angle = (math.atan(Delta_z/Tot_x))*ToDeg
normal_angle = red_green_angle
if green_l_cm >= red_l_cm:
normal_angle = red_green_angle*-1
if green_l_cm < red_l_cm:
normal_angle = red_green_angle
MidPoint_data = MidPoint()
MidPoint_data.angle = normal_angle
self.MidPoint_pub.publish(MidPoint_data)
length_between_x = math.sqrt((Tot_x*Tot_x)+(Delta_z*Delta_z))
Delta_y = abs(red_y_cm-green_y_cm)
length_between = math.sqrt((length_between_x*length_between_x)+(Delta_y*Delta_y))
#dx = green_x_cm - red_x_cm # Finding the space between the colors in x-direction
#dy = green_y_cm - red_y_cm # Finding the space between the colors in y-direction
# Calculating the direct length between the colors in cm
#cv2.rectangle(frame, (500, 0), (680, 160), (0,0,0), -1)
#cv2.putText(frame, str("Dist: %.1f" %length_between + " cm"), (500,30), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#Middle_x = dx
#Middle_y = dy
MP_X = (msg1.g_x + msg.r_x)/2
MP_Y = (msg1.g_y + msg.r_y)/2
#Middle_Point_Angle = (math.atan((MP_X-960)/start_x_g))*57.295779513
#Middle_Point_Angle = ang_x_g - ang_x_r
#Middle_Point_Length =(red_x_cm-abs(Middle_x))/(math.sin((math.pi/2)-(Middle_Point_Angle*((2*math.pi)/720))))
#Middle_Point_Length =((red_x_cm-abs(Middle_x))/(math.cos(Middle_Point_Angle*((2*math.pi)/720))))
#cv2.putText(frame, str("MX: %.1f" %Middle_x + " cm"), (500,60), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#cv2.putText(frame, str("MY: %.1f" %Middle_y + " cm"), (500,90), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
if MP_X == MP_X and MP_Y == MP_Y:
cv2.circle(frame, (int(MP_X), int(MP_Y)), 8, (0, 0, 255), -1)
#cv2.putText(frame, str("M_L: %.1f" %Middle_Point_Length + " cm"), (500,120), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
#cv2.putText(frame, str("M_ang: %.1f" %Middle_Point_Angle), (500,150), cv2.FONT_HERSHEY_COMPLEX, 0.8, (255,255,255), 1)
cv2.line(frame, (int(self.red_x),int(self.red_y)), (int(self.green_x),int(self.green_y)), (0, 0, 0), 2)
#MidPoint_data = MidPoint()
#MidPoint_data.z = Middle_Point_Length
#self.MidPoint_pub.publish(MidPoint_data)
cv2.line(frame, (960, 1280), (960, 0), (0, 255, 0), 1)
cv2.line(frame, (0, 540), (1920, 540), (0, 255, 0), 1)
self.image_pub.publish(self.red_bridge.cv2_to_imgmsg(frame, "bgr8"))
except CvBridgeError as e:
print(e)
return
def main(args):
ic = image_converter()
rospy.init_node('Color_Tracker', anonymous=True)
try:
rospy.spin()
except KeyboardInterrupt:
print("Shutting down")
cv2.destroyAllWindows()
if __name__ == '__main__':
main(sys.argv)

Categories

Resources