I have a pice of code which calculates and draws the dense optical flow:
import cv2 as cv
import numpy as np
cap = cv.VideoCapture(0)
def getFrame():
ret, img = cap.read()
img = cv.resize(img,(640,480))
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
return gray
def draw_flow(img, flow, step=16):
h, w = img.shape[:2]
y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
fx, fy = flow[y,x].T
lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
vis = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
cv.polylines(vis, lines, 0, (0, 255, 0))
for (x1, y1), (_x2, _y2) in lines:
cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
return vis
def main():
prevgray = getFrame()
while True:
gray = getFrame()
flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 10, 1.2, 0)
prevgray = gray
cv.imshow('flow', draw_flow(gray, flow))
cv.waitKey(1)
if __name__ == '__main__':
main()
Since this takes too long, I had the idea to calculate sift features, match them and then calculate the optical flow of the features.
Until now I have this:
import cv2 as cv
import numpy as np
cap = cv.VideoCapture(0)
sift = cv.xfeatures2d.SIFT_create()
bf = cv.BFMatcher()
def getFrame():
ret, img = cap.read()
img = cv.resize(img,(640,480))
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
return gray
def main():
prev_gray = getFrame()
prev_kp, prev_des = sift.detectAndCompute(prev_gray,None)
while True:
next_gray = getFrame()
next_kp, next_des = sift.detectAndCompute(next_gray,None)
matches = bf.knnMatch(prev_des, next_des, k=2)
# Apply ratio test
good = []
for m,n in matches:
if m.distance < 0.75*n.distance:
good.append([m])
#calc optical flow
prev_kp = next_kp
prev_des = next_des
prev_gray = next_gray
cv.waitKey(1)
if __name__ == '__main__':
main()
How can I calculate the optical flow of the sift features and draw them with the drwa_flow function in the first code?
I have also this code:
import numpy as np
import cv2 as cv
lk_params = dict( winSize = (15, 15),
maxLevel = 2,
criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
feature_params = dict( maxCorners = 500,
qualityLevel = 0.3,
minDistance = 7,
blockSize = 7 )
track_len = 5
tracks = []
cam = cv.VideoCapture(0)
def getFrame():
_ret, bgr = cam.read()
bgr = cv.resize(bgr, (640,480))
gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY)
return gray, bgr
while True:
gray, vis = getFrame()
if len(tracks) > 0:
img0, img1 = prev_gray, gray
p0 = np.float32([tr[-1] for tr in tracks]).reshape(-1, 1, 2)
p1, _st, _err = cv.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
p0r, _st, _err = cv.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
d = abs(p0-p0r).reshape(-1, 2).max(-1)
good = d < 1
new_tracks = []
for tr, (x, y), good_flag in zip(tracks, p1.reshape(-1, 2), good):
if not good_flag:
continue
tr.append((x, y))
if len(tr) > track_len:
del tr[0]
new_tracks.append(tr)
cv.circle(vis, (x, y), 2, (0, 255, 0), -1)
tracks = new_tracks
cv.polylines(vis, [np.int32(tr) for tr in tracks], False, (0, 255, 0))
mask = np.zeros_like(gray)
mask[:] = 255
for x, y in [np.int32(tr[-1]) for tr in tracks]:
cv.circle(mask, (x, y), 5, 0, -1)
p = cv.goodFeaturesToTrack(gray, mask = mask, **feature_params)
if p is not None:
for x, y in np.float32(p).reshape(-1, 2):
tracks.append([(x, y)])
prev_gray = gray
cv.imshow('lk_track', vis)
ch = cv.waitKey(1)
But with this code do not uses the sift features and also don't draws the flow like the first code...
Related
I am trying to detect drops inside the water, where at first I will detect the edges, but there are light spots in the image, which are also detected as drops.
Noting that the drops are white surrounded by a dark layer.
My code :
import cv2
import numpy as np
def unsharp_mask(img, blur_size = (5,5), imgWeight = 1.5, gaussianWeight = -0.5):
gaussian = cv2.GaussianBlur(img, (5,5), 0)
return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)
def clahe(img, clip_limit = 2.0):
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(5,5))
return clahe.apply(img)
def get_sobel(img, size = -1):
sobelx64f = cv2.Sobel(img,cv2.CV_64F,2,0,size)
abs_sobel64f = np.absolute(sobelx64f)
return np.uint8(abs_sobel64f)
img = cv2.imread("img_brightened.jpg")
# save color copy for visualizing
imgc = img.copy()
# resize image to make the analytics easier (a form of filtering)
resize_times = 1.5
img = cv2.resize(img, None, img, fx = 1 / resize_times, fy = 1 / resize_times)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Input", img)
# use sobel operator to evaluate high frequencies
sobel = get_sobel(img)
# experimentally calculated function - needs refining
clip_limit = (-2.556) * np.sum(sobel)/(img.shape[0] * img.shape[1]) + 26.557
# don't apply clahe if there is enough high freq to find blobs
if(clip_limit < 1.0):
clip_limit = 0.1
# limit clahe if there's not enough details - needs more tests
if(clip_limit > 8.0):
clip_limit = 8
# apply clahe and unsharp mask to improve high frequencies as much as possible
img = clahe(img, clip_limit)
img = unsharp_mask(img)
# filter the image to ensure edge continuity and perform Canny
img_blurred = (cv2.GaussianBlur(img, (2*2+1,2*2+1), 0))
canny = cv2.Canny(img_blurred, 100, 255)
cv2.imshow("Output", canny)
cv2.waitKey(0)
Result
I used codes from https://github.com/kavyamusty/Shading-removal-of-images/blob/master/Article%20submission.ipynb, which works for removing shadows first, then the cv2.HoughCircles to find the circles.
The codes as below :
import cv2
import numpy as np
import matplotlib.pyplot as plt
def max_filtering(N, I_temp):
wall = np.full((I_temp.shape[0]+(N//2)*2, I_temp.shape[1]+(N//2)*2), -1)
wall[(N//2):wall.shape[0]-(N//2), (N//2):wall.shape[1]-(N//2)] = I_temp.copy()
temp = np.full((I_temp.shape[0]+(N//2)*2, I_temp.shape[1]+(N//2)*2), -1)
for y in range(0,wall.shape[0]):
for x in range(0,wall.shape[1]):
if wall[y,x]!=-1:
window = wall[y-(N//2):y+(N//2)+1,x-(N//2):x+(N//2)+1]
num = np.amax(window)
temp[y,x] = num
A = temp[(N//2):wall.shape[0]-(N//2), (N//2):wall.shape[1]-(N//2)].copy()
return A
def min_filtering(N, A):
wall_min = np.full((A.shape[0]+(N//2)*2, A.shape[1]+(N//2)*2), 300)
wall_min[(N//2):wall_min.shape[0]-(N//2), (N//2):wall_min.shape[1]-(N//2)] = A.copy()
temp_min = np.full((A.shape[0]+(N//2)*2, A.shape[1]+(N//2)*2), 300)
for y in range(0,wall_min.shape[0]):
for x in range(0,wall_min.shape[1]):
if wall_min[y,x]!=300:
window_min = wall_min[y-(N//2):y+(N//2)+1,x-(N//2):x+(N//2)+1]
num_min = np.amin(window_min)
temp_min[y,x] = num_min
B = temp_min[(N//2):wall_min.shape[0]-(N//2), (N//2):wall_min.shape[1]-(N//2)].copy()
return B
def background_subtraction(I, B):
O = I - B
norm_img = cv2.normalize(O, None, 0,255, norm_type=cv2.NORM_MINMAX)
return norm_img
def min_max_filtering(M, N, I):
if M == 0:
#max_filtering
A = max_filtering(N, I)
#min_filtering
B = min_filtering(N, A)
#subtraction
normalised_img = background_subtraction(I, B)
elif M == 1:
#min_filtering
A = min_filtering(N, I)
#max_filtering
B = max_filtering(N, A)
#subtraction
normalised_img = background_subtraction(I, B)
return normalised_img
# Read Image
img = cv2.imread(r"D:/Image.jpg")
# Copy origin image
cimg = img.copy()
# Initialization array of uint8
img_remove_shadow = np.zeros(np.shape(img), dtype="uint8")
for i in range(np.shape(img)[2]):
img_remove_shadow[:, :, i] = np.array(min_max_filtering(M = 0, N = 20, I = img[:, :, i]))
# Using median blur
img = cv2.medianBlur(img_remove_shadow,5)
# Change to gray image
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Removing Shadow", img)
# Find circles
circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, 1, 5, np.array([]), 40, 23, 5,20)
circles = np.uint16(np.around(circles))
for i in circles[0,:]:
# draw the outer circle
cv2.circle(cimg,(i[0],i[1]),i[2],(0,255,0),2)
# draw the center of the circle
cv2.circle(cimg,(i[0],i[1]),2,(0,0,255),3)
cv2.imshow('detected circles',cimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
The result is as below:
PS: The codes take 11.74s running time, I would appreciate it if someone could optimize the code.
I am trying to detect objects in a certain area using yolov7 and deepSORT algorithm, but in the results I get, I see that the IDs are always changing. I leave 3 photos for you to understand.
As you can see the IDs are different in all frames.
`
#class base virtual zone tracking
import random
import torch
import numpy as np
from models.experimental import attempt_load
from utils.torch_utils import TracedModel
from utils.datasets import letterbox
from utils.plots import plot_one_box, plot_one_box_center
from utils.general import check_img_size, non_max_suppression, scale_coords
import cv2
import time
from google.colab.patches import cv2_imshow
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
#deep sort
import os
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
from tensorflow.compat.v1 import ConfigProto
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection
import matplotlib.pyplot as plt
from deep_sort import preprocessing, nn_matching
from tracking_helpers import read_class_names, create_box_encoder
from detection_helpers import *
class YOLOv7:
def __init__(self, weights: str, image_size:int,device:str):
self.device = device
self.weights = weights
self.model = attempt_load(self.weights, map_location=self.device) # Model Load FP32
self.stride = int(self.model.stride.max())
self.image_size = check_img_size(image_size, self.stride)
if self.device != 'cpu':
self.half = True
else:
self.half = False
if self.half:
self.model.half() # FP16
self.names = self.model.module.names if hasattr(self.model , 'module') else self.model.names
color_values = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(self.names))]
self.colors = {i:color_values[i] for i in range(len(self.names))}
def detect(self, raw_image: np.ndarray, conf_thresh =0.45, iou_thresh =0.45, classes = [0]): #default class people
# Run inference
if self.device != 'cpu':
self.model(torch.zeros(1, 3, self.image_size, self.image_size).to(self.device).type_as(next(self.model.parameters())))
with torch.no_grad():
image = letterbox(raw_image, self.image_size, stride=self.stride)[0]
image = image[:, :, ::-1].transpose(2, 0, 1)
image = np.ascontiguousarray(image)
image = torch.from_numpy(image).to(self.device)
image = image.half() if self.half else image.float()
image /= 255.0
if image.ndimension() == 3:
image = image.unsqueeze(0)
# Inference
detections = self.model(image, augment=False)[0]
# Apply NMS
detections = non_max_suppression(detections, conf_thresh, iou_thresh, classes=classes, agnostic=False)[0]
# Rescale boxes from img_size to raw image size
detections[:, :4] = scale_coords(image.shape[2:], detections[:, :4], raw_image.shape).round()
return detections
def tracking(self, video_frame, yolo_dets, inside_poly = True, count_objects:bool=False,verbose=False, reID_model_path = "./deep_sort/model_weights/mars-small128.pb", nms_max_overlap:float=1.0, max_cosine_distance:float=0.4, nn_budget:float=None):
class_names = read_class_names()
encoder = create_box_encoder(reID_model_path, batch_size=1)
nms_max_overlap = nms_max_overlap
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric)
*xyxy, conf, cls = yolo_dets
frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
if yolo_dets is None:
bboxes = []
scores = []
classes = []
num_objects = 0
else:
bboxes = yolo_dets[:,:4]
bboxes[:,2] = bboxes[:,2] - bboxes[:,0] # convert from xyxy to xywh
bboxes[:,3] = bboxes[:,3] - bboxes[:,1]
scores = yolo_dets[:,4]
classes = yolo_dets[:,-1]
num_objects = bboxes.shape[0]
#how many object you track
names = []
for i in range(num_objects): # loop through objects and use class index to get class name
class_indx = int(classes[i])
class_name = class_names[class_indx]
names.append(class_name)
names = np.array(names)
count = len(names)
if count_objects:
cv2.putText(frame, "both inside and outside the polygon detection: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 2)
# DeepSORT tacker work starts here
features = encoder(frame, bboxes) # encode detections and feed to tracker. [No of BB / detections per frame, embed_size]
detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)] # [No of BB per frame] deep_sort.detection.Detection object
cmap = plt.get_cmap('tab20b') #initialize color map
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
boxs = np.array([d.tlwh for d in detections]) # run non-maxima supression below
scores = np.array([d.confidence for d in detections])
classes = np.array([d.class_name for d in detections])
indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
detections = [detections[i] for i in indices]
tracker.predict() # Call the tracker
tracker.update(detections) # updtate using Kalman Gain
for track in tracker.tracks: # update new findings AKA tracks
#if not track.is_confirmed() or track.time_since_update > 1:
#continue
bbox = track.to_tlbr()
class_name = track.get_class()
color = colors[int(track.track_id) % len(colors)] # draw bbox on screen
color = [i * 255 for i in color]
#drawing poly
#pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
#frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
#creating poly
#poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
#center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
#point = Point(center)
if inside_poly:
#drawing poly
pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
#creating poly
poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
point = Point(center)
if poli.contains(point):
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
cv2.putText(frame, class_name + " : " + str(track.track_id),(int(bbox[0]), int(bbox[1]-11)),0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
cv2.putText(frame, "0", center,0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
else:
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
cv2.putText(frame, class_name + " : " + str(track.track_id),(int(bbox[0]), int(bbox[1]-11)),0, 0.6, (255,255,255),1, lineType=cv2.LINE_AA)
if verbose == 2:
print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
result = np.asarray(frame)
result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
return result
if __name__=='__main__':
yolov7=YOLOv7(weights='yolov7x.pt', device='cpu', image_size=800)
cap = cv2.VideoCapture('street5sn.mp4')
torch.cuda.empty_cache()
#writer
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # by default VideoCapture returns float instead of int
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*"DIVX")
out = cv2.VideoWriter("./output/video_out_track5sn-d2.mp4", codec, fps, (width, height))
while True:
t1 = time.time()
ret, frame = cap.read()
if not ret:
break
detections=yolov7.detect(frame)
vir = yolov7.tracking(frame, detections, count_objects = True, inside_poly = False)
out.write(vir)
cv2_imshow(vir) #colab imshow kodu
print("add frame ...")
if cv2.waitKey(1) & 0xFF == ord('q'):
break
out.release()
cap.release()
cv2.destroyAllWindows()
`
I use this repo repo
I did not make any changes to other files.
i have a 7 minute video. from this video i extract rgb data from the set ROI. Im using Mediapipe facemesh to track face and set the ROI.
however, this evaluation takes several minutes. What can I do to speed this up? Or what am I doing wrong?
can it be that it is because of the facemesh initialization that it has to re-identify the face in each frame and this is the reason for the long duration? How else should I solve this?
cap = cv2.VideoCapture("Video.mp4")
red, image = cap.read()
total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# print toal number of frames
print("total number of Frames: ",total)
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
while red:
red, image = cap.read()
height, width, _ = image.shape
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
image.flags.writeable = False
if image is None:
continue
processed_img = face_mesh.process(image)
# Draw the face mesh annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # convert the RGB image to BGR.
if processed_img.multi_face_landmarks:
for face_landmarks in processed_img.multi_face_landmarks:
landmark_points = []
for i in range(0, 468):
x = int(face_landmarks.landmark[i].x * width)
y = int(face_landmarks.landmark[i].y * height)
p = [x, y]
landmark_points.append([x, y])
forehead = np.array((
landmark_points[9], landmark_points[107], landmark_points[66], landmark_points[105],
landmark_points[104], landmark_points[103],
landmark_points[67], landmark_points[109], landmark_points[10],
landmark_points[338], landmark_points[297], landmark_points[332],
landmark_points[333], landmark_points[334], landmark_points[296],
landmark_points[336]))
left_cheek = np.array((landmark_points[266], landmark_points[426], landmark_points[436],
landmark_points[416], landmark_points[376],
landmark_points[352], landmark_points[347], landmark_points[330]))
right_cheek = np.array((landmark_points[36], landmark_points[206], landmark_points[216],
landmark_points[192], landmark_points[147],
landmark_points[123], landmark_points[117], landmark_points[118],
landmark_points[101]))
forehead_New = np.array((landmark_points[109],landmark_points[10],landmark_points[338],landmark_points[337],landmark_points[336],landmark_points[285],landmark_points[417],
landmark_points[168],landmark_points[193],landmark_points[55],landmark_points[107],landmark_points[108]))
rightCheek_New = np.array((landmark_points[355],landmark_points[329],landmark_points[348],landmark_points[347],landmark_points[346],landmark_points[345],
landmark_points[352],landmark_points[280],landmark_points[266],landmark_points[371]))
leftCheek_New = np.array((landmark_points[116],landmark_points[117],landmark_points[118],landmark_points[119],landmark_points[100],landmark_points[126],
landmark_points[142],landmark_points[36],landmark_points[50],landmark_points[123]))
cv2.polylines(image, [forehead_New], True, (0, 255, 255), 2)
cv2.polylines(image, [leftCheek_New], True, (0, 255, 255), 2)
cv2.polylines(image, [rightCheek_New], True, (0, 255, 255), 2)
mask = np.zeros((height, width), dtype=np.uint8)
cv2.fillPoly(mask, [forehead_New, leftCheek_New, rightCheek_New], (255))
crop_img = cv2.bitwise_and(image, image, mask=mask)
b, g, r = cv2.split(crop_img)
indices_list = np.where(np.any(crop_img != [0, 0, 0], axis=-1))
roi_pixel_img =crop_img[indices_list]
r = (roi_pixel_img == [0,255,255]).all(axis = -1)
roi_pixel_img = roi_pixel_img[~r]
b_plot.append(roi_pixel_img[:, 0].mean())
g_plot.append(roi_pixel_img[:, 1].mean()) # -//- ... green-channel
r_plot.append(roi_pixel_img[:, 2].mean()) # -//- ... red-channel
frame_count += 1
print("Frame_progress:", frame_count, "of: ", total)
t_plot.append(round(time_count))
time_count += (1000 / fps)
# Draw the face mesh on the image
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
mean_rgb = np.vstack((red, green, blue)).T
I'm building a neural network model and the detection.py file is really slow, regarding the speed of the output video (it is literally frame by frame).
I tried adjusting a few things, though still the same result - slow video. What can be the problem? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *
seed = 11
rnd.seed(seed)
np.random.seed(seed)
videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)
model = make_model()
model.load_weights("weights/weights_best.h5")
lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize=30
while True:
ret, frame = cap.read()
if ret == False:
print("Done")
break
#convert image to HSV from BGR
img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
#find the pixels that correspond to the road
img_out = cv2.inRange(img_hsv, lower, upper)
#clean from noisy pixels and keep the largest connected segment
img_out = post_process(img_out)
image_masked = frame.copy()
#get masked image
image_masked[img_out==0] = (0,0,0)
s=0.25
#resize images for computational efficiency
frame = cv2.resize(frame, None, fx=s, fy=s)
image_masked = cv2.resize(image_masked, None, fx=s, fy=s)
#run the sliding window detection process
bbox_list, totalWindows, correct, score = detectionProcess(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50, depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)
#draw the detections
drawBoxes(frame, bbox_list)
#draw detections and road masks
cv2.imshow("video", sidebyside(frame, image_masked))
k = cv2.waitKey(3)
if(k & 0xFF == ord('q')):
cv2.destroyWindow("video")
break
cap.release()
cv2.destroyAllWindows()
here is the utils.py file where I wrote the post_process and detectionProcess:
import numpy as np
import cv2
def scale_to_image(x, a=0, b=255):
#min-max scaling for grayscale images
ma=(np.max(x))
if(ma==0):
return x.astype(np.uint8)
mi=(np.min(x))
normalized_data = ((x.astype(np.float)-float(mi))/float(ma)) #normalize 0-1
normalized_data = (normalized_data*b + a*(1-normalized_data)) #scale values
return normalized_data.astype(np.uint8)
def nothing(x):
pass
def channels3(x):
#stack grayscale images together to increase the color channels to 3
return np.dstack((x,x,x))
def sidebyside(x,y):
#concatenate images side by side (horizontally)
return np.concatenate((x,y), axis=1)
def updown(x,y):
#concatenate images up and down (vertically)
return np.concatenate((x,y), axis=0)
def extractLargerSegment(maskROAD):
contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, contours=None, hierarchy=None)
maxA = 0
maskTemp = np.zeros_like(maskROAD)
if(len(contours) > 0):
for h, cnt in enumerate(contours):
if(cv2.contourArea(cnt) > maxA):
cntMax = cnt
maxA = cv2.contourArea(cnt)
mask = np.zeros(maskROAD.shape, np.uint8)
cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
return maskROAD
def post_process(img):
kernel = np.ones((5,5), np.uint8)
img_out = cv2.erode(img, kernel, iterations=3)
kernel = np.ones((20,20), np.uint8)
img_out = cv2.dilate(img_out, kernel, iterations=5)
img_out = extractLargerSegment(img_out)
return img_out
def display(img_init, img_hsv, img_out2, img_out):
mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv), sidebyside(channels3(img_out), channels3(img_out2))))
def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
index=0
totalWindows=0
correct=0
bbox_list = []
score = []
for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
scale = frame.shape[0]/resized.shape[0]
for (x,y,window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
if window.shape[0] != winH or window.shape[1] != winW:
continue
if(depth==1):
window=cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
window=np.expand_dims(window,3)
window = window[None, :, :, :]
totalWindows += 1
class_out = model.predict((window.astype(np.float32))/255., batch_size=1)[0]
if(class_out<thres_score):
bbox_list.append(((int(x*scale)), int(y*scale), int((x+winW)*scale), int((y+winH)*scale)))
score.append(class_out)
correct+=1
index+=1
return bbox_list, totalWindows, correct, score
def sliding_window(image, stepSize, windowSize):
#slide a window accross the image
for y in range(0, image.shape[0], stepSize):
for x in range(0, image.shape[1], stepSize):
yield(x, y, image[y:y + windowSize[1], x:x + windowSize[0]])
def pyramid(image, scale=1.5, minSize=(30,30), nb_images=3):
yield image
count=0
#keep looping over the pyramid
while True:
#compute new dimensions of an image and resize it
w = int(image.shape[1]/scale)
h = int(image.shape[0]/scale)
image = cv2.resize(image, (w,h))
count += 1
scale = np.power((1/scale), count)
if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
break
yield image
def drawBoxes(frame, bbox_list):
for i in range(len(bbox_list)):
box = bbox_list[i]
cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
return frame
first of all, I am new to programming though I would like to learn especially python. my background in animation and CGI.
I have python 2.7 and openCV x64 installed on windows. I tested optical flow example they have (opt_flow.py) (the green arrows) I like that, but I am trying to understand how I can get the data out as values. I am not interested in seeing the camera output or the green arrows I just want the data out to use it later.is there a way to do that?
for example: the value of x, y and the length of the green arrows.
Thank you all
You can get the optical flow vectors (green arrows) in the draw_flow function of opt_flow.py. Here is how I would do it :
#!/usr/bin/env python
'''
example to show optical flow
USAGE: opt_flow.py [<video_source>]
Keys:
1 - toggle HSV flow visualization
2 - toggle glitch
Keys:
ESC - exit
'''
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import math
import cv2
import video
def draw_flow(img, flow, step=16):
global arrows
h, w = img.shape[:2]
y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
fx, fy = flow[y,x].T
lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.polylines(vis, lines, 0, (0, 255, 0))
for (x1, y1), (x2, y2) in lines:
arrows.append([x1,y1, math.sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1))])
cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
return vis
def draw_hsv(flow):
h, w = flow.shape[:2]
fx, fy = flow[:,:,0], flow[:,:,1]
ang = np.arctan2(fy, fx) + np.pi
v = np.sqrt(fx*fx+fy*fy)
hsv = np.zeros((h, w, 3), np.uint8)
hsv[...,0] = ang*(180/np.pi/2)
hsv[...,1] = 255
hsv[...,2] = np.minimum(v*4, 255)
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
return bgr
def warp_flow(img, flow):
h, w = flow.shape[:2]
flow = -flow
flow[:,:,0] += np.arange(w)
flow[:,:,1] += np.arange(h)[:,np.newaxis]
res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
return res
if __name__ == '__main__':
import sys
print(__doc__)
try:
fn = sys.argv[1]
except IndexError:
fn = 0
arrows = []
cam = video.create_capture(fn)
ret, prev = cam.read()
prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
show_hsv = False
show_glitch = False
cur_glitch = prev.copy()
while True:
ret, img = cam.read()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
flow = cv2.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
prevgray = gray
arrows.clear()
finalImg = draw_flow(gray,flow)
print(arrows)
cv2.imshow('flow', finalImg)
if show_hsv:
cv2.imshow('flow HSV', draw_hsv(flow))
if show_glitch:
cur_glitch = warp_flow(cur_glitch, flow)
cv2.imshow('glitch', cur_glitch)
ch = cv2.waitKey(5)
if ch == 27:
break
if ch == ord('1'):
show_hsv = not show_hsv
print('HSV flow visualization is', ['off', 'on'][show_hsv])
if ch == ord('2'):
show_glitch = not show_glitch
if show_glitch:
cur_glitch = img.copy()
print('glitch is', ['off', 'on'][show_glitch])
cv2.destroyAllWindows()
In the code above, I'm saving the optical flow vectors (start point coordinates and vector length) in the global variable arrows like so :
arrows.append([x1,y1, math.sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1))])
with (x1, y1) the arrow's start point and (x2, y2) the arrow's end point.
Hope it helps.