Currently, I'm trying to perform motion detection with OpenCV. With each new frame, I use the function below to compare it with the previous frame:
def detect(new_frame, kernel_size):
    global old_frame  # previous blurred frame, kept between calls
    frame = cv2.cvtColor(new_frame, cv2.COLOR_BGR2GRAY)  # grayscale conversion of the frame
    frame = cv2.GaussianBlur(frame, (kernel_size, kernel_size), 0)
    deltaFrame = cv2.absdiff(old_frame, frame)
    old_frame = frame
    threshFrame = cv2.threshold(deltaFrame, 5, 255, cv2.THRESH_BINARY)[1]
    threshFrame = cv2.dilate(threshFrame, None, iterations=2)
    (cnts, _) = cv2.findContours(threshFrame.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return cnts
My problem is that I have to detect motion for two types of objects, and each type has its own efficient kernel size for that function (e.g. 5 and 11). So I have to call the function twice for each new frame. But my device has limited resources, so I want to reduce this processing as much as I can. How can I do it?
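One way to cut the cost (my own sketch, not from the post; it assumes OpenCV 4.x, where findContours returns two values) is to do the grayscale conversion once per frame and keep a separate previous blurred frame per kernel size, so only the blur/threshold/contour steps run twice:

import cv2

prev = {5: None, 11: None}  # hypothetical per-kernel history of blurred frames

def detect_multi(new_frame, kernel_sizes=(5, 11)):
    gray = cv2.cvtColor(new_frame, cv2.COLOR_BGR2GRAY)  # shared grayscale conversion, done once
    results = {}
    for k in kernel_sizes:
        blurred = cv2.GaussianBlur(gray, (k, k), 0)
        if prev[k] is None:          # first frame: nothing to diff against yet
            prev[k] = blurred
            results[k] = []
            continue
        delta = cv2.absdiff(prev[k], blurred)
        prev[k] = blurred
        thresh = cv2.threshold(delta, 5, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.dilate(thresh, None, iterations=2)
        cnts, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        results[k] = cnts
    return results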
Try bitwise functions over masks. Detect which pixels are moving; it's fast.
The trick for me is to work with small, resized copies of the frame.
import numpy as np
import cv2

fid = 0
video_path = "videos/example.mp4"
cap = cv2.VideoCapture(video_path)

# Some characteristics of the original video
w_frame, h_frame = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps, num_frames = cap.get(cv2.CAP_PROP_FPS), cap.get(cv2.CAP_PROP_FRAME_COUNT)
print(fps, w_frame, h_frame)

x, y, h, w = 0, 0, h_frame, w_frame
fnum = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = gray
    if fnum == 0:
        last_edges = edges.copy()

    ret, mask1 = cv2.threshold(edges, 127, 255, cv2.THRESH_BINARY)
    ret, mask2 = cv2.threshold(last_edges, 127, 255, cv2.THRESH_BINARY)
    dst1 = cv2.bitwise_and(mask2, mask1)
    dst2 = cv2.bitwise_not(dst1)
    dst4 = cv2.bitwise_and(dst2, dst2, mask=mask1)

    scale_percent = 10  # percent of original size
    width = int(dst4.shape[1] * scale_percent / 100)
    height = int(dst4.shape[0] * scale_percent / 100)
    dim = (width, height)
    # resize image
    mini = cv2.resize(dst4, dim, interpolation=cv2.INTER_AREA)
    h, w = mini.shape

    th = 30  # my threshold
    points = []
    for y in range(0, len(mini) - 3, 4):            # stop 3 pixels early so y+3 stays in range
        for x in range(0, len(mini[y]) - 3, 4):     # likewise for x+3
            c1 = mini[y][x] > th and mini[y][x+1] > th and mini[y][x+2] > th and mini[y][x+3] > th
            c2 = mini[y][x] > th and mini[y+1][x] > th and mini[y+2][x] > th and mini[y+3][x] > th
            if c1 or c2:
                start_point = (x*scale_percent, y*scale_percent)
                points.append(start_point)
                color1 = (0, 0, 255)
                color2 = (0, 255, 255)
                thickness = 2
                cv2.circle(frame, start_point, 20, color1, thickness)

    if len(points) >= 2:
        cx1, cy1 = points[0][0], points[0][1]
        cx2, cy2 = points[-1][0], points[-1][1]
        cxmin = min(cx1, cx2)
        cymin = min(cy1, cy2)
        cxmax = max(cx1, cx2)
        cymax = max(cy1, cy2)
        print(cymin, cymax, '--', cxmin, cxmax)
        cv2.rectangle(frame, (cxmin, cymin), (cxmax, cymax), color2, thickness)

    # Display the resulting frame
    cv2.imshow('frame4', frame)
    cv2.imshow('framemin', mini)
    last_edges = edges.copy()
    fnum += 1
    if cv2.waitKey(33) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
You can apply your own masks to detect one object or the other, and play with the blur values too.
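As one hypothetical way to "play with blur values" (my own sketch, not part of the answer; it assumes dst4 is the 8-bit motion mask from the loop above): blur the same mask with two kernel sizes, so that small moving regions survive only the light blur while large ones survive both.

small_blur = cv2.GaussianBlur(dst4, (5, 5), 0)
large_blur = cv2.GaussianBlur(dst4, (11, 11), 0)
_, mask_small = cv2.threshold(small_blur, 30, 255, cv2.THRESH_BINARY)
_, mask_large = cv2.threshold(large_blur, 30, 255, cv2.THRESH_BINARY)
# regions that survive the heavy blur belong to larger moving objects;
# the bitwise difference keeps only the finer motion
mask_fine = cv2.bitwise_and(mask_small, cv2.bitwise_not(mask_large))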
I would like to realize a project for a gun club. The goal is to detect and measure shots on a target and count the points. My thoughts about this project are as follows:
apply a region of interest to focus onto the target
apply filters on the camera stream to get a sharp boundary around the black center
define the diameter since it is known
get the center of the boundary and store it as a reference point
detect shots and get the radial angle and distance relative to the reference point and the known diameter, and hence the value of the shot (see the sketch after this list)
show the last shots with a circle and their values on the screen
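For the radial/distance step above, here is a minimal sketch of the math (my own illustration, not part of the project code; all names are hypothetical), assuming both the real target diameter in millimetres and its apparent diameter in pixels are known:

import numpy as np

def shot_polar(shot_xy, center_xy, target_diameter_mm, target_diameter_px):
    # convert pixels to millimetres using the known target diameter
    mm_per_px = target_diameter_mm / target_diameter_px
    dx = shot_xy[0] - center_xy[0]
    dy = shot_xy[1] - center_xy[1]
    distance_mm = np.hypot(dx, dy) * mm_per_px           # radial distance from the reference point
    angle_deg = np.degrees(np.arctan2(-dy, dx)) % 360    # 0 deg = 3 o'clock, counter-clockwise (image y grows downward)
    return distance_mm, angle_deg

The distance can then be compared against the known ring radii to score the shot.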
What I got so far is this:
[image: edge detection and center point]
Explanation of radial and distance:
[image: get the radial and distance]
Screen with circles and values:
[image: goal]
import cv2
import numpy as np
import imutils

# declare variables
framewidth = 1920
frameheight = 1080
RTSP_URL = 'rtsp://xxxxxx:xxxxxxxx#192.168.1.64:554/Streaming/channels/1'

cap = cv2.VideoCapture(RTSP_URL, cv2.CAP_FFMPEG)
cap.set(3, framewidth)
cap.set(4, frameheight)
if not cap.isOpened():
    print('Cannot open RTSP stream')
    exit(-1)

# pseudo function for the trackbar callback
def empty(a):
    pass

# sliders
cv2.namedWindow("Parameters")
cv2.resizeWindow("Parameters", 640, 240)
cv2.createTrackbar("Threshold1", "Parameters", 16, 255, empty)
cv2.createTrackbar("Threshold2", "Parameters", 192, 255, empty)
cv2.createTrackbar("Threshold3", "Parameters", 243, 255, empty)
cv2.createTrackbar("Threshold4", "Parameters", 255, 255, empty)

# image stack
def stackImages(scale, imgArray):
    rows = len(imgArray)
    cols = len(imgArray[0])
    rowsAvailable = isinstance(imgArray[0], list)
    width = imgArray[0][0].shape[1]
    height = imgArray[0][0].shape[0]
    if rowsAvailable:
        for x in range(0, rows):
            for y in range(0, cols):
                if imgArray[x][y].shape[:2] == imgArray[0][0].shape[:2]:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
                else:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (imgArray[0][0].shape[1], imgArray[0][0].shape[0]), None, scale, scale)
                if len(imgArray[x][y].shape) == 2:
                    imgArray[x][y] = cv2.cvtColor(imgArray[x][y], cv2.COLOR_GRAY2BGR)
        imageBlank = np.zeros((height, width, 3), np.uint8)
        hor = [imageBlank] * rows
        hor_con = [imageBlank] * rows
        for x in range(0, rows):
            hor[x] = np.hstack(imgArray[x])
        ver = np.vstack(hor)
    else:
        for x in range(0, rows):
            if imgArray[x].shape[:2] == imgArray[0].shape[:2]:
                imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
            else:
                imgArray[x] = cv2.resize(imgArray[x], (imgArray[0].shape[1], imgArray[0].shape[0]), None, scale, scale)
            if len(imgArray[x].shape) == 2:
                imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
        hor = np.hstack(imgArray)
        ver = hor
    return ver

def getContours(imgDil, imgContour):
    contours, hierarchy = cv2.findContours(imgDil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        area = cv2.contourArea(cnt)
        # compute the center of the contour (skip degenerate contours to avoid division by zero)
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
        # draw the contour and the center of the shape on the image
        if area > 5000:
            cv2.drawContours(imgContour, cnt, -1, (255, 0, 255), 3)
            cv2.circle(imgContour, (cX, cY), 7, (255, 0, 255), -1)
            cv2.putText(imgContour, "center", (cX - 20, cY - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)

while True:
    success, img = cap.read()
    imgContour = img.copy()
    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    threshold1 = cv2.getTrackbarPos("Threshold1", "Parameters")
    threshold2 = cv2.getTrackbarPos("Threshold2", "Parameters")
    threshold3 = cv2.getTrackbarPos("Threshold3", "Parameters")
    threshold4 = cv2.getTrackbarPos("Threshold4", "Parameters")
    ret, thresh = cv2.threshold(imgGray, threshold1, threshold2, 1)
    imgCanny = cv2.Canny(imgGray, threshold3, threshold4)
    kernel = np.ones((3, 3))
    imgDil = cv2.dilate(thresh, kernel, iterations=1)
    getContours(imgDil, imgContour)
    imgStack = stackImages(0.4, ([img, imgGray, thresh], [imgCanny, img, imgContour]))
    cv2.imshow('Result', imgStack)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
I really appreciate any suggestions on best practices and of course any help. Should I rather go for a stereo camera with depth recognition, like the Oak-D, since I think detecting a shot in the black target area could be challenging?
I'm building a neural network model, and the detection.py file is really slow; the output video plays literally frame by frame.
I tried adjusting a few things, but I still get the same result: a slow video. What can the problem be? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *

seed = 11
rnd.seed(seed)
np.random.seed(seed)

videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)

model = make_model()
model.load_weights("weights/weights_best.h5")

lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize = 30

while True:
    ret, frame = cap.read()
    if ret == False:
        print("Done")
        break

    # convert image from BGR to HSV
    img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # find the pixels that correspond to the road
    img_out = cv2.inRange(img_hsv, lower, upper)

    # clean up noisy pixels and keep the largest connected segment
    img_out = post_process(img_out)

    image_masked = frame.copy()

    # get masked image
    image_masked[img_out == 0] = (0, 0, 0)

    s = 0.25
    # resize images for computational efficiency
    frame = cv2.resize(frame, None, fx=s, fy=s)
    image_masked = cv2.resize(image_masked, None, fx=s, fy=s)

    # run the sliding window detection process
    bbox_list, totalWindows, correct, score = detectionProcess(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50, depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)

    # draw the detections
    drawBoxes(frame, bbox_list)

    # show detections and road mask side by side
    cv2.imshow("video", sidebyside(frame, image_masked))
    k = cv2.waitKey(3)
    if (k & 0xFF == ord('q')):
        cv2.destroyWindow("video")
        break

cap.release()
cv2.destroyAllWindows()
Here is the utils.py file where I wrote post_process and detectionProcess:
import numpy as np
import cv2

def scale_to_image(x, a=0, b=255):
    # min-max scaling for grayscale images
    ma = np.max(x)
    if ma == 0:
        return x.astype(np.uint8)
    mi = np.min(x)
    normalized_data = (x.astype(float) - float(mi)) / float(ma)          # normalize to 0-1
    normalized_data = normalized_data * b + a * (1 - normalized_data)    # scale values
    return normalized_data.astype(np.uint8)

def nothing(x):
    pass

def channels3(x):
    # stack a grayscale image three times to get 3 color channels
    return np.dstack((x, x, x))

def sidebyside(x, y):
    # concatenate images side by side (horizontally)
    return np.concatenate((x, y), axis=1)

def updown(x, y):
    # concatenate images up and down (vertically)
    return np.concatenate((x, y), axis=0)

def extractLargerSegment(maskROAD):
    contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, contours=None, hierarchy=None)
    maxA = 0
    maskTemp = np.zeros_like(maskROAD)
    if len(contours) > 0:
        for h, cnt in enumerate(contours):
            if cv2.contourArea(cnt) > maxA:
                cntMax = cnt
                maxA = cv2.contourArea(cnt)
        mask = np.zeros(maskROAD.shape, np.uint8)
        cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
        maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
    return maskROAD

def post_process(img):
    kernel = np.ones((5, 5), np.uint8)
    img_out = cv2.erode(img, kernel, iterations=3)
    kernel = np.ones((20, 20), np.uint8)
    img_out = cv2.dilate(img_out, kernel, iterations=5)
    img_out = extractLargerSegment(img_out)
    return img_out

def display(img_init, img_hsv, img_out2, img_out):
    mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
    cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv), sidebyside(channels3(img_out), channels3(img_out2))))

def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
    index = 0
    totalWindows = 0
    correct = 0
    bbox_list = []
    score = []
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        scale = frame.shape[0] / resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            if depth == 1:
                window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
                window = np.expand_dims(window, 3)
            window = window[None, :, :, :]
            totalWindows += 1
            class_out = model.predict((window.astype(np.float32)) / 255., batch_size=1)[0]
            if class_out < thres_score:
                bbox_list.append((int(x * scale), int(y * scale), int((x + winW) * scale), int((y + winH) * scale)))
                score.append(class_out)
                correct += 1
        index += 1
    return bbox_list, totalWindows, correct, score

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

def pyramid(image, scale=1.5, minSize=(30, 30), nb_images=3):
    yield image
    count = 0
    # keep looping over the pyramid
    while True:
        # compute the new dimensions of the image and resize it
        w = int(image.shape[1] / scale)
        h = int(image.shape[0] / scale)
        image = cv2.resize(image, (w, h))
        count += 1
        scale = np.power((1 / scale), count)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
            break
        yield image

def drawBoxes(frame, bbox_list):
    for i in range(len(bbox_list)):
        box = bbox_list[i]
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
    return frame
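A likely reason for the slowness (an educated guess, not a confirmed diagnosis): detectionProcess calls model.predict once per sliding window with batch_size=1, which is very expensive. Below is a hedged sketch of a batched variant for the single-scale case used above (nb_images=1, scale=1); the function name is mine, it reuses the sliding_window helper from utils.py, and it assumes the model accepts a batch of RGB windows of shape (winH, winW, 3).

def detectionProcess_batched(frame, model, winH=50, winW=50, stepSize=10, thres_score=0.05):
    # gather all windows first, then run one forward pass for the whole frame
    windows, coords = [], []
    for (x, y, window) in sliding_window(frame, stepSize=stepSize, windowSize=(winH, winW)):
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        windows.append(window.astype(np.float32) / 255.)
        coords.append((x, y))
    if not windows:
        return [], 0, 0, []
    batch = np.stack(windows)          # shape (N, winH, winW, 3)
    preds = model.predict(batch)       # one prediction call instead of N
    bbox_list, score = [], []
    for (x, y), p in zip(coords, preds):
        if p[0] < thres_score:
            bbox_list.append((x, y, x + winW, y + winH))
            score.append(p[0])
    return bbox_list, len(windows), len(bbox_list), score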
I have an SLR (sign language recognition) task, and I want to show the preprocessing part. Here is my code:
import numpy as np
import cv2
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Load CNN model
model = load_model("VGG16withALLTRAINABLE(NO BACKGROUND).h5")

# ROI frame for capturing the hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250

# Background removal parameters
blur_size = 5
canny_low = 25
# min_area = 0
# max_area = 0
canny_high = 150
dilate_iter = 10
erode_iter = 10
mask_color = (0.0, 0.0, 0.0)

# Video capture
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()

    # flipping frame
    # frame = cv2.flip(frame, 1)

    # Create ROI inside the frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255, 128, 0), 3)  # visual rectangle for ROI

    # Resizing and reshaping to match the model input size and shape
    roi = cv2.resize(roi, (300, 300))
    blurred_roi = cv2.GaussianBlur(roi, (blur_size, blur_size), 0)
    gray_roi = cv2.cvtColor(blurred_roi, cv2.COLOR_BGR2GRAY)
    _, threshed = cv2.threshold(gray_roi, 100, 255, cv2.THRESH_BINARY_INV)

    # edge = cv2.Canny(gray_roi, canny_low, canny_high)
    # edge = cv2.dilate(edge, None)
    # edge = cv2.erode(edge, None)

    cntr = []
    cntr_area = []
    contours, _ = cv2.findContours(threshed, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    contour_info = []
    for c in contours:
        contour_info.append((c, cv2.contourArea(c), ))
    contour_info = np.array(contour_info)
    contour_info = sorted(contour_info, key=lambda x: x[1], reverse=True)
    max_contour = contour_info[0]

    mask = np.zeros(threshed.shape)
    cv2.fillConvexPoly(mask, max_contour[0], (255))
    mask = cv2.dilate(mask, None, iterations=dilate_iter)
    mask = cv2.erode(mask, None, iterations=erode_iter)
    mask = cv2.GaussianBlur(mask, (blur_size, blur_size), 0)
    mask_stack = np.dstack([mask] * 3)  # create a 3-channel alpha mask

    # -- Blend masked img into MASK_COLOR background --------------------------------------
    mask_stack = mask_stack.astype('float32') / 255.0              # use float matrices,
    roi = roi.astype('float32') / 255.0                            # for easy blending
    masked = (mask_stack * roi) + ((1 - mask_stack) * mask_color)  # blend
    masked = (masked * 255).astype('uint8')                        # convert back to 8-bit

    print(mask.shape)
    print(mask_stack.shape)
    print(masked.shape)

    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)

    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
This is my current result [image: results in different frames].
My question is: can I put all of the results into one frame (one frame showing multiple videos)?
I have tried once with the code below, but it won't work when I add the second video stream function (video_stream2()):
from tkinter import *
from PIL import ImageTk, Image
import cv2

# ROI frame for capturing the hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250

root = Tk()
root.geometry("1920x1080")

# Create the frames
Main_video = Frame(root, highlightbackground='grey', highlightthickness=3)
Main_video.grid(row=0, column=0, padx=450, pady=150, ipadx=0, ipady=0)

Roi_video = Frame(root, highlightbackground='grey', highlightthickness=3)
Roi_video.grid(row=0, column=0, padx=0, pady=0, ipadx=0, ipady=0)

# Create a label in each frame
label_main = Label(Main_video)
label_main.grid()

label_roi = Label(Roi_video)
label_roi.grid()

# Capture from camera
cap = cv2.VideoCapture(0)

# function for video streaming
def video_stream():
    _, frame = cap.read()
    # Create ROI inside the frame
    cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Main video
    img = Image.fromarray(cv2image)
    imgtk = ImageTk.PhotoImage(image=img)
    label_main.imgtk = imgtk
    label_main.configure(image=imgtk)
    label_main.after(1, video_stream)

def video_stream2():
    _, frame = cap.read()
    # Create ROI inside the frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255, 128, 0), 3)  # visual rectangle for ROI
    cv2roi_gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)

    # ROI video
    roi_img = Image.fromarray(cv2roi_gray)
    imgtk_roi = ImageTk.PhotoImage(image=roi_img)
    label_roi.imgtk_roi = imgtk_roi
    label_roi.configure(image=imgtk_roi)
    label_roi.after(1, video_stream2)

video_stream()
video_stream2()
root.mainloop()
The procedure to combine several images (windows) into one like that:
...is easy by following the example code:
import numpy as np
import cv2
import time

# Video capture
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    frame_uus = cv2.resize(frame, (240, 160))

    # let's simulate the images...
    # frame = np.random.randint(0, 255, [320, 480, 3], dtype='uint8')
    gray_roi = 0.5*np.random.randint(0, 255, [160, 240, 1], dtype='uint8') + 0.5*frame_uus[:, :, 0:1]
    threshed = 0.1 + 0*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.3*frame_uus
    masked = 0.5*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.2*frame_uus

    # make sure all data is in uint8 format suitable for cv2
    gray_roi = gray_roi.astype(np.uint8)
    threshed = threshed.astype(np.uint8)
    masked = masked.astype(np.uint8)

    # show the separate images...
    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # Define the space between images...
    vali = 2

    # let's combine the images...
    max_height = np.max([len(frame), len(gray_roi), len(threshed), len(masked)])

    # Let's calculate the total width for the combined image... remember to add the space between images...
    total_width = len(frame[0]) + len(gray_roi[0]) + len(threshed[0]) + len(masked[0]) + 4*vali

    # For clearness let's make a green background image
    baseimage = np.zeros([max_height, total_width, 3], 'uint8')
    baseimage[:, :, 1] = 255

    # let's add the separate images to the baseimage
    baseimage[0:len(frame), 0:len(frame[0]), :] = frame

    # Take into account the grayscale...
    alku = len(frame[0]) + vali
    loppu = alku + len(gray_roi[0])
    baseimage[0:len(gray_roi), alku:loppu, 0:1] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 1:2] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 2:3] = gray_roi

    # Add the next image...
    alku = loppu + vali
    loppu = alku + len(threshed[0])
    baseimage[0:len(threshed), alku:loppu, :] = threshed

    # And the last one...
    alku = loppu + vali
    loppu = alku + len(masked[0])
    baseimage[0:len(masked), alku:loppu, :] = masked

    # And finally let's show the baseimage...
    cv2.imshow('Combined', baseimage)

cap.release()
cv2.destroyAllWindows()
Here is my code.
I need to draw a color line and rectangle on my gray video stream.
There is some bug in my code, because the line and rectangle come out black even though I gave them a color.
import cv2

cap = cv2.VideoCapture(0)
if (cap.isOpened() == False):
    print("Unable to read camera feed")

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height), 0)

while cap.isOpened():
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # draw line
    start_point = (0, 0)
    end_point = (250, 250)
    color = (0, 255, 0)
    thickness = 5
    gray = cv2.line(img=gray, pt1=start_point, pt2=end_point, color=color, thickness=thickness, lineType=8, shift=0)

    # draw rectangle
    x1, y1 = 200, 200
    x2, y2 = 250, 250
    gray = cv2.rectangle(gray, (x1, y1), (x2, y2), color, 2)

    cv2.imshow('webcam(1)', gray)
    out.write(gray)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
To draw color elements you have to convert the image back to BGR:
gray_BGR = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
Converting to GRAY not only turns the colors gray, it also reduces every pixel from three values (B, G, R) to a single value that can only hold a gray level.
If you check frame.shape and gray.shape you will see the difference.
The first will be (height, width, 3); the second will be only (height, width), which means a single channel.
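A minimal sketch of how the original loop could look with that conversion (my own rewrite of the question's code, not tested against the author's setup); note the VideoWriter must then be created for color output:

import cv2

cap = cv2.VideoCapture(0)
frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
# color output: omit the trailing 0 so isColor defaults to True
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)       # 1 channel
    gray_BGR = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)    # back to 3 channels, still looks gray
    cv2.line(gray_BGR, (0, 0), (250, 250), (0, 255, 0), 5)          # green stays green now
    cv2.rectangle(gray_BGR, (200, 200), (250, 250), (0, 255, 0), 2)
    cv2.imshow('webcam(1)', gray_BGR)
    out.write(gray_BGR)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()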
I have to track a window in a video and need to paste an image onto that window. I have used CamShift to track the window, but it does not track it correctly. My window is brown, so I have given the following color range:
lower = np.array((110, 0, 0))
upper = np.array((130, 255, 255))
I have read many OpenCV documents but I am not able to figure out which method to follow. I am using OpenCV 2.4.9 with Python.
Below is the code I tried. Please help me figure out the exact location of the window.
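A quick sanity check worth doing first (my own sketch, not from the question): print the actual hue statistics of the window region before hard-coding the inRange bounds. In OpenCV, hue runs 0-179; 110-130 corresponds to blue tones, while browns and oranges usually sit around 10-20.

import cv2
import numpy as np

cap = cv2.VideoCapture("myvideo.mp4")
ret, frame = cap.read()
roi = frame[157:157+40, 337:337+40]             # same r, h, c, w as in the question
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv_roi)
print("hue min/median/max:", h.min(), np.median(h), h.max())
print("sat min/median/max:", s.min(), np.median(s), s.max())
print("val min/median/max:", v.min(), np.median(v), v.max())
# use these statistics to set the lower/upper bounds passed to cv2.inRange
cap.release()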
#!/usr/bin/env python
import numpy as np
import cv2

cap = cv2.VideoCapture("myvideo.mp4")

# take the first frame of the video
ret, frame = cap.read()
# print frame
# print ret

# set up the initial location of the window
r, h, c, w = 157, 40, 337, 40
track_window = (c, r, w, h)

# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((110, 0, 0)), np.array((130, 255, 255)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [255], [0, 255])
cv2.imshow('img2', roi_hist)
# print roi_hist
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

# Set up the termination criteria: either 10 iterations or move by at least 1 pt
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

i = 1
while(1):
    ret, frame = cap.read()
    if ret == True:
        i += 1
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 255], 1)

        # apply CamShift to get the new location
        ret, track_window = cv2.CamShift(dst, track_window, term_crit)
        # print track_window

        # Draw it on the image
        x, y, w, h = track_window
        img2 = cv2.rectangle(frame, (x, y), (x+w, y+h), 255, 2)
        cv2.imshow('img2', frame)

        k = cv2.waitKey(200) & 0xff
        if k == 27:
            break
        else:
            # print "comes here2"
            cv2.imwrite(str(i) + "test.jpg", frame)
            # break
    else:
        break

cv2.destroyAllWindows()
cap.release()