Sorry if the title is unclear. Basically, I've written a program that tracks an object of a certain color as it moves around my webcam's FOV. As I move the object around, the program places a red dot on the center of the object and moves the dot with the object. However, the object's location doesn't really mean anything yet. I want the frame to be divided into four equal parts, with each part producing a different number as output. For example, if the object (dot) is in quadrant one, I want the number 1 to appear on the frame. How would I do this? Can anyone nudge me in the right direction? I'm using OpenCV-Python and am grateful for any help.
Here is the code I have so far.
# import the necessary packages
from collections import deque
from imutils.video import VideoStream
import numpy as np
import argparse
import cv2
import imutils
import time
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=32,
help="max buffer size")
args = vars(ap.parse_args())
# define the lower and upper boundaries of the "orange"
# fish in the HSV color space
orangeLower = (5, 50, 50)
orangeUpper = (15, 255, 255)
# initialize the list of tracked points, the frame counter,
# and the coordinate deltas
pts = deque(maxlen=args["buffer"])
counter = 0
(dX, dY) = (0, 0)
direction = ""
# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
vs = VideoStream(src=0).start()
# otherwise, grab a reference to the video file
else:
vs = cv2.VideoCapture(args["video"])
# allow the camera or video file to warm up
time.sleep(2.0)
# keep looping
while True:
# grab the current frame
frame = vs.read()
# handle the frame from VideoCapture or VideoStream
frame = frame[1] if args.get("video", False) else frame
# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
if frame is None:
break
# resize the frame, blur it, and convert it to the HSV
# color space
frame = imutils.resize(frame, width=600)
blurred = cv2.GaussianBlur(frame, (11, 11), 0)
hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
# construct a mask for the color "orange", then perform
# a series of dilations and erosions to remove any small
# blobs left in the mask
mask = cv2.inRange(hsv, orangeLower, orangeUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
# find contours in the mask and initialize the current
# (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
# then update the list of tracked points
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
pts.appendleft(center)
# loop over the set of tracked points
for i in np.arange(1, len(pts)):
# if either of the tracked points are None, ignore
# them
if pts[i - 1] is None or pts[i] is None:
continue
# check to see if enough points have been accumulated in
# the buffer
if counter >= 10 and i == 10 and pts[i-10] is not None:
# compute the difference between the x and y
# coordinates and re-initialize the direction
# text variables
dX = pts[i-10][0] - pts[i][0]
dY = pts[i-10][1] - pts[i][1]
(dirX, dirY) = ("", "")
# ensure there is significant movement in the
# x-direction
if np.abs(dX) > 20:
dirX = "East" if np.sign(dX) == 1 else "West"
# ensure there is significant movement in the
# y-direction
if np.abs(dY) > 20:
dirY = "South" if np.sign(dY) == 1 else "North"
# handle when both directions are non-empty
if dirX != "" and dirY != "":
direction = "{}-{}".format(dirY, dirX)
# otherwise, only one direction is non-empty
else:
direction = dirX if dirX != "" else dirY
# otherwise, compute the thickness of the line and
# draw the connecting lines
thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
# show the movement deltas and the direction of movement on
# the frame
cv2.putText(frame, direction, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
0.65, (0, 0, 255), 3)
cv2.putText(frame, "dx: {}, dy: {}".format(dX, dY),
(10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
0.35, (0, 0, 255), 1)
# draw the quadrant boundaries, then show the frame to the
# screen and increment the frame counter
cv2.rectangle(img=frame, pt1=(0, 0), pt2=(300, 225), color=(0, 0, 0), thickness=3, lineType=8, shift=0)
cv2.rectangle(img=frame, pt1=(300, 0), pt2=(600, 225), color=(0, 0, 0), thickness=3, lineType=8, shift=0)
cv2.rectangle(img=frame, pt1=(0, 225), pt2=(300, 550), color=(0, 0, 0), thickness=3, lineType=8, shift=0)
cv2.rectangle(img=frame, pt1=(300, 225), pt2=(600, 550), color=(0, 0, 0), thickness=3, lineType=8, shift=0)
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
counter += 1
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# if we are not using a video file, stop the camera video stream
if not args.get("video", False):
vs.stop()
# otherwise, release the camera
else:
vs.release()
# close all windows
cv2.destroyAllWindows()
Here is an image of the frame I get when I run the code.
As you can see, there are lines dividing the image into fourths. These rectangles are where I want the outputs to appear.
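For the quadrant output, one possible approach (just a sketch, and the quadrant numbering below is only an example): compare the tracked centroid with half the frame width and height, then draw the resulting number with cv2.putText.

def quadrant_number(center, frame):
    # example numbering: 1 = top-right, 2 = top-left,
    # 3 = bottom-left, 4 = bottom-right
    (h, w) = frame.shape[:2]
    (x, y) = center
    if y < h // 2:
        return 1 if x >= w // 2 else 2
    return 3 if x < w // 2 else 4

# inside the tracking loop above, after `center` has been computed:
#     if center is not None:
#         cv2.putText(frame, str(quadrant_number(center, frame)), (10, 60),
#                     cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)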
I am working on a video with many people, a few of whom are wearing red t-shirts. I have all the persons detected and tracked with person detection and tracking models. How can I distinguish the persons wearing red from the others?
I am reading the frames in OpenCV format. If I know the coordinates, suppose (x, y) is a coordinate on the body where the color is red, how can I get the color information at that coordinate in OpenCV format and check whether it falls within the red color range?
I only need to highlight the bounding boxes of the persons wearing red.
Can someone help me figure out a solution?
Thank you!
The better way is to convert the colour space to HSV and find the Hue value range for the colour:
Take each frame of the video
Detect humans first, then extract the human region (source)
Convert from BGR to the HSV color space
Threshold the HSV image for a range of red colour
Identifying people in red t-shirts in the video:
We can identify the human region in images using the following code:
import time
import cv2
import imutils
import numpy as np
from imutils.video import FPS
# import the necessary packages
from imutils.video import VideoStream
def get_centered_contours(mask):
# find contours
cntrs = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
sorted_contours = sorted(cntrs, key=cv2.contourArea, reverse=True)
filterd_contours = []
if sorted_contours != []:
for k in range(len(sorted_contours)):
if cv2.contourArea(sorted_contours[k]) < 1000.0:
filterd_contours = sorted_contours[0:k]
return filterd_contours
return filterd_contours
def check_red_colour_person(roi):
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
# define range of red color in HSV
lower_red = np.array([0, 50, 50])
upper_red = np.array([10, 255, 255])
# Threshold the HSV image to get only red colors
mask = cv2.inRange(hsv, lower_red, upper_red)
cnts = get_centered_contours(mask)
if cnts != []:
return True
else:
return False
# construct the argument parse and parse the arguments
prototxt = 'MobileNetSSD_deploy.prototxt.txt'
model = 'MobileNetSSD_deploy.caffemodel'
confidence_level = 0.8
# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["person"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(prototxt, model)
# initialize the video stream, allow the camera sensor to warm up,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
fps = FPS().start()
# loop over the frames from the video stream
while True:
try:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
frame = vs.read()
frame = imutils.resize(frame, width=400)
# grab the frame dimensions and convert it to a blob
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
0.007843, (300, 300), 127.5)
# pass the blob through the network and obtain the detections and
# predictions
net.setInput(blob)
detections = net.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the `confidence` is
# greater than the minimum confidence
if confidence > confidence_level:
# extract the index of the class label from the
# `detections`, then compute the (x, y)-coordinates of
# the bounding box for the object
idx = int(detections[0, 0, i, 1])
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
roi = frame[startY:endY, startX:endX]
# cv2.imwrite('roi_{}_{}_{}_{}.png'.format(startX,startY,endX,endY),roi)
if check_red_colour_person(roi):
label = "{}: {:.2f}%".format(' Red T-shirt person',
confidence * 100)
cv2.imwrite(
'Red-T-shirt_guy_{}_{}_{}_{}.png'.format(startX, startY, endX,
endY), roi)
cv2.rectangle(frame, (startX, startY), (endX, endY),
(0, 0, 255), 2)
else:
# label non-red detections too, so the putText call below always has a value
label = "{}: {:.2f}%".format('Person', confidence * 100)
cv2.rectangle(frame, (startX, startY), (endX, endY),
(255, 0, 0), 2)
y = startY - 15 if startY - 15 > 15 else startY + 15
cv2.putText(frame, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# update the FPS counter
fps.update()
except Exception as e:
print("Exception is occured")
continue
# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
You can set the color boundaries:
boundaries = [
([17, 15, 100], [50, 56, 200])]
Here the tuple ([17, 15, 100], [50, 56, 200]) gives the lower and upper bounds in BGR order: all pixels in the image with B between 17 and 50, G between 15 and 56, and R between 100 and 200 will be considered red.
You can implement it as follows:
for (lower, upper) in boundaries:
lower = np.array(lower, dtype = "uint8")
upper = np.array(upper, dtype = "uint8")
# find the colors within the specified boundaries and apply
# the mask
mask = cv2.inRange(image, lower, upper)
output = cv2.bitwise_and(image, image, mask = mask)
# show the images
cv2.imshow("images", np.hstack([image, output]))
I am a newbie using the pyimagesearch code for ball tracking with Python 2.7 and OpenCV.
https://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
I am trying to write the x, y coordinates of a tracked object into a .csv file. I am converting pts to a string and then writing it to the .csv file. I get a set of numbers like this: (255 386) (266 399). Are these x, y coordinates? And if so, what do they mean in relation to the image?
#import the necessary packages
from collections import deque
from imutils.video import VideoStream
import numpy as np
import argparse
import cv2
import imutils
import time
import csv
#construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
help="max buffer size")
args = vars(ap.parse_args())
#define the lower and upper boundaries of the "green"
#ball in the HSV color space, then initialize the
#list of tracked points
greenLower = (0, 0, 0)
greenUpper = (180, 255, 40)
pts = deque(maxlen=args["buffer"])
#if a video path was not supplied, grab the reference
#to the webcam
if not args.get("video", False):
vs = VideoStream(src=0).start()
#otherwise, grab a reference to the video file
else:
vs = cv2.VideoCapture(args["video"])
#allow the camera or video file to warm up
time.sleep(2.0)
#keep looping
while True:
#grab the current frame
frame = vs.read()
#handle the frame from VideoCapture or VideoStream
frame = frame[1] if args.get("video", False) else frame
#if we are viewing a video and we did not grab a frame,
#then we have reached the end of the video
if frame is None:
break
#resize the frame, blur it, and convert it to the HSV
#color space
frame = imutils.resize(frame, width=600)
blurred = cv2.GaussianBlur(frame, (11, 11), 0)
hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
#construct a mask for the color "green", then perform
#a series of dilations and erosions to remove any small
#blobs left in the mask
mask = cv2.inRange(hsv, greenLower, greenUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
#find contours in the mask and initialize the current
#(x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
center = None
#only proceed if at least one contour was found
if len(cnts) > 0:
#find the largest contour in the mask, then use
#it to compute the minimum enclosing circle and
#centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
#only proceed if the radius meets a minimum size
if radius > 10:
#draw the circle and centroid on the frame,
#then update the list of tracked points
cv2.circle(frame, (int(x), int(y)), int(radius),(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 225), -1)
#update the points queue
pts.appendleft(center)
#loop over the set of tracked points
for i in range(1, len(pts)):
#if either of the tracked points are None, ignore
#them
if pts[i - 1] is None or pts[i] is None:
continue
#otherwise, compute the thickness of the line and
#draw the connecting lines
thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
cv2.line(frame, pts[i - 1], pts[i], (0, 0, 225), thickness)
#write info to file
f = open("foo11.csv", "w+")
s = str(pts)
f.write(s)
f.close()
#show the frame to our screen
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
#if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
#if we are not using a video file, stop the camera video stream
if not args.get("video", False):
vs.stop()
#otherwise, release the camera
else:
vs.release()
#close all windows
cv2.destroyAllWindows()
Consider the following grid to be an image:
This image is said to have a shape of (7x7): 7 pixels in height (along y) and 7 pixels in width (along x). The image therefore has 49 pixels, which is its size.
The origin (0, 0) is at the top-left corner. This is the top-leftmost pixel of the image.
Now, as the centroid of the contour (ball) moves, it is located in one of these 49 pixels.
As a result, the .csv file is storing these pixel coordinates as (x, y) tuples.
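If the goal is to log one (x, y) pair per frame, a cleaner option than writing str(pts) every iteration is to open the file once, write a header, and append one row per frame with the csv module. A minimal sketch (the filename and the sample points are placeholders):

import csv

# e.g. centroids collected from the tracking loop
points = [(255, 386), (266, 399), (270, 401)]

with open("ball_positions.csv", "w") as f:   # hypothetical output file
    writer = csv.writer(f)
    writer.writerow(["frame", "x", "y"])     # header row
    for frame_no, (x, y) in enumerate(points):
        writer.writerow([frame_no, x, y])

In the tracking loop itself you would open the file once before the while loop, call writer.writerow with a frame counter and center[0], center[1] whenever center is not None, and close the file after the loop, instead of reopening and overwriting foo11.csv on every frame.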
I'm trying to apply a Kalman filter with OpenCV in Python to track the position of a ball. I can already detect it, but there is still some noise I want to eliminate. There are two variables I measure (x and y position) and four variables I would like to get (x and y position, and x and y velocity), but I get none of them. When I display x0, y0, vx and vy on the screen I get "[.0]".
Another problem is that I cannot apply a control matrix to the kalman.predict() function, because I get the following error:
OpenCV Error: Assertion failed (a_size.width == len) in gemm, file /tmp/opencv3-20170518-8732-1bjq2j7/opencv-3.2.0/modules/core/src/matmul.cpp, line 1537
Traceback (most recent call last):
File "kalman.py", line 128, in <module>
kalmanout = kalman.predict(kalman.controlMatrix)
cv2.error: /tmp/opencv3-20170518-8732-1bjq2j7/opencv-3.2.0/modules/core/src/matmul.cpp:1537: error: (-215) a_size.width == len in function ge
This is the piece of code I'm using for the Kalman filter (for the control matrix application I use the line kalmanout = kalman.predict(kalman.controlMatrix) at the end):
# import the necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2
import time
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=10,
help="max buffer size")
ap.add_argument("-a", "--min-area", type=int, default=500, help="minimum area size")
args = vars(ap.parse_args())
# define the lower and upper boundaries of the "blue"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (48, 62, 88)
greenUpper = (151, 238, 255)
pts = deque(maxlen=args["buffer"])
tintervals = deque(maxlen=args["buffer"])
tPrev = 0;
pRad = 0
mapix = 0
mspeed = 0
# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
camera = cv2.VideoCapture(args["video"])
# keep looping
#initialize background subtraction
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
# grab the current frame
(grabbed, frame) = camera.read()
displayx = 0
# start counting time
tPrev = time.time()
# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
if args.get("video") and not grabbed:
break
# resize the frame and apply background subtraction
frame = imutils.resize(frame, width=500)
mask = fgbg.apply(frame)
res = cv2.bitwise_and(frame, frame, mask = mask)
# blur the frame and convert it to the HSV
blurred = cv2.GaussianBlur(res, (11, 11), 0)
hsv = cv2.cvtColor(res, cv2.COLOR_BGR2HSV)
# construct a mask for the color "blue", then perform
# a series of dilations and erosions to remove any small
# blobs left in the mask
mask = cv2.inRange(hsv, greenLower, greenUpper)
mask = cv2.erode(mask, None, iterations=2)
mask = cv2.dilate(mask, None, iterations=2)
# find contours in the mask and initialize the current
# (x, y) center of the ball
cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
center = None
# only proceed if at least one contour was found
if len(cnts) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(cnts, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
pRad = radius
M = cv2.moments(c)
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
# then update the list of tracked points
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# update time intervals queue
tintervals.appendleft(time.time() - tPrev)
# update the points queue
pts.appendleft(center)
# predict position of the ball
if (pRad > 0 and len(pts) > 5):
if pts[0] != None and pts[1] != None:
apix = 98.1/(0.032/pRad)
mapix = apix
y0 = pts[0][1]
x0 = pts[0][0]
kalmanin = np.array((2,1), np.float32) # measurement
kalmanout = np.zeros((4,1), np.float32) # tracked / prediction
kalmanin = np.array([[np.float32(x0)],[np.float32(y0)]])
tkalman = 0.01
kalman = cv2.KalmanFilter(4,2)
kalman.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]],np.float32)
kalman.transitionMatrix = np.array([[1,0,tkalman,0],[0,1,0,tkalman],[0,0,1,0],[0,0,0,1]],np.float32)
kalman.controlMatrix = np.array([[0],[0.5*(tkalman**2.0)], [0],[tkalman]],np.float32) * mapix
kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32) * 0.03
kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32) * 0.03
kalman.measurementNoiseCov = np.array([[1,0],[0,1]],np.float32) * 0.00009
kalman.correct(kalmanin)
kalmanout = kalman.predict(kalman.controlMatrix)
x0 = kalmanout[0]
y0 = kalmanout[1]
vx = kalmanout[2]
vy = kalmanout[3]
displayx = x0
listX = []
listY = []
for i in range(1, 11):
t = 0.01 * i
y = y0 + vy * t + (apix * (t ** 2)) / 2
x = x0 + vx * t
listX.append(int(x))
listY.append(int(y))
mspeed = vy
for i in range(0, 9):
cv2.line(frame, (listX[i], listY[i]), (listX[i+1], listY[i+1]), (255, 0, 0), 4)
# loop over the set of tracked points
for i in xrange(1, len(pts)):
# if either of the tracked points are None, ignore
# them
if pts[i - 1] is None or pts[i] is None:
continue
# otherwise, compute the thickness of the line and
# draw the connecting lines
thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
cv2.putText(frame, "y axis speed: {}".format(displayx),
(120, frame.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
cv2.putText(frame, "radius in px: {}".format(pRad),
(120, frame.shape[0] - 30), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
cv2.putText(frame, "apix: {}".format(mapix),
(120, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
if (mapix != 0):
cv2.putText(frame, "radius in meters: {}".format((9.81*pRad)/mapix),
(120, frame.shape[0] - 50), cv2.FONT_HERSHEY_SIMPLEX,
0.5, (0, 0, 255), 1)
# shows x, y position, (newest input from pts)
cv2.putText(frame, "x, y: {}".format(pts[0]),
(10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
0.35, (0, 0, 255), 1)
# show the frame to our screen
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the 'q' key is pressed, stop the loop
if key == ord("q"):
break
# cleanup the camera and close any open windows
camera.release()
cv2.destroyAllWindows()
First of all, I would move the initialization of the Kalman filter outside the loop. The main issue with your code is that you have set the control matrix. If I understand your task, you are only observing the system, not controlling it. Just skip the kalman.controlMatrix initialization or set it to a zero matrix. In the loop you then just use:
kalmanout = kalman.predict()
kalman.correct(kalmanin)
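A minimal sketch of that restructuring (the measurements below are placeholders, not values from the original video): the filter is created and configured once before the loop, the control matrix is simply left out, and predict() is called with no argument.

import numpy as np
import cv2

dt = 0.01  # same time step as tkalman in the question

# create and configure the filter once, outside the tracking loop
kalman = cv2.KalmanFilter(4, 2)
kalman.measurementMatrix = np.array([[1, 0, 0, 0],
                                     [0, 1, 0, 0]], np.float32)
kalman.transitionMatrix = np.array([[1, 0, dt, 0],
                                    [0, 1, 0, dt],
                                    [0, 0, 1, 0],
                                    [0, 0, 0, 1]], np.float32)
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03
kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 0.00009
kalman.errorCovPost = np.eye(4, dtype=np.float32)

# inside the loop: one predict/correct pair per measured centroid
for (mx, my) in [(100, 200), (102, 197), (105, 193)]:  # placeholder measurements
    kalmanout = kalman.predict()                       # no control argument
    kalmanin = np.array([[np.float32(mx)], [np.float32(my)]])
    kalman.correct(kalmanin)
    x0, y0, vx, vy = kalmanout.flatten()               # filtered position and velocity
    print(x0, y0, vx, vy)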
I'm trying to implement a digit recognition program for video capture in OpenCV. It works with normal (still) pictures as input, but when I add the video capture functionality it gets stuck while recording if I move the camera around. My code for the program is here:
import numpy as np
import cv2
from sklearn.externals import joblib
from skimage.feature import hog
# Load the classifier
clf = joblib.load("digits_cls.pkl")
# Default camera has index 0 and externally(USB) connected cameras have
# indexes ranging from 1 to 3
cap = cv2.VideoCapture(0)
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
# Convert to grayscale and apply Gaussian filtering
im_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)
# Threshold the image
ret, im_th = cv2.threshold(im_gray.copy(), 120, 255, cv2.THRESH_BINARY_INV)
# Find contours in the binary image 'im_th'
_, contours0, hierarchy = cv2.findContours(im_th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw contours in the original image 'im' with contours0 as input
# cv2.drawContours(frame, contours0, -1, (0,0,255), 2, cv2.LINE_AA, hierarchy, abs(-1))
# Rectangular bounding box around each number/contour
rects = [cv2.boundingRect(ctr) for ctr in contours0]
# Draw the bounding box around the numbers
for rect in rects:
cv2.rectangle(frame, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
# Make the rectangular region around the digit
leng = int(rect[3] * 1.6)
pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
# Resize the image
roi = cv2.resize(roi, (28, 28), im_th, interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
# Calculate the HOG features
roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)
nbr = clf.predict(np.array([roi_hog_fd], 'float64'))
cv2.putText(frame, str(int(nbr[0])), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)
# Display the resulting frame
cv2.imshow('frame', frame)
cv2.imshow('Threshold', im_th)
# Press 'q' to exit the video stream
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
The error I get is that there is no input to the resize of the ROI (region of interest). I find it weird because it works as long as I don't move things around too much in the picture. I'm sure that it isn't the camera that is at fault, since I've tried a lot of different cameras. Here is the specific error message:
Traceback (most recent call last):
File "C:\Users\marti\Desktop\Code\Python\digitRecognition\Video_cap.py", line 55, in <module>
roi = cv2.resize(roi, (28, 28), im_th, interpolation=cv2.INTER_AREA)
cv2.error: D:\Build\OpenCV\opencv-3.2.0\modules\imgproc\src\imgwarp.cpp:3492: error: (-215) ssize.width > 0 && ssize.height > 0 in function cv::resize
Picture of the program in action; if I move the numbers around, the program freezes.
You're using a fixed threshold for the preprocessing before trying to find contours. Since cv2.resize() has to resize something, it expects the roi matrix to have non-zero width and height. I'm guessing that at some point when you're moving the camera you don't detect any digits, because of your non-adaptive preprocessing algorithm.
I suggest that you display the thresholded image and an image with the contours superimposed on the frame while moving the camera. This way you'll be able to debug the algorithm. Also, make sure to print(len(rects)) to see whether any rectangles have been detected.
Another trick would be to save the frames and run the algorithm on the last frame saved before crashing, to find out why that frame is causing the error.
Summarizing, you really need to take control over your code if you expect it to produce meaningful results. The solution, depending on your data, might be some kind of contrast enhancement before the thresholding operation and/or using Otsu's method or adaptive thresholding with some additional filtering.
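As a rough illustration of that suggestion (a sketch only; the block size and constant would need tuning for your camera), the fixed threshold can be swapped for Otsu's method or adaptive thresholding:

import cv2

cap = cv2.VideoCapture(0)
ret, frame = cap.read()                          # a single frame, just for the example
im_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)

# Otsu's method picks the threshold value from the image histogram
_, im_th_otsu = cv2.threshold(im_gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# adaptive thresholding computes a local threshold per neighbourhood,
# which copes better with uneven lighting while the camera moves
im_th_adapt = cv2.adaptiveThreshold(im_gray, 255,
                                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, 31, 10)

cap.release()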
What about trying this:
if roi.any():
roi = cv2.resize(roi, (28, 28), frame, interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
I think this does what you want (I simplified yours for the example):
cap = cv2.VideoCapture(0)
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
frame2=frame.copy()
# Convert to grayscale and apply Gaussian filtering
im_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)
ret, im_th = cv2.threshold(im_gray.copy(), 120, 255, cv2.THRESH_BINARY_INV)
# Find contours in the binary image 'im_th'
_, contours0, hierarchy = cv2.findContours(im_th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Rectangular bounding box around each number/contour
rects = [cv2.boundingRect(ctr) for ctr in contours0]
# Draw the bounding box around the numbers
for rect in rects:
cv2.rectangle(frame, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 255), 3)
# Make the rectangular region around the digit
leng = int(rect[3] * 1.6)
pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
# Resize the image
if roi.any():
roi = cv2.resize(roi, (28, 28), frame, interpolation=cv2.INTER_AREA)
roi = cv2.dilate(roi, (3, 3))
# Display the resulting frame
cv2.imshow('frame', frame)
#cv2.imshow('Threshold', im_th)
# Press 'q' to exit the video stream
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()