How to merge nearby bounding boxes OpenCV - python

I am trying to segment a web page into a header, footer, left panel, right panel, etc. (get their coordinates) using an image processing tool (OpenCV), but it is not giving satisfactory results.
I want to get something like this:
But all I got was this:
import cv2
import numpy
from google.colab.patches import cv2_imshow

img = cv2.imread("test.png")
blue, green, red = cv2.split(img)

def medianCanny(img, thresh1, thresh2):
    median = numpy.median(img)
    img = cv2.Canny(img, int(thresh1 * median), int(thresh2 * median))
    return img

blue_edges = medianCanny(blue, 0, 1)
green_edges = medianCanny(green, 0, 1)
red_edges = medianCanny(red, 0, 1)

edges = blue_edges | green_edges | red_edges

contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
hierarchy = hierarchy[0]
for component in zip(contours, hierarchy):
    currentContour = component[0]
    currentHierarchy = component[1]
    x, y, w, h = cv2.boundingRect(currentContour)
    if currentHierarchy[3] < 0:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)

cv2_imshow(img)
cv2.waitKey(0)
cv2.destroyAllWindows()
And I want to connect nearby boxes, but I don't understand how best to do it.
Test image

I started with your code as a base to get the green rectangles. I filtered the boxes by size to get rid of the big ones that contained large chunks of the image (there's even one that goes around the entire image). From there I iteratively merged nearby boxes until there were no more overlapping boxes. I used the merge_margin variable to set how close two boxes needed to be before they counted as "overlapping".
Each Step (I highlighted the last merged box and the points it found inside)
(This gif is heavily compressed so you'll see some artifacting)
Final Image
import cv2
import numpy as np

# tuplify
def tup(point):
    return (point[0], point[1]);

# returns true if the two boxes overlap
def overlap(source, target):
    # unpack points
    tl1, br1 = source;
    tl2, br2 = target;

    # checks
    if (tl1[0] >= br2[0] or tl2[0] >= br1[0]):
        return False;
    if (tl1[1] >= br2[1] or tl2[1] >= br1[1]):
        return False;
    return True;

# returns all overlapping boxes
def getAllOverlaps(boxes, bounds, index):
    overlaps = [];
    for a in range(len(boxes)):
        if a != index:
            if overlap(bounds, boxes[a]):
                overlaps.append(a);
    return overlaps;

img = cv2.imread("test.png")
orig = np.copy(img);
blue, green, red = cv2.split(img)

def medianCanny(img, thresh1, thresh2):
    median = np.median(img)
    img = cv2.Canny(img, int(thresh1 * median), int(thresh2 * median))
    return img

blue_edges = medianCanny(blue, 0, 1)
green_edges = medianCanny(green, 0, 1)
red_edges = medianCanny(red, 0, 1)

edges = blue_edges | green_edges | red_edges

# I'm using OpenCV 3.4. This returns (contours, hierarchy) in OpenCV 2 and 4
_, contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# go through the contours and save the box edges
boxes = []; # each element is [[top-left], [bottom-right]];
hierarchy = hierarchy[0]
for component in zip(contours, hierarchy):
    currentContour = component[0]
    currentHierarchy = component[1]
    x, y, w, h = cv2.boundingRect(currentContour)
    if currentHierarchy[3] < 0:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        boxes.append([[x, y], [x + w, y + h]]);

# filter out excessively large boxes
filtered = [];
max_area = 30000;
for box in boxes:
    w = box[1][0] - box[0][0];
    h = box[1][1] - box[0][1];
    if w * h < max_area:
        filtered.append(box);
boxes = filtered;

# go through the boxes and start merging
merge_margin = 20;

# this is gonna take a long time
finished = False;
highlight = [[0,0], [1,1]];
points = [[[0,0]]];
while not finished:
    # set end con
    finished = True;

    # check progress
    print("Len Boxes: " + str(len(boxes)));

    # draw boxes # comment this section out to run faster
    copy = np.copy(orig);
    for box in boxes:
        cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
    cv2.rectangle(copy, tup(highlight[0]), tup(highlight[1]), (0,0,255), 2);
    for point in points:
        point = point[0];
        cv2.circle(copy, tup(point), 4, (255,0,0), -1);
    cv2.imshow("Copy", copy);
    key = cv2.waitKey(1);
    if key == ord('q'):
        break;

    # loop through boxes
    index = 0;
    while index < len(boxes):
        # grab current box
        curr = boxes[index];

        # add margin
        tl = curr[0][:];
        br = curr[1][:];
        tl[0] -= merge_margin;
        tl[1] -= merge_margin;
        br[0] += merge_margin;
        br[1] += merge_margin;

        # get matching boxes
        overlaps = getAllOverlaps(boxes, [tl, br], index);

        # check if empty
        if len(overlaps) > 0:
            # combine boxes
            # convert to a contour
            con = [];
            overlaps.append(index);
            for ind in overlaps:
                tl, br = boxes[ind];
                con.append([tl]);
                con.append([br]);
            con = np.array(con);

            # get bounding rect
            x, y, w, h = cv2.boundingRect(con);

            # stop growing
            w -= 1;
            h -= 1;
            merged = [[x, y], [x + w, y + h]];

            # highlights
            highlight = merged[:];
            points = con;

            # remove boxes from list
            overlaps.sort(reverse = True);
            for ind in overlaps:
                del boxes[ind];
            boxes.append(merged);

            # set flag
            finished = False;
            break;

        # increment
        index += 1;
cv2.destroyAllWindows();

# show final
copy = np.copy(orig);
for box in boxes:
    cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
cv2.imshow("Final", copy);
cv2.waitKey(0);
Edit: The inefficiency of this bothered me a bit. The order that the boxes get merged in doesn't really make sense. You can see that there are a lot of steps where little boxes are merging into a big box, rather than a big box eating everything inside of itself and growing. Turns out this was a really easy code fix. Since newly merged boxes are appended to the end of the boxes list, we can just index in reverse so that we go from big to small.
I changed the merge_margin to 15 since I think that's closer to the target solution in the question.
import cv2
import numpy as np

# tuplify
def tup(point):
    return (point[0], point[1]);

# returns true if the two boxes overlap
def overlap(source, target):
    # unpack points
    tl1, br1 = source;
    tl2, br2 = target;

    # checks
    if (tl1[0] >= br2[0] or tl2[0] >= br1[0]):
        return False;
    if (tl1[1] >= br2[1] or tl2[1] >= br1[1]):
        return False;
    return True;

# returns all overlapping boxes
def getAllOverlaps(boxes, bounds, index):
    overlaps = [];
    for a in range(len(boxes)):
        if a != index:
            if overlap(bounds, boxes[a]):
                overlaps.append(a);
    return overlaps;

img = cv2.imread("test.png")
orig = np.copy(img);
blue, green, red = cv2.split(img)

def medianCanny(img, thresh1, thresh2):
    median = np.median(img)
    img = cv2.Canny(img, int(thresh1 * median), int(thresh2 * median))
    return img

blue_edges = medianCanny(blue, 0, 1)
green_edges = medianCanny(green, 0, 1)
red_edges = medianCanny(red, 0, 1)

edges = blue_edges | green_edges | red_edges

# I'm using OpenCV 3.4. This returns (contours, hierarchy) in OpenCV 2 and 4
_, contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# go through the contours and save the box edges
boxes = []; # each element is [[top-left], [bottom-right]];
hierarchy = hierarchy[0]
for component in zip(contours, hierarchy):
    currentContour = component[0]
    currentHierarchy = component[1]
    x, y, w, h = cv2.boundingRect(currentContour)
    if currentHierarchy[3] < 0:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        boxes.append([[x, y], [x + w, y + h]]);

# filter out excessively large boxes
filtered = [];
max_area = 30000;
for box in boxes:
    w = box[1][0] - box[0][0];
    h = box[1][1] - box[0][1];
    if w * h < max_area:
        filtered.append(box);
boxes = filtered;

# go through the boxes and start merging
merge_margin = 15;

# this is gonna take a long time
finished = False;
highlight = [[0,0], [1,1]];
points = [[[0,0]]];
while not finished:
    # set end con
    finished = True;

    # check progress
    print("Len Boxes: " + str(len(boxes)));

    # draw boxes # comment this section out to run faster
    copy = np.copy(orig);
    for box in boxes:
        cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
    cv2.rectangle(copy, tup(highlight[0]), tup(highlight[1]), (0,0,255), 2);
    for point in points:
        point = point[0];
        cv2.circle(copy, tup(point), 4, (255,0,0), -1);
    cv2.imshow("Copy", copy);
    key = cv2.waitKey(1);
    if key == ord('q'):
        break;

    # loop through boxes in reverse so big merged boxes get processed first
    index = len(boxes) - 1;
    while index >= 0:
        # grab current box
        curr = boxes[index];

        # add margin
        tl = curr[0][:];
        br = curr[1][:];
        tl[0] -= merge_margin;
        tl[1] -= merge_margin;
        br[0] += merge_margin;
        br[1] += merge_margin;

        # get matching boxes
        overlaps = getAllOverlaps(boxes, [tl, br], index);

        # check if empty
        if len(overlaps) > 0:
            # combine boxes
            # convert to a contour
            con = [];
            overlaps.append(index);
            for ind in overlaps:
                tl, br = boxes[ind];
                con.append([tl]);
                con.append([br]);
            con = np.array(con);

            # get bounding rect
            x, y, w, h = cv2.boundingRect(con);

            # stop growing
            w -= 1;
            h -= 1;
            merged = [[x, y], [x + w, y + h]];

            # highlights
            highlight = merged[:];
            points = con;

            # remove boxes from list
            overlaps.sort(reverse = True);
            for ind in overlaps:
                del boxes[ind];
            boxes.append(merged);

            # set flag
            finished = False;
            break;

        # decrement
        index -= 1;
cv2.destroyAllWindows();

# show final
copy = np.copy(orig);
for box in boxes:
    cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
cv2.imshow("Final", copy);
cv2.waitKey(0);

Related

How to extract coordinates of each element (text, images, tables) from the PDF using Python?

I have a PDF that contains tables, text, and images. I want to extract the coordinates of each element (text block, image, table). My goal is to extract elements from the whole PDF document. Right now I'm trying to use Tesseract to get the text and OpenCV to get the images, but it is not working well. Is this possible?
Example of object detection:
Image for an example of document:
import cv2
import numpy as np

# tuplify
def tup(point):
    return (point[0], point[1]);

# returns true if the two boxes overlap
def overlap(source, target):
    # unpack points
    tl1, br1 = source;
    tl2, br2 = target;

    # checks
    if (tl1[0] >= br2[0] or tl2[0] >= br1[0]):
        return False;
    if (tl1[1] >= br2[1] or tl2[1] >= br1[1]):
        return False;
    return True;

# returns all overlapping boxes
def getAllOverlaps(boxes, bounds, index):
    overlaps = [];
    for a in range(len(boxes)):
        if a != index:
            if overlap(bounds, boxes[a]):
                overlaps.append(a);
    return overlaps;

img = cv2.imread("test.png")
orig = np.copy(img);
blue, green, red = cv2.split(img)

def medianCanny(img, thresh1, thresh2):
    median = np.median(img)
    img = cv2.Canny(img, int(thresh1 * median), int(thresh2 * median))
    return img

blue_edges = medianCanny(blue, 0, 1)
green_edges = medianCanny(green, 0, 1)
red_edges = medianCanny(red, 0, 1)

edges = blue_edges | green_edges | red_edges

# I'm using OpenCV 3.4. This returns (contours, hierarchy) in OpenCV 2 and 4
_, contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# go through the contours and save the box edges
boxes = []; # each element is [[top-left], [bottom-right]];
hierarchy = hierarchy[0]
for component in zip(contours, hierarchy):
    currentContour = component[0]
    currentHierarchy = component[1]
    x, y, w, h = cv2.boundingRect(currentContour)
    if currentHierarchy[3] < 0:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        boxes.append([[x, y], [x + w, y + h]]);

# filter out excessively large boxes
filtered = [];
max_area = 30000;
for box in boxes:
    w = box[1][0] - box[0][0];
    h = box[1][1] - box[0][1];
    if w * h < max_area:
        filtered.append(box);
boxes = filtered;

# go through the boxes and start merging
merge_margin = 15;

# this is gonna take a long time
finished = False;
highlight = [[0,0], [1,1]];
points = [[[0,0]]];
while not finished:
    # set end con
    finished = True;

    # check progress
    print("Len Boxes: " + str(len(boxes)));

    # draw boxes # comment this section out to run faster
    copy = np.copy(orig);
    for box in boxes:
        cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
    cv2.rectangle(copy, tup(highlight[0]), tup(highlight[1]), (0,0,255), 2);
    for point in points:
        point = point[0];
        cv2.circle(copy, tup(point), 4, (255,0,0), -1);
    cv2.imshow("Copy", copy);
    key = cv2.waitKey(1);
    if key == ord('q'):
        break;

    # loop through boxes in reverse so big merged boxes get processed first
    index = len(boxes) - 1;
    while index >= 0:
        # grab current box
        curr = boxes[index];

        # add margin
        tl = curr[0][:];
        br = curr[1][:];
        tl[0] -= merge_margin;
        tl[1] -= merge_margin;
        br[0] += merge_margin;
        br[1] += merge_margin;

        # get matching boxes
        overlaps = getAllOverlaps(boxes, [tl, br], index);

        # check if empty
        if len(overlaps) > 0:
            # combine boxes
            # convert to a contour
            con = [];
            overlaps.append(index);
            for ind in overlaps:
                tl, br = boxes[ind];
                con.append([tl]);
                con.append([br]);
            con = np.array(con);

            # get bounding rect
            x, y, w, h = cv2.boundingRect(con);

            # stop growing
            w -= 1;
            h -= 1;
            merged = [[x, y], [x + w, y + h]];

            # highlights
            highlight = merged[:];
            points = con;

            # remove boxes from list
            overlaps.sort(reverse = True);
            for ind in overlaps:
                del boxes[ind];
            boxes.append(merged);

            # set flag
            finished = False;
            break;

        # decrement
        index -= 1;
cv2.destroyAllWindows();

# show final
copy = np.copy(orig);
for box in boxes:
    cv2.rectangle(copy, tup(box[0]), tup(box[1]), (0,200,0), 1);
cv2.imshow("Final", copy);
cv2.waitKey(0);
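As a side note: if your PDF has a real text layer rather than scanned page images, you may not need image processing at all, since a PDF library can report element coordinates directly. Here's a minimal sketch using PyMuPDF (an assumption on my part, not part of the code above; the import name is fitz and "doc.pdf" is a placeholder filename):

import fitz  # PyMuPDF; pip install PyMuPDF

doc = fitz.open("doc.pdf")  # placeholder path to your PDF
for page in doc:
    # each block is (x0, y0, x1, y1, text, block_no, block_type),
    # where block_type is 0 for text and 1 for images
    for x0, y0, x1, y1, text, block_no, block_type in page.get_text("blocks"):
        kind = "image" if block_type == 1 else "text"
        print(kind, (x0, y0, x1, y1))

Tables aren't labelled as such by this call, so the contour approach above is still useful for scanned documents or for grouping visual regions.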

Is there a way I can count bounces from a ball in OpenCV?

I made a ball tracking program using this guide: https://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
I wanted to ask if there is a way I can tell how many bounces a ball makes in a certain time, or any method I can use to count the bounces of the ball on the ground, because I intend to use the program to track someone doing basketball dribbling training. Thank you in advance :)
I want to make something similar to this: https://youtu.be/OMXYvkryF1I at 2:26
Here is my code if it helps:
# import the necessary packages
from collections import deque
# a list-like data structure that keeps previous positions of the ball -
# we can draw a trail of the ball from it
import numpy as np
import argparse
import imutils
# imutils is the tutorial author's collection of OpenCV helpers (resizing and all)
# $ pip install --upgrade imutils
import cv2
import time

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
                help="C:/Object_detection/models-master/research/object_detection/test_images/multi_angle.mp4")
# a video path can go here, if there is one;
# if not, the program will just use the webcam
ap.add_argument("-b", "--buffer", type=int, default=64,
                help="max buffer size")
# this sets the max size of the deque that holds the points
args = vars(ap.parse_args())

# put lower & upper boundaries of the colour (HSV)
colourLow = (0, 135, 30)
colourHigh = (19, 255, 255)
pts = deque(maxlen=args["buffer"])  # initialises our deque of points

# if a video path was not supplied, grab the reference to the webcam
if not args.get("video", False):
    cap = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
    cap = cv2.VideoCapture(args["video"])

# loop for video frame capturing
while True:
    # call the read method of our capture module
    ret, frame = cap.read()
    # if we were running a video from an external source and no frame came back,
    # we reached the end of the video, so break out of the loop
    if frame is None:
        break
    frame = imutils.resize(frame, width=800)  # smaller frames mean faster processing
    blurred = cv2.GaussianBlur(frame, (11, 11), 0)  # blur reduces picture noise so we can see things more clearly
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)  # convert frame to HSV

    # mask to get the desired colour only,
    # then do erosion and dilation to remove small blobs
    mask = cv2.inRange(hsv, colourLow, colourHigh)  # locates our object in the frame
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    # will draw the outline of the ball and find its (x, y) centre
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]  # [-2] makes this work across OpenCV versions
    center = None  # no ball coordinates at first
    if len(cnts) > 0:  # only proceed if at least one contour was found
        # find the largest contour in the mask, then use it to get the
        # minimum enclosing circle and centre coords
        c = max(cnts, key=cv2.contourArea)
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
        # only proceed if the radius meets a minimum size
        if radius > 30:
            # draw the circle and centroid on the frame,
            # then update the list of tracked points
            cv2.circle(frame, (int(x), int(y)), int(radius),
                       (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)

    # update list of points
    pts.appendleft(center)

    # loop over the set of points
    for i in range(1, len(pts)):
        # if we don't have two tracked points we should ignore them
        if pts[i - 1] is None or pts[i] is None:
            continue
        ickk = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)

        def drawline(img, pt1, pt2, color, thickness=ickk, style='dotted', gap=20):
            dist = ((pt1[0] - pt2[0]) ** 2 + (pt1[1] - pt2[1]) ** 2) ** .5
            pts = []
            for i in np.arange(0, dist, gap):
                r = i / dist
                x = int((pt1[0] * (1 - r) + pt2[0] * r) + .5)
                y = int((pt1[1] * (1 - r) + pt2[1] * r) + .5)
                p = (x, y)
                pts.append(p)
            if style == 'dotted':
                for p in pts:
                    cv2.circle(img, p, thickness, color, -1)
            else:
                s = pts[0]
                e = pts[0]
                i = 0
                for p in pts:
                    s = e
                    e = p
                    if i % 2 == 1:
                        cv2.line(img, s, e, color, thickness)
                    i += 1

        # if we do, we will draw the connecting line -
        # gotta define the thickness first
        thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 2.5)
        # cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
        drawline(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)

    # show the frame on our screen
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break

# cleanup the camera and close any open windows
cap.release()
cv2.destroyAllWindows()
I set up a simulation to show what I was talking about in the comments. Basically, every time the camera takes a picture (whatever fps your camera runs at) you can get the ball's position. Using that position you can estimate velocity (change in position divided by time). If there's a sudden change in the direction of that velocity then you can count that as a bounce.
The vast majority of this code is for setting up the simulation and can be safely ignored for your purposes. Here's the relevant code block
# check if it's time for a snapshot
camera_timer += dt;  # time since last snapshot
if camera_timer > (1.0 / camera_fps):
    # estimate velocity
    est_vel[0] = (ball_pos[0] - prev_pos[0]) / camera_timer;
    est_vel[1] = (ball_pos[1] - prev_pos[1]) / camera_timer;

    # check if the sign of the velocity has changed
    if sign(est_vel[0]) != sign(prev_est_vel[0]) or sign(est_vel[1]) != sign(prev_est_vel[1]):
        # check for bounces from large change in velocity
        dvx = abs(est_vel[0] - prev_est_vel[0]);
        dvy = abs(est_vel[1] - prev_est_vel[1]);
        change_vel = math.sqrt(dvx*dvx + dvy*dvy);
        if change_vel > bounce_thresh:
            bounce_count += 1;

    # update previous state trackers
    prev_est_vel = est_vel[:];
    prev_pos = ball_pos[:];

    # reset camera timer
    camera_timer = 0;
    snap = True;
And here's the entire thing if you want to play with the simulation yourself
import cv2
import numpy as np
import time
import math

# get mouse click
click_pos = None;
click = False;
def mouseClick(event, x, y, flags, param):
    # hook to globals
    global click_pos;
    global click;

    # check for left mouseclick
    if event == cv2.EVENT_LBUTTONDOWN:
        click = True;
        click_pos = (x,y);

# return sign of number
def sign(val):
    if val > 0:
        return 1;
    if val < 0:
        return -1;
    return 0;

# create blank image
res = (600,600,3);
bg = np.zeros(res, np.uint8);
display = np.zeros(res, np.uint8);

# set up click callback
cv2.namedWindow("Display");
cv2.setMouseCallback("Display", mouseClick);
click_force = 1000;

# font stuff
font = cv2.FONT_HERSHEY_SIMPLEX;
fontScale = 1;
fontColor = (255, 100, 0);
thickness = 2;

# make a ball
ball_radius = 20;
ball_pos = [300,300];
ball_vel = [0,0];

# set physics
drag = 0.98;
bounce_mult = 0.95;
grav = -9.8; # acceleration in pixels per second
time_scale = 5.0;

# register click animations
click_anims = [];
anim_dur = 0.25; # seconds
anim_radius = 20; # pixels

# track bounces
prev_pos = ball_pos[:];
est_vel = [0,0];
prev_est_vel = [0,0];
bounce_count = 0;
bounce_thresh = 10; # velocity must have a sudden change greater than this magnitude to count
camera_fps = 24; # we'll only take snapshots at this speed
camera_timer = 0; # time since last snapshot
snap = False;
pic_count = 0;

# loop
done = False;
prev_time = time.time();
while not done:
    # refresh display
    display = np.copy(bg);

    # update timestep
    now_time = time.time();
    dt = now_time - prev_time;
    dt *= time_scale;
    prev_time = now_time;

    # update physics
    # position
    ball_pos[0] += ball_vel[0] * dt;
    ball_pos[1] += ball_vel[1] * dt;

    # velocity
    ball_vel[1] -= grav * dt;
    drag_mult = (1 - ((1 - drag) * dt));
    ball_vel[0] *= drag_mult;
    ball_vel[1] *= drag_mult;

    # check for mouse click
    if click:
        # register animation
        click = False;
        click_anims.append([time.time(), click_pos[:]]);

        # get dist
        dx = ball_pos[0] - click_pos[0];
        dy = ball_pos[1] - click_pos[1];
        dist = math.sqrt(dx*dx + dy*dy);

        # clamp dist
        if dist < 1:
            dist = 1;

        # get force attenuation
        # force = click_force / (dist*dist); # too much
        force = click_force / dist;

        # get angle and get axial force
        angle = math.atan2(dy, dx);
        xforce = math.cos(angle) * force;
        yforce = math.sin(angle) * force;

        # apply force
        ball_vel[0] += xforce;
        ball_vel[1] += yforce;

    # check for bounce
    # left
    if ball_pos[0] - ball_radius < 0:
        ball_pos[0] = 0 + ball_radius;
        ball_vel[0] *= -bounce_mult;
    # right
    if ball_pos[0] + ball_radius > res[0]:
        ball_pos[0] = res[0] - ball_radius;
        ball_vel[0] *= -bounce_mult;
    # up # +y-axis is down in OpenCV
    if ball_pos[1] - ball_radius < 0:
        ball_pos[1] = 0 + ball_radius;
        ball_vel[1] *= -bounce_mult;
    # down
    if ball_pos[1] + ball_radius > res[1]:
        ball_pos[1] = res[1] - ball_radius;
        ball_vel[1] *= -bounce_mult;

    # check if it's time for a snapshot
    camera_timer += dt; # time since last snapshot
    if camera_timer > (1.0 / camera_fps):
        # estimate velocity
        est_vel[0] = (ball_pos[0] - prev_pos[0]) / camera_timer;
        est_vel[1] = (ball_pos[1] - prev_pos[1]) / camera_timer;

        # check if the sign of the velocity has changed
        if sign(est_vel[0]) != sign(prev_est_vel[0]) or sign(est_vel[1]) != sign(prev_est_vel[1]):
            # check for bounces from large change in velocity
            dvx = abs(est_vel[0] - prev_est_vel[0]);
            dvy = abs(est_vel[1] - prev_est_vel[1]);
            change_vel = math.sqrt(dvx*dvx + dvy*dvy);
            if change_vel > bounce_thresh:
                bounce_count += 1;

        # update previous state trackers
        prev_est_vel = est_vel[:];
        prev_pos = ball_pos[:];

        # reset camera timer
        camera_timer = 0;
        snap = True;

    # draw bounce text
    cv2.putText(display, "Bounces: " + str(bounce_count), (15,40), font,
                fontScale, fontColor, thickness, cv2.LINE_AA);

    # draw ball
    x, y = ball_pos;
    cv2.circle(display, (int(x), int(y)), ball_radius, (220,150,0), -1);

    # draw click animations
    for a in range(len(click_anims)-1, -1, -1):
        # get lifetime
        life = now_time - click_anims[a][0];
        if life > anim_dur:
            del click_anims[a];
        else:
            # draw
            mult = life / anim_dur;
            radius = int(anim_radius * mult);
            if radius > 0:
                val = 255 - int(255 * mult);
                color = [val, val, val];
                cv2.circle(display, click_anims[a][1], radius, color, 2);

    # show
    cv2.imshow("Display", display);
    key = cv2.waitKey(1);

    # # if snapshot, save a picture
    # if snap:
    #     snap = False;
    #     cv2.imwrite("bouncy/" + str(pic_count).zfill(5) + ".png", display);
    #     pic_count += 1;

    # check keypresses
    done = key == ord('q');
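To try the simulation: click anywhere in the window to knock the ball away from the click (the force falls off with distance), and press q to quit. The bounce counter in the corner increments whenever the velocity estimated at camera speed changes direction sharply enough to exceed bounce_thresh.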

How To Draw a Triangle-Arrow With The Positions of Detected Objects

I am making an object detection project.
I have written my code by following a tutorial. In the tutorial, the author draws a rectangle in OpenCV for every single object that is detected.
But I want to change the rectangle to a triangle or an arrow.
Let me explain with code:
In my function, I detect objects, and here I draw a rectangle for each detected object:
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to change this rectangle to a triangle, and I want to position the triangle above the object,
just like in these images:
This is the object detection with triangle
This is what I want to draw instead of the rectangle:
How can I draw a triangle/arrow at the positions of my detected objects?
All of my code is here:
from os.path import sep
import cv2 as cv2
import numpy as np
import json

# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam = cap_cam.read()
hey = 0
print(cv2.__version__)
whT = 320
confThreshold = 0.5
nmsThreshold = 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')
print(classNames)

## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

def findObjects(outputs, img):
    global hey
    global previousHey
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2]*wT), int(det[3]*hT)
                x, y = int((det[0]*wT)-w/2), int((det[1]*hT)-h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))
    global indicates
    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    hey = 0
    for i in indices:
        i = i[0]
        box = bbox[i]
        x, y, w, h = box[0], box[1], box[2], box[3]
        # print(x,y,w,h)
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 255), 2)
        #cv2.line(img, (350,400), (x, y), (255,0,0), 4)
        #cv2.line(img, (400,400), (x + 50 , y), (255,0,0), 4)
        #cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
        #(x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
        print('success')
        hey = 1

video_frame_counter = 0
while cap_cam.isOpened():
    img = cv2.imread('photos' + sep + 'lutfen.jpg')
    # THE ARROW VIDEO WILL BE PLAYED HERE
    # try "if not detection" tomorrow
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layersNames = net.getLayerNames()
    outputNames = [(layersNames[i[0] - 1]) for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(outputNames)
    findObjects(outputs, img)
    cv2.imshow('Image', img)

    # Video feed
    if hey == 1:
        filename = 'photos' + sep + 'Baslksz-3.mp4'
        cap_vid = cv2.VideoCapture(filename)
    if hey == 0:
        filename = 'photos' + sep + 'vid2.mp4'
        cap_vid = cv2.VideoCapture(filename)
    print(hey)
    ret, frame_vid = cap_vid.read()

    #cap_cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    #cap_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

    # Resize the camera frame to the size of the video
    height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Capture the next frame from camera
    ret, frame_cam = cap_cam.read()
    video_frame_counter += 1
    if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
        video_frame_counter = 0
        cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
    frame_cam = cv2.resize(frame_cam, (width, height), interpolation=cv2.INTER_AREA)
    #ret = cap_vid.set(cv2.CAP_PROP_POS_MSEC, time_passed)
    ret, frame_vid = cap_vid.read()
    if not ret:
        print('Cannot read from video stream')
        break

    # Blend the two images and show the result
    tr = 0.4  # transparency between 0-1, show camera if 0
    frame = ((1-tr) * frame_cam.astype(np.float) + tr * frame_vid.astype(np.float)).astype(np.uint8)
    cv2.imshow('Transparent result', frame)
    if cv2.waitKey(1) == 27:  # ESC is pressed
        break

cap_cam.release()
cap_vid.release()
cv2.destroyAllWindows()
The easy way
You can use the cv.arrowedLine() function that will draw something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
which should give a result similar to the image below. Take a look at the OpenCV documentation for the description of the parameters of the function. You can change its size, thickness, color, etc.
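For reference, the positional arguments in that call are img, pt1, pt2, color, thickness, line_type, shift, and tipLength; the trailing 0.5 makes the arrow head half the length of the line instead of the default 0.1.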
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv.drawContours() to render it. For example:
# define the arrow shape
shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
[10,-50],[10,-25],[25,-25]]])
# move it to the desired position
cx = x + w // 2
cy = y - 5
shape[:,:,0] += cx
shape[:,:,1] += cy
# draw it
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet will give you the image below. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.
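As an aside, cv2.drawContours() also accepts an offset parameter, so instead of translating the vertices manually you could keep shape centred at the origin and shift it at draw time:

# equivalent: draw the origin-centred template shifted to (cx, cy)
cv2.drawContours(img, shape, -1, (0, 255, 0), -1, offset=(cx, cy))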

My Python code raised a ValueError while using cv2.findContours() -> not enough values to unpack (expected 3, got 2)

When I tried to run it, the shell threw an error:
line 48, in nada,contours,nada = cv2.findContours(frame5.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
ValueError: not enough values to unpack (expected 3, got 2)
Please help me fix it.
Thanks in advance.
# master frame
master = None

while 1:
    # grab a frame
    (grabbed, frame0) = camera.read()

    # end of feed
    if not grabbed:
        break

    # gray frame
    frame1 = cv2.cvtColor(frame0, cv2.COLOR_BGR2GRAY)

    # blur frame
    frame2 = cv2.GaussianBlur(frame1, (15,15), 0)

    # initialize master
    if master is None:
        master = frame2
        continue

    # delta frame
    frame3 = cv2.absdiff(master, frame2)

    # threshold frame
    frame4 = cv2.threshold(frame3, 15, 255, cv2.THRESH_BINARY)[1]

    # dilate the thresholded image to fill in holes
    kernel = np.ones((2,2), np.uint8)
    frame5 = cv2.erode(frame4, kernel, iterations=4)
    frame5 = cv2.dilate(frame5, kernel, iterations=8)

    # find contours on thresholded image
    nada,contours,nada = cv2.findContours(frame5.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)

    # make contour frame
    frame6 = frame0.copy()

    # target contours
    targets = []

    # loop over the contours
    for c in contours:
        # if the contour is too small, ignore it
        if cv2.contourArea(c) < 500:
            continue

        # contour data
        M = cv2.moments(c)  # ;print( M )
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        x, y, w, h = cv2.boundingRect(c)
        rx = x + int(w/2)
        ry = y + int(h/2)
        ca = cv2.contourArea(c)

        # plot contours
        cv2.drawContours(frame6, [c], 0, (0,0,255), 2)
        cv2.rectangle(frame6, (x,y), (x+w,y+h), (0,255,0), 2)
        cv2.circle(frame6, (cx,cy), 2, (0,0,255), 2)
        cv2.circle(frame6, (rx,ry), 2, (0,255,0), 2)

        # save target contours
        targets.append((cx, cy, ca))

    # make target
    mx = 0
    my = 0
    if targets:
        # average centroid adjusted for contour size
        #area = 0
        #for x,y,a in targets:
        #    mx += x*a
        #    my += y*a
        #    area += a
        #mx = int(round(mx/area,0))
        #my = int(round(my/area,0))

        # centroid of largest contour
        area = 0
        for x, y, a in targets:
            if a > area:
                mx = x
                my = y
                area = a

    # plot target
    tr = 50
    frame7 = frame0.copy()
    if targets:
        cv2.circle(frame7, (mx,my), tr, (0,0,255,0), 2)
        cv2.line(frame7, (mx-tr,my), (mx+tr,my), (0,0,255,0), 2)
        cv2.line(frame7, (mx,my-tr), (mx,my+tr), (0,0,255,0), 2)

    # update master
    master = frame2

    # display
    cv2.imshow("Frame0: Raw", frame0)
    cv2.imshow("Frame1: Gray", frame1)
    cv2.imshow("Frame2: Blur", frame2)
    cv2.imshow("Frame3: Delta", frame3)
    cv2.imshow("Frame4: Threshold", frame4)
    cv2.imshow("Frame5: Dilated", frame5)
    cv2.imshow("Frame6: Contours", frame6)
    cv2.imshow("Frame7: Target", frame7)

    # key delay and action
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key != 255:
        print('key:', [chr(key)])

# release camera
camera.release()

# close all windows
cv2.destroyAllWindows()
The problem comes down to something very simple.
nada,contours,nada = cv2.findContours(frame5.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
You are expecting 3 values out of this function, but it returns only two. Check the library documentation for your OpenCV version to see what it actually returns.
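For reference, cv2.findContours() returns (image, contours, hierarchy) in OpenCV 3, but just (contours, hierarchy) in OpenCV 2 and 4. A version-agnostic sketch of the fix is to take the last two values, whichever version is installed:

# findContours returns 3 values in OpenCV 3 and 2 values in OpenCV 2/4;
# grabbing the last two works in every version
result = cv2.findContours(frame5.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours, hierarchy = result[-2], result[-1]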

Centroid tracking using background subtraction in Python

So I have been following this tutorial for centroid tracking
https://www.pyimagesearch.com/2018/07/23/simple-object-tracking-with-opencv/
and have built the centroid tracking class as described in the tutorial.
Now when I try to use background subtraction for the detection instead of the CNN the tutorial uses, it does not work and gives me this error from CentroidTracker.py:
for i in range(0, inputCentroids):
TypeError: only integer scalar arrays can be converted to a scalar index
Here is my code that I am using
for i in range(0, num_frames):
    rects = []
    # get the very first image from the video
    if first_iteration == 1:
        ret, frame = cap.read()
        frame = cv2.resize(frame, (imageHight, imageWidth))
        first_frame = copy.deepcopy(frame)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        height, width = frame.shape[:2]
        print("shape:", height, width)
        first_iteration = 0
    else:
        ret, frame = cap.read()
        frame = cv2.resize(frame, (imageHight, imageWidth))
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        forgroundMask = backgroundSub.apply(frame)

        # get a contour for each person
        _, contours, _ = cv2.findContours(forgroundMask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = filter(lambda cont: cv2.contourArea(cont) > 20, contours)

        # get a bbox from each contour
        for c in contours:
            (x, y, w, h) = cv2.boundingRect(c)
            rectangle = [x, y, (x + w), (y + h)]
            rects.append(rectangle)
            cv2.rectangle(frame, (rectangle[0], rectangle[1]), (rectangle[2], rectangle[3]),
                          (0, 255, 0), 2)

        objects = ct.update(rects)

        for (objectID, centroid) in objects.items():
            text = "ID:{}".format(objectID)
            cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

        '''Display Windows'''
        cv2.imshow('FGMask', forgroundMask)
        frame1 = frame.copy()
        cv2.imshow('MOG', frame1)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
The code is breaking at the objects = ct.update(rects) line.
Here is the implementation of the CentroidTracker from the Tutorial:
from scipy.spatial import distance as dist
from collections import OrderedDict
import numpy as np

# Assigns the next unique object ID, tracked with
# 2 ordered dictionaries
class CentroidTracker():
    def __init__(self, maxDisappeared=50):
        self.nextObjectID = 0
        self.objects = OrderedDict()
        self.disappeared = OrderedDict()
        self.maxDisappeared = maxDisappeared

    def register(self, centroid):
        self.objects[self.nextObjectID] = centroid
        self.disappeared[self.nextObjectID] = 0
        self.nextObjectID += 1

    def deregister(self, objectID):
        del self.objects[objectID]
        del self.disappeared[objectID]

    def update(self, rects):
        if len(rects) == 0:
            for objectID in self.disappeared.keys():
                self.disappeared[objectID] += 1
                if self.disappeared[objectID] > self.maxDisappeared:
                    self.deregister(objectID)
            return self.objects

        inputCentroids = np.zeros((len(rects), 2), dtype="int")
        for (i, (startX, startY, endX, endY)) in enumerate(rects):
            cX = int((startX + endX) / 2.0)
            cY = int((startY + endY) / 2.0)
            inputCentroids[i] = (cX, cY)

        if len(self.objects) == 0:
            for i in range(0, inputCentroids):
                self.register(inputCentroids[i])
        else:
            objectIDs = list(self.objects.keys())
            objectCentroids = list(self.objects.values())
            D = dist.cdist(np.array(objectCentroids), inputCentroids)
            rows = D.min(axis=1).argsort()
            cols = D.argmin(axis=1)[rows]
            usedRows = set()
            usedCols = set()
            for (row, col) in zip(rows, cols):
                if row in usedRows or col in usedCols:
                    continue
                objectID = objectIDs[row]
                self.objects[objectID] = inputCentroids[col]
                self.disappeared[objectID] = 0
                usedRows.add(row)
                usedCols.add(col)

            # compute both the row and column indexes we have NOT yet
            # examined
            unusedRows = set(range(0, D.shape[0])).difference(usedRows)
            unusedCols = set(range(0, D.shape[1])).difference(usedCols)
            if D.shape[0] >= D.shape[1]:
                # loop over the unused row indexes
                for row in unusedRows:
                    # grab the object ID for the corresponding row
                    # index and increment the disappeared counter
                    objectID = objectIDs[row]
                    self.disappeared[objectID] += 1
                    # check to see if the number of consecutive
                    # frames the object has been marked "disappeared"
                    # for warrants deregistering the object
                    if self.disappeared[objectID] > self.maxDisappeared:
                        self.deregister(objectID)
            else:
                for col in unusedCols:
                    self.register(inputCentroids[col])

        # return the set of trackable objects
        return self.objects
I am kind of lost on what I am doing wrong here. All I should need to do is pass bounding boxes (x, y, x+w, y+h) into the rects list, correct? Or am I wrong and don't understand how this works? Any help will be appreciated.
You have forgotten the len function: for i in range(0, len(inputCentroids)):
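In context, the corrected block inside update() would read:

if len(self.objects) == 0:
    # register every centroid detected in the first frame
    for i in range(0, len(inputCentroids)):
        self.register(inputCentroids[i])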
I did what Axel Puig said and then added this check to the main method:
objects = ct.update(rects)
if objects is not None:
    for (objectID, centroid) in objects.items():
        text = "ID:{}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
That fixed the issue. What I think was happening is that the first frame didn't initialize the tracker, so I needed to make sure the result was not None; after that it worked.
