Only detect contours within a square - python

Hello, I have code that draws the outlines of shapes/objects that have 3 or 4 vertices. So far everything works. But now I want only shapes/objects that lie inside a square to be recognized. My idea was to create an image showing a square, read it with cv2.imread, and then use cv2.matchShapes to check how closely each detected contour matches the contour of the square. If the match is good enough, the shapes/objects inside that square should be recognized.
Following is the code I have written so far:
import math
import cv2
import numpy as np
from graphics import *
cam = cv2.VideoCapture(0)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Reference contour: taken from the square template image.
rect = cv2.imread("Rectangle.png")
gray = cv2.cvtColor(rect, cv2.COLOR_BGR2GRAY)
rectcontour, _ = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

while True:
    ok, img = cam.read()
    if not ok:
        # The camera delivered no frame; stop instead of crashing in resize().
        break
    img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
    gray1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # edges = cv2.Canny(gray1, 250, 300)
    _, binary1 = cv2.threshold(gray1, 130, 255, cv2.THRESH_BINARY)
    inverted_binary1 = ~binary1
    contours1, _ = cv2.findContours(inverted_binary1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Iterate over the contours themselves. Using a contour (a NumPy array)
    # as an index into contours1 raises
    # "TypeError: only integer scalar arrays can be converted to a scalar index".
    for candidate in contours1:
        x = cv2.matchShapes(candidate, rectcontour[0], cv2.CONTOURS_MATCH_I1, 0.0)

    for contour in contours1:
        area = cv2.contourArea(contour)
        if area > 100 and area < 20000:
            approx = cv2.approxPolyDP(contour, 0.06 * cv2.arcLength(contour, True), True)
            # Keep only polygons with 3 or 4 vertices.
            if 2 < len(approx) < 5:
                moment = cv2.moments(contour)
                cx = int(moment['m10'] / moment['m00'])
                cy = int(moment['m01'] / moment['m00'])
                print(cx, cy)
                cv2.drawContours(img, [contour], -1, (0, 0, 255), 3)
                # approx has one (x, y) pair per vertex; label each vertex
                # with its coordinates.
                for i, (x, y) in enumerate(approx.reshape(-1, 2)):
                    # String containing the co-ordinates.
                    string = str(x) + " " + str(y)
                    # First vertex in blue, remaining vertices in green.
                    color = (255, 0, 0) if i == 0 else (0, 255, 0)
                    cv2.putText(img, string, (int(x), int(y)),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color)

    cv2.imshow("schwarz, weiß", inverted_binary1)
    cv2.imshow('my webcam', img)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Free the camera and close the preview windows on exit.
cam.release()
cv2.destroyAllWindows()
At the moment I get following error:
Traceback (most recent call last):
File "C:\Users\Paul\PycharmProjects\Opencv\", line 27, in <module>
x = cv2.matchShapes(contours1[a], rectcontour[0], cv2.CONTOURS_MATCH_I1, 0.0)
TypeError: only integer scalar arrays can be converted to a scalar index
[ WARN:0] global D:\a\opencv-python\opencv-python\opencv\modules\videoio\src\cap_msmf.cpp (438) `anonymous-namespace'::SourceReaderCB::~SourceReaderCB terminating async callback
I don't know exactly how to fix that.
The square image I am using:
And another Picture of the shape I actually want to detect:


How do I clock the user action?

I am following a simple head pose estimation tutorial in Python. I tried to modify the code, but I have been stuck for days. I just want to know how long the user has been looking to the left or right. If the user is detected looking to the left or right for a long time, let's say 2-3 minutes, then the program should give a warning or print a simple message saying how long he/she has been looking in that direction. How do I achieve this? Any ideas?
sorry for my bad english
any help will be appreciated :)
here's my code:
import cv2
import mediapipe as mp
import numpy as np
import time
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)

# Landmark indices used as 2D/3D correspondences for solvePnP.
POSE_LANDMARKS = (33, 263, 1, 61, 291, 199)

while cap.isOpened():
    success, image = cap.read()
    if not success:
        # No frame available; stop instead of passing None to resize().
        break
    image = cv2.resize(image, (780, 350))
    image = cv2.flip(image, 1)
    # start = time.time()
    # Flip the image horizontally for a later selfie-view display
    # Also convert the color space from BGR to RGB
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    # To improve performance
    image.flags.writeable = False
    # Get the result
    results = face_mesh.process(image)
    # To improve performance
    image.flags.writeable = True
    # Convert the color space from RGB to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    img_h, img_w, img_c = image.shape
    face_3d = []
    face_2d = []
    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx in POSE_LANDMARKS:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)
                    x, y = int(lm.x * img_w), int(lm.y * img_h)
                    # Get the 2D Coordinates
                    face_2d.append([x, y])
                    # Get the 3D Coordinates
                    face_3d.append([x, y, lm.z])
            # Convert the point lists to NumPy arrays
            face_2d = np.array(face_2d, dtype=np.float64)
            face_3d = np.array(face_3d, dtype=np.float64)
            # The camera matrix: [[fx, 0, cx], [0, fy, cy], [0, 0, 1]].
            # cx is the horizontal centre (width / 2) and cy the vertical
            # centre (height / 2).
            focal_length = 1 * img_w
            cam_matrix = np.array([[focal_length, 0, img_w / 2],
                                   [0, focal_length, img_h / 2],
                                   [0, 0, 1]])
            # The distortion parameters
            dist_matrix = np.zeros((4, 1), dtype=np.float64)
            # Solve PnP
            success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)
            # Get rotational matrix
            rmat, jac = cv2.Rodrigues(rot_vec)
            # Get angles
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
            # Scale the decomposed angles
            x = angles[0] * 360
            y = angles[1] * 360
            z = angles[2] * 360
            # See where the user's head tilting
            if y < -10:
                # i want to know how long this guy been looking to his left or right or up or down
                text = "Looking Left"
            elif y > 10:
                text = "Looking Right"
            elif x < -10:
                text = "Looking Down"
            elif x > 10:
                text = "Looking Up"
            else:
                text = "Forward"
            # Display the nose direction
            nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
            p1 = (int(nose_2d[0]), int(nose_2d[1]))
            p2 = (int(nose_2d[0] + y * 10), int(nose_2d[1] - x * 10))
            cv2.line(image, p1, p2, (255, 0, 0), 3)
            # Add the text on the image
            cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
            cv2.putText(image, "x: " + str(np.round(x, 2)), (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "y: " + str(np.round(y, 2)), (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "z: " + str(np.round(z, 2)), (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    # need some fix here
    # end = time.time()
    # totalTime = end - start
    # fps = 1 / totalTime
    # print("FPS: ", fps)
    # cv2.putText(image, f'FPS: {int(fps)}', (20,450), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
    cv2.imshow('Head Pose Estimation', image)
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Free the camera once the loop ends.
cap.release()

Capture cv2.imshow output and save it as an image

I'm trying to save the output imgCanny as an image/screenshot. How can I capture a screenshot of that window when a certain key is pressed? I am taking the live feed from the webcam and processing it. I would then like to press a key on the keyboard to capture and save a screenshot of the imgCanny window.
cap = cv2.VideoCapture(1)
cap.set(3, frameWidth)
cap.set(4, frameHeight)
def empty(a):
    """Do nothing; serves as the callback handed to ``cv2.createTrackbar``."""
    pass
cv2.resizeWindow("Parameters", 640, 240)
cv2.createTrackbar("Threshold1", "Parameters", 150, 500, empty)
cv2.createTrackbar("Threshold2", "Parameters", 255, 500, empty)
def getContours(img, imgContour):
    """Annotate ``imgContour`` with the contours found in ``img``.

    For every external contour, the vertices of its polygonal approximation
    are labelled with their coordinates divided by 3.5. Contours with an
    area above 3000 are additionally drawn in magenta.

    Args:
        img: Image passed to ``cv2.findContours``.
        imgContour: Image that is drawn on in place.
    """
    # RETR_EXTERNAL retrieves only the extreme outer contours.
    # Change CHAIN_APPROX_NONE to CHAIN_APPROX_SIMPLE for fewer points.
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        area = cv2.contourArea(cnt)
        approx = cv2.approxPolyDP(cnt, 0.009 * cv2.arcLength(cnt, True), True)
        # approx has one (x, y) pair per vertex.
        for i, (x, y) in enumerate(approx.reshape(-1, 2)):
            # String containing the co-ordinates.
            string = str(int(x / 3.5)) + " " + str(int(y / 3.5))
            org = (int(x), int(y))
            if i == 0:
                # The first vertex gets an empty label.
                cv2.putText(imgContour, "", org,
                            font, 0.5, (255, 0, 0))
            else:
                # text on remaining co-ordinates.
                cv2.putText(imgContour, string, org,
                            font, 0.5, (0, 255, 0))
        # Skip small bits of noise when drawing outlines.
        if area > 3000:
            cv2.drawContours(imgContour, cnt, -1, (255, 0, 255), 1)
# Main loop: grab a frame, run blur -> gray -> Canny -> dilate, and show it.
while True:
    success, img = cap.read()
    if not success:
        # No frame delivered by the camera; stop instead of failing on None.
        break
    imgContour = img.copy()
    imgBlur = cv2.GaussianBlur(img, (31, 31), 1)
    imgGray = cv2.cvtColor(imgBlur, cv2.COLOR_BGR2GRAY)
    # Canny thresholds come from the "Parameters" trackbars.
    threshold1 = cv2.getTrackbarPos("Threshold1", "Parameters")
    threshold2 = cv2.getTrackbarPos("Threshold2", "Parameters")
    imgCanny = cv2.Canny(imgGray, threshold1, threshold2)
    kernel = np.ones((5, 5))
    imgDil = cv2.dilate(imgCanny, kernel, iterations=1)
    getContours(imgDil, imgContour)
    # cv2.imshow("Results", img)
    # cv2.imshow("Mask", imgGray)
    cv2.imshow("Canny", imgCanny)
    cv2.imshow("Dilated", imgContour)
    cv2.imshow("Test", imgDil)
    key = cv2.waitKey(100)
    if key == 27:  # kills with Esc
        break

# Free the camera and close the windows on exit.
cap.release()
cv2.destroyAllWindows()

How To Draw a Triangle-Arrow With The Positions of Detected Objects

I am making a object detection project.
I have my code. And I have written it by following a tutorial. In the tutorial, the guy drew a rectangle in opencv for every single object which is detected.
But I want to change the rectangle to triangle or Arrow.
let me explain with code===>
In my function, I detect objects.
And here I draw rectangle for detected objects==>
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to change this rectangle to a triangle, and I want to position the triangle above the object.
Just like in these images:::
This is the object detection with triangle
This is the thing that what I want to make instead of rectangle:::
How Can I make a triangle/arrow with positions of my detected objects?
All of my code is here==>
from os.path import sep
import cv2 as cv2
import numpy as np
import json
# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam =
hey = 0
print(cv2. __version__)
whT = 320
confThreshold =0.5
nmsThreshold= 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames ='\n').split('\n')
## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
def findObjects(outputs,img):
global hey
global previousHey
hT, wT, cT = img.shape
bbox = []
classIds = []
confs = []
for output in outputs:
for det in output:
scores = det[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > confThreshold:
w,h = int(det[2]*wT) , int(det[3]*hT)
x,y = int((det[0]*wT)-w/2) , int((det[1]*hT)-h/2)
global indicates
indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
hey = 0
for i in indices:
i = i[0]
box = bbox[i]
x, y, w, h = box[0], box[1], box[2], box[3]
# print(x,y,w,h)
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
#cv2.line(img, (350,400), (x, y), (255,0,0), 4)
#cv2.line(img, (400,400), (x + 50 , y), (255,0,0), 4)
#cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
#(x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
hey = 1
video_frame_counter = 0
while cap_cam.isOpened():
img = cv2.imread('photos' + sep + 'lutfen.jpg')
#if not decetiona diye dene yarın.
blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
layersNames = net.getLayerNames()
outputNames = [(layersNames[i[0] - 1]) for i in net.getUnconnectedOutLayers()]
outputs = net.forward(outputNames)
cv2.imshow('Image', img)
# Video feed
if hey == 1:
filename = 'photos' + sep + 'Baslksz-3.mp4'
cap_vid = cv2.VideoCapture(filename)
if hey == 0:
filename = 'photos' + sep + 'vid2.mp4'
cap_vid = cv2.VideoCapture(filename)
ret, frame_vid =
#cap_cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
#cap_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
# Resize the camera frame to the size of the video
height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))
# Capture the next frame from camera
ret, frame_cam =
video_frame_counter += 1
if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
video_frame_counter = 0
cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
frame_cam = cv2.resize(frame_cam, (width, height), interpolation = cv2.INTER_AREA)
#ret = cap_vid.set(cv2.CAP_PROP_POS_MSEC, time_passed)
ret, frame_vid =
if not ret:
print('Cannot read from video stream')
# Blend the two images and show the result
tr = 0.4 # transparency between 0-1, show camera if 0
frame = ((1-tr) * frame_cam.astype(np.float) + tr * frame_vid.astype(np.float)).astype(np.uint8)
cv2.imshow('Transparent result', frame)
if cv2.waitKey(1) == 27: # ESC is pressed
The easy way
You can use the cv.arrowedLine() function that will draw something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
which should give a result similar to the image below. Take a look at the OpenCV documentation for the description of the parameters of the function. You can change its size, thickness, color, etc.
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv.drawContours() to render it. For example:
# define the arrow shape: tip at the origin, pointing down.
# drawContours needs 32-bit integer points, so set the dtype explicitly.
shape = np.array([[[0, 0], [-25, -25], [-10, -25], [-10, -50],
                   [10, -50], [10, -25], [25, -25]]], dtype=np.int32)
# move it to the desired position (tip just above the box's top-centre)
cx = x + w // 2
cy = y - 5
shape[:, :, 0] += cx
shape[:, :, 1] += cy
# draw it (thickness -1 fills the contour)
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet will give you the image below. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.

Opencv python distance calculation strange square detection

Hi, I'm using a Raspberry Pi and a Pi camera to measure the distance between the Pi and an object (a square box). I take the perimeter of the contour and divide it by 4 to get one side length, then compute Distance = (length x known length) / calculated side length.
I always get a square that is the whole frame of the camera window, and a distance is reported for it. How can I avoid detecting the whole frame as a square?
any idea why it comes and how to eliminate it ?
import cv2
import numpy as np
import time
def nothing(x):
    """Placeholder callback: accept one argument and do nothing."""
    # any operation
    pass
cap = cv2.VideoCapture(0)
# Font used for the shape labels drawn on the frame.
font = cv2.FONT_HERSHEY_COMPLEX

while True:
    ok, frame = cap.read()
    if not ok:
        # No frame delivered by the camera; stop instead of failing on None.
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # HSV bounds for the mask.
    l_h = 0
    l_s = 0
    l_v = 42
    u_h = 180
    u_s = 255
    u_v = 255
    lower_red = np.array([l_h, l_s, l_v])
    upper_red = np.array([u_h, u_s, u_v])
    mask = cv2.inRange(hsv, lower_red, upper_red)
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.erode(mask, kernel)
    # Contours detection
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        area = cv2.contourArea(cnt)
        approx = cv2.approxPolyDP(cnt, 0.02 * cv2.arcLength(cnt, True), True)
        # The first vertex of the approximation anchors the label text.
        x = approx.ravel()[0]
        y = approx.ravel()[1]
        perimeter = cv2.arcLength(cnt, True)
        if area > 200:
            cv2.drawContours(frame, [approx], 0, (0, 0, 0), 5)
            # Classify by vertex count; the side length is perimeter / sides.
            if len(approx) == 3:
                cv2.putText(frame, "triangle ", (x, y), font, 1, (0, 0, 0))
                print("Traingle is detected and distance is = ", ((154 * 58) / (perimeter / 3)) * 0.2645, "mm")
            elif len(approx) == 4:
                cv2.putText(frame, "square", (x, y), font, 1, (0, 0, 0))
                print("Square is detected and distance is = ", ((154 * 58) / (perimeter / 4)) * 0.2645, "mm")
                # print("square distance is = ",((149*58)/(perimeter/4))*0.2645 , "mm")
            elif len(approx) == 5:
                cv2.putText(frame, "Pentogon", (x, y), font, 1, (0, 0, 0))
                print("Pentogon is detected and distance is = ", ((151 * 58) / (perimeter / 5)) * 0.2645, "mm")
            elif len(approx) == 6:
                cv2.putText(frame, "6angle", (x, y), font, 1, (0, 0, 0))
    cv2.imshow("Frame", frame)
    cv2.imshow("Mask", mask)
    key = cv2.waitKey(1)
    if key == 0:
        break

# Free the camera and close the windows on exit.
cap.release()
cv2.destroyAllWindows()

Can't open VideoCapture in python

I have this code that I downloaded from GitHub to use in an OpenCV project. Everything worked fine the first time, but after that it won't open, and it keeps showing me the following error at line 8:
Traceback (most recent call last):
File "", line 8, in <module>
crop_img = img[100:300,100:300]
TypeError: 'NoneType' object is not subscriptable
Here is the code:
import cv2
import numpy as np
import math
cap = cv2.VideoCapture(0)
ret, img =
crop_img = img[100:300,100:300]
grey = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
value = (35, 35)
blurred = cv2.GaussianBlur(grey, value, 0)
_, thresh1 = cv2.threshold(blurred, 127, 255,
cv2.imshow('Thresholded', thresh1)
contours, hierarchy = cv2.findContours(thresh1.copy(),cv2.RETR_TREE, \
max_area = -1
for i in range(len(contours)):
area = cv2.contourArea(cnt)
x,y,w,h = cv2.boundingRect(cnt)
hull = cv2.convexHull(cnt)
drawing = np.zeros(crop_img.shape,np.uint8)
hull = cv2.convexHull(cnt,returnPoints = False)
defects = cv2.convexityDefects(cnt,hull)
count_defects = 0
cv2.drawContours(thresh1, contours, -1, (0,255,0), 3)
for i in range(defects.shape[0]):
s,e,f,d = defects[i,0]
start = tuple(cnt[s][0])
end = tuple(cnt[e][0])
far = tuple(cnt[f][0])
a = math.sqrt((end[0] - start[0])**2 + (end[1] - start[1])**2)
b = math.sqrt((far[0] - start[0])**2 + (far[1] - start[1])**2)
c = math.sqrt((end[0] - far[0])**2 + (end[1] - far[1])**2)
angle = math.acos((b**2 + c**2 - a**2)/(2*b*c)) * 57
if angle <= 90:
count_defects += 1,far,1,[0,0,255],-1)
#dist = cv2.pointPolygonTest(cnt,far,True)
if count_defects == 1:
cv2.putText(img,"this is 2", (50,50), cv2.FONT_HERSHEY_SIMPLEX, 2, 2)
elif count_defects == 2:
str = "this is 3 !!!"
cv2.putText(img, str, (5,50), cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
elif count_defects == 3:
cv2.putText(img,"This is 4 :P", (50,50), cv2.FONT_HERSHEY_SIMPLEX, 2, 2)
elif count_defects == 4:
cv2.putText(img,"this is 5 !!!", (50,50), cv2.FONT_HERSHEY_SIMPLEX, 2, 2)
cv2.putText(img,"this is 0 !!!", (50,50),\
#cv2.imshow('drawing', drawing)
#cv2.imshow('end', crop_img)
cv2.imshow('Gesture', img)
all_img = np.hstack((drawing, crop_img))
cv2.imshow('Contours', all_img)
k = cv2.waitKey(10)
if k == 27:
I found the solution: it appears that some poor video drivers return an invalid first frame. All I did was check ret and continue if it is false, and now it works fine.
The exception indicates that cap.read() returned None for img. You should check for that:
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, img = cap.read()
    # ret is False when no frame was grabbed; img is None in that case.
    if not ret:
        break
The documentations say:
If no frames has been grabbed (camera has been disconnected, or there are no more frames in video file), the methods return false and the functions return NULL pointer.
So if you are using a USB webcam, make sure that your USB connection is stable. In particular, if you are using any USB hubs/extensions or your port is in a bad shape.

