not able to scale the roi in opencv python - python

import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("Image", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
pTime = 0
cTime = 0
while True:
_, img = cap.read()
img = cv2.rectangle(img,(r[0],r[1]),(r[2],r[3]),(0,255,0),5)
#imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(img, (cx,cy), 3, (255,0,255), cv2.FILLED)
mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,255), 3)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)
I am trying to build a program that detects hand movements in a selected region of interest, but the rectangular selection I perform does works, or it gets unscaled.
The hand detection also starts working randomly at a few points.
Any help would be appreciated.

the question was solved
this is the code :
import cv2
import mediapipe as mp
import time
from shapely.geometry import Point
from shapely.geometry import polygon
from shapely.geometry.polygon import Polygon
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("ThumbsDown", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
a= (x,y)
b= (x,y+h)
c= (x+w,y+h)
d= (x+w,y)
points_cord=(a,b,c,d)
points=Polygon(points_cord)
pTime = 0
cTime = 0
while True:
_, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
results = hands.process(rect_img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = rect_img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(rect_img, (cx,cy), 3, (255,0,255), cv2.FILLED)
cv2.putText(img,str("Hands-Detected"), (120,70), cv2.FONT_HERSHEY_PLAIN, 3, (252,0,0), 3)
cv2.rectangle(img,(int(r[0]),int(r[1]+r[3])),(int(r[0]+r[2]),int(r[1])),255,3)
cv2.rectangle(img,b,d,(25,255,231),3)
if((cx or cy)!=0):
cp=Point(cx,cy)
if(points.contains(cp)):
cv2.putText(img,str("TEST"), (300,200), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
mpDraw.draw_landmarks(rect_img, handLms, mpHands.HAND_CONNECTIONS)
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
cv2.rectangle(img,b,d,(25,255,231),3)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
cv2.namedWindow("ThumbsDown", cv2.WINDOW_NORMAL)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)

firstly, I was not sending the correct inputs in the previous code to the inbuilt cv2.rectangle function.
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
this is the part where I rearranged the coordinates according to the cv2.rectangle function,and its data members. rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
in this line, we not only need x,y, but width and height.
Secondly, I was not calling the correct frame to construct the rectangle on, rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
this is the area selected(ROI),
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
then we merge the selected frame, to the original output frame.

Related

I was creating a virtual mouse and I keep getting this error. I have tried with autopy and with pyautogui but in both cases it gives me same error

This is the error I'm getting right now and I just can't figure out what the problem is.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Traceback (most recent call last):
File "C:\AIproject\virMouse.py", line 18, in <module>
lmList, bbox = detector.findPosition(img)
ValueError: not enough values to unpack (expected 2, got 0)
Process finished with exit code 1
Here is my code for the virtual mouse program that I have written so far.
import cv2
import numpy as np
import time
import HTmodule as htm
import autopy
wCam, hCam = 640, 480
cap = cv2.VideoCapture(0)
cap.set(3, wCam)
cap.set(4, hCam)
pTime = 0
detector = htm.handDetector(maxHands=1)
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS:{(int(fps))}', (5, 30), cv2.FONT_HERSHEY_DUPLEX, 1, (204, 0, 0), 2)
cv2.imshow("Image", img)
cv2.waitKey(1)
And here is the code for the hand tracking module that I wrote.
import cv2
import mediapipe as mp
import time
class handDetector():
def __init__(self, mode=False, maxHands=2,modelC=1, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.modelC = modelC
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelC, self.detectionCon, self.trackCon)
self.mpDraw = mp.solutions.drawing_utils
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
# print(result.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
lmList=[]
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks[handNo]
for id, lm in enumerate(myHand.landmark):
# print(id,lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
# print(id, cx, cy)
lmList.append([id, cx, cy])
# if id == 0:
if draw:
cv2.circle(img, (cx, cy), 3, (229, 25, 66), cv2.FILLED)
return lmList
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(0)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList = detector.findPosition(img, draw=False)
# print(lmList) # PRINTS THE LIST OF LANDMARKS
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS:{(int(fps))}', (5, 30), cv2.FONT_HERSHEY_DUPLEX, 1, (9, 9, 255), 1)
cv2.imshow("Image", img)
cv2.waitKey(1)
if __name__ == "__main__":
main()
I don't think it has anything to do with autopy I think it has something to do with findPosition() function but I can't figure out what it is. I just started studying computer vision with python and decided to make this project for my practice but this error I can't seem to debug. Any kind of help would be greatly appreciated. Thankyou very much.

How to get one of the specific values(with the specific index) of this iterable enumerate (results.pose_landmarks.landmark)?

import cv2 #OpenCV is the library that we will be using for image processing
import mediapipe as mp #Mediapipe is the framework that will allow us to get our pose estimation
import time
mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()
#pose = mpPose.Pose(static_image_mode = False, upper_body_only = True) #ONLY UPPER_BODY_TRACKING
#cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('PoseVideos/1_girl_choreography.mp4')
pTime = 0 #previous time
while True:
success, img = cap.read() #that will give it our image and then we can write the cv2.imshow()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #convert our image to RGB because Mediapipe use that format
results = pose.process(imgRGB) #we are simply going to send this image to our model
#print(enumerate(results.pose_landmarks.landmark)) #<enumerate object at 0x0000012312DD1A00>
#so then we will check if it is detected or not
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape #get dimensions(h height, w width) and the c channel of image
print(id)
print(lm)
cx, cy = int(lm.x * w), int(lm.y * h)
cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
I want to do what the for does but instead of doing it for all the elements (id) inside the enumerate (), do it only for the id = 25, because in this case it is the only point that interests me.
What would I need to change in this instruction of the loop that uses as iterable to this enumerate(results.pose_landmarks.landmark):
How would I enter an id[25] and a lm[25] ?
I try with itertools, but ir does work in this case
import itertools
gen = (id, lm for id, lm in enumerate(results.pose_landmarks.landmark))
specific_id, specific_lm = 25,25 #index
print( next(itertools.islice(gen, specific_id, None)) )
You can directly access results.pose_landmarks.landmark, no need to enumerate.
lm = results.pose_landmarks.landmark[25]
results.pose_landmarks.landmark is a list
You could try:
for id, lm in enumerate(results.pose_landmarks.landmark):
if not id == 25:
continue
...
Not elegant, but gets the job done.

I am having a problem regarding the type error in python. I am trying to create a virtual mouse. I have created a module for it

**I am trying to create a virtual mouse. I am using packages like opencv-python, mediapipe, time, numpy, And also my own package. But seems to have some error. Can you help me on it. At first I have created the Module. Then the mouse itself. But I don't understand the problem in it. **
Hand tracking module that I have created
import cv2
import mediapipe as mp
import time
class handDetector():
def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
self.mode = mode
self.maxHands = maxHands
self.detectionCon = detectionCon
self.trackCon = trackCon
self.mpHands = mp.solutions.hands
self.hands = self.mpHands.Hands(self.mode, self.maxHands,
self.detectionCon, self.trackCon)
self.mpDraw = mp.solutions.drawing_utils
def findHands(self, img, draw=True):
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
self.results = self.hands.process(imgRGB)
print(self.results.multi_hand_landmarks)
if self.results.multi_hand_landmarks:
for handLms in self.results.multi_hand_landmarks:
if draw:
self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
return img
def findPosition(self, img, handNo=0, draw=True):
lmList = []
if self.results.multi_hand_landmarks:
myHand = self.results.multi_hand_landmarks [handNo]
for id, lm in enumerate(myHand.landmark):
print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
print(id, cx, cy)
lmList.append( [ id, cx, cy])
if draw:
cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
return lmList
def main():
pTime = 0
cTime = 0
cap = cv2.VideoCapture(1)
detector = handDetector()
while True:
success, img = cap.read()
img = detector.findHands(img)
lmList = detector.findPosition(img)
if len(lmList) != 0:
print(lmList[4])
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 255), 3)
cv2.imshow("Image", img)
if cv2.waitKey(1) == ord("q"):
break
if __name__ == "__main__":
main()
Here is the software code
import cv2
import cvzone.HandTrackingModule
import numpy as np
import HandTrackingModule as htm
import time
import autopy
##########################
wCam, hCam = 640, 480
frameR = 100 # Frame Reduction
smoothening = 7
#########################
pTime = 0
plocX, plocY = 0, 0
clocX, clocY = 0, 0
cap = cv2.VideoCapture(1)
cap.set(3, wCam)
cap.set(4, hCam)
detector = htm.handDetector()
wScr, hScr = autopy.screen.size()
# print(wScr, hScr)
while True:
# 1. Find hand Landmarks
success, img = cap.read()
img = detector.findHands(img)
lmList, bbox = detector.findPosition(img)
# 2. Get the tip of the index and middle fingers
if len(lmList) != 0:
x1, y1 = lmList [8][1:]
x2, y2 = lmList [12][1:]
# print(x1, y1, x2, y2)
# 3. Check which fingers are up
fingers = cvzone.HandTrackingModule.HandDetector.fingersUp()
# print(fingers)
cv2.rectangle(img, (frameR, frameR), (wCam - frameR, hCam - frameR),
(255, 0, 255), 2)
# 4. Only Index Finger : Moving Mode
if fingers[1] == 1 and fingers[2] == 0:
# 5. Convert Coordinates
x3 = np.interp(x1, (frameR, wCam - frameR), (0, wScr))
y3 = np.interp(y1, (frameR, hCam - frameR), (0, hScr))
# 6. Smoothen Values
clocX = plocX + (x3 - plocX) / smoothening
clocY = plocY + (y3 - plocY) / smoothening
# 7. Move Mouse
autopy.mouse.move(wScr - clocX, clocY)
cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
plocX, plocY = clocX, clocY
# 8. Both Index and middle fingers are up : Clicking Mode
if fingers [1] == 1 and fingers[2] == 1:
# 9. Find distance between fingers
length, img, lineInfo = detector.findDistance(8, 12, img)
print(length)
# 10. Click mouse if distance short
if length [40]:
cv2.circle(img, (lineInfo [4], lineInfo[5]),
15, (0, 255, 0), cv2.FILLED)
autopy.mouse.click()
# 11. Frame Rate
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (20, 50), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 0), 3)
# 12. Display
cv2.imshow("Image", img)
cv2.waitKey(1)
Error:
lmList, bbox = detector.findPosition(img)
ValueError: not enough values to unpack (expected 2, got 0)
Pleas help me on this. This is the software code. I am trying to create virtual-mouse using these packages and also I created a module myself. But seems to have some error. I hope you can help. May God bless the helpers
Function detector.findPosition returns a list lmList, but when called in your main script its output gets assigned (and unpacked) to two variables, lmList, bbox. So the function needs to output two values, not just one:
def findPosition(self, img, handNo=0, draw=True):
# Do something to produce lmList and bbox...
return lmList, bbox
If you are only interested in the list output, the you can always do:
lmList, _ = detector.findPosition(img)

Python CV2 video writer doesn't save video

I have a simple python code using OpenCV and Keras that performs some detections on frames (follow-up from my previous question here). But when I want to record and save the frames as a video using video_writer, the generated video is empty.
What is wrong in the video_writer?
#........some code
# start the webcam feed
cap = cv2.VideoCapture(1)
canvasImageOriginal = cv2.imread("fg2.png")
canvasImage = cv2.imread("fg2.png")
canvasHappy = cv2.imread("fg2happy.png")
canvasSad = cv2.imread("fg2sad.png")
x0, x1 = 330, 1290
y0, y1 = 155, 700
#=========
w=960#int(cap.get(cv2.CV_CAP_PROP_FRAME_WIDTH ))
h=540#int(cap.get(cv2.CV_CAP_PROP_FRAME_HEIGHT ))
# video recorder
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('output.avi', fourcc, 25.0, (w, h))
#=========
prediction_history = []
LOOKBACK = 5 # how far you want to look back
counter = 0
while True:
# Find haar cascade to draw bounding box around face
ret, frame = cap.read()
frame=cv2.flip(frame,3)
if not ret:
break
facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = facecasc.detectMultiScale(gray,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in faces:
cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray = gray[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
prediction = model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
text = emotion_dict[maxindex]
print(prediction[0][3])
prediction_history.append(maxindex)
most_common_index = max(set(prediction_history[-LOOKBACK:][::-1]), key = prediction_history.count)
text = emotion_dict[most_common_index]
#if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
# text = "Sad"
if ("Happy" in text) or ("Sad" in text) :
cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
if ("Happy" in text):
counter= counter+1
if counter == 10:
#print("Happy!")
canvasImage = canvasHappy
else:
counter = 0
canvasImage = canvasImageOriginal
dim = (800,480)
frame_shrunk = cv2.resize(frame, (x1 - x0, y1 - y0))
canvasImage[y0:y1, x0:x1] = frame_shrunk
#cv2.imshow('Video', cv2.resize(frame,dim,interpolation = cv2.INTER_CUBIC))
cv2.imshow('Demo', canvasImage)
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
video_writer.release()
cv2.destroyAllWindows()
As it is mentioned above, please check print(frame.shape).
When I did it, I saw (300,450,3), and I changed the resolution of videowriter as (450,300) and it worked for me. As a result, I can say that frame.shape=(y, x, maybe color) but the resolution of videowriter=(x,y).

I'm following a tutorial on [Python + OpenCV + mediapipe] Hand Tracking and I get the following error:

I get the following error:
cv2.error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-wvn_it83\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
And this is my code
import mediapipe as mp
import time
cap = cv2.VideoCapture(1)
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils
pTime = 0
cTime = 0
while True:
success, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(imgRGB)
# print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
# print(id, lm)
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
print(id, cx, cy)
# if id == 4:
cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 255), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
Try to check and add these lines.
...
cap = cv2.VideoCapture(1, cv2.CAP_DSHOW) # Additional argument here
...
while cap.isOpened():
success, img = cap.read()
if not success: # Add break if not successful
break
...
cv2.waitKey(1)
# This will close the capture and cv2 properly
cap.release()
cv2.destroyAllWindows()

Categories

Resources