I am running this code to detect the nose and get its position in real time. I was able to detect the face and draw the points of the landmarks.However I couldn't figure out how I can get the positions of a specific landmark in terms of x and y. Can anyone please help me
here is my code
import cv2 as cv
import mediapipe as mp
import time
cap = cv.VideoCapture(0)
if not cap.isOpened():
print("camera-failed")
exit()
pTime = 0
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh(max_num_faces=1)
drawSpec = mpDraw.DrawingSpec(thickness=1,circle_radius=1)
while True:
ret, img = cap.read()
imgRGB = cv.cv2.cvtColor(img, cv.COLOR_BGRA2RGB)
results = faceMesh.process(imgRGB)
if results.multi_face_landmarks:
for faceLms in results.multi_face_landmarks:
mpDraw.draw_landmarks(img,faceLms,mpFaceMesh.FACEMESH_CONTOURS,
drawSpec,drawSpec)
for id, lm in enumerate(faceLms.landmark):
print(lm)
ih, iw, ic = img.shape
x,y,z= int(lm.x*iw),int(lm.y*ih),int(lm.z*ic)
print(id,x,y,z)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
cv.putText(img, f'FPS: {int(fps)}',(20,70), cv.FONT_HERSHEY_PLAIN,
3,(0,255,0),3)
if not ret:
print("Error exiting...")
break
exit()
cv.imshow( "image",img)
if cv.waitKey(1) == ord('q'):
break
cap.release()
cv.destroyAllWindows()
Related
This is my code, I've looked at some tutorials but can't find what I'm looking for
I want to overlay the Frame.png image on my webcam. I tried to add the image directly but it didn't work either. If possible, Is there a way to add an image, not to overlay but to keep the image at a certain coordinate in the live webcam window
import cv2
import numpy as np
def detect_and_save():
alpha = 0.2
beta = 1-alpha
cap = cv2.VideoCapture(0)
sciframe = cv2.imread('Frame.png')
classifier = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
while True:
ret ,frame = cap.read()
overlay = frame.copy()
output = frame.copy()
gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
faces = classifier.detectMultiScale(gray,1.5,5)
cv2.putText(output, "HUD Test",(175, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 50, 50), 3)
cv2
for face in faces:
x,y,w,h = face
cv2.rectangle(overlay,(x,y),(x+w,y+h),(255,200,0),-1)
cv2.rectangle(overlay,(x,y),(x+w,y+h),(255,0,0),1)
cv2.rectangle(overlay,(x,y-20),(x+w,y),(25,20,0),-1)
cv2.addWeighted(overlay,alpha,output,beta,0,output)
cv2.putText(output,"Human",(x+10,y-10),cv2.FONT_HERSHEY_SIMPLEX,
0.35, (0, 0, 255), 1)
if not ret:
continue
cv2.imshow("HUD",output)
key = cv2.waitKey(1)
if key == ord('q'):
break
elif key == ord('s'):
cv2.imwrite('./images/CID_{}.png'.format(time.strftime('%d%m%y_%H_%M_%S')),output)
cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
import time
detect_and_save()
You can directly add one image on top of another one at any coordinate easily in opencv.
cap = cv2.VideoCapture(0)
im_height = 50 #define your top image size here
im_width = 50
im = cv2.resize(cv2.imread("Frame.png"), (im_width, im_height))
while (True):
ret, frame = cap.read()
frame[0:im_width, 0:im_height] = im #for top-left corner, 0:50 and 0:50 for my image; select your region here like 200:250
cv2.imshow("live camera", frame)
if cv2.waitKey(1) == ord("q"):
break
cap.release()
cv2.destroyAllWindows()
cv2.imwrite() is not working. I am trying for taking 100 photos when face will be detected.
Here is the code given:
import cv2
import datetime
cap = cv2.VideoCapture(0)
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
while True:
_,frame = cap.read()
greyImg = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
face = face_cascade.detectMultiScale(greyImg,1.3,5)
for x,y,w,h in face:
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0),5)
time_stamp = datetime.datetime.now().strftime("%D-%m-%Y")
file_name = f"{time_stamp}-face.jpg"
for i in range(100):
cv2.imwrite(file_name,greyImg)
cv2.imshow("Face recogniton", cv2.flip(frame,1))
if cv2.waitKey(1) == ord("q"):
break
The main bug in your code that you are looping the 100 times saving the same image.
This code is supposed to solve your issue:
import cv2
from datetime import datetime
import numpy as np
cap = cv2.VideoCapture(0)
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
faces_counter: int = 0
while True:
_,frame = cap.read()
greyImg = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
face = face_cascade.detectMultiScale(greyImg,1.3,5)
if np.any(face):
faces_counter += 1
if faces_counter > 100:
break
for x,y,w,h in face:
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0),5)
now = datetime.now()
current_time = now.strftime("%H_%M_%S")
file_name = f"Img_{current_time}_{faces_counter}-face.png"
cv2.imwrite(f"{str(file_name)}", frame)
cv2.imshow("Face recogniton", cv2.flip(frame,1))
if cv2.waitKey(1) == ord("q"):
break
import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("Image", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
pTime = 0
cTime = 0
while True:
_, img = cap.read()
img = cv2.rectangle(img,(r[0],r[1]),(r[2],r[3]),(0,255,0),5)
#imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(img, (cx,cy), 3, (255,0,255), cv2.FILLED)
mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,255), 3)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)
I am trying to build a program that detects hand movements in a selected region of interest, but the rectangular selection I perform does works, or it gets unscaled.
The hand detection also starts working randomly at a few points.
Any help would be appreciated.
the question was solved
this is the code :
import cv2
import mediapipe as mp
import time
from shapely.geometry import Point
from shapely.geometry import polygon
from shapely.geometry.polygon import Polygon
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("ThumbsDown", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
a= (x,y)
b= (x,y+h)
c= (x+w,y+h)
d= (x+w,y)
points_cord=(a,b,c,d)
points=Polygon(points_cord)
pTime = 0
cTime = 0
while True:
_, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
results = hands.process(rect_img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = rect_img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(rect_img, (cx,cy), 3, (255,0,255), cv2.FILLED)
cv2.putText(img,str("Hands-Detected"), (120,70), cv2.FONT_HERSHEY_PLAIN, 3, (252,0,0), 3)
cv2.rectangle(img,(int(r[0]),int(r[1]+r[3])),(int(r[0]+r[2]),int(r[1])),255,3)
cv2.rectangle(img,b,d,(25,255,231),3)
if((cx or cy)!=0):
cp=Point(cx,cy)
if(points.contains(cp)):
cv2.putText(img,str("TEST"), (300,200), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
mpDraw.draw_landmarks(rect_img, handLms, mpHands.HAND_CONNECTIONS)
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
cv2.rectangle(img,b,d,(25,255,231),3)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
cv2.namedWindow("ThumbsDown", cv2.WINDOW_NORMAL)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)
firstly, I was not sending the correct inputs in the previous code to the inbuilt cv2.rectangle function.
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
this is the part where I rearranged the coordinates according to the cv2.rectangle function,and its data members. rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
in this line, we not only need x,y, but width and height.
Secondly, I was not calling the correct frame to construct the rectangle on, rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
this is the area selected(ROI),
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
then we merge the selected frame, to the original output frame.
I'm making a pose estimation script for my game. However, it's working at 20-30 fps and not using the whole CPU even if there is no fps limit. It's not using whole GPU too. Can someone help me?
Here is resource usage while playing a dance video: https://imgur.com/a/6yI2TWg
Here is my code:
import cv2
import mediapipe as mp
import time
inFile = '/dev/video0'
capture = cv2.VideoCapture(inFile)
FramesVideo = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) # Number of frames inside video
FrameCount = 0 # Currently playing frame
prevTime = 0
# some objects for mediapipe
mpPose = mp.solutions.pose
mpDraw = mp.solutions.drawing_utils
pose = mpPose.Pose()
while True:
FrameCount += 1
#read image and convert to rgb
success, img = capture.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#process image
results = pose.process(imgRGB)
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
#get landmark positions
landmarks = []
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
cv2.putText(img, str(id), (cx,cy), cv2.FONT_HERSHEY_PLAIN, 1, (255,0,0), 1)
landmarks.append((cx,cy))
# calculate and print fps
frameTime = time.time()
fps = 1/(frameTime-prevTime)
prevTime = frameTime
cv2.putText(img, str(int(fps)), (30,50), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,0), 3)
#show image
cv2.imshow('Video', img)
cv2.waitKey(1)
if FrameCount == FramesVideo-1:
capture.release()
cv2.destroyAllWindows()
break
Set the model_complexity of mp.Pose to 0.
As the documentation states:
MODEL_COMPLEXITY
Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as inference latency generally go up with the model complexity. Default to 1.
This is the best solution I've found, also use this.
i need to track my cursor to the nose of a human to create a DIY aimbot with pose detection.
(just for fun, not intending to cheat, there would be so many better and easier options than to make my own)
i already have the first part of the code and it shows you your screen and the skeleton, as well as the exact coordinates of the nose with no problem,
but the method that im using to move my cursor over to that point is not working
im using mouse.move and have tried other stuff like pyautogui, tkinter.
it doesn't give me an error but still does not work
import cv2
import mediapipe as mp
import numpy as np
import time
import pyautogui
import mouse
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
# display screen resolution, get it from your OS settings
SCREEN_SIZEX = (1920)
SCREEN_SIZEY = (1080)
# define the codec
fourcc = cv2.VideoWriter_fourcc(*"XVID")
# create the video write object
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (SCREEN_SIZEX, SCREEN_SIZEY))
with mp_pose.Pose(min_detection_confidence=0.1, min_tracking_confidence=0.9) as pose:
while True:
# make a screenshot
img = pyautogui.screenshot()
# convert these pixels to a proper numpy array to work with OpenCV
frame = np.array(img)
# convert colors from BGR to RGB
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Recolor image to RGB
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image.flags.writeable = False
# Make detection
results = pose.process(image)
# Recolor back to BGR
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
try:
landmarks = results.pose_landmarks.landmark
lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
#print(x)
#print(y)
mouse.move(x, y)
except:
pass
# Render detections
mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
)
# write the frame
out.write(frame)
pTime = 0
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(image, str(int(fps)), (20, 50), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 0), 3)
cv2.imshow('Mediapipe Feed', image)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
out.release()
cv2.destroyAllWindows()
#for lndmark in mp_pose.PoseLandmark:
#print(lndmark)
this is the part that doesn't work:
try:
landmarks = results.pose_landmarks.landmark
lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
mouse.move(x, y)
except:
pass
i would assume that it is beacuse x and y are supposed to numbers or somehow it can't read or proccess it correctly
but it doesn't give me an error, so im asking it here hoping on of you guys had already figured this one out