Im using this code to detect teh 468 facial landmarks from a face:
import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(0)
pTime = 0
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh(max_num_faces=2)
drawSpec = mpDraw.DrawingSpec(thickness=1, circle_radius=2)
while True:
success, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = faceMesh.process(imgRGB)
if results.multi_face_landmarks:
for faceLms in results.multi_face_landmarks:
mpDraw.draw_landmarks(img, faceLms, mpFaceMesh.FACEMESH_CONTOURS,
drawSpec,drawSpec)
for id,lm in enumerate(faceLms.landmark):
#print(lm)
ih, iw, ic = img.shape
x,y = int(lm.x*iw), int(lm.y*ih)
print(id,x,y)
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN,
3, (255, 0, 0), 3)
cv2.imshow("Image", img)
cv2.waitKey(1)
When I run this script, I can see the facial landmarks printed on my face, but what I want to achieve is that I display the facial landmarks on a black background without actually showing a face.
How can I achieve that?
You can create an image with dark (black) pixels (0) of the same dimensions of the frame captured by the camera. And carry out your drawings there
# create the dark image
black = np.zeros(img.shape , np.uint8)
# Replace the `img` with `black` while drawing the landmarks
mpDraw.draw_landmarks(black, faceLms, mpFaceMesh.FACEMESH_CONTOURS, drawSpec,drawSpec)
# Display the result
cv2.imshow("Result", black)
Related
I'm making a Python script to save the landmarks obtained when using mediapipe. I want to save only the landmarks but I don't want to save the rest of the frame content. Is this possible?
My first idea was to create a black image and put on top of it the landmarks that are obtained when processing the frame that has a person, and when I show it with cv2.imshow(img) I get the landmarks but when I want to save the video I only see black frames. Can anyone help me, I leave below the function that I have done.
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.python.solutions.face_mesh_connections import FACEMESH_CONTOURS
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic
def extract_bones(video_path, new_video_path):
# Captura de los videos
#cap = cv2.VideoCapture(video_path)
cap = cv2.VideoCapture(video_path)
# Se toma el ancho y alto del video
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Se crea el formato del nuevo video.
output = cv2.VideoWriter(new_video_path,cv2.VideoWriter_fourcc('M','J','P','G'), 30, (width,height))
# Initialize holistic model
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
while cap.isOpened():
# Read frame
ret, frame = cap.read()
if ret == True:
img = np.zeros((frame.shape[0], frame.shape[1], frame.shape[2]))
# Resize frame
#frame = cv2.resize(frame, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)
# Change color from BGR to RGB
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame.flags.writeable = False
# Detect landmarks
results = holistic.process(frame)
# Left hand (azul)
mp_drawing.draw_landmarks(
img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=2, circle_radius=1),
mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2))
print(img)
# Right hand (verde)
mp_drawing.draw_landmarks(
img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=1),
mp_drawing.DrawingSpec(color=(57, 143, 0), thickness=2))
print(img)
# Pose
mp_drawing.draw_landmarks(
img, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
mp_drawing.DrawingSpec(color=(128, 0, 255), thickness=2, circle_radius=1),
mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2))
print(img)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
cv2.imshow("Frame", frame)
cv2.imshow("Black", img)
output.write(img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
cap.release()
Thank you very much
import cv2
import mediapipe as mp
import time
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("Image", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
pTime = 0
cTime = 0
while True:
_, img = cap.read()
img = cv2.rectangle(img,(r[0],r[1]),(r[2],r[3]),(0,255,0),5)
#imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(img, (cx,cy), 3, (255,0,255), cv2.FILLED)
mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,255), 3)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)
I am trying to build a program that detects hand movements in a selected region of interest, but the rectangular selection I perform does works, or it gets unscaled.
The hand detection also starts working randomly at a few points.
Any help would be appreciated.
the question was solved
this is the code :
import cv2
import mediapipe as mp
import time
from shapely.geometry import Point
from shapely.geometry import polygon
from shapely.geometry.polygon import Polygon
cap = cv2.VideoCapture(0)
while True:
_, im0 = cap.read()
showCrosshair = False
fromCenter = False
r = cv2.selectROI("ThumbsDown", im0, fromCenter, showCrosshair)
break
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
mpDraw = mp.solutions.drawing_utils
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
a= (x,y)
b= (x,y+h)
c= (x+w,y+h)
d= (x+w,y)
points_cord=(a,b,c,d)
points=Polygon(points_cord)
pTime = 0
cTime = 0
while True:
_, img = cap.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
results = hands.process(rect_img)
print(results.multi_hand_landmarks)
if results.multi_hand_landmarks:
for handLms in results.multi_hand_landmarks:
for id, lm in enumerate(handLms.landmark):
print(id,lm)
h, w, c = rect_img.shape
cx, cy = int(lm.x *w), int(lm.y*h)
cv2.circle(rect_img, (cx,cy), 3, (255,0,255), cv2.FILLED)
cv2.putText(img,str("Hands-Detected"), (120,70), cv2.FONT_HERSHEY_PLAIN, 3, (252,0,0), 3)
cv2.rectangle(img,(int(r[0]),int(r[1]+r[3])),(int(r[0]+r[2]),int(r[1])),255,3)
cv2.rectangle(img,b,d,(25,255,231),3)
if((cx or cy)!=0):
cp=Point(cx,cy)
if(points.contains(cp)):
cv2.putText(img,str("TEST"), (300,200), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
mpDraw.draw_landmarks(rect_img, handLms, mpHands.HAND_CONNECTIONS)
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
cv2.rectangle(img,b,d,(25,255,231),3)
cTime = time.time()
fps = 1/(cTime-pTime)
pTime = cTime
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.putText(img,str(int(fps)), (10,70), cv2.FONT_HERSHEY_PLAIN, 3, (25,255,231), 3)
cv2.namedWindow("ThumbsDown", cv2.WINDOW_NORMAL)
cv2.imshow("ThumbsDown", img)
cv2.waitKey(1)
firstly, I was not sending the correct inputs in the previous code to the inbuilt cv2.rectangle function.
x=int(r[0])
y=int(r[1])
w=int(r[2])
h=int(r[3])
this is the part where I rearranged the coordinates according to the cv2.rectangle function,and its data members. rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
in this line, we not only need x,y, but width and height.
Secondly, I was not calling the correct frame to construct the rectangle on, rect_img = imgRGB[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
this is the area selected(ROI),
img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])] =rect_img
then we merge the selected frame, to the original output frame.
I'm making a pose estimation script for my game. However, it's working at 20-30 fps and not using the whole CPU even if there is no fps limit. It's not using whole GPU too. Can someone help me?
Here is resource usage while playing a dance video: https://imgur.com/a/6yI2TWg
Here is my code:
import cv2
import mediapipe as mp
import time
inFile = '/dev/video0'
capture = cv2.VideoCapture(inFile)
FramesVideo = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) # Number of frames inside video
FrameCount = 0 # Currently playing frame
prevTime = 0
# some objects for mediapipe
mpPose = mp.solutions.pose
mpDraw = mp.solutions.drawing_utils
pose = mpPose.Pose()
while True:
FrameCount += 1
#read image and convert to rgb
success, img = capture.read()
imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#process image
results = pose.process(imgRGB)
if results.pose_landmarks:
mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
#get landmark positions
landmarks = []
for id, lm in enumerate(results.pose_landmarks.landmark):
h, w, c = img.shape
cx, cy = int(lm.x * w), int(lm.y * h)
cv2.putText(img, str(id), (cx,cy), cv2.FONT_HERSHEY_PLAIN, 1, (255,0,0), 1)
landmarks.append((cx,cy))
# calculate and print fps
frameTime = time.time()
fps = 1/(frameTime-prevTime)
prevTime = frameTime
cv2.putText(img, str(int(fps)), (30,50), cv2.FONT_HERSHEY_PLAIN, 3, (255,0,0), 3)
#show image
cv2.imshow('Video', img)
cv2.waitKey(1)
if FrameCount == FramesVideo-1:
capture.release()
cv2.destroyAllWindows()
break
Set the model_complexity of mp.Pose to 0.
As the documentation states:
MODEL_COMPLEXITY
Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as inference latency generally go up with the model complexity. Default to 1.
This is the best solution I've found, also use this.
i need to track my cursor to the nose of a human to create a DIY aimbot with pose detection.
(just for fun, not intending to cheat, there would be so many better and easier options than to make my own)
i already have the first part of the code and it shows you your screen and the skeleton, as well as the exact coordinates of the nose with no problem,
but the method that im using to move my cursor over to that point is not working
im using mouse.move and have tried other stuff like pyautogui, tkinter.
it doesn't give me an error but still does not work
import cv2
import mediapipe as mp
import numpy as np
import time
import pyautogui
import mouse
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
# display screen resolution, get it from your OS settings
SCREEN_SIZEX = (1920)
SCREEN_SIZEY = (1080)
# define the codec
fourcc = cv2.VideoWriter_fourcc(*"XVID")
# create the video write object
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (SCREEN_SIZEX, SCREEN_SIZEY))
with mp_pose.Pose(min_detection_confidence=0.1, min_tracking_confidence=0.9) as pose:
while True:
# make a screenshot
img = pyautogui.screenshot()
# convert these pixels to a proper numpy array to work with OpenCV
frame = np.array(img)
# convert colors from BGR to RGB
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Recolor image to RGB
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image.flags.writeable = False
# Make detection
results = pose.process(image)
# Recolor back to BGR
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
try:
landmarks = results.pose_landmarks.landmark
lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
#print(x)
#print(y)
mouse.move(x, y)
except:
pass
# Render detections
mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
)
# write the frame
out.write(frame)
pTime = 0
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(image, str(int(fps)), (20, 50), cv2.FONT_HERSHEY_PLAIN, 3,
(255, 0, 0), 3)
cv2.imshow('Mediapipe Feed', image)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
out.release()
cv2.destroyAllWindows()
#for lndmark in mp_pose.PoseLandmark:
#print(lndmark)
this is the part that doesn't work:
try:
landmarks = results.pose_landmarks.landmark
lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
mouse.move(x, y)
except:
pass
i would assume that it is beacuse x and y are supposed to numbers or somehow it can't read or proccess it correctly
but it doesn't give me an error, so im asking it here hoping on of you guys had already figured this one out
Here is my code
I need to draw a color line and rectangle on my gray video stream.
In my code some bug because my line and rectangle are black but they are not.
import cv2
cap = cv2.VideoCapture(0)
if (cap.isOpened() == False):
print("Unable to read camera feed")
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
out = cv2.VideoWriter('output.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height),0)
while(cap.isOpened()):
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# draw line
start_point = (0, 0)
end_point = (250, 250)
color = (0, 255, 0)
thickness = 5
gray = cv2.line(img=gray, pt1=start_point, pt2=end_point, color=color, thickness=thickness, lineType=8, shift=0)
# draw rectangle
x1,y1 = 200, 200
x2,y2 = 250, 250
gray = cv2.rectangle(gray,(x1, y1), (x2, y2),color, 2)
cv2.imshow('webcam(1)', gray)
out.write(gray)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
out.release()
cv2.destroyAllWindows()
To draw color elements you have to convert image back to BGR
gray_BGR = cv2.cvtColor(gray cv2.COLOR_GRAY2BGR)
Converting to GRAY not only converts colors to gray but it also reduces every pixel from three values (B,G,R) to single value which can keep only gray color.
If you check frame.shape and gray.shape then you see difference.
First will have (height, width, 3) second will have only (height, width) which means (height, width, 1)