I'm writing a Python script to save the landmarks obtained when using MediaPipe. I want to save only the landmarks, without the rest of the frame content. Is this possible?
My first idea was to create a black image and draw onto it the landmarks obtained from processing a frame that contains a person. When I display it with cv2.imshow() I can see the landmarks, but when I save the video I only get black frames. Can anyone help me? The function I wrote is below.
import cv2
import numpy as np
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

def extract_bones(video_path, new_video_path):
    # Open the input video
    cap = cv2.VideoCapture(video_path)
    # Get the width and height of the video
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Create the writer for the new video
    output = cv2.VideoWriter(new_video_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (width, height))
    # Initialize holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read frame
            ret, frame = cap.read()
            if ret == True:
                img = np.zeros((frame.shape[0], frame.shape[1], frame.shape[2]))
                # Change color from BGR to RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame.flags.writeable = False
                # Detect landmarks
                results = holistic.process(frame)
                # Left hand (blue)
                mp_drawing.draw_landmarks(
                    img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=2, circle_radius=1),
                    mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2))
                # Right hand (green)
                mp_drawing.draw_landmarks(
                    img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=1),
                    mp_drawing.DrawingSpec(color=(57, 143, 0), thickness=2))
                # Pose
                mp_drawing.draw_landmarks(
                    img, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(128, 0, 255), thickness=2, circle_radius=1),
                    mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2))
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                cv2.imshow("Frame", frame)
                cv2.imshow("Black", img)
                output.write(img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
    cap.release()
Thank you very much
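A likely cause, judging from the code alone (an assumption, not confirmed in the post): np.zeros() without a dtype returns a float64 array, while cv2.VideoWriter.write() expects 8-bit frames, so the float canvas ends up saved as black. Allocating the canvas as uint8 should make the saved video match what cv2.imshow() displays, roughly like this:

import cv2
import numpy as np

cap = cv2.VideoCapture("input.mp4")  # hypothetical input path
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
output = cv2.VideoWriter("landmarks.avi",  # hypothetical output path
                         cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (width, height))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Key change: allocate the canvas as uint8 -- np.zeros() defaults to
    # float64, which VideoWriter saves as black frames.
    img = np.zeros(frame.shape, dtype=np.uint8)
    # ... draw the landmarks onto img here, exactly as in the question ...
    output.write(img)

cap.release()
output.release()

Also remember to call output.release() at the end, otherwise the output file may not be finalized properly.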
Related
I'm using this code to detect the 468 facial landmarks of a face:
import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(0)
pTime = 0
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
faceMesh = mpFaceMesh.FaceMesh(max_num_faces=2)
drawSpec = mpDraw.DrawingSpec(thickness=1, circle_radius=2)

while True:
    success, img = cap.read()
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = faceMesh.process(imgRGB)
    if results.multi_face_landmarks:
        for faceLms in results.multi_face_landmarks:
            mpDraw.draw_landmarks(img, faceLms, mpFaceMesh.FACEMESH_CONTOURS,
                                  drawSpec, drawSpec)
            for id, lm in enumerate(faceLms.landmark):
                #print(lm)
                ih, iw, ic = img.shape
                x, y = int(lm.x * iw), int(lm.y * ih)
                print(id, x, y)
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN,
                3, (255, 0, 0), 3)
    cv2.imshow("Image", img)
    cv2.waitKey(1)
When I run this script I can see the facial landmarks drawn on my face, but what I want is to display the facial landmarks on a black background, without showing the face itself.
How can I achieve that?
You can create an image of black pixels (all zeros) with the same dimensions as the frame captured by the camera, and do your drawing there:
import numpy as np

# create the black image
black = np.zeros(img.shape, np.uint8)
# replace `img` with `black` when drawing the landmarks
mpDraw.draw_landmarks(black, faceLms, mpFaceMesh.FACEMESH_CONTOURS, drawSpec, drawSpec)
# display the result
cv2.imshow("Result", black)
I need to track my cursor to the nose of a human to create a DIY aimbot with pose detection.
(Just for fun, not intending to cheat; there would be so many better and easier options than making my own.)
I already have the first part of the code: it shows your screen and the skeleton, as well as the exact coordinates of the nose, with no problem.
But the method I'm using to move my cursor to that point is not working.
I'm using mouse.move and have tried other things like pyautogui and tkinter.
It doesn't give me an error, but it still doesn't work.
import cv2
import mediapipe as mp
import numpy as np
import time
import pyautogui
import mouse

mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# display screen resolution, get it from your OS settings
SCREEN_SIZEX = 1920
SCREEN_SIZEY = 1080

# define the codec
fourcc = cv2.VideoWriter_fourcc(*"XVID")
# create the video writer object
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (SCREEN_SIZEX, SCREEN_SIZEY))

with mp_pose.Pose(min_detection_confidence=0.1, min_tracking_confidence=0.9) as pose:
    while True:
        # take a screenshot
        img = pyautogui.screenshot()
        # convert the pixels to a proper numpy array to work with OpenCV
        frame = np.array(img)
        # convert colors from BGR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # recolor image to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        # make detection
        results = pose.process(image)
        # recolor back to BGR
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        try:
            landmarks = results.pose_landmarks.landmark
            lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
            x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
            y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
            #print(x)
            #print(y)
            mouse.move(x, y)
        except:
            pass
        # render detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
                                  mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))
        # write the frame
        out.write(frame)
        pTime = 0
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(image, str(int(fps)), (20, 50), cv2.FONT_HERSHEY_PLAIN, 3,
                    (255, 0, 0), 3)
        cv2.imshow('Mediapipe Feed', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

out.release()
cv2.destroyAllWindows()

#for lndmark in mp_pose.PoseLandmark:
#    print(lndmark)
This is the part that doesn't work:

try:
    landmarks = results.pose_landmarks.landmark
    lndmark = landmarks[mp_pose.PoseLandmark.NOSE.value]
    x = [landmarks[mp_pose.PoseLandmark.NOSE.value].x]
    y = [landmarks[mp_pose.PoseLandmark.NOSE.value].y]
    mouse.move(x, y)
except:
    pass
I would assume it's because x and y are supposed to be numbers, or somehow it can't read or process them correctly, but it doesn't give me an error, so I'm asking here hoping one of you has already figured this out.
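For what it's worth, a likely explanation (an assumption based on the code shown, not something confirmed in the post): MediaPipe landmark coordinates are normalized to the range [0, 1], and the code additionally wraps them in single-element lists, so mouse.move() receives list-valued, sub-pixel values. Unwrapping the landmark and scaling to the screen resolution should work, roughly like this:

# Inside the try block: unwrap the landmark and scale the normalized
# coordinates to screen pixels before moving the cursor.
nose = landmarks[mp_pose.PoseLandmark.NOSE.value]
x = int(nose.x * SCREEN_SIZEX)  # SCREEN_SIZEX/Y are defined at the top of the script
y = int(nose.y * SCREEN_SIZEY)
mouse.move(x, y)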
Here is my code. I need to draw a colored line and rectangle on my gray video stream.
There must be some bug in my code, because the line and rectangle come out black even though I give them a color.
import cv2

cap = cv2.VideoCapture(0)
if (cap.isOpened() == False):
    print("Unable to read camera feed")

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height), 0)

while (cap.isOpened()):
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # draw line
    start_point = (0, 0)
    end_point = (250, 250)
    color = (0, 255, 0)
    thickness = 5
    gray = cv2.line(img=gray, pt1=start_point, pt2=end_point, color=color, thickness=thickness, lineType=8, shift=0)
    # draw rectangle
    x1, y1 = 200, 200
    x2, y2 = 250, 250
    gray = cv2.rectangle(gray, (x1, y1), (x2, y2), color, 2)
    cv2.imshow('webcam(1)', gray)
    out.write(gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
To draw colored elements you have to convert the image back to BGR:

gray_BGR = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
Converting to GRAY not only turns the colors gray, it also reduces every pixel from three values (B, G, R) to a single intensity value, which can only represent shades of gray.
If you check frame.shape and gray.shape you will see the difference:
the first is (height, width, 3), while the second is only (height, width), i.e. a single channel.
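A minimal sketch of the loop with that change applied (converting back to three channels before drawing); note the VideoWriter would also need isColor left at its default of True, rather than the 0 passed in the question, for the saved file to keep the color:

import cv2

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # back to 3 channels so colored drawing works
    gray_BGR = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    cv2.line(gray_BGR, (0, 0), (250, 250), (0, 255, 0), 5)
    cv2.rectangle(gray_BGR, (200, 200), (250, 250), (0, 255, 0), 2)
    cv2.imshow('webcam(1)', gray_BGR)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()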
It's my beginning with coding and with this site. I'm working on a project where I want to use OpenCV, but I've got an issue with that: I need to resize the frame for object recognition. I have read that the frame should be 416x416, but when I display the frame it's still its regular size.
Here's the code:
import pafy
import youtube_dl
import cv2
import numpy as np

url = "https://www.youtube.com/watch?v=WOn7m0_aYBw"
video = pafy.new(url)
best = video.getbest(preftype="mp4")
cap = cv2.VideoCapture()
cap.open(best.url)

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

while True:
    ret, frame = cap.read()
    # if ret == True:
    img = cv2.imshow('frame', frame)
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
print(img.shape) returns the correct size, but I think I'm showing the wrong window. How can I change this code to display the window at the correct size?
You were showing the frame before resizing it. Move cv2.imshow() after the resize and pass it the resized image:
while True:
    ret, frame = cap.read()
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    cv2.imshow('frame', img)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
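As a side note (not part of the original answer): cv2.dnn.blobFromImage() already resizes its input to the size argument, so the separate cv2.resize() is only needed for the display window, not for the network.

# blobFromImage resizes internally, so `frame` can be passed directly:
blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)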
I want to create a motion-gesture dataset, and I'm trying to isolate skin color using a BGR-to-HSV conversion. So far my output window displays exactly the 'black and white video' feed I'm aiming for, but the video file I save isn't the same. Please help me fix this.
I'm new to this and have tried what I could; if there's a silly mistake, do guide me.
import numpy as np
import os
import cv2

filename = 'video.mp4'
frames_per_second = 10.0
res = '720p'

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}

def get_dims(cap, res='1080p'):
    width, height = STD_DIMENSIONS["720p"]
    if res in STD_DIMENSIONS:
        width, height = STD_DIMENSIONS[res]
    change_res(cap, width, height)
    return width, height

VIDEO_TYPE = {
    'avi': cv2.VideoWriter_fourcc(*'XVID'),
    'mp4': cv2.VideoWriter_fourcc(*'XVID'),
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
        return VIDEO_TYPE[ext]
    return VIDEO_TYPE['mp4']

cap = cv2.VideoCapture(1)
out = cv2.VideoWriter(filename, get_video_type(filename), 25, get_dims(cap, res))

while True:
    ret, frame = cap.read()
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_blue = np.array([0, 48, 80])
    upper_blue = np.array([20, 255, 255])
    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    result = cv2.bitwise_and(frame, frame, mask=mask)
    out.write(result)
    cv2.imshow('frame', mask)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
I found the solution! Convert the single-channel mask to a 3-channel image before writing it:

frame = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
Source: How to save masks of videos in openCV2 python
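For context, a minimal sketch of where that conversion fits in the loop above (assuming the goal is to save the same black-and-white mask that imshow displays):

while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    # inRange() returns a single-channel image; the writer was opened for
    # color frames, so convert the mask to 3 channels before writing it.
    mask_3ch = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
    out.write(mask_3ch)
    cv2.imshow('frame', mask)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break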