MoveNet Pose Estimation renders inaccurate keypoints - python

I'm trying to run the MoveNet Pose Estimation model on a video, but for some reason my keypoints are very inaccurate. I assume this has nothing to do with the predictions themselves but with how I calculate the points and draw them from the estimation. However, I cannot find where these inaccuracies come from.
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

interpreter = tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_lightning_3.tflite')
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

cap = cv2.VideoCapture("pushup-stock-compressed.mp4")
while cap.isOpened():
    ret, frame = cap.read()

    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
    input_image = tf.cast(img, dtype=tf.float32)

    # Setup input and output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Make predictions
    interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

    # Rendering
    draw_keypoints(frame, keypoints_with_scores, 0.4)

    cv2.imshow('MoveNet Lightning', frame)

    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

As pointed out by Micka in the comments, you need to take into account that resize_with_pad preprocesses the image, so the returned keypoint positions will not match the original image.
If you look into the tf documentation for resize_with_pad (https://www.tensorflow.org/api_docs/python/tf/image/resize_with_pad) you will find the following description:
Resizes an image to a target width and height by keeping the aspect ratio the same without distortion
Now we can use the fact that resize + padding can be treated as an affine transformation. Suppose we apply some affine transformation to an image and get keypoints on that preprocessed (resized + padded) image. If we know the matrix of this affine transformation, we can compute its inverse and map the keypoints from the preprocessed image back to their locations on the original image.
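To make the mapping concrete, here is a minimal sketch (assuming a hypothetical 1280x720 frame and the 192x192 model input; the keypoint coordinates are made up) of the scale and padding that resize_with_pad applies and of how a point is mapped back:

import numpy as np

# Hypothetical original frame size and MoveNet input size
orig_h, orig_w = 720, 1280
target = 192

# resize_with_pad keeps the aspect ratio: a single scale factor, then centred padding
scale = min(target / orig_h, target / orig_w)   # 0.15
pad_x = (target - scale * orig_w) / 2           # 0.0
pad_y = (target - scale * orig_h) / 2           # 42.0

# A keypoint predicted on the padded 192x192 image...
kx_pad, ky_pad = 96.0, 60.0
# ...maps back to the original frame by undoing the padding, then the scale
kx_orig = (kx_pad - pad_x) / scale
ky_orig = (ky_pad - pad_y) / scale
print(kx_orig, ky_orig)   # 640.0 120.0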
Let's draw the keypoints found by MoveNet on the preprocessed (resized + padded) image:
That example contains some incorrect predictions from the network (look at the right leg).
Now apply the inverse affine transform to these keypoints and draw them on the original image:
As we can see, the keypoints are drawn at the same positions as on the resized + padded image.
Complete example:
import tensorflow as tf
import numpy as np
import cv2

interpreter = tf.lite.Interpreter(
    model_path="lite-model_movenet_singlepose_lightning_3.tflite"
)
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    for kp in keypoints:
        ky, kx, kp_conf = kp
        cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

def get_affine_transform_to_fixed_sizes_with_padding(size, new_sizes):
    width, height = new_sizes
    scale = min(height / float(size[1]), width / float(size[0]))
    M = np.float32([[scale, 0, 0], [0, scale, 0]])
    M[0][2] = (width - scale * size[0]) / 2
    M[1][2] = (height - scale * size[1]) / 2
    return M

frame = cv2.imread("gym.png")

# Reshape image
img = frame.copy()
img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
input_image = tf.cast(img, dtype=tf.float32)

# Setup input and output
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Make predictions
interpreter.set_tensor(input_details[0]["index"], np.array(input_image))
interpreter.invoke()
keypoints_with_scores = interpreter.get_tensor(output_details[0]["index"])[0, 0]

img_resized = np.array(input_image).astype(np.uint8)[0]
keypoints_for_resized = keypoints_with_scores.copy()
keypoints_for_resized[:, 0] *= img_resized.shape[1]
keypoints_for_resized[:, 1] *= img_resized.shape[0]
draw_keypoints(img_resized, keypoints_for_resized, 0.4)
cv2.imwrite("image_with_keypoints_resized.png", img_resized)

orig_w, orig_h = frame.shape[:2]
M = get_affine_transform_to_fixed_sizes_with_padding((orig_w, orig_h), (192, 192))
# M has shape 2x3 but we need square matrix when finding an inverse
M = np.vstack((M, [0, 0, 1]))
M_inv = np.linalg.inv(M)[:2]
xy_keypoints = keypoints_with_scores[:, :2] * 192
xy_keypoints = cv2.transform(np.array([xy_keypoints]), M_inv)[0]
keypoints_with_scores = np.hstack((xy_keypoints, keypoints_with_scores[:, 2:]))

# Rendering
draw_keypoints(frame, keypoints_with_scores, 0.4)
cv2.imwrite("image_with_keypoints_original.png", frame)

Related

OpenCV FindContours cannot find the correct edge with K-Means Binary Clustering

This is the follow-up question from here.
I use cv2.kmeans to segment the left auricle DICOM image as a mask.
The following code is how I do the k-means binary clustering in OpenCV.
import cv2
import numpy as np

# read input and convert to range 0-1
image = cv2.imread('1.jpg')
h, w, c = image.shape

# reshape to 1D array
image_2d = image.reshape(h*w, c).astype(np.float32)

# set number of colors
numcolors = 2
numiters = 10
epsilon = 1
attempts = 10

# do kmeans processing
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, numiters, epsilon)
ret, labels, centers = cv2.kmeans(image_2d, numcolors, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)

# reconstitute 2D image of results
centers = np.uint8(centers)
newimage = centers[labels.flatten()]
newimage = newimage.reshape(image.shape)
#cv2.imwrite("1_test.jpg", newimage)
#cv2.imshow('new image', newimage)
#cv2.waitKey(0)

k = 0
for center in centers:
    # select color and create mask
    #print(center)
    layer = newimage.copy()
    mask = cv2.inRange(layer, center, center)

    # apply mask to layer
    layer[mask == 0] = [0,0,0]
    #cv2.imshow('layer', layer)
    #cv2.waitKey(0)

    # save kmeans clustered image and layer
    if(k == 0):
        cv2.imwrite("1_test{0}.jpg".format(k), layer)
    k = k + 1
But when I then try cv2.findContours, I cannot draw the correct edge of the left auricle.
Here is my edge detection code.
import cv2
import numpy as np
import os
img = cv2.imread('1_test0.jpg')
#gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
implt = (img * 255).astype(np.uint8)
implt = np.asarray(implt)
implt = implt[:, :, -1]
im2 = cv2.resize(implt, (350, 350), interpolation=cv2.INTER_CUBIC)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(11, 11))
im2 = cv2.erode(im2,kernel)
ret,thresh = cv2.threshold(im2,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
#ret,binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
#_,contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)[-2:]
draw_img3 = cv2.drawContours(img.copy(), contours, -1, (0, 0, 255), 3)
cv2.imwrite('1_test0_edge.jpg', draw_img3)

K-Means Binary Clustering in OpenCV to Extract Mask

I am trying to use cv2.kmeans to segment the left auricle DICOM image as a mask.
I use the following code to do the k-means binary clustering in OpenCV.
import numpy as np
import cv2
import os
from matplotlib import pyplot as plt
img = cv2.imread('1_LA.jpg')
img2 = img.reshape((-1, 3))
img2 = np.float32(img2)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(img2, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
center = np.uint8(center)
res = center[label.flatten()]
res2 = res.reshape((img.shape))
cv2.imwrite('1_LA_kmeans.jpg', res2)
Then I can get a good segmentation result.
But how can I extract one of the segments as a mask?
I have referred to other similar questions and tried the code from here.
import numpy as np
import cv2

img = cv2.imread('1_LA.jpg')
Z = np.float32(img.reshape((-1,3)))

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
K = 2
_, labels, centers = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

labels = labels.reshape((img.shape[:-1]))
reduced = np.uint8(centers)[labels]

result = [np.hstack([img, reduced])]
for i, c in enumerate(centers):
    mask = cv2.inRange(labels, i, i)
    mask = np.dstack([mask]*3)  # Make it 3 channel
    ex_img = cv2.bitwise_and(img, mask)
    ex_reduced = cv2.bitwise_and(reduced, mask)
    result.append(np.hstack([ex_img, ex_reduced]))
    cv2.imwrite('kmeans/' + str(i) + '_1.jpg', np.vstack(result))

cv2.imwrite('1_LA_kmeans.jpg', np.vstack(result))
Then, I can get this output.
Because I want to calculate the area of the left auricle, I need to extract a mask like the one below.
So, how can I extract one of the binary segmentation results?
Thanks to fmw42's help.
After referring to this answer, I used the following code.
import cv2
import numpy as np

# read input and convert to range 0-1
image = cv2.imread('1.jpg')
h, w, c = image.shape

# reshape to 1D array
image_2d = image.reshape(h*w, c).astype(np.float32)

# set number of colors
numcolors = 2
numiters = 10
epsilon = 1
attempts = 10

# do kmeans processing
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, numiters, epsilon)
ret, labels, centers = cv2.kmeans(image_2d, numcolors, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)

# reconstitute 2D image of results
centers = np.uint8(centers)
newimage = centers[labels.flatten()]
newimage = newimage.reshape(image.shape)
#cv2.imwrite("1_test.jpg", newimage)
#cv2.imshow('new image', newimage)
#cv2.waitKey(0)

k = 0
for center in centers:
    # select color and create mask
    #print(center)
    layer = newimage.copy()
    mask = cv2.inRange(layer, center, center)

    # apply mask to layer
    layer[mask == 0] = [0,0,0]
    #cv2.imshow('layer', layer)
    #cv2.waitKey(0)

    # save kmeans clustered image and layer
    if(k == 0):
        cv2.imwrite("1_test{0}.jpg".format(k), layer)
    k = k + 1
I can extract the mask I want. Much appreciated.
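Since the stated goal is to calculate the area of the left auricle, a minimal follow-up sketch (assuming the layer file written by the loop above; the threshold value is a guess to suppress JPEG noise) could count the non-black pixels of the extracted layer:

import cv2

# Load the extracted layer written by the loop above
layer = cv2.imread("1_test0.jpg")

# Any pixel clearly above black belongs to the segment
gray = cv2.cvtColor(layer, cv2.COLOR_BGR2GRAY)
_, mask = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)

# Area in pixels; multiply by the DICOM pixel spacing to get physical units
area_px = cv2.countNonZero(mask)
print("area in pixels:", area_px)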

Is there any function or method to display clusters of an image after k-means image segmentation?

import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('jelly.jpg')  # reading the img
cv2.imshow('img', img)  # initial image
cv2.waitKey(0)

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # converting image to RGB
pixel_vals = img.reshape((-1,3))  # reshaping coloured 3d image to 2d image
pixel_vals = np.float32(pixel_vals)

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.85)  # setting criteria for kmeans
k = 5  # number of clusters
retval, labels, centers = cv2.kmeans(pixel_vals, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

centers = np.uint8(centers)
segmented_data = centers[labels.flatten()]
segmented_img = segmented_data.reshape((img.shape))  # final image

cv2.imshow('K-means segmented img', segmented_img)  # showing the final image after k means segmentation
cv2.waitKey(0)
cv2.destroyAllWindows()  # destroying all window pop-ups of images
I want to get only the violet part or the brown part according to intensity. I have tried looking but I am not able to find any function, as there is a chance that the blue colour or any other colour is present in a different shade. Is there a way to also get a particular shade of different colours while masking the other areas?
Original Image
K means segmented image
I am not sure what you want, but if you want to save each color as its own image from kmeans in Python/OpenCV, then this should do that.
Input:
import cv2
import numpy as np

# read input and convert to range 0-1
image = cv2.imread('jellyfish.png')
h, w, c = image.shape

# reshape to 1D array
image_2d = image.reshape(h*w, c).astype(np.float32)

# set number of colors
numcolors = 5
numiters = 10
epsilon = 1
attempts = 10

# do kmeans processing
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, numiters, epsilon)
ret, labels, centers = cv2.kmeans(image_2d, numcolors, None, criteria, attempts, cv2.KMEANS_RANDOM_CENTERS)

# reconstitute 2D image of results
centers = np.uint8(centers)
newimage = centers[labels.flatten()]
newimage = newimage.reshape(image.shape)
cv2.imwrite("jellyfish_kmeans.png", newimage)
cv2.imshow('new image', newimage)
cv2.waitKey(0)

k = 0
for center in centers:
    # select color and create mask
    #print(center)
    layer = newimage.copy()
    mask = cv2.inRange(layer, center, center)

    # apply mask to layer
    layer[mask == 0] = [0,0,0]
    cv2.imshow('layer', layer)
    cv2.waitKey(0)

    # save kmeans clustered image and layer
    cv2.imwrite("jellyfish_layer{0}.png".format(k), layer)
    k = k + 1
Kmeans Result:
Individual Colors:
I am not sure what you want to do, because from your description you seem to want one thing and from the title a completely different one. But I have segmented the parts you wanted.
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('jelly.png')
plt.imshow(img)
plt.show()

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
pixel_vals = img.reshape((-1,3))
pixel_vals = np.float32(pixel_vals)

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.85)
k = 5
retval, labels, centers = cv2.kmeans(pixel_vals, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

clustered_img = labels.reshape((img.shape[0], img.shape[1]))
clusters_to_0 = [1, 2, 4]
for c in clusters_to_0:
    clustered_img[clustered_img == c] = -1
clustered_img[clustered_img != -1] = 1
clustered_img[clustered_img == -1] = 0

plt.imshow(clustered_img)
plt.show()
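If the goal is to pull those pixels out of the original photo, one possible follow-up (a sketch, reusing the clustered_img produced above) is to use the 0/1 cluster map as a mask with cv2.bitwise_and:

# Sketch: use the binary cluster map from above as a mask on the original image
mask = (clustered_img * 255).astype(np.uint8)
extracted = cv2.bitwise_and(img, img, mask=mask)
plt.imshow(extracted)
plt.show()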
I suggest another approach: transform the image to the HSV colour space and then threshold the Hue channel, since it contains the information about the tonality of the colours:
import cv2
import numpy as np
import matplotlib.pyplot as plt
img = cv2.imread('jelly.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)  # img was converted to RGB above
h,s,v = cv2.split(hsv)
(_, th) = cv2.threshold(h, 0, 1, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
plt.subplot(131)
plt.imshow(img)
plt.title('Original image')
plt.subplot(132)
plt.imshow(h)
plt.title('Hue channels of the HSV color-space')
plt.subplot(133)
plt.imshow(th)
plt.title('Thresholded image')
plt.show()
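To isolate one particular colour (for example the violet region the question mentions) rather than relying on an Otsu split, a hedged sketch is to threshold a hue interval with cv2.inRange; the bounds below are only illustrative guesses and would need tuning for the actual image:

# Sketch: keep only pixels whose hue falls in a chosen interval.
# OpenCV hue is in [0, 179]; 125-155 is only an illustrative guess for violet.
lower = np.array([125, 50, 50])
upper = np.array([155, 255, 255])
color_mask = cv2.inRange(hsv, lower, upper)
violet_only = cv2.bitwise_and(img, img, mask=color_mask)

plt.imshow(violet_only)
plt.show()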

Corner detection with Hough lines transformation

I need to detect the corner of a paper in a given image. It will always be a cropped part of the whole picture containing only one of the corners. My idea was to transform the image by blurring and Canny edge detection to get outlines, and then apply HoughLines to get the coordinates of the corner.
However, I have problems actually detecting anything consistently and precisely with Hough lines, and I'm running out of ideas as to what the cause can be.
I've tried thresholding instead of Canny, but it won't work due to the many variations in the applicable images. I've downscaled the whole image to make it easier to see just the edges of the paper, but still no improvement. Increasing the line threshold makes the lines from the paper content disappear, but at the same time the edge lines disappear from time to time.
Input
Edges
Results
Code to reproduce
import cv2
import numpy as np

img = cv2.imread('inv_0001-01.1_0_corner.jpg')
resized = cv2.resize(img, (250,250), interpolation=cv2.INTER_AREA)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
edges = cv2.Canny(blur_gray, 50, 150, apertureSize=3)
cv2.imshow('edges', edges)
cv2.waitKey()

min_line_length = 50
max_line_gap = 20
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 5, np.array([]), min_line_length, max_line_gap)
for line in lines:
    for x1, y1, x2, y2 in line:
        cv2.line(resized, (x1,y1), (x2,y2), (255,0,0), 5)

cv2.imshow('hough', resized)
cv2.waitKey()
My desired result would be the coordinates of the paper corner in the given image, but in this post I'm rather looking for some help in understanding how to use HoughLines for such tasks.
This answer explains how to find the corner. Finding the corner requires a two-part solution. First, the image needs to be segmented into two regions: paper and background. Second, you can look for corners in the segmented image.
After you find the edges, floodfill the image to segment the paper from the background (this is the floodfill image):
mask = np.zeros((h+2, w+2), np.uint8)
# Floodfill from point (0, 0)
cv2.floodFill(edges, mask, (0,0), 123);
Now that you have segmented the image, get rid of the text on the paper using a mask (this is the image titled 'Masking'):
bg = np.zeros_like(edges)
bg[edges == 123] = 255
After you get the mask, apply the Canny edge filter again to get the outline of the paper (HoughLines needs an outline, not a mask... this is the 'Edges after masking' image):
bg = cv2.blur(bg, (3,3))
edges = cv2.Canny(bg,50,150,apertureSize = 3)
Now you can run your HoughLines algorithm on the cleaner image. I used a different HoughLines algorithm than you did, but yours should work too. Here is the full code that I used:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math

# Create a multi plot
f, axarr = plt.subplots(2, 3, sharex=True)

img = cv2.imread('/home/stephen/Desktop/IRcCAWL.png')
resized = cv2.resize(img, (250,250), interpolation=cv2.INTER_AREA)

# Show source image
axarr[0,0].imshow(resized)

gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
edges = cv2.Canny(blur_gray, 50, 150, apertureSize=3)

# Show first edges image
axarr[0,1].imshow(edges)

h, w = edges.shape[:2]
mask = np.zeros((h+2, w+2), np.uint8)

# Floodfill from point (0, 0)
cv2.floodFill(edges, mask, (0,0), 123)

# Show the flood fill image
axarr[0,2].imshow(edges)

floodfill = edges.copy()
bg = np.zeros_like(edges)
bg[edges == 123] = 255

# Show the masked image
axarr[1,0].imshow(bg)

bg = cv2.blur(bg, (3,3))
edges = cv2.Canny(bg, 50, 150, apertureSize=3)

# Show the edges after masking
axarr[1,1].imshow(edges)

min_line_length = 50
max_line_gap = 20

def intersection(line1, line2):
    """Finds the intersection of two lines given in Hesse normal form.
    Returns closest integer pixel locations.
    See https://stackoverflow.com/a/383527/5087436
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    A = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)]
    ])
    b = np.array([[rho1], [rho2]])
    x0, y0 = np.linalg.solve(A, b)
    x0, y0 = int(np.round(x0)), int(np.round(y0))
    return [[x0, y0]]

lines = cv2.HoughLines(edges, 1, np.pi / 180, 100, None, 0, 0)

# Draw the lines
if lines is not None:
    for i in range(0, len(lines)):
        rho = lines[i][0][0]
        theta = lines[i][0][1]
        a = math.cos(theta)
        b = math.sin(theta)
        x0 = a * rho
        y0 = b * rho
        pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
        pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
        cv2.line(resized, pt1, pt2, (123,234,123), 2, cv2.LINE_AA)

xy = tuple(intersection(lines[0], lines[1])[0])
resized = cv2.circle(resized, xy, 5, 255, 2)

# Show the image with the corner
axarr[1,2].imshow(resized)

# Add titles
axarr[0,0].set_title('Source Image')
axarr[0,1].set_title('Edges')
axarr[0,2].set_title('Floodfill')
axarr[1,0].set_title('Masking')
axarr[1,1].set_title('Edges after masking')
axarr[1,2].set_title('Hough Lines')

# Clean up
axarr[0,0].axis('off')
axarr[0,1].axis('off')
axarr[1,0].axis('off')
axarr[1,1].axis('off')
axarr[1,2].axis('off')
axarr[0,2].axis('off')

plt.show()

How to add motion blur to numpy array

I have a numpy array from an image. Is there a good way to do something like this:
from PIL import Image
a = Image.open('img')
a = a.filter(MOTION_BLUR)
import cv2
import numpy as np
img = cv2.imread('input.jpg')
cv2.imshow('Original', img)
size = 15
# generating the kernel
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[int((size-1)/2), :] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size
# applying the kernel to the input image
output = cv2.filter2D(img, -1, kernel_motion_blur)
cv2.imshow('Motion Blur', output)
cv2.waitKey(0)
An explanation can be found here.
Draw a rotated line as the kernel, then apply a convolution filter to the image with that kernel.
The code below uses the OpenCV framework.
import cv2
import numpy as np

# size  - in pixels, size of motion blur
# angle - in degrees, direction of motion blur
def apply_motion_blur(image, size, angle):
    k = np.zeros((size, size), dtype=np.float32)
    k[(size-1) // 2, :] = np.ones(size, dtype=np.float32)
    k = cv2.warpAffine(k, cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0), (size, size))
    k = k * (1.0 / np.sum(k))
    return cv2.filter2D(image, -1, k)
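A possible usage sketch (the file name is a placeholder):

# Sketch: apply a 15-pixel blur at 45 degrees and display the result
img = cv2.imread('input.jpg')
blurred = apply_motion_blur(img, 15, 45)
cv2.imshow('Motion Blur at 45 degrees', blurred)
cv2.waitKey(0)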
If you want to apply a vertical motion blur, you can use this kernel:
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[:, int((size-1)/2)] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size
I would use matplotlib:
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open('your_image')
imgplot = plt.imshow(img, interpolation="bicubic")
plt.show()
