How to add motion blur to numpy array - python

I have a NumPy array from an image. Is there a good way to do something like this:
from PIL import Image
a = Image.open('img')
a = a.filter(MOTION_BLUR)

import cv2
import numpy as np
img = cv2.imread('input.jpg')
cv2.imshow('Original', img)
size = 15
# generating the kernel
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[int((size-1)/2), :] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size
# applying the kernel to the input image
output = cv2.filter2D(img, -1, kernel_motion_blur)
cv2.imshow('Motion Blur', output)
cv2.waitKey(0)
An explanation can be found here.
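If you would rather stay in PIL as in the question: PIL has no built-in MOTION_BLUR filter, but ImageFilter.Kernel accepts a custom 5x5 kernel, so a horizontal motion blur can be sketched like this (the filename is just a placeholder):
from PIL import Image, ImageFilter

# 5x5 horizontal motion-blur kernel: average the 5 pixels of the middle row
kernel = [0.0] * 25
kernel[10:15] = [1.0] * 5  # indices 10..14 form row 2 of the 5x5 kernel

img = Image.open('img.jpg').convert('RGB')
blurred = img.filter(ImageFilter.Kernel((5, 5), kernel, scale=5))
blurred.save('img_motion_blur.jpg')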

Draw a rotated line as the kernel, then apply a convolution filter to the image with that kernel. The code below uses the OpenCV framework.
import cv2
import numpy as np

# size  - in pixels, size of motion blur
# angle - in degrees, direction of motion blur
def apply_motion_blur(image, size, angle):
    k = np.zeros((size, size), dtype=np.float32)
    k[(size - 1) // 2, :] = np.ones(size, dtype=np.float32)
    k = cv2.warpAffine(k, cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0), (size, size))
    k = k * (1.0 / np.sum(k))
    return cv2.filter2D(image, -1, k)
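A quick usage sketch (the filename and parameters are placeholders):
img = cv2.imread('input.jpg')
blurred = apply_motion_blur(img, size=15, angle=30)
cv2.imwrite('motion_blur_30deg.jpg', blurred)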

If you want a vertical motion blur, you can use this kernel:
kernel_motion_blur = np.zeros((size, size))
kernel_motion_blur[:, int((size-1)/2)] = np.ones(size)
kernel_motion_blur = kernel_motion_blur / size

I would use matplotlib:
import matplotlib.pyplot as plt
from PIL import Image

img = Image.open('your_image')
imgplot = plt.imshow(img, interpolation="bicubic")
plt.show()

Related

MoveNet Pose Estimation renders inaccurate keypoints

I'm trying to run the MoveNet Pose Estimation model on a video, but for some reason my keypoints are very inaccurate. I assume this has nothing to do with the predictions themselves but with how I calculate the points and paint them using my estimation. However, I cannot find where these inaccuracies come from.
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

interpreter = tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_lightning_3.tflite')
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

cap = cv2.VideoCapture("pushup-stock-compressed.mp4")
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:  # stop when the video ends
        break
    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
    input_image = tf.cast(img, dtype=tf.float32)
    # Setup input and output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    # Make predictions
    interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    # Rendering
    draw_keypoints(frame, keypoints_with_scores, 0.4)
    cv2.imshow('MoveNet Lightning', frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
As pointed out by Micka in the comments, you need to take into account that resize_with_pad preprocesses the image, so the returned keypoint positions will not match the original image.
If you look into the TF documentation:
https://www.tensorflow.org/api_docs/python/tf/image/resize_with_pad
you will find the following description:
Resizes an image to a target width and height by keeping the aspect ratio the same without distortion
Now we can use the fact that resize+padding can be treated as an affine transformation. Suppose we apply some affine transformation to an image and get keypoints on that preprocessed (resized+padded) image. If we know the matrix of this affine transformation, we can find its inverse and map the keypoints from the preprocessed image back to their locations on the original image by applying that inverse affine transformation to them.
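As a rough sketch of that idea (using a hypothetical 480x640 frame and the 192x192 model input), the forward resize+pad is just a uniform scale plus a translation:
import numpy as np

h, w = 480, 640                      # hypothetical original frame size
target = 192
scale = min(target / h, target / w)  # 0.3: fit the longer side into 192
pad_x = (target - scale * w) / 2     # 0.0 (the width already fills the target)
pad_y = (target - scale * h) / 2     # 24.0 (vertical letterbox bars)
M = np.float32([[scale, 0, pad_x],
                [0, scale, pad_y]])  # maps original (x, y) to padded (x, y)
# inverting M (after appending a [0, 0, 1] row) maps keypoints back to the original image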
Let's draw the keypoints found with MoveNet on the preprocessed (resized+padded) image:
That example contains some incorrect predictions from the network (look at the right leg).
Now apply the inverse affine transform to these keypoints and draw them on the original image:
As we can see, the keypoints land in the same positions as they did on the resized+padded image.
Complete example:
import tensorflow as tf
import numpy as np
import cv2

interpreter = tf.lite.Interpreter(
    model_path="lite-model_movenet_singlepose_lightning_3.tflite"
)
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    for kp in keypoints:
        ky, kx, kp_conf = kp
        cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

def get_affine_transform_to_fixed_sizes_with_padding(size, new_sizes):
    width, height = new_sizes
    scale = min(height / float(size[1]), width / float(size[0]))
    M = np.float32([[scale, 0, 0], [0, scale, 0]])
    M[0][2] = (width - scale * size[0]) / 2
    M[1][2] = (height - scale * size[1]) / 2
    return M

frame = cv2.imread("gym.png")
# Reshape image
img = frame.copy()
img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
input_image = tf.cast(img, dtype=tf.float32)
# Setup input and output
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Make predictions
interpreter.set_tensor(input_details[0]["index"], np.array(input_image))
interpreter.invoke()
keypoints_with_scores = interpreter.get_tensor(output_details[0]["index"])[0, 0]

img_resized = np.array(input_image).astype(np.uint8)[0]
keypoints_for_resized = keypoints_with_scores.copy()
keypoints_for_resized[:, 0] *= img_resized.shape[1]
keypoints_for_resized[:, 1] *= img_resized.shape[0]
draw_keypoints(img_resized, keypoints_for_resized, 0.4)
cv2.imwrite("image_with_keypoints_resized.png", img_resized)

orig_w, orig_h = frame.shape[:2]
M = get_affine_transform_to_fixed_sizes_with_padding((orig_w, orig_h), (192, 192))
# M has shape 2x3, but we need a square matrix to compute an inverse
M = np.vstack((M, [0, 0, 1]))
M_inv = np.linalg.inv(M)[:2]
xy_keypoints = keypoints_with_scores[:, :2] * 192
xy_keypoints = cv2.transform(np.array([xy_keypoints]), M_inv)[0]
keypoints_with_scores = np.hstack((xy_keypoints, keypoints_with_scores[:, 2:]))
# Rendering
draw_keypoints(frame, keypoints_with_scores, 0.4)
cv2.imwrite("image_with_keypoints_original.png", frame)

Projecting a grayscale 2D numpy image into RGB?

I have a grayscale numpy image (shape=(1024, 1024, 1), dtype=float) that I'm trying to translate into the same image, but with the grayscale values assigned to the red channel (ie. the same image but in redscale).
Here's the original image:
Which is generated using numpy:
def create_mandelbrot_matrix(width, height, max_iter=100):
    X = np.linspace(-2, 1, width)
    Y = np.linspace(-1, 1, height)
    # broadcast X to a square array
    C = X[:, None] + 1J * Y
    # initial value is always zero
    Z = np.zeros_like(C)
    exit_times = max_iter * np.ones(C.shape, np.int32)
    mask = exit_times > 0
    for k in range(max_iter):
        Z[mask] = Z[mask] * Z[mask] + C[mask]
        mask, old_mask = abs(Z) < 2, mask
        # use XOR to detect the area which has changed
        exit_times[mask ^ old_mask] = k
    return exit_times.T

def mandelbrot_image(width, height, max_iter=100):
    mandelbrot_matrix = create_mandelbrot_matrix(width, height, max_iter)
    img = np.expand_dims(mandelbrot_matrix, axis=2)
    return img
This function results in a totally different image from the original:
def mandelbrot_red_image(w, h):
    mandelbrot_img = mandelbrot_image(w, h)
    print(mandelbrot_img.shape)  # (1024, 1024, 1)
    img = np.zeros((w, h, 3))
    img[:, :, 0] = mandelbrot_img_int.reshape((w, h))
    return img
I don't know how your mandelbrot_image works, but image shapes are usually (h, w): the first dimension of the array is the number of rows, which corresponds to the height.
Another point is that your dtype may not be 'uint8'; I had to do a conversion for the image to appear properly.
This code worked for me:
import cv2
import numpy as np

img = cv2.imread('./mandelbrot.png', cv2.IMREAD_GRAYSCALE)
h, w = img.shape
color_img = np.zeros([h, w, 3])
color_img[:, :, 2] = img  # in OpenCV, images are BGR, so channel 2 is red
cv2.imshow('color_mandelbrot', color_img.astype('uint8'))
cv2.waitKey(0)
cv2.destroyAllWindows()
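For completeness, here is a minimal NumPy-only sketch that reuses the mandelbrot_image function from the question, rescales its float output to 0-255, and writes it into the red channel (channel order assumed to be RGB, e.g. for matplotlib):
import numpy as np

def mandelbrot_red_image(w, h, max_iter=100):
    gray = mandelbrot_image(w, h, max_iter)[:, :, 0]   # (h, w) array of exit times
    gray = (255 * gray / gray.max()).astype(np.uint8)  # rescale to 0-255
    red = np.zeros((*gray.shape, 3), dtype=np.uint8)
    red[:, :, 0] = gray                                # channel 0 is red in RGB order
    return red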

Creating a Blank image based on the dimensions from another pictures

I have an image with these dimensions (1280 x 960). To create a blank image with these dimensions, I use this:
import cv2
import numpy as np
blank_image2 = 255 * np.ones(shape=[960, 1280, 3], dtype=np.uint8)
Is it possible to create a blank image based on the dimensions of another image? Something like this:
import cv2
import numpy as np
blank_image2 = 255 * np.ones(shape=image, dtype=np.uint8)
You can use the shape of the image object:
image = cv2.imread('img.jpg')
h, w, c = image.shape
blank_image2 = 255 * np.ones(shape=(h, w, c), dtype=np.uint8)
Amin is correct, I'm just sharing an alternative using 'ones_like' - similar to Mark's suggestion:
image = cv2.imread('img.jpg')
blank_image = 255 * np.ones_like(image, dtype=np.uint8)
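For reference, np.full_like does the same thing in a single step:
image = cv2.imread('img.jpg')
blank_image = np.full_like(image, 255)  # same shape and dtype as image, filled with 255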

Greyscale Image python Implementation

I'm trying to convert an RGB image to grayscale in Python, as a function. The problem is that I pass in an RGB image that has height, width, and channel dimensions, and after the code runs I should get an image with just height and width, but instead it gives me an image that still has height, width, and channel. Why?
def RGBtoGRAY(img):
    height, width, channels = img.shape
    grayimg = img
    for i in range(height):
        for j in range(width):
            grayimg[i,j] = 0.3 * image[i,j][0] + 0.59 * image[i,j][1] + 0.11 * image[i,j][2]
    return grayimg
the size of the input image is
image.shape
(533, 541, 3)
the size of the output image is
grayimage.shape
(533, 541, 3)
whereas I expect the size of the output image to be
(533, 541)
You should avoid using for loops when performing image processing, since they are very slow. Instead you can use NumPy, which is highly optimized for vector operations. Using this grayscale conversion formula:
gray = R * .299 + G * .587 + B * .114
Method #1: apply_along_axis:
import cv2
import numpy as np

def grayscale(colors):
    r, g, b = colors
    return 0.299 * r + 0.587 * g + 0.114 * b

# Create image of size 100x100 of random pixels
image = np.random.randint(255, size=(100,100,3), dtype=np.uint8)
# Convert to grayscale
gray = np.apply_along_axis(grayscale, 2, image)
gray = gray.astype(np.uint8)  # cast back to uint8 so cv2.imshow displays it correctly

# Display
cv2.imshow('image', image)
cv2.imshow('gray', gray)
cv2.waitKey()
Before -> After
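Note that apply_along_axis still calls the Python function once per pixel, so it is slow on large images. A fully vectorized sketch of the same formula (assuming the last axis is in R, G, B order) is:
weights = np.array([0.299, 0.587, 0.114])
gray = (image[..., :3] @ weights).astype(np.uint8)  # matrix product over the channel axis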
Method #2: cv2.cvtColor
You could use OpenCV directly and read the image in as grayscale with cv2.imread by passing the cv2.IMREAD_GRAYSCALE or 0 flag.
image = cv2.imread('img.png', cv2.IMREAD_GRAYSCALE) # OR
# image = cv2.imread('img.png', 0)
If you already have the image loaded, you can convert the RGB or BGR image to grayscale using cv2.cvtColor
image = cv2.imread('img.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
Assuming you are using a for loop because you intend to solve it "manually" (like C code), there are a number of issues with your implementation:
The assignment grayimg = img in Python does not create a copy of img (the result is that grayimg references img).
You meant to use: grayimg = img.copy().
img has 3 dimensions, so when using grayimg = img, grayimg also has 3 dimensions.
You need to create grayimg with two dimensions.
Example for creating grayimg and initialize to zeros:
grayimg = np.zeros((height, width), img.dtype)
Inside the for loop, you are using image instead of img.
Here is a corrected version of RGBtoGRAY:
def RGBtoGRAY(img):
    height, width, channels = img.shape
    #grayimg = img
    # Create height x width array with same type as img, and initialize with zeros.
    grayimg = np.zeros((height, width), img.dtype)
    for i in range(height):
        for j in range(width):
            grayimg[i,j] = 0.3 * img[i,j][0] + 0.59 * img[i,j][1] + 0.11 * img[i,j][2]
    return grayimg

PIL ImageChops.screen in OpenCV

How can I write the same code from PIL in OpenCV?
img3 = ImageChops.screen(im1, im2)
You can implement it with the formula used by ImageChops.screen:
out = MAX - ((MAX - image1) * (MAX - image2) / MAX) (source)
The code:
import cv2
import numpy as np
im1 = cv2.imread('im1.png').astype(np.uint16)
im2 = cv2.imread('im2.png').astype(np.uint16)
im3 = (255 - ((255 - im1) * (255 - im2) / 255)).astype(np.uint8)
cv2.imwrite('im3.png', im3)
The promotion to uint16 is necessary because of the multiplication of two uint8 values; at the end I cast the result back to uint8 because the values are guaranteed to be < 256 again.
Screen superimposes two inverted images on top of each other (source)
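If you want to sanity-check the OpenCV result against PIL itself, a quick sketch (assuming the same input files and the im3 computed above) is:
from PIL import Image, ImageChops
import numpy as np

ref = ImageChops.screen(Image.open('im1.png').convert('RGB'),
                        Image.open('im2.png').convert('RGB'))
ref = np.array(ref)[:, :, ::-1]  # PIL gives RGB, OpenCV uses BGR
# difference should be small (0 or 1 per channel, from integer rounding)
print(np.abs(ref.astype(int) - im3.astype(int)).max())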
you can do this too (without numpy):
import cv2

# read the input images, they can be color (RGB) images too
im1 = cv2.imread('im1.jpg')
im2 = cv2.imread('im2.jpg')
# images must be of the same size, if not resize one of the images
if im1.shape != im2.shape:
    im2 = cv2.resize(im2, im1.shape[:2][::-1], interpolation=cv2.INTER_AREA)
# invert and normalize first image
im1 = cv2.normalize(cv2.bitwise_not(im1), None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
# invert and normalize second image
im2 = cv2.normalize(cv2.bitwise_not(im2), None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
# superimpose the two images, re-normalize and invert
im = cv2.bitwise_not(cv2.normalize(cv2.multiply(im1, im2), None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U))
# write the output image
cv2.imwrite('im.jpg', im)
