Efficiently transform and blend images in OpenCV - python

I'm making a program to simulate brush strokes on a given image, and I'm held up on the part that actually puts the brush strokes down. Right now I'm using OpenCV and NumPy to rotate and blend images together by setting a section of the original image to the new one. Here's the current code.
import cv2
import numpy as np

def rotate(img, deg):
    """Rotates an image a certain number of degrees"""
    h, w, c = img.shape
    matrix = cv2.getRotationMatrix2D((w / 2, h / 2), deg, 1)
    out = cv2.warpAffine(img, matrix, (w, h), flags=cv2.INTER_LINEAR)
    return out
def paste(img1, img2, mask, x, y):
    """Pastes one image on top of another at a given location"""
    h1, w1, _ = img1.shape  # dimensions of original image
    h2, w2, _ = img2.shape  # dimensions of pasted image
    if y > h1 or x > w1:
        return img1
    stamp = img2
    factor = mask
    subx = (w2 + x) - w1
    suby = (h2 + y) - h1
    if suby <= 0:
        suby = 0
    if subx <= 0:
        subx = 0
    stamp = img2[0:h2 - suby, 0:w2 - subx]
    factor = mask[0:h2 - suby, 0:w2 - subx]
    snippet = img1[y:y + (h2 - suby), x:x + (w2 - subx)]
    stamp = cv2.add(cv2.multiply(snippet, (1 - factor)), cv2.multiply(stamp, factor))
    out = img1
    out[y:y + h2, x:x + w2] = stamp
    return out
It works, but only barely: it breaks in certain scenarios, it's very limited in what it lets you do, and it's extremely slow. Is there an easier/better way to transform and blend images with OpenCV, or would a different framework (sprite/object based, like pygame) be better for this? Thanks!

I suggest you try seamless cloning. Check this excellent tutorial from Satya Mallick: https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp/
In your case, substitute your brush for the plane used in the tutorial.
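A minimal sketch of what that looks like, assuming a brush image and a canvas (the file names here are placeholders):

import cv2
import numpy as np

brush = cv2.imread("brush.png")    # the stamp to blend in
canvas = cv2.imread("canvas.png")  # the image being painted on
# an all-white mask clones the entire brush; a tighter mask restricts the blend region
mask = 255 * np.ones(brush.shape, brush.dtype)
# center of the region in the canvas where the brush should land
center = (canvas.shape[1] // 2, canvas.shape[0] // 2)
result = cv2.seamlessClone(brush, canvas, mask, center, cv2.NORMAL_CLONE)
cv2.imwrite("result.png", result)

cv2.MIXED_CLONE is the other flag worth trying; it preserves more of the destination's texture under the pasted region.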

Related

How to rotate an image to get not-null pixels?

In the image I linked below, I need to get all the yellow/green pixels in this rotated rectangle and get rid of the blue background, so that the rectangle's axes are aligned with the x and y axes.
I'm using numpy but don't have a clue what I should do.
I uploaded the array in this drive in case anyone would like to work with the actual array
Thanks for the help in advance.
I used the same image as user2640045, but a different approach.
import numpy as np
import cv2
# load and convert image to grayscale
img = cv2.imread('image.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# binarize image
threshold, binarized_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# find the largest contour
contours, hierarchy = cv2.findContours(binarized_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
c = max(contours, key = cv2.contourArea)
# get size of the rotated rectangle
center, size, angle = cv2.minAreaRect(c)
# get size of the image
h, w, *_ = img.shape
# create a rotation matrix and rotate the image
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated_img = cv2.warpAffine(img, M, (w, h))
# crop the image
pad_x = int((w - size[0]) / 2)
pad_y = int((h - size[1]) / 2)
cropped_img = rotated_img[pad_y : pad_y + int(size[1]), pad_x : pad_x + int(size[0]), :]
Result:
I realize there is an allow_pickle=False option in numpy's load method, but I didn't feel comfortable unpickling/using data from the internet, so I used the small image. After removing the coordinate system and stuff I had
I define two helper methods: one to rotate the image, taken from another Stack Overflow thread (see the link in the code below), and one to get a mask that is one at a specified color and zero otherwise.
import numpy as np
import matplotlib.pyplot as plt
import cv2
import functools

color = arr[150, 50]

def similar_to_boundary_color(arr, color=tuple(color)):
    mask = functools.reduce(np.logical_and, [np.isclose(arr[:, :, i], color[i]) for i in range(4)])
    return mask

# https://stackoverflow.com/a/9042907/2640045
def rotate_image(image, angle):
    image_center = tuple(np.array(image.shape[1::-1]) / 2)
    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
    result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
    return result
Next I calculate the angle to rotate by. I do that by finding the lowest pixel at widths 50 and 300. I picked those since they are far enough from the boundary not to be affected by missing corners etc.
i,j = np.where(~similar_to_boundary_color(arr))
slope = (max(i[j == 50])-max(i[j == 300]))/(50-300)
angle = np.arctan(slope)
arr = rotate_image(arr, np.rad2deg(angle))
plt.imshow(arr)
One way of doing the cropping is the following. You calculate the midpoint in height and width. Then you take two slices around the midpoint, say 20 pixels wide in one direction and extending up to the midpoint in the other. The biggest/smallest index where the pixel is white/background colored is a reasonable point to cut.
i,j = np.where(~(~similar_to_boundary_color(arr) & ~similar_to_boundary_color(arr, (0,0,0,0))))
imid, jmid = np.array(arr.shape)[:2]/2
imin = max(i[(i < imid) & (jmid - 10 < j) & (j < jmid + 10)])
imax = min(i[(i > imid) & (jmid - 10 < j) & (j < jmid + 10)])
jmax = min(j[(j > jmid) & (imid - 10 < i) & (i < imid + 10)])
jmin = max(j[(j < jmid) & (imid - 10 < i) & (i < imid + 10)])
arr = arr[imin:imax,jmin:jmax]
plt.imshow(arr)
and the result is:

Python OpenCV append matches' center x, y coordinates in tuples

I have this simple opencv template matching function written in Python.
image:
template:
import cv2
import numpy as np
from random import randint as random  # assumption: `random` is used like randint below

def find(object, sensitivity):
    screen = "tool.png"
    screen_read = cv2.imread(screen)
    screen_gray = cv2.cvtColor(screen_read, cv2.COLOR_BGR2GRAY)
    obj = cv2.imread(object, cv2.IMREAD_GRAYSCALE)
    w, h = obj.shape[::-1]
    location = np.where(cv2.matchTemplate(screen_gray, obj, cv2.TM_CCORR_NORMED) >= sensitivity)
    positions = []
    for xy in zip(*location[::-1]):
        cv2.rectangle(screen_read, xy, (xy[0] + w, xy[1] + h), (0, 0, 255), 1)
        x = random(xy[0], (xy[0] + w) - 2)
        y = random(xy[1], (xy[1] + h) - 2)
        print(x, y)
        positions.append(str(x) + ", " + str(y))
    #cv2.imshow("Test", screen_read)
    #cv2.waitKey(0)

find("enemylogo.png", 0.90)
It will find all the templates correctly, as shown here:
However, my goal here is to pass the center coordinates out of the function so they can be used in a loop. For this, I need to store the x, y coordinates in a list (positions) as tuples.
However, I'm not getting the desired results: it's adding far too many tuples instead of only 2.
What I'm trying to do is:
for x in find("enemylogo.png", 0.90):
    click(x)  # x would be the coordinate of every template matched
Could someone help me, please?
The line location = np.where.... will give you a lot of matches, and many of them will be right next to each other. Another technique is to use minMaxLoc repeatedly. This function will only give you the best result, but if you overwrite the best match with zeros on each pass, the next pass will find another match.
import cv2
import numpy as np

def find_templates(obj, sensitivity):
    image = cv2.imread('tool.png', cv2.IMREAD_COLOR)
    template = cv2.imread(obj, cv2.IMREAD_COLOR)
    h, w = template.shape[:2]
    print('h', h, 'w', w)
    method = cv2.TM_CCORR_NORMED
    threshold = 0.90
    res = cv2.matchTemplate(image, template, method)
    res_h, res_w = res.shape[:2]
    # fake out max_val for first run through loop
    max_val = 1
    centers = []
    while max_val > sensitivity:
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        if max_val > sensitivity:
            centers.append((max_loc[0] + w//2, max_loc[1] + h//2))
            # zero out a window around the best match so it isn't found again
            x1 = max(max_loc[0] - w//2, 0)
            y1 = max(max_loc[1] - h//2, 0)
            x2 = min(max_loc[0] + w//2, res_w)
            y2 = min(max_loc[1] + h//2, res_h)
            res[y1:y2, x1:x2] = 0
            image = cv2.rectangle(image, (max_loc[0], max_loc[1]), (max_loc[0]+w+1, max_loc[1]+h+1), (0, 255, 0))
    print(centers)
    cv2.imwrite('output.png', image)

find_templates("enemy_logo.png", 0.90)
which gives
[(52, 52), (169, 52)]
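To drive the loop from the question, just have the function return the list at the end (a sketch; click stands in for the asker's own helper):

# last line of find_templates:
#     return centers

for center in find_templates("enemy_logo.png", 0.90):
    click(center)  # click the center of each matched template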

Image-Processing: Converting normal pictures into FishEye images with intrinsic matrix

I need to synthesize many FishEye images with different intrinsic matrices based on normal pictures. I am following the method mentioned in this paper.
Ideally, if the algorithm is correct, the fisheye effect should look like this:
But when I used my algorithm to convert a picture, it looks like this:
So below is my code's flow:
1. First, I read the raw image (with scipy.ndimage here) and collect its pixel coordinates:
from scipy import ndimage
import numpy as np

def read_img(image):
    img = ndimage.imread(image)  # this returns a 4-channel array: [R, G, B, 255]
    img_shape = img.shape
    print(img_shape)
    # get the pixel coordinates
    w = img_shape[1]  # the width
    h = img_shape[0]  # the height
    uv_coord = []
    for u in range(w):
        for v in range(h):
            uv_coord.append([float(u), float(v)])  # records coords as [x1,y1], [x1,y2], [x1,y3]...
    return np.array(uv_coord)
Then, based on the paper:
r(θ) = k1θ + k2θ^3 + k3θ^5 + k4θ^7, (1)
where the k's are the distortion coefficients
Given pixel coordinates (x,y) in the pinhole projection image, the corresponding image coordinates (x',y')in the fisheye can be computed as:
x'=r(θ) cos(ϕ), y' = r(θ) sin(ϕ), (2)
where ϕ = arctan((y − y0)/(x − x0)), and (x0, y0) are the coordinates of the principal point in the pinhole projection image.
Then the image coordinates (x', y') are converted into pixel coordinates (xf, yf):
xf = mu * x' + u0, yf = mv * y' + v0, (3)
where (u0, v0) are the coordinates of the principal point in the fisheye, and mu, mv denote the number of pixels per unit distance in the horizontal and vertical directions. So I am guessing mu, mv are just [fx, fy] from the intrinsic matrix, and u0, v0 are the [cx, cy].
def add_distortion(sourceUV, dmatrix, Kmatrix):
    '''This function remaps the pixels of the given original image coords
    input arguments:
    dmatrix -- the distortion coefficients [k1, k2, k3, k4] for tweaking purposes
    Kmatrix -- [fx, fy, cx, cy, s]'''
    u = sourceUV[:, 0]  # width in x
    v = sourceUV[:, 1]  # height in y
    rho = np.sqrt(u**2 + v**2)
    # get theta
    theta = np.arctan(rho, np.full_like(u, 1))
    rho_mat = np.array([theta, theta**3, theta**5, theta**7])
    # get: rho(theta) = k1*theta + k2*theta**3 + k3*theta**5 + k4*theta**7
    rho_d = dmatrix @ rho_mat
    # get phi
    phi = np.arctan2((v - Kmatrix[3]), (u - Kmatrix[2]))
    xd = rho_d * np.cos(phi)
    yd = rho_d * np.sin(phi)
    # convert the coords from the image plane back to pixel coords
    ud = Kmatrix[0] * (xd + Kmatrix[4] * yd) + Kmatrix[2]
    vd = Kmatrix[1] * yd + Kmatrix[3]
    return np.column_stack((ud, vd))
Then, after obtaining the distorted coordinates, I move the pixels like this, which is where I think the problem might be:
import cv2
import numpy as np
from PIL import Image

def main():
    image_name = "original.png"
    img = cv2.imread(image_name)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 reads the image as BGR
    w = img.shape[1]
    h = img.shape[0]
    uv_coord = read_img(image_name)
    # for adding distortion
    dmatrix = [-0.391942708316175, 0.012746418822063, -0.001374061848026, 0.005349692659231]
    # the intrinsic matrix of the original picture
    Kmatrix = np.array([9.842439e+02, 9.808141e+02, 1392/2, 2.331966e+02, 0.000000e+00])
    # Kmatrix = np.array([2234.23470710156, 2223.78349134123, 947.511596277837, 647.103139639432, -3.20443253476976])  # the distorted intrinsics
    uv = add_distortion(uv_coord, dmatrix, Kmatrix)
    i = 0
    dstimg = np.zeros_like(img)
    for x in range(w):
        for y in range(h):
            if i > (512 * 1392 - 1):
                break
            xu = uv[i][0]
            yu = uv[i][1]
            i += 1
            # if new pixel is in bounds copy from source pixel to destination pixel
            if 0 <= xu < img.shape[1] and 0 <= yu < img.shape[0]:
                dstimg[int(yu)][int(xu)] = img[int(y)][int(x)]
    img = Image.fromarray(dstimg, 'RGB')
    img.save('my.png')
    img.show()
However, this code does not perform the way I want. Could you please help me debug it? I've spent 3 days on it but still can't see the problem. Thanks!!

Wrap image around a circle

What I'm trying to do in this example is wrap an image around a circle, like below.
To wrap the image I simply calculated the x,y coordinates using trig.
The problem is that the calculated x and y positions are rounded to make them integers. This causes the blank pixels seen in the wrapped image above. The x, y positions have to be integers because they are indices into lists.
I've done this again in the code below, but without any images, to make things easier to see. All I've done is create two arrays with binary values, one array black and the other white, then wrapped one onto the other.
The output of the code is:
import math as m
from PIL import Image  # only used for showing output as image

width = 254.0
height = 24.0
Ro = 40.0

img = [[1 for x in range(int(width))] for y in range(int(height))]
cir = [[0 for x in range(int(Ro * 2))] for y in range(int(Ro * 2))]

def shom_im(img):  # for showing data as image
    list_image = [item for sublist in img for item in sublist]
    new_image = Image.new("1", (len(img[0]), len(img)))
    new_image.putdata(list_image)
    new_image.show()

increment = m.radians(360 / width)
rad = Ro - 0.5

for i, row in enumerate(img):
    hyp = rad - i
    for j, column in enumerate(row):
        alpha = j * increment
        x = m.cos(alpha) * hyp + rad
        y = m.sin(alpha) * hyp + rad
        # put value from original image to its position in new image
        cir[int(round(y))][int(round(x))] = img[i][j]

shom_im(cir)
I later found out about the midpoint circle algorithm, but I had worse results with that:
from PIL import Image  # only used for showing output as image

width, height = 254, 24
ro = 40

img = [[(0, 0, 0, 1) for x in range(int(width))]
       for y in range(int(height))]
cir = [[(0, 0, 0, 255) for x in range(int(ro * 2))] for y in range(int(ro * 2))]

def shom_im(img):  # for showing data as image
    list_image = [item for sublist in img for item in sublist]
    new_image = Image.new("RGBA", (len(img[0]), len(img)))
    new_image.putdata(list_image)
    new_image.show()

def putpixel(x0, y0):
    global cir
    cir[y0][x0] = (255, 255, 255, 255)

def drawcircle(x0, y0, radius):
    x = radius
    y = 0
    err = 0
    while x >= y:
        putpixel(x0 + x, y0 + y)
        putpixel(x0 + y, y0 + x)
        putpixel(x0 - y, y0 + x)
        putpixel(x0 - x, y0 + y)
        putpixel(x0 - x, y0 - y)
        putpixel(x0 - y, y0 - x)
        putpixel(x0 + y, y0 - x)
        putpixel(x0 + x, y0 - y)
        y += 1
        err += 1 + 2 * y
        if 2 * (err - x) + 1 > 0:
            x -= 1
            err += 1 - 2 * x

for i, row in enumerate(img):
    rad = ro - i
    drawcircle(int(ro - 1), int(ro - 1), rad)

shom_im(cir)
Can anybody suggest a way to eliminate the blank pixels?
You are having problems filling up your circle because you are approaching this from the wrong direction – quite literally.
When mapping from a source to a target, you need to fill your target, and map each translated pixel from this into the source image. Then, there is no chance at all you miss a pixel, and, equally, you will never draw (nor lookup) a pixel more than once.
The following is a bit rough-and-ready, it only serves as a concept example. I first wrote some code to draw a filled circle, top to bottom. Then I added some more code to remove the center part (and added a variable Ri, for "inner radius"). This leads to a solid ring, where all pixels are only drawn once: top to bottom, left to right.
How exactly you draw the ring is not actually important! I used trig at first because I thought of re-using the angle bit, but it can be done with Pythagoras' theorem as well, and even with Bresenham's circle routine. All you need to keep in mind is that you iterate over the target rows and columns, not the source. This provides actual x, y coordinates that you can feed into the remapping procedure.
With the above done and working, I wrote the trig functions to translate from the coordinates I would put a pixel at into the original image. For this, I created a test image containing some text:
and a good thing that was, too, as in the first attempt I got the text twice (once left, once right) and mirrored – that needed a few minor tweaks. Also note the background grid. I added that to check if the 'top' and 'bottom' lines – the outermost and innermost circles – got drawn correctly.
Running my code with this image and Ro,Ri at 100 and 50, I get this result:
You can see that the trig functions make it start at the rightmost point, move clockwise, and have the top of the image pointing outwards. All can be trivially adjusted, but this way it mimics the orientation that you want your image drawn.
This is the result with your iris-image, using 33 for the inner radius:
and here is a nice animation, showing the stability of the mapping:
Finally, then, my code is:
import math as m
from PIL import Image

Ro = 100.0
Ri = 50.0

# img = [[1 for x in range(int(width))] for y in range(int(height))]
cir = [[0 for x in range(int(Ro * 2))] for y in range(int(Ro * 2))]

# image = Image.open('0vWEI.png')
image = Image.open('this-is-a-test.png')
# data = image.convert('RGB')
pixels = image.load()
width, height = image.size

def shom_im(img):  # for showing data as image
    list_image = [item for sublist in img for item in sublist]
    new_image = Image.new("RGB", (len(img[0]), len(img)))
    new_image.putdata(list_image)
    new_image.save("result1.png", "PNG")
    new_image.show()

for i in range(int(Ro)):
    # outer_radius = Ro*m.cos(m.asin(i/Ro))
    outer_radius = m.sqrt(Ro*Ro - i*i)
    for j in range(-int(outer_radius), int(outer_radius)):
        if i < Ri:
            # inner_radius = Ri*m.cos(m.asin(i/Ri))
            inner_radius = m.sqrt(Ri*Ri - i*i)
        else:
            inner_radius = -1
        if j < -inner_radius or j > inner_radius:
            # this is the destination
            # solid:
            # cir[int(Ro-i)][int(Ro+j)] = (255,255,255)
            # cir[int(Ro+i)][int(Ro+j)] = (255,255,255)
            # textured:
            x = Ro + j
            y = Ro - i
            # calculate source
            angle = m.atan2(y - Ro, x - Ro) / 2
            distance = m.sqrt((y - Ro)*(y - Ro) + (x - Ro)*(x - Ro))
            distance = m.floor((distance - Ri + 1)*(height - 1)/(Ro - Ri))
            # if distance >= height:
            #     distance = height-1
            cir[int(y)][int(x)] = pixels[int(width*angle/m.pi) % width, height - distance - 1]
            y = Ro + i
            # calculate source
            angle = m.atan2(y - Ro, x - Ro) / 2
            distance = m.sqrt((y - Ro)*(y - Ro) + (x - Ro)*(x - Ro))
            distance = m.floor((distance - Ri + 1)*(height - 1)/(Ro - Ri))
            # if distance >= height:
            #     distance = height-1
            cir[int(y)][int(x)] = pixels[int(width*angle/m.pi) % width, height - distance - 1]

shom_im(cir)
The commented-out lines draw a solid white ring. Note the various tweaks here and there to get the best result. For instance, the distance is measured from the center of the ring, and so returns a low value for close to the center and the largest values for the outside of the circle. Mapping that directly back onto the target image would display the text with its top "inwards", pointing to the inner hole. So I inverted this mapping with height - distance - 1, where the -1 is to make it map from 0 to height again.
A similar fix is in the calculation of distance itself; without the tweaks Ri+1 and height-1 either the innermost or the outermost row would not get drawn, indicating that the calculation is just one pixel off (which was exactly the purpose of that grid).
I think what you need is a noise filter. There are many implementations, of which I think a Gaussian filter would give a good result. You can find a list of filters here. If it gets blurred too much:
keep your first calculated image
calculate filtered image
copy fixed pixels from filtered image to first calculated image
Here is a crude average filter written by hand:
cir_R = int(Ro*2) # outer circle 2*r
inner_r = int(Ro - 0.5 - len(img)) # inner circle r
for i in range(1, cir_R-1):
for j in range(1, cir_R-1):
if cir[i][j] == 0: # missing pixel
dx = int(i-Ro)
dy = int(j-Ro)
pix_r2 = dx*dx + dy*dy # distance to center
if pix_r2 <= Ro*Ro and pix_r2 >= inner_r*inner_r:
cir[i][j] = (cir[i-1][j] + cir[i+1][j] + cir[i][j-1] +
cir[i][j+1])/4
shom_im(cir)
and the result:
This basically scans between the two radii, checks for missing pixels, and replaces each with the average of the 4 pixels adjacent to it. In this black-and-white case it is all white.
Hope it helps!

OpenCV Python: Reading and setting every pixel too slow

I'm trying to track motion with OpenCV in Python. If a pixel doesn't match the color of the last frame, it gets set to black; otherwise, if it's static, it is set to white. I got this working pretty well, but if I try to do this with every pixel one by one, performance takes a big hit and it runs too slow.
My code:
import numpy as np
import cv2

def distance(b1, g1, r1, b2, g2, r2):
    return abs(b2 - b1) + abs(g2 - g1) + abs(r2 - r1)

pixelStep = 1
lastFrame = None
thresh = 100

cap = cv2.VideoCapture(0)
while True:
    flag, frame = cap.read()
    frameInst = frame.copy()
    height = np.size(frame, 0)
    width = np.size(frame, 1)
    if lastFrame is not None:  # note: `!= None` is ambiguous on a NumPy array
        for x in range(0, height, pixelStep):
            for y in range(0, width, pixelStep):
                b1 = lastFrame.item(x, y, 0)
                g1 = lastFrame.item(x, y, 1)
                r1 = lastFrame.item(x, y, 2)
                b2 = frame.item(x, y, 0)
                g2 = frame.item(x, y, 1)
                r2 = frame.item(x, y, 2)
                dist = distance(b2, g2, r2, b1, g1, r1)
                colorValue = 255
                if dist > thresh:
                    colorValue = 0  # change to black if the pixel changed
                frame.itemset(x, y, 0, colorValue)
                frame.itemset(x, y, 1, colorValue)
                frame.itemset(x, y, 2, colorValue)
    lastFrame = frameInst
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
If I change pixelStep to 3 it'll run fast and feel right. Am I doing this right or do I need to approach this in a different way?
As a rule of thumb, any Python program which works on every pixel will be too slow. OpenCV has a huge number of functions -- use them! In your case, you can chain four functions (see the sketch after this list):
'absdiff' (http://docs.opencv.org/modules/core/doc/operations_on_arrays.html#absdiff) to get an image containing only the differences
'split' to get three separate arrays for the B, G, R differences
'add' to add them all together
'threshold' (http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#threshold) to turn the summed differences into a single binary mask
If you then want to find the areas where the differences are, you can use a blob detector to get a list of those areas.
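A minimal sketch of that chain, reusing frame, lastFrame, and thresh from the question:

import cv2

def motion_mask(frame, lastFrame, thresh=100):
    diff = cv2.absdiff(frame, lastFrame)  # per-channel absolute differences
    b, g, r = cv2.split(diff)             # separate B, G, R difference planes
    total = cv2.add(cv2.add(b, g), r)     # per-pixel sum, saturating at 255
    # changed pixels (sum above thresh) become black, static ones white
    _, mask = cv2.threshold(total, thresh, 255, cv2.THRESH_BINARY_INV)
    return mask

Note that cv2.add saturates 8-bit sums at 255; that's fine here because the threshold is below 255. The same thing can also be done in one line of NumPy, e.g. np.abs(frame.astype(int) - lastFrame.astype(int)).sum(axis=2) > thresh.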
