I have a neural network that outputs segments of a face, and I'm working on a function that combines these segments and then clones them onto a real face.
Example images are here: https://imgur.com/a/HnpqhEE, I do not have the reputation to include them inline.
So far, my function takes the makeup face and lips segments and combines them with addition. The left and right eyes are then cloned in with seamlessClone (the right eye is flipped first).
Then the combined makeup segments are cloned into the normal face.
Very occasionally, my combination function fails with a (-215:Assertion failed) 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows in function 'cv::Mat::Mat' error.
My function is below; I have only seen it error at the last seamlessClone call.
def combineFace(images, radius=70):
    # Given image segments and eye radii, combine the face.
    realFace = tensor2im(images['realNormal'])
    makeupFace = tensor2im(images['fakeMakeupFace'])
    makeupLeft = tensor2im(images['fakeMakeupLeft'])
    makeupRight = tensor2im(images['fakeMakeupRight'])
    makeupLips = tensor2im(images['fakeMakeupLips'])
    makeupRight = cv2.flip(makeupRight, 1)
    # I use cv2 and dlib to get face landmarks and interesting points.
    normalLandmarks = faceLandmarks(realFace)
    facePoints = getFacePoints(normalLandmarks)
    # PP means pupil points.
    outerPoints, leftPP, rightPP, lipPoints, eyeMids = facePoints
    # eyeMid is the (x, y) center of an eye obtained from landmark points.
    leftEye = eyeMids[0]
    rightEye = eyeMids[1]
    faceMask = np.zeros(realFace.shape, realFace.dtype)
    cv2.fillPoly(faceMask, [outerPoints], [255, 255, 255])
    cv2.fillPoly(faceMask, [lipPoints], [0, 0, 0])
    cv2.fillPoly(faceMask, [leftPP], [0, 0, 0])
    cv2.fillPoly(faceMask, [rightPP], [0, 0, 0])
    # Occasionally the eye segments overlap each other, so I cut the right
    # eye from the left and vice versa.
    leftMask = np.zeros(realFace.shape, realFace.dtype)
    cv2.circle(leftMask, leftEye, radius, [255, 255, 255], -1)
    cv2.circle(leftMask, rightEye, radius, [0, 0, 0], -1)
    # Errors if I do not use cv2.UMat.
    cv2.circle(cv2.UMat(makeupLeft), rightEye, radius, [0, 0, 0], -1)
    rightMask = np.zeros(realFace.shape, realFace.dtype)
    cv2.circle(rightMask, rightEye, radius, [255, 255, 255], -1)
    cv2.circle(rightMask, leftEye, radius, [0, 0, 0], -1)
    cv2.circle(cv2.UMat(makeupRight), leftEye, radius, [0, 0, 0], -1)
    # Combine face output and lips.
    baseCombine = makeupFace + makeupLips
    # Left eye, then right eye.
    output = cv2.seamlessClone(makeupLeft, baseCombine, leftMask, leftEye, cv2.MIXED_CLONE)
    output = cv2.seamlessClone(makeupRight, output, rightMask, rightEye, cv2.MIXED_CLONE)
    # Get the center of the face.
    faceRect = cv2.boundingRect(outerPoints)
    x, y, w, h = faceRect
    output = cv2.bitwise_and(output, faceMask)
    center = (x + w // 2, y + h // 2)
    # I have only seen the function error at this point.
    combinedFace = cv2.seamlessClone(output, realFace, faceMask, center, cv2.MIXED_CLONE)
    return combinedFace
Any idea why this is occasionally erroring?
All input images have shape (256, 256, 3).
This version of the function works much better. There was something wrong with my face center calculation, which was causing the error.
def combineFace(images, radius=70):
    # Given image segments and eye radii, combine the face.
    realFace = tensor2im(images['realNormal'])
    makeupFace = tensor2im(images['fakeMakeupFace'])
    makeupLeft = tensor2im(images['fakeMakeupLeft'])
    makeupRight = tensor2im(images['fakeMakeupRight'])
    makeupLips = tensor2im(images['fakeMakeupLips'])
    # The right eye is flipped before input into the network.
    makeupRight = cv2.flip(makeupRight, 1)
    normalLandmarks = faceLandmarks(realFace)
    facePoints = getFacePoints(normalLandmarks)
    outerPoints, leftPP, rightPP, lipPoints, eyeMids = facePoints
    leftEye = eyeMids[0]
    rightEye = eyeMids[1]
    leftMask = np.zeros(makeupLeft.shape, makeupLeft.dtype)
    cv2.circle(leftMask, leftEye, radius, [255, 255, 255], -1)
    cv2.circle(leftMask, rightEye, radius, [0, 0, 0], -1)
    # Errors if I do not use cv2.UMat.
    cv2.circle(cv2.UMat(makeupLeft), rightEye, radius, [0, 0, 0], -1)
    rightMask = np.zeros(makeupRight.shape, makeupRight.dtype)
    cv2.circle(rightMask, rightEye, radius, [255, 255, 255], -1)
    cv2.circle(rightMask, leftEye, radius, [0, 0, 0], -1)
    cv2.circle(cv2.UMat(makeupRight), leftEye, radius, [0, 0, 0], -1)
    # The base output is the face (without lips and pupils) plus the lips.
    baseCombine = makeupFace + makeupLips
    # Clone in the changed areas around the eyes.
    output = cv2.seamlessClone(makeupLeft, baseCombine, leftMask, leftEye, cv2.MIXED_CLONE)
    output = cv2.seamlessClone(makeupRight, output, rightMask, rightEye, cv2.MIXED_CLONE)
    # Find the center of the face.
    faceRect = cv2.boundingRect(outerPoints)
    x, y, w, h = faceRect
    if x < 0:
        x = 0
    if y < 0:
        y = 0
    faceCenter = (x + w // 2, y + h // 2)
    croppedOutput = output[y:y + h, x:x + w]
    faceMask = np.zeros(realFace.shape, realFace.dtype)
    cv2.fillPoly(faceMask, [outerPoints], [255, 255, 255])
    cv2.fillPoly(faceMask, [lipPoints], [0, 0, 0])
    cv2.fillPoly(faceMask, [leftPP], [0, 0, 0])
    cv2.fillPoly(faceMask, [rightPP], [0, 0, 0])
    croppedMask = faceMask[y:y + h, x:x + w]
    if len(croppedOutput) == 0:
        print("OUTPUT 0")
        print("FACE RECT: ", faceRect)
    # NumPy shapes are (rows, cols, channels), i.e. (height, width, channels).
    sourceH, sourceW, sCh = realFace.shape
    height, width, ch = croppedOutput.shape
    faceWidth = width / 2
    faceHeight = height / 2
    xdiff = 0
    ydiff = 0
    cx = faceCenter[0]
    cy = faceCenter[1]
    if cx - faceWidth < 0:
        # Face overflows the left edge; shift the center right.
        xdiff = abs(cx - faceWidth)
    if cx + faceWidth > sourceW:
        xdiff = (cx + faceWidth - sourceW) * -1
    if cy + faceHeight > sourceH:
        ydiff = (cy + faceHeight - sourceH) * -1
    if cy - faceHeight < 0:
        ydiff = abs(cy - faceHeight)
    centerx = int(cx + xdiff)
    centery = int(cy + ydiff)
    center = (centerx, centery)
    # We move the center; the mask stays in source coordinates, so it does
    # not need to move with it.
    combinedFace = cv2.seamlessClone(croppedOutput, realFace, croppedMask, center, cv2.MIXED_CLONE)
    return combinedFace
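For context on the original assertion: seamlessClone builds a destination ROI centered on the center point (sized from the source patch, or from the mask's bounding box, depending on the OpenCV version), and cv::Mat::Mat asserts when that ROI extends outside the destination image. Below is a minimal pre-check sketch under that assumption; cloneFits is a hypothetical helper, not part of my pipeline:
def cloneFits(src, dst, center):
    # seamlessClone pastes src so that its center lands on `center` inside dst;
    # the implied ROI must sit fully within dst, or the Mat constructor asserts.
    h, w = src.shape[:2]
    x0 = center[0] - w // 2
    y0 = center[1] - h // 2
    return 0 <= x0 and 0 <= y0 and x0 + w <= dst.shape[1] and y0 + h <= dst.shape[0]
Calling something like assert cloneFits(croppedOutput, realFace, center) right before the final seamlessClone surfaces a bad center before OpenCV does.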
I'm trying to plot a rotated XYZ axis using OpenCV's projectPoints function. When testing rotation about the X and Y axes, I noticed that the Z axis is drawn much longer than the X and Y axes, even though they should all be the same length, and I'm unsure why. Any help would be greatly appreciated!
Here is my code:
import numpy as np
import cv2
import sys

def rotByXAxis(angle):
    return np.array([
        [1, 0, 0],
        [0, np.cos(angle), -np.sin(angle)],
        [0, np.sin(angle), np.cos(angle)],
    ], dtype=np.float32)

def rotByYAxis(angle):
    return np.array([
        [ np.cos(angle), 0, np.sin(angle)],
        [ 0, 1, 0],
        [-np.sin(angle), 0, np.cos(angle)],
    ], dtype=np.float32)

def rotByZAxis(angle):
    return np.array([
        [ np.cos(angle), np.sin(angle), 0],
        [-np.sin(angle), np.cos(angle), 0],
        [ 0, 0, 1],
    ], dtype=np.float32)

def createCanvas(height, width):
    blank_image = np.zeros((height, width, 3), np.uint8)
    blank_image[:, :] = (255, 255, 255)
    return blank_image

def draw3DAxis(image, rvec, tvec, cameraMatrix, scale=1, dist=None):
    """
    Draw a 6DoF axis (XYZ -> RGB) at the given rotation and translation.
    :param image: RGB numpy array
    :param rvec: Rodrigues rotation vector, numpy array of length 3;
                 use cv2.Rodrigues(R)[0] to convert from a rotation matrix
    :param tvec: 3D translation vector, in meters (dtype must be float)
    :param cameraMatrix: intrinsic calibration matrix, 3x3
    :param scale: factor to control the axis lengths
    :param dist: optional distortion coefficients, numpy array of length 4;
                 if None, distortion is ignored
    """
    image = image.astype(np.float32)
    dist = np.zeros(4, dtype=float) if dist is None else dist
    if rvec.shape == (3, 3):
        rvec, _ = cv2.Rodrigues(rvec)
    points = scale * np.float32([
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 0],
    ]).reshape(-1, 3)
    axis_points, _ = cv2.projectPoints(points, rvec, tvec, cameraMatrix, dist)
    print(axis_points)
    image = cv2.arrowedLine(
        image,
        tuple(int(e) for e in axis_points[3].ravel()),
        tuple(int(e) for e in axis_points[0].ravel()),
        (255, 0, 0),
        3,
        tipLength=0.01 * scale,
    )
    image = cv2.arrowedLine(
        image,
        tuple(int(e) for e in axis_points[3].ravel()),
        tuple(int(e) for e in axis_points[1].ravel()),
        (0, 255, 0),
        3,
        tipLength=0.01 * scale,
    )
    image = cv2.arrowedLine(
        image,
        tuple(int(e) for e in axis_points[3].ravel()),
        tuple(int(e) for e in axis_points[2].ravel()),
        (0, 0, 255),
        3,
        tipLength=0.01 * scale,
    )
    return image

if __name__ == "__main__":
    height = 300
    width = 400
    image = createCanvas(height, width)
    rvec = rotByZAxis(-np.pi / 2)
    cameraMatrix = np.array([
        [1.0, 0, width / 2],
        [ 0, 1.0, height / 2],
        [ 0, 0, 1.0]], dtype=np.float32)
    image = draw3DAxis(
        image,
        rvec=rvec,
        tvec=np.zeros(3, dtype=float),
        cameraMatrix=cameraMatrix,
        scale=20,
    )
    cv2.imshow("output", image)
    key = cv2.waitKey(0)
    if key:
        sys.exit(1)
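One thing worth checking in a sketch like this (hedged, since the generated images are not shown): with tvec = np.zeros(3), the axis origin sits exactly at the camera's optical center, and the pinhole projection inside projectPoints divides each point's image coordinates by its Z depth, so points at or near Z = 0 can project to arbitrarily large coordinates. A variant that places the axes in front of the camera, where the depth of 50 is an arbitrary assumption:
# Push the axes along +Z so every projected point has positive depth.
image = draw3DAxis(
    image,
    rvec=rvec,
    tvec=np.array([0.0, 0.0, 50.0]),
    cameraMatrix=cameraMatrix,
    scale=20,
)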
I have a binary image and I'm trying to contour the elements that are elongated (not circular).
If I contour a single element, it is drawn with the correct colour. But how do I do this in a for loop? Only the last element is shown with the correct colour.
(Image: the contour of the last element.)
Here is my code:
import math
import cv2
import numpy as np
import matplotlib.pyplot as plt

# img_mask (the binary image) and contours (from cv2.findContours) are assumed
# to be defined earlier.
for i in range(len(contours)):
    ctr = contours[i]
    M = cv2.moments(ctr)
    cX = M['m10'] / M['m00']
    cY = M['m01'] / M['m00']
    rgb = cv2.cvtColor(img_mask, cv2.COLOR_GRAY2RGB)
    cv2.drawContours(rgb, contours, i, (255, 0, 0), 2)
    rot_rect = cv2.minAreaRect(ctr)
    box = np.int64(cv2.boxPoints(rot_rect))
    xx1 = (box[0, 0] + box[1, 0]) / 2
    yy1 = (box[0, 1] + box[1, 1]) / 2
    xx2 = (box[2, 0] + box[1, 0]) / 2
    yy2 = (box[2, 1] + box[1, 1]) / 2
    distance1 = math.sqrt(((xx1 - cX) ** 2) + ((yy1 - cY) ** 2))
    distance2 = math.sqrt(((xx2 - cX) ** 2) + ((yy2 - cY) ** 2))
    if (distance1 < 0.5 * distance2) or (0.5 * distance1 > distance2):
        cv2.drawContours(rgb, [box], -1, (0, 255, 0), 2)
        plt.imshow(rgb)
    else:
        cv2.drawContours(rgb, [box], -1, (0, 0, 255), 2)
        plt.imshow(rgb)
This helped me: I saved the relevant contours into separate arrays (note that they must be NumPy arrays and not lists, so we can pass them to the contour-drawing method). The other key change is that everything is drawn onto a single rgb canvas created once, rather than recreating the canvas on every loop iteration, which is what wiped all but the last element.
(Image: the result of contouring specific elements in the image.)
import math
import cv2
import numpy as np

ctr1 = []
ctr2 = []
# Create the RGB canvas once, outside the loop, so nothing gets wiped.
rgb = cv2.cvtColor(img_mask, cv2.COLOR_GRAY2RGB)
for i in range(len(contours)):
    ctr = contours[i]
    M = cv2.moments(ctr)
    cX = M['m10'] / M['m00']
    cY = M['m01'] / M['m00']
    rot_rect = cv2.minAreaRect(ctr)
    box = np.int64(cv2.boxPoints(rot_rect))
    xx1 = (box[0, 0] + box[1, 0]) / 2
    yy1 = (box[0, 1] + box[1, 1]) / 2
    xx2 = (box[2, 0] + box[1, 0]) / 2
    yy2 = (box[2, 1] + box[1, 1]) / 2
    distance1 = math.sqrt(((xx1 - cX) ** 2) + ((yy1 - cY) ** 2))
    distance2 = math.sqrt(((xx2 - cX) ** 2) + ((yy2 - cY) ** 2))
    # Sort each contour into one of the two groups; draw them after the loop.
    if (distance1 < 0.5 * distance2) or (0.5 * distance1 > distance2):
        ctr1.append(np.asarray(ctr, dtype=np.int32))
    else:
        ctr2.append(np.asarray(ctr, dtype=np.int32))
cv2.drawContours(rgb, ctr1, -1, (255, 0, 0), 5)
cv2.drawContours(rgb, ctr2, -1, (0, 255, 0), 2)
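A small usage note (the display call is an assumption, not part of the original answer): because both groups end up on the same rgb canvas, a single show call displays everything at once:
# Hypothetical display step: show both contour groups together.
plt.imshow(rgb)
plt.show()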
Given a NumPy matrix of shape (height, width), I am looking for the fastest way to create another NumPy matrix of shape (height, width, 4), where 4 represents RGBA values. I would like to do this value-based: for every value of 0 in the first matrix, I would like [255, 255, 255, 0] in the second matrix at the same location.
I would like to do this with NumPy, without slowly iterating like below:
for i in range(0, height):
    for j in range(0, width):
        if image[i][j] == 0:
            new_image[i][j] = [255, 255, 255, 0]
        elif image[i][j] == 1:
            new_image[i][j] = [0, 255, 0, 0.5]
As you can see, I am creating a matrix where the value 0 becomes transparent white, and 1 becomes green with an alpha of 0.5; are there faster NumPy solutions?
I am guessing numpy.where should greatly speed up the process, but I haven't yet figured out the proper implementation for translating multiple (and many) values.
For a cleaner solution, especially when working with multiple labels, we could make use of np.searchsorted to trace back the values for the mapping, like so -
# Edit to include more labels and values here
label_ar = np.array([0,1]) # sorted label array
val_ar = np.array([[255, 255, 255, 0],[0, 255, 0, 0.5]])
# Get output array
out = val_ar[np.searchsorted(label_ar, image)]
Note that this assumes that all unique labels from image are in label_ar.
So, now let's say we have two more labels 2 and 3 in image, something like this -
for i in range(0, height):
    for j in range(0, width):
        if image[i,j] == 0:
            new_image[i,j] = [255, 255, 255, 0]
        elif image[i,j] == 1:
            new_image[i,j] = [0, 255, 0, 0.5]
        elif image[i,j] == 2:
            new_image[i,j] = [0, 255, 255, 0.5]
        elif image[i,j] == 3:
            new_image[i,j] = [255, 255, 255, 0.5]
We will edit the labels and values accordingly and use the same searchsorted solution -
label_ar = np.array([0,1,2,3]) # sorted label array
val_ar = np.array([
    [255, 255, 255, 0],
    [0, 255, 0, 0.5],
    [0, 255, 255, 0.5],
    [255, 255, 255, 0.5]])
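As a quick sanity check (the toy input here is hypothetical, not from the original answer), the lookup returns one RGBA row per label and keeps the image's spatial shape:
image = np.array([[0, 1, 2],
                  [3, 2, 0]])
out = val_ar[np.searchsorted(label_ar, image)]
print(out.shape)  # (2, 3, 4)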
You are right that np.where is how you solve this problem. np.where is a vectorized function, so it should be much faster than your loop. It doesn't have an elif that I'm aware of, but you can get around that by nesting where statements.
# image has shape (height, width) while the RGBA rows have length 4, so the
# comparison uses image[..., None] to broadcast the result to (height, width, 4).
new_image = np.where(
    image[..., None] == 0,
    [255, 255, 255, 0],
    np.where(
        image[..., None] == 1,
        [0, 255, 0, 0.5],
        np.nan,
    ),
)
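For more than two labels, a hedged alternative to deep nesting (a sketch, not from the original answers) is np.select, which takes parallel lists of conditions and choices:
# Each condition has shape (height, width, 1) and broadcasts against the
# 4-element RGBA choices to give a (height, width, 4) result.
conditions = [image[..., None] == 0, image[..., None] == 1]
choices = [[255, 255, 255, 0], [0, 255, 0, 0.5]]
new_image = np.select(conditions, choices, default=np.nan)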
I am trying to achieve a perspective transformation using Python and OpenCV. The transformation is done by selecting 4 points on the image, but the output image is highly blurred. Even when I don't use the mouse event for selecting the 4 points (hard-coding them instead), the image quality is still blurred. Here is my attempt:
import math
import cv2
import numpy as np
import matplotlib.pyplot as plt

l = []  # the four points selected by double-clicking

def draw_circle(event, x, y, flags, param):
    if event == cv2.EVENT_LBUTTONDBLCLK:
        cv2.circle(img, (x, y), 5, (255, 0, 0), -1)
        p = (x, y)
        l.append(p)
        print(l)

cv2.namedWindow('image', cv2.WINDOW_NORMAL)
img = cv2.imread('Path to my input image')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
cv2.resizeWindow('image', 600, 600)
cv2.setMouseCallback('image', draw_circle)
while 1:
    cv2.imshow('image', img)
    if cv2.waitKey(20) & 0xFF == 27:
        break
cv2.destroyAllWindows()

rows, cols, channels = img.shape
pts1 = np.float32(l)
pts2 = np.float32([[0, 0], [200, 0], [200, 100], [0, 100]])
M = cv2.getPerspectiveTransform(pts1, pts2)
dst = cv2.warpPerspective(img, M, (200, 100), flags=cv2.INTER_LINEAR)
h1 = math.sqrt((abs(pts1[1][0] - pts1[0][0])) ** 2 + (abs(pts1[1][1] - pts1[0][1])) ** 2)
h2 = math.sqrt((abs(pts1[3][0] - pts1[2][0])) ** 2 + (abs(pts1[3][1] - pts1[2][1])) ** 2)
v1 = math.sqrt((abs(pts1[3][0] - pts1[0][0])) ** 2 + (abs(pts1[3][1] - pts1[0][1])) ** 2)
v2 = math.sqrt((abs(pts1[2][0] - pts1[1][0])) ** 2 + (abs(pts1[2][1] - pts1[1][1])) ** 2)
max_h = int(max(h1, h2))
max_v = int(max(v1, v2))
dst = cv2.resize(dst, (max_h, max_v))
plt.subplot(121), plt.imshow(img), plt.title('Input')
plt.subplot(122), plt.imshow(dst), plt.title('Output')
plt.show()
(Input image: a fridge shelf with selected beverages.)
(Output image: the blurred result after the perspective transform.)
Replace this line in your code:
pts2 = np.float32([[0, 0], [200, 0], [200, 100], [0, 100]])
with this one (maybe you have to switch the v/h order, I don't know Python syntax):
pts2 = np.float32([[0, 0], [max_h, 0], [max_h, max_v], [0, max_v]])
and move the max_h/max_v computation to before the transformation computation. Then remove the resizing code.
At the moment you first (implicitly) resize to a 100x200 temporary image, which will be very blurry once you resize it to a bigger image afterwards.
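Putting the suggestion together, here is a minimal sketch of the corrected tail end of the script (assuming the four clicked points in l are ordered top-left, top-right, bottom-right, bottom-left):
pts1 = np.float32(l)
# Measure the quad's side lengths first, then warp straight to that size.
h1 = np.linalg.norm(pts1[1] - pts1[0])
h2 = np.linalg.norm(pts1[3] - pts1[2])
v1 = np.linalg.norm(pts1[3] - pts1[0])
v2 = np.linalg.norm(pts1[2] - pts1[1])
max_h, max_v = int(max(h1, h2)), int(max(v1, v2))
pts2 = np.float32([[0, 0], [max_h, 0], [max_h, max_v], [0, max_v]])
M = cv2.getPerspectiveTransform(pts1, pts2)
# No cv2.resize afterwards; the warp already produces the full-size output.
dst = cv2.warpPerspective(img, M, (max_h, max_v), flags=cv2.INTER_LINEAR)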
I need to save a transparent image made from a numpy array. I can save the image with:
img = Image.fromarray(data, 'RGB')
But I need it to be transparent, so I tried to save it with:
img = Image.fromarray(data, 'RGBA')
Then I get this error:
File "/home/pi/Documents/Projet/GetPos.py", line 51, in click
img = Image.fromarray(data, 'RGBA')
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 2217, in
fromarray
return frombuffer(mode, size, obj, "raw", rawmode, 0, 1)
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 2162, in
frombuffer
core.map_buffer(data, size, decoder_name, None, 0, args)
ValueError: buffer is not large enough
I did some research, but everything looks very complicated for the simple thing I'm trying to do...
Can anyone help me with this one?
Here's my complete code ( I'm pretty new to python :) ):
import numpy as np
import pymouse
from PIL import Image

mouse = pymouse.PyMouse()
posX, posY = mouse.position()
print(mouse.position())
w, h = 1920, 1080
data = np.zeros((h, w, 3), dtype=np.uint8)
for x in range(posX - 20, posX + 20):
    if x > 1679:
        data[posY, w - 1] = [255, 0, 0]
    else:
        data[posY, x] = [255, 0, 0]
for y in range(posY - 20, posY + 20):
    if y > 1049:
        data[h - 1, posX] = [255, 0, 0]
    else:
        data[y, posX] = [255, 0, 0]
img = Image.fromarray(data, 'RGBA')
##img = Image.frombuffer('RGBA', [1080, 1920], data, "raw", 'RGBA', 0, 1)
img.save('my.png')
In order to save a transparent image, you need a fourth value per pixel, called the alpha channel, which determines the opacity of your pixel (RGBA stands for red, green, blue and alpha). That is also where your ValueError comes from: your array has shape (h, w, 3), but the 'RGBA' mode expects 4 values per pixel, so the buffer is not large enough. The only thing that has to be changed in your code is essentially to provide that 4th alpha value, using tuples of 4 values instead of 3 for a pixel. Setting the 4th value to 255 makes the pixel completely visible; 0 would make it 100% transparent. In the following example I simply set every pixel that you were drawing red to completely visible, and the others will be transparent:
mouse = pymouse.PyMouse()
posX, posY = mouse.position()
w, h = 1920, 1080
# Four channels now: red, green, blue, alpha.
data = np.zeros((h, w, 4), dtype=np.uint8)
for x in range(posX - 20, posX + 20):
    if x > 1679:
        data[posY, w - 1] = [255, 0, 0, 255]
    else:
        data[posY, x] = [255, 0, 0, 255]
for y in range(posY - 20, posY + 20):
    if y > 1049:
        data[h - 1, posX] = [255, 0, 0, 255]
    else:
        data[y, posX] = [255, 0, 0, 255]
img = Image.fromarray(data, 'RGBA')
img.save('my.png')
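As a side note (a sketch, not part of the original answer): the two loops can be replaced with NumPy slicing, which also clips the cross to the image bounds instead of pinning the overflow to a single edge pixel:
data = np.zeros((h, w, 4), dtype=np.uint8)
x0, x1 = max(posX - 20, 0), min(posX + 20, w)
y0, y1 = max(posY - 20, 0), min(posY + 20, h)
data[posY, x0:x1] = [255, 0, 0, 255]  # horizontal bar of the cross
data[y0:y1, posX] = [255, 0, 0, 255]  # vertical bar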