Related
I am trying to warp an image based of the orientation of the camera relative to an aruco marker in the middle of the image. I have managed to get the translation part working but the rotation element is not working. It seems like the image isn't rotating about the centre of the aruco axis. The reference image was taken straight on and the warped image is overlayed.
# Find centre of the marker
top_left_x = (corners[0][0][0, 0])
top_left_y = (corners[0][0][0, 1])
top_right_x = (corners[0][0][1, 0])
top_right_y = (corners[0][0][1, 1])
bottom_right_x = (corners[0][0][2, 0])
bottom_right_y = (corners[0][0][2, 1])
bottom_left_x = (corners[0][0][3, 0])
bottom_left_y = (corners[0][0][3, 1])
# Compare this to the centre of the image to calculate the offset
mid_x = top_right_x - (top_right_x - top_left_x) / 2
mid_y = bottom_left_y - (bottom_left_y - top_left_y) / 2
x_centre = 960
y_centre = 540
x_offset = x_centre - mid_x
y_offset = y_centre - mid_y
if x_centre > mid_x: # gone right
x_offset = 1 * (x_centre - mid_x) # correction to the left
if x_centre < mid_x: # gone left
x_offset = -1 * (mid_x - x_centre) # correction to the right
if y_centre > mid_y: # gone down
y_offset = 1 * (y_centre - mid_y) # correction to the left
if y_centre < mid_y: # gone left
y_offset = -1 * (mid_y - y_centre) # correction to the right
current_z_distance = (math.sqrt((pos_camera[0]**2) + (pos_camera[1]**2) +
(pos_camera[2]**2))) * 15.4
img = cv2.imread('Corrected.png')
corrected_z = 31 # Distance when image was taken
initial_z_distance = corrected_z * 15.4 # Pixels
delta_z = (initial_z_distance - current_z_distance)
scale_factor = current_z_distance / initial_z_distance # how much larger the image
now is. Used for scaling
z_translation = delta_z * 1.54 # how much the image has moved. negative for going
backwards
z_translation = 0
z_axis = 960 / scale_factor
proj2dto3d = np.array([[1, 0, -mid_x],
[0, 1, -mid_y],
[0, 0, 0],
[0, 0, 1]], np.float32)
proj3dto2d = np.array([[z_axis, 0, mid_x, 0],
[0, z_axis, mid_y, 0], # defines to centre of rotation
[0, 0, 1, 0]], np.float32)
trans = np.array([[1, 0, 0, x_offset * -1], # Working
[0, 1, 0, y_offset * -1],
[0, 0, 1, 960], # keep as 960
[0, 0, 0, 1]], np.float32)
x = math.degrees(roll_marker) * -1 # forwards and backwards
y = math.degrees(pitch_marker) * -1 # Left and right
z = 0
rx = np.array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]], np.float32) #
ry = np.array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]], np.float32)
rz = np.array([[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]], np.float32)
ax = float(x * (math.pi / 180.0)) # 0
ay = float(y * (math.pi / 180.0))
az = float(z * (math.pi / 180.0)) # 0
rx[1, 1] = math.cos(ax) # 0
rx[1, 2] = -math.sin(ax) # 0
rx[2, 1] = math.sin(ax) # 0
rx[2, 2] = math.cos(ax) # 0
ry[0, 0] = math.cos(ay)
ry[0, 2] = -math.sin(ay)
ry[2, 0] = math.sin(ay)
ry[2, 2] = math.cos(ay)
rz[0, 0] = math.cos(az) # 0
rz[0, 1] = -math.sin(az) # 0
rz[1, 0] = math.sin(az) # 0
rz[1, 1] = math.cos(az) # 0
# Translation matrix
# r = rx.dot(ry) # if we remove the lines we put r=ry
r = rx.dot(ry) # order may need to be changed
final = proj3dto2d.dot(trans.dot(r.dot(proj2dto3d))) # just rotation
dst = cv2.warpPerspective(img, final, (img.shape[1], img.shape[0]), None, cv2.INTER_LINEAR, cv2.BORDER_CONSTANT, (255, 255, 255))
bilinear_interpolation(image,y,x) which takes 2d matrix image and x,y coordinates of the pixel in the image as like they are in the image
y - to the height of the image x - to the width of the image (x,y)
and returns the pixel ( from 0 to 255 ) from the calculation
notice: the x,y in the calculation aren't the same x,y that we take as parameters in the function
It can be assumed that the function receives a valid image with a single color channel
examples of the input and output:
bilinear_interpolation([[0, 64], [128, 255]], 0, 0) → 0
bilinear_interpolation([[0, 64], [128, 255]], 1, 1) → 255
bilinear_interpolation([[0, 64], [128, 255]], 0.5, 0.5) → 112
bilinear_interpolation([[0, 64], [128, 255]], 0.5, 1.5) → 160
assert bilinear_interpolation([[0, 64], [128, 255]], 0.5, 1) == 160
assert bilinear_interpolation([[0, 64], [128, 255]], 0.5, 1.5) == 160
assert bilinear_interpolation([[0, 64], [128, 255]], 0, 1) == 64
assert bilinear_interpolation([[0, 64], [128, 255]], 0, 0) == 0
assert bilinear_interpolation([[0, 64], [128, 255]], 1, 1) == 255
assert bilinear_interpolation([[0, 64], [128, 255]], 0.5, 0.5) == 112
assert bilinear_interpolation([[255, 255], [255, 255]], 0.5, 1.5) == 255
assert bilinear_interpolation([[255, 255], [255, 255]], 0, 1) == 255
assert bilinear_interpolation([[255, 255], [255, 255]], 1.5, 1.5) == 255
So what I tried is like this:
def bilinear_interpolation(image, y, x):
xa = math.floor(x)
ya = math.floor(y)
if(xa >= len(image[0]) and ya >= len(image)):
xa = len(image[0]) - 1
ya = len(image) - 1
a = image[ya][xa]
elif (xa >= len(image[0]) ):
xa = len(image[0]) - 1
a = image[ya][xa]
elif (ya >= len(image)):
ya = len(image) - 1
a = image[ya][xa]
else:
a = image[ya][xa]
if(ya + 1 >= len(image)):
b = image[ya][xa]
else:
b = image[ya + 1][xa]
if (xa + 1 >= len(image[0])):
c = image[ya][xa]
else:
c = image[ya][xa + 1]
if(xa + 1 >= len(image[0]) and ya + 1 >= len(image)):
d = image[ya][xa]
elif (xa + 1 >= len(image[0]) ):
d = image[ya + 1][xa]
elif (ya+1 >= len(image)):
d = image[ya][xa + 1]
else:
d = image[ya + 1][xa + 1]
dx = x - math.floor(x)
dy = y - math.floor(y)
interpolation_factor = a *( 1 - dx)*(1 - dy) + b * dy * (1 - dx) + c * dx * (1 - dy) + d * dx * dy
return round(interpolation_factor)
but its still failing for me.. I always get list index out of range in huge matrix like picture with size 460 x 460
any direction?
I have been trying to make a 3d engine with python for some time, and I have gotten pretty far, however I have found a problem in when I try and sort items in a list, the sorting flips when you are close enough to the cube.
main.py:
import os
os.environ["SDL_VIDEO_CENTERED"] = '1'
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = '1'
import pygame
from pygame import gfxdraw
import math
from matrix import matrix_multiplication
import mesh
from random import randint as random
import time
startTime = time.time()
black, white, blue = (20, 20, 20), (230, 230, 230), (0, 154, 255)
width, height = 700, 700
pygame.init()
pygame.display.set_caption("3D Engine")
screen = pygame.display.set_mode((width, height))
clock = pygame.time.Clock()
frames = 60
outline = False
rs, gs, bs = [random(0, 255) for i in range(len(mesh.faces))], [random(0, 255) for i in
range(len(mesh.faces))], [random(0, 255) for i in range(len(mesh.faces))]
angle_x = 0
angle_y = 0
angle_z = 0
pos_x = 0
pos_y = 0
pos_z = 0
cube_position = [width//2, height//2]
scale = 600
speed = 0.001
points = [[[i] for i in j] for j in mesh.verts]
movSpeed = 0.001
font = pygame.font.SysFont("Corbel", 23)
def avarageX(i):
return (new_points[mesh.faces[i][0]][0][0] + new_points[mesh.faces[i][1]][0][0] + new_points[mesh.faces[i][2]][0][0] + new_points[mesh.faces[i][3]][0][0]) / 4
def avarageY(i):
return (new_points[mesh.faces[i][0]][1][0] + new_points[mesh.faces[i][1]][1][0] + new_points[mesh.faces[i][2]][1][0] + new_points[mesh.faces[i][3]][1][0]) / 4
def avarageZ(i):
return (new_points[mesh.faces[i][0]][2][0] + new_points[mesh.faces[i][1]][2][0] + new_points[mesh.faces[i][2]][2][0] + new_points[mesh.faces[i][3]][2][0]) / 4
def distToCam(i):
a = [0, 0, 0]
b = [avarageX(i), avarageY(i), avarageZ(i)]
return math.dist(a, b)
print("It took: {} seconds".format(time.time() - startTime))
run = True
while run:
dt = clock.tick(frames)
fps = clock.get_fps()
screen.fill(white)
keys = pygame.key.get_pressed()
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
index = 0
projected_points = [j for j in range(len(points))]
rotation_x = [[1, 0, 0],
[0, math.cos(angle_x), -math.sin(angle_x)],
[0, math.sin(angle_x), math.cos(angle_x)]]
rotation_y = [[math.cos(angle_y), 0, -math.sin(angle_y)],
[0, 1, 0],
[math.sin(angle_y), 0, math.cos(angle_y)]]
rotation_z = [[math.cos(angle_z), -math.sin(angle_z), 0],
[math.sin(angle_z), math.cos(angle_z), 0],
[0, 0, 1]]
new_points = []
for point in points:
rotated_2d = matrix_multiplication(rotation_y, point)
rotated_2d = matrix_multiplication(rotation_x, rotated_2d)
rotated_2d = matrix_multiplication(rotation_z, rotated_2d)
new_point = [[rotated_2d[0][0] + pos_x], [rotated_2d[1][0] + pos_y], [rotated_2d[2][0] - pos_z]]
new_points.append(new_point)
distance = 5
z = 1 / (distance - new_point[2][0])
projection_matrix = [[z, 0, 0],
[0, z, 0]]
projected_2d = matrix_multiplication(projection_matrix, new_point)
x = int(projected_2d[0][0] * scale) + cube_position[0]
y = int(projected_2d[1][0] * scale) + cube_position[1]
projected_points[index] = [x, y]
index += 1
zs = [[distToCam(i), i] for i in range(len(mesh.faces))]
zs.sort(reverse=True)
faces = [[mesh.faces[zs[i][1]], zs[i][1]] for i in range(len(mesh.faces))]
fi = 0
for f in faces:
gfxdraw.filled_polygon(screen, [projected_points[f[0][0]], projected_points[f[0][1]], projected_points[f[0][2]], projected_points[f[0][3]]], (rs[zs[fi][1]], gs[zs[fi][1]], bs[zs[fi][1]]))
gfxdraw.aapolygon(screen, [projected_points[f[0][0]], projected_points[f[0][1]], projected_points[f[0][2]], projected_points[f[0][3]]], (rs[zs[fi][1]], gs[zs[fi][1]], bs[zs[fi][1]]))
fi += 1
angle_x += (keys[pygame.K_DOWN] - keys[pygame.K_UP]) * speed * dt
angle_y += (keys[pygame.K_RIGHT] - keys[pygame.K_LEFT]) * speed * dt
pos_x += (keys[pygame.K_d] - keys[pygame.K_a]) * movSpeed * dt
pos_z += (keys[pygame.K_w] - keys[pygame.K_s]) * movSpeed * dt
text = font.render(str(round(fps, 1)), False, black)
screen.blit(text, (0, 0))
pygame.display.update()
pygame.quit()
The matrix multiplication
matrix.py:
def matrix_multiplication(a, b):
columns_a = len(a[0])
rows_a = len(a)
columns_b = len(b[0])
rows_b = len(b)
result_matrix = [[j for j in range(columns_b)] for i in range(rows_a)]
if columns_a == rows_b:
for x in range(rows_a):
for y in range(columns_b):
sum = 0
for k in range(columns_a):
sum += a[x][k] * b[k][y]
result_matrix[x][y] = sum
return result_matrix
else:
print("columns of the first matrix must be equal to the rows of the second matrix")
return None
The mesh data.
mesh.py:
verts = [
[1, 1, 1],
[1, 1, -1],
[1, -1, 1],
[1, -1, -1],
[-1, 1, 1],
[-1, 1, -1],
[-1, -1, 1],
[-1, -1, -1]
]
faces = [
[0, 4, 6, 2],
[3, 2, 6, 7],
[7, 6, 4, 5],
[5, 1, 3, 7],
[1, 0, 2, 3],
[5, 4, 0, 1]
]
WARNING: There might be flashing lights on startup
You have to compute the distance of the camera position ([0, 0, distance]) to the points in world space (new_points), instead of the points in model space (points):
def distToCam(i):
a = [0, 0, distance]
b = [sum(new_points[mesh.faces[i][pi]][j][0] for pi in range(4)) / 4 for j in range(3)]
return math.dist(a, b)
I have an image which is read as a uint8 array with the shape (512,512,3).
Now I would like to convert this array to a uint8 array of shape (512,512,1), where each pixel value in the third axis are converted from a color value [255,0,0] to a single class label value [3], based on the following color/class encoding:
1 : [0, 0, 0],
2 : [0, 0, 255],
3 : [255, 0, 0],
4 : [150, 30, 150],
5 : [255, 65, 255],
6 : [150, 80, 0],
7 : [170, 120, 65],
8 : [125, 125, 125],
9 : [255, 255, 0],
10 : [0, 255, 255],
11 : [255, 150, 0],
12 : [255, 225, 120],
13 : [255, 125, 125],
14 : [200, 100, 100],
15 : [0, 255, 0],
16 : [0, 150, 80],
17 : [215, 175, 125],
18 : [220, 180, 210],
19 : [125, 125, 255]
What is the most efficient way to do this? I thought of looping through all classes and using numpy.where, but this is obviously time-consuming.
You could use giant lookup table. Let cls be [[0,0,0], [0,0,255], ...] of dtype=np.uint8.
LUT = np.zeros(size=(256,256,256), dtype='u1')
LUT[cls[:,0],cls[:,1],cls[:,2]] = np.arange(cls.shape[1])+1
img_as_cls = LUT[img[...,0],img[...,1], img[...,2]]
This solution is O(1) per pixel. It is also quite cache efficient because a small part of entries in LUT are actually used. It takes circa 10ms to process 1000x1000 image on my machine.
The solution can be slightly improved by converting 3-color channels to 24-bit integers.
Here is the code
def scalarize(x):
# compute x[...,2]*65536+x[...,1]*256+x[...,0] in efficient way
y = x[...,2].astype('u4')
y <<= 8
y +=x[...,1]
y <<= 8
y += x[...,0]
return y
LUT = np.zeros(2**24, dtype='u1')
LUT[scalarize(cls)] = 1 + np.arange(cls.shape[0])
simg = scalarize(img)
img_to_cls = LUT[simg]
After optimization it takes about 5ms to process 1000x1000 image.
One way: separately create the boolean arrays with True values where the input's pixel value matches one of the palette values, and then use arithmetic to combine them. Thus:
palette = [
[0, 0, 0],
[0, 0, 255],
[255, 0, 0],
# etc.
]
def palettized(data, palette):
# Initialize result array
shape = list(data.shape)
shape[-1] = 1
result = np.zeros(shape)
# Loop and add each palette index component.
for value, colour in enumerate(palette, 1):
result += (data == colour).all(axis=2) * value
return result
Here's one based on views -
# https://stackoverflow.com/a/45313353/ #Divakar
def view1D(a, b): # a, b are arrays
# This function gets 1D view into 2D input arrays
a = np.ascontiguousarray(a)
b = np.ascontiguousarray(b)
void_dt = np.dtype((np.void, a.dtype.itemsize * a.shape[-1]))
return a.view(void_dt).ravel(), b.view(void_dt).ravel()
def img2label(a, maps):
# Get one-dimension reduced view into input image and map arrays.
# We need to reshape image to 2D, then feed it to view1D to get 1D
# outputs and then reshape 1D image to 2D
A,B = view1D(a.reshape(-1,a.shape[-1]),maps)
A = A.reshape(a.shape[:2])
# Trace back positions of A in B using searchsorted. This gives us
# original order, which is the final output.
sidx = B.argsort()
return sidx[np.searchsorted(B,A,sorter=sidx)]
Given that your labels start from 1, you might want to add 1 to the output.
Sample run -
In [100]: # Mapping array
...: maps = np.array([[0, 0, 0],[0, 0, 255],\
...: [255, 0, 0],[150, 30, 150]],dtype=np.uint8)
...:
...: # Setup random image array
...: idx = np.array([[0,2,1,3],[1,3,2,0]])
...: img = maps[idx]
In [101]: img2label(img, maps) # should retrieve back idx
Out[101]:
array([[0, 2, 1, 3],
[1, 3, 2, 0]])
I'm trying to scale the colors of images to predefined ranges. Based on least-squared error from palette's range of colors, a color is assigned to output pixel.
I have written the code in python loops is there a better vectorized way to do this?
import numpy as np
import skimage.io as io
palette = [
[180, 0 , 0],
[255, 150, 0],
[255, 200, 0],
[0, 128, 0]
]
IMG = io.imread('lena.jpg')[:,:,:3]
DIM = IMG.shape
IOUT = np.empty(DIM)
for x in range(DIM[0]):
for y in range(DIM[1]):
P = ((np.array(palette)-IMG[x,y,:])**2).sum(axis=1).argmin()
IOUT[x,y,:] = palette[P]
Can the loops be avoided and solved using numpy operations itself?
Don't loop over all pixels, but over all colors:
import pylab as pl
palette = pl.array([[180, 0, 0], [255, 150, 0], [255, 200, 0], [0, 128, 0]])
img = pl.imread('lena.jpg')[:, :, :3].astype('float')
R, G, B = img[:, :, 0].copy(), img[:, :, 1].copy(), img[:, :, 2].copy()
dist = pl.inf * R
for i in range(len(palette)):
new_dist = pl.square(img[:, :, 0] - palette[i, 0]) \
+ pl.square(img[:, :, 1] - palette[i, 1]) \
+ pl.square(img[:, :, 2] - palette[i, 2])
R[new_dist < dist] = palette[i, 0]
G[new_dist < dist] = palette[i, 1]
B[new_dist < dist] = palette[i, 2]
dist = pl.minimum(dist, new_dist)
pl.clf()
pl.subplot(1, 2, 1)
pl.imshow(img.astype('uint8'))
pl.subplot(1, 2, 2)
pl.imshow(pl.dstack((R, G, B)))
Edit: The loop-less alternative. ;)
import pylab as pl
palette = pl.array([[180, 0 , 0], [255, 150, 0], [255, 200, 0], [0, 128, 0]])
img = pl.imread('lena.jpg')[:, :, :3]
pl.clf()
pl.subplot(1, 2, 1)
pl.imshow(img)
IMG = img.reshape((512, 512, 3, 1))
PAL = palette.transpose().reshape((1, 1, 3, -1))
idx = pl.argmin(pl.sum((IMG - PAL)**2, axis=2), axis=2)
img = palette[idx, :]
pl.subplot(1, 2, 2)
pl.imshow(img)