Image rotation and translation using an aruco marker - python

I am trying to warp an image based on the orientation of the camera relative to an ArUco marker in the middle of the image. I have managed to get the translation part working, but the rotation element is not working. It seems like the image isn't rotating about the centre of the ArUco marker's axes. The reference image was taken straight on and the warped image is overlaid on it.
# Find the centre of the marker.
# `marker` holds the corners of the first detected marker, indexed as
# [corner, axis]; the variable names below follow the order top-left,
# top-right, bottom-right, bottom-left.
marker = corners[0][0]
top_left_x, top_left_y = marker[0, 0], marker[0, 1]
top_right_x, top_right_y = marker[1, 0], marker[1, 1]
bottom_right_x, bottom_right_y = marker[2, 0], marker[2, 1]
bottom_left_x, bottom_left_y = marker[3, 0], marker[3, 1]

# NOTE(review): the centre is derived from the top edge (x) and the left
# edge (y) only. For a marker seen under perspective, the mean of all four
# corners may be a better estimate -- confirm before changing.
mid_x = top_right_x - (top_right_x - top_left_x) / 2
mid_y = bottom_left_y - (bottom_left_y - top_left_y) / 2

# Compare this to the centre of the image to calculate the offset.
# (960, 540) is presumably the centre of a 1920x1080 frame.
x_centre = 960
y_centre = 540

# Signed offsets: positive when the marker centre lies left of / above the
# image centre, negative when it lies right of / below it. A single
# subtraction already yields the correct sign, so no per-direction branches
# are needed.
x_offset = x_centre - mid_x
y_offset = y_centre - mid_y
# Factor the original script multiplies camera distances by to express them
# in pixels (per its own "# Pixels" comment).
PIXELS_PER_UNIT = 15.4

# Straight-line distance from the camera to the marker, in pixels.
current_z_distance = math.sqrt(
    pos_camera[0] ** 2 + pos_camera[1] ** 2 + pos_camera[2] ** 2
) * PIXELS_PER_UNIT

img = cv2.imread('Corrected.png')
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read 'Corrected.png'")

corrected_z = 31  # Distance when image was taken
initial_z_distance = corrected_z * PIXELS_PER_UNIT  # Pixels
delta_z = initial_z_distance - current_z_distance

# How much larger the image now is. Used for scaling.
scale_factor = current_z_distance / initial_z_distance

# How much the image has moved (negative for going backwards).
# The original computed delta_z * 1.54 and immediately overwrote it with 0,
# so the z translation is effectively disabled.
z_translation = 0

z_axis = 960 / scale_factor
# Lift 2D pixel coordinates to homogeneous 3D, shifting the marker centre to
# the origin so the rotations below act about it.
proj2dto3d = np.array([[1, 0, -mid_x],
                       [0, 1, -mid_y],
                       [0, 0, 0],
                       [0, 0, 1]], np.float32)

# Project back to 2D; (mid_x, mid_y) defines the centre of rotation.
proj3dto2d = np.array([[z_axis, 0, mid_x, 0],
                       [0, z_axis, mid_y, 0],
                       [0, 0, 1, 0]], np.float32)

# Translation: undo the marker's x/y offset and push the plane to z = 960.
trans = np.array([[1, 0, 0, -x_offset],  # Working
                  [0, 1, 0, -y_offset],
                  [0, 0, 1, 960],  # keep as 960
                  [0, 0, 0, 1]], np.float32)

# Correction angles in degrees (sign flipped relative to the marker pose).
x = math.degrees(roll_marker) * -1  # forwards and backwards
y = math.degrees(pitch_marker) * -1  # left and right
z = 0

# The same angles in radians, for the trigonometric functions.
ax = math.radians(x)
ay = math.radians(y)
az = math.radians(z)

# Homogeneous rotation matrices about the x, y and z axes.
rx = np.array([[1, 0, 0, 0],
               [0, math.cos(ax), -math.sin(ax), 0],
               [0, math.sin(ax), math.cos(ax), 0],
               [0, 0, 0, 1]], np.float32)
ry = np.array([[math.cos(ay), 0, -math.sin(ay), 0],
               [0, 1, 0, 0],
               [math.sin(ay), 0, math.cos(ay), 0],
               [0, 0, 0, 1]], np.float32)
# rz is the identity while z == 0 and is not part of `r` below.
rz = np.array([[math.cos(az), -math.sin(az), 0, 0],
               [math.sin(az), math.cos(az), 0, 0],
               [0, 0, 1, 0],
               [0, 0, 0, 1]], np.float32)

# Combined rotation; the multiplication order may need to be changed.
r = rx.dot(ry)

# Full homography: lift to 3D -> rotate -> translate -> project to 2D.
final = proj3dto2d.dot(trans.dot(r.dot(proj2dto3d)))

# Warp onto a canvas of the same size, filling uncovered areas with white.
dst = cv2.warpPerspective(
    img,
    final,
    (img.shape[1], img.shape[0]),
    flags=cv2.INTER_LINEAR,
    borderMode=cv2.BORDER_CONSTANT,
    borderValue=(255, 255, 255),
)

Related

Python - How to output mp4 for 'x' amount of seconds?

I want to add some code that will allow me to output an mp4. I've tried adding `import moviepy.editor as moviepy` and `import os`, to no avail. I'm super new and I'm not sure what the right question to ask is. Do I have to use `import cv2`, or (what I think is the way) use the pygame.Surface.save() function to save the frames and then convert them with ffmpeg? I want to generate a 30-second to 1-minute clip directly converted to an mp4.
import pygame
import math
import os
from matrix import matrix_multiplication
from bresenham import bresenham
from numpy import interp
# Ask SDL to centre the window on the screen.
os.environ['SDL_VIDEO_CENTERED'] = '1'

# Colours (RGB) and window size.
background, bright = (13, 13, 13), (60, 180, 120)
width, height = 800, 800

pygame.init()
pygame.display.set_caption('ASCII TESSERACT')
screen = pygame.display.set_mode((width, height))
clock = pygame.time.Clock()
fps = 60

# ASCII fonts: one for the characters drawn along edges, one for corners.
lineFont = pygame.font.SysFont('scheherazade', 20, bold=False)
cornerFont = pygame.font.SysFont('nachlieliclm', 24, bold=True)

# Tesseract information
angle = 0
cube_position = [width // 2, height // 2]
scale = 2800
speed = 0.005

# The 16 vertices of the tesseract, each stored as a 4x1 column vector
# [[x], [y], [z], [w]] so it can be multiplied by the rotation matrices.
# Vertices 0-7 have w = 1, vertices 8-15 have w = -1.
points = [
    [[-1], [-1], [1], [1]],
    [[1], [-1], [1], [1]],
    [[1], [1], [1], [1]],
    [[-1], [1], [1], [1]],
    [[-1], [-1], [-1], [1]],
    [[1], [-1], [-1], [1]],
    [[1], [1], [-1], [1]],
    [[-1], [1], [-1], [1]],
    [[-1], [-1], [1], [-1]],
    [[1], [-1], [1], [-1]],
    [[1], [1], [1], [-1]],
    [[-1], [1], [1], [-1]],
    [[-1], [-1], [-1], [-1]],
    [[1], [-1], [-1], [-1]],
    [[1], [1], [-1], [-1]],
    [[-1], [1], [-1], [-1]],
]
def connect_point(i, j, k, offset, lineChar=':', skip=7):
    """Draw a dotted ASCII edge between two projected points.

    i, j      -- indices of the two end points (before `offset` is added)
    k         -- list of projected points; entries 0 and 1 of each are the
                 screen x and y
    offset    -- added to both indices to select a block of points in `k`
    lineChar  -- character drawn along the edge
    skip      -- number of line pixels stepped over before each character
    """
    a = k[i + offset]
    b = k[j + offset]
    line = bresenham(a[0], a[1], b[0], b[1])
    s = skip
    for point in line:
        s -= 1
        if s == 0:
            # display ASCII character
            text_display(lineChar, point[0], point[1])
        elif s < 0:
            # Countdown expired on the previous pixel: start over.
            s = skip
def text_display(letter, x_pos, y_pos):
    """Render `letter` with lineFont in the `bright` colour and blit it to
    the screen at (x_pos, y_pos)."""
    text = lineFont.render(str(letter), True, bright)
    screen.blit(text, (x_pos, y_pos))
# Fonts already created for a given size, so a new SysFont is not built for
# every corner on every frame.
_corner_font_cache = {}


def _sized_corner_font(size):
    """Return the bold corner font at `size`, creating it on first use."""
    if size not in _corner_font_cache:
        _corner_font_cache[size] = pygame.font.SysFont('nachlieliclm', size, bold=True)
    return _corner_font_cache[size]


def corner_display(x, y, z, w, interpolateColor=True, fontSizeInterpolate=True):
    """Draw a '.' glyph for one projected corner at screen position (x, y).

    z -- projection factor of the corner; drives the colour when
         interpolateColor is true (0.1 -> background, 0.27 -> bright)
    w -- 4D projection factor; drives the font size when
         fontSizeInterpolate is true (0.1 -> 50, 0.27 -> 76)

    With a flag switched off, the plain `bright` colour / the module-level
    `cornerFont` is used instead. (Previously fontSizeInterpolate=False
    raised an error because no font or size was bound.)
    """
    # Interpolate z and brightness
    if interpolateColor:
        interpolatedColor = tuple(
            interp(z, [0.1, 0.27], [dark, light])
            for dark, light in zip(background, bright)
        )
    else:
        interpolatedColor = bright

    # Interpolate w and font size
    if fontSizeInterpolate:
        fontSize = int(interp(w, [0.1, 0.27], [50, 76]))
        font = _sized_corner_font(fontSize)
    else:
        fontSize = 24  # size the module-level cornerFont is created with
        font = cornerFont

    text = font.render('.', True, interpolatedColor)
    # Shift up by half the font size so the glyph sits around y.
    screen.blit(text, (x, y - fontSize / 2))
# Pygame loop: each frame rotates the tesseract in 4D, projects it
# 4D -> 3D -> 2D, draws the corners and the dotted ASCII edges, then
# advances the rotation angle.
run = True
while run:
    clock.tick(fps)
    screen.fill(background)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            run = False
        if event.type == pygame.KEYUP:
            if event.key == pygame.K_ESCAPE:
                run = False

    # 3D matrix rotations
    rotation_x = [[1, 0, 0],
                  [0, math.cos(angle), -math.sin(angle)],
                  [0, math.sin(angle), math.cos(angle)]]
    rotation_y = [[math.cos(angle), 0, -math.sin(angle)],
                  [0, 1, 0],
                  [math.sin(angle), 0, math.cos(angle)]]
    rotation_z = [[math.cos(angle), -math.sin(angle), 0],
                  [math.sin(angle), math.cos(angle), 0],
                  [0, 0, 1]]
    # Fixed quarter-turn about the x axis.
    tesseract_rotation = [[1, 0, 0],
                          [0, math.cos(-math.pi/2), -math.sin(-math.pi/2)],
                          [0, math.sin(-math.pi/2), math.cos(-math.pi/2)]]

    # 4D matrix rotations (only xy and zw are applied below; the others are
    # kept available for experimenting with different rotations).
    rotation4d_xy = [[math.cos(angle), -math.sin(angle), 0, 0],
                     [math.sin(angle), math.cos(angle), 0, 0],
                     [0, 0, 1, 0],
                     [0, 0, 0, 1]]
    rotation4d_xz = [[math.cos(angle), 0, -math.sin(angle), 0],
                     [0, 1, 0, 0],
                     [math.sin(angle), 0, math.cos(angle), 0],
                     [0, 0, 0, 1]]
    rotation4d_xw = [[math.cos(angle), 0, 0, -math.sin(angle)],
                     [0, 1, 0, 0],
                     [0, 0, 1, 0],
                     [math.sin(angle), 0, 0, math.cos(angle)]]
    rotation4d_yz = [[1, 0, 0, 0],
                     [0, math.cos(angle), -math.sin(angle), 0],
                     [0, math.sin(angle), math.cos(angle), 0],
                     [0, 0, 0, 1]]
    rotation4d_yw = [[1, 0, 0, 0],
                     [0, math.cos(angle), 0, -math.sin(angle)],
                     [0, 0, 1, 0],
                     [0, math.sin(angle), 0, math.cos(angle)]]
    rotation4d_zw = [[1, 0, 0, 0],
                     [0, 1, 0, 0],
                     [0, 0, math.cos(angle), -math.sin(angle)],
                     [0, 0, math.sin(angle), math.cos(angle)]]

    projected_points = []
    for point in points:
        # Rotate in 4D.
        rotated_3d = matrix_multiplication(rotation4d_xy, point)
        rotated_3d = matrix_multiplication(rotation4d_zw, rotated_3d)

        # Perspective-project 4D -> 3D; w shrinks points far along the
        # fourth axis.
        distance = 5
        w = 1/(distance - rotated_3d[3][0])
        projection_matrix4 = [
            [w, 0, 0, 0],
            [0, w, 0, 0],
            [0, 0, w, 0],
        ]
        projected_3d = matrix_multiplication(projection_matrix4, rotated_3d)

        # The fixed-rotation copy is only used to derive the depth factor z.
        depth_view = matrix_multiplication(tesseract_rotation, projected_3d)
        z = 1/(distance - (depth_view[2][0] + rotated_3d[3][0]))
        projection_matrix = [[z, 0, 0],
                             [0, z, 0]]

        # Rotate in 3D, then perspective-project 3D -> 2D.
        rotated_2d = matrix_multiplication(rotation_x, projected_3d)
        projected_2d = matrix_multiplication(projection_matrix, rotated_2d)
        x = int(projected_2d[0][0] * scale) + cube_position[0]
        y = int(projected_2d[1][0] * scale) + cube_position[1]
        projected_points.append([x, y, z, w])
        corner_display(x, y, z, w)

    # draw edges
    # Cube formed by points 8-15.
    for m in range(4):
        connect_point(m, (m+1)%4, projected_points, 8)
        connect_point(m+4, (m+1)%4 + 4, projected_points, 8)
        connect_point(m, m+4, projected_points, 8)
    # Cube formed by points 0-7.
    for m in range(4):
        connect_point(m, (m+1)%4, projected_points, 0)
        connect_point(m+4, (m+1)%4 + 4, projected_points, 0)
        connect_point(m, m+4, projected_points, 0)
    # Edges joining each point of the first cube to its partner in the second.
    for m in range(8):
        connect_point(m, m+8, projected_points, 0)

    angle += speed
    pygame.display.update()
pygame.quit()
You are correct: you cannot save pygame output directly as an mp4. You must use pygame.image.save() to save each frame as an image file (.png or .jpg), collect the file names into a list, and then turn that list into an mp4. You can change how often a frame is saved by changing how the frame count is updated.
from moviepy.editor import *
import moviepy.editor as mp
import os
import glob
import shutil
import pygame

# Put all your pygame code in here. It must define WINDOW_WIDTH,
# WINDOW_HEIGHT, WINDOW_FPS and clock.
window = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))

RECORD_SECONDS = 30  # length of the clip to record
exiting = False      # your event handling may also set this to stop early
frame_count = 0
while not exiting:
    # handle events
    # paint the screen
    # save the frame
    frame_count += 1
    filename = "screen_%04d.png" % (frame_count)
    pygame.image.save(window, filename)  # this saves it in the file
    clock.tick(WINDOW_FPS)
    # Stop once enough frames for the requested clip length are saved.
    if frame_count >= RECORD_SECONDS * WINDOW_FPS:
        exiting = True

src_dir = "your/current/dir"  # This is where pygame saves the images
image_dir = "your/destination/dir"  # This is where you want the dir to be
# The frames are written as .png above, so that is the pattern to copy.
for frame_file in glob.iglob(os.path.join(src_dir, "*.png")):
    shutil.copy(frame_file, image_dir)

# Makes the array of images into imgarr. os.listdir returns names in
# arbitrary order; sorting restores frame order because the counter in the
# file name is zero-padded.
imgarr = sorted(
    os.path.join(image_dir, filename)
    for filename in os.listdir(image_dir)
    if filename.endswith((".jpg", ".png"))
)

# One image per video frame, played back at the rate it was captured.
videoclip = mp.ImageSequenceClip(imgarr, fps=WINDOW_FPS)
videoclip.write_videofile("video_1.mp4")

Pygame won't draw quads in the right order

I have been trying to make a 3D engine with Python for some time, and I have gotten pretty far. However, I have found a problem when I sort the faces in a list by depth: the sorting flips when you are close enough to the cube.
main.py:
import os
os.environ["SDL_VIDEO_CENTERED"] = '1'
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = '1'
import pygame
from pygame import gfxdraw
import math
from matrix import matrix_multiplication
import mesh
from random import randint as random
import time
# Timestamp used further down to report how long start-up took.
startTime = time.time()

# Colours (RGB).
black = (20, 20, 20)
white = (230, 230, 230)
blue = (0, 154, 255)

# Square window.
width = 700
height = 700

pygame.init()
pygame.display.set_caption("3D Engine")
screen = pygame.display.set_mode((width, height))
clock = pygame.time.Clock()
frames = 60
outline = False

# One random value per face for each colour channel (all reds are drawn
# first, then all greens, then all blues).
rs = [random(0, 255) for _ in range(len(mesh.faces))]
gs = [random(0, 255) for _ in range(len(mesh.faces))]
bs = [random(0, 255) for _ in range(len(mesh.faces))]

# Rotation angles and position offsets, all starting at zero.
angle_x = angle_y = angle_z = 0
pos_x = pos_y = pos_z = 0

cube_position = [width // 2, height // 2]
scale = 600
speed = 0.001

# Turn every vertex [x, y, z] into a 3x1 column vector [[x], [y], [z]].
points = [[[component] for component in vertex] for vertex in mesh.verts]

movSpeed = 0.001
font = pygame.font.SysFont("Corbel", 23)
def _face_axis_mean(i, axis):
    """Mean of coordinate `axis` (0=x, 1=y, 2=z) over the four vertices of
    face `i`, read from the transformed points in `new_points`."""
    face = mesh.faces[i]
    return (new_points[face[0]][axis][0] + new_points[face[1]][axis][0]
            + new_points[face[2]][axis][0] + new_points[face[3]][axis][0]) / 4


def avarageX(i):
    """Average x of the four vertices of face `i`."""
    return _face_axis_mean(i, 0)


def avarageY(i):
    """Average y of the four vertices of face `i`."""
    return _face_axis_mean(i, 1)


def avarageZ(i):
    """Average z of the four vertices of face `i`."""
    return _face_axis_mean(i, 2)


def distToCam(i):
    """Distance from the reference point `a` to the centre of face `i`.

    NOTE(review): `a` is the origin, but the projection in the main loop
    divides by (distance - z), which puts the camera at [0, 0, distance].
    Measuring from the origin is what this question reports as the depth
    sort flipping when the camera gets close; it is kept here as asked.
    """
    a = [0, 0, 0]
    b = [avarageX(i), avarageY(i), avarageZ(i)]
    return math.dist(a, b)
print("It took: {} seconds".format(time.time() - startTime))

# Main loop: transform and project every vertex, depth-sort the faces, draw
# them back to front, then apply keyboard input.
run = True
while run:
    dt = clock.tick(frames)
    fps = clock.get_fps()
    screen.fill(white)
    keys = pygame.key.get_pressed()
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            run = False

    rotation_x = [[1, 0, 0],
                  [0, math.cos(angle_x), -math.sin(angle_x)],
                  [0, math.sin(angle_x), math.cos(angle_x)]]
    rotation_y = [[math.cos(angle_y), 0, -math.sin(angle_y)],
                  [0, 1, 0],
                  [math.sin(angle_y), 0, math.cos(angle_y)]]
    rotation_z = [[math.cos(angle_z), -math.sin(angle_z), 0],
                  [math.sin(angle_z), math.cos(angle_z), 0],
                  [0, 0, 1]]

    # Divisor offset of the perspective projection below.
    distance = 5
    new_points = []        # transformed vertices, read by the face-sort helpers
    projected_points = []  # screen coordinates, same order as `points`
    for point in points:
        rotated_2d = matrix_multiplication(rotation_y, point)
        rotated_2d = matrix_multiplication(rotation_x, rotated_2d)
        rotated_2d = matrix_multiplication(rotation_z, rotated_2d)
        new_point = [[rotated_2d[0][0] + pos_x], [rotated_2d[1][0] + pos_y], [rotated_2d[2][0] - pos_z]]
        new_points.append(new_point)

        # Perspective divide.
        z = 1 / (distance - new_point[2][0])
        projection_matrix = [[z, 0, 0],
                             [0, z, 0]]
        projected_2d = matrix_multiplication(projection_matrix, new_point)
        x = int(projected_2d[0][0] * scale) + cube_position[0]
        y = int(projected_2d[1][0] * scale) + cube_position[1]
        projected_points.append([x, y])

    # Painter's algorithm: draw the faces with the largest distToCam first.
    zs = [[distToCam(i), i] for i in range(len(mesh.faces))]
    zs.sort(reverse=True)
    for _, face_index in zs:
        face = mesh.faces[face_index]
        polygon = [projected_points[vertex] for vertex in face[:4]]
        colour = (rs[face_index], gs[face_index], bs[face_index])
        gfxdraw.filled_polygon(screen, polygon, colour)
        gfxdraw.aapolygon(screen, polygon, colour)

    # Keyboard input, scaled by the frame time dt.
    angle_x += (keys[pygame.K_DOWN] - keys[pygame.K_UP]) * speed * dt
    angle_y += (keys[pygame.K_RIGHT] - keys[pygame.K_LEFT]) * speed * dt
    pos_x += (keys[pygame.K_d] - keys[pygame.K_a]) * movSpeed * dt
    pos_z += (keys[pygame.K_w] - keys[pygame.K_s]) * movSpeed * dt

    text = font.render(str(round(fps, 1)), False, black)
    screen.blit(text, (0, 0))
    pygame.display.update()
pygame.quit()
The matrix multiplication
matrix.py:
def matrix_multiplication(a, b):
    """Return the matrix product a x b.

    Both arguments are matrices stored as lists of rows. The result is a new
    list of rows with len(a) rows and len(b[0]) columns.

    If the number of columns of `a` differs from the number of rows of `b`,
    a message is printed and None is returned.
    """
    rows_a, columns_a = len(a), len(a[0])
    rows_b, columns_b = len(b), len(b[0])
    if columns_a != rows_b:
        print("columns of the first matrix must be equal to the rows of the second matrix")
        return None
    # Entry (x, y) is the dot product of row x of `a` and column y of `b`.
    return [
        [sum(a[x][k] * b[k][y] for k in range(columns_a)) for y in range(columns_b)]
        for x in range(rows_a)
    ]
The mesh data.
mesh.py:
# The eight corners of a cube with side length 2 centred on the origin,
# one [x, y, z] triple per vertex.
verts = [
[1, 1, 1],
[1, 1, -1],
[1, -1, 1],
[1, -1, -1],
[-1, 1, 1],
[-1, 1, -1],
[-1, -1, 1],
[-1, -1, -1]
]
# The six quad faces; each entry lists four indices into `verts`.
faces = [
[0, 4, 6, 2],
[3, 2, 6, 7],
[7, 6, 4, 5],
[5, 1, 3, 7],
[1, 0, 2, 3],
[5, 4, 0, 1]
]
WARNING: There might be flashing lights on startup
You have to compute the distance of the camera position ([0, 0, distance]) to the points in world space (new_points), instead of the points in model space (points):
def distToCam(i):
    """Distance from the camera at [0, 0, distance] to the centre of face
    `i`, using the transformed vertices in `new_points`."""
    a = [0, 0, distance]
    # Centre of the face: per-axis mean over its four vertices.
    b = [sum(new_points[mesh.faces[i][pi]][j][0] for pi in range(4)) / 4 for j in range(3)]
    return math.dist(a, b)

Replace in array of zeros with other values in certain cells_updated question

I need to solve a problem on which I have spent hours. With the data from my Excel sheet I have created a 6x36 matrix of zeros called `zeros` and a 6x6 coordinate transformation matrix called `matrix_tran` [image 1].
My problem is that I can't find a way to replace the zeros of the `zeros` matrix with the values of `matrix_tran`. They must be placed in columns (4, 5, 6, 7, 8, 9), which are given by the connection vector (4, 5, 6, 7, 8, 9) of element 15 of the Excel sheet, that is, the last row processed by the for loop [image 2].
In summary: below I show how the matrix currently looks and how it should look [images 3 and 4 respectively].
I would very much appreciate your help. Please excuse my English, as it is not my native language. Best regards.
import pandas as pd
import numpy as np
ex = pd.ExcelFile('matrix_tr.xlsx')
hoja = ex.parse('Hoja1')
cols = 36

# Read every element row. After the loop, lx and ly hold the direction
# cosines of the LAST row only.
# NOTE(review): the indentation of this snippet was lost; the loop is assumed
# to end after `ly`, matching the question's "last row of the for loop
# iteration" -- confirm.
for n in range(0, len(hoja)):
    A = hoja['ELEMENT #'][n]
    B = hoja['1(i)'][n]
    C = hoja['2(i)'][n]
    D = hoja['3(i)'][n]
    E = hoja['1(j)'][n]
    F = hoja['2(j)'][n]
    G = hoja['3(j)'][n]
    H = hoja['X(i)'][n]
    I = hoja['Y(i)'][n]
    J = hoja['X(j)'][n]
    K = hoja['Y(j)'][n]
    # Element length and direction cosines from the end coordinates.
    L = np.sqrt((J-H)**2+(K-I)**2)
    lx = (J-H)/L
    ly = (K-I)/L

zeros = np.zeros((6, cols))
counters = hoja.loc[:, ["1(i)", "2(i)", "3(i)", "1(j)", "2(j)", "3(j)"]]
for _, i1, i2, i3, j1, j2, j3 in counters.itertuples():
    matrix_tran = np.array([[lx, ly, 0, 0, 0, 0],
                            [-ly, lx, 0, 0, 0, 0],
                            [0, 0, 1, 0, 0, 0],
                            [0, 0, 0, lx, ly, 0],
                            [0, 0, 0, -ly, lx, 0],
                            [0, 0, 0, 0, 0, 1]])
    # The connection numbers are 1-based; subtract 1 to get column indices.
    zeros[:, [i1 - 1, i2 - 1, i3 - 1, j1 - 1, j2 - 1, j3 - 1]] = matrix_tran
Try with a transposed zeros matrix
import pandas as pd
import numpy as np
ex = pd.ExcelFile('c:/tmp/SO/matrix_tr.xlsx')
hoja = ex.parse('Hoja1')
counters = hoja.loc[:, ["1(i)", "2(i)", "3(i)", "1(j)", "2(j)", "3(j)"]]

# zeros matrix transposed: one row per target column, so each column of the
# final matrix can be filled with a simple row assignment.
cols = 36
zeros_trans = np.zeros((cols, 6))

# last row only
for n in range(14, len(hoja)):
    Xi = hoja['X(i)'][n]
    Yi = hoja['Y(i)'][n]
    Xj = hoja['X(j)'][n]
    Yj = hoja['Y(j)'][n]
    X = Xj-Xi
    Y = Yj-Yi
    L = np.sqrt(X**2+Y**2)
    lx = X/L
    ly = Y/L
    matrix_tran = np.array([[lx, ly, 0, 0, 0, 0],
                            [-ly, lx, 0, 0, 0, 0],
                            [0, 0, 1, 0, 0, 0],
                            [0, 0, 0, lx, ly, 0],
                            [0, 0, 0, -ly, lx, 0],
                            [0, 0, 0, 0, 0, 1]])
    for i, r in enumerate(counters.iloc[n]):
        # Row (r - 1) of the transposed matrix becomes column (r - 1) of the
        # result, so it must receive COLUMN i of matrix_tran. Assigning row i
        # would place the transpose of matrix_tran instead.
        zeros_trans[int(r) - 1] = matrix_tran[:, i]

print(np.transpose(zeros_trans))

TypeError: unsupported operand type(s) for //: 'NoneType' and 'int'

In ROI pooling, you have a region of interest(ROI) and you want to pool it to a specific size.
ROI gif
I try to implement the same thing like in the gif.
The whole image is
# NOTE(review): no shape is given, so the static shape of this placeholder
# (and of tensors sliced from it) is presumably unknown -- confirm.
image_pl = tf.placeholder(dtype=tf.float32) # the whole region
the region of interest is
# Start offsets (x, y) and sizes (w, h) passed to tf.slice below; a size of
# -1 is used for h and for the last axis.
x = 0
y = 3
w = 7
h = -1
roi = tf.slice(image_pl, [x, y, 0], [w, h, -1]) # region of interest
Now I try to get the 4 parts, because every region of interest, no matter of their size, I want to crop to the size 2x2
# Static size of the first two axes of the region of interest.
# NOTE(review): these appear to be None here (the reported TypeError comes
# from `None // 2`), presumably because the placeholder has no static shape.
w_roi = roi.get_shape()[0].value
h_roi = roi.get_shape()[1].value
# Split the region into four parts around its middle.
roi_part1 = tf.slice(roi, [0, 0, 0], [w_roi // 2, h_roi // 2, -1]) # fisrt part from the gif
roi_part2 = tf.slice(roi, [w_roi - w_roi // 2, 0, 0], [w_roi, h_roi // 2, -1]) # second part from the gif
roi_part3 = tf.slice(roi, [0, h_roi - h_roi // 2, 0], [w_roi // 2, h_roi, -1])
roi_part4 = tf.slice(roi, [w_roi - w_roi // 2, h_roi - h_roi // 2, 0], [w_roi, h_roi, -1])
But here I get the error.
TypeError: unsupported operand type(s) for //: 'NoneType' and 'int'
Here is the whole code.
How can I implement the idea of that gif?
import numpy as np
import tensorflow as tf
# NOTE(review): this is the question's script with its structure restored;
# the error it asks about is intentionally left in. The placeholder is
# created without a shape, so w_roi / h_roi below presumably come back as
# None, which is what makes `w_roi // 2` raise the reported TypeError.
image_pl = tf.placeholder(dtype=tf.float32)  # the whole region

# Start offsets (x, y) and sizes (w, h) of the region of interest.
x = 0
y = 3
w = 7
h = -1
roi = tf.slice(image_pl, [x, y, 0], [w, h, -1])  # region of interest

# Static size of the first two axes of the region of interest.
w_roi = roi.get_shape()[0].value
h_roi = roi.get_shape()[1].value

# Split the region into four parts around its middle.
roi_part1 = tf.slice(roi, [0, 0, 0], [w_roi // 2, h_roi // 2, -1])  # fisrt part from the gif
roi_part2 = tf.slice(roi, [w_roi - w_roi // 2, 0, 0], [w_roi, h_roi // 2, -1])  # second part from the gif
roi_part3 = tf.slice(roi, [0, h_roi - h_roi // 2, 0], [w_roi // 2, h_roi, -1])
roi_part4 = tf.slice(roi, [w_roi - w_roi // 2, h_roi - h_roi // 2, 0], [w_roi, h_roi, -1])

output1 = tf.reduce_max(roi_part1)  # maximum of the region 1 in the region of interest
output2 = tf.reduce_max(roi_part2)
output3 = tf.reduce_max(roi_part3)
output4 = tf.reduce_max(roi_part4)
output = tf.concat([output1, output2, output3, output4], 0)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # Feed a random 8x8 single-channel image.
    image = np.random.uniform(0, 1, (8, 8, 1))
    curr_image, curr_roi, curr_output = sess.run([image_pl, roi, output], feed_dict={image_pl: image})

Working on multidimensional arrays

I'm trying to map the colors of an image onto a predefined palette. Each output pixel is assigned the palette color with the smallest squared error relative to the input pixel.
I have written the code with Python loops; is there a better, vectorized way to do this?
import numpy as np
import skimage.io as io
# Target colours (RGB).
palette = [
    [180, 0, 0],
    [255, 150, 0],
    [255, 200, 0],
    [0, 128, 0]
]
IMG = io.imread('lena.jpg')[:, :, :3]  # drop any channel beyond RGB
DIM = IMG.shape
IOUT = np.empty(DIM)

# Convert the palette once instead of once per pixel.
palette_array = np.array(palette)
for x in range(DIM[0]):
    for y in range(DIM[1]):
        # Index of the palette colour with the smallest squared error to
        # this pixel.
        P = ((palette_array - IMG[x, y, :])**2).sum(axis=1).argmin()
        IOUT[x, y, :] = palette[P]
Can the loops be avoided and solved using numpy operations itself?
Don't loop over all pixels, but over all colors:
import pylab as pl
palette = pl.array([[180, 0, 0], [255, 150, 0], [255, 200, 0], [0, 128, 0]])
img = pl.imread('lena.jpg')[:, :, :3].astype('float')
R, G, B = img[:, :, 0].copy(), img[:, :, 1].copy(), img[:, :, 2].copy()

# Best squared distance found so far for every pixel. Built from ones rather
# than `pl.inf * R`, because inf * 0 is NaN and would leave every pixel with
# a zero red channel unassigned.
dist = pl.inf * pl.ones_like(R)
for i in range(len(palette)):
    new_dist = pl.square(img[:, :, 0] - palette[i, 0]) \
        + pl.square(img[:, :, 1] - palette[i, 1]) \
        + pl.square(img[:, :, 2] - palette[i, 2])
    # Pixels for which this palette colour is the closest seen so far.
    closer = new_dist < dist
    R[closer] = palette[i, 0]
    G[closer] = palette[i, 1]
    B[closer] = palette[i, 2]
    dist = pl.minimum(dist, new_dist)

pl.clf()
pl.subplot(1, 2, 1)
pl.imshow(img.astype('uint8'))
pl.subplot(1, 2, 2)
# Cast to uint8 like the left panel: imshow treats float RGB data as lying
# in the range 0..1.
pl.imshow(pl.dstack((R, G, B)).astype('uint8'))
Edit: The loop-less alternative. ;)
import pylab as pl
palette = pl.array([[180, 0, 0], [255, 150, 0], [255, 200, 0], [0, 128, 0]])
img = pl.imread('lena.jpg')[:, :, :3]
pl.clf()
pl.subplot(1, 2, 1)
pl.imshow(img)

# Add a trailing axis to the image and two leading axes to the palette so
# that broadcasting compares every pixel with every palette colour. The
# shapes are taken from the image itself instead of assuming 512 x 512.
IMG = img.reshape(img.shape + (1,))               # (rows, cols, 3, 1)
PAL = palette.transpose().reshape((1, 1, 3, -1))  # (1, 1, 3, n_colours)

# Sum the squared differences over the colour axis, then pick the palette
# index with the smallest error for each pixel.
idx = pl.argmin(pl.sum((IMG - PAL)**2, axis=2), axis=2)
img = palette[idx, :]
pl.subplot(1, 2, 2)
pl.imshow(img)

Categories

Resources