I have a problem combining image channels into one RGB image. I use skimage and numpy. The input is a photo like this: http://rghost.ru/8gYDcq2T6. Using numpy array slicing, I split the image into 3 parts by height, then trim the edges of each part (5% of the height and width). Then I compute the mean squared error between two parts (part1 and part2, then part1 and part3) to find the best alignment: I shift one part relative to the other by up to 15 pixels (left, right, up, down) and take the shift with the minimum error. Once I know the two shifts (x, y coordinates) for part1/part2 and part1/part3, I combine the channels with numpy.dstack, and that's it. But the final quality is poor, so what is my mistake? I suspect the problem is in combining the images with dstack at the end, but I can't work out how to do it correctly. Here is my code:
from skimage import data, io
from numpy import *

def metrics(first, second, x, y):
    # shift the second part by (x, y) and compute the mean squared error
    reshaped_second = roll(second, x, 0)
    reshaped_second = roll(reshaped_second, y, 1)
    reshaped_first = first
    mse = (((reshaped_first - reshaped_second) ** 2).sum()) / reshaped_first.size
    return (mse, x, y)
def align(path):
    image = io.imread(path)
    size = image.shape
    # split the image into three parts by height
    part1 = image[0 : size[0] // 3, :]
    part2 = image[size[0] // 3 : 2 * size[0] // 3, :]
    part3 = image[2 * size[0] // 3 : size[0], :]
    new_size = [min(part1.shape[0], part2.shape[0], part3.shape[0]),
                min(part1.shape[1], part2.shape[1], part3.shape[1])]
    # trim 5% of the height and width from each part
    part1 = part1[new_size[0] // 100 * 5 : new_size[0] - new_size[0] // 100 * 5,
                  new_size[1] // 100 * 5 : new_size[1] - new_size[1] // 100 * 5]
    part2 = part2[new_size[0] // 100 * 5 : new_size[0] - new_size[0] // 100 * 5,
                  new_size[1] // 100 * 5 : new_size[1] - new_size[1] // 100 * 5]
    part3 = part3[new_size[0] // 100 * 5 : new_size[0] - new_size[0] // 100 * 5,
                  new_size[1] // 100 * 5 : new_size[1] - new_size[1] // 100 * 5]
    # best shift for part2 relative to part1
    min_mse = 1000000000
    xx_1 = None
    yy_1 = None
    for x in range(-15, 16):
        for y in range(-15, 16):
            mse = metrics(part1, part2, x, y)
            if mse[0] <= min_mse:
                xx_1 = mse[1]
                yy_1 = mse[2]
                min_mse = mse[0]
    # best shift for part3 relative to part1
    min_mse = 1000000000
    xx_2 = None
    yy_2 = None
    for x in range(-15, 16):
        for y in range(-15, 16):
            mse = metrics(part1, part3, x, y)
            if mse[0] <= min_mse:
                xx_2 = mse[1]
                yy_2 = mse[2]
                min_mse = mse[0]
    part2 = roll(part2, xx_1, 0)  # numpy.roll()
    part2 = roll(part2, yy_1, 1)
    part3 = roll(part3, xx_2, 0)
    part3 = roll(part3, yy_2, 1)
    photo = dstack((part3, part2, part1))
    io.imshow(photo)
    io.show()
After running the program I get this photo: http://rghost.ru/6fqqmFCnM. That is the first test image; the others come out worse, and I would like better quality.
What can I do? Thank you for your help.
PROBLEM FOUND: the problem was with the dtype of reshaped_first and reshaped_second in the metrics function. They were 'uint8', so reshaped_first - reshaped_second wrapped around (unsigned underflow) and gave an unrepresentative metric. With that fixed, 5 of 6 test photos align well, but the last one is still off by about 15-20 pixels. So my new question is: what metric should I choose for this problem? I tried normalized cross-correlation, but it performed worse than the mean squared error I am using now.
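For reference, here is a minimal sketch of the metric with the dtype issue fixed (casting to float before subtracting), plus a zero-mean normalized cross-correlation variant to compare against; the function and variable names here are only illustrative:

import numpy as np

def mse_metric(first, second, x, y):
    shifted = np.roll(np.roll(second, x, 0), y, 1).astype(np.float64)
    ref = first.astype(np.float64)
    # float subtraction avoids the uint8 wrap-around described above
    return ((ref - shifted) ** 2).mean()

def ncc_metric(first, second, x, y):
    shifted = np.roll(np.roll(second, x, 0), y, 1).astype(np.float64)
    ref = first.astype(np.float64)
    ref -= ref.mean()
    shifted -= shifted.mean()
    denom = np.sqrt((ref ** 2).sum() * (shifted ** 2).sum())
    # higher is better, so minimize its negative when searching over shifts
    return (ref * shifted).sum() / denom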
Here I have some code that can vertically and horizontally shift images so that a specific feature can align (credits to https://stackoverflow.com/a/24769222/15016884):
import numpy as np
from scipy import signal

def cross_image(im1, im2):
    # sum over the color channels to get a grayscale float image
    im1_gray = np.sum(im1.astype('float'), axis=2)
    im2_gray = np.sum(im2.astype('float'), axis=2)
    # subtract the means so the correlation is zero-mean
    im1_gray -= np.mean(im1_gray)
    im2_gray -= np.mean(im2_gray)
    # cross-correlation via FFT convolution with a flipped kernel
    return signal.fftconvolve(im1_gray, im2_gray[::-1, ::-1], mode='same')
corr_img_null = cross_image(cloud1,cloud1)
corr_img = cross_image(cloud1,cloud2)
y0, x0 = np.unravel_index(np.argmax(corr_img_null), corr_img_null.shape)
y, x = np.unravel_index(np.argmax(corr_img), corr_img.shape)
ver_shift = y0-y
hor_shift = x0-x
print('horizontally shifted', hor_shift)
print('vertically shifted', ver_shift)
#defining the bounds of the part of the images I'm actually analyzing
xstart = 100
xstop = 310
ystart = 50
ystop = 200
crop_cloud1 = cloud1[ystart:ystop, xstart:xstop]
crop_cloud2 = cloud2[ystart:ystop, xstart:xstop]
crop_cloud2_shift = cloud2[ystart+ver_shift:ystop+ver_shift, xstart+hor_shift:xstop+hor_shift]
plot_pos = plt.figure(5)
plt.title('image 1')
plt.imshow(crop_cloud1)
plot_pos = plt.figure(6)
plt.title('image 2')
plt.imshow(crop_cloud2)
plot_pos = plt.figure(7)
plt.title('Shifted image 2 to align with image 1')
plt.imshow(crop_cloud2_shift)
Here are the results:
Now, I want to work with the example shown below, where rotations in addition to translations will be needed to align the features in my image.
The idea is to convolve each possible configuration of image 2 for every angle from -45 to 45 degrees (for my application, this range is not likely to be exceeded) and find at which coordinates and rotation angle the convolution is maximized. Here is my code for that:
import cv2

def rotate(img, theta):
    (rows, cols) = img.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), theta, 1)
    res = cv2.warpAffine(img, M, (cols, rows))
    return res
# testing all rotations of image 2
corr_bucket = []
for i in range(-45, 45):
    rot_img = rotate(bolt2, i)
    corr_img = cross_image(bolt1, rot_img)
    corr_bucket.append(corr_img)
corr_arr = np.asarray(corr_bucket)
corr_img_null = cross_image(bolt1, bolt1)
y0, x0 = np.unravel_index(np.argmax(corr_img_null), corr_img_null.shape)
r_index, y1, x1 = np.unravel_index(np.argmax(corr_arr), corr_arr.shape)
r = -45 + r_index
ver_shift = y0-y
hor_shift = x0-x
ver_shift_r = y0-y1
hor_shift_r = x0-x1
#What parts of the image do you want to analyze
xstart = 200
xstop = 300
ystart = 100
ystop = 200
crop_bolt1 = bolt1[ystart:ystop, xstart:xstop]
crop_bolt2 = bolt2[ystart:ystop, xstart:xstop]
rot_bolt2 = rotate(bolt2,r)
shift_rot_bolt2 = rot_bolt2[ystart+ver_shift_r:ystop+ver_shift_r, xstart+hor_shift_r:xstop+hor_shift_r]
plot_1 = plt.figure(9)
plt.title('image 1')
plt.imshow(crop_bolt1)
plot_2 = plt.figure(10)
plt.title('image 2')
plt.imshow(crop_bolt2)
plot_3 = plt.figure(11)
plt.title('Shifted and rotated image 2 to align with image 1')
plt.imshow(shift_rot_bolt2)
Unfortunately, from the very last line I get the error ValueError: zero-size array to reduction operation minimum which has no identity. I'm fairly new to Python, so I don't really know what this means or why my approach isn't working. I have a feeling the error is somewhere in unraveling corr_arr, because just by eyeballing the x, y and r values it returns, I can already see they would not make the lightning bolts align. Any advice?
The issue came from feeding the entire rotated image into scipy.signal.fftconvolve. Crop a part of image 2 after rotating to use as a "probe image" (and crop the unrotated image 1 in the same way), and the code I have written in my question works fine.
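A rough sketch of that change, assuming rotate(), cross_image(), bolt1 and bolt2 from the question; the crop bounds below are placeholders for the region that contains the feature of interest:

xstart, xstop = 200, 300
ystart, ystop = 100, 200
ref = bolt1[ystart:ystop, xstart:xstop]                    # crop the unrotated image 1

corr_bucket = []
for i in range(-45, 45):
    probe = rotate(bolt2, i)[ystart:ystop, xstart:xstop]   # crop the rotated image 2 the same way
    corr_bucket.append(cross_image(ref, probe))
corr_arr = np.asarray(corr_bucket)

r_index, y1, x1 = np.unravel_index(np.argmax(corr_arr), corr_arr.shape)
r = -45 + r_index   # best rotation angle in degrees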
I'm trying to implement Reinhard's method, which uses the color distribution of a target image to color-normalize a passed-in image, for a research project. I've gotten the code to work and it outputs correctly, but it's pretty slow: it takes about 20 minutes to iterate through 300 images. I'm fairly sure the bottleneck is how I apply the function to each image; I currently iterate over each pixel of the image and apply the formulas below to each channel.
def reinhard(target, img):
    # converts image and target from BGR colorspace to l alpha beta
    lAB_img = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)
    lAB_tar = cv2.cvtColor(target, cv2.COLOR_BGR2Lab)
    # finds mean and standard deviation for each color channel across the entire image
    (mean, std) = cv2.meanStdDev(lAB_img)
    (mean_tar, std_tar) = cv2.meanStdDev(lAB_tar)
    # iterates over image implementing formula to map color normalized pixels to target image
    for y in range(512):
        for x in range(512):
            lAB_tar[x, y, 0] = (lAB_img[x, y, 0] - mean[0]) / std[0] * std_tar[0] + mean_tar[0]
            lAB_tar[x, y, 1] = (lAB_img[x, y, 1] - mean[1]) / std[1] * std_tar[1] + mean_tar[1]
            lAB_tar[x, y, 2] = (lAB_img[x, y, 2] - mean[2]) / std[2] * std_tar[2] + mean_tar[2]
    mapped = cv2.cvtColor(lAB_tar, cv2.COLOR_Lab2BGR)
    return mapped
My supervisor told me that I could try using a matrix to apply the function all at once to improve the runtime but I'm not exactly sure how to go about doing that.
The original and the target:
Color transfer results using Reinhard's method in 5 ms:
I prefer to implement the formula with numpy vectorized operations rather than Python loops.
# implementing the formula
#(Io - mo)/so*st + mt = Io * (st/so) + mt - mo*(st/so)
ratio = (std_tar/std_ori).reshape(-1)
offset = (mean_tar - mean_ori*std_tar/std_ori).reshape(-1)
lab_tar = cv2.convertScaleAbs(lab_ori*ratio + offset)
Here is the code:
# 2019/02/19 by knight-金
# https://stackoverflow.com/a/54757659/3547485
import numpy as np
import cv2

def reinhard(target, original):
    # cvtColor: COLOR_BGR2Lab
    lab_tar = cv2.cvtColor(target, cv2.COLOR_BGR2Lab)
    lab_ori = cv2.cvtColor(original, cv2.COLOR_BGR2Lab)
    # meanStdDev: calculate mean and standard deviation
    mean_tar, std_tar = cv2.meanStdDev(lab_tar)
    mean_ori, std_ori = cv2.meanStdDev(lab_ori)
    # implementing the formula
    # (Io - mo)/so*st + mt = Io * (st/so) + mt - mo*(st/so)
    ratio = (std_tar / std_ori).reshape(-1)
    offset = (mean_tar - mean_ori * std_tar / std_ori).reshape(-1)
    lab_tar = cv2.convertScaleAbs(lab_ori * ratio + offset)
    # convert back
    mapped = cv2.cvtColor(lab_tar, cv2.COLOR_Lab2BGR)
    return mapped

if __name__ == "__main__":
    ori = cv2.imread("ori.png")
    tar = cv2.imread("tar.png")
    mapped = reinhard(tar, ori)
    cv2.imwrite("mapped.png", mapped)
    mapped_inv = reinhard(ori, tar)
    cv2.imwrite("mapped_inv.png", mapped_inv)
I managed to figure it out after looking at the numpy documentation. I just needed to replace my nested for loop with proper array slicing. It took less than a minute to iterate through all 300 images this way.
lAB_tar[:,:,0] = (lAB_img[:,:,0] - mean[0])/std[0] * std_tar[0] + mean_tar[0]
lAB_tar[:,:,1] = (lAB_img[:,:,1] - mean[1])/std[1] * std_tar[1] + mean_tar[1]
lAB_tar[:,:,2] = (lAB_img[:,:,2] - mean[2])/std[2] * std_tar[2] + mean_tar[2]
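For what it's worth, those three lines can also be collapsed into a single broadcast expression. This is only a sketch, assuming mean/std and mean_tar/std_tar come from cv2.meanStdDev (shape (3, 1)) as in the question:

import numpy as np

# reshape the (3, 1) channel statistics so they broadcast over an HxWx3 image
m, s = mean.reshape(1, 1, 3), std.reshape(1, 1, 3)
m_t, s_t = mean_tar.reshape(1, 1, 3), std_tar.reshape(1, 1, 3)
normalized = (lAB_img.astype(np.float64) - m) / s * s_t + m_t
# clip and cast back to uint8 so cv2.cvtColor accepts the result
lAB_tar = np.clip(normalized, 0, 255).astype(np.uint8)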
I have been trying to develop the YOLO cost function, which I have shown below. This is the first time I have tried to write my own cost function in TensorFlow, and I am unsure whether I am approaching it correctly. For one, my model uses a number of intermediate steps; I am not sure whether this complicates the computational graph in some meaningfully destructive way. I also use an absolute-value step and am unsure whether it has a negative effect on my backprop. Any assistance would be helpful in determining whether I am approaching this problem correctly.
I can answer any questions about my implementation.
Note: Z13 is the prediction and y holds the true values. There are 49 cells in my model (7x7), with each cell represented by a 7x1 vector: [prob of anything in cell, x midpoint, y midpoint, box width, box height, prob dog, prob cat]. Referenced paper: https://arxiv.org/pdf/1506.02640.pdf, which explains the cost function in depth.
I believe there is an issue with either my forward prop or my cost function, as my model is not learning meaningful representations.
import tensorflow as tf

def cost_function(Z13, y, coord=5, noobj=0.5):
    """
    Z13: shape (None,7,7,7)
    y: shape (None,7,7,7)
    """
    # Masks are used because the box-coordinate terms only apply to cells that contain an actual bounding box
    c_mask_true = y[:,:,:,0:1] > 0   # Mask which determines which cell has a bounding box
    c_mask_false = y[:,:,:,0:1] < 1  # Mask for cells w/o bounding boxes

    # Confidence scores
    ci_guess_t = tf.boolean_mask(Z13[:,:,:,0:1], c_mask_true)
    ci_guess_f = tf.boolean_mask(Z13[:,:,:,0:1], c_mask_false)
    ci_act_t = tf.boolean_mask(y[:,:,:,0:1], c_mask_true)
    ci_act_f = tf.boolean_mask(y[:,:,:,0:1], c_mask_false)

    # Bounding box coordinates for the ground-truth box prediction
    xi_guess = tf.boolean_mask(Z13[:,:,:,1:2], c_mask_true)  # Midpoint x position
    xi_act = tf.boolean_mask(y[:,:,:,1:2], c_mask_true)
    yi_guess = tf.boolean_mask(Z13[:,:,:,2:3], c_mask_true)  # Midpoint y position
    yi_act = tf.boolean_mask(y[:,:,:,2:3], c_mask_true)

    # Width:
    wi_guess = tf.boolean_mask(Z13[:,:,:,3:4], c_mask_true)  # Box width
    wi_guess = tf.minimum(tf.sqrt(tf.abs(wi_guess)), wi_guess)  # prevent sqrt(neg) and increase cost for a negative prediction
    wi_act = tf.sqrt(tf.boolean_mask(y[:,:,:,3:4], c_mask_true))

    # Height:
    hi_guess = tf.boolean_mask(Z13[:,:,:,4:5], c_mask_true)  # Box height
    hi_guess = tf.minimum(tf.sqrt(tf.abs(hi_guess)), hi_guess)  # prevent sqrt(neg) and increase cost for a negative prediction
    hi_act = tf.sqrt(tf.boolean_mask(y[:,:,:,4:5], c_mask_true))

    # Predicted classes:
    class_g_dog = tf.boolean_mask(Z13[:,:,:,5:6], c_mask_true)
    class_t_dog = tf.boolean_mask(y[:,:,:,5:6], c_mask_true)
    class_g_cat = tf.boolean_mask(Z13[:,:,:,6:7], c_mask_true)
    class_t_cat = tf.boolean_mask(y[:,:,:,6:7], c_mask_true)

    # Parts correspond with the cost-function equations in the referenced paper
    part1 = coord * tf.reduce_sum(tf.square(xi_act - xi_guess) + tf.square(yi_act - yi_guess))
    part2 = coord * tf.reduce_sum(tf.square(wi_act - wi_guess) + tf.square(hi_act - hi_guess))
    part3 = tf.reduce_sum(tf.square(ci_act_t - ci_guess_t))
    part4 = noobj * tf.reduce_sum(tf.square(ci_act_f - ci_guess_f))
    part5 = tf.reduce_sum(tf.square(class_t_dog - class_g_dog) + tf.square(class_t_cat - class_g_cat))

    total_cost = part1 + part2 + part3 + part4 + part5
    return total_cost
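One way to sanity-check the function is to run it on a small random batch and confirm it returns a finite scalar and yields finite gradients with respect to the prediction. This is only a sketch and assumes TF2 eager execution (the original may well be written for graph mode):

import numpy as np
import tensorflow as tf

# build a tiny fake batch: one "object" cell per image, everything else empty
y_np = np.zeros((2, 7, 7, 7), dtype=np.float32)
y_np[:, 3, 3, :] = [1.0, 0.5, 0.5, 0.2, 0.3, 1.0, 0.0]
y = tf.constant(y_np)
Z13 = tf.Variable(tf.random.normal((2, 7, 7, 7)))

with tf.GradientTape() as tape:
    cost = cost_function(Z13, y)
grad = tape.gradient(cost, Z13)
print(cost.numpy(), np.isfinite(grad.numpy()).all())  # expect a finite scalar and True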
I have a 3D image of size 512*512*30. I also have a csv file with points of interest stored in it. I want to crop a 3D volume of size 32*32*16 around each point of interest, with the point at its center. I've written the following to achieve this:
import imageio

block = [32, 32, 16]
img = imageio.volread('path\\to\\tiff\\file')
x, y, z = 191, 303, 17
img_block = img[x - int(block[0] / 2):x + int(block[0] / 2),
                y - int(block[1] / 2):y + int(block[1] / 2),
                z - int(block[2] / 2):z + int(block[2] / 2)]
This works in the above case but fails when the x, y, z point lies near an edge; for example, at z = 28 I get an out-of-bounds error, which is expected.
How do I avoid this problem and ensure smooth cropping?
Thanks
The answer for padding could look like this:
import numpy as np
import imageio

point = [0, 1, 2]
img = imageio.volread('path\\to\\tiff\\file')
block = [32, 32, 16]
img_x, img_y, img_z = img.shape
# pad each axis on both sides; look up np.pad for other padding options
img_padded = np.pad(img, [(b, b) for b in block], 'constant', constant_values=0)
img_block = img_padded[point[0]-block[0]/2:point[0]+block[0]/2...]
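To make that concrete, here is a completed sketch of the padding approach: pad by half the block size on each axis and offset the point by the same amount, so every crop stays in bounds (variable names follow the snippets above; the point uses the question's failing z = 28 case):

import numpy as np
import imageio

block = [32, 32, 16]
img = imageio.volread('path\\to\\tiff\\file')

half = [b // 2 for b in block]
img_padded = np.pad(img, [(h, h) for h in half], 'constant', constant_values=0)

x, y, z = 191, 303, 28   # point of interest in original coordinates
# in the padded image the point sits at (x + half[0], y + half[1], z + half[2]),
# so the slice below is centered on it and always within bounds
img_block = img_padded[x : x + block[0],
                       y : y + block[1],
                       z : z + block[2]]
print(img_block.shape)   # (32, 32, 16)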
I would like a function that does this, but it doesn't exist:
from skimage.transform import shift
shifted = shift(image, translation=(15.2, 35.7),
mode='wrap', preserve_range=True)
Could you help me write such a function using skimage.transform.AffineTransform?
from skimage.transform import AffineTransform

def shift(image, translation):
    transform = AffineTransform(translation=translation)
    # How to do it???
    shifted = transform(image)  # Does not work; documentation for usage
                                # of this class is not present...
    return shifted
The function scipy.ndimage.interpolation.shift does do what I want, but it is very slow, roughly 10-20x slower than rotating. numpy.roll is off the table too, as it doesn't support fractional translations.
The documentation is somewhat sparse:
http://scikit-image.org/docs/stable/api/skimage.transform.html#skimage.transform.AffineTransform
Seems like this is working. Still, if anyone knows a simpler and faster way, please let me know.

from skimage.transform import AffineTransform, warp

def shift(image, vector):
    transform = AffineTransform(translation=vector)
    shifted = warp(image, transform, mode='wrap', preserve_range=True)
    shifted = shifted.astype(image.dtype)
    return shifted
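A small usage sketch, using one of skimage's sample images; note the sign convention, since warp() applies the given transform as the inverse mapping:

from skimage import data

# sub-pixel, wrap-around shift; positive translation values move the content
# toward the origin, so negate the vector for the opposite direction
image = data.camera()
shifted = shift(image, (15.2, 35.7))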
You can do it in 2D with scipy.ndimage.affine_transform:

import numpy as np
import scipy.ndimage

shift_matrix = np.array([[1, 0, -15.2], [0, 1, -35.7], [0, 0, 1]])
shifted = scipy.ndimage.affine_transform(image, shift_matrix)

But it is still relatively slow.
A home-made function could be:

import numpy as np

def shift_img_along_axis(img, axis=0, shift=1, constant_values=0):
    """ Shift an array along a specific axis. Each new value is a weighted mean of the
    two original pixels it falls between, so fractional (floating-point) shifts work.
    NOTE: at the border of the image, where not enough original pixels are available,
    the result is averaged with the given constant_values.
    constant_values: value assigned to pixels with no counterpart in the original img.
    RETURNS: shifted image.
    A.Mau. """
    intshift = int(shift)
    remain0 = abs(shift - int(shift))
    remain1 = 1 - remain0  # if shift is an integer: remain1=1 and remain0=0
    npad = int(np.ceil(abs(shift)))  # pad by the ceiling of the absolute shift
    # npad = int( abs( shift+ 0.5*[-1,1][shift>0] ) )
    pad_arg = [(0, 0)] * img.ndim
    pad_arg[axis] = (npad, npad)
    bigger_image = np.pad(img, pad_arg, 'constant', constant_values=constant_values)
    part1 = remain1 * bigger_image.take(np.arange(npad + intshift, npad + intshift + img.shape[axis]), axis)
    if remain0 == 0:
        shifted = part1
    else:
        if shift > 0:
            part0 = remain0 * bigger_image.take(np.arange(npad + intshift + 1, npad + intshift + 1 + img.shape[axis]), axis)
        else:
            part0 = remain0 * bigger_image.take(np.arange(npad + intshift - 1, npad + intshift - 1 + img.shape[axis]), axis)
        shifted = part0 + part1
    return shifted
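A usage sketch for the function above, applying a fractional shift along the column axis of a random test array:

import numpy as np

img = np.random.rand(100, 120)
shifted = shift_img_along_axis(img, axis=1, shift=3.7, constant_values=0)
print(shifted.shape)  # same shape as the input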