I captured the following 2 pictures from my mobile:Image1 , Image2
the camera was calibrated and I used this code to reconstruct 3D cloud point:
'''
Created by Omar Padierna "Para11ax" on Jan 1 2019
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
'''
import cv2
import numpy as np
import glob
from tqdm import tqdm
import PIL.ExifTags
import PIL.Image
from matplotlib import pyplot as plt
#=====================================
# Function declarations
#=====================================
#Function to create point cloud file
def create_output(vertices, colors, filename):
colors = colors.reshape(-1,3)
vertices = np.hstack([vertices.reshape(-1,3),colors])
ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
with open(filename, 'w') as f:
f.write(ply_header %dict(vert_num=len(vertices)))
np.savetxt(f,vertices,'%f %f %f %d %d %d')
#Function that Downsamples image x number (reduce_factor) of times.
def downsample_image(image, reduce_factor):
for i in range(0,reduce_factor):
#Check if image is color or grayscale
if len(image.shape) > 2:
row,col = image.shape[:2]
else:
row,col = image.shape
image = cv2.pyrDown(image, dstsize= (col//2, row // 2))
return image
#=========================================================
# Stereo 3D reconstruction
#=========================================================
#Load camera parameters
ret = np.load('D:/Books/Pav Man/3DReconstruction-master/Reconstruction/camera_params/ret.npy')
K = np.load('D:/Books/Pav Man/3DReconstruction-master/Reconstruction/camera_params/K.npy')
dist = np.load('D:/Books/Pav Man/3DReconstruction-master/Reconstruction/camera_params/dist.npy')
#Specify image paths
img_path1 = 'D:/Books/Pav Man/3DReconstruction-master/Reconstruction/reconstruct_this/TestVi4.jpg'
img_path2 = 'D:/Books/Pav Man/3DReconstruction-master/Reconstruction/reconstruct_this/TestVi5.jpg'
#Load pictures
img_1 = cv2.imread(img_path1)
img_2 = cv2.imread(img_path2)
#Get height and width. Note: It assumes that both pictures are the same size. They HAVE to be same size and height.
h,w = img_2.shape[:2]
#Get optimal camera matrix for better undistortion
new_camera_matrix, roi = cv2.getOptimalNewCameraMatrix(K,dist,(w,h),1,(w,h))
#Undistort images
img_1_undistorted = cv2.undistort(img_1, K, dist, None, new_camera_matrix)
img_2_undistorted = cv2.undistort(img_2, K, dist, None, new_camera_matrix)
#Downsample each image 3 times (because they're too big)
img_1_downsampled = downsample_image(img_1_undistorted,3)
img_2_downsampled = downsample_image(img_2_undistorted,3)
#cv2.imwrite('undistorted_left.jpg', img_1_downsampled)
#cv2.imwrite('undistorted_right.jpg', img_2_downsampled)
#Set disparity parameters
#Note: disparity range is tuned according to specific parameters obtained through trial and error.
win_size = 1
min_disp = -1
max_disp = abs(min_disp) * 9 #min_disp * 9
num_disp = max_disp - min_disp # Needs to be divisible by 16
#Create Block matching object.
stereo = cv2.StereoSGBM_create(minDisparity= min_disp,
numDisparities = num_disp,
blockSize = 5,
uniquenessRatio = 5,
speckleWindowSize = 1,
speckleRange = 5,
disp12MaxDiff = 2,
P1 = 8*3*win_size**2,#8*3*win_size**2,
P2 =32*3*win_size**2) #32*3*win_size**2)
#Compute disparity map
print ("\nComputing the disparity map...")
disparity_map = stereo.compute(img_1_downsampled, img_2_downsampled)
#Show disparity map before generating 3D cloud to verify that point cloud will be usable.
plt.imshow(disparity_map,'gray')
plt.show()
#Generate point cloud.
print ("\nGenerating the 3D map...")
#Get new downsampled width and height
h,w = img_2_downsampled.shape[:2]
#Load focal length.
focal_length = np.load('D:/Books/Pav Man/3DReconstruction-master/Reconstruction/camera_params/FocalLength.npy')
#Perspective transformation matrix
#This transformation matrix is from the openCV documentation, didn't seem to work for me.
Q = np.float32([[1,0,0,-w/2.0],
[0,-1,0,h/2.0],
[0,0,0,-focal_length],
[0,0,1,0]])
#This transformation matrix is derived from Prof. Didier Stricker's power point presentation on computer vision.
#Link : https://ags.cs.uni-kl.de/fileadmin/inf_ags/3dcv-ws14-15/3DCV_lec01_camera.pdf
Q2 = np.float32([[1,0,0,0],
[0,-1,0,0],
[0,0,focal_length*0.05,0], #Focal length multiplication obtained experimentally.
[0,0,0,1]])
#Reproject points into 3D
points_3D = cv2.reprojectImageTo3D(disparity_map, Q2)
#Get color points
colors = cv2.cvtColor(img_1_downsampled, cv2.COLOR_BGR2RGB)
#Get rid of points with value 0 (i.e no depth)
mask_map = disparity_map > disparity_map.min()
#Mask colors and points.
output_points = points_3D[mask_map]
output_colors = colors[mask_map]
#Define name for output file
output_file = 'D:/Books/Pav Man/3DReconstruction-master/Reconstruction/reconstructed.ply'
#Generate point cloud
print ("\n Creating the output file... \n")
create_output(output_points, output_colors, output_file)
the following images are for disparity map and 3D model:
Disparity map
, Model
as you can see in the Model image, there are empty areas (red areas), how I can fill this area with points, and how to improve the disparity map.
The non-confident region(ie., algorithm not sure what this the correct disparity) is marked as black pixels in the disparity map. This is much expected behaviour.
You have to do some post processing to fill the map. Use Guided filter to complete map. If matlab provides any guided filter you can try once. In OpenCV "WLS" is the most common guided filter to get a filled map.
Related
everyone. I'm trying to triangulate some points (dense reconstruction) lying on a plane in a setup which involves two cameras.
[Reference image]: https://imgur.com/gOps4vP and [The other image]: https://imgur.com/VIiH9Rv
First of all, I solve the relative pose problem using the 5pts algorithm on the undistorted points for the Essential Matrix estimation, the I recover the pose. I'm using RANSAC.
Then, I rectify the stereo pairs the usual way.
R1, R2, Pn1, Pn2, Q, _, _ = cv2.stereoRectify(K1, dcoeffs1, K2, dcoeffs2,
img1.shape[::-1], R, t,
flags=cv2.CALIB_ZERO_DISPARITY,
alpha=-1)
# Compute the rigid transform that OpenCV apply to world points (USEFUL LATER)
# in order for the rectified reference camera to be K_new[I|0]
tn_1 = np.zeros((3,1)) # Cameras are never translated in the rectification
G1_rect = np.block([[R1, tn_1], [np.zeros((1,3)), 1.0]])
maps1 = cv2.initUndistortRectifyMap(K1, dcoeffs1, R1, Pn1, (1920,1080), cv2.CV_32FC1)
maps2 = cv2.initUndistortRectifyMap(K2, dcoeffs2, R2, Pn2, (1920,1080), cv2.CV_32FC1)
img1_remap = cv2.remap(img1, maps1[0], maps1[1], cv2.INTER_LANCZOS4)
img2_remap = cv2.remap(img2, maps2[0], maps2[1], cv2.INTER_LANCZOS4)
Result of the rectification:
[Rectified reference image] https://drive.google.com/open?id=10VfgXrXFO3_lYqtO9qJXr17Dc6F1PuXU
[The other one rectified] https://drive.google.com/open?id=13ZkeMiF5xEovGmX13LSQVaJ237hoJLX0
Now I call a function that recognize a known object in the images (target).
#Now call a function that recognize a known object in the images (target)
# Find target
target_corners, _ = dt.detectTarget(img_scene1, img_target, 0.5) # return 4 corners of the detected polygon
target_corners = target_corners[:,0,:]
# Compute mask for the target cutout:
target_mask = mp.maskPolygon(target_corners, img_scene1.shape[::-1]) # Output: mask of same dimension of the image
Target found (please note the highlighted corners):
[Target found] https://imgur.com/QjYV8tp
Then I compute the disparity map using StereoSGBM. I'm interested in the computation of the target disparity only (I'll mask all the other points).
With the Disparity map obtained and using the 4x4 projection Matrix Q given by stereoRectify, I perform the 3d reprojection of the disparity map.
# Compute disparity map
# https://docs.opencv.org/3.3.1/d2/d85/classcv_1_1StereoSGBM.html
window_size = 5
min_disp = 16
max_disp = 1024
num_disp = max_disp-min_disp # Deve essere divisibile per 16!
stereo = cv2.StereoSGBM_create(minDisparity = min_disp,
numDisparities = num_disp,
blockSize = window_size,
P1 = 8*3*window_size**2,
P2 = 32*3*window_size**2,
disp12MaxDiff = 1,
uniquenessRatio = 10,
speckleWindowSize = 150,
speckleRange = 2
)
print('Calcolo SGBM della disparità...')
disp = stereo.compute(img_scene1, img_scene2).astype(np.float32) / 16.0
target_disparity = target_mask*disp
points = cv2.reprojectImageTo3D(target_disparity, Q)
# DEBUG:
cv2.namedWindow('scene1', cv2.WINDOW_NORMAL)
cv2.resizeWindow('scene1', 800,450)
cv2.imshow('scene1', img_scene1)
cv2.namedWindow('disparity', cv2.WINDOW_NORMAL)
cv2.resizeWindow('disparity', 800,450)
cv2.imshow('disparity', (disp-min_disp)/num_disp)
cv2.namedWindow('target_disparity', cv2.WINDOW_NORMAL)
cv2.resizeWindow('target_disparity', 800,450)
cv2.imshow('target_disparity', target_mask*(disp-min_disp)/num_disp)
cv2.waitKey()
cv2.destroyAllWindows()
# Obtain matrix of the target 3D points starting from disparity image obtained from reprojectImageTo3D()
mask_disp = disp > disp.min()
mask_inf = ~(np.isinf(points[:,:,0]) | np.isinf(points[:,:,1]) | np.isinf(points[:,:,2]))
mask_nan = ~(np.isnan(points[:,:,0]) | np.isnan(points[:,:,1]) | np.isnan(points[:,:,2]))
mask = mask_disp & mask_inf & mask_nan
pts3D = points[mask]
Now, I have 3d reconstructed the region of the images corresponding to the target. I noted that OpenCv, during camera rectification, apply a rigid transform to world points such that the reference original camera and the new (rectified) reference camera have the same extrinsics (R=eye(3) and t=[0,0,0]'). Infact, during rectification both cameras must be rotated, and I think OpenCV simply brings back the new cameras to a new reference such that the reference rectified camera has the same extrinsics of the original one. But this implies that the reconstructed 3d points will be expressed in a world reference that is not the world reference of the original camera!
So, applying the inverse rigid transform to the pts3D, we obtain a reconstruction in the original reference camera frame. (See code).
target3Dpts_hom = cv2.convertPointsToHomogeneous(target3Dpts)[:,0,:].T
target3Dpts_hom = G.T # target3Dpts_hom
new_target3Dpts = cv2.convertPointsFromHomogeneous(target3Dpts_hom.T[:,np.newaxis,:])[:,0,:]
Please NOTE that if I don't perform this operation, the pt3D reprojected on the original cameras by means of their projection matrices will not correspond to the target points!
Check reconstruction via reprojection; Now, i can reproject the new_target3Dpts:
Let me introduce the projection function that I call:
def proj_dist(P, dcoeffs, M):
import numpy as np
import cv2
K, R, t,_,_,_,_ = cv2.decomposeProjectionMatrix(P)
rotv, _ = cv2.Rodrigues(R)
# Projection. Returns a (N,2) shaped array
m,_ = cv2.projectPoints(M,rotv,t[0:-1],K,dcoeffs)
m = m.squeeze()
return m
Finally, the reprojections:
#P_kin = K_kin[eye(3),0] # Originals MPPs of two cameras
#P_rpi = K_rpi[R,t]
m0 = proj.proj_dist(P_kin,dcoeffs_kin,new_points).astype('int32')
for (x, y) in m0:
x = int(x)
y= int(y)
cv2.circle(img_kin, (x, y), 2, (255, 255, 0), 4)
cv2.namedWindow('frame1', cv2.WINDOW_NORMAL)
cv2.resizeWindow('frame1', 800,450)
cv2.imshow('frame1',img_kin)
cv2.waitKey(0)
m1 = proj.proj_dist(P_rpi,dcoeffs_rpi,new_points).astype('int32')
img_rpi1 = img_rpi.copy()
for (x, y) in m1:
x = int(x)
y = int(y)
cv2.circle(img_rpi1, (x, y), 2, (255, 255, 0), 4)
cv2.namedWindow('frame2', cv2.WINDOW_NORMAL)
cv2.resizeWindow('frame2', 800,450)
cv2.imshow('frame2',img_rpi1)
cv2.waitKey(0)
But, while the reprojected points on the original reference camera are correct, this is not true for the second one....The points are simply translated, but I can't explain why.
Results: [First frame repj] https://imgur.com/S4lo9Wz
[2nd frame repj. Error] https://imgur.com/y4igaEI
Any ideas? I will include all the code now.
Thank you.
SM
I solved the problem, which is not related with the reprojectImageto3D --that works fine--, but with this piece of code I've wrote and that I used to reproject the points onto the original frames:
def proj_dist(P, dcoeffs, M):
import numpy as np
import cv2
K, R, t,_,_,_,_ = cv2.decomposeProjectionMatrix(P)
rotv, _ = cv2.Rodrigues(R)
# Projection. Returns a (N,2) shaped array
m,_ = cv2.projectPoints(M,rotv,t[0:-1],K,dcoeffs)
m = m.squeeze()
return m
I've wrote my own function for points projection:
def proj(P, M, hom=0):
# proj(): Esegue la proiezione prospettica dei punti 3D M secondo la MPP P,
# sul piano immagine 2D di una camera pinhole.
import numpy as np
n = M.shape[1]
M = np.concatenate((M, np.ones((1,n))))
# Proiezione
m = P # M
m = m/m[2,:]
if hom !=1 :
# Passo a cartesiane
m = m[0:2,:]
return m
and the problem is solved!
My function does not take in account for lens distortion. I'll further investigate the problem related with the projectPoints() OpenCV function.
I am trying to classify if an image mostly contains black and white or color, to be precise it is a photo of a photocopy(think xerox),which is mostly black and white.The image is NOT single channel image, but a 3 channel image.
I just want to know if there are any obvious ways to solve this that im missing.
for now im trying to plot histograms and may be do a pixel count, but that does not look very promising,any suggestions on this would be really helpful.
Thanks in advance.
I am unsure of the exact use case, but having experienced similar issues I used this rather helpful article.
https://www.alanzucconi.com/2015/05/24/how-to-find-the-main-colours-in-an-image/
The GitHub containing the full code is found here: https://gist.github.com/jayapal/077f63f3163abbfb3c50c7d209524cc6
If this is for your own visual the histogram should be enough, if you are attempting to automate however, it may be helpful to round the color values up or down, this would provide information on if the image is darker or lighter than a certain value.
What are you using this code for on a larger perspective? Maybe that will help provide more adequate information
Edit: The code above also provides the ability to define a region of the image, hopefully this will make your selection more accurate
Adding code directly
from sklearn.cluster import KMeans
from sklearn import metrics
import cv2
import numpy as np
import cv2
image = cv2.imread("red.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Resize it
h, w, _ = image.shape
w_new = int(100 * w / max(w, h) )
h_new = int(100 * h / max(w, h) )
image = cv2.resize(image, (w_new, h_new));
# Reshape the image to be a list of pixels
image_array = image.reshape((image.shape[0] * image.shape[1], 3))
print image_array
# Clusters the pixels
clt = KMeans(n_clusters = 3)
clt.fit(image_array)
def centroid_histogram(clt):
# grab the number of different clusters and create a histogram
# based on the number of pixels assigned to each cluster
numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
(hist, _) = np.histogram(clt.labels_, bins = numLabels)
# normalize the histogram, such that it sums to one
hist = hist.astype("float")
hist /= hist.sum()
# return the histogram
return hist
# Finds how many pixels are in each cluster
hist = centroid_histogram(clt)
# Sort the clusters according to how many pixel they have
zipped = zip (hist, clt.cluster_centers_)
zipped.sort(reverse=True, key=lambda x : x[0])
hist, clt.cluster_centers = zip(*zipped)
# By Adrian Rosebrock
import numpy as np
import cv2
bestSilhouette = -1
bestClusters = 0;
for clusters in range(2, 10):
# Cluster colours
clt = KMeans(n_clusters = clusters)
clt.fit(image_array)
# Validate clustering result
silhouette = metrics.silhouette_score(image_array, clt.labels_,
metric='euclidean')
# Find the best one
if silhouette > bestSilhouette:
bestSilhouette = silhouette;
bestClusters = clusters;
print bestSilhouette
print bestClusters
I have a set of 4 DICOM CT Volumes which I am reading with SimpleITK ImageSeriesReader. Two of the images represent the CT of patient before and after the surgery. The other two images are binary segmentation masks segmented on the former 2 CT images. The segmentations are a ROI of their source CT.
All the 4 CT images, have different Size, Spacing, Origin and Direction. I have tried applying this GitHub gist https://gist.github.com/zivy/79d7ee0490faee1156c1277a78e4a4c4 to resize my images to 512x512x512 and Spacing 1x1x1. However, it doesn't place the images at the correct location. The segmented structure is always placed in the center of the CT image, instead of the correct location, as you can see from the pictures.
This my "raw" DICOM Image with its tumor segmentation (orange blob).
This is after the "resizing" algorithm and writing to disk (same image as before, just the tumor is colored green blob because inconsistency):
Code used for resampling all 4 DICOM Volumes to the same dimensions:
def resize_resample_images(images):
""" Resize all the images to the same dimensions, spacing and origin.
Usage: newImage = resize_image(source_img_plan, source_img_validation, ROI(ablation/tumor)_mask)
1. translate to same origin
2. largest number of slices and interpolate the others.
3. same resolution 1x1x1 mm3 - resample
4. (physical space)
Slice Thickness (0018,0050)
ImagePositionPatient (0020,0032)
ImageOrientationPatient (0020,0037)
PixelSpacing (0028,0030)
Frame Of Reference UID (0020,0052)
"""
# %% Define tuple to store the images
tuple_resized_imgs = collections.namedtuple('tuple_resized_imgs',
['img_plan',
'img_validation',
'ablation_mask',
'tumor_mask'])
# %% Create Reference image with zero origin, identity direction cosine matrix and isotropic dimension
dimension = images.img_plan.GetDimension() #
reference_direction = np.identity(dimension).flatten()
reference_size = [512] * dimension
reference_origin = np.zeros(dimension)
data = [images.img_plan, images.img_validation, images.ablation_mask, images.tumor_mask]
reference_spacing = np.ones(dimension) # resize to isotropic size
reference_image = sitk.Image(reference_size, images.img_plan.GetPixelIDValue())
reference_image.SetOrigin(reference_origin)
reference_image.SetSpacing(reference_spacing)
reference_image.SetDirection(reference_direction)
reference_center = np.array(
reference_image.TransformContinuousIndexToPhysicalPoint(np.array(reference_image.GetSize()) / 2.0))
#%% Paste the GT segmentation masks before transformation
tumor_mask_paste = (paste_roi_image(images.img_plan, images.tumor_mask))
ablation_mask_paste = (paste_roi_image(images.img_validation, images.ablation_mask))
images.tumor_mask = tumor_mask_paste
images.ablation_mask = ablation_mask_paste
# %% Apply transforms
data_resized = []
for idx,img in enumerate(data):
transform = sitk.AffineTransform(dimension) # use affine transform with 3 dimensions
transform.SetMatrix(img.GetDirection()) # set the cosine direction matrix
# TODO: check translation when computing the segmentations
transform.SetTranslation(np.array(img.GetOrigin()) - reference_origin) # set the translation.
# Modify the transformation to align the centers of the original and reference image instead of their origins.
centering_transform = sitk.TranslationTransform(dimension)
img_center = np.array(img.TransformContinuousIndexToPhysicalPoint(np.array(img.GetSize()) / 2.0))
centering_transform.SetOffset(np.array(transform.GetInverse().TransformPoint(img_center) - reference_center))
centered_transform = sitk.Transform(transform)
centered_transform.AddTransform(centering_transform)
# Using the linear interpolator as these are intensity images, if there is a need to resample a ground truth
# segmentation then the segmentation image should be resampled using the NearestNeighbor interpolator so that
# no new labels are introduced.
if (idx==1 or idx==2): # temporary solution to resample the GT image with NearestNeighbour
resampled_img = sitk.Resample(img, reference_image, centered_transform, sitk.sitkNearestNeighbor, 0.0)
else:
resampled_img = sitk.Resample(img, reference_image, centered_transform, sitk.sitkLinear, 0.0)
# append to list
data_resized.append(resampled_img)
# assuming the order stays the same, reassigng back to tuple
resized_imgs = tuple_resized_imgs(img_plan=data_resized[0],
img_validation=data_resized[1],
ablation_mask=data_resized[2],
tumor_mask=data_resized[3])
Code for "pasting" the ROI segmentations images into a correct size. Might be redundant.:
def paste_roi_image(image_source, image_roi):
""" Resize ROI binary mask to size, dimension, origin of its source/original img.
Usage: newImage = paste_roi_image(source_img_plan, roi_mask)
"""
newSize = image_source.GetSize()
newOrigin = image_source.GetOrigin()
newSpacing = image_roi.GetSpacing()
newDirection = image_roi.GetDirection()
if image_source.GetSpacing() != image_roi.GetSpacing():
print('the spacing of the source and derived mask differ')
# re-cast the pixel type of the roi mask
pixelID = image_source.GetPixelID()
caster = sitk.CastImageFilter()
caster.SetOutputPixelType(pixelID)
image_roi = caster.Execute(image_roi)
# black 3D image
outputImage = sitk.Image(newSize, image_source.GetPixelIDValue())
outputImage.SetOrigin(newOrigin)
outputImage.SetSpacing(newSpacing)
outputImage.SetDirection(newDirection)
# transform from physical point to index the origin of the ROI image
# img_center = np.array(img.TransformContinuousIndexToPhysicalPoint(np.array(img.GetSize()) / 2.0))
destinationIndex = outputImage.TransformPhysicalPointToIndex(image_roi.GetOrigin())
# paste the roi mask into the re-sized image
pasted_img = sitk.Paste(outputImage, image_roi, image_roi.GetSize(), destinationIndex=destinationIndex)
return pasted_img
Having ordered half a dozen webcams online for a project I notice that the colors on the output are not consistent.
In order to compensate for this I have attempted to take a template image and extract the R,G and B histograms and tried to match the target images's RGB histograms based on this.
This was inspired from the description of the solution for a very similar problem Comparative color calibration
The perfect solution will look like this :
In order to try to solve this I wrote the following script which performed poorly:
EDIT (Thanks to #DanMašek and #api55)
import numpy as np
def show_image(title, image, width = 300):
# resize the image to have a constant width, just to
# make displaying the images take up less screen real
# estate
r = width / float(image.shape[1])
dim = (width, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
# show the resized image
cv2.imshow(title, resized)
def hist_match(source, template):
"""
Adjust the pixel values of a grayscale image such that its histogram
matches that of a target image
Arguments:
-----------
source: np.ndarray
Image to transform; the histogram is computed over the flattened
array
template: np.ndarray
Template image; can have different dimensions to source
Returns:
-----------
matched: np.ndarray
The transformed output image
"""
oldshape = source.shape
source = source.ravel()
template = template.ravel()
# get the set of unique pixel values and their corresponding indices and
# counts
s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
return_counts=True)
t_values, t_counts = np.unique(template, return_counts=True)
# take the cumsum of the counts and normalize by the number of pixels to
# get the empirical cumulative distribution functions for the source and
# template images (maps pixel value --> quantile)
s_quantiles = np.cumsum(s_counts).astype(np.float64)
s_quantiles /= s_quantiles[-1]
t_quantiles = np.cumsum(t_counts).astype(np.float64)
t_quantiles /= t_quantiles[-1]
# interpolate linearly to find the pixel values in the template image
# that correspond most closely to the quantiles in the source image
interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
return interp_t_values[bin_idx].reshape(oldshape)
from matplotlib import pyplot as plt
from scipy.misc import lena, ascent
import cv2
source = cv2.imread('/media/somadetect/Lexar/color_transfer_data/1/frame10.png')
s_b = source[:,:,0]
s_g = source[:,:,1]
s_r = source[:,:,2]
template = cv2.imread('/media/somadetect/Lexar/color_transfer_data/5/frame6.png')
t_b = source[:,:,0]
t_r = source[:,:,1]
t_g = source[:,:,2]
matched_b = hist_match(s_b, t_b)
matched_g = hist_match(s_g, t_g)
matched_r = hist_match(s_r, t_r)
y,x,c = source.shape
transfer = np.empty((y,x,c), dtype=np.uint8)
transfer[:,:,0] = matched_r
transfer[:,:,1] = matched_g
transfer[:,:,2] = matched_b
show_image("Template", template)
show_image("Target", source)
show_image("Transfer", transfer)
cv2.waitKey(0)
Template image :
Target Image:
The Matched Image:
Then I found Adrian's (pyimagesearch) attempt to solve a very similar problem in the following link
Fast Color Transfer
The results seem to be fairly good with some saturation defects. I would welcome any suggestions or pointers on how to address this issue so all web cam outputs could be calibrated to output similar colors based on one template image.
Your script performs poorly because you are using the wrong index.
OpenCV images are BGR, so this was correct in your code:
source = cv2.imread('/media/somadetect/Lexar/color_transfer_data/1/frame10.png')
s_b = source[:,:,0]
s_g = source[:,:,1]
s_r = source[:,:,2]
template = cv2.imread('/media/somadetect/Lexar/color_transfer_data/5/frame6.png')
t_b = source[:,:,0]
t_r = source[:,:,1]
t_g = source[:,:,2]
but this is wrong
transfer[:,:,0] = matched_r
transfer[:,:,1] = matched_g
transfer[:,:,2] = matched_b
since here you are using RGB and not BGR, so the color changes and your OpenCV still thinks it is BGR. That is why it looks weird.
It should be:
transfer[:,:,0] = matched_b
transfer[:,:,1] = matched_g
transfer[:,:,2] = matched_r
As other possible solutions, you may try to look which parameters can be set in your camera. Sometimes they have some auto parameters which you can set manually for all of them to match. Also, beware of this auto parameters, usually white balance and focus and others are set auto and they may change quite a lot in the same camera from one time to another (depending on illumination, etc etc).
UPDATE:
As DanMašek points out, also
t_b = source[:,:,0]
t_r = source[:,:,1]
t_g = source[:,:,2]
is wrong, since the r should be index 2 and g index 1
t_b = source[:,:,0]
t_g = source[:,:,1]
t_r = source[:,:,2]
I have attempted a white patch based calibration routine. Here is the link https://theiszm.wordpress.com/tag/white-balance/.
The code snippet follows:
import cv2
import math
import numpy as np
import sys
from matplotlib import pyplot as plt
def hist_match(source, template):
"""
Adjust the pixel values of a grayscale image such that its histogram
matches that of a target image
Arguments:
-----------
source: np.ndarray
Image to transform; the histogram is computed over the flattened
array
template: np.ndarray
Template image; can have different dimensions to source
Returns:
-----------
matched: np.ndarray
The transformed output image
"""
oldshape = source.shape
source = source.ravel()
template = template.ravel()
# get the set of unique pixel values and their corresponding indices and
# counts
s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
return_counts=True)
t_values, t_counts = np.unique(template, return_counts=True)
# take the cumsum of the counts and normalize by the number of pixels to
# get the empirical cumulative distribution functions for the source and
# template images (maps pixel value --> quantile)
s_quantiles = np.cumsum(s_counts).astype(np.float64)
s_quantiles /= s_quantiles[-1]
t_quantiles = np.cumsum(t_counts).astype(np.float64)
t_quantiles /= t_quantiles[-1]
# interpolate linearly to find the pixel values in the template image
# that correspond most closely to the quantiles in the source image
interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
return interp_t_values[bin_idx].reshape(oldshape)
# Read original image
im_o = cv2.imread('/media/Lexar/color_transfer_data/5/frame10.png')
im = im_o
cv2.imshow('Org',im)
cv2.waitKey()
B = im[:,:, 0]
G = im[:,:, 1]
R = im[:,:, 2]
R= np.array(R).astype('float')
G= np.array(G).astype('float')
B= np.array(B).astype('float')
# Extract pixels that correspond to pure white R = 255,G = 255,B = 255
B_white = R[168, 351]
G_white = G[168, 351]
R_white = B[168, 351]
print B_white
print G_white
print R_white
# Compensate for the bias using normalization statistics
R_balanced = R / R_white
G_balanced = G / G_white
B_balanced = B / B_white
R_balanced[np.where(R_balanced > 1)] = 1
G_balanced[np.where(G_balanced > 1)] = 1
B_balanced[np.where(B_balanced > 1)] = 1
B_balanced=B_balanced * 255
G_balanced=G_balanced * 255
R_balanced=R_balanced * 255
B_balanced= np.array(B_balanced).astype('uint8')
G_balanced= np.array(G_balanced).astype('uint8')
R_balanced= np.array(R_balanced).astype('uint8')
im[:,:, 0] = (B_balanced)
im[:,:, 1] = (G_balanced)
im[:,:, 2] = (R_balanced)
# Notice saturation artifacts
cv2.imshow('frame',im)
cv2.waitKey()
# Extract the Y plane in original image and match it to the transformed image
im_o = cv2.cvtColor(im_o, cv2.COLOR_BGR2YCR_CB)
im_o_Y = im_o[:,:,0]
im = cv2.cvtColor(im, cv2.COLOR_BGR2YCR_CB)
im_Y = im[:,:,0]
matched_y = hist_match(im_o_Y, im_Y)
matched_y= np.array(matched_y).astype('uint8')
im[:,:,0] = matched_y
im_final = cv2.cvtColor(im, cv2.COLOR_YCR_CB2BGR)
cv2.imshow('frame',im_final)
cv2.waitKey()
The input image is:
The result of the script is:
Thank you all for suggestions and pointers!!
I'm working on a little problem in my sparetime involving analysis of some images obtained through a microscope. It is a wafer with some stuff here and there, and ultimately I want to make a program to detect when certain materials show up.
Anyways, first step is to normalize the intensity across the image, since the lens does not give uniform lightning. Currently I use an image, with no stuff on, only the substrate, as a background, or reference, image. I find the maximum of the three (intensity) values for RGB.
from PIL import Image
from PIL import ImageDraw
rmax = 0;gmax = 0;bmax = 0;rmin = 300;gmin = 300;bmin = 300
im_old = Image.open("test_image.png")
im_back = Image.open("background.png")
maxx = im_old.size[0] #Import the size of the image
maxy = im_old.size[1]
im_new = Image.new("RGB", (maxx,maxy))
pixback = im_back.load()
for x in range(maxx):
for y in range(maxy):
if pixback[x,y][0] > rmax:
rmax = pixback[x,y][0]
if pixback[x,y][1] > gmax:
gmax = pixback[x,y][1]
if pixback[x,y][2] > bmax:
bmax = pixback[x,y][2]
pixnew = im_new.load()
pixold = im_old.load()
for x in range(maxx):
for y in range(maxy):
r = float(pixold[x,y][0]) / ( float(pixback[x,y][0])*rmax )
g = float(pixold[x,y][1]) / ( float(pixback[x,y][1])*gmax )
b = float(pixold[x,y][2]) / ( float(pixback[x,y][2])*bmax )
pixnew[x,y] = (r,g,b)
The first part of the code determines the maximum intensity of the RED, GREEN and BLUE channels, pixel by pixel, of the background image, but needs only be done once.
The second part takes the "real" image (with stuff on it), and normalizes the RED, GREEN and BLUE channels, pixel by pixel, according to the background. This takes some time, 5-10 seconds for an 1280x960 image, which is way too slow if I need to do this to several images.
What can I do to improve the speed? I thought of moving all the images to numpy arrays, but I can't seem to find a fast way to do that for RGB images.
I'd rather not move away from python, since my C++ is quite low-level, and getting a working FORTRAN code would probably take longer than I could ever save in terms of speed :P
import numpy as np
from PIL import Image
def normalize(arr):
"""
Linear normalization
http://en.wikipedia.org/wiki/Normalization_%28image_processing%29
"""
arr = arr.astype('float')
# Do not touch the alpha channel
for i in range(3):
minval = arr[...,i].min()
maxval = arr[...,i].max()
if minval != maxval:
arr[...,i] -= minval
arr[...,i] *= (255.0/(maxval-minval))
return arr
def demo_normalize():
img = Image.open(FILENAME).convert('RGBA')
arr = np.array(img)
new_img = Image.fromarray(normalize(arr).astype('uint8'),'RGBA')
new_img.save('/tmp/normalized.png')
See http://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.fromimage.html#scipy.misc.fromimage
You can say
databack = scipy.misc.fromimage(pixback)
rmax = numpy.max(databack[:,:,0])
gmax = numpy.max(databack[:,:,1])
bmax = numpy.max(databack[:,:,2])
which should be much faster than looping over all (r,g,b) triplets of your image.
Then you can do
dataold = scip.misc.fromimage(pixold)
r = dataold[:,:,0] / (pixback[:,:,0] * rmax )
g = dataold[:,:,1] / (pixback[:,:,1] * gmax )
b = dataold[:,:,2] / (pixback[:,:,2] * bmax )
datanew = numpy.array((r,g,b))
imnew = scipy.misc.toimage(datanew)
The code is not tested, but should work somehow with minor modifications.
This is partially from FolksTalk webpage:
from PIL import Image
import numpy as np
# Read image file
in_file = "my_image.png"
# convert('RGB') for PNG file type
image = Image.open(in_file).convert('RGB')
pixels = np.asarray(image)
# Convert from integers to floats
pixels = pixels.astype('float32')
# Normalize to the range 0-1
pixels /= 255.0