Segmenting pictures using their histograms in Python

I have a problem with segmenting (or clustering?) pictures using their histograms - I mean I don't really know how to approach it. I have, let's say, 200 images and I have to group them (people with people, buildings with buildings, etc.).
If you want to work on exactly the same task as me, the images are available at http://wang.ist.psu.edu/docs/related/
I know how to compute the histograms; my code is below (with a detailed description).
import numpy as np
import matplotlib.pyplot as plt
import cv2
# 1. Loading images from folder
path = 'image\\350.jpg'
img = plt.imread(path)
imglist=[]
for i in range(0, 20):
    x = np.random.randint(0, 1000)
    path = "image\\" + str(x) + ".jpg"
    # print(path)
    img = plt.imread(path)
    imglist.append(img)
# just testing if everything is fine
plt.figure(figsize = (20,10))
for i in range(0, 20):
    plt.subplot(4, 5, i + 1)
    plt.imshow(imglist[i])
    plt.xticks([])
    plt.yticks([])
# 2. QUANTIZATION (FOR ONE IMAGE)
# QUANTIZATION TO 2 COLORS FOR EACH OF R, G, B --> 8 COLORS
imgq2 = np.floor(img/128)*128+64
imgq2 =imgq2.astype(int)
# QUANTIZATION TO 4 COLORS PER CHANNEL --> 64 COLORS
imgq4 = np.floor(img/64)*64 + 32
imgq4 = imgq4.astype(int)
# QUANTIZATION TO 8 COLORS PER CHANNEL --> 512 COLORS
imgq8 = np.floor(img/32)*32 + 16
imgq8 = imgq8.astype(int)
# TESTING
plt.figure(figsize = (20,10))
plt.subplot(1,3,1)
plt.imshow(img)
plt.title('Original image')
plt.subplot(1,3,2)
plt.imshow(imgq2)
plt.title('2 values of RGB')
plt.subplot(1,3,3)
plt.imshow(imgq4)
plt.title('4 values of RGB')
# checking if this is true
imgq4.shape
imgq8.shape
print(np.unique(imgq4[:,:,:]))
print(np.unique(imgq8[:,:,:]))
imgq4 = np.floor(img/128)
imgq4 = imgq4.astype(int)
# plt.imshow(imgq4)
# plt.show()
# 3. MAKING 3-Dim (RGB) HISTOGRAM
hist = {(i,j,k): 0 for i in range(0,4) for j in range(0,4) for k in range(0,4)}
print(hist)
img = plt.imread('image\\0.jpg')
img2 = img.copy()
img2 = np.floor(img2/64).astype(int)
img2.shape
img2 = img2.reshape( img2.shape[0]*img2.shape[1] ,3)
for i in range(0, img2.shape[0]):
    hist[img2[i, 0], img2[i, 1], img2[i, 2]] += 1
print(len(hist.keys()), len(hist.values()))
# histogram values
print(hist.values())
print(img2.shape[0] == np.sum(list(hist.values())))
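For reference, this is roughly how I would wrap the histogram above into a reusable function so I can apply it to any image (an untested sketch; np.histogramdd replaces the explicit pixel loop and uses the same 4x4x4 binning):
def rgb_histogram(img, bins=4):
    # 3D colour histogram (bins x bins x bins) flattened into one vector,
    # normalized by the pixel count so image size does not matter
    pixels = img.reshape(-1, 3)
    hist, _ = np.histogramdd(pixels, bins=(bins, bins, bins), range=((0, 256),) * 3)
    return hist.flatten() / pixels.shape[0]
# should match the dictionary-based histogram above (up to normalization)
print(rgb_histogram(plt.imread('image\\0.jpg')))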
I also have this file, which runs clustering on random points:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
no_of_points = 1000
X = np.random.rand(no_of_points,2)
plt.scatter(X[:,0],X[:,1],s = 50, cmap = 'rainbow')
kmeans = KMeans(n_clusters = 5)
kmeans.fit(X)
plt.scatter(X[:,0], X[:,1], s = 50, cmap = 'rainbow', c = kmeans.labels_)
plt.scatter(kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1],s = 100, c = 'black', alpha = 0.5)
plt.show()
To sum up, I have two questions.
Is it possible to transform the histograms (or vectors) into something I could cluster like in my second file?
How can I automate the process of building the image histograms? (Right now it is done for just one image.)
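What I imagine is roughly the sketch below (untested): one histogram vector per image, stacked into a matrix, then clustered exactly like the random points in my second file (rgb_histogram is the helper sketched earlier):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
features = []
for x in range(0, 1000):                          # the dataset's files are named 0.jpg ... 999.jpg
    img = plt.imread("image\\" + str(x) + ".jpg")
    features.append(rgb_histogram(img))           # one 64-dimensional vector per image
X = np.array(features)                            # shape (n_images, 64)
kmeans = KMeans(n_clusters=10)                    # e.g. 10, the number of categories in this dataset
labels = kmeans.fit_predict(X)                    # labels[i] = cluster index of image i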
Thanks for help!

Related

Applying k-mean algorithm to image patches

I want to apply the k-means algorithm to image patches - is that possible? Say I have an image split into 9 patches; I apply the k-means algorithm to each patch separately and then join them back together. Is that valid? I tried summing the 9 centroid pixel values and dividing by 9 (the number of patches), but it does not give the same answer as applying k-means to the image directly, and the difference is considerable.
Here is my code:
import cv2
import numpy
import matplotlib.pyplot as plt
import patchify
image = cv2.imread(r"image.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#image = numpy.resize(image,(2000,2500,3)) if it is needed
patched_image = patchify.patchify(image, (250,250,3),step = 250)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.000000000000000000000002)
k = 3
all_centers = []
all_indexes = []
for i in range(0, patched_image.shape[0]):
    for j in range(0, patched_image.shape[1]):
        img = patched_image[i, j][0]
        pixel_values = img.reshape((-1, 3))
        pixel_values = numpy.float32(pixel_values)
        _, labels, (centers) = cv2.kmeans(pixel_values, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
        centers = numpy.uint8(centers)
        all_centers.append(centers)
        indexes = centers.argsort(axis=0)[:, 0]
        all_indexes.append(indexes)
        labels = labels.flatten()
        segmented_image = centers[labels]
        patched_image[i, j][0] = segmented_image.reshape(img.shape)
all_centers = numpy.array(all_centers)
all_indexes = numpy.array(all_indexes)
c = numpy.where(all_indexes == 0)
t = all_centers[c]
# t is centroid values of label 0
unpatched_image = patchify.unpatchify(patched_image, image.shape)
Here is an image to use as a sample.
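For comparison, this is roughly how I apply k-means directly to the whole image (an untested sketch reusing image, k and criteria from the code above):
pixel_values = numpy.float32(image.reshape((-1, 3)))
_, labels, centers = cv2.kmeans(pixel_values, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
centers = numpy.uint8(centers)
segmented_full = centers[labels.flatten()].reshape(image.shape)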

How to morph two grid-like images seamlessly?

I have two images that consist of colored squares with different grid step (10x10 and 12x12).
What I want is to make the first image to be smoothly transformed into the second one.
When I use a plain image overlay with cv2.addWeighted() function, the result (left) is not good because of the intersected grid spaces. I suppose it would be better to shift remaining grid cells to the borders and clear out the rest (right).
Is there any algorithm to deal with this task?
Thanks.
You can interpolate each pixel individually between different images.
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
np.random.seed(200)
num_images = 2
images = np.random.rand(num_images, 8,8)
for index, im in enumerate(images):
    print(f'Images {index}')
    fig = plt.imshow(im)
    plt.show()
Interpolating these images:
n_frames = 4
x_array = np.linspace(0, 1, int(n_frames))
def interpolate_images(frame):
    intermediate_image = np.zeros((1, *images.shape[1:]))
    for lay in range(images.shape[1]):
        for lat in range(images.shape[2]):
            tck = interpolate.splrep(np.linspace(0, 1, images.shape[0]), images[:, lay, lat], k=1)
            intermediate_image[:, lay, lat] = interpolate.splev(x_array[frame], tck)
    return intermediate_image
for frame in range(n_frames):
    im = interpolate_images(int(frame))
    fig = plt.imshow(im[0])
    plt.show()
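To try this on two actual colour pictures instead of random grayscale arrays, the two images can go through the same per-pixel interpolation; with only two key images and k=1, each frame reduces to a weighted blend (a sketch continuing the imports above; the file names are placeholders and both images must share the same shape):
img_a = plt.imread("grid_10x10.png")   # placeholder file name
img_b = plt.imread("grid_12x12.png")   # placeholder file name
n_frames = 4
for t in np.linspace(0, 1, n_frames):
    frame = (1 - t) * img_a + t * img_b   # with two key images, the k=1 spline is exactly this blend
    plt.imshow(frame)
    plt.show()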

Why are my DALL-E Images Coming Out So Desaturated After Resizing?

I'm very new to ML image manipulation/creation, so if I confuse you all with my own lack of knowledge on the subject, I apologize in advance.
I'm attempting to increase the resolution of images produced by DALL-E, inspired by this article:
https://towardsdatascience.com/big-art-using-machine-learning-to-create-high-res-fine-art-7dd695f99788
However, when I attempt to feed images from DALL-E into the "Generate 1K Image" section of the original author's code/colab, my original image becomes very washed out; likely because in the source, some sort of tensor from a different model is fed in, while in my own version, I'm converting an image to a (poorly made?) tensor in the same section and then feeding that in.
Here's what I have:
#@title Generate 1K Image
from google.colab import files
from io import BytesIO
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import torch
from torchvision import transforms as T
import IPython
import os.path
import cv2
uploaded = files.upload()
texture_amount = 0.05 #@param {type:"slider", min:0, max:0.15, step:0.001}
texture_size = 3 #@param {type:"slider", min:1, max:9, step:2}
enhance_details = True #@param {type:"boolean"}
img = Image.open(BytesIO(uploaded['knight.png']))
plt.imshow(img)
plt.show()
transform = T.Compose([
    T.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage.cuda()
selected_img = selected_img.type(torch.cuda.FloatTensor)
selected_img = selected_img.add(1).div(2)[None, :]
with torch.no_grad():
    torch.cuda.empty_cache()
    resized = bsrgan_model(selected_img)
    torch.cuda.empty_cache()
noise = torch.normal(0, texture_amount,
                     size=[resized.shape[0], 1, resized.shape[2], resized.shape[3]]).to(device)
noise = noise.repeat(1, 3, 1, 1)
noise_blurred = T.GaussianBlur(kernel_size=texture_size, sigma=1)(noise)
noise_blurred = noise*0.25 + noise_blurred*0.75
resized = (resized+noise_blurred)
final_image = resized.to(device)
if enhance_details:
    with torch.no_grad():
        torch.cuda.empty_cache()
        z, *_ = vqgan_model.encode(final_image * 2 - 1)
        final_image = vqgan_model.decode(z)[0].add(1).div(2).clamp(min=0, max=1)
        torch.cuda.empty_cache()
    final_image = final_image.clamp(min=0, max=1)
else:
    final_image = final_image[0].clamp(min=0, max=1)
img = T.ToPILImage()(final_image)
img.save("output_1k.png")
IPython.display.Image("output_1k.png")
Original Image
Resulting Image
Any ideas as to how I can fix this issue are greatly, greatly appreciated!
Solution
You are averaging every pixel value with 1 in this line.
selected_img = selected_img.add(1).div(2)[None, :]
You should change it to this:
selected_img = selected_img[None, :]
Explanation
torchvision's ToTensor already converts the PIL image to a float tensor with values in [0, 1]. (For a pixel, R G B = 1 1 1 is white and R G B = 0 0 0 is black.) For example, if a pixel has R G B = 0 .5 1, the line above changes it to R G B = .5 .75 1, i.e. every channel is averaged with 1, which pushes all colors towards white and washes the image out. You can check it with this code.
import torch
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from torchvision import transforms
fig, axs = plt.subplots(nrows=1, ncols=3, constrained_layout=True)
img = Image.open('knight.png')
axs[0].imshow(img)
axs[0].set_title('Original Image')
transform = transforms.Compose([
    transforms.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage
selected_img = selected_img.type(torch.FloatTensor)
selected_img_0 = selected_img.add(1).div(2)[None, :]
axs[1].imshow(np.squeeze(selected_img_0).permute(1, 2, 0))
axs[1].set_title('Averaged Tensor Image')
selected_img_1 = selected_img[None, :]
axs[2].imshow(np.squeeze(selected_img_1).permute(1, 2, 0))
axs[2].set_title('Tensor Image')
plt.show()

Finding matching data point within two images using Python

I have two images, namely Fig 1 and Fig 2, both taken from the same source but not aligned. The task is to find the common data points between these two images and draw lines between the data points that match in both images; I expect the result to look like Fig 4.
So far, I have used OpenCV and written the following code:
import cv2
import matplotlib.pyplot as plt
img_file1= "Fig_1.png"
img_file2= "Fig_2.png"
img1= cv2.imread(img_file1)
img2= cv2.imread(img_file2)
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
figure, ax = plt.subplots(1, 2, figsize=(16, 8))
ax[0].imshow(img1, cmap='gray')
ax[1].imshow(img2, cmap='gray')
#sift
sift = cv2.xfeatures2d.SIFT_create()
keypoints_1, descriptors_1 = sift.detectAndCompute(img1,None)
keypoints_2, descriptors_2 = sift.detectAndCompute(img2,None)
#feature matching
bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)
matches = bf.match(descriptors_1,descriptors_2)
matches = sorted(matches, key = lambda x:x.distance)
img3 = cv2.drawMatches(img1, keypoints_1, img2, keypoints_2, matches[:50], img2, flags=2)
plt.imshow(img3),plt.show()
This does not give the expected result (see figure 4), and it looks quite messy and unclear.
Can anyone help me with how to do this? Thanks in advance.
Fig 1
Fig 2
img3
Fig 3
The transformation seems purely translational. Try template matching by normalized grayscale correlation.
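A minimal sketch of that idea, assuming the two point plots are saved as "Fig_1.png" and "Fig_2.png" as in the question: take a patch from the second image as the template, slide it over the first with normalized cross-correlation, and read the translation off the best match.
import cv2
img1 = cv2.imread("Fig_1.png", cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread("Fig_2.png", cv2.IMREAD_GRAYSCALE)
# central patch of img2 used as the template
h, w = img2.shape
y0, x0 = h // 4, w // 4
template = img2[y0:y0 + h // 2, x0:x0 + w // 2]
res = cv2.matchTemplate(img1, template, cv2.TM_CCOEFF_NORMED)
_, max_val, _, max_loc = cv2.minMaxLoc(res)
dx, dy = max_loc[0] - x0, max_loc[1] - y0   # estimated shift of img1 relative to img2
print(f"match score {max_val:.2f}, shift (dx, dy) = ({dx}, {dy})")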
Basically, this seems to me a registration problem (the images need to be registered).
Here is what you can do:
find the location of the points with connected components analysis
calculate the shift needed to register the two images. Here it seems your images are only translated, so a simple cross-correlation-based registration is enough.
from skimage.registration import phase_cross_correlation
from skimage.io import imread
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu
from matplotlib.pyplot import imshow, plot, figure
import numpy as np
# Load images
img_a = imread("671OL.jpg", as_gray=True)
img_b = imread("zpevD.jpg", as_gray=True)
# apply threshold
th_img_a = img_a > threshold_otsu(img_a)
th_img_b = img_b > threshold_otsu(img_b)
# measure connected component
img_lable = label(th_img_a)
r_props = regionprops(img_lable)
figure(figsize=(15,7))
rows, cols = img_b.shape
# calculate the registration (shift) of the two images
flow = phase_cross_correlation(th_img_a, th_img_b)
# stack the images and trace the segments that connect the points
d=10
# a vertical white bar between the two pictures
vbar=np.ones((rows,d))
xshift = cols+d
dy,dx = flow[0]
dx=dx + xshift
imshow(np.hstack([img_a, vbar, img_b]), cmap='gray')
for rp in r_props:
    y0, x0 = rp.centroid
    x1 = x0 + dx
    y1 = y0 - dy
    # filter out points that are not in img_b
    if y1 < rows and x1 < 2*cols + d:
        plot([x0, x1], [y0, y1], '--', alpha=0.5)
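If the goal is to actually overlay the registered images rather than only draw the connecting segments, the measured shift could be applied with scipy.ndimage.shift. This is only a rough sketch reusing img_a, img_b and flow from above; depending on which image is treated as the reference, the sign of the shift may need flipping.
from scipy.ndimage import shift as nd_shift
registered_b = nd_shift(img_b, flow[0])                  # shift img_b by the estimated (dy, dx)
figure(figsize=(7, 7))
imshow(0.5 * img_a + 0.5 * registered_b, cmap='gray')    # if the overlay looks doubled, try -flow[0]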

Connected Component Labeling Algorithm in Python

My work requires applying a Local Binary Operator to images. For that, I have already converted the images to grayscale and also implemented a Connected Components analysis on the image.
Here is the Code:
Adding Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
from skimage.color import rgb2gray
from skimage.morphology import (erosion, dilation, closing, opening,area_closing, area_opening)
from skimage.measure import label, regionprops, regionprops_table
Rendering the image
plt.figure(figsize=(6,6))
painting = imread("E:/Project/for_annotation/Gupi Gain0032.jpg")
plt.imshow(painting);
plt.figure(figsize=(6,6))
Binarizing Image
gray_painting = rgb2gray(painting)
binarized = gray_painting<0.55
plt.imshow(binarized);
Declaring Kernel
square = np.array([[1, 1, 1],
                   [1, 1, 1],
                   [1, 1, 1]])
Dilation function
def multi_dil(im, num, element=square):
    for i in range(num):
        im = dilation(im, element)
    return im
Erosion function
def multi_ero(im, num, element=square):
    for i in range(num):
        im = erosion(im, element)
    return im
Functions Applied
plt.figure(figsize=(6,6))
multi_dilated = multi_dil(binarized, 7)
area_closed = area_closing(multi_dilated, 50000)
multi_eroded = multi_ero(area_closed, 7)
opened = opening(multi_eroded)
plt.imshow(opened);
Label function
plt.figure(figsize=(6,6))
label_im = label(opened)
regions = regionprops(label_im)
plt.imshow(label_im);
Extract features
properties = ['area','convex_area','bbox_area', 'extent', 'mean_intensity','solidity', 'eccentricity', 'orientation']
pd.DataFrame(regionprops_table(label_im, gray_painting,
                               properties=properties))
Filtering Regions
masks = []
bbox = []
list_of_index = []
for num, x in enumerate(regions):
    area = x.area
    convex_area = x.convex_area
    if (num != 0 and (area > 100) and (convex_area/area < 1.05)
            and (convex_area/area > 0.95)):
        masks.append(regions[num].convex_image)
        bbox.append(regions[num].bbox)
        list_of_index.append(num)
count = len(masks)
Extracting Images
fig, ax = plt.subplots(2, int(count/2), figsize=(15,8))
for axis, box, mask in zip(ax.flatten(), bbox, masks):
    red = painting[:, :, 0][box[0]:box[2], box[1]:box[3]] * mask
    green = painting[:, :, 1][box[0]:box[2], box[1]:box[3]] * mask
    blue = painting[:, :, 2][box[0]:box[2], box[1]:box[3]] * mask
    image = np.dstack([red, green, blue])
    axis.imshow(image)
plt.tight_layout()
plt.figure(figsize=(6,6))
rgb_mask = np.zeros_like(label_im)
for x in list_of_index:
    rgb_mask += (label_im == x + 1).astype(int)
red = painting[:,:,0] * rgb_mask
green = painting[:,:,1] * rgb_mask
blue = painting[:,:,2] * rgb_mask
image = np.dstack([red,green,blue])
plt.imshow(image);
I am getting an error.
ValueError: Number of columns must be a positive integer, not 0
There is a possible approach which is not very far from what you attempted. Assume the background pixels are assigned the label 0, and the object pixels the value 1.
scan the image row by row;
when you meet a pixel 1, set a new label and perform a flood fill operation, replacing 1 by the new label.
Flood filling can be implemented very simply:
set the starting pixel to the new label;
recursively fill the eight neighbors, if they have a 1.
https://en.wikipedia.org/wiki/Flood_fill
The code of this version is pretty simple. But you will notice that it can easily overflow the stack because the number of pending fills can be as large as the image size.
def FloodFill(I, X, Y, Label):
    # recursively relabel the 8-connected region of 1-pixels starting at (X, Y)
    I[X, Y] = Label
    for dX in (-1, 0, 1):
        for dY in (-1, 0, 1):
            Xn, Yn = X + dX, Y + dY
            if 0 <= Xn < I.shape[0] and 0 <= Yn < I.shape[1] and I[Xn, Yn] == 1:
                FloodFill(I, Xn, Yn, Label)

def CCL(I):
    Label = 1
    for Y in range(I.shape[1]):
        for X in range(I.shape[0]):
            if I[X, Y] == 1:
                Label += 1
                FloodFill(I, X, Y, Label)
So I would recommend the scanline version, which is a little more involved.
https://en.wikipedia.org/wiki/Flood_fill#Scanline_fill
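For reference, here is a rough sketch of a non-recursive variant that keeps the pending pixels in an explicit Python list instead of the call stack. It avoids the recursion-depth problem mentioned above, although it is still not the scanline optimization:
def flood_fill_iterative(img, x, y, label):
    # relabel the 8-connected region of 1-pixels starting at (x, y),
    # keeping pending pixels on an explicit stack instead of recursing
    stack = [(x, y)]
    while stack:
        cx, cy = stack.pop()
        if img[cx, cy] != 1:
            continue
        img[cx, cy] = label
        for dx in (-1, 0, 1):
            for dy in (-1, 0, 1):
                nx, ny = cx + dx, cy + dy
                if 0 <= nx < img.shape[0] and 0 <= ny < img.shape[1] and img[nx, ny] == 1:
                    stack.append((nx, ny))

def ccl_iterative(img):
    label = 1
    for y in range(img.shape[1]):
        for x in range(img.shape[0]):
            if img[x, y] == 1:
                label += 1
                flood_fill_iterative(img, x, y, label)
    return img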
