not producing right output in mean_shift algorithm and performing segmentation for the image - python

I have a problem like I have to implement shift algorithm and perform the segmentation for the image. here is vegetable image
I have to use a suitable bandwidth such that the vegetables look as seprated as can. I used manually sklearn estimate_bandwidth to calculate bandwidth and i hard coded. I am not allowed to use sklearn i just can use numpy,PIL or matplotlib to implement this.
here is what i tried
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Load the image
img = np.array(Image.open("peppers.jpg"))
# Convert the image to grayscale
gray_img = np.mean(img, axis=2)
# Flatten the image to a 2D array of pixel values
flat_img = gray_img.reshape((-1, 1))
# Define the distance metric
def euclidean_distance(x1, x2):
return np.sqrt(np.sum((x1 - x2) ** 2))
# Estimate the bandwidth parameter using the median of the pairwise distances
bandwidth = 0.24570638879032147
# Perform Mean Shift clustering
centroids = []
for i, point in enumerate(flat_img):
centroid = point
converged = False
while not converged:
points_within_bandwidth = flat_img[euclidean_distance(flat_img, centroid) < bandwidth]
new_centroid = np.mean(points_within_bandwidth, axis=0)
if euclidean_distance(new_centroid, centroid) < 1e-5:
converged = True
centroid = new_centroid
centroids.append(centroid)
# Assign each data point to a cluster based on its converged mean
labels = np.zeros_like(flat_img)
for i, centroid in enumerate(centroids):
labels[euclidean_distance(flat_img, centroid) < bandwidth] = i
# Reshape the labels to the shape of the original image
segmented_img = labels.reshape(gray_img.shape)
# Display the segmented image
plt.imshow(segmented_img)
plt.show()
First it took a long time and does not showed the right output.

Related

Wiener filter output doesn't look good enough

I have an image, I add some noise to it and try to denoise it using wiener filter:
Which in case of additive white noise and no blurring simplifies to:
Here is my code according to above formula, however, there is almost no difference with the input image.
import cv2
import numpy as np
img=cv2.imread('Images/P3.jpg',0)
freq2 = np.fft.fft2(img)
mean = 0
var = 100
sigma = var**0.5
gauss = np.random.normal(mean,sigma,np.shape(img))
courrupted=img+gauss
freq2h = np.fft.fft2(gauss)
courrupted[courrupted<0]=0
courrupted[courrupted>255]=255
courrupted=courrupted.astype(np.uint8)
crfre=np.fft.fftshift(np.fft.fft2(courrupted))
sf=np.abs(crfre)**2
wiener=sf/(sf+(100))
F_hat = crfre*wiener
f_hat = np.fft.ifft2( (F_hat))
restored = abs(f_hat)
normalizedImg=np.ones(img.shape)
normalizedImg = cv2.normalize(restored, normalizedImg, 0, 255, cv2.NORM_MINMAX)
cv2.imwrite('output.jpg',normalizedImg)
cv2.imwrite('input.jpg',courrupted)
This is ground truth image:
This is input:
And this is output:
The following works for me in Python/OpenCV/Numpy. As #Cris Luengo suggested, you need to test various values for the noise value, because the value you need may not be exactly your Gaussian variance input value.
Input:
import cv2
import numpy as np
# read image as grayscale
img = cv2.imread('pandas_noisy.jpg',0)
# take dft
dft = np.fft.fft2(img)
# get power spectral density of dft = square of magnitude
# where abs of complex number is the magnitude
pspec = (np.abs(dft))**2
print(np.amin(pspec))
print(np.amax(pspec))
# estimate noise power spectral density
# try different values to achieve compromise between noise reduction and softening/blurring
#noise = 100000000
#noise = 500000000
#noise = 1000000000
noise = 5000000000
# do wiener filtering
wiener = pspec/(pspec+noise)
wiener = wiener*dft
# do dft to restore
restored = np.fft.ifft2(wiener)
# take real() component (or do abs())
restored = np.real(restored)
print(np.amin(restored))
print(np.amax(restored))
# clip and convert to uint8
restored = restored.clip(0,255).astype(np.uint8)
# save results
#cv2.imwrite('pandas_noisy_restored_100000000.jpg',restored)
#cv2.imwrite('pandas_noisy_restored_500000000.jpg',restored)
#cv2.imwrite('pandas_noisy_restored_1000000000.jpg',restored)
cv2.imwrite('pandas_noisy_restored_5000000000.jpg',restored)
# display results
cv2.imshow("input", img)
cv2.imshow("restored", restored)
cv2.waitKey(0)
Restored Result for Noise=100000000:
Restored Result for Noise=500000000:
Restored Result for Noise=1000000000:
Restored Result for Noise=5000000000:

image translation in Pytorch, using affine_grid & grid_sample functions

I am going to move the image for 1 or 2 pixels, as I specified a small number (1.25 , 1.9) in the affine matrix.
BUT, the image is moved far far away, like hundreds of pixels:
( my input image is fully filled with yellow pineapples)
Below is a working example.
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torch.nn.functional as F
rotation_simple = np.array([[1,0, 1.25],
[ 0,1, 1.9]])
#load image
transform = transforms.Compose([transforms.Resize(255),
transforms.CenterCrop(224),
transforms.ToTensor()])
dataloader = torch.utils.data.DataLoader(datasets.ImageFolder('/home/Pictures',transform=transform,), shuffle=True)
dtype = torch.FloatTensor
i = 0
while i<3:
img, labels = next(iter(dataloader))
img = img#.double() # 有时候要转为double有时候不用转
rotation_simple = torch.as_tensor(rotation_simple)[None]
grid = F.affine_grid(rotation_simple, img.size()).type(dtype)
x = F.grid_sample(img, grid)
plt.imshow(x[0].permute(1, 2, 0))
plt.show()
i+=1
I wonder why does the function move the the image so far away instead of moving it for just 1 pixel in x and y direction.
Ps. Setting "align_corners=True" didn't help for this case.
Pps. My pytorch version is 1.4.0+cu100
The "unit of measures" for the grid and the affine transformation are not pixels, but rather normalized coordinates:
grid specifies the sampling pixel locations normalized by the input spatial dimensions. Therefore, it should have most values in the range of [-1, 1]. For example, values x = -1, y = -1 is the left-top pixel of input, and values x = 1, y = 1 is the right-bottom pixel of input.
Therefore, translating by [1.25, 1.9] is actually translating by almost the entire image size. You need to divide the translation values by 2*img.shape to get pixel-wise translations.
See the doc for grid_sample for more information.

Sliding window on an image to calculate variance of pixels in that window

I am trying to build a function that uses sliding window over and image and calculates the variance of pixels in the window and returns a bounding box where there is the most variance observed.
I'm new to coding and I've tried solutions from this post but I don't know how to input image in that instead of array.
I'm on a deadline here and been trying this since a while so any help is much appreciated . TIA
Edit: Also, if someone could help me with how to call the rolling_window_lastaxis function and modify it to what I'm trying to do then it would mean a lot.
Here is one way to compute the sliding window variance (or standard deviation) using Python/OpenCV/Skimage.
This approach makes use of the following form for computing the variance (see https://en.wikipedia.org/wiki/Variance):
Variance = mean of square of image - square of mean of image
However, since the variance will be outside the 8-bit range, we take the square root to form the standard deviation.
I also use the (local) mean filter from the Skimage rank filter module.
Input:
import cv2
import numpy as np
from skimage.morphology import rectangle
import skimage.filters as filters
# Variance = mean of square of image - square of mean of image
# See # see https://en.wikipedia.org/wiki/Variance
# read the image
# convert to 16-bits grayscale since mean filter below is limited
# to single channel 8 or 16-bits, not float
# and variance will be larger than 8-bit range
img = cv2.imread('lena.png', cv2.IMREAD_GRAYSCALE).astype(np.uint16)
# compute square of image
img_sq = cv2.multiply(img, img)
# compute local mean in 5x5 rectangular region of each image
# note: python will give warning about slower performance when processing 16-bit images
region = rectangle(5,5)
mean_img = filters.rank.mean(img, selem=region)
mean_img_sq = filters.rank.mean(img_sq, selem=region)
# compute square of local mean of img
sq_mean_img = cv2.multiply(mean_img, mean_img)
# compute variance using float versions of images
var = cv2.add(mean_img_sq.astype(np.float32), -sq_mean_img.astype(np.float32))
# compute standard deviation and convert to 8-bit format
std = cv2.sqrt(var).clip(0,255).astype(np.uint8)
# save results
# multiply by 2 to make brighter as an example
cv2.imwrite('lena_std.png',2*std)
# show results
# multiply by 2 to make brighter as an example
cv2.imshow('std', 2*std)
cv2.waitKey(0)
cv2.destroyAllWindows()
Local Standard Deviation Image for 5x5 Sliding Window:
ADDITION
Here is a version that finds the bounding box for the maximum average variance for the bounding box size and draws it on the variance image (actually standard deviation).
import cv2
import numpy as np
from skimage.morphology import rectangle
import skimage.filters as filters
# Variance = mean of square of image - square of mean of image
# See # see https://en.wikipedia.org/wiki/Variance
# set the bounding box size
bbox_size = 25
# read the image
# convert to 16-bits grayscale since mean filter below is limited
# to single channel 8 or 16-bits, not float
# and variance will be larger than 8-bit range
img = cv2.imread('lena.png', cv2.IMREAD_GRAYSCALE).astype(np.uint16)
# compute square of image
img_sq = cv2.multiply(img, img)
# compute local mean in bbox_size x bbox_size rectangular region of each image
# note: python will give warning about slower performance when processing 16-bit images
region = rectangle(bbox_size, bbox_size)
mean_img = filters.rank.mean(img, selem=region)
mean_img_sq = filters.rank.mean(img_sq, selem=region)
# compute square of local mean of img
sq_mean_img = cv2.multiply(mean_img, mean_img)
# compute variance using float versions of images
var = cv2.add(mean_img_sq.astype(np.float32), -sq_mean_img.astype(np.float32))
# compute standard deviation and convert to 8-bit format
std = cv2.sqrt(var).clip(0,255).astype(np.uint8)
# find bbox_size x bbox_size region with largest var (or std)
# get the moving window average at each pixel
std_ave = (cv2.sqrt(var)).astype(np.uint8)
# find the pixel x,y with the largest mean
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(std_ave)
x,y = max_loc
print("x:", x, "y:", y, "max:", max_val)
# draw rectangle for bounding box on copy of std image
result = std.copy()
result = cv2.merge([result, result, result])
cv2.rectangle(result, (x, y), (x+bbox_size, y+bbox_size), (0,0,255), 1)
# save results
# multiply by 2 to make brighter as an example
cv2.imwrite('lena_std.png',std)
cv2.imwrite('lena_std_bbox.png',result)
# show results
# multiply by 2 to make brighter as an example
cv2.imshow('std', std)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
x: 208 y: 67 max: 79.0
Resulting Bounding Box:
An alternative method to compute the windowed/rolling variance in regions of WxH is to use just numpy and scipy with convolutions, which are computed fairly quickly. An example:
import numpy as np
import scipy.signal
# Create image data
original = np.zeros((811,123))
img = original + np.random.normal(0, 1, original.shape)
# Create averaging kernel
H, W = 5, 5
mean_op = np.ones((H,W))/(H*W)
# Carry out convolution to compute mean of square, and square of mean
mean_of_sq = scipy.signal.convolve2d( img**2, mean_op, mode='same', boundary='symm')
sq_of_mean = scipy.signal.convolve2d( img , mean_op, mode='same', boundary='symm') **2
win_var = mean_of_sq - sq_of_mean

Pytorch transforms.Compose usage for pair of images in segmentation tasks

I'm trying to use the transforms.Compose() in my segmentation task. But I'm not sure how to use the same (almost) random transforms for both the image and the mask.
So in my segmentation task, I have the raw picture and the corresponding mask, I'd like to generate more random transformed image pairs for training popurse. Meaning if I do some transform on my raw pictures, and this transformation should also happen on my mask pictures, and then this pair can go into my CNN. My transformer is something like:
train_transform = transforms.Compose([
transforms.Resize(512), # resize, the smaller edge will be matched.
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomRotation(90),
transforms.RandomResizedCrop(320,scale=(0.3, 1.0)),
AddGaussianNoise(0., 1.),
transforms.ToTensor(), # convert a PIL image or ndarray to tensor.
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # normalize to Imagenet mean and std
])
mask_transform = transforms.Compose([
transforms.Resize(512), # resize, the smaller edge will be matched.
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomRotation(90),
transforms.RandomResizedCrop(320,scale=(0.3, 1.0)),
##---------------------!------------------
transforms.ToTensor(), # convert a PIL image or ndarray to tensor.
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # normalize to Imagenet mean and std
])
Notice, in the code block, I added a class that can add random noise to the raw images transformation, which is not in the mask_transformation, that I want my mask images follow the raw image transformation, but ignore the random noise. So how can these two transformations happen in pairs (with the same random act)?
This seems to have an answer here: How to apply same transform on a pair of picture.
Basically, you can use the torchvision functional API to get a handle to the randomly generated parameters of a random transform such as RandomCrop. Then call torchvision.transforms.functional.crop() on both images with the same parameter values. It seems a bit lengthy but gets the job done. You can skip some transforms on some images, as per your need.
Another option that I've seen elsewhere is to re-seed the random generator with the same seed, to force generation of the same random transformations twice. I would think that such implementations are hacky and keep changing with pytorch versions (e.g. whether to re-seed np.random, random, or torch.manual_seed() ?)
So Sabyasachi's answer is really helpful for me, and I was able to use the transformer in PyTorch to transform my images. This usage of the torchvision.transformer is not the most straightforward way for transferring images. So I'm adding my solution that has an example of using the torchvision.transforms.functional, but also using skimage.filters, and lots of transform functions are available here: https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.unsharp_mask.
import torchvision.transforms.functional as TF
from skimage.filters import gaussian
from skimage.filters import unsharp_mask
def transformer(image, mask):
# image and mask are PIL image object.
img_w, img_h = image.size
# Random horizontal flipping
if random.random() > 0.5:
image = TF.hflip(image)
mask = TF.hflip(mask)
# Random vertical flipping
if random.random() > 0.5:
image = TF.vflip(image)
mask = TF.vflip(mask)
# Random affine
affine_param = transforms.RandomAffine.get_params(
degrees = [-180, 180], translate = [0.3,0.3],
img_size = [img_w, img_h], scale_ranges = [1, 1.3],
shears = [2,2])
image = TF.affine(image,
affine_param[0], affine_param[1],
affine_param[2], affine_param[3])
mask = TF.affine(mask,
affine_param[0], affine_param[1],
affine_param[2], affine_param[3])
image = np.array(image)
mask = np.array(mask)
# Randome GaussianBlur -- only for images
if random.random() < 0.25:
sigma_param = random.uniform(0.01, 1)
image = gaussian(image, sigma=sigma_param)
# Randome Gaussian Noise -- only for images
if random.random() < 0.25:
factor_param = random.uniform(0.01, 0.5)
image = image + factor_param * image.std() * np.random.randn(image.shape[0], image.shape[1])
# Unsharp filter -- only for images
if random.random() < 0.25:
radius_param = random.uniform(0, 5)
amount_param = random.uniform(0.5, 2)
image = unsharp_mask(image, radius = radius_param, amount=amount_param)
f, ax = plt.subplots(1, 2, figsize=(8, 8))
ax[0].imshow(image)
ax[1].imshow(mask)
return image, mask
I think I have a simple solution:
If the images are concatenated, the transformations are applied to all of them identically:
import torch
import torchvision.transforms as T
# Create two fake images (identical for test purposes):
image = torch.randn((3, 128, 128))
target = image.clone()
# This is the trick (concatenate the images):
both_images = torch.cat((image.unsqueeze(0), target.unsqueeze(0)),0)
# Apply the transformations to both images simultaneously:
transformed_images = T.RandomRotation(180)(both_images)
# Get the transformed images:
image_trans = transformed_images[0]
target_trans = transformed_images[1]
# Compare the transformed images:
torch.all(image_trans == target_trans).item()
>> True

Checking if image is mostly black and white or color

I am trying to classify if an image mostly contains black and white or color, to be precise it is a photo of a photocopy(think xerox),which is mostly black and white.The image is NOT single channel image, but a 3 channel image.
I just want to know if there are any obvious ways to solve this that im missing.
for now im trying to plot histograms and may be do a pixel count, but that does not look very promising,any suggestions on this would be really helpful.
Thanks in advance.
I am unsure of the exact use case, but having experienced similar issues I used this rather helpful article.
https://www.alanzucconi.com/2015/05/24/how-to-find-the-main-colours-in-an-image/
The GitHub containing the full code is found here: https://gist.github.com/jayapal/077f63f3163abbfb3c50c7d209524cc6
If this is for your own visual the histogram should be enough, if you are attempting to automate however, it may be helpful to round the color values up or down, this would provide information on if the image is darker or lighter than a certain value.
What are you using this code for on a larger perspective? Maybe that will help provide more adequate information
Edit: The code above also provides the ability to define a region of the image, hopefully this will make your selection more accurate
Adding code directly
from sklearn.cluster import KMeans
from sklearn import metrics
import cv2
import numpy as np
import cv2
image = cv2.imread("red.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Resize it
h, w, _ = image.shape
w_new = int(100 * w / max(w, h) )
h_new = int(100 * h / max(w, h) )
image = cv2.resize(image, (w_new, h_new));
# Reshape the image to be a list of pixels
image_array = image.reshape((image.shape[0] * image.shape[1], 3))
print image_array
# Clusters the pixels
clt = KMeans(n_clusters = 3)
clt.fit(image_array)
def centroid_histogram(clt):
# grab the number of different clusters and create a histogram
# based on the number of pixels assigned to each cluster
numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
(hist, _) = np.histogram(clt.labels_, bins = numLabels)
# normalize the histogram, such that it sums to one
hist = hist.astype("float")
hist /= hist.sum()
# return the histogram
return hist
# Finds how many pixels are in each cluster
hist = centroid_histogram(clt)
# Sort the clusters according to how many pixel they have
zipped = zip (hist, clt.cluster_centers_)
zipped.sort(reverse=True, key=lambda x : x[0])
hist, clt.cluster_centers = zip(*zipped)
# By Adrian Rosebrock
import numpy as np
import cv2
bestSilhouette = -1
bestClusters = 0;
for clusters in range(2, 10):
# Cluster colours
clt = KMeans(n_clusters = clusters)
clt.fit(image_array)
# Validate clustering result
silhouette = metrics.silhouette_score(image_array, clt.labels_,
metric='euclidean')
# Find the best one
if silhouette > bestSilhouette:
bestSilhouette = silhouette;
bestClusters = clusters;
print bestSilhouette
print bestClusters

Categories

Resources