I'm trying to execute this code that processes 70 images and extracts Histogram of Oriented Gradients (HOG) features. These are passed to a classifier (Scikit-Learn).
However, an error is raised:
hog_image = hog_image_rescaled.resize((200, 200), Image.ANTIALIAS)
TypeError: an integer is required
I do not understand why, because when I try the same steps with a single image, it works correctly.
#Hog Feature
from skimage.feature import hog
from skimage import data, color, exposure
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import numpy as np
from numpy import array
listagrigie = []
path = 'img/'
for infile in glob.glob( os.path.join(path, '*.jpg') ):
print("current file is: " + infile )
colorato = Image.open(infile)
greyscale = colorato.convert('1')
#hog feature
fd, hog_image = hog(greyscale, orientations=8, pixels_per_cell=(16, 16),
cells_per_block=(1, 1), visualise=True)
plt.figure(figsize=(8, 4))
print(type(fd))
plt.subplot(121).set_axis_off()
plt.imshow(grigiscala, cmap=plt.cm.gray)
plt.title('Input image')
# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))
print("hog 1 immagine shape")
print(hog_image_rescaled.shape)
hog_image = hog_image_rescaled.resize((200, 200), Image.ANTIALIAS)
listagrigie.append(hog_image)
target.append(i)
print("ARRAY of gray matrices")
print(len(listagrigie))
grigiume = np.dstack(listagrigie)
print(grigiume.shape)
grigiume = np.rollaxis(grigiume, -1)
print(grigiume.shape)
from sklearn import svm, metrics
n_samples = len(listagrigie)
data = grigiume.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# We learn the digits on the first half of the digits
classifier.fit(data[:n_samples // 2], target[:n_samples // 2])
# Now predict the value of the digit on the second half:
expected = target[n_samples // 2:]
predicted = classifier.predict(data[n_samples // 2:])
print("expected:", expected)
print("predicted:", predicted)
You should rescale the source image (named colorato in your example) to (200, 200), then extract the HOG features and pass the list of fd vectors to your machine learning model. The hog_image is only meant to visualize the feature descriptor in a user-friendly manner; the actual features are returned in the fd variable. The TypeError itself occurs because exposure.rescale_intensity returns a NumPy array, so you are calling numpy.ndarray.resize (which expects integer shape arguments) with arguments intended for PIL's Image.resize.
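A minimal sketch of that approach, reusing the imports and variable names from the question (the label handling is only illustrative):
# Collect one fd vector per image; resize the source PIL image, not the HOG output.
features = []
target = []
for infile in glob.glob(os.path.join(path, '*.jpg')):
    colorato = Image.open(infile)
    colorato = colorato.resize((200, 200), Image.ANTIALIAS)  # Image.LANCZOS in newer Pillow
    greyscale = np.asarray(colorato.convert('L'))            # 8-bit grayscale array for hog()
    fd = hog(greyscale, orientations=8, pixels_per_cell=(16, 16),
             cells_per_block=(1, 1))                         # fd is the feature vector
    features.append(fd)
    target.append(0)  # placeholder label; replace with the real class of infile
data = np.vstack(features)  # shape (n_samples, n_features), ready for svm.SVC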
I'm very new to ML image manipulation/creation, so if I confuse you all with my own lack of knowledge on the subject, I apologize in advance.
I'm attempting to increase the resolution of images produced by DALL-E, inspired by this article:
https://towardsdatascience.com/big-art-using-machine-learning-to-create-high-res-fine-art-7dd695f99788
However, when I feed images from DALL-E into the "Generate 1K Image" section of the original author's code/colab, my original image comes out very washed out. I suspect this is because the source notebook feeds in a tensor produced by a different model, while in my version I convert an image to a (poorly made?) tensor in the same section and feed that in.
Here's what I have:
#@title Generate 1K Image
from google.colab import files
from io import BytesIO
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import torch
from torchvision import transforms as T
import IPython
import os.path
import cv2
uploaded = files.upload()
texture_amount = 0.05 #@param {type:"slider", min:0, max:0.15, step:0.001}
texture_size = 3 #@param {type:"slider", min:1, max:9, step:2}
enhance_details = True #@param {type:"boolean"}
img = Image.open(BytesIO(uploaded['knight.png']))
plt.imshow(img)
plt.show()
transform = T.Compose([
    T.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage.cuda()
selected_img = selected_img.type(torch.cuda.FloatTensor)
selected_img = selected_img.add(1).div(2)[None, :]
with torch.no_grad():
    torch.cuda.empty_cache()
    resized = bsrgan_model(selected_img)
    torch.cuda.empty_cache()
noise = torch.normal(0, texture_amount,
                     size=[resized.shape[0], 1, resized.shape[2], resized.shape[3]]).to(device)
noise = noise.repeat(1, 3, 1, 1)
noise_blurred = T.GaussianBlur(kernel_size=texture_size, sigma=1)(noise)
noise_blurred = noise*0.25 + noise_blurred*0.75
resized = (resized+noise_blurred)
final_image = resized.to(device)
if enhance_details:
    with torch.no_grad():
        torch.cuda.empty_cache()
        z, *_ = vqgan_model.encode(final_image * 2 - 1)
        final_image = vqgan_model.decode(z)[0].add(1).div(2).clamp(min=0, max=1)
        torch.cuda.empty_cache()
    final_image = final_image.clamp(min=0, max=1)
else:
    final_image = final_image[0].clamp(min=0, max=1)
img = T.ToPILImage()(final_image)
img.save("output_1k.png")
IPython.display.Image("output_1k.png")
Original Image
Resulting Image
Any ideas as to how I can fix this issue are greatly, greatly appreciated!
Solution
You are averaging every pixel value with 1 in this line:
selected_img = selected_img.add(1).div(2)[None, :]
You should change it to this:
selected_img = selected_img[None, :]
Explanation
(For a pixel, if R G B = 1 1 1 the pixel is white, and if R G B = 0 0 0 the pixel is black.) For example, if a pixel has R G B = 0 .5 1, then applying .add(1).div(2) changes it to R G B = .5 .75 1 (the average of each value with 1). You can check it with this code:
import torch
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from torchvision import transforms
fig, axs = plt.subplots(nrows=1, ncols=3, constrained_layout=True)
img = Image.open('knight.png')
axs[0].imshow(img)
axs[0].set_title('Original Image')
transform = transforms.Compose([
transforms.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage
selected_img = selected_img.type(torch.FloatTensor)
selected_img_0 = selected_img.add(1).div(2)[None, :]
axs[1].imshow(np.squeeze(selected_img_0).permute(1, 2, 0))
axs[1].set_title('Averaged Tensor Image')
selected_img_1 = selected_img[None, :]
axs[2].imshow(np.squeeze(selected_img_1).permute(1, 2, 0))
axs[2].set_title('Tensor Image')
plt.show()
I'm trying to use transforms.Compose() in my segmentation task, but I'm not sure how to apply the same (almost) random transforms to both the image and the mask.
In my segmentation task I have the raw picture and the corresponding mask, and I'd like to generate more randomly transformed image pairs for training purposes. That means that whatever transform is applied to a raw picture should also be applied to its mask picture, so that the pair can go into my CNN. My transform is something like:
train_transform = transforms.Compose([
    transforms.Resize(512),  # resize, the smaller edge will be matched.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90),
    transforms.RandomResizedCrop(320, scale=(0.3, 1.0)),
    AddGaussianNoise(0., 1.),
    transforms.ToTensor(),  # convert a PIL image or ndarray to tensor.
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # normalize to ImageNet mean and std
])
mask_transform = transforms.Compose([
    transforms.Resize(512),  # resize, the smaller edge will be matched.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90),
    transforms.RandomResizedCrop(320, scale=(0.3, 1.0)),
    ##---------------------!------------------
    transforms.ToTensor(),  # convert a PIL image or ndarray to tensor.
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # normalize to ImageNet mean and std
])
Notice that in the code block above I added a class (AddGaussianNoise) that adds random noise to the raw-image transformation; it is not in mask_transform, because I want my mask images to follow the raw-image transformation but skip the random noise. So how can these two transformations happen in pairs (with the same random parameters)?
This seems to have an answer here: How to apply same transform on a pair of picture.
Basically, you can use the torchvision functional API to get a handle to the randomly generated parameters of a random transform such as RandomCrop. Then call torchvision.transforms.functional.crop() on both images with the same parameter values. It seems a bit lengthy but gets the job done. You can skip some transforms on some images, as per your need.
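For instance, a minimal sketch of that idea (hypothetical image and mask PIL inputs, with RandomCrop as the example transform):
import torchvision.transforms as T
import torchvision.transforms.functional as TF

def paired_random_crop(image, mask, size=(320, 320)):
    # Draw the random crop parameters once...
    i, j, h, w = T.RandomCrop.get_params(image, output_size=size)
    # ...and apply exactly the same crop to the image and its mask.
    return TF.crop(image, i, j, h, w), TF.crop(mask, i, j, h, w)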
Another option that I've seen elsewhere is to re-seed the random generator with the same seed, to force generation of the same random transformations twice. I would consider such implementations hacky, and they keep changing with PyTorch versions (e.g. whether to re-seed np.random, random, or torch.manual_seed()).
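For completeness, a sketch of that re-seeding approach (fragile for exactly the reasons above: it assumes the transform draws its parameters from torch's global RNG, which is version-dependent; the image and mask here are just stand-ins):
import torch
import torchvision.transforms as T
from PIL import Image

image = Image.new("RGB", (512, 512))  # stand-in for a real image
mask = Image.new("L", (512, 512))     # stand-in for its mask

transform = T.RandomResizedCrop(320, scale=(0.3, 1.0))
seed = torch.randint(0, 2**31, (1,)).item()  # draw one seed per (image, mask) pair

torch.manual_seed(seed)
image_t = transform(image)
torch.manual_seed(seed)
mask_t = transform(mask)  # replays the same crop parameters as image_t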
Sabyasachi's answer was really helpful for me, and I was able to use the transforms in PyTorch to transform my images. Still, that usage of torchvision.transforms is not the most straightforward way to transform images, so I'm adding my solution, which shows an example of using torchvision.transforms.functional together with skimage.filters; many more transform functions are available here: https://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.unsharp_mask.
import random
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from skimage.filters import gaussian
from skimage.filters import unsharp_mask

def transformer(image, mask):
    # image and mask are PIL image objects.
    img_w, img_h = image.size
    # Random horizontal flipping
    if random.random() > 0.5:
        image = TF.hflip(image)
        mask = TF.hflip(mask)
    # Random vertical flipping
    if random.random() > 0.5:
        image = TF.vflip(image)
        mask = TF.vflip(mask)
    # Random affine
    affine_param = transforms.RandomAffine.get_params(
        degrees=[-180, 180], translate=[0.3, 0.3],
        img_size=[img_w, img_h], scale_ranges=[1, 1.3],
        shears=[2, 2])
    image = TF.affine(image,
                      affine_param[0], affine_param[1],
                      affine_param[2], affine_param[3])
    mask = TF.affine(mask,
                     affine_param[0], affine_param[1],
                     affine_param[2], affine_param[3])
    image = np.array(image)
    mask = np.array(mask)
    # Random Gaussian blur -- only for images
    if random.random() < 0.25:
        sigma_param = random.uniform(0.01, 1)
        image = gaussian(image, sigma=sigma_param)
    # Random Gaussian noise -- only for images
    if random.random() < 0.25:
        factor_param = random.uniform(0.01, 0.5)
        image = image + factor_param * image.std() * np.random.randn(image.shape[0], image.shape[1])
    # Unsharp filter -- only for images
    if random.random() < 0.25:
        radius_param = random.uniform(0, 5)
        amount_param = random.uniform(0.5, 2)
        image = unsharp_mask(image, radius=radius_param, amount=amount_param)
    f, ax = plt.subplots(1, 2, figsize=(8, 8))
    ax[0].imshow(image)
    ax[1].imshow(mask)
    return image, mask
I think I have a simple solution:
If the images are concatenated, the transformations are applied to all of them identically:
import torch
import torchvision.transforms as T
# Create two fake images (identical for test purposes):
image = torch.randn((3, 128, 128))
target = image.clone()
# This is the trick (concatenate the images):
both_images = torch.cat((image.unsqueeze(0), target.unsqueeze(0)),0)
# Apply the transformations to both images simultaneously:
transformed_images = T.RandomRotation(180)(both_images)
# Get the transformed images:
image_trans = transformed_images[0]
target_trans = transformed_images[1]
# Compare the transformed images:
torch.all(image_trans == target_trans).item()
>> True
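A variant of the same trick for an image/mask pair with different channel counts (a sketch; the mask is assumed to be a single-channel float tensor, and only purely spatial transforms are used so the channels stay aligned) is to stack along the channel dimension and split afterwards:
import torch
import torchvision.transforms as T

image = torch.randn(3, 128, 128)                    # RGB image tensor
mask = torch.randint(0, 2, (1, 128, 128)).float()   # single-channel mask

stacked = torch.cat((image, mask), dim=0)           # shape (4, 128, 128)
stacked = T.RandomRotation(180)(stacked)            # one set of random params for all channels

image_t, mask_t = stacked[:3], stacked[3:]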
I get an error when trying to find the two circles (the inner one for the pupil and the outer one for the iris) and I'm unable to do so. First I reshape the image, then I estimate the bandwidth to get the kernel value, then I segment the image using the mean-shift algorithm, and finally I mark the cluster region in red:
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk,Image
import numpy as np
import scipy.ndimage as snd
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets.samples_generator import make_blobs
from itertools import cycle
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import cv2
pylab.rcParams['figure.figsize'] = 16, 12
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename(initialdir="F:\mean shift\images",title="Open File",filetypes= (("all files","*.*"),("jpg files","*.jpg")))
image = Image.open(file_path)
image = np.array(image)
original_shape = image.shape
# Flatten image.
X = np.reshape(image, [-1, 3])
plt.imshow(image)
bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=100)
print(bandwidth)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
print(labels.shape)
cluster_centers = ms.cluster_centers_
print(cluster_centers.shape)
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
print("number of estimated clusters : %d" % n_clusters_)
segmented_image = np.reshape(labels, original_shape[:2]) # Just take size, ignore RGB channels.
plt.figure(2)
plt.imshow(segmented_image)
plt.axis('off')
masked_image = np.copy(image)
# convert to the shape of a vector of pixel values
masked_image = masked_image.reshape((-1, 3))
# color (i.e cluster) to disable
cluster = 2
masked_image[labels == cluster] = [255, 0, 0]
# convert back to original shape
masked_image = masked_image.reshape(image.shape)
# show the image
plt.imshow(masked_image)
nemo = cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB)
cv2.imwrite("mean_shift.bmp",nemo)
plt.show()
I would like to know whether I used ToPILImage from torchvision correctly. I want to use it to see how the images look after the initial image transformations are applied to the dataset.
When I use it like in the code below, the image that comes up has weird colors like this one. The original image is a regular RGB image.
This is my code:
import os
import torch
from PIL import Image, ImageFont, ImageDraw
import torch.utils.data as data
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# Image transformations
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)
transform_img = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    normalize])
train_data = torchvision.datasets.ImageFolder(
    root='./train_cl/',
    transform=transform_img
)
test_data = torchvision.datasets.ImageFolder(
    root='./test_named_cl/',
    transform=transform_img
)
train_data_loader = data.DataLoader(train_data,
                                    batch_size=4,
                                    shuffle=True,
                                    num_workers=4)  # num_workers=args.nThreads
test_data_loader = data.DataLoader(test_data,
                                   batch_size=32,
                                   shuffle=False,
                                   num_workers=4)
# Open Image from dataset:
to_pil_image = transforms.ToPILImage()
my_img, _ = train_data[248]
results = to_pil_image(my_img)
results.show()
Edit:
I had to use .data on the Torch Variable to get the tensor.
Also I needed to rescale the numpy array before transposing. I found a working solution here, but it doesn't always work well. How can I do this better?
import numpy as np
from torch.autograd import Variable

for i, data in enumerate(train_data_loader, 0):
    img, labels = data
    img = Variable(img)
    break
image = img.data.cpu().numpy()[0]
# This worked for rescaling:
image = (1/(2*2.25)) * image + 0.5
# Both of these didn't work:
# image /= (image.max()/255.0)
# image *= (255.0/image.max())
image = np.transpose(image, (1,2,0))
plt.imshow(image)
plt.show()
You can use a PIL image, but you're not actually loading the data as you normally would.
Try something like this instead:
import numpy as np
import matplotlib.pyplot as plt
for img, labels in train_data_loader:
    # load a batch from train data
    break
# this converts it from GPU to CPU and selects first image
img = img.cpu().numpy()[0]
#convert image back to Height,Width,Channels
img = np.transpose(img, (1,2,0))
#show the image
plt.imshow(img)
plt.show()
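If the strange colors come from the Normalize step in the question, another option (a sketch, assuming the ImageNet mean/std used above) is to undo the normalization before plotting:
import numpy as np
import matplotlib.pyplot as plt

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

img, labels = next(iter(train_data_loader))  # one batch from the loader in the question
img = img[0].cpu().numpy()                   # first image, shape (C, H, W)
img = np.transpose(img, (1, 2, 0))           # to (H, W, C)
img = np.clip(img * std + mean, 0, 1)        # invert Normalize: x * std + mean
plt.imshow(img)
plt.show()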
As an update (02-10-2021):
import torchvision.transforms.functional as F
# load the image (creating a random image as an example)
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = F.to_pil_image(img_data)
Alternatively
import torchvision.transforms as transforms
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = transforms.ToPILImage()(img_data)
The second form can be integrated with a dataset loader in PyTorch or called directly, as shown.
I added a modified to_pil_image here; essentially it does what I suggested back in 2018, but it is now integrated into PyTorch.
I would use something like this
# Open Image from dataset:
my_img, _ = train_data[248]
results = transforms.ToPILImage()(my_img)
results.show()
I had a number of images from which I extracted HoG features and saved them.
I've lost the images now; that was apparently the one dataset I hadn't backed up before the machine got messed up.
However I have the files containing the HoG features of those images.
If I had the images now, I would apply shearing and rotation to the images to create more samples, and then take the HoG features of those edited images.
But since I don't have the images... is it possible to somehow work with just the HoG features of the originals in order to get the HoG features of the edited ones?
This is the code I would use to edit the images, if I still had them, from which I would then extract HoG features for object classification:
import numpy as np
from skimage import data, io, filter, color, exposure
from skimage.feature import hog
import skimage.transform as tf
from skimage.transform import resize, rescale, rotate, setup, warp, AffineTransform
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join
import pickle
import Image

def generateSamples(path, readfile):
    print "generating samples from " + path+"\\"+readfile
    img = color.rgb2gray(io.imread(path+"\\"+readfile))
    img = resize(img, (50,100))
    filename = os.path.splitext(readfile)[0]
    angles = [3, 0, -3]
    shears = [0.13, 0.0, -0.13]
    i = 0
    no_samples = len(angles) * len(shears)
    samples = np.empty((no_samples, int(img.shape[0]), int(img.shape[1])), dtype=object)
    for myangle in angles:
        myimg = rotate(img, angle=myangle, order=2)
        for myshear in shears:
            afine_tf = tf.AffineTransform(shear=myshear)
            mymyimg = tf.warp(myimg, afine_tf)
            samples[i] = np.array(mymyimg)
            i += 1
            #io.imshow(mymyimg)
            #io.show()
    newfile = filename + "_samples.vec"
    pickle.dump(samples, file(path+"\\"+newfile,'w'))
    print "saved vec file"