I would like to know, whether I used toPILImage from torchvision correctly. I want to use it, to see how the images look after initial image transformations are applied to the dataset.
When I use it like in the code below, the image that comes up has weird colors like this one. The original image is a regular RGB image.
This is my code:
import os
import torch
from PIL import Image, ImageFont, ImageDraw
import torch.utils.data as data
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# Image transformations
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
transform_img = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
normalize ])
train_data = torchvision.datasets.ImageFolder(
root='./train_cl/',
transform=transform_img
)
test_data = torchvision.datasets.ImageFolder(
root='./test_named_cl/',
transform=transform_img
)
train_data_loader = data.DataLoader(train_data,
batch_size=4,
shuffle=True,
num_workers=4) #num_workers=args.nThreads)
test_data_loader = data.DataLoader(test_data,
batch_size=32,
shuffle=False,
num_workers=4)
# Open Image from dataset:
to_pil_image = transforms.ToPILImage()
my_img, _ = train_data[248]
results = to_pil_image(my_img)
results.show()
Edit:
I had to use .data on the Torch Variable to get the tensor.
Also I needed to rescale the numpy array before transposing. I found a working solution here, but it doesn't always work well. How can I do this better?
for i, data in enumerate(train_data_loader, 0):
img, labels = data
img = Variable(img)
break
image = img.data.cpu().numpy()[0]
# This worked for rescaling:
image = (1/(2*2.25)) * image + 0.5
# Both of these didn't work:
# image /= (image.max()/255.0)
# image *= (255.0/image.max())
image = np.transpose(image, (1,2,0))
plt.imshow(image)
plt.show()
You can use PIL image but you're not actually loading the data as you would normally.
Try something like this instead:
import numpy as np
import matplotlib.pyplot as plt
for img,labels in train_data_loader:
# load a batch from train data
break
# this converts it from GPU to CPU and selects first image
img = img.cpu().numpy()[0]
#convert image back to Height,Width,Channels
img = np.transpose(img, (1,2,0))
#show the image
plt.imshow(img)
plt.show()
As an update (02-10-2021):
import torchvision.transforms.functional as F
# load the image (creating a random image as an example)
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = F.to_pil_image(img_data)
Alternatively
import torchvision.transforms as transforms
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = transforms.ToPILImage()(img_data)
The second form can be integrated with dataset loader in pytorch or called directly as so.
I added a modified to_pil_image here
essentially it does what I suggested back in 2018 but it is integrated into pytorch now.
I would use something like this
# Open Image from dataset:
my_img, _ = train_data[248]
results = transforms.ToPILImage()(my_img)
results.show()
Related
I have 1000 of images. Now I like to convert those images into grayscale?
import tensorflow as tf
from tensorflow.keras.utils import img_to_array
#df['image_name'] = df['image_name'].apply(str)
df_image = []
for i in tqdm(range(df.shape[0])):
img = image.load_img('/content/drive/MyDrive/Predict DF from Image of Chemical
Structure/2D image/'+df['image_name'][i]+'.png',target_size=(100,100,3))
img = image.img_to_array(img)
img = img/255
df_image.append(img)
X = np.array(df_image)
Per the TensorFlow documentation for tf.keras.utils.load_img, it accepts the argument color_mode, which is
One of "grayscale", "rgb", "rgba". Default: "rgb". The desired image format.
and it also returns "A PIL Image instance.".
The best way to do this is
img = image.load_img(
'/content/drive/MyDrive/Predict DF from Image of Chemical Structure/2D image/'+df['image_name'][i]+'.png',
target_size=(100,100,3),
color_mode="grayscale"
)
If I'm misinterpreting the documentation, the following should also work (put this after load_img but before img_to_array):
img = img.convert("L") # if you need alpha preserved, "LA"
Since this is a PIL Image instance, it has the .convert method. "L" converts the image to just lightness values
I am working with grayscale images of size 75 by 75 and want to perform some augmentation techniques using ImageDataGenerator.
But wondering if we can repeat the output consistently if we run multiple times. I am not talking about epochs but like running the whole code to mimic the exact same augmented images to get same results.
I am attaching sample grayscale image:
import matplotlib.pyplot as plt
import numpy as np
from scipy import misc, ndimage
from keras.preprocessing.image import ImageDataGenerator
gen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1,
height_shift_range=0.1, zoom_range=0.1, # shear_range=0.15,
channel_shift_range=10., horizontal_flip=True, vertical_flip = True,
rescale = 0.2, fill_mode = 'wrap')
image_path = '/trial_img.png' # grayscale image
# Obtain image
# data_format = [#num_images,height,width,#num_of_channels]
# where, #num_images = 1 and #num_of_channels = 1, height = width = 75
image = np.expand_dims(ndimage.imread(image_path),0) # add num_images dimension
image = np.expand_dims(image, axis=3) # add num_of_channels dimension
plt.imshow(image.reshape(75,75), cmap = 'gray')
# Trial #1
# Generate batches of augmented images from this image
aug_iter = gen.flow(image)
# Get 10 samples of augmented images
aug_images1 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
# Trial #2
aug_iter = gen.flow(image)
aug_images2 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
# check if equal
truth = []
for val in range(10):
truth.append((aug_images1[val] == aug_images2[val]).all()) # check images
np.asarray(truth).all() # check if all images are same
How to repeat the augmented outputs consistently in above code?
I know this code is written very badly, any suggestions on code optimization are also greatly appreciated.
Thanks,
Gopi
You can set a seed to the flow method:
aug_iter = gen.flow(image, seed = 0)
By setting this parameter to a specific integer, you will always get the same sequence of random shuffling/transformations.
You could run the generator and save the images, then simply load the images:
# Trial #1
# Generate batches of augmented images from this image
aug_iter = gen.flow(image)
# Get 10 samples of augmented images
aug_images1 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
If memory is not a problem, you can save this with numpy:
aug_images1 = np.array(aug_images1)
np.save(filename, aug_images1)
Then load it:
aug_images1 = np.load(filename)
If you prefer, you can save each image as proper image files (less memory occupied) using an image library such as Pillow:
from PIL import Image
for (im,filename in zip(aug_images1,list_of_names)):
im = Image.fromarray(im) #make sure you have a uint8 from 0 to 255 array.
im.save(filename)
Later, load the files:
aug_images1 = [np.array(image.open(filename)) for filename in list_of_names]
aug_images1 = np.array(aug_images1)
Using ImageDataGenerator for loading files
In case you don't want to load all images at once in memory, with saved images, you can create a new ImageDataGenerator, but without any kind of augmentation, just a pure image loader.
Then use gen.flow_from_directory() to get images from a directory.
Read more in the documentation: https://keras.io/preprocessing/image/
How can I vectorize, reshape, and normalise my image to the same as the size vector of one of the images in mnist.train.images? I've tried the below so far with no success:
import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
import os,sys
#import Image
from PIL import Image
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
# Load data and check the shape of the first mnist.train.images image
data_dir = 'temp'
mnist = read_data_sets(data_dir)
print("tmnist.train.images[0].shape is")
print(mnist.train.images[0].shape) # the result is (784,)
def resize_image(image):
img = Image.open(image)
arr = np.array(img)
#my mind is drawing a blank... additional code to make this work...
return arr
resize_image("test.png")
The following should work:
def resize_image(image):
img = Image.open(image)
img = img.resize((28, 28))
arr = np.array(img)
#convert to gray scale
if len(arr.shape) > 2:
arr = np.mean(arr, 2)
#flatten
arr = arr.flatten()
return arr
%matplotlib inline
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
img = np.random.rand(224,224,3)
plt.imshow(img)
plt.show()
img_path = "image.jpeg"
img = image.load_img(img_path, target_size=(224, 224))
print(type(img))
x = image.img_to_array(img)
print(type(x))
print(x.shape)
plt.imshow(x)
I have some code like this which should print the image. But it shows the image in wrong channels. What am i missing here?
This is a image scaling issue. The input to the imshow() expects it to be in the 0-1 range, while you are passing it a [0-255] range input. Try to view it as:
plt.imshow(x/255.)
This question is kind of old, but there is a very comfortable way to
display images:
tf.keras.preprocessing.image.array_to_img(image[0]).show()
Your image has to have 3 dimensions (if its in a batch as normally, just take desired_element). Works fine on EagerTensors or numpy arrays.
I'm trying to execute this code that processes 70 images and extracts Histogram of Oriented Gradients (HOG) features. These are passed to a classifier (Scikit-Learn).
However, an error is raised:
hog_image = hog_image_rescaled.resize((200, 200), Image.ANTIALIAS)
TypeError: an integer is required
I do not understand why, because with attempting with a single image works correctly.
#Hog Feature
from skimage.feature import hog
from skimage import data, color, exposure
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import numpy as np
from numpy import array
listagrigie = []
path = 'img/'
for infile in glob.glob( os.path.join(path, '*.jpg') ):
print("current file is: " + infile )
colorato = Image.open(infile)
greyscale = colorato.convert('1')
#hog feature
fd, hog_image = hog(greyscale, orientations=8, pixels_per_cell=(16, 16),
cells_per_block=(1, 1), visualise=True)
plt.figure(figsize=(8, 4))
print(type(fd))
plt.subplot(121).set_axis_off()
plt.imshow(grigiscala, cmap=plt.cm.gray)
plt.title('Input image')
# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))
print("hog 1 immagine shape")
print(hog_image_rescaled.shape)
hog_image = hog_image_rescaled.resize((200, 200), Image.ANTIALIAS)
listagrigie.append(hog_image)
target.append(i)
print("ARRAY of gray matrices")
print(len(listagrigie))
grigiume = np.dstack(listagrigie)
print(grigiume.shape)
grigiume = np.rollaxis(grigiume, -1)
print(grigiume.shape)
from sklearn import svm, metrics
n_samples = len(listagrigie)
data = grigiume.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# We learn the digits on the first half of the digits
classifier.fit(data[:n_samples / 2], target[:n_samples / 2])
# Now predict the value of the digit on the second half:
expected = target[n_samples / 2:]
predicted = classifier.predict(data[n_samples / 2:])
print("expected")
print("predicted")
You should rescale the source image (named colorato in your example) to (200, 200), then extract the HOG features and then pass the list of fd vectors to your machine learning models. The hog_image are just meant to visualize the feature descriptors in a user friendly manner. The actual features are returned in the fd variable.