What is the most efficient way to load an image with TensorFlow and crop 100 images from that one image?
What I tried is:
import numpy as np
import tensorflow as tf
import cv2
filenames_train = ['image1.jpg', 'image2.jpg', 'image3.jpg']
def _opencv_operation(image, label):
    """Blur ``image`` with a 5x5 averaging filter, outside of TensorFlow.

    Returns the filtered image together with ``label`` converted to ``int``.
    """
    # 5x5 kernel whose 25 equal weights sum to one
    box_kernel = np.full((5, 5), 1, dtype=np.float32) / 25
    blurred = cv2.filter2D(image, -1, box_kernel)
    return blurred, int(label)
def _read_images_and_crop(image_path):
    """Load one JPEG, scale it to [-1, 1] and resize it to 299x299x3.

    Args:
        image_path: scalar string tensor holding the path of a JPEG file.

    Returns:
        An ``(image, label)`` pair: a float32 tensor of shape (299, 299, 3)
        and the constant placeholder label ``1``.
    """
    image = tf.read_file(image_path)
    # Force three channels so the reshape below cannot fail on grayscale JPEGs.
    image = tf.image.decode_jpeg(image, channels=3)
    print(image.shape)  # call syntax is valid on both Python 2 and Python 3
    image.set_shape([None, None, None])
    image = tf.cast(image, tf.float32)
    # Map pixel values from [0, 255] to [-1, 1].
    image = tf.scalar_mul(2. / 255., image) - 1.
    image = tf.image.resize_images(image, [299, 299])
    image = tf.reshape(image, (299, 299, 3))
    label = 1
    # TODO: what is actually wanted here is 100 random crops of this one image,
    # returned as [image1, image2, .. image100], [label1, label2, .. label100]:
    # r_values1 = <random values>
    # image1 = tf.image.crop_and_resize(image, r_values1)
    # ...
    # r_values100 = <random values>
    # image100 = tf.image.crop_and_resize(image, r_values100)
    # label = r_values1 ... r_values100
    return image, label
# Training dataset
dataset_train = tf.data.Dataset.from_tensor_slices((filenames_train))
dataset_train = dataset_train.map(_read_images_and_crop)
# The OpenCV step is plain Python/NumPy code, so it is wrapped in py_func.
# At this point the elements are (image, label) pairs, not file names.
dataset_train = dataset_train.map(
    lambda image, label: tuple(tf.py_func(
        _opencv_operation, [image, label], [tf.float32, tf.int64])))
dataset_train = dataset_train.batch(5)

# Iterator built from the dataset structure and bound to the dataset by
# running training_init_op.
iterator = tf.data.Iterator.from_structure(dataset_train.output_types,
                                           dataset_train.output_shapes)
(next_images, next_labels) = iterator.get_next()
training_init_op = iterator.make_initializer(dataset_train)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # range() works on Python 2 and 3; xrange() exists only on Python 2.
    for i_epoch in range(5):
        # Rewind the dataset, then fetch one batch.
        sess.run(training_init_op)
        curr_images, curr_labels = sess.run([next_images, next_labels])
So, what my script does is read one image from a file, resize it,
and give this image as the output.
What I need is to crop that image afterwards with 100 different crop parameters, so that I get 100 images and 100 labels as output.
But in the end I want just as many outputs at a time as the batch size.
Is this possible with the Dataset API, or is it only possible to load one image from a file and process that single image until it becomes the output of dataset_train?
I don't want to load an image 100 times and process it 100 times.
I want to load it once and process it 100 times (e.g. crop, blur with different parameters, and so on).
Related
In a coursera guided project that I was doing, the instructor used
from skimage.transform import rescale
image_rescaled = rescale(rescale(image,0.5),2.0)
to distort the image.
The error that occurs on my own device (and that didn't arise in the project's Jupyter notebook, probably due to different versions of the modules and Python) is that the number of channels of image_rescaled increases by 1.
e.g. images_normal.shape = (256,256,256,3) and images_with_twice_reshape.shape = (256,256,256,4)
This issue doesn't come up if I use rescale(rescale(image,2.0),0.5).
Is this intended in a newer version of Python/skimage, or am I doing something wrong?
For additional references(didn't delete anything from source code but highlighted important parts with #s):
import os
import re
from scipy import ndimage, misc
from skimage.transform import resize, rescale
from matplotlib import pyplot
import numpy as np
def train_batches(just_load_dataset=False):
    """Load car images in batches of 256 and train the autoencoder on each batch.

    Walks ``data/cars_train.nosync``, resizes every image that has more than
    two dimensions to 256x256 and pairs it with a degraded copy (rescaled by
    0.5, then by 2.0).  Each time 256 images have been collected they are
    either returned (``just_load_dataset=True``) or passed to the
    module-level ``autoencoder`` for fitting.

    Args:
        just_load_dataset: when true, return the first full batch instead of
            training on it.

    Returns:
        ``(x_train_n2, x_train_down2)``: the high-res and low-res arrays of
        the most recent full batch, or two empty lists if no full batch was
        ever collected.
    """
    batches = 256  # Number of images to have at the same time in a batch
    batch = 0  # Number of images in the current batch (grows over time and then resets for each batch)
    batch_nb = 0  # Batch current index
    ep = 4  # Number of epochs
    x_train_n = []
    x_train_down = []
    x_train_n2 = []  # Resulting high res dataset
    x_train_down2 = []  # Resulting low res dataset
    # Raw string avoids the invalid escape a plain string literal would
    # contain; the pattern is compiled once instead of once per file.
    image_name = re.compile(r"\.(jpg|jpeg|JPEG|png|bmp|tiff)$")
    for root, dirnames, filenames in os.walk("data/cars_train.nosync"):
        for filename in filenames:
            if not image_name.search(filename):
                continue
            filepath = os.path.join(root, filename)
            image = pyplot.imread(filepath)
            if len(image.shape) <= 2:
                continue  # skip images that have no third axis
            image_resized = resize(image, (256, 256))  # Resize the image so that every image is the same size
            #########################
            x_train_n.append(image_resized)  # Add this image to the high res dataset
            x_train_down.append(rescale(rescale(image_resized, 0.5), 2.0))  # Rescale it 0.5x and 2x so that it is a low res image but still has 256x256 resolution
            ########################
            # >>>> x_train_down.append(rescale(rescale(image_resized, 2.0), 0.5)), this one works and gives the same shape of x_train_down and x_train_n.
            ########################
            batch += 1
            if batch == batches:
                batch_nb += 1
                x_train_n2 = np.array(x_train_n)
                x_train_down2 = np.array(x_train_down)
                if just_load_dataset:
                    return x_train_n2, x_train_down2
                print('Training batch', batch_nb, '(', batches, ')')
                autoencoder.fit(x_train_down2, x_train_n2,
                                epochs=ep,
                                batch_size=10,
                                shuffle=True,
                                validation_split=0.15)
                # Start collecting the next batch from scratch.
                x_train_n = []
                x_train_down = []
                batch = 0
    return x_train_n2, x_train_down2
And with the above code, I get x_train_n2.shape = (256,256,256,3) and x_train_down2.shape=(256,256,256,4).
I was able to reproduce your issue as follows:
import numpy as np
from skimage.transform import resize, rescale
image = np.random.random((512, 512, 3))
resized = resize(image, (256, 256))
rescaled2x = rescale(
rescale(resized, 0.5),
2,
)
print(rescaled2x.shape)
# prints (256, 256, 4)
The problem is that resize can infer that your final dimension is channels/RGB, because you give it a 2D shape. rescale, on the other hand, treats your array as a 3D image of shape (256, 256, 3), which goes down to (128, 128, 2), interpolating along the colors as well, as if they were another spatial dimension, and then upsampling to (256, 256, 4).
If you look at the rescale documentation, you'll find the "multichannel" parameter, described as:
Whether the last axis of the image is to be interpreted as multiple channels or another spatial dimension.
So, updating my code:
rescaled2x = rescale(
rescale(resized, 0.5, multichannel=True),
2,
multichannel=True,
)
print(rescaled2x.shape)
# prints (256, 256, 3)
I have this problem when I run the following code in a Flask API:
# face verification with the VGGFace2 model
from matplotlib import pyplot
from PIL import Image
from numpy import asarray
from scipy.spatial.distance import cosine
from mtcnn.mtcnn import MTCNN
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
# extract a single face from a given photograph
def extract_face(filename, required_size=(254, 254)):
    """Detect the first face in an image file and return it resized.

    Args:
        filename: path of the photograph to load.
        required_size: ``(width, height)`` the face crop is resized to.

    Returns:
        The resized face crop as a NumPy array.

    Raises:
        IndexError: if the detector reports no face in the image.
    """
    # load image from file
    pixels = pyplot.imread(filename)
    # create the detector, using default weights
    detector = MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    if not results:
        # Same exception type as indexing the empty list, but with a message
        # that names the offending file.
        raise IndexError('no face detected in %r' % (filename,))
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    # A negative start index would make the slice wrap around from the far
    # edge of the image, so clamp the box origin to the image.
    x1, y1 = max(x1, 0), max(y1, 0)
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = asarray(image)
    return face_array
# extract faces and calculate face embeddings for a list of photo files
def get_embeddings(filenames):
    """Compute a VGGFace embedding for the face found in each photo file.

    A new VGGFace model is built on every call.  Returns whatever
    ``predict`` yields for the batch of extracted faces.
    """
    # extract faces
    faces = list(map(extract_face, filenames))
    # convert into an array of samples
    samples = asarray(faces, 'float32')
    # prepare the face for the model, e.g. center pixels
    samples = preprocess_input(samples, version=2)
    # create a vggface model and run the prediction on the whole batch
    vggface = VGGFace(model='vgg16', include_top=False,
                      input_shape=(254, 254, 3), pooling='max')
    return vggface.predict(samples)
# determine if a candidate face is a match for a known face
def is_match(known_embedding, candidate_embedding, thresh=0.45):
    """Report whether two face embeddings are close enough to be a match.

    Args:
        known_embedding: embedding vector of the reference face.
        candidate_embedding: embedding vector of the face being checked.
        thresh: largest cosine distance that still counts as a match.

    Returns:
        True when the cosine distance is at most ``thresh``, otherwise False.
    """
    # calculate distance between embeddings
    score = cosine(known_embedding, candidate_embedding)
    matched = bool(score <= thresh)
    print('Match percentage (%.3f)' % (100 - (100 * score)))
    # Only claim a match when the distance really is within the threshold.
    if matched:
        print('>face is a Match (%.3f <= %.3f)' % (score, thresh))
    else:
        print('>face is NOT a Match (%.3f > %.3f)' % (score, thresh))
    return matched
# define filenames
filenames = ['audacious.jpg', 'face-20190717050545949130_123.jpg']
# get embeddings file filenames
embeddings = get_embeddings(filenames)
# define sharon stone
sharon_id = embeddings[0]
# verify known photos of sharon
print('Positive Tests')
is_match(embeddings[0], embeddings[1])
On the first request the process works well. But the second request gives these errors:
'numpy.ndarray' object is not callable
'Cannot interpret feed_dict key as Tensor: Tensor Tensor("Placeholder:0", shape=(3, 3, 3, 64), dtype=float32) is not an element of this graph.'
If I run it outside the API, just as a file with python3 file.py, it never gives any errors, no matter how many times I run it.
Any clue?
Check this line:
samples = asarray(faces, 'float32')
and try to replace it with:
samples = asarray(faces, dtype=np.float32)
I have access to a large amount of 2048x2048x3 jpeg pictures which I am storing in the TFRecords binary format. Later, I use the stored files to train a deep neural network. To store the pictures, I am currently using two different methods.
The first one uses tensorflow. I have defined a function that creates a Tensorflow graph. I keep reusing the same graph for all the pictures:
def picture_decoder(height, width):
    """Build a graph that reads a JPEG by file name and resizes it bilinearly.

    Returns the graph, the string placeholder for the file name, and the
    resized float32 image tensor, which keeps a leading axis of size one.
    """
    graph = tf.Graph()
    with graph.as_default():
        name_input = tf.placeholder(tf.string)
        decoded = tf.image.decode_jpeg(tf.read_file(name_input))
        as_float = tf.image.convert_image_dtype(decoded, tf.float32)
        # resize_bilinear is fed a 4-D tensor, hence the extra leading axis
        batched = tf.expand_dims(as_float, 0)
        target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
        resized = tf.image.resize_bilinear(batched, target_size)
    return graph, name_input, resized
Height, Width = 300, 300
graph, nameholder, image_tensor = picture_decoder(Height, Width)
with tf.Session(graph=graph) as sess:
init = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() )
sess.run(init)
#Loop through the pictures
for(...picture_name...):
picture = sess.run(image_tensor, feed_dict={nameholder: picture_name} )
The second method uses numpy:
def picture_decoder_numpy(picture_name, height, width):
    """Load a picture with PIL and resize it with Lanczos filtering.

    Args:
        picture_name: path of the picture file.
        height: target height in pixels.
        width: target width in pixels.

    Returns:
        The resized picture as an int32 array with a leading axis of size one.
    """
    # The context manager closes the file once the pixels have been copied out.
    with Image.open(picture_name) as image:
        # PIL expects the size as (width, height), not (height, width).
        resized = image.resize((width, height), Image.LANCZOS)
        pixels = np.array(resized, dtype=np.int32)
    return np.expand_dims(pixels, axis=0)
Heigth, Width = 300, 300
for(...picture_name...):
picture = picture_decoder_numpy(pict, Height, Width)
The first method appears to be approximately 6 times faster than the second one.
The issue I am facing is related with the training afterwards. For the first case, the deep neural net I have defined does not learn, i.e., its loss does not improve over many epochs and it is only slightly smaller than 1. Using the second method, without changing any neural net parameter, the loss achieves E-05 values. Am I missing some Tensorflow detail?
I can post the full code if necessary.
Update:
The method using Tensorflow outputs a black picture, while the method using numpy works as expected.
MVCE for decoding the pictures:
from PIL import Image
import numpy as np
import tensorflow as tf
def picture_decoder(height, width):
    """Build a graph that decodes a JPEG (integer-accurate DCT) and resizes it.

    Returns the graph, the file-name placeholder and the bilinearly resized
    image tensor after squeezing out all axes of size one.
    """
    graph = tf.Graph()
    with graph.as_default():
        name_input = tf.placeholder(tf.string)
        decoded = tf.image.decode_jpeg(tf.read_file(name_input),
                                       dct_method="INTEGER_ACCURATE")
        as_float = tf.image.convert_image_dtype(decoded, tf.float32)
        # resize_bilinear is fed a 4-D tensor, hence the extra leading axis
        batched = tf.expand_dims(as_float, 0)
        target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
        resized = tf.squeeze(tf.image.resize_bilinear(batched, target_size))
    return graph, name_input, resized
def picture_decoder_numpy(picture_name, height, width):
    """Load a picture with PIL, resize it with Lanczos filtering and return
    it as an int32 array.

    Args:
        picture_name: path of the picture file.
        height: target height in pixels.
        width: target width in pixels.
    """
    # The context manager closes the file once the pixels have been copied out.
    with Image.open(picture_name) as image:
        # PIL expects the size as (width, height), not (height, width).
        resized = image.resize((width, height), Image.LANCZOS)
        return np.array(resized, dtype=np.int32)
pic_name = "picture.jpg"
#Numpy method
#picture = picture_decoder_numpy(pic_name, 300, 300)
#Tensorflow method
graph, nameholder, picture_tensor = picture_decoder(300, 300)
with tf.Session(graph=graph) as sess:
init = tf.group()
sess.run(init)
picture = sess.run(picture_tensor, feed_dict={nameholder: pic_name})
im = Image.fromarray(picture.astype('uint8'))
im.save("save.jpg")
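The TF implementation does not do what you think it does. The problem is that the image values get converted to the [0, 1] range (by convert_image_dtype), while in the numpy version the values are in the [0, 255] range. Casting values in [0, 1] to uint8 truncates almost all of them to 0, which is why the saved picture is black.
One way to solve it is to multiply your final result by 255.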
def picture_decoder(height, width):
    """Build a graph that decodes a JPEG, resizes it and rescales it by 255.

    Returns the graph, the file-name placeholder and the bilinearly resized
    image tensor (size-one axes squeezed out) multiplied by 255.
    """
    graph = tf.Graph()
    with graph.as_default():
        name_input = tf.placeholder(tf.string)
        decoded = tf.image.decode_jpeg(tf.read_file(name_input),
                                       dct_method="INTEGER_ACCURATE")
        as_float = tf.image.convert_image_dtype(decoded, tf.float32)
        # resize_bilinear is fed a 4-D tensor, hence the extra leading axis
        batched = tf.expand_dims(as_float, 0)
        target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
        resized = tf.squeeze(tf.image.resize_bilinear(batched, target_size))
        # FIX: rescale to typical 8-bit range
        rescaled = resized * 255
    return graph, name_input, rescaled
Of course, the two arrays will not match exactly, since you also use two different interpolation methods.
norm_dist = np.abs(np.sum(arr - arr2)) / (np.sum(arr) + np.sum(arr2)) / 2
np.isclose(norm_dist, 0, atol=1e-4)
True
(assuming arr contains the numpy implementation, and arr2 the tensorflow one).
I am working with grayscale images of size 75 by 75 and want to perform some augmentation techniques using ImageDataGenerator.
But I am wondering whether the output can be reproduced consistently across multiple runs. I am not talking about epochs, but about re-running the whole code and getting exactly the same augmented images, so that the results are the same.
I am attaching sample grayscale image:
import matplotlib.pyplot as plt
import numpy as np
from scipy import misc, ndimage
from keras.preprocessing.image import ImageDataGenerator
gen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1,
height_shift_range=0.1, zoom_range=0.1, # shear_range=0.15,
channel_shift_range=10., horizontal_flip=True, vertical_flip = True,
rescale = 0.2, fill_mode = 'wrap')
image_path = '/trial_img.png' # grayscale image
# Obtain image
# data_format = [#num_images,height,width,#num_of_channels]
# where, #num_images = 1 and #num_of_channels = 1, height = width = 75
image = np.expand_dims(ndimage.imread(image_path),0) # add num_images dimension
image = np.expand_dims(image, axis=3) # add num_of_channels dimension
plt.imshow(image.reshape(75,75), cmap = 'gray')
# Trial #1
# Generate batches of augmented images from this image
aug_iter = gen.flow(image)
# Get 10 samples of augmented images
aug_images1 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
# Trial #2
aug_iter = gen.flow(image)
aug_images2 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
# check if equal
truth = []
for val in range(10):
truth.append((aug_images1[val] == aug_images2[val]).all()) # check images
np.asarray(truth).all() # check if all images are same
How to repeat the augmented outputs consistently in above code?
I know this code is written very badly, any suggestions on code optimization are also greatly appreciated.
Thanks,
Gopi
You can set a seed to the flow method:
aug_iter = gen.flow(image, seed = 0)
By setting this parameter to a specific integer, you will always get the same sequence of random shuffling/transformations.
You could run the generator and save the images, then simply load the images:
# Trial #1
# Generate batches of augmented images from this image
aug_iter = gen.flow(image)
# Get 10 samples of augmented images
aug_images1 = [next(aug_iter)[0].reshape(75,75).astype(np.uint8) for i in range(10)]
If memory is not a problem, you can save this with numpy:
aug_images1 = np.array(aug_images1)
np.save(filename, aug_images1)
Then load it:
aug_images1 = np.load(filename)
If you prefer, you can save each image as proper image files (less memory occupied) using an image library such as Pillow:
from PIL import Image

# Write every augmented image to its own file.
for im, filename in zip(aug_images1, list_of_names):
    im = Image.fromarray(im)  # make sure you have a uint8 from 0 to 255 array.
    im.save(filename)
Later, load the files:
# Image (capital I) is the PIL module imported above; lowercase `image` is not defined.
aug_images1 = [np.array(Image.open(filename)) for filename in list_of_names]
aug_images1 = np.array(aug_images1)
Using ImageDataGenerator for loading files
In case you don't want to load all images at once in memory, with saved images, you can create a new ImageDataGenerator, but without any kind of augmentation, just a pure image loader.
Then use gen.flow_from_directory() to get images from a directory.
Read more in the documentation: https://keras.io/preprocessing/image/
I would like to know, whether I used toPILImage from torchvision correctly. I want to use it, to see how the images look after initial image transformations are applied to the dataset.
When I use it like in the code below, the image that comes up has weird colors like this one. The original image is a regular RGB image.
This is my code:
import os
import torch
from PIL import Image, ImageFont, ImageDraw
import torch.utils.data as data
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# Image transformations
normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
transform_img = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
normalize ])
train_data = torchvision.datasets.ImageFolder(
root='./train_cl/',
transform=transform_img
)
test_data = torchvision.datasets.ImageFolder(
root='./test_named_cl/',
transform=transform_img
)
train_data_loader = data.DataLoader(train_data,
batch_size=4,
shuffle=True,
num_workers=4) #num_workers=args.nThreads)
test_data_loader = data.DataLoader(test_data,
batch_size=32,
shuffle=False,
num_workers=4)
# Open Image from dataset:
to_pil_image = transforms.ToPILImage()
my_img, _ = train_data[248]
results = to_pil_image(my_img)
results.show()
Edit:
I had to use .data on the Torch Variable to get the tensor.
Also I needed to rescale the numpy array before transposing. I found a working solution here, but it doesn't always work well. How can I do this better?
for i, data in enumerate(train_data_loader, 0):
img, labels = data
img = Variable(img)
break
image = img.data.cpu().numpy()[0]
# This worked for rescaling:
image = (1/(2*2.25)) * image + 0.5
# Both of these didn't work:
# image /= (image.max()/255.0)
# image *= (255.0/image.max())
image = np.transpose(image, (1,2,0))
plt.imshow(image)
plt.show()
You can use PIL image but you're not actually loading the data as you would normally.
Try something like this instead:
import numpy as np
import matplotlib.pyplot as plt

# load a single batch from train data
img, labels = next(iter(train_data_loader))
# this converts it from GPU to CPU and selects first image
img = img.cpu().numpy()[0]
# convert image back to Height,Width,Channels
img = np.transpose(img, (1, 2, 0))
# Undo transforms.Normalize (x * std + mean) with the same per-channel values
# the dataset transform uses; otherwise imshow clips the out-of-range values
# and the colours look wrong.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
img = np.clip(img * std + mean, 0.0, 1.0)
# show the image
plt.imshow(img)
plt.show()
As an update (02-10-2021):
import torchvision.transforms.functional as F
# load the image (creating a random image as an example)
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = F.to_pil_image(img_data)
Alternatively
import torchvision.transforms as transforms
img_data = torch.ByteTensor(4, 4, 3).random_(0, 255).numpy()
pil_image = transforms.ToPILImage()(img_data)
The second form can be integrated with dataset loader in pytorch or called directly as so.
I added a modified to_pil_image here
essentially it does what I suggested back in 2018 but it is integrated into pytorch now.
I would use something like this
# Open Image from dataset:
my_img, _ = train_data[248]
results = transforms.ToPILImage()(my_img)
results.show()