resize_images with a batch - python

I'm trying to read three JPEG files and resize them with a TensorFlow batch. No matter what I tried, I didn't succeed; one example is below. In general, how can I resize some pictures in a batch with tf.image.resize_images? I don't want to use an input reader; I want to create the batch of pictures myself.
I think it's necessary to have 4 dimensions: batch size, width, height, channels.
import numpy as np
import tensorflow as tf

sess = tf.Session()
tensor_list = []
for i in range(3):
    img = tf.read_file("{0}.jpg".format(i))
    img_tensor = tf.image.decode_jpeg(img, 3)
    img_resized = tf.image.resize_images(img_tensor, tf.convert_to_tensor([800, 400]), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    img_tensor_dim = tf.expand_dims(img_resized, 0)
    tensor_list.append(img_tensor_dim)

batch = tf.train.batch(tensor_list, batch_size=3, enqueue_many=False)
img_resized = tf.image.resize_images(batch, tf.convert_to_tensor([400, 200]), tf.image.ResizeMethod.NEAREST_NEIGHBOR)

for i in range(3):
    tmp = img_resized[i]
    encode_jpg = tf.image.encode_jpeg(tmp, x_density=96, y_density=96)
    wr = tf.write_file('{0}_out.jpg'.format(i), encode_jpg)
    sess.run(wr)

You can use the tf.map_fn() operation to apply the resizing logic to a vector of strings containing your image data:
import tensorflow as tf

# Build a tensor containing the image data as a vector of strings.
images = []
for i in range(3):
    images.append(tf.read_file("/tmp/jpeg420exif.jpg"))
images = tf.stack(images)

# `resize_fn()` contains the logic for resizing and encoding one image.
def resize_fn(img):
    img_tensor = tf.image.decode_jpeg(img, 3)
    img_resized = tf.image.resize_images(
        [img_tensor], [800, 400], tf.image.ResizeMethod.NEAREST_NEIGHBOR)[0]
    img_encoded = tf.image.encode_jpeg(img_resized, x_density=96, y_density=96)
    return img_encoded

# `tf.map_fn()` applies `resize_fn()` to each image in turn, and
# returns a vector of encoded images.
encoded_images = tf.map_fn(resize_fn, images)

write_ops = []
for i in range(3):
    write_ops.append(tf.write_file("{0}_out.jpg".format(i), encoded_images[i]))

with tf.Session() as sess:
    sess.run(write_ops)
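If you are on TensorFlow 2.x, where tf.read_file, tf.image.resize_images, and sessions are gone, the same tf.map_fn pattern runs eagerly. A minimal sketch, under the assumption that files 0.jpg to 2.jpg exist in the working directory:

import tensorflow as tf

# Assumed inputs: 0.jpg, 1.jpg, 2.jpg in the working directory.
images = tf.stack([tf.io.read_file("{0}.jpg".format(i)) for i in range(3)])

def resize_fn(img):
    img_tensor = tf.io.decode_jpeg(img, channels=3)
    img_resized = tf.image.resize(img_tensor, [800, 400], method="nearest")
    # Cast defensively: encode_jpeg requires uint8 input.
    img_resized = tf.cast(img_resized, tf.uint8)
    return tf.io.encode_jpeg(img_resized, x_density=96, y_density=96)

encoded_images = tf.map_fn(resize_fn, images)
for i in range(3):
    tf.io.write_file("{0}_out.jpg".format(i), encoded_images[i])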

Related

Image captioner generator method from single image to batch

I was following the TensorFlow guide on image captioning linked here, and everything is working great, but I want to convert this method, which generates captions for a single input image, to take a batch of images instead.
For example, this is the current generator method:
@Captioner.add_method
def simple_gen(self, image, temperature=1):
    initial = self.word_to_index([['[START]']])  # (batch, sequence)
    img_features = self.feature_extractor(image[tf.newaxis, ...])
    tokens = initial  # (batch, sequence)
    for n in range(50):
        preds = self((img_features, tokens)).numpy()  # (batch, sequence, vocab)
        preds = preds[:, -1, :]  # (batch, vocab)
        if temperature == 0:
            next = tf.argmax(preds, axis=-1)[:, tf.newaxis]  # (batch, 1)
        else:
            next = tf.random.categorical(preds/temperature, num_samples=1)  # (batch, 1)
        tokens = tf.concat([tokens, next], axis=1)  # (batch, sequence)
        if next[0] == self.word_to_index('[END]'):
            break
    words = idx_to_word(tokens[0, 1:-1])
    result = tf.strings.reduce_join(words, axis=-1, separator=' ')
    return result.numpy().decode()
It takes a single image loaded by this function:
def load_img(img_path):
    img = tf.io.read_file(img_path)
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMAGE_SHAPE[:-1])
    return img
The load_img function takes an image path, and the generator function returns a generated caption for that image.
What I tried: I have a tf dataset that contains a list of image paths and the corresponding captions. I tried the following code to load all images in the dataset, loop over them, and call the simple_gen method, but it's very slow and inefficient, and I'm looking for a better way to optimize it:
for (img, capt) in test_raw.map(lambda img, capt: (load_img(img), capt)):
    preds = []
    for t in [0.0, 0.5, 1.0]:
        result = model.simple_gen(img, temperature=t)
        preds.append(result)
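A first step toward speeding this up, before rewriting simple_gen itself, is to parallelize and prefetch the image loading so input I/O overlaps with generation. A minimal sketch only, not a full solution: fully batching simple_gen would additionally require removing the per-sample break and masking sequences that have already produced '[END]'.

import tensorflow as tf

# Sketch: parallel loading + prefetching. `test_raw`, `load_img`,
# and `model` come from the original post.
ds = (test_raw
      .map(lambda img, capt: (load_img(img), capt),
           num_parallel_calls=tf.data.AUTOTUNE)
      .prefetch(tf.data.AUTOTUNE))

for img, capt in ds:
    preds = [model.simple_gen(img, temperature=t) for t in [0.0, 0.5, 1.0]]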

How to display images from tf.image.crop_and_resize

I have to apply tf.image.crop_and_resize on my images, and I want to generate 5 boxes from each image. I have written the code below, which works fine:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np

# Load the pre-trained Xception model to be used as the base encoder.
xception = keras.applications.Xception(
    include_top=False, weights="imagenet", pooling="avg"
)

# Set the trainability of the base encoder.
for layer in xception.layers:
    layer.trainable = False

# Receive the images as inputs.
inputs = layers.Input(shape=(299, 299, 3), name="image_input")

input = '/content/1.png'
input = tf.keras.preprocessing.image.load_img(input, target_size=(299, 299, 3))
image = tf.expand_dims(np.asarray(input)/255, axis=0)

BATCH_SIZE = 1
NUM_BOXES = 5
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
CHANNELS = 3
CROP_SIZE = (24, 24)

boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, maxval=BATCH_SIZE, dtype=tf.int32)
output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
xception_input = tf.keras.applications.xception.preprocess_input(output)
The above code works fine, but when I want to display these boxes I run the code below:
for i in range(5):
    # define subplot
    plt.subplot(330 + 1 + i)
    # generate batch of images
    batch = xception_input.next()
    # convert to unsigned integers for viewing
    image = batch[0].astype('uint8')
    image = np.reshape(24,24,3)
    # plot raw pixel data
    plt.imshow(image)
# show the figure
plt.show()
But it generates this error: AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'next'.
You have to use [i] instead of .next().
There is also a problem with converting to uint8 (but there is no need to reshape):
for i in range(5):
    plt.subplot(331 + i)
    tensor = xception_input[i]
    #print(tensor)
    tensor = tensor*255
    image = np.array(tensor, dtype=np.uint8)
    #print(image)
    plt.imshow(image)
Or use a for loop to get the items:
for i, tensor in enumerate(xception_input):
    #print(tensor)
    plt.subplot(331 + i)
    tensor = tensor*255
    image = np.array(tensor, dtype=np.uint8)
    #print(image)
    plt.imshow(image)
I don't know what your code is supposed to do, but this gives me mostly empty images, because the tensor has values like -0.9 and the uint8 conversion turns them all into 0.
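For context (my addition): tf.keras.applications.xception.preprocess_input scales pixels to the range [-1, 1], which is why the tensors contain values like -0.9. To display the crops, map that range back to [0, 255] instead of multiplying by 255:

import numpy as np
import matplotlib.pyplot as plt

for i, tensor in enumerate(xception_input):
    plt.subplot(331 + i)
    # Undo Xception preprocessing: [-1, 1] -> [0, 255].
    image = np.array((tensor + 1.0) * 127.5, dtype=np.uint8)
    plt.imshow(image)
plt.show()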

How to shuffle batches with ImageDataGenerator?

I'm using ImageDataGenerator with flow_from_dataframe to load a dataset.
Using flow_from_dataframe with shuffle=True shuffles the images in the dataset.
I want to shuffle the batches. If I have 12 images and batch_size=3, then I have 4 batches:
batch1 = [image1, image2, image3]
batch2 = [image4, image5, image6]
batch3 = [image7, image8, image9]
batch4 = [image10, image11, image12]
I want to shuffle the batches without shuffling the images in each batch, so that I get for example:
batch2 = [image4, image5, image6]
batch1 = [image1, image2, image3]
batch4 = [image10, image11, image12]
batch3 = [image7, image8, image9]
Is that possible with ImageDataGenerator and flow_from_dataframe? Is there a preprocessing function I can use?
Consider using the tf.data.Dataset API. You can perform the batching operation before the shuffling.
import tensorflow as tf

file_names = [f'image_{i}' for i in range(1, 10)]
ds = tf.data.Dataset.from_tensor_slices(file_names).batch(3).shuffle(3)

for _ in range(3):
    for batch in ds:
        print(batch.numpy())
    print()

[b'image_4' b'image_5' b'image_6']
[b'image_7' b'image_8' b'image_9']
[b'image_1' b'image_2' b'image_3']

[b'image_1' b'image_2' b'image_3']
[b'image_4' b'image_5' b'image_6']
[b'image_7' b'image_8' b'image_9']

[b'image_1' b'image_2' b'image_3']
[b'image_4' b'image_5' b'image_6']
[b'image_7' b'image_8' b'image_9']
Then, you can use a mapping operation to load the images from the file names:
import os

def read_image(file_name):
    image = tf.io.read_file(file_name)
    image = tf.image.decode_image(image)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize_with_crop_or_pad(image, target_height=224, target_width=224)
    # `class_categories` is assumed to be a tensor of class names defined elsewhere.
    label = tf.strings.split(file_name, os.sep)[0]
    label = tf.cast(tf.equal(label, class_categories), tf.int32)
    return image, label

ds = ds.map(read_image)
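One caveat worth noting (my addition, not part of the original answer): shuffle(3) only draws from a sliding buffer of 3 batches, so for longer datasets the order is only approximately random. For a full shuffle of all batches, size the buffer to the number of batches:

batch_size = 3
num_batches = len(file_names) // batch_size
ds = (tf.data.Dataset.from_tensor_slices(file_names)
      .batch(batch_size)
      .shuffle(buffer_size=num_batches))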

How do I create image sequence samples using tf.data?

I want to create image sequence samples using the tf.data API, but as of now there seems to be no easy way to concatenate multiple images into a single sample. I have tried the dataset.window function, which groups my images correctly, but I don't know how to concatenate them.
import tensorflow as tf
from glob import glob

IMG_WIDTH = 256
IMG_HEIGHT = 256

def load_and_process_image(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])
    img = tf.reshape(img, shape=(IMG_WIDTH, IMG_HEIGHT, 1, 3))
    return img

def create_dataset(files, time_distance=8, frame_step=1):
    dataset = tf.data.Dataset.from_tensor_slices(files)
    dataset = dataset.map(load_and_process_image)
    dataset = dataset.window(time_distance, 1, frame_step, True)
    # TODO: Concatenate elements from dataset.window
    return dataset

files = sorted(glob('some/path/*.jpg'))
images = create_dataset(files)
I know that I could save my image sequences as TFRecords, but that would make my data pipeline much less flexible and would cost tons of memory.
My input batches should have the form N x W x H x T x C
(N: number of samples,
W: image width,
H: image height,
T: image sequence length,
C: image channels).
You can use batching to create batches of size N:
iterations = ...
batched_dataset = dataset.batch(N)
for batch in batched_dataset.take(iterations):
    pass  # process your batch
Here, iterations is the number of batches you want to generate.
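That handles the N dimension, but not the TODO in create_dataset. The usual pattern for flattening dataset.window output (my addition, assuming each loaded image has shape (W, H, 1, 3) as in the question) is flat_map with a batch the size of the window, followed by a transpose to move the sequence axis into position:

def create_dataset(files, time_distance=8, frame_step=1):
    dataset = tf.data.Dataset.from_tensor_slices(files)
    dataset = dataset.map(load_and_process_image)
    dataset = dataset.window(time_distance, 1, frame_step, True)
    # Each window is itself a small sub-dataset; batching it by the
    # window size collapses it into one (T, W, H, 1, 3) tensor.
    dataset = dataset.flat_map(lambda window: window.batch(time_distance))
    # (T, W, H, 1, 3) -> (W, H, T, 3), so dataset.batch(N) then yields
    # tensors of shape N x W x H x T x C.
    dataset = dataset.map(
        lambda seq: tf.transpose(tf.squeeze(seq, axis=3), perm=[1, 2, 0, 3]))
    return dataset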

Tensorflow: Inputting encapsulated data batch through single placeholder

I am generally struggling with indexing tensors in TensorFlow.
I have image data and additional scalar data. I can only use a single placeholder to input all the data to a neural network.
The images (img) are numpy arrays with shape (84,84,3), and I have data a with shape (2) and b with shape (1).
Now I create a single sample:
sample = np.reshape(np.array([img,a,b]),(3,1)) #shape (3,1)
The placeholder is:
input = tf.placeholder(dtype=tf.float32,shape=[None] + list(sample.shape))
Now, when TF reads a batch of samples, I would like to retrieve the batch of images, the batch of a, and the batch of b, because they need to be input at different locations in the neural network.
Here is a minimal example:
import tensorflow as tf
from tensorflow.contrib import layers
import numpy as np

# Numpy
img = np.random.rand(84,84,3)
a = np.random.rand(2)
b = np.random.rand(1)
sample = np.reshape(np.array([img,a,b]),(3,1)) #shape (3,1)
batch = np.repeat(np.expand_dims(sample,axis=0),32,axis=0) #shape (32,3,1)

# TF
input = tf.placeholder(dtype=tf.float32,shape=[None] + list(sample.shape))

# TODO:
tf_img = tf.  # get image batch from input
tf_a = tf.  # get a batch from input
tf_b = tf.  # get b batch from input

out = layers.convolution2d(tf_img,num_outputs=64,kernel_size=8,stride=2,activation_fn=tf.nn.relu)
out = layers.flatten(out)
out = tf.concat([out,tf_a,tf_b])
out = layers.fully_connected(out,10,activation_fn=tf.nn.relu)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    _ = sess.run(out,feed_dict={input:batch})
How can I extract the individual parts of the input from a tensor with shape (?,3,1), use the image data to create an embedding, and concatenate the other two parts to that output embedding?
Is there a better way to input the data? My only constraint is that it has to be a single placeholder.
Here's a complete example for my comment above:
import numpy as np
import tensorflow as tf

im_height = 84
im_width = 84
im_channels = 3
a_len = 2
b_len = 1

np_img = np.random.rand(im_height, im_width, im_channels)
np_a = np.random.rand(a_len)
np_b = np.random.rand(b_len)

# flatten the input and concatenate to a single 1D numpy array
np_sample = np.concatenate((np_img.reshape(-1), np_a.reshape(-1), np_b.reshape(-1)), axis=0)

# construct a pseudo batch
np_batch = np.repeat(np_sample[np.newaxis, :], 32, axis=0)

tf_batch = tf.placeholder(shape=(None, im_height*im_width*im_channels + a_len + b_len), dtype=tf.float32)

img_stop = im_height*im_width*im_channels
a_stop = img_stop + a_len

# you could also use tf.slice(...) here
tf_img = tf.reshape(tf_batch[:, 0:img_stop], (-1, im_height, im_width, im_channels))
tf_a = tf.reshape(tf_batch[:, img_stop:a_stop], (-1, a_len))
tf_b = tf.reshape(tf_batch[:, a_stop:], (-1, b_len))

with tf.Session() as sess:
    fetch_dict = {'img': tf_img, 'a': tf_a, 'b': tf_b}
    feed_dict = {tf_batch: np_batch}
    res = sess.run(fetch_dict, feed_dict=feed_dict)
    assert(np.isclose(res['img'][0, ...], np_img).all())
    assert(np.isclose(res['a'][0, :], np_a).all())
    assert(np.isclose(res['b'][0, :], np_b).all())
However, this is at least as invasive as adding appropriate placeholders to the code. Additionally, it's much less readable, in my opinion.
