I have to apply tf.image.crop_and_resize on my images and want to generate 5 boxes from each image. I have written the below code which works fine
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
# Load the pre-trained Xception model to be used as the base encoder.
xception = keras.applications.Xception(
include_top=False, weights="imagenet", pooling="avg"
)
# Set the trainability of the base encoder.
for layer in xception.layers:
layer.trainable = False
# Receive the images as inputs.
inputs = layers.Input(shape=(299, 299, 3), name="image_input")
input ='/content/1.png'
input = tf.keras.preprocessing.image.load_img(input,target_size=(299,299,3))
image = tf.expand_dims(np.asarray(input)/255, axis=0)
BATCH_SIZE = 1
NUM_BOXES = 5
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
CHANNELS = 3
CROP_SIZE = (24, 24)
boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, maxval=BATCH_SIZE, dtype=tf.int32)
output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
xception_input = tf.keras.applications.xception.preprocess_input(output)
The above code works fine however when I want to display these boxes I run below code
for i in range(5):
# define subplot
plt.subplot(330 + 1 + i)
# generate batch of images
batch = xception_input.next()
# convert to unsigned integers for viewing
image = batch[0].astype('uint8')
image = np.reshape(24,24,3)
# plot raw pixel data
plt.imshow(image)
#show the figure
plt.show()
But it generates this error AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'next'.
You have to use [i] instead of .next()
And there is also problem with converting it to uint8 (but it doesn't need to reshape)
for i in range(5):
plt.subplot(331 + i)
tensor = xception_input[i]
#print(tensor)
tensor = tensor*255
image = np.array(tensor, dtype=np.uint8)
#print(image)
plt.imshow(image)
or use for to get items
for i, tensor in enumerate(xception_input):
#print(tensor)
plt.subplot(331 + i)
tensor = tensor*255
image = np.array(tensor, dtype=np.uint8)
#print(image)
plt.imshow(image)
I don't know what your code should do but this gives me empty images because tensor has values like -0.9 and it convert it all to 0
Related
I am using grad cam to see which regions of the test images are most important for the prediction of resnet50. The output I got has some errors.
Code Snippets:
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import cv2
class GradCAM:
def __init__(self, model, classIdx, layerName=None):
# store the model, the class index used to measure the class
# activation map, and the layer to be used when visualizing
# the class activation map
self.model = model
self.classIdx = classIdx
self.layerName = layerName
# if the layer name is None, attempt to automatically find
# the target output layer
if self.layerName is None:
self.layerName = self.find_target_layer()
def find_target_layer(self):
# attempt to find the final convolutional layer in the network
# by looping over the layers of the network in reverse order
for layer in reversed(self.model.layers):
# check to see if the layer has a 4D output
if len(layer.output_shape) == 4:
return layer.name
# otherwise, we could not find a 4D layer so the GradCAM
# algorithm cannot be applied
raise ValueError("Could not find 4D layer. Cannot apply GradCAM.")
def compute_heatmap(self, image, eps=1e-8):
# construct our gradient model by supplying (1) the inputs
# to our pre-trained model, (2) the output of the (presumably)
# final 4D layer in the network, and (3) the output of the
# softmax activations from the model
gradModel = Model(
inputs=[self.model.inputs],
outputs=[self.model.get_layer(self.layerName).output, self.model.output])
# record operations for automatic differentiation
with tf.GradientTape() as tape:
# cast the image tensor to a float-32 data type, pass the
# image through the gradient model, and grab the loss
# associated with the specific class index
inputs = tf.cast(image, tf.float32)
(convOutputs, predictions) = gradModel(inputs)
loss = predictions[:, tf.argmax(predictions[0])]
# use automatic differentiation to compute the gradients
grads = tape.gradient(loss, convOutputs)
# compute the guided gradients
castConvOutputs = tf.cast(convOutputs > 0, "float32")
castGrads = tf.cast(grads > 0, "float32")
guidedGrads = castConvOutputs * castGrads * grads
# the convolution and guided gradients have a batch dimension
# (which we don't need) so let's grab the volume itself and
# discard the batch
convOutputs = convOutputs[0]
guidedGrads = guidedGrads[0]
# compute the average of the gradient values, and using them
# as weights, compute the ponderation of the filters with
# respect to the weights
weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)
# grab the spatial dimensions of the input image and resize
# the output class activation map to match the input image
# dimensions
(w, h) = (image.shape[2], image.shape[1])
heatmap = cv2.resize(cam.numpy(), (w, h))
# normalize the heatmap such that all values lie in the range
# [0, 1], scale the resulting values to the range [0, 255],
# and then convert to an unsigned 8-bit integer
numer = heatmap - np.min(heatmap)
denom = (heatmap.max() - heatmap.min()) + eps
heatmap = numer / denom
heatmap = (heatmap * 255).astype("uint8")
# return the resulting heatmap to the calling function
return heatmap
def overlay_heatmap(self, heatmap, image, alpha=0.5,
colormap=cv2.COLORMAP_VIRIDIS):
# apply the supplied color map to the heatmap and then
# overlay the heatmap on the input image
heatmap = cv2.applyColorMap(heatmap, colormap)
output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
# return a 2-tuple of the color mapped heatmap and the output,
# overlaid image
return (heatmap, output)
Code Snippet for visualising heatmap:
import random
num_images = 5
random_indices = random.sample(range(len(X_test)), num_images)
for idx in random_indices:
image = X_test[idx] #assuming the image array is the first element in the tuple
# print(image)
# image = cv2.resize(image, (224, 224))
image1 = image.astype('float32') / 255
image1 = np.expand_dims(image1, axis=0)
preds = model.predict(image1)
i = np.argmax(preds[0])
icam = GradCAM(model, i, 'conv5_block3_out')
heatmap = icam.compute_heatmap(image1)
heatmap = cv2.resize(heatmap, (224, 224))
(heatmap, output) = icam.overlay_heatmap(heatmap, image, alpha=0.5)
fig, ax = plt.subplots(1, 3)
ax[0].imshow(heatmap)
ax[1].imshow(image)
ax[2].imshow(output)
The output:
The problem I am facing is, here in the output you can see the original images are different but the heatmaps, images, and grad cam are the same for all the images. I don't know whats the reason behind this.
I want to predict my image from a pre-trained keras xception image model. I have written some code but I get errros. The code is below
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# Load the pre-trained Xception model to be used as the base encoder.
xception = keras.applications.Xception(
include_top=False, weights="imagenet", pooling="avg"
)
# Set the trainability of the base encoder.
for layer in xception.layers:
layer.trainable = False
# Receive the images as inputs.
#inputs = layers.Input(shape=(299, 299, 3), name="image_input")
input ='/content/1.png'
input = tf.keras.preprocessing.image.load_img(input,target_size=(299,299,3))
BATCH_SIZE = 1
NUM_BOXES = 5
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
CHANNELS = 3
CROP_SIZE = (24, 24)
boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
maxval=BATCH_SIZE, dtype=tf.int32)
output = tf.image.crop_and_resize(input, boxes, box_indices, CROP_SIZE)
xception_input = tf.keras.applications.xception.preprocess_input(output)
plt.imshow(xception_input/255.)
I want to display 5 boxes of each image as written in code. However I get the following error.
ValueError: Attempt to convert a value (<PIL.Image.Image image mode=RGB size=299x299 at 0x7F1DF6044F10>)
with an unsupported type (<class 'PIL.Image.Image'>) to a Tensor.
With tf.keras.preprocessing.image.load_img the image is loaded in a PIL format. You'll have to convert that to a numpy array before getting the prediction:
image = tf.keras.preprocessing.image.load_img(image_path)
input_arr = tf.keras.preprocessing.image.img_to_array(image)
input_arr = np.array([input_arr]) # Convert single image to a batch.
predictions = model.predict(input_arr)
After training my model, I tried to plot graph of the softmax output, but it resulted in the runtime error mentioned in the title.
Here is the following code snippet:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import helper
# Test out your network!
dataiter = iter(testloader)
images, labels = dataiter.next()
img = images[1]
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
# Plot the image and probabilities
helper.view_classify(img, ps, version='Fashion')
The problem is with this part (I guess).
img = images[1]
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
Problem: image you are loading is of dimension 28x28, however, the first index in input to the model is generally batch size. Since there is 1 image only, so you have to make the first dimension to be of size 1. To do that do img = img.view( (-1,) + img.shape) or img=img.unsqueeze(dim=0). Also, it seems that the first layer weight is 784 x 128. i.e the image should be converted to vector and fed to model. For that we do img=img.view(1, -1).
So, in total, you need to do
img = images[1]
img = img.unsqueeze(dim=0)
img=img.view(1, -1)
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
or you can just use one command instead of two (unsqueeze is unnecessary)
img = images[1]
img=img.view(1, -1)
I am using Transfer learning for recognizing objects. I used trained VGG16 model as the base model and added my classifier on top of it using Keras. I then trained the model on my data, the model works well. I want to see the feature generated by the intermediate layers of the model for the given data. I used the following code for this purpose:
def ModeloutputAtthisLayer(model, layernme, imgnme, width, height):
layer_name = layernme
intermediate_layer_model = Model(inputs=model.input,
outputs=model.get_layer(layer_name).output)
img = image.load_img(imgnme, target_size=(width, height))
imageArray = image.img_to_array(img)
image_batch = np.expand_dims(imageArray, axis=0)
processed_image = preprocess_input(image_batch.copy())
intermediate_output = intermediate_layer_model.predict(processed_image)
print("outshape of ", layernme, "is ", intermediate_output.shape)
In the code, I used np.expand_dims to add one extra dimension for the batch as the input matrix to the network should be of the form (batchsize, height, width, channels). This code works fine. The shape of the feature vector is 1, 224, 224, 64.
Now I wish to display this as image, for this I understand there is an additional dimension added as batch so I should remove it. Following this I used the following lines of the code:
imge = np.squeeze(intermediate_output, axis=0)
plt.imshow(imge)
However it throws an error:
"Invalid dimensions for image data"
I wonder how can I display the extracted feature vector as an image. Any suggestion please.
Your feature shape is (1,224,224,64), you cannot directly plot a 64 channel image. What you can do is plot the individual channels independently like following
imge = np.squeeze(intermediate_output, axis=0)
filters = imge.shape[2]
plt.figure(1, figsize=(32, 32)) # plot image of size (32x32)
n_columns = 8
n_rows = math.ceil(filters / n_columns) + 1
for i in range(filters):
plt.subplot(n_rows, n_columns, i+1)
plt.title('Filter ' + str(i))
plt.imshow(imge[:,:,i], interpolation="nearest", cmap="gray")
This will plot 64 images in 8 rows and 8 columns.
A possible way to go consists in combining the 64 channels into a single-channel image through a weighted sum like this:
weighted_imge = np.sum(imge*weights, axis=-1)
where weights is an array with 64 weighting coefficients.
If you wish to give all the channels the same weight you could simply compute the average:
weighted_imge = np.mean(imge, axis=-1)
Demo
import numpy as np
import matplotlib.pyplot as plt
intermediate_output = np.random.randint(size=(1, 224, 224, 64),
low=0, high=2**8, dtype=np.uint8)
imge = np.squeeze(intermediate_output, axis=0)
weights = np.random.random(size=(imge.shape[-1],))
weighted_imge = np.sum(imge*weights, axis=-1)
plt.imshow(weighted_imge)
plt.colorbar()
In [33]: intermediate_output.shape
Out[33]: (1, 224, 224, 64)
In [34]: imge.shape
Out[34]: (224, 224, 64)
In [35]: weights.shape
Out[35]: (64,)
In [36]: weighted_imge.shape
Out[36]: (224, 224)
I'm trying to read three jpg-Files to resize them with a tensorflow batch. No matter what I tried I didn't succeed. Here is one example below. In general how can I resize some pictures in a batch with tf.image.resize_images. I don't want to use an Input Reader. I want to create the batch of some pictures by myself.
I think it's neccessary to have 4 dimensions like batchsize, width, heigt, channels
import numpy as np
import tensorflow as tf
sess = tf.Session()
tensor_list = []
for i in range(3):
img = tf.read_file("{0}.jpg".format(i))
img_tensor = tf.image.decode_jpeg(img, 3)
img_resized = tf.image.resize_images(img_tensor, tf.convert_to_tensor([ 800, 400 ] ), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
img_tensor_dim = tf.expand_dims(img_resized, 0)
tensor_list.append(img_tensor_dim)
batch = tf.train.batch(tensor_list, batch_size=3, enqueue_many=False)
img_resized = tf.image.resize_images(batch, tf.convert_to_tensor([400, 200]), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
for i in range(3):
tmp = img_resized[i]
endcode_jpg = tf.image.encode_jpeg(tmp, x_density=96, y_density=96)
wr = tf.write_file('{0}_out.jpg'.format(i), endcode_jpg)
sess.run(wr)
You can use the tf.map_fn() operation to apply the resizing logic to a vector of strings containing your image data:
import tensorflow as tf
# Build a tensor containing the image data as a vector of strings.
images = []
for i in range(3):
images.append(tf.read_file("/tmp/jpeg420exif.jpg"))
images = tf.stack(images)
# `resize_fn()` contains the logic for resizing and encoding one image.
def resize_fn(img):
img_tensor = tf.image.decode_jpeg(img, 3)
img_resized = tf.image.resize_images(
[img_tensor], [800, 400], tf.image.ResizeMethod.NEAREST_NEIGHBOR)[0]
img_encoded = tf.image.encode_jpeg(img_resized, x_density=96, y_density=96)
return img_encoded
# `tf.map_fn()` applies `resize_fn()` to each image in turn, and
# returns a vector of encoded images.
encoded_images = tf.map_fn(resize_fn, images)
write_ops = []
for i in range(3):
write_ops.append(tf.write_file("{0}_out.jpg".format(i), encoded_images[i]))
with tf.Session() as sess:
sess.run(write_ops)