I am using grad cam to see which regions of the test images are most important for the prediction of resnet50. The output I got has some errors.
Code Snippets:
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import cv2
class GradCAM:
def __init__(self, model, classIdx, layerName=None):
# store the model, the class index used to measure the class
# activation map, and the layer to be used when visualizing
# the class activation map
self.model = model
self.classIdx = classIdx
self.layerName = layerName
# if the layer name is None, attempt to automatically find
# the target output layer
if self.layerName is None:
self.layerName = self.find_target_layer()
def find_target_layer(self):
# attempt to find the final convolutional layer in the network
# by looping over the layers of the network in reverse order
for layer in reversed(self.model.layers):
# check to see if the layer has a 4D output
if len(layer.output_shape) == 4:
return layer.name
# otherwise, we could not find a 4D layer so the GradCAM
# algorithm cannot be applied
raise ValueError("Could not find 4D layer. Cannot apply GradCAM.")
def compute_heatmap(self, image, eps=1e-8):
# construct our gradient model by supplying (1) the inputs
# to our pre-trained model, (2) the output of the (presumably)
# final 4D layer in the network, and (3) the output of the
# softmax activations from the model
gradModel = Model(
inputs=[self.model.inputs],
outputs=[self.model.get_layer(self.layerName).output, self.model.output])
# record operations for automatic differentiation
with tf.GradientTape() as tape:
# cast the image tensor to a float-32 data type, pass the
# image through the gradient model, and grab the loss
# associated with the specific class index
inputs = tf.cast(image, tf.float32)
(convOutputs, predictions) = gradModel(inputs)
loss = predictions[:, tf.argmax(predictions[0])]
# use automatic differentiation to compute the gradients
grads = tape.gradient(loss, convOutputs)
# compute the guided gradients
castConvOutputs = tf.cast(convOutputs > 0, "float32")
castGrads = tf.cast(grads > 0, "float32")
guidedGrads = castConvOutputs * castGrads * grads
# the convolution and guided gradients have a batch dimension
# (which we don't need) so let's grab the volume itself and
# discard the batch
convOutputs = convOutputs[0]
guidedGrads = guidedGrads[0]
# compute the average of the gradient values, and using them
# as weights, compute the ponderation of the filters with
# respect to the weights
weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)
# grab the spatial dimensions of the input image and resize
# the output class activation map to match the input image
# dimensions
(w, h) = (image.shape[2], image.shape[1])
heatmap = cv2.resize(cam.numpy(), (w, h))
# normalize the heatmap such that all values lie in the range
# [0, 1], scale the resulting values to the range [0, 255],
# and then convert to an unsigned 8-bit integer
numer = heatmap - np.min(heatmap)
denom = (heatmap.max() - heatmap.min()) + eps
heatmap = numer / denom
heatmap = (heatmap * 255).astype("uint8")
# return the resulting heatmap to the calling function
return heatmap
def overlay_heatmap(self, heatmap, image, alpha=0.5,
colormap=cv2.COLORMAP_VIRIDIS):
# apply the supplied color map to the heatmap and then
# overlay the heatmap on the input image
heatmap = cv2.applyColorMap(heatmap, colormap)
output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
# return a 2-tuple of the color mapped heatmap and the output,
# overlaid image
return (heatmap, output)
Code Snippet for visualising heatmap:
import random
num_images = 5
random_indices = random.sample(range(len(X_test)), num_images)
for idx in random_indices:
image = X_test[idx] #assuming the image array is the first element in the tuple
# print(image)
# image = cv2.resize(image, (224, 224))
image1 = image.astype('float32') / 255
image1 = np.expand_dims(image1, axis=0)
preds = model.predict(image1)
i = np.argmax(preds[0])
icam = GradCAM(model, i, 'conv5_block3_out')
heatmap = icam.compute_heatmap(image1)
heatmap = cv2.resize(heatmap, (224, 224))
(heatmap, output) = icam.overlay_heatmap(heatmap, image, alpha=0.5)
fig, ax = plt.subplots(1, 3)
ax[0].imshow(heatmap)
ax[1].imshow(image)
ax[2].imshow(output)
The output:
The problem I am facing is, here in the output you can see the original images are different but the heatmaps, images, and grad cam are the same for all the images. I don't know whats the reason behind this.
Related
I am working on a CNN multi-class classification of different concentrations (10uM, 30uM, etc.) I create my dataset to include the images as the features and the concentrations as labels. Note that the concentrations are left as a string. When running the code, I am getting the following error:
TypeError: cross_entropy_loss(): argument 'target' (position 2) must be Tensor, not tuple
The following is my dataset class:
class CustomDataset(Dataset):
def __init__(self, path, method):
"""
Args:
csv_path (string): path to csv file
data_path (string): path to the folder where images are
transform: pytorch transforms for transforms and tensor conversion
"""
# Transforms
self.to_tensor = transforms.ToTensor()
# Read the excel file
self.data_path = pd.read_excel(path, sheet_name=method)
# First column contains the image paths
self.img_arr = np.asarray(self.data_path.iloc[:, 0])
# Second column is the labels
self.label_arr = np.asarray(self.data_path.iloc[:, 1])
def __getitem__(self, index):
# Get image name from the pandas df
img_path = self.img_arr[index]
# Open image
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Converts the image from BGR to RGB
# Transform image to tensor
img_tensor = self.to_tensor(img)
# Get label(class) of the image based on the cropped pandas column
img_label = self.to_tensor(self.label_arr[index])
img_label = self.label_arr[index]
return (img_tensor, img_label)
def __len__(self):
return len(self.data_path)
I am aware that the reason is most probably due to the fact that the labels are left as tuples, so the loss function is unable to compare the CNN output with the label. However, I am unable to find any resources that explain how labels are dealt with in multi-class classifications of tuple type labels. The solution seems simple, but I am a bit confused on how to solve it. Can anyone direct me?
EDIT: This is the implemented training loop:
def train_epoch(model,dataloader,loss_fn,optimizer):
train_loss,train_correct = 0.0, 0
model.train() #Sets the mode to train (Helpful when using layers such as DropOut and BatchNorm)
for features,labels in dataloader:
#Zero grad
optimizer.zero_grad()
#Forward Pass
output=model(features)
print(output)
print(labels)
loss=loss_fn(output,labels)
#Backward Pass
loss.backward()
optimizer.step()
train_loss += loss.item()*features.size(0) #features.size is useful when using batches.
scores, predictions = torch.max(output.data,1) # 1 is to create a 1 dimensional tensor with max values from each row
train_correct += (predictions==labels).sum().item()
return train_loss, train_correct
This is the output of "output" and "labels", respectively:
tensor([[-0.0528, -0.0150, -0.0153, -0.0939, -0.0887, -0.0863]],
grad_fn=<AddmmBackward0>)
('70uM',)
I have to apply tf.image.crop_and_resize on my images and want to generate 5 boxes from each image. I have written the below code which works fine
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
# Load the pre-trained Xception model to be used as the base encoder.
xception = keras.applications.Xception(
include_top=False, weights="imagenet", pooling="avg"
)
# Set the trainability of the base encoder.
for layer in xception.layers:
layer.trainable = False
# Receive the images as inputs.
inputs = layers.Input(shape=(299, 299, 3), name="image_input")
input ='/content/1.png'
input = tf.keras.preprocessing.image.load_img(input,target_size=(299,299,3))
image = tf.expand_dims(np.asarray(input)/255, axis=0)
BATCH_SIZE = 1
NUM_BOXES = 5
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
CHANNELS = 3
CROP_SIZE = (24, 24)
boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, maxval=BATCH_SIZE, dtype=tf.int32)
output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
xception_input = tf.keras.applications.xception.preprocess_input(output)
The above code works fine however when I want to display these boxes I run below code
for i in range(5):
# define subplot
plt.subplot(330 + 1 + i)
# generate batch of images
batch = xception_input.next()
# convert to unsigned integers for viewing
image = batch[0].astype('uint8')
image = np.reshape(24,24,3)
# plot raw pixel data
plt.imshow(image)
#show the figure
plt.show()
But it generates this error AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'next'.
You have to use [i] instead of .next()
And there is also problem with converting it to uint8 (but it doesn't need to reshape)
for i in range(5):
plt.subplot(331 + i)
tensor = xception_input[i]
#print(tensor)
tensor = tensor*255
image = np.array(tensor, dtype=np.uint8)
#print(image)
plt.imshow(image)
or use for to get items
for i, tensor in enumerate(xception_input):
#print(tensor)
plt.subplot(331 + i)
tensor = tensor*255
image = np.array(tensor, dtype=np.uint8)
#print(image)
plt.imshow(image)
I don't know what your code should do but this gives me empty images because tensor has values like -0.9 and it convert it all to 0
I have found code online to get the derivative of the total loss with respect to the deep learning weights. I am trying to find the derivative of the weights with respect to the loss of a single class instead of all classes.
I used the following code to get the gradient of an input image with respect to the total loss. If I visualize it, it shows the importance of the pixels for all predictions. But, I would like to compute the derivative of the input image with respect to a particular class (e.g. "lady_bug"). This should show the importance of the pixels for the prediction of lady_bug. Do you have an idea how I can do that?
from keras.applications.vgg19 import VGG19
import numpy as np
import cv2
from keras import backend as K
import matplotlib.pyplot as plt
from keras.applications.inception_v3 import decode_predictions
def get_model():
model = VGG19(include_top=True, weights='imagenet')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
def predict(model, images):
numeric_prediction = model.predict(images)
categorical_prediction = decode_predictions(numeric_prediction, top=1)
return [(x[0][1], x[0][2]) for x in categorical_prediction]
def get_test_image():
# Image
image_path = "lady_bug.jpg"
image = cv2.imread(image_path)
my_image = cv2.resize(image, (224,224))
my_image = np.expand_dims(my_image, axis=0)
return my_image
def visualize_sample(sample, file_path):
plt.figure()
plt.imshow(sample)
plt.savefig(file_path, bbox_inches='tight')
def test_input_gradient():
images = get_test_image()
model = get_model()
prediction = predict(model, images)
print(prediction)
gradients = K.gradients(model.output, model.input) #Gradient of output wrt the input of the model (Tensor)
print(gradients)
sess = K.get_session()
evaluated_gradients = sess.run(gradients[0], feed_dict={model.input:
images})
visualize_sample((evaluated_gradients[0]*(10**9.5)).clip(0,255), "test.png")
if __name__ == "__main__":
test_input_gradient()
Output:
[('ladybug', 0.53532666)]
[<tf.Tensor 'gradients/block1_conv1/convolution_grad/Conv2DBackpropInput:0' shape=(?, 224, 224, 3) dtype=float32>]
It seems the code is taking the gradients of the outputs wrt the inputs.
So, this is just taking a single slice from the outputs.
Warning: This considers a regular model output. I have no idea of what you're doing in decode predictions and the following list.
gradients = K.gradients(model.output[:, lady_bug_class], model.input)
After training my model, I tried to plot graph of the softmax output, but it resulted in the runtime error mentioned in the title.
Here is the following code snippet:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import helper
# Test out your network!
dataiter = iter(testloader)
images, labels = dataiter.next()
img = images[1]
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
# Plot the image and probabilities
helper.view_classify(img, ps, version='Fashion')
The problem is with this part (I guess).
img = images[1]
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
Problem: image you are loading is of dimension 28x28, however, the first index in input to the model is generally batch size. Since there is 1 image only, so you have to make the first dimension to be of size 1. To do that do img = img.view( (-1,) + img.shape) or img=img.unsqueeze(dim=0). Also, it seems that the first layer weight is 784 x 128. i.e the image should be converted to vector and fed to model. For that we do img=img.view(1, -1).
So, in total, you need to do
img = images[1]
img = img.unsqueeze(dim=0)
img=img.view(1, -1)
# TODO: Calculate the class probabilities (softmax) for img
ps = torch.exp(model(img))
or you can just use one command instead of two (unsqueeze is unnecessary)
img = images[1]
img=img.view(1, -1)
I am using Transfer learning for recognizing objects. I used trained VGG16 model as the base model and added my classifier on top of it using Keras. I then trained the model on my data, the model works well. I want to see the feature generated by the intermediate layers of the model for the given data. I used the following code for this purpose:
def ModeloutputAtthisLayer(model, layernme, imgnme, width, height):
layer_name = layernme
intermediate_layer_model = Model(inputs=model.input,
outputs=model.get_layer(layer_name).output)
img = image.load_img(imgnme, target_size=(width, height))
imageArray = image.img_to_array(img)
image_batch = np.expand_dims(imageArray, axis=0)
processed_image = preprocess_input(image_batch.copy())
intermediate_output = intermediate_layer_model.predict(processed_image)
print("outshape of ", layernme, "is ", intermediate_output.shape)
In the code, I used np.expand_dims to add one extra dimension for the batch as the input matrix to the network should be of the form (batchsize, height, width, channels). This code works fine. The shape of the feature vector is 1, 224, 224, 64.
Now I wish to display this as image, for this I understand there is an additional dimension added as batch so I should remove it. Following this I used the following lines of the code:
imge = np.squeeze(intermediate_output, axis=0)
plt.imshow(imge)
However it throws an error:
"Invalid dimensions for image data"
I wonder how can I display the extracted feature vector as an image. Any suggestion please.
Your feature shape is (1,224,224,64), you cannot directly plot a 64 channel image. What you can do is plot the individual channels independently like following
imge = np.squeeze(intermediate_output, axis=0)
filters = imge.shape[2]
plt.figure(1, figsize=(32, 32)) # plot image of size (32x32)
n_columns = 8
n_rows = math.ceil(filters / n_columns) + 1
for i in range(filters):
plt.subplot(n_rows, n_columns, i+1)
plt.title('Filter ' + str(i))
plt.imshow(imge[:,:,i], interpolation="nearest", cmap="gray")
This will plot 64 images in 8 rows and 8 columns.
A possible way to go consists in combining the 64 channels into a single-channel image through a weighted sum like this:
weighted_imge = np.sum(imge*weights, axis=-1)
where weights is an array with 64 weighting coefficients.
If you wish to give all the channels the same weight you could simply compute the average:
weighted_imge = np.mean(imge, axis=-1)
Demo
import numpy as np
import matplotlib.pyplot as plt
intermediate_output = np.random.randint(size=(1, 224, 224, 64),
low=0, high=2**8, dtype=np.uint8)
imge = np.squeeze(intermediate_output, axis=0)
weights = np.random.random(size=(imge.shape[-1],))
weighted_imge = np.sum(imge*weights, axis=-1)
plt.imshow(weighted_imge)
plt.colorbar()
In [33]: intermediate_output.shape
Out[33]: (1, 224, 224, 64)
In [34]: imge.shape
Out[34]: (224, 224, 64)
In [35]: weights.shape
Out[35]: (64,)
In [36]: weighted_imge.shape
Out[36]: (224, 224)