I created my model in PyTorch and it works really well, but when I test with a single image (batch_size=1) it always returns the second class (in this case a dog).
I tried testing with batch sizes > 1 and in all those cases it works!
The architecture:
import torch
from torch import nn
from torchvision import models
from collections import OrderedDict

model = models.densenet121(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(1024, 500)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(500, 2)),
    ('output', nn.LogSoftmax(dim=1))
]))
model.classifier = classifier
So my tensors have shape [batch, 3, 224, 224].
I have tried with:
resize
reshape
unsqueeze(0)
The output for a single image is always [[0.4741, 0.5259]].
My Test Code
from PIL import Image
from torchvision import transforms

imsize = 256
loader = transforms.Compose([transforms.Scale(imsize), transforms.ToTensor()])

def image_loader(image_name):
    """load image, returns cuda tensor"""
    image = Image.open(image_name)
    image = loader(image).float()
    image = image.unsqueeze(0)
    return image.cuda()
image = image_loader('Cat_Dog_data/test/cat/cat.16.jpg')

with torch.no_grad():
    logits = model.forward(image)

ps = torch.exp(logits)
_, predTest = torch.max(ps, 1)
print(ps)  # same value in all cases

imagen_mostrar = images[ii].to('cpu')
helper.imshow(imagen_mostrar, title=clas_perro_gato(predTest), normalize=True)
Second Test Code
andrea_data = datasets.ImageFolder(data_dir + '/andrea', transform=test_transforms)
andrealoader = torch.utils.data.DataLoader(andrea_data, batch_size=1, shuffle=True)
dataiter = iter(andrealoader)
images, labels = dataiter.next()
images, labels = images.to(device), labels.to(device)
ps = torch.exp(model.forward(images))
_, predTest = torch.max(ps,1)
print(ps.float())
If I change my batch_size to 1, it always returns a tensor saying the image is a dog, e.g. [0.43, 0.57].
Thanks!
I realized that my model wasn't in eval mode.
So I just added model.eval() and now it works for any batch size.
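A minimal sketch of the fix, reusing the model and image_loader from the question (DenseNet contains batch-norm layers, which behave differently in training mode, so single-image batches can give odd results until eval mode is set; the model is assumed to already be on the GPU):

model.eval()  # switch batch norm / dropout layers to inference behavior

image = image_loader('Cat_Dog_data/test/cat/cat.16.jpg')
with torch.no_grad():
    ps = torch.exp(model(image))
    _, predTest = torch.max(ps, 1)
print(ps, predTest)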
You can use this code to test a single image with your trained model:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

def pre_image(image_path, model):
    img = Image.open(image_path)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    transform_norm = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        transforms.Normalize(mean, std)
    ])
    # get normalized image
    img_normalized = transform_norm(img).float()
    img_normalized = img_normalized.unsqueeze_(0)
    # input = Variable(image_tensor)  # not needed on recent PyTorch versions
    img_normalized = img_normalized.to(device)  # `device` is the torch.device your model lives on
    # print(img_normalized.shape)
    with torch.no_grad():
        model.eval()
        output = model(img_normalized)
        # print(output)
        index = output.data.cpu().numpy().argmax()
        classes = train_ds.classes  # `train_ds` is the ImageFolder dataset used for training
        class_name = classes[index]
        return class_name
example:
predict_class = pre_image("C:/Users/Salio/Desktop/example.jpeg",your_model)
print(predict_class)
If your model is "correct" and it really just predicts a dog, you can get the label with torch.argmax(output, dim=1) no matter the batch size.
Anyway, you shouldn't use LogSoftmax as the activation here. Use torch.nn.BCEWithLogitsLoss as your loss function, remove the activation from your final layer, and output only one neuron (the probability of the image being a dog). It would look like this in your case:
classifier = nn.Sequential(
    OrderedDict(
        [
            ("fc1", nn.Linear(1024, 500)),
            ("relu", nn.ReLU()),
            ("fc2", nn.Linear(500, 1)),
            # See? No activation needed
        ]
    )
)
You can get the correct label from the above network simply by checking output > 0, and you get numerical stability "for free".
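A minimal sketch of how training and prediction would change with this setup (assuming labels are 0 = cat, 1 = dog; the criterion and the output > 0 check are the only new pieces):

criterion = nn.BCEWithLogitsLoss()

# Training step: the single-logit output is compared against float labels of shape [batch, 1]
logits = model(images)                      # raw scores, no final activation
loss = criterion(logits, labels.float().unsqueeze(1))

# Prediction: a positive logit means "dog", a negative one means "cat"
with torch.no_grad():
    is_dog = model(images) > 0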
Related
Hi there, I am doing transfer learning using the VGG16 pre-trained model. I want to extract features from VGG16 and build my own model on top, as I only have access to a CPU. Here is my build and train setup.
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from keras.applications import Xception, VGG16, ResNet50

conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
conv_base.summary()

base_dir = 'NewDCDatatset'
train_dir = os.path.join(base_dir, 'Train')
validation_dir = os.path.join(base_dir, 'Validation')
test_dir = os.path.join(base_dir, 'Test')

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 5

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 7, 7, 512))
    labels = np.zeros(shape=(sample_count, 2))
    generator = datagen.flow_from_directory(directory, target_size=(224, 224), batch_size=batch_size, class_mode='categorical')
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels

train_features, train_labels = extract_features(train_dir, 2000)
validation_features, validation_labels = extract_features(validation_dir, 420)
test_features, test_labels = extract_features(test_dir, 420)

train_features = np.reshape(train_features, (2000, 7 * 7 * 512))
validation_features = np.reshape(validation_features, (420, 7 * 7 * 512))
test_features = np.reshape(test_features, (420, 7 * 7 * 512))

from keras import models
from keras import layers
from keras import optimizers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_dim=7 * 7 * 512))
model.add(layers.Dense(2, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

history = model.fit(train_features, train_labels, epochs=2, batch_size=5, shuffle=True)
print(model.evaluate(test_features, test_labels))
model.save('TLFACE.h5')

# predictions = model.predict_generator(test_features, steps=5)
# print(predictions)
And here is the setup for how I am predicting new images with my model to classify cat vs. dog, but the results are not accurate; I can seldom classify any image correctly. I don't know what mistake I am making: is it a matter of resizing the image, or something else at prediction time?
from keras.models import load_model
deep = load_model('TLFACE.h5')
from PIL import Image
import numpy as np
import cv2
file_nam = '4705.jpg'
img = cv2.imread(file_nam)
img = cv2.imshow('frame',img)
cv2.waitKey(1000)
img = Image.open(file_nam).convert("L")
img = img.resize((256,98))
im2arr = np.array(img)
im2arr = im2arr.reshape(1,25088)
# Predicting the test set results
y_pred = deep.predict(im2arr)
print(y_pred)
print(y_pred[0][0])
According to your code, your model receives a feature vector of size 1 x 7*7*512 = 1 x 25088. These features are the encoding of an image by the conv_base model (implemented in your extract_features method).
However, in your example at prediction time you just take an image, resize it to (256, 98), and flatten it to (1, 25088). You completely skip the extract_features phase. In other words, you try to predict on completely different data than you used during training (and during the automated evaluation inside the fit method). You need to use the exact same preprocessing for testing: first rescale the image to size 224x224x3, then extract the features using the conv_base model, and finally use your new model to perform the prediction on the extracted features.
Having said that, I want to point out that this is a very "dirty" implementation; it is not organized properly and it is easy to get confused this way. You actually want to build a single model that performs classification on an image, rather than doing all the heavy coding of extracting features yourself. To do so you can just use something like:
pretrained_vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
pretrained_vgg16.trainable = False  # important if you don't want to modify the base model weights during training

new_model = models.Sequential([
    pretrained_vgg16,
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax')
])
In addition, please note that it is very important to decide whether you want to modify the pre-trained model weights during training or not. If not, you need to invoke the pretrained_vgg16.trainable = False line.
Now you can use new_model.fit with a generator that loads the images and rescales them to 224x224x3, without explicitly extracting features. Another important comment is that the original VGG16 model was probably trained on normalized data (meaning pixel values were normalized to the range [0, 1]), so you also need to apply the same preprocessing to your images for ideal results.
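As a rough sketch, single-image prediction with new_model could then look like this, assuming the same 1./255 rescaling used by the training generator (the file name here is just a placeholder):

import numpy as np
from keras.preprocessing import image

img = image.load_img('4705.jpg', target_size=(224, 224))  # resize to the training input size
x = image.img_to_array(img) / 255.0                       # same rescaling as ImageDataGenerator(rescale=1./255)
x = np.expand_dims(x, axis=0)                             # shape (1, 224, 224, 3)

probs = new_model.predict(x)
print(probs, np.argmax(probs[0]))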
I am currently making an Airbus/Boeing classifier that can determine which aircraft an image shows. I have resized my images to 1000 by 1000. Would any of you mind giving me an example of how I should go about constructing a model with TensorFlow and Keras? Also, what types of layers should I include? I am new to TensorFlow, so I am not aware of which layers to use for maximum accuracy. Thank you. Here is my code:
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow import keras

boeing_dir = '#'
airbus_dir = '#'
path = '#'

boeing_data = []
boeing_label = []
airbus_data = []
airbus_label = []

print('begun')

for filename in os.listdir(boeing_dir):
    if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
        path_b = os.path.join(boeing_dir, filename)
        im = cv2.imread(path_b)
        im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
        im = cv2.imread(path_b, cv2.IMREAD_GRAYSCALE)
        im = cv2.resize(im, (1000, 1000))
        boeing_data.append(im)
        boeing_label.append(0)
        print(im.shape)

for filename in os.listdir(airbus_dir):
    if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
        path_b = os.path.join(airbus_dir, filename)
        im = cv2.imread(path_b)
        im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
        im = cv2.imread(path_b, cv2.IMREAD_GRAYSCALE)
        im = cv2.resize(im, (1000, 1000))
        airbus_data.append(im)
        airbus_label.append(1)
        print(im.shape)

training_data = boeing_data + airbus_data
training_label = boeing_label + airbus_label

print(training_data)
print(training_label)

training_data = np.array(training_data)  # .reshape(-1, 1000, 1000, 1)
training_label = np.asarray(training_label)
I have split the images into images and labels. After this, how do I construct my model?
You should probably experiment first with a prebuilt model. You should do some research before building your own model.
from tensorflow.keras.applications.densenet import DenseNet201 as b #
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
The applications module offers a lot more models to choose from.
traingen = image.ImageDataGenerator(rescale=1./255)
validationgen = image.ImageDataGenerator(rescale=1./255)
testgen = image.ImageDataGenerator(rescale=1./255)

train = traingen.flow_from_directory("train", target_size=(1000, 1000), batch_size=batchSize, shuffle=True)
val = validationgen.flow_from_directory("validation", target_size=(1000, 1000), batch_size=size, shuffle=False)
test = testgen.flow_from_directory("test", target_size=(1000, 1000), batch_size=size, shuffle=False)
For this you should split your data into three folders: one each for train, validation, and test data. Inside each of these folders you should have two more folders called "boeing" and "airbus" that contain the respective images. This code reads the images from those three folders and infers the class from the folder names.
base = b(include_top=False,  # include_top decides whether to include the dense layer that acts as the classifier; not needed here because we build our own below
         weights="imagenet",  # weights learned by the model from being trained on the ImageNet dataset
         input_shape=(1000, 1000, 3))  # for RGB images; for grayscale images you should use input_shape=(1000, 1000, 1)

x = base.output
x = layers.GlobalAveragePooling2D()(x)  # collapse the 4-D feature map to a vector so the Dense classifier gets a flat input
predictions = layers.Dense(2, activation='softmax')(x)
model = Model(inputs=base.input, outputs=predictions)

model.compile(optimizer=optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=["categorical_accuracy"])

history = model.fit_generator(train,
                              steps_per_epoch=np.ceil(numImages / batchSize),
                              epochs=50,
                              verbose=2,
                              validation_data=val,
                              validation_steps=np.ceil(numImages / size))
By using a pretrained network you can take advantage of the knowledge learned from ImageNet; those patterns should be useful since you are working with natural images.
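As a rough sketch of how you could check the result on the held-out test generator afterwards (numTestImages is a placeholder for the number of images in the test folder; evaluate_generator matches the older Keras API used above):

test_loss, test_acc = model.evaluate_generator(test, steps=np.ceil(numTestImages / size))
print(test_loss, test_acc)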
I have found code online to get the derivative of the total loss with respect to the network weights. Now I am trying to find the derivative with respect to the loss of a single class instead of all classes.
I used the following code to get the gradient of an input image with respect to the total loss. If I visualize it, it shows the importance of the pixels for all predictions. But, I would like to compute the derivative of the input image with respect to a particular class (e.g. "lady_bug"). This should show the importance of the pixels for the prediction of lady_bug. Do you have an idea how I can do that?
from keras.applications.vgg19 import VGG19
import numpy as np
import cv2
from keras import backend as K
import matplotlib.pyplot as plt
from keras.applications.inception_v3 import decode_predictions

def get_model():
    model = VGG19(include_top=True, weights='imagenet')
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def predict(model, images):
    numeric_prediction = model.predict(images)
    categorical_prediction = decode_predictions(numeric_prediction, top=1)
    return [(x[0][1], x[0][2]) for x in categorical_prediction]

def get_test_image():
    # Image
    image_path = "lady_bug.jpg"
    image = cv2.imread(image_path)
    my_image = cv2.resize(image, (224, 224))
    my_image = np.expand_dims(my_image, axis=0)
    return my_image

def visualize_sample(sample, file_path):
    plt.figure()
    plt.imshow(sample)
    plt.savefig(file_path, bbox_inches='tight')

def test_input_gradient():
    images = get_test_image()
    model = get_model()
    prediction = predict(model, images)
    print(prediction)

    gradients = K.gradients(model.output, model.input)  # gradient of the output w.r.t. the input of the model (Tensor)
    print(gradients)

    sess = K.get_session()
    evaluated_gradients = sess.run(gradients[0], feed_dict={model.input: images})
    visualize_sample((evaluated_gradients[0] * (10**9.5)).clip(0, 255), "test.png")

if __name__ == "__main__":
    test_input_gradient()
Output:
[('ladybug', 0.53532666)]
[<tf.Tensor 'gradients/block1_conv1/convolution_grad/Conv2DBackpropInput:0' shape=(?, 224, 224, 3) dtype=float32>]
It seems the code is taking the gradients of all the outputs w.r.t. the inputs. The fix is just to take a single slice from the outputs.
Warning: this assumes a regular model output; I have no idea what you're doing in decode_predictions and the list comprehension that follows.
gradients = K.gradients(model.output[:, lady_bug_class], model.input)
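As a rough sketch of how the class index could be obtained and the class-specific gradient evaluated, reusing the session-based pattern from the question (here lady_bug_class is taken from the model's own top prediction, but it could also be hard-coded from the ImageNet class list):

numeric_prediction = model.predict(images)
lady_bug_class = int(np.argmax(numeric_prediction[0]))   # index of the predicted class

class_gradients = K.gradients(model.output[:, lady_bug_class], model.input)
sess = K.get_session()
evaluated = sess.run(class_gradients[0], feed_dict={model.input: images})

# Same scaling/visualization as in the question
visualize_sample((evaluated[0] * (10**9.5)).clip(0, 255), "lady_bug_gradient.png")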
How do I extract the features from a specific layer of a pre-trained PyTorch model (such as ResNet or VGG), without doing a forward pass again?
New answer
Edit: there's a new feature in torchvision v0.11.0 that allows extracting features.
For example, if you want to extract features from the layer layer4.2.relu_2, you can do it like this:
import torch
from torchvision.models import resnet50
from torchvision.models.feature_extraction import create_feature_extractor

x = torch.rand(1, 3, 224, 224)
model = resnet50()

return_nodes = {
    "layer4.2.relu_2": "layer4"
}

model2 = create_feature_extractor(model, return_nodes=return_nodes)
intermediate_outputs = model2(x)
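If you are not sure which node names are available, torchvision ships a helper in the same module (also since v0.11.0) that lists them:

from torchvision.models.feature_extraction import get_graph_node_names

# Node names can differ between train and eval mode, so both lists are returned
train_nodes, eval_nodes = get_graph_node_names(model)
print(eval_nodes)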
Old answer
You can register a forward hook on the specific layer you want. Something like:
def some_specific_layer_hook(module, input_, output):
    pass  # the value is in 'output'

model.some_specific_layer.register_forward_hook(some_specific_layer_hook)
model(some_input)
For example, to obtain the res5c output in ResNet, you may want to use a nonlocal variable (or global in Python 2):
res5c_output = None

def res5c_hook(module, input_, output):
    nonlocal res5c_output
    res5c_output = output

resnet.layer4.register_forward_hook(res5c_hook)
resnet(some_input)
# Then, use `res5c_output`.
The accepted answer is very helpful! I'm posting a complete example here (using a registered hook as described by @bryant1410) for the lazy ones looking for a working solution:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image

def get_feat_vector(path_img, model):
    '''
    Input:
        path_img: string, /path/to/image
        model: a pretrained torch model
    Output:
        my_output: torch.tensor, output of avgpool layer
    '''
    input_image = Image.open(path_img)
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)

    with torch.no_grad():
        my_output = None

        def my_hook(module_, input_, output_):
            nonlocal my_output
            my_output = output_

        a_hook = model.avgpool.register_forward_hook(my_hook)
        model(input_batch)
        a_hook.remove()
        return my_output
There you have your feature extraction function; simply call it using the snippet below to obtain features from the resnet18.avgpool layer:
model = models.resnet18(pretrained=True)
model.eval()
path_ = '/path/to/image'
my_feature = get_feat_vector(path_, model)
Hello, I am trying to build my first RNN using Keras and TensorFlow, but I am stuck on an issue of reshaping my images to fit into the model.
I have looked at this post but could not figure out the reshaping:
Keras - Input a 3 channel image into LSTM
What I have is a bunch of images taken at every frame of a video. I saved all the frames outside of Python, so I have a very large folder of images. I separated the frames into segments of 21 frames, so 21 images per motion that I want to capture. I want to read in these 21 images as one sequence. I have the same sequence captured from multiple cameras/angles, which I want to use in this model. What I want to try is to model a movement and see if a person is doing this movement or not, so it is basically a binary yes/no model. Not the most sophisticated, but it's a learning process to use this model and Keras.
I need help figuring out how to use these images inside the Keras model. I have looked at a few tutorials on the MNIST data set but that didn't help me figure this out.
Any help will be appreciated.
This is the error I get when I try to train the model:
ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (2026, 200, 200, 1)
My code is this:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from tqdm import tqdm
import cv2
import os
import numpy as np

imageSize = 200

# Create labels for each image
def labelImage(img):
    wordLabel = img.split('.')[-3]
    # Conversion to one-hot array [lat, not]
    if wordLabel == "FWAC":
        return [1, 0]
    else:
        return [0, 1]

# Process images and add labels
# Convert data into an array and add its label
def makeTrainingData():
    print("Creating Training Data")
    trainingData = []
    for img in tqdm(os.listdir(trainDir)):
        label = labelImage(img)
        path = os.path.join(trainDir, img)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (imageSize, imageSize))
        trainingData.append([np.array(img), np.array(label)])

    # Save the array file to load it into other models if needed
    np.save("trainingData.npy", trainingData)
    print("Training Data Saved")
    return trainingData

# Process the testing data in the same manner
def processTestData():
    print("Creating Testing Data")
    testData = []
    for img in tqdm(os.listdir(testDri)):
        print("image", img)
        path = os.path.join(testDri, img)
        imgNum = img.split(".")[0]
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (imageSize, imageSize))
        testData.append([np.array(img), imgNum])

    np.save("testingData.npy", testData)
    print("Testing Data Saved")
    return testData

rnnSize = 512

model = Sequential()
model.add(LSTM(rnnSize, input_shape=(imageSize, imageSize)))
model.add(Dense(1024))
model.add(Activation('relu'))

model.add(Dense(50))
model.add(Activation('sigmoid'))

model.add(Dense(3))
model.add(Activation('softmax'))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

# Data
trainDir = "D:/TrainingDataSets/TrainingSet/"
testDri = "D:/TrainingDataSets/TestingSet/"

# trainData = makeTrainingData()
# testData = processTestData()

trainData = np.load('trainingData.npy')
testData = np.load("testingData.npy")

# Resize the image to this. See above
train = trainData[:-500]
test = trainData[-200:]

x = []
y = []
for xi in trainData:
    x.append(xi[0].reshape((-1, imageSize, imageSize)))
    y.append(xi[1])

x_train = np.array([i[0] for i in train]).reshape(-1, imageSize, imageSize, 1)
y_train = [i[1] for i in train]
test_x = np.array([i[0] for i in test]).reshape(-1, imageSize, imageSize, 1)
test_y = [i[1] for i in test]

epoch = 5
batchSize = 100

model.fit(x_train, y_train, epochs=epoch, batch_size=batchSize, verbose=1, shuffle=False)
For the error, add this line before the dense layers:
model.add(Flatten())
And before that, you should import:
from keras.layers import Flatten
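A minimal sketch of where that change would sit in the model definition from the question (only the placement is shown; rnnSize and imageSize keep their values from above):

from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Flatten

rnnSize = 512
imageSize = 200

model = Sequential()
model.add(LSTM(rnnSize, input_shape=(imageSize, imageSize)))
model.add(Flatten())           # the suggested addition, before the dense layers
model.add(Dense(1024))
model.add(Activation('relu'))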