I am trying to do image recognition with ResNet50 in Python (keras). I tried to do the same task with VGG16, and I got some results like these (which seem okay to me):
resultsVGG16 . The training and validation accuracy/loss functions are getting better with each step, so the network must learn.
However, with ResNet50 the training accuracy/loss are getting better, while the validation accuracy/loss are not changing: resultsResNet
I've used the same code and data in both of the times, only the model is changed.
So what are the reasons of ResNet50 learning only on the training data?
My VGG16 model looks like this:
'''python
# Transfer-learning classifier on top of an ImageNet-pretrained VGG16 base.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)
# Freeze every layer of the base except the last four.
for layer in base_model.layers[:-4]:
    layer.trainable = False

# Base network followed by a small dense head with a softmax over the classes.
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(NUM_CLASSES, activation='softmax'),
])
The ResNet50 model is very similar:
# Transfer-learning classifier on top of an ImageNet-pretrained ResNet50 base.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(image_size, image_size, 3),
)
# Freeze every layer of the base except the last eight.
for layer in base_model.layers[:-8]:
    layer.trainable = False

# Base network followed by a small dense head with a softmax over the classes.
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(NUM_CLASSES, activation='softmax'),
])
There is no mistake in your model; this is more likely an issue with ResNet itself, since several issues (1, 2, 3) have already been raised on GitHub and Stack Overflow regarding this pre-trained model.
Having said that, I found out a workaround, which worked for me, and hopefully works for you as well.
Workaround was to replace the Data Augmentation step,
# Original generators: pixel values are rescaled by 1/255 and random
# rotation/shift/brightness/shear/zoom/flip augmentation is applied.
# NOTE(review): the validation generator is configured with exactly the same
# random augmentation as the training generator.
Train_Datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=0.2,
height_shift_range=0.2, brightness_range=(0.2, 0.7), shear_range=45.0, zoom_range=60.0,
horizontal_flip=True, vertical_flip=True)
Val_Datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=0.2,
height_shift_range=0.2, brightness_range=(0.2, 0.7), shear_range=45.0, zoom_range=60.0,
horizontal_flip=True, vertical_flip=True)
with tf.keras.applications.resnet.preprocess_input, as shown below:
# Replacement generators: no rescaling and no augmentation; each image is
# passed through the ResNet preprocess_input function instead.
Train_Datagen = ImageDataGenerator(dtype = 'float32', preprocessing_function=tf.keras.applications.resnet.preprocess_input)
Val_Datagen = ImageDataGenerator(dtype = 'float32', preprocessing_function=tf.keras.applications.resnet.preprocess_input)
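By replacing the rescaling/augmentation step as shown above, my validation accuracy, which had been stuck at 50%, increased gradually up to 97%. The likely reason is that the ImageNet weights of ResNet50 expect inputs prepared by the model's own preprocess_input function (which, according to the Keras documentation, converts the images from RGB to BGR and zero-centers each color channel with respect to ImageNet, without scaling) rather than pixel values rescaled to [0, 1].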
Complete working code which resulted in more than 95% of both Train and Validation Accuracy (for Cat and Dog Dataset) using ResNet50 is shown below:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
# The convolutional base of the pre-trained model is used as the first layer
# of the classifier; only its last eight layers stay trainable.
Conv_Base = ResNet50(include_top=False, weights='imagenet', input_shape=(150, 150, 3))
for base_layer in Conv_Base.layers[:-8]:
    base_layer.trainable = False

# Binary classifier head: flatten -> dense(256) -> dropout -> sigmoid unit.
model = Sequential([
    Conv_Base,
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(0.5),
    Dense(units=1, activation='sigmoid'),
])
model.summary()
# Locate the train / validation / test folders of the dataset.
base_dir = 'Deep_Learning_With_Python_Book/Dogs_Vs_Cats_Small'
# Fail immediately with a clear error: carrying on without the dataset folder
# would only surface later as a NameError on train_dir / validation_dir.
if not os.path.exists(base_dir):
    raise FileNotFoundError("The Folder, {}, doesn't exist".format(base_dir))
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
batch_size = 20

# Both generators apply only the ResNet-specific preprocessing function.
Train_Datagen = ImageDataGenerator(dtype='float32',
                                   preprocessing_function=tf.keras.applications.resnet.preprocess_input)
Val_Datagen = ImageDataGenerator(dtype='float32',
                                 preprocessing_function=tf.keras.applications.resnet.preprocess_input)

train_gen = Train_Datagen.flow_from_directory(directory=train_dir, target_size=(150, 150),
                                              batch_size=batch_size, class_mode='binary')
val_gen = Val_Datagen.flow_from_directory(directory=validation_dir, target_size=(150, 150),
                                          batch_size=batch_size, class_mode='binary')

epochs = 15
Number_Of_Training_Images = train_gen.classes.shape[0]
# steps_per_epoch has to be a whole number of batches; round up so that a
# final, partially filled batch is still part of the epoch.
steps_per_epoch = int(np.ceil(Number_Of_Training_Images / batch_size))

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
# The generator already yields batches, so no batch_size is passed to fit().
history = model.fit(train_gen, epochs=epochs,
                    validation_data=val_gen, steps_per_epoch=steps_per_epoch)
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit().
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']
No_Of_Epochs = range(epochs)

# Figure 1: training vs. validation accuracy.
plt.plot(No_Of_Epochs, train_acc, marker='o', color='blue', markersize=12,
         linewidth=2, label='Training Accuracy')
plt.plot(No_Of_Epochs, val_acc, marker='.', color='red', markersize=12,
         linewidth=2, label='Validation Accuracy')
plt.title('Training Accuracy and Testing Accuracy w.r.t Number of Epochs')
plt.legend()

# Figure 2: training vs. validation loss.
plt.figure()
plt.plot(No_Of_Epochs, train_loss, marker='o', color='blue', markersize=12,
         linewidth=2, label='Training Loss')
# The 'Validation Loss' curve must be drawn from val_loss, not val_acc.
plt.plot(No_Of_Epochs, val_loss, marker='.', color='red', markersize=12,
         linewidth=2, label='Validation Loss')
plt.title('Training Loss and Testing Loss w.r.t Number of Epochs')
plt.legend()
plt.show()
Metrics are shown in the below graph,
Related
I'm trying to train a CNN on a set of images. There are 2 folders: training_set and test_set, each containing 2 classes.
They look like this:
training_set/
classA/
img1.png
img2.png
...
classB/
img1.png
img2.png
...
test_set/
classA/
img1.png
img2.png
...
classB/
img1.png
img2.png
...
Code looks like this, where the training set is split into a training and validation set:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.client import device_lib
import numpy as np
import matplotlib.pyplot as plt
# Report which devices TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(device_lib.list_local_devices())

# Set image properties
img_height = 369
img_width = 496
batch_size = 32

# Options shared by the training and validation subsets. Both subsets are cut
# from the same directory with the same seed and validation_split.
_split_options = dict(
    labels='inferred',
    label_mode="binary",  # not sure about this one though, as the classes are not called '0' and '1'
    class_names=['classA', 'classB'],
    color_mode='rgb',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=True,
    seed=123,
    validation_split=0.2,
)

# Import data set from directory
train_images = tf.keras.preprocessing.image_dataset_from_directory(
    "path_to_training_set", subset="training", **_split_options
)
val_images = tf.keras.preprocessing.image_dataset_from_directory(
    "path_to_training_set", subset="validation", **_split_options
)
Then:
from matplotlib import pyplot

img_height = 369
img_width = 496
epochs = 25

# Small CNN: three convolution / max-pooling stages followed by a dense head.
model = tf.keras.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# Since we have two classes: a single sigmoid unit.
model.add(layers.Dense(1, activation='sigmoid'))

# BinaryCrossentropy because there are 2 classes
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

# Feed the model. The datasets are already batched when they are created, so
# batch_size is not passed to fit().
history = model.fit(train_images, epochs=epochs, verbose=1, validation_data=val_images)

# Plot the per-epoch accuracy and loss side by side.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
Now that the model is trained, it shows plots of the training and validation accuracy and loss. I try to load my test set using:
# Load the test set from its own directory with the same class names, colour
# mode, image size and batch size as the training data; no validation split.
test_images = tf.keras.preprocessing.image_dataset_from_directory(
"path_to_test_set",
labels='inferred',
label_mode="binary",
class_names = ['classA', 'classB'],
color_mode = 'rgb',
batch_size = batch_size, # not really applicable as I want to use the whole set?
image_size = (img_height, img_width),
shuffle = True,
seed = 123,
validation_split = None
)
But is this the correct way? How do I deal with the batch_size? I think I'd evaluate the model with my test set using:
# Evaluate on the test set; evaluate() returns the loss followed by the
# compiled metrics (here: accuracy).
test_loss, test_acc = model.evaluate(x=test_images, verbose=2)
print('\nTest accuracy:', test_acc)
but I don't think this is sufficient as I'd like the accuracy, precision, recall and F1-score. I'm also not even sure the right thing is happening here (with how the test set is loaded).
So basically: How do I load my test set and calculate accuracy, precision, recall and F1-score?
You need to iterate over the data, then you can collect predictions and true classes.
# Collect the per-batch outputs in lists and concatenate once at the end.
# Concatenating onto an empty 1-D array fails, because the model outputs and
# the binary labels are 2-D arrays of shape (batch, 1).
batch_probs = []
batch_labels = []
for images, labels in test_images:
    batch_probs.append(model(images, training=False).numpy())
    batch_labels.append(labels.numpy())

# predicted_probs keeps the shape (n_samples, 1); true_classes is flattened
# to (n_samples,).
predicted_probs = np.concatenate(batch_probs, axis=0)
true_classes = np.concatenate(batch_labels, axis=0).ravel()
Since they are sigmoid outputs, you need to transform them into classes with a threshold, i.e 0.5 here:
# Threshold every sigmoid output at 0.5: x[0] is the single output value of a
# row, and 1 * (bool) turns the comparison result into 0 or 1.
predicted_classes = [1 * (x[0]>=0.5) for x in predicted_probs]
After that you can get the confusion matrix etc:
# Confusion matrix of the true classes (first argument) against the
# thresholded predictions (second argument).
conf_matrix = tf.math.confusion_matrix(true_classes, predicted_classes)
There are 2550 images in the training set and 1530 images in the test set. To classify these images into two classes, a hybrid deep learning model combining a 2D CNN and an LSTM is used, but an error occurs when running the code, as shown below. I was wondering if someone could help me solve it. Thanks in advance.
ERROR:
RuntimeError: You must compile your model before using it
# importing libraries
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.preprocessing.image import ImageDataGenerator
# Training generator: rescale pixels by 1/255 and apply random shear, zoom and
# horizontal flips.
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# Test generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
# Both iterators yield batches of 32 images resized to 64x64 with binary labels.
training_set = train_datagen.flow_from_directory(
'D:\\thesis\\Paper 3\\Feature Extraction\\two_dimension_Feature_extraction\\stft_feature\\Training_set',
target_size=(64, 64),
batch_size=32,
class_mode='binary')
test_set = test_datagen.flow_from_directory(
'D:\\thesis\\Paper 3\\Feature Extraction\\two_dimension_Feature_extraction\\stft_feature\\Test_set',
target_size=(64, 64),
batch_size=32,
class_mode='binary')
# CNN + LSTM classifier. The generators yield 4-D batches of 64x64 RGB images,
# so each image is first reshaped into a sequence of length 1; the
# TimeDistributed CNN then produces one feature vector per timestep for the
# LSTM layers.
from keras.layers import Reshape

# Initializing the model
classifier = Sequential()
# The input shape is declared on the first layer so the Sequential model is
# built before compile/fit; Reshape adds the time axis: (64, 64, 3) -> (1, 64, 64, 3).
classifier.add(Reshape((1, 64, 64, 3), input_shape=(64, 64, 3)))
# Convolution: 32 feature detectors of size 3x3, applied to every timestep.
classifier.add(TimeDistributed(Convolution2D(32, (3, 3), activation='relu')))
# Max pooling
classifier.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
# Flattening: one feature vector per timestep
classifier.add(TimeDistributed(Flatten()))
# Recurrent part: the first LSTM returns the full sequence for the second one.
classifier.add(LSTM(units=20, return_sequences=True))
classifier.add(LSTM(units=20))
# Full connection
classifier.add(Dense(units=128, activation='relu'))
classifier.add(Dense(units=1, activation='sigmoid'))
# Compiling the model
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Fitting the model to the images. Steps are counted in batches, not images:
# len(generator) is the number of batches needed for one full pass over its
# directory, so every epoch sees each image once.
history = classifier.fit_generator(training_set,
                                   steps_per_epoch=len(training_set),
                                   epochs=25,
                                   validation_data=test_set,
                                   validation_steps=len(test_set))
import matplotlib.pyplot as plt

# Per-epoch metrics recorded during training.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy figure.
plt.plot(epochs, acc, 'r', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss figure.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.plot()
plt.legend()
plt.show()

# Final evaluation on the test generator.
test_loss, test_acc = classifier.evaluate(test_set)
print('test_acc:', test_acc)
The training and validation curves show spikes in both loss and accuracy when training VGG16. I am using transfer learning and have replaced the classifier for a binary gender-classification problem. Can someone suggest why I am getting such spikes and how I can reduce them?
The code is as follows :
from keras.layers import Input, Lambda, Dense, Flatten, Dropout
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
# All images are resized to this height/width.
IMAGE_SIZE = [224, 224]
train_path = 'E:/decompressed_images/data_set/train'
valid_path = 'E:/decompressed_images/data_set/validation'

# VGG16 convolutional base with ImageNet weights and without its classifier.
vgg = VGG16(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)
# Keep the pre-trained weights fixed.
for vgg_layer in vgg.layers:
    vgg_layer.trainable = False

# The number of paths matched by this pattern sets the size of the output layer.
folders = glob('E:/decompressed_images/data_set/train*')

# New head: flatten the base output and classify with a sigmoid Dense layer.
x = Flatten()(vgg.output)
prediction = Dense(len(folders), activation='sigmoid')(x)

# Assemble the full model and show its structure.
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

# Loss, optimizer and reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
from keras.preprocessing.image import ImageDataGenerator
# Training generator: rescale by 1/255 plus random flips, shifts, zoom and
# rotation.
train_datagen = ImageDataGenerator(rescale = 1./255,
horizontal_flip = True,
vertical_flip = True,
width_shift_range = 0.1,
height_shift_range = 0.1,
zoom_range = 0.1,
rotation_range = 10)
# Test generator: rescaling only.
test_datagen = ImageDataGenerator(rescale = 1./255)
# Both iterators yield batches of 64 images resized to 224x224 with binary labels.
training_set = train_datagen.flow_from_directory('E:/Ullu/new_trial__/balanced_dataset/train',
target_size = (224, 224),
batch_size = 64,
class_mode = 'binary')
test_set = test_datagen.flow_from_directory('E:/Ullu/new_trial__/balanced_dataset/test',
target_size = (224, 224),
batch_size = 64,
class_mode = 'binary')
# Train for 100 epochs; steps are the number of batches in each iterator, and
# the test iterator doubles as validation data.
r = model.fit_generator(
training_set,
validation_data=test_set,
epochs=100,
steps_per_epoch=len(training_set),
validation_steps=len(test_set)
)
# Loss curves. The figure is saved BEFORE plt.show(): once the window shown by
# plt.show() is closed the current figure is gone, and a later savefig would
# write an empty image.
plt.plot(r.history['loss'], label='train loss')
plt.plot(r.history['val_loss'], label='val loss')
plt.legend()
plt.savefig('E:/Model_128_30/LossVal_loss.png')
plt.show()

# Accuracy curves, saved the same way.
plt.plot(r.history['accuracy'], label='train acc')
plt.plot(r.history['val_accuracy'], label='val acc')
plt.legend()
plt.savefig('E:/Model_128_30/AccVal_acc.png')
plt.show()
import tensorflow as tf
from keras.models import load_model
# Save the trained model to an HDF5 file.
model.save('E:/Model_128_30/128_30_wt.h5')
High and fluctuating training and validation accuracy image
High and fluctuating training and validation loss image
I tried using dropout layer(0.5) for the final layers but my accuracy and loss for training and validation are the same. Could anyone please suggest me where i am going wrong. Thanks.
Following is the architecture for my model.
# %%
# Defining the model
input_shape = img_data[0].shape

model = Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters, pooling, dropout.
    Convolution2D(32, 3, 3, border_mode='same', input_shape=input_shape),
    Activation('relu'),
    Convolution2D(32, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.75),
    # Stage 2: one 3x3 convolution with 64 filters, pooling, dropout.
    Convolution2D(64, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.75),
    # Classifier: dense(64) -> dropout -> softmax over num_classes.
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.75),
    Dense(num_classes),
    Activation('softmax'),
])

# Alternative optimizer that was tried:
# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=["accuracy"])
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=["accuracy"])
The accuracy is a bit low, so I want to change the architecture to MobileNet. Is there a Keras-based implementation for classifying images with MobileNet?
Maybe this code snippet will help you:
from keras.applications.mobilenet import MobileNet
from keras.applications.mobilenetv2 import MobileNetV2
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, RMSprop, SGD
import keras
from tensorflow import confusion_matrix
from matplotlib import pyplot as plt
import config
import numpy as np
# Directory paths of the train / validation / test data.
train_path = 'data/train'
val_batch = 'data/val'
test_batch = 'data/test'
# Training and validation iterators: images are passed through MobileNet's
# preprocess_input, resized to config.IMAGE_SIZE x config.IMAGE_SIZE and
# served in batches of 20 with categorical labels.
train_batches = ImageDataGenerator(preprocessing_function=keras.applications.mobilenet.preprocess_input).flow_from_directory(train_path, target_size=(config.IMAGE_SIZE, config.IMAGE_SIZE),
class_mode='categorical', batch_size=20)
val_batches = ImageDataGenerator(preprocessing_function=keras.applications.mobilenet.preprocess_input).flow_from_directory(val_batch, target_size=(config.IMAGE_SIZE, config.IMAGE_SIZE),
class_mode='categorical', batch_size=20)
def prepare_image(file):
    """Load one image file and return it as a preprocessed single-image batch.

    The image is loaded at config.IMAGE_SIZE x config.IMAGE_SIZE, converted to
    an array, given a leading batch axis and passed through MobileNet's
    preprocess_input.
    """
    size = (config.IMAGE_SIZE, config.IMAGE_SIZE)
    pixels = image.img_to_array(image.load_img(file, target_size=size))
    single_batch = pixels[np.newaxis, ...]
    return keras.applications.mobilenet.preprocess_input(single_batch)
# Start from MobileNetV2 with its default constructor arguments.
mobilenet = MobileNetV2()
# x = mobilenet.layers[-6].output
# Take the output of the second-to-last layer as features.
# NOTE(review): presumably this is the pooled feature layer just before the
# original classifier - confirm with mobilenet.summary().
x = mobilenet.layers[-2].output
# New 8-way softmax classifier on top of those features.
predictions = Dense(8, activation='softmax')(x)
from keras import Model
model = Model(inputs= mobilenet.input, outputs=predictions)
print(model.summary())
# for layer in model.layers[:-5]:
# layer.trainable = False
# for layer in model.layers[:-1]:
# layer.trainable = False
print(model.summary())
# exit(0)
# Train with SGD for 300 epochs of 10 training and 10 validation steps each.
model.compile(SGD(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit_generator(train_batches, steps_per_epoch=10,
validation_data=val_batches, validation_steps=10, epochs=300, verbose=2)
# Per-epoch metrics recorded during training.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(len(acc))

# Accuracy figure.
plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

# Loss figure.
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
# Get the ground truth class indices from the training generator
ground_truth = train_batches.classes
# Get the label-name -> class-index mapping from the generator
label2index = train_batches.class_indices
# Invert it to obtain the class-index -> label-name mapping
idx2label = dict((v, k) for k, v in label2index.items())
print(idx2label)
# _, val_labels = next(val_batches)
#
# predictions = model.predict_generator(val_batches, steps=1, verbose=0)
#
# cm = confusion_matrix(val_batches, np.round(predictions[:,0]))
# cm_plot_labels = []
#
# for k, v in label2index.items():
# cm_plot_labels.append(v)
#
# print(cm)
# Serialize the model architecture to JSON.
model_json = model.to_json()
with open("mobilenet.json", "w") as json_file:
    json_file.write(model_json)

# Save the full model to an HDF5 file.
from keras.models import save_model
save_model(model, 'mobilenet.h5')

# Convert the saved Keras model to TensorFlow Lite.
import tensorflow as tf
# from tensorflow.contrib import lite
# tf.lite.TocoConverter
converter = tf.lite.TocoConverter.from_keras_model_file("mobilenet.h5")
tflite_model = converter.convert()
# A context manager guarantees that the output file is flushed and closed.
with open("model/mobilenet.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)
Keras has a set of pretrained model for image classification purposes.
You can check the list and the usage here
You can also copy the implementation of the architecture on the github repository, here the link
I've already asked similar question here, but now I have slightly different problem, therefore asking new question.
I decided to use slightly different approach instead of proposed among answers in the referenced question to train, and then fine-tune model.
Update: I've replaced old question provided here with more suitable version
Here is my sequence of actions:
Build VGG16 model and drop top layer (call it no-top model)
Generate bottleneck features using no-top model
Train a separate fully-connected model using bottleneck features
Build new VGG16 model, drop top layers, and attach pretrained top-model
Train concatenated model on dogs/cats data
And here is a code I use to implement aforementioned sequence of actions:
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', DeprecationWarning)
from __future__ import print_function
from itertools import izip_longest as zip_longest
from pprint import pformat as pf
from pprint import pprint as pp
import os
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.layers import Conv2D, MaxPooling2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Dropout, Flatten, Dense, InputLayer, Lambda
from keras.models import Sequential, Model, load_model
from keras.utils.data_utils import get_file
from keras.optimizers import SGD
import keras.backend as K
import numpy as np
# Seed passed to numpy's random generator below.
RANDOM_STATE = 1
# Image size and batch size used by the generators.
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
BATCH_SIZE = 4
# Three per-channel mean values, reshaped to (3, 1, 1) so they broadcast over
# channels-first images.
VGG_MEAN = np.array([123.68, 116.779, 103.939]).reshape((3, 1, 1))
# Location of the pre-trained VGG16 weights file.
VGG16_WEIGHTS_PATH = 'http://www.platform.ai/models/vgg16.h5'
# Dataset locations under the user's home directory.
DATA_ROOT = os.path.join(os.path.expanduser('~'), 'data', 'dogscats')
TRAIN_DIR = os.path.join(DATA_ROOT, 'train')
VALID_DIR = os.path.join(DATA_ROOT, 'valid')
SAMPLES_DIR = os.path.expanduser('~/dogscats_samples')
np.random.seed(RANDOM_STATE)
# Use the 'th' image dimension ordering (channels first), matching the
# (3, width, height) input shape used by the model.
K.set_image_dim_ordering('th')
def get_batches(dirname, gen=None, shuffle=True,
                batch_size=BATCH_SIZE, class_mode='categorical'):
    """Return a directory iterator over ``SAMPLES_DIR/dirname``.

    Args:
        dirname: Sub-folder of SAMPLES_DIR that contains one folder per class.
        gen: ImageDataGenerator used to read the images. When omitted, a
            fresh default ImageDataGenerator is created on every call instead
            of sharing one instance built at function-definition time.
        shuffle: Whether the iterator shuffles the images.
        batch_size: Number of images per batch.
        class_mode: Label format passed to ``flow_from_directory``
            (``None`` yields images only).

    Returns:
        The iterator produced by ``gen.flow_from_directory``.
    """
    if gen is None:
        gen = ImageDataGenerator()
    return gen.flow_from_directory(
        os.path.join(SAMPLES_DIR, dirname),
        # Keras expects target_size as (height, width).
        target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
        class_mode=class_mode,
        shuffle=shuffle,
        batch_size=batch_size)
def vgg_preprocess(x):
    """Subtract VGG_MEAN from ``x`` and reverse the order along axis 1."""
    centered = x - VGG_MEAN
    return centered[:, ::-1]
def conv_block(model, n_layers, n_filters, name='block'):
    """Append ``n_layers`` zero-padded 3x3 ReLU convolutions and one 2x2 max-pool.

    Layer names are derived from ``name`` and the index of the convolution.
    """
    for index in range(n_layers):
        padding = ZeroPadding2D((1, 1), name='%s_padding_%s' % (name, index))
        convolution = Conv2D(n_filters, (3, 3), activation='relu',
                             name='%s_conv2d_%s' % (name, index))
        model.add(padding)
        model.add(convolution)
    model.add(MaxPooling2D((2, 2), strides=(2, 2), name='%s_maxpool' % name))
def fc_block(model, name='block'):
    """Append a 4096-unit ReLU Dense layer named ``name + '_dense'`` and a 0.5 dropout."""
    for new_layer in (Dense(4096, activation='relu', name=name + '_dense'),
                      Dropout(0.5)):
        model.add(new_layer)
def build_vgg_16():
    """Build the full VGG16 network as a channels-first Sequential model.

    The model consists of an input layer, the ``vgg_preprocess`` Lambda, five
    convolutional blocks, two 4096-unit fully-connected blocks and a 1000-way
    softmax.

    Returns:
        The (uncompiled) Sequential model.
    """
    model = Sequential()
    input_shape = (3, IMAGE_WIDTH, IMAGE_HEIGHT)
    model.add(InputLayer(input_shape=input_shape))
    model.add(Lambda(vgg_preprocess))

    # (number of conv layers, number of filters) for blocks 1..5.
    block_configs = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    for index, (n_layers, n_filters) in enumerate(block_configs, 1):
        conv_block(model, n_layers=n_layers, n_filters=n_filters,
                   name='block%d' % index)

    model.add(Flatten())
    # Each fully-connected block gets its own name: with the default name both
    # Dense layers would be called 'block_dense', and Keras requires layer
    # names to be unique within a model.
    fc_block(model, name='fc1')
    fc_block(model, name='fc2')
    model.add(Dense(1000, activation='softmax'))
    return model
def train_finetuned_model():
    """Fine-tune VGG16 on the dogs/cats sample and return the trained model.

    Stages:
      1. Run the VGG16 convolutional base (top six layers popped) over the
         train/valid images to produce bottleneck features.
      2. Train a small fully-connected top model on those features.
      3. Attach the trained top model to a fresh convolutional base, freeze
         the first 26 layers and train the result on augmented images.

    The label arrays assume 1000 + 1000 training and 400 + 400 validation
    images, listed class by class (the bottleneck generators are not
    shuffled).

    Returns:
        The fine-tuned Sequential model.
    """
    file_path = get_file('vgg16.h5', VGG16_WEIGHTS_PATH, cache_subdir='models')

    print('Building VGG16 (no-top) model to generate bottleneck features')
    vgg16_notop = build_vgg_16()
    vgg16_notop.load_weights(file_path)
    # Drop the classifier: Dense(1000), two Dense/Dropout pairs and Flatten.
    for _ in range(6):
        vgg16_notop.pop()
    vgg16_notop.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    # Unshuffled generators keep the features aligned with the label arrays.
    train_batches = get_batches('train', shuffle=False, class_mode=None)
    train_labels = np.array([0]*1000 + [1]*1000)
    bottleneck_train = vgg16_notop.predict_generator(train_batches, steps=2000 // BATCH_SIZE)

    valid_batches = get_batches('valid', shuffle=False, class_mode=None)
    valid_labels = np.array([0]*400 + [1]*400)
    bottleneck_valid = vgg16_notop.predict_generator(valid_batches, steps=800 // BATCH_SIZE)

    print('Training top model on bottleneck features')
    top_model = Sequential()
    top_model.add(Flatten(input_shape=bottleneck_train.shape[1:]))
    top_model.add(Dense(256, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(1, activation='sigmoid'))
    top_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    top_model.fit(bottleneck_train, train_labels,
                  batch_size=32, epochs=50,
                  validation_data=(bottleneck_valid, valid_labels))

    print('Concatenate new VGG16 (without top layer) with pretrained top model')
    vgg16_fine = build_vgg_16()
    vgg16_fine.load_weights(file_path)
    for _ in range(6):
        vgg16_fine.pop()
    vgg16_fine.add(Flatten(name='top_flatten'))
    vgg16_fine.add(Dense(256, activation='relu', name='top_dense'))
    vgg16_fine.add(Dropout(0.5, name='top_dropout'))
    vgg16_fine.add(Dense(1, activation='sigmoid', name='top_sigmoid'))

    # Copy the trained top-model weights into the matching layers, walking
    # both models from the last layer backwards.
    for i, layer in enumerate(reversed(top_model.layers), 1):
        pretrained_weights = layer.get_weights()
        vgg16_fine.layers[-i].set_weights(pretrained_weights)

    # Freeze the first 26 layers before compiling.
    for layer in vgg16_fine.layers[:26]:
        layer.trainable = False

    vgg16_fine.compile(optimizer=SGD(lr=1e-4, momentum=0.9),
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

    print('Train concatenated model on dogs/cats dataset sample')
    # No rescale=1./255 here: the model's Lambda layer subtracts VGG_MEAN from
    # raw 0-255 pixel values, and the bottleneck features above were computed
    # from unscaled images. Rescaling to [0, 1] would feed the convolutional
    # base inputs it was never trained on.
    train_datagen = ImageDataGenerator(shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)
    test_datagen = ImageDataGenerator()
    train_batches = get_batches('train', gen=train_datagen, class_mode='binary')
    valid_batches = get_batches('valid', gen=test_datagen, class_mode='binary')
    vgg16_fine.fit_generator(train_batches,
                             steps_per_epoch=2000 // BATCH_SIZE,
                             epochs=50,
                             validation_data=valid_batches,
                             validation_steps=800 // BATCH_SIZE)
    return vgg16_fine


final_model = train_finetuned_model()
But the problem is that model's accuracy drastically dropped. After 50 epochs, its accuracy is around 50%. Therefore, probably I've done something wrong.
Maybe something wrong with parameters, i.e. learning rate, batch size, etc.?
Your fully connected layers look totally different from the original VGG architecture.
# yours
Flatten()
Dense(256, activation='relu')
Dense(1, activation='sigmoid')
# original
Flatten()
Dense(4096, activation='relu')
Dense(4096, activation='relu')
Dense(2, activation='softmax')
Two points.
The last layer should be 2-class-softmax instead of sigmoid. The
accuracy is not computed as you expect if you use sigmoid, I guess.
Complexity (number of neurons and layers) seems to be too low.
Well, not sure if it is a right solution, but I was able to increase accuracy at least up to 70% with this code (probably the main reason is decreased learning rate and more epochs):
def train_finetuned_model(lr=1e-5, verbose=True):
    """Fine-tune VGG16 with a VGG-style 2-class softmax head.

    Stages:
      1. Generate bottleneck features with the VGG16 convolutional base.
      2. Train a 4096-4096-2 top model on those features.
      3. Attach the trained top model to a fresh convolutional base, freeze
         the first 26 layers and train the result on augmented images.

    The label arrays assume 1000 + 1000 training and 400 + 400 validation
    images, listed class by class (the bottleneck generators are not
    shuffled).

    Args:
        lr: Learning rate of the RMSprop optimizer used for the bottleneck
            and top models (the final fine-tuning stage uses 1e-6).
        verbose: Whether to print progress messages.

    Returns:
        The fine-tuned Sequential model.
    """
    # Imported locally: these names are not part of the file-level imports.
    from keras.optimizers import RMSprop
    from keras.utils import to_categorical

    file_path = get_file('vgg16.h5', VGG16_WEIGHTS_PATH, cache_subdir='models')

    if verbose:
        print('Building VGG16 (no-top) model to generate bottleneck features.')
    vgg16_notop = build_vgg_16()
    vgg16_notop.load_weights(file_path)
    # Drop the classifier: Dense(1000), two Dense/Dropout pairs and Flatten.
    for _ in range(6):
        vgg16_notop.pop()
    vgg16_notop.compile(optimizer=RMSprop(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])

    if verbose:
        print('Bottleneck features generation.')
    train_batches = get_batches('train', shuffle=False, class_mode=None, batch_size=BATCH_SIZE)
    train_labels = np.array([0]*1000 + [1]*1000)
    train_bottleneck = vgg16_notop.predict_generator(train_batches, steps=2000 // BATCH_SIZE)
    valid_batches = get_batches('valid', shuffle=False, class_mode=None, batch_size=BATCH_SIZE)
    valid_labels = np.array([0]*400 + [1]*400)
    valid_bottleneck = vgg16_notop.predict_generator(valid_batches, steps=800 // BATCH_SIZE)

    if verbose:
        print('Training top model on bottleneck features.')
    top_model = Sequential()
    top_model.add(Flatten(input_shape=train_bottleneck.shape[1:]))
    top_model.add(Dense(4096, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(4096, activation='relu'))
    top_model.add(Dropout(0.5))
    top_model.add(Dense(2, activation='softmax'))
    top_model.compile(optimizer=RMSprop(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    top_model.fit(train_bottleneck, to_categorical(train_labels),
                  batch_size=32, epochs=10,
                  validation_data=(valid_bottleneck, to_categorical(valid_labels)))

    if verbose:
        print('Concatenate new VGG16 (without top layer) with pretrained top model.')
    vgg16_fine = build_vgg_16()
    vgg16_fine.load_weights(file_path)
    for _ in range(6):
        vgg16_fine.pop()
    vgg16_fine.add(Flatten(name='top_flatten'))
    vgg16_fine.add(Dense(4096, activation='relu'))
    vgg16_fine.add(Dropout(0.5))
    vgg16_fine.add(Dense(4096, activation='relu'))
    vgg16_fine.add(Dropout(0.5))
    vgg16_fine.add(Dense(2, activation='softmax'))

    if verbose:
        print('Loading pre-trained weights into concatenated model')
    # Copy the trained top-model weights into the matching layers, walking
    # both models from the last layer backwards.
    for i, layer in enumerate(reversed(top_model.layers), 1):
        pretrained_weights = layer.get_weights()
        vgg16_fine.layers[-i].set_weights(pretrained_weights)

    # Freeze the first 26 layers before compiling.
    for layer in vgg16_fine.layers[:26]:
        layer.trainable = False

    if verbose:
        print('Layers training status:')
        for layer in vgg16_fine.layers:
            print('[%6s] %s' % ('' if layer.trainable else 'FROZEN', layer.name))

    # Compiled once, after freezing. The 2-unit softmax output with one-hot
    # ('categorical') labels is paired with categorical cross-entropy, the
    # same loss the top model was trained with.
    vgg16_fine.compile(optimizer=RMSprop(lr=1e-6), loss='categorical_crossentropy', metrics=['accuracy'])

    if verbose:
        print('Train concatenated model on dogs/cats dataset sample.')
    # No rescale=1./255 here: the model's Lambda layer subtracts VGG_MEAN from
    # raw 0-255 pixel values, and the bottleneck features above were computed
    # from unscaled images.
    train_datagen = ImageDataGenerator(shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)
    test_datagen = ImageDataGenerator()
    train_batches = get_batches('train', gen=train_datagen, class_mode='categorical', batch_size=BATCH_SIZE)
    valid_batches = get_batches('valid', gen=test_datagen, class_mode='categorical', batch_size=BATCH_SIZE)
    vgg16_fine.fit_generator(train_batches, epochs=100,
                             steps_per_epoch=2000 // BATCH_SIZE,
                             validation_data=valid_batches,
                             validation_steps=800 // BATCH_SIZE)
    return vgg16_fine
I guess there is a way to achieve much better results with fine-tuning (up to 98%), but I wasn't able to achieve it with provided code.