I am currently doing a project in which I need to predict eye disease in a group of images. I am using the Keras built-in applications. I am getting good results on VGG16 and VGG19, but on the Xception architecture I keep getting AUC of exactly 0.5 every epoch.
I have tried different optimizers and learning rates, but nothing works. I solved the same problem with VGG19 by switching from RMSProp optimizer to Adam optimizer, but I can't get it to work for Xception.
def buildModel():
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import adam
input_model = applications.xception.Xception(
include_top=False,
weights='imagenet',
input_tensor=None,
input_shape=input_sizes["xception"],
pooling=None,
classes=2)
base_model = input_model
x = base_model.output
x = Flatten()(x)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer=adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])
return model
class Histories(keras.callbacks.Callback):
def __init__(self, val_data):
super(Histories, self).__init__()
self.x_batch = []
self.y_batch = []
for i in range(len(val_data)):
x, y = val_data.__getitem__(i)
self.x_batch.extend(x)
self.y_batch.extend(np.ndarray.astype(y, int))
self.aucs = []
self.specificity = []
self.sensitivity = []
self.losses = []
return
def on_train_begin(self, logs={}):
initFile("results/xception_results_adam_3.txt")
return
def on_train_end(self, logs={}):
return
def on_epoch_begin(self, epoch, logs={}):
return
def on_epoch_end(self, epoch, logs={}):
self.losses.append(logs.get('loss'))
y_pred = self.model.predict(np.asarray(self.x_batch))
con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
tn, fp, fn, tp = con_mat.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
print("Specificity: %f Sensitivity: %f AUC: %f"%(spec, sens, auc_score))
print(con_mat)
self.sensitivity.append(sens)
self.specificity.append(spec)
self.aucs.append(auc_score)
writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
trainDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
valDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
testDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=20,
verbose=0,
mode='auto',
baseline=None,
restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
monitor='val_loss',
verbose=0,
save_best_only=False,
save_weights_only=False,
mode='auto',
period=1)
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
train_generator,
steps_per_epoch=superDirectorySize(trainDirectory) // 16,
epochs=100,
validation_data=validation_generator,
validation_steps=superDirectorySize(valDirectory) // 16,
callbacks=[metrics, es, mcp],
workers=8,
shuffle=False
)
I honestly have no idea what causes this behavior, or how to prevent it. Thank you in advance, and I apologize for the long code snippet :)
Your learning rate is too high.
Try lowering the learning rate.
I used to run into this when using transfer learning, I was fine-tuning at very high learning rates.
An extended AUC of 0.5 over multiple epochs in case of a binary classification means that your (convolutional) neural network is not able to distinguish between the classes at all. This is in turn because it's not able to learn anything.
Use learning_rates of 0.0001,0.00001,0.000001.
At the same time, you should try to unfreeze/make some layers trainable, due to the fact that you entire feature extractor is frozen; in fact this could be another reason why the network is incapable of learning anything.
I am quite confident that your problem will be solved if you lower your learning rate :).
An AUC of 0.5 implies that your network is randomly guessing the output, which means it didn't learn anything. This was already disscued for example here.
As Timbus Calin suggested, you could do a "line search" of the learning rate starting with 0.000001 and then increase the learning rate by potencies of 10.
I would suggest you directly start with a random search, where you not only try to optimize the learning rate, but also other hyperparameters like for example the batch size. Read more about random search in this paper.
You are not computing the AUC correctly, you currently have this:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
AUC is computed from (probability) scores produced by the model. The argmax of the model output does not provide scores, but class labels. The correct function call is:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred[:, 1])
Note that the score needed to compute ROC is the probability of the positive class, which is the second element of the softmax output. This is why only the second column of the predictions is used to make the AUC.
What about this?
def buildModel():
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import adam
input_model = applications.xception.Xception(
include_top=False,
weights='imagenet',
input_tensor=None,
input_shape=input_sizes["xception"],
pooling='avg', # 1
classes=2)
base_model = input_model
x = base_model.output
# x = Flatten()(x) # 2
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer=adam(lr=0.01),
loss='categorical_crossentropy', # 3
metrics=['accuracy'])
return model
class Histories(keras.callbacks.Callback):
def __init__(self, val_data):
super(Histories, self).__init__()
self.x_batch = []
self.y_batch = []
for i in range(len(val_data)):
x, y = val_data.__getitem__(i)
self.x_batch.extend(x)
self.y_batch.extend(np.ndarray.astype(y, int))
self.aucs = []
self.specificity = []
self.sensitivity = []
self.losses = []
return
def on_train_begin(self, logs={}):
initFile("results/xception_results_adam_3.txt")
return
def on_train_end(self, logs={}):
return
def on_epoch_begin(self, epoch, logs={}):
return
def on_epoch_end(self, epoch, logs={}):
self.losses.append(logs.get('loss'))
y_pred = self.model.predict(np.asarray(self.x_batch))
con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
tn, fp, fn, tp = con_mat.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
print("Specificity: %f Sensitivity: %f AUC: %f"%(spec, sens, auc_score))
print(con_mat)
self.sensitivity.append(sens)
self.specificity.append(spec)
self.aucs.append(auc_score)
writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
trainDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
valDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
testDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=20,
verbose=0,
mode='auto',
baseline=None,
restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
monitor='val_loss',
verbose=0,
save_best_only=False,
save_weights_only=False,
mode='auto',
period=1)
#%% Load saved model
from keras.models import load_model
# model = load_model("saved_models/vgg16.10.hdf5") # 4
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
train_generator,
steps_per_epoch=superDirectorySize(trainDirectory) // 16,
epochs=100,
validation_data=validation_generator,
validation_steps=superDirectorySize(valDirectory) // 16,
callbacks=[metrics, es, mcp],
workers=8,
shuffle=False
)
For 1 and 2,I think it doesn't make sense to use FC layer right after ReLU without use a pooling layer, never try it so it might not help anything.
For 3, why are you using BCE when your generators are using class_mode='categorical'?
For 4, as I comment above, this mean you are loading your VGG model and train it, instead of using the Xception from buildModel().
Related
I have a problem with this code. I need to optimize dropout rate and learning rate.
My code is reported below but the optimizer = BayesianOptimization() doesn't work.
Is there someone that can help?
This is my model:
input_shape=(10,256,256,1)
NUM_CLASSES=2
def get_model(input_shape, dropout1_rate=0.5, dense_1_neurons=128):
model = models.Sequential()
model.add(layers.TimeDistributed(getConvModel(verbose), input_shape=input_shape,
name="conv2d_1"))
model.add(layers.ConvLSTM2D(filters=5, kernel_size=(3,3),"conv2d_lstm"))
model.add(layers.Dropout(dropout1_rate,name="dropout_1")) # dropout rate -> to be tuned
model.add(layers.Dense(NUM_CLASSES, activation='softmax',name="dense_2"))
return model
def fit_with(input_shape, verbose, dropout1_rate, dense_1_neurons_x128, lr):
Create the model using a specified hyperparameter.
dense_1_neurons = max(int(dense_1_neurons_x256 * 256), 256)
model = get_model(input_shape, dropout1_rate, dense_1_neurons)
Train the model for a specified number of epochs.
opt = tf.keras.optimizers.SGD(learning_rate=lr)
model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=opt,
metrics=['accuracy'])
Train the model with the train dataset.
model.fit(x=X_train,y=Y_train, epochs=1, steps_per_epoch=468,
batch_size=64, verbose=verbose)
Evaluate the model with the eval dataset.
score = model.evaluate(X_val,Y_val ,steps=10, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Return the accuracy.
return score[1]
from functools import partial
verbose = 1
fit_with_partial = partial(fit_with, input_shape, verbose)
fit_with_partial(dropout1_rate=0.5, lr=0.001, dense_1_neurons_x256=1)
Optimization using BayesianOptimizaitio
from bayes_opt import BayesianOptimization
pbounds = {
'dropout1_rate': (0.1, 0.5),
'lr': (1e-4, 1e-2),
}
optimizer = BayesianOptimization(
f=fit_with_partial,
pbounds=pbounds,
verbose=2,
random_state=1,)
optimizer.maximize(init_points=10, n_iter=10,)
for x, res in enumerate(optimizer.res):
print("Iteration {}: \n\t{}".format(x, res))
print(optimizer.max)
I am training a CNN model using Keras on Google Colab for binary image classification, the problem is when i use Sigmoid function i get accuracy fixed on 0.5000, and when i change metrics to 'acc' i get 0.000e+00 as accuracy. Also, when i change the activation function to 'Softmax' my model start learning.
Ps: i am using google colab where Tensorflow version is 2.5.0
My code:
def define_model(input_shape, num_classes):
model=ResNet50(include_top = False, weights = 'imagenet', input_shape = input_shape)
x = model.output
x = GlobalAveragePooling2D()(x)
preds = Dense(num_classes,activation='sigmoid')(x)
model = Model(inputs=model.input,outputs=preds)
return model
def train(epochs):
train_generator = ImageDataGenerator(rescale=1.0/255.0,vertical_flip=True, horizontal_flip=True)
test_generator = ImageDataGenerator(rescale=1.0/255.0)
train_generator = train_generator.flow_from_directory(
'trainset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_TRAINING,
seed = 7)
validation_generator = test_generator.flow_from_directory(
'testset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_VALIDATION,
seed = 7)
input_shape = (CHANNELS, image_size, image_size) if K.image_data_format() == 'channels_first' \
else (image_size, image_size, CHANNELS)
model = define_model(input_shape, NUM_CLASSES)
opt = optimizers.Adam(learning_rate=1e-6, beta_1=0.9, beta_2=0.99, amsgrad=False)
model.summary()
model.compile(loss='binary_crossentropy',
optimizer=opt,
metrics=['acc'])
filepath=path+"weights-improvement-{epoch:02d}-vacc:{val_accuracy:.2f}-tacc:{accuracy:.2f}.hdf5"
'''cb_early_stopper = EarlyStopping(monitor = 'val_accuracy', mode='min', verbose=1, patience = EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath = filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'auto')
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.25, patience=5, min_lr=1e-7)'''
fit_history = model.fit(train_generator,
epochs = NUM_EPOCHS,
validation_data=validation_generator,
verbose=1,
class_weight=class_weights)
# callbacks = [cb_checkpointer, cb_early_stopper, reduce_lr],
return model, fit_history
def main():
start_time = time()
model, fit_history = train(epochs=NUM_EPOCHS)
end_time = time()
seconds_elapsed = end_time - start_time
print('token time: ', seconds_elapsed)
hours, rest = divmod(seconds_elapsed, 3600)
minutes, seconds = divmod(rest, 60)
if __name__ == "__main__":
main()
The problem solved by adding this code to the .flow_from_directory() function:
class_mode='binary',
Thanks to this thread on github:
https://github.com/keras-team/keras/issues/13006
After trying out VGG16 and having really good results, I was trying to train a ResNet50 Model from Imagenet. First I set all layers to trainable because I have a large Dataset and did the same with VGG16, but my results were quite bad.
Then I tried to set the layers to not trainable and see if it gets better, but the results were still bad.
My original images are of size 384x384 but I resized them to 224x224. Is that the issue? Or did I do something wrong while implementing it?
from keras import Input, Model
from keras.applications import ResNet50
from keras.layers import AveragePooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras_preprocessing.image import ImageDataGenerator
class example:
def __init__(self):
# define the names of the classes
self.CLASSES = ["nok", "ok"]
# initialize the initial learning rate, batch size, and number of
# epochs to train for
self.INIT_LR = 1e-4
self.BS = 32
self.NUM_EPOCHS = 32
def build_model(self, train_path):
train_data_path = train_path
train_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.25)
train_generator = train_datagen.flow_from_directory(
train_data_path,
target_size=(224,224),
color_mode="rgb",
batch_size=self.BS,
class_mode='categorical',
subset='training')
validation_generator = train_datagen.flow_from_directory(
train_data_path,
target_size=(224, 224),
color_mode="rgb",
batch_size=self.BS,
class_mode='categorical',
subset='validation')
# load the ResNet-50 network, ensuring the head FC layer sets are left off
baseModel = ResNet50(weights="imagenet", include_top=False,
input_tensor = Input(shape=(224, 224, 3)))
# construct the head of the model that will be placed on top of the the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(256, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(len(self.CLASSES), activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
for layer in baseModel.layers:
layer.trainable = True
# compile the model
opt = Adam(lr=self.INIT_LR)#, decay=self.INIT_LR / self.NUM_EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
checkpoint = ModelCheckpoint('resnetModel.h5', monitor='val_accuracy', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=6, verbose=1, mode='auto')
hist = model.fit_generator(steps_per_epoch=self.BS, generator=train_generator,
validation_data=validation_generator, validation_steps=32, epochs=self.NUM_EPOCHS,
callbacks=[checkpoint, early])
plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Accuracy", "Validation Accuracy", "loss", "Validation Loss"])
plt.show()
plt.figure(1)
import tensorflow as tf
if __name__ == '__main__':
x = example()
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
x.build_model("C:/Users/but/Desktop/dataScratch/Train")
I have 2 Classes which contain images of integrated circuits with defect and non defect images. My Batch Size is 32, Epoches is 32, LR is 1e-4.
Here are example images:
This is a defect image
This is an ok image
I was working on a binary image classification deep learning model using transfer learning in Google colab.
!wget https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
pre_trained_model = InceptionV3(input_shape = (300, 300, 3),
include_top = False,
weights = None)
pre_trained_model.load_weights(local_weights_file)
for layer in pre_trained_model.layers:
layer.trainable = False
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output
x = layers.Flatten()(last_output)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1, activation='sigmoid')(x)
model = Model(pre_trained_model.input, x)
from tensorflow.keras.optimizers import RMSprop
model.compile(optimizer=RMSprop(lr=0.0001),
loss='binary_crossentropy',
metrics=['accuracy'])
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
rescale=1/255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode="nearest")
validation_datagen = ImageDataGenerator(rescale=1/255)
train_generator = train_datagen.flow_from_directory(
train_dir,
target_size=(300, 300),
batch_size=100,
class_mode='binary')
validation_generator = validation_datagen.flow_from_directory(
validation_dir,
target_size=(300, 300),
batch_size=100,
class_mode='binary')
history = model.fit(
train_generator,
steps_per_epoch=20,
epochs=30,
verbose=1,
validation_data=validation_generator,
validation_steps=10,
callbacks=[callbacks])
import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image
uploaded=files.upload()
for fn in uploaded.keys():
path='/content/' + fn
img=image.load_img(path, target_size=(300, 300))
x=image.img_to_array(img)
x=np.expand_dims(x, axis=0)
images = np.vstack([x])
classes = model.predict(images, batch_size=10)
print(classes)
Even though after training the model and obtaining a quite good accuracy on training and validation data, the model is always predicting 1 for any new image. I have tried changing the batch size, epochs, learning rate, etc. But, no luck.
Can anyone explain what's the problem here?
I'm trying to train an image classifier using keras applications module. When I run predictions on validation set, all images are predicted as the same class. It is not always the same class, it varies during training. I'm using MobileNetV2 with weights from ImageNet but I also tried other models with same result.
I've tried using model from TensorFlow hub like described in this tutorial: https://www.tensorflow.org/beta/tutorials/images/hub_with_keras and it worked fine, so it is not a data set issue.
My code snippet:
image_size = 224
batch_size = 32
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
train_generator = train_datagen.flow_from_directory(training_data_dir,
target_size=(image_size, image_size),
batch_size=batch_size)
validation_generator = train_datagen.flow_from_directory(validation_data_dir,
target_size=(image_size, image_size),
batch_size=batch_size)
IMG_SHAPE = (image_size, image_size, 3)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights="imagenet")
base_model.trainable = False
model = tf.keras.Sequential([
base_model,
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(train_generator.num_classes, activation='softmax')
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.001),
loss="categorical_crossentropy",
metrics=["accuracy"])
model.summary()
batch_stats = CollectBatchStats()
epoch_stats = CollectEpochStats(model, validation_generator)
checkpoint = tf.keras.callbacks.ModelCheckpoint(...)
epochs = 10
steps_per_epoch = train_generator.n // train_generator.batch_size
validation_steps = validation_generator.n // validation_generator.batch_size
history = model.fit_generator(train_generator,
epochs=epochs,
steps_per_epoch=steps_per_epoch,
callbacks=[batch_stats, epoch_stats, checkpoint],
workers=4,
validation_data=validation_generator,
validation_steps=validation_steps)
Issue resolved: in my code I had following lines after model compilation:
sess = keras_backend.get_session()
init = tf.compat.v1.global_variables_initializer()
sess.run(init)
After removing them everything works fine.