I have a problem with this code. I need to optimize dropout rate and learning rate.
My code is reported below but the optimizer = BayesianOptimization() doesn't work.
Is there someone that can help?
This is my model:
input_shape=(10,256,256,1)
NUM_CLASSES=2
def get_model(input_shape, dropout1_rate=0.5, dense_1_neurons=128):
model = models.Sequential()
model.add(layers.TimeDistributed(getConvModel(verbose), input_shape=input_shape,
name="conv2d_1"))
model.add(layers.ConvLSTM2D(filters=5, kernel_size=(3,3),"conv2d_lstm"))
model.add(layers.Dropout(dropout1_rate,name="dropout_1")) # dropout rate -> to be tuned
model.add(layers.Dense(NUM_CLASSES, activation='softmax',name="dense_2"))
return model
def fit_with(input_shape, verbose, dropout1_rate, dense_1_neurons_x128, lr):
Create the model using a specified hyperparameter.
dense_1_neurons = max(int(dense_1_neurons_x256 * 256), 256)
model = get_model(input_shape, dropout1_rate, dense_1_neurons)
Train the model for a specified number of epochs.
opt = tf.keras.optimizers.SGD(learning_rate=lr)
model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=opt,
metrics=['accuracy'])
Train the model with the train dataset.
model.fit(x=X_train,y=Y_train, epochs=1, steps_per_epoch=468,
batch_size=64, verbose=verbose)
Evaluate the model with the eval dataset.
score = model.evaluate(X_val,Y_val ,steps=10, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Return the accuracy.
return score[1]
from functools import partial
verbose = 1
fit_with_partial = partial(fit_with, input_shape, verbose)
fit_with_partial(dropout1_rate=0.5, lr=0.001, dense_1_neurons_x256=1)
Optimization using BayesianOptimizaitio
from bayes_opt import BayesianOptimization
pbounds = {
'dropout1_rate': (0.1, 0.5),
'lr': (1e-4, 1e-2),
}
optimizer = BayesianOptimization(
f=fit_with_partial,
pbounds=pbounds,
verbose=2,
random_state=1,)
optimizer.maximize(init_points=10, n_iter=10,)
for x, res in enumerate(optimizer.res):
print("Iteration {}: \n\t{}".format(x, res))
print(optimizer.max)
Related
def rnn_model(self,activation="relu"):
in_out_neurons = 50
n_hidden = 512
model = Sequential()
model.add(LSTM(n_hidden, batch_input_shape=(None, self.seq_len, in_out_neurons), return_sequences=True))
model.add(Dense(in_out_neurons, activation=activation))
optimizer = Adam(learning_rate=0.001)
model.compile(loss="mean_squared_error", optimizer=optimizer)
model.summary()
return model
# then try to fit the model
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 1, 50))
# this works.
model = self.rnn_model()
model.fit(
final_x,final_y,
batch_size=400,
epochs=10,
validation_split=0.1
)
#However, when I trid to use hyperparameter sarch, this shows the error `ValueError: Invalid shape for y: (319083, 1, 50)`
activation = ["relu","sigmoid"]
model = KerasClassifier(build_fn=self.rnn_model,verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model,param_grid=param_grid)
grid_result= grid.fit(final_x,final_y)
How dimension changes when using GridSearchCV
You should be using a KerasRegressor, since your model is not a classifier in that sense:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
def rnn_model(activation="relu"):
in_out_neurons = 50
n_hidden = 512
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(n_hidden, batch_input_shape=(None, 2, in_out_neurons), return_sequences=True))
model.add(tf.keras.layers.Dense(in_out_neurons, activation=activation))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss="mean_squared_error", optimizer=optimizer)
model.summary()
return model
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 2, 50))
model = rnn_model()
activation = ["relu","sigmoid"]
model = KerasRegressor(build_fn=rnn_model,verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result= grid.fit(final_x,final_y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) # run with a way smaller dataset
Best: 0.000000 using {'activation': 'relu'}
I am training a CNN model using Keras on Google Colab for binary image classification, the problem is when i use Sigmoid function i get accuracy fixed on 0.5000, and when i change metrics to 'acc' i get 0.000e+00 as accuracy. Also, when i change the activation function to 'Softmax' my model start learning.
Ps: i am using google colab where Tensorflow version is 2.5.0
My code:
def define_model(input_shape, num_classes):
model=ResNet50(include_top = False, weights = 'imagenet', input_shape = input_shape)
x = model.output
x = GlobalAveragePooling2D()(x)
preds = Dense(num_classes,activation='sigmoid')(x)
model = Model(inputs=model.input,outputs=preds)
return model
def train(epochs):
train_generator = ImageDataGenerator(rescale=1.0/255.0,vertical_flip=True, horizontal_flip=True)
test_generator = ImageDataGenerator(rescale=1.0/255.0)
train_generator = train_generator.flow_from_directory(
'trainset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_TRAINING,
seed = 7)
validation_generator = test_generator.flow_from_directory(
'testset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_VALIDATION,
seed = 7)
input_shape = (CHANNELS, image_size, image_size) if K.image_data_format() == 'channels_first' \
else (image_size, image_size, CHANNELS)
model = define_model(input_shape, NUM_CLASSES)
opt = optimizers.Adam(learning_rate=1e-6, beta_1=0.9, beta_2=0.99, amsgrad=False)
model.summary()
model.compile(loss='binary_crossentropy',
optimizer=opt,
metrics=['acc'])
filepath=path+"weights-improvement-{epoch:02d}-vacc:{val_accuracy:.2f}-tacc:{accuracy:.2f}.hdf5"
'''cb_early_stopper = EarlyStopping(monitor = 'val_accuracy', mode='min', verbose=1, patience = EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath = filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'auto')
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.25, patience=5, min_lr=1e-7)'''
fit_history = model.fit(train_generator,
epochs = NUM_EPOCHS,
validation_data=validation_generator,
verbose=1,
class_weight=class_weights)
# callbacks = [cb_checkpointer, cb_early_stopper, reduce_lr],
return model, fit_history
def main():
start_time = time()
model, fit_history = train(epochs=NUM_EPOCHS)
end_time = time()
seconds_elapsed = end_time - start_time
print('token time: ', seconds_elapsed)
hours, rest = divmod(seconds_elapsed, 3600)
minutes, seconds = divmod(rest, 60)
if __name__ == "__main__":
main()
The problem solved by adding this code to the .flow_from_directory() function:
class_mode='binary',
Thanks to this thread on github:
https://github.com/keras-team/keras/issues/13006
I am newbie to Machine Learning in general. I am currently trying to follow a tutorial on sentiment analysis using BERT and Transformers https://curiousily.com/posts/sentiment-analysis-with-bert-and-hugging-face-using-pytorch-and-python/
However when I train the model it has appeared that the model is overfitting
I do not know how to fix this. I have tried lowering amount of epochs, increasing batch size , shuffling my data (which is ordered) and increasing the validation split. So far nothing has worked. I have even tried changing different learning rate but the one I am using now is the smallest.
Below is my code:
PRE_TRAINED_MODEL_NAME = 'TurkuNLP/bert-base-finnish-cased-v1'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
MAX_LEN = 40
#Make a PyTorch dataset
class FIDataset(Dataset):
def __init__(self, texts, targets, tokenizer, max_len):
self.texts = texts
self.targets = targets
self.tokenizer = tokenizer
self.max_len = max_len
def __len__(self):
return len(self.texts)
def __getitem__(self, item):
text = str(self.texts[item])
target = self.targets[item]
encoding = self.tokenizer.encode_plus(
text,
add_special_tokens=True,
max_length=self.max_len,
return_token_type_ids=False,
pad_to_max_length=True,
return_attention_mask=True,
return_tensors='pt',
)
return {
'text': text,
'input_ids': encoding['input_ids'].flatten(),
'attention_mask': encoding['attention_mask'].flatten(),
'targets': torch.tensor(target, dtype=torch.long)
}
#split test and train
df_train, df_test = train_test_split(
df,
test_size=0.1,
random_state=RANDOM_SEED
)
df_val, df_test = train_test_split(
df_test,
test_size=0.5,
random_state=RANDOM_SEED
)
#data loader function
def create_data_loader(df, tokenizer, max_len, batch_size):
ds = FIDataset(
texts=df.content.to_numpy(),
targets=df.sentiment.to_numpy(),
tokenizer=tokenizer,
max_len=max_len
)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=4
)
BATCH_SIZE = 32
#Load data into train, test, val
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
#Bert model loading
bert_model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
def __init__(self, n_classes):
super(SentimentClassifier, self).__init__()
self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
self.drop = nn.Dropout(p=0.1)
self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
def forward(self, input_ids, attention_mask):
returned = self.bert(
input_ids=input_ids,
attention_mask=attention_mask
)
pooled_output = returned["pooler_output"]
output = self.drop(pooled_output)
return self.out(output)
#Create a Classifier instance and move to GPU
model = SentimentClassifier(3)
model = model.to(device)
#Optimize with AdamW
EPOCHS = 6
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=0,
num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)
#Train each Epoch function
def train_epoch(
model,
data_loader,
loss_fn,
optimizer,
device,
scheduler,
n_examples
):
model = model.train()
losses = []
correct_predictions = 0
for d in data_loader:
input_ids = d["input_ids"].to(device)
attention_mask = d["attention_mask"].to(device)
targets = d["targets"].to(device)
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask
)
_, preds = torch.max(outputs, dim=1)
loss = loss_fn(outputs, targets)
correct_predictions += torch.sum(preds == targets)
losses.append(loss.item())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
scheduler.step()
optimizer.zero_grad()
return correct_predictions.double() / n_examples, np.mean(losses)
import torch
history = defaultdict(list)
best_accuracy = 0
if __name__ == '__main__':
for epoch in range(EPOCHS):
print(f'Epoch {epoch + 1}/{EPOCHS}')
print('-' * 10)
train_acc, train_loss = train_epoch(
model,
train_data_loader,
loss_fn,
optimizer,
device,
scheduler,
len(df_train)
)
print(f'Train loss {train_loss} accuracy {train_acc}')
val_acc, val_loss = eval_model(
model,
val_data_loader,
loss_fn,
device,
len(df_val)
)
print(f'Val loss {val_loss} accuracy {val_acc}')
print()
history['train_acc'].append(train_acc)
history['train_loss'].append(train_loss)
history['val_acc'].append(val_acc)
history['val_loss'].append(val_loss)
if val_acc > best_accuracy:
torch.save(model.state_dict(), 'best_model_state.bin')
best_accuracy = val_acc
Broadly speaking, to reduce overfitting, you can:
increase regularization
reduce model complexity
perform early stopping
increase training data
From what you've written, you've already tried 3 and 4. In the case of neural networks, you can increase regularization by increasing dropout. You already have the code for it.
# NOTE: You don't need bert_model here since you're creating one inside
# of SentimentClassifier.
#bert_model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
def __init__(self, n_classes):
super(SentimentClassifier, self).__init__()
self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
self.drop = nn.Dropout(p=0.1) # <-- INCREASE THIS VALUE
self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
I'd recommend trying higher values of the Dropout probability, as I noted in your code above ("INCREASE THIS VALUE"). Keep track of the Dropout probability and the resulting observed overfitting. Try probability values of 0.1, 0.2, 0.3, 0.4, 0.5.
Usually, I've found that dropout over 0.5 doesn't do much good.
After trying out VGG16 and having really good results, I was trying to train a ResNet50 Model from Imagenet. First I set all layers to trainable because I have a large Dataset and did the same with VGG16, but my results were quite bad.
Then I tried to set the layers to not trainable and see if it gets better, but the results were still bad.
My original images are of size 384x384 but I resized them to 224x224. Is that the issue? Or did I do something wrong while implementing it?
from keras import Input, Model
from keras.applications import ResNet50
from keras.layers import AveragePooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras_preprocessing.image import ImageDataGenerator
class example:
def __init__(self):
# define the names of the classes
self.CLASSES = ["nok", "ok"]
# initialize the initial learning rate, batch size, and number of
# epochs to train for
self.INIT_LR = 1e-4
self.BS = 32
self.NUM_EPOCHS = 32
def build_model(self, train_path):
train_data_path = train_path
train_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.25)
train_generator = train_datagen.flow_from_directory(
train_data_path,
target_size=(224,224),
color_mode="rgb",
batch_size=self.BS,
class_mode='categorical',
subset='training')
validation_generator = train_datagen.flow_from_directory(
train_data_path,
target_size=(224, 224),
color_mode="rgb",
batch_size=self.BS,
class_mode='categorical',
subset='validation')
# load the ResNet-50 network, ensuring the head FC layer sets are left off
baseModel = ResNet50(weights="imagenet", include_top=False,
input_tensor = Input(shape=(224, 224, 3)))
# construct the head of the model that will be placed on top of the the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(256, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(len(self.CLASSES), activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)
for layer in baseModel.layers:
layer.trainable = True
# compile the model
opt = Adam(lr=self.INIT_LR)#, decay=self.INIT_LR / self.NUM_EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt
checkpoint = ModelCheckpoint('resnetModel.h5', monitor='val_accuracy', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=6, verbose=1, mode='auto')
hist = model.fit_generator(steps_per_epoch=self.BS, generator=train_generator,
validation_data=validation_generator, validation_steps=32, epochs=self.NUM_EPOCHS,
callbacks=[checkpoint, early])
plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Accuracy", "Validation Accuracy", "loss", "Validation Loss"])
plt.show()
plt.figure(1)
import tensorflow as tf
if __name__ == '__main__':
x = example()
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
x.build_model("C:/Users/but/Desktop/dataScratch/Train")
I have 2 Classes which contain images of integrated circuits with defect and non defect images. My Batch Size is 32, Epoches is 32, LR is 1e-4.
Here are example images:
This is a defect image
This is an ok image
I am currently doing a project in which I need to predict eye disease in a group of images. I am using the Keras built-in applications. I am getting good results on VGG16 and VGG19, but on the Xception architecture I keep getting AUC of exactly 0.5 every epoch.
I have tried different optimizers and learning rates, but nothing works. I solved the same problem with VGG19 by switching from RMSProp optimizer to Adam optimizer, but I can't get it to work for Xception.
def buildModel():
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import adam
input_model = applications.xception.Xception(
include_top=False,
weights='imagenet',
input_tensor=None,
input_shape=input_sizes["xception"],
pooling=None,
classes=2)
base_model = input_model
x = base_model.output
x = Flatten()(x)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer=adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])
return model
class Histories(keras.callbacks.Callback):
def __init__(self, val_data):
super(Histories, self).__init__()
self.x_batch = []
self.y_batch = []
for i in range(len(val_data)):
x, y = val_data.__getitem__(i)
self.x_batch.extend(x)
self.y_batch.extend(np.ndarray.astype(y, int))
self.aucs = []
self.specificity = []
self.sensitivity = []
self.losses = []
return
def on_train_begin(self, logs={}):
initFile("results/xception_results_adam_3.txt")
return
def on_train_end(self, logs={}):
return
def on_epoch_begin(self, epoch, logs={}):
return
def on_epoch_end(self, epoch, logs={}):
self.losses.append(logs.get('loss'))
y_pred = self.model.predict(np.asarray(self.x_batch))
con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
tn, fp, fn, tp = con_mat.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
print("Specificity: %f Sensitivity: %f AUC: %f"%(spec, sens, auc_score))
print(con_mat)
self.sensitivity.append(sens)
self.specificity.append(spec)
self.aucs.append(auc_score)
writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
trainDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
valDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
testDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=20,
verbose=0,
mode='auto',
baseline=None,
restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
monitor='val_loss',
verbose=0,
save_best_only=False,
save_weights_only=False,
mode='auto',
period=1)
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
train_generator,
steps_per_epoch=superDirectorySize(trainDirectory) // 16,
epochs=100,
validation_data=validation_generator,
validation_steps=superDirectorySize(valDirectory) // 16,
callbacks=[metrics, es, mcp],
workers=8,
shuffle=False
)
I honestly have no idea what causes this behavior, or how to prevent it. Thank you in advance, and I apologize for the long code snippet :)
Your learning rate is too high.
Try lowering the learning rate.
I used to run into this when using transfer learning, I was fine-tuning at very high learning rates.
An extended AUC of 0.5 over multiple epochs in case of a binary classification means that your (convolutional) neural network is not able to distinguish between the classes at all. This is in turn because it's not able to learn anything.
Use learning_rates of 0.0001,0.00001,0.000001.
At the same time, you should try to unfreeze/make some layers trainable, due to the fact that you entire feature extractor is frozen; in fact this could be another reason why the network is incapable of learning anything.
I am quite confident that your problem will be solved if you lower your learning rate :).
An AUC of 0.5 implies that your network is randomly guessing the output, which means it didn't learn anything. This was already disscued for example here.
As Timbus Calin suggested, you could do a "line search" of the learning rate starting with 0.000001 and then increase the learning rate by potencies of 10.
I would suggest you directly start with a random search, where you not only try to optimize the learning rate, but also other hyperparameters like for example the batch size. Read more about random search in this paper.
You are not computing the AUC correctly, you currently have this:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
AUC is computed from (probability) scores produced by the model. The argmax of the model output does not provide scores, but class labels. The correct function call is:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred[:, 1])
Note that the score needed to compute ROC is the probability of the positive class, which is the second element of the softmax output. This is why only the second column of the predictions is used to make the AUC.
What about this?
def buildModel():
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import adam
input_model = applications.xception.Xception(
include_top=False,
weights='imagenet',
input_tensor=None,
input_shape=input_sizes["xception"],
pooling='avg', # 1
classes=2)
base_model = input_model
x = base_model.output
# x = Flatten()(x) # 2
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer=adam(lr=0.01),
loss='categorical_crossentropy', # 3
metrics=['accuracy'])
return model
class Histories(keras.callbacks.Callback):
def __init__(self, val_data):
super(Histories, self).__init__()
self.x_batch = []
self.y_batch = []
for i in range(len(val_data)):
x, y = val_data.__getitem__(i)
self.x_batch.extend(x)
self.y_batch.extend(np.ndarray.astype(y, int))
self.aucs = []
self.specificity = []
self.sensitivity = []
self.losses = []
return
def on_train_begin(self, logs={}):
initFile("results/xception_results_adam_3.txt")
return
def on_train_end(self, logs={}):
return
def on_epoch_begin(self, epoch, logs={}):
return
def on_epoch_end(self, epoch, logs={}):
self.losses.append(logs.get('loss'))
y_pred = self.model.predict(np.asarray(self.x_batch))
con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
tn, fp, fn, tp = con_mat.ravel()
sens = tp/(tp+fn)
spec = tn/(tn+fp)
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
print("Specificity: %f Sensitivity: %f AUC: %f"%(spec, sens, auc_score))
print(con_mat)
self.sensitivity.append(sens)
self.specificity.append(spec)
self.aucs.append(auc_score)
writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
trainDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
valDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
testDirectory,
target_size=(299, 299),
batch_size=16,
class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
min_delta=0,
patience=20,
verbose=0,
mode='auto',
baseline=None,
restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
monitor='val_loss',
verbose=0,
save_best_only=False,
save_weights_only=False,
mode='auto',
period=1)
#%% Load saved model
from keras.models import load_model
# model = load_model("saved_models/vgg16.10.hdf5") # 4
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
train_generator,
steps_per_epoch=superDirectorySize(trainDirectory) // 16,
epochs=100,
validation_data=validation_generator,
validation_steps=superDirectorySize(valDirectory) // 16,
callbacks=[metrics, es, mcp],
workers=8,
shuffle=False
)
For 1 and 2,I think it doesn't make sense to use FC layer right after ReLU without use a pooling layer, never try it so it might not help anything.
For 3, why are you using BCE when your generators are using class_mode='categorical'?
For 4, as I comment above, this mean you are loading your VGG model and train it, instead of using the Xception from buildModel().