When predicting, the 2nd label always gets a high score - Python

I'm trying to build a custom binary classification model in TensorFlow; the structure of the model is shown below. When I train the model on the dataset, everything goes right (see the training output). But when I try to evaluate or predict, it goes wrong: every predicted result for this dataset has a high score on the 2nd label, and I don't know why. Here's the code:
def getModel():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(64, 64)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    model.summary()
    lossFn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    initial_learning_rate = 0.3
    decay_steps = 1.0
    decay_rate = 0.5
    # learning_rate_fn = tf.keras.optimizers.schedules.InverseTimeDecay(initial_learning_rate, decay_steps, decay_rate)
    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss=lossFn,
                  metrics='accuracy')
    return model
def getDataset(path):
    with np.load(path) as data:
        trainData = data['img']
        # trainData = np.reshape(trainData, (trainData.shape[0], trainData.shape[1], trainData.shape[2]))
        trainLabels = data['label']
    trainSet = tf.data.Dataset.from_tensor_slices((trainData, trainLabels))
    trainSet = trainSet.batch(BATCH_SIZE)
    return trainSet

def getTestSet(path):
    with np.load(path) as data:
        testData = data['img']
        # testData = np.reshape(testData, (testData.shape[0], testData.shape[1], testData.shape[2]))
        testLabels = data['label']
    testSet = tf.data.Dataset.from_tensor_slices((testData, testLabels))
    testSet = testSet.batch(BATCH_SIZE)
    return testSet
def getAcc(history):
    plt.plot(history.history['accuracy'], label='accuracy')
    # plt.plot(history.history['val_accuracy'], label='val_accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('acc')
    plt.legend(loc='lower right')
    plt.savefig('./acc.png')
    plt.clf()

def getLoss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.xlabel('Epoch')
    plt.ylabel('loss')
    plt.legend(loc='lower right')
    plt.savefig('./loss.png')
if __name__ == "__main__":
    model = getModel()
    dataset = getDataset('./train.npz')
    checkpoint_path = "training_1/cp.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     save_weights_only=True,
                                                     verbose=1)
    history = model.fit(dataset, epochs=5, callbacks=[cp_callback])
    getAcc(history)
    getLoss(history)
    model.save('GRaymodel.h5')
    print('training done!!')
    test = getTestSet('test.npz')
    loss, acc = model.evaluate(test)
    print(model.predict(test))
    print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

Related

Input contains NaN, infinity or a value too large for dtype('float32') - PyTorch

I am trying to train a model, but in vain. I see the error:
Input contains NaN, infinity or a value too large for dtype('float32').
I think it may be connected with the MSE loss function, because with MAE it more or less works, and with RMSE it also more or less works (on the second epoch I get RMSE = 10). I can't figure out what I am doing wrong.
# Count NaN
df = pd.read_csv('data.txt.zip', header=None)
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

train_size = 463715
X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:]

# To tensors
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test)

# Create TensorDatasets
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)

val_num = 92743
train_num = 370972

# Divide train data into train and validation data
train_ds, val_ds = random_split(train_ds, [train_num, val_num])
# Evaluate accuracy
def accuracy(y_true, y_pred):
    return r2_score(y_true, y_pred)
# Create model class
class BaselineModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BaselineModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.linear1 = nn.Linear(90, 45)
        self.linear2 = nn.Linear(45, 1)
        self.linear3 = nn.Linear(45, 15)
        self.linear4 = nn.Linear(15, 1)
        self.batch = nn.BatchNorm2d(hidden_size)
        self.relu = nn.ReLU()
        self.lreku = nn.LeakyReLU()
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.elu(self.linear1(x))
        return self.linear2(x)

    def training_step(self, criterion, batch):
        x_train, y_train = batch
        y_pred = self(x_train)
        loss = criterion(y_pred, y_train.unsqueeze(1))
        return loss

    def validation_step(self, criterion, batch):
        x_val, y_val = batch
        y_pred = self(x_val)
        loss = criterion(y_pred, y_val.unsqueeze(1))
        acc = accuracy(y_val, y_pred)
        return {'val_loss': loss, 'val_acc': acc}

    def validation_epoch_end(self, y_pred):
        batch_losses = [x['val_loss'] for x in y_pred]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in y_pred]
        epoch_acc = np.mean(batch_accs)
        #epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print(f"Epoch {epoch}, val_loss: {result['val_loss']}, val_acc: {result['val_acc']} ")

model = BaselineModel(input_size=90, hidden_size=45, output_size=1)
# Evaluate
def evaluate(model, criterion, val_loader):
    with torch.no_grad():
        y_pred = [model.validation_step(criterion, batch) for batch in val_loader]
    return model.validation_epoch_end(y_pred)
# Train
def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs):
    history = []
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = model.training_step(criterion, batch)
            loss.backward()
            optimizer.step()
        result = evaluate(model, criterion, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    #return history
# Create train_loader & val_loader
batch_size = 128
train_loader = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size = batch_size, shuffle = True)
# Create parameters and Train
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr, momentum = 0.9)
criterion = F.mse_loss
epochs = 10
train(model, criterion, optimizer, train_loader, val_loader, lr, epochs)
Yes, it is because of your loss function. If the value of the loss becomes very small or very large after some epochs, then using it in backpropagation to train the model leads to this error. To handle that, you should use early stopping to halt the training: implement a callback, since callbacks provide a way to execute code and interact with the training process automatically.
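For illustration, here is a minimal early-stopping sketch in plain PyTorch (which has no built-in callback system), reusing the train/evaluate structure from the question; the patience value of 3 and the extra non-finite-loss guard are my own assumptions, not part of the original code:

def train_with_early_stopping(model, criterion, optimizer,
                              train_loader, val_loader, epochs, patience=3):
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = model.training_step(criterion, batch)
            if not torch.isfinite(loss):
                # stop before a NaN/inf loss corrupts the weights
                print(f"Non-finite loss in epoch {epoch}, stopping.")
                return
            loss.backward()
            optimizer.step()
        result = evaluate(model, criterion, val_loader)
        model.epoch_end(epoch, result)
        # halt when the validation loss stops improving
        if result['val_loss'] < best_val_loss:
            best_val_loss = result['val_loss']
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"No improvement for {patience} epochs, stopping early.")
                return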

Keras - Multiclass classification and transfer learning, bad validation and test accuracy

I am building a multiclass classification model that should be able to recognize 4 different insects. I am using ResNet50 (weights='imagenet').
The dataset is small: on average 100 photos per class (more than 400 in total).
Depending on the model, I usually get a val_accuracy of more than 90% (200 epochs) and a test accuracy of around 80-85%, but when I print the confusion matrix or plot the actual and predicted labels for given photos, the results are terrible (usually around 25%).
I have tried different models (ResNet18, ResNet50V2, Xception), I froze model layers, and I tried different data augmentation parameters and different model parameters (such as Dropout(0.5, 0.2) and kernel_regularizer='l2', because I read that helps reduce overfitting).
I think the problem is in generating the images, but I don't know what else to change there; I tried the val_generator with shuffle=False/True and the train_generator with seed=1/off, but the final results are similar.
I am adding images of the confusion matrix, the accuracy, and the plotted photos.
I am using a Jupyter notebook.
Thank you!
directory_train = "keras_splited/train"
directory_test = "keras_splited/test"
directory_val = "keras_splited/val"
BATCH_SIZE = 32
IMG_SIZE = 224
def make_DataImageGenerator(validation_split=None):
    image_generator = ImageDataGenerator(
        rescale=(1.0/255),
        rotation_range=40,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split
    )
    return image_generator
train_img_generator = make_DataImageGenerator(validation_split=None)
val_img_generator = make_DataImageGenerator(validation_split=None)
test_img_generator = make_DataImageGenerator(validation_split=None)
def get_generator(img_generator, directory, train_valid=None, seed=None, shuffle=True):
    train_generator = img_generator.flow_from_directory(
        directory,
        batch_size=BATCH_SIZE,
        target_size=(IMG_SIZE, IMG_SIZE),
        subset=train_valid,
        seed=seed,
        shuffle=shuffle
    )
    return train_generator
train_generator = get_generator(train_img_generator, directory_train)
val_generator = get_generator(val_img_generator, directory_val)
test_generator = get_generator(test_img_generator, directory_test)
target_labels = next(os.walk(directory_train))[1]
target_labels.sort()
num_classes = len(target_labels)
model_feature_extraction = tf.keras.applications.ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = model_feature_extraction.output
x = layers.GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
myModelOut = Dense(4, activation="softmax")(x)
model = Model(inputs=model_feature_extraction.input, outputs=myModelOut)
optimizer = "adam"
loss = "categorical_crossentropy"
def freeze_pretrained_weights(model):
    #model.layers[0].trainable = False  # wanted to freeze the model but it didn't work well
    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=["accuracy"]
    )
    return model

frozen_new_model = freeze_pretrained_weights(model)

my_callbacks = [
    tf.keras.callbacks.ModelCheckpoint("testno/best_model/", save_best_only=True, monitor="accuracy", save_weights_only=False, mode="max"),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.2, patience=25, min_lr=0.001)
]
def train_model(model, train_gen, valid_gen, epochs):
    train_steps_per_epoch = train_gen.n // train_gen.batch_size
    history = model.fit(
        train_gen,
        steps_per_epoch=train_steps_per_epoch,
        epochs=epochs,
        callbacks=my_callbacks,
        validation_data=valid_gen,
    )
    return history
history_frozen_model = train_model(frozen_new_model, train_generator, val_generator, epochs=150)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.plot(history_frozen_model.history['accuracy'])
plt.plot(history_frozen_model.history['val_accuracy'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.subplot(122)
plt.plot(history_frozen_model.history['loss'])
plt.plot(history_frozen_model.history['val_loss'])
plt.title('Loss vs. epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()
test_steps = test_generator.n // test_generator.batch_size
test_generator.reset()
new_model_test_loss, new_model_test_acc = frozen_new_model.evaluate(test_generator)
print('\nTest dataset')
print(f"Loss: {new_model_test_loss}")
print(f"Accuracy: {new_model_test_acc}")
pred = frozen_new_model.predict(test_generator, steps=test_steps, verbose=1)
batch = next(test_generator)
batch_images = np.array(batch[0])
batch_labels = np.array(batch[1])
target_labels = np.asarray(target_labels)
print(target_labels)
plt.figure(figsize=(15,15))
for n, i in enumerate(np.arange(6)):
    actual = target_labels[np.argmax(batch_labels[i])]
    predicted = target_labels[np.argmax(pred[i])]
    confidence = round(100*(np.max(pred[i])), 2)
    ax = plt.subplot(3, 3, n+1)
    plt.imshow(batch_images[i])
    plt.title(f"Actual: {actual},\n Predicted: {predicted},\n Confidence: {confidence}")
    plt.axis('off')
from sklearn.metrics import ConfusionMatrixDisplay

y_true_lista = []
y_pred_lista = []
for i, img in enumerate(batch_labels):
    y_true = np.argmax(batch_labels[i]).reshape(-1)
    for i in y_true:
        y_true_lista.append(i)
    y_pred = np.argmax(pred[i]).reshape(-1)
    for i in y_pred:
        y_pred_lista.append(i)

print("y_true: ", y_true_lista)
print("y_pred: ", y_pred_lista)
matrix = confusion_matrix(y_true, y_pred)
#print(matrix.shape)
labels = target_labels
cm = confusion_matrix(y_true_lista, y_pred_lista)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues, xticks_rotation = 'vertical')
plt.show()
I don't know what to change to get the right results in the plots and in the matrix.
Can someone point me in the right direction? What did I do wrong with this model, or is something wrong with the plotting?

Getting a fixed accuracy: 0.5000 and sometimes 0.0000e+00 in Keras model using Google Colab

I am training a CNN model with Keras on Google Colab for binary image classification. The problem is that when I use the sigmoid function I get an accuracy fixed at 0.5000, and when I change the metric to 'acc' I get 0.0000e+00 as the accuracy. Also, when I change the activation function to softmax, my model starts learning.
PS: I am using Google Colab, where the TensorFlow version is 2.5.0.
My code:
def define_model(input_shape, num_classes):
    model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
    x = model.output
    x = GlobalAveragePooling2D()(x)
    preds = Dense(num_classes, activation='sigmoid')(x)
    model = Model(inputs=model.input, outputs=preds)
    return model

def train(epochs):
    train_generator = ImageDataGenerator(rescale=1.0/255.0, vertical_flip=True, horizontal_flip=True)
    test_generator = ImageDataGenerator(rescale=1.0/255.0)
    train_generator = train_generator.flow_from_directory(
        'trainset/',
        target_size=(image_size, image_size),
        batch_size=BATCH_SIZE_TRAINING,
        seed=7)
    validation_generator = test_generator.flow_from_directory(
        'testset/',
        target_size=(image_size, image_size),
        batch_size=BATCH_SIZE_VALIDATION,
        seed=7)
    input_shape = (CHANNELS, image_size, image_size) if K.image_data_format() == 'channels_first' \
        else (image_size, image_size, CHANNELS)
    model = define_model(input_shape, NUM_CLASSES)
    opt = optimizers.Adam(learning_rate=1e-6, beta_1=0.9, beta_2=0.99, amsgrad=False)
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['acc'])
    filepath = path + "weights-improvement-{epoch:02d}-vacc:{val_accuracy:.2f}-tacc:{accuracy:.2f}.hdf5"
    '''cb_early_stopper = EarlyStopping(monitor = 'val_accuracy', mode='min', verbose=1, patience = EARLY_STOP_PATIENCE)
    cb_checkpointer = ModelCheckpoint(filepath = filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.25, patience=5, min_lr=1e-7)'''
    fit_history = model.fit(train_generator,
                            epochs=NUM_EPOCHS,
                            validation_data=validation_generator,
                            verbose=1,
                            class_weight=class_weights)
                            # callbacks=[cb_checkpointer, cb_early_stopper, reduce_lr],
    return model, fit_history

def main():
    start_time = time()
    model, fit_history = train(epochs=NUM_EPOCHS)
    end_time = time()
    seconds_elapsed = end_time - start_time
    print('token time: ', seconds_elapsed)
    hours, rest = divmod(seconds_elapsed, 3600)
    minutes, seconds = divmod(rest, 60)

if __name__ == "__main__":
    main()
The problem was solved by adding this argument to the .flow_from_directory() call:
class_mode='binary',
Thanks to this thread on GitHub:
https://github.com/keras-team/keras/issues/13006
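For illustration, a sketch of the fix applied to the question's generator (flow_from_directory defaults to class_mode='categorical', which yields one-hot labels; 'binary' yields the scalar 0/1 labels a sigmoid output trained with binary_crossentropy expects). Pairing it with a one-unit head in define_model() is my assumption about the intended binary setup:

train_generator = train_generator.flow_from_directory(
    'trainset/',
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_TRAINING,
    class_mode='binary',  # scalar 0/1 labels instead of one-hot vectors
    seed=7)

# in define_model(), a single sigmoid unit matches the binary labels:
preds = Dense(1, activation='sigmoid')(x)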

ValueError: Error when checking target: expected avg_pool to have 4 dimensions, but got array with shape (100, 2)

I am trying to run this code (binary classification) but I'm still stuck with this error:
ValueError: Error when checking target: expected avg_pool to have 4 dimensions, but got array with shape (100, 2)
NUM_CLASSES = 2
CHANNELS = 3
IMAGE_RESIZE = 224
RESNET50_POOLING_AVERAGE = 'avg'
DENSE_LAYER_ACTIVATION = 'softmax'
OBJECTIVE_FUNCTION = 'binary_crossentropy'
NUM_EPOCHS = 10
EARLY_STOP_PATIENCE = 3
STEPS_PER_EPOCH_VALIDATION = 10
BATCH_SIZE_TRAINING = 100
BATCH_SIZE_VALIDATION = 100
resnet_weights_path = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
train_data_dir = "C:\\Users\\Desktop\\RESNET"
model = ResNet50(include_top=True, weights='imagenet')
x = model.get_layer('avg_pool').output
predictions = Dense(1, activation='sigmoid')(x)
model = Model(input = model.input, output = predictions)
print(model.summary())
model.layers.pop()
model = Model(input=model.input,output=model.layers[-1].output)
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.01, momentum=0.9), metrics= ['binary_accuracy'])
data_dir = "C:\\Users\\Desktop\\RESNET"
batch_size = 32
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
image_size = IMAGE_RESIZE
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
def append_ext(fn):
    return fn + ".jpg"
dir_path = os.path.dirname(os.path.realpath(__file__))
train_dir_path = dir_path + '\data'
onlyfiles = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
NUM_CLASSES = 2
data_labels = [0, 1]
t = []
maxi = 25145
LieOffset = 15799
i = 0
while i < maxi:  # t = tuple
    if i <= LieOffset:
        t.append(label['Lie'])
    else:
        t.append(label['Truth'])
    i = i + 1
train_datagenerator = ImageDataGenerator(rescale=1./255,
                                         shear_range=0.2,
                                         zoom_range=0.2,
                                         horizontal_flip=True,
                                         validation_split=0.2)
train_generator = train_datagenerator.flow_from_directory(
    train_data_dir,
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_TRAINING,
    class_mode='categorical', shuffle=False, subset='training')  # set as training data
validation_generator = train_datagenerator.flow_from_directory(
    train_data_dir,  # same directory as training data
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_TRAINING,
    class_mode='categorical', shuffle=False, subset='validation')
(BATCH_SIZE_TRAINING, len(train_generator), BATCH_SIZE_VALIDATION, len(validation_generator))
from sklearn.grid_search import ParameterGrid
param_grid = {'epochs': [5, 10, 15], 'steps_per_epoch' : [10, 20, 50]}
grid = ParameterGrid(param_grid)
# Accumulate history of all permutations (may be for viewing trend) and keep watching for lowest val_loss as final model
for params in grid:
    fit_history = model.fit_generator(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH_TRAINING,
        epochs=NUM_EPOCHS,
        validation_data=validation_generator,
        validation_steps=STEPS_PER_EPOCH_VALIDATION,
        callbacks=[cb_checkpointer, cb_early_stopper]
    )
model.load_weights("../working/best.hdf5")
Remove these lines and it'll work:
model.layers.pop()
model = Model(input=model.input, output=model.layers[-1].output)
The former removes the last (Dense) layer, and the latter creates a model without the last (Flatten, as the Dense is already popped) layer. It doesn't make sense, as your target data is (100, 2). Why did you put them there in the first place?
Also, I think this line
predictions = Dense(1, activation='sigmoid')(x)
will error, as your target data has 2 channels; if it does error, then change it to
predictions = Dense(2, activation='sigmoid')(x)
Update
The output of avg_pool has 4 dimensions (batch_size, height, width, channels). You need to apply Flatten first, or use GlobalAveragePooling2D instead of AveragePooling2D.
Like
x = model.get_layer('avg_pool').output
x = keras.layers.Flatten()(x)
predictions = Dense(1, activation='sigmoid')(x)
Or
# `pooling='avg'` makes the ResNet50 include a GlobalAveragePooling layer,
# and `include_top=False` means the imagenet output layer is not included
model = ResNet50(include_top=False, pooling='avg', weights='imagenet')
# as `include_top=False` is used, you don't need to care about the layer name;
# just use the model's output right away
x = model.output
predictions = Dense(1, activation='sigmoid')(x)
Also just as #bit01 said, change class_mode='categorical' to class_mode='binary'.
Change class_mode='categorical' to class_mode='binary' in both train_generator and validation_generator.
Additionally, delete the following lines, as you have already created the model:
model.layers.pop()
model = Model(input=model.input,output=model.layers[-1].output)
So your model would look like:
model = ResNet50(include_top=True, weights='imagenet')
x = model.get_layer('avg_pool').output
x = Flatten()(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(input = model.input, output = predictions)

Why do my models keep getting exactly 0.5 AUC?

I am currently doing a project in which I need to predict eye disease in a group of images. I am using the Keras built-in applications. I am getting good results with VGG16 and VGG19, but with the Xception architecture I keep getting an AUC of exactly 0.5 every epoch.
I have tried different optimizers and learning rates, but nothing works. I solved the same problem with VGG19 by switching from the RMSprop optimizer to the Adam optimizer, but I can't get it to work for Xception.
def buildModel():
    from keras.models import Model
    from keras.layers import Dense, Flatten
    from keras.optimizers import adam

    input_model = applications.xception.Xception(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=input_sizes["xception"],
        pooling=None,
        classes=2)
    base_model = input_model
    x = base_model.output
    x = Flatten()(x)
    predictions = Dense(2, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer=adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model
class Histories(keras.callbacks.Callback):
    def __init__(self, val_data):
        super(Histories, self).__init__()
        self.x_batch = []
        self.y_batch = []
        for i in range(len(val_data)):
            x, y = val_data.__getitem__(i)
            self.x_batch.extend(x)
            self.y_batch.extend(np.ndarray.astype(y, int))
        self.aucs = []
        self.specificity = []
        self.sensitivity = []
        self.losses = []
        return

    def on_train_begin(self, logs={}):
        initFile("results/xception_results_adam_3.txt")
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        y_pred = self.model.predict(np.asarray(self.x_batch))
        con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        tn, fp, fn, tp = con_mat.ravel()
        sens = tp / (tp + fn)
        spec = tn / (tn + fp)
        auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        print("Specificity: %f Sensitivity: %f AUC: %f" % (spec, sens, auc_score))
        print(con_mat)
        self.sensitivity.append(sens)
        self.specificity.append(spec)
        self.aucs.append(auc_score)
        writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
        return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    trainDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    valDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
    testDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=20,
                   verbose=0,
                   mode='auto',
                   baseline=None,
                   restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=False,
                                      mode='auto',
                                      period=1)
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
    train_generator,
    steps_per_epoch=superDirectorySize(trainDirectory) // 16,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=superDirectorySize(valDirectory) // 16,
    callbacks=[metrics, es, mcp],
    workers=8,
    shuffle=False
)
I honestly have no idea what causes this behavior, or how to prevent it. Thank you in advance, and I apologize for the long code snippet :)
Your learning rate is too high.
Try lowering the learning rate. I used to run into this when doing transfer learning: I was fine-tuning at very high learning rates.
An AUC of 0.5 sustained over multiple epochs in a binary classification means that your (convolutional) neural network is not able to distinguish between the classes at all, which in turn means it is not able to learn anything.
Use learning rates of 0.0001, 0.00001, 0.000001.
At the same time, you should try to unfreeze/make some layers trainable, because your entire feature extractor is frozen; in fact, this could be another reason why the network is incapable of learning anything.
I am quite confident that your problem will be solved if you lower your learning rate :).
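As a sketch of both suggestions, here is a variant of the question's buildModel() (the choice to unfreeze the last 20 layers and the 1e-5 learning rate are arbitrary illustration values, not tuned recommendations):

def buildModelPartiallyFrozen():
    from keras.models import Model
    from keras.layers import Dense, Flatten
    from keras.optimizers import adam

    base_model = applications.xception.Xception(
        include_top=False,
        weights='imagenet',
        input_shape=input_sizes["xception"])
    x = Flatten()(base_model.output)
    predictions = Dense(2, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # freeze the early layers, leave the top of the feature extractor trainable
    for layer in base_model.layers[:-20]:
        layer.trainable = False
    for layer in base_model.layers[-20:]:
        layer.trainable = True

    # compile with a much lower learning rate
    model.compile(optimizer=adam(lr=1e-5), loss='binary_crossentropy', metrics=['accuracy'])
    return model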
An AUC of 0.5 implies that your network is randomly guessing the output, which means it didn't learn anything. This was already discussed, for example, here.
As Timbus Calin suggested, you could do a "line search" of the learning rate, starting with 0.000001 and then increasing the learning rate by powers of 10.
I would suggest you directly start with a random search, where you not only try to optimize the learning rate but also other hyperparameters, such as the batch size. Read more about random search in this paper.
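For concreteness, a minimal random-search sketch over those two hyperparameters (the trial count, learning-rate range, and batch sizes are hypothetical choices; buildModel() is the question's function, which compiles internally with lr=0.01, hence the recompile):

import random
from keras.optimizers import adam

for trial in range(10):
    lr = 10 ** random.uniform(-6, -2)            # sample the learning rate log-uniformly
    batch_size = random.choice([8, 16, 32, 64])  # sample a batch size from a small set
    print(f"Trial {trial}: lr={lr:.1e}, batch_size={batch_size}")
    model = buildModel()
    model.compile(optimizer=adam(lr=lr), loss='binary_crossentropy', metrics=['accuracy'])
    # ...rebuild the generators with batch_size=batch_size, call fit_generator as above,
    # and keep the configuration with the best validation loss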
You are not computing the AUC correctly. You currently have this:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
The AUC is computed from the (probability) scores produced by the model. The argmax of the model output does not give scores but class labels. The correct function call is:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred[:, 1])
Note that the score needed to compute the ROC is the probability of the positive class, which is the second element of the softmax output; this is why only the second column of the predictions is used to compute the AUC.
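A small self-contained demonstration of the difference (the numbers are made up): the hard labels from argmax discard the ranking information that the AUC needs, so the two calls can disagree on the same predictions.

import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
# hypothetical softmax outputs; column 1 is the positive-class probability
y_pred = np.array([[0.55, 0.45],
                   [0.90, 0.10],
                   [0.45, 0.55],
                   [0.52, 0.48]])

print(roc_auc_score(y_true, y_pred.argmax(axis=-1)))  # 0.75 (hard labels: [0, 0, 1, 0])
print(roc_auc_score(y_true, y_pred[:, 1]))            # 1.0 (scores rank all positives above all negatives)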
What about this?
def buildModel():
    from keras.models import Model
    from keras.layers import Dense, Flatten
    from keras.optimizers import adam

    input_model = applications.xception.Xception(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=input_sizes["xception"],
        pooling='avg',  # 1
        classes=2)
    base_model = input_model
    x = base_model.output
    # x = Flatten()(x)  # 2
    predictions = Dense(2, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer=adam(lr=0.01),
                  loss='categorical_crossentropy',  # 3
                  metrics=['accuracy'])
    return model
class Histories(keras.callbacks.Callback):
    def __init__(self, val_data):
        super(Histories, self).__init__()
        self.x_batch = []
        self.y_batch = []
        for i in range(len(val_data)):
            x, y = val_data.__getitem__(i)
            self.x_batch.extend(x)
            self.y_batch.extend(np.ndarray.astype(y, int))
        self.aucs = []
        self.specificity = []
        self.sensitivity = []
        self.losses = []
        return

    def on_train_begin(self, logs={}):
        initFile("results/xception_results_adam_3.txt")
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        y_pred = self.model.predict(np.asarray(self.x_batch))
        con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        tn, fp, fn, tp = con_mat.ravel()
        sens = tp / (tp + fn)
        spec = tn / (tn + fp)
        auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        print("Specificity: %f Sensitivity: %f AUC: %f" % (spec, sens, auc_score))
        print(con_mat)
        self.sensitivity.append(sens)
        self.specificity.append(spec)
        self.aucs.append(auc_score)
        writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
        return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    trainDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    valDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
    testDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=20,
                   verbose=0,
                   mode='auto',
                   baseline=None,
                   restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=False,
                                      mode='auto',
                                      period=1)
#%% Load saved model
from keras.models import load_model
# model = load_model("saved_models/vgg16.10.hdf5") # 4
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
    train_generator,
    steps_per_epoch=superDirectorySize(trainDirectory) // 16,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=superDirectorySize(valDirectory) // 16,
    callbacks=[metrics, es, mcp],
    workers=8,
    shuffle=False
)
For 1 and 2: I think it doesn't make sense to put an FC layer right after a ReLU without a pooling layer in between; I have never tried it, so it might not help anything.
For 3: why are you using binary crossentropy when your generators use class_mode='categorical'?
For 4: as I commented above, this means you are loading your VGG model and training it, instead of using the Xception model from buildModel().
