Keras VGG model for MNIST: Disparity between training and validation accuracy - python

I have created the following model with Keras. The dataset is MNIST.
'''
conv - relu - conv- relu - pool -
conv - relu - conv- relu - pool -
conv - relu - conv- relu - pool -
affine - relu - dropout - affine - dropout - softmax
'''
# VGG-style stack for MNIST: three (conv, conv, pool) stages followed by a
# small dense head, as outlined in the sketch above.
model = Sequential()
# Stage 1: 16 filters.
model.add(Conv2D(16, kernel_size=(3, 3),
padding='same',
input_shape=input_shape))
model.add(Activation('relu'))
# The second conv of each stage passes no padding argument, so the layer's
# default padding applies.
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Stage 2: 32 filters.
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Stage 3: 64 filters.
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Dense head: 50 hidden units, then one score per class.
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
# NOTE(review): this Dropout sits between the class scores and the softmax,
# so during training it zeroes about half of the num_classes outputs at
# random. Dropout is inactive at evaluation time, which presumably explains
# why training accuracy lags far behind validation accuracy.
model.add(Dropout(0.5))
model.add(Activation('softmax'))
The following is the result:
60000/60000 [==============================] - 10s - loss: 1.2707 - acc: 0.5059 - val_loss: 0.0881 - val_acc: 0.9785
Epoch 2/20
60000/60000 [==============================] - 9s - loss: 0.9694 - acc: 0.5787 - val_loss: 0.0449 - val_acc: 0.9873
...
Epoch 19/20
60000/60000 [==============================] - 9s - loss: 0.8530 - acc: 0.6004 - val_loss: 0.0282 - val_acc: 0.9937
Epoch 20/20
60000/60000 [==============================] - 9s - loss: 0.8564 - acc: 0.5982 - val_loss: 0.0383 - val_acc: 0.9910
Test loss: 0.0382921607383
Test accuracy: 0.991
Why is the training accuracy so low, while the validation accuracy is so high?

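The dropout on your last Dense layer randomly removes half of the 10 neurons that represent your classes. During training your last layer can therefore only be accurate about half of the time, because on average half of its outputs are missing. Dropout is switched off during validation, which is why the validation accuracy is not affected.
Try removing that dropout layer; I expect the training and validation accuracy will then be comparable.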

Related

how to fix constant accuracy and val_accuracy

I attempted to fix it after seeing this [post][1] by adding dropout, but it didn't work. And I'm still getting consistent accuracy, so any assistance would be greatly appreciated.
import os
# Tell the Kaggle CLI to look for its configuration in /content
# (presumably where kaggle.json was uploaded -- confirm).
os.environ['KAGGLE_CONFIG_DIR'] = "/content"
# The lines starting with `!` are notebook shell escapes, not Python: they
# only run inside IPython/Colab and are a syntax error in a plain script.
# Download the dataset archive and unpack it into brain_tumor_dataset/.
!kaggle datasets download -d jakeshbohaju/brain-tumor
!unzip \*.zip -d brain_tumor_dataset
# Remove any leftover "yes"/"no" entries and the downloaded archive(s).
!rm -rf yes
!rm -rf no
!rm -rf *.zip
# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2
from tensorflow import keras
from tensorflow.keras import layers, Input
from keras.layers import InputLayer, MaxPooling2D, Flatten, Dense, Conv2D, Dropout, BatchNormalization
from keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions, ResNet50
from tensorflow.keras.optimizers import Adam, SGD
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from PIL.Image import open
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import random
# %matplotlib inline
# Constants
IMAGE_DATASET = "/content/brain_tumor_dataset/Brain Tumor/Brain Tumor"
IMAGE_DATASET_RAW = '/content/brain_tumor_dataset/Brain Tumor/Brain Tumor'
WORKING_FOLDER = "/content/brain_tumor_dataset/working"
IMG_HEIGHT = 224
IMG_WIDTH = 224
EPOCHS = 100

# Preview one fixed image and print its element count as a sanity check.
# # Image3202
plt.figure(figsize=(20,20))
test_folder="/content/brain_tumor_dataset/Brain Tumor/Brain Tumor/Image100.jpg"
img=mpimg.imread(test_folder)
print(img.size)
ax=plt.subplot(1,5,4)
# # ax.title.set_text(file)
plt.imshow(img)

# We will import the csv file containing the features and the classes of the images
cortex_df = pd.read_csv("/content/brain_tumor_dataset/Brain Tumor.csv")
cortex_df.head()

# Show five randomly chosen images from the dataset folder in one row,
# each titled with its file name.
plt.figure(figsize=(20,20))
test_folder="/content/brain_tumor_dataset/Brain Tumor/Brain Tumor"
for i in range(5):
    file = random.choice(os.listdir(test_folder))
    image_path= os.path.join(test_folder, file)
    img=mpimg.imread(image_path)
    ax=plt.subplot(1,5,i+1)
    ax.title.set_text(file)
    plt.imshow(img)
# Working frame with just the image id and its class label from the CSV.
dataset_df = pd.DataFrame()
dataset_df["Image"] = cortex_df["Image"]
dataset_df["Class"] = cortex_df["Class"]

# Map every image id (file name without extension) to its path on disk.
path_list = [os.path.join(IMAGE_DATASET, img_path)
             for img_path in os.listdir(IMAGE_DATASET)]
path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in path_list}
dataset_df["paths"] = cortex_df["Image"].map(path_dict.get)

# `open` here is PIL.Image.open (imported at file level), not the builtin.
# PIL's resize() takes (width, height), while the resulting array is laid out
# as (height, width, ...); passing the constants in this order keeps the
# array shape aligned with (IMG_HEIGHT, IMG_WIDTH) even for non-square sizes.
dataset_df["pixels"] = dataset_df["paths"].map(
    lambda x: np.asarray(open(x).resize((IMG_WIDTH, IMG_HEIGHT))))
dataset_df.head()

# Scale the pixel values to [0, 1] as float32 and stack them into one array.
image_list = [pixels.astype(np.float32) / 255
              for pixels in dataset_df["pixels"]]
X = np.array(image_list)
print(X.shape)
y = np.array(dataset_df.Class)
#y.shape

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print('The shape of the X_train :'+' '+str(X_train.shape))
print('The size of the X_train :'+' '+str(X_train.shape[0]))
print('The shape of the X_test :'+' '+str(X_test.shape))
print('The size of the X_test:'+' '+str(X_test.shape[0]))
def model(input_shape):
    """Build the CNN binary classifier for the brain-tumor images.

    The network has five convolutional stages (16, 32, 64, 128 and 256
    filters). Each stage stacks stride-2 3x3 convolutions, then applies
    Dropout(0.25) and 2x2 max pooling. A 256-128-1 dense head with a sigmoid
    output follows.

    Args:
        input_shape: Shape of a single input image, passed to ``Input``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # res_conv = ResNet50(include_top=False, weights="imagenet", input_tensor=None, input_shape=input_shape, pooling=None)

    # (filters, number of stacked conv layers) for each stage, in order.
    stages = ((16, 2), (32, 2), (64, 4), (128, 4), (256, 4))

    net = Sequential()
    net.add(Input(shape=input_shape))
    for filters, depth in stages:
        # NOTE(review): every conv strides by 2 in addition to the pooling
        # below, so the spatial size shrinks very quickly -- confirm intended.
        for _ in range(depth):
            net.add(Conv2D(filters, kernel_size=3, strides=(2, 2), padding="same", activation="relu", kernel_initializer="he_normal"))
        net.add(Dropout(0.25))
        net.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_last", padding='same'))

    # Dense classification head.
    net.add(Flatten())
    net.add(Dense(256, activation="relu"))
    net.add(Dropout(0.5))
    net.add(Dense(128, activation="relu"))
    net.add(Dropout(0.4))
    # The sigmoid makes this layer emit probabilities rather than logits, so
    # it must be paired with a loss configured with from_logits=False (or the
    # activation must be dropped if the loss uses from_logits=True).
    net.add(Dense(1, activation="sigmoid"))
    return net
# Build the network. Note that this rebinds the name `model` from the builder
# function to the model instance, so the builder cannot be called again.
model = model(input_shape = (IMG_HEIGHT, IMG_WIDTH, 3))
model.summary()
# optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, name="Adam",)
optimizer = SGD(learning_rate=0.01)
# NOTE(review): from_logits=True tells the loss that it receives raw logits,
# but the model's last layer already applies a sigmoid, so the two settings
# contradict each other.
loss_fn = BinaryCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
# Training the model
# No validation data is passed, so the history holds training metrics only.
history = model.fit(x=X_train, y=y_train, epochs=EPOCHS, batch_size=10)
loss = history.history["loss"]
acc = history.history["accuracy"]
# Training-loss curve.
epoch = np.arange(EPOCHS)
plt.plot(epoch, loss)
# plt.plot(epoch, val_loss)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss')
# NOTE(review): the legend names a 'val' curve, but only one line is plotted.
plt.legend(['train', 'val'])
# Training-accuracy curve.
# NOTE(review): no new figure is created here, so this is drawn on the same
# axes as the loss curve and overrides its labels and title.
epoch = np.arange(EPOCHS)
plt.plot(epoch, acc)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training Accuracy');
# Evaluate on the held-out split; index 0 is the loss, index 1 the accuracy.
eval_score = model.evaluate(X_test, y_test)
print("Test loss:", eval_score[0])
print("Test accuracy:", eval_score[1])
some of Outputs
Epoch 70/100
301/301 [==============================] - 4s 13ms/step - loss: 0.6864 - accuracy: 0.5577
Epoch 71/100
301/301 [==============================] - 4s 13ms/step - loss: 0.6867 - accuracy: 0.5577
Epoch 72/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6866 - accuracy: 0.5577
Epoch 73/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6866 - accuracy: 0.5577
Epoch 74/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6867 - accuracy: 0.5577
Epoch 75/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6868 - accuracy: 0.5577
Epoch 76/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6869 - accuracy: 0.5577
Epoch 77/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6867 - accuracy: 0.5577
Epoch 78/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6866 - accuracy: 0.5577
Epoch 79/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6867 - accuracy: 0.5577
Epoch 80/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6864 - accuracy: 0.5577
Epoch 81/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6866 - accuracy: 0.5577
Epoch 82/100
301/301 [==============================] - 4s 12ms/step - loss: 0.6866 - accuracy: 0.5577
Epoch 83/100
301/301 [==============================] - 4s 13ms/step - loss: 0.6867 - accuracy: 0.5577
Epoch 84/100
301/301 [==============================] - 4s 13ms/step - loss: 0.6867 - accuracy: 0.557
I tried a few other techniques, such as increasing the number of epochs or adding dropout, but the accuracy remained the same.
[1]: Keras model gets constant loss and accuracy
One problem in your case is that you are using from_logits = True simultaneously with the sigmoid activation function.
The logit is the unnormalised prediction of a model; in other words, it is the prediction of the network before one applies sigmoid or softmax.
The default parameters are the following:
tf.keras.losses.BinaryCrossentropy(
from_logits=False, label_smoothing=0, reduction=losses_utils.ReductionV2.AUTO,
name='binary_crossentropy'
)
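If you use from_logits=True, then you have to change the last layer to model.add(Dense(1)), i.e. without an activation, which in fact translates to a linear activation. Alternatively, keep the sigmoid activation and use from_logits=False.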
Then your network will start learning.
Another suggestion would be to decrease the learning_rate, to an initial one of 0.0001.

High training accuracy, low validation accuracy CNN binary classification keras

I'm trying to create a binary classifier that can differentiate between MRIs of alzheimer's patients and healthy individuals.
These are the stats so far:
1032 training images
400 validation images
Running a simple model as shown below
I have both the raw 160x160 images as well as the images after edge detection
Model:
# Deliberately minimal network: one 5x5 convolution, one 2x2 max-pool, then a
# two-way softmax over the flattened feature map.
model = Sequential([
Conv2D(filters=16, kernel_size=(5, 5), activation='relu', padding = 'same', input_shape=(160,160,3)),
MaxPool2D(pool_size=(2, 2), strides=2),
Flatten(),
# One output unit per class.
Dense(units=2, activation='softmax')
])
As you can see - it's very simple, something I've done purposefully to try and remedy the issue of overfitting.
Output:
11/11 [==============================] - 2s 194ms/step - loss: 0.7604 - accuracy: 0.5155 - val_loss: 0.7081 - val_accuracy: 0.5000
Epoch 2/20
11/11 [==============================] - 2s 185ms/step - loss: 0.6885 - accuracy: 0.5223 - val_loss: 0.6942 - val_accuracy: 0.4839
Epoch 3/20
11/11 [==============================] - 2s 185ms/step - loss: 0.6802 - accuracy: 0.5854 - val_loss: 0.6985 - val_accuracy: 0.4931
Epoch 4/20
11/11 [==============================] - 2s 185ms/step - loss: 0.6717 - accuracy: 0.5932 - val_loss: 0.6996 - val_accuracy: 0.4677
Epoch 5/20
11/11 [==============================] - 2s 195ms/step - loss: 0.6512 - accuracy: 0.6175 - val_loss: 0.7124 - val_accuracy: 0.5115
Epoch 6/20
11/11 [==============================] - 2s 185ms/step - loss: 0.6345 - accuracy: 0.6476 - val_loss: 0.7073 - val_accuracy: 0.5253
Epoch 7/20
11/11 [==============================] - 2s 185ms/step - loss: 0.6118 - accuracy: 0.6680 - val_loss: 0.6920 - val_accuracy: 0.5207
Epoch 8/20
11/11 [==============================] - 2s 185ms/step - loss: 0.5817 - accuracy: 0.7068 - val_loss: 0.6964 - val_accuracy: 0.5207
Epoch 9/20
11/11 [==============================] - 2s 184ms/step - loss: 0.5528 - accuracy: 0.7272 - val_loss: 0.7123 - val_accuracy: 0.5161
Epoch 10/20
11/11 [==============================] - 2s 193ms/step - loss: 0.5239 - accuracy: 0.7417 - val_loss: 0.7397 - val_accuracy: 0.5392
Epoch 11/20
11/11 [==============================] - 2s 186ms/step - loss: 0.5106 - accuracy: 0.7427 - val_loss: 0.7551 - val_accuracy: 0.5461
Epoch 12/20
11/11 [==============================] - 2s 197ms/step - loss: 0.4920 - accuracy: 0.7650 - val_loss: 0.7402 - val_accuracy: 0.5438
Epoch 13/20
11/11 [==============================] - 2s 190ms/step - loss: 0.4741 - accuracy: 0.7835 - val_loss: 0.7564 - val_accuracy: 0.5507
Epoch 14/20
11/11 [==============================] - 2s 188ms/step - loss: 0.4591 - accuracy: 0.7767 - val_loss: 0.7445 - val_accuracy: 0.5300
Epoch 15/20
11/11 [==============================] - 2s 185ms/step - loss: 0.4486 - accuracy: 0.7767 - val_loss: 0.7712 - val_accuracy: 0.5415
Epoch 16/20
11/11 [==============================] - 2s 185ms/step - loss: 0.4503 - accuracy: 0.7806 - val_loss: 0.7446 - val_accuracy: 0.5346
Epoch 17/20
11/11 [==============================] - 2s 188ms/step - loss: 0.4404 - accuracy: 0.7670 - val_loss: 0.7669 - val_accuracy: 0.5553
Epoch 18/20
11/11 [==============================] - 2s 184ms/step - loss: 0.4169 - accuracy: 0.8078 - val_loss: 0.7804 - val_accuracy: 0.5576
Epoch 19/20
11/11 [==============================] - 2s 184ms/step - loss: 0.3987 - accuracy: 0.7971 - val_loss: 0.7846 - val_accuracy: 0.5507
Epoch 20/20
11/11 [==============================] - 2s 192ms/step - loss: 0.3977 - accuracy: 0.7981 - val_loss: 0.8060 - val_accuracy: 0.5461
Things I've tried so far:
resizing the image to a smaller input
adding dropout layers
using preprocessed images where it's just the edges shown
ensuring both classes in both training and validation datasets are evenly distributed
changing learning rate
reducing number of parameters to be of the same magnitude of the number of training images i have
I am literally out of ideas, I'm not sure how to move forward with this so I would appreciate any tips or advice.
All my code:
# Use ImageDataGenerator to create 3 lots of batches
# Both active generators rescale pixel values by 1/255, resize the images to
# 80x80 RGB and yield batches of 100; the class order is fixed to ['cn', 'ad'].
train_batches = ImageDataGenerator(
rescale=1/255).flow_from_directory(directory=train_path,
target_size=(80,80), classes=['cn', 'ad'], batch_size=100,
color_mode="rgb")
valid_batches = ImageDataGenerator(
rescale=1/255).flow_from_directory(directory=valid_path,
target_size=(80,80), classes=['cn', 'ad'], batch_size=100,
color_mode="rgb")
# The test generator is currently disabled.
# test_batches = ImageDataGenerator(
# rescale=1/255).flow_from_directory(directory=test_path,
# target_size=(224,224), classes=['cn', 'ad'], batch_size=10,
# color_mode="rgb")
# Pull one batch of images and labels for inspection.
imgs, labels = next(train_batches)
# Test to see normalisation has occurred properly
print(imgs[1][8])
# Define method to plot MRIs
def plotImages(images_arr):
    """Show the first ten images of ``images_arr`` side by side in one row.

    Args:
        images_arr: Iterable of images accepted by ``Axes.imshow``; only as
            many images as there are subplots (ten) are drawn.
    """
    fig, axes = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    # zip() stops at the shorter of the two, so extra images are ignored.
    for img, ax in zip( images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()


# Plot a sample of MRIs
plotImages(imgs)
# # Define the model
# # VGG16
# model = Sequential()
# model.add(Conv2D(input_shape=(160,160,3),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
# model.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
# model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
# model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
# model.add(Flatten())
# model.add(Dense(units=1024,activation="relu"))
# model.add(Dense(units=128,activation="relu"))
# model.add(Dense(units=2, activation="softmax"))
# # Model from the paper
# model = Sequential([
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(160,160,3)),
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Flatten(),
# Dense(units=2, activation='softmax')
# ])
## Model from Dr Paul
# static_conv_layer=Conv2D(filters=16, kernel_size=(5, 5), activation='relu', padding = 'same')
#
# model = Sequential([
# Conv2D(filters=16, kernel_size=(5, 5), activation='relu', padding = 'same', input_shape=(32,32,3)),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.1),
# static_conv_layer,
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.1),
# Flatten(),
# Dense(units=2, activation='softmax')
# ])
# This model hits around 75% train acc, 54% val acc
# Active model: a single 5x5 conv + max-pool feeding a two-way softmax. It
# takes 80x80 RGB input, the same size the generators use as target_size.
model = Sequential([
Conv2D(filters=16, kernel_size=(5, 5), activation='relu', padding = 'same', input_shape=(80,80,3)),
MaxPool2D(pool_size=(2, 2), strides=2),
# The additional dropout/conv/pool layers below are currently disabled.
# Dropout(0.1),
# Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
Flatten(),
Dense(units=2, activation='softmax')
])
# model = Sequential([
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(160,160,3)),
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
# Flatten(),
# Dense(units=2, activation='softmax')
# ])
## Basic model with dropouts
# model = Sequential([
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(224,224,3)),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.1),
# Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.2),
# Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.3),
# Flatten(),
# Dense(units=1, activation='sigmoid')
# ])
# Summarise each layer of the model
print(model.summary())
# Compile and train the model
# NOTE(review): the model ends in a 2-unit softmax while the loss is
# binary_crossentropy; confirm this matches the label encoding produced by
# the generators (categorical_crossentropy is the usual pairing for a
# softmax over classes).
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
# One epoch covers every batch of each generator once; validation runs after
# each of the 20 epochs.
model.fit(x=train_batches,
steps_per_epoch=len(train_batches),
validation_data=valid_batches,
validation_steps=len(valid_batches),
epochs=20,
verbose=1
)
EDIT:
This paper seems to be doing much better than me and completing a very similar task, it may be useful to look at the methodology for:
Things you can try.
Transfer learning is a good place to start. Using ImageNet weights lets you train just the last layers and usually gives much better accuracy.
Adding early stopping and learning rate reduction with validation accuracy as constraint.
Taking advantage of ImageDataGenerator and add much more data augmentation techniques.
Make your model much deeper and also try different optimizer(RMSprop), run for more epochs with early stopping.
Add callbacks and plot the training and validation accuracy with respect to the learning rate to see which learning rate works best for the data.
It looks like your model is overfitting due to a lack of data. You can do some data augmentation to increase how many images you have. If you don't care about your aspect ratio you can warp the images, if you don't always need the full image you can crop it and you can rotate it if orientation is not important. These things can dramatically increase your dataset size and help mitigate overfitting.
Here is an example from the tensorflow documentation:
batch_size = 32
AUTOTUNE = tf.data.experimental.AUTOTUNE


def prepare(ds, shuffle=False, augment=False):
    """Build the input pipeline for one dataset split.

    Args:
        ds: Dataset of ``(image, label)`` pairs; it must provide ``map``,
            ``shuffle``, ``batch`` and ``prefetch``.
        shuffle: If true, shuffle the elements with a buffer of 1000.
        augment: If true, run ``data_augmentation`` on every batch.

    Returns:
        The resized, rescaled, batched and prefetched dataset.
    """
    # Resize and rescale all datasets
    ds = ds.map(lambda x, y: (resize_and_rescale(x), y),
                num_parallel_calls=AUTOTUNE)

    if shuffle:
        ds = ds.shuffle(1000)

    # Batch all datasets
    ds = ds.batch(batch_size)

    # Use data augmentation only on the training set
    if augment:
        ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y),
                    num_parallel_calls=AUTOTUNE)

    # Use buffered prefetching on all datasets
    return ds.prefetch(buffer_size=AUTOTUNE)
Also, here is a great video to watch from the TensorFlow developers youtube channel which explains the idea of image augmentation and shows an example of how to implement it.

how to prevent overfitting

I'm working on my final project and I'm new to ConvNets. I want to classify whether an image is genuine or spoofed. I have roughly 8000 images in total (both classes combined). Here is part of my training log.
Epoch 7/100
311/311 [==============================] - 20s 63ms/step - loss: 0.3274 - accuracy: 0.8675 - val_loss: 0.2481 - val_accuracy: 0.9002
Epoch 8/100
311/311 [==============================] - 20s 63ms/step - loss: 0.3189 - accuracy: 0.8691 - val_loss: 0.3015 - val_accuracy: 0.8684
Epoch 9/100
311/311 [==============================] - 19s 62ms/step - loss: 0.3201 - accuracy: 0.8667 - val_loss: 0.2460 - val_accuracy: 0.9036
Epoch 10/100
311/311 [==============================] - 19s 62ms/step - loss: 0.3063 - accuracy: 0.8723 - val_loss: 0.2752 - val_accuracy: 0.8901
Epoch 11/100
311/311 [==============================] - 19s 62ms/step - loss: 0.3086 - accuracy: 0.8749 - val_loss: 0.2717 - val_accuracy: 0.8988
[INFO] evaluating network...
model = Sequential()
# Default to channels-last ordering; switch the input shape and the
# BatchNormalization axis when the backend is configured for channels-first.
inputShape = (height, width, depth)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1

# Block 1: two 3x3 convs with 16 filters, each followed by ReLU and batch norm.
model.add(Conv2D(16, (3, 3), padding="same", input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(16, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))

# Block 2: one 5x5 conv with 32 filters.
model.add(Conv2D(32, (5, 5), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))

# Classifier head: 128 hidden units, then a softmax over `classes` outputs.
model.add(Flatten())
model.add(Dense(128))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Dense(classes))
model.add(Activation("softmax"))
The input is 32x32 and there are two classes. I used EarlyStopping in Keras to prevent overfitting. I have tried changing the learning rate and the number of neurons, but training always stops before epoch 20. Any advice on how to prevent overfitting? I'm a beginner with convolutional neural networks. Thanks in advance!
PS LR: 0.001 BS: 20 EPOCHS: 100

keras will only give accuracy of 0.5001

I'm running Keras with a tensorflow-backend.
I try to predict images.
My model looks like this:
# Small CNN for 50x50 single-channel images: three conv layers with two
# max-pool layers in between, followed by a single output unit.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
# NOTE(review): softmax normalises across the units of the layer; with a
# single unit the output is always 1.0, so the prediction can never change.
model.add(Dense(1, activation='softmax'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
Running my code, it produces this output over 10 epochs:
Epoch 1/10
24946/24946 [==============================] - 36s 1ms/sample - loss: 7.9693 - acc: 0.5001
Epoch 2/10
24946/24946 [==============================] - 35s 1ms/sample - loss: 7.9693 - acc: 0.5001
...
Epoch 9/10
24946/24946 [==============================] - 30s 1ms/sample - loss: 7.9693 - acc: 0.5001
Epoch 10/10
24946/24946 [==============================] - 30s 1ms/sample - loss: 7.9693 - acc: 0.5001
1/1 [==============================] - 0s 36ms/step
[[1.]]
Anyhow, I do not understand why the accuracy is always 0.5001 over all 10 epochs.
My question is: Why does the accuracy not change within any epoch?
This part of your code makes no sense:
model.add(Dense(1, activation='softmax'))
Softmax with only one neuron will always produce a constant output of 1.0, due to the normalization. If you want to do binary classification, you should use a sigmoid activation at the output.

Keras fit() doesn't retrain the model on consecutive calls

After playing around with Keras, I realized that somehow model.fit() doesn't retrain the parameters when it is called again.
Below is my toy example. I called model.fit() 6 times, and at the fourth iteration I train it with a completely new dataset. What should happen is that the model changes at the fourth iteration, so the fifth iteration should produce different scores from the third iteration. However, this is not what's happening.
# Two conv/pool/dropout blocks followed by a 512-unit dense layer and a
# two-way softmax.
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 199, 40)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
nb_epoch = 6
# I know you can add nb_epoch into the fit function, but please ignore that for now
for e in range(nb_epoch):
    if e == 3:
        # For the fourth iteration only (e is zero-based), let's train the
        # model on a completely new dataset
        model.fit(X_train1, y_train1, nb_epoch=1, batch_size=batch_size)
    else:
        model.fit(X_train2, y_train2, nb_epoch=1, batch_size=batch_size)
Results:
546/546 [==============================] - 11s - loss: 4.0249 - acc: 0.6996
Epoch 1/1
546/546 [==============================] - 11s - loss: 4.0443 - acc: 0.7491
Epoch 1/1
546/546 [==============================] - 11s - loss: 4.0443 - acc: 0.7491
Epoch 1/1
365/365 [==============================] - 7s - loss: 3.7977 - acc: 0.7644
Epoch 1/1
546/546 [==============================] - 11s - loss: 4.0443 - acc: 0.7491
Epoch 1/1
546/546 [==============================] - 11s - loss: 4.0443 - acc: 0.7491
It seems like calling model.fit after the second iteration has no effect at all on the model, even if new data is given.
Any ideas on why this is happening? I also tried train_on_batch and it produces the same results.

Categories

Resources