tf.keras model very low accuracy and zero loss - python

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Build the vocabulary from the training sentences only, then convert both
# splits into integer sequences of fixed length 30 (padded/truncated at the end).
tokenizer = Tokenizer(num_words = 5408)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
# NOTE(review): Keras Tokenizer word indices presumably start at 1 (0 is the
# padding value), so the Embedding below likely needs input_dim=vocab_size + 1
# to cover the highest index -- confirm.
vocab_size = len(tokenizer.word_index)
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, padding='post', truncating='post', maxlen = 30)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, padding='post', truncating='post', maxlen = 30)
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, 1400),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.GlobalAveragePooling1D(),
# NOTE(review): softmax on a hidden layer is unusual; 'relu' was probably
# intended here -- confirm.
tf.keras.layers.Dense(34, activation='softmax'),
tf.keras.layers.Dense(50, activation='relu'),
# NOTE(review): the post states the data has 17 classes, but the output is a
# single sigmoid unit. A 17-class classifier presumably needs
# Dense(17, activation='softmax') as the last layer.
tf.keras.layers.Dense(1, activation='sigmoid')
])
# NOTE(review): categorical_crossentropy on a 1-unit output is the likely cause
# of the reported "loss: 0.0000e+00" (the single output normalises to 1, so
# log(1) = 0 for every sample). Presumably the loss should be
# sparse_categorical_crossentropy for integer labels, or
# categorical_crossentropy with one-hot labels of width 17 -- confirm the
# format of training_labels.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',metrics=['accuracy'])
history = model.fit(training_padded, training_labels, epochs = 5,
validation_data=(testing_padded, testing_labels), verbose=2)
Epoch 1/5
44/44 - 4s - loss: 0.0000e+00 - accuracy: 0.0579 - val_loss: 0.0000e+00 - val_accuracy: 0.0379 - 4s/epoch - 97ms/step
Epoch 2/5
44/44 - 3s - loss: 0.0000e+00 - accuracy: 0.0579 - val_loss: 0.0000e+00 - val_accuracy: 0.0379 - 3s/epoch - 77ms/step
Epoch 3/5
44/44 - 3s - loss: 0.0000e+00 - accuracy: 0.0579 - val_loss: 0.0000e+00 - val_accuracy: 0.0379 - 3s/epoch - 69ms/step
Epoch 4/5
44/44 - 3s - loss: 0.0000e+00 - accuracy: 0.0579 - val_loss: 0.0000e+00 - val_accuracy: 0.0379 - 3s/epoch - 69ms/step
Epoch 5/5
44/44 - 3s - loss: 0.0000e+00 - accuracy: 0.0579 - val_loss: 0.0000e+00 - val_accuracy: 0.0379 - 3s/epoch - 75ms/step
My dataset consists of texts with 17 classes. I have preprocessed it by doing stop word removal, punctuation removal, lowercasing. Is the extremely low accuracy due to a problem in the code?

Related

Very low validation accuracy but high training accuracy

I copied some sample code straight from Keras official website and edited it to make a machine learning model.
I am using Google Colab for my code.
Link: https://keras.io/examples/vision/image_classification_from_scratch/
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from google.colab import drive
drive.mount("/content/gdrive")
# Images are resized to 50x50 on load.
image_size = (50, 50)
# NOTE(review): with batch_size=400 the training log shows only 2 steps per
# epoch ("2/2"); the asker later reports that lowering the batch size to 8
# raised val_accuracy to roughly 0.8-0.9.
batch_size = 400
import random
num = random.randint(1, 400)
#random seed
# Training images come from the "train" folder, validation images from the
# separate "test" folder; both loaders share the same seed, image size and
# batch size.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
"/content/gdrive/My Drive/pest/train",
seed=num,
image_size=image_size,
batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
"/content/gdrive/My Drive/pest/test",
seed=num,
image_size=image_size,
batch_size=batch_size,
)
#tried data augmentation
# Augmentation pipeline applied inside the model (see make_model):
# random horizontal flips followed by random rotations with factor 0.1.
data_augmentation = keras.Sequential(
[
layers.RandomFlip("horizontal"),
layers.RandomRotation(0.1),
]
)
def make_model(input_shape, num_classes):
    """Build a small Xception-style CNN classifier.

    The network applies the module-level ``data_augmentation`` pipeline,
    rescales pixels by 1/255, runs two entry convolutions, four residual
    blocks of separable convolutions (128, 256, 512 and 728 filters), a final
    1024-filter separable convolution, global average pooling and dropout.

    Args:
        input_shape: Shape of one input image, e.g. ``(50, 50, 3)``.
        num_classes: Number of target classes. For exactly 2 classes the
            head is a single sigmoid unit; otherwise it is a softmax over
            ``num_classes`` units.

    Returns:
        An uncompiled ``keras.Model`` mapping images to class scores.
    """
    inputs = keras.Input(shape=input_shape)
    # Image augmentation block
    # NOTE(review): Keras random preprocessing layers are presumably only
    # active while training (inactive in evaluate/predict) -- confirm for the
    # installed Keras version.
    x = data_augmentation(inputs)

    # Entry block
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual: the 1x1 strided convolution matches the channel
        # count and the halved spatial size of the main branch.
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        # Binary problem: one sigmoid unit (pair with binary_crossentropy).
        activation = "sigmoid"
        units = 1
    else:
        # Multi-class problem: one softmax unit per class.
        activation = "softmax"
        units = num_classes

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)
# Two-class setup: make_model builds a single sigmoid output for
# num_classes=2, which pairs with the binary_crossentropy loss below.
# NOTE(review): this is only valid if the train/test folders each contain
# exactly two class sub-folders -- confirm.
model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)
epochs = 50
# Write a checkpoint file per epoch; "{epoch}" in the name is filled in by the callback.
callbacks = [
keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
optimizer=keras.optimizers.Adam(1e-3),
loss="binary_crossentropy",
metrics=["accuracy"],
)
# Train on train_ds and evaluate on val_ds after every epoch.
model.fit(
train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
#should have automatic shuffling I think
)
However, when I run it, the result is
Epoch 1/50
2/2 [==============================] - 71s 14s/step - loss: 0.6260 - accuracy: 0.6050 - val_loss: 0.6931 - val_accuracy: 0.5000
Epoch 2/50
2/2 [==============================] - 2s 507ms/step - loss: 0.2689 - accuracy: 0.8867 - val_loss: 0.6932 - val_accuracy: 0.5000
Epoch 3/50
2/2 [==============================] - 2s 536ms/step - loss: 0.1241 - accuracy: 0.9483 - val_loss: 0.6932 - val_accuracy: 0.5000
Epoch 4/50
2/2 [==============================] - 2s 506ms/step - loss: 0.0697 - accuracy: 0.9750 - val_loss: 0.6934 - val_accuracy: 0.5000
Epoch 5/50
2/2 [==============================] - 2s 525ms/step - loss: 0.0479 - accuracy: 0.9867 - val_loss: 0.6936 - val_accuracy: 0.5000
Epoch 6/50
2/2 [==============================] - 2s 534ms/step - loss: 0.0359 - accuracy: 0.9867 - val_loss: 0.6940 - val_accuracy: 0.5000
Epoch 7/50
2/2 [==============================] - 2s 509ms/step - loss: 0.0145 - accuracy: 0.9983 - val_loss: 0.6946 - val_accuracy: 0.5000
Epoch 8/50
2/2 [==============================] - 2s 545ms/step - loss: 0.0124 - accuracy: 0.9967 - val_loss: 0.6954 - val_accuracy: 0.5000
Epoch 9/50
2/2 [==============================] - 2s 544ms/step - loss: 0.0092 - accuracy: 0.9967 - val_loss: 0.6964 - val_accuracy: 0.5000
Epoch 10/50
2/2 [==============================] - 2s 512ms/step - loss: 0.0060 - accuracy: 0.9967 - val_loss: 0.6980 - val_accuracy: 0.5000
Epoch 11/50
2/2 [==============================] - 2s 535ms/step - loss: 0.0036 - accuracy: 0.9983 - val_loss: 0.6998 - val_accuracy: 0.5000
Epoch 12/50
2/2 [==============================] - 2s 503ms/step - loss: 0.0085 - accuracy: 0.9983 - val_loss: 0.7020 - val_accuracy: 0.5000
Epoch 13/50
2/2 [==============================] - 2s 665ms/step - loss: 0.0040 - accuracy: 1.0000 - val_loss: 0.7046 - val_accuracy: 0.5000
Epoch 14/50
2/2 [==============================] - 2s 516ms/step - loss: 0.0017 - accuracy: 1.0000 - val_loss: 0.7078 - val_accuracy: 0.5000
Epoch 15/50
2/2 [==============================] - 2s 520ms/step - loss: 0.0023 - accuracy: 0.9983 - val_loss: 0.7115 - val_accuracy: 0.5000
Epoch 16/50
2/2 [==============================] - 2s 500ms/step - loss: 8.5606e-04 - accuracy: 1.0000 - val_loss: 0.7157 - val_accuracy: 0.5000
Epoch 17/50
2/2 [==============================] - 2s 524ms/step - loss: 0.0018 - accuracy: 1.0000 - val_loss: 0.7205 - val_accuracy: 0.5000
Epoch 18/50
2/2 [==============================] - 2s 499ms/step - loss: 9.0626e-04 - accuracy: 1.0000 - val_loss: 0.7258 - val_accuracy: 0.5000
Epoch 19/50
2/2 [==============================] - 2s 510ms/step - loss: 0.0014 - accuracy: 1.0000 - val_loss: 0.7313 - val_accuracy: 0.5000
Epoch 20/50
2/2 [==============================] - 2s 711ms/step - loss: 0.0013 - accuracy: 1.0000 - val_loss: 0.7371 - val_accuracy: 0.5000
Epoch 21/50
2/2 [==============================] - 2s 511ms/step - loss: 9.9904e-04 - accuracy: 1.0000 - val_loss: 0.7431 - val_accuracy: 0.5000
Epoch 22/50
2/2 [==============================] - 2s 540ms/step - loss: 0.0019 - accuracy: 1.0000 - val_loss: 0.7489 - val_accuracy: 0.5000
Epoch 23/50
2/2 [==============================] - 2s 513ms/step - loss: 4.9861e-04 - accuracy: 1.0000 - val_loss: 0.7553 - val_accuracy: 0.5000
Epoch 24/50
2/2 [==============================] - 2s 542ms/step - loss: 6.6248e-04 - accuracy: 1.0000 - val_loss: 0.7622 - val_accuracy: 0.5000
Epoch 25/50
2/2 [==============================] - 2s 510ms/step - loss: 7.7911e-04 - accuracy: 1.0000 - val_loss: 0.7699 - val_accuracy: 0.5000
Epoch 26/50
2/2 [==============================] - 2s 502ms/step - loss: 3.3703e-04 - accuracy: 1.0000 - val_loss: 0.7781 - val_accuracy: 0.5000
Epoch 27/50
2/2 [==============================] - 2s 539ms/step - loss: 3.7860e-04 - accuracy: 1.0000 - val_loss: 0.7870 - val_accuracy: 0.5000
Epoch 28/50
2/2 [==============================] - 2s 507ms/step - loss: 2.4852e-04 - accuracy: 1.0000 - val_loss: 0.7962 - val_accuracy: 0.5000
Epoch 29/50
2/2 [==============================] - 2s 512ms/step - loss: 1.7709e-04 - accuracy: 1.0000 - val_loss: 0.8058 - val_accuracy: 0.5000
Epoch 30/50
2/2 [==============================] - 2s 538ms/step - loss: 1.6884e-04 - accuracy: 1.0000 - val_loss: 0.8161 - val_accuracy: 0.5000
Epoch 31/50
2/2 [==============================] - 2s 521ms/step - loss: 2.0884e-04 - accuracy: 1.0000 - val_loss: 0.8266 - val_accuracy: 0.5000
Epoch 32/50
2/2 [==============================] - 2s 543ms/step - loss: 1.8691e-04 - accuracy: 1.0000 - val_loss: 0.8375 - val_accuracy: 0.5000
Epoch 33/50
2/2 [==============================] - 2s 520ms/step - loss: 1.7296e-04 - accuracy: 1.0000 - val_loss: 0.8487 - val_accuracy: 0.5000
Epoch 34/50
2/2 [==============================] - 2s 516ms/step - loss: 4.5739e-04 - accuracy: 1.0000 - val_loss: 0.8601 - val_accuracy: 0.5000
Epoch 35/50
2/2 [==============================] - 2s 530ms/step - loss: 9.6831e-05 - accuracy: 1.0000 - val_loss: 0.8720 - val_accuracy: 0.5000
Epoch 36/50
2/2 [==============================] - 2s 553ms/step - loss: 1.2694e-04 - accuracy: 1.0000 - val_loss: 0.8847 - val_accuracy: 0.5000
Epoch 37/50
2/2 [==============================] - 2s 514ms/step - loss: 8.6252e-05 - accuracy: 1.0000 - val_loss: 0.8977 - val_accuracy: 0.5000
Epoch 38/50
2/2 [==============================] - 2s 520ms/step - loss: 2.6762e-04 - accuracy: 1.0000 - val_loss: 0.9115 - val_accuracy: 0.5000
Epoch 39/50
2/2 [==============================] - 2s 542ms/step - loss: 8.1350e-05 - accuracy: 1.0000 - val_loss: 0.9258 - val_accuracy: 0.5000
Epoch 40/50
2/2 [==============================] - 2s 506ms/step - loss: 8.0961e-05 - accuracy: 1.0000 - val_loss: 0.9405 - val_accuracy: 0.5000
Epoch 41/50
2/2 [==============================] - 2s 526ms/step - loss: 6.6102e-05 - accuracy: 1.0000 - val_loss: 0.9555 - val_accuracy: 0.5000
Epoch 42/50
2/2 [==============================] - 2s 549ms/step - loss: 1.1529e-04 - accuracy: 1.0000 - val_loss: 0.9707 - val_accuracy: 0.5000
Epoch 43/50
2/2 [==============================] - 2s 528ms/step - loss: 6.1373e-05 - accuracy: 1.0000 - val_loss: 0.9864 - val_accuracy: 0.5000
Epoch 44/50
2/2 [==============================] - 2s 516ms/step - loss: 7.2809e-05 - accuracy: 1.0000 - val_loss: 1.0025 - val_accuracy: 0.5000
Epoch 45/50
2/2 [==============================] - 2s 513ms/step - loss: 5.9504e-05 - accuracy: 1.0000 - val_loss: 1.0191 - val_accuracy: 0.5000
Epoch 46/50
2/2 [==============================] - 2s 515ms/step - loss: 6.1622e-05 - accuracy: 1.0000 - val_loss: 1.0361 - val_accuracy: 0.5000
Epoch 47/50
2/2 [==============================] - 2s 525ms/step - loss: 7.7296e-05 - accuracy: 1.0000 - val_loss: 1.0534 - val_accuracy: 0.5000
Epoch 48/50
2/2 [==============================] - 2s 512ms/step - loss: 4.5088e-05 - accuracy: 1.0000 - val_loss: 1.0711 - val_accuracy: 0.5000
Epoch 49/50
2/2 [==============================] - 2s 532ms/step - loss: 1.1449e-04 - accuracy: 1.0000 - val_loss: 1.0887 - val_accuracy: 0.5000
Epoch 50/50
2/2 [==============================] - 2s 516ms/step - loss: 6.0932e-05 - accuracy: 1.0000 - val_loss: 1.1071 - val_accuracy: 0.5000
<keras.callbacks.History at 0x7fb4205a20d0>
Since I have 2 classes, my teacher said that a validation accuracy of 0.5 means that it is completely random.
My images are in the format of 50x50 .jpg images in Google Drive. Could that be the problem as my current image size is 50x50? But when I run
import matplotlib.pyplot as plt
# Show the first 9 images of one training batch in a 3x3 grid, titled with
# their integer labels, to check that the images load correctly.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow interprets the values as 0-255 pixels.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")
The images are correct, as in the entire image is shown and is clear.
I tried changing the seed to a random number. The code comes with data augmentation and the model.fit() should automatically shuffle the images (if I understood the online sites correctly).
My teacher does not know what is wrong either. Any solutions?
Edit: this is the dataset
https://www.kaggle.com/datasets/simranvolunesia/pest-dataset
Edit2: Sorry for the confusion but I only used two datasets, aphids and bollworm.
Edit: You are also using binary_crossentropy for a multi-class classification problem, yet you're forcing it to only have two classes when your passed dataset contains nine.
model = make_model(input_shape=image_size + (3,), num_classes=2)
According to your dataset, the classes are:
Pests: aphids, armyworm, beetle, bollworm, grasshopper, mites, mosquito, sawfly, stem borer
I don't see where you're only working with two classes, unless there's some code missing somewhere that removes the other seven. This site (https://keras.io/examples/vision/image_classification_from_scratch/) is classifying into two classes: cat or dog. That's probably where you got two classes from.
So that line needs to be changed to:
model = make_model(input_shape=image_size + (3,), num_classes=9)
Change this:
model.compile(
optimizer=keras.optimizers.Adam(1e-3),
loss="binary_crossentropy",
metrics=["accuracy"],
)
To:
# Multi-class variant of the compile call.
# NOTE(review): sparse_categorical_crossentropy presumably expects integer
# class labels and a softmax output with one unit per class (i.e. a model
# built with num_classes > 2) -- confirm against the dataset's label mode.
model.compile(
optimizer=keras.optimizers.Adam(1e-3),
loss="sparse_categorical_crossentropy",
metrics=["accuracy"],
)
You might also need to change that metric from accuracy to binary_accuracy. Try with just accuracy first, then with binary_accuracy.
# Same compile call with the metric swapped to binary_accuracy.
# NOTE(review): binary_accuracy presumably thresholds each output at 0.5 and
# is meant for binary targets; with integer multi-class labels plain
# "accuracy" is likely the meaningful metric -- confirm.
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["binary_accuracy"],
)
According to the documentation, you are not splitting your validation data correctly and probably dealing with the default shuffling too.
Define your datasets like this (assuming a 20% validation split):
# Carve the validation set out of the training folder: 80% of the images go
# to training, the remaining 20% to validation. Both calls use the same
# directory, validation_split and seed so the two subsets are complementary.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/gdrive/My Drive/pest/train",
    validation_split=0.2,
    subset="training",
    seed=num,
    image_size=image_size,
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/gdrive/My Drive/pest/train",
    validation_split=0.2,
    subset="validation",
    seed=num,
    image_size=image_size,
    batch_size=batch_size,
)
# with test folder for test set
# shuffle=False keeps the file order fixed so predictions can be matched
# back to the files.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/gdrive/My Drive/pest/test",
    image_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)
The answer provided by Djinn is correct. Also, you are including augmentation WITHIN your model. So your model will augment not only the training images but the validation and test images as well. Test and validation images should not be augmented. If you want augmentation then use ImageDataGenerator.flow_from_directory. Documentation for that is here
I’ve found the problem. It is because I have quite few images (only 300 in each class), my batch size is too big. val_accuracy is around 0.8 to 0.9 after changing the batch size to 8. Thanks everyone for the answers!

CNN Low test Accuracy

I am building a CNN model to classify images into 11 classes as follows: 'dew','fogsmog','frost','glaze','hail','lightning','rain','rainbow','rime','sandstorm','snow'
and while training I get good accuracy and good validation accuracy
Epoch 1/20
131/131 [==============================] - 1012s 8s/step - loss: 1.8284 - accuracy: 0.3724 - val_loss: 1.4365 - val_accuracy: 0.5719
Epoch 2/20
131/131 [==============================] - 67s 511ms/step - loss: 1.3041 - accuracy: 0.5516 - val_loss: 1.1048 - val_accuracy: 0.6515
Epoch 3/20
131/131 [==============================] - 67s 510ms/step - loss: 1.1547 - accuracy: 0.6161 - val_loss: 1.0509 - val_accuracy: 0.6732
Epoch 4/20
131/131 [==============================] - 67s 510ms/step - loss: 1.0681 - accuracy: 0.6394 - val_loss: 1.0644 - val_accuracy: 0.6616
Epoch 5/20
131/131 [==============================] - 66s 505ms/step - loss: 1.0269 - accuracy: 0.6509 - val_loss: 1.0929 - val_accuracy: 0.6363
Epoch 6/20
131/131 [==============================] - 66s 506ms/step - loss: 1.0018 - accuracy: 0.6576 - val_loss: 0.9666 - val_accuracy: 0.6869
Epoch 7/20
131/131 [==============================] - 67s 507ms/step - loss: 0.9384 - accuracy: 0.6790 - val_loss: 0.8623 - val_accuracy: 0.7144
Epoch 8/20
131/131 [==============================] - 66s 505ms/step - loss: 0.9160 - accuracy: 0.6903 - val_loss: 0.8834 - val_accuracy: 0.7180
Epoch 9/20
131/131 [==============================] - 66s 502ms/step - loss: 0.8909 - accuracy: 0.6915 - val_loss: 0.8667 - val_accuracy: 0.7050
Epoch 10/20
131/131 [==============================] - 66s 503ms/step - loss: 0.8476 - accuracy: 0.7075 - val_loss: 0.8100 - val_accuracy: 0.7339
Epoch 11/20
131/131 [==============================] - 67s 509ms/step - loss: 0.8108 - accuracy: 0.7262 - val_loss: 0.8352 - val_accuracy: 0.7137
Epoch 12/20
131/131 [==============================] - 66s 506ms/step - loss: 0.7922 - accuracy: 0.7212 - val_loss: 0.8368 - val_accuracy: 0.7195
Epoch 13/20
131/131 [==============================] - 66s 505ms/step - loss: 0.7424 - accuracy: 0.7442 - val_loss: 0.8813 - val_accuracy: 0.7166
Epoch 14/20
131/131 [==============================] - 66s 503ms/step - loss: 0.7060 - accuracy: 0.7579 - val_loss: 0.8453 - val_accuracy: 0.7231
Epoch 15/20
131/131 [==============================] - 66s 503ms/step - loss: 0.6767 - accuracy: 0.7584 - val_loss: 0.8347 - val_accuracy: 0.7151
Epoch 16/20
131/131 [==============================] - 66s 506ms/step - loss: 0.6692 - accuracy: 0.7632 - val_loss: 0.8038 - val_accuracy: 0.7346
Epoch 17/20
131/131 [==============================] - 67s 507ms/step - loss: 0.6308 - accuracy: 0.7718 - val_loss: 0.7956 - val_accuracy: 0.7455
Epoch 18/20
131/131 [==============================] - 67s 508ms/step - loss: 0.6043 - accuracy: 0.7901 - val_loss: 0.8295 - val_accuracy: 0.7477
Epoch 19/20
131/131 [==============================] - 66s 506ms/step - loss: 0.5632 - accuracy: 0.8018 - val_loss: 0.7918 - val_accuracy: 0.7455
Epoch 20/20
131/131 [==============================] - 67s 510ms/step - loss: 0.5368 - accuracy: 0.8138 - val_loss: 0.7798 - val_accuracy: 0.7549
but when I predict and submit my results I get very low accuracy.
here is my model
from keras.preprocessing.image import ImageDataGenerator
# NOTE(review): IMG_SIZE is defined here but target_size below is hard-coded
# to (50,50); the two must stay in sync.
IMG_SIZE = 50
# One generator feeds both subsets: pixels are rescaled by 1/255 and 25% of
# the images are held out for validation. Any image passed to the trained
# model at prediction time has to be rescaled by 1/255 as well.
datagen = ImageDataGenerator(
rescale=1./255,
validation_split=0.25)
# The explicit `classes` list fixes the class order used for the one-hot
# ('categorical') labels -- presumably index i of the model output
# corresponds to classes[i]; confirm via train_dataset.class_indices.
train_dataset = datagen.flow_from_directory( directory=Train_folder,
shuffle=True,
target_size=(50,50),
subset="training",
classes=['dew','fogsmog','frost','glaze','hail','lightning','rain','rainbow','rime','sandstorm','snow'],
class_mode='categorical')
validation_dataset = datagen.flow_from_directory( directory=Train_folder,
shuffle=True,
target_size=(50,50),
subset="validation",
classes=['dew','fogsmog','frost','glaze','hail','lightning','rain','rainbow','rime','sandstorm','snow'],
class_mode='categorical')
Found 4168 images belonging to 11 classes.
Found 1383 images belonging to 11 classes.
# CNN with three Conv2D -> MaxPooling2D -> Dropout stages, followed by a
# 128-unit dense layer and an 11-way softmax (one unit per weather class).
model = Sequential([
layers.Conv2D(32, kernel_size=(3, 3),activation="relu",padding='same',input_shape=(IMG_SIZE, IMG_SIZE, 3)),
layers.MaxPooling2D((2, 2),padding='same'),
layers.Dropout(0.25),
layers.Conv2D(64, (3, 3), activation="relu",padding='same'),
layers.MaxPooling2D(pool_size=(2, 2),padding='same'),
layers.Dropout(0.25),
layers.Conv2D(128, (3, 3), activation="relu",padding='same'),
layers.MaxPooling2D(pool_size=(2, 2),padding='same'),
layers.Dropout(0.4),
layers.Flatten(),
layers.Dense(128, activation="relu"),
layers.Dropout(0.3),
layers.Dense(11, activation='softmax')
])
model.build()
model.summary()
# CategoricalCrossentropy matches the one-hot labels produced by
# class_mode='categorical' in the data generators.
model.compile(optimizer='adam',
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['accuracy'])
history = model.fit(
train_dataset,
epochs=20,
validation_data=validation_dataset,
)
model.save('model.tfl')
# Load every readable image in the test folder, preprocess it the same way
# the training generator does, and run the model on the whole batch.
Test_folder="/content/drive/MyDrive/[NN'22] Project Dataset/Test"
test_data = []
labels = []  # file names, kept in the same order as the rows of X_data
for img in tqdm(os.listdir(Test_folder)):
    path = os.path.join(Test_folder, img)
    img_data2 = cv2.imread(path)
    if img_data2 is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # explicitly instead of hiding every error behind a bare `except`.
        continue
    img_data2 = cv2.resize(img_data2, (IMG_SIZE, IMG_SIZE))
    # OpenCV decodes to BGR, while the training generator feeds RGB images;
    # convert so the channel order matches what the model was trained on.
    img_data2 = cv2.cvtColor(img_data2, cv2.COLOR_BGR2RGB)
    test_data.append(img_data2)
    labels.append(img)
# Apply the same 1/255 rescaling used by the training ImageDataGenerator;
# feeding raw 0-255 pixels to a model trained on 0-1 inputs is a likely
# cause of poor test accuracy.
X_data = np.array(test_data, dtype="float32").reshape(-1, IMG_SIZE, IMG_SIZE, 3) / 255.0
# Each row of `prediction` holds 11 class probabilities; np.argmax(row) is
# presumably the index into the `classes` list given to flow_from_directory.
prediction = model.predict(X_data)

Validation accuracy not increasing, overfitting?

I created a simple LSTM model but my validation accuracy always revolves around 50 no matter how many epochs I use. Here's how it looks compared to training accuracy:
Epoch 15/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.9408 - accuracy: 0.7999 - val_loss: 3.5255 - val_accuracy: 0.5190
Epoch 16/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.8724 - accuracy: 0.8080 - val_loss: 3.6279 - val_accuracy: 0.5127
Epoch 17/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.8041 - accuracy: 0.8177 - val_loss: 3.6627 - val_accuracy: 0.5158
Epoch 18/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.7377 - accuracy: 0.8297 - val_loss: 3.7247 - val_accuracy: 0.5140
Epoch 19/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.6680 - accuracy: 0.8431 - val_loss: 3.8000 - val_accuracy: 0.5144
Epoch 20/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.6036 - accuracy: 0.8578 - val_loss: 3.9164 - val_accuracy: 0.5051
Epoch 21/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.5460 - accuracy: 0.8715 - val_loss: 3.9832 - val_accuracy: 0.5089
Epoch 22/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.4830 - accuracy: 0.8872 - val_loss: 4.0284 - val_accuracy: 0.5095
Epoch 23/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.4277 - accuracy: 0.9019 - val_loss: 4.1428 - val_accuracy: 0.5067
Epoch 24/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.3760 - accuracy: 0.9169 - val_loss: 4.1972 - val_accuracy: 0.5069
Epoch 25/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.3319 - accuracy: 0.9275 - val_loss: 4.2494 - val_accuracy: 0.5047
Epoch 26/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.2883 - accuracy: 0.9406 - val_loss: 4.3047 - val_accuracy: 0.5075
Epoch 27/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.2471 - accuracy: 0.9507 - val_loss: 4.3822 - val_accuracy: 0.5063
Epoch 28/50
2527/2527 [==============================] - 22s 9ms/step - loss: 0.2131 - accuracy: 0.9592 - val_loss: 4.4553 - val_accuracy: 0.5071
I think it might be overfitting but I have doubts about validation_split function that Keras provides. Does it even shuffle the data?
Anyways, here's my full code from start, even how I take input so u can tell me how I can modify it, from batch size to last nodes size etc. Please take a look and tell me how it can be optimised so my validation accuracy can improve.
# Hyper-parameters for the seq2seq model and the data preparation.
BATCH_SIZE = 64
EPOCHS = 50
LSTM_NODES =256
NUM_SENTENCES = 3000
MAX_SENTENCE_LENGTH = 50
MAX_NUM_WORDS = 3000
EMBEDDING_SIZE = 100

# Parallel lists built from the tab-separated corpus:
#   input_sentences          source sentence
#   output_sentences         target sentence followed by ' <eos>' (decoder target)
#   output_sentences_inputs  target sentence preceded by '<sos> ' (decoder input)
input_sentences = []
output_sentences = []
output_sentences_inputs = []
count = 0
# `with` guarantees the file is closed, including on the early `break`.
with open(r'/content/drive/My Drive/TEMPPP/123.txt', encoding="utf-8") as corpus_file:
    for line in corpus_file:
        count += 1
        # Only the first NUM_SENTENCES lines of the file are considered.
        if count > NUM_SENTENCES:
            break
        # Lines without a tab carry no source/target pair.
        if '\t' not in line:
            continue
        input_sentence, output = line.rstrip().split('\t')
        output_sentence = output + ' <eos>'
        output_sentence_input = '<sos> ' + output
        input_sentences.append(input_sentence)
        output_sentences.append(output_sentence)
        output_sentences_inputs.append(output_sentence_input)
# Source side: fit a tokenizer on the input sentences and convert them to
# integer sequences; max_input_len is the longest source sequence.
input_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
input_tokenizer.fit_on_texts(input_sentences)
input_integer_seq = input_tokenizer.texts_to_sequences(input_sentences)
word2idx_inputs = input_tokenizer.word_index
max_input_len = max(len(sen) for sen in input_integer_seq)
# Target side: filters='' presumably keeps the '<sos>' / '<eos>' markers
# intact (the default filters would strip '<' and '>') -- confirm.
# The tokenizer is fit on both target variants so both markers are in the
# vocabulary.
output_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='')
output_tokenizer.fit_on_texts(output_sentences + output_sentences_inputs)
output_integer_seq = output_tokenizer.texts_to_sequences(output_sentences)
output_input_integer_seq = output_tokenizer.texts_to_sequences(output_sentences_inputs)
word2idx_outputs = output_tokenizer.word_index
# +1 because index 0 is not assigned to any word (it is used as padding).
num_words_output = len(word2idx_outputs) + 1
max_out_len = max(len(sen) for sen in output_integer_seq)
# Encoder inputs use pad_sequences' default padding side; decoder inputs are
# explicitly padded at the end ('post').
encoder_input_sequences = pad_sequences(input_integer_seq, maxlen=max_input_len)
decoder_input_sequences = pad_sequences(output_input_integer_seq, maxlen=max_out_len, padding='post')
import numpy as np
# Load the pre-trained word -> vector dictionary.
# NOTE(review): allow_pickle executes pickled data on load; only use this
# with a trusted .npy file.
read_dictionary = np.load('/content/drive/My Drive/TEMPPP/hinvec.npy',allow_pickle='TRUE').item()
num_words = min(MAX_NUM_WORDS, len(word2idx_inputs) + 1)
# Row i holds the vector of the word with tokenizer index i; words missing
# from the dictionary keep an all-zero row.
embedding_matrix = np.zeros((num_words, EMBEDDING_SIZE))
for word, index in word2idx_inputs.items():
    if index >= num_words:
        # word_index lists every word seen, but the matrix is capped at
        # num_words rows; skip the overflow to avoid an IndexError.
        continue
    embedding_vector = read_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
embedding_layer = Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_input_len)
# One-hot decoder targets with shape
# (number of sentences, max_out_len, num_words_output).
decoder_targets_one_hot = np.zeros(
    (
        len(input_sentences),
        max_out_len,
        num_words_output,
    ),
    dtype='float32',
)
decoder_output_sequences = pad_sequences(output_integer_seq, maxlen=max_out_len, padding='post')
# NOTE(review): padded positions (word index 0) are also marked as targets
# (class 0), so they count towards the loss and the reported accuracy.
for i, d in enumerate(decoder_output_sequences):
    for t, word in enumerate(d):
        decoder_targets_one_hot[i, t, word] = 1
# Encoder: embed the source tokens and keep only the final LSTM states
# (h, c), which seed the decoder.
encoder_inputs_placeholder = Input(shape=(max_input_len,))
x = embedding_layer(encoder_inputs_placeholder)
encoder = LSTM(LSTM_NODES, return_state=True)
encoder_outputs, h, c = encoder(x)
encoder_states = [h, c]
# Decoder: embed the '<sos>'-prefixed target tokens, run an LSTM initialised
# with the encoder states, and predict a distribution over the target
# vocabulary at every time step.
decoder_inputs_placeholder = Input(shape=(max_out_len,))
decoder_embedding = Embedding(num_words_output, LSTM_NODES)
decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)
decoder_lstm = LSTM(LSTM_NODES, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs_x, initial_state=encoder_states)
# NOTE(review): regularisation could presumably be added here, e.g. via the
# LSTM layers' dropout arguments or a Dropout layer applied to
# decoder_outputs before the Dense layer -- confirm.
decoder_dense = Dense(num_words_output, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
import tensorflow as tf
# Learning-rate schedule: polynomial decay (power 0.5) from 0.1 down to 0.01
# over 2000 steps.
# NOTE(review): 0.1 is far above the learning rates usually used with Adam
# (default 0.001) -- presumably worth lowering; confirm experimentally.
starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 2000
learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
starter_learning_rate,
decay_steps,
end_learning_rate,
power=0.5)
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn, epsilon=1e-03, clipvalue=0.5)
# The model maps [encoder tokens, decoder input tokens] to per-step target
# word distributions.
model = Model([encoder_inputs_placeholder,
decoder_inputs_placeholder],
decoder_outputs)
model.compile(
optimizer=opt,
loss='categorical_crossentropy',
metrics=['accuracy']
)
# NOTE(review): validation_split presumably takes the LAST 10% of the samples
# before any shuffling, so the validation set is not a random sample --
# confirm in the Keras fit() documentation and shuffle the arrays first if a
# random split is wanted.
history = model.fit(
[encoder_input_sequences, decoder_input_sequences],
decoder_targets_one_hot,
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_split=0.1,
)
I tried to add a dropout layer but I couldn't add it between the LSTM layer and the dense layer. And I have doubts about validation_split. I tried to split the dataset into train_test_set and valid_test_set but couldn't make it work and ended up sticking with validation_split. I'm pretty sure this is a case of overfitting but I am not able to deal with it.

No change in train & test accuracy and loss

I'm trying to use a CNN to classify data into 3 classes; every sample is 30*188. Class1 has 5794 samples, class2 has 8471, class3 has 9092. When I train my model, the values of accuracy, loss, val_acc and val_loss don't change.
Please help me to solve this problem.
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import librosa.display
import sklearn
from keras.utils import to_categorical
import scipy.io as scio
# Load the three per-class feature arrays from .mat files and build a label
# vector for each: class 1 -> 0, class 2 -> 1, class 3 -> 2.
# NOTE(review): np.str and np.int are deprecated aliases that newer NumPy
# releases no longer provide; presumably the builtin str / int should be used
# instead -- confirm against the installed NumPy version.
path1 = 'class1_feature_array.mat'
data1 = scio.loadmat(path1)
class1_feature_array = data1['class1_feature_array']
class1_label = np.zeros((class1_feature_array.shape[0],))
class1_label=class1_label.astype(np.int32)
class1_label=class1_label.astype(np.str)
path2 = 'class2_feature_array.mat'
data2 = scio.loadmat(path2)
class2_feature_array = data2['class2_feature_array']
class2_label = np.ones((class2_feature_array.shape[0],))
class2_label=class2_label.astype(np.int32)
class2_label=class2_label.astype(np.str)
path3 = 'class3_feature_array.mat'
data3 = scio.loadmat(path3)
class3_feature_array = data3['class3_feature_array']
class3_label = np.ones((class3_feature_array.shape[0],))*2
class3_label=class3_label.astype(np.int32)
class3_label=class3_label.astype(np.str)
# Concatenate the three classes along axis 0. The accumulator is created
# with per-sample shape (40, 188), so each feature array must have that shape.
features, labels = np.empty((0,40,188)), np.empty(0)
features = np.append(features,class1_feature_array,axis=0)
features = np.append(features,class2_feature_array,axis=0)
features = np.append(features,class3_feature_array,axis=0)
features = np.array(features)
labels = np.append(labels,class1_label,axis=0)
labels = np.append(labels,class2_label,axis=0)
labels = np.append(labels,class3_label,axis=0)
# Convert the string labels back to integers for one-hot encoding.
labels = np.array(labels, dtype = np.int)
def one_hot_encode(labels):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: 1-D sequence or array of integer class indices.

    Returns:
        Float array of shape ``(len(labels), n_classes)`` with a single 1 per
        row, in the column given by that row's label. ``n_classes`` is the
        number of distinct labels, widened to ``max(labels) + 1`` when the
        labels are not contiguous (e.g. ``[0, 2]``) so that every label has a
        column.
    """
    labels = np.asarray(labels)
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    if n_labels:
        # Counting distinct labels alone under-sizes the matrix when a class
        # index is skipped, which would make the assignment below fail.
        n_unique_labels = max(n_unique_labels, int(labels.max()) + 1)
    encoded = np.zeros((n_labels, n_unique_labels))
    print("one_hot_encode",encoded.shape)
    # Fancy indexing sets encoded[i, labels[i]] = 1 for every row i at once.
    encoded[np.arange(n_labels), labels] = 1
    return encoded
labels = one_hot_encode(labels)
# Random boolean mask: each sample goes to the training set with probability
# 0.80 and to the test set otherwise. No seed is set, so the split differs
# from run to run.
train_test_split = np.random.rand(len(features)) < 0.80
train_x = features[train_test_split]
train_y = labels[train_test_split]
test_x = features[~train_test_split]
test_y = labels[~train_test_split]
# Append a trailing channel axis of size 1, as expected by Conv2D.
train_x = train_x.reshape(train_x.shape[0],train_x.shape[1],train_x.shape[2],1)
test_x = test_x.reshape(test_x.shape[0],test_x.shape[1],test_x.shape[2],1)
import sklearn
import keras
from keras.models import Sequential
from keras.layers import *
from keras.callbacks import LearningRateScheduler
from keras import optimizers
# LeNet-style CNN: two conv + max-pool stages, one hidden dense layer and a
# 3-way softmax output.
model = Sequential()
model.add(Conv2D(32, (5, 5), strides=(1, 1), padding='valid', activation='relu',
                 input_shape=(40, 188, 1), kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (5, 5), strides=(1, 1), padding='valid', activation='relu',
                 kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(3, activation='softmax'))

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# The targets are one-hot rows over three mutually exclusive classes and the
# output layer is a softmax, so the matching loss is categorical_crossentropy.
# binary_crossentropy would score each of the three outputs as an independent
# yes/no problem and makes Keras report 'accuracy' as per-output binary
# accuracy instead of per-sample class accuracy.
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary(line_length=80)
history = model.fit(train_x, train_y, epochs=100, batch_size=32,
                    validation_data=(test_x, test_y))
The output after training is as shown below:
Train on 18625 samples, validate on 4732 samples
Epoch 1/100
18625/18625 [==============================] - 30s 2ms/step - loss: 8.0138 - accuracy: 0.5001 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 2/100
18625/18625 [==============================] - 22s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 3/100
18625/18625 [==============================] - 23s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 4/100
18625/18625 [==============================] - 24s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 5/100
18625/18625 [==============================] - 23s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 6/100
18625/18625 [==============================] - 24s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 7/100
18625/18625 [==============================] - 24s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 8/100
18625/18625 [==============================] - 25s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 9/100
18625/18625 [==============================] - 26s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 10/100
18625/18625 [==============================] - 25s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 11/100
18625/18625 [==============================] - 26s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944
Epoch 12/100
18625/18625 [==============================] - 26s 1ms/step - loss: 8.0181 - accuracy: 0.4998 - val_loss: 8.1055 - val_accuracy: 0.4944

Accuracy doesn't improve in training model character recognition

I am building a training model for my character recognition system. In every epoch I get the same accuracy, and it never improves. I currently have 4000 training images and 77 validation images.
My model is as follows:
# Small CNN for 32x32 RGB images: two conv + max-pool stages, a 256-unit dense
# layer and a single-unit output for two-class classification.
inputs = Input(shape=(32, 32, 3))
x = Conv2D(filters=64, kernel_size=5, activation='relu')(inputs)
x = MaxPooling2D()(x)
x = Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = MaxPooling2D()(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
# A softmax over a single unit always outputs exactly 1.0, so the network
# cannot distinguish the classes. With one output unit the activation has to
# be sigmoid, giving the probability of class 1.
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=outputs)
# The data generators use class_mode="binary" (scalar 0/1 labels), which pairs
# with binary_crossentropy rather than categorical_crossentropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])
# Image generators that only rescale pixel values by 1/255 (no augmentation).
data_gen_train = ImageDataGenerator(rescale=1/255)
data_gen_test = ImageDataGenerator(rescale=1/255)
data_gen_valid = ImageDataGenerator(rescale=1/255)

# Each generator reads images from its directory, resizes them to 32x32 and
# yields scalar 0/1 labels (class_mode="binary").
train_generator = data_gen_train.flow_from_directory(
    directory=r"./drive/My Drive/train_dataset",
    target_size=(32, 32),
    batch_size=10,
    class_mode="binary"
)
# The path literal was previously split across two lines, which is a syntax
# error; it is a single path like the train/test directories.
valid_generator = data_gen_valid.flow_from_directory(
    directory=r"./drive/My Drive/validation_dataset",
    target_size=(32, 32),
    batch_size=2,
    class_mode="binary"
)
test_generator = data_gen_test.flow_from_directory(
    directory=r"./drive/My Drive/test_dataset",
    target_size=(32, 32),
    batch_size=6,
    class_mode="binary"
)
# Train for 10 epochs: 400 batches from train_generator per epoch, with
# validation limited to 37 batches from valid_generator.
model.fit(
    train_generator,
    steps_per_epoch=400,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=37,
)
The result is as follows:
Found 4000 images belonging to 2 classes.
Found 77 images belonging to 2 classes.
Found 6 images belonging to 2 classes.
Epoch 1/10
400/400 [==============================] - 14s 35ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5811
Epoch 2/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5811
Epoch 3/10
400/400 [==============================] - 13s 34ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5676
Epoch 4/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5676
Epoch 5/10
400/400 [==============================] - 18s 46ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5541
Epoch 6/10
400/400 [==============================] - 13s 34ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5676
Epoch 7/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5676
Epoch 8/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5946
Epoch 9/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5811
Epoch 10/10
400/400 [==============================] - 13s 33ms/step - loss: 0.0000e+00 - accuracy: 0.5000 - val_loss: 0.0000e+00 - val_accuracy: 0.5811
<tensorflow.python.keras.callbacks.History at 0x7fa3a5f4a8d0>
If you are trying to recognize characters from 2 classes, you should:
use class_mode="binary" in the flow_from_directory function
use binary_crossentropy as loss
your last layer must have 1 neuron with sigmoid activation function
In case there are more than 2 classes:
do not use class_mode="binary" in the flow_from_directory function
use categorical_crossentropy as loss
your last layer must have n neurons with softmax activation, where n stands for the number of classes

Categories

Resources