High loss for unknown reason when training CNN - python

I have been stuck on this assignment for three days and have checked everything I could find on the internet, but the loss of my model will not go down. The model just guesses randomly on the validation dataset.
Data source: https://www.kaggle.com/datamunge/sign-language-mnist
Here are some things I have tried and verified that don't work:
increasing the batch size; it seems to be irrelevant to the high loss and low accuracy.
checking the format of the input data; I found nothing wrong, everything seems to load properly.
removing image augmentation; the loss doesn't change.
changing the optimizer; I have tried Adam, RMSprop, and SGD.
adding more neurons and training for more epochs; this only increases training accuracy, not validation accuracy.
checking my environment; other sample CNN code runs as expected.
Here is my code and output.
import matplotlib.pyplot as plt
import csv
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from os import getcwd
import sys

def progressbar(it, prefix="", size=29, file=sys.stdout):
    # This def is made by: https://stackoverflow.com/users/1207193/iambr
    # it is the list you are going to iterate
    # prefix is the title of your progress bar
    # size is the length of your progress bar
    count = len(it)
    def show(j):
        x = int(size*j/count)
        file.write("%s[%s%s%s] %i/%i\r" %
                   (prefix, "="*x, ">", "."*(size-x), j, count))
        file.flush()
    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    file.write("\n")
    file.flush()

def get_data(filename):
    with open(filename) as training_file:
        images = np.empty((0, 28, 28), dtype=float)
        labels = np.empty((0), dtype=float)
        # Your code starts here
        raw_file = np.loadtxt(training_file.readlines()[:-1],
                              dtype=float, skiprows=1, delimiter=',')
        for row in progressbar(raw_file, "Loading data: "):
            if len(row) == 785:
                labels = np.append(labels, row[0])
                image = np.reshape(row[1:785], (1, 28, 28))
                images = np.append(image, images, axis=0)
    print(f'read file:{filename} complete')
    return images, labels
# full data set
# path_sign_mnist_train = f'{getcwd()}/tmp2/sign_mnist_train.csv'
# path_sign_mnist_test = f'{getcwd()}/tmp2/sign_mnist_test.csv'
# reduce training set
path_sign_mnist_train = f'{getcwd()}/tmp2/sign_mnist_train_a.csv'
path_sign_mnist_test = f'{getcwd()}/tmp2/sign_mnist_test_a.csv'
training_images, training_labels = get_data(path_sign_mnist_train)
testing_images, testing_labels = get_data(path_sign_mnist_test)
training_images=training_images/255.
testing_images=testing_images/255.
# Keep these
print(training_images.shape)
print(training_labels.shape)
print(testing_images.shape)
print(testing_labels.shape)
print(testing_labels)
# Testing code
plt.imshow(training_images[1], interpolation='nearest')
plt.show()
print(training_labels[1])
train_datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=14,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.09,  # randomly zoom image
    width_shift_range=0.14,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.14,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,  # randomly flip images
    vertical_flip=False,  # randomly flip images
    brightness_range=(0.8, 1.0),  # brightness of image
    rescale=1. / 255.)
validation_datagen = ImageDataGenerator(rescale=1./255.)
training_images = np.reshape(training_images, (-1,28,28,1))
train_datagen.fit(training_images)
testing_images = np.reshape(testing_images,(-1,28,28,1))
training_labels=tf.keras.utils.to_categorical(training_labels,num_classes=25)
testing_labels=tf.keras.utils.to_categorical(testing_labels, num_classes=25)
batch_size = 16
train_generator = train_datagen.flow(
    training_images,
    training_labels,
    batch_size=batch_size)
validation_generator = validation_datagen.flow(
    testing_images,
    testing_labels,
    batch_size=batch_size)
# Keep These
print(training_images.shape)
print(testing_images.shape)
# Their output should be:
# (27455, 28, 28, 1)
# (7172, 28, 28, 1)
# Define the model
# Use no more than 2 Conv2D and 2 MaxPooling2D
model = tf.keras.models.Sequential([
    # Your Code Here
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation='softmax')
])
# Compile Model.
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.005),loss='categorical_crossentropy',metrics=['accuracy'])
model.summary()
# Train the Model
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              steps_per_epoch=len(training_images) // batch_size,
                              epochs=10,
                              validation_steps=len(testing_images) // batch_size)
# model.evaluate(testing_images/255., testing_labels, verbose=0)
# Plot the chart for accuracy and loss on both training and validation
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
But the loss barely changes...
Epoch 1/10
WARNING:tensorflow:AutoGraph could not transform <function Model.make_train_function.<locals>.train_function at 0x0000026B4B18F948> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
To silence this warning, decorate the function with #tf.autograph.experimental.do_not_convert
2022-01-27 09:40:05.564400: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cublas64_10.dll
2022-01-27 09:40:05.743540: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudnn64_7.dll
2022-01-27 09:40:06.492580: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: Invoking GPU asm compilation is supported on Cuda non-Windows platforms only
Relying on driver to perform ptx compilation.
Modify $PATH to customize ptxas location.
This message will be only logged once.
430/437 [============================>.] - ETA: 0s - loss: 3.1891 - accuracy: 0.0461WARNING:tensorflow:AutoGraph could not transform <function Model.make_test_function.<locals>.test_function at 0x0000026B490A4F78> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
To silence this warning, decorate the function with #tf.autograph.experimental.do_not_convert
437/437 [==============================] - 3s 7ms/step - loss: 3.1890 - accuracy: 0.0463 - val_loss: 3.2067 - val_accuracy: 0.0230
Epoch 2/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1828 - accuracy: 0.0425 - val_loss: 3.1952 - val_accuracy: 0.0333
Epoch 3/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1802 - accuracy: 0.0401 - val_loss: 3.2006 - val_accuracy: 0.0230
Epoch 4/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1789 - accuracy: 0.0434 - val_loss: 3.2012 - val_accuracy: 0.0348
Epoch 5/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1782 - accuracy: 0.0448 - val_loss: 3.2109 - val_accuracy: 0.0345
Epoch 6/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1784 - accuracy: 0.0454 - val_loss: 3.2056 - val_accuracy: 0.0230
Epoch 7/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1782 - accuracy: 0.0407 - val_loss: 3.2032 - val_accuracy: 0.0230
Epoch 8/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1780 - accuracy: 0.0391 - val_loss: 3.2080 - val_accuracy: 0.0230
Epoch 9/10
437/437 [==============================] - 3s 7ms/step - loss: 3.1775 - accuracy: 0.0417 - val_loss: 3.2033 - val_accuracy: 0.0230
Epoch 10/10
418/437 [===========================>..] - ETA: 0s - loss: 3.1773 - accuracy: 0.0460Traceback (most recent call last):

The only problem with this network is that it learns too fast.
If you lower the learning rate from 0.005 to 0.0005, this model works fine.
# Compile Model.
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.0005),
              loss='categorical_crossentropy', metrics=['accuracy'])
Do not learn too fast, or you will get stuck in a local minimum and never get out.
Epoch 2/10
437/437 [==============================] - 3s 6ms/step - loss: 2.5773 - accuracy: 0.2133 - val_loss: 2.2050 - val_accuracy: 0.3542
Epoch 3/10
437/437 [==============================] - 3s 6ms/step - loss: 2.1190 - accuracy: 0.3262 - val_loss: 1.6197 - val_accuracy: 0.5278
Epoch 4/10
437/437 [==============================] - 3s 7ms/step - loss: 1.7566 - accuracy: 0.4223 - val_loss: 1.3985 - val_accuracy: 0.5492
Epoch 5/10
437/437 [==============================] - 3s 6ms/step - loss: 1.5062 - accuracy: 0.4929 - val_loss: 1.1146 - val_accuracy: 0.7000
Epoch 6/10
437/437 [==============================] - 3s 6ms/step - loss: 1.3736 - accuracy: 0.5323 - val_loss: 1.0778 - val_accuracy: 0.6756
Epoch 7/10
437/437 [==============================] - 3s 6ms/step - loss: 1.2198 - accuracy: 0.5836 - val_loss: 0.8912 - val_accuracy: 0.7650
Epoch 8/10
437/437 [==============================] - 3s 6ms/step - loss: 1.1396 - accuracy: 0.6066 - val_loss: 0.8298 - val_accuracy: 0.7486
Epoch 9/10
437/437 [==============================] - 3s 6ms/step - loss: 1.1084 - accuracy: 0.6182 - val_loss: 0.9152 - val_accuracy: 0.6830
Epoch 10/10
437/437 [==============================] - 3s 6ms/step - loss: 1.0196 - accuracy: 0.6525 - val_loss: 0.8014 - val_accuracy: 0.7307
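As an optional refinement (not part of the original fix), a ReduceLROnPlateau callback can lower the learning rate automatically when the validation loss stops improving, so you don't have to hand-tune it. A minimal sketch against the training call from the question:
# Sketch: halve the learning rate whenever val_loss stalls for 2 epochs.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',  # quantity to watch
    factor=0.5,          # multiply the learning rate by this on a plateau
    patience=2,          # stagnant epochs to wait before reducing
    min_lr=1e-5)         # lower bound for the learning rate
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              steps_per_epoch=len(training_images) // batch_size,
                              epochs=10,
                              validation_steps=len(testing_images) // batch_size,
                              callbacks=[reduce_lr])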
BTW: the reading method is not very efficient or very 'pythonic'. This works better:
def get_data(filename):
    with open(filename) as training_file:
        raw_file = np.loadtxt(training_file.readlines()[:-1],
                              dtype=float, skiprows=1, delimiter=',')
    labels = np.array([i[0] for i in raw_file])
    images = np.array([i[1:785] for i in raw_file])
    images = images.reshape(-1, 28, 28)
    print(f'read file:{filename} complete')
    return images, labels
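Since np.loadtxt already returns a 2-D array, the list comprehensions can be replaced by plain slicing, which avoids rebuilding the arrays row by row (a small variation on the answer above, same behavior):
def get_data(filename):
    with open(filename) as training_file:
        raw_file = np.loadtxt(training_file.readlines()[:-1],
                              dtype=float, skiprows=1, delimiter=',')
    labels = raw_file[:, 0]                           # first column holds the label
    images = raw_file[:, 1:785].reshape(-1, 28, 28)   # remaining 784 pixel values
    print(f'read file:{filename} complete')
    return images, labels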

Related

Prediction input shape discrepancy in LSTM [duplicate]

So I've been following Google's official tensorflow guide and trying to build a simple neural network using Keras. But when it comes to training the model, it does not use the entire dataset (with 60000 entries) and instead uses only 1875 entries for training. Any possible fix?
import tensorflow as tf
from tensorflow import keras
import numpy as np
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot']
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10)
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(train_images, train_labels, epochs=10)
Output:
Epoch 1/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3183 - accuracy: 0.8866
Epoch 2/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3169 - accuracy: 0.8873
Epoch 3/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3144 - accuracy: 0.8885
Epoch 4/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3130 - accuracy: 0.8885
Epoch 5/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3110 - accuracy: 0.8883
Epoch 6/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3090 - accuracy: 0.8888
Epoch 7/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3073 - accuracy: 0.8895
Epoch 8/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3057 - accuracy: 0.8900
Epoch 9/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3040 - accuracy: 0.8905
Epoch 10/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.3025 - accuracy: 0.8915
<tensorflow.python.keras.callbacks.History at 0x7fbe0e5aebe0>
Here's the original google colab notebook where I've been working on this: https://colab.research.google.com/drive/1NdtzXHEpiNnelcMaJeEm6zmp34JMcN38
The number 1875 shown during fitting the model is not the training samples; it is the number of batches.
model.fit includes an optional argument batch_size, which, according to the documentation:
If unspecified, batch_size will default to 32.
So, what happens here is that you fit with the default batch size of 32 (since you have not specified anything different), so the total number of batches for your data is
60000/32 = 1875
It does not train on 1875 samples.
Epoch 1/10
1875/1875 [===
1875 here is the number of steps, not samples. In the fit method there is an argument batch_size, whose default value is 32. So 1875 * 32 = 60000. The implementation is correct.
If you train with batch_size=16, you will see 3750 steps instead of 1875, since 60000 / 16 = 3750.
Just use batch_size=1 if you want all 60000 samples to be visible as individual steps.
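To make this concrete, here is the same fit call with an explicit batch size (the value 16 is just for illustration):
# With batch_size=16, the progress bar shows 60000 / 16 = 3750 steps
# per epoch instead of the default 60000 / 32 = 1875.
model.fit(train_images, train_labels, epochs=10, batch_size=16)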

"No gradients provided for any variable" when trying to fit Keras Sequential

I'm trying to create and train a Sequential model like so:
def model(training: Dataset, validation: Dataset):
    model = Sequential(layers=[Embedding(input_dim=1001, output_dim=16), Dropout(0.2),
                               GlobalAveragePooling1D(), Dropout(0.2), Dense(1)])
    model.compile(loss=BinaryCrossentropy(from_logits=True), optimizer='adam',
                  metrics=BinaryAccuracy(threshold=0.0))
    model.fit(x=training, validation_data=validation, epochs=10)
When I run it, I get the following error on the model.fit line:
ValueError: No gradients provided for any variable: ['embedding/embeddings:0', 'dense/kernel:0', 'dense/bias:0'].
I've come across some answers talking about the use of optimizers, but how would that apply to Sequential rather than Model? Is there something else that I'm missing?
Edit: The result of print(training):
<MapDataset shapes: ((None, 250), (None,)), types: (tf.int64, tf.int32)>
Edit: A script that will reproduce the error using IMDB sample data
from tensorflow.keras import Sequential
from tensorflow import data
from keras.layers import TextVectorization
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dropout, GlobalAveragePooling1D, Dense
from tensorflow.keras.metrics import BinaryAccuracy, BinaryCrossentropy
import os

def split_dataset(dataset: data.Dataset):
    record_count = len(list(dataset))
    training_count = int((70 / 100) * record_count)
    validation_count = int((15 / 100) * record_count)
    raw_train_ds = dataset.take(training_count)
    raw_val_ds = dataset.skip(training_count).take(validation_count)
    raw_test_ds = dataset.skip(training_count + validation_count)
    return {"train": raw_train_ds, "test": raw_test_ds, "validate": raw_val_ds}

def clean(text, label):
    return tf.strings.unicode_transcode(text, "US ASCII", "UTF-8")

def vectorize_dataset(dataset: data.Dataset):
    return dataset.map(vectorize_text)

def vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label

url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
dataset_tar = tf.keras.utils.get_file("aclImdb_v1", url,
                                      untar=True, cache_dir='.',
                                      cache_subdir='')
dataset_dir = os.path.join(os.path.dirname(dataset_tar), 'aclImdb')
batch_size = 32
seed = 42
dataset = tf.keras.preprocessing.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)
split_data = split_dataset(dataset)
raw_train = split_data['train']
raw_val = split_data['validate']
raw_test = split_data['test']
vectorize_layer = TextVectorization(max_tokens=10000, output_mode="int", output_sequence_length=250, ngrams=1)
cleaned_text = raw_train.map(clean)
vectorize_layer.adapt(cleaned_text)
train = vectorize_dataset(raw_train)
test = vectorize_dataset(raw_test)
validate = vectorize_dataset(raw_val)

def model(training, validation):
    sequential_model = Sequential(
        layers=[Embedding(input_dim=1001, output_dim=16), Dropout(0.2),
                GlobalAveragePooling1D(), Dropout(0.2), Dense(1)])
    sequential_model.compile(loss=BinaryCrossentropy(from_logits=True), optimizer='adam',
                             metrics=BinaryAccuracy(threshold=0.0))
    sequential_model.fit(x=training, validation_data=validation, epochs=10)

model(train, validate)
The problem in your code occurs at the line below:
vectorize_layer = TextVectorization(max_tokens=10000, output_mode="int", output_sequence_length=250, ngrams=1)
The max_tokens in the TextVectorization layer corresponds to the total number of unique words in the vocabulary.
Embedding layer: the Embedding layer can be understood as a lookup table that maps integer indices (which stand for specific words) to dense vectors (their embeddings).
In your code, the Embedding dimensions are (1001, 16), which means the layer only accommodates token indices in a range of 1001; any index beyond that is out of range for the lookup table. Hence the ValueError.
I changed TextVectorization(max_tokens=5000) and also Embedding(5000, 16), and ran your code.
What I got is shown below:
def model(training, validation):
    model = keras.Sequential(
        [
            layers.Embedding(input_dim=5000, output_dim=16),
            layers.Dropout(0.2),
            layers.GlobalAveragePooling1D(),
            layers.Dropout(0.2),
            layers.Dense(1),
        ]
    )
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=keras.metrics.BinaryAccuracy(threshold=0.0)
    )
    model.fit(x=training, validation_data=validation, epochs=10)
    return model
Output:
Epoch 1/10 437/437 [==============================] - 10s 22ms/step - loss: 0.6797 - binary_accuracy: 0.6455 - val_loss: 0.6539 - val_binary_accuracy: 0.7554
Epoch 2/10 437/437 [==============================] - 10s 22ms/step - loss: 0.6109 - binary_accuracy: 0.7625 - val_loss: 0.5700 - val_binary_accuracy: 0.7880
Epoch 3/10 437/437 [==============================] - 9s 22ms/step - loss: 0.5263 - binary_accuracy: 0.8098 - val_loss: 0.4931 - val_binary_accuracy: 0.8233
Epoch 4/10 437/437 [==============================] - 10s 22ms/step - loss: 0.4580 - binary_accuracy: 0.8368 - val_loss: 0.4373 - val_binary_accuracy: 0.8448
Epoch 5/10 437/437 [==============================] - 10s 22ms/step - loss: 0.4072 - binary_accuracy: 0.8560 - val_loss: 0.4003 - val_binary_accuracy: 0.8522
Epoch 6/10 437/437 [==============================] - 10s 22ms/step - loss: 0.3717 - binary_accuracy: 0.8641 - val_loss: 0.3733 - val_binary_accuracy: 0.8589
Epoch 7/10 437/437 [==============================] - 10s 22ms/step - loss: 0.3451 - binary_accuracy: 0.8728 - val_loss: 0.3528 - val_binary_accuracy: 0.8582
Epoch 8/10 437/437 [==============================] - 9s 22ms/step - loss: 0.3220 - binary_accuracy: 0.8806 - val_loss: 0.3345 - val_binary_accuracy: 0.8673
Epoch 9/10 437/437 [==============================] - 9s 22ms/step - loss: 0.3048 - binary_accuracy: 0.8868 - val_loss: 0.3287 - val_binary_accuracy: 0.8673
Epoch 10/10 437/437 [==============================] - 10s 22ms/step - loss: 0.2891 - binary_accuracy: 0.8929 - val_loss: 0.3222 - val_binary_accuracy: 0.8679
BinaryCrossentropy is imported from tf.keras.metrics, hence gradients could not be computed.
The correct import would have been from tensorflow.keras.losses import BinaryCrossentropy.
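In other words, the fix is a one-line import change; a sketch of the corrected compile:
# Wrong: tf.keras.metrics.BinaryCrossentropy is a metric, not a loss,
# so Keras cannot differentiate it and reports "No gradients provided".
# from tensorflow.keras.metrics import BinaryCrossentropy
# Correct: take the loss class from tensorflow.keras.losses instead.
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

sequential_model.compile(loss=BinaryCrossentropy(from_logits=True),
                         optimizer='adam',
                         metrics=[BinaryAccuracy(threshold=0.0)])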

Validation accuracy (val_acc) does not change over the epochs

The value of val_acc does not change over the epochs.
Summary:
I'm using a pre-trained (ImageNet) VGG16 from Keras;
from keras.applications import VGG16
conv_base = VGG16(weights='imagenet', include_top=True, input_shape=(224, 224, 3))
The dataset is from ISBI 2016 (ISIC): a set of 900 skin-lesion images used for binary classification (malignant or benign) for training and validation, plus 379 images for testing;
I use the top dense layers of VGG16 except the last one (that classifies over 1000 classes), and use a binary output with sigmoid function activation;
conv_base.layers.pop() # Remove last one
conv_base.trainable = False
model = models.Sequential()
model.add(conv_base)
model.add(layers.Dense(1, activation='sigmoid'))
Unlock the dense layers by setting them to trainable;
Fetch the data, which are in two different folders, one named "malignant" and the other "benign", within the "training data" folder;
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
folder = 'ISBI2016_ISIC_Part3_Training_Data'
batch_size = 20
full_datagen = ImageDataGenerator(
    rescale=1./255,
    #rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    validation_split=0.2,  # 20% validation
    horizontal_flip=True)
train_generator = full_datagen.flow_from_directory(  # Found 721 images belonging to 2 classes.
    folder,
    target_size=(224, 224),
    batch_size=batch_size,
    subset='training',
    class_mode='binary')
validation_generator = full_datagen.flow_from_directory(  # Found 179 images belonging to 2 classes.
    folder,
    target_size=(224, 224),
    batch_size=batch_size,
    subset='validation',
    shuffle=False,
    class_mode='binary')
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=0.001),  # High learning rate
              metrics=['accuracy'])
history = model.fit_generator(
    train_generator,
    steps_per_epoch=721 // batch_size + 1,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=180 // batch_size + 1,
)
Then I fine-tune it for 100 more epochs with a lower learning rate, setting the last convolutional layer to trainable.
I've tried many things such as:
Changing the optimizer (RMSprop, Adam and SGD);
Removing the top dense layers of the pre-trained VGG16 and adding mine;
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
Shuffle=True in validation_generator;
Changing batch size;
Varying the learning rate (0.001, 0.0001, 2e-5).
The results are similar to the following:
Epoch 1/100
37/37 [==============================] - 33s 900ms/step - loss: 0.6394 - acc: 0.7857 - val_loss: 0.6343 - val_acc: 0.8101
Epoch 2/100
37/37 [==============================] - 30s 819ms/step - loss: 0.6342 - acc: 0.8107 - val_loss: 0.6342 - val_acc: 0.8101
Epoch 3/100
37/37 [==============================] - 30s 822ms/step - loss: 0.6324 - acc: 0.8188 - val_loss: 0.6341 - val_acc: 0.8101
Epoch 4/100
37/37 [==============================] - 31s 840ms/step - loss: 0.6346 - acc: 0.8080 - val_loss: 0.6341 - val_acc: 0.8101
Epoch 5/100
37/37 [==============================] - 31s 833ms/step - loss: 0.6395 - acc: 0.7843 - val_loss: 0.6341 - val_acc: 0.8101
Epoch 6/100
37/37 [==============================] - 31s 829ms/step - loss: 0.6334 - acc: 0.8134 - val_loss: 0.6340 - val_acc: 0.8101
Epoch 7/100
37/37 [==============================] - 31s 834ms/step - loss: 0.6334 - acc: 0.8134 - val_loss: 0.6340 - val_acc: 0.8101
Epoch 8/100
37/37 [==============================] - 31s 829ms/step - loss: 0.6342 - acc: 0.8093 - val_loss: 0.6339 - val_acc: 0.8101
Epoch 9/100
37/37 [==============================] - 31s 849ms/step - loss: 0.6330 - acc: 0.8147 - val_loss: 0.6339 - val_acc: 0.8101
Epoch 10/100
37/37 [==============================] - 30s 812ms/step - loss: 0.6332 - acc: 0.8134 - val_loss: 0.6338 - val_acc: 0.8101
Epoch 11/100
37/37 [==============================] - 31s 839ms/step - loss: 0.6338 - acc: 0.8107 - val_loss: 0.6338 - val_acc: 0.8101
Epoch 12/100
37/37 [==============================] - 30s 807ms/step - loss: 0.6334 - acc: 0.8120 - val_loss: 0.6337 - val_acc: 0.8101
Epoch 13/100
37/37 [==============================] - 32s 852ms/step - loss: 0.6334 - acc: 0.8120 - val_loss: 0.6337 - val_acc: 0.8101
Epoch 14/100
37/37 [==============================] - 31s 826ms/step - loss: 0.6330 - acc: 0.8134 - val_loss: 0.6336 - val_acc: 0.8101
Epoch 15/100
37/37 [==============================] - 32s 854ms/step - loss: 0.6335 - acc: 0.8107 - val_loss: 0.6336 - val_acc: 0.8101
And it goes on the same way, with constant val_acc = 0.8101.
When I use the test set after training finishes, the confusion matrix gives me 100% correct on benign lesions (304) and 0% on malignant, as follows:
Confusion Matrix
[[304 0]
[ 75 0]]
What could I be doing wrong?
Thank you.
VGG16 was trained on RGB-centered data. Your ImageDataGenerator does not enable featurewise_center, however, so you're feeding your net raw RGB data. The VGG convolutional base can't process this into any meaningful information, so your net ends up universally guessing the more common class.
In general, when you see this type of problem (your net exclusively guessing the most common class), it means that there's something wrong with your data, not with the net. It can be caused by a preprocessing step like this or by a significant portion of "poisoned" anomalous training data that actively harms the training process.
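One way to address the preprocessing mismatch (a sketch, not from the original answer) is to hand VGG16's own preprocessing function to the generator instead of a plain 1/255 rescale:
from keras.applications.vgg16 import preprocess_input

# preprocess_input applies the channel-wise mean subtraction VGG16 was
# trained with; drop rescale=1./255 so pixels stay in the 0-255 range.
full_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    validation_split=0.2,
    horizontal_flip=True)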

Keras LSTM Model not learning

I wrote this code a few days ago and had a few bugs, but with some help I was able to fix them. The model is not learning. I tried different batch sizes, different numbers of epochs, and different activation functions, and checked my data a few times for flaws, but I wasn't able to find any. It is due in a week or so for a school project. Any help will be very much valued.
Here is the code.
from keras.layers import Dense, Input, Concatenate, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.layers import LSTM
import tensorflow as tf
import NetworkRequest as NR
import ParseNetworkRequest as PNR
import numpy as np

def buildModel():
    _Price = Input(shape=(1, 1))
    _Volume = Input(shape=(1, 1))
    PriceLayer = LSTM(128)(_Price)
    VolumeLayer = LSTM(128)(_Volume)
    merged = Concatenate(axis=1)([PriceLayer, VolumeLayer])
    Dropout(0.2)
    dense1 = Dense(128, input_dim=2, activation='relu', use_bias=True)(merged)
    Dropout(0.2)
    dense2 = Dense(64, input_dim=2, activation='relu', use_bias=True)(dense1)
    Dropout(0.2)
    output = Dense(1, activation='softmax', use_bias=True)(dense2)
    opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
    _Model = Model(inputs=[_Price, _Volume], output=output)
    _Model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])
    return _Model
if __name__ == '__main__':
    api_key = "47BGPYJPFN4CEC20"
    stock = "DJI"
    Index = ['4. close', '5. volume']
    RawData = NR.Initial_Network_Request(api_key, stock)
    Closing = PNR.Parse_Network_Request(RawData, Index[0])
    Volume = PNR.Parse_Network_Request(RawData, Index[1])
    Length = len(Closing)
    scalar = MinMaxScaler(feature_range=(0, 1))
    Closing_scaled = scalar.fit_transform(np.reshape(Closing[:-1], (-1, 1)))
    Volume_scaled = scalar.fit_transform(np.reshape(Volume[:-1], (-1, 1)))
    Labels_scaled = scalar.fit_transform(np.reshape(Closing[1:], (-1, 1)))
    Train_Closing = Closing_scaled[:int(0.9 * Length)]
    Train_Closing = np.reshape(Train_Closing, (Train_Closing.shape[0], 1, 1))
    Train_Volume = Volume_scaled[:int(0.9 * Length)]
    Train_Volume = np.reshape(Train_Volume, (Train_Volume.shape[0], 1, 1))
    Train_Labels = Labels_scaled[:int((0.9 * Length))]
    Train_Labels = np.reshape(Train_Labels, (Train_Labels.shape[0], 1))
    # -------------------------------------------------------------------------------------------#
    Test_Closing = Closing_scaled[int(0.9 * Length):(Length - 1)]
    Test_Closing = np.reshape(Test_Closing, (Test_Closing.shape[0], 1, 1))
    Test_Volume = Volume_scaled[int(0.9 * Length):(Length - 1)]
    Test_Volume = np.reshape(Test_Volume, (Test_Volume.shape[0], 1, 1))
    Test_Labels = Labels_scaled[int(0.9 * Length):(Length - 1)]
    Test_Labels = np.reshape(Test_Labels, (Test_Labels.shape[0], 1))
    Predict_Closing = Closing_scaled[-1]
    Predict_Closing = np.reshape(Predict_Closing, (Predict_Closing.shape[0], 1, 1))
    Predict_Volume = Volume_scaled[-1]
    Predict_Volume = np.reshape(Predict_Volume, (Predict_Volume.shape[0], 1, 1))
    Predict_Label = Labels_scaled[-1]
    Predict_Label = np.reshape(Predict_Label, (Predict_Label.shape[0], 1))
    model = buildModel()
    model.fit([Train_Closing, Train_Volume],
              [Train_Labels],
              validation_data=([Test_Closing, Test_Volume], [Test_Labels]),
              epochs=10,
              batch_size=Length)
This is the output when I run it.
Using TensorFlow backend.
2020-01-01 16:31:47.905012: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2199985000 Hz
2020-01-01 16:31:47.906105: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x49214f0 executing computations on platform Host. Devices:
2020-01-01 16:31:47.906137: I tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device (0): Host, Default Version
/home/martin/PycharmProjects/MarketPredictor/Model.py:26: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=[<tf.Tenso..., outputs=Tensor("de...)`
_Model = Model(inputs=[_Price, _Volume], output=output)
Train on 4527 samples, validate on 503 samples
Epoch 1/10
4527/4527 [==============================] - 1s 179us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 2/10
4527/4527 [==============================] - 0s 41us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 3/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 4/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 5/10
4527/4527 [==============================] - 0s 43us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 6/10
4527/4527 [==============================] - 0s 39us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 7/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 8/10
4527/4527 [==============================] - 0s 39us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 9/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 10/10
4527/4527 [==============================] - 0s 38us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Process finished with exit code 0
The loss is high, and the accuracy is 0.
Please help.
You're using activation functions and metrics made for a classification task, not a stock-forecasting task (which has a continuous target).
For a continuous target, your final activation should be linear, and the metric should be mse or mae, not accuracy.
accuracy would only be satisfied if the DJI prediction were exactly equal to the actual price. Since the DJI has at least 7 digits, that is nearly impossible.
Here are my suggestions:
Use a simpler network: not sure how big your dataset is, but sometimes adding dense layers isn't helpful. It looks like the weights of the intermediate layers are not changing at all. Try the model with just one dense layer.
Reduce dropout: try using a single dropout layer with Dropout(0.1).
Adam defaults: start with the Adam optimizer using its default parameters.
Metric selection: as mentioned in Nicolas's answer, use a regression metric instead of accuracy (see the sketch below).
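A minimal sketch of buildModel with these suggestions applied (the structure follows the question's code; the exact layer sizes are illustrative, not prescribed by either answer):
def buildModel():
    _Price = Input(shape=(1, 1))
    _Volume = Input(shape=(1, 1))
    PriceLayer = LSTM(128)(_Price)
    VolumeLayer = LSTM(128)(_Volume)
    merged = Concatenate(axis=1)([PriceLayer, VolumeLayer])
    merged = Dropout(0.1)(merged)  # one dropout layer, actually applied to the tensor
    output = Dense(1, activation='linear')(merged)  # linear head for a continuous target
    _Model = Model(inputs=[_Price, _Volume], outputs=output)
    _Model.compile(optimizer=tf.keras.optimizers.Adam(),  # default Adam parameters
                   loss='mse',
                   metrics=['mae'])  # regression metric instead of accuracy
    return _Model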

Keras train and validation metric values are different even when using same data (Logistic regression)

I have been trying to better understand the train/validation sequence in the keras model fit() loop. So I tried out a simple training loop where I attempted to fit a simple logistic regression model with input data consisting of a single feature.
I feed the same data for both training and validation. Under those conditions, with the batch size equal to the total data size, one would expect to obtain exactly the same loss and accuracy. But this is not the case.
Here is my code:
Generate some random data with two classes:
N = 100
x = np.concatenate([np.random.randn(N//2, 1), np.random.randn(N//2, 1)+2])
y = np.concatenate([np.zeros(N//2), np.ones(N//2)])
And plotting the two class data distribution (one feature x):
data = pd.DataFrame({'x': x.ravel(), 'y': y})
sns.violinplot(x='x', y='y', inner='point', data=data, orient='h')
pyplot.tight_layout(0)
pyplot.show()
Build and fit the keras model:
model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='sigmoid', input_dim=1)])
model.compile(optimizer=tf.keras.optimizers.SGD(2), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=10, validation_data=(x, y), batch_size=N)
Notice that I have specified the same data x and targets y for both training and validation_data. Also, the batch_size is the same as the total size: batch_size=N.
The training results are:
100/100 [==============================] - 1s 5ms/step - loss: 1.4500 - acc: 0.2300 - val_loss: 0.5439 - val_acc: 0.7200
Epoch 2/10
100/100 [==============================] - 0s 18us/step - loss: 0.5439 - acc: 0.7200 - val_loss: 0.4408 - val_acc: 0.8000
Epoch 3/10
100/100 [==============================] - 0s 16us/step - loss: 0.4408 - acc: 0.8000 - val_loss: 0.3922 - val_acc: 0.8300
Epoch 4/10
100/100 [==============================] - 0s 16us/step - loss: 0.3922 - acc: 0.8300 - val_loss: 0.3659 - val_acc: 0.8400
Epoch 5/10
100/100 [==============================] - 0s 17us/step - loss: 0.3659 - acc: 0.8400 - val_loss: 0.3483 - val_acc: 0.8500
Epoch 6/10
100/100 [==============================] - 0s 16us/step - loss: 0.3483 - acc: 0.8500 - val_loss: 0.3356 - val_acc: 0.8600
Epoch 7/10
100/100 [==============================] - 0s 17us/step - loss: 0.3356 - acc: 0.8600 - val_loss: 0.3260 - val_acc: 0.8600
Epoch 8/10
100/100 [==============================] - 0s 18us/step - loss: 0.3260 - acc: 0.8600 - val_loss: 0.3186 - val_acc: 0.8600
Epoch 9/10
100/100 [==============================] - 0s 18us/step - loss: 0.3186 - acc: 0.8600 - val_loss: 0.3127 - val_acc: 0.8700
Epoch 10/10
100/100 [==============================] - 0s 23us/step - loss: 0.3127 - acc: 0.8700 - val_loss: 0.3079 - val_acc: 0.8800
The results show that val_loss and loss are not the same at the end of each epoch, and also acc and val_acc are not exactly the same. However, based on this setup, one would expect them to be the same.
I have been going through the code in keras, particularly this part:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/engine/training.py#L1364
and so far, all I can say is that the difference is due to some different computation through the computation graph.
Does anyone have any idea why there would be such a difference?
So after looking more closely at the results: the loss and acc values from the training step are computed BEFORE the current batch is used to update the model.
Thus, in the case of a single batch per epoch, the train acc and loss are evaluated when the batch is fed in, and then the model parameters are updated by the provided optimizer. After the train step is finished, loss and accuracy are computed by feeding in the validation data, which is now evaluated using the newly updated model.
This is evident from the training output, where the validation accuracy and loss in epoch 1 are equal to the training accuracy and loss in epoch 2, and so on...
A quick check using tensorflow confirmed that values are fetched before variables are updated:
import tensorflow as tf
import numpy as np

np.random.seed(1)
x = tf.placeholder(dtype=tf.float32, shape=(None, 1), name="x")
y = tf.placeholder(dtype=tf.float32, shape=(None), name="y")
W = tf.get_variable(name="W", shape=(1, 1), dtype=tf.float32, initializer=tf.constant_initializer(0))
b = tf.get_variable(name="b", shape=1, dtype=tf.float32, initializer=tf.constant_initializer(0))
z = tf.matmul(x, W) + b
error = tf.square(z - y)
obj = tf.reduce_mean(error, name="obj")
opt = tf.train.MomentumOptimizer(learning_rate=0.025, momentum=0.9)
grads = opt.compute_gradients(obj)
train_step = opt.apply_gradients(grads)
N = 100
x_np = np.random.randn(N).reshape(-1, 1)
y_np = 2*x_np + 3 + np.random.randn(N)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2):
        res = sess.run([obj, W, b, train_step], feed_dict={x: x_np, y: y_np})
        print('MSE: {}, W: {}, b: {}'.format(res[0], res[1][0, 0], res[2][0]))
Output:
MSE: 14.721437454223633, W: 0.0, b: 0.0
MSE: 13.372591018676758, W: 0.08826743811368942, b: 0.1636980175971985
Since the parameters W and b were initialized to 0, it is clear that the fetched values are still 0 even though the session was run with the gradient-update op...
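The same effect can be seen directly in Keras (a sketch, reusing the model and data from the question): evaluating on the training data right after fit reproduces the last epoch's val_ numbers rather than its training numbers, because evaluation runs on the post-update weights.
# Both the epoch's val_ metrics and this evaluate call use the same
# (updated) weights on the same data, so they match; the epoch's reported
# training loss/acc were computed before the weight update.
final_loss, final_acc = model.evaluate(x, y, batch_size=N, verbose=0)
print(final_loss, final_acc)  # matches val_loss / val_acc of the last epoch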
