Getting really low accuracy with a LeNet CNN on MNIST - Python

I've been looking at other tutorials, and they're able to get up to 90% accuracy after just 10 epochs, so I'm guessing there's something wrong in my implementation: my accuracy is less than 1% after 10 epochs and barely increasing. I'm using the MNIST dataset, and any help would be greatly appreciated.
from PIL import Image
from tensorflow.keras import datasets, layers, models
from keras.layers import Dense, Dropout, Flatten
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train/255
X_test = X_test/255
X_train_processed = np.reshape(X_train, [X_train.shape[0], X_train.shape[1], X_train.shape[2], 1])
X_test_processed = np.reshape(X_test, [X_test.shape[0], X_test.shape[1], X_test.shape[2], 1])
X_train_processed = np.pad(X_train_processed, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_test_processed = np.pad(X_test_processed, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_train_processed = tf.image.resize(
    images = X_train_processed,
    size = np.array([32,32])
)
X_test_processed = tf.image.resize(
    images = X_test_processed,
    size = np.array([32,32])
)
Y_train_processed = tf.one_hot(y_train,10)
Y_test_processed = tf.one_hot(y_test,10)
Lnet = tf.keras.Sequential()
#First Layer
Lnet.add(
    tf.keras.layers.Conv2D(
        filters = 6,
        kernel_size = (5,5),
        strides = (1,1),
        padding = 'valid',
        activation = 'relu',
        #kernel_initializer = keras.initializers.glorot_normal(seed=0)
    )
)
Lnet.add(
    tf.keras.layers.AveragePooling2D(
        pool_size = (2,2),
        strides = (2,2),
        padding = 'valid'
    )
)
#Second Layer
Lnet.add(
    tf.keras.layers.Conv2D(
        filters = 16,
        kernel_size = (5,5),
        strides = (1,1),
        padding = 'valid',
        activation = 'relu'#,
        #kernel_initializer = keras.initializers.glorot_normal(seed=0)
    )
)
Lnet.add(
    tf.keras.layers.AveragePooling2D(
        pool_size = (2,2),
        strides = (2,2),
        padding = 'valid'
    )
)
Lnet.add(tf.keras.layers.Flatten())
Lnet.add(
    tf.keras.layers.Dense(
        units = 120,
        activation = 'relu'
    )
)
Lnet.add(tf.keras.layers.Flatten())
Lnet.add(
    tf.keras.layers.Dense(
        units = 84,
        activation = 'relu'
    )
)
Lnet.add(
    tf.keras.layers.Dense(
        units = 10,
        activation = 'softmax'
    )
)
Lnet.compile(
    loss = keras.losses.categorical_crossentropy,
    optimizer = 'Adam',
    metrics = ['Accuracy']
)
Lnet.fit(
    x = X_train_processed,
    y = Y_train_processed,
    batch_size = 128,
    epochs = 10,
)
score = Lnet.evaluate(
    x = X_test_processed,
    y = Y_test_processed
)
print(score[1])
Output:
Epoch 1/10
469/469 [==============================] - 8s 18ms/step - loss: 0.3533 - accuracy: 0.0000e+00
Epoch 2/10
469/469 [==============================] - 8s 18ms/step - loss: 0.1013 - accuracy: 5.1667e-05
Epoch 3/10
469/469 [==============================] - 8s 18ms/step - loss: 0.0730 - accuracy: 2.3167e-04
Epoch 4/10
469/469 [==============================] - 10s 21ms/step - loss: 0.0582 - accuracy: 4.8833e-04
Epoch 5/10
469/469 [==============================] - 9s 19ms/step - loss: 0.0478 - accuracy: 9.3333e-04
Epoch 6/10
469/469 [==============================] - 11s 23ms/step - loss: 0.0405 - accuracy: 0.0019
Epoch 7/10
469/469 [==============================] - 12s 25ms/step - loss: 0.0371 - accuracy: 0.0026
Epoch 8/10
469/469 [==============================] - 11s 23ms/step - loss: 0.0301 - accuracy: 0.0057
Epoch 9/10
469/469 [==============================] - 12s 25ms/step - loss: 0.0280 - accuracy: 0.0065
Epoch 10/10
469/469 [==============================] - 11s 24ms/step - loss: 0.0260 - accuracy: 0.0085
313/313 [==============================] - 1s 3ms/step - loss: 0.0323 - accuracy: 0.0080
0.008030000142753124

I changed a couple of imports and the metric from 'Accuracy' to 'accuracy'; please check the changes below. With these small modifications, accuracy comes out at 98.7% (see the output further down). The capitalization matters: Keras special-cases the string 'accuracy' and resolves it to the accuracy variant that matches your loss (categorical accuracy here), whereas 'Accuracy' instantiates the generic tf.keras.metrics.Accuracy, which counts exact element-wise matches between the one-hot labels and the raw softmax probabilities, hence the near-zero values. One more suggestion: don't mix functions from two different packages (Keras and TensorFlow).
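To see the difference between the two metric names, here is a minimal sketch (assuming TF 2.x; my illustration, not part of the original answer):
# 'Accuracy' resolves to tf.keras.metrics.Accuracy, which checks exact
# element-wise equality between labels and predictions; with one-hot
# targets and float softmax outputs this is almost never true.
import tensorflow as tf

y_true = tf.one_hot([2, 1], 10)              # one-hot labels
y_pred = tf.random.uniform((2, 10))          # stand-in for softmax outputs

m = tf.keras.metrics.Accuracy()
m.update_state(y_true, y_pred)
print(m.result().numpy())                    # ~0.0: exact-match comparison

m = tf.keras.metrics.CategoricalAccuracy()   # what 'accuracy' maps to here
m.update_state(y_true, y_pred)
print(m.result().numpy())                    # argmax comparison, ~chance on random inputs
The full corrected script: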
from PIL import Image
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Dense, Dropout, Flatten
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import keras

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train/255
X_test = X_test/255
X_train_processed = np.reshape(X_train, [X_train.shape[0], X_train.shape[1], X_train.shape[2], 1])
X_test_processed = np.reshape(X_test, [X_test.shape[0], X_test.shape[1], X_test.shape[2], 1])
X_train_processed = np.pad(X_train_processed, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_test_processed = np.pad(X_test_processed, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_train_processed = tf.image.resize(
    images = X_train_processed,
    size = np.array([32,32])
)
X_test_processed = tf.image.resize(
    images = X_test_processed,
    size = np.array([32,32])
)
Y_train_processed = tf.one_hot(y_train,10)
Y_test_processed = tf.one_hot(y_test,10)
Lnet = tf.keras.Sequential()
#First Layer
Lnet.add(
    tf.keras.layers.Conv2D(
        filters = 6,
        kernel_size = (5,5),
        strides = (1,1),
        padding = 'valid',
        activation = 'relu',
        #kernel_initializer = keras.initializers.glorot_normal(seed=0)
    )
)
Lnet.add(
    tf.keras.layers.AveragePooling2D(
        pool_size = (2,2),
        strides = (2,2),
        padding = 'valid'
    )
)
#Second Layer
Lnet.add(
    tf.keras.layers.Conv2D(
        filters = 16,
        kernel_size = (5,5),
        strides = (1,1),
        padding = 'valid',
        activation = 'relu'#,
        #kernel_initializer = keras.initializers.glorot_normal(seed=0)
    )
)
Lnet.add(
    tf.keras.layers.AveragePooling2D(
        pool_size = (2,2),
        strides = (2,2),
        padding = 'valid'
    )
)
Lnet.add(tf.keras.layers.Flatten())
Lnet.add(
    tf.keras.layers.Dense(
        units = 120,
        activation = 'relu'
    )
)
Lnet.add(tf.keras.layers.Flatten())
Lnet.add(
    tf.keras.layers.Dense(
        units = 84,
        activation = 'relu'
    )
)
Lnet.add(
    tf.keras.layers.Dense(
        units = 10,
        activation = 'softmax'
    )
)
Lnet.compile(
    loss = tf.keras.losses.categorical_crossentropy,
    optimizer = 'Adam',
    metrics = ['accuracy']
)
Lnet.fit(
    x = X_train_processed,
    y = Y_train_processed,
    batch_size = 128,
    epochs = 10,
)
score = Lnet.evaluate(
    x = X_test_processed,
    y = Y_test_processed
)
print(score[1])
Epoch 9/10
469/469 [==============================] - 30s 64ms/step - loss: 0.0260 - accuracy: 0.9916
Epoch 10/10
469/469 [==============================] - 30s 64ms/step - loss: 0.0239 - accuracy: 0.9922
313/313 [==============================] - 3s 10ms/step - loss: 0.0394 - accuracy: 0.9876
0.9876000285148621
Complete code is here.

Related

Autoencoder is being trained through only one iteration per epoch

I am training an autoencoder on a custom dataset in a Google Colab directory (train folder/test folder), using the code below (based on 1):
my_data_train_dir = train_path
labels_train = os.listdir(my_data_train_dir)
data_train = tf.keras.utils.image_dataset_from_directory(train_path, batch_size=1, image_size=(224, 224))
data_train_iterator = data_train.as_numpy_iterator()
batch_train = data_train_iterator.next()
my_data_test_dir = test_path
labels_test = os.listdir(my_data_test_dir)
data_test = tf.keras.utils.image_dataset_from_directory(test_path, batch_size=1, image_size=(224, 224))
data_test_iterator = data_test.as_numpy_iterator()
batch_test = data_test_iterator.next()
# Found 10903 files belonging to 67 classes.
# Found 1619 files belonging to 67 classes
encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[224, 224, 3]),
    keras.layers.Dense(400, activation="relu"),
    keras.layers.Dense(200, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(50, activation="relu"),
    keras.layers.Dense(25, activation="relu"),
])
decoder = keras.models.Sequential([
    keras.layers.Dense(50, activation="relu", input_shape=[25]),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(200, activation="relu"),
    keras.layers.Dense(400, activation="relu"),
    keras.layers.Dense(224 * 224 * 3, activation="sigmoid"),
    keras.layers.Reshape([224, 224, 3])
])
stacked_autoencoder = keras.models.Sequential([encoder, decoder])
stacked_autoencoder.compile(loss="binary_crossentropy",
                            optimizer='adam', metrics=['accuracy'])
x_train = batch_train[0] / 255
x_test = batch_test[0] / 255
history = stacked_autoencoder.fit(x_train, x_train, epochs=10,
                                  validation_data=[x_test, x_test])
Epoch 1/10
1/1 [==============================] - 0s 77ms/step - loss: 0.5549 - accuracy: 0.9994 - val_loss: 0.7653 - val_accuracy: 0.5443
Epoch 2/10
1/1 [==============================] - 0s 51ms/step - loss: 0.5549 - accuracy: 0.9992 - val_loss: 0.7669 - val_accuracy: 0.5444
Epoch 3/10
1/1 [==============================] - 0s 51ms/step - loss: 0.5549 - accuracy: 0.9994 - val_loss: 0.7646 - val_accuracy: 0.5443
As you can see, training at each epoch runs through only 1 iteration, even though we have 10903 training images in total (10903/1 = 10903 iterations). How can I solve this problem?
Thanks in advance!
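A minimal sketch of the likely cause and fix (my hedged reading of the code above, not from an accepted answer): as_numpy_iterator().next() pulls exactly one batch of one image, and that single batch is all that fit() ever sees. Passing the datasets themselves, mapped to (input, target) pairs, trains on every batch:
# Hedged sketch: reuses the question's paths and stacked_autoencoder.
# Autoencoder targets are the inputs themselves, rescaled to [0, 1].
import tensorflow as tf

data_train = tf.keras.utils.image_dataset_from_directory(
    train_path, batch_size=32, image_size=(224, 224))
data_test = tf.keras.utils.image_dataset_from_directory(
    test_path, batch_size=32, image_size=(224, 224))

to_pairs = lambda x, y: (x / 255.0, x / 255.0)   # drop class labels, target = input
train_ds = data_train.map(to_pairs)
test_ds = data_test.map(to_pairs)

# Now every epoch iterates over all ~10903/32 batches.
history = stacked_autoencoder.fit(train_ds, epochs=10,
                                  validation_data=test_ds)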

Tensorflow ValueError: Shapes (64, 1) and (1, 1) are incompatible

I'm trying to build a Siamese neural network to analyze the MNIST dataset; however, when trying to fit the model to the dataset, I get an error saying my training data and labels have mismatched shapes. I tried changing the loss function and squeezing the labels array, but neither "solution" worked.
Here are the train and labels arrays' shapes:
pairTrain shape: (120000, 2, 28, 28, 1)
labelTrain shape: (120000, 1)
Here's my model:
def build_model(input_shape, embedDim=48):
    inputs = Input(input_shape)
    x = Conv2D(64, (2, 2), padding="same", activation="relu", input_shape=input_shape)(inputs)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    x = Conv2D(32, (2, 2), padding="same", activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    x = Conv2D(16, (2, 2), padding="same", activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    outputs = Flatten()(x)
    outputs = Dense(embedDim)(outputs)
    model = Model(inputs, outputs)
    return model
And finally here's the code that generates the error itself:
imgA = Input(shape=(28, 28, 1))
imgB = Input(shape=(28, 28, 1))
featA = build_model((28, 28, 1))(imgA)
featB = build_model((28, 28, 1))(imgB)
distance = Lambda(euclidean_distance)([featA, featB])
output = Dense(1, activation="sigmoid")(distance)
model = Model([imgA, imgB], output)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(
    [pairTrain[:, 0], pairTrain[:, 1]], labelTrain,
    validation_data=[[pairTest[:, 0], pairTest[:, 1]], labelTest],
    batch_size=64,
    epochs=10
)
model.save("output/siamese_model")
Please help me to resolve the problem.
I was not able to reproduce the error using the reproduction code further below. I suspect that your labels' shape is different from the one you reported, or that the labels are not strictly binary (0s and 1s only); see the sanity check right below.
Also, you should use tf.keras.losses.BinaryCrossentropy instead of tf.keras.losses.CategoricalCrossentropy, as your labels should be binary to match the sigmoid activation in the last layer.
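A quick check you can run on your side (a hedged sketch using the names from your post, not code from the original answer):
import numpy as np

# Labels should be shape (120000, 1) and contain only 0s and 1s.
print(labelTrain.shape, labelTrain.dtype)
print(np.unique(labelTrain))   # expect: [0 1]
The full reproduction: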
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Input, Flatten, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow as tf
def build_model(input_shape, embedDim=48):
    inputs = Input(input_shape)
    x = Conv2D(64, (2, 2), padding="same", activation="relu", input_shape=input_shape)(inputs)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    x = Conv2D(32, (2, 2), padding="same", activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    x = Conv2D(16, (2, 2), padding="same", activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.3)(x)
    outputs = Flatten()(x)
    outputs = Dense(embedDim)(outputs)
    model = Model(inputs, outputs)
    return model
imgA = Input(shape=(28, 28, 1))
imgB = Input(shape=(28, 28, 1))
featA = build_model((28, 28, 1))(imgA)
featB = build_model((28, 28, 1))(imgB)
distance = Lambda(lambda x: x[0]-x[1])([featA, featB])
output = Dense(1, activation="sigmoid")(distance)
model = Model([imgA, imgB], output)
pairTrain = tf.random.uniform((10, 2, 28, 28, 1))
labelTrain = tf.random.uniform(shape=(10, 1), minval=0, maxval=2, dtype=tf.int32)
pairTest = tf.random.uniform((10, 2, 28, 28, 1))
labelTest = tf.random.uniform(shape=(10, 1), minval=0, maxval=2, dtype=tf.int32)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])  # binary labels with a sigmoid head
history = model.fit(
    [pairTrain[:, 0], pairTrain[:, 1]], labelTrain,
    validation_data=[[pairTest[:, 0], pairTest[:, 1]], labelTest],
    batch_size=64,
    epochs=10
)
model.save("output/siamese_model")
Epoch 1/10
1/1 [==============================] - 2s 2s/step - loss: 0.7061 - accuracy: 0.5000 - val_loss: 0.6862 - val_accuracy: 0.7000
Epoch 2/10
1/1 [==============================] - 0s 80ms/step - loss: 0.7882 - accuracy: 0.4000 - val_loss: 0.6751 - val_accuracy: 0.6000
Epoch 3/10
1/1 [==============================] - 0s 81ms/step - loss: 0.6358 - accuracy: 0.5000 - val_loss: 0.6755 - val_accuracy: 0.6000
Epoch 4/10
1/1 [==============================] - 0s 79ms/step - loss: 0.7027 - accuracy: 0.5000 - val_loss: 0.6759 - val_accuracy: 0.6000
Epoch 5/10
1/1 [==============================] - 0s 82ms/step - loss: 0.6970 - accuracy: 0.4000 - val_loss: 0.6752 - val_accuracy: 0.6000
Epoch 6/10
1/1 [==============================] - 0s 83ms/step - loss: 0.7564 - accuracy: 0.4000 - val_loss: 0.6779 - val_accuracy: 0.6000
Epoch 7/10
1/1 [==============================] - 0s 73ms/step - loss: 0.7123 - accuracy: 0.6000 - val_loss: 0.6818 - val_accuracy: 0.6000

Training my model generates unreasonable loss and val_loss values | Loss: nan and Val_Loss: nan

Good morning everyone. I have built a model, but I'm having problems in the training section: when I train it, the loss and val_loss take nonsensical values, while accuracy looks normal. This happened when I added classes: in the first experiment, with two classes, I got 95% accuracy and 93% validation accuracy, but after adding 2 more classes the results stopped making sense. I have tried a few fixes without success, including switching to softmax and categorical_crossentropy. When I set num_class units on the final Dense layer, I get "ValueError: logits and labels must have the same shape ((None, 3) vs (None, 1))". I hope someone can help me. Thank you. Here I attach my code.
Load Dataset
training_dir = r"Dataset/train/"
validation_dir = r"Dataset/val/"
testing_dir = r"Dataset/test/"
categories = ['class_A', 'class_B', 'class_C', 'class_D']
Create Training Data
img_size = (128,128)
training_data = []
validation_data = []
testing_data = []
def create_training_data():
    for category in categories:
        path = os.path.join(training_dir, category)
        class_num = categories.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, img_size)
                training_data.append([new_array, class_num])
            except Exception as e:
                pass

def create_validation_data():
    for category in categories:
        path = os.path.join(validation_dir, category)
        class_num = categories.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, img_size)
                validation_data.append([new_array, class_num])
            except Exception as e:
                pass

def create_testing_data():
    for category in categories:
        path = os.path.join(testing_dir, category)
        class_num = categories.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img))
                new_array = cv2.resize(img_array, img_size)
                testing_data.append([new_array, class_num])
            except Exception as e:
                pass
create_training_data()
Normalization of data to range 0-1 and labeling data
X_train = []
Y_train = []
for features, label in training_data:
    X_train.append(features)
    Y_train.append(label)
X_train = np.array(X_train).reshape(-1,128,128)
X_train = X_train.astype('float32')/255.0
X_train = X_train.reshape(-1,128,128,3)
print(X_train.shape)

X_val = []
Y_val = []
for features, label in validation_data:
    X_val.append(features)
    Y_val.append(label)
X_val = np.array(X_val).reshape(-1,128,128)
X_val = X_val.astype('float32')/255.0
X_val = X_val.reshape(-1,128,128,3)
print(X_val.shape)

X_test = []
Y_test = []
for features, label in testing_data:
    X_test.append(features)
    Y_test.append(label)
X_test = np.array(X_test).reshape(-1,128,128)
X_test = X_test.astype('float32')/255.0
X_test = X_test.reshape(-1,128,128,3)
print(X_test.shape)
Labeling Data using label encoder for Y_train/val/test
lb = LabelEncoder()
Y_train = lb.fit_transform(Y_train)
Y_val = lb.fit_transform(Y_val)
Y_test = lb.fit_transform(Y_test)
ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range = 30,
    zoom_range = 0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False)
Hyperparameter Tuning
HP_APL_DROPOUT = hp.HParam('dropout_apl', hp.RealInterval(0.05, 0.1))
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([64, 128]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.25, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd', 'adamax']))
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_APL_DROPOUT, HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )
Define Training Model
def train_test_model(hparams):
    model = Sequential([
        Input(shape=(128,128,3)),
        Conv2D(filters=32, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2,2)),
        Conv2D(filters=64, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        Conv2D(filters=64, kernel_size=3, strides=1, padding='same', activation='swish'),
        MaxPooling2D(pool_size = (2,2)),
        Dropout(hparams[HP_APL_DROPOUT]),
        Conv2D(filters=128, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2,2)),
        Conv2D(filters=128, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2,2)),
        Dropout(hparams[HP_APL_DROPOUT]),
        Conv2D(filters=256, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2,2)),
        Dropout(0.05),
        Conv2D(filters=256, kernel_size=3, strides=1, padding='same', activation='swish'),
        BatchNormalization(),
        MaxPooling2D(pool_size = (2,2)),
        Dropout(hparams[HP_APL_DROPOUT]),
        GlobalMaxPool2D(),
        Flatten(),
        Dense(hparams[HP_NUM_UNITS], activation="swish"),
        Dense(128, activation = 'swish'),
        Dropout(hparams[HP_DROPOUT]),
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    datagen.fit(X_train)
    history = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        epochs = 5,
        validation_data = datagen.flow(X_val, Y_val))
    _, accuracy = model.evaluate(X_val, Y_val)
    return accuracy
Running Model
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)
        accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=50)

session_num = 0
for dropout_apl_rate in (HP_APL_DROPOUT.domain.min_value, HP_APL_DROPOUT.domain.max_value):
    for num_units in HP_NUM_UNITS.domain.values:
        for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
            for optimizer in HP_OPTIMIZER.domain.values:
                hparams = {
                    HP_APL_DROPOUT: dropout_apl_rate,
                    HP_NUM_UNITS: num_units,
                    HP_DROPOUT: dropout_rate,
                    HP_OPTIMIZER: optimizer,
                }
                run_name = "run-%d" % session_num
                print('--- Starting trial: %s' % run_name)
                print({h.name: hparams[h] for h in hparams})
                run('logs/hparam_tuning/' + run_name, hparams)
                session_num += 1
Error Log
--- Starting trial: run-0
{'dropout_apl': 0.05, 'num_units': 64, 'dropout': 0.25, 'optimizer': 'adam'}
C:\anaconda3\envs\sub_base_one\lib\site-packages\keras\engine\training.py:1972: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
warnings.warn('`Model.fit_generator` is deprecated and '
Epoch 1/5
478/478 [==============================] - 57s 111ms/step - loss: -1034990.9375 - accuracy: 0.3468 - val_loss: -3408716.0000 - val_accuracy: 0.2863
Epoch 2/5
478/478 [==============================] - 52s 109ms/step - loss: -27187870.0000 - accuracy: 0.3779 - val_loss: -70205120.0000 - val_accuracy: 0.3440
Epoch 3/5
478/478 [==============================] - 53s 111ms/step - loss: -159927984.0000 - accuracy: 0.3481 - val_loss: -305764640.0000 - val_accuracy: 0.4137
Epoch 4/5
478/478 [==============================] - 55s 114ms/step - loss: -513765920.0000 - accuracy: 0.3257 - val_loss: -822113920.0000 - val_accuracy: 0.2790
Epoch 5/5
478/478 [==============================] - 54s 113ms/step - loss: -1208896128.0000 - accuracy: 0.3155 - val_loss: -1744491776.0000 - val_accuracy: 0.3015
60/60 [==============================] - 1s 17ms/step - loss: -1766485504.0000 - accuracy: 0.3639
--- Starting trial: run-1
{'dropout_apl': 0.05, 'num_units': 64, 'dropout': 0.25, 'optimizer': 'adamax'}
Epoch 1/5
478/478 [==============================] - 56s 115ms/step - loss: -34721.3828 - accuracy: 0.3015 - val_loss: -56985.7461 - val_accuracy: 0.2465
Epoch 2/5
478/478 [==============================] - 55s 115ms/step - loss: -642847.1875 - accuracy: 0.3482 - val_loss: -1573540.5000 - val_accuracy: 0.4442
Epoch 3/5
478/478 [==============================] - 55s 114ms/step - loss: -3433380.0000 - accuracy: 0.4208 - val_loss: -6417029.0000 - val_accuracy: 0.4373
Epoch 4/5
478/478 [==============================] - 55s 115ms/step - loss: -10932957.0000 - accuracy: 0.3973 - val_loss: -16847372.0000 - val_accuracy: 0.3382
Epoch 5/5
478/478 [==============================] - 56s 117ms/step - loss: -26557560.0000 - accuracy: 0.3720 - val_loss: -38307184.0000 - val_accuracy: 0.4483
60/60 [==============================] - 1s 17ms/step - loss: -39560612.0000 - accuracy: 0.4704
--- Starting trial: run-2
{'dropout_apl': 0.05, 'num_units': 64, 'dropout': 0.25, 'optimizer': 'sgd'}
Epoch 1/5
478/478 [==============================] - 56s 114ms/step - loss: nan - accuracy: 0.2530 - val_loss: nan - val_accuracy: 0.2533
Epoch 2/5
478/478 [==============================] - 54s 113ms/step - loss: nan - accuracy: 0.2532 - val_loss: nan - val_accuracy: 0.2533
Epoch 3/5
148/478 [========>.....................] - ETA: 33s - loss: nan - accuracy: 0.2544
I hope someone can help me find the cause and a solution. Thank you for helping!
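A hedged sketch of the likely fix (my reading, based on the Dense(1, sigmoid) head and the 4-entry categories list above, not an accepted answer): binary_crossentropy with a single sigmoid unit only fits 2 classes, and the (None, 3) vs (None, 1) error comes from pairing a multi-unit head with integer labels. With the integer labels produced by LabelEncoder, a softmax head plus sparse_categorical_crossentropy avoids one-hot encoding entirely:
# Sketch only: change the model head and the compile call inside
# train_test_model; everything else can stay as in the question.
num_classes = len(categories)                  # 4 in the question

# Last layer: Dense(1, activation='sigmoid')  -->
#             Dense(num_classes, activation='softmax')

model.compile(
    optimizer=hparams[HP_OPTIMIZER],
    loss='sparse_categorical_crossentropy',    # integer labels 0..num_classes-1
    metrics=['accuracy'],
)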

WARNING:tensorflow:Model was constructed with shape (20, 37, 42) for input Tensor("input_5:0", shape=(20, 37, 42), dtype=float32), but

WARNING:tensorflow:Model was constructed with shape (20, 37, 42) for input Tensor("input_5:0", shape=(20, 37, 42), dtype=float32), but it was called on an input with incompatible shape (None, 37).
Hello! Deep learning noob here... I'm having trouble using LSTM layers.
The input is a length-37 float array containing 2 floats and a length-35 one-hot array converted into floats. The output is a length-19 array of 0s and 1s. As the title suggests, I'm having trouble reshaping my input data to fit the model, and I'm not even sure what input dimensions would be considered 'compatible'.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import random
inputs, outputs = [], []
for x in range(10000):
    tempi, tempo = [], []
    tempi.append(random.random() - 0.5)
    tempi.append(random.random() - 0.5)
    for x2 in range(35):
        if random.random() > 0.5:
            tempi.append(1.)
        else:
            tempi.append(0.)
    for x2 in range(19):
        if random.random() > 0.5:
            tempo.append(1.)
        else:
            tempo.append(0.)
    inputs.append(tempi)
    outputs.append(tempo)
batch = 20
timesteps = 42
training_units = 0.85
cutting_point_i = int(len(inputs)*training_units)
cutting_point_o = int(len(outputs)*training_units)
x_train, x_test = np.asarray(inputs[:cutting_point_i]), np.asarray(inputs[cutting_point_i:])
y_train, y_test = np.asarray(outputs[:cutting_point_o]), np.asarray(outputs[cutting_point_o:])
input_layer = keras.Input(shape=(37,timesteps),batch_size=batch)
dense = layers.LSTM(150, activation="sigmoid", return_sequences=True)
x = dense(input_layer)
hidden_layer_2 = layers.LSTM(150, activation="sigmoid", return_sequences=True)(x)
output_layer = layers.Dense(10, activation="softmax")(hidden_layer_2)
model = keras.Model(inputs=input_layer, outputs=output_layer, name="my_model")
Several problems here:
Your input didn't have time steps; you need input shape (n, timesteps, features).
In input_shape, the time steps dimension comes first, not last.
Your last LSTM layer returns sequences, so you can't compare its output with 0s and 1s.
What I did:
I added time steps to your data (7)
I permuted the dimensions in input_shape
I set the final return_sequences=False
Completely fixed example with generated data:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
batch = 20
n_samples = 1000
timesteps = 7
features = 10
x_train = np.random.rand(n_samples, timesteps, features)
y_train = keras.utils.to_categorical(np.random.randint(0, 10, n_samples))
input_layer = keras.Input(shape=(timesteps, features),batch_size=batch)
dense = layers.LSTM(16, activation="sigmoid", return_sequences=True)(input_layer)
hidden_layer_2 = layers.LSTM(16, activation="sigmoid", return_sequences=False)(dense)
output_layer = layers.Dense(10, activation="softmax")(hidden_layer_2)
model = keras.Model(inputs=input_layer, outputs=output_layer, name="my_model")
model.compile(loss='categorical_crossentropy', optimizer='adam')
history = model.fit(x_train, y_train)
Train on 1000 samples
20/1000 [..............................] - ETA: 2:50 - loss: 2.5145
200/1000 [=====>........................] - ETA: 14s - loss: 2.3934
380/1000 [==========>...................] - ETA: 5s - loss: 2.3647
560/1000 [===============>..............] - ETA: 2s - loss: 2.3549
740/1000 [=====================>........] - ETA: 1s - loss: 2.3395
900/1000 [==========================>...] - ETA: 0s - loss: 2.3363
1000/1000 [==============================] - 4s 4ms/sample - loss: 2.3353
The correct input for your model is (20, 37, 42).
Note: Here 20 is the batch_size you have explicitly specified.
Code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
batch = 20
timesteps = 42
training_units = 0.85
x1 = tf.constant(np.random.randint(50, size =(1000,37, 42)), dtype = tf.float32)
y1 = tf.constant(np.random.randint(10, size =(1000,)), dtype = tf.int32)
input_layer = keras.Input(shape=(37,timesteps),batch_size=batch)
dense = layers.LSTM(150, activation="sigmoid", return_sequences=True)
x = dense(input_layer)
hidden_layer_2 = layers.LSTM(150, activation="sigmoid", return_sequences=True)(x)
hidden_layer_3 = layers.Flatten()(hidden_layer_2)
output_layer = layers.Dense(10, activation="softmax")(hidden_layer_3)
model = keras.Model(inputs=input_layer, outputs=output_layer, name="my_model")
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
tf.keras.utils.plot_model(model, 'my_first_model.png', show_shapes=True)
Model architecture: the plot_model diagram (image not reproduced here) clearly shows the input size.
Code to Run:
model.fit(x = x1, y = y1, batch_size = batch, epochs = 10)
Note: whatever batch_size you specified in the Input layer, you have to specify the same batch_size in the model.fit() call.
Output:
Epoch 1/10
50/50 [==============================] - 4s 89ms/step - loss: 2.3288 - accuracy: 0.0920
Epoch 2/10
50/50 [==============================] - 5s 91ms/step - loss: 2.3154 - accuracy: 0.1050
Epoch 3/10
50/50 [==============================] - 5s 101ms/step - loss: 2.3114 - accuracy: 0.0900
Epoch 4/10
50/50 [==============================] - 5s 101ms/step - loss: 2.3036 - accuracy: 0.1060
Epoch 5/10
50/50 [==============================] - 5s 99ms/step - loss: 2.2998 - accuracy: 0.1000
Epoch 6/10
50/50 [==============================] - 4s 89ms/step - loss: 2.2986 - accuracy: 0.1170
Epoch 7/10
50/50 [==============================] - 4s 84ms/step - loss: 2.2981 - accuracy: 0.1300
Epoch 8/10
50/50 [==============================] - 5s 103ms/step - loss: 2.2950 - accuracy: 0.1290
Epoch 9/10
50/50 [==============================] - 5s 106ms/step - loss: 2.2960 - accuracy: 0.1210
Epoch 10/10
50/50 [==============================] - 5s 97ms/step - loss: 2.2874 - accuracy: 0.1210

Keras LSTM + TensorFlow and a number sequence (improve loss)

First of all, I'm running with the following setup:
Running on windows 10
Python 3.6.2
TensorFlow 1.8.0
Keras 2.1.6
I'm trying to predict, or at least guesstimate the following number sequence:
https://codepen.io/anon/pen/RJRPPx (limited to 20,000 for testing), the full sequence contains about one million records.
And here is the code (run.py)
import lstm
import time
import matplotlib.pyplot as plt
def plot_results(predicted_data, true_data):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    plt.plot(predicted_data, label='Prediction')
    plt.legend()
    plt.show()

def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift it in the graph to its correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()
#Main Run Thread
if __name__ == '__main__':
    global_start_time = time.time()
    epochs = 10
    seq_len = 50
    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('dice_amplified/primeros_20_mil.csv', seq_len, True)
    print('> Data Loaded. Compiling...')
    model = lstm.build_model([1, 50, 100, 1])
    model.fit(
        X_train,
        y_train,
        batch_size = 512,
        nb_epoch=epochs,
        validation_split=0.05)
    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    #predicted = lstm.predict_sequence_full(model, X_test, seq_len)
    #predicted = lstm.predict_point_by_point(model, X_test)
    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50)
I have tried to:
increase and decrease epochs.
increase and decrease batch size.
amplify the data.
The following plot (image not reproduced here) was generated with:
epochs = 10
batch_size = 512
validation_split = 0.05
Also, as far as I understand, the loss should be decreasing with more epochs, which doesn't seem to be happening!
Using TensorFlow backend.
> Loading data...
> Data Loaded. Compiling...
> Compilation Time : 0.03000473976135254
Train on 17056 samples, validate on 898 samples
Epoch 1/10
17056/17056 [==============================] - 31s 2ms/step - loss: 29927.0164 - val_loss: 289.8873
Epoch 2/10
17056/17056 [==============================] - 29s 2ms/step - loss: 29920.3513 - val_loss: 290.1069
Epoch 3/10
17056/17056 [==============================] - 29s 2ms/step - loss: 29920.4602 - val_loss: 292.7868
Epoch 4/10
17056/17056 [==============================] - 27s 2ms/step - loss: 29915.0955 - val_loss: 286.7317
Epoch 5/10
17056/17056 [==============================] - 26s 2ms/step - loss: 29913.6961 - val_loss: 298.7889
Epoch 6/10
17056/17056 [==============================] - 26s 2ms/step - loss: 29920.2068 - val_loss: 287.5138
Epoch 7/10
17056/17056 [==============================] - 28s 2ms/step - loss: 29914.0650 - val_loss: 295.2230
Epoch 8/10
17056/17056 [==============================] - 25s 1ms/step - loss: 29912.8860 - val_loss: 295.0592
Epoch 9/10
17056/17056 [==============================] - 28s 2ms/step - loss: 29907.4067 - val_loss: 286.9338
Epoch 10/10
17056/17056 [==============================] - 46s 3ms/step - loss: 29914.6869 - val_loss: 289.3236
Any recommendations? How could I improve it? Thanks!
lstm.py contents:
import os
import time
import warnings
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings
warnings.filterwarnings("ignore") #Hide messy Numpy warnings
def load_data(filename, seq_len, normalise_window):
    f = open(filename, 'rb').read()
    data = f.decode().split('\n')
    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])
    if normalise_window:
        result = normalise_windows(result)
    result = np.array(result)
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

def build_model(layers):
    model = Sequential()
    model.add(LSTM(
        input_shape=(layers[1], layers[0]),
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))
    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

def predict_point_by_point(model, data):
    # Predict each timestep given the last sequence of true data,
    # in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    # Predict sequence of 50 steps before shifting prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(int(len(data)/prediction_len)):
        curr_frame = data[i*prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
Addendum:
At nuric's suggestion, I modified the model as follows:
def build_model(layers):
    model = Sequential()
    model.add(LSTM(input_shape=(layers[1], layers[0]), output_dim=layers[1], return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(layers[2], return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))
    model.add(Dense(64, input_dim=50, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model
Still a bit lost on this one...
Even though you normalise the input, you don't normalise the output. The LSTM has a tanh output by default, which means you have a limited feature space, i.e. the dense layer won't be able to regress to large numbers.
You have a fixed-length numerical input of shape (50,); pass it directly to Dense layers with relu activation, which will perform better on this regression task. Something simple like:
model = Sequential()
model.add(Dense(64, input_dim=50, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))
For regression it is also preferable to use l2 regularizers instead of Dropout, because you are not really doing feature extraction for classification; a sketch follows.
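For illustration, a minimal sketch (my own, assuming the same Keras 2.1.x API used above, not code from the original answer) of swapping Dropout for l2 weight regularization in that regression head:
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import l2

# Same small Dense head as above, with l2 penalties on the weights
# instead of Dropout between layers.
model = Sequential()
model.add(Dense(64, input_dim=50, activation='relu', kernel_regularizer=l2(1e-4)))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(1e-4)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='rmsprop')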
