Model learns when using keras but doesn't with tf.keras - python

The model that I am using is this:
from keras.layers import (Input, MaxPooling1D, Dropout,
BatchNormalization, Activation, Add,
Flatten, Conv1D, Dense)
from keras.models import Model
import numpy as np
class ResidualUnit(object):
"""References
----------
.. [1] K. He, X. Zhang, S. Ren, and J. Sun, "Identity Mappings in Deep Residual Networks,"
arXiv:1603.05027 [cs], Mar. 2016. https://arxiv.org/pdf/1603.05027.pdf.
.. [2] K. He, X. Zhang, S. Ren, and J. Sun, "Deep Residual Learning for Image Recognition," in 2016 IEEE Conference
on Computer Vision and Pattern Recognition (CVPR), 2016, pp. 770-778. https://arxiv.org/pdf/1512.03385.pdf
"""
def __init__(self, n_samples_out, n_filters_out, kernel_initializer='he_normal',
dropout_rate=0.8, kernel_size=17, preactivation=True,
postactivation_bn=False, activation_function='relu'):
self.n_samples_out = n_samples_out
self.n_filters_out = n_filters_out
self.kernel_initializer = kernel_initializer
self.dropout_rate = dropout_rate
self.kernel_size = kernel_size
self.preactivation = preactivation
self.postactivation_bn = postactivation_bn
self.activation_function = activation_function
def _skip_connection(self, y, downsample, n_filters_in):
"""Implement skip connection."""
# Deal with downsampling
if downsample > 1:
y = MaxPooling1D(downsample, strides=downsample, padding='same')(y)
elif downsample == 1:
y = y
else:
raise ValueError("Number of samples should always decrease.")
# Deal with n_filters dimension increase
if n_filters_in != self.n_filters_out:
# This is one of the two alternatives presented in ResNet paper
# Other option is to just fill the matrix with zeros.
y = Conv1D(self.n_filters_out, 1, padding='same',
use_bias=False,
kernel_initializer=self.kernel_initializer
)(y)
return y
def _batch_norm_plus_activation(self, x):
if self.postactivation_bn:
x = Activation(self.activation_function)(x)
x = BatchNormalization(center=False, scale=False)(x)
else:
x = BatchNormalization()(x)
x = Activation(self.activation_function)(x)
return x
def __call__(self, inputs):
"""Residual unit."""
x, y = inputs
n_samples_in = y.shape[1]
downsample = n_samples_in // self.n_samples_out
n_filters_in = y.shape[2]
y = self._skip_connection(y, downsample, n_filters_in)
# 1st layer
x = Conv1D(self.n_filters_out, self.kernel_size, padding='same',
use_bias=False,
kernel_initializer=self.kernel_initializer
)(x)
x = self._batch_norm_plus_activation(x)
if self.dropout_rate > 0:
x = Dropout(self.dropout_rate)(x)
# 2nd layer
x = Conv1D(self.n_filters_out, self.kernel_size, strides=downsample,
padding='same', use_bias=False,
kernel_initializer=self.kernel_initializer
)(x)
if self.preactivation:
x = Add()([x, y]) # Sum skip connection and main connection
y = x
x = self._batch_norm_plus_activation(x)
if self.dropout_rate > 0:
x = Dropout(self.dropout_rate)(x)
else:
x = BatchNormalization()(x)
x = Add()([x, y]) # Sum skip connection and main connection
x = Activation(self.activation_function)(x)
if self.dropout_rate > 0:
x = Dropout(self.dropout_rate)(x)
y = x
return [x, y]
# ----- Model ----- #
kernel_size = 16
kernel_initializer = 'he_normal'
signal = Input(shape=(1000, 12), dtype=np.float32, name='signal')
age_range = Input(shape=(6,), dtype=np.float32, name='age_range')
is_male = Input(shape=(1,), dtype=np.float32, name='is_male')
x = signal
x = Conv1D(64, kernel_size, padding='same', use_bias=False,
kernel_initializer=kernel_initializer
)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x, y = ResidualUnit(512, 128, kernel_size=kernel_size,
kernel_initializer=kernel_initializer
)([x, x])
x, y = ResidualUnit(256, 196, kernel_size=kernel_size,
kernel_initializer=kernel_initializer
)([x, y])
x, y = ResidualUnit(64, 256, kernel_size=kernel_size,
kernel_initializer=kernel_initializer
)([x, y])
x, _ = ResidualUnit(16, 320, kernel_size=kernel_size, kernel_initializer=kernel_initializer
)([x, y])
x = Flatten()(x)
diagn = Dense(2, activation='sigmoid', kernel_initializer=kernel_initializer)(x)
model = Model(signal, diagn)
model.summary()
# ----- Train ----- #
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
loss = 'binary_crossentropy'
lr = 0.001
batch_size = 64
opt = Adam(learning_rate=0.001)
callbacks = [ReduceLROnPlateau(monitor='val_loss',
factor=0.1,
patience=7,
min_lr=lr / 100)]
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=70,
initial_epoch=0,
validation_split=0.1,
shuffle='batch',
callbacks=callbacks,
verbose=1)
# Save final result
model.save("./final_model_middle_one.hdf5")
When I substitute the use of Keras with tf.keras, which I need to use the qkeras library, the model doesn't learn and gets stuck at a much lower accuracy at every iteration. What could be causing this?
When I use keras the accuracy start high at 83% and slightly increases during training.
Train on 17340 samples, validate on 1927 samples
Epoch 1/70
17340/17340 [==============================] - 33s 2ms/step - loss: 0.3908 - accuracy: 0.8314 - val_loss: 0.3283 - val_accuracy: 0.8710
Epoch 2/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3641 - accuracy: 0.8416 - val_loss: 0.3340 - val_accuracy: 0.8612
Epoch 3/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3525 - accuracy: 0.8483 - val_loss: 0.3847 - val_accuracy: 0.8550
Epoch 4/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3354 - accuracy: 0.8563 - val_loss: 0.4641 - val_accuracy: 0.8215
Epoch 5/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3269 - accuracy: 0.8590 - val_loss: 0.7172 - val_accuracy: 0.7870
Epoch 6/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3202 - accuracy: 0.8630 - val_loss: 0.3599 - val_accuracy: 0.8617
Epoch 7/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3101 - accuracy: 0.8678 - val_loss: 0.2659 - val_accuracy: 0.8934
Epoch 8/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.3058 - accuracy: 0.8688 - val_loss: 0.5683 - val_accuracy: 0.8293
Epoch 9/70
17340/17340 [==============================] - 31s 2ms/step - loss: 0.2980 - accuracy: 0.8739 - val_loss: 0.3442 - val_accuracy: 0.8643
Epoch 10/70
7424/17340 [===========>..................] - ETA: 17s - loss: 0.2966 - accuracy: 0.8707
When I use tf.keras the accuracy starts at 50% and does not increase considerably during training:
Epoch 1/70
271/271 [==============================] - 30s 110ms/step - loss: 0.9325 - accuracy: 0.5093 - val_loss: 0.6973 - val_accuracy: 0.5470 - lr: 0.0010
Epoch 2/70
271/271 [==============================] - 29s 108ms/step - loss: 0.8424 - accuracy: 0.5157 - val_loss: 0.6660 - val_accuracy: 0.6528 - lr: 0.0010
Epoch 3/70
271/271 [==============================] - 29s 108ms/step - loss: 0.8066 - accuracy: 0.5213 - val_loss: 0.6441 - val_accuracy: 0.6539 - lr: 0.0010
Epoch 4/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7884 - accuracy: 0.5272 - val_loss: 0.6649 - val_accuracy: 0.6559 - lr: 0.0010
Epoch 5/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7888 - accuracy: 0.5368 - val_loss: 0.6899 - val_accuracy: 0.5760 - lr: 0.0010
Epoch 6/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7617 - accuracy: 0.5304 - val_loss: 0.6641 - val_accuracy: 0.6533 - lr: 0.0010
Epoch 7/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7485 - accuracy: 0.5333 - val_loss: 0.6450 - val_accuracy: 0.6544 - lr: 0.0010
Epoch 8/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7431 - accuracy: 0.5382 - val_loss: 0.6599 - val_accuracy: 0.6539 - lr: 0.0010
Epoch 9/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7336 - accuracy: 0.5421 - val_loss: 0.6532 - val_accuracy: 0.6554 - lr: 0.0010
Epoch 10/70
271/271 [==============================] - 29s 108ms/step - loss: 0.7274 - accuracy: 0.5379 - val_loss: 0.6753 - val_accuracy: 0.6492 - lr: 0.0010
The lines that have been changed between the two trials are the lines where I import keras modules by adding 'tensorflow.' in front of them. I don't know why the results would be so different, possibly due to different default values of certain parameters?

It might be related to how the accuracy metric is computed in keras vs tf.keras. As far as I can tell the accuracy function is usually used when you have one-hot-encoded output. However, it seems that you are outputting two values [A, B] with a sigmoid function applied to each value.
Since I don't know the labels you're using, there might be two cases:
a) You want to predict A or B. If sos I would change the activation function to softmax
b) You want to predict between A or not A and B or not B. In this case I would modify the output tensor shape to have two heads, each with two values: head_A = [A, not_A] and head_B = [B, not_B]. I would then hot-encode the labels respectively and then I would assume you could use the accuracy metric.
Alternatively, you can create a custom metric that is appropriate to your output shape.

I have a similar (same?) problem, I was manipulating some examples from Kaggle, and was unable to save the model using keras. After much Googling I realised that I needed to use tensorflow.keras. This solved my problem, but the 60000 data items I have and was using for training dropped to a reported 1875. Although the error was still 10%.
1875 * 32 = 60000.
This is my fit.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, verbose=True,
callbacks=[early_stopping_monitor])
1539/1875 [=======================>......] - ETA: 3s - loss: 0.4445 - accuracy: 0.8418
It turns out that fit defaults to a batch size of 32. If I increase the batch size to 64 I get half the reported data sets, which makes sense:
model.fit(X_train, y_train, batch_size=64, validation_data=(X_test, y_test), epochs=epochs, verbose=True,
callbacks=[early_stopping_monitor])
938/938 [==============================] - 16s 17ms/step - loss: 0.4568 - accuracy: 0.8388
I noticed from your code that you've set batch_size to 64, and your reported data items reduce from 17340 to 271 which is about a 64th, this must also affect your accuracy due to the data you are using.
From the docs here: https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
batch_size
Integer or None. Number of samples per gradient update. If unspecified, batch_size will default to 32. Do not specify the batch_size if your data is in the form of a dataset, generators, or keras.utils.Sequence instances (since they generate batches).
From the Keras docs: https://keras.rstudio.com/reference/fit.html, it also says that the batch size defaults to 32, it must just be reported differently when training the model.
Hope this helps.

Related

Unbelievable difference of custom metric between fit and evaluate

I run a tensorflow u-net model without dropout (but BN) with a custom metric called "average accuracy". This is literally the section of code. As you can see, datasets must be the same as I do nothing in between fit and evaluate.
model.fit(x=train_ds, epochs=epochs, validation_data=val_ds, shuffle=True,
callbacks=callbacks)
model.evaluate(train_ds)
model.evaluate(val_ds)
train_ds and val_ds are tf.Dataset. And here the output.
...
Epoch 10/10
148/148 [==============================] - 103s 698ms/step - loss: 0.1765 - accuracy: 0.5872 - average_accuracy: 0.9620 - val_loss: 0.5845 - val_accuracy: 0.5788 - val_average_accuracy: 0.5432
148/148 [==============================] - 22s 118ms/step - loss: 0.5056 - accuracy: 0.4540 - average_accuracy: 0.3654
29/29 [==============================] - 5s 122ms/step - loss: 0.5845 - accuracy: 0.5788 - average_accuracy: 0.5432
There is an unbelievable difference between average_accuracy during training (fit) and average_accuracy of evaluate (both on training dataset). I know that BN can have this effect and also that performance changes during training so they will never be equal. But from 96% to 36%?
My custom accuracy is defined here but I doubt it's my personal implementation as it should be somehow close no matter what I did (I think).
Any hint here is useful. I don't know if I should review the custom metric, the dataset, the model. It seems outside all of them.
I tried to continue training after stopping and average_accuracy starts from where it left at more than 90%.
Context of custom metric. I use it for semantic segmentation. So each image has an image of labels as output of WxHx4 (4 are my total number of classes).
It computes the average accuracy, for example, the accuracy of each class separately and then, if they were 4 classes it does sum(accuracies per class) / 4.
Here the main code:
def custom_average_accuracy(y_true, y_pred):
# Mask to remove the labels (y_true) that are zero: ex. [0, 0, 0]
remove_zeros_mask = tf.math.logical_not(tf.math.reduce_all(tf.math.logical_not(tf.cast(y_true, bool)), axis=-1))
y_true = tf.boolean_mask(y_true, remove_zeros_mask)
y_pred = tf.boolean_mask(y_pred, remove_zeros_mask)
num_cls = y_true.shape[-1]
y_pred = tf.math.argmax(y_pred, axis=-1) # ex. [0, 0, 1] -> [2]
y_true = tf.math.argmax(y_true, axis=-1)
accuracies = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
for i in range(0, num_cls):
cls_mask = y_true == i
cls_y_true = tf.boolean_mask(y_true, cls_mask)
if not tf.equal(tf.size(cls_y_true), 0): # Some images don't have all the classes present.
new_acc = _accuracy(y_true=cls_y_true, y_pred=tf.boolean_mask(y_pred, cls_mask))
accuracies = accuracies.write(accuracies.size(), new_acc)
accuracies = accuracies.stack()
return tf.math.reduce_sum(accuracies) / tf.cast(len(accuracies), dtype=accuracies.dtype)
I believe the problem might be on the if not tf.equal(tf.size(cls_y_true), 0) line but I still can't seem were.
More wird information. This is exactly my lines of code:
x_input, y_true = np.concatenate([x for x, y in ds], axis=0), np.concatenate([y for x, y in ds], axis=0)
model.evaluate(x=x_input, y=y_true) # This gets 38% accuracy
model.evaluate(ds) # This gets 55% accuracy
What the hell is going on here? How can those lines of code give a different result?!?!
So now I have that if I don't do the ds = ds.shuffle() the example up (30ish vs 50ish ACC values) are Ok.
I tried to reproduce this behavior but could not find the discrepancies you noted. The only thing I changed was not tf.equal to tf.math.not_equal:
import pathlib
import tensorflow as tf
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)
num_classes = 5
batch_size = 32
img_height = 180
img_width = 180
val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
def to_categorical(images, labels):
return images, tf.one_hot(labels, num_classes)
train_ds = train_ds.map(to_categorical)
val_ds = val_ds.map(to_categorical)
model = tf.keras.Sequential([
tf.keras.layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(num_classes)
])
def _accuracy(y_true, y_pred):
y_true.shape.assert_is_compatible_with(y_pred.shape)
if y_true.dtype != y_pred.dtype:
y_pred = tf.cast(y_pred, y_true.dtype)
reduced_sum = tf.reduce_sum(tf.cast(tf.math.equal(y_true, y_pred), tf.keras.backend.floatx()), axis=-1)
return tf.math.divide_no_nan(reduced_sum, tf.cast(tf.shape(y_pred)[-1], reduced_sum.dtype))
def custom_average_accuracy(y_true, y_pred):
# Mask to remove the labels (y_true) that are zero: ex. [0, 0, 0]
remove_zeros_mask = tf.math.logical_not(tf.math.reduce_all(tf.math.logical_not(tf.cast(y_true, bool)), axis=-1))
y_true = tf.boolean_mask(y_true, remove_zeros_mask)
y_pred = tf.boolean_mask(y_pred, remove_zeros_mask)
num_cls = y_true.shape[-1]
y_pred = tf.math.argmax(y_pred, axis=-1) # ex. [0, 0, 1] -> [2]
y_true = tf.math.argmax(y_true, axis=-1)
accuracies = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
for i in range(0, num_cls):
cls_mask = y_true == i
cls_y_true = tf.boolean_mask(y_true, cls_mask)
if tf.math.not_equal(tf.size(cls_y_true), 0): # Some images don't have all the classes present.
new_acc = _accuracy(y_true=cls_y_true, y_pred=tf.boolean_mask(y_pred, cls_mask))
accuracies = accuracies.write(accuracies.size(), new_acc)
accuracies = accuracies.stack()
return tf.math.reduce_sum(accuracies) / tf.cast(len(accuracies), dtype=accuracies.dtype)
model.compile(optimizer='adam',
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
metrics=['accuracy', custom_average_accuracy])
epochs=10
history = model.fit(
train_ds,
validation_data=val_ds,
epochs=epochs)
model.evaluate(train_ds)
model.evaluate(val_ds)
Found 3670 files belonging to 5 classes.
Using 734 files for validation.
Found 3670 files belonging to 5 classes.
Using 2936 files for training.
Epoch 1/10
92/92 [==============================] - 11s 95ms/step - loss: 1.6220 - accuracy: 0.2868 - custom_average_accuracy: 0.2824 - val_loss: 1.2868 - val_accuracy: 0.4946 - val_custom_average_accuracy: 0.4597
Epoch 2/10
92/92 [==============================] - 8s 85ms/step - loss: 1.2131 - accuracy: 0.4785 - custom_average_accuracy: 0.4495 - val_loss: 1.2051 - val_accuracy: 0.4673 - val_custom_average_accuracy: 0.4350
Epoch 3/10
92/92 [==============================] - 8s 84ms/step - loss: 1.0713 - accuracy: 0.5620 - custom_average_accuracy: 0.5404 - val_loss: 1.1070 - val_accuracy: 0.5232 - val_custom_average_accuracy: 0.5003
Epoch 4/10
92/92 [==============================] - 8s 83ms/step - loss: 0.9463 - accuracy: 0.6281 - custom_average_accuracy: 0.6203 - val_loss: 0.9880 - val_accuracy: 0.5967 - val_custom_average_accuracy: 0.5755
Epoch 5/10
92/92 [==============================] - 8s 84ms/step - loss: 0.8400 - accuracy: 0.6771 - custom_average_accuracy: 0.6730 - val_loss: 0.9420 - val_accuracy: 0.6308 - val_custom_average_accuracy: 0.6245
Epoch 6/10
92/92 [==============================] - 8s 83ms/step - loss: 0.7594 - accuracy: 0.7027 - custom_average_accuracy: 0.7004 - val_loss: 0.8972 - val_accuracy: 0.6431 - val_custom_average_accuracy: 0.6328
Epoch 7/10
92/92 [==============================] - 8s 82ms/step - loss: 0.6211 - accuracy: 0.7619 - custom_average_accuracy: 0.7563 - val_loss: 0.8999 - val_accuracy: 0.6431 - val_custom_average_accuracy: 0.6174
Epoch 8/10
92/92 [==============================] - 8s 82ms/step - loss: 0.5108 - accuracy: 0.8116 - custom_average_accuracy: 0.8046 - val_loss: 0.8809 - val_accuracy: 0.6689 - val_custom_average_accuracy: 0.6457
Epoch 9/10
92/92 [==============================] - 8s 83ms/step - loss: 0.3985 - accuracy: 0.8535 - custom_average_accuracy: 0.8534 - val_loss: 0.9364 - val_accuracy: 0.6676 - val_custom_average_accuracy: 0.6539
Epoch 10/10
92/92 [==============================] - 8s 83ms/step - loss: 0.3023 - accuracy: 0.8995 - custom_average_accuracy: 0.9010 - val_loss: 1.0118 - val_accuracy: 0.6662 - val_custom_average_accuracy: 0.6405
92/92 [==============================] - 6s 62ms/step - loss: 0.2038 - accuracy: 0.9363 - custom_average_accuracy: 0.9357
23/23 [==============================] - 2s 50ms/step - loss: 1.0118 - accuracy: 0.6662 - custom_average_accuracy: 0.663
Well, I was using a TensorFlow dataset. I changed to NumPy and now all seems logical and works.
Still, I need to know the reason tf ds didn't work but at least I don't longer have these weird results.
Not tested yet (I would need to get the code back to what it was, probably do it someday) but this might be related.

"No gradients provided for any variable" when trying to fit Keras Sequential

I'm trying to create and train a Sequential model like so:
def model(training: Dataset, validation: Dataset):
model = Sequential(layers=[Embedding(input_dim=1001, output_dim=16), Dropout(0.2), GlobalAveragePooling1D(), Dropout(0.2), Dense(1)])
model.compile(loss=BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=BinaryAccuracy(threshold=0.0))
model.fit(x=training, validation_data=validation, epochs=10)
When I run it, I get the following error the model.fit line:
ValueError: No gradients provided for any variable: ['embedding/embeddings:0', 'dense/kernel:0', 'dense/bias:0'].
I've come across some answers talking about the use of optimizers, but how would that apply to Sequential rather than Model? Is there something else that I'm missing?
Edit: The result of print(training):
<MapDataset shapes: ((None, 250), (None,)), types: (tf.int64, tf.int32)>
Edit: A script that will reproduce the error using IMDB sample data
from tensorflow.keras import Sequential
from tensorflow import data
from keras.layers import TextVectorization
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dropout, GlobalAveragePooling1D, Dense
from tensorflow.keras.metrics import BinaryAccuracy, BinaryCrossentropy
import os
def split_dataset(dataset: data.Dataset):
record_count = len(list(dataset))
training_count = int((70 / 100) * record_count)
validation_count = int((15 / 100) * record_count)
raw_train_ds = dataset.take(training_count)
raw_val_ds = dataset.skip(training_count).take(validation_count)
raw_test_ds = dataset.skip(training_count + validation_count)
return {"train": raw_train_ds, "test": raw_test_ds, "validate": raw_val_ds}
def clean(text, label):
return tf.strings.unicode_transcode(text, "US ASCII", "UTF-8")
def vectorize_dataset(dataset: data.Dataset):
return dataset.map(vectorize_text)
def vectorize_text(text, label):
text = tf.expand_dims(text, -1)
return vectorize_layer(text), label
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
dataset_tar = tf.keras.utils.get_file("aclImdb_v1", url,
untar=True, cache_dir='.',
cache_subdir='')
dataset_dir = os.path.join(os.path.dirname(dataset_tar), 'aclImdb')
batch_size = 32
seed = 42
dataset = tf.keras.preprocessing.text_dataset_from_directory(
'aclImdb/train',
batch_size=batch_size,
validation_split=0.2,
subset='training',
seed=seed)
split_data = split_dataset(dataset)
raw_train = split_data['train']
raw_val = split_data['validate']
raw_test = split_data['test']
vectorize_layer = TextVectorization(max_tokens=10000, output_mode="int", output_sequence_length=250, ngrams=1)
cleaned_text = raw_train.map(clean)
vectorize_layer.adapt(cleaned_text)
train = vectorize_dataset(raw_train)
test = vectorize_dataset(raw_test)
validate = vectorize_dataset(raw_val)
def model(training, validation):
sequential_model = Sequential(
layers=[Embedding(input_dim=1001, output_dim=16), Dropout(0.2), GlobalAveragePooling1D(), Dropout(0.2),
Dense(1)])
sequential_model.compile(loss=BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=BinaryAccuracy(threshold=0.0))
sequential_model.fit(x=training, validation_data=validation, epochs=10)
model(train, validate)
The problem in your code is occurring at below line:
vectorize_layer = TextVectorization(max_tokens=10000, output_mode="int", output_sequence_length=250, ngrams=1)
The max_tokens in the TextVectorization layer corresponds to the total number of unique words in the vocabulary.
Embedding Layer: The Embedding layer can be understood as a lookup table that maps from integer indices (which stand for specific words) to dense vectors (their embeddings) .
In your code, the Embedding dimensions are (1001,16) that means you are only accomodating the integers that map the specific words in a range of 1001, any indices that forms a (row, column) pair, which corresponds to a value greater than 1001 are not taken care off. Therefore, the ValueError.
I changed the TextVectorization(max_tokens=5000) and also Embedding(5000, 16), and ran your code.
What I got is shown below:
def model(training, validation):
model = keras.Sequential(
[
layers.Embedding(input_dim=5000, output_dim=16),
layers.Dropout(0.2),
layers.GlobalAveragePooling1D(),
layers.Dropout(0.2),
layers.Dense(1),
]
)
model.compile(
optimizer = keras.optimizers.Adam(),
loss=keras.losses.BinaryCrossentropy(from_logits=True),
metrics=keras.metrics.BinaryAccuracy(threshold=0.0)
)
model.fit(x=training, validation_data=validation, epochs=10)
return model
Output:
Epoch 1/10 437/437 [==============================] - 10s 22ms/step - loss: 0.6797 - binary_accuracy: 0.6455 - val_loss: 0.6539 - val_binary_accuracy: 0.7554
Epoch 2/10 437/437 [==============================] - 10s 22ms/step - loss: 0.6109 - binary_accuracy: 0.7625 - val_loss: 0.5700 - val_binary_accuracy: 0.7880
Epoch 3/10 437/437 [==============================] - 9s 22ms/step - loss: 0.5263 - binary_accuracy: 0.8098 - val_loss: 0.4931 - val_binary_accuracy: 0.8233
Epoch 4/10 437/437 [==============================] - 10s 22ms/step - loss: 0.4580 - binary_accuracy: 0.8368 - val_loss: 0.4373 - val_binary_accuracy: 0.8448
Epoch 5/10 437/437 [==============================] - 10s 22ms/step - loss: 0.4072 - binary_accuracy: 0.8560 - val_loss: 0.4003 - val_binary_accuracy: 0.8522
Epoch 6/10 437/437 [==============================] - 10s 22ms/step - loss: 0.3717 - binary_accuracy: 0.8641 - val_loss: 0.3733 - val_binary_accuracy: 0.8589
Epoch 7/10 437/437 [==============================] - 10s 22ms/step - loss: 0.3451 - binary_accuracy: 0.8728 - val_loss: 0.3528 - val_binary_accuracy: 0.8582
Epoch 8/10 437/437 [==============================] - 9s 22ms/step - loss: 0.3220 - binary_accuracy: 0.8806 - val_loss: 0.3345 - val_binary_accuracy: 0.8673
Epoch 9/10 437/437 [==============================] - 9s 22ms/step - loss: 0.3048 - binary_accuracy: 0.8868 - val_loss: 0.3287 - val_binary_accuracy: 0.8673
Epoch 10/10 437/437 [==============================] - 10s 22ms/step - loss: 0.2891 - binary_accuracy: 0.8929 - val_loss: 0.3222 - val_binary_accuracy: 0.8679
BinaryCrossentropy is imported from tf.keras.metrics hence gradients could not be computed.
Correct import should have been from tensorflow.keras.losses import BinaryCrossentropy.

Keras LSTM Model not learning

I wrote this code a few days ago and I had a few bugs but with some help, I was able to fix them. The Model is not learning. I tried different batch sizes, different amount of epochs, different activation functions, checked my data a few times for flaws I wasn't able to find any. It is due in a week or so for a school project. Any help will be very much valued.
Here is the code.
from keras.layers import Dense, Input, Concatenate, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.layers import LSTM
import tensorflow as tf
import NetworkRequest as NR
import ParseNetworkRequest as PNR
import numpy as np
def buildModel():
_Price = Input(shape=(1, 1))
_Volume = Input(shape=(1, 1))
PriceLayer = LSTM(128)(_Price)
VolumeLayer = LSTM(128)(_Volume)
merged = Concatenate(axis=1)([PriceLayer, VolumeLayer])
Dropout(0.2)
dense1 = Dense(128, input_dim=2, activation='relu', use_bias=True)(merged)
Dropout(0.2)
dense2 = Dense(64, input_dim=2, activation='relu', use_bias=True)(dense1)
Dropout(0.2)
output = Dense(1, activation='softmax', use_bias=True)(dense2)
opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
_Model = Model(inputs=[_Price, _Volume], output=output)
_Model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])
return _Model
if __name__ == '__main__':
api_key = "47BGPYJPFN4CEC20"
stock = "DJI"
Index = ['4. close', '5. volume']
RawData = NR.Initial_Network_Request(api_key, stock)
Closing = PNR.Parse_Network_Request(RawData, Index[0])
Volume = PNR.Parse_Network_Request(RawData, Index[1])
Length = len(Closing)
scalar = MinMaxScaler(feature_range=(0, 1))
Closing_scaled = scalar.fit_transform(np.reshape(Closing[:-1], (-1, 1)))
Volume_scaled = scalar.fit_transform(np.reshape(Volume[:-1], (-1, 1)))
Labels_scaled = scalar.fit_transform(np.reshape(Closing[1:], (-1, 1)))
Train_Closing = Closing_scaled[:int(0.9 * Length)]
Train_Closing = np.reshape(Train_Closing, (Train_Closing.shape[0], 1, 1))
Train_Volume = Volume_scaled[:int(0.9 * Length)]
Train_Volume = np.reshape(Train_Volume, (Train_Volume.shape[0], 1, 1))
Train_Labels = Labels_scaled[:int((0.9 * Length))]
Train_Labels = np.reshape(Train_Labels, (Train_Labels.shape[0], 1))
# -------------------------------------------------------------------------------------------#
Test_Closing = Closing_scaled[int(0.9 * Length):(Length - 1)]
Test_Closing = np.reshape(Test_Closing, (Test_Closing.shape[0], 1, 1))
Test_Volume = Volume_scaled[int(0.9 * Length):(Length - 1)]
Test_Volume = np.reshape(Test_Volume, (Test_Volume.shape[0], 1, 1))
Test_Labels = Labels_scaled[int(0.9 * Length):(Length - 1)]
Test_Labels = np.reshape(Test_Labels, (Test_Labels.shape[0], 1))
Predict_Closing = Closing_scaled[-1]
Predict_Closing = np.reshape(Predict_Closing, (Predict_Closing.shape[0], 1, 1))
Predict_Volume = Volume_scaled[-1]
Predict_Volume = np.reshape(Predict_Volume, (Predict_Volume.shape[0], 1, 1))
Predict_Label = Labels_scaled[-1]
Predict_Label = np.reshape(Predict_Label, (Predict_Label.shape[0], 1))
model = buildModel()
model.fit(
[
Train_Closing,
Train_Volume
],
[
Train_Labels
],
validation_data=(
[
Test_Closing,
Test_Volume
],
[
Test_Labels
]
),
epochs=10,
batch_size=Length
)
This is the output when I run it.
Using TensorFlow backend.
2020-01-01 16:31:47.905012: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2199985000 Hz
2020-01-01 16:31:47.906105: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x49214f0 executing computations on platform Host. Devices:
2020-01-01 16:31:47.906137: I tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device (0): Host, Default Version
/home/martin/PycharmProjects/MarketPredictor/Model.py:26: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=[<tf.Tenso..., outputs=Tensor("de...)`
_Model = Model(inputs=[_Price, _Volume], output=output)
Train on 4527 samples, validate on 503 samples
Epoch 1/10
4527/4527 [==============================] - 1s 179us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 2/10
4527/4527 [==============================] - 0s 41us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 3/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 4/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 5/10
4527/4527 [==============================] - 0s 43us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 6/10
4527/4527 [==============================] - 0s 39us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 7/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 8/10
4527/4527 [==============================] - 0s 39us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 9/10
4527/4527 [==============================] - 0s 42us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Epoch 10/10
4527/4527 [==============================] - 0s 38us/step - loss: 0.4716 - accuracy: 2.2090e-04 - val_loss: 0.6772 - val_accuracy: 0.0000e+00
Process finished with exit code 0
The loss is high, and the accuracy is 0.
Please help.
You're using activation functions and metrics made for a classification task, not a stock forecasting task (with a continuous target).
For continuous targets, your final activation layer should be linear. Metrics should be mse or mae, not accuracy.
accuracy would only be satisfied is the dji prediction is exactly equal to the actual price. Since dji has at least 7 digits, it's nearly impossible.
Here's my suggestion:
Use a simpler network: Not sure how big is your dataset, but sometimes using dense. layer isn't helpful. Looks like the weights of there intermediate layers are not changing at all. Try the model with just one dense layer.
Reduce dropout: Try with using one dropout layer with Dropout(0.1).
Adam defaults: Start with using adam optimizer with its default parameters.
Metric selection: As mentioned by Nicolas's answer, use a regression metric instead of accuracy.

Keras train and validation metric values are different even when using same data (Logistic regression)

I have been trying to better understand the train/validation sequence in the keras model fit() loop. So I tried out a simple training loop where I attempted to fit a simple logistic regression model with input data consisting of a single feature.
I feed the same data for both training and validation. Under those conditions, and by specifying batch size to be the same and total data size, one would expect to obtain exactly the same loss and accuracy. But this is not the case.
Here is my code:
Generate some two random data with two classes:
N = 100
x = np.concatenate([np.random.randn(N//2, 1), np.random.randn(N//2, 1)+2])
y = np.concatenate([np.zeros(N//2), np.ones(N//2)])
And plotting the two class data distribution (one feature x):
data = pd.DataFrame({'x': x.ravel(), 'y': y})
sns.violinplot(x='x', y='y', inner='point', data=data, orient='h')
pyplot.tight_layout(0)
pyplot.show()
Build and fit the keras model:
model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='sigmoid', input_dim=1)])
model.compile(optimizer=tf.keras.optimizers.SGD(2), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=10, validation_data=(x, y), batch_size=N)
Notice that I have specified the data x and targets y for both training and for validation_data. Also, the batch_size is same as total size batch_size=N.
The training results are:
100/100 [==============================] - 1s 5ms/step - loss: 1.4500 - acc: 0.2300 - val_loss: 0.5439 - val_acc: 0.7200
Epoch 2/10
100/100 [==============================] - 0s 18us/step - loss: 0.5439 - acc: 0.7200 - val_loss: 0.4408 - val_acc: 0.8000
Epoch 3/10
100/100 [==============================] - 0s 16us/step - loss: 0.4408 - acc: 0.8000 - val_loss: 0.3922 - val_acc: 0.8300
Epoch 4/10
100/100 [==============================] - 0s 16us/step - loss: 0.3922 - acc: 0.8300 - val_loss: 0.3659 - val_acc: 0.8400
Epoch 5/10
100/100 [==============================] - 0s 17us/step - loss: 0.3659 - acc: 0.8400 - val_loss: 0.3483 - val_acc: 0.8500
Epoch 6/10
100/100 [==============================] - 0s 16us/step - loss: 0.3483 - acc: 0.8500 - val_loss: 0.3356 - val_acc: 0.8600
Epoch 7/10
100/100 [==============================] - 0s 17us/step - loss: 0.3356 - acc: 0.8600 - val_loss: 0.3260 - val_acc: 0.8600
Epoch 8/10
100/100 [==============================] - 0s 18us/step - loss: 0.3260 - acc: 0.8600 - val_loss: 0.3186 - val_acc: 0.8600
Epoch 9/10
100/100 [==============================] - 0s 18us/step - loss: 0.3186 - acc: 0.8600 - val_loss: 0.3127 - val_acc: 0.8700
Epoch 10/10
100/100 [==============================] - 0s 23us/step - loss: 0.3127 - acc: 0.8700 - val_loss: 0.3079 - val_acc: 0.8800
The results show that val_loss and loss are not the same at the end of each epoch, and also acc and val_acc are not exactly the same. However, based on this setup, one would expect them to be the same.
I have been going through the code in keras, particularly this part:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/engine/training.py#L1364
and so far, all I can say that the difference is due to some different computation through the computation graph.
Does anyone has any idea why there would be such difference?
So after looking more closely at the results, the loss and acc values from the training step are computed BEFORE the current batch is used to update the model.
Thus, in the case of a single batch per epoch, the train acc and loss are evaluated when the batch is fed in, then the model parameters are updated based on the provided optimizer. After the train step is finished, we compute loss and accuracy by feeding in the validation data, which is now evaluated using a new updated model.
This is evident from the training results output, where validation accuracy and loss are in epoch 1 are equal to train accuracy and loss in epoch 2, etc...
A quick check using tensorflow confirmed that values are fetched before variables are updated:
import tensorflow as tf
import numpy as np
np.random.seed(1)
x = tf.placeholder(dtype=tf.float32, shape=(None, 1), name="x")
y = tf.placeholder(dtype=tf.float32, shape=(None), name="y")
W = tf.get_variable(name="W", shape=(1, 1), dtype=tf.float32, initializer=tf.constant_initializer(0))
b = tf.get_variable(name="b", shape=1, dtype=tf.float32, initializer=tf.constant_initializer(0))
z = tf.matmul(x, W) + b
error = tf.square(z - y)
obj = tf.reduce_mean(error, name="obj")
opt = tf.train.MomentumOptimizer(learning_rate=0.025, momentum=0.9)
grads = opt.compute_gradients(obj)
train_step = opt.apply_gradients(grads)
N = 100
x_np = np.random.randn(N).reshape(-1, 1)
y_np = 2*x_np + 3 + np.random.randn(N)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(2):
res = sess.run([obj, W, b, train_step], feed_dict={x: x_np, y: y_np})
print('MSE: {}, W: {}, b: {}'.format(res[0], res[1][0, 0], res[2][0]))
Output:
MSE: 14.721437454223633, W: 0.0, b: 0.0
MSE: 13.372591018676758, W: 0.08826743811368942, b: 0.1636980175971985
Since the parameters W and b were initialized to 0, then it is clear that the fetched values is still 0 even though session was run with gradient update request...

Keras Metric for bucketizing a regression output

How do I define a custom keras metric for computing accuracy like so,
y_true = [12.5, 45.5]
y_predicted = [14.5, 29]
splits = [-float("inf"), 10, 20, 30, float("inf")]
"""
Splits to Classes translation =>
Class 0: -inf to 9
Class 1: 10 to 19
Class 2: 20 to 29
Class 3: 30 to inf
"""
# using the above translation,
y_true_classes = [1, 3]
y_predicted_classes = [1, 2]
accuracy = K.equal( y_true_classes, y_predicted_classes ) # => 0.5 here
return accuracy
Here is an idea on how you might you around implementing this (although probably not the best one).
def convert_to_classes(vals, splits):
out = tf.zeros_like(vals, dtype=tf.int32)
for split in splits:
out = tf.where(vals > split, out + 1, out)
return out
def my_acc(splits):
def custom_acc(y_true, y_pred):
y_true = convert_to_classes(y_true, splits)
y_pred = convert_to_classes(y_pred, splits)
return K.mean(K.equal(y_true, y_pred))
return custom_acc
The function convert_to_classes converts the floats to bucks, assuming the bounds are always +-inf.
The closure my_acc lets you define the splits (without +-inf) at compile time (added statically to the graph), and then returns a metric function as expected with keras.
Testing using tensorflow:
y_true = tf.constant([12.5, 45.5])
y_pred = tf.constant([14.5, 29])
with tf.Session() as sess:
print(sess.run(my_acc((10, 20, 30))(y_true, y_pred)))
gives the expected 0.5 accuracy.
And quick test with Keras:
x = np.random.randn(100, 10)*100
y = np.random.randn(100)*100
model = Sequential([Dense(20, activation='relu'),
Dense(1, activation=None)])
model.compile(optimizer='Adam',
loss='mse',
metrics=[my_acc(splits=(10, 20, 30))])
model.fit(x, y, batch_size=32, epochs=10)
Given the metric (named as the inner function in the closure custom_acc)
100/100 [==============================] - 0s 2ms/step - loss: 10242.2591 - custom_acc: 0.4300
Epoch 2/10
100/100 [==============================] - 0s 53us/step - loss: 10101.9658 - custom_acc: 0.4200
Epoch 3/10
100/100 [==============================] - 0s 53us/step - loss: 10011.4662 - custom_acc: 0.4300
Epoch 4/10
100/100 [==============================] - 0s 51us/step - loss: 9899.7181 - custom_acc: 0.4300
Epoch 5/10
100/100 [==============================] - 0s 50us/step - loss: 9815.1607 - custom_acc: 0.4200
Epoch 6/10
100/100 [==============================] - 0s 74us/step - loss: 9736.5554 - custom_acc: 0.4300
Epoch 7/10
100/100 [==============================] - 0s 50us/step - loss: 9667.0845 - custom_acc: 0.4400
Epoch 8/10
100/100 [==============================] - 0s 58us/step - loss: 9589.5439 - custom_acc: 0.4400
Epoch 9/10
100/100 [==============================] - 0s 61us/step - loss: 9511.8003 - custom_acc: 0.4400
Epoch 10/10
100/100 [==============================] - 0s 51us/step - loss: 9443.9730 - custom_acc: 0.4400

Categories

Resources