Tensorflow 2.* - Get an internal Keras layer prediction values [duplicate] - python

This question already has answers here:
Obtaining output of an Intermediate layer in TensorFlow/Keras
(2 answers)
Closed 8 months ago.
I have a TensorFlow model like this-
I would like to see the values produced by the layer marked in red (5 float values) for a specific input, to check how the model responds at this layer (the attention layer). I need these values to verify whether my attention layer is extracting values correctly.
As the model is an end-to-end model, I am unsure how I can extract values of an internal layer for specific input. Can anyone please help?

You can write a custom Callback class, pass your input to it, and inspect the output of any layer you want:
import tensorflow as tf
import numpy as np
class CustomCallback(tf.keras.callbacks.Callback):
    """Print one layer's response to a fixed probe input after every epoch.

    The probe is fed directly into the selected layer (it does not pass
    through the layers that precede it), so its last dimension must match
    what that layer expects as input.

    Args:
        layer_index: Position of the layer to probe in ``self.model.layers``.
            Defaults to 6, the value the original snippet hard-coded.
        data: Probe input handed to the layer. When omitted, a random array
            of shape (1, 10) is drawn once and reused for every epoch.
    """

    def __init__(self, layer_index=6, data=None):
        # Let the Keras base class set up its own attributes first.
        super().__init__()
        self.layer_index = layer_index
        self.data = np.random.rand(1, 10) if data is None else data

    def on_epoch_end(self, epoch, logs=None):
        """Call the probed layer on the stored input and print both."""
        dns_layer = self.model.layers[self.layer_index]
        outputs = dns_layer(self.data)
        tf.print(f'\n input: {self.data}')
        tf.print(f'\n output: {outputs}')
# Random stand-in data: 10 samples of shape (32, 32), one target value each.
x_train = tf.random.normal((10, 32, 32))
y_train = tf.random.uniform((10, 1), maxval=10)

# Same nine layers as before, declared up front instead of added one by one.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(
        256,
        input_shape=(x_train.shape[1], x_train.shape[2]),
        return_sequences=True,
    ),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(256),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(5, activation='softmax'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
)
model.summary()

# List the layer objects so the index used by the callback can be checked.
for layer in model.layers:
    print(layer)

model.fit(
    x_train,
    y_train,
    epochs=3,
    callbacks=[CustomCallback()],
    batch_size=32,
)
Output:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 32, 256) 295936
dropout (Dropout) (None, 32, 256) 0
lstm_1 (LSTM) (None, 256) 525312
dropout_1 (Dropout) (None, 256) 0
dense (Dense) (None, 10) 2570
dropout_2 (Dropout) (None, 10) 0
dense_1 (Dense) (None, 5) 55
dropout_3 (Dropout) (None, 5) 0
dense_2 (Dense) (None, 10) 60
=================================================================
Total params: 823,933
Trainable params: 823,933
Non-trainable params: 0
_________________________________________________________________
<keras.layers.recurrent_v2.LSTM object at 0x7f6e2163dbd0>
<keras.layers.core.dropout.Dropout object at 0x7f6da1d2efd0>
<keras.layers.recurrent_v2.LSTM object at 0x7f6d9dfe0a50>
<keras.layers.core.dropout.Dropout object at 0x7f6d9de1ec90>
<keras.layers.core.dense.Dense object at 0x7f6d9de04dd0>
<keras.layers.core.dropout.Dropout object at 0x7f6d9dd549d0>
<keras.layers.core.dense.Dense object at 0x7f6d9dd8ec90>
<keras.layers.core.dropout.Dropout object at 0x7f6d9dedd650>
<keras.layers.core.dense.Dense object at 0x7f6d9ddc2ed0>
Epoch 1/3
1/1 [==============================] - ETA: 0s - loss: 2.4188
input: [[0.91498145 0.98430978 0.22720893 0.76032816 0.78405846 0.72664182
0.7772921 0.9851892 0.41715033 0.21014543]]
output: [[0.5767021 0.04140956 0.1909151 0.06737834 0.12359484]]
1/1 [==============================] - 12s 12s/step - loss: 2.4188
Epoch 2/3
1/1 [==============================] - ETA: 0s - loss: 2.4111
input: [[0.91498145 0.98430978 0.22720893 0.76032816 0.78405846 0.72664182
0.7772921 0.9851892 0.41715033 0.21014543]]
output: [[0.5780218 0.04101932 0.18909878 0.06769065 0.12416941]]
1/1 [==============================] - 0s 376ms/step - loss: 2.4111
Epoch 3/3
1/1 [==============================] - ETA: 0s - loss: 2.3978
input: [[0.91498145 0.98430978 0.22720893 0.76032816 0.78405846 0.72664182
0.7772921 0.9851892 0.41715033 0.21014543]]
output: [[0.579072 0.04067017 0.1874026 0.0679936 0.12486164]]
1/1 [==============================] - 0s 458ms/step - loss: 2.3978

Related

sklearn GridSearchCV uses default parameters instead of param grid

I'm using a custom regressor for gridsearchCV but it is behaving strangely. It does the gridsearch with the default parameters instead of the given parameter grid, and then in the end runs it once with the parameter grid. I made a dummy example with mnist fashion (I know, not regression but it shows the problem) to demonstrate the problem (see code and output below).
As you can see in the output, the first two models use the default parameters (one layer, no drop rate), even though the CV line ([CV 1/2]...) is printed with the correct parameters... and if I print self.drop_rate in the fit method it prints the correct drop_rate, while the model clearly doesn't use it...
Code:
import tensorflow as tf
print("tf version: ", tf.__version__)
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
def createNNModel(unit1, unit2, drop_rate,lr):
    """Build and compile a small dense network for 28x28 inputs.

    Args:
        unit1: Width of the first hidden Dense layer.
        unit2: Width of the second hidden Dense layer; 0 omits that layer.
        drop_rate: Dropout rate placed after each hidden layer; 0 omits
            the Dropout layers entirely.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Sequential`` model ending in ``Dense(10)``,
        using mean squared error as loss and mean absolute error as metric.
    """
    with_dropout = drop_rate != 0

    # Layers are created in the same order the four original branches used.
    stack = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(unit1, activation='relu'),
    ]
    if with_dropout:
        stack.append(tf.keras.layers.Dropout(drop_rate))
    if unit2 != 0:
        stack.append(tf.keras.layers.Dense(unit2, activation='relu'))
        if with_dropout:
            stack.append(tf.keras.layers.Dropout(drop_rate))
    stack.append(tf.keras.layers.Dense(10))

    model = tf.keras.Sequential(stack)
    model.compile(
        loss=tf.losses.MeanSquaredError(),
        optimizer=tf.optimizers.Adam(learning_rate=lr),
        metrics=[tf.metrics.MeanAbsoluteError()],
    )
    return model
class MyRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style wrapper around the network from ``createNNModel``."""

    def __init__(self, unit1=32, unit2=0, drop_rate=0, lr=0.001):
        """
        Store the hyper-parameters and build the Keras model right away.
        """
        self.unit1 = unit1
        self.unit2 = unit2
        self.drop_rate = drop_rate
        self.lr = lr
        print("INIT DR:", self.drop_rate)
        # NOTE(review): the network is built here from the constructor
        # arguments, so hyper-parameters assigned to the instance afterwards
        # (as happens when a grid search clones the estimator and then sets
        # its parameters) are not reflected in model_.
        self.model_ = createNNModel(unit1, unit2, drop_rate, lr)

    def fit(self, X, y, max_epochs=100):
        """
        Train the wrapped Keras model for ``max_epochs`` epochs, keep the
        training history in ``history_``, print the model summary and
        return ``self``.
        """
        print("FIT DR: ", self.drop_rate)
        self.history_ = self.model_.fit(
            X,
            y,
            epochs=max_epochs,
            verbose=1,
        )
        self.model_.summary()
        return self

    def predict(self, X, y=None):
        """Return the wrapped model's predictions for ``X``; ``y`` is unused."""
        return self.model_.predict(X)

    def score(self, X, y=None):
        """Return ``1 - MAE`` as reported by the wrapped model's evaluate().

        NOTE(review): evaluate() receives only ``X``; the ``y`` argument is
        not forwarded to it.
        """
        mae = self.model_.evaluate(X)[1]  # index 1 is the compiled metric
        return 1 - mae  # the bigger the better
## TUNING
# Each hyper-parameter list holds a single value, so the grid below contains
# exactly one candidate combination.
units1=[64]
units2=[64]
drop_outs=[0.8]
lrs=[0.01]
# Keys must match the keyword arguments of MyRegressor.__init__.
param_grid={'unit1': units1, 'unit2': units2, 'drop_rate': drop_outs, 'lr': lrs}
# 2-fold cross-validated search, starting from a default-constructed estimator.
gs= GridSearchCV(MyRegressor(), param_grid, cv=2, verbose=3)
# max_epochs is an extra keyword for the estimator's fit(); the "Epoch 1/2"
# lines in the log below are consistent with it being forwarded.
gs.fit(X=train_images, y=train_labels, max_epochs=2)
Output:
tf version: 2.9.0
INIT DR: 0
INIT DR: 0
Fitting 2 folds for each of 1 candidates, totalling 2 fits
INIT DR: 0
FIT DR: 0.8
Epoch 1/2
938/938 [==============================] - 1s 1ms/step - loss: 95.2783 - mean_absolute_error: 5.1844
Epoch 2/2
938/938 [==============================] - 1s 1ms/step - loss: 21.4664 - mean_absolute_error: 3.7982
Model: "sequential_46"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_8 (Flatten) (None, 784) 0
dense_55 (Dense) (None, 32) 25120
dense_56 (Dense) (None, 10) 330
=================================================================
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
938/938 [==============================] - 1s 673us/step - loss: 0.0000e+00 - mean_absolute_error: 0.0000e+00
[CV 1/2] END drop_rate=0.8, lr=0.01, unit1=64, unit2=64;, score=1.000 total time= 3.1s
INIT DR: 0
FIT DR: 0.8
Epoch 1/2
938/938 [==============================] - 2s 1ms/step - loss: 60.8985 - mean_absolute_error: 4.7083
Epoch 2/2
938/938 [==============================] - 1s 1ms/step - loss: 20.7136 - mean_absolute_error: 3.7330
Model: "sequential_47"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_9 (Flatten) (None, 784) 0
dense_57 (Dense) (None, 32) 25120
dense_58 (Dense) (None, 10) 330
=================================================================
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
938/938 [==============================] - 1s 679us/step - loss: 0.0000e+00 - mean_absolute_error: 0.0000e+00
[CV 2/2] END drop_rate=0.8, lr=0.01, unit1=64, unit2=64;, score=1.000 total time= 3.4s
INIT DR: 0
INIT DR: 0.8
FIT DR: 0.8
Epoch 1/2
1875/1875 [==============================] - 3s 2ms/step - loss: 731.5312 - mean_absolute_error: 3.8732
Epoch 2/2
1875/1875 [==============================] - 3s 2ms/step - loss: 8.3729 - mean_absolute_error: 2.5103
Model: "sequential_49"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_11 (Flatten) (None, 784) 0
dense_61 (Dense) (None, 64) 50240
dropout_8 (Dropout) (None, 64) 0
dense_62 (Dense) (None, 64) 4160
dropout_9 (Dropout) (None, 64) 0
dense_63 (Dense) (None, 10) 650
=================================================================
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
See this section of the sklearn developer's guide: you shouldn't set self.model_ in the __init__ method; putting that line into the fit method probably works for what you want.
The problem is that the grid search clones its estimator, and that operates by creating a new instance of the same class (without specifying any __init__ parameters!) and then setting its parameters with set_params. So by defining model_ in __init__, your clones all get the default parameters; then you set the parameters for your custom class, but they never make it through to the model_ object itself.

Tensorflow save and load_model not working but save and load_weights does

I am using tensorflow version 2.8.0:
I have seen this issue from multiple sources all over forums, githubs, and even some here for the past 5 years with no definitive answer that has worked for me... For some reason, in certain situations, a loaded model from a previous save yields very different results from the original model evaluation. I haven't seen any well documented and investigative questions about this so I thought I'd show my full code below (simple illustration of the issue).
This is an application of transfer learning from a pre-trained tensorflow model. The model is first trained through 5 epochs on train_data, then fine tuned (with more trainable params) for 5 more. Evaluating the model on test_data shows an accuracy of 0.5671. The model is then saved and loaded in .h5 format (I have also tried the tf SavedModel format and the result is the same). The resultant loaded_model yields an evaluation accuracy on the same, unaltered test_data of 0.4535.
The result should be the same (0.5671)... so to further investigate I decided to save the fine tuned model's weights independently, construct and compile the same model architecture in new_model, and load the saved model's weights into new_model. Evaluating new_model yields the correct result, an accuracy of 0.5671. ----- Okay, so it must be the weights not saving properly right? I pulled the weights from each of these three models (model, loaded_model, new_model) and compared their flattened results. They are all the same. I really have no idea what's going on here but I'm assuming it is not random initialization, because the loaded_model evaluation results really did not perform anywhere near the fine tuned model - I would assume they would converge much closer.
import tensorflow as tf
tf.random.set_seed(42)
import pandas as pd
import numpy as np
import os
import pathlib
data_dir = pathlib.Path("101_food_classes_10_percent/train")
class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
train_dir = './101_food_classes_10_percent/train/'
test_dir = './101_food_classes_10_percent/test/'
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen=ImageDataGenerator()
train_data = datagen.flow_from_directory(directory = train_dir,
target_size = (224,224),
batch_size = 32,
class_mode='categorical')
test_data = datagen.flow_from_directory(directory = test_dir,
target_size = (224,224),
batch_size = 32,
class_mode='categorical')
from tensorflow.keras.layers.experimental import preprocessing
data_augmentation = tf.keras.Sequential([
preprocessing.RandomFlip('horizontal'),
preprocessing.RandomRotation(0.2),
preprocessing.RandomZoom(0.2),
preprocessing.RandomHeight(0.2),
preprocessing.RandomWidth(0.2)
#preprocessing.Rescaling(1/255.) in EfficientNet it's already scaled but could use this for non-scaled
], name = 'data_augmentation')
Found 7575 images belonging to 101 classes.
Found 25250 images belonging to 101 classes.
# Build headless model - Feature Extraction
# Setup base with frozen layers
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable=False
inputs = tf.keras.layers.Input(shape = (224,224,3))
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x) # Pool base_model's outputs into a feature vector
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
model = tf.keras.Model(inputs,outputs)
model.compile('Adam', 'categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 129,381
Non-trainable params: 4,049,571
_________________________________________________________________
history = model.fit(train_data, validation_data=test_data,
validation_steps=int(0.15*len(test_data)),
epochs=5, callbacks = [checkpoint_callback])
Epoch 1/5
237/237 [==============================] - 63s 230ms/step - loss: 3.4712 - accuracy: 0.2482 - val_loss: 2.4446 - val_accuracy: 0.4497
Epoch 2/5
237/237 [==============================] - 52s 221ms/step - loss: 2.3575 - accuracy: 0.4561 - val_loss: 2.0051 - val_accuracy: 0.5093
Epoch 3/5
237/237 [==============================] - 51s 216ms/step - loss: 1.9838 - accuracy: 0.5265 - val_loss: 1.8313 - val_accuracy: 0.5360
Epoch 4/5
237/237 [==============================] - 51s 212ms/step - loss: 1.7497 - accuracy: 0.5761 - val_loss: 1.7417 - val_accuracy: 0.5461
Epoch 5/5
237/237 [==============================] - 53s 221ms/step - loss: 1.6035 - accuracy: 0.6141 - val_loss: 1.7012 - val_accuracy: 0.5601
model.evaluate(test_data)
790/790 [==============================] - 87s 110ms/step - loss: 1.7294 - accuracy: 0.5481
[1.7294203042984009, 0.5480791926383972]
# Fine tuning: unfreeze some layers, lower learning rate by 10x
base_model.trainable=True
# Refreeze every layer except the last 5, adjust finer tuned features down the model
for layer in base_model.layers[:-5]:
layer.trainable=False
# recompile and lower learning rate by 10x
model.compile(tf.keras.optimizers.Adam(learning_rate=0.0001), 'categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
# Fine Tune for 5 more epochs starting with last epoch left off at:
fine_tune_epochs=10 # Total number of epochs we're after: 5 feature extraction, 5 fine tuning
# Continue training the same model; validation uses only the first 15% of
# the test batches per epoch.
# NOTE(review): the log that follows starts at "Epoch 5/10" and runs six
# epochs (5 through 10), not five. Presumably history.epoch[-1] is the
# zero-based index of the last finished epoch; if exactly five more epochs
# are intended, confirm and use history.epoch[-1] + 1.
history_fine_tune = model.fit(train_data,
validation_data = test_data,
validation_steps=int(0.15*len(test_data)),
epochs = fine_tune_epochs,
initial_epoch = history.epoch[-1])
Epoch 5/10
237/237 [==============================] - 59s 220ms/step - loss: 1.3571 - accuracy: 0.6543 - val_loss: 1.6403 - val_accuracy: 0.5567
Epoch 6/10
237/237 [==============================] - 51s 213ms/step - loss: 1.2478 - accuracy: 0.6688 - val_loss: 1.6805 - val_accuracy: 0.5596
Epoch 7/10
237/237 [==============================] - 46s 193ms/step - loss: 1.1424 - accuracy: 0.6964 - val_loss: 1.6352 - val_accuracy: 0.5736
Epoch 8/10
237/237 [==============================] - 45s 191ms/step - loss: 1.0902 - accuracy: 0.7065 - val_loss: 1.6494 - val_accuracy: 0.5657
Epoch 9/10
237/237 [==============================] - 46s 193ms/step - loss: 1.0229 - accuracy: 0.7275 - val_loss: 1.6348 - val_accuracy: 0.5633
Epoch 10/10
237/237 [==============================] - 45s 191ms/step - loss: 0.9704 - accuracy: 0.7434 - val_loss: 1.6990 - val_accuracy: 0.5670
model.evaluate(test_data)
790/790 [==============================] - 83s 105ms/step - loss: 1.6578 - accuracy: 0.5671
[1.657836675643921, 0.5670890808105469]
model.save("./101_food_classes_10_percent/big_modelh5")
loaded_model = tf.keras.models.load_model("./101_food_classes_10_percent/big_modelh5.h5")
loaded_model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280) 0
_________________________________________________________________
dense_1 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
loaded_model.evaluate(test_data)
790/790 [==============================] - 85s 104ms/step - loss: 2.1780 - accuracy: 0.4535 - loss: 2.1790 - accuracy
[2.1780412197113037, 0.4534653425216675]
# Try save_weights to another model
model.save_weights('my_model_weights.h5')
inputs = tf.keras.layers.Input(shape = (224,224,3))
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x) # Pool base_model's outputs into a feature vector
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
new_model = tf.keras.Model(inputs,outputs)
new_model.compile('Adam', 'categorical_crossentropy', metrics=['accuracy'])
new_model.summary()
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_5 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
data_augmentation (Sequentia (None, None, None, 3) 0
_________________________________________________________________
efficientnetb0 (Functional) (None, None, None, 1280) 4049571
_________________________________________________________________
global_average_pooling2d_2 ( (None, 1280) 0
_________________________________________________________________
dense_2 (Dense) (None, 101) 129381
=================================================================
Total params: 4,178,952
Trainable params: 910,821
Non-trainable params: 3,268,131
_________________________________________________________________
new_model.load_weights('my_model_weights.h5')
# Saving weights works... but not save and load_model
new_model.evaluate(test_data)
790/790 [==============================] - 88s 109ms/step - loss: 1.6578 - accuracy: 0.5671
[1.6578353643417358, 0.5670890808105469]
# Check if weights are the same?
m1 = model.get_weights()
m2 = new_model.get_weights()
m3 = loaded_model.get_weights()
len(m1)==len(m2)==len(m3)
True
from collections.abc import Iterable
def flatten(l):
    """Lazily yield the leaf items of an arbitrarily nested iterable.

    Any element that is itself iterable is descended into, except ``str``
    and ``bytes`` objects, which are yielded whole. Items come out in
    depth-first, left-to-right order.

    The traversal keeps an explicit stack of iterators instead of recursing,
    so the nesting depth is not limited by the interpreter's recursion limit.

    Args:
        l: The (possibly nested) iterable to flatten.

    Yields:
        Each non-iterable element, plus each ``str``/``bytes`` element.
    """
    pending = [iter(l)]
    while pending:
        for el in pending[-1]:
            if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
                # Descend; the partially consumed parent stays on the stack
                # and is resumed once this child is exhausted.
                pending.append(iter(el))
                break
            yield el
        else:
            # Innermost iterator is exhausted: resume its parent.
            pending.pop()
# flatten() returns a one-shot generator, so each weight stream is turned
# into a list exactly once. Reusing the generator for a second comparison
# would compare an already exhausted (empty) sequence against m3.
m1 = list(flatten(m1))
m2 = list(flatten(m2))
m3 = list(flatten(m3))
print(m1 == m2)
print(m1 == m3)
True
True
This is because you did not save the entire model with the .h5 extension, even though you load it from a path ending in .h5 (you only used .h5 when saving the weights). Please check the code section below:
model.save("./101_food_classes_10_percent/big_modelh5") # add .h5
loaded_model = tf.keras.models.load_model("./101_food_classes_10_percent/big_modelh5.h5")
loaded_model.summary()
Use this code to save the entire model to a HDF5 file format and try again loading it:
model.save("./101_food_classes_10_percent/big_modelh5.h5")
Check this for more details on saving model in .hdf5 format.

Correct input for 2d convolutional layer, given spatial data with labels

So I'm doing a project where I want to train a neural net to assign labels to points in 3d space.
My input is an alpha carbon trace of amino acids, and my labels are its secondary structure labels, i.e. 3 classes.
My data is exactly like:
2945 training examples, each one of length 748, corresponding to 748 consecutive carbons, each one has 3 features that is xyz coordinates.
So X shape is (2945, 748, 3)
and Y shape is (2945, 748) as it has 2945 examples, each one has 748 labels for each carbon in sequence.
I want to use specifically convolutional layers, as I've read in a few papers that they are good at spatial dependencies, and would do well on such a problem, it's only that I can't get past dimensions there.
I have expanded dims with: X_train = np.expand_dims(X_train,1)
to get None, 1, 748, 3
as it is (i think): ( batch, height, width, channels)
or do I totally miss the point here?
batch will be specified later, height is 1, width of example is 748, and channels are 3? as its xyz
input_shape = (1, 748, 3)
model = Sequential(
[
Input(shape = input_shape ),
Conv2D(filters=16, kernel_size=9, padding='same',
activation = tf.nn.relu),
Dense(4, activation='softmax')
]
)
model.summary()
summary of model:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 1, 748, 16) 64
_________________________________________________________________
dense (Dense) (None, 1, 748, 3) 51
=================================================================
Total params: 115
Trainable params: 115
Non-trainable params: 0
and ofc error:
ValueError: Shapes (None, 1, 748) and (None, 1, 748, 3) are incompatible,
I know it would work with 1 unit Dense layer, but if dimensionality is 1, do I still get classification for 3 states? should be .
Is my thinking right? or is there a misconception?
I would be very grateful for any word of advice.
Thanks in advance.
Okay, I have one-hot encoded my labels and expanded dimension one with:
np.expand_dims(Y, 1)
dimensions achieved:
Y train shape: (2356, 1, 748, 4)
Y test shape: (589, 1, 748, 4)
which, together with a Dense layer of 4 units, makes the dimensions match.
Working code snippet
import tensorflow as tf
import numpy as np
from tensorflow.keras import datasets
import tensorflow.keras as keras
# Random stand-in arrays shaped like the training split described above:
# inputs (2356, 1, 748, 3) and labels (2356, 1, 748, 4).
X_train = np.random.random((2356,1,748,3))
y_train = np.random.random((2356, 1, 748, 4))
# NOTE(review): train_data is built here but never passed to fit() below,
# which receives the raw arrays instead.
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_data = dataset.shuffle(len(X_train)).batch(32)
train_data = train_data.prefetch(
buffer_size=tf.data.experimental.AUTOTUNE)
# Per-sample input shape, batch dimension excluded.
input_shape = (1, 748, 3)
# One 'same'-padded Conv2D followed by a 4-unit softmax Dense layer.
model = tf.keras.Sequential(
[
keras.Input(shape = input_shape ),
keras.layers.Conv2D(filters=16, kernel_size=9, padding='same',
activation = tf.nn.relu),
keras.layers.Dense(4, activation='softmax')
])
model.summary()
model.compile(optimizer='adam',
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['accuracy'])
# Train on the in-memory arrays with batches of 5 samples.
model.fit(X_train, y_train, epochs=5, batch_size=5, verbose=1)
Output
Model: "sequential_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_10 (Conv2D) (None, 1, 748, 16) 3904
dense_10 (Dense) (None, 1, 748, 4) 68
=================================================================
Total params: 3,972
Trainable params: 3,972
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
472/472 [==============================] - 4s 5ms/step - loss: 3.4528 - accuracy: 0.2508
Epoch 2/5
472/472 [==============================] - 2s 4ms/step - loss: 3.8109 - accuracy: 0.2506
Epoch 3/5
472/472 [==============================] - 2s 5ms/step - loss: 3.8099 - accuracy: 0.2507
Epoch 4/5
472/472 [==============================] - 2s 5ms/step - loss: 3.8021 - accuracy: 0.2506
Epoch 5/5
472/472 [==============================] - 3s 5ms/step - loss: 3.7919 - accuracy: 0.2507
<keras.callbacks.History at 0x7f55c00dc550>

Keras 1D CNN always predicts the same result even if accuracy is high on training set

The validation accuracy of my 1D CNN is stuck on 0.5 and that's because I'm always getting the same prediction out of a balanced data set. At the same time my training accuracy keeps increasing and the loss decreasing as intended.
Strangely, if I do model.evaluate() on my training set (that has close to 1 accuracy in the last epoch), the accuracy will also be 0.5. How can the accuracy here differ so much from the training accuracy of the last epoch? I've also tried with a batch size of 1 for both training and evaluating and the problem persists.
Well, I've been searching for different solutions for quite some time but still no luck. Possible problems I've already looked into:
My data set is properly balanced and shuffled;
My labels are correct;
Tried adding fully connected layers;
Tried adding/removing dropout from the fully connected layers;
Tried the same architecture, but with the last layer with 1 neuron and sigmoid activation;
Tried changing the learning rates (went down to 0.0001 but still the same problem).
Here's my code:
import pathlib
import numpy as np
import ipynb.fs.defs.preprocessDataset as preprocessDataset
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv1D, BatchNormalization, Activation, MaxPooling1D, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import SGD
# The datasets directory lives inside the parent of the current working directory.
main_folder = pathlib.Path.cwd().parent
datasetsFolder = f'{main_folder}\\datasets'

# Load both splits; the first three entries of each are unpacked as X, Y and Names.
trainDataset = preprocessDataset.loadDataset('DatasetTime_Sg12p5_Ov75_Train', datasetsFolder)
testDataset = preprocessDataset.loadDataset('DatasetTime_Sg12p5_Ov75_Test', datasetsFolder)
X_train, Y_train, Names_train = trainDataset[0], trainDataset[1], trainDataset[2]
X_test, Y_test, Names_test = testDataset[0], testDataset[1], testDataset[2]

# (filters, kernel_size) of each convolutional stage, in order.
conv_specs = [(16, 61), (32, 3), (64, 3), (64, 3), (64, 3)]

model = Sequential()
model.add(Input(shape=X_train.shape[1:]))
for stage_index, (filters, kernel_size) in enumerate(conv_specs):
    # A pooling layer separates consecutive stages; there is none before the
    # first stage and none after the last one.
    if stage_index > 0:
        model.add(MaxPooling1D(2, strides=2, padding="valid"))
    model.add(Conv1D(filters, kernel_size, strides=1, padding="same"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

# Classifier head: flatten, dropout, one hidden dense layer, 2-way softmax.
head_layers = (
    Flatten(),
    Dropout(0.5),
    Dense(200),
    Activation('relu'),
    Dense(2),
    Activation('softmax'),
)
for head_layer in head_layers:
    model.add(head_layer)

opt = SGD(learning_rate=0.01)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
model.summary()

# Train without shuffling, validating on the test split after every epoch,
# then score the trained model on the training data itself.
model.fit(
    X_train,
    Y_train,
    epochs=10,
    shuffle=False,
    validation_data=(X_test, Y_test),
)
model.evaluate(X_train, Y_train)
Here's model.fit():
model.fit(X_train,Y_train,epochs=10,shuffle=False,validation_data=(X_test, Y_test))
Epoch 1/10
914/914 [==============================] - 277s 300ms/step - loss: 0.6405 - accuracy: 0.6543 - val_loss: 7.9835 - val_accuracy: 0.5000
Epoch 2/10
914/914 [==============================] - 270s 295ms/step - loss: 0.3997 - accuracy: 0.8204 - val_loss: 19.8981 - val_accuracy: 0.5000
Epoch 3/10
914/914 [==============================] - 273s 298ms/step - loss: 0.2976 - accuracy: 0.8730 - val_loss: 1.9558 - val_accuracy: 0.5002
Epoch 4/10
914/914 [==============================] - 278s 304ms/step - loss: 0.2897 - accuracy: 0.8776 - val_loss: 20.2678 - val_accuracy: 0.5000
Epoch 5/10
914/914 [==============================] - 277s 303ms/step - loss: 0.2459 - accuracy: 0.8991 - val_loss: 5.4945 - val_accuracy: 0.5000
Epoch 6/10
914/914 [==============================] - 268s 294ms/step - loss: 0.2008 - accuracy: 0.9181 - val_loss: 32.4579 - val_accuracy: 0.5000
Epoch 7/10
914/914 [==============================] - 271s 297ms/step - loss: 0.1695 - accuracy: 0.9317 - val_loss: 14.9538 - val_accuracy: 0.5000
Epoch 8/10
914/914 [==============================] - 276s 302ms/step - loss: 0.1423 - accuracy: 0.9452 - val_loss: 1.4420 - val_accuracy: 0.4988
Epoch 9/10
914/914 [==============================] - 266s 291ms/step - loss: 0.1261 - accuracy: 0.9497 - val_loss: 4.3830 - val_accuracy: 0.5005
Epoch 10/10
914/914 [==============================] - 272s 297ms/step - loss: 0.1142 - accuracy: 0.9548 - val_loss: 1.6054 - val_accuracy: 0.5009
Here's model.evaluate():
model.evaluate(X_train,Y_train)
914/914 [==============================] - 35s 37ms/step - loss: 1.7588 - accuracy: 0.5009
Here's model.summary():
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 4096, 16) 992
_________________________________________________________________
batch_normalization (BatchNo (None, 4096, 16) 64
_________________________________________________________________
activation (Activation) (None, 4096, 16) 0
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2048, 16) 0
_________________________________________________________________
conv1d_1 (Conv1D) (None, 2048, 32) 1568
_________________________________________________________________
batch_normalization_1 (Batch (None, 2048, 32) 128
_________________________________________________________________
activation_1 (Activation) (None, 2048, 32) 0
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 1024, 32) 0
_________________________________________________________________
conv1d_2 (Conv1D) (None, 1024, 64) 6208
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024, 64) 256
_________________________________________________________________
activation_2 (Activation) (None, 1024, 64) 0
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 512, 64) 0
_________________________________________________________________
conv1d_3 (Conv1D) (None, 512, 64) 12352
_________________________________________________________________
batch_normalization_3 (Batch (None, 512, 64) 256
_________________________________________________________________
activation_3 (Activation) (None, 512, 64) 0
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 256, 64) 0
_________________________________________________________________
conv1d_4 (Conv1D) (None, 256, 64) 12352
_________________________________________________________________
batch_normalization_4 (Batch (None, 256, 64) 256
_________________________________________________________________
activation_4 (Activation) (None, 256, 64) 0
_________________________________________________________________
flatten (Flatten) (None, 16384) 0
_________________________________________________________________
dropout (Dropout) (None, 16384) 0
_________________________________________________________________
dense (Dense) (None, 200) 3277000
_________________________________________________________________
activation_5 (Activation) (None, 200) 0
_________________________________________________________________
dense_1 (Dense) (None, 2) 402
_________________________________________________________________
activation_6 (Activation) (None, 2) 0
=================================================================
Total params: 3,311,834
Trainable params: 3,311,354
Non-trainable params: 480
_________________________________________________________________
... also tried with sigmoid but the issue persists ...
You don't want to be "trying" out activation functions or loss functions for a well-defined problem statement. It seems you are mixing up a single-label multi-class and a multi-label multi-class architecture.
Your output layer has 2 units with softmax activation, which is the right setup for a single-label, 2-class problem, but you pair it with binary_crossentropy, which, on a multi-unit output, only makes sense for multi-label problems.
You would want to use categorical_crossentropy instead. Furthermore, I would have suggested focal loss if there was class imbalance, but it seems you have a 50/50 class proportion, so that's not necessary.
Remember, the metric that 'accuracy' resolves to is decided based on which loss is being used! Check the different classes here. When you use binary_crossentropy, the accuracy used is BinaryAccuracy, while with categorical_crossentropy it uses CategoricalAccuracy.
Check this chart for details on what to use in what type of problem statement.
Other than that, there is a bottleneck in your network at Flatten() and Dense(). The number of trainable parameters there is very high relative to the other layers. I would advise using another CNN layer to bring the number of filters to, say, 128 and make the sequence length even smaller. Reduce the number of neurons in that Dense layer as well.
Roughly 99% (3,277,000/3,311,354) of all of your trainable parameters reside in the Dense layer that directly follows Flatten! Not a great architectural choice!
Outside the above points, the model results are totally dependent on your data itself. I wouldn't be able to help more without knowledge of the data.
The solution for my problem was implementing Batch Renormalization: BatchNormalization(renorm=True). In addition, normalizing the inputs helped a lot in improving the overall performance of the neural network.

Out of memory OOM using tensorflow gradient tape but only happens when I append a list

I've been working on a data set (1000, 3253) using a CNN. I'm running gradient calculations through a gradient tape, but it keeps running out of memory. Yet if I remove the line appending a gradient calculation to a list, the script runs through all the epochs. I'm not entirely sure why this would happen, but I am also new to TensorFlow and the use of gradient tape. Any advice or input would be appreciated.
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the nesting below is the most plausible reconstruction - confirm against the
# original script.
# Batch loop: one optimisation step per (x, y_true) pair.
for x, y_true in train_dataset:
    # Record the forward pass manually so that gradients can be taken with
    # respect to both the model weights and the input batch itself.
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        x_var = tf.Variable(x)
        tape.watch([model.trainable_variables, x_var])
        y_pred = model(x_var, training=True)
        # NOTE(review): stop_recording() is documented as a context manager;
        # a bare call like this presumably does not pause the tape - confirm.
        tape.stop_recording()
        loss = los_func(y_true, y_pred)
    epoch_loss_avg.update_state(loss)
    epoch_accuracy.update_state(y_true, y_pred)
    #pdb.set_trace()
    # A single backward pass yields the weight gradients and the gradient
    # with respect to the input batch.
    gradients, something = tape.gradient(loss, (model.trainable_variables, x_var))
    #sa_input.append(tape.gradient(loss, x_var))
    del tape
    # Keep the input gradient of every batch; this list grows for as long as
    # the loop runs.
    sa_input.append(something)
    # Apply gradients
    opti_func.apply_gradients(zip(gradients, model.trainable_variables))
# NOTE(review): assumed to run once after the batch loop (per-epoch summary).
train_loss_results.append(epoch_loss_avg.result())
train_accuracy_results.append(epoch_accuracy.result())
As you are new to TF2, I would recommend going through this guide. It covers training, evaluation, and prediction (inference) with models in TensorFlow 2.0 in two broad situations:
When using built-in APIs for training & validation (such as model.fit(), model.evaluate(), model.predict()). This is covered in the section "Using built-in training & evaluation loops".
When writing custom loops from scratch using eager execution and the GradientTape object. This is covered in the section "Writing your own training & evaluation loops from scratch".
Below is a program where I compute the gradients after every epoch and append them to a list. At the end of the program I convert the list to an array for simplicity.
Code - This program throws an OOM error if I use a deep network with many layers and a bigger filter size
# Importing dependency
%tensorflow_version 2.x
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import tensorflow as tf
# Load CIFAR-10 as (images, labels) pairs for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Build the model: three convolutions (the first two followed by max-pooling),
# then a small dense classifier whose last layer has 10 units and no activation.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10),
])

# Print the layer-by-layer summary.
model.summary()

# Compile with a logits-based sparse categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Storage for the gradients collected after each epoch, and the loss object
# used by the gradient function to compute them.
epoch_gradient = []
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Define the Gradient Function
#tf.function
def get_gradient_func(model):
    """Compute the loss gradients over the whole training set and apply them.

    The forward pass feeds all of ``train_images`` through ``model`` in a
    single call, so memory use grows with the dataset and the network size.

    Args:
        model: A compiled Keras model; its ``optimizer`` is used to apply the
            computed gradients.

    Returns:
        The list of gradients of the loss with respect to the model's
        trainable variables, in the same order as those variables.

    Side effects:
        Updates the model's trainable variables by one optimizer step.
    """
    # Bind the variable list once so the gradient computation and the update
    # are guaranteed to use the same ordering.
    variables = model.trainable_variables
    with tf.GradientTape() as tape:
        logits = model(train_images, training=True)
        loss = loss_fn(train_labels, logits)
    grad = tape.gradient(loss, variables)
    model.optimizer.apply_gradients(zip(grad, variables))
    return grad
# Define the Required Callback Function
class GradientCalcCallback(tf.keras.callbacks.Callback):
    """Keras callback that stores the gradients at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Compute the gradients for the model being trained and record them.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric results for the epoch (unused). Defaults to ``None``
                rather than a shared mutable ``{}``.
        """
        # Use the model Keras attached to this callback instead of a global,
        # so the callback works with whichever model it is passed to.
        grad = get_gradient_func(self.model)
        epoch_gradient.append(grad)
# Number of training epochs; one gradient snapshot is collected per epoch.
epoch = 4
print(train_images.shape, train_labels.shape)
model.fit(
    train_images,
    train_labels,
    epochs=epoch,
    validation_data=(test_images, test_labels),
    callbacks=[GradientCalcCallback()],
)
# (7) Convert to a 2-dimensional array of (epoch, gradients) type.
# The per-variable gradients have different shapes, so the nested list is
# ragged; dtype=object must be requested explicitly because recent NumPy
# releases raise ValueError instead of inferring an object array.
gradient = np.asarray(epoch_gradient, dtype=object)
print("Total number of epochs run:", epoch)
Output -
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_12 (Conv2D) (None, 30, 30, 32) 896
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 15, 15, 32) 0
_________________________________________________________________
conv2d_13 (Conv2D) (None, 13, 13, 64) 18496
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 6, 6, 64) 0
_________________________________________________________________
conv2d_14 (Conv2D) (None, 4, 4, 64) 36928
_________________________________________________________________
flatten_4 (Flatten) (None, 1024) 0
_________________________________________________________________
dense_11 (Dense) (None, 64) 65600
_________________________________________________________________
dense_12 (Dense) (None, 10) 650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________
(50000, 32, 32, 3) (50000, 1)
Epoch 1/4
1563/1563 [==============================] - 109s 70ms/step - loss: 1.7026 - accuracy: 0.4081 - val_loss: 1.4490 - val_accuracy: 0.4861
Epoch 2/4
1563/1563 [==============================] - 145s 93ms/step - loss: 1.2657 - accuracy: 0.5506 - val_loss: 1.2076 - val_accuracy: 0.5752
Epoch 3/4
1563/1563 [==============================] - 151s 96ms/step - loss: 1.1103 - accuracy: 0.6097 - val_loss: 1.1122 - val_accuracy: 0.6127
Epoch 4/4
1563/1563 [==============================] - 152s 97ms/step - loss: 1.0075 - accuracy: 0.6475 - val_loss: 1.0508 - val_accuracy: 0.6371
Total number of epochs run: 4
Hope this answers your question. Happy Learning.

Categories

Resources