How to debug a custom loss function during model fitting? - python

I would like to see what is happening in my loss function during model fitting.
However, I cannot figure out how to do that.
This is what I am trying but it does not work:
def custom_loss(label: tf.Tensor, pred: tf.Tensor) -> tf.Tensor:
    mask = label != 0
    loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction='none')
    loss = loss_object(label, pred)
    mask = tf.cast(mask, dtype=loss.dtype)
    tf.print("\n---------------------------")
    tf.print("custom_loss - str(loss):", str(loss))
    tf.print("custom_loss - str(mask):", str(mask))
    try:
        tf.print("tf.keras.backend.eval(loss):", tf.keras.backend.eval(loss))
    except:
        tf.print("tf.keras.backend.eval(loss) does not work - exception!")
    loss = tf.reshape(loss, shape=(batch_size, loss.shape[1], 1))
    loss *= mask
    loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
    tf.print("\n============================")
    return loss
After starting training by calling the fit() function I only get the following output:
2/277 [..............................] - ETA: 44s - loss: 0.6931 - masked_accuracy: 0.0000e+00
---------------------------
custom_loss - str(loss): Tensor("custom_loss/binary_crossentropy/weighted_loss/Mul:0", shape=(None, 20), dtype=float32)
custom_loss - str(mask): Tensor("custom_loss/Cast:0", shape=(None, 20, 1), dtype=float32)
tf.keras.backend.eval(loss) does not work - exception!
How do I display the actual value of label, pred, mask and loss?

In TF 2 Keras, it can be done by training the model in eager mode, i.e. passing run_eagerly=True to model.compile and then calling model.fit. It's an argument available in the model.compile method. From the docs:
run_eagerly: Bool. Defaults to False. If True, this Model's logic will not be wrapped in a tf.function.
Now, the end-to-end solution can be achieved in several ways, e.g. with the straightforward model.fit or by customizing the fit method. Here are some pointers.
loss_object = keras.losses.BinaryCrossentropy(
    from_logits=True,
    reduction='none'
)

def custom_loss(label: tf.Tensor, pred: tf.Tensor) -> tf.Tensor:
    mask = label != 1
    loss = loss_object(label, pred)
    mask = tf.cast(mask, dtype=loss.dtype)
    if tf.executing_eagerly():
        print("custom_loss - str(loss): \n", str(loss))
        print("custom_loss - str(mask): \n", str(mask), '\n'*2)
    loss = tf.reshape(loss, shape=(tf.shape(loss)[0], -1))
    loss *= mask
    loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
    return loss
With vanilla model.fit:
# Construct a simple model
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(1, activation=None)(inputs)
model = keras.Model(inputs, outputs)

# Pass the custom loss and run the train step eagerly.
model.compile(optimizer="adam", loss=custom_loss, run_eagerly=True)

# Just use `fit` as usual -- you can use callbacks, etc.
x = tf.random.normal([10, 32], 0, 1, tf.float32)
y = np.random.randint(2, size=(10, 1))
model.fit(x, y, epochs=5)
custom_loss - str(loss):
tf.Tensor(
[0.3215071 0.6470841 3.401876 1.6478868 0.4492059 0.67835623
0.1574089 1.3314284 1.9282155 0.5588544 ], shape=(10,), dtype=float32)
custom_loss - str(mask):
tf.Tensor(
[[0.]
[0.]
[1.]
[1.]
[1.]
[0.]
[0.]
[0.]
[0.]
[0.]], shape=(10, 1), dtype=float32)
1/1 [==============================] - 0s 20ms/step - loss: 1.8330
<keras.callbacks.History at 0x7f4332ef4d10>
Or, with a custom model.fit (overriding train_step), the output would be the same as above.
class CustomModel(keras.Model):
    def train_step(self, data):
        x, y = data
        # Note: instead of printing values inside the custom loss,
        # I can do the same here.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute our own loss
            loss = custom_loss(y, y_pred)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        return {"loss": loss}

# Construct an instance of CustomModel
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(1, activation=None)(inputs)
model = CustomModel(inputs, outputs)
model.compile(optimizer="adam", run_eagerly=True)
model.fit(x, y)
And lastly, if you want to go even more low-level, you can write a custom training loop; in eager mode every tensor there has a concrete value you can print. Check the linked guides, they're quite detailed. A minimal sketch follows.
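For reference, here is a minimal custom-training-loop sketch for debugging; train_dataset is an assumed tf.data.Dataset of (x, y) batches, and model and custom_loss are the ones defined above. Because it runs eagerly, plain print and .numpy() work on every tensor:
optimizer = tf.keras.optimizers.Adam()
for epoch in range(5):
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            y_pred = model(x_batch, training=True)
            loss_value = custom_loss(y_batch, y_pred)  # eager tensor, printable
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        print(f"epoch {epoch} step {step} loss {loss_value.numpy():.4f}")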

Related

Tensorflow gradient tape returns exploding gradient model.trainable_variables

I'm trying to train my deep learning model with TensorFlow GradientTape, however the accuracy does not change over the epochs. I also checked that I reset my loss and accuracy metrics.
For the MNIST dataset my code looks like the following:
(mnist_train, mnist_test), mnist_info = tfds.load('mnist', split=['train', 'test'], as_supervised=True, with_info=True)

def prepare(ds, batch_size=128):
    ds = ds.cache()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
    return ds

def split_tasks(ds, predicate):
    return ds.filter(predicate), ds.filter(lambda img, label: not predicate(img, label))

task_A_train, task_B_train = split_tasks(mnist_train, lambda img, label: label % 2 == 0)
task_A_train, task_B_train = prepare(task_A_train), prepare(task_B_train)
task_A_test, task_B_test = split_tasks(mnist_test, lambda img, label: label % 2 == 0)
task_A_test, task_B_test = prepare(task_A_test), prepare(task_B_test)

def evaluate(model, test_set):
    acc = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
    for i, (imgs, labels) in enumerate(test_set):
        preds = model.predict_on_batch(imgs)
        acc.update_state(labels, preds)
    return acc.result().numpy()

multi_task_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10)
])
multi_task_model.compile(optimizer='adam', loss=sparse_categorical_crossentropy, metrics='accuracy')
def l2_penalty(model, theta_A):
    penalty = 0
    for i, theta_i in enumerate(model.trainable_variables):
        _penalty = tf.norm(theta_i - theta_A[i])
        penalty += _penalty
    return 0.5*penalty

def train_with_l2(model, task_A_train, task_B_train, task_A_test, task_B_test, epochs=6):
    # First we're going to fit to task A and retain a copy of parameters trained on Task A
    model.fit(task_A_train, epochs=epochs)
    theta_A = {n: p.value() for n, p in enumerate(model.trainable_variables.copy())}
    print("Task A accuracy after training on Task A: {}".format(evaluate(model, task_A_test)))

    # Metrics for the custom training loop
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    loss = tf.keras.metrics.SparseCategoricalCrossentropy('loss')

    for epoch in range(epochs):
        accuracy.reset_states()
        loss.reset_states()
        for batch, (imgs, labels) in enumerate(task_B_train):
            with tf.GradientTape() as tape:
                preds = model(imgs)
                # Loss is crossentropy loss with regularization term for each parameter
                total_loss = model.loss(labels, preds) + l2_penalty(model, theta_A)
            grads = tape.gradient(total_loss, model.trainable_variables)
            model.optimizer.apply_gradients(zip(grads, model.trainable_variables))

            accuracy.update_state(labels, preds)
            loss.update_state(labels, preds)
            print("\rEpoch: {}, Batch: {}, Loss: {:.3f}, Accuracy: {:.3f}".format(
                epoch+1, batch+1, loss.result().numpy(), accuracy.result().numpy()), flush=True, end=''
            )
        print("")
    print("Task B accuracy after training trained model on Task B: {}".format(evaluate(model, task_B_test)))
    print("Task A accuracy after training trained model on Task B: {}".format(evaluate(model, task_A_test)))
Does anybody see what I'm doing wrong with the training inside GradientTape?
EDIT: I rechecked my gradients and it seems that they are exploding and thus return nan. However, I cannot see why this is happening.
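One way to see where the values blow up (purely a debugging sketch reusing the names from the batch loop above, not a fix for the underlying problem) is to log the global gradient norm before applying the gradients, and optionally clip it:
            grads = tape.gradient(total_loss, model.trainable_variables)
            # log the global gradient norm each batch to spot where it explodes
            tf.print("batch", batch, "grad norm:", tf.linalg.global_norm(grads))
            # optionally clip before applying (5.0 is an arbitrary threshold)
            grads, _ = tf.clip_by_global_norm(grads, 5.0)
            model.optimizer.apply_gradients(zip(grads, model.trainable_variables))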

How to get Mean Absolute Errors (MAE) for deep learning model

I am working on a recommendation system using a deep autoencoder model. How can I define a mean absolute error (MAE) loss function and use it to measure the model's accuracy?
Here is the model:
model = deep_model(train_, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode)
model.compile(optimizer=Adam(lr=0.001), loss="mse", metrics=[])
model.summary()
Define the validation data:
data_valid = (train, validate)
hist_model = model.fit(x=train, y=train,
                       epochs=100,
                       batch_size=128,
                       validation_data=data_valid, verbose=2, shuffle=True)
You can define it yourself:
import keras.backend as K

def my_mae(y_true, y_pred):
    # axis=-1 is correct here; using axis=None gives a different result
    return K.mean(K.abs(y_pred - y_true), axis=-1)
then do this:
model.compile(optimizer=Adam(learning_rate=1e-2), loss=my_mae)
but it is still a better idea to use the one implemented in Keras:
model.compile(optimizer=Adam(learning_rate=1e-2), loss=tf.keras.losses.MeanAbsoluteError(name="mean_absolute_error"))
I think you can use the scikit-learn function mean_absolute_error here. It will return the MAE of your predictions.
I suggest fitting the model as:
model.compile(optimizer=Adam(lr=0.001), loss="mae", metrics=[])
# instead of loss="mse"
model_history = model.fit(
    X_train,  # instead of: x=train, y=train
    y_train,  # why are x and y both 'train'?
    epochs=100,
    batch_size=128,
    validation_data=(X_test, y_test))
After training your model, make a prediction:
prediction = model.predict(X_test)
And get the MAE by:
from sklearn.metrics import mean_absolute_error
mae_error = mean_absolute_error(y_test, prediction)
I used the RMSE to evaluate the model and the results were good. Below are the loss=masked_mse and metrics=[masked_rmse_clip] functions I defined.
For the loss function:
def masked_mse(y_true, y_pred):
    # masked function: ignore entries where y_true == 0
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_mse = K.sum(masked_squared_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1)
    return masked_mse
For the metric:
def masked_rmse_clip(y_true, y_pred):
    # masked function: ignore entries where y_true == 0
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    y_pred = K.clip(y_pred, 1, 5)
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_rmse = K.sqrt(K.sum(masked_squared_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1))
    return masked_rmse
The model:
model = deep_model(train, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode)
model.compile(optimizer=Adam(lr=0.001), loss=masked_mse, metrics=[masked_rmse_clip])
model.summary()

data_valid = (train, validate)
hist_model = model.fit(x=train, y=train,
                       epochs=100,
                       batch_size=128,
                       validation_data=data_valid, verbose=2, shuffle=True)
I get this output after 100 epochs:
Epoch 100/100
48/48 - 6s - loss: 0.9418 - masked_rmse_clip: 0.8024 - val_loss: 0.9853 - val_masked_rmse_clip: 0.8010
I want something like this for MAE, so I need help with the loss and metric functions for MAE.
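For what it's worth, a masked MAE loss written in the same style as the masked MSE above might look like this (a sketch that just follows the same masking convention, not code from the original answers):
def masked_mae(y_true, y_pred):
    # mask out unrated entries (where y_true == 0), as in masked_mse above
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    # masked absolute error, averaged over the rated entries only
    masked_abs_error = K.abs(mask_true * (y_true - y_pred))
    return K.sum(masked_abs_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1)
It would then be passed the same way, e.g. model.compile(optimizer=Adam(lr=0.001), loss=masked_mae, metrics=[masked_rmse_clip]).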

Single loss with Multiple output model in TF.Keras

I use tensorflow's Dataset such that y is a dictionary of 6 tensors, all of which I use in a single loss function that looks like this:
def CustomLoss():
    def custom_loss(y_true, y_pred):
        a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
        b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
        c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
        return a + b + c
    return custom_loss
And I have a model with 3 outputs of different shapes. When I compile the model and call the fit method, I get a ValueError:
model.compile(optimizer=optimizer, loss=CustomLoss())
model.fit(dataset, epochs=10)
ValueError: Found unexpected keys that do not correspond to any
Model output: dict_keys(['a_0', 'a_1', 'b_0', 'b_1', 'c_0', 'c_1']).
Expected: ['output_0', 'output_1', 'output_2']
where output_0, output_1, output_2 are the names of the output layers.
I figured that naming the output layers by the keys in the dataset should solve the issue but the problem is I have 6 tensors in the dataset and only 3 outputs. I'm aware I can assign a loss function to every output with a single dataset ground truth tensor, but again I need to pass at least two tensors as GT.
So far I've used a custom training loop but I'd rather use the fit method. I'm using tensorflow 2.3.1
EDIT:
Example model:
inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)
model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])
Custom training loop:
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
for epoch in range(1, epochs+1):
    for batch, (images, labels) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            outputs = model(images, training=False)
            reg_loss = tf.reduce_sum(model.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        avg_loss.update_state(total_loss)
    print(f'Epoch {epoch}/{epochs} - Loss: {avg_loss.result().numpy()}')
    avg_loss.reset_states()
Minimal reproducible code:
import tensorflow as tf

def CustomLoss():
    def custom_loss(y_true, y_pred):
        a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
        b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
        b = tf.reduce_sum(b, axis=(1, 2, 3))
        c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
        c = tf.reduce_sum(c, axis=1)
        return a + b + c
    return custom_loss

dataset = tf.data.Dataset.from_tensors((
    tf.random.uniform((256, 256, 3)),
    {'a_0': [0., 1.], 'a_1': [1.], 'b_0': tf.random.uniform((8, 8, 256)), 'b_1': [1.], 'c_0': tf.random.uniform((64,)), 'c_1': [1.]}
))
dataset = dataset.batch(1)

inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)
model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])

optimizer = tf.keras.optimizers.Adam(1e-3)
model.compile(optimizer=optimizer, loss=CustomLoss())
model.fit(dataset, epochs=1)
Here is one approach for your case. We will still compute the loss with a custom training step, but take advantage of the convenient .fit method by customizing it. Please check the documentation for more details: Customizing what happens in fit().
Here is one simple demonstration, extending your reproducible code.
import tensorflow as tf

# data set
dataset = tf.data.Dataset.from_tensors((
    tf.random.uniform((256, 256, 3)),
    {'a_0': [0., 1.], 'a_1': [1.], 'b_0': tf.random.uniform((8, 8, 256)),
     'b_1': [1.], 'c_0': tf.random.uniform((64,)), 'c_1': [1.]}
))
dataset = dataset.batch(1)

# custom loss
def loss(y_true, y_pred):
    a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
    b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
    b = tf.reduce_sum(b, axis=(1, 2, 3))
    c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
    c = tf.reduce_sum(c, axis=1)
    return a + b + c
Custom Model
This is basically overriding the train_step that will run repeatedly over each batch of data.
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

class custom_fit(tf.keras.Model):
    def train_step(self, data):
        images, labels = data
        with tf.GradientTape() as tape:
            outputs = self(images, training=True)  # forward pass
            reg_loss = tf.reduce_sum(self.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        avg_loss.update_state(total_loss)
        return {"loss": avg_loss.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [avg_loss]
Build Model
# model
inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)

# simply pass the inputs and outputs to the custom model
custom_model = custom_fit(inputs=[inputs],
                          outputs=[x1, x2, x3])
Compile and Fit
custom_model.compile(optimizer='adam')
custom_model.fit(dataset, epochs=5, verbose=2)
Epoch 1/5
1/1 - 6s - loss: 73784.0078
Epoch 2/5
1/1 - 1s - loss: 64882.8984
Epoch 3/5
1/1 - 1s - loss: 54760.2500
Epoch 4/5
1/1 - 1s - loss: 47696.7031
Epoch 5/5
1/1 - 1s - loss: 40574.6328

Apply gradient descent only if TensorFlow model improves on training and validation data

I want to customize the fit function of the model in order to apply gradient descent to the weights only if the model improved its predictions on the validation data. The reason for this is that I want to prevent overfitting.
According to this guide it should be possible to customize the fit function of the model. However, the following code runs into errors:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        ### check and apply gradient
        Y_pred_val = self.predict(X_val)  # this does not work
        acc_val = calculate_accuracy(Y_val, Y_pred_val)
        if acc_val > last_acc_val:
            self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        ###

        self.compiled_metrics.update_state(y, y_pred)
        return_obj = {m.name: m.result() for m in self.metrics}
        return_obj["acc_val"] = acc_val
        return return_obj
How could it be possible to evaluate the model inside the fit function?
You don't have to subclass fit() for this. You can just write a custom training loop. Here is how I did that:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Concatenate
import tensorflow_datasets as tfds
from tensorflow.keras.regularizers import l1, l2, l1_l2
from collections import deque

dataset, info = tfds.load('mnist',
                          with_info=True,
                          split='train',
                          as_supervised=False)

TAKE = 1_000

data = dataset.map(lambda x: (tf.cast(x['image'],
                                      tf.float32), x['label'])).shuffle(TAKE).take(TAKE)

len_train = int(8e-1*TAKE)

train = data.take(len_train).batch(8)
test = data.skip(len_train).take(info.splits['train'].num_examples - len_train).batch(8)

class CNN(Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = Dense(32, activation=tf.nn.relu,
                            kernel_regularizer=l1(1e-2),
                            input_shape=info.features['image'].shape)
        self.layer2 = Conv2D(filters=16,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation='relu',
                             input_shape=info.features['image'].shape)
        self.layer3 = MaxPooling2D(pool_size=(2, 2))
        self.layer4 = Conv2D(filters=32,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation=tf.nn.elu,
                             kernel_initializer=tf.keras.initializers.glorot_normal)
        self.layer5 = MaxPooling2D(pool_size=(2, 2))
        self.layer6 = Flatten()
        self.layer7 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l2(1e-2))
        self.layer8 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l1_l2(l1=1e-2, l2=1e-2))
        self.layer9 = Concatenate()
        self.layer10 = Dense(units=info.features['label'].num_classes)

    def call(self, inputs, training=None, **kwargs):
        b = self.layer1(inputs)
        a = self.layer2(inputs)
        a = self.layer3(a)
        a = self.layer4(a)
        a = self.layer5(a)
        a = self.layer6(a)
        a = self.layer8(a)
        b = self.layer7(b)
        b = self.layer6(b)
        x = self.layer9([a, b])
        x = self.layer10(x)
        return x

cnn = CNN()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss = tf.keras.metrics.Mean()
test_loss = tf.keras.metrics.Mean()

train_acc = tf.keras.metrics.SparseCategoricalAccuracy()
test_acc = tf.keras.metrics.SparseCategoricalAccuracy()

optimizer = tf.keras.optimizers.Nadam()

template = 'Epoch {:3} Train Loss {:7.4f} Test Loss {:7.4f} ' \
           'Train Acc {:6.2%} Test Acc {:6.2%} '

epochs = 5
early_stop = epochs//50

loss_hist = deque()
acc_hist = deque(maxlen=1)
acc_hist.append(0)

for epoch in range(1, epochs + 1):
    train_loss.reset_states()
    test_loss.reset_states()
    train_acc.reset_states()
    test_acc.reset_states()

    for images, labels in train:
        with tf.GradientTape() as tape:
            logits = cnn(images, training=True)
            loss = loss_object(labels, logits)

        train_loss(loss)
        train_acc(labels, logits)

        current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)

        if tf.greater(current_acc, acc_hist[-1]):
            print('IMPROVEMENT.')
            gradients = tape.gradient(loss, cnn.trainable_variables)
            optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
            acc_hist.append(current_acc)

    for images, labels in test:
        logits = cnn(images, training=False)
        loss = loss_object(labels, logits)
        test_loss(loss)
        test_acc(labels, logits)

    print(template.format(epoch,
                          train_loss.result(),
                          test_loss.result(),
                          train_acc.result(),
                          test_acc.result()))

    if len(loss_hist) > early_stop and loss_hist.popleft() < min(loss_hist):
        print('Early stopping. No validation loss decrease in %i epochs.' % early_stop)
        break
Output:
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 1 Train Loss 21.1698 Test Loss 21.3391 Train Acc 37.13% Test Acc 38.50%
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 2 Train Loss 13.8314 Test Loss 12.2496 Train Acc 50.88% Test Acc 52.50%
Epoch 3 Train Loss 13.7594 Test Loss 12.5884 Train Acc 51.75% Test Acc 53.00%
Epoch 4 Train Loss 13.1418 Test Loss 13.2374 Train Acc 52.75% Test Acc 51.50%
Epoch 5 Train Loss 13.6471 Test Loss 13.3157 Train Acc 49.63% Test Acc 51.50%
Here's the part that does the job: it keeps the last batch accuracy in a deque and skips applying the gradients unless the current batch accuracy improves on it.
for images, labels in train:
    with tf.GradientTape() as tape:
        logits = cnn(images, training=True)
        loss = loss_object(labels, logits)

    train_loss(loss)
    train_acc(labels, logits)

    current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)

    if tf.greater(current_acc, acc_hist[-1]):
        print('IMPROVEMENT.')
        gradients = tape.gradient(loss, cnn.trainable_variables)
        optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
        acc_hist.append(current_acc)
Rather than creating a custom fit, I think it would be easier to use the ModelCheckpoint callback.
What you are trying to do is get the model that has the lowest validation error. Set it up to monitor validation loss; that way it will save the best model even if the network starts to overfit. Documentation is here.
If you do not get a model with a satisfactory validation accuracy, then you will have to take other measures.
First look at your training accuracy.
My experience is that you should achieve at least 95%.
If the training accuracy is good but the validation accuracy is poor and degrades as you run more epochs, that is a sign of overfitting.
You did not show the model, but if you are doing classification you will probably have dense layers with the final layer using softmax activation.
Start out with a model with only one dense layer and see if it trains well.
If not, you may have to add additional dense hidden layers. If you do, include a dropout layer to help prevent overfitting. You might also consider using regularizers. Documentation is here.
I also find you can get improved performance if you dynamically adjust the learning rate. The ReduceLROnPlateau callback enables that capability.
Set it up to monitor validation loss and to reduce the learning rate by a factor if the loss fails to decrease. Documentation is here.
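A minimal sketch of wiring up both callbacks (the file name, factor, patience and epoch values below are placeholders, not something prescribed above):
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

callbacks = [
    # keep only the weights that achieved the lowest validation loss so far
    ModelCheckpoint('best_model.h5', monitor='val_loss',
                    save_best_only=True, save_weights_only=True),
    # shrink the learning rate when validation loss stops improving
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
]
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=50, callbacks=callbacks)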

Computing gradients wrt model inputs in Tensorflow eager mode

I am interested in calculating gradients w.r.t. the inputs of a Keras model in TensorFlow. I understand that previously this could be done by building a graph and using tf.gradients, for example here. However, I would like to achieve this while experimenting in eager mode (possibly using GradientTape). Specifically, if my network has two inputs (x, y) and predicts (u, v, p), calculate e.g. du/dx for use in the loss.
Snippet below, full code at this gist.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(2,)),  # input shape required
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(3)
])
def loss(model: tf.keras.Model, inputs, outputs):
    u_true, v_true = outputs[:, 0], outputs[:, 1]
    prediction = model(inputs)
    u_pred, v_pred = prediction[:, 0], prediction[:, 1]
    loss_value = tf.reduce_mean(tf.square(u_true - u_pred)) + \
                 tf.reduce_mean(tf.square(v_true - v_pred))
    return loss_value, u_pred, v_pred

def grad(model: tf.keras.Model, inputs, outputs):
    """
    :param inputs: (batch_size, 2) -> x, y
    :param outputs: (batch_size, 3) -> vx, vy, p
    :return:
    """
    with tf.GradientTape() as tape:
        loss_value, u_pred, v_pred = loss(model, inputs, outputs)
        # AttributeError: 'DeferredTensor' object has no attribute '_id'
        print(tape.gradient(u_pred, model.input))
    grads = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, grads
I've tried a few things, e.g. tape.gradient(u_pred, model.input) or tape.gradient(model.output, model.input) but these throw:
AttributeError: 'DeferredTensor' object has no attribute '_id'
Is there a way to achieve this within eager mode and if so how?
Here is an example of retrieving the gradients of the predictions with respect to the inputs using eager execution.
Basically, you need to use tape.watch(inputs) [I am using features in my example - whatever you want to call your x ...] so that TensorFlow records the change in the model output (you can do the same with the loss) with respect to the inputs (and make sure to call your tape.gradient outside of the with tf.GradientTape() context).
Look at the get_gradients function below.
Hope this helps!
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(len(numeric_headers),)),  # input shape required
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

# model = MyModel()
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

def get_gradients(model, features):
    with tf.GradientTape() as tape:
        tape.watch(features)
        predictions = model(features)
    gradients = tape.gradient(predictions, features)
    return gradients

def train_step(features, label):
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_object(label, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(label, predictions)

def test_step(features, label):
    predictions = model(features)
    t_loss = loss_object(label, predictions)
    test_loss(t_loss)
    test_accuracy(label, predictions)

EPOCHS = 5
for epoch in range(EPOCHS):
    for features, labels in train_ds:
        train_step(features, labels)

    for features, labels in train_ds:
        test_step(features, labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    if epoch == EPOCHS - 1:
        for features, labels in train_ds:
            print('-')
            print(get_gradients(model, features))
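As a follow-up to the original question's (x, y) -> (u, v, p) setup: to get a single component such as du/dx, one option is to watch the input tensor and slice the prediction before calling tape.gradient. A sketch (assuming inputs has shape (batch_size, 2) with columns x and y, and the model outputs (u, v, p)):
def du_dx(model, inputs):
    inputs = tf.convert_to_tensor(inputs)
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        pred = model(inputs)
        u = pred[:, 0]  # first output component, u
    # gradient of u w.r.t. the inputs; column 0 is du/dx, column 1 is du/dy
    grads = tape.gradient(u, inputs)
    return grads[:, 0]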
