How do I get non-aggregated batch statistics with tf.keras.callbacks? - python

I am trying to train a model using the keras model.fit() method. This method returns a history object which contains loss values for each epoch - however I would like to have loss values for each individual batch.
Looking online I have found suggestions to use a custom callback class with an on_batch_end(self, logs={}) method. The problem is that this method only gets passed aggregated statistics that get reset each epoch. I would like to have individual statistics for each batch.
https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#on_train_batch_end

You could do that easily with a custom training loop, where you simply append the loss value of every batch to a list.
train_loss_per_train_batch.append(loss_value.numpy())
Here's how to do all of it:
import tensorflow as tf
import tensorflow_datasets as tfds
# Load the iris 'train' split as (features, label) pairs.
ds = tfds.load('iris', split='train', as_supervised=True)

# The first 125 examples are used for training, the following 25 for testing.
train = ds.take(125).shuffle(16).batch(4)
test = ds.skip(125).take(25).shuffle(16).batch(4)

# Two hidden ReLU layers followed by a 3-unit softmax output.
hidden_layers = [
    tf.keras.layers.Dense(units, activation='relu') for units in (16, 32)
]
model = tf.keras.Sequential(
    hidden_layers + [tf.keras.layers.Dense(3, activation='softmax')]
)

# The model already applies softmax, hence from_logits=False.
loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
def compute_loss(model, x, y, training):
    """Return the loss of ``model`` on the batch ``(x, y)``.

    ``training`` is forwarded to the model call; the loss is computed with
    the module-level ``loss_object``.
    """
    predictions = model(x, training=training)
    return loss_object(y_true=y, y_pred=predictions)
def get_grad(model, x, y):
    """Return ``(loss, gradients)`` for one batch, running the model in training mode.

    The gradients are taken with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        batch_loss = compute_loss(model, x, y, training=True)
    gradients = tape.gradient(batch_loss, model.trainable_variables)
    return batch_loss, gradients
optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d} Loss: {:.3f} TLoss: {:.3f} Acc: {:.2%} TAcc: {:.2%}"

# One entry per training batch, accumulated across all epochs.
train_loss_per_train_batch = list()

for epoch in range(1, 25 + 1):
    # Fresh metric objects every epoch, so the printed values are per-epoch.
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.SparseCategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.SparseCategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))
        # The non-aggregated loss of this single batch.
        train_loss_per_train_batch.append(loss_value.numpy())

    for x, y in test:
        # Evaluation needs neither gradients nor training mode: run a single
        # forward pass in inference mode and reuse it for loss and accuracy.
        predictions = model(x, training=False)
        test_loss.update_state(loss_object(y_true=y, y_pred=predictions))
        test_acc.update_state(y, predictions)

    print(verbose.format(epoch,
                         train_loss.result(),
                         test_loss.result(),
                         train_acc.result(),
                         test_acc.result()))

The loss for the current batch can be calculated from the provided average loss as follows:
from tensorflow.keras.callbacks import Callback
class CustomCallback(Callback):
    """Convert the averaged loss reported in ``logs`` into the current batch's loss.

    In TF>=2.2 the ``loss`` entry is an average that is reset every epoch.
    Multiplying it by the number of batches seen so far gives a running sum;
    the difference between consecutive sums is the loss of the latest batch.
    NOTE(review): this assumes every batch contributes equally to the
    average -- confirm for a smaller final batch.
    """

    def on_epoch_begin(self, epoch, logs=None):
        # The average restarts every epoch, so the running sum must too.
        self.previous_loss_sum = 0

    def on_train_batch_end(self, batch, logs=None):
        # ``None`` replaces the former mutable ``{}`` default argument.
        logs = logs or {}
        # calculate loss of current batch:
        current_loss_sum = (batch + 1) * logs['loss']
        current_loss = current_loss_sum - self.previous_loss_sum
        self.previous_loss_sum = current_loss_sum
        # use current_loss:
        # ...
This code can be added to any custom callback that needs the loss for the current batch instead of the average loss.
Also, if you are using Tensorflow 1 or TensorFlow 2 version <= 2.1, then do not include this code in your callback, as in those versions the current loss is already provided, instead of the average loss.

Related

Tensorflow gradient tape returns exploding gradient model.trainable_variables

I'm trying to train my deep learning model with TensorFlow's GradientTape, but the accuracy does not change across epochs. I have also checked that I am resetting my loss and accuracy metrics.
For the MNIST dataset my code looks like the following:
# Load the MNIST 'train' and 'test' splits as (image, label) pairs, plus the dataset info object.
(mnist_train, mnist_test), mnist_info = tfds.load('mnist', split=['train', 'test'], as_supervised=True, with_info=True)
def prepare(ds, batch_size=128):
    """Return ``ds`` cached, batched into ``batch_size`` and prefetched, in that order."""
    return (
        ds.cache()
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
def split_tasks(ds, predicate):
    """Split ``ds`` into the elements that satisfy ``predicate`` and those that do not.

    Args:
        ds: dataset whose elements are ``(img, label)`` pairs.
        predicate: callable ``(img, label) -> boolean tensor``.

    Returns:
        A ``(matching, non_matching)`` pair of filtered datasets.
    """
    def complement(img, label):
        # The predicate yields a tensor inside ``filter``; negate it with a
        # TensorFlow op rather than Python's ``not``, which only works when
        # AutoGraph manages to rewrite it.
        return tf.logical_not(predicate(img, label))

    return ds.filter(predicate), ds.filter(complement)
# Task A holds the even labels, task B everything else.
def _has_even_label(img, label):
    return label % 2 == 0


task_A_train, task_B_train = (
    prepare(part) for part in split_tasks(mnist_train, _has_even_label)
)
task_A_test, task_B_test = (
    prepare(part) for part in split_tasks(mnist_test, _has_even_label)
)
def evaluate(model, test_set):
    """Return the sparse categorical accuracy of ``model`` over ``test_set`` as a NumPy value."""
    metric = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')
    for imgs, labels in test_set:
        metric.update_state(labels, model.predict_on_batch(imgs))
    return metric.result().numpy()
# Flatten -> 128 ReLU units -> 10 outputs. The last layer has no activation,
# so the model emits raw logits.
multi_task_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10)
])
multi_task_model.compile(
    optimizer='adam',
    # The bare name `sparse_categorical_crossentropy` is not defined in this
    # snippet; use the loss class and tell it the inputs are logits.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
def l2_penalty(model, theta_A):
    """Return ``0.5 * sum_i ||theta_i - theta_A[i]||^2`` over the trainable variables.

    The squared form is used on purpose. ``tf.norm`` takes a square root,
    whose gradient is not finite when ``theta_i == theta_A[i]``. That is
    exactly the state at the first step after ``theta_A`` is captured, and it
    turns every gradient into NaN.

    Args:
        model: model whose ``trainable_variables`` are penalised.
        theta_A: reference values, indexable by variable position.
    """
    penalty = 0.
    for i, theta_i in enumerate(model.trainable_variables):
        penalty += tf.reduce_sum(tf.square(theta_i - theta_A[i]))
    return 0.5 * penalty
def train_with_l2(model, task_A_train, task_B_train, task_A_test, task_B_test, epochs=6):
    """Fit ``model`` on task A, then train it on task B with an L2 pull towards the task-A weights.

    Args:
        model: compiled Keras model; ``model.loss`` and ``model.optimizer``
            are reused by the custom loop.
        task_A_train, task_B_train: batched training datasets.
        task_A_test, task_B_test: batched evaluation datasets.
        epochs: number of epochs used for both phases.
    """
    # First we're going to fit to task A and retain a copy of parameters trained on Task A
    model.fit(task_A_train, epochs=epochs)
    # tf.identity makes an explicit snapshot that later optimizer updates cannot touch.
    theta_A = [tf.identity(p) for p in model.trainable_variables]
    print("Task A accuracy after training on Task A: {}".format(evaluate(model, task_A_test)))

    # Metrics for the custom training loop
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    # NOTE(review): assumes the model outputs raw logits (as multi_task_model
    # does); without from_logits=True the reported loss is meaningless.
    loss = tf.keras.metrics.SparseCategoricalCrossentropy('loss', from_logits=True)

    for epoch in range(epochs):
        accuracy.reset_states()
        loss.reset_states()
        for batch, (imgs, labels) in enumerate(task_B_train):
            with tf.GradientTape() as tape:
                preds = model(imgs, training=True)
                # Loss is crossentropy loss with regularization term for each parameter
                total_loss = model.loss(labels, preds) + l2_penalty(model, theta_A)
            grads = tape.gradient(total_loss, model.trainable_variables)
            model.optimizer.apply_gradients(zip(grads, model.trainable_variables))

            accuracy.update_state(labels, preds)
            loss.update_state(labels, preds)
            print("\rEpoch: {}, Batch: {}, Loss: {:.3f}, Accuracy: {:.3f}".format(
                epoch+1, batch+1, loss.result().numpy(), accuracy.result().numpy()), flush=True, end=''
            )
        print("")

    print("Task B accuracy after training trained model on Task B: {}".format(evaluate(model, task_B_test)))
    print("Task A accuracy after training trained model on Task B: {}".format(evaluate(model, task_A_test)))
Does anybody see what I'm doing wrong concerning the training within gradientTape?
EDIT: I rechecked my gradients and it seems that these are exploding and thus return nan. However I cannot see why this is happening.

How do I add noise to the weights when calculating the loss with Keras?

I am new to Keras and am trying to customize my training step in Keras.
Questions:
How to create the new variable weights_right using weights_right=weights- (lr+alpha)*gradients in Keras when customizing training loop?
How to feedforward the NN with weights as formal parameters? Could I customize the forward function in Keras like the code in the following below?
Background:
In the stochastic gradient descent algorithm, after feedforwarding a mini-batch of data and getting the gradients on this mini-batch, I would like to perturb the weights and create a new variable called weights_right, defined as weights_right = weights - (lr + alpha) * gradients (alpha is a constant), and then feedforward the NN with weights_right to get the new loss.
Some parts of code in python are the following:
class Network(object):
    """Fully connected feed-forward network with a linear output layer."""

    def __init__(self, sizes):
        """Create randomly initialised parameters.

        Args:
            sizes: number of units per layer, input layer first.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # feedforward computes np.dot(w, a) + b on column vectors, so a layer
        # mapping x units to y units needs a (y, x) weight matrix and a
        # (y, 1) bias. The two shapes were previously swapped.
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]

    def feedforward(self, a, weights=None, biases=None):
        """Return the output of the network if ``a`` is input.

        Args:
            a: input activation.
            weights: optional per-layer weights used instead of ``self.weights``.
            biases: optional per-layer biases used instead of ``self.biases``.
        """
        if weights is None:
            weights = self.weights
        if biases is None:
            biases = self.biases
        # !!! Note the output layer has no activation for regression.
        for b, w in zip(biases[:-1], weights[:-1]):
            a = sigmoid(np.dot(w, a) + b)
        a = np.dot(weights[-1], a) + biases[-1]
        return a
#-----------------------------------
# The following is the important one.
#-----------------------------------
def customSGD():
    # Pseudo-code sketch: the names it uses (epochs, training_data,
    # mini_batch_size, weights, lr, alpha, ...) are not defined in this snippet.
    for epoch in range(epochs):
        random.shuffle(training_data)
        mini_batches= [training_data[k:k+mini_batch_size] for k in range(0, len(training_data), mini_batch_size)]
        for mini_batch in mini_batches:
            gradients_on_mini_batch = get_gradients(mini_batch)
            #---------------------------------------
            # The following two steps are what
            # I would like to achieve in Keras
            #---------------------------------------
            # Create a new variable called weights_right
            weights_right = weights-(lr+alpha)*gradients_on_mini_batch
            # feed the NN with weights_right, note that the params
            # in the current NN are still weights, not weights_right.
            pred_right = feedforward(training_data, weights_right)
            loss_right = loss_func(pred_right, training_labels)
            ......
            # update weights
            weights = weights-lr*gradients_on_mini_batch
The code above is mainly adapted from Michael Nielsen's online book.
Any help would be appreciated. Thank you so much!
In a custom training loop, you can do whatever you like with the gradients and weights.
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def train_step(inputs, labels):
    """Run one optimisation step on a batch and update the training metrics."""
    weights = model.trainable_variables
    with tf.GradientTape() as tape:
        logits = model(inputs)
        loss = loss_object(labels, logits)
    # add manipulation of weights here
    grads = tape.gradient(loss, weights)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(labels, logits)
Here's the full running example:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
# By default shuffle() reshuffles on every iteration. take()/skip() would then
# select a different train/test partition each epoch and test samples would
# leak into training. Freeze the order so the split is stable.
dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(
    150, reshuffle_each_iteration=False)
# Reshuffle only the training part between epochs.
train_dataset = dataset.take(120).shuffle(120).batch(4)
test_dataset = dataset.skip(120).take(30).batch(4)
class DenseModel(Model):
    """Three stacked Dense layers; the last one has 3 units and no activation."""

    def __init__(self):
        super().__init__()
        self.dens1 = Dense(8, activation='elu')
        self.dens2 = Dense(16, activation='relu')
        self.dens3 = Dense(3)

    def call(self, inputs, training=None, **kwargs):
        """Apply the three layers in order; ``training`` and ``kwargs`` are not used."""
        hidden = self.dens2(self.dens1(inputs))
        return self.dens3(hidden)
model = DenseModel()

# DenseModel ends in an un-activated Dense layer, hence from_logits=True.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Running means of the batch losses and running accuracies, one pair per split.
train_loss, test_loss = tf.keras.metrics.Mean(), tf.keras.metrics.Mean()
train_acc, test_acc = (
    tf.keras.metrics.SparseCategoricalAccuracy(),
    tf.keras.metrics.SparseCategoricalAccuracy(),
)

opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def train_step(inputs, labels):
    """Run one optimisation step on a batch and update the training metrics."""
    weights = model.trainable_variables
    with tf.GradientTape() as tape:
        logits = model(inputs)
        loss = loss_object(labels, logits)
    # add manipulation of weights here
    grads = tape.gradient(loss, weights)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(labels, logits)
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def test_step(inputs, labels):
    """Evaluate one batch and fold its loss and accuracy into the test metrics."""
    logits = model(inputs)
    test_loss(loss_object(labels, logits))
    test_acc(labels, logits)
template = ('Epoch {:>2} Train Loss {:.3f} Test Loss {:.3f} '
            'Train Acc {:.2f} Test Acc {:.2f}')

for epoch in range(10):
    # Start every epoch from clean metrics so the report is per-epoch.
    train_loss.reset_states()
    test_loss.reset_states()
    train_acc.reset_states()
    test_acc.reset_states()

    for X_train, y_train in train_dataset:
        train_step(X_train, y_train)

    for X_test, y_test in test_dataset:
        test_step(X_test, y_test)

    report = template.format(
        epoch + 1,
        train_loss.result(),
        test_loss.result(),
        train_acc.result(),
        test_acc.result(),
    )
    print(report)

Apply gradient descent only if TensorFlow model improves on training and validation data

I want to customize the fit function of the model in order to apply the gradient descent on the weights only if the model improved its predictions on the validation data. The reason for this is that I want to prevent overfitting.
According to this guide it should be possible to customize the fit function of the model. However, the following code runs into errors:
class CustomModel(tf.keras.Model):
    # Question code: overrides train_step so that gradients are only applied
    # when the validation accuracy improves. X_val, Y_val, calculate_accuracy
    # and last_acc_val are not defined in this snippet.
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        ### check and apply gradient
        Y_pred_val = self.predict(X_val) # this does not work
        acc_val = calculate_accuracy(Y_val, Y_pred_val)
        # NOTE(review): last_acc_val is never updated in this snippet.
        if acc_val > last_acc_val:
            self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        ###
        self.compiled_metrics.update_state(y, y_pred)
        # Report every tracked metric plus the validation accuracy computed above.
        return_obj = {m.name: m.result() for m in self.metrics}
        return_obj["acc_val"] = acc_val
        return return_obj
How could it be possible to evaluate the model inside the fit function?
You don't have to subclass fit() for this. You can just make a custom training loop. Look how I did that:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Concatenate
import tensorflow_datasets as tfds
from tensorflow.keras.regularizers import l1, l2, l1_l2
from collections import deque
dataset, info = tfds.load('mnist',
                          with_info=True,
                          split='train',
                          as_supervised=False)
TAKE = 1_000
# reshuffle_each_iteration=False keeps the shuffled order identical on every
# pass. Otherwise take()/skip() below would select a different train/test
# partition each epoch and test images would leak into training.
data = dataset.map(lambda x: (tf.cast(x['image'],
                   tf.float32), x['label'])).shuffle(
    TAKE, reshuffle_each_iteration=False).take(TAKE)
len_train = int(8e-1*TAKE)
train = data.take(len_train).batch(8)
# `data` holds only TAKE examples, so the test split is what remains after
# the training part.
test = data.skip(len_train).take(TAKE - len_train).batch(8)
class CNN(Model):
    """Two-branch network over the input image.

    One branch applies two Dense layers to the raw input and flattens the
    result. The other runs two Conv2D/MaxPooling2D stages, flattens, and
    applies a Dense layer. Both branches are concatenated and mapped to one
    un-activated output per class.
    """

    def __init__(self):
        super().__init__()
        image_shape = info.features['image'].shape
        n_classes = info.features['label'].num_classes

        # Dense branch.
        self.layer1 = Dense(32,
                            activation=tf.nn.relu,
                            kernel_regularizer=l1(1e-2),
                            input_shape=image_shape)
        # Convolutional branch.
        self.layer2 = Conv2D(filters=16,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation='relu',
                             input_shape=image_shape)
        self.layer3 = MaxPooling2D(pool_size=(2, 2))
        self.layer4 = Conv2D(filters=32,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation=tf.nn.elu,
                             kernel_initializer=tf.keras.initializers.glorot_normal)
        self.layer5 = MaxPooling2D(pool_size=(2, 2))
        # Shared by both branches.
        self.layer6 = Flatten()
        self.layer7 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l2(1e-2))
        self.layer8 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l1_l2(l1=1e-2, l2=1e-2))
        # Merge and classify.
        self.layer9 = Concatenate()
        self.layer10 = Dense(units=n_classes)

    def call(self, inputs, training=None, **kwargs):
        """Return the class logits; ``training`` and ``kwargs`` are not used."""
        dense_branch = self.layer1(inputs)

        conv_branch = self.layer3(self.layer2(inputs))
        conv_branch = self.layer5(self.layer4(conv_branch))
        conv_branch = self.layer8(self.layer6(conv_branch))

        dense_branch = self.layer6(self.layer7(dense_branch))

        merged = self.layer9([conv_branch, dense_branch])
        return self.layer10(merged)
cnn = CNN()

# CNN ends in an un-activated Dense layer, hence from_logits=True.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss, test_loss = tf.keras.metrics.Mean(), tf.keras.metrics.Mean()
train_acc, test_acc = (
    tf.keras.metrics.SparseCategoricalAccuracy(),
    tf.keras.metrics.SparseCategoricalAccuracy(),
)

optimizer = tf.keras.optimizers.Nadam()

template = ('Epoch {:3} Train Loss {:7.4f} Test Loss {:7.4f} '
            'Train Acc {:6.2%} Test Acc {:6.2%} ')

epochs = 5
# Integer division: with epochs = 5 this evaluates to 0.
early_stop = epochs//50

loss_hist = deque()
# Holds a single value: the accuracy to beat, seeded with 0.
acc_hist = deque([0], maxlen=1)
for epoch in range(1, epochs + 1):
    train_loss.reset_states()
    test_loss.reset_states()
    train_acc.reset_states()
    test_acc.reset_states()

    for images, labels in train:
        with tf.GradientTape() as tape:
            logits = cnn(images, training=True)
            loss = loss_object(labels, logits)
        train_loss(loss)
        train_acc(labels, logits)

        # Accuracy of this batch alone (a fresh metric, not the running one).
        current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)

        # Apply the gradients only when this batch beats the best accuracy so far.
        if tf.greater(current_acc, acc_hist[-1]):
            print('IMPROVEMENT.')
            gradients = tape.gradient(loss, cnn.trainable_variables)
            optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
            acc_hist.append(current_acc)

    for images, labels in test:
        logits = cnn(images, training=False)
        loss = loss_object(labels, logits)
        test_loss(loss)
        test_acc(labels, logits)

    print(template.format(epoch,
                          train_loss.result(),
                          test_loss.result(),
                          train_acc.result(),
                          test_acc.result()))

    # Record the validation loss. Without this the history stays empty and
    # the early-stopping check below can never fire.
    loss_hist.append(float(test_loss.result()))

    # early_stop is the patience in epochs; 0 disables early stopping, and
    # also avoids calling min() on an empty history after popleft().
    if early_stop > 0 and len(loss_hist) > early_stop and loss_hist.popleft() < min(loss_hist):
        print('Early stopping. No validation loss decrease in %i epochs.' % early_stop)
        break
Output:
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 1 Train Loss 21.1698 Test Loss 21.3391 Train Acc 37.13% Test Acc 38.50%
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 2 Train Loss 13.8314 Test Loss 12.2496 Train Acc 50.88% Test Acc 52.50%
Epoch 3 Train Loss 13.7594 Test Loss 12.5884 Train Acc 51.75% Test Acc 53.00%
Epoch 4 Train Loss 13.1418 Test Loss 13.2374 Train Acc 52.75% Test Acc 51.50%
Epoch 5 Train Loss 13.6471 Test Loss 13.3157 Train Acc 49.63% Test Acc 51.50%
Here's the part that did the job. It uses a deque holding the best batch accuracy seen so far, and it skips the application of gradients unless the current batch accuracy is greater than that stored value.
# Excerpt of the training loop: gradients are applied only on improvement.
for images, labels in train:
    with tf.GradientTape() as tape:
        logits = cnn(images, training=True)
        loss = loss_object(labels, logits)
    train_loss(loss)
    train_acc(labels, logits)
    # Accuracy of this batch alone, computed with a fresh metric object.
    current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)
    # acc_hist[-1] is the value the current batch has to beat.
    if tf.greater(current_acc, acc_hist[-1]):
        print('IMPROVEMENT.')
        gradients = tape.gradient(loss, cnn.trainable_variables)
        optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
        acc_hist.append(current_acc)
Rather than create a custom fit I think it would be easier to use the callback ModelCheckpoint.
What you are trying to do is get the model that has the lowest validation error. Set it up to monitor validation loss. That way it will save the best model even if the network starts to over fit. Documentation is here.
If you do not get a model with a satisfactory validation accuracy then you will have to take other measures.
First look at your training accuracy.
My experience is that you should achieve at least 95%.
If the training accuracy is good but the validation accuracy is poor and degrades as you run more epochs that is a sign of over fitting.
You did not show the model but if you are doing classification you will probably have dense layers with the final layer using softmax activation.
Start out with model with only one dense layer and see if it trains well.
If not, you may have to add additional dense hidden layers. If you do, include a dropout layer to help prevent overfitting. You might also consider using regularizers. Documentation is
here.
I also find you can get improved performance if you dynamically adjust the learning rate. The callback ReduceLROnPlateau enables that capability.
Set it up to monitor validation loss and to reduce the learning rate by a factor if the loss fails to decrease. Documentation is here.

Simple Tensorflow 2 classifier won't learn

It's a simple model architecture based on this tutorial. The dataset would look like this, although in 10 dimensions:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from sklearn.datasets import make_blobs
def pre_processing(inputs, targets):
    """Cast ``inputs`` to float32 and ``targets`` to int64 and return them as a pair."""
    return tf.cast(inputs, tf.float32), tf.cast(targets, tf.int64)
def get_data():
    """Generate 1000 ten-dimensional blob samples in 7 clusters as a shuffled dataset of 256-sample batches."""
    features, labels = make_blobs(n_samples=1000, n_features=10, centers=7, cluster_std=1)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels)).map(pre_processing)
    return dataset.take(count=1000).shuffle(buffer_size=1000).batch(batch_size=256)
# 10 input features -> 8 -> 16 -> 32 ReLU units -> 7 un-activated outputs (logits).
model = Sequential()
model.add(layers.Dense(8, input_shape=(10,), activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(7))
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def compute_loss(logits, labels):
    """Return the mean sparse softmax cross-entropy between ``logits`` and ``labels``."""
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return tf.reduce_mean(per_example)
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def compute_accuracy(logits, labels):
    """Return the fraction of rows whose arg-max along axis 1 equals the label."""
    hits = tf.equal(tf.argmax(logits, axis=1), labels)
    return tf.reduce_mean(tf.cast(hits, tf.float32))
#tf.function
# NOTE(review): the line above may be a mangled `@tf.function` decorator -- confirm.
def train_step(model, optim, x, y):
    """Apply one gradient update on the batch ``(x, y)`` and return ``(loss, accuracy)``.

    The accuracy is computed from the logits produced before the update.
    """
    variables = model.trainable_variables
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    optim.apply_gradients(zip(tape.gradient(loss, variables), variables))
    return loss, compute_accuracy(logits, y)
def train(epochs, model, optim):
    """Run one pass over a freshly generated dataset and return the last batch's loss and accuracy.

    ``epochs`` is only used in the progress message.
    """
    # A new dataset is generated on every call.
    train_ds = get_data()
    # Returned unchanged if the dataset yields no batches.
    loss = 0.
    acc = 0.
    for step, (x, y) in enumerate(train_ds):
        loss, acc = train_step(model, optim, x, y)
        # Report on every 500th step, starting with step 0.
        if step % 500 == 0:
            print(f'Epoch {epochs} loss {loss.numpy()} acc {acc.numpy()}')
    return loss, acc
# Adam optimizer with a learning rate of 1e-6.
optim = optimizers.Adam(learning_rate=1e-6)
# 100 calls to train(); each call makes one pass over its own dataset.
for epoch in range(100):
    loss, accuracy = train(epoch, model, optim)
Epoch 85 loss 2.530677080154419 acc 0.140625
Epoch 86 loss 3.3184046745300293 acc 0.0
Epoch 87 loss 3.138179063796997 acc 0.30078125
Epoch 88 loss 3.7781732082366943 acc 0.0
Epoch 89 loss 3.4101686477661133 acc 0.14453125
Epoch 90 loss 2.2888522148132324 acc 0.13671875
Epoch 91 loss 5.993691444396973 acc 0.16015625
What have I done wrong?
There are two problems in your code:
The first one is that you are generating a new training dataset in each epoch (see first line of train function, i.e. get_data function is called in each epoch). Since you are using sklearn.datasets.make_blobs function to generate data clusters, there is no guarantee that the generated data clusters between different calls follow the same distribution and/or label mapping. Therefore, the best thing the model could do in each epoch on a completely different dataset is just a random guess (hence, the average 1/7 ~= 0.14 accuracy you see in the results). To resolve this problem, take the data generation out of train function (i.e. generate the data at global level once by calling get_data function), and then pass the generated data to train function as an argument in each epoch.
The second problem is that you are using a very low learning rate, i.e. 1e-6, for the optimizer; therefore, the model is stuck and effectively not training at all. Instead, use the default learning rate for Adam optimizer, i.e. 1e-3, and change it only as needed (e.g. based on the results of experiments you perform).

Computing gradients wrt model inputs in Tensorflow eager mode

I am interested in calculating gradients with respect to the inputs of a Keras model in TensorFlow. I understand that previously this could be done by building a graph and using tf.gradients (for example, here). However, I would like to achieve this while experimenting in eager mode (possibly using GradientTape). Specifically, if my network has two inputs (x, y) and predicts (u, v, p), I want to calculate e.g. du/dx for use in the loss.
Snippet below, full code at this gist.
# Four hidden ReLU layers of 20 units on a 2-feature input, then 3 un-activated outputs.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(2,)))  # input shape required
for _ in range(3):
    model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(3))
def loss(model: tf.keras.Model, inputs, outputs):
    """Return ``(loss_value, u_pred, v_pred)`` for a batch.

    The loss is the sum of the mean squared errors of columns 0 and 1;
    any further column of ``outputs`` does not enter it.
    """
    prediction = model(inputs)
    u_pred = prediction[:, 0]
    v_pred = prediction[:, 1]
    u_mse = tf.reduce_mean(tf.square(outputs[:, 0] - u_pred))
    v_mse = tf.reduce_mean(tf.square(outputs[:, 1] - v_pred))
    return u_mse + v_mse, u_pred, v_pred
def grad(model: tf.keras.Model, inputs, outputs):
    """
    Compute the loss and its gradients with respect to the trainable variables.

    :param inputs: (batch_size, 2) -> x, y
    :param outputs: (batch_size, 3) -> vx, vy, p
    :return: tuple ``(loss_value, grads)``
    """
    with tf.GradientTape() as tape:
        loss_value, u_pred, v_pred = loss(model, inputs, outputs)
        # AttributeError: 'DeferredTensor' object has no attribute '_id'
        # (the differentiation target here is model.input, not the `inputs` argument)
        print(tape.gradient(u_pred, model.input))
    grads = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, grads
I've tried a few things, e.g. tape.gradient(u_pred, model.input) or tape.gradient(model.output, model.input) but these throw:
AttributeError: 'DeferredTensor' object has no attribute '_id'
Is there a way to achieve this within eager mode and if so how?
Here is an example of retrieving the gradients of the predictions with respect to the inputs using eager execution
Basically, you need to use tape.watch(inputs) [I am using features in my example - whatever you want to call your x ... ] for Tensorflow to record the change in the model output (you can do the same with loss) with respect to the inputs... (and make sure to call your tape.gradient outside of the with tf.GradientTape() context)
Look at the get_gradients function below ...
Hope this helps!
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(len(numeric_headers),)),  # input shape required
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
# model = MyModel()

loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')

# The model has a single sigmoid output, so accuracy must threshold that
# probability. SparseCategoricalAccuracy takes an arg-max over the one
# output column, which is always class 0, and so only measures how often
# the label is 0.
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')
def get_gradients(model, features):
    """Return the gradient of the model's predictions with respect to ``features``.

    ``features`` is explicitly watched by the tape before the forward pass;
    the gradient is requested after the tape context has closed.
    """
    with tf.GradientTape() as input_tape:
        input_tape.watch(features)
        outputs = model(features)
    return input_tape.gradient(outputs, features)
def train_step(features, label):
    """Apply one optimizer step on the batch and update the training metrics."""
    variables = model.trainable_variables
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_object(label, predictions)
    optimizer.apply_gradients(zip(tape.gradient(loss, variables), variables))
    train_loss(loss)
    train_accuracy(label, predictions)
def test_step(features, label):
    """Evaluate one batch and fold its loss and accuracy into the test metrics."""
    predictions = model(features)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)
EPOCHS = 5
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(EPOCHS):
    # Metrics accumulate until reset. Clearing them here makes every printed
    # line describe the current epoch instead of all epochs so far.
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    for features, labels in train_ds:
        train_step(features, labels)

    # NOTE(review): evaluation iterates train_ds as well -- no separate test
    # dataset is defined in this snippet; substitute one if available.
    for features, labels in train_ds:
        test_step(features, labels)

    print (template.format(epoch+1,
                           train_loss.result(),
                           train_accuracy.result()*100,
                           test_loss.result(),
                           test_accuracy.result()*100))

    # After the final epoch, print the input gradients for every batch.
    if epoch == EPOCHS - 1:
        for features, labels in train_ds:
            print ('-')
            print (get_gradients(model, features))

Categories

Resources