Please, I'm trying to build an NLP classifier on top of BERT but I'm struggling with data imbalance. I'm looking for an implementation of a weighted CategoricalCrossentropy. I've already seen a solution using the class_weight parameter of the fit function, but it doesn't "fit" well with my data (I've one-hot encoded the labels, and it throws an error because the dict elements don't match).
Can someone please give me a from-scratch implementation of a WeightedCategoricalCrossentropy function that lets me add weights manually to TensorFlow's native CategoricalCrossentropy?
The __call__ method of tf.losses.CategoricalCrossentropy accepts three arguments:
y_true
y_pred
sample_weight
The sample_weight argument acts as a coefficient for the loss. If a scalar is provided, the loss is simply scaled by the given value. If sample_weight is a tensor of size [batch_size], the total loss for each sample of the batch is rescaled by the corresponding element of the sample_weight vector.
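A quick numeric sketch of both behaviors (the tensors here are made up):
import tensorflow as tf

y_true = tf.constant([[0., 1., 0.], [0., 0., 1.]])
y_pred = tf.constant([[0.05, 0.95, 0.], [0.1, 0.8, 0.1]])
cce = tf.keras.losses.CategoricalCrossentropy()

base = cce(y_true, y_pred)                              # unweighted mean loss
doubled = cce(y_true, y_pred, sample_weight=2.0)        # scalar: exactly 2 * base
per_sample = cce(y_true, y_pred,
                 sample_weight=tf.constant([1., 3.]))   # per-sample rescaling
You can then use it inside a training step like so: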
loss_object = tf.losses.CategoricalCrossentropy()  # defined once, outside the step

def compute_loss(model, x, y, training):
    out = model(x, training=training)
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out,
                       sample_weight=sample_weight)
    return loss
These are random values, but you can derive the values from y so that it becomes a class weight rather than a sample weight.
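For one-hot labels, a straightforward way to derive per-sample weights from per-class weights is a weighted sum against the one-hot rows (a sketch; the three weights below are made up):
class_weights = tf.constant([1., 2., 5.])  # hypothetical weight per class

def class_weights_to_sample_weights(y_true, class_weights):
    # each one-hot row times the weight vector picks out that sample's class weight
    return tf.reduce_sum(y_true * class_weights, axis=-1)
Here's a full example of a running training loop with custom sample weights: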
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()
train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))
def prepare(inputs, outputs):
    inputs = tf.divide(x=inputs, y=255)
    inputs = tf.expand_dims(inputs, -1)
    targets = tf.one_hot(indices=outputs, depth=10)
    return inputs, targets
train = train.take(5_000).batch(4).map(prepare)
test = test.take(1_000).batch(4).map(prepare)
class MyCNN(K.Model):
    def __init__(self):
        super(MyCNN, self).__init__()
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))
        self.conv1 = Conv(filters=8)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=16)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=32)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens2(x)
        return x
model = MyCNN()
loss_object = tf.losses.CategoricalCrossentropy()
def compute_loss(model, x, y, training):
    out = model(x, training=training)
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out, sample_weight=sample_weight)
    return loss

def get_grad(model, x, y):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=False)
    return loss, tape.gradient(loss, model.trainable_variables)
optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d}" \
" Loss: {:.3f} TLoss: {:.3f} Acc: {:.3%} TAcc: {:.3%}"
for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()
    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))
    for x, y in test:
        loss_value, _ = get_grad(model, x, y)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))
    print(verbose.format(epoch,
                         train_loss.result(),
                         train_acc.result(),
                         test_loss.result(),
                         test_acc.result()))
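To turn this into the class-weighted loss the question asks for, swap the random sample_weight for the helper sketched earlier (class_weights then needs one entry per class, i.e. 10 for MNIST):
def compute_loss(model, x, y, training):
    out = model(x, training=training)
    # y is one-hot, so this picks each sample's class weight
    sample_weight = tf.reduce_sum(y * class_weights, axis=-1)
    loss = loss_object(y_true=y, y_pred=out, sample_weight=sample_weight)
    return loss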
Just to complement the answer above: to turn per-class weights into per-sample weights, you can do something like this.
First, an example without weights:
y_true = [[0, 1, 0], [0, 0, 1]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
# Using 'auto'/'sum_over_batch_size' reduction type.
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred).numpy()
Now with the weighted implementation:
y_true = [[0, 1, 0], [0, 0, 1]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
# index of the "hot" class for each sample
one_indexes = tf.where(tf.equal(y_true, 1))
class_index = tf.gather(one_indexes, 1, axis=1)
class_index = tf.cast(class_index, tf.int32)
Define your class weights here:
weight = tf.constant([1.2, 1., 10.])
Build a lookup table mapping each class to its weight:
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant([0, 1, 2]),
        values=weight),
    default_value=1.)
weight_sample_class = table.lookup(class_index)
Compute the loss with these class weights:
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred, sample_weight=weight_sample_class).numpy()
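Since y_true is already one-hot, the same per-sample weights can also be gathered without a hash table (an equivalent, shorter sketch):
weight_sample_class = tf.gather(weight, tf.argmax(y_true, axis=1))
cce(y_true, y_pred, sample_weight=weight_sample_class).numpy()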
Firstly, I have implemented a simple VGG16 network for image classification.
model = keras.applications.vgg16.VGG16(include_top=False,
                                       weights=None,
                                       input_shape=(32, 32, 3),
                                       pooling='max',
                                       classes=10)
Its input shape is 32 x 32. Now, I am trying to implement a patch-based neural network. The main idea is: from the input image, extract 4 image patches,
train on each extracted patch (resized to 32 x 32, since that is the model's input shape), and finally combine the four output probabilities to find the final result (using normalization & argmax).
How can I do that?
Thanks in advance for your help.
Note:
I am guessing it may be possible using a Lambda layer.
My simple VGG classification implementation is here in Colab.
I used the MNIST dataset and split every image into 4 patches with tf.image.extract_patches; these are subsequently passed through the model as a batch:
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()
train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))
patch_s = 18
stride = xtrain.shape[1] - patch_s
get_patches = lambda x, y: (tf.reshape(
    tf.image.extract_patches(
        images=tf.expand_dims(x[..., None], 0),
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID'), (4, patch_s, patch_s, 1)), y)
train = train.map(get_patches)
test = test.map(get_patches)
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for index, image in enumerate(images):
    ax = plt.subplot(2, 2, index + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(image[..., 0])  # drop the channel axis for imshow
plt.show()
Then, in the training loop, I compute the loss for each of these 4 outputs:
def compute_loss(model, x, y, training):
    out = model(x, training=training)
    repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
    # the model's last layer already applies softmax, so no from_logits here
    loss = loss_object(y_true=repeated_y, y_pred=out)
    loss = tf.reduce_mean(loss, axis=0)
    return loss
Then I take the mean over axis 0 to merge the four per-patch losses into one. Here's the full running code:
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()
train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))
patch_s = 18
stride = xtrain.shape[1] - patch_s
get_patches = lambda x, y: (tf.reshape(
    tf.image.extract_patches(
        images=tf.expand_dims(x[..., None], 0),
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID'), (4, patch_s, patch_s, 1)), y)
train = train.map(get_patches)
test = test.map(get_patches)
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for index, image in enumerate(images):
    ax = plt.subplot(2, 2, index + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(image[..., 0])  # drop the channel axis for imshow
plt.show()
def prepare(inputs, targets):
    inputs = tf.divide(x=inputs, y=255)
    targets = tf.one_hot(indices=targets, depth=10)
    return inputs, targets
train = train.take(10_000).map(prepare)
test = test.take(1_000).map(prepare)
class MyCNN(K.Model):
    def __init__(self):
        super(MyCNN, self).__init__()
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))
        self.conv1 = Conv(filters=16)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=32)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=64)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, inputs, training=None, **kwargs):
        x = self.conv1(inputs)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens2(x)
        return x
model = MyCNN()
loss_object = tf.losses.categorical_crossentropy
def compute_loss(model, x, y, training):
    out = model(x, training=training)
    repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
    # the model's last layer already applies softmax, so no from_logits here
    loss = loss_object(y_true=repeated_y, y_pred=out)
    loss = tf.reduce_mean(loss, axis=0)
    return loss

def get_grad(model, x, y):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=False)
    return loss, tape.gradient(loss, model.trainable_variables)
optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d}" \
" Loss: {:.3f} Acc: {:.3%} TLoss: {:.3f} TAcc: {:.3%}"
for epoch in range(1, 10 + 1):
train_loss = tf.metrics.Mean()
train_acc = tf.metrics.CategoricalAccuracy()
test_loss = tf.metrics.Mean()
test_acc = tf.metrics.CategoricalAccuracy()
for x, y in train:
loss_value, grads = get_grad(model, x, y)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
train_loss.update_state(loss_value)
train_acc.update_state(y, model(x, training=True))
for x, y in test:
loss_value, _ = get_grad(model, x, y)
test_loss.update_state(loss_value)
test_acc.update_state(y, model(x, training=False))
print(verbose.format(epoch,
train_loss.result(),
train_acc.result(),
test_loss.result(),
test_acc.result()))
Spoiler alert: with such small patches it doesn't do well; make the patches larger than 18 (out of 28) for better performance.
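At inference time, the four per-patch probabilities can be merged the same way the question describes (a sketch; patches is one element of the mapped test set):
patches, label = next(iter(test))
probs = model(patches, training=False)    # shape (4, 10), one softmax row per patch
combined = tf.reduce_mean(probs, axis=0)  # average the four patch predictions
predicted_class = tf.argmax(combined)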
I have a model that has to return coordinates and a confidence for each of them. My loss function has to take into account the target coordinates and the target availabilities. Here is what my loss function looks like:
def loss(targets, target_availabilities, preds, confidences):
    # my loss function goes here
    return loss
The functional API of TensorFlow shows how to pass two different outputs through separate loss functions (or through one loss function that returns two loss values, one per pair of y_true and y_pred). How should I compile and fit my model so that it passes the targets, target_availabilities, predictions, and confidences through that single loss function?
I recommend using a custom training loop to implement this. It allows for more flexibility. As long as you return one value, you can perform any type of computation in your loss function. Let's say that you want to do this:
transformed_output = (y_pred * confidence) - availability
You can implement this in a custom loss function (assuming that your neural net architecture returns these three values):
def compute_loss(model, x, y, training):
    out, avail, conf = model(x, training=training)
    transformed_output = tf.subtract(tf.multiply(out, conf), avail)
    loss = loss_object(y_true=y, y_pred=transformed_output)
    return loss
This returns a single value, and TensorFlow will try to minimize it no matter what it is.
Here's a complete example. Let's say that this is availability:
<tf.Tensor: shape=(1, 10), dtype=float32,
numpy=array([[0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]], dtype=float32)>
And this is confidences:
<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.09586799, 0.03268242, 0.04225421, 0.4026084 , 0.5088273 ,
0.38777208, 0.53815687, 0.41644037, 0.5709661 , 0.7587745 ]],
dtype=float32)>
Let's train a CNN to classify MNIST according to this special loss function.
import tensorflow as tf
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()
unsqueeze = lambda x, y: (tf.expand_dims(
    tf.divide(
        tf.cast(x, tf.float32), 255), -1),
    tf.one_hot(y, depth=10))
train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain)).\
    shuffle(64).\
    batch(64).\
    map(unsqueeze).\
    prefetch(1)
test = tf.data.Dataset.from_tensor_slices((xtest, ytest)).\
    shuffle(64).\
    batch(64).\
    map(unsqueeze).\
    prefetch(1)
class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3),
                                            strides=(1, 1),
                                            input_shape=(28, 28, 1))
        self.maxp1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.conv2 = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                            strides=(1, 1))
        self.maxp2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.flat1 = tf.keras.layers.Flatten()
        self.dens1 = tf.keras.layers.Dense(64, activation='relu')
        self.drop1 = tf.keras.layers.Dropout(5e-1)
        self.dens3 = tf.keras.layers.Dense(10)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.flat1(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens3(x)
        # stand-ins for the question's extra outputs: random availability and
        # confidence tensors, just to demonstrate the three-output structure
        availability = tf.cast(tf.random.uniform((len(x), 10), 0, 2,
                                                 dtype=tf.int32), tf.float32)
        confidences = tf.random.uniform((len(x), 10), 0, 1, dtype=tf.float32)
        return x, availability, confidences
model = CNN()
loss_object = tf.losses.CategoricalCrossentropy(from_logits=True)
def compute_loss(model, x, y, training):
    out, avail, conf = model(x, training=training)
    transformed_output = tf.subtract(tf.multiply(out, conf), avail)
    loss = loss_object(y_true=y, y_pred=transformed_output)
    return loss

def get_grad(model, x, y):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=False)
    return loss, tape.gradient(loss, model.trainable_variables)
optimizer = tf.optimizers.Adam()
verbose = "Epoch {:2d} Loss: {:.3f} TLoss: {:.3f} Acc: {:.2%} TAcc: {:.2%}"
for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()
    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True)[0])  # first output only
    for x, y in test:
        loss_value, _ = get_grad(model, x, y)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False)[0])  # first output only
    print(verbose.format(epoch,
                         train_loss.result(),
                         test_loss.result(),
                         train_acc.result(),
                         test_acc.result()))
I'm building my first neural network, a binary classifier, but I get an error when I try to evaluate the model with:
correct = tf.nn.in_top_k(logits, y, 1)
where
the logits tensor is: predictions, shape [batch_size = 52, num_classes = 1], type float32
the y tensor is: targets, shape [batch_size = 52], type int32
I get this error:
targets[1] is out of range
[[{{node in_top_k/InTopKV2}}]]
After some debugging, I understood that the values of my tensor y must be strictly less than num_classes, so the first value of y equal to 1 is considered out of range, even though num_classes = 1.
How can I allow my tensor values to be equal to num_classes rather than only strictly less? Or is there another way?
In my opinion, num_classes should equal 1 because it's a binary classification, so 1 output neuron is needed.
EDIT
Here's my full code :
import tensorflow as tf
import numpy as np
n_inputs = 28
n_hidden1 = 15
n_hidden2 = 5
n_outputs = 1
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")  # None => any batch size
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.shape[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)  # n_inputs x n_neurons matrix of values close to 0
        W = tf.Variable(init, name="kernel")  # random weights
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        tf.cast(Z, tf.int32)
        if activation is not None:
            return activation(Z)
        else:
            return Z
def to_one_hot(y):
    n_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot
hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                       activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                       activation=tf.nn.relu)
logits = neuron_layer(hidden2, n_outputs, name="outputs")
xentropy = tf.keras.backend.binary_crossentropy(tf.to_float(y), logits)
loss = tf.reduce_mean(xentropy)
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
labels_max = tf.reduce_max(y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 40
batch_size = 50
def shuffle_batch(X, y, batch_size):  # shuffle and split into n_batches batches
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
with tf.Session() as sess:
    init.run()
    X_temp, Y_temp = X_batch, y_batch
    feed_dict = {X: X_batch, y: y_batch}
    print("feed", feed_dict)
    print("\n y_batch :", y_batch, y_batch.dtype)
    print("\n X_batch :", X_batch, X_batch.dtype, X_batch.shape)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, Y_train, batch_size):
            y_batch = y_batch.astype(np.int32)
            X_batch = X_batch.astype(np.float32)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        #acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        #acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        #print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)
    save_path = saver.save(sess, "./my_model_final.ckpt")
    # some tests
    print("y eval :", y.eval(feed_dict={X: X_temp, y: Y_temp}).shape)
    y_one_hot = to_one_hot(y.eval(feed_dict={X: X_temp, y: Y_temp}))
    print("y_one_hot :", y_one_hot.shape)
    print("logits eval : ", logits.eval(feed_dict={X: X_temp, y: Y_temp}))
    #print(correct.eval(feed_dict={X: X_temp, y: Y_temp}))
    print(labels_max.eval(feed_dict={X: X_temp, y: Y_temp}))
As per the documentation here, tf.nn.in_top_k(predictions, targets, k) has the arguments:
predictions: A Tensor of type float32. A batch_size x classes tensor.
targets: A Tensor. Must be one of the following types: int32, int64. A batch_size vector of class ids.
k: An int. Number of top elements to look at for computing precision.
Since you are performing binary classification, i.e., there are two classes, the shape of the logits tensor in your case should be (52, 2) while the shape of y should be (52,). Here, each row of logits holds one score per class. This is why you are getting the above error.
Consider the examples below.
Example 1:
res = tf.nn.in_top_k([[0, 1], [1, 0], [0, 1], [1, 0], [0, 1]], [0, 1, 1, 1, 1], 1)
Here, the shape of logits is (5, 2) while y is (5,). If you do tf.reduce_max(y), you get 1, which is less than the number of classes and hence okay.
This works fine and outputs [False False True False True]
Example 2:
res = tf.nn.in_top_k([[0, 1], [1, 0], [0, 1], [1, 0], [0, 1]], [0, 2, 1, 1, 1], 1)
If you do tf.reduce_max(y), you get 2, which is equal to the number of classes.
This raises an error: InvalidArgumentError: targets[1] is out of range
EDIT: In your above code, make the following modifications:
change n_outputs = 1 to n_outputs = 2
change sess.run(training_op,feed_dict={X: X_batch, y: y_batch}) to _, cost, acc = sess.run([training_op, loss, accuracy], feed_dict={X: X_batch, y: to_one_hot(y_batch)})
change correct = tf.nn.in_top_k(logits, y, 1) to correct = tf.nn.in_top_k(logits, tf.argmax(y, 1), 1)
Code (random data used):
import numpy as np
import tensorflow as tf
n_inputs = 28
n_hidden1 = 15
n_hidden2 = 5
n_outputs = 2
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None, 2), name="y")  # None => any batch size
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.shape[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)  # n_inputs x n_neurons matrix of values close to 0
        W = tf.Variable(init, name="kernel")  # random weights
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        tf.cast(Z, tf.int32)
        if activation is not None:
            return activation(Z)
        else:
            return Z
def to_one_hot(y):
    n_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot
hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                       activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                       activation=tf.nn.relu)
logits = neuron_layer(hidden2, n_outputs, name="outputs")
xentropy = tf.keras.backend.binary_crossentropy(tf.to_float(y), logits)
loss = tf.reduce_mean(xentropy)
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, tf.argmax(y, 1), 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 1
X_train = np.random.rand(100, 28)
X_train = X_train.astype(np.float32)
Y_train = np.random.randint(low=0, high=2, size=100, dtype=np.int32)
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        _, cost, corr, acc = sess.run([training_op, loss, correct, accuracy],
                                      feed_dict={X: X_train, y: to_one_hot(Y_train)})
        print(corr)
        print('Loss: {} Accuracy: {}'.format(cost, acc))
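Alternatively, if you would rather keep n_outputs = 1 as in your original code, you can skip tf.nn.in_top_k entirely and threshold the sigmoid probability instead (a sketch under that assumption):
probs = tf.sigmoid(logits)                       # shape (batch_size, 1)
preds = tf.cast(probs > 0.5, tf.int32)           # 0/1 predictions
correct = tf.equal(tf.reshape(preds, [-1]), y)   # y has shape (batch_size,), int32
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))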
When I run the following script, I get the error message 'No gradients provided for any variable'. The grads variable is a list of None values. What could be going wrong in such a simple script?
import tensorflow as tf
import numpy as np
tf.enable_eager_execution()
class Model(tf.keras.Model):
    def __init__(self):
        super(Model, self).__init__()
        self.layer = tf.keras.layers.Dense(4, activation="linear")

    def call(self, x):
        y = self.layer(x)
        return y

model = Model()
model._set_inputs(tf.zeros((1, 5)))
optimizer = tf.train.GradientDescentOptimizer(0.5)
# gibberish data
x_train = np.array([[0, 0, 0, 0, 1]], dtype=np.float32)
y_train = np.array([[0.1, 0.1, 0.4, 0.4]])
y_pred = model.call(x_train)
with tf.GradientTape() as tape:
    loss = tf.losses.mean_squared_error(y_train, y_pred)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
The model prediction line
y_pred = model.call(x_train)
has to be inside the with tf.GradientTape() as tape: scope, so that the forward pass is recorded by the tape. Otherwise the tape sees no operations involving the model's variables, and tape.gradient returns None for each of them.
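A minimal corrected version of the tail of the script (also calling the model directly with model(x_train) instead of model.call(...), which is the idiomatic form):
with tf.GradientTape() as tape:
    y_pred = model(x_train)  # the forward pass is now recorded by the tape
    loss = tf.losses.mean_squared_error(y_train, y_pred)
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))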
I have a problem which deals with predicting two outputs when given a vector of predictors.
Assume that a predictor vector looks like x1, y1, att1, att2, ..., attn, where x1, y1 are coordinates and the att's are the other attributes attached to the occurrence of the x1, y1 coordinates. Based on this predictor set I want to predict x2, y2. This is a time series problem, which I am trying to solve using multiple regression.
My question is how to set up Keras so that it can give me 2 outputs in the final layer.
from keras.models import Model
from keras.layers import *
#inp is a "tensor", that can be passed when calling other layers to produce an output
inp = Input((10,)) #supposing you have ten numeric values as input
#here, SomeLayer() is defining a layer,
#and calling it with (inp) produces the output tensor x
x = SomeLayer(blablabla)(inp)
x = SomeOtherLayer(blablabla)(x) #here, I just replace x, because this intermediate output is not interesting to keep
#here, I want to keep the two different outputs for defining the model
#notice that both left and right are called with the same input x, creating a fork
out1 = LeftSideLastLayer(balbalba)(x)
out2 = RightSideLastLayer(banblabala)(x)
#here, you define which path you will follow in the graph you've drawn with layers
#notice the two outputs passed in a list, telling the model I want it to have two outputs.
model = Model(inp, [out1,out2])
model.compile(optimizer = ...., loss = ....) #loss can be one for both sides or a list with different loss functions for out1 and out2
model.fit(inputData,[outputYLeft, outputYRight], epochs=..., batch_size=...)
You can make a model with multiple outputs with:
the Functional API
by subclassing tf.keras.Model
Here's an example of dual outputs (regression and classification) on the Iris Dataset, using the Functional API:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
import tensorflow as tf
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,), name='input')
x = Dense(16, activation='relu', name='16')(inputs)
x = Dense(32, activation='relu', name='32')(x)
output1 = Dense(1, name='cont_out')(x)
output2 = Dense(3, activation='softmax', name='cat_out')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss={'cont_out': 'mean_absolute_error',
                    'cat_out': 'sparse_categorical_crossentropy'},
              optimizer='adam',
              metrics={'cat_out': tf.metrics.SparseCategoricalAccuracy(name='acc')})
history = model.fit(X, {'cont_out': Y, 'cat_out': Z}, epochs=10, batch_size=8)
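If one output should weigh more than the other during training, compile also accepts a loss_weights argument (a sketch; the 0.5/1.0 split is made up):
model.compile(loss={'cont_out': 'mean_absolute_error',
                    'cat_out': 'sparse_categorical_crossentropy'},
              loss_weights={'cont_out': 0.5, 'cat_out': 1.0},  # hypothetical weighting
              optimizer='adam')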
Here's a simplified version:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,))
x = Dense(16, activation='relu')(inputs)
x = Dense(32, activation='relu')(x)
output1 = Dense(1)(x)
output2 = Dense(3, activation='softmax')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss=['mae', 'sparse_categorical_crossentropy'], optimizer='adam')
history = model.fit(X, [Y, Z], epochs=10, batch_size=8)
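After fitting, predict returns one array per output (a quick check):
y_hat, z_hat = model.predict(X[:4])  # regression predictions and class probabilities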
Here's the same example, subclassing tf.keras.Model and with a custom training loop:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
from sklearn.datasets import load_iris
tf.keras.backend.set_floatx('float64')
iris, target = load_iris(return_X_y=True)
X = iris[:, :3]
y = iris[:, 3]
z = target
ds = tf.data.Dataset.from_tensor_slices((X, y, z)).shuffle(150).batch(8)
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d0 = Dense(16, activation='relu')
        self.d1 = Dense(32, activation='relu')
        self.d2 = Dense(1)
        self.d3 = Dense(3, activation='softmax')

    def call(self, x, training=None, **kwargs):
        x = self.d0(x)
        x = self.d1(x)
        a = self.d2(x)
        b = self.d3(x)
        return a, b
model = MyModel()
loss_obj_reg = tf.keras.losses.MeanAbsoluteError()
loss_obj_cat = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_reg = tf.keras.metrics.Mean(name='regression loss')
loss_cat = tf.keras.metrics.Mean(name='categorical loss')
error_reg = tf.keras.metrics.MeanAbsoluteError()
error_cat = tf.keras.metrics.SparseCategoricalAccuracy()
@tf.function
def train_step(inputs, y_reg, y_cat):
    with tf.GradientTape() as tape:
        pred_reg, pred_cat = model(inputs)
        reg_loss = loss_obj_reg(y_reg, pred_reg)
        cat_loss = loss_obj_cat(y_cat, pred_cat)
    gradients = tape.gradient([reg_loss, cat_loss], model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    loss_reg(reg_loss)
    loss_cat(cat_loss)
    error_reg(y_reg, pred_reg)
    error_cat(y_cat, pred_cat)
for epoch in range(50):
    for xx, yy, zz in ds:
        train_step(xx, yy, zz)
    template = 'Epoch {:>2}, SCCE: {:>5.2f},' \
               ' MAE: {:>4.2f}, SAcc: {:>5.1%}'
    print(template.format(epoch + 1,
                          loss_cat.result(),
                          error_reg.result(),
                          error_cat.result()))
    loss_reg.reset_states()
    loss_cat.reset_states()
    error_reg.reset_states()
    error_cat.reset_states()
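As with the Functional version, calling the trained subclassed model returns both outputs (a quick check):
reg_pred, cat_pred = model(X[:5])
print(reg_pred.shape, cat_pred.shape)  # (5, 1) and (5, 3)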