Slicing Error when using custom cost function in Keras - python

I have example data that is used to set the predictor (x) and response (y) variables: x has an Age_Years feature, and y has Age_Years and target features.
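For concreteness, a hypothetical construction of such data (the values here are made up) could look like this:
import numpy as np
import pandas as pd

# Hypothetical stand-in for the example data described above
df = pd.DataFrame({
    'Age_Years': np.random.uniform(1, 90, size=500),
    'target': np.random.randint(0, 2, size=500),
})
x = df[['Age_Years']]              # predictor
y = df[['Age_Years', 'target']]    # response: age plus a binary target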
I have a custom cost function that accepts multiple parameters:
import math
import tensorflow as tf

# this gives the y_pred values
def calc_prob(param1, param2, param3, age):
    prob = (((100*param1*pow((100/param3), -(pow((age/param2), param1))))*pow(age/param2, param1)*math.log(100/param3))/age)/100
    return prob

# this serves as the custom cost function
def brier_score(y_pred, y_true):
    prob = calc_prob(y_pred[:, 0], y_pred[:, 1], y_pred[:, 2], y_true['Age_Years'])
    brier_score = tf.reduce_mean((prob - y_true['target']) ** 2, axis=1)
    return brier_score
Ultimately, I want an output that builds a 3-parameter model that minimizes the brier_score() function. If I try to build and run the model, I get an error:
from keras import models
from keras import layers

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(1, activation='relu', input_shape=(x.shape[1],)))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss=losses, optimizer='adam', metrics=['accuracy'])
    return model

model = build_model()
losses = [brier_score]
model.compile(loss=losses, optimizer='adam', metrics=['accuracy'])
# this line causes the error
model.fit(x=x, y=y, epochs=100, verbose=0)
ValueError: slice index 2 of dimension 1 out of bounds. for '{{node brier_score/strided_slice_2}} = StridedSlice[Index=DT_INT32, T=DT_FLOAT, begin_mask=1, ellipsis_mask=0, end_mask=1, new_axis_mask=0, shrink_axis_mask=2](Cast, brier_score/strided_slice_2/stack, brier_score/strided_slice_2/stack_1, brier_score/strided_slice_2/stack_2)' with input shapes: [?,2], [2], [2], [2] and with computed input tensors: input[1] = <0 2>, input[2] = <0 3>, input[3] = <1 1>.

You are mixing up the order of y_true and y_pred in brier_score: Keras passes (y_true, y_pred) to a loss function, in that order. Here is a working example:
import tensorflow as tf
from keras import models
from keras import layers
import pandas as pd
import numpy as np

x = np.random.random((500, 1))
y = {'Age_Years': np.squeeze(x, axis=1), 'target': np.random.randint(2, size=500)}

def calc_prob(param1, param2, param3, age):
    prob = (((100*param1*tf.math.pow((100/param3), -(tf.math.pow((age/param2), param1))))*tf.math.pow(age/param2, param1)*tf.math.log(100/param3))/age)/100
    return prob

# this serves as the custom cost function
def brier_score(y_true, y_pred):
    prob = calc_prob(y_pred[:, 0], y_pred[:, 1], y_pred[:, 2], tf.cast(y_true['Age_Years'], dtype=tf.float32))
    brier_score = tf.reduce_mean((prob - tf.cast(y_true['target'], dtype=tf.float32)) ** 2, axis=-1, keepdims=True)
    return brier_score

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(1, activation='relu', input_shape=(x.shape[1],)))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    return model

model = build_model()
optimizer = tf.keras.optimizers.Adam()
batch_size = 10
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
epochs = 2

for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = brier_score(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

Related

How to create a customized loss function based on features?

Suppose I have a training data set X, which is a 6000×51 matrix, for a multi-output classification problem; the target matrix Y is 6000×10, and each column of the target matrix takes the value 0 or 1. I can define parameters based on the features as follows:
n = 10
p = X[:, :n]
a = X[:, n:2*n]
c = X[:, 2*n]
Suppose the prediction of my model is Prediction. I want to define a loss function as follows:
-np.einsum('ij,ij ->i',p,y_test).mean() + 10 *
np.mean( np.maximum( np.einsum('ij,ij ->i',a,Prediction) - c, 0) )
import numpy as np
from copy import deepcopy
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense

def knapsack_loss(X, n, cvc=1):
    input_a = X[:, n:2*n]
    input_a = np.float64(deepcopy(input_a))
    input_p = X[:, :n]
    input_p = np.float64(deepcopy(input_p))
    input_c = X[:, 2*n]
    input_c = np.float64(deepcopy(input_c))

    def loss(y_true, y_pred):
        picks = y_pred
        return (-1 * K.batch_dot(picks, input_p, 1)) + cvc * K.maximum(
            K.batch_dot(picks, input_a, 1) - input_c, 0)
    return loss
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(100, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss=knapsack_loss(X_train, n, cvc=1), optimizer='adam')
    return model

n_inputs, n_outputs = X.shape[1], Y.shape[1]
model = get_model(n_inputs, n_outputs)
model.fit(X_train, y_train, verbose=0, epochs=500)
When I run this code, I face the following error:
InvalidArgumentError: Incompatible shapes: [32] vs. [6000]
[[{{node training_14/Adam/gradients/loss_22/dense_55_loss/loss/MatMul_1_grad/BroadcastGradientArgs}}]]
I would be thankful if someone could correct it or provide a synthetic example.
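A likely cause of the shape mismatch: the loss closes over the full X_train (6000 rows), while y_pred only holds one batch (32 rows). One common workaround, sketched below under the assumption that the loss only needs p, a and c (not the labels themselves), is to pass those per-sample quantities in through y and slice them back out inside the loss (the function and variable names here are illustrative, not from the thread):
import numpy as np
import tensorflow as tf

def knapsack_loss_from_targets(n, cvc=1.0):
    # y_true carries [p (n cols), a (n cols), c (1 col)] for each sample,
    # so every tensor in the loss is batch-sized and the shapes always agree.
    def loss(y_true, y_pred):
        p = y_true[:, :n]
        a = y_true[:, n:2 * n]
        c = y_true[:, 2 * n]
        profit = tf.reduce_sum(p * y_pred, axis=1)
        overweight = tf.maximum(tf.reduce_sum(a * y_pred, axis=1) - c, 0.0)
        return -profit + cvc * overweight
    return loss

# Hypothetical usage: hand the feature-derived columns to fit() as the target
# y_aug = X_train[:, :2 * n + 1].astype('float32')   # columns: p, a, c
# model.compile(loss=knapsack_loss_from_targets(n, cvc=1), optimizer='adam')
# model.fit(X_train, y_aug, verbose=0, epochs=500)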

Weighted categorical cross entropy

Please, I'm trying to build an NLP classifier on top of BERT, but I'm struggling with data imbalance. I'm looking for an implementation of a weighted CategoricalCrossentropy. I've already seen a solution using the class_weight parameter of the fit function, but it doesn't "fit" well with my data (I've one-hot encoded the labels, and it actually throws an error because the dict elements don't match).
Can someone please give me an implementation from scratch of a WeightedCategoricalCrossEntropy function that lets me add weights manually to TensorFlow's native CategoricalCrossentropy?
The __call__ method of tf.losses.CategoricalCrossentropy accepts three arguments:
y_true
y_pred
sample_weight
And the sample_weight acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If sample_weight is a tensor of size [batch_size], then the total loss for each sample of the batch is rescaled by the corresponding element in the sample_weight vector. You can use it as such:
def compute_loss(model, x, y, training):
    out = model(inputs=x, training=training)
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out,
                       sample_weight=sample_weight)
    return loss
These are random values but you can change the values depending on y so it becomes a class weight rather than a sample weight. Here's a full example of a running training loop with custom sample weights:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial

(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

def prepare(inputs, outputs):
    inputs = tf.divide(x=inputs, y=255)
    inputs = tf.expand_dims(inputs, -1)
    targets = tf.one_hot(indices=outputs, depth=10)
    return inputs, targets

train = train.take(5_000).batch(4).map(prepare)
test = test.take(1_000).batch(4).map(prepare)

class MyCNN(K.Model):
    def __init__(self):
        super(MyCNN, self).__init__()
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))
        self.conv1 = Conv(filters=8)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=16)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=32)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens2(x)
        return x

model = MyCNN()

loss_object = tf.losses.CategoricalCrossentropy()

def compute_loss(model, x, y, training):
    out = model(inputs=x, training=training)
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out, sample_weight=sample_weight)
    return loss

def get_grad(model, x, y):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=False)
    return loss, tape.gradient(loss, model.trainable_variables)

optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d}" \
          " Loss: {:.3f} TLoss: {:.3f} Acc: {:.3%} TAcc: {:.3%}"

for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))

    for x, y in test:
        loss_value, _ = get_grad(model, x, y)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))

    # argument order matches the format string: Loss, TLoss, Acc, TAcc
    print(verbose.format(epoch,
                         train_loss.result(),
                         test_loss.result(),
                         train_acc.result(),
                         test_acc.result()))
Just to complement the answer: to transform a sample weight into a class weight, you can do something like this.
First, an example without weights:
y_true = [[0, 1, 0], [0, 0, 1]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
# Using 'auto'/'sum_over_batch_size' reduction type.
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred).numpy()
Now with the weighted implementation:
y_true = [[0, 1, 0], [0, 0, 1]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
# index of the true class for each sample
select_one_sum_indexes = tf.where(tf.equal(y_true, 1))
Class_index = tf.gather(select_one_sum_indexes, 1, axis=1)
Class_index = tf.cast(Class_index, tf.int32)
Define your weights here:
weight = tf.constant([1.2, 1., 10.])  # one weight per class
Dict mapping class index to weight:
table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant([0, 1, 2]),
        values=weight
    ), default_value=1.)
weight_sample_class = table.lookup(Class_index)
Loss with the class weights:
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred, sample_weight=weight_sample_class).numpy()

Difference about "BinaryCrossentropy" and "binary_crossentropy" in tf.keras.losses?

I'm training a model using TensorFlow 2.0 and tf.GradientTape(), but I find that the model's accuracy is 95% if I use tf.keras.losses.BinaryCrossentropy, and degrades to 75% if I use tf.keras.losses.binary_crossentropy. So I'm confused about the difference between these two versions of the same loss.
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

def read_data():
    red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")
    white_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=";")
    red_wine["type"] = 1
    white_wine["type"] = 0
    wines = red_wine.append(white_wine)
    return wines

def get_x_y(df):
    x = df.iloc[:, :-1].values.astype(np.float32)
    y = df.iloc[:, -1].values.astype(np.int32)
    return x, y

def build_model():
    inputs = layers.Input(shape=(12,))
    dense1 = layers.Dense(12, activation="relu", name="dense1")(inputs)
    dense2 = layers.Dense(9, activation="relu", name="dense2")(dense1)
    outputs = layers.Dense(1, activation="sigmoid", name="outputs")(dense2)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

def generate_dataset(df, batch_size=32, shuffle=True, train_or_test="train"):
    x, y = get_x_y(df)
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    if shuffle:
        ds = ds.shuffle(10000)
    if train_or_test == "train":
        ds = ds.batch(batch_size)
    else:
        ds = ds.batch(len(df))
    return ds

# loss_object = tf.keras.losses.binary_crossentropy
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = loss_object(y, pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

def train_model(model, train_ds, epochs=10):
    for epoch in range(epochs):
        print(epoch)
        for x, y in train_ds:
            train_step(model, optimizer, x, y)

def main():
    data = read_data()
    train, test = train_test_split(data, test_size=0.2, random_state=23)
    train_ds = generate_dataset(train, 32, True, "train")
    test_ds = generate_dataset(test, 32, False, "test")
    model = build_model()
    train_model(model, train_ds, 10)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.evaluate(test_ds)

main()
They should indeed work the same; BinaryCrossentropy uses binary_crossentropy, with the difference apparent in the docstring descriptions: the former is intended for two class labels, whereas the latter supports an arbitrary class count. However, if targets are passed in the expected format, both apply the same preprocessing before calling the backend's binary_crossentropy, which does the actual computation.
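For instance, with arbitrary values and matching shapes, both forms should print the same number:
import tensorflow as tf

y_true = tf.constant([[0.], [1.], [1.], [0.]])
y_pred = tf.constant([[0.1], [0.8], [0.6], [0.4]])

# Class form: reduces to a scalar with 'sum_over_batch_size' by default
cls_loss = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)
# Function form: returns a per-sample loss; average it for comparison
fn_loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
print(cls_loss.numpy(), fn_loss.numpy())  # same value (~0.3375)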
The difference you observe is likely a reproducibility issue; ensure you set the random seed - see function below. For a more complete answer on reproducibility, see here.
Function
import random
import numpy as np

def reset_seeds(reset_graph_with_backend=None):
    if reset_graph_with_backend is not None:
        K = reset_graph_with_backend
        K.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    print("RANDOM SEEDS RESET")  # optional
Usage:
import tensorflow as tf
import tensorflow.keras.backend as K
reset_seeds(K)
Thanks, I found the reasons for the inconsistent accuracy:
The shape of the model's outputs is (None, 1), but the labels fed in have shape (None,), which produces an unintended result through broadcasting.
In the source code of tf.keras.losses.BinaryCrossentropy(), both y_pred and y_true are run through a function called squeeze_or_expand_dimensions before the loss is calculated; this step is missing from tf.keras.losses.binary_crossentropy.
Note: take care that the shapes of the labels and the model outputs are consistent.
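For example, with a (batch, 1) prediction and a (batch,) label, the subtraction inside the loss silently broadcasts to a (batch, batch) matrix instead of matching elementwise:
import tensorflow as tf

y_pred = tf.constant([[0.9], [0.2], [0.7]])          # model output, shape (3, 1)
y_true = tf.constant([1., 0., 1.])                   # labels, shape (3,)

print((y_pred - y_true).shape)                       # (3, 3): unintended broadcast
print((y_pred - tf.expand_dims(y_true, -1)).shape)   # (3, 1): what was intended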

Computing gradients wrt model inputs in Tensorflow eager mode

I am interested in calculating gradients w.r.t. the inputs of a Keras model in TensorFlow. I understand that previously this could be done by building a graph and using tf.gradients, for example here. However, I would like to achieve this while experimenting in eager mode (possibly using GradientTape). Specifically, if my network has two inputs (x, y) and predicts (u, v, p), I want to calculate, e.g., du/dx for use in the loss.
Snippet below; full code at this gist.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(2,)),  # input shape required
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(3)
])

def loss(model: tf.keras.Model, inputs, outputs):
    u_true, v_true = outputs[:, 0], outputs[:, 1]
    prediction = model(inputs)
    u_pred, v_pred = prediction[:, 0], prediction[:, 1]
    loss_value = tf.reduce_mean(tf.square(u_true - u_pred)) + \
                 tf.reduce_mean(tf.square(v_true - v_pred))
    return loss_value, u_pred, v_pred

def grad(model: tf.keras.Model, inputs, outputs):
    """
    :param inputs: (batch_size, 2) -> x, y
    :param outputs: (batch_size, 3) -> vx, vy, p
    :return:
    """
    with tf.GradientTape() as tape:
        loss_value, u_pred, v_pred = loss(model, inputs, outputs)
        # AttributeError: 'DeferredTensor' object has no attribute '_id'
        print(tape.gradient(u_pred, model.input))
    grads = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, grads
I've tried a few things, e.g. tape.gradient(u_pred, model.input) or tape.gradient(model.output, model.input) but these throw:
AttributeError: 'DeferredTensor' object has no attribute '_id'
Is there a way to achieve this within eager mode and if so how?
Here is an example of retrieving the gradients of the predictions with respect to the inputs using eager execution.
Basically, you need to call tape.watch(inputs) (I am using features in my example - whatever you want to call your x) so that TensorFlow records the change in the model output (you can do the same with the loss) with respect to the inputs, and make sure to call tape.gradient outside of the with tf.GradientTape() context.
Look at the get_gradients function below.
Hope this helps!
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(len(numeric_headers),)),  # input shape required
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
# model = MyModel()

loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

def get_gradients(model, features):
    with tf.GradientTape() as tape:
        tape.watch(features)
        predictions = model(features)
    gradients = tape.gradient(predictions, features)
    return gradients

def train_step(features, label):
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_object(label, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(label, predictions)

def test_step(features, label):
    predictions = model(features)
    t_loss = loss_object(label, predictions)
    test_loss(t_loss)
    test_accuracy(label, predictions)

EPOCHS = 5
for epoch in range(EPOCHS):
    for features, labels in train_ds:
        train_step(features, labels)

    for features, labels in train_ds:
        test_step(features, labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

    if epoch == EPOCHS - 1:
        for features, labels in train_ds:
            print('-')
            print(get_gradients(model, features))

How to write masked MSE loss in Keras?

I was trying to write a masked MSE loss:
import tensorflow as tf

def mae_loss_masked(mask):
    def loss_fn(y_true, y_pred):
        abs_vec = tf.multiply(tf.abs(y_pred - y_true), mask)
        loss = tf.reduce_mean(abs_vec)
        return loss
    return loss_fn
My model:
def MobileNet_v1():
    # MobileNet with dense layer on top
    # Keras 2.1.6
    mobilenet = MobileNet(input_shape=(config.IMAGE_H, config.IMAGE_W, config.N_CHANNELS),
                          alpha=1.0,
                          depth_multiplier=1,
                          include_top=False,
                          weights='imagenet')
    x = Flatten()(mobilenet.output)
    x = Dropout(0.5)(x)
    x = Dense(config.N_LANDMARKS * 2, activation='linear')(x)
    # -------------------------------------------------------
    model = Model(inputs=mobilenet.input, outputs=x)
    optimizer = Adadelta()
    model.compile(optimizer=optimizer, loss=mae_loss_masked)
    model.summary()

    import sys
    sys.exit()

    return model
But it gives an error:
TypeError: mae_loss_masked() takes 1 positional argument but 2 were given
Also, a question: what should the batch generator output look like in this case?
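One plausible reading of that TypeError: mae_loss_masked is a factory that returns the real loss function, so compile() must be given the result of calling it with a mask, not the factory itself. A minimal sketch of that fix, with a made-up mask and a toy model standing in for the MobileNet setup:
import numpy as np
import tensorflow as tf

def mae_loss_masked(mask):
    # Factory: binds the mask and returns a Keras-compatible loss(y_true, y_pred)
    def loss_fn(y_true, y_pred):
        abs_vec = tf.multiply(tf.abs(y_pred - y_true), mask)
        return tf.reduce_mean(abs_vec)
    return loss_fn

# Hypothetical fixed mask: one weight per output value (10 outputs here)
mask = tf.constant(np.ones(10, dtype=np.float32))

model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(4,))])
model.compile(optimizer='adadelta', loss=mae_loss_masked(mask))  # note: the factory is called
x_dummy = np.random.rand(8, 4).astype('float32')
y_dummy = np.random.rand(8, 10).astype('float32')
model.fit(x_dummy, y_dummy, verbose=0)
If the mask needs to vary per sample, a batch generator would instead have to yield it alongside the data, for example through sample_weight or packed into y and sliced apart inside the loss.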
