How to create a customized loss function based on features? - python

Suppose I have a training data set X, which is a 6000×51 matrix. The problem is a multi-output classification problem: the target matrix Y is 6000×10, and each entry of the target matrix is 0 or 1. I can define parameters based on the features as follows:
n = 10
p = X[:,:n]
a = X[:,n:2*n]
c = X[:,2*n]
Suppose the prediction of my model is Prediction. I want to define a loss function as follows:
-np.einsum('ij,ij->i', p, Prediction).mean() + 10 *
np.mean(np.maximum(np.einsum('ij,ij->i', a, Prediction) - c, 0))
def knapsack_loss(X, n, cvc=1):
    input_a = X[:, n:2*n]
    input_a = np.float64(deepcopy(input_a))
    input_p = X[:, :n]
    input_p = np.float64(deepcopy(input_p))
    input_c = X[:, 2*n]
    input_c = np.float64(deepcopy(input_c))

    def loss(y_true, y_pred):
        picks = y_pred
        return (-1 * K.batch_dot(picks, input_p, 1)) + cvc * K.maximum(
            K.batch_dot(picks, input_a, 1) - input_c, 0)

    return loss
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(100, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss=knapsack_loss(X_train, n, cvc=1), optimizer='adam')
    return model

n_inputs, n_outputs = X.shape[1], Y.shape[1]
model = get_model(n_inputs, n_outputs)
model.fit(X_train, y_train, verbose=0, epochs=500)
When I run this code, I face the following error:
InvalidArgumentError: Incompatible shapes: [32] vs. [6000]
[[{{node training_14/Adam/gradients/loss_22/dense_55_loss/loss/MatMul_1_grad/BroadcastGradientArgs}}]]
I would be thankful if someone could correct it or provide a synthetic example.
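The mismatch arises because knapsack_loss closes over the full training matrix (6000 rows), while Keras evaluates the loss per batch (32 rows by default). Below is a minimal synthetic sketch of one common workaround: pack the feature slices p, a and c into the target array so they are batched together with y_true, then slice them back out inside the loss. The dummy data and layer sizes are placeholders, not the original setup.

import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, Model, layers

n = 10
X = np.random.rand(6000, 51).astype("float32")
Y = np.random.randint(0, 2, size=(6000, n)).astype("float32")

# pack [Y | p | a | c] so every batch of y_true carries its own feature rows
y_packed = np.hstack([Y, X[:, :n], X[:, n:2*n], X[:, 2*n:2*n + 1]])

def knapsack_loss(n, cvc=1.0):
    def loss(y_true, y_pred):
        p = y_true[:, n:2*n]       # profit coefficients
        a = y_true[:, 2*n:3*n]     # weight coefficients
        c = y_true[:, 3*n]         # capacity
        picks = y_pred
        profit = tf.reduce_sum(picks * p, axis=-1)
        overweight = tf.maximum(tf.reduce_sum(picks * a, axis=-1) - c, 0.0)
        return -profit + cvc * overweight
    return loss

inp = Input(shape=(X.shape[1],))
h = layers.Dense(100, activation="relu", kernel_initializer="he_uniform")(inp)
out = layers.Dense(n, activation="sigmoid")(h)
model = Model(inp, out)
model.compile(loss=knapsack_loss(n, cvc=1.0), optimizer="adam")
model.fit(X, y_packed, epochs=5, verbose=0)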

Related

Slicing Error when using custom cost function in Keras

I have example data used to set the predictor (x) and response (y) variables: x has an Age_Years feature, and y has Age_Years and target features.
I have a custom cost function that accepts multiple parameters:
# this gives the y_pred values
def calc_prob(param1, param2, param3, age):
    prob = (((100*param1*pow((100/param3), -(pow((age/param2), param1))))*pow(age/param2, param1)*math.log(100/param3))/age)/100
    return prob
# this serves as the custom cost function
def brier_score(y_pred, y_true):
    prob = calc_prob(y_pred[:, 0], y_pred[:, 1], y_pred[:, 2], y_true['Age_Years'])
    brier_score = tf.reduce_mean((prob - y_true['target']) ** 2, axis=1)
    return brier_score
Ultimately, I want an output that builds a 3-parameter model that minimizes the brier_score() function. If I try to build and run the model, I get an error:
from keras import models
from keras import layers

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(1, activation='relu', input_shape=(x.shape[1],)))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    model.compile(loss=losses, optimizer='adam', metrics=['accuracy'])
    return model

model = build_model()
losses = [brier_score]
model.compile(loss=losses, optimizer='adam', metrics=['accuracy'])

# this line causes the error
model.fit(x=x, y=y, epochs=100, verbose=0)
ValueError: slice index 2 of dimension 1 out of bounds. for '{{node brier_score/strided_slice_2}} = StridedSlice[Index=DT_INT32, T=DT_FLOAT, begin_mask=1, ellipsis_mask=0, end_mask=1, new_axis_mask=0, shrink_axis_mask=2](Cast, brier_score/strided_slice_2/stack, brier_score/strided_slice_2/stack_1, brier_score/strided_slice_2/stack_2)' with input shapes: [?,2], [2], [2], [2] and with computed input tensors: input[1] = <0 2>, input[2] = <0 3>, input[3] = <1 1>.
You are mixing up the order of y_true and y_pred in brier_score. Here is a working example:
import tensorflow as tf
from keras import models
from keras import layers
import pandas as pd
import numpy as np

x = np.random.random((500, 1))
y = {'Age_Years': np.squeeze(x, axis=1), 'target': np.random.randint(2, size=500)}

def calc_prob(param1, param2, param3, age):
    prob = (((100*param1*tf.math.pow((100/param3), -(tf.math.pow((age/param2), param1))))*tf.math.pow(age/param2, param1)*tf.math.log(100/param3))/age)/100
    return prob

# this serves as the custom cost function
def brier_score(y_true, y_pred):
    prob = calc_prob(y_pred[:, 0], y_pred[:, 1], y_pred[:, 2], tf.cast(y_true['Age_Years'], dtype=tf.float32))
    brier_score = tf.reduce_mean((prob - tf.cast(y_true['target'], dtype=tf.float32)) ** 2, axis=-1, keepdims=True)
    return brier_score

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(1, activation='relu', input_shape=(x.shape[1],)))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(3, activation='softmax'))
    return model

model = build_model()
optimizer = tf.keras.optimizers.Adam()
batch_size = 10
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)

epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = brier_score(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

How to calculate different loss for different input in keras model

My model has two inputs, and I want to calculate the loss of the two inputs separately because the loss of input 2 has to be multiplied by a weight; these two losses are then added up as the final loss for the model. The structure is roughly as follows.
This is my model:
def final_loss(y_true, y_pred):
    loss = x_loss_value.output + y_model.output * weight
    return loss

def mymodel(input_shape):  # pooling=max or avg
    img_input1 = Input(shape=(input_shape[0], input_shape[1], input_shape[2], ))
    img_input2 = Input(shape=(input_shape[0], input_shape[1], input_shape[2], ))

    # for input1
    x = Conv2D(32, (3, 3), strides=(2, 2))(img_input1)
    x_dense = Dense(2, activation='softmax', name='predictions')(x)
    x_loss_value = my_categorical_crossentropy_layer(x)[input1_y_true, input1_y_pred]
    x_model = Model(inputs=img_input1, outputs=x_loss_value)

    # for input2
    y = Conv2D(32, (3, 3), strides=(2, 2))(img_input2)
    y_dense = Dense(2, activation='softmax', name='predictions')(y)
    y_loss_value = my_categorical_crossentropy_layer(y)[input2_y_true, input2_y_pred]
    y_model = Model(inputs=img_input2, outputs=y_loss_value)

    concat = concatenate([x_model.output, y_model.output])
    final_dense = Dense(2, activation='softmax')(concat)

    # Create model.
    model = Model(inputs=[img_input1, img_input2], outputs=final_dense)
    return model

model.compile(optimizer=optimizers.Adam(lr=1e-7), loss=final_loss, metrics=['accuracy'])
Most of the related solutions I found just customize the final loss and change the loss in Model.compile(loss=customize_loss).
However, I need to apply different losses for different inputs. I'm trying to use a customized layer like this, and get my loss value for the final loss calculation:
class my_categorical_crossentropy_layer1(Layer):
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(my_categorical_crossentropy_layer1, self).__init__(**kwargs)

    def my_categorical_crossentropy_loss(self, y_true, y_pred):
        y_pred = K.constant(y_pred) if not K.is_tensor(y_pred) else y_pred
        y_true = K.cast(y_true, y_pred.dtype)
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)

    def call(self, y_true, y_pred):
        loss = self.my_categorical_crossentropy_loss(y_true, y_pred)
        self.add_loss(loss, inputs=(y_true, y_pred))
        return loss
But, inside the keras model, I can't figure out how to get the y_true and y_pred of the current epoch/batch for my loss layer.
So I can't add x = my_categorical_crossentropy_layer()[y_true, y_pred] to my model.
Is there any way to do the variable calculation like this in the keras model?
Further, can Keras get the previous epoch's training loss or validation loss during the training process?
I want to apply the previous epoch's training loss as my weight in the final loss.
This is my proposal...
Yours is a double binary classification problem that you want to carry out using a single fit. The first thing to notice is that you need to take care of dimensionality: your input is 4D while your target is 2D one-hot encoded, so your network needs something to reduce the dimensionality, for example a Flatten or global pooling layer. After this, you can start fitting by creating a single model with two inputs, two outputs and two losses. In your case the losses are weighted categorical_crossentropy. Keras lets you set the loss weights with the loss_weights parameter; to reproduce the formula loss1*1 + loss2*W, set the weights to [1, W]. You can also combine loss_weights with different losses for your outputs, specified as losses=[loss1, loss2, ...], which are linearly combined with the weights given in loss_weights.
Below is a working example:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, GlobalMaxPool2D, Dense
from tensorflow.keras.models import Model

input_shape = (28, 28, 3)
n_sample = 10

# create dummy data
X1 = np.random.uniform(0, 1, (n_sample,) + input_shape)  # 4d
X2 = np.random.uniform(0, 1, (n_sample,) + input_shape)  # 4d
y1 = tf.keras.utils.to_categorical(np.random.randint(0, 2, n_sample))  # 2d
y2 = tf.keras.utils.to_categorical(np.random.randint(0, 2, n_sample))  # 2d

def mymodel(input_shape, weight):
    img_input1 = Input(shape=(input_shape[0], input_shape[1], input_shape[2], ))
    img_input2 = Input(shape=(input_shape[0], input_shape[1], input_shape[2], ))

    # for input1
    x = Conv2D(32, (3, 3), strides=(2, 2))(img_input1)
    x = GlobalMaxPool2D()(x)  # pass from 4d to 2d
    x = Dense(2, activation='softmax', name='predictions1')(x)

    # for input2
    y = Conv2D(32, (3, 3), strides=(2, 2))(img_input2)
    y = GlobalMaxPool2D()(y)  # pass from 4d to 2d
    y = Dense(2, activation='softmax', name='predictions2')(y)

    # Create model
    model = Model([img_input1, img_input2], [x, y])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                  loss_weights=[1, weight])
    return model

weight = 0.3
model = mymodel(input_shape, weight)
model.summary()
model.fit([X1, X2], [y1, y2], epochs=2)
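As for the follow-up question about using the previous epoch's training loss as the weight, one hedged sketch (not part of the example above; the variable and callback names are hypothetical) keeps the weight in a non-trainable tf.Variable that a Callback updates at the end of every epoch:

import tensorflow as tf

# non-trainable variable read by the loss; updated once per epoch
prev_epoch_loss = tf.Variable(1.0, trainable=False, dtype=tf.float32)

class PrevLossWeight(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # logs['loss'] is this epoch's average training loss
        prev_epoch_loss.assign(logs['loss'])

def weighted_cce(y_true, y_pred):
    cce = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return prev_epoch_loss * cce

# usage with the two-output model above (sketch):
# model.compile(optimizer='adam', loss=['categorical_crossentropy', weighted_cce])
# model.fit([X1, X2], [y1, y2], epochs=2, callbacks=[PrevLossWeight()])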

Grid Search fit not accepting list of tensors

I have a siamese network and I want to perform a grid search on it using GridSearchCV.
So I create a model using the following function:
def createMod(learn_rate=0.01, optimizer='Adam'):
    #K.clear_session()
    # network definition
    base_network = create_base_network(input_shape)

    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)

    # because we re-use the same instance `base_network`,
    # the weights of the network will be shared across the two branches
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)

    model = Model([input_a, input_b], prediction)

    if optimizer == 'SGD':
        opt = SGD(lr=learn_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(lr=learn_rate)
    else:
        opt = Adam(lr=learn_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[accuracy])
    return model
And then I create the KerasClassifier and GridSearch as follows:
model = KerasClassifier(build_fn=createMod, verbose=0)
param_grid = dict(epochs=epochs, batch_size=batch_size, learn_rate=learn_rate,optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=5)
X = [tr_pairs[:, 0], tr_pairs[:, 1]]
grid_result = grid.fit(X, tr_y)
However, this throws the following ValueError:
ValueError: Found input variables with inconsistent numbers of samples: [2, 1054]
The shape of tr_pairs[:, 0] and tr_pairs[:, 1] is (1054, 6), and the shape of tr_y is (1054,).
The code for the base networks is:
def create_base_network(input_shape):
    K.clear_session()
    encoder = build_encoder(latent_dim, n_in)
    decoder = build_decoder(latent_dim, n_in)

    item = Input(shape=(n_in, ))
    encoded_repr = encoder(item)
    reconstructed_item = decoder(encoded_repr)

    autoencoder = Model(item, reconstructed_item)
    return autoencoder
The code for the encoder and decoder is:
def build_encoder(latent_dim, input_dim):
    input_layer = Input(shape=(input_dim, ))
    h = Dense(32, activation='relu', activity_regularizer=regularizers.l1(10e-5))(input_layer)
    h = Dropout(0.1)(h)
    h = Dense(64, activation='relu')(h)
    h = Dropout(0.1)(h)
    # h = Dense(128, activation='relu')(h)
    # h = Dropout(0.1)(h)
    latent_repr = Dense(latent_dim, activation='relu')(h)
    return Model(input_layer, latent_repr)

def build_decoder(latent_dim, input_dim):
    model = Sequential()
    # model.add(Dense(128, input_dim=latent_dim, activation='relu'))
    # model.add(Dropout(0.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(input_dim, activation='relu'))

    z = Input(shape=(latent_dim,))
    item = model(z)
    return Model(z, item)
The code works when I use the normal Keras model's .fit function, but it doesn't work here. Is there a problem somewhere in my code, or is it just not possible to feed multiple inputs into GridSearchCV? And if that is the case, is there a way I can still perform the grid search?
This is a workaround to pass multiple inputs. I create a dummy model that receives a SINGLE input in the format (n_sample, 2, 6) and then splits it into two parts using a Lambda layer. You can modify this according to your siamese structure.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Concatenate, Dense
from tensorflow.keras.models import Model
from sklearn.model_selection import GridSearchCV

def createMod(optimizer='Adam'):
    combi_input = Input((2, 6))  # (n_sample, 2, 6)
    input_a = Lambda(lambda x: x[:, 0])(combi_input)  # (n_sample, 6)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)  # (n_sample, 6)

    c = Concatenate()([input_a, input_b])
    x = Dense(32)(c)
    prediction = Dense(1, activation='sigmoid')(x)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics='accuracy')
    return model

tr_pairs = np.random.uniform(0, 1, (1054, 2, 6))
tr_y = np.random.randint(0, 2, 1054)

model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=createMod, verbose=0)

batch_size = [10, 20]
epochs = [10, 5]
optimizer = ['adam', 'SGD']
param_grid = dict(batch_size=batch_size, epochs=epochs)

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(tr_pairs, tr_y)
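After fitting, the usual scikit-learn attributes are available on the search result, for example:

# summarize the best combination found by the search
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))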

Same working model in Keras not being improved in Pytorch

I'm converting a basic LSTM many-to-one architecture, written in Keras, to PyTorch; it predicts the next single element in a sequence. The NN architecture is the following (the whole code can be found here):
model = Sequential()
model.add(LSTM(
    512,
    input_shape=(network_input.shape[1], network_input.shape[2]),
    return_sequences=True
))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512))
model.add(Dense(256))
model.add(Dropout(0.3))
model.add(Dense(n_vocab))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
Running both models with the same data (yes, I've explicitly checked that), both start with a loss value of ~4, but after 100 epochs or so Keras has already reached a loss of ~0.02, which gives the desired results.
However, the PyTorch model is stuck at around ~3.4 after 20 epochs. I've tried many things:
Play with the LR: it explodes when the LR is too high, so this means that at least the parameters are being updated.
Different optimizers (SGD, Adam, RMSprop), but the results are the same with all of them.
Swap between .view(), .squeeze_() and indexing when accessing the last sequence element.
Add, remove and modify non-linear activation functions and dropout.
Remove the manual initialization for h_0 and c_0.
Here is the code for my model:
class NNP_RNN(nn.Module):
    def __init__(self):
        super(NNP_RNN, self).__init__()
        self.lstm_1 = nn.LSTM(input_size=1, hidden_size=512, batch_first=True)
        self.lstm_2 = nn.LSTM(input_size=512, hidden_size=512, batch_first=True)
        self.lstm_3 = nn.LSTM(input_size=512, hidden_size=512, batch_first=True)
        self.dense_1 = nn.Linear(in_features=512, out_features=256)
        self.dense_2 = nn.Linear(in_features=256, out_features=58)

    def forward(self, x):
        batch_size = x.size(0)
        h_0 = NNP_RNN.init_hidden((1, batch_size, 512))
        c_0 = NNP_RNN.init_hidden((1, batch_size, 512))

        x, _ = self.lstm_1(x, (h_0, c_0))
        x = F.dropout(x, 0.3)
        x, _ = self.lstm_2(x, (h_0, c_0))
        x = F.dropout(x, 0.2)
        _, (x, _) = self.lstm_3(x, (h_0, c_0))
        x = x.squeeze_(0)

        x = self.dense_1(x)
        x = F.dropout(x, 0.1)
        x = self.dense_2(x)
        return x

    @staticmethod
    def init_hidden(dims):
        return torch.zeros(dims, device=device)
And the training process:
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, verbose=True, patience=5)
criterion = nn.CrossEntropyLoss()

for epoch in range(1, epochs + 1):
    epoch_loss = 0
    epoch_corrects = 0
    for features, labels in tqdm(data, ncols=800):
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_size = features.size(0)

        output = model(features)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        corrects = torch.argmax(output, dim=1)
        corrects = torch.eq(corrects, labels).sum().item()
        epoch_corrects += corrects
        epoch_loss += loss.clone() * batch_size

    epoch_loss /= len(data.dataset)
    epoch_corrects /= len(data.dataset)

    print(f'Loss epoch #{epoch} = {epoch_loss:.10f}, Accuracy = {epoch_corrects}')
    scheduler.step(epoch_loss)
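Without the data it is hard to say why the PyTorch version stalls, but two things in the posted code differ from the Keras setup and are worth checking: F.dropout defaults to training=True, so it stays active regardless of model.train()/model.eval(), and lr=0.05 is far higher than the Keras RMSprop default of 1e-3. A hedged sketch of those two adjustments (not a verified fix):

import torch
import torch.nn as nn

class NNP_RNN(nn.Module):
    def __init__(self, n_vocab=58):
        super().__init__()
        self.lstm_1 = nn.LSTM(input_size=1, hidden_size=512, batch_first=True)
        self.lstm_2 = nn.LSTM(input_size=512, hidden_size=512, batch_first=True)
        self.lstm_3 = nn.LSTM(input_size=512, hidden_size=512, batch_first=True)
        self.drop = nn.Dropout(0.3)   # respects model.train() / model.eval()
        self.dense_1 = nn.Linear(512, 256)
        self.dense_2 = nn.Linear(256, n_vocab)

    def forward(self, x):
        x, _ = self.lstm_1(x)          # zero initial states are the default
        x = self.drop(x)
        x, _ = self.lstm_2(x)
        x = self.drop(x)
        _, (h, _) = self.lstm_3(x)     # final hidden state, shape (1, batch, 512)
        x = self.dense_1(h.squeeze(0))
        x = self.drop(x)
        x = self.dense_2(x)            # raw logits, as expected by nn.CrossEntropyLoss
        return x

model = NNP_RNN()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()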

How to write masked MSE loss in Keras?

I was trying to write a masked MSE loss:
def mae_loss_masked(mask):
    def loss_fn(y_true, y_pred):
        abs_vec = tf.multiply(tf.abs(y_pred - y_true), mask)
        loss = tf.reduce_mean(abs_vec)
        return loss
    return loss_fn
My model:
def MobileNet_v1():
    # MobileNet with dense layer on top
    # Keras 2.1.6
    mobilenet = MobileNet(input_shape=(config.IMAGE_H, config.IMAGE_W, config.N_CHANNELS),
                          alpha=1.0,
                          depth_multiplier=1,
                          include_top=False,
                          weights='imagenet'
                          )
    x = Flatten()(mobilenet.output)
    x = Dropout(0.5)(x)
    x = Dense(config.N_LANDMARKS * 2, activation='linear')(x)
    # -------------------------------------------------------
    model = Model(inputs=mobilenet.input, outputs=x)

    optimizer = Adadelta()
    model.compile(optimizer=optimizer, loss=mae_loss_masked)
    model.summary()

    import sys
    sys.exit()

    return model
But it gives an error:
TypeError: mae_loss_masked() takes 1 positional argument but 2 were given
Also, a question: what should the batch generator output look like in this case?
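The TypeError occurs because compile is handed the outer factory itself, so Keras calls it as mae_loss_masked(y_true, y_pred). The factory has to be called with the mask first, so that the inner loss_fn(y_true, y_pred) is what compile receives. A minimal hedged sketch (N_OUTPUTS and the tiny Dense model are placeholders, not the MobileNet from the question):

import numpy as np
import tensorflow as tf

N_OUTPUTS = 10  # stand-in for config.N_LANDMARKS * 2

def mae_loss_masked(mask):
    mask = tf.constant(mask, dtype=tf.float32)
    def loss_fn(y_true, y_pred):
        abs_vec = tf.multiply(tf.abs(y_pred - y_true), mask)
        return tf.reduce_mean(abs_vec)
    return loss_fn

fixed_mask = np.ones(N_OUTPUTS, dtype="float32")

inputs = tf.keras.Input(shape=(8,))
outputs = tf.keras.layers.Dense(N_OUTPUTS, activation='linear')(inputs)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer='adadelta', loss=mae_loss_masked(fixed_mask))  # note the call

If the mask differs per sample, a common alternative is to stack it with the targets, so a batch generator yields (x_batch, np.hstack([y_batch, mask_batch])) and the loss slices the mask back out of y_true.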
