TF model not minimizing loss - python

I have coded the following model:
import tensorflow as tf

mod_input = tf.keras.layers.Input(shape=(4,))
mod = tf.keras.layers.Dense(30, "selu")(mod_input)
mod = tf.keras.layers.Dense(30, "selu")(mod)
mod = tf.keras.layers.Dense(30, "selu")(mod)
pre_mu = tf.keras.layers.Dense(30, "selu")(mod)
mu = tf.keras.layers.Dense(1, "linear")(pre_mu)
model = tf.keras.Model(mod_input, mu, name="model")

class MyModel(tf.keras.Model):
    def __init__(self, model, **kwargs):
        super(MyModel, self).__init__(**kwargs)
        self.model = model

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            z_mean = self.model(x)
            # MSE
            total_loss = tf.math.reduce_mean(
                tf.math.square(z_mean - y)
            )
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
        }
Which, in my experience, should minimize the MSE (it's just an example)... however, it doesn't. In fact, compiling it with:
mm = MyModel(model)
mm.compile(optimizer=tf.keras.optimizers.Adam())
history = mm.fit(x, y_noised, epochs=80, batch_size=1000)
I get the last loss to be 888.2213... however, with the following model, the last loss is 31.8990:
m = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(4,)),
    tf.keras.layers.Dense(10, activation="selu"),
    tf.keras.layers.Dense(10, activation="selu"),
    tf.keras.layers.Dense(10, activation="selu"),
    tf.keras.layers.Dense(1, activation="linear")
])
what am I missing?
BTW, if instead of compiling mm I compile model directly, it works fine (ignoring mm):
mm = MyModel(model)
model.compile(optimizer=tf.keras.optimizers.Adam(), loss="mse")
history = model.fit(x, y_noised, epochs=80, batch_size=1000)

Caught the problem...
In train_step, y had shape (1000,) while z_mean had shape (1000, 1), so the - operator broadcast them to a (1000, 1000) matrix of pairwise differences (every prediction minus every target) instead of element-wise differences (first with first, second with second, ...).
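A minimal fix (one of several options) is to make the shapes match before subtracting inside train_step; tf.keras.losses.MeanSquaredError would also handle this for you:

# Inside train_step: align target and prediction shapes so the subtraction is
# element-wise instead of broadcasting (batch,) against (batch, 1) into a
# (batch, batch) matrix of pairwise differences.
y = tf.reshape(y, tf.shape(z_mean))  # or tf.expand_dims(y, axis=-1)
total_loss = tf.math.reduce_mean(tf.math.square(z_mean - y))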

Related

Problem building a ANN Regressor model with Autoencoder in Tensorflow 2.11

My input is a 2D numpy array of dimensions (364660, 5052). The target is (364660, 1), a regression variable. I am trying to build a guided autoencoder + ANN regressor, where the encoded layer of the autoencoder serves as the input to the ANN regressor. I would like to train both models in one go. However, the loss for the autoencoder should be a combined autoencoder loss + ANN loss, whereas the ANN loss stays the same. Here is my sample code:
class AutoencoderRegressor(tf.keras.Model):
    def __init__(self, encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights):
        super(AutoencoderRegressor, self).__init__()
        self.autoencoder = tf.keras.models.Sequential(encoder_layers + decoder_layers)
        self.regressor = tf.keras.models.Sequential(regressor_layers)
        self.autoencoder_loss_weights = autoencoder_loss_weights

    def call(self, inputs, training=None, mask=None):
        autoencoder_output = self.autoencoder(inputs)
        regressor_input = self.autoencoder.get_layer(index=2).output
        regressor_output = self.regressor(regressor_input)
        return autoencoder_output, regressor_output

    def autoencoder_loss(self, autoencoder_output, inputs):
        binary_crossentropy = tf.keras.losses.BinaryCrossentropy()
        mean_squared_error = tf.keras.losses.MeanSquaredError()
        autoencoder_reconstruction_loss = binary_crossentropy(inputs, autoencoder_output)
        autoencoder_regression_loss = mean_squared_error(inputs, autoencoder_output)
        # autoencoder_loss = self.autoencoder_loss_weights[0] * autoencoder_reconstruction_loss + self.autoencoder_loss_weights[1] * autoencoder_regression_loss
        autoencoder_loss = autoencoder_reconstruction_loss + autoencoder_regression_loss
        return autoencoder_loss

    def regressor_loss(self, regressor_output, targets):
        mean_squared_error = tf.keras.losses.MeanSquaredError()
        regressor_loss = mean_squared_error(targets, regressor_output)
        return regressor_loss

# define the encoder layers
encoder_layers = [
    tf.keras.layers.Dense(64, activation='relu', input_shape=(reduced_x_train2.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu')]

# define the decoder layers
decoder_layers = [
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(reduced_x_train2.shape[1], activation='sigmoid')]

# define the regressor layers
regressor_layers = [
    tf.keras.layers.Dense(8, activation='relu', input_shape=(16,)),
    tf.keras.layers.Dense(1, activation='linear')]

# define the loss weights
autoencoder_loss_weights = [0.8, 0.2]

autoencoder_regressor = AutoencoderRegressor(encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights)

autoencoder_regressor.compile(optimizer='adam', loss=[autoencoder_regressor.autoencoder_loss, autoencoder_regressor.regressor_loss])

autoencoder_regressor.fit(reduced_x_train2, [reduced_x_train2, y_train], epochs=100,
                          batch_size=32, validation_split=0.9, shuffle=True,
                          verbose=2)
I get the following error:
TypeError Traceback (most recent call last)
Input In [14], in <cell line: 60>()
56 autoencoder_regressor = AutoencoderRegressor(encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights)
58 autoencoder_regressor.compile(optimizer='adam', loss=[autoencoder_regressor.autoencoder_loss, autoencoder_regressor.regressor_loss])
---> 60 autoencoder_regressor.fit(reduced_x_train2, [reduced_x_train2, y_train], epochs=100,
61 batch_size=32, validation_split=0.9,shuffle =True,
62 verbose = 2)
TypeError: in user code:
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function *
return step_function(self, iterator)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 948, in compute_loss
return self.compiled_loss(
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 215, in __call__
metric_obj.update_state(loss_metric_value, sample_weight=batch_dim)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/metrics/base_metric.py", line 140, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/metrics/base_metric.py", line 449, in update_state **
sample_weight = tf.__internal__.ops.broadcast_weights(
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/keras_tensor.py", line 254, in __array__
raise TypeError(
TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_15'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as 'tf.cond, 'tf.function', gradient tapes, or 'tf.map_fn'. Keras Functional model construction only supports TF API calls that *do* support dispatching, such as 'tf.math.add' or 'tf.reshape'. Other APIs cannot be called directly on symbolic Kerasinputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer 'call' and calling that layer on this symbolic input/output.
Where am I going WRONG?
### EDITED question: I missed an implementation requirement.
In autoencoder_loss, autoencoder_reconstruction_loss should take (inputs, autoencoder_output), and autoencoder_regression_loss should take (targets, regressor_output).
I am not too sure how to implement this. Please help.
It seems you can't access a submodel's output layer from within your model the way you did with the line regressor_input = self.autoencoder.get_layer(index=2).output. However, here is an alternative that works:
import numpy as np
import tensorflow as tf

class AutoEncoderRegressor(tf.keras.Model):
    def __init__(self, encoder, decoder, regressor, loss_weights):
        super(AutoEncoderRegressor, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.regressor = regressor
        self.loss_weights = loss_weights

    def call(self, inputs, training=None, mask=None):
        encoded = self.encoder(inputs)
        decoded = self.decoder(encoded)
        regression = self.regressor(encoded)
        return decoded, regression

    def autoencoder_loss(self, autoencoder_output, inputs):
        return tf.keras.losses.BinaryCrossentropy()(inputs, autoencoder_output)

    def regressor_loss(self, regressor_output, targets):
        return tf.keras.losses.MeanSquaredError()(targets, regressor_output)

def main():
    input_dim = 10

    # generate random training data
    rng = np.random.default_rng()
    X = rng.random((100, input_dim))
    y = [X, rng.random((100, 1))]

    encoder = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(16, activation='relu')
    ])
    decoder = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(input_dim, activation='sigmoid')
    ])
    regressor = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation='relu', input_shape=(16,)),
        tf.keras.layers.Dense(1, activation='linear')
    ])

    model = AutoEncoderRegressor(encoder, decoder, regressor, loss_weights=[0.8, 0.2])

    model.compile(
        optimizer='adam',
        loss=[model.autoencoder_loss, model.regressor_loss],
        loss_weights=model.loss_weights
    )

    model.fit(X, y,
              epochs=100,
              batch_size=32,
              validation_split=0.9,
              shuffle=True,
              verbose=2)

if __name__ == "__main__":
    main()
I used random training data with smaller dimensions for testing here; adjust this to your actual dataset.
EDIT Just for clarification: your model has two loss functions for its two outputs. You don't have to add them together yourself, because TensorFlow does this automatically. With the loss_weights you've provided, the total loss of the model will be
total_loss = 0.8 * autoencoder_loss + 0.2 * regressor_loss
Here, autoencoder_loss is the binary cross-entropy between X_true and X_pred, and regressor_loss is the mean squared error between y_true and y_pred.
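In other words (just an illustration, using hypothetical per-batch tensors x_batch, y_batch, decoded and regression), what Keras computes from this configuration is roughly:

bce = tf.keras.losses.BinaryCrossentropy()
mse = tf.keras.losses.MeanSquaredError()

autoencoder_loss = bce(x_batch, decoded)    # first output vs. first target (X)
regressor_loss = mse(y_batch, regression)   # second output vs. second target (y)
total_loss = 0.8 * autoencoder_loss + 0.2 * regressor_loss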

TensorFlow custom model fit

I want to customize a TensorFlow model. I need a custom training algorithm like the one below, but I don't want my model to be built inside the custom model class, just the training algorithm.
class CustomModel(keras.Model):
    def __init__(self, inputs, outputs, echo=False):
        super().__init__()
        self.echo = echo

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            print(loss)
            if self.echo:
                print('*')
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

inputs = keras.Input(shape=(224, 224, 3))
x = keras.layers.Conv2D(32, (3, 3))(inputs)
x = keras.layers.Conv2D(64, 3)(x)
x = keras.layers.Conv2D(64, 3)(x)
x = keras.layers.AveragePooling2D()(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dense(3, activation='softmax')(x)

model = CustomModel(inputs, x, echo=True)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

opt = Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

epochs = 5
history = model.fit_generator(train_generator,
                              validation_data=valid_generator, verbose=1, epochs=epochs)
error:
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
You don't need to provide the (inputs, outputs) arguments in the __init__ function of your subclassed model. You can implement the call method in your subclassed model as follows:
class CustomModel(keras.Model):
    ...
    ...
    # A call function needs to be implemented
    def call(self, inputs, *args, **kwargs):
        return self(inputs)
Update
Based on the comments, here's a possible workaround: you build the model from the provided inputs/outputs within __init__.
class CustomModel(keras.Model):
    def __init__(self, inputs, x, echo=False, **kwargs):
        super().__init__(**kwargs)
        self.model = keras.Model(inputs, x)
        self.echo = echo

    def call(self, inputs, *args, **kwargs):
        return self.model(inputs)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self.model(x, training=True)  # Forward pass
            ...
        # Compute gradients
        trainable_vars = self.model.trainable_variables
        gradients = ...
        # Update weights
        ...
        return {m.name: m.result() for m in self.metrics}
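For reference, one way the elided parts could be filled in, a sketch only, assuming the same compiled loss and metrics as in the question (the attribute name self.model is kept from the snippet above):

import tensorflow as tf
from tensorflow import keras

class CustomModel(keras.Model):
    def __init__(self, inputs, x, echo=False, **kwargs):
        super().__init__(**kwargs)
        self.model = keras.Model(inputs, x)  # wrap the functional graph
        self.echo = echo

    def call(self, inputs, *args, **kwargs):
        return self.model(inputs)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self.model(x, training=True)  # Forward pass
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        if self.echo:
            tf.print(loss)
        # Compute gradients with respect to the wrapped model's weights
        trainable_vars = self.model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights and metrics
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

It would then be compiled and fitted exactly as in the question.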

tensorflow autodiff slower than pytorch's counterpart

I am using TensorFlow 2.0 and trying to evaluate gradients for backpropagation through a simple feedforward neural network. Here is what my model looks like:
def __init__(self, input_size, output_size):
    inputs = tf.keras.Input(shape=(input_size,))
    hidden_layer1 = tf.keras.layers.Dense(30, activation='relu')(inputs)
    outputs = tf.keras.layers.Dense(output_size)(hidden_layer1)
    self.model = tf.keras.Model(inputs=inputs, outputs=outputs)

    self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    self.loss_function = tf.keras.losses.Huber()
The forward pass through this network is fine, but when I use a gradient tape to train the model, it is at least 10x slower than PyTorch.
Training function:
def learn_modified_x(self, inputs, targets, actions):
    with tf.GradientTape() as tape:
        predictions = self.model(inputs)
        predictions_for_action = gather_single_along_axis(predictions, actions)
        loss = self.loss_function(targets, predictions_for_action)
    grads = tape.gradient(loss, self.model.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
I tried commenting out lines to find what was actually causing the problem and discovered that tape.gradient is a significant contributor to the slowdown.
Any idea?
PyTorch implementation
def __init__(self, input_size, nb_action):
    super(Network, self).__init__()
    self.input_size = input_size
    self.nb_action = nb_action
    self.fc1 = nn.Linear(input_size, 30)
    self.fc2 = nn.Linear(30, nb_action)

def forward(self, state):
    x = F.relu(self.fc1(state))
    q_values = self.fc2(x)
    return q_values

def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
    outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
    next_outputs = self.model(batch_next_state).detach().max(1)[0]
    target = self.gamma * next_outputs + batch_reward
    td_loss = F.smooth_l1_loss(outputs, target)
    self.optimizer.zero_grad()
    td_loss.backward(retain_variables=True)
    self.optimizer.step()
You need to use tf.function to wrap your model's call function:

def __init__(self, ...):
    ...
    self.model.call = tf.function(self.model.call)
    ...
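A related option (a sketch, not tested against the code above) is to wrap the whole training step in tf.function, so the forward pass, tape.gradient and the optimizer update all run as one compiled graph rather than eagerly:

@tf.function  # traces the step once, then runs it as a graph
def learn_modified_x(self, inputs, targets, actions):
    with tf.GradientTape() as tape:
        predictions = self.model(inputs)
        predictions_for_action = gather_single_along_axis(predictions, actions)
        loss = self.loss_function(targets, predictions_for_action)
    grads = tape.gradient(loss, self.model.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
    return loss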

Custom loss function - Keras

I'm trying to implement a mixed model where part of it is a variational autoencoder and the other part takes the latent space and makes predictions on the properties of the input. I'd like to train these two models jointly. Here are my models:
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim1, activation='relu')(inputs)
x1 = Dense(intermediate_dim2, activation='relu')(x)
x2 = Dense(intermediate_dim3, activation='relu')(x1)
z_mean = Dense(latent_dim, name='z_mean')(x2)
z_log_var = Dense(latent_dim, name='z_log_var')(x2)
# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling1')
x1 = Dense(intermediate_dim3, activation='relu')(latent_inputs)
x2 = Dense(intermediate_dim2, activation='relu')(x1)
x3 = Dense(intermediate_dim1, activation='relu')(x2)
outputs = Dense(2*original_dim+1, activation='sigmoid')(x3)
# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
#build property predictor model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling2')
x1 = Dense(64, activation='relu')(latent_inputs)
x2 = Dense(128, activation='relu')(x1)
outputs = Dense(property_dim, activation='sigmoid')(x2)
predModel = Model(latent_inputs, outputs, name='predictor')
This is the full model, which has the inputs of the encoder and the outputs of the decoder and the predictor model.
#build full model
vaeOutputs = decoder(encoder(inputs)[2])
predOutputs = predModel(encoder(inputs)[0])
vaePred = Model(inputs, [vaeOutputs,predOutputs], name='vae_fullimage')
vaePred.summary()
Now I have trouble defining the loss function and training the model:
This is my attempt:
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    help_ = "Load h5 model trained weights"
    parser.add_argument("-w", "--weights", help=help_)
    help_ = "Use mse loss instead of binary cross entropy (default)"
    parser.add_argument("-m",
                        "--mse",
                        help=help_, action='store_true')
    # args = parser.parse_args()
    args = parser.parse_known_args()[0]
    models = (encoder, decoder)

    def custom_loss(y_true, y_pred):
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        reconstruction_loss = binary_crossentropy(y_true[0], y_pred[0])
        reconstruction_loss *= original_dim
        # y_pred = predOutputs
        prediction_loss = K.square(y_pred[1] - y_true[1])
        total_loss = K.mean(prediction_loss, axis=-1) + K.mean(reconstruction_loss) + K.mean(kl_loss)
        return total_loss

    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    vaePred.compile(optimizer, custom_loss)
    vaePred.summary()

    if args.weights:
        vaePred.load_weights(args.weights)
    else:
        # train the autoencoder
        history = vaePred.fit(x=x_train, y=[x_train, property_train],
                              epochs=epochs,
                              callbacks=callbacks,
                              batch_size=batch_size,
                              validation_data=(x_test, [x_test, property_test]))
It appears that you are training an autoencoder (AE), a generative model that seeks to reproduce its own input. The outputs of a perfect AE equal its inputs, therefore you should change y_true to be the inputs.
Change:
prediction_loss = mse(y_true, predOutputs)
to:
prediction_loss = mse(inputs, predOutputs)
Note: I have not run or tested any of this code. It appears to be example code from Keras.
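If you prefer to keep the losses separate, one commonly used alternative (a sketch only, not the answer above, and untested against this exact code) is to give each output of vaePred its own loss and attach the KL term with add_loss, so the custom loss no longer has to index y_true/y_pred as lists:

# KL term built from the encoder's z_mean / z_log_var tensors defined above
kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vaePred.add_loss(K.mean(kl_loss))

vaePred.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=['binary_crossentropy', 'mse'],   # reconstruction loss, prediction loss
    loss_weights=[original_dim, 1.0]       # mirrors the scaling in custom_loss
)

history = vaePred.fit(x=x_train, y=[x_train, property_train],
                      epochs=epochs, batch_size=batch_size,
                      validation_data=(x_test, [x_test, property_test]))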

Saving and restoring Keras BLSTM CTC model

I have been working on a speech emotion recognition deep neural network. I have used a Keras bidirectional LSTM with CTC loss. I trained the model and saved it:
model_json = model.to_json()
with open("ctc_model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("ctc_weights.h5")
The problem is that I cannot use this model to test on unseen data, because the model accepts four arguments as input and calculates the CTC loss; it is only built for training. So how can I save the model in such a way that it requires only one input, not the labels and lengths? Basically, how can I save a model equivalent to this function: test_func = K.function([net_input], [output])
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    shift = 2
    y_pred = y_pred[:, shift:, :]
    input_length -= shift
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

def build_model(nb_feat, nb_class, optimizer='Adadelta'):
    net_input = Input(name="the_input", shape=(200, nb_feat))

    forward_lstm1 = LSTM(output_dim=64,
                         return_sequences=True,
                         activation="tanh")(net_input)
    backward_lstm1 = LSTM(output_dim=64,
                          return_sequences=True,
                          activation="tanh",
                          go_backwards=True)(net_input)
    blstm_output1 = Merge(mode='concat')([forward_lstm1, backward_lstm1])

    forward_lstm2 = LSTM(output_dim=64,
                         return_sequences=True,
                         activation="tanh")(blstm_output1)
    backward_lstm2 = LSTM(output_dim=64,
                          return_sequences=True,
                          activation="tanh",
                          go_backwards=True)(blstm_output1)
    blstm_output2 = Merge(mode='concat')([forward_lstm2, backward_lstm2])

    hidden = TimeDistributed(Dense(512, activation='tanh'))(blstm_output2)
    output = TimeDistributed(Dense(nb_class + 1, activation='softmax'))(hidden)

    labels = Input(name='the_labels', shape=[1], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([output, labels, input_length, label_length])

    model = Model(input=[net_input, labels, input_length, label_length], output=[loss_out])
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer, metrics=[])

    test_func = K.function([net_input], [output])

    return model, test_func

model, test_func = build_model(nb_feat=nb_feat, nb_class=nb_class, optimizer=optimizer)

for epoch in range(number_epoches):
    inputs_train = {'the_input': X_train[i:i+batch_size],
                    'the_labels': y_train[i:i+batch_size],
                    'input_length': np.sum(X_train_mask[i:i+batch_size], axis=1, dtype=np.int32),
                    'label_length': np.squeeze(y_train_mask[i:i+batch_size]),
                    }
    outputs_train = {'ctc': np.zeros([inputs_train["the_labels"].shape[0]])}

    ctcloss = model.train_on_batch(x=inputs_train, y=outputs_train)

    total_ctcloss += ctcloss * inputs_train["the_input"].shape[0] * 1.
    loss_train[epoch] = total_ctcloss / X_train.shape[0]
Here is my model summary.
Try the following solution:
import keras.backend as K

def get_prediction_function(model):
    input_tensor = model.layers[0].input
    output_tensor = model.layers[-5].output
    net_function = K.function([input_tensor, K.learning_phase()], [output_tensor])

    def _result_function(x):
        return net_function([x, 0])[0]

    return _result_function
Now your network function might be obtained by:
test_function = get_prediction_function(model)
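If you also want to persist the single-input network itself (not only build a K.function for it), one possible approach, a sketch assuming the legacy Keras version used in the question and that build_model also returns (or exposes) its net_input and output tensors, is to create a prediction-only Model that shares the trained layers and save that:

from keras.models import Model, model_from_json

# Prediction-only model sharing the already-trained layers (no labels/lengths inputs)
pred_model = Model(input=net_input, output=output)

# Save architecture and weights of the single-input model
with open("ctc_pred_model.json", "w") as json_file:
    json_file.write(pred_model.to_json())
pred_model.save_weights("ctc_pred_weights.h5")

# Later: restore it and run on unseen data
# (custom or legacy layers, if any, may need to be passed via custom_objects)
with open("ctc_pred_model.json") as json_file:
    restored = model_from_json(json_file.read())
restored.load_weights("ctc_pred_weights.h5")
probabilities = restored.predict(X_test)  # per-timestep softmax outputs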
