Keras: combine the values of two loss functions - Python

I have a model containing one encoder and two decoders, with two loss functions:
input_shape = (384, 512, 3)
model = Model(inputs=input, outputs=[1_features, 2_features])
model = build_model(input_shape, 3)
losses = {
    "loss1_output": "categorical_crossentropy",
    "loss2_output": "categorical_crossentropy"}
lossWeights = {"loss1_output": 1.0, "loss2_output": 1.0}
EPOCHS = 50
INIT_LR = 1e-3
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(optimizer=opt, loss=losses, loss_weights=lossWeights,
              metrics=["accuracy"])
I would like to combine the values of both losses into a single loss value and backpropagate the result of that combination.
My question is close to this one, which I read and tried; I found that the model calls the loss function once for each branch (output).
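For what it's worth, my reading of the compile call above (a sketch of that interpretation, not a verified answer): Keras already combines the two losses into a single scalar and backpropagates that, so nothing extra is needed beyond loss and loss_weights.
# With the compile call above, the value Keras minimizes each batch is
#   total_loss = lossWeights["loss1_output"] * loss1 + lossWeights["loss2_output"] * loss2
# i.e. a single combined scalar, backpropagated through the shared encoder.
# Changing the weights changes the mix, e.g. (hypothetical values):
model.compile(optimizer=opt,
              loss=losses,
              loss_weights={"loss1_output": 0.7, "loss2_output": 0.3},
              metrics=["accuracy"])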

Related

ValueError: Layer Discriminator expects 1 input(s), but it received 2 input tensors

I am trying to train a GAN model with the MNIST dataset. I think I have most of the pieces in place but I am getting this error:
ValueError: Layer Discriminator expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(64, 28, 28) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(64, 28, 28) dtype=float32>]
This comes from my train function when I call:
loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
Here you can see my full train function:
def train(self, X_train, batch_size=128, epochs=2000, save_interval=200):
    half_batch = batch_size // 2
    y_pos_train_dis = np.ones((half_batch, 1))
    y_neg_train_dis = np.zeros((half_batch, 1))
    y_train_GAN = np.ones((batch_size, 1))

    for epoch in range(epochs):
        # Generate training data for Discriminator
        # random half_batch amount of real images
        X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
        # random half_batch amount of generated fake images
        X_neg_train_dis = self.generator.predict(np.random.normal(0, 1, (half_batch, self.input_size[0])))
        # Shuffle and append data using sklearn shuffle function
        X_train_dis, y_train_dis = shuffle(X_neg_train_dis, X_pos_train_dis), shuffle(y_neg_train_dis, y_pos_train_dis)

        # Generate training data for combined GAN model
        X_train_GAN = np.random.normal(0, 1, (batch_size, self.input_size[0]))

        # Train Discriminator
        loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)

        # Train Generator
        loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)
and my initial model declaration:
def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
    # Input sizes
    self.img_shape = input_shape
    self.input_size = rand_vector_shape
    # optimizer
    self.opt = tf.keras.optimizers.Adam(lr, beta)

    # Create Generator model
    self.generator = self.generator_model()
    self.generator.compile(loss='binary_crossentropy', optimizer=self.opt, metrics=['accuracy'])
    # print(self.generator.summary())

    # Create Discriminator model
    self.discriminator = self.discriminator_model()
    self.discriminator.compile(loss='binary_crossentropy', optimizer=self.opt, metrics=['accuracy'])
    # print(self.discriminator.summary())

    # Set the Discriminator as non-trainable in the combined GAN model
    self.discriminator.trainable = False

    # Define model input and output
    input = tf.keras.Input(self.input_size)
    generated_img = self.generator(input)
    output = self.discriminator(generated_img)

    # Define and compile combined GAN model
    self.GAN = tf.keras.Model(input, output, name="GAN")
    self.GAN.compile(loss='binary_crossentropy', optimizer=self.opt, metrics=['accuracy'])
    return None

def discriminator_model(self):
    """Create discriminator model."""
    model = tf.keras.models.Sequential(name='Discriminator')
    model.add(layers.Flatten())
    model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))
    return model

def generator_model(self):
    """Create generator model."""
    model = tf.keras.models.Sequential(name='Generator')
    model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(units=1024, kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='relu'))
    model.add(layers.Reshape((28,28)))
    return model
I can post the full code if that would be helpful but I imagine this is a very small mistake somewhere. I looked around online and it seems sometimes this is related to using [] instead of () but that does not seem to be the case in my code (at least from what I can see).
I imagine the problem is coming directly from your shuffle function.
Try concatenating your pairs of data and then using tf.random.shuffle(tensor), like:
X_train_dis, y_train_dis = tf.random.shuffle(tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0)), tf.random.shuffle(tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0))
It looks like the issue was that shuffle was returning two separate arrays rather than a single concatenated one, so I switched the syntax to:
X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
Note: this is using the scikit-learn shuffle function.
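For context, a small sketch (with made-up toy arrays) of why the original call fails: sklearn.utils.shuffle applied to two arrays returns a list of two shuffled arrays, which train_on_batch then sees as two separate inputs, whereas concatenating first yields the single batch a one-input discriminator expects.
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle

# Hypothetical stand-ins for the real and generated halves of a batch.
x_real = np.ones((4, 28, 28), dtype=np.float32)
x_fake = np.zeros((4, 28, 28), dtype=np.float32)

# shuffle(a, b) applies the same permutation to both arrays but returns them
# as a list of two arrays -- passed to train_on_batch this looks like two
# model inputs, hence "expects 1 input(s), but it received 2 input tensors".
pair = shuffle(x_real, x_fake, random_state=0)
print(type(pair), len(pair))   # <class 'list'> 2

# Concatenating first produces one (8, 28, 28) batch for a single-input model.
x_batch = tf.concat(shuffle(x_real, x_fake, random_state=0), axis=0)
print(x_batch.shape)           # (8, 28, 28)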

CNN loss with multiple outputs?

I have the following model
def get_model():
    epochs = 100
    learning_rate = 0.1
    decay_rate = learning_rate / epochs

    inp = keras.Input(shape=(64, 101, 1), name="inputs")
    x = layers.Conv2D(128, kernel_size=(3, 3), strides=(3, 3), padding="same")(inp)
    x = layers.Conv2D(256, kernel_size=(3, 3), strides=(3, 3), padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(150)(x)
    x = layers.Dense(150)(x)
    out1 = layers.Dense(40000, name="sf_vec")(x)
    out2 = layers.Dense(128, name="ls_weights")(x)

    model = keras.Model(inp, [out1, out2], name="2_out_model")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=decay_rate),  # if needed, put back 0.001
                  loss="mean_squared_error")
    keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
    model.summary()
    return model
That is, I want to train my neural network based on a "mix" of the loss from the first output and the loss from the second output.
I train my neural network in this way:
model.fit(x_train, [sf_train, ls_filters_train], epochs=10)
and during training, for example, this is shown:
Epoch 10/10 -> loss: 0.0702 - sf_vec_loss: 0.0666 - ls_weights_loss: 0.0035
I'd like to know whether it is just a coincidence that the "loss" is nearly the sum of sf_vec_loss and ls_weights_loss, or whether Keras is actually computing it this way.
Also, is the network being trained on the combined "loss" only?
Thank you in advance :)
Following the TensorFlow documentation:
From the loss argument:
"If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses."
Remember also that you can weight the loss contributions of the different model outputs.
From the loss_weights argument:
"The loss value that will be minimized by the model will then be the weighted sum of all individual losses, weighted by the loss_weights coefficients."
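Applied to the model above (a sketch, not part of the quoted documentation): with no loss_weights given, the weights default to 1, so the reported total is simply sf_vec_loss + ls_weights_loss, which matches the training log (0.0666 + 0.0035 ≈ 0.0702 up to rounding), and gradients are taken with respect to that single combined scalar. The weights can also be set explicitly, e.g. with assumed values of 1.0 and 0.5:
# Sketch with assumed weights of 1.0 and 0.5 -- not the values used above.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=decay_rate),
    loss={"sf_vec": "mean_squared_error", "ls_weights": "mean_squared_error"},
    loss_weights={"sf_vec": 1.0, "ls_weights": 0.5},
)
# The value the optimizer minimizes is then
#   loss = 1.0 * sf_vec_loss + 0.5 * ls_weights_loss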

Keras tuner: mismatch between number of layers used and number of layers reported

Using an example from the Keras Tuner website, I wrote simple tuning code:
base_model = tf.keras.applications.vgg16.VGG16(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False

def build_model(hp):
    model = tf.keras.Sequential()
    model.add(base_model)
    for i in range(hp.Int('num_layers', 1, 2)):
        model.add(tf.keras.layers.Conv2D(filters=hp.Int('Conv2D_' + str(i),
                                                        min_value=32,
                                                        max_value=512,
                                                        step=32),
                                         kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.Dropout(hp.Choice('rate', [0.3, 0.5])))
    model.add(tf.keras.layers.GlobalAveragePooling2D())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(5, activation='softmax'))
    model.compile(optimizer=tf.keras.optimizers.RMSprop(hp.Choice('learning_rate', [1e-4, 1e-5])),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

epochs = 2
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=24,
    executions_per_trial=1,
    directory=LOG_DIR)

tuner.search_space_summary()

tuner.search(train_generator,
             callbacks=[callback],
             epochs=epochs,
             steps_per_epoch=train_generator.samples // BATCH_SIZE,
             validation_data=valid_generator,
             validation_steps=valid_generator.samples // BATCH_SIZE,
             verbose=1)

tuner.results_summary()
models = tuner.get_best_models(num_models=2)
However, when I run it with a varying number of layers, it shows a mismatch between the number of Conv2D layers reported and the value of num_layers. For example, it reports three Conv2D layer sizes and yet shows num_layers as 1. Why?
[Trial summary]
|-Trial ID: 79cd7bb6146b4c243eb2bc51f19985de
|-Score: 0.8444444537162781
|-Best step: 0
> Hyperparameters:
|-Conv2D_0: 448
|-Conv2D_1: 448
|-Conv2D_2: 512
|-learning_rate: 0.0001
|-num_layers: 1
|-rate: 0.5
Any hyperparameter seen so far will be displayed in the summary, meaning that once a trial containing three layers has been run, all subsequent summaries will contain three layer sizes. It does not mean all three layers are used, which is indicated by the num_layers: 1 entry for this particular trial.
See omalleyt12's post here for more details:
https://github.com/keras-team/keras-tuner/issues/66#issuecomment-525923517
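To illustrate (a toy sketch, not keras-tuner code): only the first num_layers of the Conv2D_i values are actually consumed when building the model for a trial; the remaining ones stay registered in the search space from earlier trials and are therefore still printed.
# Toy illustration using the values from the trial summary above.
num_layers = 1                                            # chosen for this trial
conv_sizes = {"Conv2D_0": 448, "Conv2D_1": 448, "Conv2D_2": 512}

used = [conv_sizes[f"Conv2D_{i}"] for i in range(num_layers)]
print(used)   # [448] -- only one Conv2D layer is added to the model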

How to deactivate a dropout layer called with training=True in a Keras model?

I wish to view the final output of training a tf.keras model. In this case it would be an array of predictions from the softmax function, e.g. [0,0,0,1,0,1].
Other threads on here have suggested using model.predict(training_data), but this won't work for my situation since I am using dropout at training and validation, so neurons are randomly dropped and predicting again with the same data will give a different result.
def get_model():
    inputs = tf.keras.layers.Input(shape=(input_dims,))
    x = tf.keras.layers.Dropout(rate=dropout_rate)(inputs, training=True)
    x = tf.keras.layers.Dense(units=29, activation='relu')(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x, training=True)
    x = tf.keras.layers.Dense(units=15, activation='relu')(x)
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    return model

myModel = get_model()
myModel.summary()
myModel.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(X_val, y_val))
In tensorflow, you can grab the output of a model after training quite easily. Here is an example from a Github repo:
input = tf.placeholder(tf.float32, shape=[None, INPUT_DIMS])
labels = tf.placeholder(tf.float32, shape=[None])

hidden = tf.nn.tanh(make_nn_layer(normalized, NUM_HIDDEN))
logits = make_nn_layer(hidden, NUM_CLASSES)
outputs = tf.argmax(logits, 1)

int_labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, int_labels, name='xentropy')
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

correct_prediction = tf.equal(outputs, int_labels)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    validation_dict = {
        input: validation_data[:,0:7],
        labels: validation_data[:,7]}

    for i in range(NUM_BATCHES):
        batch = training_data[numpy.random.choice(training_size, BATCH_SIZE, False),:]
        train_step.run({input: batch[:,0:7], labels: batch[:,7]})
        if i % 100 == 0 or i == NUM_BATCHES - 1:
            print('Accuracy %.2f%% at step %d' % (accuracy.eval(validation_dict) * 100, i))

    output_data = outputs.eval({input: data_vector[:,0:7]})
The only output I can get from the trained model appears to be a history object. There is also a myModel.output object, but it is a tensor that I can't evaluate without putting data into it. Any ideas?
As far as I know, you can't turn off dropout after passing training=True when calling the layers (unless you transfer the weights to a new model with the same architecture). However, you can instead build and train your model the normal way (i.e. without using the training argument in the calls) and then selectively turn the dropout layers on and off at test time by defining a backend function (i.e. keras.backend.function()) and setting the learning phase (i.e. keras.backend.learning_phase()):
# build your model normally (i.e. without using `training=True` argument)
# train your model...
from keras import backend as K
func = K.function(model.inputs + [K.learning_phase()], model.outputs)
# run the model with dropout layers being active, i.e. learning_phase == 1
preds = func(list_of_input_arrays + [1])
# run the model with dropout layers being inactive, i.e. learning_phase == 0
preds = func(list_of_input_arrays + [0])
Update: As I suggested above, another approach is to define a new model with the same architecture but without setting training=True, and then transfer the weights from the trained model to this new model. To achieve this, I just add a training argument to your get_model() function:
def get_model(training=None):
    inputs = tf.keras.layers.Input(shape=(input_dims,))
    x = tf.keras.layers.Dropout(rate=dropout_rate)(inputs, training=training)
    x = tf.keras.layers.Dense(units=29, activation='relu')(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x, training=training)
    x = tf.keras.layers.Dense(units=15, activation='relu')(x)
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    return model

# build a model with dropout layers active in both training and test phases
myModel = get_model(training=True)
# train the model
myModel.fit(...)

# build a clone of the model with dropouts deactivated in test phase
myTestModel = get_model()  # note: `training` is `None` by default
# transfer the weights from the trained model to this model
myTestModel.set_weights(myModel.get_weights())

# use the new model in test phase; the dropouts would not be active
myTestModel.predict(...)

Same NN architecture giving different accuracies in tensor flow and keras

A neural network trained on the iris dataset using [4, 4] hidden layers, created separately in TensorFlow and Keras, gives different results.
While the TensorFlow model gives 96.6% accuracy on the test set, the Keras model gives only around 50%. The various hyperparameters like learning rate, optimizer, mini-batch size, etc. were the same in both cases.
Keras model
model = Sequential()
model.add(Dense(units = 4, activation = 'relu', input_dim = 4))
model.add(Dropout(0.25))
model.add(Dense(units = 4, activation = 'relu'))
model.add(Dropout(0.25))
model.add(Dense(units = 3, activation = 'softmax'))
adam = Adam(epsilon = 10**(-6), lr = 0.01)
model.compile(optimizer = 'adagrad', loss = 'categorical_crossentropy', metrics = ['accuracy'])
one_hot_labels = keras.utils.to_categorical(y_train, num_classes = 3)
model.fit(X_train, one_hot_labels, epochs = 50, batch_size = 40)
Tensorflow model
feature_columns = [tf.feature_column.numeric_column(key = name,
                                                    shape = (1),
                                                    dtype = tf.float32) for name in list(X_train.columns)]

classifier = tf.estimator.DNNClassifier(hidden_units = [4, 4],
                                        feature_columns = feature_columns,
                                        n_classes = 3,
                                        dropout = 0.25,
                                        model_dir = './DNN_model')

train_input_fn = tf.estimator.inputs.pandas_input_fn(x = X_train,
                                                     y = y_train,
                                                     batch_size = 40,
                                                     num_epochs = 50,
                                                     shuffle = False)

classifier.train(input_fn = train_input_fn, steps = None)
For the Keras model, I did try changing the learning rate, increasing the number of epochs, using different optimizers, etc. Even so, the accuracy remained poor. Clearly, the two models are doing different things, but on the surface they seem identical to me in all the key respects.
Any help is appreciated.
They have the same architecture, and that's all.
The difference in performance comes from one or more of these factors:
You have Dropout, so your networks start out behaving differently on every run (check how Dropout works);
Weight initialization: which method are you using in Keras, and which in TensorFlow?
Check all the parameters of the optimizer.
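As one way to tighten the comparison (a sketch under assumptions, not the original poster's code): pin the weight initializer explicitly and pass the configured Adam optimizer to compile; note that the Keras snippet above builds an Adam instance but then compiles with the string 'adagrad'.
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout

# Assumed initializer and seed, chosen only to make runs comparable.
init = keras.initializers.GlorotUniform(seed=42)

model = keras.Sequential([
    Dense(4, activation='relu', input_dim=4, kernel_initializer=init),
    Dropout(0.25),
    Dense(4, activation='relu', kernel_initializer=init),
    Dropout(0.25),
    Dense(3, activation='softmax', kernel_initializer=init),
])

# Pass the optimizer object itself instead of the string 'adagrad'.
adam = keras.optimizers.Adam(learning_rate=0.01, epsilon=1e-6)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])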
