I want to use different loss functions in the training and validation phases. I tried in_train_phase but it doesn't work.
So I wonder: can I disable the val_loss calculation?
Below is an example with a custom loss function:
# Imports added for completeness
from keras.layers import Input, Dense
from keras.models import Model
from keras import backend as K

# Build a model
inputs = Input(shape=(128,))
layer1 = Dense(64, activation='relu')(inputs)
layer2 = Dense(64, activation='relu')(layer1)
predictions = Dense(10, activation='softmax')(layer2)
model = Model(inputs=inputs, outputs=predictions)

# Define custom loss
def custom_loss(layer):
    # Create a loss function that adds the mean of all squared activations
    # of a specific layer to the MSE loss
    def loss(y_true, y_pred):
        return K.mean(K.square(y_pred - y_true) + K.square(layer), axis=-1)
    # Return a function
    return loss

# Compile the model
model.compile(optimizer='adam',
              # pass the output tensor of the selected layer
              # (note: its shape must be broadcastable with y_pred - y_true)
              loss=custom_loss(layer2),
              metrics=['accuracy'])

# Train
model.fit(data, labels)
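For reference, here is a minimal sketch of the in_train_phase approach mentioned above (an assumption on my part, using the standalone Keras backend; the expressions inside the function are illustrative). The backend picks the first expression in the training phase and the second during evaluation, so val_loss is not disabled but is computed from the alternative expression:

from keras import backend as K

def train_val_loss(y_true, y_pred):
    # loss used while training
    train_loss = K.mean(K.square(y_pred - y_true), axis=-1)
    # loss reported during validation/evaluation
    # (could also be K.zeros_like(train_loss) to effectively ignore val_loss)
    val_loss = K.mean(K.abs(y_pred - y_true), axis=-1)
    # in_train_phase selects train_loss in the training phase and val_loss otherwise
    return K.in_train_phase(train_loss, val_loss)

model.compile(optimizer='adam', loss=train_val_loss, metrics=['accuracy'])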
I have the following model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def get_model():
    epochs = 100
    learning_rate = 0.1
    decay_rate = learning_rate / epochs
    inp = keras.Input(shape=(64, 101, 1), name="inputs")
    x = layers.Conv2D(128, kernel_size=(3, 3), strides=(3, 3), padding="same")(inp)
    x = layers.Conv2D(256, kernel_size=(3, 3), strides=(3, 3), padding="same")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(150)(x)
    x = layers.Dense(150)(x)
    out1 = layers.Dense(40000, name="sf_vec")(x)
    out2 = layers.Dense(128, name="ls_weights")(x)
    model = keras.Model(inp, [out1, out2], name="2_out_model")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=decay_rate),  # if needed, set back to 0.001
                  loss="mean_squared_error")
    keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
    model.summary()
    return model
that is, I want to train my neural network based on the "mix" of the loss from the first output and the loss from the second output.
I train my neural network in this way:
model.fit(x_train, [sf_train, ls_filters_train], epochs=10)
and during training, for example, this is shown:
Epoch 10/10 -> loss: 0.0702 - sf_vec_loss: 0.0666 - ls_weights_loss: 0.0035
I'd like to know whether it's a coincidence that the "loss" is nearly the sum of sf_vec_loss and ls_weights_loss, or whether Keras actually computes it that way.
Also, is the network being trained on the "loss" only?
Thank you in advance :)
Following the TensorFlow documentation...
from the loss argument:
If the model has multiple outputs, you can use a different loss on
each output by passing a dictionary or a list of losses. The loss
value that will be minimized by the model will then be the sum of all
individual losses
Remember also that you can weight the loss contributions of the different model outputs.
from the loss_weights argument:
The loss value that will be minimized by the model will then be the
weighted sum of all individual losses, weighted by the loss_weights coefficients
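As a sanity check against the log in the question, 0.0666 + 0.0035 = 0.0701 ≈ 0.0702, which is consistent with the total being the (unweighted) sum; gradients are computed from that single combined value, while the per-output losses are only reported for monitoring. Here is a minimal sketch of making the weighting explicit for the model above, using the output names from the question (the 0.5 weight is purely illustrative):

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=decay_rate),
    loss={"sf_vec": "mean_squared_error", "ls_weights": "mean_squared_error"},
    # total loss minimized = 1.0 * sf_vec_loss + 0.5 * ls_weights_loss
    loss_weights={"sf_vec": 1.0, "ls_weights": 0.5},
)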
I want to customize the model's fit function so that gradient descent is applied to the weights only if the model improved its predictions on the validation data. The reason is that I want to prevent overfitting.
According to this guide it should be possible to customize the fit function of the model. However, the following code runs into errors:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        ### check and apply gradient
        Y_pred_val = self.predict(X_val)  # this does not work
        acc_val = calculate_accuracy(Y_val, Y_pred_val)
        if acc_val > last_acc_val:
            self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        ###

        self.compiled_metrics.update_state(y, y_pred)
        return_obj = {m.name: m.result() for m in self.metrics}
        return_obj["acc_val"] = acc_val
        return return_obj
How could it be possible to evaluate the model inside the fit function?
You don't have to subclass fit() for this; you can just write a custom training loop. Here's how I did it:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from collections import deque

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Concatenate
from tensorflow.keras.regularizers import l1, l2, l1_l2

dataset, info = tfds.load('mnist',
                          with_info=True,
                          split='train',
                          as_supervised=False)

TAKE = 1_000

data = dataset.map(lambda x: (tf.cast(x['image'], tf.float32), x['label'])).shuffle(TAKE).take(TAKE)

len_train = int(8e-1 * TAKE)
train = data.take(len_train).batch(8)
test = data.skip(len_train).take(info.splits['train'].num_examples - len_train).batch(8)
class CNN(Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = Dense(32, activation=tf.nn.relu,
                            kernel_regularizer=l1(1e-2),
                            input_shape=info.features['image'].shape)
        self.layer2 = Conv2D(filters=16,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation='relu',
                             input_shape=info.features['image'].shape)
        self.layer3 = MaxPooling2D(pool_size=(2, 2))
        self.layer4 = Conv2D(filters=32,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             activation=tf.nn.elu,
                             kernel_initializer=tf.keras.initializers.glorot_normal)
        self.layer5 = MaxPooling2D(pool_size=(2, 2))
        self.layer6 = Flatten()
        self.layer7 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l2(1e-2))
        self.layer8 = Dense(units=64,
                            activation=tf.nn.relu,
                            kernel_regularizer=l1_l2(l1=1e-2, l2=1e-2))
        self.layer9 = Concatenate()
        self.layer10 = Dense(units=info.features['label'].num_classes)

    def call(self, inputs, training=None, **kwargs):
        b = self.layer1(inputs)
        a = self.layer2(inputs)
        a = self.layer3(a)
        a = self.layer4(a)
        a = self.layer5(a)
        a = self.layer6(a)
        a = self.layer8(a)
        b = self.layer7(b)
        b = self.layer6(b)
        x = self.layer9([a, b])
        x = self.layer10(x)
        return x
cnn = CNN()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss = tf.keras.metrics.Mean()
test_loss = tf.keras.metrics.Mean()
train_acc = tf.keras.metrics.SparseCategoricalAccuracy()
test_acc = tf.keras.metrics.SparseCategoricalAccuracy()

optimizer = tf.keras.optimizers.Nadam()

template = 'Epoch {:3} Train Loss {:7.4f} Test Loss {:7.4f} ' \
           'Train Acc {:6.2%} Test Acc {:6.2%} '

epochs = 5
early_stop = epochs // 50

loss_hist = deque()
acc_hist = deque(maxlen=1)  # keeps only the most recent batch accuracy
acc_hist.append(0)
for epoch in range(1, epochs + 1):
    train_loss.reset_states()
    test_loss.reset_states()
    train_acc.reset_states()
    test_acc.reset_states()

    for images, labels in train:
        with tf.GradientTape() as tape:
            logits = cnn(images, training=True)
            loss = loss_object(labels, logits)
            train_loss(loss)
            train_acc(labels, logits)
            current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)

            if tf.greater(current_acc, acc_hist[-1]):
                print('IMPROVEMENT.')
                gradients = tape.gradient(loss, cnn.trainable_variables)
                optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
                acc_hist.append(current_acc)

    for images, labels in test:
        logits = cnn(images, training=False)
        loss = loss_object(labels, logits)
        test_loss(loss)
        test_acc(labels, logits)

    print(template.format(epoch,
                          train_loss.result(),
                          test_loss.result(),
                          train_acc.result(),
                          test_acc.result()))

    if len(loss_hist) > early_stop and loss_hist.popleft() < min(loss_hist):
        print('Early stopping. No validation loss decrease in %i epochs.' % early_stop)
        break
Output:
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 1 Train Loss 21.1698 Test Loss 21.3391 Train Acc 37.13% Test Acc 38.50%
IMPROVEMENT.
IMPROVEMENT.
IMPROVEMENT.
Epoch 2 Train Loss 13.8314 Test Loss 12.2496 Train Acc 50.88% Test Acc 52.50%
Epoch 3 Train Loss 13.7594 Test Loss 12.5884 Train Acc 51.75% Test Acc 53.00%
Epoch 4 Train Loss 13.1418 Test Loss 13.2374 Train Acc 52.75% Test Acc 51.50%
Epoch 5 Train Loss 13.6471 Test Loss 13.3157 Train Acc 49.63% Test Acc 51.50%
Here's the part that does the job: an accuracy history kept in a deque. Gradients are applied only when the current batch accuracy is greater than the last value in the deque; otherwise the update is skipped.
for images, labels in train:
    with tf.GradientTape() as tape:
        logits = cnn(images, training=True)
        loss = loss_object(labels, logits)
        train_loss(loss)
        train_acc(labels, logits)
        current_acc = tf.metrics.SparseCategoricalAccuracy()(labels, logits)

        if tf.greater(current_acc, acc_hist[-1]):
            print('IMPROVEMENT.')
            gradients = tape.gradient(loss, cnn.trainable_variables)
            optimizer.apply_gradients(zip(gradients, cnn.trainable_variables))
            acc_hist.append(current_acc)
Rather than creating a custom fit, I think it would be easier to use the ModelCheckpoint callback.
What you are trying to do is get the model with the lowest validation error. Set the callback up to monitor validation loss; that way it will save the best model even if the network starts to overfit. Documentation is here.
If you do not get a model with a satisfactory validation accuracy, then you will have to take other measures.
First, look at your training accuracy.
In my experience, you should achieve at least 95%.
If the training accuracy is good but the validation accuracy is poor and degrades as you run more epochs, that is a sign of overfitting.
You did not show the model, but if you are doing classification you will probably have dense layers with the final layer using softmax activation.
Start out with a model with only one dense layer and see if it trains well.
If not, you may have to add additional dense hidden layers. If you do, include a dropout layer to help prevent overfitting. You might also consider using regularizers. Documentation is here.
I also find you can get improved performance if you dynamically adjust the learning rate. The ReduceLROnPlateau callback enables that capability.
Set it up to monitor validation loss and to reduce the learning rate by a factor if the loss fails to decrease. Documentation is here.
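A minimal sketch of wiring up both suggested callbacks (the checkpoint file name, the factor and patience values, and the names model, x_train, y_train, x_val, y_val are illustrative placeholders):

import tensorflow as tf

callbacks = [
    # save the weights of the best model seen so far, judged by validation loss
    tf.keras.callbacks.ModelCheckpoint('best_model.h5',
                                       monitor='val_loss',
                                       save_best_only=True),
    # halve the learning rate when validation loss stops decreasing
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                         factor=0.5,
                                         patience=3),
]

model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=50,
          callbacks=callbacks)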
I am new to PyTorch and I'm training a model for binary classification of images. The images are currently stored as .npy files, and I am loading them and training my model in batches. When I run this, the loss does not decrease. When I evaluate the model on the training and test sets, the accuracy stays constant at 50%. The dataset is balanced.
I tried making the dataset smaller (around 125 images per class) and I still have the same problem. I would expect the model to overfit the training set, but this does not occur.
Please see my code below:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.fc1 = nn.Linear(in_features=128*6*6, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=100)
        self.out = nn.Linear(in_features=100, out_features=2)

    def forward(self, t):
        POOL_stride = 2
        # Conv1
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # Conv2
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # Conv3
        t = F.relu(self.conv3(t))
        t = F.max_pool2d(t, kernel_size=2, stride=POOL_stride)
        # dense 1
        t = t.reshape(-1, 128*6*6)
        t = self.fc1(t)
        t = F.relu(t)
        # dense 2
        t = self.fc2(t)
        t = F.relu(t)
        t = self.out(t)
        return t

def npy_loader(path):
    sample = torch.from_numpy(np.load(path))
    return sample

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(self.model.parameters(), lr=0.003)
model = Network()

trainset = datasets.DatasetFolder(
    root=train_dir,
    loader=npy_loader,
    extensions=['.npy']
)
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
)

for epoch in range(epochs):
    running_loss = 0
    batches = 0
    for inputs, labels in train_loader:
        batches = batches + 1
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output.squeeze(), labels.squeeze())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('Loss: {:.4f} Epoch [{}/{}]'.format(running_loss / batches, epoch, epochs))
You are providing the parameters of some other self.model to the optimizer, while the model used for calculating the loss is a different one.
optimizer = optim.Adam(self.model.parameters(), lr=0.003)
model = Network()
That is your sequence for defining the optimizer and the model. Notice that you pass the parameters of a different self.model to the optimizer, so optimizer.step() never updates the weights of the model on which the loss is being calculated. Instead it should be something like this:
model = Network()
optimizer = optim.Adam(model.parameters(), lr=0.003)
On another note, since your task is binary classification, you might also explore returning a 1-D output from the model and using binary cross-entropy loss instead of a 2-dimensional output.
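A minimal sketch of that suggestion (an illustration, not a drop-in fix): a single-unit output head with BCEWithLogitsLoss. The tensors below are random placeholders standing in for the activations entering the output layer and the binary labels.

import torch
import torch.nn as nn

# change the final layer to a single unit, e.g. self.out = nn.Linear(100, 1) in Network
head = nn.Linear(in_features=100, out_features=1)
criterion = nn.BCEWithLogitsLoss()  # applies the sigmoid internally, expects float targets

features = torch.randn(8, 100)        # stand-in for activations entering the output layer
labels = torch.randint(0, 2, (8,))    # binary labels, 0 or 1
logits = head(features).squeeze(1)    # shape: (batch,)
loss = criterion(logits, labels.float())
loss.backward()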
I wish to view the final output of training a tf.keras model. In this case it would be an array of predictions from the softmax function, e.g. [0,0,0,1,0,1].
Other threads on here have suggested using model.predict(training_data), but this won't work for my situation since I am using dropout at training and validation, so neurons are randomly dropped and predicting again with the same data will give a different result.
def get_model():
    inputs = tf.keras.layers.Input(shape=(input_dims,))
    x = tf.keras.layers.Dropout(rate=dropout_rate)(inputs, training=True)
    x = tf.keras.layers.Dense(units=29, activation='relu')(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x, training=True)
    x = tf.keras.layers.Dense(units=15, activation='relu')(x)
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    return model

myModel = get_model()
myModel.summary()

myModel.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(X_val, y_val))
In TensorFlow, you can grab the output of a model after training quite easily. Here is an example from a GitHub repo:
input = tf.placeholder(tf.float32, shape=[None, INPUT_DIMS])
labels = tf.placeholder(tf.float32, shape=[None])

hidden = tf.nn.tanh(make_nn_layer(normalized, NUM_HIDDEN))
logits = make_nn_layer(hidden, NUM_CLASSES)
outputs = tf.argmax(logits, 1)

int_labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, int_labels, name='xentropy')
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

correct_prediction = tf.equal(outputs, int_labels)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    validation_dict = {
        input: validation_data[:, 0:7],
        labels: validation_data[:, 7],
    }

    for i in range(NUM_BATCHES):
        batch = training_data[numpy.random.choice(training_size, BATCH_SIZE, False), :]
        train_step.run({input: batch[:, 0:7], labels: batch[:, 7]})

        if i % 100 == 0 or i == NUM_BATCHES - 1:
            print('Accuracy %.2f%% at step %d' % (accuracy.eval(validation_dict) * 100, i))

    output_data = outputs.eval({input: data_vector[:, 0:7]})
The only output I can get from the trained model appears to be a history object. There is also a myModel.output object, but it is a tensor that I can't evaluate without putting data into it. Any ideas?
As far as I know, you can't turn off the dropout after passing training=True when calling the layers (unless you transfer the weights to a new model with the same architecture). However, you can instead build and train your model in the normal way (i.e. without using the training argument in the calls) and then selectively turn the dropout layers on or off in the test phase by defining a backend function (i.e. keras.backend.function()) and setting the learning phase (i.e. keras.backend.learning_phase()):
# build your model normally (i.e. without using `training=True` argument)
# train your model...
from keras import backend as K
func = K.function(model.inputs + [K.learning_phase()], model.outputs)
# run the model with dropout layers being active, i.e. learning_phase == 1
preds = func(list_of_input_arrays + [1])
# run the model with dropout layers being inactive, i.e. learning_phase == 0
preds = func(list_of_input_arrays + [0])
Update: As I suggested above, another approach is to define a new model with the same architecture but without setting training=True, and then transfer the weights from the trained model to this new model. To achieve this, I just add a training argument to your get_model() function:
def get_model(training=None):
    inputs = tf.keras.layers.Input(shape=(input_dims,))
    x = tf.keras.layers.Dropout(rate=dropout_rate)(inputs, training=training)
    x = tf.keras.layers.Dense(units=29, activation='relu')(x)
    x = tf.keras.layers.Dropout(rate=dropout_rate)(x, training=training)
    x = tf.keras.layers.Dense(units=15, activation='relu')(x)
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    return model
# build a model with dropout layers active in both training and test phases
myModel = get_model(training=True)
# train the model
myModel.fit(...)
# build a clone of the model with dropouts deactivated in test phase
myTestModel = get_model() # note: the `training` is `None` by default
# transfer the weights from the trained model to this model
myTestModel.set_weights(myModel.get_weights())
# use the new model in test phase; the dropouts would not be active
myTestModel.predict(...)
I have a model containing one encoder and two decoders, with two loss functions:
input_shape = (384, 512, 3)
model = Model(inputs=input, outputs=[1_features, 2_features])
model = build_model(input_shape, 3)
losses = {
    "loss1_output": "categorical_crossentropy",
    "loss2_output": "categorical_crossentropy"}
lossWeights = {"loss1_output": 1.0, "loss2_output": 1.0}
EPOCHS = 50
INIT_LR = 1e-3
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(optimizer=opt, loss=losses, loss_weights=lossWeights,
              metrics=["accuracy"])
I would like to combine the values of both losses into one loss value and backpropagate the result of that combination.
My question is close to this one, which I read and tried; I found that the model calls the loss function once for each branch (output).
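For what it's worth, here is a minimal sketch (my own illustration, not from the linked post) of what the compile call above amounts to: both losses are computed, combined into a single scalar using the lossWeights, and a single backward pass is taken on that combined value. The batch tensors x_batch, y1_batch, y2_batch are placeholders; model and opt are the objects defined above.

import tensorflow as tf

cce = tf.keras.losses.CategoricalCrossentropy()

with tf.GradientTape() as tape:
    pred1, pred2 = model(x_batch, training=True)
    loss1 = cce(y1_batch, pred1)
    loss2 = cce(y2_batch, pred2)
    # single combined value: weighted sum using the lossWeights from the question
    total_loss = 1.0 * loss1 + 1.0 * loss2
grads = tape.gradient(total_loss, model.trainable_variables)
opt.apply_gradients(zip(grads, model.trainable_variables))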