Custom keras metric returning nan as result - python

I wrote the custom metric below for y_true and y_pred of shape (img_h, img_w, num_classes) (the predictions and true masks are one-hot encoded, and this shape excludes the batch dimension) for multiclass semantic segmentation. The metric returns a mean F1 score (the average of the F1 scores of all classes in the predicted mask) computed from the y_true and y_pred values. But when fitting the model with TensorFlow's model.fit() method, the training log always shows nan for this metric. I have written custom metrics before and there is usually some value shown. Please help me understand my mistake.
The code:
class MeanF1Score(tf.keras.metrics.Metric):
    def __init__(self):
        super(MeanF1Score, self).__init__(name='mean_f1_score', dtype=tf.float32)
        self.metric = self.add_weight(name='metric', initializer='zeros')
        self.sample = self.add_weight(name='sample', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.argmax(y_true, axis=-1)
        y_true = tf.reshape(y_true, shape=[-1])
        y_pred = tf.argmax(y_pred, axis=-1)
        y_pred = tf.reshape(y_pred, shape=[-1])
        cm = tf.math.confusion_matrix(y_true, y_pred)
        tp = tf.linalg.diag_part(cm)
        tp_fp = tf.reduce_sum(cm, axis=1)
        tp_fn = tf.reduce_sum(cm, axis=0)
        precision = tp / tp_fp
        recall = tp / tp_fn
        f1 = (2.0 * precision * recall) / (precision + recall)
        f1 = tf.cast(f1, dtype=tf.float32)
        f1 = tf.reduce_sum(f1)
        self.metric.assign_add(f1)
        self.sample.assign_add(1.0)

    def result(self):
        return self.metric / self.sample

    def reset_state(self):
        self.metric = self.add_weight(name='metric', initializer='zeros')
        self.sample = self.add_weight(name='sample', initializer='zeros')
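A likely culprit (my reading of the code, not a confirmed diagnosis): tf.math.confusion_matrix infers the number of classes from each batch, and for any class that has no predicted or no ground-truth pixels the division becomes 0/0, which yields nan and then poisons the reduce_sum. A minimal sketch of a guarded per-class F1, assuming the number of classes is known up front and passed in:

# Sketch only: guard the divisions so absent classes contribute 0 instead of nan.
def batch_mean_f1(y_true, y_pred, num_classes):
    y_true = tf.reshape(tf.argmax(y_true, axis=-1), [-1])
    y_pred = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
    # Fix the class count so the matrix shape does not change between batches.
    cm = tf.cast(tf.math.confusion_matrix(y_true, y_pred, num_classes=num_classes), tf.float32)
    tp = tf.linalg.diag_part(cm)
    tp_fp = tf.reduce_sum(cm, axis=0)  # column sums: predicted counts per class
    tp_fn = tf.reduce_sum(cm, axis=1)  # row sums: true counts per class
    precision = tf.math.divide_no_nan(tp, tp_fp)
    recall = tf.math.divide_no_nan(tp, tp_fn)
    f1 = tf.math.divide_no_nan(2.0 * precision * recall, precision + recall)
    return tf.reduce_mean(f1)  # mean over classes, not a sum

Two smaller points, also only suggestions: the original sums the per-class F1 values instead of averaging them, and reset_state should reset the existing variables (for example self.metric.assign(0.0)) rather than call add_weight again.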

Related

Tensorflow custom loss for multiple outputs model

I am working on an image segmentation and classification model which takes one 3D image (64x64x64) and returns two outputs (a 3D mask 64x64x64 and a one-hot encoded category). The two outputs have been defined like this:
seg_final_stage = Conv3D(seg_classes, kernel_size=3, strides=1, padding="same", name="Seg_Final")(decoder1)
output_seg = Activation('sigmoid', name = "Seg_Final_Sigmoid")(seg_final_stage)
class_final = Dense(texture_classes, name="Class_Final")(class_dense3)
output_class = Activation('softmax', name = "Class_Final_SoftMax")(class_final)
model = Model(input, [output_seg, output_class], name = Config["Model_Name"])
There are two different methods that I have tried, but both failed; it seems like each output returns its own loss instead of just one loss. The following are the loss functions I have now.
# Custom loss function
def dice_coef(y_true, y_pred):
    smooth = 1.
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    return 1 - dice_coef(y_true, y_pred)

def CCE(y_true, y_pred):
    class_loss = CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    return class_loss(y_true, y_pred)

def hybrid(y_true, y_pred):
    mask_weight = 0.8
    class_weight = 0.2
    mask_loss = dice_loss(y_true[0], y_pred[0])
    class_loss = CCE(y_true[1], y_pred[1])
    return mask_weight * mask_loss + class_weight * class_loss
The first try:
I use a single hybrid loss function that tries to sum the dice loss and CCE together with weights. However, the model returns 2 losses, output_seg_hybrid and output_class_hybrid. I thought y_true and y_pred were formed as arrays, so that I could take the first item to calculate the dice loss and the second item for CCE.
model.compile(loss=hybrid,
              optimizer=Nadam(learning_rate=Config["learning_rate"], beta_1=0.9, beta_2=0.999, epsilon=1e-07, name="Nadam"),
              metrics=[hybrid])
The second try:
I applied 2 losses as below, to ensure each output has a corresponding loss, so that the model returns 2 losses and sums them up with weights. What I actually get is 4 losses: Seg_Final_Sigmoid_dice_loss, Class_Final_SoftMax_dice_loss, Seg_Final_Sigmoid_CCE and Class_Final_SoftMax_CCE.
model.compile(loss={'Seg_Final_Sigmoid': dice_loss, 'Class_Final_SoftMax': CCE},
              optimizer=model.optimizer, metrics=[dice_loss, CCE], loss_weights=[0.8, 0.2])
What should I do if I just want to sum-up these two losses as one loss?
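If the goal is one scalar that is the weighted sum of the two losses, the second attempt is already close: with a per-output loss dict plus loss_weights, Keras optimizes and logs a single total loss equal to 0.8 * dice + 0.2 * CCE, and the per-output entries are just its components. The four extra entries come from metrics=[dice_loss, CCE] being applied to both outputs. A minimal sketch, assuming the model, Config and loss functions defined above:

# Sketch: Keras sums the weighted per-output losses into the single 'loss' it optimizes.
model.compile(
    optimizer=Nadam(learning_rate=Config["learning_rate"]),
    loss={'Seg_Final_Sigmoid': dice_loss, 'Class_Final_SoftMax': CCE},
    loss_weights={'Seg_Final_Sigmoid': 0.8, 'Class_Final_SoftMax': 0.2},
)

Dropping the shared metrics list (or scoping metrics per output with a dict) leaves just the combined loss and the two per-output losses in the log.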

Optimizing Loss Function for Speed in Tensorflow

I'm working on a U-Net model for image inpainting. In order to successfully train the model I've had to create a custom loss function. However, this loss function is extremely slow compared to the standard loss functions. Is there any way this loss function can be optimized? The slow portion is the MSGMS calculation. Before I introduced this loss component, all operations were using tensor cores on my NVIDIA 2070 Super. I can see in the TF profiler that the most time-consuming operations with this loss component aren't using tensor cores (see image).
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa

def tf_median_blur(filter_shape):
    def median_filter2d(image):
        return tfa.image.median_filter2d(
            image, filter_shape=filter_shape, padding='REFLECT'
        )
    return median_filter2d

def gms(x_true, y_pred, c=0.0026):
    x = tf.reduce_mean(x_true, axis=-1, keepdims=True)
    y = tf.reduce_mean(y_pred, axis=-1, keepdims=True)
    g_true = tfio.experimental.filter.prewitt(tf_median_blur((3, 3))(x))
    g_pred = tfio.experimental.filter.prewitt(tf_median_blur((3, 3))(y))
    g_map = (2 * g_true * g_pred + c) / (g_true ** 2 + g_pred ** 2 + c)
    gms_loss = 1 - tf.reduce_mean(g_map)
    return gms_loss

def msgms(x_true, y_pred):
    total_loss = gms(x_true, y_pred)
    for _ in range(3):
        x_true = tf.nn.avg_pool2d(x_true, ksize=2, strides=2, padding='SAME')
        y_pred = tf.nn.avg_pool2d(y_pred, ksize=2, strides=2, padding='SAME')
        total_loss += gms(x_true, y_pred)
    return total_loss / 4

def CustomLossFunc(y_true, y_pred, gamma=1, alpha=1, beta=1):
    y_true = tf.cast(y_true, dtype=tf.float32)
    y_pred = tf.cast(y_pred, dtype=tf.float32)
    l2_loss = tf.losses.Huber()(y_true, y_pred)
    gms_loss = msgms(y_true, y_pred)
    ssim_loss = SSIMLoss(y_true, y_pred)
    # tf.print('\n', 'l2_loss: ', l2_loss, ' gms_loss: ', gms_loss, ' ssim_loss: ', ssim_loss)
    loss = (
        gamma * l2_loss
        + alpha * gms_loss
        + beta * ssim_loss
    )
    return loss

model.compile(
    optimizer=optimizer,
    # loss=simple_diff
    # loss=SSIMLoss
    # loss=tf.keras.losses.mean_squared_logarithmic_error
    # loss=MSE,
    loss=CustomLossFunc
)
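Not from the thread, just one direction that might be worth profiling: the tfa.image.median_filter2d / tfio.experimental.filter.prewitt combination may be the part that falls off the tensor cores. A Prewitt-style gradient magnitude can also be expressed as a plain fixed-weight tf.nn.conv2d, which keeps the loss in standard convolution kernels. A sketch under that assumption (single-channel input, as produced by the reduce_mean above, and with the median blur left out):

# Sketch: Prewitt gradient magnitude via a fixed-weight convolution (hypothetical replacement).
_prewitt_x = tf.constant([[-1., 0., 1.], [-1., 0., 1.], [-1., 0., 1.]])
_prewitt_y = tf.transpose(_prewitt_x)
# Kernel shape [3, 3, in_channels=1, out_channels=2]: one output channel per direction.
_prewitt_kernel = tf.stack([_prewitt_x, _prewitt_y], axis=-1)[:, :, tf.newaxis, :]

def prewitt_magnitude(x):
    # x: [batch, H, W, 1] float tensor
    grads = tf.nn.conv2d(x, _prewitt_kernel, strides=1, padding='SAME')
    return tf.sqrt(tf.reduce_sum(tf.square(grads), axis=-1, keepdims=True) + 1e-12)

Whether this actually lands on tensor cores still depends on dtype and channel alignment, so treat it as a candidate to benchmark against the tfio version rather than a guaranteed speedup.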

VAE autoencoder training fit_generator doesn't seems reduced the loss

I have tried to use the fit_generator function. It's weird that when I use fit_generator, the loss only seems to go down a little. I have already trained for about 5 days and my loss has only decreased from 129 to 119, but from what I found online the training should take only about 8 hours. I think the problem is on my side, but I don't know what it is, so I hope somebody can help me. My data was only 100 images.
Below is my training code
vae_model.compile(optimizer=adam_optimizer, loss=total_loss, metrics=[r_loss, kl_loss], experimental_run_tf_function=False)

N_EPOCHS = 3000
zz = '/weights.h5'
vae_model.load_weights(zz)
checkpoint_vae = ModelCheckpoint(zz, save_weights_only=True, verbose=1)

vae_model.fit_generator(data_flow,
                        shuffle=False,
                        epochs=N_EPOCHS,
                        initial_epoch=0,
                        steps_per_epoch=tr_num / BATCH_SIZE,
                        callbacks=[checkpoint_vae])

LEARNING_RATE = 0.0005
LOSS_FACTOR = 10000

def r_loss(y_true, y_pred):
    return K.mean(K.square(y_true - y_pred), axis=[1, 2, 3])

def kl_loss(y_true, y_pred):
    kl_loss = -0.5 * K.sum(1 + log_var - K.square(mean_mu) - K.exp(log_var), axis=1)
    return kl_loss

def total_loss(y_true, y_pred):
    return LOSS_FACTOR * r_loss(y_true, y_pred) + kl_loss(y_true, y_pred)

adam_optimizer = Adam(lr=LEARNING_RATE, beta_1=0.5, beta_2=0.9)
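One side note, not taken from the thread: fit_generator is deprecated in TF 2.x (model.fit accepts generators directly), and steps_per_epoch is expected to be an integer, while tr_num / BATCH_SIZE is a float. A sketch of the equivalent call under those assumptions:

# Sketch: model.fit accepts generators in TF 2.x; steps_per_epoch must be an int.
vae_model.fit(
    data_flow,
    shuffle=False,
    epochs=N_EPOCHS,
    initial_epoch=0,
    steps_per_epoch=tr_num // BATCH_SIZE,
    callbacks=[checkpoint_vae],
)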

How to define a custom f1 metric in Keras

I'm defining a custom F1 metric in Keras for a multiclass classification problem (in particular, n_classes = 4, so the output layer has 4 neurons and a softmax activation function). The idea is to keep track of the true positives, false negatives and false positives so as to gradually update the F1 score batch after batch. The code is the following:
def compute_confusion_matrix(true, pred, K):
    result = tf.zeros((K, K), dtype=tf.int32)
    for i in range(len(true)):
        result = tf.tensor_scatter_nd_add(tensor=result, indices=tf.constant([[true[i], pred[i]]]),
                                          updates=tf.constant([1]))
    return result

def f1_function(y_true, y_pred):
    k = 4
    y_pred_lab = np.argmax(y_pred, axis=1)
    conf_mat = compute_confusion_matrix(y_true, y_pred_lab, K=k)
    tp = tf.linalg.tensor_diag_part(conf_mat)
    fp = tf.reduce_sum(conf_mat, axis=0) - tp
    fn = tf.reduce_sum(conf_mat, axis=1) - tp
    support = tf.reduce_sum(conf_mat, axis=1)
    return tp, fp, fn, support
The f1_function returns the true positives, false positives, false negatives and the support of each class exploiting the confusion matrix computed through the compute_confusion_matrix function. Even though these functions work when called separately, problems arise when called during the model fit.
The custom metric is defined by subclassing keras.metrics.Metric as follows:
class F1Metric(keras.metrics.Metric):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.f1_fn = f1_function
        self.tp_count = self.add_weight("tp_count", initializer="zeros", shape=(4,))
        self.fp_count = self.add_weight("fp_count", initializer="zeros", shape=(4,))
        self.fn_count = self.add_weight("fn_count", initializer="zeros", shape=(4,))
        self.support_total = self.add_weight("support_total", initializer="zeros", shape=(4,))

    def update_state(self, y_true, y_pred, sample_weight=None):
        tp, fp, fn, support = self.f1_fn(y_true, y_pred)
        print(tp)
        self.tp_count.assign_add(tf.cast(tp, dtype=tf.float32))
        self.fp_count.assign_add(tf.cast(fp, dtype=tf.float32))
        self.fn_count.assign_add(tf.cast(fn, dtype=tf.float32))
        self.support_total.assign_add(tf.cast(support, dtype=tf.float32))

    def result(self):
        precisions = self.tp_count / (self.tp_count + self.fp_count)
        recalls = self.tp_count / (self.tp_count + self.fn_count)
        f1 = tf.constant(2, dtype=tf.float32) * (precisions * recalls) / (precisions + recalls)
        weighted_f1 = (f1 * self.support_total) / tf.reduce_sum(tf.cast(self.support_total, dtype=tf.float32))
        return weighted_f1
When I use this metric in model.fit I get this error: TypeError: Scalar tensor has no len().
Any explanation for this problem? Thanks.
EDIT:
The problem above was due to the type of y_true passed to f1_function, which was <class 'tensorflow.python.framework.ops.EagerTensor'>. So I transformed it into a 1D array by doing y_true = np.ravel(y_true).
However, during the fit it gives me the following error close to the end of the first epoch:
"Cannot assign to variable tp_count:0 due to variable shape (4,) and value shape () are incompatible."
The only thing I noticed is that the length of y_true and y_pred is no longer the same as the batch size (32) but less than that. That limit should be given by the size of the training set, so it shouldn't be the cause of the problem. Any idea?
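One graph-friendly direction to try (a sketch under my own assumptions, not a confirmed fix for the shape error): np.argmax, np.ravel and the Python loop in compute_confusion_matrix only work while the metric runs eagerly, whereas tf.math.confusion_matrix with an explicit num_classes always returns a (4, 4) matrix regardless of which classes appear in the (possibly smaller) final batch. An update_state along those lines, assuming integer class labels in y_true:

    # Sketch: a graph-friendly update_state for F1Metric, assuming 4 classes and sparse integer y_true.
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.reshape(tf.cast(y_true, tf.int32), [-1])
        y_pred = tf.argmax(y_pred, axis=-1, output_type=tf.int32)
        cm = tf.math.confusion_matrix(y_true, y_pred, num_classes=4)  # always (4, 4)
        tp = tf.linalg.diag_part(cm)
        self.tp_count.assign_add(tf.cast(tp, tf.float32))
        self.fp_count.assign_add(tf.cast(tf.reduce_sum(cm, axis=0) - tp, tf.float32))
        self.fn_count.assign_add(tf.cast(tf.reduce_sum(cm, axis=1) - tp, tf.float32))
        self.support_total.assign_add(tf.cast(tf.reduce_sum(cm, axis=1), tf.float32))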

How to build a basic mnist neural net using tensorflow 2.0?

I'm trying to build a neural network model using TensorFlow 2.0, and I couldn't find anything online on how to do it in TensorFlow 2.0.
I've tried, but I couldn't figure out how to apply gradients and so on.
Here's what I've tried:
import math
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = tf.reshape(x_train, shape=(60000, 28*28))
x_test = tf.reshape(x_test, shape=(10000, 28*28))
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)

n_input = 784
h1 = 512
h2 = 128
n_classes = 10

# weights and bias initializations
f1 = tf.Variable(tf.random.uniform(shape=(n_input, h1), minval=-(math.sqrt(6)/math.sqrt(n_input+h1)),
                                   maxval=(math.sqrt(6)/math.sqrt(n_input+h1))))  # Xavier uniform
f2 = tf.Variable(tf.random.uniform(shape=(h1, h2), minval=-(math.sqrt(6)/math.sqrt(h1+h2)),
                                   maxval=(math.sqrt(6)/math.sqrt(h1+h2))))
out = tf.Variable(tf.random.uniform(shape=(h2, n_classes), minval=-(math.sqrt(6/(h2+n_classes))),
                                    maxval=math.sqrt(6/(h2+n_classes))))
b1 = tf.Variable(tf.random.uniform([h1]))
b2 = tf.Variable(tf.random.uniform([h2]))
b_out = tf.Variable(tf.random.uniform([n_classes]))

def mlp(x):
    input1 = tf.nn.sigmoid(tf.add(tf.matmul(x, f1), b1))
    input2 = tf.nn.sigmoid(tf.add(tf.matmul(input1, f2), b2))
    output = tf.nn.softmax(tf.add(tf.matmul(input2, out), b_out))
    return output

n_shape = x_train.shape[0]
epochs = 2
batch_size = 128
lr_rate = 0.001

data_gen = tf.data.Dataset.from_tensor_slices((x_train, y_train)).repeat().shuffle(n_shape).batch(batch_size)

def grad(x, y):
    with tf.GradientTape() as tape:
        y_pred = mlp(x)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)
        loss = tf.reduce_mean(loss)
    return tape.gradient(loss, [w, b])

optimizer = tf.keras.optimizers.Adam(lr_rate)
for _ in range(epochs):
    no_steps = int(60000/128)
    for (batch_xs, batch_ys) in data_gen.take(no_steps):
I just can't figure out how to proceed further in this case. I would really appreciate the help. Thanks.
There are the following problems in your code:
You forgot to rescale your data: x_train, x_test = x_train / 255.0, x_test / 255.0
w and b in the line tape.gradient(loss, [w, b]) are not defined.
The valid labels dtype in tf.nn.sparse_softmax_cross_entropy_with_logits should be int32 or int64, while for logits it should be float16, float32, or float64. In your case, the labels are uint8. Convert them to int32 before passing, like below:
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.cast(y, dtype=tf.int32), logits=y_pred)
As per the official documentation,
WARNING: This op expects unscaled logits, since it performs a softmax
on logits internally for efficiency. Do not call this op with the
output of softmax, as it will produce incorrect results.
So, remove tf.nn.softmax from the output of the mlp function, as this op performs the softmax on the logits internally.
For more info on tf.nn.sparse_softmax_cross_entropy_with_logits, check this.
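In code, that means mlp should return the raw logits (a minimal sketch of that one change):

def mlp(x):
    # Hidden layers unchanged; return logits, no softmax here.
    input1 = tf.nn.sigmoid(tf.add(tf.matmul(x, f1), b1))
    input2 = tf.nn.sigmoid(tf.add(tf.matmul(input1, f2), b2))
    return tf.add(tf.matmul(input2, out), b_out)  # logits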
You should modify your grad function and for loop to something like below:
def grad(x, y):
    with tf.GradientTape() as tape:
        y_pred = mlp(x)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.cast(y, dtype=tf.int32), logits=y_pred)
        loss = tf.reduce_mean(loss)
    return loss, tape.gradient(loss, [f1, b1, f2, b2, out, b_out])

optimizer = tf.keras.optimizers.Adam(lr_rate)
for epoch in range(epochs):
    no_steps = n_shape // batch_size
    for (batch_xs, batch_ys) in data_gen.take(no_steps):
        cost, grads = grad(batch_xs, batch_ys)
        optimizer.apply_gradients(zip(grads, [f1, b1, f2, b2, out, b_out]))
    print('epoch: {} loss: {}'.format(epoch, cost))
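Not part of the original answer, but as a quick sanity check after training, test-set accuracy can be computed directly from the logits (a sketch assuming the variables above and that x_test was rescaled by 255.0 as noted earlier):

# Sketch: test-set accuracy from the trained weights.
test_logits = mlp(x_test)
test_preds = tf.argmax(test_logits, axis=1, output_type=tf.int32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(test_preds, tf.cast(y_test, tf.int32)), tf.float32))
print('test accuracy: {}'.format(accuracy.numpy()))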
