I'm defining a custom F1 metric in Keras for a multiclass classification problem (specifically n_classes = 4, so the output layer has 4 neurons and a softmax activation). The idea is to keep track of the true positives, false negatives and false positives so as to gradually update the F1 score batch after batch. The code is the following:
def compute_confusion_matrix(true, pred, K):
    result = tf.zeros((K, K), dtype=tf.int32)
    for i in range(len(true)):
        result = tf.tensor_scatter_nd_add(tensor=result,
                                          indices=tf.constant([[true[i], pred[i]]]),
                                          updates=tf.constant([1]))
    return result

def f1_function(y_true, y_pred):
    k = 4
    y_pred_lab = np.argmax(y_pred, axis=1)
    conf_mat = compute_confusion_matrix(y_true, y_pred_lab, K=k)
    tp = tf.linalg.tensor_diag_part(conf_mat)
    fp = tf.reduce_sum(conf_mat, axis=0) - tp
    fn = tf.reduce_sum(conf_mat, axis=1) - tp
    support = tf.reduce_sum(conf_mat, axis=1)
    return tp, fp, fn, support
The f1_function returns the true positives, false positives, false negatives and the support of each class, exploiting the confusion matrix computed through the compute_confusion_matrix function. Even though these functions work when called separately, problems arise when they are called during model fitting.
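For instance, a standalone call on a small dummy batch (illustrative numbers) behaves as expected:

import numpy as np
import tensorflow as tf

y_true = np.array([0, 1, 2, 3, 1])
y_pred = np.array([[0.7, 0.1, 0.1, 0.1],
                   [0.1, 0.6, 0.2, 0.1],
                   [0.2, 0.2, 0.5, 0.1],
                   [0.1, 0.1, 0.1, 0.7],
                   [0.5, 0.2, 0.2, 0.1]])  # last sample misclassified as class 0

tp, fp, fn, support = f1_function(y_true, y_pred)
# expected: tp=[1 1 1 1], fp=[1 0 0 0], fn=[0 1 0 0], support=[1 2 1 1]
print(tp.numpy(), fp.numpy(), fn.numpy(), support.numpy())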
The custom metric is defined by subclassing keras.metrics.Metric as follows:
class F1Metric(keras.metrics.Metric):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.f1_fn = f1_function
        self.tp_count = self.add_weight("tp_count", initializer="zeros", shape=(4,))
        self.fp_count = self.add_weight("fp_count", initializer="zeros", shape=(4,))
        self.fn_count = self.add_weight("fn_count", initializer="zeros", shape=(4,))
        self.support_total = self.add_weight("support_total", initializer="zeros", shape=(4,))

    def update_state(self, y_true, y_pred, sample_weight=None):
        tp, fp, fn, support = self.f1_fn(y_true, y_pred)
        print(tp)
        self.tp_count.assign_add(tf.cast(tp, dtype=tf.float32))
        self.fp_count.assign_add(tf.cast(fp, dtype=tf.float32))
        self.fn_count.assign_add(tf.cast(fn, dtype=tf.float32))
        self.support_total.assign_add(tf.cast(support, dtype=tf.float32))

    def result(self):
        precisions = self.tp_count / (self.tp_count + self.fp_count)
        recalls = self.tp_count / (self.tp_count + self.fn_count)
        f1 = tf.constant(2, dtype=tf.float32) * (precisions * recalls) / (precisions + recalls)
        weighted_f1 = (f1 * self.support_total) / tf.reduce_sum(tf.cast(self.support_total, dtype=tf.float32))
        return weighted_f1
When I use this metric in model.fit I get this error: TypeError: Scalar tensor has no len().
Any explanation for this problem? Thanks.
EDIT:
The problem above was due to the type of y_true passed to the f1_function, which was <class 'tensorflow.python.framework.ops.EagerTensor'>. So I transformed it into a 1D array by doing y_true = np.ravel(y_true).
However, during the fit it gives me the following error close to the end of the first epoch:
"Cannot assign to variable tp_count:0 due to variable shape (4,) and value shape () are incompatible."
The only thing I noticed is that the length of y_true and y_pred is no longer the same as the batch size (32) but less than that. I assume that shorter length comes from the size of the training set (the leftover final batch), so it shouldn't be the cause of the problem. Any idea?
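For reference, a rough graph-friendly sketch of the same bookkeeping, using tf.math.confusion_matrix instead of a Python loop (class and variable names and the epsilon guards are illustrative, and this result() returns a single scalar weighted F1 rather than a per-class vector):

import tensorflow as tf
from tensorflow import keras

class WeightedF1Sketch(keras.metrics.Metric):
    def __init__(self, num_classes=4, name="weighted_f1", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes
        # single state variable: the running confusion matrix
        self.conf_mat = self.add_weight("conf_mat", shape=(num_classes, num_classes), initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.reshape(tf.cast(y_true, tf.int32), [-1])
        y_pred_lab = tf.argmax(y_pred, axis=1, output_type=tf.int32)
        self.conf_mat.assign_add(tf.math.confusion_matrix(
            y_true, y_pred_lab, num_classes=self.num_classes, dtype=tf.float32))

    def result(self):
        tp = tf.linalg.diag_part(self.conf_mat)
        fp = tf.reduce_sum(self.conf_mat, axis=0) - tp
        fn = tf.reduce_sum(self.conf_mat, axis=1) - tp
        support = tf.reduce_sum(self.conf_mat, axis=1)
        precision = tp / (tp + fp + 1e-7)
        recall = tp / (tp + fn + 1e-7)
        f1 = 2.0 * precision * recall / (precision + recall + 1e-7)
        return tf.reduce_sum(f1 * support) / tf.reduce_sum(support)

    def reset_state(self):  # reset_states() in older TF versions
        self.conf_mat.assign(tf.zeros_like(self.conf_mat))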
I am calculating the loss for a multiclass (7-class) classification program using PyTorch.
class AFL(nn.Module):
    def __init__(self, delta=0.7, gamma=2., epsilon=1e-07):
        super(AFL, self).__init__()
        self.delta = delta
        self.gamma = gamma
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        #y_pred=y_pred.size()[1]
        print(y_pred.shape)  # [32, 7]
        print(y_true.shape)  # [32]

        y_pred = torch.clamp(y_pred, self.epsilon, 1. - self.epsilon)
        cross_entropy = np.empty(y_pred.shape)
        for i in range(len(y_pred)):
            for j in range(len(y_pred[i])):
                cross_entropy[i][j] = -y_true * torch.log(y_pred[i][j])
        #cross_entropy = -y_true * torch.log(y_pred[0][0])  # here I want to calculate cross_entropy for each class

        # Calculate losses separately for each class, only suppressing background class
        back_ce = torch.pow(1 - y_pred[:, 0], self.gamma) * cross_entropy[:, 0]
        back_ce = (1 - self.delta) * back_ce

        fore_ce = cross_entropy[:, 1, :, :]
        fore_ce = self.delta * fore_ce

        loss = torch.mean(torch.sum(torch.stack([back_ce, fore_ce], axis=-1), axis=-1))
        return loss
I want to calculate back_ce for each class separately, but I am getting this error:
back_ce = torch.pow(1 - y_pred[:,0], self.gamma) * cross_entropy[:,0]
IndexError: too many indices for tensor of dimension 1
Can anyone please tell me where I am going wrong? The sizes of y_pred and y_true are given above.
Here is the AFL for multi-class with multiple common and rare classes.
class AsymmetricFocalLoss(nn.Module):
    """For Imbalanced datasets

    Parameters
    ----------
    delta : float, optional
        controls weight given to false positive and false negatives, by default 0.25
    gamma : float, optional
        Focal Tversky loss' focal parameter controls degree of down-weighting of easy examples, by default 2.0
    epsilon : float, optional
        clip values to prevent division by zero error
    common : list, required
        a list of common class indices
    rare : list, required
        a list of rare class indices
    """
    def __init__(self, common, rare, delta=0.7, gamma=2., epsilon=1e-07):
        super(AsymmetricFocalLoss, self).__init__()
        self.delta = delta
        self.gamma = gamma
        self.epsilon = epsilon
        self.common = common
        self.rare = rare

    def forward(self, y_pred, y_labels):
        # assume y_pred contains probabilities (batch_size, n_class)
        # y_labels contains integer class labels (batch_size,)

        # convert to one-hot
        y_true = torch.zeros_like(y_pred)
        for i, j in enumerate(y_labels):
            y_true[i, j] = 1

        # clamp
        y_pred = torch.clamp(y_pred, self.epsilon, 1. - self.epsilon)
        cross_entropy = -y_true * torch.log(y_pred)
        #print(f'{cross_entropy.shape=}\n{cross_entropy=}')

        # Calculate losses separately for each class
        all_ce = []
        for c in self.common:
            back_ce = (1 - self.delta) * (torch.pow(1 - y_pred[:, c], self.gamma) * cross_entropy[:, c])
            all_ce.append(back_ce)

        for r in self.rare:
            fore_ce = self.delta * cross_entropy[:, r]
            all_ce.append(fore_ce)

        loss_stack = torch.stack(all_ce, dim=-1)
        #print(f'{loss_stack.shape=}\n{loss_stack=}')
        loss_sum = torch.sum(loss_stack, dim=-1)
        #print(f'{loss_sum.shape=}\n{loss_sum=}')
        loss = torch.mean(loss_sum)
        return loss
To use this,
batch_size = 5
n_class = 7
y_pred = torch.softmax( torch.rand((batch_size, n_class)), dim=-1)
y_labels = torch.randint(0, n_class, size=(batch_size,))
print(f'{y_pred=}\n{y_labels=}')
lossF = AsymmetricFocalLoss(common = [0,2,4,6], rare = [1,3,5])
loss = lossF(y_pred, y_labels)
print(f'{loss=}')
output:
"""
y_pred=tensor([[0.1955, 0.1455, 0.0976, 0.1869, 0.1043, 0.1173, 0.1529],
[0.1613, 0.1635, 0.1121, 0.1290, 0.1571, 0.0993, 0.1777],
[0.0978, 0.1340, 0.1025, 0.1993, 0.2197, 0.1041, 0.1425],
[0.1371, 0.1113, 0.1771, 0.1560, 0.0897, 0.1554, 0.1734],
[0.1960, 0.1890, 0.1403, 0.1076, 0.1714, 0.1079, 0.0878]])
y_labels=tensor([0, 3, 2, 5, 3])
loss=tensor(1.0328)
"""
I am building a custom loss function that needs to know whether the truth and the prediction have N pixels above a threshold. This is because the logic breaks if I supply an empty np.where() array. I can get around this issue by using try/except to return a 'flagged constant' in the case that the function fails on the empty set, but I'd like to do something different. Here is my current method.
def some_loss(cutoff=20, min_pix=10):
    def gen_loss(y_true, y_pred):
        trues = tf.map_fn(fn=lambda x: x, elems=y_true)
        preds = tf.map_fn(fn=lambda x: x, elems=y_pred)
        for idx in tf.range(tf.shape(y_true)[0]):
            # binarize both by cutoff
            true = y_true[idx]
            pred = y_pred[idx]
            true = tf.where(true < cutoff, 0.0, 1.0)
            pred = tf.where(pred < cutoff, 0.0, 1.0)
            # now I sum each to get the number of pixels above threshold
            n_true, n_pred = tf.reduce_sum(true), tf.reduce_sum(pred)
            # then I create a switch using tf.cond
            switch = tf.cond(tf.logical_or(n_true < min_pix, n_pred < min_pix), lambda: tf.zeros_like(true), lambda: tf.ones_like(true))
            # this essentially allows me to turn off the loss if either condition is met
            # so I then run the function
            loss = get_loss(true, pred)  # returns random constant if either is below threshold
            loss += tf.reduce_sum(tf.math.multiply(loss, switch))
        return loss
    return gen_loss
This may work; it compiles and trains a convolutional model. However, I don't like that there are random constants wandering about my loss function, and I'd rather only run get_loss() if both true and pred meet the minimum conditions.
I'd prefer to build two tensors: one with the samples not meeting the condition, the other with the samples meeting it.
Separately, I've tried to use tf.cond to test for each case and call a separate loss function in either case. The code is repeated below.
def avgMED(scaler, cutoff=20, min_N=30, c=3):
    def AVGmed(y_true, y_pred):
        const = tf.constant([c], tf.float32)  # constant c, multiplied by MED
        batch_size = tf.cast(tf.shape(y_true)[0], tf.float32)
        MSE = tf.reduce_mean(tf.square(y_true - y_pred))
        y_true = tf.reshape(y_true, shape=(tf.shape(y_true)[0], -1))
        y_pred = tf.reshape(y_pred, shape=(tf.shape(y_pred)[0], -1))
        loss, loss_med = tf.cast(0, dtype=tf.float32), tf.cast(0, dtype=tf.float32)

        # rescale
        y_true = y_true * scaler.scale_
        y_true = y_true + scaler.mean_
        y_pred = y_pred * scaler.scale_
        y_pred = y_pred + scaler.mean_

        trues = tf.map_fn(fn=lambda x: x, elems=y_true)
        preds = tf.map_fn(fn=lambda x: x, elems=y_pred)
        min_nonzero_pixels = tf.reduce_sum(tf.constant(min_N, dtype=tf.float32))

        for idx in tf.range(batch_size):
            idx = tf.cast(idx, tf.int32)
            true = trues[idx]
            pred = preds[idx]
            MSE = tf.reduce_mean(tfm.square(tfm.subtract(true, pred)))
            true = tf.where(true < cutoff, 0.0, 1.0)
            pred = tf.where(pred < cutoff, 0.0, 1.0)
            n_true = tf.reduce_sum(true)
            n_pred = tf.reduce_sum(pred)
            loss_TA = tf.cond(tf.logical_or(n_true < min_nonzero_pixels, n_pred < min_nonzero_pixels), get_zero(true, pred), get_MED(true, pred))
            loss_med += loss_TA.read(0)
            loss += loss_med + MSE  # do we benefit from reducing across the batch dimension? we should be able to look at familiar batches and see the little increase due to the distance component
            tf.print(n_true, n_pred)
            tf.print(loss_med)
        return loss  # this is essentially MSE given c ~ 0. Thus, this will show if there are some weird gradients flowing through that are preventing the model from learning
    return AVGmed

def get_MED(A, B):
    # takes in binary tensors
    indices_A, indices_B = tf.where(A), tf.where(B)
    coordX_A_TA, coordY_A_TA = find_coord(indices_A)  # finds x,y coordinates and returns tensor array
    coordX_B_TA, coordY_B_TA = find_coord(indices_B)
    mindists_AB_TA = find_min_distances(coordX_A_TA, coordY_A_TA, coordX_B_TA, coordY_B_TA)
    mindists_BA_TA = find_min_distances(coordX_B_TA, coordY_B_TA, coordX_A_TA, coordY_A_TA)
    # MED = mean error distance
    med_AB = tf.reduce_mean(mindists_AB_TA.read(0))
    med_BA = tf.reduce_mean(mindists_BA_TA.read(0))
    avg_med = tfm.divide(tfm.add(med_AB, med_BA), tf.constant(0.5))
    loss_TA = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    loss_TA.write(loss_TA.size(), avg_med)
    return loss_TA

def get_zero(A, B):
    loss_TA = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    loss_TA.write(loss_TA.size(), 0)
    return loss_TA
However, with this framework I am now getting new errors about my generator not having enough data, which is absurd given that the batch size I test with is 10, with 1 step_per_epoch on a training set of size 100. I also got a warning about not closing the TensorArray, which I expect happens whether the conditional is true or false. I'm inching closer to a solution but could use some guidance on how problematic my TensorFlow logic is.
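For what it's worth, the "two tensors" idea can be sketched with a per-sample boolean mask, assuming get_loss can be written to return one value per sample (names and the final reduction here are illustrative):

import tensorflow as tf

def masked_loss(cutoff=20.0, min_pix=10.0):
    def loss_fn(y_true, y_pred):
        y_true_flat = tf.reshape(y_true, (tf.shape(y_true)[0], -1))
        y_pred_flat = tf.reshape(y_pred, (tf.shape(y_pred)[0], -1))
        true_bin = tf.where(y_true_flat < cutoff, 0.0, 1.0)
        pred_bin = tf.where(y_pred_flat < cutoff, 0.0, 1.0)
        n_true = tf.reduce_sum(true_bin, axis=1)
        n_pred = tf.reduce_sum(pred_bin, axis=1)
        # True where both truth and prediction have enough pixels above the cutoff
        valid = tf.logical_and(n_true >= min_pix, n_pred >= min_pix)
        # per-sample losses; invalid samples contribute zero instead of a flagged constant
        per_sample = get_loss(true_bin, pred_bin)
        per_sample = tf.where(valid, per_sample, tf.zeros_like(per_sample))
        # average over valid samples only (tf.boolean_mask(per_sample, valid) is an alternative)
        n_valid = tf.reduce_sum(tf.cast(valid, tf.float32))
        return tf.reduce_sum(per_sample) / tf.maximum(n_valid, 1.0)
    return loss_fn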
I'm trying to implement the zero-inflated log-normal loss function from this paper (https://arxiv.org/pdf/1912.07753.pdf, page 5) in LightGBM, but, admittedly, I just don't know how. I don't understand how to get the gradient and Hessian of this function in order to implement it in LightGBM, and I've never needed to implement a custom loss function in the past.
The authors of the paper have open-sourced their code, and the function is available in TensorFlow (https://github.com/google/lifetime_value/blob/master/lifetime_value/zero_inflated_lognormal.py), but I'm unable to translate it to fit the parameters required for a custom loss function in LightGBM. As an example of how LightGBM accepts custom loss functions, a log-likelihood loss would be written as:
def loglikelihood(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1. - preds)
    return grad, hess
Similarly, I would need to define a custom eval metric to accompany it, such as:
def binary_error(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))
    return 'error', np.mean(labels != (preds > 0.5)), False
Both of the above examples are taken from the following repository:
https://github.com/microsoft/LightGBM/blob/e83042f20633d7f74dda0d18624721447a610c8b/examples/python-guide/advanced_example.py#L136
Would appreciate any help on this, and especially detailed guidance to help me learn how to do this on my own.
According to the LGBM documentation for custom loss functions:
It should have the signature objective(y_true, y_pred) -> grad, hess or objective(y_true, y_pred, group) -> grad, hess:
y_true: numpy 1-D array of shape = [n_samples]
The target values.
y_pred: numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The predicted values. Predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task.
group: numpy 1-D array
Group/query data. Only used in the learning-to-rank task. sum(group) = n_samples. For example, if you have a 100-document dataset with group = [10, 20, 40, 10, 10, 10], that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
grad: numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss with respect to the elements of y_pred for each sample point.
hess: numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss with respect to the elements of y_pred for each sample point.
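For orientation, a minimal objective matching this signature (plain squared error, not the zero-inflated loss) would look like:

import numpy as np

def squared_error_objective(y_true, y_pred):
    # loss per sample: 0.5 * (y_pred - y_true)**2
    grad = y_pred - y_true        # first derivative w.r.t. the raw prediction
    hess = np.ones_like(y_pred)   # second derivative w.r.t. the raw prediction
    return grad, hess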
This is the "translation", as you defined it, of the tensorflow implementation. Most of the work is just defining the functions yourself (i.e. softplus, crossentropy, etc.)
The mean absolute percentage error is used in the linked paper, not sure if that is the eval metric you want to use.
import numpy as np

epsilon = 1e-7

def sigmoid(x):
    # element-wise, so it works on arrays
    return 1. / (1. + np.exp(-x))

def softplus(x, beta=1, threshold=20):
    return 1. / beta * np.log(1. + np.exp(beta * x))

def BinaryCrossEntropy(y_true, y_pred):
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    term_0 = (1 - y_true) * np.log(1 - y_pred + epsilon)
    term_1 = y_true * np.log(y_pred + epsilon)
    return -np.mean(term_0 + term_1, axis=0)

def lognormal_logpdf(x, loc, scale):
    # log-density of a log-normal with parameters loc (mu) and scale (sigma),
    # i.e. the numpy counterpart of tfd.LogNormal(loc, scale).log_prob(x)
    return (-np.log(x) - np.log(scale) - 0.5 * np.log(2 * np.pi)
            - np.square(np.log(x) - loc) / (2 * np.square(scale)))

def zero_inflated_lognormal_pred(logits):
    # logits is assumed to have shape (n_samples, 3)
    positive_probs = sigmoid(logits[:, 0])
    loc = logits[:, 1]
    scale = softplus(logits[:, 2])
    preds = positive_probs * np.exp(loc + 0.5 * np.square(scale))
    return preds

def mean_abs_pct_error(preds, train_data):
    labels = train_data.get_label()
    decile_labels = np.percentile(labels, np.linspace(10, 100, 10))
    decile_preds = np.percentile(preds, np.linspace(10, 100, 10))
    MAPE = sum(np.absolute(decile_preds - decile_labels) / decile_labels)
    return 'error', MAPE, False

def zero_inflated_lognormal_loss(train_data, logits):
    labels = train_data.get_label()
    positive = (labels > 0).astype(float)

    positive_logits = logits[:, 0]
    classification_loss = BinaryCrossEntropy(
        y_true=positive, y_pred=sigmoid(positive_logits))

    loc = logits[:, 1]
    scale = np.maximum(softplus(logits[:, 2]), np.sqrt(epsilon))
    safe_labels = positive * labels + (1 - positive) * np.ones(labels.shape)
    regression_loss = -np.mean(
        positive * lognormal_logpdf(safe_labels, loc, scale))

    # note: this returns the value of the loss itself; using it as a LightGBM
    # objective still requires its gradient and Hessian, as described above
    return classification_loss + regression_loss
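For completeness, a rough sketch of how the eval metric above could be hooked into training (X and y are placeholders; the loss function itself still needs a gradient and Hessian to serve as an objective, as the documentation quoted earlier describes):

import lightgbm as lgb

train_set = lgb.Dataset(X, label=y)   # X, y assumed to exist
params = {"verbosity": -1}            # a custom objective would also be configured here

booster = lgb.train(
    params,
    train_set,
    num_boost_round=100,
    valid_sets=[train_set],
    feval=mean_abs_pct_error,         # custom eval metric defined above
)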
I'm using tf.keras and I have a metric that I'd like to calculate where I need multiple batches of validation data in order to calculate it reliably. Is there some way to accumulate batches before calculating the metric?
I'd like to do something like this:
class MultibatchMetric(tf.keras.metrics.Metric):
    def __init__(self, num_batches, name="sdr_metric", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_batches = num_batches
        self.batch_accumulator = []
        self.my_metric = []

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.batch_accumulator.append((y_true, y_pred))
        if len(self.batch_accumulator) >= self.num_batches:
            metric = custom_multibatch_metric_func(self.batch_accumulator)
            self.my_metric.append(metric)
            self.batch_accumulator = []

    def result(self):
        return mean(self.my_metric)

    def reset_states(self):
        self.my_metric = []
        self.batch_accumulator = []
However, this all needs to occur on the tensorflow graph, severely complicating things.
I had a go at your problem, and it seems the built-in add_weight method can provide a solution by creating two state variables: a batch counter, and an accumulator of size (2, num_batches * batch_size, n_outputs). On each update a batch gets stored by adding a padded copy of it to the accumulator, which gets reset when the counter reaches the maximum number of batches. You can then get the result by calling your metric on the accumulator state variable. I have added an example below.
class Metric(tf.keras.metrics.Metric):
    def __init__(self, num_batches, batch_size, name="sdr_metric", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_batches = num_batches
        self.batch_size = batch_size
        self.batch_accumulator = self.add_weight(name='accumulator', shape=(2, num_batches * batch_size, 2), initializer='zeros')
        self.batch_counter = self.add_weight(name='counter', shape=(), initializer='zeros')

    @tf.function
    def update_state(self, y_true, y_pred, sample_weight=None):
        batch_count = self.batch_counter
        batch = tf.stack([tf.cast(y_true, tf.float32), tf.cast(y_pred, tf.float32)])
        paddings = [[0, 0], [batch_count * self.batch_size, (self.num_batches - batch_count - 1) * self.batch_size], [0, 0]]
        padded_batch = tf.pad(batch, paddings)
        self.batch_accumulator.assign_add(padded_batch)
        self.batch_counter.assign_add(1)
        if batch_count == self.num_batches:
            self.reset_states()

    @tf.function
    def result(self):
        if self.batch_counter == self.num_batches - 1:
            return custom_multibatch_metric_func(self.batch_accumulator)
        else:
            return 0.

    def reset_states(self):
        self.batch_counter.assign(0)
        self.batch_accumulator.assign(tf.zeros((2, self.num_batches * self.batch_size, 2)))
And here is the test problem I used to verify it:
# data
n = 1028
batch_size = 32
num_batches = 3
f = 4
lr = 10e-3

x = tf.random.uniform((n, f), -1, 1)
y = tf.concat([tf.reduce_sum(x, axis=-1, keepdims=True), tf.reduce_mean(x, axis=-1, keepdims=True)], axis=-1)
ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size, drop_remainder=True)

model = tf.keras.models.Sequential([tf.keras.layers.Dense(f, activation='relu'), tf.keras.layers.Dense(2)])
model.compile(tf.keras.optimizers.SGD(lr), tf.keras.losses.mean_squared_error, metrics=[Metric(num_batches, batch_size)])
model.fit(ds, epochs=10)
There are two large issues. First, the if statement in the result call: depending on what you require of the resulting metric you can return a value that has no effect on the aggregate; here I assumed you just sum all the results, so returning 0 has no effect. Second, this approach requires you to drop the remainder unless your dataset size is divisible by your batch size.
I hope this was helpful, even though it is not an optimal solution by any means.
My custom loss function in PyTorch does not update during training. The loss stays exactly the same. I am trying to write this custom loss function based on the false positive and false negative rates. I am giving you a simplified version of the code. Any idea what could be happening? Does the backpropagated gradient turn to 0? Is this not the correct way of defining a custom loss function?
I have already checked that during backpropagation the gradient always stays TRUE (assert requires_grad). I have also tried to make a class (torch.nn.Module) out of the function false_pos_neg_rate, but that did not work. The assert on requires_grad turned out to be negative, and I left it out afterwards.
There is no error, the training does continue.
def false_pos_neg_rate(outputs, truths):
    y = truths
    y_predicted = outputs
    cut_off = torch.tensor(0.5, requires_grad=True)
    y_predicted = torch.where(y_predicted <= cut_off, zeros, ones)
    tp, fp, tn, fn = confusion_matrix(y_predicted, y)
    fp_rate = fp / (fp + tn).float()
    fn_rate = fn / (fn + tp).float()
    loss = fn_rate + fp_rate
    return loss
for i, (samples, truths) in enumerate(train_loader):
    samples = Variable(samples)
    truths = Variable(truths)
    outputs = model(samples)
    loss = false_pos_neg_rate_torch(outputs, truths)
    loss.backward()
    optimizer.step()
I expect the loss function to update the model and to be smaller every training step. Instead, the loss stays exactly the same and nothing happens.
Please help me understand what is happening. Why does the model not train during the training steps?
As pointed out by Umang Gupta, your loss function is not differentiable. If you write down, mathematically, what you are trying to do, you'll see that your loss has zero gradient almost everywhere: it behaves like a "step function".
In order to train models using gradient-descent methods you must have meaningful gradients for the loss function.
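For illustration, a common workaround is to build the confusion-matrix counts from the raw probabilities instead of thresholded 0/1 predictions, which keeps them differentiable with respect to the model output; a rough PyTorch sketch (the function name is illustrative):

import torch

def soft_fp_fn_rate(y_prob, y_true, eps=1e-12):
    # y_prob: predicted probabilities in [0, 1]; y_true: 0/1 targets
    tp = torch.sum(y_true * y_prob)
    tn = torch.sum((1 - y_true) * (1 - y_prob))
    fp = torch.sum((1 - y_true) * y_prob)
    fn = torch.sum(y_true * (1 - y_prob))
    fp_rate = fp / (fp + tn + eps)
    fn_rate = fn / (fn + tp + eps)
    return fp_rate + fn_rate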
Based on your tips, I updated my loss function. I made a dummy so you can check the first two functions as well. I added the rest so you can see how it is implemented. However, somewhere the gradient still turns out to be zero. At which step does the gradient turn zero, and how can I check this? Please, I would like to know how I can fix this :).
I tried providing you with more information so you can play around as well, but if you miss anything please do let me know!
y = Variable(torch.tensor((0, 0, 0, 1, 1, 1), dtype=torch.float), requires_grad=True)
y_pred = Variable(torch.tensor((0.333, 0.2, 0.01, 0.99, 0.49, 0.51), dtype=torch.float), requires_grad=True)

def binary_y_pred(y_pred):
    y_pred.register_hook(lambda grad: print(grad))
    y_pred = y_pred + torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred.pow(5)  # this is my way of working around using torch.where()
    y_pred = y_pred.pow(10)
    y_pred = y_pred.pow(15)
    m = nn.Sigmoid()
    y_pred = m(y_pred)
    y_pred = y_pred - torch.tensor(0.5, requires_grad=True, dtype=torch.float)
    y_pred = y_pred * 2
    y_pred.register_hook(lambda grad: print(grad))
    return y_pred

def confusion_matrix(y_pred, y):
    TP = torch.sum(y * y_pred)
    TN = torch.sum((1 - y) * (1 - y_pred))
    FP = torch.sum((1 - y) * y_pred)
    FN = torch.sum(y * (1 - y_pred))

    k_eps = torch.tensor(1e-12, requires_grad=True, dtype=torch.float)
    FN_rate = FN / (TP + FN + k_eps)
    FP_rate = FP / (TN + FP + k_eps)
    cost = FN_rate + FP_rate
    return cost
class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=[0.9, 0.99], amsgrad=True)
criterion = torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')

samples = Variable(samples)
truths = Variable(truths)
outputs = model(samples)
loss = confusion_matrix(outputs, truths)
loss.backward()
optimizer.step()