I have a classifier on top of BERT, and I would like to see the predicted probabilities for creating the ROC curve. How do I get the predict_proba? The predicted probabilities will be used to calculate the TPR, FPR, and thresholds for the ROC curve.
Here is the code:
class BertBinaryClassifier(nn.Module):
    def __init__(self, dropout=0.1):
        super(BertBinaryClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, tokens, masks=None):
        _, pooled_output = self.bert(tokens, attention_mask=masks, output_all_encoded_layers=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        prediction = self.sigmoid(linear_output)
        return prediction
# Config setting
BATCH_SIZE = 4
EPOCHS = 5
# Making dataloaders
train_dataset = torch.utils.data.TensorDataset(train_tokens_tensor, train_masks_tensor, train_y_tensor)
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_dataloader = torch.utils.data.DataLoader(train_dataset, sampler=train_sampler, batch_size=BATCH_SIZE)
test_dataset = torch.utils.data.TensorDataset(test_tokens_tensor, test_masks_tensor, test_y_tensor)
test_sampler = torch.utils.data.SequentialSampler(test_dataset)
test_dataloader = torch.utils.data.DataLoader(test_dataset, sampler=test_sampler, batch_size=BATCH_SIZE)
bert_clf = BertBinaryClassifier()
bert_clf = bert_clf.cuda()
#wandb.watch(bert_clf)
optimizer = torch.optim.Adam(bert_clf.parameters(), lr=3e-6)
# training
for epoch_num in range(EPOCHS):
    bert_clf.train()
    train_loss = 0
    for step_num, batch_data in enumerate(train_dataloader):
        token_ids, masks, labels = tuple(t for t in batch_data)
        token_ids, masks, labels = token_ids.to(device), masks.to(device), labels.to(device)
        preds = bert_clf(token_ids, masks)
        loss_func = nn.BCELoss()
        batch_loss = loss_func(preds, labels)
        train_loss += batch_loss.item()
        bert_clf.zero_grad()
        batch_loss.backward()
        optimizer.step()
        #wandb.log({"Training loss": train_loss})
        print('Epoch: ', epoch_num + 1)
        print("\r" + "{0}/{1} loss: {2} ".format(step_num, len(train_data) / BATCH_SIZE, train_loss / (step_num + 1)))
# evaluating on test
bert_clf.eval()
bert_predicted = []
all_logits = []
probs = []
with torch.no_grad():
    test_loss = 0
    for step_num, batch_data in enumerate(test_dataloader):
        token_ids, masks, labels = tuple(t for t in batch_data)
        token_ids, masks, labels = token_ids.to(device), masks.to(device), labels.to(device)
        logits = bert_clf(token_ids, masks)
        pr = logits.ravel()
        probs += pr
        loss_func = nn.BCELoss()
        loss = loss_func(logits, labels)
        test_loss += loss.item()
        numpy_logits = logits.cpu().detach().numpy()
        #print(numpy_logits)
        #wandb.log({"Testing loss": test_loss})
        bert_predicted += list(numpy_logits[:, 0] > 0.5)
        all_logits += list(numpy_logits[:, 0])
I am able to get the prediction scores to calculate accuracy or F1 score, but not the probabilities for creating the ROC curve.
Thanks
In your forward, you already apply a sigmoid:
def forward(self, tokens, masks=None):
    _, pooled_output = self.bert(...)              # get BERT output
    dropout_output = self.dropout(pooled_output)
    linear_output = self.linear(dropout_output)    # linear combination of outputs
                                                   # (unconstrained scores - "logits")
    prediction = self.sigmoid(linear_output)       # normalised scores
                                                   # (constrained to [0, 1] - "probabilities")
    return prediction
Hence the result of calling your model can be supplied directly to calculate the false positive and true positive rates, e.g.:
from sklearn import metrics
...
test_probs = bert_clf(token_ids, masks)
fpr, tpr, thresholds = metrics.roc_curve(labels, test_probs)
roc_auc = metrics.auc(fpr, tpr)
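Note that metrics.roc_curve expects CPU NumPy arrays (or array-likes), so in practice you would accumulate the probabilities and labels over the whole test dataloader first. Here is a minimal sketch, reusing bert_clf, test_dataloader and device from your code; the names all_probs/all_labels are just illustrative:
import numpy as np
import torch
from sklearn import metrics

bert_clf.eval()
all_probs, all_labels = [], []
with torch.no_grad():
    for token_ids, masks, labels in test_dataloader:
        token_ids, masks = token_ids.to(device), masks.to(device)
        probs = bert_clf(token_ids, masks)             # sigmoid outputs in [0, 1]
        all_probs.append(probs.squeeze(-1).cpu().numpy())
        all_labels.append(labels.numpy())

all_probs = np.concatenate(all_probs)
all_labels = np.concatenate(all_labels)

fpr, tpr, thresholds = metrics.roc_curve(all_labels, all_probs)
roc_auc = metrics.auc(fpr, tpr)
The fpr and tpr arrays can then be passed straight to a plotting function to draw the ROC curve.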
Related
I've initialized two identical ANNs with PyTorch (same structure and same initial parameters), and I've noticed that hyperparameter tuning with Ray Tune returns different results for the two ANNs, even though I didn't use any random initialization.
Could someone explain what I'm doing wrong? I'll attach the code:
ANN Initialization:
class Featrues_model(nn.Module):
    def __init__(self, n_inputs, dim_hidden, n_outputs):
        super().__init__()
        self.fc1 = nn.Linear(n_inputs, dim_hidden)
        self.fc2 = nn.Linear(dim_hidden, n_outputs)

    def forward(self, X):
        X = self.fc1(X)
        X = self.fc2(X)
        return X

features_model_v1 = Featrues_model(len(list_input_variables), 5, 6)
features_model_v2 = Featrues_model(len(list_input_variables), 5, 6)

features_model_v2.load_state_dict(features_model_v1.state_dict())
Hyperparameters setting
config = {
    "lr": tune.choice([1e-2, 1e-5]),
    "weight_decay": tune.choice([1e-2, 1e-5]),
    "batch_size": tune.choice([16, 64]),
    "epochs": tune.choice([10, 50])
}
Train & Validation Dataframe
trainset = df_final.copy()
test_abs = int(len(trainset) * 0.8)
train_subset, val_subset = random_split(
    trainset, [test_abs, len(trainset) - test_abs]
)
df_train = df_final.iloc[train_subset.indices]
df_val = df_final.iloc[val_subset.indices]
Train function design
def setting_model(config, df_train, df_val, model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])
    BATCH_SIZE = config["batch_size"]

    for epoch in range(config["epochs"]):
        train_epoch_loss = 0
        train_epoch_acc = 0
        step = 0

        for i in tqdm(range(0, df_train.shape[0], BATCH_SIZE)):
            batch_X = np.array(
                df_train[list_input_variables].iloc[i:i+BATCH_SIZE]
            )
            batch_X = torch.Tensor([x for x in batch_X])

            batch_Y = np.array(
                df_train[list_output_variables].iloc[i:i+BATCH_SIZE]
            )
            batch_Y = torch.Tensor([int(y) for y in batch_Y])
            batch_Y = batch_Y.type(torch.int64)

            optimizer.zero_grad()

            outputs = model.forward(batch_X)
            train_loss = criterion(outputs, batch_Y)
            train_acc = multi_acc(outputs, batch_Y)

            train_loss.backward()
            optimizer.step()

            train_epoch_loss += train_loss.item()
            train_epoch_acc += train_acc.item()
            step += 1

        # print statistics
        print(f"Epochs: {epoch}")
        print(f"Train Loss: {train_epoch_loss/len(df_train)}")
        print(f"Train Acc: {train_epoch_acc/step}")
        print("\n")

        # Validation loss
        with torch.no_grad():
            X_val = np.array(
                df_val[list_input_variables]
            )
            X_val = torch.Tensor([x for x in X_val])

            Y_val = np.array(
                df_val[list_output_variables]
            )
            Y_val = torch.Tensor([int(y) for y in Y_val])
            Y_val = Y_val.type(torch.int64)

            outputs = model.forward(X_val)
            _, predicted = torch.max(outputs.data, 1)

            total = Y_val.size(0)
            correct = (predicted == Y_val).sum().item()

            loss = criterion(outputs, Y_val)

        tune.report(loss=(loss.numpy()), accuracy=correct / total)

        print(f"Validation Loss: {loss.numpy()/len(df_val)}")
        print(f"Validation Acc: {correct / total:.3f}")

    print("Finished Training")
Hyperparameter tuning
result_v1 = tune.run(
    partial(setting_model, df_train=df_train, df_val=df_val, model=features_model_v1),
    config=config,
    fail_fast="raise",
)

result_v2 = tune.run(
    partial(setting_model, df_train=df_train, df_val=df_val, model=features_model_v2),
    config=config,
    fail_fast="raise"
)
Output
result_v1.get_best_config()
{'lr': 1e-05, 'weight_decay': 1e-05, 'epochs': 1}
result_v2.get_best_config()
{'lr': 0.01, 'weight_decay': 1e-05, 'epochs': 1}
The issue is the use of torch.random under the hood. Since you are not directly providing a weight matrix for your layers, PyTorch initializes it for you. Luckily, you can make the experiment reproducible by setting
torch.manual_seed(x)  # where x is an integer
One should use only a few random seeds, otherwise you might end up overfitting on a particular random seed (see the lottery ticket hypothesis, https://arxiv.org/abs/1803.03635).
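A minimal sketch of where the seed could be set in your script, before any model is created (the seed value 0 is an arbitrary choice, not something from your code):
import torch

SEED = 0                  # any fixed integer
torch.manual_seed(SEED)   # seeds weight init, random_split, dropout, etc.

features_model_v1 = Featrues_model(len(list_input_variables), 5, 6)
features_model_v2 = Featrues_model(len(list_input_variables), 5, 6)
features_model_v2.load_state_dict(features_model_v1.state_dict())
Each Ray Tune trial runs in its own worker process, so if you also want every trial to be reproducible, the same torch.manual_seed call would have to be placed at the top of setting_model as well.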
I am trying to train a model, but in vain. I see the error:
Input contains NaN, infinity or a value too large for dtype('float32').
I think it may be connected with the MSE loss function, because with MAE it more or less works, and with RMSE it also more or less works (on the second epoch I get RMSE = 10). I can't figure out what I am doing wrong.
# Count Nan
df = pd.read_csv('data.txt.zip', header=None)
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
train_size = 463715
X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:]
#ToTensor
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test)
# Create TensorDataset
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)
val_num = 92743
train_num = 370972
# Divide train data into train and validation data
train_ds, val_ds = random_split(train_ds, [train_num, val_num])
# Evaluate accuracy
def accuracy(y_true, y_pred):
    return r2_score(y_true, y_pred)

# create Class
class BaselineModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BaselineModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.linear1 = nn.Linear(90, 45)
        self.linear2 = nn.Linear(45, 1)
        self.linear3 = nn.Linear(45, 15)
        self.linear4 = nn.Linear(15, 1)
        self.batch = nn.BatchNorm2d(hidden_size)
        self.relu = nn.ReLU()
        self.lreku = nn.LeakyReLU()
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.elu(self.linear1(x))
        return self.linear2(x)

    def training_step(self, criterion, batch):
        x_train, y_train = batch
        y_pred = self(x_train)
        loss = criterion(y_pred, y_train.unsqueeze(1))
        return loss

    def validation_step(self, criterion, batch):
        x_val, y_val = batch
        y_pred = self(x_val)
        loss = criterion(y_pred, y_val.unsqueeze(1))
        acc = accuracy(y_val, y_pred)
        return {'val_loss': loss, 'val_acc': acc}

    def validation_epoch_end(self, y_pred):
        batch_losses = [x['val_loss'] for x in y_pred]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in y_pred]
        epoch_acc = np.mean(batch_accs)
        #epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print(f"Epoch {epoch}, val_loss: {result['val_loss']}, val_acc: {result['val_acc']} ")
model = BaselineModel(input_size = 90, hidden_size = 45, output_size = 1)
# Evaluate
def evaluate(model, criterion, val_loader):
    with torch.no_grad():
        y_pred = [model.validation_step(criterion, batch) for batch in val_loader]
    return model.validation_epoch_end(y_pred)

# Train
def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs):
    history = []
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = model.training_step(criterion, batch)
            loss.backward()
            optimizer.step()
        result = evaluate(model, criterion, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    #return history
# Create train_loader & val_loader
batch_size = 128
train_loader = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size = batch_size, shuffle = True)
# Create parameters and Train
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr, momentum = 0.9)
criterion = F.mse_loss
epochs = 10
train(model, criterion, optimizer, train_loader, val_loader, lr, epochs)
Yes, it is because of your loss function. If the value of the loss becomes very small or very large after some epochs, you can run into this error when it is used in backpropagation to train the model. To handle that, you should use early stopping to halt the training, which means implementing a callback: callbacks provide a way to execute code and interact with the training process automatically.
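A minimal sketch of what such an early-stopping check could look like inside your train function (the patience parameter and the use of val_loss as the monitored quantity are assumptions, not part of your original code):
def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs, patience=3):
    history = []
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = model.training_step(criterion, batch)
            loss.backward()
            optimizer.step()

        result = evaluate(model, criterion, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)

        # early stopping: halt if validation loss stops improving
        if result['val_loss'] < best_val_loss:
            best_val_loss = result['val_loss']
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

    return history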
I am a newbie to machine learning in general. I am currently trying to follow a tutorial on sentiment analysis using BERT and Transformers: https://curiousily.com/posts/sentiment-analysis-with-bert-and-hugging-face-using-pytorch-and-python/
However, when I train the model it appears that the model is overfitting.
I do not know how to fix this. I have tried lowering the number of epochs, increasing the batch size, shuffling my data (which is ordered), and increasing the validation split. So far nothing has worked. I have also tried different learning rates, but the one I am using now is the smallest.
Below is my code:
PRE_TRAINED_MODEL_NAME = 'TurkuNLP/bert-base-finnish-cased-v1'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
MAX_LEN = 40
#Make a PyTorch dataset
class FIDataset(Dataset):
    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
#split test and train
df_train, df_test = train_test_split(
    df,
    test_size=0.1,
    random_state=RANDOM_SEED
)
df_val, df_test = train_test_split(
    df_test,
    test_size=0.5,
    random_state=RANDOM_SEED
)
#data loader function
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = FIDataset(
        texts=df.content.to_numpy(),
        targets=df.sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )
BATCH_SIZE = 32
#Load data into train, test, val
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
#Bert model loading
bert_model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.1)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        returned = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        pooled_output = returned["pooler_output"]
        output = self.drop(pooled_output)
        return self.out(output)
#Create a Classifier instance and move to GPU
model = SentimentClassifier(3)
model = model.to(device)
#Optimize with AdamW
EPOCHS = 6
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)
#Train each Epoch function
def train_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    n_examples
):
    model = model.train()
    losses = []
    correct_predictions = 0

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)

        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return correct_predictions.double() / n_examples, np.mean(losses)
import torch

history = defaultdict(list)
best_accuracy = 0

if __name__ == '__main__':
    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)

        train_acc, train_loss = train_epoch(
            model,
            train_data_loader,
            loss_fn,
            optimizer,
            device,
            scheduler,
            len(df_train)
        )
        print(f'Train loss {train_loss} accuracy {train_acc}')

        val_acc, val_loss = eval_model(
            model,
            val_data_loader,
            loss_fn,
            device,
            len(df_val)
        )
        print(f'Val loss {val_loss} accuracy {val_acc}')
        print()

        history['train_acc'].append(train_acc)
        history['train_loss'].append(train_loss)
        history['val_acc'].append(val_acc)
        history['val_loss'].append(val_loss)

        if val_acc > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_acc
Broadly speaking, to reduce overfitting, you can:
1. increase regularization
2. reduce model complexity
3. perform early stopping
4. increase training data
From what you've written, you've already tried 3 and 4. In the case of neural networks, you can increase regularization by increasing dropout. You already have the code for it.
# NOTE: You don't need bert_model here since you're creating one inside
# of SentimentClassifier.
#bert_model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)

# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.1)  # <-- INCREASE THIS VALUE
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
I'd recommend trying higher values of the Dropout probability, as I noted in your code above ("INCREASE THIS VALUE"). Keep track of the Dropout probability and the resulting observed overfitting. Try probability values of 0.1, 0.2, 0.3, 0.4, 0.5.
Usually, I've found that dropout over 0.5 doesn't do much good.
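If higher dropout alone is not enough, another regularization knob you already have available is the optimizer's weight decay. A hedged sketch, reusing the names from your code; the weight_decay value 0.01 (and the higher dropout implied inside the class) are illustrative assumptions, not values from the tutorial:
from transformers import AdamW

# Heavier dropout (set e.g. p=0.3 inside SentimentClassifier) plus explicit weight decay.
model = SentimentClassifier(3)
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01, correct_bias=False)
As with the dropout probability, track validation loss while varying weight_decay so you can see whether the gap between training and validation metrics actually shrinks.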
I have a ROS application where a camera node sends an image via a service to a neural network node. The training and validation dataset I use is the MNIST database. It should be very easy to predict a number, but the neural network returns the same number for every single service request.
ai_service.py
class AiService():

    def __init__(self, save_path):
        self.batch_size = 2800
        self.epochs = 25
        self.learning_rate = 0.01
        self.training_data = torch.utils.data.DataLoader(datasets.MNIST(root='./data', train=True, download=True,
                                                         transform=transforms.Compose([transforms.ToTensor(),
                                                         transforms.Normalize((0.1307,), (0.3081,))])), 200, shuffle=True)
        self.validation_data = torch.utils.data.DataLoader(datasets.MNIST(root='./data', train=False, download=True,
                                                           transform=transforms.Compose([transforms.ToTensor(),
                                                           transforms.Normalize((0.1307,), (0.3081,))])), 200, shuffle=True)
        ...

    # Function to train the mnist dataset.
    def training(self):
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(self.model.parameters(), self.learning_rate)
        start_time = time()
        for epoch in range(self.epochs):
            running_loss = 0
            # training phase
            for images, labels in self.training_data:
                optimizer.zero_grad()
                image, label = images.to(self.device), labels.to(self.device)
                output = self.model(image)
                loss = criterion(output, label)
                loss.backward()
                optimizer.step()  # optimizing weights
                running_loss += loss.item()
            else:
                print("Epoch {} - Training loss: {:.10f}".format(epoch, running_loss / len(self.training_data)))
        print("\nTraining Time (in minutes): {:.2f} =".format((time() - start_time) / 60))

    def validating(self, request_image):
        self.model.eval()
        tensor_image = self.image_to_tensor(request_image)
        with torch.no_grad():
            output = self.model(tensor_image)
        return output.cpu().data.numpy().argmax()

    def image_to_tensor(self, request_image):
        return transforms.ToTensor()(self.cv_bridge.imgmsg_to_cv2(request_image, 'mono8'))
neural_network.py
class NeuralNetwork(nn.Module):

    # Initializes the Neural Network by setting up the layers.
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.input_layer = nn.Sequential(nn.Linear(28*28, 512))
        self.hidden_layer1 = nn.Linear(512, 254)
        self.hidden_layer2 = nn.Linear(254, 128)
        self.output_layer = nn.Linear(128, 10)

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.input_layer(x))
        x = F.relu(self.hidden_layer1(x))
        x = F.relu(self.hidden_layer2(x))
        x = self.output_layer(x)
        return F.log_softmax(x, 1)
[The training accuracy, the service output, and the camera image were attached as screenshots in the original post.]
Could it be that the picture is not recognised because of the resizing and grayscaling? I added an imshow call to the image_to_tensor(self, request_image) function and the image is barely recognisable.
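For reference, a hedged sketch of an inference-time preprocessing that matches the training transforms from __init__ (the cv2 resize to 28x28 and the reuse of the MNIST normalization constants are assumptions, not part of the original code, and the incoming image is assumed to be a single grayscale digit):
import cv2
import torch
from torchvision import transforms

# Same ToTensor + Normalize pipeline that the training DataLoader applies.
mnist_preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

def image_to_tensor(self, request_image):
    img = self.cv_bridge.imgmsg_to_cv2(request_image, 'mono8')
    img = cv2.resize(img, (28, 28))               # match the MNIST input size
    return mnist_preprocess(img).unsqueeze(0)     # shape (1, 1, 28, 28) for the model
MNIST digits are white on a black background, so depending on what the camera delivers, an additional inversion step might also be needed before the network sees anything resembling its training data.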
I'm new to PyTorch and I'm trying to build a model for a Kaggle competition. I used a pretrained ResNet, but the training and the validation loss don't decrease. I suspect I did something wrong in my implementation:
#================================================================================
class TransferResnet(nn.Module):
    def __init__(self, classes=4):
        super().__init__()
        # Use a pretrained model
        self.network = models.resnet34(pretrained=True)
        # Replace last layer
        num_ftrs = self.network.fc.in_features
        self.network.fc = nn.Sequential(nn.Linear(num_ftrs, 128),
                                        nn.ReLU(),
                                        nn.Dropout(0.50),
                                        nn.Linear(128, classes))

    def forward(self, xb):
        out = self.network(xb)
        return out

    def feed_to_network(self, batch):
        images, labels = batch
        out = self(images)
        out = F.softmax(out, dim=1)
        loss = F.cross_entropy(out, labels)
        return loss, out
#======================================================
def get_scores(labels, prediction, loss=None):
    "Return classification scores"
    accuracy = accuracy_score(labels, prediction)
    f1 = f1_score(labels, prediction,
                  average='weighted', zero_division=0)
    precision = precision_score(labels, prediction,
                                average='weighted', zero_division=0)
    recall = recall_score(labels, prediction,
                          average='weighted', zero_division=0)
    if loss:
        return [accuracy, f1, precision, recall, loss]
    else:
        return [accuracy, f1, precision, recall]
def get_predictions(model, loader):
    """This function takes a model and a data loader,
    returns the list of losses, the predictions and the labels"""
    with torch.no_grad():
        model.eval()
        losses = []
        predictions = []
        labels = []
        for batch in loader:
            loss, out = model.feed_to_network(batch)
            predictions += torch.max(out, dim=1)[1].tolist()
            labels += batch[1].tolist()
            losses.append(loss.item())
    return labels, predictions, sum(losses)/len(losses)
#=================================================================
def fit(epochs, model, train_loader, val_loader,
        opt_func=torch.optim.Adam, lr=3e-4, step_size=100):

    def get_parameter(optimizer, parameter="lr"):
        """Retrieve learning rate or parameter"""
        if parameter == 'lr':
            for param_group in optimizer.param_groups:
                return param_group['lr']

    torch.cuda.empty_cache()
    model.train()

    #Dataframe that will store the metrics
    train_metrics_df = pd.DataFrame(columns=['accuracy', 'f1', 'precision',
                                             'recall', 'loss'])
    valid_metrics_df = pd.DataFrame(columns=['accuracy', 'f1', 'precision',
                                             'recall', 'loss'])
    momentum_list = []
    lr_list = []

    optimizer = opt_func([{"params": model.network.fc.parameters(), "lr": lr},
                          {"params": model.network.layer4.parameters(), "lr": lr/2},
                          {"params": model.network.layer3.parameters(), "lr": lr/4},
                          {"params": model.network.layer2.parameters(), "lr": lr/6},
                          {"params": model.network.layer1.parameters(), "lr": lr/8},], lr)

    for epoch in range(epochs):
        # Training Phase
        train_label = []
        train_prediction = []
        train_losses = []

        for batch in tqdm(train_loader):
            loss, out = model.feed_to_network(batch)
            loss.backward()

            #momentum_list.append(get_parameter(optimizer, parameter="momentum"))
            lr_list.append(get_parameter(optimizer, parameter="lr"))
            optimizer.step()
            optimizer.zero_grad()

            #Extract labels, predictions and loss of the training set
            train_prediction += torch.max(out, dim=1)[1].tolist()
            train_label += batch[1].tolist()
            train_losses.append(loss.item())

        #Evaluation phase
        val_labels, val_predictions, val_loss = get_predictions(model, val_loader)
        train_metrics_df.loc[epoch] = get_scores(train_label, train_prediction,
                                                 loss=sum(train_losses)/len(train_losses))
        valid_metrics_df.loc[epoch] = get_scores(val_labels, val_predictions,
                                                 loss=val_loss)

        print_epoch_trainLoss = train_metrics_df.iloc[epoch]["loss"]
        print_epoch_validLoss = valid_metrics_df.iloc[epoch]["loss"]
        print_epoch_validAccu = valid_metrics_df.iloc[epoch]["accuracy"]
        print_epoch_trainAccu = train_metrics_df.iloc[epoch]["accuracy"]

        print(f"Epoch: {epoch+1}, train loss: {print_epoch_trainLoss:.2f}, "
              f"validation loss: {print_epoch_validLoss:.2f}, "
              f"validation accuracy: {print_epoch_validAccu:.2f}, "
              f"training accuracy: {print_epoch_trainAccu:.2f}, ")

    return train_metrics_df, valid_metrics_df, (momentum_list, lr_list)
All the images are normalized, cropped to the proper dimensions (490x490), and some data augmentation is performed (random flips, rotations, etc.). All this code is executed on a GPU using Kaggle notebooks (my own GPU is not enough for this dataset). This is my first implementation of a CNN and I do not know what I did wrong. I also tried to train the classifier with a learning rate of 0.1, but the loss does not decrease.