I am training a FFNN for classification and wonder why my validation loss always seems to be to low compared to training loss, as accuracy is also worse for validation than training.
I found some similar questions leading to the point that it's per se possible to have better performance in validation data set than in the training set, but not in the scenario of lower loss AND lower accuracy.
here's the code i use for training my pytorch NN including the loss calculation:
optimizer = optim.Adam(model_pyt.parameters(), lr=learning_rate, betas=(0.9,0.999))
criterion = nn.CrossEntropyLoss()
for epoch in range(epochs):
start_time = time.time()
train_running_loss = 0
train_acc = 0
with torch.set_grad_enabled(True):
for i, data_pack in enumerate(training_generator):
x_data, y_data = data_pack
optimizer.zero_grad()
outputs = model_pyt(x_data)
loss = criterion(outputs, y_data)
loss.backward()
optimizer.step()
train_running_loss += loss.detach().item()
train_acc += get_accuracy(outputs, y_data, batch_size)
test_labels = torch.tensor(labels_test).long()
test_inputs = torch.tensor(np.array(data_bal_test)).float()
test_outputs = model_pyt(test_inputs)
test_loss = criterion(test_outputs, test_labels).detach().item()
test_acc = get_accuracy(test_outputs, test_labels, len(test_labels))
print('Epoch: %d | Loss: %.4f | Acc %.4f | Test-Loss: %.4f | Test-Acc %.4f | Time Elapsed: %s'
%(epoch+1, train_running_loss/(i+1), train_acc/(i+1), loss, test_acc, time_since(start_time)))
print('=====================================================================================================')
Related
I am currently working on the MNIST dataset. My model has overfit the training data and I want to reduce the overfitting by using weight_decay. I am currently using 0.1 as the value for weight_decay which is giving me bad results as my validation loss and training loss are not decreasing. However, I want to experiment with different values for weight_decay. So that i can plot the different amounts of weight_decay on the x-axis and the performance of validation set on the y-axis. How do i do that? store the values in a list and use a for loop to iterate through? Below is the code that i have tried until now.
class NN(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Flatten(),
nn.Linear(784,4096),
nn.ReLU(),
nn.Linear(4096,2048),
nn.ReLU(),
nn.Linear(2048,1024),
nn.ReLU(),
nn.Linear(1024,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,32),
nn.ReLU(),
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,10))
def forward(self,x):
return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
total_correct = 0
total_loss = 0
total_examples = 0
n_batches = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = model(images)
batch_loss = loss_function(outputs,labels)
n_batches += 1
total_loss += batch_loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
accuracy = total_correct / total_examples
mean_loss = total_loss / n_batches
return (accuracy, mean_loss)
def define_and_train(model,dataset_training, dataset_test):
trainloader = torch.utils.data.DataLoader( small_trainset, batch_size=500, shuffle=True)
testloader = torch.utils.data.DataLoader( dataset_test, batch_size=500, shuffle=True)
values = [1e-8,1e-7,1e-6,1e-5]
model = NN()
for params in values:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = params)
train_acc = []
val_acc = []
train_loss = []
val_loss = []
for epoch in range(100):
total_loss = 0
total_correct = 0
total_examples = 0
n_mini_batches = 0
for i,mini_batch in enumerate(trainloader,0):
images,labels = mini_batch
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
n_mini_batches += 1
total_loss += loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
epoch_training_accuracy = total_correct / total_examples
epoch_training_loss = total_loss / n_mini_batches
epoch_val_accuracy, epoch_val_loss = accuracy_and_loss( model, loss_function, testloader )
print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
%(params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))
train_loss.append( epoch_training_loss )
train_acc.append( epoch_training_accuracy )
val_loss.append( epoch_val_loss )
val_acc.append( epoch_val_accuracy )
history = { 'train_loss': train_loss,
'train_acc': train_acc,
'val_loss': val_loss,
'val_acc': val_acc }
return ( history, model )
This is the plot that I am getting. Where am I going wrong?
I cannot know any information. (Such as loss function, dataset size, dataset content (training and validation), results of 100 or 200 epochs, your scope of the question)
However, the overfitted model may classify the validation dataset. Because the MNIST dataset is not that hard with deep learning (compared to other image classifications).
How about adding white noise to the validation dataset? You may get a large loss on validation.
Or if you want to use your validation dataset, train the model for more at least 1000 epochs. But, as I said above, the overfitted model may classify the validation dataset.
def train_and_test(e):
epochs = e
train_losses, test_losses, val_acc, train_acc= [], [], [], []
valid_loss_min = np.Inf
model.train()
print("Model Training started.....")
for epoch in range(epochs):
running_loss = 0
batch = 0
for images, labels in trainloader:
images, labels = images.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
batch += 1
if batch % 10 == 0:
print(f" epoch {epoch + 1} batch {batch} completed")
test_loss = 0
accuracy = 0
with torch.no_grad():
print(f"validation started for {epoch + 1}")
model.eval()
for images, labels in validloader:
images, labels = images.to(device), labels.to(device)
logps = model(images)
test_loss += criterion(logps, labels)
ps = torch.exp(logps)
top_p, top_class = ps.topk(1, dim=1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor))
train_losses.append(running_loss / len(trainloader))
test_losses.append(test_loss / len(validloader))
val_acc.append(accuracy / len(validloader))
training_acc.append(running_loss / len(trainloader))
scheduler.step()
print("Epoch: {}/{}.. ".format(epoch + 1, epochs),"Training Loss: {:.3f}.. ".format(train_losses[-1]), "Valid Loss: {:.3f}.. ".format(test_losses[-1]),
"Valid Accuracy: {:.3f}".format(accuracy / len(validloader)), "train Accuracy: {:.3f}".format(running_loss / len(trainloader)))
model.train()
if test_loss / len(validloader) <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, test_loss / len(validloader)))
torch.save({
'epoch': epoch,
'model': model,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': valid_loss_min
}, path)
valid_loss_min = test_loss / len(validloader)
print('Training Completed Succesfully !')
return train_losses, test_losses, val_acc ,train_acc
my output is
Model Training started.....
epoch 1 batch 10 completed
epoch 1 batch 20 completed
epoch 1 batch 30 completed
epoch 1 batch 40 completed
validation started for 1
Epoch: 1/2.. Training Loss: 0.088.. Valid Loss: 0.072.. Valid Accuracy: 0.979 train Accuracy: 0.088
Validation loss decreased (inf --> 0.072044). Saving model ...
I am using dataset that is multi-set classification and getting training accuracy and training loss equal so I think there is error in training accuracy code.
training_acc.append(running_loss / len(trainloader))
"train Accuracy: {:.3f}".format(running_loss / len(trainloader))
training_acc.append(accuracy / len(trainloader))
"train Accuracy: {:.3f}".format(accuracy / len(trainloader))
is also not working fine
this method should be followed to plot training loses as well as accuracy
for images , labels in trainloader:
#start = time.time()
images, labels = images.to(device), labels.to(device)
optimizer.zero_grad()# Clear the gradients, do this because gradients are accumulated as 0 in each epoch
# Forward pass - compute outputs on input data using the model
outputs = model(images) # modeling for each image batch
loss = criterion(outputs,labels) # calculating the loss
# the backward pass
loss.backward() # This is where the model learns by backpropagating
optimizer.step() # And optimizes its weights here - Update the parameters
running_loss += loss.item()
# as Output of the network are log-probabilities, need to take exponential for probabilities
ps = torch.exp(outputs)
top_p , top_class = ps.topk(1,dim=1)
equals = top_class == labels.view(*top_class.shape)
# Convert correct_counts to float and then compute the mean
acc += torch.mean(equals.type(torch.FloatTensor))
I'm new to AI and python and I'm trying to run only one batch to aim to overfit.I found the code:
iter(train_loader).next()
but I'm not sure where to implement it in my code. even if I did, how can I check after each iteration to make sure that I'm training the same batch?
train_loader = torch.utils.data.DataLoader(
dataset_train,
batch_size=48,
shuffle=True,
num_workers=2
)
net = nn.Sequential(
nn.Flatten(),
nn.Linear(128*128*3,10)
)
nepochs = 3
statsrec = np.zeros((3,nepochs))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for epoch in range(nepochs): # loop over the dataset multiple times
running_loss = 0.0
n = 0
for i, data in enumerate(train_loader, 0):
inputs, labels = data
# Zero the parameter gradients
optimizer.zero_grad()
# Forward, backward, and update parameters
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# accumulate loss
running_loss += loss.item()
n += 1
ltrn = running_loss/n
ltst, atst = stats(train_loader, net)
statsrec[:,epoch] = (ltrn, ltst, atst)
print(f"epoch: {epoch} training loss: {ltrn: .3f} test loss: {ltst: .3f} test accuracy: {atst: .1%}")
please give me a hint
If you are looking to train on a single batch, then remove your loop over your dataloader:
for i, data in enumerate(train_loader, 0):
inputs, labels = data
And simply get the first element of the train_loader iterator before looping over the epochs, otherwise next will be called at every iteration and you will run on a different batch every epoch:
inputs, labels = next(iter(train_loader))
i = 0
for epoch in range(nepochs):
optimizer.zero_grad()
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# ...
How can I calculate the AUC for each epoch and plot ROC curves? Is there a way I can save the neural network with the highest AUC, rather than the one with the highest val accuracy?
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, torch.argmax(labels,1))
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == torch.argmax(labels.data,1))
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
I am trying linear regression from boston dataset. MSE loss function is nan since the first iteration. I tried altering learning rate and batch_size but of no use.
from torch.utils.data import TensorDataset , DataLoader
inputs = torch.from_numpy(Features).to(torch.float32)
targets = torch.from_numpy(target).to(torch.float32)
train_ds = TensorDataset(inputs , targets)
train_dl = DataLoader(train_ds , batch_size = 5 , shuffle = True)
model = nn.Linear(13,1)
opt = optim.SGD(model.parameters(), lr=1e-5)
loss_fn = F.mse_loss
def fit(num_epochs, model, loss_fn, opt, train_dl):
# Repeat for given number of epochs
for epoch in range(num_epochs):
# Train with batches of data
for xb,yb in train_dl:
# 1. Generate predictions
pred = model(xb)
# 2. Calculate loss
loss = loss_fn(pred, yb)
# 3. Compute gradients
loss.backward()
# 4. Update parameters using gradients
opt.step()
# 5. Reset the gradients to zero
opt.zero_grad()
# Print the progress
if (epoch+1) % 10 == 0:
print('Epoch [{}/{}], Loss: {}'.format(epoch+1, num_epochs, loss.item()))
fit(100, model, loss_fn , opt , train_dl)
output
Pay attention to:
Use normalization: x = (x - x.mean()) / x.std()
y_train / y_test have to be (-1, 1) shapes. Use y_train.view(-1, 1) (if y_train is torch.Tensor or something)
(not your case, but for someone else) If you use torch.nn.MSELoss(reduction='sum') than you have to reduse the sum to mean. It can be done with torch.nn.MSELoss() or in train-loop: l = loss(y_pred, y) / y.shape[0].
Example:
...
loss = torch.nn.MSELoss()
...
for epoch in range(num_epochs):
for x, y in train_iter:
y_pred = model(x)
l = loss(y_pred, y)
optimizer.zero_grad()
l.backward()
optimizer.step()
print("epoch {} loss: {:.4f}".format(epoch + 1, l.item()))