def train_and_test(e):
epochs = e
train_losses, test_losses, val_acc, train_acc= [], [], [], []
valid_loss_min = np.Inf
model.train()
print("Model Training started.....")
for epoch in range(epochs):
running_loss = 0
batch = 0
for images, labels in trainloader:
images, labels = images.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
batch += 1
if batch % 10 == 0:
print(f" epoch {epoch + 1} batch {batch} completed")
test_loss = 0
accuracy = 0
with torch.no_grad():
print(f"validation started for {epoch + 1}")
model.eval()
for images, labels in validloader:
images, labels = images.to(device), labels.to(device)
logps = model(images)
test_loss += criterion(logps, labels)
ps = torch.exp(logps)
top_p, top_class = ps.topk(1, dim=1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor))
train_losses.append(running_loss / len(trainloader))
test_losses.append(test_loss / len(validloader))
val_acc.append(accuracy / len(validloader))
training_acc.append(running_loss / len(trainloader))
scheduler.step()
print("Epoch: {}/{}.. ".format(epoch + 1, epochs),"Training Loss: {:.3f}.. ".format(train_losses[-1]), "Valid Loss: {:.3f}.. ".format(test_losses[-1]),
"Valid Accuracy: {:.3f}".format(accuracy / len(validloader)), "train Accuracy: {:.3f}".format(running_loss / len(trainloader)))
model.train()
if test_loss / len(validloader) <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, test_loss / len(validloader)))
torch.save({
'epoch': epoch,
'model': model,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': valid_loss_min
}, path)
valid_loss_min = test_loss / len(validloader)
print('Training Completed Succesfully !')
return train_losses, test_losses, val_acc ,train_acc
my output is
Model Training started.....
epoch 1 batch 10 completed
epoch 1 batch 20 completed
epoch 1 batch 30 completed
epoch 1 batch 40 completed
validation started for 1
Epoch: 1/2.. Training Loss: 0.088.. Valid Loss: 0.072.. Valid Accuracy: 0.979 train Accuracy: 0.088
Validation loss decreased (inf --> 0.072044). Saving model ...
I am using dataset that is multi-set classification and getting training accuracy and training loss equal so I think there is error in training accuracy code.
training_acc.append(running_loss / len(trainloader))
"train Accuracy: {:.3f}".format(running_loss / len(trainloader))
training_acc.append(accuracy / len(trainloader))
"train Accuracy: {:.3f}".format(accuracy / len(trainloader))
is also not working fine
this method should be followed to plot training loses as well as accuracy
for images , labels in trainloader:
#start = time.time()
images, labels = images.to(device), labels.to(device)
optimizer.zero_grad()# Clear the gradients, do this because gradients are accumulated as 0 in each epoch
# Forward pass - compute outputs on input data using the model
outputs = model(images) # modeling for each image batch
loss = criterion(outputs,labels) # calculating the loss
# the backward pass
loss.backward() # This is where the model learns by backpropagating
optimizer.step() # And optimizes its weights here - Update the parameters
running_loss += loss.item()
# as Output of the network are log-probabilities, need to take exponential for probabilities
ps = torch.exp(outputs)
top_p , top_class = ps.topk(1,dim=1)
equals = top_class == labels.view(*top_class.shape)
# Convert correct_counts to float and then compute the mean
acc += torch.mean(equals.type(torch.FloatTensor))
Related
I'm new to AI and python and I'm trying to run only one batch to aim to overfit.I found the code:
iter(train_loader).next()
but I'm not sure where to implement it in my code. even if I did, how can I check after each iteration to make sure that I'm training the same batch?
train_loader = torch.utils.data.DataLoader(
dataset_train,
batch_size=48,
shuffle=True,
num_workers=2
)
net = nn.Sequential(
nn.Flatten(),
nn.Linear(128*128*3,10)
)
nepochs = 3
statsrec = np.zeros((3,nepochs))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for epoch in range(nepochs): # loop over the dataset multiple times
running_loss = 0.0
n = 0
for i, data in enumerate(train_loader, 0):
inputs, labels = data
# Zero the parameter gradients
optimizer.zero_grad()
# Forward, backward, and update parameters
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# accumulate loss
running_loss += loss.item()
n += 1
ltrn = running_loss/n
ltst, atst = stats(train_loader, net)
statsrec[:,epoch] = (ltrn, ltst, atst)
print(f"epoch: {epoch} training loss: {ltrn: .3f} test loss: {ltst: .3f} test accuracy: {atst: .1%}")
please give me a hint
If you are looking to train on a single batch, then remove your loop over your dataloader:
for i, data in enumerate(train_loader, 0):
inputs, labels = data
And simply get the first element of the train_loader iterator before looping over the epochs, otherwise next will be called at every iteration and you will run on a different batch every epoch:
inputs, labels = next(iter(train_loader))
i = 0
for epoch in range(nepochs):
optimizer.zero_grad()
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# ...
How can I calculate the AUC for each epoch and plot ROC curves? Is there a way I can save the neural network with the highest AUC, rather than the one with the highest val accuracy?
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, torch.argmax(labels,1))
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == torch.argmax(labels.data,1))
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
I am trying to evaluate my model on the whole training set after each epoch.
This is what I did:
torch.manual_seed(1)
model = ConvNet(num_classes=num_classes)
cost_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
def compute_accuracy(model, data_loader):
correct_pred, num_examples = 0, 0
for features, targets in data_loader:
logits = model(features)
predicted_labels = torch.argmax(logits, 1)
num_examples += targets.size(0)
correct_pred += (predicted_labels == targets).sum()
return correct_pred.float()/num_examples * 100
for epoch in range(num_epochs):
model = model.train()
for features, targets in train_loader:
logits = model(features)
cost = cost_fn(logits, targets)
optimizer.zero_grad()
cost.backward()
optimizer.step()
model = model.eval()
print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
epoch+1, num_epochs,
compute_accuracy(model, train_loader)))
the output was convincing:
Epoch: 001/005 training accuracy: 89.08%
Epoch: 002/005 training accuracy: 90.41%
Epoch: 003/005 training accuracy: 91.70%
Epoch: 004/005 training accuracy: 92.31%
Epoch: 005/005 training accuracy: 92.95%
But then I added another line at the end of the training loop, to also evaluate the model on the whole test set after each epoch:
for epoch in range(num_epochs):
model = model.train()
for features, targets in train_loader:
logits = model(features)
cost = cost_fn(logits, targets)
optimizer.zero_grad()
cost.backward()
optimizer.step()
model = model.eval()
print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
epoch+1, num_epochs,
compute_accuracy(model, train_loader)))
print('\t\t testing accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
But the training accuracies started to change:
Epoch: 001/005 training accuracy: 89.08%
testing accuracy: 87.66%
Epoch: 002/005 training accuracy: 90.42%
testing accuracy: 89.04%
Epoch: 003/005 training accuracy: 91.84%
testing accuracy: 90.01%
Epoch: 004/005 training accuracy: 91.86%
testing accuracy: 89.83%
Epoch: 005/005 training accuracy: 92.45%
testing accuracy: 90.32%
Am I doing something wrong? I expected the training accuracies to remain the same because the manual seed is 1 in both cases.
Is this an expected output ?
The random seed had been set wasn't stop the model for learning to get higher accuracy becuase the random seed is a number for Pseudo random. In this case, you had told the model to shuffle the training data with a random number("1").
I am training a FFNN for classification and wonder why my validation loss always seems to be to low compared to training loss, as accuracy is also worse for validation than training.
I found some similar questions leading to the point that it's per se possible to have better performance in validation data set than in the training set, but not in the scenario of lower loss AND lower accuracy.
here's the code i use for training my pytorch NN including the loss calculation:
optimizer = optim.Adam(model_pyt.parameters(), lr=learning_rate, betas=(0.9,0.999))
criterion = nn.CrossEntropyLoss()
for epoch in range(epochs):
start_time = time.time()
train_running_loss = 0
train_acc = 0
with torch.set_grad_enabled(True):
for i, data_pack in enumerate(training_generator):
x_data, y_data = data_pack
optimizer.zero_grad()
outputs = model_pyt(x_data)
loss = criterion(outputs, y_data)
loss.backward()
optimizer.step()
train_running_loss += loss.detach().item()
train_acc += get_accuracy(outputs, y_data, batch_size)
test_labels = torch.tensor(labels_test).long()
test_inputs = torch.tensor(np.array(data_bal_test)).float()
test_outputs = model_pyt(test_inputs)
test_loss = criterion(test_outputs, test_labels).detach().item()
test_acc = get_accuracy(test_outputs, test_labels, len(test_labels))
print('Epoch: %d | Loss: %.4f | Acc %.4f | Test-Loss: %.4f | Test-Acc %.4f | Time Elapsed: %s'
%(epoch+1, train_running_loss/(i+1), train_acc/(i+1), loss, test_acc, time_since(start_time)))
print('=====================================================================================================')
I'm training a LSTM model for time series prediction and at each epoch my accuracy restarts from 0 as if I'm training for the first time.
I attach below the training method snippet:
def train(model, loader, epoch, mini_batch_size, sequence_size):
model.train()
correct = 0
padded_size = 0
size_input = mini_batch_size * sequence_size
for batch_idx, (inputs, labels, agreement_score) in enumerate(loader):
if(inputs.size(0) == size_input):
inputs = inputs.clone().reshape(mini_batch_size, sequence_size, inputs.size(1))
labels = labels.clone().squeeze().reshape(mini_batch_size*sequence_size)
agreement_score = agreement_score.clone().squeeze().reshape(mini_batch_size*sequence_size)
else:
padded_size = size_input - inputs.size(0)
(inputs, labels, agreement_score) = padd_incomplete_sequences(inputs, labels, agreement_score, mini_batch_size, sequence_size)
inputs, labels, agreement_score = Variable(inputs.cuda()), Variable(labels.cuda()), Variable(agreement_score.cuda())
output = model(inputs)
loss = criterion(output, labels)
loss = loss * agreement_score
loss = loss.mean()
optimizer.zero_grad()
loss.backward()
optimizer.step()
pred = output.data.max(1, keepdim = True)[1]
correct += pred.eq(labels.data.view_as(pred)).cuda().sum()
accuracy = 100. * correct / (len(loader.dataset) + padded_size)
print("Train: Epoch: {}, [{}/{} ({:.0f}%)]\t loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)".format(
epoch,
batch_idx * len(output),
(len(loader.dataset) + padded_size),
100. * batch_idx / (len(loader.dataset)+padded_size),
loss.item(),
correct,
(len(loader.dataset) + padded_size),
accuracy))
accuracy = 100. * correct / (len(loader.dataset) + padded_size)
train_accuracy.append(accuracy)
train_epochs.append(epoch)
train_loss.append(loss.item())
According to that my loop looks like:
for epoch in range(1, 10):
train(audio_lstm_model, train_rnn_audio_loader, epoch, MINI_BATCH_SIZE, SEQUENCE_SIZE_AUDIO)
evaluation(audio_lstm_model,validation_rnn_audio_loader, epoch, MINI_BATCH_SIZE, SEQUENCE_SIZE_AUDIO)
Consequently, my accuracy and loss restarts at every epoch:
Train: Epoch: 1, [0/1039079 (0%)] loss: 0.921637, Accuracy: 0/1039079 (0%)
...
Train: Epoch: 1, [10368/1039079 (0%)] loss: 0.523242, Accuracy: 206010/1039079 (19%)
Test set: loss: 151.4845, Accuracy: 88222/523315 (16%)
Train: Epoch: 2, [0/1039079 (0%)] loss: 0.921497, Accuracy: 0/1039079 (0%)
If anyone has any clue about it, your help is welcomed!
Have a nice day!
The problem turn out to be the fact that the sequence size was too small for the network in order to be able to make some predictions from it.
So after increasing the sequence length by some orders of magnitude, I was able to improve my model after each epoch.