Make PyTorch faster on Colab using GPU - Python

I use Colab (with a GPU) to run my code, but it takes a long time, approximately 12 hours per epoch. On the other hand, when I used Keras it took about 1 hour per epoch.
I want to run the code in PyTorch to fine-tune it. So, how can I make PyTorch faster?
# function to train the model
def train():
    model.train()
    total_loss, total_accuracy = 0, 0
    # empty list to save model predictions
    total_preds = []
    Labels = []
    # iterate over batches
    for step, batch in enumerate(train_dataloader):
        # progress update after every 10 batches
        if step % 10 == 0 and not step == 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))
        # push the batch to gpu
        #batch = [r for r in batch]
        sent_id, mask, labels = batch['input_ids'], batch['attention_mask'], batch['labels']
        # clear previously calculated gradients
        model.zero_grad()
        # get model predictions for the current batch
        preds = model(sent_id, mask, labels)
        preds = torch.argmax(preds, dim=1)
        preds = preds.detach().numpy()
        labels = labels.detach().numpy()
        alpha = 0.25
        gamma = 2
        ce_loss = dice_loss(preds, labels)
        total_loss = total_loss + ce_loss
        # clip the gradients to 1.0. It helps in preventing the exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # update parameters
        optimizer.step()
        total_preds.append(preds)
        total_accuracy += (preds == labels).sum()
    # compute the training loss of the epoch
    avg_loss = total_loss / len(traindataset)
    avg_accuracy = total_accuracy / len(traindataset)
    # returns the loss and predictions
    return avg_loss, total_preds, avg_accuracy
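
One thing that stands out in the code above is that neither the model nor the batch tensors are ever moved to the GPU, so the whole loop runs on Colab's CPU. Below is a minimal sketch of how the batch could be pushed to the device before the forward pass; it follows the names used in the question and is an illustration rather than a complete, drop-in training loop:

import torch

# use the GPU when Colab has one attached, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)   # `model` is the network defined elsewhere in the question's notebook

for step, batch in enumerate(train_dataloader):
    # move every tensor in the batch to the same device as the model
    sent_id = batch['input_ids'].to(device)
    mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)

    preds = model(sent_id, mask, labels)
    # ... rest of the training step as in the question ...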

Related

Validation loss is lower than the training loss and does not decrease in PyTorch

I was trying to train an image-to-image translation model using TransUNet. I split my data 70% / 15% / 15% for training, validation, and testing. But when I monitor the loss curves, I find that the validation loss is much lower than the training loss.
(loss curve image not shown)
The code is here:
criterion = nn.L1Loss()
net = net.cuda()
net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
optimizer = torch.optim.Adam(net.parameters(), lr=lr, betas=(0.9, 0.999))
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

for epoch in range(1, total_epoch + 1):
    print('---------- Epoch:' + str(epoch) + ' ----------')
    # data_loader_iter = iter(data_loader)
    data_loader_iter = data_loader
    train_epoch_loss = 0.
    print('Train:')
    for img, mask in tqdm(data_loader_iter, ncols=20, total=len(data_loader_iter)):
        net.train()
        img, mask = img.to(device), mask.to(device)
        optimizer.zero_grad()
        pred = net(img)
        train_loss = criterion(pred, mask)
        train_epoch_loss += train_loss
        train_loss.backward()
        optimizer.step()
    train_epoch_loss /= len(data_loader_iter)

    val_data_loader_num = val_data_loader
    test_data_loader_num = test_data_loader
    val_epoch_loss = 0
    test_epoch_loss = 0

    # Validation
    print('Validation:')
    with torch.no_grad():
        for val_img, val_mask in tqdm(val_data_loader_num, ncols=20, total=len(val_data_loader_num)):
            val_img, val_mask = val_img.to(device), val_mask.to(device)
            net.eval()
            predict = net(val_img)
            val_loss = criterion(predict, val_mask)
            val_epoch_loss += val_loss
        val_epoch_loss = val_epoch_loss / len(val_data_loader_num)
Another problem is that, when I test the model, one class cannot be predicted properly; that class always ends up clustered at the edge of the image, see the green class in the prediction compared with the ground truth (images not shown).
I know there seem to be many problems, but has anyone faced similar issues? Thanks in advance!
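
One common reason a validation loss sits below the training loss is that regularization such as dropout is active only in training mode: net.train() enables it, while net.eval() (used together with torch.no_grad() in the validation loop above) disables it, so the evaluated network is effectively stronger. A minimal, self-contained sketch of that difference; the toy layer and random input below are purely illustrative and not part of the question's model:

import torch
import torch.nn as nn

# toy module with dropout, only to illustrate train() vs eval() behaviour
layer = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5))
x = torch.randn(4, 8)

layer.train()             # dropout active: roughly half the activations are zeroed
out_train = layer(x)

layer.eval()              # dropout disabled: deterministic, full-capacity forward pass
with torch.no_grad():     # also skip gradient bookkeeping, as in the validation loop
    out_eval = layer(x)

print(out_train)
print(out_eval)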

labels.data: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first

I created a function that trains a model in PyTorch to classify pictures into placeholder images and product images. Now I am trying to obtain the F1 score and added these lines to the code:
# !!!THIS LINE SHOULD OBTAIN F1_SCORE!!!!
f1score = f1_score(labels.data, preds)
After adding this, I get the error
can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
Here you can see the complete function; the referred line should be easy to find, as I highlighted it in caps:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()
    print("model is : ", model)
    val_acc_history = []
    val_loss_history = []
    train_acc_history = []
    train_loss_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients (This can be changed to the Adam and other optimizers)
                optimizer.zero_grad()
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    # mode we calculate the loss by summing the final output and the auxiliary output
                    # but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # !!!THIS LINE SHOULD OBTAIN F1_SCORE!!!!
                f1score = f1_score(labels.data, preds)
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
                val_loss_history.append(epoch_loss)
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_loss_history.append(epoch_loss)
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, val_loss_history, train_loss_history
I already tried this, but it is also not working:
# !!!THIS LINE SHOULD OBTAIN F1_SCORE!!!!
f1score = f1_score(labels.cpu().data, preds)
I solved the error myself; my first attempt was nearly right, but I had to add .cpu() to preds as well:
# !!!THIS LINE SHOULD OBTAIN F1_SCORE!!!!
f1score = f1_score(labels.cpu().data, preds.cpu())
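
For reference, a minimal, self-contained sketch of why the .cpu() calls are needed: scikit-learn's f1_score operates on NumPy data, and a CUDA tensor has to be copied to host memory before it can be converted. The small example tensors below are stand-ins for the labels and preds produced inside the training loop:

import torch
from sklearn.metrics import f1_score

# stand-ins for the tensors produced inside the loop (assumed binary labels)
labels = torch.tensor([0, 1, 1, 0], device='cuda' if torch.cuda.is_available() else 'cpu')
preds = torch.tensor([0, 1, 0, 0], device=labels.device)

# sklearn metrics expect host-side NumPy arrays, so move CUDA tensors to the CPU first
f1 = f1_score(labels.detach().cpu().numpy(), preds.detach().cpu().numpy())
print(f1)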

I am trying to train my network on the iris data but I am getting the following error. Although I changed the type as well, it still pops up with the same error.

num_epochs = 500
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []

for epoch in range(num_epochs):
    train_correct = 0
    train_total = 0
    for i, (items, classes) in enumerate(train_loader):
        items = Variable(items)
        classes = Variable(classes)
        net.train()            # Put the network into training mode
        optimizer.zero_grad()  # Clear off the gradients from any past operation
        outputs = net(items)   # Do the forward pass
        loss = criterion(outputs, classes)  # Calculate the loss
        loss.backward()        # Calculate the gradients with help of back propagation
        optimizer.step()       # Ask the optimizer to adjust the parameters based on the gradients
        train_total += classes.size(0)
        _, predicted = torch.max(outputs.data, 1)
        train_correct += (predicted == classes.data).sum()
        print('Epoch %d/%d, Iteration %d/%d, Loss: %.4f'
              % (epoch + 1, num_epochs, i + 1, len(train_ds) // batch_size, loss.data[0]))
    net.eval()  # Put the network into evaluation mode
    train_loss.append(loss.data[0])
    train_accuracy.append((100 * train_correct / train_total))
    test_items = torch.FloatTensor(test_ds.data.values[:, 0:4])
    test_classes = torch.LongTensor(test_ds.data.values[:, 4])
    outputs = net(Variable(test_items))
    loss = criterion(outputs, Variable(test_classes))
    test_loss.append(loss.data[0])
    _, predicted = torch.max(outputs.data, 1)
    total = test_classes.size(0)
    correct = (predicted == test_classes).sum()
    test_accuracy.append((100 * correct / total))
And the error I get is
RuntimeError
Traceback (most recent call last)
<ipython-input-21-d25f2c091906> in <module>
21 optimizer.zero_grad() # Clear off the gradients from any past operation
22 outputs = net(items) # Do the forward pass
---> 23 loss = criterion(outputs, classes) # Calculate the loss
24 loss.backward() # Calculate the gradients with help of back propagation
25 optimizer.step() # Ask the optimizer to adjust the parameters based on the gradients
RuntimeError: expected scalar type Long but found Int
It seems it is this line, loss = criterion(outputs, classes), that generates the error.
Change classes = Variable(classes) to classes = Variable(classes).long() to convert classes to type Long, as the criterion expects.
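
A minimal sketch of the dtype issue in isolation, assuming a classification criterion such as nn.CrossEntropyLoss (the question does not show how criterion is defined): integer class targets must be 64-bit (torch.long), and 32-bit integer targets trigger exactly this error:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()          # assumed stand-in for the question's criterion
outputs = torch.randn(4, 3)                # logits for 4 samples and 3 classes (e.g. iris)
classes = torch.tensor([0, 2, 1, 1], dtype=torch.int32)   # Int targets

# criterion(outputs, classes)              # raises: expected scalar type Long but found Int
loss = criterion(outputs, classes.long())  # converting the targets to Long fixes it
print(loss)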

How to evaluate a single image in PyTorch model?

I used this code to train a model:
def train(model, epochs):
    for epoch in range(epochs):
        for idx, batch in enumerate(train_loader):
            x, bndbox = batch  # unpack batch
            pred_bndbox = model(x)  # forward pass
            #print('label:', bndbox, 'prediction:', pred_bndbox)
            loss = criterion(pred_bndbox, bndbox)  # compute loss for this batch
            optimiser.zero_grad()  # zero gradients of optimiser
            loss.backward()  # backward pass (find rate of change of loss with respect to model parameters)
            optimiser.step()  # take optimisation step
            print('Epoch:', epoch, 'Batch:', idx, 'Loss:', loss.item())
            writer.add_scalar('DETECTION Loss/Train', loss, epoch * len(train_loader) + idx)  # write loss to a graph

train(cnn, epochs)
torch.save(cnn.state_dict(), str(time.time()))  # save model

def visualise(model, n):
    model.eval()
    for idx, batch in enumerate(test_loader):
        x, y = batch
        pred_bndbox = model(x)
        S40dataset.show(batch, pred_bndbox=pred_bndbox)
        if idx == n:
            break
How do I evaluate the model prediction on a single image to check the operation of the neural network?
You can use:
model.eval()  # put the model in evaluation mode
with torch.no_grad():  # do not track gradients
    class_index = model(single_image).argmax()  # get the predicted class index for the image
This code stores the network's prediction as the index of the class in the class_index variable. You have to put the image you would like to examine into the single_image variable, in the right shape.
Hope that helps.
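
The "right shape" usually means the same preprocessing as during training plus an extra batch dimension. A hedged sketch, assuming a torchvision-style transform and an example image path chosen only for illustration:

import torch
from PIL import Image
from torchvision import transforms

# the transform should mirror whatever preprocessing the training data used (assumption)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

img = Image.open('example.jpg').convert('RGB')   # hypothetical image path
single_image = transform(img).unsqueeze(0)       # add a batch dimension: (1, C, H, W)

model.eval()                                     # `model` is the trained network from the question
with torch.no_grad():
    pred_bndbox = model(single_image)            # same call as in the training code above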

Matrix Factorization with PyTorch using GPU

The code below does run, but it is very slow because it uses Python for loops. At my university, servers with GPU resources are available. I would also like to understand how to use batches to train the model more effectively.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_items=len(movie_ids), n_factors=300):
        super().__init__()
        self.vectors = nn.Embedding(n_items, n_factors, sparse=True)

    def forward(self, i, j):
        feat_i = self.vectors(i)
        feat_j = self.vectors(j)
        result = (feat_i * feat_j).sum(-1)
        return result

model = MatrixFactorization(n_items=len(movie_ids), n_factors=300)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 100
for epoch in range(epochs):
    loss = 0
    for r, c in zip(r_index, c_index):
        i = torch.LongTensor([int(r)])
        j = torch.LongTensor([int(c)])
        rating = torch.FloatTensor([Xij[i, j]])
        # predict
        prediction = model(i, j)
        loss += loss_fn(prediction, rating)
    # Reset the gradients to 0
    optimizer.zero_grad()
    # backpropagate
    loss.backward()
    # update weights
    optimizer.step()
    print(loss)
I've tried the alteration below, but it produced a warning. I'm not sure why my target sizes are mismatched, but that appears to be the cause of the issue.
epochs = 50
for epoch in range(epochs):
    loss = 0
    # predict
    i = torch.LongTensor(r_index)
    j = torch.LongTensor(c_index)
    ratings = Xij[i, j]
    prediction = model(i, j)
    loss += loss_fn(prediction, rating)
    # Reset the gradients to 0
    optimizer.zero_grad()
    # backpropagate
    loss.backward()
    # update weights
    optimizer.step()
    print(loss)
And the warning (not sure where I went wrong):
/anaconda3/lib/python3.6/site-packages/torch/nn/modules/loss.py:431: UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([5931640])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
There is a typo in your second code snippet:
loss += loss_fn(prediction, ratings) # instead of rating
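
For the batching part of the question, here is a hedged sketch of one way to train the same objective on minibatches of (row, column, rating) triples; the DataLoader, the batch size, and the way ratings are gathered from Xij are illustrative assumptions rather than code from the question:

import torch
from torch.utils.data import DataLoader, TensorDataset

# build a dataset of (row index, column index, rating) triples from the question's data
i_all = torch.as_tensor(r_index, dtype=torch.long)
j_all = torch.as_tensor(c_index, dtype=torch.long)
ratings_all = torch.as_tensor([Xij[r, c] for r, c in zip(r_index, c_index)], dtype=torch.float)
loader = DataLoader(TensorDataset(i_all, j_all, ratings_all), batch_size=4096, shuffle=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)   # model, loss_fn and optimizer as defined in the question

for epoch in range(epochs):
    epoch_loss = 0.0
    for i, j, rating in loader:
        i, j, rating = i.to(device), j.to(device), rating.to(device)
        optimizer.zero_grad()
        prediction = model(i, j)                 # shape: (batch_size,)
        loss = loss_fn(prediction, rating)       # matching shapes avoid the broadcasting warning
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(epoch_loss / len(loader))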
