How to run one batch in pytorch? - python

I'm new to AI and python and I'm trying to run only one batch to aim to overfit.I found the code:
iter(train_loader).next()
but I'm not sure where to implement it in my code. even if I did, how can I check after each iteration to make sure that I'm training the same batch?
train_loader = torch.utils.data.DataLoader(
dataset_train,
batch_size=48,
shuffle=True,
num_workers=2
)
net = nn.Sequential(
nn.Flatten(),
nn.Linear(128*128*3,10)
)
nepochs = 3
statsrec = np.zeros((3,nepochs))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
for epoch in range(nepochs): # loop over the dataset multiple times
running_loss = 0.0
n = 0
for i, data in enumerate(train_loader, 0):
inputs, labels = data
# Zero the parameter gradients
optimizer.zero_grad()
# Forward, backward, and update parameters
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# accumulate loss
running_loss += loss.item()
n += 1
ltrn = running_loss/n
ltst, atst = stats(train_loader, net)
statsrec[:,epoch] = (ltrn, ltst, atst)
print(f"epoch: {epoch} training loss: {ltrn: .3f} test loss: {ltst: .3f} test accuracy: {atst: .1%}")
please give me a hint

If you are looking to train on a single batch, then remove your loop over your dataloader:
for i, data in enumerate(train_loader, 0):
inputs, labels = data
And simply get the first element of the train_loader iterator before looping over the epochs, otherwise next will be called at every iteration and you will run on a different batch every epoch:
inputs, labels = next(iter(train_loader))
i = 0
for epoch in range(nepochs):
optimizer.zero_grad()
outputs = net(inputs)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
# ...

Related

Why the Validation Loss is too high?

I tried to write a code which is about the brand detection. But while training the model, there are high losses in every epoch. I tried to normalize the dataset, however nothings changed. Am I doing something wrong?
My code is as below:
train_link = "C:/Users\proin\OneDrive\Masaüstü\Data_2/train"
test_link = "C:/Users\proin\OneDrive\Masaüstü\Data_2/test"
val_link = "C:/Users\proin\OneDrive\Masaüstü\Data_2/validation"
transforming_train = transforms.Compose([transforms.Resize((300, 300)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
transforming_test = transforms.Compose([transforms.Resize((300, 300)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
I did also try to plug mean and std values in normalize function but nothings changed.
Let me continue:
trainset = torchvision.datasets.ImageFolder(train_link, transform = transforming_train)
testset = torchvision.datasets.ImageFolder(test_link, transform = transforming_test)
valset = torchvision.datasets.ImageFolder(val_link, transform = transforming_test)
batch_size = 1
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=1,
  shuffle=False)
Because ImageFolder has no argument of normalize, I couldn't plug it in here.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
And I'm using the CUDA as well.
def train_log_loss_network(model, train_loader, val_loader=None, epochs=50, device="cpu"):
loss_fn = nn.CrossEntropyLoss() #CrossEntropy is another name for the Logistic Regression loss function. Like before, we phrase learning as minimize a loss function. This is the loss we are going to minimize!
#We need an optimizer! Adam is a good default one that works "well enough" for most problems
#To tell Adam what to optimize, we give it the model's parameters - because thats what the learning will adjust
# optimizer = torch.optim.Adam(model.parameters())
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#Devices can be spcified by a string, or a special torch object
#If it is a string, lets get the correct device
if device.__class__ == str:
device = torch.device(device)
model.to(device)#Place the model on the correct compute resource
for epoch in range(epochs):
model = model.train()#Put our model in training mode
running_loss = 0.0
for inputs, labels in train_loader: #tqdm(train_loader):
#Move the batch to the device we are using.
inputs, labels = inputs.cuda(), labels.cuda()
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
y_pred = model(inputs)
# Compute loss.
loss = loss_fn(y_pred, labels.long())
# Backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# Calling the step function on an Optimizer makes an update to its parameters
optimizer.step()
running_loss += loss.item() * inputs.size(0)
if val_loader is None:
print("Loss after epoch {} is {}".format(epoch + 1, running_loss))
else:#Lets find out validation performance as we go!
model = model.eval() #Set the model to "evaluation" mode, b/c we don't want to make any updates!
predictions = []
targets = []
for inputs, labels in val_loader:
#Move the batch to the device we are using.
inputs = inputs.to(device)
labels = labels.to(device)
y_pred = model(inputs)
# Get predicted classes
# y_pred will have a shape (Batch_size, C)
#We are asking for which class had the largest response along dimension #1, the C dimension
for pred in torch.argmax(y_pred, dim=1).cpu().numpy():
predictions.append(pred)
for l in labels.cpu().numpy():
targets.append(l)
#print("Network Accuracy: ", )
print("Loss after epoch {} is {}. Accuracy: {}".format(epoch + 1, running_loss, accuracy_score(predictions, targets)))
And lastly,
train_log_loss_network(model, trainloader, val_loader=valloader, epochs=10, device=device)
Even I tried different epoch number and different conv layer, the results are similar.
Loss after epoch 1 is 72568.83042097092. Accuracy: 0.0036231884057971015
Loss after epoch 2 is 72568.78793954849. Accuracy: 0.0036231884057971015
Loss after epoch 3 is 72568.74511051178. Accuracy: 0.0036231884057971015
Loss after epoch 4 is 72568.7018828392. Accuracy: 0.014492753623188406
Loss after epoch 5 is 72568.65722990036. Accuracy: 0.014492753623188406

MNIST overfitting

I am currently working on the MNIST dataset. My model has overfit the training data and I want to reduce the overfitting by using weight_decay. I am currently using 0.1 as the value for weight_decay which is giving me bad results as my validation loss and training loss are not decreasing. However, I want to experiment with different values for weight_decay. So that i can plot the different amounts of weight_decay on the x-axis and the performance of validation set on the y-axis. How do i do that? store the values in a list and use a for loop to iterate through? Below is the code that i have tried until now.
class NN(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Flatten(),
nn.Linear(784,4096),
nn.ReLU(),
nn.Linear(4096,2048),
nn.ReLU(),
nn.Linear(2048,1024),
nn.ReLU(),
nn.Linear(1024,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,32),
nn.ReLU(),
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,10))
def forward(self,x):
return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
total_correct = 0
total_loss = 0
total_examples = 0
n_batches = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = model(images)
batch_loss = loss_function(outputs,labels)
n_batches += 1
total_loss += batch_loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
accuracy = total_correct / total_examples
mean_loss = total_loss / n_batches
return (accuracy, mean_loss)
def define_and_train(model,dataset_training, dataset_test):
trainloader = torch.utils.data.DataLoader( small_trainset, batch_size=500, shuffle=True)
testloader = torch.utils.data.DataLoader( dataset_test, batch_size=500, shuffle=True)
values = [1e-8,1e-7,1e-6,1e-5]
model = NN()
for params in values:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = params)
train_acc = []
val_acc = []
train_loss = []
val_loss = []
for epoch in range(100):
total_loss = 0
total_correct = 0
total_examples = 0
n_mini_batches = 0
for i,mini_batch in enumerate(trainloader,0):
images,labels = mini_batch
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
n_mini_batches += 1
total_loss += loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
epoch_training_accuracy = total_correct / total_examples
epoch_training_loss = total_loss / n_mini_batches
epoch_val_accuracy, epoch_val_loss = accuracy_and_loss( model, loss_function, testloader )
print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
%(params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))
train_loss.append( epoch_training_loss )
train_acc.append( epoch_training_accuracy )
val_loss.append( epoch_val_loss )
val_acc.append( epoch_val_accuracy )
history = { 'train_loss': train_loss,
'train_acc': train_acc,
'val_loss': val_loss,
'val_acc': val_acc }
return ( history, model )
This is the plot that I am getting. Where am I going wrong?
I cannot know any information. (Such as loss function, dataset size, dataset content (training and validation), results of 100 or 200 epochs, your scope of the question)
However, the overfitted model may classify the validation dataset. Because the MNIST dataset is not that hard with deep learning (compared to other image classifications).
How about adding white noise to the validation dataset? You may get a large loss on validation.
Or if you want to use your validation dataset, train the model for more at least 1000 epochs. But, as I said above, the overfitted model may classify the validation dataset.

Very high validation loss/small train loss in Pytorch, while finetuning resnet 50

I am training model to classify 2 types of images. I have decided to take a transfer-learning approach, freeze every part of resnet50 and new layer and start finetuning process. My dataset is not perfectly balanced but i used weights for that purpose.Please take a look at validation loss vs training loss graph. It seems to be extremely inconsitent. Could you please take a look at my code? I am new to Pytorch, maybe there is something wrong with my method and code. Final accuracy tested on test set is 86%. Thank you!
learning_rate = 1e-1
num_epochs = 100
patience = 10
batch_size = 100
weights = [4, 1]
model = models.resnet50(pretrained=True)
# Replace last layer
num_features = model.fc.in_features
model.fc = nn.Sequential(
nn.Linear(num_features, 512),
nn.ReLU(inplace=True),
nn.Linear(512, 64),
nn.Dropout(0.5, inplace=True),
nn.Linear(64, 2))
class_weights = torch.FloatTensor(weights).cuda()
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
running_loss = 0
losses = []
# To freeze the residual layers
for param in model.parameters():
param.requires_grad = False
for param in model.fc.parameters():
param.requires_grad = True
# Find total parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(
p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')
24,590,082 total parameters.
1,082,050 training parameters.
# initialize the early_stopping object
early_stopping = pytorchtools.EarlyStopping(patience=patience, verbose=True)
for epoch in range(num_epochs):
##########################
#######TRAIN MODEL########
##########################
epochs_loss=0
##Switch to train mode
model.train()
for i, (images, labels) in enumerate(train_dl):
# Move tensors to the configured device
images = images.to(device)
labels = labels.to(device)
# Forward pass
# Backprpagation and optimization
optimizer.zero_grad()
outputs = model(images).to(device)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
#calculate train_loss
train_losses.append(loss.item())
##########################
#####VALIDATE MODEL#######
##########################
model.eval()
for images, labels in val_dl:
images = images.to(device)
labels = labels.to(device)
outputs = model(images).to(device)
loss = criterion(outputs,labels)
valid_losses.append(loss.item())
# print training/validation statistics
# calculate average loss over an epoch
train_loss = np.average(train_losses)
valid_loss = np.average(valid_losses)
# print(train_loss)
avg_train_losses.append(train_loss)
avg_valid_losses.append(valid_loss)
print_msg = (f'train_loss: {train_loss:.5f} ' + f'valid_loss: {valid_loss:.5f}')
print(print_msg)
# clear lists to track next epoch
train_losses = []
valid_losses = []
early_stopping(valid_loss, model)
print(epoch)
if early_stopping.early_stop:
print("Early stopping")
break

Pytorch MSE loss function nan during training

I am trying linear regression from boston dataset. MSE loss function is nan since the first iteration. I tried altering learning rate and batch_size but of no use.
from torch.utils.data import TensorDataset , DataLoader
inputs = torch.from_numpy(Features).to(torch.float32)
targets = torch.from_numpy(target).to(torch.float32)
train_ds = TensorDataset(inputs , targets)
train_dl = DataLoader(train_ds , batch_size = 5 , shuffle = True)
model = nn.Linear(13,1)
opt = optim.SGD(model.parameters(), lr=1e-5)
loss_fn = F.mse_loss
def fit(num_epochs, model, loss_fn, opt, train_dl):
# Repeat for given number of epochs
for epoch in range(num_epochs):
# Train with batches of data
for xb,yb in train_dl:
# 1. Generate predictions
pred = model(xb)
# 2. Calculate loss
loss = loss_fn(pred, yb)
# 3. Compute gradients
loss.backward()
# 4. Update parameters using gradients
opt.step()
# 5. Reset the gradients to zero
opt.zero_grad()
# Print the progress
if (epoch+1) % 10 == 0:
print('Epoch [{}/{}], Loss: {}'.format(epoch+1, num_epochs, loss.item()))
fit(100, model, loss_fn , opt , train_dl)
output
Pay attention to:
Use normalization: x = (x - x.mean()) / x.std()
y_train / y_test have to be (-1, 1) shapes. Use y_train.view(-1, 1) (if y_train is torch.Tensor or something)
(not your case, but for someone else) If you use torch.nn.MSELoss(reduction='sum') than you have to reduse the sum to mean. It can be done with torch.nn.MSELoss() or in train-loop: l = loss(y_pred, y) / y.shape[0].
Example:
...
loss = torch.nn.MSELoss()
...
for epoch in range(num_epochs):
for x, y in train_iter:
y_pred = model(x)
l = loss(y_pred, y)
optimizer.zero_grad()
l.backward()
optimizer.step()
print("epoch {} loss: {:.4f}".format(epoch + 1, l.item()))

Model weights are not being updatesd, but loss is decreasing

The following code is to train an MLP with images of size 64*64, while using the loss ||output - input||^2.
For some reason, my weights per epoch are not being updated as shown at the end.
class MLP(nn.Module):
def __init__(self, size_list):
super(MLP, self).__init__()
layers = []
self.size_list = size_list
for i in range(len(size_list) - 2):
layers.append(nn.Linear(size_list[i],size_list[i+1]))
layers.append(nn.ReLU())
layers.append(nn.Linear(size_list[-2], size_list[-1]))
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
model_1 = MLP([4096, 64, 4096])
And for training each epoch:
def train_epoch(model, train_loader, criterion, optimizer):
model.train()
model.to(device)
running_loss = 0.0
start_time = time.time()
# train batch
for batch_idx, (data) in enumerate(train_loader):
optimizer.zero_grad()
data = data.to(device)
outputs = model(data)
loss = criterion(outputs, data)
running_loss += loss.item()
loss.backward()
optimizer.step()
end_time = time.time()
weight_ll = model.net[0].weight
running_loss /= len(train_loader)
print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
return running_loss, outputs, weight_ll
for training the data:
n_epochs = 20
Train_loss = []
weights=[]
criterion = nn.MSELoss()
optimizer = optim.SGD(model_1.parameters(), lr = 0.1)
for i in range(n_epochs):
train_loss, output, weights_ll = train_epoch(model_1, trainloader, criterion, optimizer)
Train_loss.append(train_loss)
weights.append(weights_ll)
print('='*20)
Now, when I print the weights of the first fully connected layer per epoch they aren't being updated.
print(weights[0][0])
print(weights[19][0])
The output for the above is (showing the weight in epoch 0 and in epoch 19):
tensor([ 0.0086, 0.0069, -0.0048, ..., -0.0082, -0.0115, -0.0133],
grad_fn=<SelectBackward>)
tensor([ 0.0086, 0.0069, -0.0048, ..., -0.0082, -0.0115, -0.0133],
grad_fn=<SelectBackward>)
What may be going wrong? Looking at my loss, it's decreasing at a steady rate but there is no change in the weights.
Try to change it weight_ll = model.net[0].weight.clone().detach() or just weight_ll = model.net[0].weight.clone() in your train_epoch() function. And you will see the weights differ.
Explanation: weights_ll are always the last epoch values if you do not clone it. It will be regarded as the same tensor in the graph. That's why your weights[0][0] equals to weights[19][0], they are actually the same tensor.

Categories

Resources