I was assigned to write a simple network with nn.ModuleDict. So, here it is:
third_model = torch.nn.ModuleDict({
'flatten': torch.nn.Flatten(),
'fc1': torch.nn.Linear(32 * 32 * 3, 1024),
'relu': torch.nn.ReLU(),
'fc2': torch.nn.Linear(1024, 240),
'relu': torch.nn.ReLU(),
'fc3': torch.nn.Linear(240, 10)})
Then I tried to train it (with cuda):
third_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(third_model.parameters(), lr=0.001, momentum=0.9)
train(third_model, criterion, optimizer, train_dataloader, test_dataloader)
Function "train(model, criterion, optimizer, train_dataloader, test_dataloader)" trains the model and visualizes loss and accuracy of the model. It works properly.
Train:
def train(model, criterion, optimizer, train_dataloader, test_dataloader):
train_loss_log = []
train_acc_log = []
val_loss_log = []
val_acc_log = []
for epoch in range(NUM_EPOCH):
model.train()
train_loss = 0.
train_size = 0
train_acc = 0.
for imgs, labels in train_dataloader:
imgs, labels = imgs.to(device), labels.to(device)
optimizer.zero_grad()
y_pred = model(imgs)
loss = criterion(y_pred, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_size += y_pred.size(0)
train_loss_log.append(loss.data / y_pred.size(0))
_, pred_classes = torch.max(y_pred, 1)
train_acc += (pred_classes == labels).sum().item()
train_acc_log.append(np.mean((pred_classes == labels).cpu().numpy()))
val_loss = 0.
val_size = 0
val_acc = 0.
model.eval()
with torch.no_grad():
for imgs, labels in test_dataloader:
imgs, labels = imgs.to(device), labels.to(device)
pred = model(imgs)
loss = criterion(pred, labels)
val_loss += loss.item()
val_size += pred.size(0)
_, pred_classes = torch.max(pred, 1)
val_acc += (pred_classes == labels).sum().item()
val_loss_log.append(val_loss / val_size)
val_acc_log.append(val_acc / val_size)
clear_output()
plot_history(train_loss_log, val_loss_log, 'loss')
plot_history(train_acc_log, val_acc_log, 'accuracy')
print('Train loss:', train_loss / train_size)
print('Train acc:', train_acc / train_size)
print('Val loss:', val_loss / val_size)
print('Val acc:', val_acc / val_size)
I've already trained models coded with nn.Sequential and everything is okay. However, with nn.ModuleDict I get an error:
TypeError Traceback (most recent call last)
<ipython-input-144-8b33ad3aad2c> in <module>()
2 optimizer = optim.SGD(third_model.parameters(), lr=0.001, momentum=0.9)
3
----> 4 train(third_model, criterion, optimizer, train_dataloader, test_dataloader)
1 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
TypeError: forward() takes 1 positional argument but 2 were given
Tried to find any documentation on nn.ModuleDict, but it seems like there are no examples of coding networks with it.
It seems like the problem might be with linear layers, although I do not know why.
So, I hope anyone could explain where the mistake is. Would be very grateful for any possible advice.
A nn.moduleDict is a container and its forward function is not defined. It should be used to store sub-modules/networks.
You should using nn.Sequential initialized with as ordered dictionary, OrderedDict:
third_model = torch.nn.Sequential(
OrderedDict([
('flatten', torch.nn.Flatten()),
('fc1', torch.nn.Linear(32 * 32 * 3, 1024)),
('relu', torch.nn.ReLU()),
('fc2', torch.nn.Linear(1024, 240)),
('relu', torch.nn.ReLU()),
('fc3', torch.nn.Linear(240, 10))]))
Related
I am currently working on the MNIST dataset. My model has overfit the training data and I want to reduce the overfitting by using weight_decay. I am currently using 0.1 as the value for weight_decay which is giving me bad results as my validation loss and training loss are not decreasing. However, I want to experiment with different values for weight_decay. So that i can plot the different amounts of weight_decay on the x-axis and the performance of validation set on the y-axis. How do i do that? store the values in a list and use a for loop to iterate through? Below is the code that i have tried until now.
class NN(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Flatten(),
nn.Linear(784,4096),
nn.ReLU(),
nn.Linear(4096,2048),
nn.ReLU(),
nn.Linear(2048,1024),
nn.ReLU(),
nn.Linear(1024,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,32),
nn.ReLU(),
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,10))
def forward(self,x):
return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
total_correct = 0
total_loss = 0
total_examples = 0
n_batches = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = model(images)
batch_loss = loss_function(outputs,labels)
n_batches += 1
total_loss += batch_loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
accuracy = total_correct / total_examples
mean_loss = total_loss / n_batches
return (accuracy, mean_loss)
def define_and_train(model,dataset_training, dataset_test):
trainloader = torch.utils.data.DataLoader( small_trainset, batch_size=500, shuffle=True)
testloader = torch.utils.data.DataLoader( dataset_test, batch_size=500, shuffle=True)
values = [1e-8,1e-7,1e-6,1e-5]
model = NN()
for params in values:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = params)
train_acc = []
val_acc = []
train_loss = []
val_loss = []
for epoch in range(100):
total_loss = 0
total_correct = 0
total_examples = 0
n_mini_batches = 0
for i,mini_batch in enumerate(trainloader,0):
images,labels = mini_batch
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
n_mini_batches += 1
total_loss += loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
epoch_training_accuracy = total_correct / total_examples
epoch_training_loss = total_loss / n_mini_batches
epoch_val_accuracy, epoch_val_loss = accuracy_and_loss( model, loss_function, testloader )
print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
%(params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))
train_loss.append( epoch_training_loss )
train_acc.append( epoch_training_accuracy )
val_loss.append( epoch_val_loss )
val_acc.append( epoch_val_accuracy )
history = { 'train_loss': train_loss,
'train_acc': train_acc,
'val_loss': val_loss,
'val_acc': val_acc }
return ( history, model )
This is the plot that I am getting. Where am I going wrong?
I cannot know any information. (Such as loss function, dataset size, dataset content (training and validation), results of 100 or 200 epochs, your scope of the question)
However, the overfitted model may classify the validation dataset. Because the MNIST dataset is not that hard with deep learning (compared to other image classifications).
How about adding white noise to the validation dataset? You may get a large loss on validation.
Or if you want to use your validation dataset, train the model for more at least 1000 epochs. But, as I said above, the overfitted model may classify the validation dataset.
I encountered this error while training my neural network in pytorch. I am trying to pass images of size 48*48, in B&W which have only 3 features. As you can see here:
torch.Size([3, 48, 48]) 0
I am not sure how relevant this is, but I have a 1000 different images for training, as well as 1000 for validation.
I am new at this so my model is pretty simple. This is the model I wrote:
model.fc = nn.Sequential(nn.Linear(2304, 100),
nn.Linear(100, 50),
nn.Linear(50, 20),
nn.Linear(20, 3),
nn.ReLU(),
nn.LogSoftmax(dim=1))
# criterion = nn.NLLLoss()
criterion = nn.CrossEntropyLoss() #NLLLoss()
lr = 0.001
optimizer = optim.Adam(model.fc.parameters(), lr)
model.to(device)
Whenever I train using the next train function, I get the next error:
def train(epochs):
# epochs = 10
steps = 0
running_loss = 0
print_every = 10
train_losses, val_losses = [], []
# start timeit
starttime = timeit.default_timer()
print(epochs)
for epoch in range(epochs):
for inputs, labels in train_dataloader:
steps += 1
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
logps = model.forward(inputs)
loss = criterion(logps, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if steps % print_every == 0:
val_loss = 0
accuracy = 0
model.eval()
with torch.no_grad():
for inputs, labels in val_dataloader:
inputs, labels = inputs.to(device),labels.to(device)
logps = model.forward(inputs)
batch_loss = criterion(logps, labels)
val_loss += batch_loss.item()
ps = torch.exp(logps)
top_p, top_class = ps.topk(1, dim=1)
equals = top_class == labels.view(*top_class.shape)
accuracy +=torch.mean(equals.type(torch.FloatTensor)).item()
train_losses.append(running_loss/len(train_dataloader))
val_losses.append(val_loss/len(val_dataloader))
print(f"Epoch {epoch+1}/{epochs}.. "
f"Train loss: {running_loss/print_every:.3f}.. "
f"Val loss: {val_loss/len(val_dataloader):.3f}.. "
f"Val accuracy: {accuracy/len(val_dataloader):.3f}")
running_loss = 0
model.train()
torch.save(model, 'aerialmodel.pth')
print("Training took:", round(timeit.default_timer() - starttime, 3), "s.")
print(epochs, "epochs")
print("Learning rate:", lr)
print(f"Val accuracy: {accuracy/len(val_dataloader):.3f}")
Error:
/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
147 Variable._execution_engine.run_backward(
148 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 149 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
150
151
RuntimeError: Function AddmmBackward returned an invalid gradient at index 2 - got [2048, 100] but expected shape compatible with [2304, 100]
Any help is welcomed! Thank you very much!
I try my code is not working I'm using a simple dataset:
epochs = 100
losses = []
for i in range(epochs):
y_pred = model.forward(X)
loss = criterion(y_pred, y)
print("epoch:", i, "loss:", loss.item())
losses.append(loss)
optimizer.zero_grad()
loss.backward()
optimizer.step()
thanks,
Philippe.
You can load dataset using a custom data generator
train_dataset = DatasetGenerator()
or loaded data
inps = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
tgts = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
train_dataset = TensorDataset(inps, tgts)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
epochs = 100
losses = []
for i in range(epochs):
for ii, (data, target) in enumerate(train_loader):
y_pred = model(data)
optimizer.zero_grad()
loss = criterion(y_pred, target)
losses.append(loss)
loss.backward()
optimizer.step()
The following code is to train an MLP with images of size 64*64, while using the loss ||output - input||^2.
For some reason, my weights per epoch are not being updated as shown at the end.
class MLP(nn.Module):
def __init__(self, size_list):
super(MLP, self).__init__()
layers = []
self.size_list = size_list
for i in range(len(size_list) - 2):
layers.append(nn.Linear(size_list[i],size_list[i+1]))
layers.append(nn.ReLU())
layers.append(nn.Linear(size_list[-2], size_list[-1]))
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
model_1 = MLP([4096, 64, 4096])
And for training each epoch:
def train_epoch(model, train_loader, criterion, optimizer):
model.train()
model.to(device)
running_loss = 0.0
start_time = time.time()
# train batch
for batch_idx, (data) in enumerate(train_loader):
optimizer.zero_grad()
data = data.to(device)
outputs = model(data)
loss = criterion(outputs, data)
running_loss += loss.item()
loss.backward()
optimizer.step()
end_time = time.time()
weight_ll = model.net[0].weight
running_loss /= len(train_loader)
print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
return running_loss, outputs, weight_ll
for training the data:
n_epochs = 20
Train_loss = []
weights=[]
criterion = nn.MSELoss()
optimizer = optim.SGD(model_1.parameters(), lr = 0.1)
for i in range(n_epochs):
train_loss, output, weights_ll = train_epoch(model_1, trainloader, criterion, optimizer)
Train_loss.append(train_loss)
weights.append(weights_ll)
print('='*20)
Now, when I print the weights of the first fully connected layer per epoch they aren't being updated.
print(weights[0][0])
print(weights[19][0])
The output for the above is (showing the weight in epoch 0 and in epoch 19):
tensor([ 0.0086, 0.0069, -0.0048, ..., -0.0082, -0.0115, -0.0133],
grad_fn=<SelectBackward>)
tensor([ 0.0086, 0.0069, -0.0048, ..., -0.0082, -0.0115, -0.0133],
grad_fn=<SelectBackward>)
What may be going wrong? Looking at my loss, it's decreasing at a steady rate but there is no change in the weights.
Try to change it weight_ll = model.net[0].weight.clone().detach() or just weight_ll = model.net[0].weight.clone() in your train_epoch() function. And you will see the weights differ.
Explanation: weights_ll are always the last epoch values if you do not clone it. It will be regarded as the same tensor in the graph. That's why your weights[0][0] equals to weights[19][0], they are actually the same tensor.
I used the transfer learning approach to train a model and saved the best-detected weights. In another script, I tried to use the saved weights for prediction. But I am getting errors as follows. I have used ResNet for finetuning the network and have 4 classes.
RuntimeError: Error(s) in loading state_dict for ResNet:
size mismatch for fc.bias: copying a param of torch.Size([1000]) from
checkpoint, where the shape is torch.Size([4]) in current model.
size mismatch for fc.weight: copying a param of torch.Size([1000,
512]) from checkpoint, where the shape is torch.Size([4, 512]) in
current model.
I am using the following code for prediction of output:
checkpoint = torch.load("./models/custom_model13.model")
model = resnet18(pretrained=True)
model.load_state_dict(checkpoint)
model.eval()
def predict_image(image_path):
transformation = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
image_tensor = transformation(image).float()
image_tensor = image_tensor.unsqueeze_(0)
if torch.cuda.is_available():
image_tensor.cuda()
input = Variable(image_tensor)
output = model(input)
index = output.data.numpy().argmax()
return index
if __name__ == "main":
imagefile = "image.png"
imagepath = os.path.join(os.getcwd(),imagefile)
prediction = predict_image(imagepath)
print("Predicted Class: ",prediction)
And the following code to train and save the model:
Data_dir = 'Dataset'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print (device)
def save_models(epochs, model):
torch.save(model.state_dict(), "custom_model{}.model".format(epochs))
print("Checkpoint Saved")
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'train' and epoch_acc > best_acc:
save_models(epoch,model)
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
def visualize_model(model, num_images=6):
was_training = model.training
model.eval()
images_so_far = 0
fig = plt.figure()
with torch.no_grad():
for i, (inputs, labels) in enumerate(dataloaders['val']):
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
for j in range(inputs.size()[0]):
images_so_far += 1
ax = plt.subplot(num_images//2, 2, images_so_far)
ax.axis('off')
ax.set_title('predicted: {}'.format(class_names[preds[j]]))
imshow(inputs.cpu().data[j])
if images_so_far == num_images:
model.train(mode=was_training)
return
model.train(mode=was_training)
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 4)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=25)
Cause:
You trained a model derived from resnet18 in this way:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 4)
That is, you changed the last nn.Linear layer to output 4 dim prediction instead of the default 1000.
When you try and load the model for prediction, your code is:
model = resnet18(pretrained=True)
model.load_state_dict(checkpoint)
You did not apply the same change of the last nn.Linear layer to model therefore the checkpoint you are trying to load does not fit.
Fix:
(1) Apply the same change before loading the checkpoint:
model = resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 4) # make the change
model.load_state_dict(checkpoint) # load
(2) Even better, use num_classes argument to construct resnet with the desired number of outputs to begin with:
model = resnet18(pretrained=True, num_classes=4)
model.load_state_dict(checkpoint) # load