Expected hidden[0] size (2, 8, 256), got [8, 256] - python

The hidden state has the correct shape when I print it, as shown below.
print(h0.shape)
print(x.shape)
torch.Size([2, 8, 256])
torch.Size([8, 300, 300])
But I still get the error: Expected hidden[0] size (2, 8, 256), got [8, 256]
What could be wrong?
The whole code is as follows.
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image
import numpy as np
from torch.autograd import Variable
# Hyperparameters for the stacked LSTM classifier defined below.
# NOTE(review): input_size is not defined anywhere in this snippet; presumably
# 300, given the [8, 300, 300] batches printed above — confirm.
seq_len = input_size
hidden_size = 256 #size of hidden layers
num_classes = 5
num_epochs = 20
batch_size = 8
learning_rate = 0.001
# Recurrent model configuration (the model below is an LSTM, not a fully connected network)
num_layers = 2 # 2 RNN layers are stacked
class LSTM(nn.Module):
    """Stacked LSTM classifier that decodes the last time step with a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True, so the input needs to have shape
        # x -> (batch_size, seq, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # This layer comes after the LSTM, so its input size is the LSTM hidden size.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch_size, num_classes) for ``x``."""
        # Initial hidden state: (num_layers, batch_size, hidden_size).
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        print(h0.shape)
        print(x.shape)
        # NOTE(review): h0 is passed on its own here. This is the call that
        # raises "Expected hidden[0] size (2, 8, 256), got [8, 256]"; it is
        # left unchanged because it is the behaviour being asked about.
        out, _ = self.lstm(x, h0)
        print(out.shape)
        # out has shape (batch_size, seq_len, hidden_size); only the last time
        # step is decoded, giving (batch_size, hidden_size).
        out = out[:, -1, :]
        out = self.fc(out)
        return out
stacked_lstm_model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # cross entropy applies softmax itself, so the model returns raw scores
optimizer = torch.optim.Adam(stacked_lstm_model.parameters(), lr=learning_rate)
# Train the model
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
    t_losses = []
    for i, (images, labels) in enumerate(train_dl):
        # origin shape: [8, 1, 300, 300]
        # resized: [8, 300, 300]
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = stacked_lstm_model(images)
        loss = criterion(outputs, labels)
        # Record a plain float: appending the tensor itself would keep every
        # batch's autograd graph referenced until the end of the epoch.
        t_losses.append(loss.item())
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
    avgd_trainloss = sum(t_losses)/len(t_losses)
    # Validate once per epoch; no gradients are needed here.
    acc = 0
    v_losses = []
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for v_images, v_labels in valid_dl:
            v_images = v_images.reshape(-1, seq_len, input_size).to(device)
            v_labels = v_labels.to(device)
            v_outputs = stacked_lstm_model(v_images)
            v_loss = criterion(v_outputs, v_labels)
            v_losses.append(v_loss.item())
            # max returns (value ,index)
            _, v_predicted = torch.max(v_outputs.data, 1)
            n_samples += v_labels.size(0)
            n_correct += (v_predicted == v_labels).sum().item()
        acc = 100.0 * n_correct / n_samples
    avgd_validloss = sum(v_losses)/len(v_losses)
    # Both averages and acc are plain Python floats, so they are formatted
    # directly (calling .item() on the float acc raised AttributeError).
    print (f'Epoch [{epoch+1}/{num_epochs}], Train loss: {avgd_trainloss:.4f}, Valid loss: {avgd_validloss:.4f}, Valid accu: {acc:.2f}')
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_dl:
        images = images.reshape(-1, seq_len, input_size).to(device)
        labels = labels.to(device)
        outputs = stacked_lstm_model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on test images: {acc} %')

nn.LSTM expects its initial state as a tuple of two tensors, (h_0, c_0) — the hidden state and the cell state — not a single tensor. So instead of
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
use
h0 = (torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device), torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device))
So you need two hidden states in a tuple.

Related

BERT Debugging (not enough values to unpack (expected 2, got 1))

I'm new to BERT and trying to test it on my dataset. The code is as follows:
# Import BERT model and Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")
bert = AutoModelForMaskedLM.from_pretrained("bert-base-chinese")
class BERT_Arch(nn.Module):
    """Two-class classification head on top of a BERT model.

    Args:
        bert: Model called as ``bert(sent_id, attention_mask=mask)``. Its
            result is unpacked into two values, and the second one is fed to
            a layer that expects 768 features.
    """

    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.1)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(768,512)
        # dense layer 2 (Output layer)
        self.fc2 = nn.Linear(512,2)
        # log-softmax over the class dimension
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, sent_id, mask):
        """Return log-probabilities of shape (batch, 2)."""
        # pass the inputs to the model
        # NOTE(review): this unpacking needs self.bert to return exactly two
        # values. It is the line the traceback points at ("not enough values
        # to unpack (expected 2, got 1)") and is left unchanged because it is
        # the behaviour being asked about.
        _, cls_hs = self.bert(sent_id, attention_mask=mask)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc2(x)
        # apply log-softmax
        x = self.softmax(x)
        return x
# function to train the model
def train():
    """Train ``model`` for one pass over ``train_dataloader``.

    Uses the module-level ``model``, ``train_dataloader``, ``device``,
    ``cross_entropy`` and ``optimizer``.

    Returns:
        A tuple ``(avg_loss, total_preds)``: the mean loss per batch and the
        predictions of all batches concatenated along axis 0.
    """
    model.train()
    total_loss = 0
    # empty list to save model predictions
    total_preds = []
    # iterate over batches
    for step, batch in enumerate(train_dataloader):
        # progress update after every 50 batches.
        if step % 50 == 0 and step != 0:
            print('Batch {:>5,} of {:>5,}.'.format(step, len(train_dataloader)))
        # move the batch to the target device
        batch = [r.to(device) for r in batch]
        sent_id, mask, labels = batch
        # clear previously calculated gradients
        model.zero_grad()
        # get model predictions for the current batch
        preds = model(sent_id, mask)
        # compute the loss between actual and predicted values
        loss = cross_entropy(preds, labels)
        # add on to the total loss
        total_loss = total_loss + loss.item()
        # backward pass to calculate the gradients
        loss.backward()
        # clip the gradients to 1.0. It helps in preventing the exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # update parameters
        optimizer.step()
        # detach the predictions and move them to the CPU as a NumPy array
        preds = preds.detach().cpu().numpy()
        # append the model predictions
        total_preds.append(preds)
    # compute the training loss of the epoch
    avg_loss = total_loss / len(train_dataloader)
    # predictions are in the form of (no. of batches, size of batch, no. of classes).
    # reshape the predictions in form of (number of samples, no. of classes)
    total_preds = np.concatenate(total_preds, axis=0)
    # returns the loss and predictions
    return avg_loss, total_preds
# set initial loss to infinite
best_valid_loss = float('inf')
# empty lists to store training and validation loss of each epoch
train_losses = []
valid_losses = []
# for each epoch
for epoch in range(epochs):
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
    # train model
    train_loss, _ = train()
    # evaluate model
    valid_loss, _ = evaluate()
    # save the best model (lowest validation loss seen so far)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved_weights.pt')
    # append training and validation loss
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')
The error that I get is "not enough values to unpack (expected 2, got 1)". I have checked the input_ids, mask and label tensors, and they look like the following:
tensor([[101, 102],
[101, 102],
[101, 102],
...,
[101, 102],
[101, 102],
[101, 102]])
tensor([[1, 1],
[1, 1],
[1, 1],
...,
[1, 1],
[1, 1],
[1, 1]])
tensor([0, 0, 0, ..., 0, 0, 0])
I think that the dimension of tensors is not wrong, so don't need to unsqueeze them as other answers show. Can someone check this for me? thanks ahead!
The complete errors prompt:
10 print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
11 #train model
---> 12 train_loss, _ = train()
13 #evaluate model
14 valid_loss, _ = evaluate()
16 model.zero_grad()
17 # get model predictions for the current batch
---> 18 preds = model(sent_id, mask)
19 # compute the loss between actual and predicted values
20 loss = cross_entropy(preds, labels)
19 def forward(self, sent_id, mask):
20 #pass the inputs to the model
---> 21 _, cls_hs = self.bert(sent_id, attention_mask=mask)
22 x = self.fc1(cls_hs)
23 x = self.relu(x)
ValueError: not enough values to unpack (expected 2, got 1)

Pytorch error: RuntimeError: input.size(-1) must be equal to input_size. Expected 7, got 1

I am new to machine learning. Right now I am building a LSTM neural network. My input is 7 features and my output is 2 labels. However, when I put 7 into the input of the LSTM, I get this error. It works when I put 1 input, but I get very inaccurate results obviously.
I want to take in time input (hour, minute, second, etc as features) and predict lat & lon value (2 labels)
My code is below:
n_feature= feature_train.shape[1] # number of columns in input matrix
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: Number of output values per sample.
        input_size: Number of features per time step (last input dimension).
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = 7
        # batch_first=True: input is (batch, seq_len, input_size)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return a tensor of shape (batch, num_classes) for ``x``."""
        # Zero initial states, created on the same device and with the same
        # dtype as the input so the model also works after .to(device).
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          dtype=x.dtype, device=x.device)
        c_0 = torch.zeros_like(h_0)
        # Propagate input through LSTM
        _, (h_out, _) = self.lstm(x, (h_0, c_0))
        # h_out is (num_layers, batch, hidden_size). Take the top layer only:
        # flattening with view(-1, hidden_size) would fold the layers into the
        # batch axis whenever num_layers > 1.
        out = self.fc(h_out[-1])
        return out
num_epochs = 4
learning_rate = 0.01
input_size = 7
hidden_size =30
num_layers = 1
num_outputs = 2
lstm = LSTM(num_outputs, input_size, hidden_size, num_layers)
criterion = torch.nn.MSELoss() # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
# convert to tensor
xTrain = torch.tensor(np.array(feature_train).reshape(-1,n_feature), dtype = torch.float)
yTrain = torch.tensor(np.array(label_train).reshape(-1,2), dtype = torch.float)
xTrainValidation = torch.tensor(np.array(feature_validation).reshape(-1,n_feature), dtype = torch.float)
yTrainValidation = torch.tensor(np.array(label_validation).reshape(-1,2), dtype = torch.float)
# NOTE(review): indexing with [:, :, None] appends a trailing axis of size 1,
# so xTrain becomes (N, n_feature, 1) while the LSTM was built with
# input_size = 7. This is the mismatch reported in the error and is left
# unchanged because it is the behaviour being asked about.
xTrain = xTrain[:, :, None]
yTrain = yTrain[:, :, None]
xTrainValidation = xTrainValidation[:, :, None]
yTrainValidation = yTrainValidation[:, :, None]
# Train the model
for epoch in range(num_epochs):
    outputs = lstm(xTrain)
    optimizer.zero_grad()
    # add a trailing axis so the outputs line up with yTrain
    outputs = outputs[:, :, None]
    # compute the loss
    loss = criterion(outputs, yTrain)
    loss.backward()
    optimizer.step()
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
torch.save(lstm, 'saved_model.pth')
[enter image description here][1]
[1]: https://i.stack.imgur.com/jbKcu.png
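The last dimension of your LSTM input must equal input_size, the value given when the nn.LSTM module is initialized; you currently have it set to 7. However, your model input is xTrain, which has a shape of (*, feature_train.shape[1], 1), so its last dimension is 1. This size mismatch is the reason behind the error. Because the LSTM is built with batch_first=True, it expects input of shape (batch, seq_len, input_size); for example, xTrain[:, None, :] has shape (*, 1, 7) and feeds all 7 features at a single time step.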

Training Loss decreasing but Validation Loss is stable

I am trying to train a neural network that I took from this paper: https://scholarworks.rit.edu/cgi/viewcontent.cgi?referer=&httpsredir=1&article=10455&context=theses. See this image: Neural Network Architecture
I am using pytorch-lightning to use multi-GPU training.
I am feeding this network 3-channel optical flows (UVC: U is horizontal temporal displacement, V is vertical temporal displacement, C represents the confidence map).
Outputs represent the frame-to-frame pose and are in the form of a vector of 6 floating-point values (translationX, translationY, translationZ, Yaw, Pitch, Roll). Translations vary from -0.25 to 3 meters and rotations vary from -6 to 6 degrees.
The output dataset is taken from the kitti-odometry dataset. There are 11 video sequences; I used the first 8 for training and a portion of the remaining 3 sequences for evaluation during training.
I trained the model for 200 epochs ( took 33 hours on 8 GPUs ).
During this training, training loss decreases but validation loss remains constant during the whole training process.
# Preprocessing pipeline applied to every sample: resize, convert, then
# per-channel normalization with the given means and standard deviations.
transform = transforms.Compose(
[cv_resize((370,1242)),
flow_transform_and_uint8_and_tensor(),
transforms.Normalize((0.3973, 0.2952, 0.4500), (0.4181, 0.4362, 0.3526))])
# Training hyperparameters
batch_size = 8
val_data_percentage = 0.06
epochs = 200
learning_rate = 0.0001
train_dataset = FlowsAndPoses("./uvc_flows_png/train/", "./relative_poses/train/", transform)
test_dataset = FlowsAndPoses("./uvc_flows_png/test/", "./relative_poses/test/", transform)
dataset_length = test_dataset.__len__()
# The validation set is a random val_data_percentage share of the test
# dataset; the rest stays in test_dataset.
test_dataset, val_dataset = random_split(test_dataset,[int(dataset_length*(1-val_data_percentage)),dataset_length - int(dataset_length*(1-val_data_percentage))])
print("Train: ",train_dataset.__len__(), " Validation: ", val_dataset.__len__())
# L1 (mean absolute error) loss shared by the training and validation steps
criterion = nn.L1Loss()
class Net(pl.LightningModule):
    """Nine-layer CNN that regresses 6 values from a 3-channel input.

    Losses are computed with the module-level ``criterion`` and the learning
    rate comes from the module-level ``learning_rate``.
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: (in_channels, out_channels, kernel_size, stride)
        self.conv1 = nn.Conv2d(3, 64, 7, 2)
        self.conv2 = nn.Conv2d(64, 128, 5, 2)
        self.conv3 = nn.Conv2d(128, 256, 5, 2)
        self.conv4 = nn.Conv2d(256, 256, 3, 1)
        self.conv5 = nn.Conv2d(256, 512, 3, 2)
        self.conv6 = nn.Conv2d(512, 512, 3, 1)
        self.conv7 = nn.Conv2d(512, 512, 3, 2)
        self.conv8 = nn.Conv2d(512, 512, 3, 1)
        self.conv9 = nn.Conv2d(512, 1024, 3, 2)
        self.fc1 = nn.Linear(32768, 1024)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 6)
        self.net_relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Run the convolutional stack and the two linear layers on ``x``."""
        x = self.net_relu(self.conv1(x))
        x = self.net_relu(self.conv2(x))
        x = self.net_relu(self.conv3(x))
        x = self.net_relu(self.conv4(x))
        x = self.net_relu(self.conv5(x))
        x = self.net_relu(self.conv6(x))
        x = self.net_relu(self.conv7(x))
        x = self.net_relu(self.conv8(x))
        x = self.net_relu(self.conv9(x))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.net_relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        print("Training: ")
        inputs, labels = batch
        outputs = self.forward(inputs.float())
        loss = criterion(outputs, labels.float())
        self.log("my_loss", loss, on_epoch=True)
        return loss

    def training_epoch_end(self, training_step_outputs):
        """Dump the epoch's step outputs to a file and save a checkpoint."""
        loss_path = "losses/training_loss"+str(self.current_epoch)+"_"+str(self.global_step)
        # The context manager closes the file even if the write fails.
        with open(loss_path, "w") as training_loss_file:
            training_loss_file.write(str(training_step_outputs))
        try:
            torch.save(self.state_dict(), "checkpoints/trained_model_epoch"+str(self.current_epoch)+".pth")
        except Exception as err:
            # Saving stays best-effort, but the cause is reported and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("error saving", err)

    def validation_step(self, batch, batch_idx):
        """Compute, log and return the validation loss for one batch."""
        inputs, labels = batch
        outputs = self.forward(inputs.float())
        loss = criterion(outputs, labels.float())
        self.log("val_loss", loss)
        return loss

    def validation_epoch_end(self, validation_step_outputs):
        """Dump the epoch's validation step outputs to a file."""
        loss_path = "losses/validation_loss"+str(self.current_epoch)+"_"+str(self.global_step)
        with open(loss_path, "w") as valid_loss_file:
            valid_loss_file.write(str(validation_step_outputs))

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters."""
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        return optimizer
# Instantiate the network and train it on 8 GPUs with the "ddp" strategy,
# running validation after every epoch.
autoencoder = Net()
trainer = pl.Trainer(gpus=[0,1,2,3,4,5,6,7], accelerator="gpu", strategy="ddp", enable_checkpointing=True, max_epochs=epochs, check_val_every_n_epoch=1)
# NOTE(review): the validation DataLoader is also created with shuffle=True —
# confirm that this is intended.
trainer.fit(autoencoder, DataLoader(train_dataset, batch_size=batch_size, shuffle=True), DataLoader(val_dataset, batch_size=batch_size, shuffle=True))
Zero Grad and optimizer.step are handled by the pytorch-lightning library.
The results I got are in the following images:
Training loss
Validation loss during training
If anyone has suggestions on how to address this problem, I would really apreciate it.

How to fix the error where the target batch size does not match when I use CrossEntropyLoss function?

I am working on a training task with a CNN. When I created the loss function with CrossEntropyLoss and trained on the dataset, I got an error saying that the batch sizes do not match.
This is the main code for trainning:
# Build the network, train it for 10 epochs with batches of 32, then plot the
# returned training and validation histories.
net = SimpleConvolutionalNetwork()
train_history, val_history = train(net, batch_size=32, n_epochs=10, learning_rate=0.001)
plot_losses(train_history, val_history)
This is the neuron network code:
class SimpleConvolutionalNetwork(nn.Module):
    """One convolution layer followed by two fully connected layers (10 outputs)."""

    # Q: why is the spatial size of the input unchanged after relu??
    def __init__(self) -> None:
        super(SimpleConvolutionalNetwork, self).__init__()
        # convolution layer: 3 input channels -> 18 output channels
        self.conv1 = nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        # max-pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # fully connected layer and output layer
        self.fc1 = nn.Linear(18*16*16, 64)
        self.fc2 = nn.Linear(64, 10)

    # Q: where is the pooling layer??
    def forward(self, x):
        """Return class scores with 10 values per row."""
        # input shape: 3 (channels) * 32 * 32
        # filtered by conv1 defined in the constructor, then relu is applied
        x = F.relu(self.conv1(x))
        # NOTE(review): self.pool is never applied, so x still has 18*32*32
        # values per sample here and the view below yields 4 rows per sample.
        # This is the behaviour reported in the question and is left unchanged.
        # intended: 18*32*32 -> 18*16*16
        x = x.view(-1, 18*16*16)
        # 18*16*16 (4608 in total) -> 64, followed by relu
        x = F.relu(self.fc1(x))
        # 64 -> 10 finally
        x = self.fc2(x)
        return x
In the train function, the error place is at the construction of loss function. Because it is a very long context, the main part is showed below:
def train(net, batch_size, n_epochs, learning_rate):
    """Train ``net`` on mini-batches (excerpt; elided parts are marked ``...``).

    Args:
        net: The network to train.
        batch_size: Mini-batch size passed to the data loaders.
        n_epochs: Number of passes over the training loader.
        learning_rate: Learning rate passed to ``createLossAndOptimizer``.
    """
    ...
    # load the training dataset
    train_loader = get_train_loader(batch_size)
    # get validation dataset
    val_loader = get_val_loader(batch_size)
    # number of mini-batches per epoch
    n_minibatches = len(train_loader)
    # create the loss function and the optimizer
    criterion, optimizer = createLossAndOptimizer(net, learning_rate)
    train_history = []
    val_history = []
    training_start_time = time.time()
    best_error = np.inf
    best_model_path = "best_model_path"
    # GPU if possible
    net = net.to(device)
    for epoch in range(n_epochs):
        running_loss = 0.0
        # Report progress about ten times per epoch. With
        # print_every = n_minibatches the check below could never be true,
        # because (i + 1) never reaches n_minibatches + 1.
        print_every = n_minibatches // 10
        start_time = time.time()
        total_train_loss = 0.0
        # step1: training the datasets
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            total_train_loss += loss.item()
            # print every 10th of epoch (the +1 keeps the modulus non-zero)
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_minibatches), running_loss / print_every,
                    time.time() - start_time))
                running_loss = 0.0
                start_time = time.time()
        train_history.append(total_train_loss / len(train_loader))
        ...
the loss construction funciton and dataset loading are like this:
def createLossAndOptimizer(net, learning_rate=0.001):
    """Return a ``(criterion, optimizer)`` pair for training ``net``.

    Args:
        net: Module whose parameters the optimizer updates.
        learning_rate: Learning rate for the optimizer.

    Returns:
        A cross-entropy loss and an Adam optimizer over ``net.parameters()``.
    """
    # define a cross-entropy loss function:
    criterion = nn.CrossEntropyLoss()
    # Adam is given only the parameters and the learning rate; no momentum
    # argument is passed here.
    optimizer = opt.Adam(net.parameters(), lr=learning_rate)
    return criterion, optimizer
def get_train_loader(batch_size):
    """Return a DataLoader over ``train_set`` drawn with ``train_sampler``."""
    return th.utils.data.DataLoader(train_set,batch_size=batch_size,sampler=train_sampler, num_workers=num_workers)
def get_val_loader(batch_size):
    """Return the DataLoader used for validation."""
    # NOTE(review): this is built from train_set with train_sampler, exactly
    # like get_train_loader, so validation batches are drawn the same way as
    # training batches — confirm whether a separate validation sampler was intended.
    return th.utils.data.DataLoader(train_set,batch_size=batch_size,sampler=train_sampler, num_workers=num_workers)
However, the error reminded me that the input batch size is more than the target batch size:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-07b692e7a2bb> in <module>()
173 net = SimpleConvolutionalNetwork()
174
--> 175 train_history, val_history = train(net, batch_size=32, n_epochs=10, learning_rate=0.001)
176
177 plot_losses(train_history, val_history)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
ValueError: Expected input batch_size (128) to match target batch_size (32).
At first I thought that I had set some parameters incorrectly, because of the 'labels' tensor, which has size 4. But I don't know how to fix it. Thanks for answering.
In the forward method of SimpleConvolutionalNetwork, after conv1 is applied the tensor x has a shape of (batch_size, 18, 32, 32). So x = x.view(-1, 18 * 16 * 16) turns the shape of x into (batch_size * 4, 18 * 16 * 16), because 18 * 32 * 32 is four times 18 * 16 * 16. The fully connected layers applied afterwards don't change this new batch size, so the output has shape (batch_size * 4, 10); with batch_size = 32 that is 128 rows, which is the mismatch in the error. My suggestion would be to apply pooling right after the convolution, like:
x = F.relu(self.conv1(x)) # after that x will have shape (batch_size, 18, 32, 32)
x = self.pool(x) # after that x will have shape (batch_size, 18, 16, 16)
That way forward will return tensor with shape (batch_size, 10) and batch size mismatch error won't occur.

What exactly does the forward function output in Pytorch?

This example is taken verbatim from the PyTorch Documentation. Now I do have some background on Deep Learning in general and know that it should be obvious that the forward call represents a forward pass, passing through different layers and finally reaching the end, with 10 outputs in this case, then you take the output of the forward pass and compute the loss using the loss function one defined. Now, I forgot what exactly the output from the forward() pass yields me in this scenario.
I thought that the last layer in a Neural Network should be some sort of activation function like sigmoid() or softmax(), but I did not see these being defined anywhere, furthermore, when I was doing a project now, I found out that softmax() is called later on. So I just want to clarify what exactly is the outputs = net(inputs) giving me, from this link, it seems to me by default the output of a PyTorch model's forward pass is logits?
# Convert each image to a tensor, then normalize the 3 channels with mean 0.5
# and standard deviation 0.5.
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# CIFAR-10 training split stored under ./data, with download enabled
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
# Shuffled mini-batches of 4 samples, loaded with num_workers=2
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Two conv+pool stages followed by three linear layers (10 outputs).

    ``forward`` returns the output of the last linear layer as-is; no softmax
    or other activation is applied to it.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw scores with 10 values per sample."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten to (rows, 16 * 5 * 5) to match fc1's input size
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2): # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        # Debugging aid: print the forward output of the first batch and leave
        # the inner loop. Everything below in this loop body is skipped while
        # the break is in place.
        print(outputs)
        break
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999: # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
it seems to me by default the output of a PyTorch model's forward pass
is logits
As I can see from the forward pass, yes, your function is passing the raw output
def forward(self, x):
    """Return the output of the last linear layer without any activation."""
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    # fc3 is the last layer; its result is returned as-is
    x = self.fc3(x)
    return x
So, where is softmax? Right here:
criterion = nn.CrossEntropyLoss()
It's a bit masked, but inside this function is handled the softmax computation which, of course, works with the raw output of your last layer
This is the softmax calculation: $\mathrm{softmax}(z)_i = \dfrac{e^{z_i}}{\sum_j e^{z_j}}$
where the z_i are the raw outputs of the neural network
So, in conclusion, there is no activation function after your last layer because the softmax is handled by the nn.CrossEntropyLoss class
Answering what's the raw output that comes from nn.Linear: The raw output of a neural network layer is the linear combination of the values that come from the neurons of the previous layer

Categories

Resources