I am working on a video animation project using PyTorch. My dataset contains 3904x60 MFCC audio features (input) and corresponding 3904x3 video features (output). The goal is to train a neural network model such that, given an unknown audio feature, the model maps it to its corresponding video feature. In other words, the neural network performs a 60-to-3 feature mapping. I have already built the neural network following this tutorial:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(15 * 64, 1000)
        self.fc2 = nn.Linear(1000, 3)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
and my training code looks like:
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i, (a, v) in enumerate(train_loader):
        # Run the forward pass
        a = a.float()
        v = v.long()
        outputs = model(a.view(a.size(0), 1, a.size(1)))
        loss = criterion(outputs, v)
        loss_list.append(loss.item())

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 100))
but received an error in training:
---> 15 loss = criterion(outputs, v)
multi-target not supported at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/THNN/generic/ClassNLLCriterion.c:21
I defined the batch size to be 4 so each a and v in the iteration should be a 4 by 60 tensor and a 4 by 3 tensor, respectively. How do I solve this problem?
The issue is most likely the target you pass to nn.CrossEntropyLoss(). You say v is a 4 x 3 tensor, which is not what this loss expects.
In loss = criterion(outputs, v), the loss function expects v to be a 1-D tensor of size minibatch, where each value is one of the C class indices (i.e. 0 to C-1). See the 'Shape' section in https://pytorch.org/docs/stable/nn.html?highlight=crossentropyloss#torch.nn.CrossEntropyLoss
Target: (N), where each value satisfies 0 ≤ targets[i] ≤ C−1
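As a quick illustration of the shapes nn.CrossEntropyLoss expects (a minimal sketch with made-up values, not your data):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
outputs = torch.randn(4, 3)           # (N, C): raw scores for C = 3 classes
targets = torch.tensor([0, 2, 1, 1])  # (N,): one class index in [0, C-1] per sample
loss = criterion(outputs, targets)    # passing a (4, 3) tensor as the target instead triggers the multi-target error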
I am trying to train a neural network I took from this paper: https://scholarworks.rit.edu/cgi/viewcontent.cgi?referer=&httpsredir=1&article=10455&context=theses. See this image: Neural Network Architecture
I am using pytorch-lightning to use multi-GPU training.
I am feeding this network 3-channel optical flows (UVC: U is horizontal temporal displacement, V is vertical temporal displacement, C represents the confidence map).
Outputs represent the frame-to-frame pose and are in the form of a vector of six floating-point values (translationX, translationY, translationZ, Yaw, Pitch, Roll). Translations vary from -0.25 to 3 meters and rotations vary from -6 to 6 degrees.
The output dataset is taken from the KITTI odometry dataset; there are 11 video sequences, and I used the first 8 for training and a portion of the remaining 3 sequences for evaluation during training.
I trained the model for 200 epochs (it took 33 hours on 8 GPUs).
During this training, training loss decreases but validation loss remains constant during the whole training process.
transform = transforms.Compose(
    [cv_resize((370, 1242)),
     flow_transform_and_uint8_and_tensor(),
     transforms.Normalize((0.3973, 0.2952, 0.4500), (0.4181, 0.4362, 0.3526))])
batch_size = 8
val_data_percentage = 0.06
epochs = 200
learning_rate = 0.0001
train_dataset = FlowsAndPoses("./uvc_flows_png/train/", "./relative_poses/train/", transform)
test_dataset = FlowsAndPoses("./uvc_flows_png/test/", "./relative_poses/test/", transform)
dataset_length = len(test_dataset)
test_dataset, val_dataset = random_split(
    test_dataset,
    [int(dataset_length * (1 - val_data_percentage)),
     dataset_length - int(dataset_length * (1 - val_data_percentage))])
print("Train: ", len(train_dataset), " Validation: ", len(val_dataset))
criterion = nn.L1Loss()
class Net(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 7, 2)
        self.conv2 = nn.Conv2d(64, 128, 5, 2)
        self.conv3 = nn.Conv2d(128, 256, 5, 2)
        self.conv4 = nn.Conv2d(256, 256, 3, 1)
        self.conv5 = nn.Conv2d(256, 512, 3, 2)
        self.conv6 = nn.Conv2d(512, 512, 3, 1)
        self.conv7 = nn.Conv2d(512, 512, 3, 2)
        self.conv8 = nn.Conv2d(512, 512, 3, 1)
        self.conv9 = nn.Conv2d(512, 1024, 3, 2)
        self.fc1 = nn.Linear(32768, 1024)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 6)
        self.net_relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        x = self.net_relu(self.conv1(x))
        x = self.net_relu(self.conv2(x))
        x = self.net_relu(self.conv3(x))
        x = self.net_relu(self.conv4(x))
        x = self.net_relu(self.conv5(x))
        x = self.net_relu(self.conv6(x))
        x = self.net_relu(self.conv7(x))
        x = self.net_relu(self.conv8(x))
        x = self.net_relu(self.conv9(x))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.net_relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        return x
    def training_step(self, batch, batch_idx):
        running_loss = 0
        print("Training: ")
        inputs, labels = batch
        outputs = self.forward(inputs.float())
        loss = criterion(outputs, labels.float())
        self.log("my_loss", loss, on_epoch=True)
        return loss

    def training_epoch_end(self, training_step_outputs):
        training_loss_file = open("losses/training_loss" + str(self.current_epoch) + "_" + str(self.global_step), "w")
        training_loss_file.write(str(training_step_outputs))
        training_loss_file.close()
        try:
            torch.save(self.state_dict(), "checkpoints/trained_model_epoch" + str(self.current_epoch) + ".pth")
        except:
            print("error saving")

    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.forward(inputs.float())
        loss = criterion(outputs, labels.float())
        self.log("val_loss", loss)
        return loss

    def validation_epoch_end(self, validation_step_outputs):
        valid_loss_file = open("losses/validation_loss" + str(self.current_epoch) + "_" + str(self.global_step), "w")
        valid_loss_file.write(str(validation_step_outputs))
        valid_loss_file.close()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        return optimizer
autoencoder = Net()
trainer = pl.Trainer(gpus=[0,1,2,3,4,5,6,7], accelerator="gpu", strategy="ddp", enable_checkpointing=True, max_epochs=epochs, check_val_every_n_epoch=1)
trainer.fit(autoencoder, DataLoader(train_dataset, batch_size=batch_size, shuffle=True), DataLoader(val_dataset, batch_size=batch_size, shuffle=True))
zero_grad and optimizer.step are handled by the pytorch-lightning library (automatic optimization).
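For reference, under automatic optimization Lightning runs roughly the following for every batch (a simplified sketch of the library's behaviour, not its actual code):

for batch_idx, batch in enumerate(train_dataloader):
    loss = model.training_step(batch, batch_idx)  # your training_step returns the loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()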
The results I got are in the following images:
Training loss
Validation loss during training
If anyone has suggestions on how to address this problem, I would really appreciate it.
I am working on a training task with a CNN. When I created the loss function with CrossEntropyLoss and trained on the dataset, the error told me that the batch sizes do not match.
This is the main code for training:
net = SimpleConvolutionalNetwork()
train_history, val_history = train(net, batch_size=32, n_epochs=10, learning_rate=0.001)
plot_losses(train_history, val_history)
This is the neural network code:
class SimpleConvolutionalNetwork(nn.Module):
    # Q: why is the range of the input not changed after relu??
    def __init__(self) -> None:
        super(SimpleConvolutionalNetwork, self).__init__()
        # define convolutional filtering layer (3 input channels) and output size (18 channels)
        self.conv1 = nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1)
        # define pooling layer with max-pooling function
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # define FC layer and output layer with Linear
        self.fc1 = nn.Linear(18 * 16 * 16, 64)
        self.fc2 = nn.Linear(64, 10)

    # Q: where is the pooling layer??
    def forward(self, x):
        # input shape: 3 (channels) * 32 * 32 (32*32 is the size of each channel)
        # filtered by conv1 defined in the constructor,
        # then relu the filtered x
        x = F.relu(self.conv1(x))
        # now let 18*32*32 -> 18*16*16
        x = x.view(-1, 18 * 16 * 16)
        # two steps for 18*16*16 (totally 4608) -> 64
        # output by FC first, then relu the output again
        x = F.relu(self.fc1(x))
        # 64 -> 10 finally
        x = self.fc2(x)
        return x
In the train function, the error occurs at the call to the loss function. Because the full context is very long, only the main part is shown below:
def train(net, batch_size, n_epochs, learning_rate):
    ...
    # load the training dataset
    train_loader = get_train_loader(batch_size)
    # get the validation dataset
    val_loader = get_val_loader(batch_size)
    # number of minibatches per epoch
    n_minibatches = len(train_loader)
    # set the loss function and optimizer
    criterion, optimizer = createLossAndOptimizer(net, learning_rate)

    train_history = []
    val_history = []
    training_start_time = time.time()
    best_error = np.inf
    best_model_path = "best_model_path"

    # GPU if possible
    net = net.to(device)

    for epoch in range(n_epochs):
        running_loss = 0.0
        print_every = n_minibatches
        start_time = time.time()
        total_train_loss = 0.0

        # step 1: train on the dataset
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            total_train_loss += loss.item()

            # print every 10th of epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_minibatches), running_loss / print_every,
                    time.time() - start_time))
                running_loss = 0.0
                start_time = time.time()

        train_history.append(total_train_loss / len(train_loader))
    ...
The loss construction function and dataset loading look like this:
def createLossAndOptimizer(net, learning_rate=0.001):
    # define a cross-entropy loss function:
    criterion = nn.CrossEntropyLoss()
    # the optimizer takes the net parameters, the learning rate, and
    # a momentum rate to help keep the model from over-fitting
    # (default value is 0.9)
    optimizer = opt.Adam(net.parameters(), lr=learning_rate)
    return criterion, optimizer

def get_train_loader(batch_size):
    return th.utils.data.DataLoader(train_set, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)

def get_val_loader(batch_size):
    return th.utils.data.DataLoader(train_set, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
However, the error says that the input batch size is larger than the target batch size:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-07b692e7a2bb> in <module>()
173 net = SimpleConvolutionalNetwork()
174
--> 175 train_history, val_history = train(net, batch_size=32, n_epochs=10, learning_rate=0.001)
176
177 plot_losses(train_history, val_history)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
ValueError: Expected input batch_size (128) to match target batch_size (32).
At first I thought I had mistakenly set incorrect parameters because of the 'labels', since the mismatch is a factor of 4. But I don't know how to fix it. Thanks for answering.
In the forward method of SimpleConvolutionalNetwork, after applying conv1 the tensor x has shape (batch_size, 18, 32, 32). So when doing x = x.view(-1, 18 * 16 * 16), the shape of x becomes (batch_size * 4, 18 * 16 * 16), and because the fully-connected layers applied afterwards don't change this new batch dimension, the output has shape (batch_size * 4, 10). My suggestion would be to use pooling right after the convolution, like this:
x = F.relu(self.conv1(x)) # after that x will have shape (batch_size, 18, 32, 32)
x = self.pool(x) # after that x will have shape (batch_size, 18, 16, 16)
That way forward will return a tensor of shape (batch_size, 10) and the batch size mismatch error won't occur.
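Putting the suggestion together, the forward pass could look like this (a sketch following the shapes above):

def forward(self, x):
    # x: (batch_size, 3, 32, 32)
    x = F.relu(self.conv1(x))   # -> (batch_size, 18, 32, 32)
    x = self.pool(x)            # -> (batch_size, 18, 16, 16)
    x = x.view(x.size(0), -1)   # -> (batch_size, 18*16*16), batch dimension preserved
    x = F.relu(self.fc1(x))     # -> (batch_size, 64)
    x = self.fc2(x)             # -> (batch_size, 10)
    return x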
Introduction:
I am trying to make an autoencoder learn 32 features (position, velocity, etc.) over 32 time steps => a 32x32 'image'.
For this I made a simple fully-connected model with a symmetric encoder and decoder that use the Tanh activation in every layer.
During training, I added my own version of dropout for just the input (in the future I will use nn.Dropout).
Problem:
I get large spikes in the loss function "sqrt(MSE)" at irregular intervals (batch_size = 6000).
Loss Graph
What I have tried: (small test, 1000 epochs max)
clip_grad_norm_(model.parameters(), max_norm=0.5).
Tried ReLU and ELU activation functions.
Batch = N / 2 (I wanted to do N but my GPU memory was not enough).
Not adding noise or dropout (I think the noise/dropout helps but does not solve the problem).
Removing the square root from the MSE loss.
Can someone explain to me why this happens and how to fix it?
def rand_bin_array(p_zeros, shape):
    size = 1
    for e in shape:
        size *= e
    arr = np.ones(size)
    arr[:int(size * p_zeros)] = 0
    np.random.shuffle(arr)
    arr = arr.reshape(shape)
    return arr

class Autoencoder_Liniar(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(1024, 921),
            nn.Tanh(),
            nn.Linear(921, 736),
            nn.Tanh(),
            nn.Linear(736, 515),
            nn.Tanh(),
            nn.Linear(515, 309),
            nn.Tanh(),
            nn.Linear(309, 128),
            nn.Tanh(),
            nn.Linear(128, 64),
            nn.Tanh(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.Tanh(),
            nn.Linear(128, 309),
            nn.Tanh(),
            nn.Linear(309, 515),
            nn.Tanh(),
            nn.Linear(515, 736),
            nn.Tanh(),
            nn.Linear(736, 921),
            nn.Tanh(),
            nn.Linear(921, 1024),
            nn.Tanh()
        )

    def forward(self, x):
        enc = self.encoder(x)
        dec = self.decoder(enc)
        return dec
torch.manual_seed(0)
model = Autoencoder_Liniar().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

random.seed(0)
epochs = 10000
batch_size = 6000
test_b_size = 5000
train_losses = []
test_losses = []

for i in range(epochs):
    avg_loss = 0
    random.shuffle(train_data)
    for b in range(train_nr // batch_size):
        start = b * batch_size
        data = torch.FloatTensor(train_data[start : start + batch_size]).cuda()
        noise_power = max(0.8 - i / epochs, 0.1)
        noise = torch.FloatTensor(rand_bin_array(noise_power, data.shape)).cuda()
        y_pred = model(data * noise)
        loss = torch.sqrt(criterion(y_pred, data))
        optimizer.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
        optimizer.step()
        avg_loss += loss.item()
        if b % 20 == 0:
            print(f'EPOCH: {i} BATCH: {b} LOSS: {loss.item()}')
    train_losses.append(avg_loss / (train_nr // batch_size))

    with torch.no_grad():
        avg_loss = 0
        for b in range(test_nr // test_b_size):
            start = b * test_b_size
            data = np.array(test_data[start : start + test_b_size])
            data = torch.FloatTensor(data).cuda()
            y_pred = model(data)
            loss = torch.sqrt(criterion(y_pred, data))
            avg_loss += loss.item()
        test_losses.append(avg_loss / (test_nr // test_b_size))
Added code for plotting the gradient's norm over epochs (without noise/dropout).
Gradient clipped at 0.3:
total_norm = 0
for p in model.parameters():
    param_norm = p.grad.detach().data.norm(2)
    total_norm += param_norm.item() ** 2
total_norm = total_norm ** 0.5
avg_grad += total_norm
optimizer.step()
The answer was to clip the gradient with clip_grad_norm_, but at a lower value.
The value was chosen after plotting the gradient's norm over epochs.
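In the training loop above, that amounts to clipping just before the optimizer step with the lower max_norm (0.3 here, per the gradient-norm graph):

optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.3)  # clip at the lower value
optimizer.step()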
I am learning PyTorch and I have created a binary classification algorithm. After having trained the model I have very low loss and quite good accuracy. However, on validation the accuracy is exactly 50%. I am wondering whether I loaded the samples incorrectly or the algorithm does not perform well.
Here you can find the plot of Training loss and accuracy.
Here is my training method:
epochs = 15
itr = 1
p_itr = 100
model.train()
total_loss = 0
loss_list = []
acc_list = []

for epoch in range(epochs):
    for samples, labels in train_loader:
        samples, labels = samples.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(samples)
        labels = labels.unsqueeze(-1)
        labels = labels.float()
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        scheduler.step()
        # if itr % p_itr == 0:
        pred = torch.round(output)
        correct = pred.eq(labels)
        acc = torch.mean(correct.float())
        print('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Accuracy: {:.3f}'.format(
            epoch + 1, epochs, itr, total_loss / p_itr, acc))
        loss_list.append(total_loss / p_itr)
        acc_list.append(acc)
        total_loss = 0
        itr += 1
Here, I am loading data from the path:
train_list_cats = glob.glob(os.path.join(train_cats_dir,'*.jpg'))
train_list_dogs = glob.glob(os.path.join(train_dogs_dir,'*.jpg'))
train_list = train_list_cats + train_list_dogs
val_list_cats = glob.glob(os.path.join(validation_cats_dir,'*.jpg'))
val_list_dogs = glob.glob(os.path.join(validation_dogs_dir,'*.jpg'))
val_list = val_list_cats + val_list_dogs
I am not attaching the model architecture, however I can add it if required.
I think that my training method is correct; however, I am not sure about the training/validation data processing.
Edit:
The network params are as follows:
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
criterion = nn.BCELoss()
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[500,1000,1500], gamma=0.5)
Activation function is sigmoid.
The network architecture:
self.layer1 = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Dropout(p=0.2)
)
self.layer2 = nn.Sequential(
    nn.Conv2d(16, 32, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Dropout(p=0.2)
)
self.layer3 = nn.Sequential(
    nn.Conv2d(32, 64, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Dropout(p=0.2)
)
self.fc1 = nn.Linear(17 * 17 * 64, 512)
self.fc2 = nn.Linear(512, 1)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()

def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = self.layer3(out)
    out = out.view(out.size(0), -1)
    out = self.relu(self.fc1(out))
    out = self.fc2(out)
    return torch.sigmoid(out)
Going by your "Training loss and accuracy" plot, your model is overfitting. Your training loss is near zero after 25 epochs, yet you continue training for 200+ epochs. That is the wrong way to train a model. You should instead use early stopping based on the validation set: run one epoch of training and one epoch of evaluation, and repeat. Stop when the training epochs keep improving but the corresponding evaluation epochs no longer do.
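A minimal sketch of that loop (train_one_epoch and evaluate are hypothetical helpers standing in for your own train and eval code):

best_val_loss = float('inf')
patience, bad_epochs = 5, 0
for epoch in range(epochs):
    train_one_epoch(model, train_loader)        # one epoch of training
    val_loss = evaluate(model, val_loader)      # one epoch of evaluation
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        bad_epochs = 0
        torch.save(model.state_dict(), 'best_model.pth')  # keep the best checkpoint
    else:
        bad_epochs += 1
        if bad_epochs >= patience:              # stop once validation stops improving
            break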
This example is taken verbatim from the PyTorch documentation. I do have some background in deep learning in general, and I know that the forward call represents a forward pass: the input goes through the different layers and finally reaches the end, with 10 outputs in this case, and then you take the output of the forward pass and compute the loss using the loss function you defined. However, I forgot what exactly the output of the forward() pass yields in this scenario.
I thought that the last layer of a neural network should be some sort of activation function like sigmoid() or softmax(), but I did not see these defined anywhere. Furthermore, while doing a project now, I found out that softmax() is called later on. So I just want to clarify what exactly outputs = net(inputs) gives me; from this link, it seems to me that by default the output of a PyTorch model's forward pass is logits?
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print(outputs)
        break
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
"it seems to me by default the output of a PyTorch model's forward pass is logits"
As I can see from the forward pass, yes, your function is returning the raw output:
def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
So, where is softmax? Right here:
criterion = nn.CrossEntropyLoss()
It's a bit hidden, but the softmax computation is handled inside this function, and it of course works with the raw output of your last layer.
This is the softmax calculation:
softmax(z_i) = exp(z_i) / Σ_j exp(z_j)
where z_i are the raw outputs of the neural network.
So, in conclusion, there is no activation function after your last layer because it is handled by the nn.CrossEntropyLoss class.
To answer what the raw output coming from nn.Linear is: the raw output of a neural network layer is the linear combination of the values coming from the neurons of the previous layer (plus a bias term).
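To make the distinction concrete, a small sketch (reusing net and a batch from the loop above): cross_entropy consumes the logits directly, and an explicit softmax is only needed if you want probabilities.

import torch.nn.functional as F

logits = net(inputs)                    # raw scores (logits), shape (batch, 10)
probs = F.softmax(logits, dim=1)        # probabilities, each row sums to 1
loss = F.cross_entropy(logits, labels)  # applies log-softmax + NLL internally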