Why do saved pytorch models retrain after loading?

Why do saved pytorch models retrain after loading? - python

import torch
import torchvision
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)
train_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./files', train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./files', train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_test, shuffle=True)
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.log_softmax(x)
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
momentum=momentum)
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
def train(epoch):
network.train()
for batch_idx, (data, target) in enumerate(train_loader):
optimizer.zero_grad()
output = network(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
train_losses.append(loss.item())
train_counter.append(
(batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
def test():
network.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
output = network(data)
test_loss += F.nll_loss(output, target, size_average=False).item()
pred = output.data.max(1, keepdim=True)[1]
correct += pred.eq(target.data.view_as(pred)).sum()
test_loss /= len(test_loader.dataset)
test_losses.append(test_loss)
print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
test()
for epoch in range(1, n_epochs + 1):
train(epoch)
test()
torch.save(network.state_dict(), './results/model.pth')
Other file:
PATH = "results/model.pth"
model = torch.load(PATH)
When this is called, instead of loading the model parameters, Pytorch retrains the entire model. The model is just retrained the same way (ie. they take the exact same steps to get to the same local minimum).
PATH = "results/model.pth"
model = Net()
model.load_state_dict(torch.load(PATH))
has the same result.
Is there any way I can load the model without retraining the whole thing?

I just tried executing the code, and it works perfect. load_state_dict did not retrain the model:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.log_softmax(x)
network = Net()
PATH = "results/model.pth"
network.load_state_dict(torch.load(PATH))
# works perfect
By the way, state_dict only contains the model weights and not the dataset, so load_state_dict can never re-train the model.
I think the problem is how the original code is organized. The tranining procedure starts running inmediately after Class Net is defined, so you cannot import Net from this file without re-running everything.
Ideally, the training and the testing procedure should be wrapped inside an if __name__=='__main__' statement (at the end of the file), so that you can safely import Net without re-running any calculations:
# source_file.py
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2_drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.log_softmax(x)
def train(epoch):
network.train()
for batch_idx, (data, target) in enumerate(train_loader):
optimizer.zero_grad()
output = network(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
train_losses.append(loss.item())
train_counter.append(
(batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
def test():
network.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
output = network(data)
test_loss += F.nll_loss(output, target, size_average=False).item()
pred = output.data.max(1, keepdim=True)[1]
correct += pred.eq(target.data.view_as(pred)).sum()
test_loss /= len(test_loader.dataset)
test_losses.append(test_loss)
print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
if __name__ == '__main__':
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)
train_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./files', train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('./files', train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_test, shuffle=True)
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
momentum=momentum)
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
test()
for epoch in range(1, n_epochs + 1):
train(epoch)
test()
PATH = './results/model.pth'
torch.save(network.state_dict(), PATH)
Then, when you reload the model in a second file, you can write:
from my_source_file import Net
network = Net()
network.load_state_dict(torch.load(PATH))
Here is a website with more information about if __name__ == '__main__':
https://realpython.com/if-name-main-python/
PS. Another option, that I personally use, is to define the neural network in a separate file than the training procedure. This is useful to make big projects look more organized, or even to experiment with different neural network designs.
Previous answer:
We should use load_state_dict to restore models:
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
https://pytorch.org/tutorials/beginner/saving_loading_models.html

Related

Pythorch LSTM timeseries prediction hidden[0] size match error

I want to learn 100 for 3000days and predict 100 next day but I don't know why this error is happen and resolve this problem
please help me
batchsize = 100, hidden_dim = 10, seq_length = 60, data_dim = 100, output_dim = 100
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim, layers):
super(Net, self).__init__()
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.output_dim = output_dim
self.layers = layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers,
batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim, bias = True)
self.hidden = self.reset_hidden_state()
def reset_hidden_state(self):
return (
torch.zeros(self.layers, self.batch_size, self.hidden_dim),
torch.zeros(self.layers, self.batch_size, self.hidden_dim))
def forward(self, x):
x, self.hidden = self.lstm(x, self.hidden)
x = self.fc(x[:, -1,:]) # [batch_size, seq_len, hidden_dim]
return x
# Train part
def train_model(model, train_df, num_epochs = None, lr = None, verbose = 10, patience = 10):
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
nb_epochs = num_epochs
train_hist = np.zeros(nb_epochs)
for epoch in range(nb_epochs):
avg_cost = 0
total_batch = len(train_df)
for batch_idx, samples in enumerate(train_df):
x_train, y_train = samples
# seq별 hidden state reset
model.reset_hidden_state()
model.hidden = [hidden.to(device) for hidden in model.reset_hidden_state()]
outputs = model(x_train)
loss = criterion(outputs, y_train)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_cost += loss/total_batch
train_hist[epoch] = avg_cost
if epoch % verbose == 0:
print('Epoch:', '%04d' % (epoch), 'train loss :', '{:.4f}'.format(avg_cost))
if (epoch % patience == 0) & (epoch != 0):
if train_hist[epoch-patience] < train_hist[epoch]:
print('\n Early Stopping')
break
return model.eval(), train_hist
prediction part
net = Net(data_dim, hidden_dim, batch , output_dim, 1).to(device)
model, train_hist = train_model(net, dataloader, num_epochs = nb_epochs, lr = learning_rate, verbose = 20, patience = 10)
with torch.no_grad():
pred = np.zeros((2940,1,100))
for pr in range(len(trainX_tensor)):
model.reset_hidden_state()
predicted = model(torch.unsqueeze(trainX_tensor[pr], 0)).cpu()
print(predicted.shape)
#predicted = predicted.item()
pred[pr,:,:] = predicted
#print(pr)
# INVERSE
pred_inverse = scaler.inverse_transform(pred)
Expected hidden[0] size (1, 1, 10), got [1, 100, 10]
how can i resolve hidden[0] size fix?

Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1]))

I'm currently switching from tensorflow to pytorch and facing the warning UserWarning: Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size
I came across that unsqueeze(1) on my target could help to resolve my problem, however, I do so obtain problems in regard of the multitarget which results from the shape my loss function (crossentropy) expects.
Here is a minimal example to my code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
X1 = torch.randn(400, 1, 9999)
X2 = torch.randn((400,1, 9999))
aux1 = torch.randn(400,1)
aux2 = torch.randn(400,1)
aux3 = torch.randn(400,1)
y1 = torch.rand(400,)
y2 = torch.rand(400,)
y3 = torch.rand(400,)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
# In[18]:
class MultiTaskDataset:
def __init__(self,
amplitude,
phase,
weight,
temperature,
humidity,
shelf_life_clf,
shelf_life_pred,
thickness_pred
):
self.amplitude = amplitude
self.phase = phase
self.weight = weight
self.temperature = temperature
self.humidity = humidity
self.shelf_life_clf = shelf_life_clf
self.shelf_life_pred = shelf_life_pred
self.thickness_pred = thickness_pred
def __len__(self):
return self.amplitude.shape[0]
def __getitem__(self, idx):
#inputs
amplitude = self.amplitude[idx]
phase = self.phase[idx]
weight = self.weight[idx]
temperature = self.temperature[idx]
humidity = self.humidity[idx]
#outputs
shelf_life_clf = self.shelf_life_clf[idx]
shelf_life_reg = self.shelf_life_pred[idx]
thickness_pred = self.thickness_pred[idx]
return ([torch.tensor(amplitude, dtype=torch.float32),
torch.tensor(phase, dtype=torch.float32),
torch.tensor(weight, dtype=torch.float32),
torch.tensor(temperature, dtype=torch.float32),
torch.tensor(humidity, dtype=torch.float32)],
[torch.tensor(shelf_life_clf, dtype=torch.long),
torch.tensor(shelf_life_reg, dtype=torch.float32),
torch.tensor(thickness_pred, dtype=torch.float32)])
# In[19]:
# train loader
dataset = MultiTaskDataset(X1, X2, aux1, aux2, aux3,
y1,y2,y3)
train_loader = DataLoader(dataset, batch_size=512, shuffle=True, num_workers=0)
# test loader
# In[20]:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.features_amp = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.features_phase = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.backbone1 = nn.Sequential(
nn.LazyConv1d(64,3,1),
nn.LazyConv1d(64,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone2 = nn.Sequential(
nn.Conv1d(64, 32,3,1),
nn.Conv1d(32, 32,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone3 = nn.Sequential(
nn.Conv1d(32, 16,3,1),
nn.Conv1d(16, 16,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.classifier = nn.LazyLinear(2)
self.shelf_life_reg = nn.LazyLinear(1)
self.thickness_reg = nn.LazyLinear(1)
def forward(self, x1, x2, aux1, aux2, aux3):
x1 = self.features_amp(x1)
x2 = self.features_phase(x2)
x1 = x1.view(x1.size(0),-1)
x2 = x2.view(x2.size(0),-1)
x = torch.cat((x1, x2), dim=-1)
print(x.size())
x = x.unsqueeze(1)
print(x.size())
x = self.backbone1(x)
print(x.size())
x = torch.flatten(x, start_dim=1, end_dim=-1)
x = torch.cat([x, aux1, aux2, aux3], dim=-1)
shelf_life_clf = self.classifier(x)
shelf_life_reg = self.shelf_life_reg(x)
thickness_reg = self.thickness_reg(x)
return (shelf_life_clf,
shelf_life_reg,
thickness_reg)
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
# In[21]:
def train(epoch):
model.train()
#exp_lr_scheduler.step()
arr_loss = []
#first_batch = next(iter(train_loader))
for batch_idx, (data, target) in enumerate(train_loader):
#amp, phase = data
clf, reg1, reg2 = target
#print(amp.shape, phase.shape)
#print(target[2].shape)
if torch.cuda.is_available():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data = [data[i].cuda() for i in range(len(data))]
target = [target[i].cuda() for i in range(len(target))]
model.to(device)
optimizer.zero_grad()
output1, output2, output3 = model(*data)
#losses
loss = criterion1(output1, target[0].long())
loss1 = criterion2(output2, target[1].float())
loss2 = criterion3(output3, target[2].float())
loss = loss + loss1 + loss2
#metrices
loss.backward()
optimizer.step()
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
100. * (batch_idx + 1) / len(train_loader), loss.data))
arr_loss.append(loss.data)
return arr_loss
def averaged_accuracy(outputs, targets):
assert len(outputs) != len(targets), "number of outputs should equal the number of targets"
accuracy = []
for i in range(len(outputs)):
_, predicted = torch.max(output1.data, 1)
total += target[0].size(0)
correct += (predicted == target[0]).sum()
acc = correct / total *100
accuracy.append(acc)
return torch.mean(accuracy)
# In[22]:
optimizer = optim.Adam(model.parameters(), lr=0.00003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
n_epochs = 10
for epoch in range(n_epochs):
train(epoch)
Can anybody provide guidance to resolve this problem?

RuntimeError: Given input size: (512x1x3). Calculated output size: (512x0x1). Output size is too small

I am working on 3D Cardiac CT images (axial/sagittal/coronal). I am using the 2D pre-trained model vgg13. But I am facing the following issues can somebody guide me by viewing my error in code.
Following are the parameters
Classifier
(
(layer): Sequential(
(0): Linear(in_features=45, out_features=5, bias=True)
)
)
torch.Size([50, 1, 62, 62])
this is a vgg.py file consisting of axial/sagittal/coronal model code using pre-trained vgg13 model. The input features are 45 and the output features are 5 with the torch.size(1,62,62)
import torch
import torch.nn as nn
from torchvision import models
__all__ = ['vggNet']
class vggNet(nn.Module):
def __init__(self, is_emr=False, mode='sum'):
super().__init__()
self.is_emr = is_emr
self.mode = mode
in_dim = 45
self.axial_model = models.vgg13(pretrained=True)
out_channels = self.axial_model.features[0].out_channels
self.axial_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=0, bias=False)
self.axial_model.features[3] = nn.MaxPool2d(1)
num_ftrs = self.axial_model.classifier[6].in_features
self.axial_model.classifier[6] = nn.Linear(num_ftrs, 15)
self.sa_co_model = models.vgg13(pretrained=True)
self.sa_co_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=(3,0), bias=False)
self.sa_co_model.features[3] = nn.MaxPool2d(1)
self.sa_co_model.classifier[6] = nn.Linear(num_ftrs, 15)
if self.is_emr:
self.emr_model = EMRModel()
self.classifier = Classifier(in_dim)
print(self.classifier)
def forward(self, axial, sagittal, coronal, emr):
axial = axial[:,:,:-3,:-3]
sagittal = sagittal[:,:,:,:-3]
coronal = coronal[:,:,:,:-3]
print(axial.shape)
axial_feature = self.axial_model(axial)
sagittal_feature = self.sa_co_model(sagittal)
coronal_feature = self.sa_co_model(coronal)
out = torch.cat([axial_feature, sagittal_feature, coronal_feature], dim=1)
if self.is_emr:
emr_feature = self.emr_model(emr)
out = self.classifier(out)
out += emr_feature
return out
class EMRModel(nn.Module):
def __init__(self):
super().__init__()
self.layer = nn.Sequential(
nn.Linear(7, 256),
nn.BatchNorm1d(256),
nn.LeakyReLU(negative_slope=0.2),
nn.Dropout(p=0.2, inplace=True),
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.LeakyReLU(negative_slope=0.2),
nn.Dropout(p=0.2, inplace=True),
nn.Linear(256, 5),
)
def forward(self, x):
return self.layer(x)
class Classifier(nn.Module):
def __init__(self, in_dim):
super().__init__()
self.layer = nn.Sequential(
nn.Linear(in_dim, 5)
)
def forward(self, x):
return self.layer(x)
class ConvBN(nn.Module):
def __init__(self, in_dim, out_dim, **kwargs):
super().__init__()
self.layer = nn.Sequential(
nn.Conv2d(in_dim, out_dim, bias=False, **kwargs),
nn.BatchNorm2d(out_dim),
nn.LeakyReLU(negative_slope=0.2))
def forward(self, x):
return self.layer(x)
if __name__ == "__main__":
images = torch.randn(2,1,65,65)
model = vgg()
out = model(images,images,images)
model = models.vgg13(pretrained=True)
for k, v in model.state_dict().items():
print(k)
this is ct-pretrained file that take heart_patches of size 512*512 and trained the model to calculate the confusion matrix and accuracy of the model using 5 targets
import os
import sys
import random
import time
import argparse
import numpy as np
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
from torchvision import transforms
import models
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-m', type=str, default='vggNet')
parser.add_argument('-r', type=int, default=1)
parser.add_argument('-g', type=int, default=5)
parser.add_argument('-k', type=int, default=5)
parser.add_argument('-b', type=int, default=50)
parser.add_argument('-e', type=int, default=1000)
parser.add_argument('-l', type=float, default=0.0001)
parser.add_argument('-i', type=int, default=50)
return parser.parse_args()
def build_model():
if name == 'resnet':
return models.ResNet(is_emr=is_emr)
elif name == 'hardnet':
return models.HardNet(is_emr=is_emr)
elif name == 'densenet':
return models.DenseNet(is_emr=is_emr)
elif name == 'vggNet':
return models.vggNet(is_emr=is_emr)
def train(model, loader, optimizer):
model.train()
running_loss = 0
running_metric = 0
for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
axial, sagittal, coronal, emr, target = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda(), target.cuda()
optimizer.zero_grad()
output = model(axial, sagittal, coronal, emr)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
pred = output.argmax(dim=1)
running_loss += loss.item()
running_metric += metric_fn(pred, target)
running_loss /= (batch_idx+1)
running_metric /= (batch_idx+1)
return running_loss, running_metric
def validate(model, loader):
model.eval()
running_loss = 0
running_metric = 0
with torch.no_grad():
for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
axial, sagittal, coronal, emr, target = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda(), target.cuda()
output = model(axial, sagittal, coronal, emr)
loss = loss_fn(output, target)
pred = output.argmax(dim=1)
running_loss += loss.item()
running_metric += metric_fn(pred, target)
running_loss /= (batch_idx+1)
running_metric /= (batch_idx+1)
return running_loss, running_metric
def test(model, loader):
model.eval()
outputs = []
targets = []
with torch.no_grad():
for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
axial, sagittal, coronal, emr = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda()
output = model(axial, sagittal, coronal, emr)
pred = output.argmax(dim=1).cpu()
outputs = torch.cat([outputs, pred], dim=0) if batch_idx else pred
targets = torch.cat([targets, target], dim=0) if batch_idx else target
# Confusion matrix whose i-th row and j-th column entry indicates the number of samples with true label being i-th class and predicted label being j-th class.
cm = confusion_matrix(targets, outputs)
num_class = 5
sum_acc = 0
sum_f1 = 0
txt = ''
for i in range(num_class):
tp = 0
tn = 0
fp = 0
fn = 0
tp = cm[i,i]
fn = cm[i,:].sum() - tp
for j in range(num_class):
if i == j:
fp = cm[:,j].sum() - tp
else:
tn += cm[:,j].sum() - cm[i,j]
txt += f'{class_name[i]} tp:{tp}, tn:{tn}, fp:{fp}, fn:{fn}\n'
acc = (tp+tn)/(tp+tn+fp+fn)
f1 = 2*tp/(2*tp+fp+fn)
sum_acc += acc
sum_f1 += f1
with open(f'{out_path}/results.txt', 'a') as f:
f.write(f'{str(cm)}\n')
f.write(txt)
f.write(f'Accuracy: {sum_acc/num_class:.4f}\n')
f.write(f'F-1 score: {sum_f1/num_class:.4f}\n')
def loss_fn(output, target):
loss = F.cross_entropy(output, target)
return loss
def metric_fn(output, target):
num_data = output.size(0)
target = target.view_as(output)
correct = output.eq(target).sum().item()
return correct / num_data
args = parse_args()
name = args.m
gpu_num = str(args.g)
is_emr = bool(args.r)
batch_size = args.b
num_epochs = args.e
min_epoch = 1
interval = args.i
lr = args.l
k_fold = args.k
in_path = f'/data/knight/data/image/heart/patches'
out_path = f'/data/results/results_heart/{name}_9'
os.makedirs(out_path, exist_ok=True)
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = gpu_num
#SEED = 42
#os.environ["PYTHONHASHSEED"] = str(SEED)
#random.seed(SEED)
#np.random.seed(SEED)
#
#torch.manual_seed(SEED)
#torch.cuda.manual_seed(SEED)
#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False
class_name = ['0_BG', '1_LM', '2_CX', '3_LAD', '4_RCA']
train_data_selection = np.array([[2, 3, 5], [1, 3, 4], [2, 4, 5], [1, 2, 5], [1, 3, 4]])
val_data_selection = np.array([[4], [5], [1], [3], [2]])
test_data_selection = np.array([[1], [2], [3], [4], [5]])
dataset = np.array([f'{in_path}/fold_{i+1}' for i in range(k_fold)])
transform = transforms.Compose([
transforms.ToTensor()])
for k in range(k_fold):
train_data = models.CTDataset(dataset[train_data_selection[k]-1], transform=transform)
val_data = models.CTDataset(dataset[val_data_selection[k]-1], transform=transform)
test_data = models.CTDataset(dataset[test_data_selection[k]-1], transform=transform)
if is_emr:
mean_df, stddev_df = train_data.get_emr_mean_stddev()
train_data.emr_normalize(mean_df, stddev_df)
val_data.emr_normalize(mean_df, stddev_df)
test_data.emr_normalize(mean_df, stddev_df)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
model = build_model().cuda()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
#scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
stop_epoch = 0
best_loss = sys.maxsize
for epoch in range(num_epochs):
start_time = time.time()
loss, metric = train(model, train_loader, optimizer)
val_loss, val_metric = validate(model, val_loader)
scheduler.step()
if (epoch+1) >= min_epoch and (val_loss - best_loss) < 0:
stop_epoch = epoch
best_loss = val_loss
torch.save(model.state_dict(), f'{out_path}/checkpoint_{k+1}.pth')
if (epoch+1) % 1 == 0:
print(f'Epoch: {epoch+1}/{num_epochs} - ', end='')
print(f'ACC: {val_metric:.4f} - ', end='')
print(f'Loss: {val_loss:.4f} - ', end='')
print(f'took {time.time() - start_time:.2f}s')
if stop_epoch + interval < epoch:
break
model = build_model().cuda()
model.load_state_dict(torch.load(f'{out_path}/checkpoint_{k+1}.pth'))
test(model, test_loader)

Pytorch: 1D target tensor expected, multi-target not supported

I want to train a 1D CNN on time series. I get the following error message 1D target tensor expected, multi-target not supported
Here is the code with simulated data corresponding to the structures of my data as well as the error message
import torch
from torch.utils.data import DataLoader
import torch.utils.data as data
import torch.nn as nn
import numpy as np
import random
from tqdm.notebook import tqdm
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_dataset = []
n_item = 20
for i in range(0,n_item):
train_data = np.random.uniform(-10, 10, 500)
train_dataset.append(train_data)
train_dataset = np.asarray(train_dataset)
train_dataset.shape
ecg_train = torch.from_numpy(train_dataset).float()
labels_train = np.random.randint(2, size=n_item)
labels_train = torch.from_numpy(labels_train).long()
val_dataset = []
n_item = 10
for i in range(0,n_item):
val_data = np.random.uniform(-10, 10, 500)
val_dataset.append(val_data)
val_dataset = np.asarray(val_dataset)
val_dataset.shape
ecg_validation = torch.from_numpy(val_dataset).float()
labels_validation = np.random.randint(2, size=n_item)
labels_validation = torch.from_numpy(labels_validation).long()
class ECGNet(data.Dataset):
"""ImageNet Limited dataset."""
def __init__(self, ecgs, labls, transform=None):
self.ecg = ecgs
self.target = labls
self.transform = transform
def __getitem__(self, idx):
ecgVec = self.ecg[idx] #.reshape(10, -1)
labelID = self.target[idx].reshape(1)
return ecgVec,labelID
def __len__(self):
return len(self.ecg)
train_data = ECGNet(ecg_train,
labels_train,
)
print("size of Training dataset: {}".format(len(train_data)))
validation_data = ECGNet(ecg_validation,
labels_validation,
)
print("size of Training dataset: {}".format(len(validation_data)))
batch_size = 1
train_dataloader = DataLoader(dataset = train_data,
batch_size=batch_size,
shuffle = True,
num_workers = 0)
val_dataloader = DataLoader(dataset = validation_data,
batch_size=batch_size,
shuffle = True,
num_workers = 0)
def train_epoch(model, train_dataloader, optimizer, loss_fn):
losses = []
correct_predictions = 0
# Iterate mini batches over training dataset
for images, labels in tqdm(train_dataloader):
images = images.to(device)
#labels = labels.squeeze_()
labels = labels.to(device)
#labels = labels.to(device=device, dtype=torch.int64)
# Run predictions
output = model(images)
# Set gradients to zero
optimizer.zero_grad()
# Compute loss
loss = loss_fn(output, labels)
# Backpropagate (compute gradients)
loss.backward()
# Make an optimization step (update parameters)
optimizer.step()
# Log metrics
losses.append(loss.item())
predicted_labels = output.argmax(dim=1)
correct_predictions += (predicted_labels == labels).sum().item()
accuracy = 100.0 * correct_predictions / len(train_dataloader.dataset)
# Return loss values for each iteration and accuracy
mean_loss = np.array(losses).mean()
return mean_loss, accuracy
def evaluate(model, dataloader, loss_fn):
losses = []
correct_predictions = 0
with torch.no_grad():
for images, labels in dataloader:
images = images.to(device)
#labels = labels.squeeze_()
labels = labels.to(device=device, dtype=torch.int64)
# Run predictions
output = model(images)
# Compute loss
loss = loss_fn(output, labels)
# Save metrics
predicted_labels = output.argmax(dim=1)
correct_predictions += (predicted_labels == labels).sum().item()
losses.append(loss.item())
mean_loss = np.array(losses).mean()
accuracy = 100.0 * correct_predictions / len(dataloader.dataset)
# Return mean loss and accuracy
return mean_loss, accuracy
def train(model, train_dataloader, val_dataloader, optimizer, n_epochs, loss_function):
# We will monitor loss functions as the training progresses
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
for epoch in range(n_epochs):
model.train()
train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
model.eval()
val_loss, val_accuracy = evaluate(model, val_dataloader, loss_fn)
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracies.append(train_accuracy)
val_accuracies.append(val_accuracy)
print('Epoch {}/{}: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}'.format(epoch+1, n_epochs,
train_losses[-1],
train_accuracies[-1],
val_losses[-1],
val_accuracies[-1]))
return train_losses, val_losses, train_accuracies, val_accuracies
class Simple1DCNN(torch.nn.Module):
def __init__(self):
super(Simple1DCNN, self).__init__()
self.layer1 = torch.nn.Conv1d(in_channels=50,
out_channels=20,
kernel_size=5,
stride=2)
self.act1 = torch.nn.ReLU()
self.layer2 = torch.nn.Conv1d(in_channels=20,
out_channels=10,
kernel_size=1)
self.fc1 = nn.Linear(10* 3, 2)
def forward(self, x):
print(x.shape)
x = x.view(1, 50,-1)
print(x.shape)
x = self.layer1(x)
print(x.shape)
x = self.act1(x)
print(x.shape)
x = self.layer2(x)
print(x.shape)
x = x.view(1,-1)
print(x.shape)
x = self.fc1(x)
print(x.shape)
print(x)
return x
model_a = Simple1DCNN()
model_a = model_a.to(device)
criterion = nn.CrossEntropyLoss()
loss_fn = torch.nn.CrossEntropyLoss()
n_epochs_a = 50
learning_rate_a = 0.01
alpha_a = 1e-5
momentum_a = 0.9
optimizer = torch.optim.SGD(model_a.parameters(),
momentum = momentum_a,
nesterov = True,
weight_decay = alpha_a,
lr=learning_rate_a)
train_losses_a, val_losses_a, train_acc_a, val_acc_a = train(model_a,
train_dataloader,
val_dataloader,
optimizer,
n_epochs_a,
loss_fn
)
Error message:
cpu
size of Training dataset: 20
size of Training dataset: 10
0%| | 0/20 [00:00<?, ?it/s]
torch.Size([1, 500])
torch.Size([1, 50, 10])
torch.Size([1, 20, 3])
torch.Size([1, 20, 3])
torch.Size([1, 10, 3])
torch.Size([1, 30])
torch.Size([1, 2])
tensor([[ 0.5785, -1.0169]], grad_fn=<AddmmBackward>)
Traceback (most recent call last):
File "SO_question.py", line 219, in <module>
train_losses_a, val_losses_a, train_acc_a, val_acc_a = train(model_a,
File "SO_question.py", line 137, in train
train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
File "SO_question.py", line 93, in train_epoch
loss = loss_fn(output, labels)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 961, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/functional.py", line 2468, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/functional.py", line 2264, in nll_loss
ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: 1D target tensor expected, multi-target not supported
What am I doing wrong?

You are using nn.CrossEntropyLoss as the criterion for your training. You correctly passed the labels as indices of the ground truth class: 0s and 1s. However, as the error message suggests, it needs to be a 1D tensor!
Simply remove the reshape in ECGNet's __getitem__:
def __getitem__(self, idx):
ecgVec = self.ecg[idx]
labelID = self.target[idx]
return ecgVec,labelID
Edit
I want to increase the batch_size to 8. But now I get the error [...]
You are doing a lot of broadcasting (flattening) which surely will affect the batch size. As a general rule of thumb never fiddle with axis=0. For instance, if you have an input shape of (8, 500), straight off you have a problem when doing x.view(1, 50, -1). Since the resulting tensor will be (1, 50, 80) (the desired shape would have been (8, 50, 10)). Instead, you could broadcast with x.view(x.size(0), 50, -1).
Same with x.view(1, -1) later down forward. You are looking to flatten the tensor, but you should not flatten it along with the batches, they need to stay separated! It's safer to use torch.flatten, yet I prefer nn.Flatten which flattens from axis=1 to axis=-1 by default.
My personal advice is to start with a simple setup (without train loops etc...) to verify the architecture and intermediate output shapes. Then, add the necessary logic to handle the training.
class ECGNet(data.Dataset):
"""ImageNet Limited dataset."""
def __init__(self, ecgs, labls, transform=None):
self.ecg = ecgs
self.target = labls
self.transform = transform
def __getitem__(self, idx):
ecgVec = self.ecg[idx]
labelID = self.target[idx]
return ecgVec, labelID
def __len__(self):
return len(self.ecg)
class Simple1DCNN(nn.Module):
def __init__(self):
super(Simple1DCNN, self).__init__()
self.layer1 = nn.Conv1d(in_channels=50,
out_channels=20,
kernel_size=5,
stride=2)
self.act1 = nn.ReLU()
self.layer2 = nn.Conv1d(in_channels=20,
out_channels=10,
kernel_size=1)
self.fc1 = nn.Linear(10*3, 2)
self.flatten = nn.Flatten()
def forward(self, x):
x = x.view(x.size(0), 50, -1)
x = self.layer1(x)
x = self.act1(x)
x = self.layer2(x)
x = self.flatten(x)
x = self.fc1(x)
return x
batch_size = 8
train_data = ECGNet(ecg_train, labels_train)
train_dl = DataLoader(dataset=train_data,
batch_size=batch_size,
shuffle=True,
num_workers=0)
model = Simple1DCNN()
criterion = nn.CrossEntropyLoss()
Then
>>> x, y = next(iter(train_dl))
>>> y_hat = model(x)
>>> y_hat.shape
torch.Size([8, 2])
Also, make sure your loss works:
>>> criterion(y_hat, y)
tensor(..., grad_fn=<NllLossBackward>)

Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

I'm trying to implement ResNet18 on pyTorch but I'm having some troubles with it. My code is this:
device = torch.device("cuda:0")
class ResnetBlock(nn.Module):
def __init__(self, strides, nf, nf0, reps, bn):
super(ResnetBlock, self).__init__()
self.adapt = strides == 2
self.layers = []
self.relus = []
self.adapt_layer = nn.Conv2d(nf0, nf, kernel_size=1, stride=strides, padding=0) if self.adapt else None
for i in range(reps):
self.layers.append(nn.Sequential(
nn.Conv2d(nf0, nf, kernel_size=3, stride=strides, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99),
nn.ReLU(),
nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99)))
self.relus.append(nn.ReLU())
strides = 1
nf0 = nf
def forward(self, x):
for i, (layer, relu) in enumerate(zip(self.layers, self.relus)):
rama = layer(x)
if self.adapt and i == 0:
x = self.adapt_layer(x)
x = x + rama
x = relu(x)
return x
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
nn.MaxPool2d(kernel_size=2, stride=2))
self.blocks = nn.Sequential(
ResnetBlock(1, 64, 64, 2, bn),
ResnetBlock(2, 128, 64, 2, bn),
ResnetBlock(2, 256, 128, 2, bn),
ResnetBlock(2, 512, 256, 2, bn))
self.fcout = nn.Linear(512, 10)
def forward(self, x):
out = self.layer1(x)
out = self.blocks(out)
out = out.reshape(out.size(0), -1)
out = self.fcout(out)
return out
num_epochs = 50
num_classes = 10
batch_size = 50
learning_rate = 0.00001
trans = transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=True, download=True, transform=trans)
test_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=False, download=True, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
def weights_init(m):
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight.data)
nn.init.zeros_(m.bias.data)
model = ConvNet()
model.apply(weights_init)
model.to(device)
summary(model, (3,32,32))
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=1e-6)
# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
acc_list_test = []
for epoch in range(num_epochs):
total = 0
correct = 0
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
optimizer.zero_grad()
# Run the forward pass
outputs = model(images)
loss = criterion(outputs, labels)
loss_list.append(loss.item())
# Backprop and perform Adam optimisation
loss.backward()
optimizer.step()
# Track the accuracy
total += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct += (predicted == labels).sum().item()
acc_list.append(correct / total)
print("Train")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct / total) * 100))
total_test = 0
correct_test = 0
for i, (images, labels) in enumerate(test_loader):
images = images.to(device)
labels = labels.to(device)
# Run the forward pass
outputs = model(images)
# Track the accuracy
total_test += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct_test += (predicted == labels).sum().item()
acc_list_test.append(correct_test / total_test)
print("Test")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct_test / total_test) * 100))
It's weird because it's throwing me that error Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same even though I've moved both the model and the data to cuda.
I guess it's related with how I defined or used "ResnetBlock", because if I remove from ConvNet those blocks (removing the line out = self.blocks(out)), the code works. But I don't know what I'm doing wrong.

The problem is in this line:
model.to(device)
to is not in-place. It returns the converted model. You need to change it to:
model = model.to(device)
EDIT: Another problem: vanilla list cannot be tracked by PyTorch. You need to use nn.ModuleList.
From
self.layers = []
self.relus = []
To
self.layers = nn.ModuleList()
self.relus = nn.ModuleList()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Why do saved pytorch models retrain after loading? - python

Related

Pythorch LSTM timeseries prediction hidden[0] size match error

Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1]))

RuntimeError: Given input size: (512x1x3). Calculated output size: (512x0x1). Output size is too small

Pytorch: 1D target tensor expected, multi-target not supported

Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

Categories

Resources