i have been working on a code to train and test the images dataset. But i am getting this error at every instance of output = model(images)
class ConvNeuralNet(nn.Module):
# Determine what layers and their order in CNN object
def __init__(self, num_classes):
super(ConvNeuralNet, self).__init__()
self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.fc1 = nn.Linear(1600, 128)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(128, num_classes)
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
device = torch.device('cpu')
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
199 registered hooks while the latter silently ignores them.
200 """
--> 201 raise NotImplementedError
202
203
NotImplementedError:
I have checked that there is no indentation error so i don't understand what's wrong here.
When you subclass nn.Module, you need to implement a forward() method.
Here's an update to your ConvNeuralNet class:
class ConvNeuralNet(nn.Module):
# Determine what layers and their order in CNN object
def __init__(self, num_classes):
super(ConvNeuralNet, self).__init__()
self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.fc1 = nn.Linear(1600, 128)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(128, num_classes)
## UPDATE: Implement forward() method
def forward(self, x):
# First three layers (from above)
x = self.conv_layer1(x)
x = self.conv_layer2(x)
x = self.max_pool1(x)
# Next three layers
x = self.conv_layer3(x)
x = self.conv_layer4(x)
x = self.max_pool2(x)
# Final three layers
x = self.fc1(x)
x = self.relu1(x)
x = self.fc2(x)
return x
Notice how x (the input data) moves through each layer you defined and is eventually returned.
From the documentation for nn.Module:
forward(*input)
Defines the computation performed at every call.
Should be overridden by all subclasses.
Related
I am trying to use AlexNet to classify spectrogram images generated for 3s audio segments. I am aware that the input image to AlexNet must be 224x224 and have transformed the train and test datasets accordingly. I am encountering the following error: mat1 and mat2 shapes cannot be multiplied (256x65536 and 1024x4096 - see link for full error message) and I am not entirely sure why. Can anyone help me figure out where I am going wrong?
https://i.stack.imgur.com/cnVKp.png
Transform data
data_transform_train = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
transforms.Normalize(norm_mean_train, norm_std_train),
])
Create dataloaders
train_size = int(len(train_data_df))
test_size = int(len(test_data_df))
ins_dataset_train = Audio(
df=train_data_df[:train_size],
transform=data_transform_train,
)
ins_dataset_test = Audio(
df=test_data_df[:test_size],
transform=data_transform_test,
)
train_loader = torch.utils.data.DataLoader(
ins_dataset_train,
batch_size=256,
shuffle=True
)
test_loader = torch.utils.data.DataLoader(
ins_dataset_test,
batch_size=256,
shuffle=True
)
AlexNet Model
class AlexNet(nn.Module):
def __init__(self, output_dim):
super().__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, 3, 2, 1), # in_channels, out_channels, kernel_size, stride, padding
nn.MaxPool2d(2), # kernel_size
nn.ReLU(inplace=True),
nn.Conv2d(64, 192, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True),
nn.Conv2d(192, 384, 3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(384, 256, 3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True)
)
self.classifier = nn.Sequential(
nn.Dropout(0.5),
nn.Linear(256 * 2 * 2, 4096),
nn.ReLU(inplace=True),
nn.Dropout(0.5),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, output_dim),
)
def forward(self, x):
x = self.features(x)
h = x.view(x.shape[0], -1)
x = self.classifier(h)
return x, h
output_dim = 2
model = AlexNet(output_dim)
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')
def initialize_parameters(m):
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
nn.init.constant_(m.bias.data, 0)
elif isinstance(m, nn.Linear):
nn.init.xavier_normal_(m.weight.data, gain=nn.init.calculate_gain('relu'))
nn.init.constant_(m.bias.data, 0)
model.apply(initialize_parameters)
Learning Rate Finder
class LRFinder:
def __init__(self, model, optimizer, criterion, device):
self.optimizer = optimizer
self.model = model
self.criterion = criterion
self.device = device
torch.save(model.state_dict(), 'init_params.pt')
def range_test(self, iterator, end_lr=10, num_iter=100, smooth_f=0.05, diverge_th=5):
lrs = []
losses = []
best_loss = float('inf')
lr_scheduler = ExponentialLR(self.optimizer, end_lr, num_iter)
iterator = IteratorWrapper(iterator)
for iteration in range(num_iter):
loss = self._train_batch(iterator)
lrs.append(lr_scheduler.get_last_lr()[0])
lr_scheduler.step()
if iteration > 0:
loss = smooth_f * loss + (1 - smooth_f) * losses[-1]
if loss < best_loss:
best_loss = loss
losses.append(loss)
if loss > diverge_th * best_loss:
print("Stopping early, the loss has diverged")
break
model.load_state_dict(torch.load('init_params.pt'))
return lrs, losses
def _train_batch(self, iterator):
self.model.train()
self.optimizer.zero_grad()
x, y = iterator.get_batch()
x = x.to(self.device)
y = y.to(self.device)
y_pred, _ = self.model(x)
loss = self.criterion(y_pred, y)
loss.backward()
self.optimizer.step()
return loss.item()
from torch.optim.lr_scheduler import _LRScheduler
class ExponentialLR(_LRScheduler):
def __init__(self, optimizer, end_lr, num_iter, last_epoch=-1):
self.end_lr = end_lr
self.num_iter = num_iter
super(ExponentialLR, self).__init__(optimizer, last_epoch)
def get_lr(self):
curr_iter = self.last_epoch
r = curr_iter / self.num_iter
return [base_lr * (self.end_lr / base_lr) ** r
for base_lr in self.base_lrs]
class IteratorWrapper:
def __init__(self, iterator):
self.iterator = iterator
self._iterator = iter(iterator)
def __next__(self):
try:
inputs, labels = next(self._iterator)
except StopIteration:
self._iterator = iter(self.iterator)
inputs, labels, *_ = next(self._iterator)
return inputs, labels
def get_batch(self):
return next(self)
start_learning_rate = 1e-7
optimizer = optim.Adam(model.parameters(), lr=start_learning_rate)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
model = model.to(device)
criterion = criterion.to(device)
END_LR = 10
NUM_ITER = 100[![enter image description here][1]][1]
lr_finder = LRFinder(model, optimizer, criterion, device)
lrs, losses = lr_finder.range_test(train_loader, END_LR, NUM_ITER)
This is my attention layer code :
implementation of attention layer
**class Attention(nn.Module):
def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
super(Attention, self).__init__(**kwargs)
self.supports_masking = True
self.bias = bias
self.feature_dim = feature_dim
self.step_dim = step_dim
self.features_dim = 0
weight = torch.zeros(feature_dim, 1)
nn.init.kaiming_uniform_(weight)
self.weight = nn.Parameter(weight)
if bias:
self.b = nn.Parameter(torch.zeros(step_dim))
def forward(self, x, mask=None):
feature_dim = self.feature_dim
step_dim = self.step_dim
eij = torch.mm(
x.contiguous().view(-1, feature_dim),
self.weight
).view(-1, step_dim)
if self.bias:
eij = eij + self.b
eij = torch.tanh(eij)
a = torch.exp(eij)
if mask is not None:
a = a * mask
a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)
weighted_input = x * torch.unsqueeze(a, -1)
return torch.sum(weighted_input, 1)**
This is RNN codes :
**# Instantiate the model w/ hyperparams
weights_matrix = weights_matrix
output_size = 13 # number of classes to predict
hidden_dim = 64
drop_prob = 0.5
# The RNN model that will be used to perform classification
class AttentionLSTM(nn.Module):
def __init__(self, weights_matrix, output_size, hidden_dim, drop_prob):
super(AttentionLSTM, self).__init__()
# embedding layers
self.embedding, self.num_embeddings, self.embeddings_size = create_emb_layer(weights_matrix, True)
# embedding dropout
self.dropout = nn.Dropout2d(drop_prob)
# First lstm and GRU layers
self.lstm1 = nn.LSTM(self.embeddings_size, hidden_dim, batch_first=True, bidirectional=True)
self.gru1 = nn.GRU(hidden_dim * 2, hidden_dim, bidirectional=True, batch_first=True)
# attention layer
self.attention = Attention(hidden_dim*2, seq_length)
# Second lstm and GRU layers
self.lstm2 = nn.LSTM(hidden_dim * 2, hidden_dim, batch_first=True, bidirectional=True)
self.gru2 = nn.GRU(hidden_dim * 2, hidden_dim, bidirectional=True, batch_first=True)
# linear
self.fc = nn.Linear(hidden_dim * 2, hidden_dim * 2)
self.out = nn.Linear(hidden_dim * 2, output_size)
# activation functions
self.sigmoid = nn.Sigmoid() # for hidden layers
self.softmax = nn.Softmax(dim=1) # for output layer
def forward(self, x):
batch_size = x.size(0)
# embedding output
x = x.long()
embeds = self.embedding(x)
embeds = torch.squeeze(torch.unsqueeze(embeds, 0))
# lstm, and gru outputs
lstm_out1, _ = self.lstm1(embeds)
gru_out1, _ = self.gru1(lstm_out1)
gru_out1 = gru_out1.view(batch_size, -1, hidden_dim * 2)
attention_out = self.attention(gru_out1, seq_length)
attention_out = attention_out.view(batch_size, -1, hidden_dim * 2)
attention_out = self.sigmoid(attention_out)
lstm_out2, _ = self.lstm2(attention_out)
# slice lstm_out to just get output of last element of the input sequence
lstm_out2 = lstm_out2[:, -1]
gru_out2, _ = self.gru2(lstm_out2)
# linear outputs
fc_out = self.softmax(self.fc(gru_out2))
final_out = self.out(fc_out)
return final_out**
I am sure that my dataset is balanced after pre-processing step but my model always predict the same output. Precision and fscore are changing for each input, however, this problem makes my recall score 1.0 since output is always same whatever input is.
If anybody help me, i will be appreciated
It required some time to build networks from your requirements but I provided a few samples to create a customer layer or model, you start from an embedded layer and suddenly random leaves of data create different input every time GRU and LSTM learning layers may provide good results when they had :
Matching input and target layer and parameters.
Learning scopes when they can differentiate input, repeating of gated current, and LSTM is specifically used when patterns of data are
significant such as pictures or continue data.
Linear, and Sigmoid provide contrast differentiate and softmax sometime we required when compared based on distribution values. This
is supposed to create contrast output excepted softmax applied on
weights of values.
Loss Fn is based on a similar output dimension/expectation
[ Sample ]:
class create_emb_layer( tf.keras.layers.Embedding ):
def __init__( self, weights_matrix, bidirectional=True ):
self.num_embeddings = weights_matrix[0]
self.embeddings_size = weights_matrix[1]
self.bidirectional = bidirectional
super(create_emb_layer, self).__init__( self.embeddings_size, self.num_embeddings )
def build(self, input_shape):
self.kernel = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.input_dim])
def call(self, inputs):
return tf.matmul(inputs, self.kernel)
[ My model ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(10),
])
[ Output ]:
I have a model which looks as follows:
IMG_WIDTH = IMG_HEIGHT = 224
class AlexNet(nn.Module):
def __init__(self, output_dim):
super(AlexNet, self).__init__()
self._to_linear = None
self.x = torch.randn(3, IMG_WIDTH, IMG_HEIGHT).view(-1, 3, IMG_WIDTH, IMG_HEIGHT)
self.features = nn.Sequential(
nn.Conv2d(3, 64, 3, 2, 1), # in_channels, out_channels, kernel_size, stride, padding
nn.MaxPool2d(2),
nn.ReLU(inplace=True),
nn.Conv2d(64, 192, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True),
nn.Conv2d(192, 384, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True),
nn.Conv2d(384, 256, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True),
nn.Conv2d(256, 512, 3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 256, 3, padding=1),
nn.MaxPool2d(2),
nn.ReLU(inplace=True)
)
self.conv(self.x)
self.classifier = nn.Sequential(
nn.Dropout(.5),
nn.Linear(self._to_linear, 4096),
nn.ReLU(inplace=True),
nn.Dropout(.5),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, output_dim),
)
def conv(self, x):
x = self.features(x)
if self._to_linear is None:
self._to_linear = x.shape[1] * x.shape[2] * x.shape[3]
return x
def forward(self, x):
x = self.conv(x)
h = x.view(x.shape[0], -1)
x = self.classifier(h)
return x, h
Here is my optimizer and loss functions:
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss().to(device)
Here is my train and evaluate functions:
def train(model, iterator, optimizer, criterion, device):
epoch_loss, epoch_acc = 0, 0
model.train()
for (x, y) in iterator:
# features and labels to the device
x = x.to(device)
y = y.to(device).long()
# Zero the gradients
optimizer.zero_grad()
y_pred, _ = model(x)
# Calculate the loss and accuracy
loss = criterion(y_pred.squeeze(), y)
acc = binary_accuracy(y_pred, y)
# Backward propagate
loss.backward()
# Update the weights
optimizer.step()
epoch_loss +=loss.item()
epoch_acc += acc.item()
return epoch_loss/len(iterator), epoch_acc/len(iterator)
def evaluate(model, iterator, criterion, device):
epoch_loss, epoch_acc = 0, 0
model.eval()
with torch.no_grad():
for (x, y) in iterator:
x = x.to(device)
y = y.to(device).long()
y_pred, _ = model(x)
loss = criterion(y_pred, y)
acc = binary_accuracy(y_pred, y)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss/len(iterator), epoch_acc/len(iterator)
This is the error that I'm getting:
RuntimeError: result type Float can't be cast to the desired output type Long
What may be possibly my problem because I have tried to convert my labels to long tensors as follows:
y = y.to(device).long()
But it seems not to work.
I was getting the same error doing this:
loss_fn(output, target)
where the output was Tensor torch.float32 and target was Tensor torch.int64. What solved this problem was calling the loss function like this:
loss_fn(output, target.float())
I encountered this error while using a library (Huggingface). In that case you do not have access to the code that computes the loss. You do not convert the data type of your labels that you pass to the library. What worked for me was:
labels = labels.astype(np.float32).tolist()
I'm trying to implement ResNet18 on pyTorch but I'm having some troubles with it. My code is this:
device = torch.device("cuda:0")
class ResnetBlock(nn.Module):
def __init__(self, strides, nf, nf0, reps, bn):
super(ResnetBlock, self).__init__()
self.adapt = strides == 2
self.layers = []
self.relus = []
self.adapt_layer = nn.Conv2d(nf0, nf, kernel_size=1, stride=strides, padding=0) if self.adapt else None
for i in range(reps):
self.layers.append(nn.Sequential(
nn.Conv2d(nf0, nf, kernel_size=3, stride=strides, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99),
nn.ReLU(),
nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99)))
self.relus.append(nn.ReLU())
strides = 1
nf0 = nf
def forward(self, x):
for i, (layer, relu) in enumerate(zip(self.layers, self.relus)):
rama = layer(x)
if self.adapt and i == 0:
x = self.adapt_layer(x)
x = x + rama
x = relu(x)
return x
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
nn.MaxPool2d(kernel_size=2, stride=2))
self.blocks = nn.Sequential(
ResnetBlock(1, 64, 64, 2, bn),
ResnetBlock(2, 128, 64, 2, bn),
ResnetBlock(2, 256, 128, 2, bn),
ResnetBlock(2, 512, 256, 2, bn))
self.fcout = nn.Linear(512, 10)
def forward(self, x):
out = self.layer1(x)
out = self.blocks(out)
out = out.reshape(out.size(0), -1)
out = self.fcout(out)
return out
num_epochs = 50
num_classes = 10
batch_size = 50
learning_rate = 0.00001
trans = transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=True, download=True, transform=trans)
test_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=False, download=True, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
def weights_init(m):
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight.data)
nn.init.zeros_(m.bias.data)
model = ConvNet()
model.apply(weights_init)
model.to(device)
summary(model, (3,32,32))
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=1e-6)
# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
acc_list_test = []
for epoch in range(num_epochs):
total = 0
correct = 0
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
optimizer.zero_grad()
# Run the forward pass
outputs = model(images)
loss = criterion(outputs, labels)
loss_list.append(loss.item())
# Backprop and perform Adam optimisation
loss.backward()
optimizer.step()
# Track the accuracy
total += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct += (predicted == labels).sum().item()
acc_list.append(correct / total)
print("Train")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct / total) * 100))
total_test = 0
correct_test = 0
for i, (images, labels) in enumerate(test_loader):
images = images.to(device)
labels = labels.to(device)
# Run the forward pass
outputs = model(images)
# Track the accuracy
total_test += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct_test += (predicted == labels).sum().item()
acc_list_test.append(correct_test / total_test)
print("Test")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct_test / total_test) * 100))
It's weird because it's throwing me that error Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same even though I've moved both the model and the data to cuda.
I guess it's related with how I defined or used "ResnetBlock", because if I remove from ConvNet those blocks (removing the line out = self.blocks(out)), the code works. But I don't know what I'm doing wrong.
The problem is in this line:
model.to(device)
to is not in-place. It returns the converted model. You need to change it to:
model = model.to(device)
EDIT: Another problem: vanilla list cannot be tracked by PyTorch. You need to use nn.ModuleList.
From
self.layers = []
self.relus = []
To
self.layers = nn.ModuleList()
self.relus = nn.ModuleList()
this is my model :
# basic LeNet5 network
class LeNet5_mode0 (nn.Module) :
# constructor
def __init__(self):
super(LeNet5_mode0, self).__init__() # call to super constructor
# define layers
# 6 # 28x28
self.conv1 = nn.Sequential(
# Lenet's first conv layer is 3x32x32, squeeze color channels into 1 and pad 2
nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5, stride = 1, padding = 2),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2)
)
# 16 # 10x10
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = 5, stride = 1, padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size =2, stride = 2)
)
self.fc1 = nn.Sequential(
nn.Linear(in_features = 16*5*5, out_features = 120),
nn.ReLU()
)
self.fc2 = nn.Sequential(
nn.Linear(in_features = 120, out_features = 84),
nn.ReLU()
)
self.classifier = nn.Sequential(
nn.Linear(in_features = 84,out_features = 10),
nn.Softmax(dim = 1) # dim =1 meaning do softmax on the colums of 84x10
)
# define forward function
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(-1, 16*5*5) # reshape the tensor to [-1,16*5*5]
x = self.fc1(x)
x = self.fc2(x)
x = self.classifier(x)
return x
and I train this model once with :
criterion = nn.CrossEntropyLoss() # aka, LogLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,15], gamma=0.5)
and then save with with
torch.save(model.state_dict(), savepath)
and load it with
model.load_state_dict(torch.load(loadpath))
so far no problem . but when i change the optimizer a little to
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay = 0.0005)
and use the same save & load method
I receive the following error:
in loading state_dict for LeNet5_mode0:
Unexpected key(s) in state_dict: "conv1.1.weight", "conv1.1.bias", "conv1.1.running_mean", "conv1.1.running_var", "conv1.1.num_batches_tracked", "conv2.1.weight", "conv2.1.bias", "conv2.1.running_mean", "conv2.1.running_var", "conv2.1.num_batches_tracked".
how can it be fixed? why different optimizer have that effect on the saving of the trained network?