Hello I am building a DQN model for reinforcement learning on cartpole and want to print my model summary like keras model.summary() function
Here is my model class.
class DQN():
''' Deep Q Neural Network class. '''
def __init__(self, state_dim, action_dim, hidden_dim=64, lr=0.05):
super(DQN, self).__init__()
self.criterion = torch.nn.MSELoss()
self.model = torch.nn.Sequential(
torch.nn.Linear(state_dim, hidden_dim),
torch.nn.ReLU(),
torch.nn.Linear(hidden_dim, hidden_dim*2),
torch.nn.ReLU(),
torch.nn.Linear(hidden_dim*2, action_dim)
)
self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
def update(self, state, y):
"""Update the weights of the network given a training sample. """
y_pred = self.model(torch.Tensor(state))
loss = self.criterion(y_pred, Variable(torch.Tensor(y)))
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
def predict(self, state):
""" Compute Q values for all actions using the DQL. """
with torch.no_grad():
return self.model(torch.Tensor(state))
Here is the model instance with the parameters passed.
# Number of states = 4
n_state = env.observation_space.shape[0]
# Number of actions = 2
n_action = env.action_space.n
# Number of episodes
episodes = 150
# Number of hidden nodes in the DQN
n_hidden = 50
# Learning rate
lr = 0.001
simple_dqn = DQN(n_state, n_action, n_hidden, lr)
I tried using torchinfo summary
from torchinfo import summary
simple_dqn = DQN(n_state, n_action, n_hidden, lr)
summary(simple_dqn, input_size=(4, 2, 50))
But I get the following error
NotImplementedError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, mode, **kwargs)
286 if isinstance(x, (list, tuple)):
--> 287 _ = model.to(device)(*x, **kwargs)
288 elif isinstance(x, dict):
4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1147
-> 1148 result = forward_call(*input, **kwargs)
1149 if _global_forward_hooks or self._forward_hooks:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
200 """
--> 201 raise NotImplementedError(f"Module [{type(self).__name__}] is missing the required \"forward\" function")
202
NotImplementedError: Module [DQN] is missing the required "forward" function
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-24-ee921f7e5cb5> in <module>
1 from torchinfo import summary
2 simple_dqn = DQN(n_state, n_action, n_hidden, lr)
----> 3 summary(simple_dqn, input_size=(4, 2, 50))
/usr/local/lib/python3.7/dist-packages/torchinfo/torchinfo.py in summary(model, input_size, input_data, batch_dim, cache_forward_pass, col_names, col_width, depth, device, dtypes, mode, row_settings, verbose, **kwargs)
216 )
217 summary_list = forward_pass(
--> 218 model, x, batch_dim, cache_forward_pass, device, model_mode, **kwargs
219 )
220 formatting = FormattingOptions(depth, verbose, columns, col_width, rows)
/usr/local/lib/python3.7/dist-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, mode, **kwargs)
297 "Failed to run torchinfo. See above stack traces for more details. "
298 f"Executed layers up to: {executed_layers}"
--> 299 ) from e
300 finally:
301 if hooks:
RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: []
Any help is appreciated.
If you look at the stack trace , you can see it throws this error at the beginning.
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
200 """
--> 201 raise NotImplementedError(f"Module [{type(self).__name__}] is missing the required \"forward\" function")
202
NotImplementedError: Module [DQN] is missing the required "forward" function
This is the main error you should be looking at. Which is that you are missing the forward function your model should have. Here is an example of how you would implement it.
def forward(self, x):
x = self.pool(torch.nn.relu(self.conv1(x)))
x = self.pool(torch.nn.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = torch.nn.relu(self.fc1(x))
x = torch.nn.relu(self.fc2(x))
x = self.fc3(x)
return x
summary() takes in a model as its first parameter. simple_dqn is not a model, it's your custom class. Try passing simple_dqn.model (i.e. summary(simple_dqn.model, input_size=(4, 2, 50)))
Related
Goal: implement bidirectionality in LSTM.
I'm new to Deep Learning and chose pytorch-lightening for minimal coding. Progress has been made, thanks to responses from prior posts.
forward() now needs to facilitate nn.LSTM(... bidirectional=True).
I'm basing my latest amendments on this disscuss.pytorch.org response.
Error
Error is based on mismatch of shapes.
Which data needs to be shaped for which layers?
I'm far out of my depths.
RuntimeError: shape '[-1, 38]' is invalid for input of size 1
Code
from argparse import ArgumentParser
import torchmetrics
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class LSTMClassifier(nn.Module):
def __init__(self,
num_classes,
batch_size=10,
embedding_dim=100,
hidden_dim=50,
vocab_size=128):
super(LSTMClassifier, self).__init__()
initrange = 0.1
self.num_labels = num_classes
n = len(self.num_labels)
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = 1
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.word_embeddings.weight.data.uniform_(-initrange, initrange)
self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True) # !
#self.classifier = nn.Linear(hidden_dim, self.num_labels[0])
self.classifier = nn.Linear(2 * hidden_dim, self.num_labels[0]) # !
def repackage_hidden(h):
"""Wraps hidden states in new Tensors, to detach them from their history."""
if isinstance(h, torch.Tensor):
return h.detach()
else:
return tuple(repackage_hidden(v) for v in h)
def forward(self, sentence, labels=None):
embeds = self.word_embeddings(sentence)
# lstm_out, _ = self.lstm(embeds) # lstm_out - 2 tensors, _ - hidden layer
lstm_out, hidden = self.lstm(embeds)
# Calculate number of directions
self.num_directions = 2 if self.lstm.bidirectional == True else 1
# Extract last hidden state
# final_state = hidden.view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
final_state = hidden[0].view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
# Handle directions
final_hidden_state = None
if self.num_directions == 1:
final_hidden_state = final_state.squeeze(0)
elif self.num_directions == 2:
h_1, h_2 = final_state[0], final_state[1]
# final_hidden_state = h_1 + h_2 # Add both states (requires changes to the input size of first linear layer + attention layer)
final_hidden_state = torch.cat((h_1, h_2), 1) # Concatenate both states
print("len(final_hidden_state)", len(final_hidden_state))
print("len(labels)", len(labels))
print("final_hidden_state.shape", final_hidden_state.shape)
print("labels", labels)
self.linear_dims = [0]
# Define set of fully connected layers (Linear Layer + Activation Layer) * #layers
self.linears = nn.ModuleList()
for i in range(0, len(self.linear_dims)-1):
linear_layer = nn.Linear(self.linear_dims[i], self.linear_dims[i+1])
self.init_weights(linear_layer)
self.linears.append(linear_layer)
if i == len(self.linear_dims) - 1:
break # no activation after output layer!!!
self.linears.append(nn.ReLU())
X = final_hidden_state
# Push through linear layers
for l in self.linears:
X = l(X)
# tag_space = self.classifier(hidden[:,0,:] + hidden[:,-1,:]) # ! # torch.flip(lstm_out[:,-1,:], [0, 1]) - 1 tensor
#logits = F.log_softmax(final_hidden_state, dim=1)
logits = F.cross_entropy(final_hidden_state, labels[0].view(-1))
loss = None
if labels:
# print("len(logits.view(-1, self.num_labels[0]))", len(logits.view(-1, self.num_labels[0])))
print("len(self.num_labels)", len(self.num_labels))
print("self.num_labels[0]", self.num_labels[0])
print("len(labels[0].view(-1))", len(labels[0].view(-1)))
loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits
class LSTMTaggerModel(pl.LightningModule):
def __init__(
self,
num_classes,
class_map,
from_checkpoint=False,
model_name='last.ckpt',
learning_rate=3e-6,
**kwargs,
):
super().__init__()
self.save_hyperparameters()
self.learning_rate = learning_rate
self.model = LSTMClassifier(num_classes=num_classes)
# self.model.load_state_dict(torch.load(model_name), strict=False) # !
self.class_map = class_map
self.num_classes = num_classes
self.valid_acc = torchmetrics.Accuracy()
self.valid_f1 = torchmetrics.F1()
def forward(self, *input, **kwargs):
return self.model(*input, **kwargs)
def training_step(self, batch, batch_idx):
x, y_true = batch
loss, _ = self(x, labels=y_true)
self.log('train_loss', loss)
return loss
def validation_step(self, batch, batch_idx):
x, y_true = batch
_, y_pred = self(x, labels=y_true)
preds = torch.argmax(y_pred, axis=1)
self.valid_acc(preds, y_true[0])
self.log('val_acc', self.valid_acc, prog_bar=True)
self.valid_f1(preds, y_true[0])
self.log('f1', self.valid_f1, prog_bar=True)
def configure_optimizers(self):
'Prepare optimizer and schedule (linear warmup and decay)'
opt = torch.optim.Adam(params=self.parameters(), lr=self.learning_rate)
sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)
return [opt], [sch]
def training_epoch_end(self, training_step_outputs):
avg_loss = torch.tensor([x['loss']
for x in training_step_outputs]).mean()
self.log('train_loss', avg_loss)
print(f'###score: train_loss### {avg_loss}')
def validation_epoch_end(self, val_step_outputs):
acc = self.valid_acc.compute()
f1 = self.valid_f1.compute()
self.log('val_score', acc)
self.log('f1', f1)
print(f'###score: val_score### {acc}')
def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("OntologyTaggerModel")
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument("--learning_rate", default=2e-3, type=float)
return parent_parser
Traceback:
Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
---------------------------------------------
0 | model | LSTMClassifier | 77.4 K
1 | valid_acc | Accuracy | 0
2 | valid_f1 | F1 | 0
---------------------------------------------
77.4 K Trainable params
0 Non-trainable params
77.4 K Total params
0.310 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
len(final_hidden_state) 10
len(labels) 1
final_hidden_state.shape torch.Size([10, 100])
labels [tensor([ 2, 31, 26, 37, 22, 5, 31, 36, 5, 10])]
len(self.num_labels) 1
self.num_labels[0] 38
len(labels[0].view(-1)) 10
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-16-3f817f701f20> in <module>
11 """.split()
12
---> 13 run_training(args)
<ipython-input-5-bb0d8b014e32> in run_training(input)
66 shutil.copyfile(labels_file_orig, labels_file_cp)
67 trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 68 trainer.fit(model, dm)
69 model_file = os.path.join(args.modeldir, 'last.ckpt')
70 trainer.save_checkpoint(model_file, weights_only=True)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
858
859 # run eval step
--> 860 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
861
862 self.on_sanity_check_end()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
723 # lightning module methods
724 with self.profiler.profile("evaluation_step_and_end"):
--> 725 output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
726 output = self.evaluation_loop.evaluation_step_end(output)
727
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, batch, batch_idx, dataloader_idx)
164 model_ref._current_fx_name = "validation_step"
165 with self.trainer.profiler.profile("validation_step"):
--> 166 output = self.trainer.accelerator.validation_step(args)
167
168 # capture any logged information
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in validation_step(self, args)
175
176 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 177 return self.training_type_plugin.validation_step(*args)
178
179 def test_step(self, args):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in validation_step(self, *args, **kwargs)
129
130 def validation_step(self, *args, **kwargs):
--> 131 return self.lightning_module.validation_step(*args, **kwargs)
132
133 def test_step(self, *args, **kwargs):
<ipython-input-15-6ef4e0993417> in validation_step(self, batch, batch_idx)
130 def validation_step(self, batch, batch_idx):
131 x, y_true = batch
--> 132 _, y_pred = self(x, labels=y_true)
133 preds = torch.argmax(y_pred, axis=1)
134 self.valid_acc(preds, y_true[0])
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-15-6ef4e0993417> in forward(self, *input, **kwargs)
120
121 def forward(self, *input, **kwargs):
--> 122 return self.model(*input, **kwargs)
123
124 def training_step(self, batch, batch_idx):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-15-6ef4e0993417> in forward(self, sentence, labels)
93 print("self.num_labels[0]", self.num_labels[0])
94 print("len(labels[0].view(-1))", len(labels[0].view(-1)))
---> 95 loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
96 return loss, logits
97
RuntimeError: shape '[-1, 38]' is invalid for input of size 1
My problem was 2 things.
One, I had to run classifier() before calculating cross_entropy().
Secondly, I had to pass X, final_hidden_layer.flatten().
X = final_hidden_state
# Push through linear layers
for l in self.linears:
X = l(X)
logits = self.classifier(X)
This achieves a working model. However, the first epoch's validation score is 0%.
This will require further work.
Question:
What changes to LSTMClassifier do I need to make, in order to have this LSTM work bidirectionally?
I'm basing my amendments on this disscuss.pytorch.org response.
I think the problem is in forward(). It learns from the last state of LSTM neural network, by slicing:
tag_space = self.classifier(lstm_out[:,-1,:])
Do I need to sum up or concatenate the values of the 2 layers/ directions?
Working Code:
from argparse import ArgumentParser
import torchmetrics
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class LSTMClassifier(nn.Module):
def __init__(self,
num_classes,
batch_size=10,
embedding_dim=100,
hidden_dim=50,
vocab_size=128):
super(LSTMClassifier, self).__init__()
initrange = 0.1
self.num_labels = num_classes
n = len(self.num_labels)
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = 1
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.word_embeddings.weight.data.uniform_(-initrange, initrange)
self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True) # !
#self.classifier = nn.Linear(hidden_dim, self.num_labels[0])
self.classifier = nn.Linear(2 * hidden_dim, self.num_labels[0]) # !
def repackage_hidden(h):
"""Wraps hidden states in new Tensors, to detach them from their history."""
if isinstance(h, torch.Tensor):
return h.detach()
else:
return tuple(repackage_hidden(v) for v in h)
def forward(self, sentence, labels=None):
embeds = self.word_embeddings(sentence)
# lstm_out, _ = self.lstm(embeds) # lstm_out - 2 tensors, _ - hidden layer
lstm_out, hidden = self.lstm(embeds)
# Calculate number of directions
self.num_directions = 2 if self.lstm.bidirectional == True else 1
# Extract last hidden state
# final_state = hidden.view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
final_state = hidden[0].view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
# Handle directions
final_hidden_state = None
if self.num_directions == 1:
final_hidden_state = final_state.squeeze(0)
elif self.num_directions == 2:
h_1, h_2 = final_state[0], final_state[1]
# final_hidden_state = h_1 + h_2 # Add both states (requires changes to the input size of first linear layer + attention layer)
final_hidden_state = torch.cat((h_1, h_2), 1) # Concatenate both states
print("len(final_hidden_state)", len(final_hidden_state))
print("len(labels)", len(labels))
# tag_space = self.classifier(hidden[:,0,:] + hidden[:,-1,:]) # ! # torch.flip(lstm_out[:,-1,:], [0, 1]) - 1 tensor
logits = F.log_softmax(final_hidden_state, dim=1) # tag_space
loss = None
if labels:
loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits
class LSTMTaggerModel(pl.LightningModule):
def __init__(
self,
num_classes,
class_map,
from_checkpoint=False,
model_name='last.ckpt',
learning_rate=3e-6,
**kwargs,
):
super().__init__()
self.save_hyperparameters()
self.learning_rate = learning_rate
self.model = LSTMClassifier(num_classes=num_classes)
# self.model.load_state_dict(torch.load(model_name), strict=False) # !
self.class_map = class_map
self.num_classes = num_classes
self.valid_acc = torchmetrics.Accuracy()
self.valid_f1 = torchmetrics.F1()
def forward(self, *input, **kwargs):
return self.model(*input, **kwargs)
def training_step(self, batch, batch_idx):
x, y_true = batch
loss, _ = self(x, labels=y_true)
self.log('train_loss', loss)
return loss
def validation_step(self, batch, batch_idx):
x, y_true = batch
_, y_pred = self(x, labels=y_true)
preds = torch.argmax(y_pred, axis=1)
self.valid_acc(preds, y_true[0])
self.log('val_acc', self.valid_acc, prog_bar=True)
self.valid_f1(preds, y_true[0])
self.log('f1', self.valid_f1, prog_bar=True)
def configure_optimizers(self):
'Prepare optimizer and schedule (linear warmup and decay)'
opt = torch.optim.Adam(params=self.parameters(), lr=self.learning_rate)
sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)
return [opt], [sch]
def training_epoch_end(self, training_step_outputs):
avg_loss = torch.tensor([x['loss']
for x in training_step_outputs]).mean()
self.log('train_loss', avg_loss)
print(f'###score: train_loss### {avg_loss}')
def validation_epoch_end(self, val_step_outputs):
acc = self.valid_acc.compute()
f1 = self.valid_f1.compute()
self.log('val_score', acc)
self.log('f1', f1)
print(f'###score: val_score### {acc}')
def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("OntologyTaggerModel")
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument("--learning_rate", default=2e-3, type=float)
return parent_parser
Runtime:
Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
---------------------------------------------
0 | model | LSTMClassifier | 77.4 K
1 | valid_acc | Accuracy | 0
2 | valid_f1 | F1 | 0
---------------------------------------------
77.4 K Trainable params
0 Non-trainable params
77.4 K Total params
0.310 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-3f817f701f20> in <module>
11 """.split()
12
---> 13 run_training(args)
<ipython-input-5-bb0d8b014e32> in run_training(input)
66 shutil.copyfile(labels_file_orig, labels_file_cp)
67 trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 68 trainer.fit(model, dm)
69 model_file = os.path.join(args.modeldir, 'last.ckpt')
70 trainer.save_checkpoint(model_file, weights_only=True)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
858
859 # run eval step
--> 860 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
861
862 self.on_sanity_check_end()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
723 # lightning module methods
724 with self.profiler.profile("evaluation_step_and_end"):
--> 725 output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
726 output = self.evaluation_loop.evaluation_step_end(output)
727
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, batch, batch_idx, dataloader_idx)
164 model_ref._current_fx_name = "validation_step"
165 with self.trainer.profiler.profile("validation_step"):
--> 166 output = self.trainer.accelerator.validation_step(args)
167
168 # capture any logged information
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in validation_step(self, args)
175
176 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 177 return self.training_type_plugin.validation_step(*args)
178
179 def test_step(self, args):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in validation_step(self, *args, **kwargs)
129
130 def validation_step(self, *args, **kwargs):
--> 131 return self.lightning_module.validation_step(*args, **kwargs)
132
133 def test_step(self, *args, **kwargs):
<ipython-input-17-542f29e75b1a> in validation_step(self, batch, batch_idx)
104 def validation_step(self, batch, batch_idx):
105 x, y_true = batch
--> 106 _, y_pred = self(x, labels=y_true)
107 preds = torch.argmax(y_pred, axis=1)
108 self.valid_acc(preds, y_true[0])
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, *input, **kwargs)
94
95 def forward(self, *input, **kwargs):
---> 96 return self.model(*input, **kwargs)
97
98 def training_step(self, batch, batch_idx):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, sentence, labels)
67 loss = None
68 if labels:
---> 69 loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
70 return loss, logits
71
RuntimeError: shape '[-1, 38]' is invalid for input of size 1000
It sounds like you're trying to load a pretrained model (which uses an unidirectional LSTM) into a model which has a bidirectional LSTM in its state dict. There are several things you can do here, as there are innate differences between your pretrained state dict and your bidirectional state dict:
Definitely use model.load_state_dict(model_params,strict=False) (see this link). This will stop the complaining when you use a model that's different to what you're trying to learn. It means that your forward pass will be pretrained but not your backward pass.
If you do this ^ you will need to sum or otherwise condense the final time steps for the forward and backward case because the classifier will then have a different shape otherwise. strict=False though will ignore this, so only do this if you care about having a pretrained first layer in your classifier.
If you don't want to do the above two, you can copy the weights for model.lstm.weight_ih_l0_reverse and other missing parameters from the forward direction in the state dict, as it's just a python dictionary. It is not ideal because obviously the forward and backward pass will learn different things, but will stop the error and be in a reasonably good initialisation space. You will still have the same error in two though where your LSTM output is twice as big as it was.
As written in the title above it is the Pytroch Error: "IndexError: index out of range in self". This error occurs as soon as a dataset of more than 500 rows is used. Also when I reload the model and try to run a second data set, I get this error. I have tried everything possible to manually set the embedding size, so everything I have found on the net has not worked. Attached the model, optimizer and runtime, would be very grateful for your help.
class Model(nn.Module):
def __init__(self, embedding_size, num_numerical_cols, output_size, layers, p=0.4):
super().__init__()
self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
self.embedding_dropout = nn.Dropout(p)
self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)
all_layers = []
num_categorical_cols = sum((nf for ni, nf in embedding_size))
input_size = num_categorical_cols + num_numerical_cols
for i in layers:
all_layers.append(nn.Linear(input_size, i))
all_layers.append(nn.ReLU(inplace=True))
all_layers.append(nn.BatchNorm1d(i))
all_layers.append(nn.Dropout(p))
input_size = i
all_layers.append(nn.Linear(layers[-1], output_size))
self.layers = nn.Sequential(*all_layers)
def forward(self, x_categorical, x_numerical):
embeddings = []
for i,e in enumerate(self.all_embeddings):
embeddings.append(e(x_categorical[:,i]))
x = torch.cat(embeddings, 1)
x = self.embedding_dropout(x)
x_numerical = self.batch_norm_num(x_numerical)
x = torch.cat([x, x_numerical], 1)
x = self.layers(x)
return x
model = Model(categorical_embedding_sizes, numerical_data.shape[1], 5, [400,100,50], p=0.4)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
aggregated_losses = []
for i in range(epochs):
i += 1
y_pred = model(categorical_train_data, numerical_train_data)
single_loss = loss_function(y_pred, train_outputs)
aggregated_losses.append(single_loss)
print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
optimizer.zero_grad()
single_loss.backward()
optimizer.step()
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')
Here is the error as described:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-157-202810d3193a> in <module>
4 for i in range(epochs):
5 i += 1
----> 6 y_pred = model(categorical_train_data, numerical_train_data)
7 single_loss = loss_function(y_pred, train_outputs)
8 aggregated_losses.append(single_loss)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-117-fd6404aba4b5> in forward(self, x_categorical, x_numerical)
25 embeddings = []
26 for i,e in enumerate(self.all_embeddings):
---> 27 embeddings.append(e(x_categorical[:,i]))
28 x = torch.cat(embeddings, 1)
29 x = self.embedding_dropout(x)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
112 return F.embedding(
113 input, self.weight, self.padding_idx, self.max_norm,
--> 114 self.norm_type, self.scale_grad_by_freq, self.sparse)
115
116 def extra_repr(self):
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1722 # remove once script supports set_grad_enabled
1723 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1724 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1725
1726
IndexError: index out of range in self
I got a custom keras Model which I want to optimize for hyperparameters while having a good tracking of whats going on and visualization. Therefor I want to pass hparams to the custom model like this:
class Model_hparams(tf.keras.Model):
def __init__(self, hparams):
super(Model_hparams, self).__init__()
self.hps = hparams
def build(self, inputs_shape):
self.conv1 = tf.keras.layers.Conv1D(filters=self.hps[HP_NUM_UNITS_1],
kernel_size=self.hps[HP_LEN_CONV_1],
activation='relu',
input_shape=inputs_shape[1:])
self.pool1 = tf.keras.layers.MaxPool1D(pool_size=2)
self.bn1 = tf.keras.layers.BatchNormalization()
self.dense1 = tf.keras.layers.Dense(1)
# actually, here are even more layers
def call(self, x, training=True):
x = self.conv1(x)
x = self.pool1(x)
x = self.bn1(x, training=training)
x = self.dense1(x)
return x
I followed the guide from TF:
from tensorboard.plugins.hparams import api as hp
HP_NUM_UNITS_1 = hp.HParam('num_units_1', hp.Discrete([16, 32]))
HP_LEN_CONV_1 = hp.HParam('len_conv_1', hp.Discrete([3]))
METRIC = 'mae'
with tf.summary.create_file_writer("../../model_output/hparams").as_default():
hp.hparams_config(
hparams=[HP_NUM_UNITS_1,
HP_LEN_CONV_1,],
metrics=[hp.Metric(METRIC, display_name='Test_MAE')],
)
def run(run_dir, hparams):
with tf.summary.create_file_writer(run_dir).as_default():
hp.hparams(hparams) # record the values used in this trial
test_mae = train_model(hparams)
tf.summary.scalar('Mean_Average_Error', test_mae, step=1)
Now my training fuction calls the model with my training procedure which looks like this (simplified):
def train_model(hparams):
model=Model_hparams(hparams)
for batch in dataset:
#...
with tf.GradientTape() as tape:
predictions = model(batch, training=True)
#...
The actual optimization starts here:
n=0
for num_units_1 in HP_NUM_UNITS_1.domain.values:
for len_conv_1 in HP_LEN_CONV_1.domain.values:
hparams = {HP_NUM_UNITS_1: num_units_1,
HP_LEN_CONV_1: len_conv_1}
run_name = "run-%d" % n
run("../../model_output/hparams/" + run_name, hparams)
n += 1
However, if I run this, an error occures when I want to instantiate my model:
<ipython-input-99-17dd66300f5b> in __init__(self, hparams)
72 def __init__(self, hparams):
73 super(Model_hparams, self).__init__()
---> 74 self.hps = hparams
75
76 def build(self, inputs_shape):
c:\users\123\anaconda3\envs\python_3_8_env1\lib\site-packages\tensorflow\python\keras\engine\training.py in __setattr__(self, name, value)
312 isinstance(v, (base_layer.Layer,
313 data_structures.TrackableDataStructure)) or
--> 314 base_layer_utils.has_weights(v) for v in nest.flatten(value)):
315 try:
316 self._base_model_initialized
c:\users\123\anaconda3\envs\python_3_8_env1\lib\site-packages\tensorflow\python\util\nest.py in flatten(structure, expand_composites)
339 return [None]
340 expand_composites = bool(expand_composites)
--> 341 return _pywrap_utils.Flatten(structure, expand_composites)
342
343
TypeError: '<' not supported between instances of 'HParam' and 'HParam'
I´m not sure why this happens and I cannot get it to work. I cannot find anything in the docs.
Is there anything I´m missing??
Thanks for the support.
tf.keras.Model class overrides __setattr__ function, so you can not set mismatched variables. However, you can bypass this function below trick.
object.__setattr__(self, 'hps', hparams)
.. instead of
self.hps = hparams
class Model_hparams(tf.keras.Model):
def __init__(self, hparams):
super(Model_hparams, self).__init__()
object.__setattr__(self, 'hps', hparams)
When I want to do a forward pass with an initialized model = nn.Sequential object, I simply use:
out = model(X)
# OR
out = model.forward(X)
However, I have tried extending the Sequential class, and now both of these methods suddenly require a second argument. For example, note in the following method my call to self(x):
def train(self, trainloader, epochs):
for e in range(epochs):
for x, y in trainloader:
x = x.view(x.shape[0], -1)
self.optimizer.zero_grad()
loss = self.criterion(self(x), y) # CALL OCCURS HERE
loss.backward()
self.optimizer.step()
This code now gives me TypeError: forward() missing 1 required positional argument: 'target'.
My Question: Since I have done nothing but extend the class, why is this?
Code for full class below:
class Network(nn.Sequential):
def __init__(self, layers):
super().__init__(self.init_modules(layers))
self.criterion = nn.NLLLoss()
self.optimizer = optim.Adam(self.parameters(), lr=0.003)
def init_modules(self, layers):
n_layers = len(layers)
modules = OrderedDict()
# Layer definitions for input and inner layers:
for i in range(n_layers - 2):
modules[f'fc{i}'] = nn.Linear(layers[i], layers[i+1])
modules[f'relu{i}'] = nn.ReLU()
# Definition for output layer:
modules['fc_out'] = nn.Linear(layers[-2], layers[-1])
modules['smax_out'] = nn.LogSoftmax(dim=1)
return modules
def train(self, trainloader, epochs):
for e in range(epochs):
for x, y in trainloader:
x = x.view(x.shape[0], -1)
self.optimizer.zero_grad()
loss = self.criterion(self(x), y)
loss.backward()
self.optimizer.step()
Full stack trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-63-490e0b9eef22> in <module>
----> 1 model2.train(trainloader, 5, plot_loss=True)
<ipython-input-61-e173e5672f18> in train(self, trainloader, epochs, plot_loss)
32 x = x.view(x.shape[0], -1)
33 self.optimizer.zero_grad()
---> 34 loss = self.criterion(self(x), y)
35 loss.backward()
36 self.optimizer.step()
c:\program files\python38\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
c:\program files\python38\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
98 def forward(self, input):
99 for module in self:
--> 100 input = module(input)
101 return input
102
c:\program files\python38\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
TypeError: forward() missing 1 required positional argument: 'target'
Q: Since I have done nothing but extend the class, why is this?
Actually, you have. You chose the "wrong" base class. The forward of the nn.Sequential simply goes through all modules, and when you defined:
self.criterion = nn.NLLLoss()
you registered the loss as a module. Therefore, when you call self(x), you're actually calling self.criterion(x) at some point, hence the TypeError.
Note that self.criterion is an instance of the class nn.NLLLoss. You need to pass a torch tensor as a second argument, make sure y satisfies that type requirement. Also, I am not sure about this line loss = self.criterion(self(x), y) why call self(x)? The first argument is supposed to be the input data passed through an activation function.