Goal: implement bidirectionality in an LSTM.
I'm new to deep learning and chose pytorch-lightning to minimise boilerplate. I've made progress, thanks to responses to prior posts.
forward() now needs to support nn.LSTM(..., bidirectional=True).
I'm basing my latest amendments on this discuss.pytorch.org response.
Error
The error stems from a shape mismatch.
Which tensors need to be reshaped, and for which layers?
I'm out of my depth.
RuntimeError: shape '[-1, 38]' is invalid for input of size 1
Code
from argparse import ArgumentParser
import torchmetrics
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class LSTMClassifier(nn.Module):
def __init__(self,
num_classes,
batch_size=10,
embedding_dim=100,
hidden_dim=50,
vocab_size=128):
super(LSTMClassifier, self).__init__()
initrange = 0.1
self.num_labels = num_classes
n = len(self.num_labels)
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = 1
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.word_embeddings.weight.data.uniform_(-initrange, initrange)
self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True) # !
#self.classifier = nn.Linear(hidden_dim, self.num_labels[0])
self.classifier = nn.Linear(2 * hidden_dim, self.num_labels[0]) # !
    def repackage_hidden(self, h):
        """Wraps hidden states in new Tensors, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)
def forward(self, sentence, labels=None):
embeds = self.word_embeddings(sentence)
# lstm_out, _ = self.lstm(embeds) # lstm_out - 2 tensors, _ - hidden layer
lstm_out, hidden = self.lstm(embeds)
# Calculate number of directions
        self.num_directions = 2 if self.lstm.bidirectional else 1
# Extract last hidden state
# final_state = hidden.view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
final_state = hidden[0].view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
# Handle directions
final_hidden_state = None
if self.num_directions == 1:
final_hidden_state = final_state.squeeze(0)
elif self.num_directions == 2:
h_1, h_2 = final_state[0], final_state[1]
# final_hidden_state = h_1 + h_2 # Add both states (requires changes to the input size of first linear layer + attention layer)
final_hidden_state = torch.cat((h_1, h_2), 1) # Concatenate both states
print("len(final_hidden_state)", len(final_hidden_state))
print("len(labels)", len(labels))
print("final_hidden_state.shape", final_hidden_state.shape)
print("labels", labels)
self.linear_dims = [0]
# Define set of fully connected layers (Linear Layer + Activation Layer) * #layers
self.linears = nn.ModuleList()
for i in range(0, len(self.linear_dims)-1):
linear_layer = nn.Linear(self.linear_dims[i], self.linear_dims[i+1])
self.init_weights(linear_layer)
self.linears.append(linear_layer)
if i == len(self.linear_dims) - 1:
break # no activation after output layer!!!
self.linears.append(nn.ReLU())
X = final_hidden_state
# Push through linear layers
for l in self.linears:
X = l(X)
# tag_space = self.classifier(hidden[:,0,:] + hidden[:,-1,:]) # ! # torch.flip(lstm_out[:,-1,:], [0, 1]) - 1 tensor
#logits = F.log_softmax(final_hidden_state, dim=1)
logits = F.cross_entropy(final_hidden_state, labels[0].view(-1))
loss = None
if labels:
# print("len(logits.view(-1, self.num_labels[0]))", len(logits.view(-1, self.num_labels[0])))
print("len(self.num_labels)", len(self.num_labels))
print("self.num_labels[0]", self.num_labels[0])
print("len(labels[0].view(-1))", len(labels[0].view(-1)))
loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits
class LSTMTaggerModel(pl.LightningModule):
def __init__(
self,
num_classes,
class_map,
from_checkpoint=False,
model_name='last.ckpt',
learning_rate=3e-6,
**kwargs,
):
super().__init__()
self.save_hyperparameters()
self.learning_rate = learning_rate
self.model = LSTMClassifier(num_classes=num_classes)
# self.model.load_state_dict(torch.load(model_name), strict=False) # !
self.class_map = class_map
self.num_classes = num_classes
self.valid_acc = torchmetrics.Accuracy()
self.valid_f1 = torchmetrics.F1()
def forward(self, *input, **kwargs):
return self.model(*input, **kwargs)
def training_step(self, batch, batch_idx):
x, y_true = batch
loss, _ = self(x, labels=y_true)
self.log('train_loss', loss)
return loss
def validation_step(self, batch, batch_idx):
x, y_true = batch
_, y_pred = self(x, labels=y_true)
preds = torch.argmax(y_pred, axis=1)
self.valid_acc(preds, y_true[0])
self.log('val_acc', self.valid_acc, prog_bar=True)
self.valid_f1(preds, y_true[0])
self.log('f1', self.valid_f1, prog_bar=True)
def configure_optimizers(self):
'Prepare optimizer and schedule (linear warmup and decay)'
opt = torch.optim.Adam(params=self.parameters(), lr=self.learning_rate)
sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)
return [opt], [sch]
def training_epoch_end(self, training_step_outputs):
avg_loss = torch.tensor([x['loss']
for x in training_step_outputs]).mean()
self.log('train_loss', avg_loss)
print(f'###score: train_loss### {avg_loss}')
def validation_epoch_end(self, val_step_outputs):
acc = self.valid_acc.compute()
f1 = self.valid_f1.compute()
self.log('val_score', acc)
self.log('f1', f1)
print(f'###score: val_score### {acc}')
    @staticmethod
    def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("OntologyTaggerModel")
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument("--learning_rate", default=2e-3, type=float)
return parent_parser
Traceback:
Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
---------------------------------------------
0 | model | LSTMClassifier | 77.4 K
1 | valid_acc | Accuracy | 0
2 | valid_f1 | F1 | 0
---------------------------------------------
77.4 K Trainable params
0 Non-trainable params
77.4 K Total params
0.310 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
len(final_hidden_state) 10
len(labels) 1
final_hidden_state.shape torch.Size([10, 100])
labels [tensor([ 2, 31, 26, 37, 22, 5, 31, 36, 5, 10])]
len(self.num_labels) 1
self.num_labels[0] 38
len(labels[0].view(-1)) 10
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-16-3f817f701f20> in <module>
11 """.split()
12
---> 13 run_training(args)
<ipython-input-5-bb0d8b014e32> in run_training(input)
66 shutil.copyfile(labels_file_orig, labels_file_cp)
67 trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 68 trainer.fit(model, dm)
69 model_file = os.path.join(args.modeldir, 'last.ckpt')
70 trainer.save_checkpoint(model_file, weights_only=True)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
858
859 # run eval step
--> 860 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
861
862 self.on_sanity_check_end()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
723 # lightning module methods
724 with self.profiler.profile("evaluation_step_and_end"):
--> 725 output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
726 output = self.evaluation_loop.evaluation_step_end(output)
727
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, batch, batch_idx, dataloader_idx)
164 model_ref._current_fx_name = "validation_step"
165 with self.trainer.profiler.profile("validation_step"):
--> 166 output = self.trainer.accelerator.validation_step(args)
167
168 # capture any logged information
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in validation_step(self, args)
175
176 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 177 return self.training_type_plugin.validation_step(*args)
178
179 def test_step(self, args):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in validation_step(self, *args, **kwargs)
129
130 def validation_step(self, *args, **kwargs):
--> 131 return self.lightning_module.validation_step(*args, **kwargs)
132
133 def test_step(self, *args, **kwargs):
<ipython-input-15-6ef4e0993417> in validation_step(self, batch, batch_idx)
130 def validation_step(self, batch, batch_idx):
131 x, y_true = batch
--> 132 _, y_pred = self(x, labels=y_true)
133 preds = torch.argmax(y_pred, axis=1)
134 self.valid_acc(preds, y_true[0])
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-15-6ef4e0993417> in forward(self, *input, **kwargs)
120
121 def forward(self, *input, **kwargs):
--> 122 return self.model(*input, **kwargs)
123
124 def training_step(self, batch, batch_idx):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-15-6ef4e0993417> in forward(self, sentence, labels)
93 print("self.num_labels[0]", self.num_labels[0])
94 print("len(labels[0].view(-1))", len(labels[0].view(-1)))
---> 95 loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
96 return loss, logits
97
RuntimeError: shape '[-1, 38]' is invalid for input of size 1
My problem was two things.
First, I had to run classifier() before calculating cross_entropy().
Second, I had to pass X, i.e. final_hidden_state after it has been pushed through the linear layers.
X = final_hidden_state
# Push through linear layers
for l in self.linears:
X = l(X)
logits = self.classifier(X)
This yields a working model. However, the first epoch's validation score is 0%, so further work is needed.
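For completeness, a sketch of the corrected tail of forward() with both fixes applied (names taken from the code above). The original error arose because F.cross_entropy returns a scalar loss, so the later logits.view(-1, 38) was applied to a single number; projecting through self.classifier first yields real logits of shape [batch, num_classes]:
X = final_hidden_state                     # [batch, 2 * hidden_dim]
# Push through the (optional) fully connected layers
for l in self.linears:
    X = l(X)
logits = self.classifier(X)                # [batch, num_classes]
loss = None
if labels is not None:
    # cross_entropy expects raw logits [batch, num_classes] and integer targets [batch]
    loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits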
Question:
What changes to LSTMClassifier do I need to make, in order to have this LSTM work bidirectionally?
I'm basing my amendments on this discuss.pytorch.org response.
I think the problem is in forward(). It learns from the last state of the LSTM by slicing:
tag_space = self.classifier(lstm_out[:,-1,:])
Do I need to sum or concatenate the values of the two layers/directions? A sketch of both options follows.
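Either works, with a trade-off: concatenating keeps both directions' information but doubles the classifier's input size, while summing keeps all layer sizes unchanged. A minimal sketch of both options, assuming a single-layer nn.LSTM with batch_first=True, where h_n (the first element of the returned hidden tuple) has shape [num_layers * num_directions, batch, hidden_dim] and its last two rows are the final forward and backward states of the top layer:
lstm_out, (h_n, c_n) = self.lstm(embeds)
h_fwd, h_bwd = h_n[-2], h_n[-1]            # [batch, hidden_dim] each
final = torch.cat((h_fwd, h_bwd), dim=1)   # concat: classifier input is 2 * hidden_dim
# final = h_fwd + h_bwd                    # sum: classifier input stays hidden_dim
tag_space = self.classifier(final)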
Working Code:
from argparse import ArgumentParser
import torchmetrics
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class LSTMClassifier(nn.Module):
def __init__(self,
num_classes,
batch_size=10,
embedding_dim=100,
hidden_dim=50,
vocab_size=128):
super(LSTMClassifier, self).__init__()
initrange = 0.1
self.num_labels = num_classes
n = len(self.num_labels)
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = 1
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.word_embeddings.weight.data.uniform_(-initrange, initrange)
self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True) # !
#self.classifier = nn.Linear(hidden_dim, self.num_labels[0])
self.classifier = nn.Linear(2 * hidden_dim, self.num_labels[0]) # !
    def repackage_hidden(self, h):
        """Wraps hidden states in new Tensors, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)
def forward(self, sentence, labels=None):
embeds = self.word_embeddings(sentence)
# lstm_out, _ = self.lstm(embeds) # lstm_out - 2 tensors, _ - hidden layer
lstm_out, hidden = self.lstm(embeds)
# Calculate number of directions
        self.num_directions = 2 if self.lstm.bidirectional else 1
# Extract last hidden state
# final_state = hidden.view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
final_state = hidden[0].view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
# Handle directions
final_hidden_state = None
if self.num_directions == 1:
final_hidden_state = final_state.squeeze(0)
elif self.num_directions == 2:
h_1, h_2 = final_state[0], final_state[1]
# final_hidden_state = h_1 + h_2 # Add both states (requires changes to the input size of first linear layer + attention layer)
final_hidden_state = torch.cat((h_1, h_2), 1) # Concatenate both states
print("len(final_hidden_state)", len(final_hidden_state))
print("len(labels)", len(labels))
# tag_space = self.classifier(hidden[:,0,:] + hidden[:,-1,:]) # ! # torch.flip(lstm_out[:,-1,:], [0, 1]) - 1 tensor
logits = F.log_softmax(final_hidden_state, dim=1) # tag_space
loss = None
if labels:
loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits
class LSTMTaggerModel(pl.LightningModule):
def __init__(
self,
num_classes,
class_map,
from_checkpoint=False,
model_name='last.ckpt',
learning_rate=3e-6,
**kwargs,
):
super().__init__()
self.save_hyperparameters()
self.learning_rate = learning_rate
self.model = LSTMClassifier(num_classes=num_classes)
# self.model.load_state_dict(torch.load(model_name), strict=False) # !
self.class_map = class_map
self.num_classes = num_classes
self.valid_acc = torchmetrics.Accuracy()
self.valid_f1 = torchmetrics.F1()
def forward(self, *input, **kwargs):
return self.model(*input, **kwargs)
def training_step(self, batch, batch_idx):
x, y_true = batch
loss, _ = self(x, labels=y_true)
self.log('train_loss', loss)
return loss
def validation_step(self, batch, batch_idx):
x, y_true = batch
_, y_pred = self(x, labels=y_true)
preds = torch.argmax(y_pred, axis=1)
self.valid_acc(preds, y_true[0])
self.log('val_acc', self.valid_acc, prog_bar=True)
self.valid_f1(preds, y_true[0])
self.log('f1', self.valid_f1, prog_bar=True)
def configure_optimizers(self):
'Prepare optimizer and schedule (linear warmup and decay)'
opt = torch.optim.Adam(params=self.parameters(), lr=self.learning_rate)
sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)
return [opt], [sch]
def training_epoch_end(self, training_step_outputs):
avg_loss = torch.tensor([x['loss']
for x in training_step_outputs]).mean()
self.log('train_loss', avg_loss)
print(f'###score: train_loss### {avg_loss}')
def validation_epoch_end(self, val_step_outputs):
acc = self.valid_acc.compute()
f1 = self.valid_f1.compute()
self.log('val_score', acc)
self.log('f1', f1)
print(f'###score: val_score### {acc}')
    @staticmethod
    def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("OntologyTaggerModel")
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument("--learning_rate", default=2e-3, type=float)
return parent_parser
Runtime:
Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
---------------------------------------------
0 | model | LSTMClassifier | 77.4 K
1 | valid_acc | Accuracy | 0
2 | valid_f1 | F1 | 0
---------------------------------------------
77.4 K Trainable params
0 Non-trainable params
77.4 K Total params
0.310 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-3f817f701f20> in <module>
11 """.split()
12
---> 13 run_training(args)
<ipython-input-5-bb0d8b014e32> in run_training(input)
66 shutil.copyfile(labels_file_orig, labels_file_cp)
67 trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 68 trainer.fit(model, dm)
69 model_file = os.path.join(args.modeldir, 'last.ckpt')
70 trainer.save_checkpoint(model_file, weights_only=True)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
858
859 # run eval step
--> 860 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
861
862 self.on_sanity_check_end()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
723 # lightning module methods
724 with self.profiler.profile("evaluation_step_and_end"):
--> 725 output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
726 output = self.evaluation_loop.evaluation_step_end(output)
727
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, batch, batch_idx, dataloader_idx)
164 model_ref._current_fx_name = "validation_step"
165 with self.trainer.profiler.profile("validation_step"):
--> 166 output = self.trainer.accelerator.validation_step(args)
167
168 # capture any logged information
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in validation_step(self, args)
175
176 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 177 return self.training_type_plugin.validation_step(*args)
178
179 def test_step(self, args):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in validation_step(self, *args, **kwargs)
129
130 def validation_step(self, *args, **kwargs):
--> 131 return self.lightning_module.validation_step(*args, **kwargs)
132
133 def test_step(self, *args, **kwargs):
<ipython-input-17-542f29e75b1a> in validation_step(self, batch, batch_idx)
104 def validation_step(self, batch, batch_idx):
105 x, y_true = batch
--> 106 _, y_pred = self(x, labels=y_true)
107 preds = torch.argmax(y_pred, axis=1)
108 self.valid_acc(preds, y_true[0])
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, *input, **kwargs)
94
95 def forward(self, *input, **kwargs):
---> 96 return self.model(*input, **kwargs)
97
98 def training_step(self, batch, batch_idx):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, sentence, labels)
67 loss = None
68 if labels:
---> 69 loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
70 return loss, logits
71
RuntimeError: shape '[-1, 38]' is invalid for input of size 1000
It sounds like you're trying to load a pretrained model (which uses a unidirectional LSTM) into a model which has a bidirectional LSTM in its state dict. There are several things you can do here, as there are innate differences between your pretrained state dict and your bidirectional state dict:
Definitely use model.load_state_dict(model_params, strict=False) (see this link). This stops the complaints when you load a state dict that differs from the model's architecture. It means the forward direction will be pretrained but the backward direction won't be.
If you do this, you will need to sum or otherwise condense the final time steps of the forward and backward directions, because the classifier will otherwise receive a different input shape. strict=False will ignore the classifier mismatch too, so only keep those pretrained weights if you care about having a pretrained first layer in your classifier.
If you don't want to do the above two, you can copy the weights for model.lstm.weight_ih_l0_reverse and the other missing parameters from the forward direction in the state dict, as it's just a Python dictionary. It is not ideal, because the forward and backward directions will obviously learn different things, but it stops the error and starts you in a reasonably good initialisation space. You will still hit the issue from point 2, though, where the LSTM output is twice as big as it was.
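A minimal sketch of that third option, assuming a plain PyTorch state dict (for a Lightning checkpoint you would first take ckpt['state_dict']) and nn.LSTM's standard parameter naming (weight_ih_l0, weight_hh_l0, bias_ih_l0, bias_hh_l0, plus their _reverse twins); the checkpoint path is hypothetical:
state = torch.load('pretrained.pt')   # hypothetical unidirectional checkpoint
for key in list(state.keys()):
    # mirror every forward-direction LSTM parameter into its missing _reverse slot
    if 'lstm' in key and ('weight_' in key or 'bias_' in key) and not key.endswith('_reverse'):
        reverse_key = key + '_reverse'
        if reverse_key not in state:
            state[reverse_key] = state[key].clone()
model.load_state_dict(state, strict=False)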
I got the error ValueError: Input tensors to a Functional must come from tf.keras.Input. Received: 0 (missing previous layer metadata) and I can't find the cause.
This is my error trace and my code.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-15-8058f3a2fd50> in <module>()
6 test_loss, test_accuracy = eg.test(dg.user_test)
7 print('Test set: Loss=%.4f ; Accuracy=%.1f%%' % (test_loss, test_accuracy * 100))
----> 8 eg.save_embeddings('embeddings.csv')
7 frames
<ipython-input-5-54ff9897b1c3> in save_embeddings(self, file_name)
66 inp = self.m.input # input placeholder
67 outputs = [layer.output for layer in self.m.layers] # all layer outputs
---> 68 functor = K.function([inp, K.learning_phase()], outputs ) # evaluation function
69
70 #append embeddings to vectors
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in function(inputs, outputs, updates, name, **kwargs)
3934 from tensorflow.python.keras import models # pylint: disable=g-import-not-at-top
3935 from tensorflow.python.keras.utils import tf_utils # pylint: disable=g-import-not-at-top
-> 3936 model = models.Model(inputs=inputs, outputs=outputs)
3937
3938 wrap_outputs = isinstance(outputs, list) and len(outputs) == 1
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in __new__(cls, *args, **kwargs)
240 # Functional model
241 from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top
--> 242 return functional.Functional(*args, **kwargs)
243 else:
244 return super(Model, cls).__new__(cls, *args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in __init__(self, inputs, outputs, name, trainable)
113 # 'arguments during initialization. Got an unexpected argument:')
114 super(Functional, self).__init__(name=name, trainable=trainable)
--> 115 self._init_graph_network(inputs, outputs)
116
117 #trackable.no_automatic_dependency_tracking
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in _init_graph_network(self, inputs, outputs)
142 base_layer_utils.create_keras_history(self._nested_outputs)
143
--> 144 self._validate_graph_inputs_and_outputs()
145
146 # A Network does not create weights of its own, thus it is already
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in _validate_graph_inputs_and_outputs(self)
637 'must come from `tf.keras.Input`. '
638 'Received: ' + str(x) +
--> 639 ' (missing previous layer metadata).')
640 # Check that x is an input tensor.
641 # pylint: disable=protected-access
ValueError: Input tensors to a Functional must come from `tf.keras.Input`. Received: 0 (missing previous layer metadata).
And this is my code snippet:
class EmbeddingsGenerator:
def __init__(self, train_users, data):
self.train_users = train_users
#preprocess
self.data = data.sort_values(by=['timestamp'])
#make them start at 0
self.data['userId'] = self.data['userId'] - 1
self.data['itemId'] = self.data['itemId'] - 1
self.user_count = self.data['userId'].max() + 1
self.movie_count = self.data['itemId'].max() + 1
self.user_movies = {} #list of rated movies by each user
for userId in range(self.user_count):
self.user_movies[userId] = self.data[self.data.userId == userId]['itemId'].tolist()
self.m = self.model()
def model(self, hidden_layer_size=100):
m = Sequential()
m.add(Dense(hidden_layer_size, input_shape=(1, self.movie_count)))
m.add(Dropout(0.2))
m.add(Dense(self.movie_count, activation='softmax'))
m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
return m
def generate_input(self, user_id):
'''
Returns a context and a target for the user_id
context: user's history with one random movie removed
target: id of random removed movie
'''
user_movies_count = len(self.user_movies[user_id])
#picking random movie
random_index = np.random.randint(0, user_movies_count-1) # -1 avoids taking the last movie
#setting target
target = np.zeros((1, self.movie_count))
target[0][self.user_movies[user_id][random_index]] = 1
#setting context
context = np.zeros((1, self.movie_count))
context[0][self.user_movies[user_id][:random_index] + self.user_movies[user_id][random_index+1:]] = 1
return context, target
def train(self, nb_epochs = 300, batch_size = 10000):
'''
Trains the model from train_users's history
'''
for i in range(nb_epochs):
print('%d/%d' % (i+1, nb_epochs))
batch = [self.generate_input(user_id=np.random.choice(self.train_users) - 1) for _ in range(batch_size)]
X_train = np.array([b[0] for b in batch])
y_train = np.array([b[1] for b in batch])
self.m.fit(X_train, y_train, epochs=1, validation_split=0.5)
def test(self, test_users, batch_size = 100000):
'''
Returns [loss, accuracy] on the test set
'''
batch_test = [self.generate_input(user_id=np.random.choice(test_users) - 1) for _ in range(batch_size)]
X_test = np.array([b[0] for b in batch_test])
y_test = np.array([b[1] for b in batch_test])
return self.m.evaluate(X_test, y_test)
def save_embeddings(self, file_name):
'''
Generates a csv file containg the vector embedding for each movie.
'''
inp = self.m.input # input placeholder
outputs = [layer.output for layer in self.m.layers] # all layer outputs
functor = K.function([inp, K.learning_phase()], outputs ) # evaluation function
#append embeddings to vectors
vectors = []
for movie_id in range(self.movie_count):
movie = np.zeros((1, 1, self.movie_count))
movie[0][0][movie_id] = 1
layer_outs = functor([movie])
vector = [str(v) for v in layer_outs[0][0][0]]
vector = '|'.join(vector)
vectors.append([movie_id, vector])
#saves as a csv file
embeddings = pd.DataFrame(vectors, columns=['item_id', 'vectors']).astype({'item_id': 'int32'})
embeddings.to_csv(file_name, sep=';', index=False)
files.download(file_name)
This is the part of the code that calls the save_embeddings method:
if True: # Generate embeddings?
eg = EmbeddingsGenerator(dg.user_train, pd.read_csv('ml-100k/u.data', sep='\t', names=['userId', 'itemId', 'rating', 'timestamp']))
eg.train(nb_epochs=300)
train_loss, train_accuracy = eg.test(dg.user_train)
print('Train set: Loss=%.4f ; Accuracy=%.1f%%' % (train_loss, train_accuracy * 100))
test_loss, test_accuracy = eg.test(dg.user_test)
print('Test set: Loss=%.4f ; Accuracy=%.1f%%' % (test_loss, test_accuracy * 100))
eg.save_embeddings('embeddings.csv')
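One hedged guess at the cause: under TF2, K.learning_phase() evaluates to a plain integer 0, and passing it into K.function makes Keras try to build a Functional model with 0 as an input tensor, which matches the Received: 0 in the message. A minimal sketch of save_embeddings feeding only the real input:
inp = self.m.input                                   # input placeholder
outputs = [layer.output for layer in self.m.layers]  # all layer outputs
functor = K.function([inp], outputs)                 # note: no K.learning_phase() here
layer_outs = functor([movie])                        # movie: shape (1, 1, movie_count)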
Does anyone know a solution for this error? I am trying to switch my PyTorch network to a federated learning network, but I always get this error.
I'm using Google Colab and train on a GPU. When I print the size of embeds I get 0, but I don't understand why the data is not used there.
RuntimeError Traceback (most recent call last)
<ipython-input-42-fd4a5223524b> in <module>()
----> 1 model, history = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
2 #model = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
6 frames
<ipython-input-41-a386f044d41f> in train_model(model, dataloaders, criterion, optimizer, num_epochs, batch_size)
68 # detaching it from its history on the last instance.
69
---> 70 outputs = model(inputs)
71
72 loss = criterion(outputs, labels)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
<ipython-input-36-64e9a7d68b11> in forward(self, sentence)
11 def forward(self, sentence):
12 embeds = self.word_embeddings(sentence)
---> 13 lstm_out, (h,t) = self.lstm(embeds)
14 lstm_out = self.dropout(lstm_out)
15 tag_space = self.output(lstm_out[:,-1,:])
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
554 hx = self.permute_hidden(hx, sorted_indices)
555
--> 556 self.check_forward_args(input, hx, batch_sizes)
557 if batch_sizes is None:
558 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
506 def check_forward_args(self, input, hidden, batch_sizes):
507 # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 508 self.check_input(input, batch_sizes)
509 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
510
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
157 raise RuntimeError(
158 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 159 self.input_size, input.size(-1)))
160
161 def get_expected_hidden_size(self, input, batch_sizes):
RuntimeError: input.size(-1) must be equal to input_size. Expected 200, got 0
class LSTM(nn.Module):
def __init__(self, embedding_dim, hidden_layers,vocab_size,num_layers,pretrained_weights):
super(LSTM, self).__init__()
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim,_weight=pretrained_weights, padding_idx=0)
self.lstm = nn.LSTM(embedding_dim, hidden_size=hidden_layers,num_layers=num_layers, batch_first=True)
self.output = nn.Linear(hidden_layers, vocab_size, bias=False)
self.dropout = nn.Dropout(0.1)
def forward(self, sentence):
embeds = self.word_embeddings(sentence)
lstm_out, (h,t) = self.lstm(embeds)
lstm_out = self.dropout(lstm_out)
tag_space = self.output(lstm_out[:,-1,:])
return tag_space
The error is thrown on this line: lstm_out, (h,t) = self.lstm(embeds)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
train_loss = 0
acc_score = 0
valid_loss = 0
acc_valid_score = 0
#Variables to store the losses temporary
train_loss_result = 0
acc_score_result = 0
valid_loss_result = 0
acc_valid_score_result = 0
valid_loss_not_decreased = 0
if valid_loss_not_decreased == 5:
break
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
for inputs,labels in dataloaders[phase]:
# Location of current batch
worker = inputs.location # <---- Where will send the model to
#model.to(device)
model = model.send(worker) # <---- for Federated Learning
inputs, labels = inputs.to(device), labels.to(device)
print("--------> INPUT: ",inputs)
print("--------> LABEL: ",labels)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# Get model outputs and calculate loss
# backward + optimize only if in training phase
if phase == 'train':
# we need to clear out the hidden state of the LSTM,
# detaching it from its history on the last instance.
outputs = model(inputs)
loss = criterion(outputs, labels)
acc = binary_accuracy(outputs,labels)
acc_score = acc_score + acc
train_loss = train_loss + loss.item()
loss.backward()
optimizer.step()
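For comparison, a sketch of the loop structure used in the PySyft (0.2.x) tutorials: the model is sent to the batch's worker before the forward pass and retrieved again after the optimizer step, once per batch. Whether this resolves the empty embeds also depends on PySyft's support for nn.Embedding and nn.LSTM, which was limited at the time:
for inputs, labels in dataloaders[phase]:
    worker = inputs.location        # the remote worker holding this batch
    model.send(worker)              # move the model to the data
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    model.get()                     # bring the updated model back
    loss = loss.get()               # retrieve the scalar loss as well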
I also get this error when I run the LSTM model using PySyft. The LSTM model runs fine without PySyft, and a CNN model runs successfully with PySyft, so I think there may be a bug in PySyft.
I am trying to run Deep Image Prior's super resolution. While running it, I get the error below. Can anyone tell me where I am going wrong? I am using CUDA 9.2 and a 12 GB GPU, which should be sufficient to run this code. Someone told me to reduce the batch size, but I don't know how to do that. I am new to this.
from __future__ import print_function
import matplotlib.pyplot as plt
%matplotlib inline
import argparse
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import numpy as np
from models import *
import torch
import torch.optim
from skimage.measure import compare_psnr
from models.downsampler import Downsampler
from utils.sr_utils import *
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark =True
dtype = torch.cuda.FloatTensor
imsize = -1
factor = 4 # 8
enforse_div32 = 'CROP' # we usually need the dimensions to be divisible by a power of two (32 in this case)
PLOT = True
# To produce images from the paper we took *_GT.png images from LapSRN viewer for corresponding factor,
# e.g. x4/zebra_GT.png for factor=4, and x8/zebra_GT.png for factor=8
path_to_image = '/home/smitha/Documents/Falcon.png'
imgs = load_LR_HR_imgs_sr(path_to_image , imsize, factor, enforse_div32)
imgs['bicubic_np'], imgs['sharp_np'], imgs['nearest_np'] = get_baselines(imgs['LR_pil'], imgs['HR_pil'])
if PLOT:
plot_image_grid([imgs['HR_np'], imgs['bicubic_np'], imgs['sharp_np'], imgs['nearest_np']], 4,12);
print ('PSNR bicubic: %.4f PSNR nearest: %.4f' % (
compare_psnr(imgs['HR_np'], imgs['bicubic_np']),
compare_psnr(imgs['HR_np'], imgs['nearest_np'])))
input_depth = 32
INPUT = 'noise'
pad = 'reflection'
OPT_OVER = 'net'
KERNEL_TYPE='lanczos2'
LR = 0.01
tv_weight = 0.0
OPTIMIZER = 'adam'
if factor == 4:
num_iter = 2000
reg_noise_std = 0.03
elif factor == 8:
num_iter = 4000
reg_noise_std = 0.05
else:
assert False, 'We did not experiment with other factors'
net_input = get_noise(input_depth, INPUT, (imgs['HR_pil'].size[1], imgs['HR_pil'].size[0])).type(dtype).detach()
NET_TYPE = 'skip' # UNet, ResNet
net = get_net(input_depth, 'skip', pad,
skip_n33d=128,
skip_n33u=128,
skip_n11=4,
num_scales=5,
upsample_mode='bilinear').type(dtype)
# Losses
mse = torch.nn.MSELoss().type(dtype)
img_LR_var = np_to_torch(imgs['LR_np']).type(dtype)
downsampler = Downsampler(n_planes=3, factor=factor, kernel_type=KERNEL_TYPE, phase=0.5, preserve_size=True).type(dtype)
def closure():
global i, net_input
if reg_noise_std > 0:
net_input = net_input_saved + (noise.normal_() * reg_noise_std)
out_HR = net(net_input)
out_LR = downsampler(out_HR)
total_loss = mse(out_LR, img_LR_var)
if tv_weight > 0:
total_loss += tv_weight * tv_loss(out_HR)
total_loss.backward()
# Log
psnr_LR = compare_psnr(imgs['LR_np'], torch_to_np(out_LR))
psnr_HR = compare_psnr(imgs['HR_np'], torch_to_np(out_HR))
print ('Iteration %05d PSNR_LR %.3f PSNR_HR %.3f' % (i, psnr_LR, psnr_HR), '\r', end='')
# History
psnr_history.append([psnr_LR, psnr_HR])
if PLOT and i % 100 == 0:
out_HR_np = torch_to_np(out_HR)
plot_image_grid([imgs['HR_np'], imgs['bicubic_np'], np.clip(out_HR_np, 0, 1)], factor=13, nrow=3)
i += 1
return total_loss
psnr_history = []
net_input_saved = net_input.detach().clone()
noise = net_input.detach().clone()
i = 0
p = get_params(OPT_OVER, net, net_input)
optimize(OPTIMIZER, p, closure, LR, num_iter)
out_HR_np = np.clip(torch_to_np(net(net_input)), 0, 1)
result_deep_prior = put_in_center(out_HR_np, imgs['orig_np'].shape[1:])
# For the paper we actually took `_bicubic.png` files from LapSRN viewer and used `result_deep_prior` as our result
plot_image_grid([imgs['HR_np'],
imgs['bicubic_np'],
out_HR_np], factor=4, nrow=1);
The error is:
Starting optimization with ADAM
/home/smitha/anaconda3/lib/python3.6/site-packages/torch/nn/modules/upsampling.py:122: UserWarning: nn.Upsampling is deprecated. Use nn.functional.interpolate instead.
warnings.warn("nn.Upsampling is deprecated. Use nn.functional.interpolate instead.")
/home/smitha/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py:1961: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
"See the documentation of nn.Upsample for details.".format(mode))
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-6-0fe781a02812> in <module>()
5 i = 0
6 p = get_params(OPT_OVER, net, net_input)
----> 7 optimize(OPTIMIZER, p, closure, LR, num_iter)
~/Documents/deep-image-prior/utils/common_utils.py in optimize(optimizer_type, parameters, closure, LR, num_iter)
227 for j in range(num_iter):
228 optimizer.zero_grad()
--> 229 closure()
230 optimizer.step()
231 else:
<ipython-input-5-887ba7755977> in closure()
5 net_input = net_input_saved + (noise.normal_() * reg_noise_std)
6
----> 7 out_HR = net(net_input)
8 out_LR = downsampler(out_HR)
9
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
89 def forward(self, input):
90 for module in self._modules.values():
---> 91 input = module(input)
92 return input
93
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
89 def forward(self, input):
90 for module in self._modules.values():
---> 91 input = module(input)
92 return input
93
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py in forward(self, input)
299 def forward(self, input):
300 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301 self.padding, self.dilation, self.groups)
302
303
RuntimeError: CUDA error: out of memory
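Note that Deep Image Prior optimizes a single image, so the batch size is already 1; memory use is driven by the image resolution and the network width instead. Two hedged knobs to try (the values are illustrative, not from the paper), assuming imsize controls the loaded resolution as in the repo's utils:
imsize = 256        # downscale instead of using the full-resolution image
net = get_net(input_depth, 'skip', pad,
              skip_n33d=64,      # half the channels of the 128 used above
              skip_n33u=64,
              skip_n11=4,
              num_scales=4,      # one fewer scale also saves memory
              upsample_mode='bilinear').type(dtype)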
If I reduce the size of the input image, I get this error instead.
RuntimeError Traceback (most recent call last)
<ipython-input-6-0fe781a02812> in <module>()
5 i = 0
6 p = get_params(OPT_OVER, net, net_input)
----> 7 optimize(OPTIMIZER, p, closure, LR, num_iter)
~/Documents/deep-image-prior/utils/common_utils.py in optimize(optimizer_type, parameters, closure, LR, num_iter)
227 for j in range(num_iter):
228 optimizer.zero_grad()
--> 229 closure()
230 optimizer.step()
231 else:
<ipython-input-5-887ba7755977> in closure()
8 out_LR = downsampler(out_HR)
9
---> 10 total_loss = mse(out_LR, img_LR_var)
11
12 if tv_weight > 0:
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
419
420 def forward(self, input, target):
--> 421 return F.mse_loss(input, target, reduction=self.reduction)
422
423
~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in mse_loss(input, target, size_average, reduce, reduction)
1714 else:
1715 reduction = _Reduction.get_enum(reduction)
-> 1716 return _pointwise_loss(lambda a, b: (a - b) ** 2, torch._C._nn.mse_loss, input, target, reduction)
1717
1718
~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in _pointwise_loss(lambd, lambd_optimized, input, target, reduction)
1672 return torch.mean(d) if reduction == 'elementwise_mean' else torch.sum(d)
1673 else:
-> 1674 return lambd_optimized(input, target, reduction)
1675
1676
RuntimeError: input and target shapes do not match: input [1 x 3 x 64 x 64], target [1 x 1 x 64 x 64] at /opt/conda/conda-bld/pytorch_1532502421238/work/aten/src/THCUNN/generic/MSECriterion.cu:12
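A hedged reading of this second error: the network and the Downsampler produce 3 channels (n_planes=3), while the resized image was loaded with a single channel, so the MSE target is [1 x 1 x 64 x 64]. Converting the source PNG to RGB before running the pipeline should make the shapes agree; the paths below reuse the one from the question:
from PIL import Image

img = Image.open('/home/smitha/Documents/Falcon.png').convert('RGB')
img.save('/home/smitha/Documents/Falcon_rgb.png')

path_to_image = '/home/smitha/Documents/Falcon_rgb.png'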