Train Pytorch Autoencoder with custom dataset - python

I am new to Pytorch. I was able to build an autoencoder model and train it using the MINST dataset.
However, I need to train the model using a custom dataset.
I am getting the error 'ToTensor' object is not iterable when i try to train with the custom dataset.
Below is a code of my dataset class
class AutoEncoderDataSet(Dataset):
def __init__(self, in_dir, transform):
self._transforms = transform
self.img_paths = []
files = os.listdir(in_dir)
for file in files:
self.img_paths.append(os.path.join(in_dir, file))
def __getitem__(self, index):
img, img_trans = Image.open(self.img_paths[index]), Image.open(self.img_paths[index])
x, y = transform(img), transform(img_trans)
return x, y
def __len__(self):
return len(self.img_paths)
Here is how I am generating the dataloader
transform = transforms.Compose([torchvision.transforms.ToTensor()])
train_dataset = AutoEncoderDataSet('./datasets/train/', transform)
batch_size = 512
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True
When I try to train using the data generated with the custom dataset class, I am getting the error mentioned above.
Below is code for training the model
epochs = 2048
for epoch in range(epochs):
loss = 0
for batch_features, _ in train_loader:
# reshape mini-batch data to [N, 784] matrix
# load it to the active device
batch_features = batch_features.view(-1, 250*250).to(device)
# reset the gradients back to zero
# PyTorch accumulates gradients on subsequent backward passes
optimizer.zero_grad()
# compute ecoder output
outputs = model(batch_features)
# compute training reconstruction loss
train_loss = criterion(outputs, batch_features)
# compute accumulated gradients
train_loss.backward()
# perform parameter update based on current gradients
optimizer.step()
# add the mini-batch training loss to epoch loss
loss += train_loss.item()
# compute the epoch training loss
loss = loss / len(train_loader)
# display the epoch training loss
print("epoch : {}/{}, recon loss = {:.8f}".format(epoch + 1, epochs, loss))
And this is the error I am getting
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_11164/1462449221.py in <module>
3 for epoch in range(epochs):
4 loss = 0
----> 5 for batch_features, _ in test_loader:
6 # reshape mini-batch data to [N, 784] matrix
7 # load it to the active device
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
519 if self._sampler_iter is None:
520 self._reset()
--> 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
559 def _next_data(self):
560 index = self._next_index() # may raise StopIteration
--> 561 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
562 if self._pin_memory:
563 data = _utils.pin_memory.pin_memory(data)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\utils\data\_utils\fetch.py in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torchvision\datasets\folder.py in __getitem__(self, index)
232 sample = self.loader(path)
233 if self.transform is not None:
--> 234 sample = self.transform(sample)
235 if self.target_transform is not None:
236 target = self.target_transform(target)
~\AppData\Local\Programs\Python\Python38\lib\site-packages\torchvision\transforms\transforms.py in __call__(self, img)
58
59 def __call__(self, img):
---> 60 for t in self.transforms:
61 img = t(img)
62 return img
TypeError: 'ToTensor' object is not iterable
Any suggestions would be greatly appreciated.

Related

How to make an LSTM Bidirectional?

Question:
What changes to LSTMClassifier do I need to make, in order to have this LSTM work bidirectionally?
I'm basing my amendments on this disscuss.pytorch.org response.
I think the problem is in forward(). It learns from the last state of LSTM neural network, by slicing:
tag_space = self.classifier(lstm_out[:,-1,:])
Do I need to sum up or concatenate the values of the 2 layers/ directions?
Working Code:
from argparse import ArgumentParser
import torchmetrics
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class LSTMClassifier(nn.Module):
def __init__(self,
num_classes,
batch_size=10,
embedding_dim=100,
hidden_dim=50,
vocab_size=128):
super(LSTMClassifier, self).__init__()
initrange = 0.1
self.num_labels = num_classes
n = len(self.num_labels)
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = 1
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.word_embeddings.weight.data.uniform_(-initrange, initrange)
self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True) # !
#self.classifier = nn.Linear(hidden_dim, self.num_labels[0])
self.classifier = nn.Linear(2 * hidden_dim, self.num_labels[0]) # !
def repackage_hidden(h):
"""Wraps hidden states in new Tensors, to detach them from their history."""
if isinstance(h, torch.Tensor):
return h.detach()
else:
return tuple(repackage_hidden(v) for v in h)
def forward(self, sentence, labels=None):
embeds = self.word_embeddings(sentence)
# lstm_out, _ = self.lstm(embeds) # lstm_out - 2 tensors, _ - hidden layer
lstm_out, hidden = self.lstm(embeds)
# Calculate number of directions
self.num_directions = 2 if self.lstm.bidirectional == True else 1
# Extract last hidden state
# final_state = hidden.view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
final_state = hidden[0].view(self.num_layers, self.num_directions, self.batch_size, self.hidden_dim)[-1]
# Handle directions
final_hidden_state = None
if self.num_directions == 1:
final_hidden_state = final_state.squeeze(0)
elif self.num_directions == 2:
h_1, h_2 = final_state[0], final_state[1]
# final_hidden_state = h_1 + h_2 # Add both states (requires changes to the input size of first linear layer + attention layer)
final_hidden_state = torch.cat((h_1, h_2), 1) # Concatenate both states
print("len(final_hidden_state)", len(final_hidden_state))
print("len(labels)", len(labels))
# tag_space = self.classifier(hidden[:,0,:] + hidden[:,-1,:]) # ! # torch.flip(lstm_out[:,-1,:], [0, 1]) - 1 tensor
logits = F.log_softmax(final_hidden_state, dim=1) # tag_space
loss = None
if labels:
loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
return loss, logits
class LSTMTaggerModel(pl.LightningModule):
def __init__(
self,
num_classes,
class_map,
from_checkpoint=False,
model_name='last.ckpt',
learning_rate=3e-6,
**kwargs,
):
super().__init__()
self.save_hyperparameters()
self.learning_rate = learning_rate
self.model = LSTMClassifier(num_classes=num_classes)
# self.model.load_state_dict(torch.load(model_name), strict=False) # !
self.class_map = class_map
self.num_classes = num_classes
self.valid_acc = torchmetrics.Accuracy()
self.valid_f1 = torchmetrics.F1()
def forward(self, *input, **kwargs):
return self.model(*input, **kwargs)
def training_step(self, batch, batch_idx):
x, y_true = batch
loss, _ = self(x, labels=y_true)
self.log('train_loss', loss)
return loss
def validation_step(self, batch, batch_idx):
x, y_true = batch
_, y_pred = self(x, labels=y_true)
preds = torch.argmax(y_pred, axis=1)
self.valid_acc(preds, y_true[0])
self.log('val_acc', self.valid_acc, prog_bar=True)
self.valid_f1(preds, y_true[0])
self.log('f1', self.valid_f1, prog_bar=True)
def configure_optimizers(self):
'Prepare optimizer and schedule (linear warmup and decay)'
opt = torch.optim.Adam(params=self.parameters(), lr=self.learning_rate)
sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10)
return [opt], [sch]
def training_epoch_end(self, training_step_outputs):
avg_loss = torch.tensor([x['loss']
for x in training_step_outputs]).mean()
self.log('train_loss', avg_loss)
print(f'###score: train_loss### {avg_loss}')
def validation_epoch_end(self, val_step_outputs):
acc = self.valid_acc.compute()
f1 = self.valid_f1.compute()
self.log('val_score', acc)
self.log('f1', f1)
print(f'###score: val_score### {acc}')
def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("OntologyTaggerModel")
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument("--learning_rate", default=2e-3, type=float)
return parent_parser
Runtime:
Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
---------------------------------------------
0 | model | LSTMClassifier | 77.4 K
1 | valid_acc | Accuracy | 0
2 | valid_f1 | F1 | 0
---------------------------------------------
77.4 K Trainable params
0 Non-trainable params
77.4 K Total params
0.310 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-3f817f701f20> in <module>
11 """.split()
12
---> 13 run_training(args)
<ipython-input-5-bb0d8b014e32> in run_training(input)
66 shutil.copyfile(labels_file_orig, labels_file_cp)
67 trainer = pl.Trainer.from_argparse_args(args, callbacks=[checkpoint_callback], logger=loggers)
---> 68 trainer.fit(model, dm)
69 model_file = os.path.join(args.modeldir, 'last.ckpt')
70 trainer.save_checkpoint(model_file, weights_only=True)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
858
859 # run eval step
--> 860 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
861
862 self.on_sanity_check_end()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
723 # lightning module methods
724 with self.profiler.profile("evaluation_step_and_end"):
--> 725 output = self.evaluation_loop.evaluation_step(batch, batch_idx, dataloader_idx)
726 output = self.evaluation_loop.evaluation_step_end(output)
727
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/trainer/evaluation_loop.py in evaluation_step(self, batch, batch_idx, dataloader_idx)
164 model_ref._current_fx_name = "validation_step"
165 with self.trainer.profiler.profile("validation_step"):
--> 166 output = self.trainer.accelerator.validation_step(args)
167
168 # capture any logged information
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/accelerators/accelerator.py in validation_step(self, args)
175
176 with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
--> 177 return self.training_type_plugin.validation_step(*args)
178
179 def test_step(self, args):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in validation_step(self, *args, **kwargs)
129
130 def validation_step(self, *args, **kwargs):
--> 131 return self.lightning_module.validation_step(*args, **kwargs)
132
133 def test_step(self, *args, **kwargs):
<ipython-input-17-542f29e75b1a> in validation_step(self, batch, batch_idx)
104 def validation_step(self, batch, batch_idx):
105 x, y_true = batch
--> 106 _, y_pred = self(x, labels=y_true)
107 preds = torch.argmax(y_pred, axis=1)
108 self.valid_acc(preds, y_true[0])
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, *input, **kwargs)
94
95 def forward(self, *input, **kwargs):
---> 96 return self.model(*input, **kwargs)
97
98 def training_step(self, batch, batch_idx):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-17-542f29e75b1a> in forward(self, sentence, labels)
67 loss = None
68 if labels:
---> 69 loss = F.cross_entropy(logits.view(-1, self.num_labels[0]), labels[0].view(-1))
70 return loss, logits
71
RuntimeError: shape '[-1, 38]' is invalid for input of size 1000
It sounds like you're trying to load a pretrained model (which uses an unidirectional LSTM) into a model which has a bidirectional LSTM in its state dict. There are several things you can do here, as there are innate differences between your pretrained state dict and your bidirectional state dict:
Definitely use model.load_state_dict(model_params,strict=False) (see this link). This will stop the complaining when you use a model that's different to what you're trying to learn. It means that your forward pass will be pretrained but not your backward pass.
If you do this ^ you will need to sum or otherwise condense the final time steps for the forward and backward case because the classifier will then have a different shape otherwise. strict=False though will ignore this, so only do this if you care about having a pretrained first layer in your classifier.
If you don't want to do the above two, you can copy the weights for model.lstm.weight_ih_l0_reverse and other missing parameters from the forward direction in the state dict, as it's just a python dictionary. It is not ideal because obviously the forward and backward pass will learn different things, but will stop the error and be in a reasonably good initialisation space. You will still have the same error in two though where your LSTM output is twice as big as it was.

RuntimeError: CUDA error: device-side assert triggered - Resnet18 on PyTorch

I'm trying to use PyTorch's Resnet18 model with my image data. Given the complexity of the model as well as the size of the data, I'd like to run it using CUDA. I'm doing the follow:
resnet_cnn = models.resnet18(pretrained = True)
num_ftrs = resnet_cnn.fc.in_features
resnet_cnn.fc = nn.Linear(num_ftrs, 8)
criterion = nn.CrossEntropyLoss().cuda()
optimizer_ft = optim.SGD(resnet_cnn.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)
After this, I attempt to train and test my model with the following loop:
count = 0
loss_list = []
iteration_list = []
accuracy_list = []
epochs = 30
for epoch in range(epochs):
for i, (images, labels) in enumerate(trainloader):
resnet_cnn = resnet_cnn.cuda()
images.cuda()
labels.cuda()
optimizer_ft.zero_grad()
outputs = resnet_cnn(images.cuda())
loss = criterion(outputs.cuda(), labels.cuda())
loss.backward()
optimizer_ft.step()
count += 1
if count % 50 == 0:
correct = 0
total = 0
for i, (images, labels) in enumerate(testloader):
# images.to(device)
# labels.to(device)
outputs = resnet_cnn(images.cuda())
predicted = torch.max(outputs.data, 1)[1]
total += len(labels)
correct += (predicted == labels.cuda()).sum()
accuracy = 100 * correct / float(total)
loss_list.append(loss.data)
iteration_list.append(count)
accuracy_list.append(accuracy)
if count % 500 == 0:
print("Iteration: {} Loss: {} Accuracy: {} %".format(count, loss.data, accuracy))
But I am met with the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-48-cb669e8d47c0> in <module>()
7 for epoch in range(epochs):
8 for i, (images, labels) in enumerate(trainloader):
----> 9 resnet_cnn = resnet_cnn.cuda()
10 images.cuda()
11 labels.cuda()
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in cuda(self, device)
489 Module: self
490 """
--> 491 return self._apply(lambda t: t.cuda(device))
492
493 def xpu(self: T, device: Optional[Union[int, device]] = None) -> T:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
385 def _apply(self, fn):
386 for module in self.children():
--> 387 module._apply(fn)
388
389 def compute_should_use_set_data(tensor, tensor_applied):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
407 # `with torch.no_grad():`
408 with torch.no_grad():
--> 409 param_applied = fn(param)
410 should_use_set_data = compute_should_use_set_data(param, param_applied)
411 if should_use_set_data:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in <lambda>(t)
489 Module: self
490 """
--> 491 return self._apply(lambda t: t.cuda(device))
492
493 def xpu(self: T, device: Optional[Union[int, device]] = None) -> T:
RuntimeError: CUDA error: device-side assert triggered
I can't figure what I'm doing wrong as I've trained a manually defined CNN in the same way. Thank you in advance.

Custom transformation on Subset of CIFAR10 - PyTorch

I am trying to create a custom transformation to part of the CIFAR10 data set which superimposing of an image over the dataset. I was able to download the data and divide it into subsets. Using the following code:
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
traindata = datasets.CIFAR10('./data', train=True, download=True,
transform= transform_train)
partitions = 5
traindata_split = torch.utils.data.random_split(traindata, [int(traindata.data.shape[0] / partitions) for _ in range(partitions)])
then I wanted to modify part of the splits so I created the following class and functions to use as as follows:
class MyDataset(Dataset): # https://discuss.pytorch.org/t/torch-utils-data-dataset-random-split/32209/3
def __init__(self, subset, transform=None):
self.subset = subset
self.transform = transform
def __getitem__(self, index):
x, y = self.subset[index]
if self.transform:
x = self.transform(x)
return x, y
def __len__(self):
return len(self.subset)
and
class ImageSuperImpose(object):
""" Image input as PIL and output as PIL
To be used as part of torchvision.transforms
Args: p, a threshold value to control image thinning
"""
def __init__(self, p=0):
self.p = p
def __call__(self, image):
img = cv2.imread('img.jpg')
img = img('float32')/255
imgSm = cv2.resize(img,(32,32))
np_arr = image.cpu().detach().numpy().T
sample = cv2.addWeighted(np_arr, 1, imgSm, 1, 0)
sample = sample.T
t = torch.from_numpy(sample)
return sample
transform_train2 = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
ImagePoisoning(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
datasetA = MyDataset(
traindata_split[0], transform= transform_train2
)
test_loader = torch.utils.data.DataLoader(datasetA, batch_size=128, shuffle=True)
But when I tried to train the model on the subset I got the following error:
RuntimeError: The size of tensor a (32) must match the size of tensor b (3) at non-singleton dimension 0
** UPDATE**
Here is the full given error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-20-7428084b03be> in <module>()
----> 1 train(model, opt, test_loader, 3)
9 frames
<ipython-input-14-fcb03e1d7685> in client_update(client_model, optimizer, train_loader, epoch)
5 client_model.train()
6 for e in range(epoch):
----> 7 for batch_idx, (data, target) in enumerate(train_loader):
8 data, target = data.to(device), target.to(device)
9 optimizer.zero_grad()
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
473 def _next_data(self):
474 index = self._next_index() # may raise StopIteration
--> 475 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
476 if self._pin_memory:
477 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
<ipython-input-7-1bde43acaff0> in __getitem__(self, index)
7 x, y = self.subset[index]
8 if self.transform:
----> 9 x = self.transform(x)
10 return x, y
11
/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py in __call__(self, img)
65 def __call__(self, img):
66 for t in self.transforms:
---> 67 img = t(img)
68 return img
69
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py in forward(self, tensor)
224 Tensor: Normalized Tensor image.
225 """
--> 226 return F.normalize(tensor, self.mean, self.std, self.inplace)
227
228 def __repr__(self):
/usr/local/lib/python3.6/dist-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
282 if std.ndim == 1:
283 std = std.view(-1, 1, 1)
--> 284 tensor.sub_(mean).div_(std)
285 return tensor
286
RuntimeError: The size of tensor a (32) must match the size of tensor b (3) at non-singleton dimension 0

RuntimeError: input.size(-1) must be equal to input_size. Expected 200, got 0 ---- PySyft / PyTorch / Federated Learning

does anyone know a solution for this Error? I am trying to switch my PyTorch network to an Federated Learning network but i always get this Error.
I'm using Google Colab an train on GPU. When I print the size of embeds I get 0, but I don't understand why the data is not used there.
RuntimeError Traceback (most recent call last)
<ipython-input-42-fd4a5223524b> in <module>()
----> 1 model, history = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
2 #model = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
6 frames
<ipython-input-41-a386f044d41f> in train_model(model, dataloaders, criterion, optimizer, num_epochs, batch_size)
68 # detaching it from its history on the last instance.
69
---> 70 outputs = model(inputs)
71
72 loss = criterion(outputs, labels)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
<ipython-input-36-64e9a7d68b11> in forward(self, sentence)
11 def forward(self, sentence):
12 embeds = self.word_embeddings(sentence)
---> 13 lstm_out, (h,t) = self.lstm(embeds)
14 lstm_out = self.dropout(lstm_out)
15 tag_space = self.output(lstm_out[:,-1,:])
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
554 hx = self.permute_hidden(hx, sorted_indices)
555
--> 556 self.check_forward_args(input, hx, batch_sizes)
557 if batch_sizes is None:
558 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
506 def check_forward_args(self, input, hidden, batch_sizes):
507 # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 508 self.check_input(input, batch_sizes)
509 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
510
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
157 raise RuntimeError(
158 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 159 self.input_size, input.size(-1)))
160
161 def get_expected_hidden_size(self, input, batch_sizes):
RuntimeError: input.size(-1) must be equal to input_size. Expected 200, got 0
class LSTM(nn.Module):
def __init__(self, embedding_dim, hidden_layers,vocab_size,num_layers,pretrained_weights):
super(LSTM, self).__init__()
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim,_weight=pretrained_weights, padding_idx=0)
self.lstm = nn.LSTM(embedding_dim, hidden_size=hidden_layers,num_layers=num_layers, batch_first=True)
self.output = nn.Linear(hidden_layers, vocab_size, bias=False)
self.dropout = nn.Dropout(0.1)
def forward(self, sentence):
embeds = self.word_embeddings(sentence)
lstm_out, (h,t) = self.lstm(embeds)
lstm_out = self.dropout(lstm_out)
tag_space = self.output(lstm_out[:,-1,:])
return tag_space
The Error throws in this line: lstm_out, (h,t) = self.lstm(embeds)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
train_loss = 0
acc_score = 0
valid_loss = 0
acc_valid_score = 0
#Variables to store the losses temporary
train_loss_result = 0
acc_score_result = 0
valid_loss_result = 0
acc_valid_score_result = 0
valid_loss_not_decreased = 0
if valid_loss_not_decreased == 5:
break
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
for inputs,labels in dataloaders[phase]:
# Location of current batch
worker = inputs.location # <---- Where will send the model to
#model.to(device)
model = model.send(worker) # <---- for Federated Learning
inputs, labels = inputs.to(device), labels.to(device)
print("--------> INPUT: ",inputs)
print("--------> LABEL: ",labels)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# Get model outputs and calculate loss
# backward + optimize only if in training phase
if phase == 'train':
# we need to clear out the hidden state of the LSTM,
# detaching it from its history on the last instance.
outputs = model(inputs)
loss = criterion(outputs, labels)
acc = binary_accuracy(outputs,labels)
acc_score = acc_score + acc
train_loss = train_loss + loss.item()
loss.backward()
optimizer.step()
I also have this error when I run LSTM model by using Pysyft.Besides this,the model of LSTM can run without using Pysyft. Actually,the model of CNN can run successfully by using Pysyft.So,I think there maybe some bugs in the Pysyft.

pytorch error: multi-target not supported in CrossEntropyLoss()

I am on a project using acceleration data to predict some activities.
But I have problems on the loss calculation. I am using CrossEntropyLoss for it.
Data is used for it like below
I use the first 4 data of each rows to predict the index like the last one of each rows.
1 84 84 81 4
81 85 85 80 1
81 82 84 80 1
1 85 84 2 0
81 85 82 80 1
81 82 84 80 1
81 25 84 80 5
The error messages are like below.
minoh#minoh-VirtualBox:~/cow$ python lec5.py
Traceback (most recent call last):
File "lec5.py", line 97, in <module>
train(epoch)
File "lec5.py", line 74, in train
loss = criterion(y_pred, labels)
File "/home/minoh/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 357, in __call__
result = self.forward(*input, **kwargs)
File "/home/minoh/anaconda3/lib/python3.6/site-packages/torch/nn/modules/loss.py", line 679, in forward
self.ignore_index, self.reduce)
File "/home/minoh/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py", line 1161, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, size_average, ignore_index, reduce)
File "/home/minoh/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py", line 1052, in nll_loss
return torch._C._nn.nll_loss(input, target, weight, size_average, ignore_index, reduce)
RuntimeError: multi-target not supported at /opt/conda/conda-bld/pytorch_1518243271935/work/torch/lib/THNN/generic/ClassNLLCriterion.c:22
My code is based on Sung Kim's pytorch
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
class CowDataset(Dataset):
def __init__(self):
xy_str = np.loadtxt('cow_test', delimiter = ' ', dtype = np.str)
xy = xy_str.astype(np.float32)
xy_int = xy_str.astype(np.int)
self.len = xy.shape[0]
self.x_data = torch.from_numpy(xy[:, 0:4])
self.y_data = torch.from_numpy(xy_int[:, [4]])
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.len
dataset = CowDataset()
train_loader = DataLoader(dataset = dataset, batch_size = 32, shuffle = True)
class CowTestset(Dataset):
def __init__(self):
xy_str = np.loadtxt('cow_test2', delimiter = ' ', dtype =np.str)
xy = xy_str.astype(np.float32)
xy_int = xy_str.astype(np.int)
self.len = xy.shape[0]
self.x_data = torch.from_numpy(xy[:, 0:4])
self.y_data = torch.from_numpy(xy_int[:, [4]])
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.len
testset = CowTestset()
test_loader = DataLoader(dataset = testset, batch_size = 32, shuffle = True)
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.l1 = torch.nn.Linear(4,5)
self.l2 = torch.nn.Linear(5,7)
self.l3 = torch.nn.Linear(7,6)
self.sigmoid = torch.nn.Sigmoid()
def forward(self, x):
out1 = self.sigmoid(self.l1(x))
out2 = self.sigmoid(self.l2(out1))
y_pred = self.sigmoid(self.l3(out2))
return y_pred
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum = 0.5)
def train(epoch):
model.train()
for batch_idx, (inputs, labels) in enumerate(train_loader):
inputs, labels = Variable(inputs), Variable(labels)
optimizer.zero_grad()
y_pred = model(inputs)
loss = criterion(y_pred, labels)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0]))
def test():
model.eval()
test_loss = 0
correct = 0
for data, target in test_loader:
data, target = Variable(data, volatile = True), Variable(target)
print(target)
output = model(data)
test_loss += criterion(output, target).data[0]
pred = output.data.max(1, keepdim = True)[1]
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100.* correct / len(test_loader.dataset)))
for epoch in range(1,7):
train(epoch)
test()
Ok. So I reproduced your problem and after some search and reading the API of CrossEntropyLoss(), I have found it's because you have a wrong label dimension.
Offical docs of CrossEntropyLoss here. And you can see
Input: (N,C) where C = number of classes
Target: (N) where each value is 0≤targets[i]≤C−1
While here, in your criterion() function, you have a batchSize x 7 input and batchSize x 1 label. The confusing point is, say your batchSize is 10, a 10x1 tensor can not be regarded as a size-10 tensor, which is what the loss function expectes. You must explictly do the size conversion.
Solution:
Add labels = labels.squeeze_() before you call loss = criterion(y_pred, labels) and do the same thing in your test code. The squeeze_() funciton removes size-1 dimensions inplace. So you have a batchSize-size label now.

Categories

Resources