I tried to write a segmentation model, i am quite new to the topic and i got to the dead spot.
From what i tried to debug, i think that the shape of my mask batches does not match the size of the predicted batches and therefore i got the following error:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-84-abd99309752a> in <module>()
3 for i in range(EPOCHS):
4 #train_loss = train_func(trainloader,model,optimizer)
----> 5 valid_loss = eval_func(validloader,model)
6
7 if valid_loss <best_loss:
4 frames
<ipython-input-82-328c759ec537> in eval_func(dataloader, model)
6 images = images.to(DEVICE)
7 masks = mask.to(DEVICE)
----> 8 logits, loss = model(images,masks)
9 total_loss += loss.item()
10 return total_loss / len(dataloader)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-79-567e281ae719> in forward(self, images, masks)
15 if mask != None:
16 print(logits.size)
---> 17 return logits, lossF(logits,masks)
18 return logits
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/segmentation_models_pytorch/losses/dice.py in forward(self, y_pred, y_true)
58 def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
59
---> 60 assert y_true.size(0) == y_pred.size(0)
61
62 if self.from_logits:
AssertionError:
I have no clue how to fix the error in code. I tried few adjustments in the SegmentationDataset, but it did not help. You can find relevant (to my mind) part of the code below.
import albumentations as A
def get_train_augs():
return A.Compose([
#A.Resize(IMG_SIZE,IMG_SIZE, interpolation = cv2.INTER_LINEAR),
A.RandomCrop(width=IMG_SIZE, height=IMG_SIZE),
A.HorizontalFlip(p=0.5),
A.RandomBrightnessContrast(p=.75)
])
def get_val_augs():
return A.Compose([
A.RandomCrop(width=IMG_SIZE, height=IMG_SIZE),
])
class SegmentationDataset(Dataset):
def __init__(self,df,augumentations):
self.df = df
self.augumentations = augumentations
def __len__(self):
return len(self.df)
def __getitem__(self,idx):
row = self.df.iloc [idx]
image_path = row.Images
mask_path = row.Masks
image = cv2.imread(image_path)
image = cv2.cvtColor(np.float32(image), cv2.COLOR_BGR2RGB)
mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
mask = (mask==33)*1 + (mask==79)*1
mask = (mask>0)*1
mask = np.expand_dims(mask, axis=-1)
if self.augumentations:
data = self.augumentations(image = image, mask = mask)
image = data['image']
mask = data['mask']
image = np.transpose(image, (2,0,1)).astype(np.float32)
mask = np.transpose(mask, (2,0,1)).astype(np.float32)
image = torch.Tensor(image)/255.0
mask = torch.Tensor(mask)
mask = torch.round(torch.Tensor(mask))
return image, mask
class SegmentationModel(nn.Module):
def __init__(self):
super(SegmentationModel,self).__init__()
self.backbone = smp.Unet(
encoder_name=ENCODER,
encoder_weights=WEIGHTS,
in_channels =3,
classes = 1,
activation=None
)
def forward(self,images, masks= None):
logits = self.backbone(images)
lossF = DiceLoss(mode = 'binary')
if mask != None:
return logits, lossF(logits,masks)
return logits
def train_func(dataloader, model,optimizer):
model.train()
total_loss = 0.0
for images, masks in tqdm(dataloader):
images = images.to(DEVICE)
masks = mask.to(DEVICE)
optimizer.zero_grad()
logits, loss = model(images,masks)
loss.backward()
optimizer.step()
total_loss += loss.item()
print(mask.size)
return total_loss / len(dataloader)
train_loss = train_func(trainloader,model,optimizer)
def eval_func(dataloader, model):
model.eval()
total_loss = 0.0
with torch.no_grad():
for images, masks in tqdm(dataloader):
images = images.to(DEVICE)
masks = mask.to(DEVICE)
logits, loss = model(images,masks)
total_loss += loss.item()
return total_loss / len(dataloader)
Train_Images = os.listdir(os.path.join(os.getcwd(), 'uavid_train/Images'))
for k in range(0,len(Train_Images)): Train_Images[k] = 'uavid_train/Images/' + Train_Images[k]
Train_Labels = os.listdir(os.path.join(os.getcwd(), 'uavid_train/Labels'))
for k in range(0,len(Train_Labels)): Train_Labels[k] = 'uavid_train/Labels/' + Train_Labels[k]
Train_DF = pd.DataFrame([Train_Images, Train_Labels]).T
Train_DF.columns = ['Images', 'Masks']
Val_Images = os.listdir(os.path.join(os.getcwd(), 'uavid_val/Images'))
for k in range(0,len(Val_Images)): Val_Images[k] = 'uavid_val/Images/' + Val_Images[k]
Val_Labels = os.listdir(os.path.join(os.getcwd(), 'uavid_val/Labels'))
for k in range(0,len(Val_Labels)): Val_Labels[k] = 'uavid_val/Labels/' + Val_Labels[k]
Val_DF = pd.DataFrame([ Val_Images, Val_Labels]).T
Val_DF.columns = ['Images', 'Masks']
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
validloader = DataLoader(valset, batch_size=len(valset))
model = SegmentationModel()
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
best_loss = np.Inf
for i in range(EPOCHS):
#train_loss = train_func(trainloader,model,optimizer)
valid_loss = eval_func(validloader,model)
if valid_loss <best_loss:
torch.save(model.state_dict(),"best-model.pt")
print('SAVED')
best_loss = valid_loss
print(f"Epoch : {i+1} Train Loss : {train_loss} Valid Loss : {valid_loss}")
What you have to do is to convert your mask into one-hot-encoded version and then convert it into a single channel. Let's say you have 3-classes in you your mask that are described by 3 colors: [255,0,0], [0,255,0], [0,0,255]. And your input mask is standard RGB image. You can write a function that converts it into the desired format like this:
def convert_mask(rgb_mask):
colormaps = [[255,0,0], [0,255,0], [0,0,255]]
output_mask = list()
for colormap in colormaps:
cmap = np.all(np.equal(mask, colormap), axis=-1)
output_mask.append(cmap)
m = np.stack(output_mask, axis=-1) # one hot mask
m = np.argmax(m, axis=-1) # single channel mask
return m
This function should return a (N,1,H,W) mask that should work during trainig.
Hope it helps.
Related
def train_fn(data_loader, model, optimizer):
model.train()
total_loss = 0.0
for images, masks in tqdm(data_loader):
images = images.to(DEVICE)
masks = masks.to(DEVICE)
optimizer.zero_grad()
logits, loss = model(images,masks)
loss.backward()
optimizer.step()
total_loss += loss.item()
return total_loss/ len(data_loader)
def eval_fn(data_loader, model):
model.eval()
total_loss = 0.0
with torch.no_grad():
for images, masks in tqdm(data_loader):
images = images.to(DEVICE)
masks = masks.to(DEVICE)
logits, loss = model(images,masks)
total_loss += loss.item()
return total_loss/ len(data_loader)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
best_valid_loss = np.Inf
for i in range(EPOCHS):
train_loss = train_fn(trainloader, model, optimizer)
valid_loss = eval_fn(validloader, model)
if valid_loss < best_valid_loss:
torch.save(model.state_dict(), 'best_model.pt')
print("SAVED_MODEL")
best_valid_loss = valid_loss
print(f"Epoch : {i+1} Train_loss: {train_loss} Valid_loss: {valid_loss}")
I get the following error when I try to train the model:
0%| | 0/15 [00:00<?, ?it/s]
NotImplementedError Traceback (most recent call last)
in ()
4
5
----> 6 train_loss = train_fn(trainloader, model, optimizer)
7 valid_loss = eval_fn(validloader, model)
8
2 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
199 registered hooks while the latter silently ignores them.
200 """
--> 201 # raise NotImplementedError
202
203
NotImplementedError:
How do I deal with this?
Looking at the link you provided in the comment, your model definition looks like this:
class SegmentationModel(nn.Module):
def __init__(self):
super(SegmentationModel,self).__init__()
self.arc = smp.Unet(
encoder_name = ENCODER,
encoder_weights = WEIGHTS,
in_channels = 3,
classes = 1,
activation = None
)
def forward(self, images, masks = None):
logits = self.arc(images)
if masks != None:
loss1 = DiceLoss(mode = 'binary')(logits, masks)
loss2 = nn.BCEWithLogitsLoss()(logits,masks)
return logits, loss1 + loss2
return logits
If you look close, you'll see forward() has an erratic extra indentation, making it an internal function inside __init__() rather than a method of a SegmentationModel. Shift it a bit to left, and it should work fine:
class SegmentationModel(nn.Module):
def __init__(self):
super(SegmentationModel,self).__init__()
self.arc = smp.Unet(
encoder_name = ENCODER,
encoder_weights = WEIGHTS,
in_channels = 3,
classes = 1,
activation = None
)
def forward(self, images, masks = None):
logits = self.arc(images)
if masks != None:
loss1 = DiceLoss(mode = 'binary')(logits, masks)
loss2 = nn.BCEWithLogitsLoss()(logits,masks)
return logits, loss1 + loss2
return logits
I've been working on this Segformer (using transformers for image segmentation) I've been having issues with getting a message stating that the target is out of bounds.
So here is the code that I am using.
!pip install --upgrade transformers
!nvidia-smi
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW
import torch
from torch import nn
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
import os
from PIL import Image
from transformers import SegformerForSemanticSegmentation,
SegformerFeatureExtractor
import pandas as pd
import cv2
import numpy as np
import albumentations as aug
os.getcwd()
ok, from here this is my current working directoy:
'/data/users/me/Rugd_dataset'
My images are stored in two folders one called images, the nest mask. Each have a sub directory of test and train
WIDTH = 512
Height = 512
class ImageSegmentationDataset(Dataset):
"""Image segmentation dataset."""
def __init__(self, root_dir, feature_extractor, transforms=None, train=True):
"""
Args:
root_dir (string): Root directory of the dataset containing the images + annotations.
feature_extractor (SegFormerFeatureExtractor): feature extractor to prepare images + segmentation maps.
train (bool): Whether to load "training" or "validation" images + annotations.
"""
self.root_dir = root_dir
self.feature_extractor = feature_extractor
self.train = train
self.transforms = transforms
sub_path = "train" if self.train else "test"
self.img_dir = os.path.join(self.root_dir, "images", sub_path)
self.ann_dir = os.path.join(self.root_dir, "mask", sub_path)
# read images
image_file_names = []
for root, dirs, files in os.walk(self.img_dir):
image_file_names.extend(files)
self.images = sorted(image_file_names)
# read annotations
annotation_file_names = []
for root, dirs, files in os.walk(self.ann_dir):
annotation_file_names.extend(files)
self.annotations = sorted(annotation_file_names)
assert len(self.images) == len(self.annotations), "There must be as many images as there are segmentation maps"
def __len__(self):
return len(self.images)
def __getitem__(self, idx):
image = cv2.imread(os.path.join(self.img_dir,
self.images[idx]))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
segmentation_map = cv2.imread(os.path.join(self.ann_dir, self.annotations[idx]))
segmentation_map = cv2.cvtColor(segmentation_map, cv2.COLOR_BGR2GRAY)
# image = Image.open()
# segmentation_map = Image.open()
if self.transforms is not None:
augmented = self.transforms(image=image, mask=segmentation_map)
# randomly crop + pad both image and segmentation map to same size
encoded_inputs = self.feature_extractor(augmented['image'], augmented['mask'], return_tensors="pt")
else:
encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")
for k,v in encoded_inputs.items():
encoded_inputs[k].squeeze_() # remove batch dimension
return encoded_inputs
transform = aug.Compose([
aug.Flip(p=0.5)
])
dir_path = r'images/train'
count = 0
# Iterate directory
for path in os.listdir(dir_path):
# check if current path is a file
if os.path.isfile(os.path.join(dir_path, path)):
count += 1
print('File count:', count)
current count = File count: 7386
dir_path = r'mask/train'
count = 0
# Iterate directory
for path in os.listdir(dir_path):
# check if current path is a file
if os.path.isfile(os.path.join(dir_path, path)):
count += 1
print('File count:', count)
this is what I receive as a File count: 7386. So my paths to the images seem to fit.
root_dir = '/data/users/christopher.kehl/Rugd_dataset'
feature_extractor = SegformerFeatureExtractor(align=False, reduce_zero_label=False)
train_dataset = ImageSegmentationDataset(root_dir=root_dir, feature_extractor=feature_extractor, transforms=transform)
valid_dataset = ImageSegmentationDataset(root_dir=root_dir, feature_extractor=feature_extractor, transforms=None, train=False)
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(valid_dataset))
Number of training examples: 7387
Number of validation examples: 100
encoded_inputs = train_dataset[-1]
so the index here is supposed to be zero, but I can only get this to work with a 1 or an -1, anything other than 0.
encoded_inputs["pixel_values"].shape
torch.Size([3, 512, 512])
encoded_inputs["labels"].shape
torch.Size([512, 512])
encoded_inputs["labels"].shape
torch.Size([512, 512])
encoded_inputs["labels"].squeeze().unique()
tensor([ 29, 60, 76, 91, 107])
mask = encoded_inputs["labels"].numpy()
import matplotlib.pyplot as plt
plt.imshow(mask)
shows the images of a mask
from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=4)
batch = next(iter(train_dataloader))
for k,v in batch.items():
print(k, v.shape)
pixel_values torch.Size([4, 3, 512, 512])
labels torch.Size([4, 512, 512])
batch["labels"].shape
torch.Size([4, 512, 512])
classes = pd.read_csv('class_dict_seg.csv')['name']
id2label = classes.to_dict()
label2id = {v: k for k, v in id2label.items()}
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b5", ignore_mismatched_sizes=True,
num_labels=len(id2label), id2label=id2label, label2id=label2id,
reshape_last_stage=True)
optimizer = AdamW(model.parameters(), lr=0.00006)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model Initialized!")
for epoch in range(1, 11): # loop over the dataset multiple times
print("Epoch:", epoch)
pbar = tqdm(train_dataloader)
accuracies = []
losses = []
val_accuracies = []
val_losses = []
model.train()
for idx, batch in enumerate(pbar):
# get the inputs;
pixel_values = batch["pixel_values"].to(device)
labels = batch["labels"].to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(pixel_values=pixel_values, labels=labels)
# evaluate
upsampled_logits = nn.functional.interpolate(outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
predicted = upsampled_logits.argmax(dim=1)
mask = (labels != 255) # we don't include the background class in the accuracy calculation
pred_labels = predicted[mask].detach().cpu().numpy()
true_labels = labels[mask].detach().cpu().numpy()
accuracy = accuracy_score(pred_labels, true_labels)
loss = outputs.loss
accuracies.append(accuracy)
losses.append(loss.item())
pbar.set_postfix({'Batch': idx, 'Pixel-wise accuracy': sum(accuracies)/len(accuracies), 'Loss': sum(losses)/len(losses)})
# backward + optimize
loss.backward()
optimizer.step()
else:
model.eval()
with torch.no_grad():
for idx, batch in enumerate(valid_dataloader):
pixel_values = batch["pixel_values"].to(device)
labels = batch["labels"].to(device)
outputs = model(pixel_values=pixel_values, labels=labels)
upsampled_logits = nn.functional.interpolate(outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
predicted = upsampled_logits.argmax(dim=1)
mask = (labels != 255) # we don't include the background class in the accuracy calculation
pred_labels = predicted[mask].detach().cpu().numpy()
true_labels = labels[mask].detach().cpu().numpy()
accuracy = accuracy_score(pred_labels, true_labels)
val_loss = outputs.loss
val_accuracies.append(accuracy)
val_losses.append(val_loss.item())
print(f"Train Pixel-wise accuracy: {sum(accuracies)/len(accuracies)}\
Train Loss: {sum(losses)/len(losses)}\
Val Pixel-wise accuracy: {sum(val_accuracies)/len(val_accuracies)}\
Val Loss: {sum(val_losses)/len(val_losses)}")
Here is the error Block:
IndexError: Target 151 is out of bounds.
Full Trace
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-32-50d0deee01ff> in <module>
16
17 # forward
---> 18 outputs = model(pixel_values=pixel_values, labels=labels)
19
20 # evaluate
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/.local/lib/python3.6/site-packages/transformers/models/segformer/modeling_segformer.py in forward(self, pixel_values, labels, output_attentions, output_hidden_states, return_dict)
807 )
808 loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
--> 809 loss = loss_fct(upsampled_logits, labels)
810
811 if not return_dict:
~/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/.local/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
1150 return F.cross_entropy(input, target, weight=self.weight,
1151 ignore_index=self.ignore_index, reduction=self.reduction,
-> 1152 label_smoothing=self.label_smoothing)
1153
1154
~/.local/lib/python3.6/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
IndexError: Target 150 is out of bounds.
I am trying to use the Pytorch-geometric hypergraph convolution network's layer for training no hypergraph. I am using the Amazon review dataset provided by https://www.cs.cornell.edu/~arb/data/amazon-reviews/
Since a node doesn't have a feature, I merely put constants in all nodes as features. Further for hyperedges, I construct an incidence matrix.
First, download the dataset using the command:
!wget -P /tmp https://github.com/gravitogen/hosting_datasets/releases/download/amazon_review_1.0/amazon-reviews.zip
!unzip /tmp/amazon-reviews.zip
Code is below:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import HypergraphConv
import numpy as np
import pandas as pd
class HyperGraph1(nn.Module):
def __init__(self, nfeatures, nhiddden, nclass, dropout,nlayer=1):
super(HyperGraph1, self).__init__()
self.conv1 = HypergraphConv(nfeatures, 16)
self.conv2 = HypergraphConv(16,nclass)
self.dropout_p = dropout
def forward(self, data):
x, edge_index = data.x, data.edge_index
x = self.conv1(x, edge_index)
x = F.relu(x)
x = F.dropout(x, training=self.training)
x = self.conv2(x, edge_index)
return F.log_softmax(x, dim=1)
folder = 'amazon-reviews/'
node_indices_files = 'node-labels-amazon-reviews.txt'
node_label_files = 'label-names-amazon-reviews.txt'
hyperedge_files = 'hyperedges-amazon-reviews.txt'
node_indices = pd.read_csv(folder + node_indices_files, header=None)
labels_annot = pd.read_csv(folder + node_label_files, header = None)
hyper_edge = []
with open(folder + hyperedge_files) as f:
for line in f.readlines():
chunks = line.split(',')
chunks = [int(i) for i in chunks]
hyper_edge.append(chunks)
nodes = node_indices.index.to_list()
labels = node_indices[node_indices.columns[0]].to_list()
# generating sparse incidence matrix for storing hyperedges
node_on_edge_list = []
edge_indices = []
counter = 0
for l in hyper_edge:
node_on_edge_list += l
edge_indices += [counter]*len(l)
counter = counter + 1
hyperedge_index = torch.tensor([ node_on_edge_list, edge_indices])
# generate `Data` for pytorch-geometric
x = np.zeros(len(nodes)*2, dtype=int)
x = x.reshape(len(nodes),2).tolist()
X = torch.tensor(x, dtype=torch.float)
Y = torch.tensor(labels, dtype=torch.int)
from torch_geometric.data import Data
data = Data(x=X, edge_index=hyperedge_index, y=Y)
# Now get ready for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HyperGraph1(nfeatures=2, nhiddden=16, nclass=labels_annot.shape[0],dropout=0.1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
random_nums = np.random.rand(1, len(data.y))
train_np = random_nums > 0.5
train_mask = train_np.tolist()[0]
test_np = random_nums <= 0.5
test_mask = test_np.tolist()[0]
data.train_mask = torch.tensor(train_mask)
data.test_mask = torch.tensor(test_mask)
for epoch in range(2000):
optimizer.zero_grad()
out = model(data)
loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()
However, upon attempt to train the network, I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-98-40e1b6f967df> in <module>
1 for epoch in range(2000):
2 optimizer.zero_grad()
----> 3 out = model(data)
4 loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
5 loss.backward()
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-84-0bf0aa6f166c> in forward(self, data)
8 def forward(self, data):
9 x, edge_index = data.x, data.edge_index
---> 10 x = self.conv1(x, edge_index)
11 x = F.relu(x)
12 x = F.dropout(x, training=self.training)
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_geometric/nn/conv/hypergraph_conv.py in forward(self, x, hyperedge_index, hyperedge_weight, hyperedge_attr)
139
140 D = scatter_add(hyperedge_weight[hyperedge_index[1]],
--> 141 hyperedge_index[0], dim=0, dim_size=num_nodes)
142 D = 1.0 / D
143 D[D == float("inf")] = 0
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_scatter/scatter.py in scatter_add(src, index, dim, out, dim_size)
27 out: Optional[torch.Tensor] = None,
28 dim_size: Optional[int] = None) -> torch.Tensor:
---> 29 return scatter_sum(src, index, dim, out, dim_size)
30
31
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_scatter/scatter.py in scatter_sum(src, index, dim, out, dim_size)
19 size[dim] = int(index.max()) + 1
20 out = torch.zeros(size, dtype=src.dtype, device=src.device)
---> 21 return out.scatter_add_(dim, index, src)
22 else:
23 return out.scatter_add_(dim, index, src)
RuntimeError: index 2268264 is out of bounds for dimension 0 with size 2268264
I am clueless about how to fix this.Note that 2268264 is the number of nodes in the graph.
I mimicked the dataset construction from https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html so I am not sure what mistake I did here.
I am working on clinical EHR. I am currently referring to this blog and github link here. I have generated the dataset and processed it as per the instructions in the notebooks present in the repository. I am facing an issue trying to train the model.
build_EHRNN class.
torch.manual_seed(1)
class build_EHRNN(nn.Module):
def __init__(self, inputDimSize=4894, hiddenDimSize=[200,200], batchSize=100, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8):
super(build_EHRNN, self).__init__()
self.inputDimSize = inputDimSize
self.hiddenDimSize = hiddenDimSize
self.numClass = numClass
self.embSize = embSize
self.batchSize = batchSize
self.dropout = nn.Dropout(p=0.5)
self.logEps = logEps
# Embedding inputs
self.W_emb = nn.Parameter(torch.randn(self.inputDimSize, self.embSize).cuda())
self.b_emb = nn.Parameter(torch.zeros(self.embSize).cuda())
self.W_out = nn.Parameter(torch.randn(self.hiddenDimSize, self.numClass).cuda())
self.b_out = nn.Parameter(torch.zeros(self.numClass).cuda())
self.params = [self.W_emb, self.W_out,
self.b_emb, self.b_out]
def forward(self,x, y, h, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
# Loss calculation
cross_entropy = -(y * torch.log(yhat + self.logEps) + (1. - y) * torch.log(1. - yhat + self.logEps))
last_step = -torch.mean(y[-1] * torch.log(yhat[-1] + self.logEps) + (1. - y[-1]) * torch.log(1. - yhat[-1] + self.logEps))
prediction_loss = torch.sum(torch.sum(cross_entropy, dim=0),dim=1)/ torch.cuda.FloatTensor(lengths)
cost = torch.mean(prediction_loss) + 0.000001 * (self.W_out ** 2).sum() # regularize
return (yhat, hidden_state, cost)
def init_hidden(self):
return torch.zeros(self.batchSize, self.hiddenDimSize) # initial state
Creating instance and training model
model = build_EHRNN(inputDimSize=4894, hiddenDimSize=200, batchSize=100, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8)
model = model.to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr = 0.01, rho=0.90)
max_epochs = 10
loss_all = []
iteration = 0
for e in range(max_epochs):
for index in random.sample(range(n_batches), n_batches):
batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
optimizer.zero_grad()
x, y, lengths, mask = padding(batchX, batchY, 4894, 4894)
if torch.cuda.is_available():
x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
outputs, hidden, cost = model(x,y, h, lengths, mask)
if torch.cuda.is_available():
cost.cuda()
cost.backward()
nn.utils.clip_grad_norm_(model.parameters(), 5)
optimizer.step()
Error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-cff1f002dced> in <module>()
17 x, y, lenghts, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
18
---> 19 outputs, hidden, cost = model(x,y, h, lengths, mask)
20
21 if torch.cuda.is_available():
NameError: name 'h' is not defined
Update:
Removing 'h' param produces the following error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-14-6495250d91c9> in <module>()
18
19 # outputs, hidden, cost = model(x,y, h, lengths, mask)
---> 20 outputs, hidden, cost = model(x, y, lengths, mask)
21
22 if torch.cuda.is_available():
1 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-7-3c831fe3ca8d> in forward(self, x, y, lengths, mask)
36 h = rnn(seq, h)
37 hidden_state.append(h)
---> 38 hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
39 input_values = hidden_state
40
RuntimeError: stack expects a non-empty TensorList
I think I fixed your error:
replace your forward method to:
def forward(self,x, y, lengths, mask):
self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
input_values = self.emb
self.outputs = [input_values]
for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]): # iterate over layers
rnn = EHRNN(self.inputDimSize,hiddenSize,self.embSize,self.batchSize,self.numClass) # calculate hidden states
hidden_state = []
h = self.init_hidden().cuda()
for i,seq in enumerate(input_values): # loop over sequences in each batch
h = rnn(seq, h)
hidden_state.append(h)
hidden_state = self.dropout(torch.stack(hidden_state)) # apply dropout between layers
input_values = hidden_state
y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out # fully connected layer
yhat = F.softmax(y_linear, dim=1) # yhat
yhat = yhat*mask[:,:,None] # apply mask
and replace the line where the error happens to:
outputs, hidden, cost = model(x, y, lengths, mask)
I'm attempting to modify this feedforward network taken from https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/feedforward_neural_network/main.py
to utilize my own dataset.
I define a custom dataset of two 1 dim arrays as input and two scalars the corresponding output :
x = torch.tensor([[5.5, 3,3,4] , [1 , 2,3,4], [9 , 2,3,4]])
print(x)
y = torch.tensor([1,2,3])
print(y)
import torch.utils.data as data_utils
my_train = data_utils.TensorDataset(x, y)
my_train_loader = data_utils.DataLoader(my_train, batch_size=50, shuffle=True)
I've updated the hyperparameters to match new input_size (2) & num_classes (3).
I've also changed images = images.reshape(-1, 28*28).to(device) to images = images.reshape(-1, 4).to(device)
As the training set is minimal I've changed the batch_size to 1.
Upon making these modifications I receive error when attempting to train :
RuntimeError Traceback (most recent call
last) in ()
51
52 # Forward pass
---> 53 outputs = model(images)
54 loss = criterion(outputs, labels)
55
/home/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
in forward(self, x)
31
32 def forward(self, x):
---> 33 out = self.fc1(x)
34 out = self.relu(out)
35 out = self.fc2(out)
/home/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
/home/.local/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input)
53
54 def forward(self, input):
---> 55 return F.linear(input, self.weight, self.bias)
56
57 def extra_repr(self):
/home/.local/lib/python3.6/site-packages/torch/nn/functional.py
in linear(input, weight, bias)
990 if input.dim() == 2 and bias is not None:
991 # fused op is marginally faster
--> 992 return torch.addmm(bias, input, weight.t())
993
994 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [3 x 4], m2: [2 x 3] at
/pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:249
How to amend code to match expected dimensionality ? I'm unsure what code to change as I've changed all parameters that require updating ?
Source prior to changes :
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data',
train=True,
transform=transforms.ToTensor(),
download=True)
test_dataset = torchvision.datasets.MNIST(root='../../data',
train=False,
transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, num_classes)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.reshape(-1, 28*28).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.reshape(-1, 28*28).to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
Source post changes :
x = torch.tensor([[5.5, 3,3,4] , [1 , 2,3,4], [9 , 2,3,4]])
print(x)
y = torch.tensor([1,2,3])
print(y)
import torch.utils.data as data_utils
my_train = data_utils.TensorDataset(x, y)
my_train_loader = data_utils.DataLoader(my_train, batch_size=50, shuffle=True)
print(my_train)
print(my_train_loader)
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 2
hidden_size = 3
num_classes = 3
num_epochs = 5
batch_size = 1
learning_rate = 0.001
# MNIST dataset
train_dataset = my_train
# Data loader
train_loader = my_train_loader
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, num_classes)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.reshape(-1, 4).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.reshape(-1, 4).to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
You need to change input_size to 4 (2*2), and not 2 as your modified code currently shows.
If you compare it to the original MNIST example, you'll see that input_size is set to 784 (28*28) and not just to 28.