I am getting an error when running the train function of my MLP code.
This is the error:
mat1 and mat2 shapes cannot be multiplied (128x10 and 48x10)
My code for the train function is this:
class net(nn.Module):
    """Three-layer fully connected classifier.

    The forward pass is ``fc1 -> ReLU -> fc2 -> ReLU -> fc3 -> softmax``.

    Args:
        input_dim2: Number of input features per sample.
        hidden_dim2: Width of both hidden layers.
        output_dim2: Number of output units.
    """

    def __init__(self, input_dim2, hidden_dim2, output_dim2):
        super(net, self).__init__()
        self.input_dim2 = input_dim2
        self.fc1 = nn.Linear(input_dim2, hidden_dim2)
        # nn.ReLU has no parameters, so one instance serves both hidden layers.
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim2, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, output_dim2)

    def forward(self, x):
        """Return softmax probabilities over the last dimension.

        Args:
            x: Tensor whose last dimension has ``input_dim2`` features.

        Returns:
            Tensor with ``output_dim2`` entries in the last dimension.
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # fc3 must run exactly once. Calling it a second time feeds a tensor
        # with output_dim2 features into a layer that expects hidden_dim2
        # features, which raises "mat1 and mat2 shapes cannot be multiplied".
        x = self.fc3(x)
        # NOTE(review): nn.CrossEntropyLoss expects unnormalized logits; if this
        # model is trained with it, consider returning x without the softmax.
        return F.softmax(x, dim=-1)
# Build the network, the loss and the optimizer that train() reads as globals.
# NOTE(review): input_dim2, hidden_dim2, output_dim2 and learning_rate2 are not
# defined in this snippet - presumably set earlier; confirm.
model = net(input_dim2, hidden_dim2, output_dim2) #create the network
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate2)
def train(num_epochs2):
    """Run ``num_epochs2`` passes over ``train_loader`` and print the mean loss of each.

    Reads the module-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. The shapes of the targets, inputs and outputs are printed
    for every batch.
    """
    for epoch in range(num_epochs2):
        batch_losses = []
        for inputs, targets in train_loader:
            print(targets.shape)
            print(inputs.shape)
            predictions = model(inputs)  # forward pass
            print(predictions.shape)
            batch_loss = criterion(predictions, targets)
            batch_losses.append(batch_loss.item())
            # Clear gradients accumulated by the previous step before backprop.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
        print("Loss at {}th epoch: {}".format(epoch, np.mean(batch_losses)))
I don't know where I'm wrong. My code seems to work okay.
From the limited information given, I guess the problem is in the following lines:
x = self.fc3(x)
x = F.softmax(self.fc3(x))
Try to replace with:
x = self.fc3(x)
# Pass dim explicitly: calling softmax without it relies on a deprecated implicit choice.
x = F.softmax(x, dim=-1)
A good question should include the full error traceback and a complete toy example that reproduces the error!
Here a ReLU activation seems to be missing in the `__init__` function, or there is an extra ReLU activation in the forward function. Look at the code below and try to figure out what is extra or missing.
# Constructor quoted from the question: creates three Linear layers and one ReLU.
def __init__(self, input_dim2, hidden_dim2, output_dim2):
super(net, self).__init__()
self.input_dim2 = input_dim2
self.fc1 = nn.Linear(input_dim2, hidden_dim2)
# Only one ReLU instance is created; forward() calls it after fc1 and after fc2.
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_dim2, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim2)
# Forward pass quoted from the question: fc1 -> relu -> fc2 -> relu -> fc3, then softmax.
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
# NOTE(review): self.fc3 is applied a second time here, to the output of the
# previous fc3 call, so its input has output_dim2 features instead of hidden_dim2.
x = F.softmax(self.fc3(x))
return x
Related
I am trying to fight overfitting, which is why I decided to look through the documentation (https://pytorch-lightning.readthedocs.io/en/stable/common/evaluation_basic.html#train-with-the-validation-loop), where I found that you can pass both a training and a validation dataloader to Trainer.fit. My question is: should I use this method, or can I simply pass the data module to Trainer.fit to prevent overfitting?
Code DataLoader:
class ClassifierDataModule(pl.LightningDataModule):
    """Lightning data module that serves a training and a validation ``DataLoader``.

    Args:
        train_dataset: Dataset handed to the training ``DataLoader``.
        val_dataset: Dataset handed to the validation ``DataLoader``.
        batch_size: Batch size used by both loaders.
    """

    # NOTE(review): the datasets are annotated as pd.DataFrame but are passed
    # straight to DataLoader - confirm the intended type.
    def __init__(self, train_dataset: pd.DataFrame, val_dataset: pd.DataFrame, batch_size: int):
        super().__init__()
        self.prepare_data_per_node = False
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.batch_size = batch_size

    def _make_loader(self, dataset, shuffle):
        """Build a DataLoader over ``dataset`` with the shared batch size and worker count."""
        # os.cpu_count() may return None; fall back to loading in the main process.
        num_workers = os.cpu_count() or 0
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=num_workers)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader.

        Validation batches are not shuffled: order does not matter for
        evaluation, and a fixed order keeps runs comparable.
        """
        return self._make_loader(self.val_dataset, shuffle=False)
# ClassifierDataModule.__init__ takes (train_dataset, val_dataset, batch_size);
# it has no test-set parameter, so passing test_dataset raised a TypeError.
data_module_classifier = ClassifierDataModule(train_dataset, val_dataset, batch_size=BATCH_SIZE)
And here is my Trainer.fit():
model = MulticlassClassificationLIGHT(class_weights)
#trainer.fit(model, data_module_classifier) # SHOULD I USE THIS METHOD TO PREVENT OVERFITTING
trainer.fit(model, data_module_classifier.train_dataloader(),data_module_classifier.val_dataloader() ) # OR THIS ONE ?
My LightningModule just in case:
class MulticlassClassificationLIGHT(pl.LightningModule):
    """MLP classifier trained with class-weighted cross-entropy.

    Three hidden Linear layers (512, 128, 64 units), each followed by batch
    normalization and ReLU; dropout follows the second and third. A final
    Linear layer produces one logit per class.

    Args:
        class_weights: Tensor of per-class weights passed to ``nn.CrossEntropyLoss``.
        num_feature: Number of input features (default 35).
        num_class: Number of classes (default 36).
    """

    def __init__(self, class_weights, num_feature=35, num_class=36):
        super().__init__()
        self.num_feature = num_feature
        self.num_class = num_class
        self.layer_1 = nn.Linear(self.num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, self.num_class)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)
        # The loss is a submodule and stores `weight` as a buffer, so the weights
        # follow the module when it is moved to a device; no global `device` needed.
        self.loss = nn.CrossEntropyLoss(weight=class_weights)

    def forward(self, x):
        """Return unnormalized class logits for a batch of feature vectors."""
        x = self.relu(self.batchnorm1(self.layer_1(x)))
        x = self.dropout(self.relu(self.batchnorm2(self.layer_2(x))))
        x = self.dropout(self.relu(self.batchnorm3(self.layer_3(x))))
        return self.layer_out(x)

    def _shared_step(self, batch, log_name):
        """Compute the weighted cross-entropy for ``batch`` and log it as ``log_name``."""
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        self.log(log_name, loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for one batch; logged as ``train_loss``."""
        return self._shared_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for one batch; logged as ``val_loss``."""
        # "val_loss" is the metric name the checkpoint callback is asked to monitor.
        return self._shared_step(batch, "val_loss")
Passing a validation dataloader during training does not fix overfitting by itself. It lets you measure how much the model overfits or underfits. For a well-fit model, performance on the validation data should be close to performance on the training data.
Regarding the syntax, this should work:
trainer.fit(model=model, train_dataloaders =data_module_classifier.train_dataloader(), val_dataloaders =data_module_classifier.val_dataloader())
documentation for fit here - https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#trainer-class-api
I am unable to plot a graph neural network model. I have seen a few related questions (1, 2, 3) on this topic, but their answers do not apply to graph neural networks.
What makes it different is that the input vector include objects of different dimensions e.g. properties matrix dimension is [n_nodes, n_node_features], adjacency matrix dimension is [n_nodes, n_nodes] etc. Here is the example of my Model:
class GIN0(Model):
    """GINConv stack followed by global average pooling and two dense layers.

    Args:
        channels: Width used for every GINConv, its hidden MLP and both dense layers.
        n_layers: Total number of GINConv layers (``conv1`` plus ``n_layers - 1`` more).
    """

    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
        self.convs = [
            GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
            for _ in range(1, n_layers)
        ]
        self.pool = GlobalAvgPool()
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(channels, activation="relu")

    def call(self, inputs):
        """Run the model on a 3-item input ``(x, a, i)``.

        ``x`` and ``a`` are fed to every convolution; ``i`` is only used by the
        pooling layer.
        """
        feats, adj, idx = inputs
        hidden = self.conv1([feats, adj])
        for layer in self.convs:
            hidden = layer([hidden, adj])
        pooled = self.pool([hidden, idx])
        projected = self.dense1(pooled)
        return self.dense2(self.dropout(projected))
One of the answers in 2 suggested to add build_graph function as follows:
class my_model(Model):
    """VGG16 backbone with a small binary-classification head.

    Args:
        dim: Input shape passed to VGG16 as ``input_shape`` and reused by
            ``build_graph`` for the symbolic input.
    """

    def __init__(self, dim):
        super(my_model, self).__init__()
        # Keep the shape on the instance so build_graph() does not depend on a
        # module-level variable that happens to be named `dim`.
        self.dim = dim
        self.Base = VGG16(input_shape=dim, include_top=False, weights='imagenet')
        self.GAP = L.GlobalAveragePooling2D()
        self.BAT = L.BatchNormalization()
        self.DROP = L.Dropout(rate=0.1)
        self.DENS = L.Dense(256, activation='relu', name='dense_A')
        self.OUT = L.Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Apply backbone, pooling, batch norm, dropout and the two dense layers."""
        x = self.Base(inputs)
        g = self.GAP(x)
        b = self.BAT(g)
        d = self.DROP(b)
        d = self.DENS(d)
        return self.OUT(d)

    def build_graph(self):
        """Return a functional ``Model`` wrapping ``call`` so ``summary()`` lists every layer."""
        x = Input(shape=self.dim)
        return Model(inputs=[x], outputs=self.call(x))
dim = (124,124,3)
model = my_model((dim))
model.build((None, *dim))
model.build_graph().summary()
However, I am not sure how to define dim or Input Layer using tf.keras.layers.Input for such a hybrid data-structure as described above.
Any suggestions?
Here is the minimal code to plot such a subclassed multi-input model. Note that, as stated in the comment above, there are some issues with your GINConv, which comes from spektral, and they are not related to the main query. So I will give a general solution for such multi-input modeling scenarios. To make it work with spektral, please reach out to the package author for further discussion.
From the spektral repo, here, I got an idea of the shapes of the input tensors.
x, y = next(iter(loader_tr))
bs_x = list(x[0].shape)
bs_y = list(x[1].shape)
bs_z = list(x[2].shape)
bs_x, bs_y, bs_z
([1067, 4], [1067, 1067], [1067])
Similar model, it also takes same amount of inputs and with same shape. But without GINConv.
class GIN0(Model):
    """Three-input stand-in model that uses Conv1D layers instead of GINConv.

    Args:
        channels: Number of filters of both Conv1D layers and units of ``dense1``.
        n_layers: Accepted for signature parity with the original model; not used here.
    """

    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        self.conv2 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        # Create the pooling and concatenation layers once, here, so that call()
        # does not instantiate new layer objects on every invocation.
        self.pool_x = tf.keras.layers.GlobalAveragePooling1D()
        self.pool_a = tf.keras.layers.GlobalAveragePooling1D()
        self.concat = tf.keras.layers.Concatenate(axis=1)
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        # NOTE(review): n_out is read from module scope; it is not defined in this snippet.
        self.dense2 = Dense(n_out, activation="softmax")

    def call(self, inputs):
        """Convolve and pool ``x`` and ``a``, concatenate them with ``i``, then classify."""
        x, a, i = inputs
        x = self.pool_x(self.conv1(x))
        a = self.pool_a(self.conv2(a))
        x = self.concat([a, x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)

    def build_graph(self):
        """Return a functional ``tf.keras.Model`` over three symbolic inputs for summary/plotting."""
        # bs_x, bs_y and bs_z are the per-input shapes computed at module level
        # from one batch of the loader.
        x = tf.keras.Input(shape=bs_x)
        y = tf.keras.Input(shape=bs_y)
        z = tf.keras.Input(shape=bs_z)
        return tf.keras.Model(
            inputs=[x, y, z],
            outputs=self.call([x, y, z])
        )
model = GIN0(channels, layers)
model.build(
[
(None, *bs_x),
(None, *bs_y),
(None, *bs_z)
]
)
# OK
model.build_graph().summary()
# OK
tf.keras.utils.plot_model(
model.build_graph(), show_shapes=True
)
I have two functions like this (code source of the functions is here):
# Script-level setup: device, the MUTAG dataset, data loaders and the layer registry.
device = torch.device('cuda')
dataset = TUDataset(root='/tmp/MUTAG', name='MUTAG', use_node_attr=True)
# NOTE(review): `loader` is not referenced by the rest of the code shown here.
loader = DataLoader(dataset, batch_size=32, shuffle=True)
# The same dataset object is reused for train, validation and test.
train_dataset = dataset #just for testing
val_dataset = dataset
test_dataset = dataset
graph_train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
graph_val_loader = DataLoader(val_dataset, batch_size=8)
# Maps the `layer_name` string accepted by GNNModel to a graph layer class.
gnn_layer_by_name = {
"GCN": geom_nn.GCNConv,
"GAT": geom_nn.GATConv,
"GraphConv": geom_nn.GraphConv
}
class GCNLayer(nn.Module):
    """Graph convolution: project node features, then average them over each node's neighbours.

    Args:
        c_in: Number of input features per node.
        c_out: Number of output features per node.
    """

    def __init__(self, c_in, c_out):
        super().__init__()
        self.projection = nn.Linear(c_in, c_out)

    def forward(self, node_feats, adj_matrix):
        """Return the neighbour-averaged projected features.

        Args:
            node_feats: Tensor of shape ``(batch, num_nodes, c_in)``.
            adj_matrix: Tensor of shape ``(batch, num_nodes, num_nodes)``; row
                ``i`` holds the weights of the nodes aggregated into node ``i``.

        Returns:
            Tensor of shape ``(batch, num_nodes, c_out)``.
        """
        num_neighbours = adj_matrix.sum(dim=-1, keepdim=True)
        # A node whose adjacency row sums to zero would give 0 / 0 = NaN; divide
        # by 1 there so its output is a zero vector instead.
        num_neighbours = num_neighbours.masked_fill(num_neighbours == 0, 1)
        node_feats = self.projection(node_feats)
        node_feats = torch.bmm(adj_matrix, node_feats)
        return node_feats / num_neighbours
class GNNModel(nn.Module):
    """Stack of graph layers with an activation and dropout between them.

    Args:
        c_in: Dimension of the input node features.
        c_hidden: Dimension of the hidden node features.
        c_out: Dimension of the features produced by the last graph layer.
        num_layers: Total number of graph layers; each of the first
            ``num_layers - 1`` is followed by the activation and dropout.
        activation_function: String that is evaluated to obtain the activation module.
        optimizer_name: Accepted but not used by this class.
        learning_rate: Accepted but not used by this class.
        dp_rate_linear: Dropout probability applied after each hidden layer.
        layer_name: Key into ``gnn_layer_by_name`` selecting the graph layer class.
        **kwargs: Extra keyword arguments forwarded to every graph layer.
    """

    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear, layer_name="GCN", **kwargs):
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]

        # SECURITY(review): eval() executes arbitrary code; only pass trusted
        # strings here, or replace this with a lookup table of activations.
        activation_function = eval(activation_function)

        layers = []
        in_channels, out_channels = c_in, c_hidden
        for _ in range(num_layers - 1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                activation_function,
                nn.Dropout(p=dp_rate_linear),
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index):
        """Apply every layer in order and return the resulting node features.

        Message-passing layers receive ``edge_index`` as a second argument;
        every other layer is applied to ``x`` alone.
        """
        for layer in self.layers:
            if isinstance(layer, geom_nn.MessagePassing):
                x = layer(x, edge_index)
            else:
                x = layer(x)
        return x
class GraphGNNModel(nn.Module):
    """Graph-level model: a ``GNNModel`` encoder, mean pooling, and a linear head.

    Args:
        c_in: Dimension of the input node features.
        c_hidden: Dimension of the node features produced by the encoder.
        c_out: Dimension of the head's output.
        dp_rate_linear: Dropout probability, used in the encoder and before the head.
        **kwargs: Remaining keyword arguments forwarded to ``GNNModel``.
    """

    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear, **kwargs):
        super().__init__()
        # The encoder outputs c_hidden features per node; the head maps them to c_out.
        self.GNN = GNNModel(
            c_in=c_in,
            c_hidden=c_hidden,
            c_out=c_hidden,
            dp_rate_linear=dp_rate_linear,
            **kwargs,
        )
        head_dropout = nn.Dropout(p=dp_rate_linear)
        head_linear = nn.Linear(c_hidden, c_out)
        self.head = nn.Sequential(head_dropout, head_linear)

    def forward(self, x, edge_index, batch_idx):
        """Encode the nodes, mean-pool them by ``batch_idx`` and apply the head."""
        node_embeddings = self.GNN(x, edge_index)
        graph_embeddings = geom_nn.global_mean_pool(node_embeddings, batch_idx)
        return self.head(graph_embeddings)
As you can see, I really don't need GNNModel and GraphGNNModel to be two separate classes; the second class just adds a sequential head to the end of the first one.
I tried combining the functions by doing:
class GNNModel(nn.Module):
    """Graph encoder with a built-in pooled classification head."""

    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear, layer_name="GCN", **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate_linear - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT)
            activation_function - String that is evaluated to obtain the activation module
            optimizer_name, learning_rate - Accepted but not used by this class
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]

        # SECURITY(review): eval() executes arbitrary code; only pass trusted strings.
        activation_function = eval(activation_function)

        layers = []
        in_channels, out_channels = c_in, c_hidden
        for _ in range(num_layers - 1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                activation_function,
                nn.Dropout(p=dp_rate_linear),
            ]
            in_channels = c_hidden
        # The last graph layer must emit c_hidden features (not c_out), because
        # the head below starts with nn.Linear(c_hidden, c_out).
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_hidden,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)
        self.head = nn.Sequential(
            nn.Dropout(p=dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index, batch_idx):
        """Encode nodes, mean-pool them per graph and apply the head.

        ``batch_idx`` is required by the pooling step; leaving it out of the
        signature is what produced "forward() takes 3 positional arguments but
        4 were given" when callers passed it.
        """
        for layer in self.layers:
            if isinstance(layer, geom_nn.MessagePassing):
                # Graph layers also need the connectivity.
                x = layer(x, edge_index)
            else:
                x = layer(x)
        # No self.GNN call here: the loop above already is the encoder, and the
        # merged class never defines a GNN attribute.
        x = geom_nn.global_mean_pool(x, batch_idx)
        return self.head(x)
But I get the error:
TypeError: forward() takes 3 positional arguments but 4 were given
Could someone show me the correct way to combine these? (The exact explanation of the code is in the "Graph level tasks / graph classification" section of the tutorial linked here.)
Try adding batch_idx as a parameter of your new forward function. I also noticed some other inconsistencies: for example, where is geom_nn being passed to the function? You probably want to use self.geom_nn, and for that you need to fix the __init__() part as well.
def forward(self, x, edge_index, batch_idx):
    """Encode nodes with ``self.layers``, mean-pool them by ``batch_idx`` and apply ``self.head``."""
    for layer in self.layers:
        if isinstance(layer, geom_nn.MessagePassing):
            # Graph layers take the connectivity as a second argument.
            x = layer(x, edge_index)
        else:
            x = layer(x)
    # The merged class has no `GNN` attribute (the loop above replaces it), so
    # the former `self.GNN(x, edge_index)` call is removed.
    # batch_idx is used here, which is why it must be a parameter of forward().
    x = geom_nn.global_mean_pool(x, batch_idx)
    x = self.head(x)
    return x
I need your kind help.
How can I design a CNN model to train on multiple labels for image classification?
My data is a set of spectrogram images. Each spectrum has 4 labels. How can I build a CNN to classify those images?
How to compose the forward function and initialization function
What kinds of layers do you recommend?
First, the metadata structure goes like this:
enter image description here
Here is my Dataset
class OurDataset(Dataset):
    """Spectrogram images paired with four labels read from a metadata CSV.

    Each item is ``(img, label_A, label_Fw, label_P, label_Fi)``. The labels
    come from CSV columns 4-7 of the row; column 8 supplies the image file stem.
    """

    spectra_dir = f"./data/spectrograms_fm"
    metaData_path = f"./data/FMAudio/metaData.csv"

    def __init__(self):
        self.audio_labels = panda.read_csv(self.metaData_path)
        # Resize every image to 201x81, then convert it to a tensor.
        resize = torchvision.transforms.Resize((201, 81))
        to_tensor = torchvision.transforms.ToTensor()
        self.transform = torchvision.transforms.Compose([resize, to_tensor])

    def __len__(self):
        return len(self.audio_labels)

    def __getitem__(self, idx):
        table = self.audio_labels
        image = PILImage.open(f"./data/spectrograms_fm/mutiplelabels/{table.iloc[idx, 8]}.png").convert("RGB")
        image = self.transform(image)
        # Column positions of label_A, label_Fw, label_P and label_Fi, in that order.
        labels = [torch.tensor(table.iloc[idx, column]) for column in (4, 5, 6, 7)]
        return (image, *labels)
I defined the training function
def train(dataloader, model, loss, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(img_tensors, Y_A, Y_Fw, Y_P, Y_Fi)`` batches.
        model: Module called on ``img_tensors.float()``.
        loss: Criterion called as ``loss(pred, target)`` for each of the four targets.
        optimizer: Optimizer over the model's parameters.
    """
    model.train()
    for img_tensors, Y_A, Y_Fw, Y_P, Y_Fi in dataloader:
        optimizer.zero_grad()
        pred = model(img_tensors.float())
        # Use the `loss` argument as the criterion (the body previously called an
        # undefined `cost`) and keep the summed value under its own name so the
        # criterion is not overwritten after the first batch.
        # NOTE(review): all four targets are scored against the same `pred`.
        total_loss = loss(pred, Y_A) + loss(pred, Y_Fw) + loss(pred, Y_P) + loss(pred, Y_Fi)
        total_loss.backward()
        optimizer.step()
I used the official CNN model from Microsoft pytorch tutorial image classification which is not apt to my case.
class CNNet(nn.Module):
    """Two-convolution image classifier returning log-probabilities.

    Args:
        num_classes: Number of output classes (default 4).
    """

    def __init__(self, num_classes=4):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.flatten = nn.Flatten()
        # 2112 = 64 channels * 11 * 3, the feature-map size reached from a
        # 3x201x81 input after the two conv(5) + max-pool(4) stages.
        self.fc1 = nn.Linear(2112, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        """Return log-softmax scores of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 201, 81)``.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 4))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 4))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # NOTE(review): the ReLU on the final layer clips negative logits to 0
        # before log_softmax; kept as is to preserve existing behaviour.
        x = F.relu(self.fc2(x))
        return F.log_softmax(x, dim=1)
Can you write me a demo?
Cheers
I am trying to build a neural network with two inputs and one output in pytorch.
However, I get an error and cannot get it to work.
python code is below.
import torch
import numpy as np
import os
import pandas as pd
import glob
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
    """Seven-layer MLP with sigmoid activations and a log-softmax output.

    Layer widths are ``in_features -> 256 -> 128 -> 64 -> 32 -> 16 -> 8 -> out_features``.

    Args:
        in_features: Number of features per input sample (default 2).
        out_features: Number of output units (default 4).
    """

    def __init__(self, in_features=2, out_features=4):
        super(Model, self).__init__()
        self.linear0 = nn.Linear(in_features, 256)
        self.linear1 = nn.Linear(256, 128)
        self.linear2 = nn.Linear(128, 64)
        self.linear3 = nn.Linear(64, 32)
        self.linear4 = nn.Linear(32, 16)
        self.linear5 = nn.Linear(16, 8)
        self.linear6 = nn.Linear(8, out_features)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, out_features)``.

        Args:
            x: Float tensor of shape ``(batch, in_features)``.
        """
        hidden_layers = (
            self.linear0, self.linear1, self.linear2,
            self.linear3, self.linear4, self.linear5,
        )
        for layer in hidden_layers:
            x = torch.sigmoid(layer(x))
        x = self.linear6(x)
        # NOTE(review): log_softmax yields per-class log-probabilities. For a
        # single-value regression trained with MSELoss, a one-unit output
        # without log_softmax is presumably what is wanted - confirm.
        return F.log_softmax(x, dim=1)
net = Model()
# NOTE(review): `a` is not defined in this snippet. a[0].values is presumably a
# single column (a 1-D array), while Model.linear0 takes 2 features per sample - confirm.
x = torch.tensor(a[0].values)
y = torch.tensor(a[1].values)
def train(model, optimizer, E, iteration, x, y):
    """Fit ``model`` to ``(x, y)`` with full-batch updates.

    Args:
        model: Module called as ``model(x)``.
        optimizer: Optimizer over the model's parameters.
        E: Loss function called as ``E(prediction, y)``.
        iteration: Number of optimization steps.
        x: Input tensor.
        y: Target tensor; predictions are reshaped to ``y.shape`` before the loss.

    Returns:
        Tuple ``(model, losses)`` where ``losses`` holds one float per step.
    """
    losses = []
    for i in range(iteration):
        optimizer.zero_grad()  # reset the gradients to zero
        y_pred = model(x)  # prediction
        loss = E(y_pred.reshape(y.shape), y)  # compute the loss (shapes aligned)
        loss.backward()  # compute the gradients
        optimizer.step()  # update the parameters
        # Convert once to a Python float: it is what gets stored, and printing it
        # avoids dumping the tensor repr (with grad_fn) on every step.
        loss_value = loss.item()
        losses.append(loss_value)  # accumulate the loss values
        print('epoch=', i+1, 'loss=', loss_value)
    return model, losses
optimizer = optim.RMSprop(net.parameters(), lr=0.01) # use RMSprop as the optimizer
E = nn.MSELoss()
net, losses = train(model=net, optimizer=optimizer, E=E, iteration=5000, x=x, y=y)
# NOTE(review): `test` and `X_test` are not defined in the code shown here.
y_pred = test(net, X_test)
The input data is 2-dimensional,
like this ↓
The output data is 1-dimensional.
The error is as follows.
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1846 if has_torch_function_variadic(input, weight, bias):
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
1850
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x100 and 2x256)
What should I do?
You're getting an error at the first layer of your neural network because there is a dimension mismatch. The weights are shape (2,256), so it expects an input of shape (N,2).
It looks like you provide 100 training examples, so N=100, but your input is shape (100,1) instead of (100,2). In your code, it looks like a is (100,2), but x = a[0] is (100,1).