I have two classes like this (the source of the code is here):
device = torch.device('cuda')
dataset = TUDataset(root='/tmp/MUTAG', name='MUTAG', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
train_dataset = dataset #just for testing
val_dataset = dataset
test_dataset = dataset
graph_train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
graph_val_loader = DataLoader(val_dataset, batch_size=8)
gnn_layer_by_name = {
"GCN": geom_nn.GCNConv,
"GAT": geom_nn.GATConv,
"GraphConv": geom_nn.GraphConv
}
class GCNLayer(nn.Module):
def __init__(self, c_in, c_out):
super().__init__()
self.projection = nn.Linear(c_in, c_out)
def forward(self, node_feats, adj_matrix):
num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
node_feats = self.projection(node_feats)
node_feats = torch.bmm(adj_matrix, node_feats)
node_feats = node_feats / num_neighbours
return node_feats
class GNNModel(nn.Module):
def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear,layer_name="GCN", **kwargs):
super().__init__()
gnn_layer = gnn_layer_by_name[layer_name]
layers = []
activation_function = eval(activation_function) ##not great to use
in_channels, out_channels = c_in, c_hidden
for l_idx in range(num_layers-1):
layers += [
gnn_layer(in_channels=in_channels,
out_channels=out_channels,
**kwargs),
activation_function,
nn.Dropout(p=dp_rate_linear)
]
in_channels = c_hidden
layers += [gnn_layer(in_channels=in_channels,
out_channels=c_out,
**kwargs)]
self.layers = nn.ModuleList(layers)
def forward(self, x, edge_index):
for l in self.layers:
if isinstance(l, geom_nn.MessagePassing):
x = l(x, edge_index)
else:
x = l(x)
return x
class GraphGNNModel(nn.Module):
def __init__(self, c_in, c_hidden, c_out, dp_rate_linear,**kwargs):
super().__init__()
self.GNN = GNNModel(c_in=c_in,
c_hidden=c_hidden,
c_out=c_hidden,
dp_rate_linear = dp_rate_linear,
**kwargs)
self.head = nn.Sequential(
nn.Dropout(p=dp_rate_linear),
nn.Linear(c_hidden, c_out)
)
def forward(self, x, edge_index, batch_idx):
x = self.GNN(x, edge_index)
x = geom_nn.global_mean_pool(x, batch_idx)
x = self.head(x)
return x
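For context, a typical forward pass over one batch from graph_train_loader would look like the sketch below; the hyperparameter values (hidden size, number of layers, dropout, learning rate) are placeholders, not values taken from the original code:
model = GraphGNNModel(c_in=dataset.num_node_features,
                      c_hidden=64,
                      c_out=dataset.num_classes,
                      dp_rate_linear=0.5,
                      num_layers=3,
                      activation_function="nn.ReLU()",  # eval'd inside GNNModel
                      optimizer_name="Adam",
                      learning_rate=1e-3).to(device)
batch = next(iter(graph_train_loader)).to(device)
out = model(batch.x, batch.edge_index, batch.batch)  # shape [num_graphs_in_batch, c_out]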
As you can see, I really don't need GNNModel and GraphGNNModel to be two separate classes; the second class just adds a small sequential head to the end of the first one.
I tried combining the functions by doing:
class GNNModel(nn.Module):
def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function, optimizer_name, learning_rate, dp_rate_linear,layer_name="GCN" ,**kwargs):
"""
Inputs:
c_in - Dimension of input features
c_hidden - Dimension of hidden features
c_out - Dimension of the output features. Usually number of classes in classification
num_layers - Number of "hidden" graph layers
layer_name - String of the graph layer to use
dp_rate_linear - Dropout rate to apply throughout the network
kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT; I'm not using GAT here)
activation_function - Activation function
"""
super().__init__()
gnn_layer = gnn_layer_by_name[layer_name]
layers = []
activation_function = eval(activation_function) ##not great to use
in_channels, out_channels = c_in, c_hidden
for l_idx in range(num_layers-1):
layers += [
gnn_layer(in_channels=in_channels,
out_channels=out_channels,
**kwargs),
activation_function,
nn.Dropout(p=dp_rate_linear)
]
in_channels = c_hidden
layers += [gnn_layer(in_channels=in_channels,
out_channels=c_out,
**kwargs)]
self.layers = nn.ModuleList(layers)
self.head = nn.Sequential(
nn.Dropout(p=dp_rate_linear),
nn.Linear(c_hidden, c_out)
)
def forward(self, x, edge_index):
for l in self.layers:
if isinstance(l, geom_nn.MessagePassing): #passing data between conv
x = l(x, edge_index) #what is this
else:
x = l(x)
x = self.GNN(x, edge_index)
x = geom_nn.global_mean_pool(x, batch_idx)
x = self.head(x)
return x
But I get the error:
TypeError: forward() takes 3 positional arguments but 4 were given
Could someone show me the correct way to combine these? (The exact explanation of the code is in the Graph-level tasks/graph classification section of here.)
Try adding batch_idx as a parameter in your new forward function. I also noted some other inconsistencies, e.g. where is geom_nn being passed to the function? You probably want to use self.geom_nn, and for that you need to fix the __init__() part as well.
def forward(self, x, edge_index, batch_idx): #here you must pass batch_idx
for l in self.layers:
if isinstance(l, geom_nn.MessagePassing): #passing data between conv
x = l(x, edge_index) #what is this
else:
x = l(x)
x = self.GNN(x, edge_index)
x = geom_nn.global_mean_pool(x, batch_idx) #here you use batch_idx
#where is geom_nn coming from???
x = self.head(x)
return x
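For completeness, here is a minimal sketch of the combined forward once batch_idx is passed in and the leftover self.GNN call is removed (geom_nn is assumed to be the module-level torch_geometric.nn import). Note also that in the combined __init__ the last graph layer should output c_hidden rather than c_out, so that the nn.Linear(c_hidden, c_out) head receives the expected number of features, mirroring how GraphGNNModel originally passed c_out=c_hidden to GNNModel:
def forward(self, x, edge_index, batch_idx):
    for l in self.layers:
        if isinstance(l, geom_nn.MessagePassing):
            x = l(x, edge_index)   # graph convolutions also need the edge index
        else:
            x = l(x)               # activations / dropout act on node features only
    x = geom_nn.global_mean_pool(x, batch_idx)  # one embedding per graph in the batch
    return self.head(x)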
Related
I am unable to plot my graph neural network model. I have seen a few related questions (1, 2, 3) on this topic, but their answers do not apply to graph neural networks.
What makes it different is that the input includes objects of different dimensions, e.g. the node-feature matrix has dimension [n_nodes, n_node_features], the adjacency matrix has dimension [n_nodes, n_nodes], etc. Here is an example of my model:
class GIN0(Model):
def __init__(self, channels, n_layers):
super().__init__()
self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
self.convs = []
for _ in range(1, n_layers):
self.convs.append(
GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
)
self.pool = GlobalAvgPool()
self.dense1 = Dense(channels, activation="relu")
self.dropout = Dropout(0.5)
self.dense2 = Dense(channels, activation="relu")
def call(self, inputs):
x, a, i = inputs
x = self.conv1([x, a])
for conv in self.convs:
x = conv([x, a])
x = self.pool([x, i])
x = self.dense1(x)
x = self.dropout(x)
return self.dense2(x)
One of the answers in 2 suggested adding a build_graph function as follows:
class my_model(Model):
def __init__(self, dim):
super(my_model, self).__init__()
self.Base = VGG16(input_shape=(dim), include_top = False, weights = 'imagenet')
self.GAP = L.GlobalAveragePooling2D()
self.BAT = L.BatchNormalization()
self.DROP = L.Dropout(rate=0.1)
self.DENS = L.Dense(256, activation='relu', name = 'dense_A')
self.OUT = L.Dense(1, activation='sigmoid')
def call(self, inputs):
x = self.Base(inputs)
g = self.GAP(x)
b = self.BAT(g)
d = self.DROP(b)
d = self.DENS(d)
return self.OUT(d)
# AFAIK: The most convenient method to print model.summary()
# similar to the sequential or functional API.
def build_graph(self):
x = Input(shape=(dim))
return Model(inputs=[x], outputs=self.call(x))
dim = (124,124,3)
model = my_model((dim))
model.build((None, *dim))
model.build_graph().summary()
However, I am not sure how to define dim or the Input layer using tf.keras.layers.Input for such a hybrid data structure as described above.
Any suggestions?
Here is the minimal code to plot such a subclassed multi-input model. Note, as stated in the comment above, there are some issues with your GINConv, which is from spektral, and they are not related to the main query. So, I will give a general solution for such multi-input modeling scenarios. To make it work with spektral, please reach out to the package author for further discussion.
From the spektral repo, here, I got an idea of the shapes of the input tensors.
x, y = next(iter(loader_tr))
bs_x = list(x[0].shape)
bs_y = list(x[1].shape)
bs_z = list(x[2].shape)
bs_x, bs_y, bs_z
([1067, 4], [1067, 1067], [1067])
Here is a similar model; it also takes the same number of inputs with the same shapes, but without GINConv.
class GIN0(Model):
def __init__(self, channels, n_layers):
super().__init__()
self.conv1 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
self.conv2 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
self.dense1 = Dense(channels, activation="relu")
self.dropout = Dropout(0.5)
self.dense2 = Dense(n_out, activation="softmax")
def call(self, inputs):
x, a, i = inputs
x = self.conv1(x)
x = tf.keras.layers.GlobalAveragePooling1D()(x)
a = self.conv2(a)
a = tf.keras.layers.GlobalAveragePooling1D()(a)
x = tf.keras.layers.Concatenate(axis=1)([a, x, i])
x = self.dense1(x)
x = self.dropout(x)
return self.dense2(x)
def build_graph(self):
x = tf.keras.Input(shape=bs_x)
y = tf.keras.Input(shape=bs_y)
z = tf.keras.Input(shape=bs_z)
return tf.keras.Model(
inputs=[x, y, z],
outputs=self.call([x, y, z])
)
model = GIN0(channels, layers)
model.build(
[
(None, *bs_x),
(None, *bs_y),
(None, *bs_z)
]
)
# OK
model.build_graph().summary()
# OK
tf.keras.utils.plot_model(
model.build_graph(), show_shapes=True
)
I have a one-hop GCN layer:
class GCN_AISUMMER(nn.Module):
"""
"""
def __init__(self, in_features, out_features, bias=True):
super().__init__()
self.linear = nn.Linear(in_features, out_features, bias=bias)
def forward(self, X, A):
"""
A: adjacency matrix
X: graph signal
"""
L = create_graph_lapl_norm(A)
num_neighbours = L.sum(dim=-1, keepdims=True)
x = self.linear(X)
node_feats = torch.bmm(L, x)
node_feats = node_feats / num_neighbours
return node_feats
which is used in the following neural net
class GNN(nn.Module):
def __init__(self,
in_features = 12,
hidden_dim = 128,
classes = 2,
dropout = 0.5):
super(GNN, self).__init__()
self.conv1 = GCN_AISUMMER(in_features, hidden_dim)
self.conv2 = GCN_AISUMMER(hidden_dim, hidden_dim)
self.conv3 = GCN_AISUMMER(hidden_dim, hidden_dim)
self.fc = nn.Linear(hidden_dim, classes)
self.dropout = dropout
def forward(self, x,A):
x = self.conv1(x, A)
x = F.relu(x)
x = self.conv2(x, A)
x = F.relu(x)
x = self.conv3(x, A)
x = F.dropout(x, p=self.dropout, training=self.training)
# aggregate node embeddings
x = x.mean(dim=1)
# final classification layer
return self.fc(x)
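(As an aside, create_graph_lapl_norm is not shown in the question; a common definition, given here purely as an assumption, is the symmetrically normalized adjacency with self-loops:)
import torch
def create_graph_lapl_norm(a):
    # Assumed helper (not shown in the question): D^-1/2 (A + I) D^-1/2,
    # computed per batch element of the adjacency tensor a of shape [B, N, N].
    size = a.shape[-1]
    a = a + torch.eye(size, device=a.device).unsqueeze(0)  # add self-loops
    deg_inv_sqrt = torch.diag_embed(a.sum(dim=-1).pow(-0.5))
    return deg_inv_sqrt @ a @ deg_inv_sqrt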
I tried to print out the weights learned from the input data after training. I tried print(model.conv1.weight) and got AttributeError: 'GCN_AISUMMER' object has no attribute 'weight',
and print(model.trainable_weights) and got AttributeError: 'GNN' object has no attribute 'trainable_weights'.
I can get the weights of the final linear layer with print(model.fc.weight), but I want to get the weights of the graph layers applied to the input data after training.
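For reference, given the classes posted above, the trainable parameters of each graph layer live inside its nn.Linear submodule, so one way to inspect them after training is a sketch like this (trainable_weights is a Keras attribute; the PyTorch equivalent is iterating over named_parameters()):
# The first graph layer keeps its parameters in self.linear:
print(model.conv1.linear.weight)   # weight matrix of the first GCN layer
print(model.conv1.linear.bias)     # its bias vector
# List every trainable parameter of the whole model:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, tuple(param.shape))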
I'm trying to use an LSTM and a transformer to do binary classification, but it does not improve performance over a plain LSTM model; sometimes it is even worse. The input shape of the training data is (3014, 48, 178); the input data is time-series medical data. The following code is for the transformer.
class TokenAndPositionEmbedding(layers.Layer):
def __init__(self, maxlen, vocab_size, embed_dim):
super(TokenAndPositionEmbedding, self).__init__()
self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
def call(self, x):
maxlen = tf.shape(x)[-1]
positions = tf.range(start=0, limit=maxlen, delta=1)
positions = self.pos_emb(positions)
x = self.token_emb(x)
return x + positions
class TransformerBlock(layers.Layer):
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.001):
super(TransformerBlock, self).__init__()
self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
self.ffn = Sequential(
[layers.Dense(ff_dim, activation="relu"),layers.Dense(embed_dim),]
)
self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
self.dropout1 = layers.Dropout(rate)
self.dropout2 = layers.Dropout(rate)
def call(self, inputs, training):
attn_output = self.att(inputs, inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
return self.layernorm2(out1 + ffn_output)
class PositionEmbeddingFixedWeights(layers.Layer):
def __init__(self, sequence_length, output_dim, **kwargs):
super(PositionEmbeddingFixedWeights, self).__init__(**kwargs)
position_embedding_matrix = self.get_position_encoding(sequence_length, output_dim)
self.position_embedding_layer = layers.Embedding(
input_dim=sequence_length, output_dim=output_dim,
weights=[position_embedding_matrix],
trainable=False
)
def get_position_encoding(self, seq_len, d, n=10000):
P = np.zeros((seq_len, d))
for k in range(seq_len):
for i in np.arange(int(d/2)):
denominator = np.power(n, 2*i/d)
P[k, 2*i] = np.sin(k/denominator)
P[k, 2*i+1] = np.cos(k/denominator)
return P
def call(self, inputs):
position_indices = tf.range(tf.shape(inputs)[-2])
embedded_indices = self.position_embedding_layer(position_indices)
return embedded_indices
The model code is:
model = Sequential([tf.keras.Input(shape=(48,178)),
BatchNormalization(),
tf.keras.layers.GRU(units = 128,recurrent_dropout=0.5,activation='tanh', dropout=0.5,return_sequences = True,activity_regularizer=regularizers.L2(0.01)),
TransformerBlock(128, 48, 178),
tf.keras.layers.GlobalAveragePooling1D(),
Dense(60,activation='tanh'),
Dense(1,activation='sigmoid')])
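For reference, this is how such a model would be compiled and trained on the stated data (a sketch; X_train / y_train and the hyperparameter values are placeholders, not taken from the question):
model.compile(optimizer='adam',
              loss='binary_crossentropy',   # sigmoid output -> binary cross-entropy
              metrics=['accuracy'])
# X_train has shape (3014, 48, 178) as stated above; y_train holds 0/1 labels.
model.fit(X_train, y_train, validation_split=0.2, epochs=30, batch_size=32)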
This has troubled me for a long time. I'm trying to use the LSTM to deal with the time-series features and the transformer to learn the importance between features, but it does not seem to be working.
I am getting an error when executing the train function of my MLP code.
This is the error:
mat1 and mat2 shapes cannot be multiplied (128x10 and 48x10)
My code for the train function is this:
class net(nn.Module):
def __init__(self, input_dim2, hidden_dim2, output_dim2):
super(net, self).__init__()
self.input_dim2 = input_dim2
self.fc1 = nn.Linear(input_dim2, hidden_dim2)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_dim2, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim2)
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
x = F.softmax(self.fc3(x))
return x
model = net(input_dim2, hidden_dim2, output_dim2) #create the network
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate2)
def train(num_epochs2):
for i in range(num_epochs2):
tmp_loss = []
for (x,y) in train_loader:
print(y.shape)
print(x.shape)
outputs = model(x) #forward pass
print(outputs.shape)
loss = criterion(outputs, y) #loss computation
tmp_loss.append(loss.item()) #recording the loss
optimizer.zero_grad() #all the accumulated gradient
loss.backward() #auto-differentiaton - accumulation of gradient
optimizer.step() # a gradient step
print("Loss at {}th epoch: {}".format(i, np.mean(tmp_loss)))
I don't know where I'm wrong; the code looks okay to me.
From the limited error message, I guess the problem is in the following snippet:
x = self.fc3(x)
x = F.softmax(self.fc3(x))
Try replacing it with:
x = self.fc3(x)
x = F.softmax(x)
A good question should include the full error traceback and a complete toy example that reproduces the error!
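Putting the fix together, a minimal corrected forward (a sketch that keeps the layers defined in the question) applies fc3 only once; since nn.CrossEntropyLoss already applies log-softmax internally, it can simply return the raw logits:
def forward(self, x):
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    return self.fc3(x)  # raw logits; nn.CrossEntropyLoss applies log-softmax itself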
Here a ReLU activation seems to be missing in the __init__ function, or there is an extra activation in the forward function. Look at the code below and try to figure out what is extra or missing.
def __init__(self, input_dim2, hidden_dim2, output_dim2):
super(net, self).__init__()
self.input_dim2 = input_dim2
self.fc1 = nn.Linear(input_dim2, hidden_dim2)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_dim2, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim2)
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
x = F.softmax(self.fc3(x))
return x
I'm a beginner with the PyTorch framework, and I'm trying to add multi-headed self-attention on top of another architecture (BERT) (this is probably a simple question, but I'm not familiar with PyTorch):
UPDATE 1
import math
class PositionalEncoding(nn.Module):
def __init__(self, d_model, dropout=0.1, max_len=5000):
super(PositionalEncoding, self).__init__()
self.dropout = nn.Dropout(p=dropout)
self.d_model = d_model
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe)
def forward(self, x, seq_len = 768, mask = None):
pos_emb = self.pe[:, :seq_len]
x = x * mask[:, :, None].float()
x = x + pos_emb
return x
The problem of how to add the transformer arises in the following class:
class CamemBERTQA(nn.Module):
def __init__(self,bert_type, hidden_size, num_labels, num_inter_layers=1, heads = 12, do_lower_case = True):
super(CamemBERTQA, self).__init__()
self.do_lower_case = do_lower_case
self.bert_type = bert_type
self.hidden_size = hidden_size
self.num_labels = num_labels
self.num_inter_layers = num_inter_layers
self.camembert = CamembertModel.from_pretrained(self.bert_type)
# ---------------- Transformer ------------------------------------------
self.d_model = self.hidden_size # 768
dropout = 0.1
self.pos_emb = PositionalEncoding(d_model = self.d_model, dropout = dropout)
self.transformer_inter = nn.ModuleList(
[nn.TransformerEncoderLayer(d_model = self.d_model, nhead = heads, dim_feedforward = 2048, dropout = dropout)
for _ in range(num_inter_layers)])
# ---------------- Transformer ------------------------------------------
self.qa_outputs = nn.Linear(self.hidden_size, self.num_labels)
def forward(self, input_ids, mask=None):
bert_output = self.camembert(input_ids = input_ids) # input_ids is a tensor
# ---------------- Transformer ------------------------------------------
seq_len = self.hidden_size
x = self.pos_emb(x = bert_output, seq_len = seq_len, mask = None)
for i in range(self.num_inter_layers):
x = self.transformer_inter[i](i, x, x, 1 - mask) # all_tokens * max_tokens * dim
output = self.layer_norm(x)
# ---------------- Transformer ------------------------------------------
sequence_output = output[0]
logits = self.qa_outputs(sequence_output)
start_logits, end_logits = logits.split(1, dim=-1)
start_logits = start_logits.squeeze(-1)
end_logits = end_logits.squeeze(-1)
outputs = (start_logits, end_logits,)
return x
Thank you so much.
So it seems that you're trying to add a Transformer network on top of the BERT component. It has to be mentioned that the self-attention network is only a part of the Transformer network, meaning that Transformers have other components besides self-attention as well. I would recommend using the Transformer (which has the self-attention component included) as an encoder that receives BERT vectors and transforms them into another representation (in another space).
Try this instead of self.attention = MultiHeadAttention():
self.transformer_inter = nn.ModuleList(
[TransformerEncoderLayer(d_model, heads, d_ff, dropout)
for _ in range(num_inter_layers)])
and then, in forward(), call self.transformer_inter in a loop, which will give you the representations produced by the Transformer architecture, like this:
def forward(self, bert_output, mask):
batch_size, seq_len = bert_output.size(0), bert_output.size(1)
# Transformer Encoder
pos_emb = self.pos_emb.pe[:, :seq_len]
x = bert_output * mask[:, :, None].float()
x = x + pos_emb
for i in range(self.num_inter_layers):
x = self.transformer_inter[i](i, x, x, 1 - mask) # all_tokens * max_tokens * dim
x = self.layer_norm(x) # Transformer also normalizes the outputs from each layer.
# x is the encoded vectors by Transformer encoder
return x
Then, using an nn.Linear(.) layer, do another transformation to map hidden_size to the number of labels for your task, which will give you the logits for each label. This should all be done within the BERT class that you have posted.
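Concretely, that last mapping can reuse the qa_outputs layer already defined in the question's CamemBERTQA class (a sketch using those names, with x being the encoder output from above):
logits = self.qa_outputs(x)                      # [batch, seq_len, num_labels]
start_logits, end_logits = logits.split(1, dim=-1)
start_logits = start_logits.squeeze(-1)          # [batch, seq_len]
end_logits = end_logits.squeeze(-1)
return start_logits, end_logits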
Note that TransformerEncoderLayer is a placeholder class that I used above, so you have to either implement it or use an open-source package. As Transformers are quite well known, I think you won't have trouble finding an implementation.
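If you use PyTorch's built-in nn.TransformerEncoderLayer (as the UPDATE 1 code above already does), note that its forward signature differs from the loop shown here: it takes the sequence plus optional masks rather than (i, x, x, 1 - mask). A minimal sketch of the encoder loop with that layer, assuming mask is 1 for real tokens and self.layer_norm is defined in __init__ (e.g. nn.LayerNorm(d_model)):
# Sketch with torch.nn.TransformerEncoderLayer. By default the layer expects
# input of shape [seq_len, batch, d_model] (no batch_first), and
# src_key_padding_mask of shape [batch, seq_len] with True at padded positions.
for layer in self.transformer_inter:
    x = layer(x, src_key_padding_mask=(mask == 0))
x = self.layer_norm(x)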