Why is LSTM+transformer not working well? - python

I'm trying to use an LSTM and a transformer for binary classification, but it does not perform better than a plain LSTM model, and sometimes it is even worse. The training data has input shape (3014, 48, 178) and consists of time-series medical data. The following code is for the transformer.
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, layers, regularizers
from tensorflow.keras.layers import BatchNormalization, Dense

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.001):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class PositionEmbeddingFixedWeights(layers.Layer):
    def __init__(self, sequence_length, output_dim, **kwargs):
        super(PositionEmbeddingFixedWeights, self).__init__(**kwargs)
        position_embedding_matrix = self.get_position_encoding(sequence_length, output_dim)
        self.position_embedding_layer = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim,
            weights=[position_embedding_matrix],
            trainable=False
        )

    def get_position_encoding(self, seq_len, d, n=10000):
        P = np.zeros((seq_len, d))
        for k in range(seq_len):
            for i in np.arange(int(d/2)):
                denominator = np.power(n, 2*i/d)
                P[k, 2*i] = np.sin(k/denominator)
                P[k, 2*i+1] = np.cos(k/denominator)
        return P

    def call(self, inputs):
        position_indices = tf.range(tf.shape(inputs)[-2])
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_indices
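(For reference, the get_position_encoding loop above builds the standard fixed sinusoidal positional encoding: P[k, 2i] = sin(k / n^(2i/d)) and P[k, 2i+1] = cos(k / n^(2i/d)), with n = 10000 and d the embedding dimension.)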
The model code is:
model = Sequential([
    tf.keras.Input(shape=(48, 178)),
    BatchNormalization(),
    tf.keras.layers.GRU(units=128, recurrent_dropout=0.5, activation='tanh',
                        dropout=0.5, return_sequences=True,
                        activity_regularizer=regularizers.L2(0.01)),
    TransformerBlock(128, 48, 178),
    tf.keras.layers.GlobalAveragePooling1D(),
    Dense(60, activation='tanh'),
    Dense(1, activation='sigmoid')
])
This has troubled me for a long time. The idea is to use the LSTM to handle the time-series structure and the transformer to learn the importance between features, but it does not seem to work.

Related

How to plot Graph-neural-network model-graph when using tensorflow Model Subclass API with spektral layers?

I am unable to plot a graph neural network. I have seen a few related questions (1, 2, 3) on this topic, but their answers do not apply to graph neural networks.
What makes this different is that the input includes objects of different dimensions, e.g. the properties matrix has dimension [n_nodes, n_node_features], the adjacency matrix has dimension [n_nodes, n_nodes], etc. Here is an example of my Model:
class GIN0(Model):
    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
        self.convs = []
        for _ in range(1, n_layers):
            self.convs.append(
                GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
            )
        self.pool = GlobalAvgPool()
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(channels, activation="relu")

    def call(self, inputs):
        x, a, i = inputs
        x = self.conv1([x, a])
        for conv in self.convs:
            x = conv([x, a])
        x = self.pool([x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)
One of the answers in 2 suggested adding a build_graph function as follows:
class my_model(Model):
    def __init__(self, dim):
        super(my_model, self).__init__()
        self.Base = VGG16(input_shape=(dim), include_top=False, weights='imagenet')
        self.GAP = L.GlobalAveragePooling2D()
        self.BAT = L.BatchNormalization()
        self.DROP = L.Dropout(rate=0.1)
        self.DENS = L.Dense(256, activation='relu', name='dense_A')
        self.OUT = L.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x = self.Base(inputs)
        g = self.GAP(x)
        b = self.BAT(g)
        d = self.DROP(b)
        d = self.DENS(d)
        return self.OUT(d)

    # AFAIK: The most convenient method to print model.summary()
    # similar to the sequential or functional API like.
    def build_graph(self):
        x = Input(shape=(dim))
        return Model(inputs=[x], outputs=self.call(x))

dim = (124, 124, 3)
model = my_model((dim))
model.build((None, *dim))
model.build_graph().summary()
However, I am not sure how to define dim or Input Layer using tf.keras.layers.Input for such a hybrid data-structure as described above.
Any suggestions?
Here is minimal code to plot such a subclassed multi-input model. Note, as stated in the comment above, there are some issues with your GINConv, which is from spektral and is not related to the main query, so I will give a general solution for such multi-input modelling scenarios. To make it work with spektral, please reach out to the package author for further discussion.
From the spektral repo, here, I got an idea of the shapes of the input tensors:
x, y = next(iter(loader_tr))
bs_x = list(x[0].shape)
bs_y = list(x[1].shape)
bs_z = list(x[2].shape)
bs_x, bs_y, bs_z
([1067, 4], [1067, 1067], [1067])
Here is a similar model; it takes the same number of inputs with the same shapes, but without GINConv.
class GIN0(Model):
    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        self.conv2 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(n_out, activation="softmax")

    def call(self, inputs):
        x, a, i = inputs
        x = self.conv1(x)
        x = tf.keras.layers.GlobalAveragePooling1D()(x)
        a = self.conv2(a)
        a = tf.keras.layers.GlobalAveragePooling1D()(a)
        x = tf.keras.layers.Concatenate(axis=1)([a, x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)

    def build_graph(self):
        x = tf.keras.Input(shape=bs_x)
        y = tf.keras.Input(shape=bs_y)
        z = tf.keras.Input(shape=bs_z)
        return tf.keras.Model(
            inputs=[x, y, z],
            outputs=self.call([x, y, z])
        )

model = GIN0(channels, layers)
model.build(
    [
        (None, *bs_x),
        (None, *bs_y),
        (None, *bs_z)
    ]
)

# OK
model.build_graph().summary()

# OK
tf.keras.utils.plot_model(
    model.build_graph(), show_shapes=True
)

neural network no attribute weight

I have a one-hop GCN layer:
class GCN_AISUMMER(nn.Module):
    """
    One-hop GCN layer.
    """
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)

    def forward(self, X, A):
        """
        A: adjacency matrix
        X: graph signal
        """
        L = create_graph_lapl_norm(A)
        num_neighbours = L.sum(dim=-1, keepdims=True)
        x = self.linear(X)
        node_feats = torch.bmm(L, x)
        node_feats = node_feats / num_neighbours
        return node_feats
which is used in the following neural net
class GNN(nn.Module):
    def __init__(self,
                 in_features=12,
                 hidden_dim=128,
                 classes=2,
                 dropout=0.5):
        super(GNN, self).__init__()
        self.conv1 = GCN_AISUMMER(in_features, hidden_dim)
        self.conv2 = GCN_AISUMMER(hidden_dim, hidden_dim)
        self.conv3 = GCN_AISUMMER(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, classes)
        self.dropout = dropout

    def forward(self, x, A):
        x = self.conv1(x, A)
        x = F.relu(x)
        x = self.conv2(x, A)
        x = F.relu(x)
        x = self.conv3(x, A)
        x = F.dropout(x, p=self.dropout, training=self.training)
        # aggregate node embeddings
        x = x.mean(dim=1)
        # final classification layer
        return self.fc(x)
I tried to print out the weights after training. print(model.conv1.weight) gives AttributeError: 'GCN_AISUMMER' object has no attribute 'weight', and print(model.trainable_weights) gives AttributeError: 'GNN' object has no attribute 'trainable_weights'.
I can get the weights of the final fc layer with print(model.fc.weight), but I want the weights of the layer that is applied to the input data after training.
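For what it's worth, in the GCN_AISUMMER layer above the trainable weights live on the inner nn.Linear, so (assuming model is an instance of the GNN class above) a minimal sketch of how to reach them is:

# the learnable parameters of the first GCN layer sit on its inner nn.Linear
print(model.conv1.linear.weight)   # weight matrix of the layer applied to the input
print(model.conv1.linear.bias)     # its bias vector

# or list every trainable parameter in the network
# (the PyTorch counterpart of Keras' trainable_weights)
for name, param in model.named_parameters():
    print(name, param.shape)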

Combining two pytorch functions into one

I have two classes like this (the source of the code is here):
device = torch.device('cuda')

dataset = TUDataset(root='/tmp/MUTAG', name='MUTAG', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

train_dataset = dataset  # just for testing
val_dataset = dataset
test_dataset = dataset

graph_train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
graph_val_loader = DataLoader(val_dataset, batch_size=8)

gnn_layer_by_name = {
    "GCN": geom_nn.GCNConv,
    "GAT": geom_nn.GATConv,
    "GraphConv": geom_nn.GraphConv
}

class GCNLayer(nn.Module):
    def __init__(self, c_in, c_out):
        super().__init__()
        self.projection = nn.Linear(c_in, c_out)

    def forward(self, node_feats, adj_matrix):
        num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
        node_feats = self.projection(node_feats)
        node_feats = torch.bmm(adj_matrix, node_feats)
        node_feats = node_feats / num_neighbours
        return node_feats
class GNNModel(nn.Module):
    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function,
                 optimizer_name, learning_rate, dp_rate_linear, layer_name="GCN", **kwargs):
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]
        layers = []
        activation_function = eval(activation_function)  # not great to use
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers-1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                activation_function,
                nn.Dropout(p=dp_rate_linear)
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index):
        for l in self.layers:
            if isinstance(l, geom_nn.MessagePassing):
                x = l(x, edge_index)
            else:
                x = l(x)
        return x

class GraphGNNModel(nn.Module):
    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear, **kwargs):
        super().__init__()
        self.GNN = GNNModel(c_in=c_in,
                            c_hidden=c_hidden,
                            c_out=c_hidden,
                            dp_rate_linear=dp_rate_linear,
                            **kwargs)
        self.head = nn.Sequential(
            nn.Dropout(p=dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index, batch_idx):
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx)
        x = self.head(x)
        return x
As you can see, I don't really need GNNModel and GraphGNNModel to be two separate classes; the second one just adds a sequential head to the end of the first.
I tried combining them like this:
class GNNModel(nn.Module):
    def __init__(self, c_in, c_hidden, c_out, num_layers, activation_function,
                 optimizer_name, learning_rate, dp_rate_linear, layer_name="GCN", **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate_linear - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT; I'm not using GAT here)
            activation_function - Activation function
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]
        layers = []
        activation_function = eval(activation_function)  # not great to use
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers-1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                activation_function,
                nn.Dropout(p=dp_rate_linear)
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)
        self.head = nn.Sequential(
            nn.Dropout(p=dp_rate_linear),
            nn.Linear(c_hidden, c_out)
        )

    def forward(self, x, edge_index):
        for l in self.layers:
            if isinstance(l, geom_nn.MessagePassing):  # passing data between conv
                x = l(x, edge_index)  # what is this
            else:
                x = l(x)
        x = self.GNN(x, edge_index)
        x = geom_nn.global_mean_pool(x, batch_idx)
        x = self.head(x)
        return x
But I get the error:
TypeError: forward() takes 3 positional arguments but 4 were given
Could someone show me the correct way to combine these? (The exact explanation of the code is in the "Graph level tasks / graph classification" section of here.)
Try adding batch_idx as a parameter to your new forward function. I also noted some other inconsistencies, like: where is geom_nn being passed to the function? You probably want to use self.geom_nn, and for that you need to fix the __init__() part as well.
def forward(self, x, edge_index, batch_idx):  # here you must pass batch_idx
    for l in self.layers:
        if isinstance(l, geom_nn.MessagePassing):  # passing data between conv
            x = l(x, edge_index)  # what is this
        else:
            x = l(x)
    x = self.GNN(x, edge_index)
    x = geom_nn.global_mean_pool(x, batch_idx)  # here you use batch_idx
    # where is geom_nn coming from???
    x = self.head(x)
    return x

Pytorch variable size input for GRUCell

I'm new to PyTorch and I'm trying to train something with variable-size inputs, and I want to use nn.GRUCell, not nn.GRU. I first post-padded the inputs so that they are all the same length. I want to know whether my Model class is correct or not. What I'm doing is storing the output after every time step into output_sequence and then keeping only the output at the trajectory_length point.
class Model(nn.Module):
    def __init__(self, cell, length, input_size, hidden_size, num_layers, output_size):
        super(Model, self).__init__()
        self.cell = cell
        self.length = length
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.rnn = cell(self.input_size, self.hidden_size)
        self.linear1 = nn.Linear(self.hidden_size+1, self.hidden_size)
        self.linear2 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, state, batch_size, action_masks, trajectory_length):
        hidden_layer = torch.zeros(batch_size, self.hidden_size)
        state = torch.transpose(state, 0, 1).float()
        output_sequence = []
        for i in range(self.length):
            hidden_layer = self.rnn(state[i], hidden_layer)
            output_sequence.append(hidden_layer)
        output = torch.stack([output_sequence[l-1][i] for i, l in enumerate(trajectory_length)])
        output = self.linear2(output)
        for i in range(batch_size):
            output[i][~action_masks[i]] = float('-inf')
        return output
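For illustration, here is a minimal usage sketch of the Model class above; the batch size, sequence length, feature count, and action count below are made-up assumptions, not values from the original post:

import torch
import torch.nn as nn

batch_size, max_len, n_features, n_actions = 4, 10, 6, 5

model = Model(nn.GRUCell, length=max_len, input_size=n_features,
              hidden_size=32, num_layers=1, output_size=n_actions)

state = torch.randn(batch_size, max_len, n_features)              # post-padded inputs
action_masks = torch.ones(batch_size, n_actions, dtype=torch.bool)
trajectory_length = [10, 7, 3, 10]                                 # true (unpadded) lengths

out = model(state, batch_size, action_masks, trajectory_length)
print(out.shape)  # torch.Size([4, 5]): one output per sequence, taken at its last real step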

No gradients provided Tensorflow Keras with custom Training Step

I am trying to experiment with different implementations of a VAE in TensorFlow Keras. In the following model, I get an error that no gradients are being provided for any variables in any layer.
tfkl = tf.keras.layers

class sampling2(tfk.layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch_size = tf.shape(z_mean)[0]
        dim_z = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch_size, dim_z))
        z_sample = z_mean + tf.exp(0.5 * z_log_var) * epsilon
        return z_sample

class encoder2(tfk.layers.Layer):
    def __init__(self, latent_dim=30, intermediate_dim=200, name='encoder2', **kwargs):
        super(encoder2, self).__init__(name=name, **kwargs)
        self.dense_1 = tfkl.Dense(intermediate_dim, activation="relu")
        self.dense_mean = tfkl.Dense(latent_dim)
        self.dense_log_var = tfkl.Dense(latent_dim)
        self.sampling = sampling2()

    def call(self, inputs):
        x = self.dense_1(inputs)
        z_mean = self.dense_mean(x)
        z_log_var = self.dense_log_var(x)
        z = self.sampling((z_mean, z_log_var))
        return z_mean, z_log_var, z

class decoder2(tfk.layers.Layer):
    def __init__(self, original_dim, intermediate_dim=200, name='decoder2', **kwargs):
        super(decoder2, self).__init__(name=name, **kwargs)
        self.dense_1 = tfkl.Dense(intermediate_dim, activation='relu')
        self.dense_output = tfkl.Dense(original_dim, activation='sigmoid')

    def call(self, inputs):
        x = self.dense_1(inputs)
        logits = self.dense_output(x)
        return logits

class VAE2(tfk.Model):
    def __init__(self, original_dim, intermediate_dim=800, latent_dim=50,
                 name='VAE2', **kwargs):
        super(VAE2, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = encoder2(latent_dim=latent_dim, intermediate_dim=intermediate_dim)
        self.decoder = decoder2(original_dim, intermediate_dim=intermediate_dim)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        return reconstructed

    def training_step(self, inputs):
        dense_train_batch = tf.sparse.to_dense(inputs)
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(dense_train_batch)
            reconstructed = self.decoder(z)
            kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) -
                                            tf.exp(z_log_var) + 1)
            self.add_loss(kl_loss)
        grads = tape.gradient(loss, self.trainable_weights)
        optimizer.apply_gradients(zip(grads, self.trainable_weights))

loss_fn = keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

vae2 = VAE2(df_track_names_reduced.shape[0])
vae2.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001, amsgrad=True), loss=loss_fn)
vae2.fit(train_dataset, epochs=20)
Below is the error message:
ValueError: No gradients provided for any variable: ['VAE2/encoder2/dense_8/kernel:0', 'VAE2/encoder2/dense_8/bias:0', 'VAE2/encoder2/dense_9/kernel:0', 'VAE2/encoder2/dense_9/bias:0', 'VAE2/encoder2/dense_10/kernel:0', 'VAE2/encoder2/dense_10/bias:0', 'VAE2/decoder2/dense_11/kernel:0', 'VAE2/decoder2/dense_11/bias:0', 'VAE2/decoder2/dense_12/kernel:0', 'VAE2/decoder2/dense_12/bias:0'].
You have to pass a loss tensor to tape.gradient, not a function. Calculate the binary loss, add it to kl_loss (loss = binary_loss + kl_loss), and then pass that to tape.gradient().
If you apply gradients manually, you should not call model.compile() and model.fit(); build a custom training loop instead. See here: https://keras.io/guides/writing_a_training_loop_from_scratch/.
But I don't think you really need to apply gradients manually. I would just add kl_loss within the call function. See here: https://keras.io/api/losses/
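Not part of the original answer, but as an illustration of that second option, here is a minimal sketch that drops the custom training step, registers the KL term with add_loss inside call, and lets compile()/fit() handle the reconstruction loss; it reuses the encoder2/decoder2 layers and the tfk alias from the question, and original_dim/train_dataset stand in for the question's own values:

class VAE2(tfk.Model):
    def __init__(self, original_dim, intermediate_dim=800, latent_dim=50,
                 name='VAE2', **kwargs):
        super(VAE2, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = encoder2(latent_dim=latent_dim, intermediate_dim=intermediate_dim)
        self.decoder = decoder2(original_dim, intermediate_dim=intermediate_dim)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        # register the KL term here; Keras adds it to the compiled loss automatically
        kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return self.decoder(z)

# no custom training step: the compiled BinaryCrossentropy handles reconstruction,
# add_loss handles the KL term; train_dataset must yield (inputs, targets) pairs
# with targets equal to the inputs so the reconstruction loss has something to compare
vae2 = VAE2(original_dim)
vae2.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
             loss=tf.keras.losses.BinaryCrossentropy())
vae2.fit(train_dataset, epochs=20)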
