How to combine EmbeddingBag and LSTM layers?

Python 3.9.6, PyTorch 1.9.0
I created a neural network with three layers; the first is an EmbeddingBag and the second an LSTM.
class PyTorchNetwork(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, dropout):
        nn.Module.__init__(self)
        # embedding layer
        self.embedding = nn.EmbeddingBag(num_embeddings=vocab_size,
                                         embedding_dim=embed_dim,
                                         sparse=True)
        # lstm layer
        self.lstm = nn.LSTM(input_size=embed_dim,
                            hidden_size=hidden_dim,
                            num_layers=1,
                            dropout=dropout)
        self.fc = nn.Linear(in_features=hidden_dim,
                            out_features=1)
        self.act = nn.Sigmoid()
        # init network with small weights
        self.init_weights()

    def forward(self, text, offsets):
        # text.shape = torch.Size([363])
        # offsets.shape = torch.Size([64])
        embedded = self.embedding(text, offsets)
        # embedded.shape = torch.Size([64, 32])
        _, (hidden, _) = self.lstm(embedded)  # ERROR!!!
        dense_outputs = self.fc(hidden.squeeze(0))
        outputs = self.act(dense_outputs)
        return outputs
When I start training, the output of the first layer doesn't match the input dimensionality expected by the second layer, and the program raises an error:
vocab_size = len(vocabulary)
embed_dim = 32
hidden_dim = 16
dropout = self.DROP_OUT
model = PyTorchNetwork(vocab_size, embed_dim, hidden_dim, dropout).to(self.device)
predicted_label = model(text, offsets)
Full error trace:
> File "/Presentation/MixinButton.py", line 31, in _press_button_start
> self.controller.perform_business_task()
> File "/ControllerPresentation/AController.py", line 16, in perform_business_task
> result = self.perform_task()
> File "/ControllerPresentation/TuningPredictionModelController.py", line 36, in perform_task
> return Initialization.ibusiness.tuning_prediction_model(self.signal_message, self.analysis_type, self.operation, self.severity_of_disease, self.print_only_final_results)
> File "/Business/IBusiness.py", line 35, in tuning_prediction_model
> return self._perform_task(task)
> File "/Business/IBusiness.py", line 59, in _perform_task
> task.run()
> File "/Business/TuningPredictionModelTask.py", line 27, in run
> result = optimizator.learn(severity, silently=True)
> File "/home/ivan/eclipse-workspace/GompertzLaw/NeuralNetwork/PytorchOptimizator.py", line 94, in learn
> self._train(train_dataloader, model, optimizer, epoch, silently)
> File "/NeuralNetwork/PytorchOptimizator.py", line 115, in _train
> predicted_label = model(text, offsets)
> File "/home/ivan/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
> return forward_call(*input, **kwargs)
> File "/NeuralNetwork/PyTorchNetwork.py", line 46, in forward
> _, (hidden, _) = self.lstm(embedded)
> File "/home/ivan/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
> return forward_call(*input, **kwargs)
> File "/home/ivan/.local/lib/python3.9/site-packages/torch/nn/modules/rnn.py", line 677, in forward
> self.check_forward_args(input, hx, batch_sizes)
> File "/home/ivan/.local/lib/python3.9/site-packages/torch/nn/modules/rnn.py", line 620, in check_forward_args
> self.check_input(input, batch_sizes)
> File "/home/ivan/.local/lib/python3.9/site-packages/torch/nn/modules/rnn.py", line 201, in check_input
> raise RuntimeError(
> RuntimeError: input must have 3 dimensions, got 2
How do I fix the error?
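A likely fix, as a sketch: nn.EmbeddingBag pools each bag of token embeddings into a single vector, so its output is 2-D, (batch, embed_dim), here (64, 32), while nn.LSTM expects a 3-D input of (seq_len, batch, input_size). Since each bag is already reduced to one vector, you can feed it as a length-one sequence by adding a dummy time dimension (the unsqueeze(0) below is my assumption about the intended layout, not part of the original code):

def forward(self, text, offsets):
    embedded = self.embedding(text, offsets)    # (64, 32)
    embedded = embedded.unsqueeze(0)            # (1, 64, 32): seq_len=1, batch=64
    _, (hidden, _) = self.lstm(embedded)        # hidden: (1, 64, 16)
    dense_outputs = self.fc(hidden.squeeze(0))  # (64, 1)
    return self.act(dense_outputs)

That said, an LSTM over length-one sequences contributes little, because EmbeddingBag has already pooled the per-token sequence away. If the LSTM should see individual tokens, nn.Embedding (which keeps the sequence dimension) may be the better fit.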

Related

PyTorch transformer decoder in-place modification error (although I didn't use in-place operations)

I am studying by designing a model that uses a Transformer encoder and decoder.
I train a classification model on the encoder's result and a generative model on the decoder's result (with the encoder's result as its input).
The model exports multiple results as output.
The following error occurred during training.
I tracked the error using torch.autograd.set_detect_anomaly(True).
I saw an article about the same error on the PyTorch forum.
However, those posts mostly involved in-place operations such as += or x[:, 0] = 0, so their errors were solved once those were fixed.
But I didn't use any of these operations.
I tried changing unsqueeze() and squeeze() to view(), and also attaching clone() to the tensor manipulations, but the error wasn't fixed.
What is the problem?
model code
import torch
import torch.nn as nn
import random
from torch.nn.utils.rnn import pad_sequence
import math
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertForQuestionAnswering
from tqdm import tqdm
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class SelfAttention(nn.Module):
    def __init__(self, embedding_dim, num_heads):
        super(SelfAttention, self).__init__()
        self.multihead_attn = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads,
                                                    batch_first=True)

    def forward(self, x):
        query = x
        key = x
        value = x
        attn_output = self.multihead_attn(query, key, value, need_weights=False)
        return attn_output

class Encoder(nn.Module):
    def __init__(self, embedding_dim):
        super(Encoder, self).__init__()
        self.embedding_dim = embedding_dim
        # self.pos_encoder = PositionalEncoding()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer=self.encoder_layer, num_layers=6)
        self.feedforward = nn.Linear(self.embedding_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.encoder(x)
        cls_out = torch.mean(out, dim=-2)
        cls_out = self.feedforward(cls_out)
        cls_out = self.sigmoid(cls_out)
        return out, cls_out

class Decoder(nn.Module):
    def __init__(self, embedding_dim):
        super(Decoder, self).__init__()
        # self.bert = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
        self.embedding_dim = embedding_dim
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
        self.decoder = nn.TransformerDecoder(decoder_layer=self.decoder_layer, num_layers=6)

    def forward(self, tgt, memory):
        out = self.decoder(tgt, memory)
        return out

class AlzhBERT(nn.Module):
    def __init__(self, embedding_dim):
        super(AlzhBERT, self).__init__()
        self.embedding_dim = embedding_dim
        self.max_sent_length = 7
        self.token_level_attn = nn.ModuleList([SelfAttention(self.embedding_dim, num_heads=8) for _ in range(10)])
        self.token_level_attn_single = SelfAttention(self.embedding_dim, num_heads=8)
        self.sentence_level_attn = SelfAttention(self.embedding_dim, num_heads=8)
        self.encoder = Encoder(embedding_dim=embedding_dim)
        self.decoder = Decoder(embedding_dim=embedding_dim)

    def forward(self, X_batch):
        i = 0
        enc_outs = {}
        dec_outs = {}
        for datastruct in X_batch:
            enc_outs[i] = []
            dec_outs[i] = []
            j = 0
            for section in datastruct.sections:
                print(i, " + ", j)
                inv = section.inv.requires_grad_(True).to(device)
                y_dec = section.next_uttr.requires_grad_(True).to(device)
                par = section.par
                # print(par)
                try:
                    tmp = par.dim()
                except AttributeError:
                    print(par)
                    print("attr err")
                    j = j + 1
                    continue

                # par = par.permute(1, 0, 2)  # (seq_len, sent_len, embed) => self-attention in one pass
                # several separate self-attentions:
                # for p in par:
                result = self.token_level_attn_single(par.to(device).requires_grad_(True))[0]
                res = torch.mean(result, dim=-2).unsqueeze(0)
                res_sent = self.sentence_level_attn(res.to(device))[0]
                context = torch.mean(res_sent, dim=-3)

                inv_input = torch.mean(inv, dim=-2)

                # x_enc = torch.concat((inv_input, context))
                # x_enc = x_enc.view([1, -1, self.embedding_dim])
                enc_out, cls_out = self.encoder(torch.concat([inv_input, context]).unsqueeze(0))

                # y_dec = torch.mean(y_dec, dim=-2).to(device)
                # enc_out = torch.mean(enc_out, dim=-2).unsqueeze(0).to(device)
                dec_out = self.decoder(y_dec, enc_out.to(device))

                enc_outs[i].append(cls_out)
                dec_outs[i].append(dec_out)
                j = j + 1
            enc_outs[i] = torch.tensor(enc_outs[i], requires_grad=True)
            i = i + 1
        return enc_outs, dec_outs
train code
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device: ", device)
torch.autograd.set_detect_anomaly(True)

def train_loop(dataloader, model, loss_fn, optimizer, epochs):
    # dataloader = dataloader["train"]
    size = len(dataloader.dataset)
    writer = SummaryWriter()

    enc_optimizer = optimizer[0]
    dec_optimizer = optimizer[1]

    for epoch in range(epochs):
        enc_loss_hist = []
        dec_loss_hist = []
        accuracy = []
        print("======== epoch ", epoch, "==========\n")

        for i, (Xs, ys) in tqdm(enumerate(dataloader), desc="Train..."):
            X_folds, y_folds = cross_validation(10, Xs, ys)
            model.train()
            for X, y in zip(X_folds['train'], y_folds['train']):  # X is a list of DataStructs
                # print("<Check Data>")
                # print("X 0: ", X[0])
                # print("label 0: ", y[0])
                # Prediction and Loss
                # X = batch_to_tensor(X)
                # X = torch.tensor(X).to(device)
                y = torch.tensor(y, dtype=torch.float32).to(device)
                enc_preds, dec_preds = model(X)
                for k in range(len(X)):
                    for t in range(len(enc_preds[k])):
                        enc_loss = loss_fn(y[k].to(device), enc_preds[k][t].to(device)).requires_grad_(True)
                        dec_loss = loss_fn(X[k].sections[t].next_uttr.to(device), dec_preds[k][t].to(device)).requires_grad_(True)
                        cls_out = torch.tensor(1 if enc_preds[k][t] >= 0.5 else 0)
                        cls_loss = torch.sum(cls_out == y[k])
                        accuracy.append(cls_loss)

                        # Backpropagation
                        enc_optimizer.zero_grad()
                        dec_optimizer.zero_grad()
                        enc_loss.backward(retain_graph=True)
                        enc_optimizer.step()
                        dec_loss.backward()
                        dec_optimizer.step()

                        enc_loss_hist.append(enc_loss)
                        dec_loss_hist.append(dec_loss)

            cross_validation_loop(X_folds["valid"], y_folds["valid"], model, loss_fn, epoch)

        enc_loss_save = torch.mean(torch.tensor(enc_loss_hist))
        dec_loss_save = torch.mean(torch.tensor(dec_loss_hist))
        accuracy_save = torch.mean(torch.tensor(accuracy, dtype=torch.float32))

        writer.add_scalar("Avg Enc Loss/train", enc_loss_save, epoch)
        writer.add_scalar("Avg Dec Loss/train", dec_loss_save, epoch)
        writer.add_scalar("Avg Accuracy/train", accuracy_save)

        if device == "cuda":
            saved_model_dir = "/home/juny/AlzheimerModel/checkpoint"
        else:
            saved_model_dir = "./saved_model"

        now = datetime.now()
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': enc_optimizer.state_dict(),
            'loss': [enc_loss_save, dec_loss_save],
        }, os.path.join('/home/juny/AlzheimerModel/checkpoint',
                        now.strftime("%Y-%m-%d-%H-%M") + "-e" + str(epoch) + ".pt"))
        torch.save(model.state_dict(), os.path.join(saved_model_dir, "saved_model" + now.strftime("%Y-%m-%d-%H-%M") + ".pt"))

        encloss, decloss, current = enc_loss_save, dec_loss_save.item(), i * len(X)
        print(f"enc loss: {encloss:>7f} dec loss: {decloss:>7f} [{current:>5d}/{size:>5d}]")

    writer.flush()
    writer.close()
error
C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py:173: UserWarning: Error detected in NativeLayerNormBackward0. Traceback of forward call that caused the error:
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 47, in train_loop
enc_preds, dec_preds = model(X)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 139, in forward
dec_out = self.decoder(y_dec, enc_out.to(device))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 84, in forward
out = self.decoder(tgt, memory)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 291, in forward
output = mod(output, memory, tgt_mask=tgt_mask,
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 578, in forward
x = self.norm3(x + self._ff_block(x))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\normalization.py", line 189, in forward
return F.layer_norm(
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\functional.py", line 2503, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Train...: 0it [00:05, ?it/s]
Traceback (most recent call last):
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 65, in train_loop
loss.backward(retain_graph=True)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\_tensor.py", line 396, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [768]] is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Process finished with exit code 1
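A plausible cause, sketched from the trace rather than confirmed: enc_optimizer.step() modifies the shared parameters in place between enc_loss.backward(retain_graph=True) and dec_loss.backward(), so the retained graph used by dec_loss still references parameter tensors (such as the [768] LayerNorm weight in the decoder) that are now at version 1. Running both backward passes before either optimizer step avoids the version bump:

# Backpropagation: accumulate both gradients first, then update the weights
enc_optimizer.zero_grad()
dec_optimizer.zero_grad()
enc_loss.backward(retain_graph=True)
dec_loss.backward()       # the graph still matches the un-stepped weights
enc_optimizer.step()      # only now modify parameters in place
dec_optimizer.step()

Equivalently, (enc_loss + dec_loss).backward() in a single pass would work. Separately, calling .requires_grad_(True) on a computed loss and rebuilding outputs with torch.tensor(enc_outs[i], requires_grad=True) detach values from the autograd graph, so those lines are worth revisiting too.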

Rare case with: mat1 and mat2 shapes cannot be multiplied

self.model = DQNetwork(11, 256, 3)

class DQNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x
Traceback (most recent call last):
File "E:/Work/Programming/PyArk/main.py", line 32, in <module>
agent.train()
File "E:\Work\Programming\PyArk\Agent\agent.py", line 31, in train
self.step(states, actions, rewards, next_states, dones)
File "E:\Work\Programming\PyArk\Agent\agent.py", line 20, in step
self._trainer.train(state, action, reward, next_state, done)
File "E:\Work\Programming\PyArk\Agent\DQN\dqn_trainer.py", line 32, in train
prediction = self.model(state)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Work\Programming\PyArk\Agent\DQN\dqn_network.py", line 19, in forward
x = F.relu(self.linear1(x))
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (11x5 and 11x256)
I don't understand why this error pops up. I use the same code in other projects... what is going on?
model( torch.zeros(11,5) ) --> model( torch.zeros(5,11) )
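For context, a short sketch of why that transpose fixes it: nn.Linear(11, 256) expects the last dimension of its input to be 11 (it multiplies the input by an 11x256 matrix, the transpose of its stored weight), so a (11, 5) tensor is read as a batch of 11 vectors with 5 features each, hence the 11x5 and 11x256 mismatch:

import torch

model = DQNetwork(11, 256, 3)    # the class defined above
model(torch.zeros(11, 5))        # RuntimeError: mat1 and mat2 shapes cannot be multiplied (11x5 and 11x256)
out = model(torch.zeros(5, 11))  # OK: batch of 5 states with 11 features -> out.shape == (5, 3)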

Expected object of device type cuda but got device type cpu

I am trying to switch the training of my network from CPU to GPU but keep getting the following error:
Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _thnn_conv2d_forward
Error occurs, No graph saved
Traceback (most recent call last):
File "<ipython-input-6-2720a5ea768d>", line 12, in <module>
tb.add_graph(network, images)
File "E:\Anaconda\lib\site-packages\torch\utils\tensorboard\writer.py", line 707, in add_graph
self._get_file_writer().add_graph(graph(model, input_to_model, verbose))
File "E:\Anaconda\lib\site-packages\torch\utils\tensorboard\_pytorch_graph.py", line 291, in graph
raise e
File "E:\Anaconda\lib\site-packages\torch\utils\tensorboard\_pytorch_graph.py", line 285, in graph
trace = torch.jit.trace(model, args)
File "E:\Anaconda\lib\site-packages\torch\jit\__init__.py", line 882, in trace
check_tolerance, _force_outplace, _module_class)
File "E:\Anaconda\lib\site-packages\torch\jit\__init__.py", line 1034, in trace_module
module._c._create_method_from_trace(method_name, func, example_inputs, var_lookup_fn, _force_outplace)
File "E:\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 530, in __call__
result = self._slow_forward(*input, **kwargs)
File "E:\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 516, in _slow_forward
result = self.forward(*input, **kwargs)
File "<ipython-input-5-cd44a4e4fb73>", line 52, in forward
t = F.relu(self.conv1(t))
File "E:\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 530, in __call__
result = self._slow_forward(*input, **kwargs)
File "E:\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 516, in _slow_forward
result = self.forward(*input, **kwargs)
File "E:\Anaconda\lib\site-packages\torch\nn\modules\conv.py", line 345, in forward
return self.conv2d_forward(input, self.weight)
File "E:\Anaconda\lib\site-packages\torch\nn\modules\conv.py", line 342, in conv2d_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _thnn_conv2d_forward
I understand it says an argument is on the CPU, but I thought I had moved it in the training part.
I have the following code
Conv-neural network
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        t = F.relu(self.fc1(t.reshape(-1, 12*4*4)))
        t = F.relu(self.fc2(t))
        t = self.out(t)
        return t
The training part
parameters = dict(
    lr=[.01, .001],
    batch_size=[10, 100, 1000],
    shuffle=[True, False]
)
param_values = [v for v in parameters.values()]
param_values

for lr, batch_size, shuffle in product(*param_values):
    network = Network()
    network.to(device)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(10):
        total_loss = 0
        total_correct = 0
        for batch in train_loader:  # Get batch
            images, labels = batch
            images = images.to(device)  # Moving data to gpu
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * batch_size
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss:', total_loss, epoch)
        tb.add_scalar('Number Correct:', total_correct, epoch)
        tb.add_scalar('Accuracy:', total_correct/len(train_set), epoch)

        # tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
        # tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
        # tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

        print("epoch:", epoch, "total_correct:", total_correct, "loss:", total_loss)
    tb.close()
I am new to deep learning so any help will be highly appreciated. Thanks
You missed moving your labels to the GPU, i.e.
labels = labels.to(device)
You also need to move the tensors used for tb.add_graph to the GPU:
images, labels = next(iter(train_loader))
images = images.to(device)
labels = labels.to(device)
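For completeness, a sketch of the inner training loop with both tensors moved (the rest of the loop is unchanged):

for batch in train_loader:
    images, labels = batch
    images = images.to(device)  # inputs on the GPU
    labels = labels.to(device)  # targets must be on the same device as preds
    preds = network(images)
    loss = F.cross_entropy(preds, labels)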

Tensorflow - RuntimeError: Cannot get value inside Tensorflow graph function

I'm trying to create a recurrent neural network with the Keras functional API in TensorFlow. The RNN takes in tweets and classifies them as positive or negative.
attention_input = keras.Input(shape=(512,), name='attention')
a = keras.layers.Dense(1, activation='sigmoid')(attention_input)
attention_output = keras.layers.Multiply()([attention_input, a])
attention = keras.Model(inputs=attention_input, outputs=attention_output, name='attention_model')

inputs1 = keras.Input(shape=(100,), name='lstm')
x = keras.layers.Embedding(len(tokenizer.word_counts)+1,
                           100,
                           weights=[embedding_matrix],
                           input_length=100,
                           trainable=True)(inputs1)
x = keras.layers.Bidirectional(tf.keras.layers.LSTM(256, return_sequences=True))(x)
x = keras.layers.TimeDistributed(attention)(x)
x = tf.unstack(x, num=256)
t_sum = x[0]
for i in range(256 - 1):
    t_sum = keras.layers.Add()([t_sum, x[i+1]])
lstm = keras.Model(inputs=inputs1, outputs=t_sum, name='lstm_model')

inputs2 = keras.Input(shape=(100,), name='dense')
x = keras.layers.Dense(256, activation='relu')(inputs2)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Dense(128, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
outputs2 = keras.layers.Dense(1, activation='sigmoid')(x)
dense = keras.Model(inputs=inputs2, outputs=outputs2, name='txt_model')

inputs = keras.Input(shape=(100,), name='text')
x = lstm(inputs)
outputs = dense(x)
model = keras.Model(inputs=inputs, outputs=outputs, name='text_model')

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc',
             tf.keras.metrics.Precision(),
             tf.keras.metrics.Recall()])
I get the following runtime error
2019-04-13 10:29:34.855192: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
Traceback (most recent call last):
File ".\main.py", line 25, in <module>
' -> '.join(permutation).lower() : { ** results.get(' -> '.join(permutation).lower(), {}), ** framework.runtime.evaluate(path, permutation, classifiers, cached) }
File "C:\Users\steff\Desktop\Skole\MsT\framework\framework\runtime.py", line 30, in evaluate
classifier.lower() : framework.classifiers.list[classifier.lower()](data)
File "C:\Users\steff\Desktop\Skole\MsT\framework\framework\classifiers\rnn.py", line 93, in evaluate
x = lstm(inputs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 612, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\network.py", line 870, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\network.py", line 1011, in _run_internal_graph
output_tensors = layer(computed_tensors, **kwargs)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 669, in __call__
self.set_weights(self._initial_weights)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 938, in set_weights
param_values = backend.batch_get_value(params)
File "C:\Users\steff\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\backend.py", line 2837, in batch_get_value
raise RuntimeError('Cannot get value inside Tensorflow graph function.')
RuntimeError: Cannot get value inside Tensorflow graph function.
I can see from the errors that it has something to do with my LSTM model, but I can't see the cause of the problem.
I think you are using TensorFlow 2.0. If so, using the parameter embeddings_initializer= instead of weights= worked:
x = tf.keras.layers.Embedding(vocabulary_size, embedding_dim, embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix), trainable=False)
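Applied to the model in the question, that would look roughly like this (a sketch; it keeps trainable=True and the vocabulary size from the original code):

x = tf.keras.layers.Embedding(len(tokenizer.word_counts) + 1,
                              100,
                              embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
                              input_length=100,
                              trainable=True)(inputs1)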

Dynamic LSTM in Tensorflow - Optimizer Issues

I have implemented the following LSTM class in Tensorflow, where the unroll operation is inspired by the dynamic_rnn() implementation within Tensorflow:
class LSTM():
    def __init__(self, dim_x, dim_h, batch_size):
        self.batch_size = batch_size
        self.dim_x = dim_x
        self.dim_h = dim_h
        self.W_x_h = normal([dim_x, 4*dim_h])
        self.W_h_h = normal([dim_h, 4*dim_h])
        self.b_h = zeros([4*dim_h])
        self.h_0 = zeros([batch_size, dim_h])
        self.c_0 = zeros([batch_size, dim_h])

    def lstmStep(self, x_t, h_t_minus, c_t_minus):
        lstm_mat = tf.matmul(x_t, self.W_x_h) + tf.matmul(h_t_minus, self.W_h_h) \
                   + self.b_h
        i_lin, f_lin, o_lin, g_lin = tf.split(1, 4, lstm_mat)
        i_t = tf.sigmoid(i_lin); f_t = tf.sigmoid(f_lin)
        o_t = tf.sigmoid(o_lin); g_t = tf.tanh(g_lin)
        c_t = c_t_minus * f_t + i_t * g_t
        h_t = o_t * tf.tanh(c_t)
        return h_t, c_t

    def lstmUnroll(self, in_batch):
        seq_len = array_ops.shape(in_batch)[0]
        in_batch_ta = tensor_array_ops.TensorArray(dtype=in_batch.dtype, size=seq_len)
        in_batch_ta = in_batch_ta.unpack(in_batch)
        h_arr = tensor_array_ops.TensorArray(dtype=in_batch.dtype, size=seq_len)
        time = array_ops.constant(0, dtype=tf.int32)
        inputs_got_shape = in_batch.get_shape().with_rank(3)
        (const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()

        def compute(time, h_t, c_t, h_arr_t):
            x_t = in_batch_ta.read(time)
            h_t, c_t = self.lstmStep(x_t, h_t, c_t)
            h_arr_t = h_arr_t.write(time, h_t)
            return [time+1, h_t, c_t, h_arr_t]

        (_1, _2, _3, h_arr) = control_flow_ops.While(
            cond=lambda time, _1, _2, _3: time < seq_len,
            body=compute,
            loop_vars=(time, self.h_0, self.c_0, h_arr),
            parallel_iterations=32)

        output = h_arr.pack()
        return output
I define a graph using the LSTM with some cost function. The graph compiles properly, and I'm able to forward propagate using 'in_batch', which is of size [sequence_length, batch_size, input_dim]; 'sequence_length' can vary across batches. However, when I use an optimizer (Adam) with the cost function, I get the following error message:
Traceback (most recent call last):
File "textToImage.py", line 351, in <module>
opt = tf.train.AdamOptimizer().minimize(temp)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 192, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 297, in apply_gradients
update_ops.append(self._apply_dense(grad, var))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 129, in _apply_dense
self._epsilon_t, grad, use_locking=self._use_locking).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 81, in apply_adam
use_locking=use_locking, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2042, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1528, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/training_ops.py", line 72, in _ApplyAdamShape
grad_shape = op.inputs[9].get_shape().merge_with(v_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 541, in merge_with
self.assert_same_rank(other)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 584, in assert_same_rank
"Shapes %s and %s must have the same rank" % (self, other))
ValueError: Shapes () and (1000, 512) must have the same rank
Here 1000 is 'dim_x' and 512 is 4*'dim_h', so the error refers to 'W_x_h'. I have tried using '.set_shape()' for 'x_t', 'h_t', 'c_t' and 'output' in 'lstmUnroll()', but it still fails.
Any ideas to make it work with the optimizer?
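One workaround to consider, sketched for a TF 1.x-era API rather than the exact version in the question: let tf.nn.dynamic_rnn drive the unroll with a built-in cell, which comes with registered, well-shaped gradients for the optimizer and still supports variable sequence lengths:

import tensorflow as tf

# in_batch: [sequence_length, batch_size, input_dim], time-major as in lstmUnroll()
cell = tf.nn.rnn_cell.LSTMCell(num_units=dim_h)
outputs, state = tf.nn.dynamic_rnn(cell, in_batch, time_major=True, dtype=tf.float32)
# outputs: [sequence_length, batch_size, dim_h], analogous to h_arr.pack()

This replaces the hand-rolled control_flow_ops.While unroll rather than patching its gradient shapes, so it is a substitution, not a fix for the reported shape error itself.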
