I’m trying to implement a prediction of low birth rate using nengo and tensorflow with a SNN model.\
But, I got the following Value error (in Anaconda):
Traceback (most recent call last):
File "C:\Users\USER\NengoPRJ\nengo_lowbirth.py", line 95, in <module>
sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo\utils\magic.py", line 181, in __call__
return self.wrapper(self.__wrapped__, self.instance, args, kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 66, in require_open
return wrapped(*args, **kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 869, in fit
"fit", x=x, y=y, n_steps=n_steps, stateful=stateful, **kwargs
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo\utils\magic.py", line 181, in __call__
return self.wrapper(self.__wrapped__, self.instance, args, kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 50, in with_self
output = wrapped(*args, **kwargs)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\nengo_dl\simulator.py", line 1032, in _call_keras
outputs = getattr(self.keras_model, func_type)(**func_args)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 680, in fit
steps_name='steps_per_epoch')
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 189, in model_iteration
f = _make_execution_function(model, mode)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training_arrays.py", line 571, in _make_execution_function
return model._make_execution_function(mode)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2125, in _make_execution_function
self._make_train_function()
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 2057, in _make_train_function
params=self._collected_trainable_weights, loss=self.total_loss)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 503, in get_updates
grads = self.get_gradients(loss, params)
File "C:\ProgramData\Anaconda3\envs\tf210\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 397, in get_gradients
"K.argmax, K.round, K.eval.".format(param))
ValueError: Variable <tf.Variable 'TensorGraph/base_params/trainable_float32_1:0' shape=(1,) dtype=float32> has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
(in Google Colab):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-205839bf5640> in <module>()
96 loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)})
97
---> 98 sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
99
100
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py in get_gradients(self, loss, params)
467 "gradient defined (i.e. are differentiable). "
468 "Common ops without gradient: "
--> 469 "K.argmax, K.round, K.eval.".format(param))
470 grads = self._clip_gradients(grads)
471 return grads
ValueError: Variable <tf.Variable 'TensorGraph/base_params/trainable_float32_1:0' shape=(1,) dtype=float32> has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
I found many solution in github and stackoverflow.
https://github.com/tensorflow/tensorflow/issues/1511
https://github.com/huggingface/transformers/issues/5427
But it couldn't resolve my error.
My code are as follows:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random
import nengo
import nengo_dl
import requests
seed = 1
amp =1
max_rates = 100
intercepts = 0
tau_rc = 0.02
noise_filter = 0.1 #noise_filter
train_data_rate = 0.85
learning_rate = 0.001
epochs = 5
np.random.seed(seed)
do_train = True
url = "https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat"
birth_file = requests.get(url)
birth_all_data = birth_file.text.split('\r\n')
birth_header = [x for x in birth_all_data[0].split('\t') if len(x)>=1]
birth_data = [[float(x) for x in y.split('\t') if len(x)>=1] for y in birth_all_data[1:] if len(y)>=1]
data_size = len(birth_data)
x_data = np.array([x[1:8] for x in birth_data])
y_data = np.array([y[0] for y in birth_data])
train_samples = round(data_size*train_data_rate)
train_indices = np.random.choice(data_size, train_samples, replace=False)
testset = set(range(data_size)) - set(train_indices)
test_indices = np.array(list(testset))
x_train = x_data[train_indices]
y_train = np.transpose([y_data[train_indices]])
x_test = x_data[test_indices]
y_test = np.transpose([y_data[test_indices]])
def normalize_cols(m):
col_max = m.max(axis=0)
col_min = m.min(axis=0)
return (m - col_min) / (col_max - col_min)
x_train = np.nan_to_num(normalize_cols(x_train))
x_test = np.nan_to_num(normalize_cols(x_test))
##################################################
nfeatures = 7
#minibatch_size = 189 - train_samples
minibatch_size=1
with nengo.Network(seed=seed) as net:
net.config[nengo.Ensemble].max_rates = nengo.dists.Choice([max_rates])
net.config[nengo.Ensemble].intercepts = nengo.dists.Choice([intercepts])
neuron_type=nengo.LIF(amplitude=amp, tau_rc=tau_rc)
nengo_dl.configure_settings(stateful=False)
inp = nengo.Node([0] * nfeatures)
ens = nengo.Ensemble(1, 1, neuron_type=neuron_type)
x = nengo.Connection(inp, ens.neurons, transform=nengo_dl.dists.Glorot(), synapse=None)
inp_p = nengo.Probe(inp)
out_p = nengo.Probe(x, label="out_p")
out_p_filt = nengo.Probe(x, synapse=noise_filter, label="out_p_filt")
sim = nengo_dl.Simulator(net, minibatch_size=minibatch_size)
n_steps=20
train_data = np.reshape(x_train, (x_train.shape[0], 1, nfeatures))
train_labels = np.reshape(y_train, (y_train.shape[0], 1, 1))
test_data = np.tile(np.reshape(x_test, (x_test.shape[0], 1, nfeatures)), (1, n_steps, 1))
test_labels = np.tile(np.reshape(y_test, (y_test.shape[0], 1, 1)), (1, n_steps, 1))
def accuracy(outputs, targets):
return 100 * tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(outputs)), targets), tf.float32))
sim.compile(loss={out_p_filt: accuracy})
print("accuracy before training:", sim.evaluate(test_data, {out_p_filt: test_labels}, verbose=0)["loss"])
do_training = do_train;
if do_training:
sim.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)})
sim.fit(train_data, {out_p: train_labels}, epochs=epochs)
System:
python:3.7.7
tensorflow:2.2.0 and 2.1.0
nengo:3.0.0
nengo-dl:3.2.0
How can I solve this problem?
Thanks in advance.
I am trying to get code working from the following repo, which is based off this paper. It had a lot of errors, but I mostly got it working. However, I keep getting the same problem and I really do not understand how to troubleshoot this/what is even going wrong.
The error occurs the second time the validation if statement critera is met. The first time is always works, then breaks on the second. I'm including the output it prints before breaking if its helpful. See error below:
step = 1, train_loss = 1204.7784423828125, train_accuracy = 0.13725490868091583
counter = 1, dev_loss = 1188.6639287274584, dev_accuacy = 0.2814199453625912
step = 2, train_loss = 1000.983154296875, train_accuracy = 0.26249998807907104
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1364 try:
-> 1365 return fn(*args)
1366 except errors.OpError as e:
7 frames
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [2,185] vs. [2,229]
[[{{node loss/cond/add_1}}]]
[[viterbi_decode/cond/rnn_1/while/Switch_3/_541]]
(1) Invalid argument: Incompatible shapes: [2,185] vs. [2,229]
[[{{node loss/cond/add_1}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [2,185] vs. [2,229]
[[node loss/cond/add_1 (defined at /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py:1748) ]]
[[viterbi_decode/cond/rnn_1/while/Switch_3/_541]]
(1) Invalid argument: Incompatible shapes: [2,185] vs. [2,229]
[[node loss/cond/add_1 (defined at /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
Original stack trace for 'loss/cond/add_1':
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 664, in launch_instance
app.start()
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-11-90859dc83f76>", line 66, in <module>
main()
File "<ipython-input-11-90859dc83f76>", line 12, in main
model = DAModel()
File "<ipython-input-9-682db36e2a23>", line 148, in __init__
self.logits, self.labels, self.dialogue_lengths)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/crf/python/ops/crf.py", line 257, in crf_log_likelihood
transition_params)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/crf/python/ops/crf.py", line 116, in crf_sequence_score
false_fn=_multi_seq_fn)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/utils.py", line 202, in smart_cond
pred, true_fn=true_fn, false_fn=false_fn, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/smart_cond.py", line 59, in smart_cond
name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/control_flow_ops.py", line 1235, in cond
orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/control_flow_ops.py", line 1061, in BuildCondBranch
original_result = fn()
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/crf/python/ops/crf.py", line 104, in _multi_seq_fn
unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/crf/python/ops/crf.py", line 287, in crf_unary_score
flattened_tag_indices = array_ops.reshape(offsets + tag_indices, [-1])
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/math_ops.py", line 899, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/math_ops.py", line 1197, in _add_dispatch
return gen_math_ops.add_v2(x, y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_math_ops.py", line 549, in add_v2
"AddV2", x=x, y=y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
Here is the code (which is slightly different from the repo in order to get it to run:
Versions:
Python 3
tensorflow == 1.15.0
pandas == 0.25.3
numpy == 1.17.5
import glob
import pandas as pd
import tensorflow as tf
import pandas as pd
import numpy as np
# preprocess data
file_list = []
for f in glob.glob('swda/*'):
file_list.append(f)
df_list = []
for i in file_list:
df = pd.read_csv(i)
df_list.append(df)
text_list = []
label_list = []
for df in df_list:
df['utterance_no_specialchar_'] = df.utterance_no_specialchar.astype(str)
text = df.utterance_no_specialchar_.tolist()
labels = df.da_category.tolist()
text_list.append(text)
label_list.append(labels)
### new preprocessing step
text_list = [[[j] for j in i] for i in text_list]
tok_data = [y[0] for x in text_list for y in x]
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(tok_data)
sequences = []
for x in text_list:
tmp = []
for y in x:
tmp.append(tokenizer.texts_to_sequences(y)[0])
sequences.append(tmp)
def _pad_sequences(sequences, pad_tok, max_length):
"""
Args:
sequences: a generator of list or tuple
pad_tok: the char to pad with
Returns:
a list of list where each sublist has same length
"""
sequence_padded, sequence_length = [], []
for seq in sequences:
seq = list(seq)
seq_ = seq[:max_length] + [pad_tok]*max(max_length - len(seq), 0)
sequence_padded += [seq_]
sequence_length += [min(len(seq), max_length)]
return sequence_padded, sequence_length
def pad_sequences(sequences, pad_tok, nlevels=1):
"""
Args:
sequences: a generator of list or tuple
pad_tok: the char to pad with
nlevels: "depth" of padding, for the case where we have characters ids
Returns:
a list of list where each sublist has same length
"""
if nlevels == 1:
max_length = max(map(lambda x : len(x), sequences))
sequence_padded, sequence_length = _pad_sequences(sequences,
pad_tok, max_length)
elif nlevels == 2:
max_length_word = max([max(map(lambda x: len(x), seq))
for seq in sequences])
sequence_padded, sequence_length = [], []
for seq in sequences:
# all words are same length now
sp, sl = _pad_sequences(seq, pad_tok, max_length_word)
sequence_padded += [sp]
sequence_length += [sl]
max_length_sentence = max(map(lambda x : len(x), sequences))
sequence_padded, _ = _pad_sequences(sequence_padded,
[pad_tok]*max_length_word, max_length_sentence)
sequence_length, _ = _pad_sequences(sequence_length, 0,
max_length_sentence)
return sequence_padded, sequence_length
def minibatches(data, labels, batch_size):
data_size = len(data)
start_index = 0
num_batches_per_epoch = int((len(data) + batch_size - 1) / batch_size)
for batch_num in range(num_batches_per_epoch):
start_index = batch_num * batch_size
end_index = min((batch_num + 1) * batch_size, data_size)
yield data[start_index: end_index], labels[start_index: end_index]
def select(parameters, length):
"""Select the last valid time step output as the sentence embedding
:params parameters: [batch, seq_len, hidden_dims]
:params length: [batch]
:Returns : [batch, hidden_dims]
"""
shape = tf.shape(parameters)
idx = tf.range(shape[0])
idx = tf.stack([idx, length - 1], axis = 1)
return tf.gather_nd(parameters, idx)
class DAModel():
def __init__(self):
with tf.variable_scope("placeholder"):
self.dialogue_lengths = tf.placeholder(tf.int32, shape = [None], name = "dialogue_lengths")
self.word_ids = tf.placeholder(tf.int32, shape = [None,None,None], name = "word_ids")
self.utterance_lengths = tf.placeholder(tf.int32, shape = [None, None], name = "utterance_lengths")
self.labels = tf.placeholder(tf.int32, shape = [None, None], name = "labels")
self.clip = tf.placeholder(tf.float32, shape = [], name = 'clip')
######################## EMBEDDINGS ###########################################
with tf.variable_scope("embeddings"):
_word_embeddings = tf.get_variable(
name = "_word_embeddings",
dtype = tf.float32,
shape = [words, word_dim],
initializer = tf.random_uniform_initializer()
)
word_embeddings = tf.nn.embedding_lookup(_word_embeddings,self.word_ids, name="word_embeddings")
self.word_embeddings = tf.nn.dropout(word_embeddings, 0.8)
with tf.variable_scope("utterance_encoder"):
s = tf.shape(self.word_embeddings)
batch_size = s[0] * s[1]
time_step = s[-2]
word_embeddings = tf.reshape(self.word_embeddings, [batch_size, time_step, word_dim])
length = tf.reshape(self.utterance_lengths, [batch_size])
fw = tf.nn.rnn_cell.LSTMCell(hidden_size_lstm_1, forget_bias=0.8, state_is_tuple= True)
bw = tf.nn.rnn_cell.LSTMCell(hidden_size_lstm_1, forget_bias=0.8, state_is_tuple= True)
output, _ = tf.nn.bidirectional_dynamic_rnn(fw, bw, word_embeddings,sequence_length=length, dtype = tf.float32)
output = tf.concat(output, axis = -1) # [batch_size, time_step, dim]
# Select the last valid time step output as the utterance embedding,
# this method is more concise than TensorArray with while_loop
# output = select(output, self.utterance_lengths) # [batch_size, dim]
output = select(output, length) # [batch_size, dim]
# output = tf.reshape(output, s[0], s[1], 2 * hidden_size_lstm_1)
output = tf.reshape(output, [s[0], s[1], 2 * hidden_size_lstm_1])
output = tf.nn.dropout(output, 0.8)
with tf.variable_scope("bi-lstm"):
cell_fw = tf.contrib.rnn.BasicLSTMCell(hidden_size_lstm_2, state_is_tuple = True)
cell_bw = tf.contrib.rnn.BasicLSTMCell(hidden_size_lstm_2, state_is_tuple = True)
(output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, output, sequence_length = self.dialogue_lengths, dtype = tf.float32)
outputs = tf.concat([output_fw, output_bw], axis = -1)
outputs = tf.nn.dropout(outputs, 0.8)
with tf.variable_scope("proj1"):
output = tf.reshape(outputs, [-1, 2 * hidden_size_lstm_2])
W = tf.get_variable("W", dtype = tf.float32, shape = [2 * hidden_size_lstm_2, proj1], initializer= tf.contrib.layers.xavier_initializer())
b = tf.get_variable("b", dtype = tf.float32, shape = [proj1], initializer=tf.zeros_initializer())
output = tf.nn.relu(tf.matmul(output, W) + b)
with tf.variable_scope("proj2"):
W = tf.get_variable("W", dtype = tf.float32, shape = [proj1, proj2], initializer= tf.contrib.layers.xavier_initializer())
b = tf.get_variable("b", dtype = tf.float32, shape = [proj2], initializer=tf.zeros_initializer())
output = tf.nn.relu(tf.matmul(output, W) + b)
with tf.variable_scope("logits"):
nstep = tf.shape(outputs)[1]
W = tf.get_variable("W", dtype = tf.float32,shape=[proj2, tags], initializer = tf.random_uniform_initializer())
b = tf.get_variable("b", dtype = tf.float32,shape = [tags],initializer=tf.zeros_initializer())
pred = tf.matmul(output, W) + b
self.logits = tf.reshape(pred, [-1, nstep, tags])
with tf.variable_scope("loss"):
log_likelihood, self.trans_params = tf.contrib.crf.crf_log_likelihood(
self.logits, self.labels, self.dialogue_lengths)
self.loss = tf.reduce_mean(-log_likelihood) + tf.nn.l2_loss(W) + tf.nn.l2_loss(b)
#tf.summary.scalar("loss", self.loss)
with tf.variable_scope("viterbi_decode"):
viterbi_sequence, _ = tf.contrib.crf.crf_decode(self.logits, self.trans_params, self.dialogue_lengths)
batch_size = tf.shape(self.dialogue_lengths)[0]
output_ta = tf.TensorArray(dtype = tf.float32, size = 1, dynamic_size = True)
def body(time, output_ta_1):
length = self.dialogue_lengths[time]
vcode = viterbi_sequence[time][:length]
true_labs = self.labels[time][:length]
accurate = tf.reduce_sum(tf.cast(tf.equal(vcode, true_labs), tf.float32))
output_ta_1 = output_ta_1.write(time, accurate)
return time + 1, output_ta_1
def condition(time, output_ta_1):
return time < batch_size
i = 0
[time, output_ta] = tf.while_loop(condition, body, loop_vars = [i, output_ta])
output_ta = output_ta.stack()
accuracy = tf.reduce_sum(output_ta)
self.accuracy = accuracy / tf.reduce_sum(tf.cast(self.dialogue_lengths, tf.float32))
#tf.summary.scalar("accuracy", self.accuracy)
with tf.variable_scope("train_op"):
optimizer = tf.train.AdagradOptimizer(0.1)
#if tf.greater(self.clip , 0):
grads, vs = zip(*optimizer.compute_gradients(self.loss))
grads, gnorm = tf.clip_by_global_norm(grads, self.clip)
self.train_op = optimizer.apply_gradients(zip(grads, vs))
#else:
# self.train_op = optimizer.minimize(self.loss)
#self.merged = tf.summary.merge_all()
### Set model variables
hidden_size_lstm_1 = 200
hidden_size_lstm_2 = 200
tags = 39 # assuming number of classes to predict?
word_dim = 300
proj1 = 200
proj2 = 100
words = 20001
# words = 8759 + 1 # max(num_unique_word_tokens)
batchSize = 2
log_dir = "train"
model_dir = "DAModel"
model_name = "ckpt"
### Run model
def main():
# tokenize and vectorize text data to prepare for embedding
train_data = sequences[:75]
train_labels = label_list[:75]
dev_data = sequences[75:]
dev_labels = label_list[75:]
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.4
with tf.Session(config = config) as sess:
model = DAModel()
sess.run(tf.global_variables_initializer())
clip = 2
saver = tf.train.Saver()
#writer = tf.summary.FileWriter("D:\\Experimemts\\tensorflow\\DA\\train", sess.graph)
writer = tf.summary.FileWriter("train", sess.graph)
counter = 0
for epoch in range(10):
for dialogues, labels in minibatches(train_data, train_labels, batchSize):
_, dialogue_lengthss = pad_sequences(dialogues, 0)
word_idss, utterance_lengthss = pad_sequences(dialogues, 0, nlevels = 2)
true_labs = labels
labs_t, _ = pad_sequences(true_labs, 0)
counter += 1
train_loss, train_accuracy, _ = sess.run([model.loss, model.accuracy,model.train_op], feed_dict = {model.word_ids: word_idss, model.utterance_lengths: utterance_lengthss, model.dialogue_lengths: dialogue_lengthss, model.labels:labs_t, model.clip :clip} )
#writer.add_summary(summary, global_step = counter)
print("step = {}, train_loss = {}, train_accuracy = {}".format(counter, train_loss, train_accuracy))
train_precision_summ = tf.Summary()
train_precision_summ.value.add(
tag='train_accuracy', simple_value=train_accuracy)
writer.add_summary(train_precision_summ, counter)
train_loss_summ = tf.Summary()
train_loss_summ.value.add(
tag='train_loss', simple_value=train_loss)
writer.add_summary(train_loss_summ, counter)
if counter % 1 == 0:
loss_dev = []
acc_dev = []
for dev_dialogues, dev_labels in minibatches(dev_data, dev_labels, batchSize):
_, dialogue_lengthss = pad_sequences(dev_dialogues, 0)
word_idss, utterance_lengthss = pad_sequences(dev_dialogues, 0, nlevels = 2)
true_labs = dev_labels
labs_t, _ = pad_sequences(true_labs, 0)
dev_loss, dev_accuacy = sess.run([model.loss, model.accuracy], feed_dict = {model.word_ids: word_idss, model.utterance_lengths: utterance_lengthss, model.dialogue_lengths: dialogue_lengthss, model.labels:labs_t})
loss_dev.append(dev_loss)
acc_dev.append(dev_accuacy)
valid_loss = sum(loss_dev) / len(loss_dev)
valid_accuracy = sum(acc_dev) / len(acc_dev)
dev_precision_summ = tf.Summary()
dev_precision_summ.value.add(
tag='dev_accuracy', simple_value=valid_accuracy)
writer.add_summary(dev_precision_summ, counter)
dev_loss_summ = tf.Summary()
dev_loss_summ.value.add(
tag='dev_loss', simple_value=valid_loss)
writer.add_summary(dev_loss_summ, counter)
print("counter = {}, dev_loss = {}, dev_accuacy = {}".format(counter, valid_loss, valid_accuracy))
if __name__ == "__main__":
tf.reset_default_graph()
main()
The data comes from here and looks like this:
[[['what '],
['do you want to start '],
['f uh laughter you hit you hit f uh '],
['it doesnt matter '],
['f um were discussing the capital punishment i believe '],
['right '],
['you are right '],
['yeah '],
[' i i suppose i should have '],
['f uh which '],
['i am am pro capital punishment except that i dont like the way its done '],
['uhhuh '],
['f uh yeah '],
['f uh i f uh i guess i i hate to see anyone die f uh ']
...
]]
The dataset to train the model can be found here:
https://github.com/cmeaton/Hierarchical_BiLSTM-CRF_Encoder/tree/master/swda_parsed
I'm having a hard time understanding what this error even means and how to approach understanding it. Any advice would be much appreciated. Thanks.
Introduction
I think the main problem is a data mismatch in the sizes of the arrays (or matrixes or other structure) you are feeding sess.run. Specifically when you are calling:
train_loss, train_accuracy, _ = sess.run([model.loss, model.accuracy,model.train_op], feed_dict = {model.word_ids: word_idss, model.utterance_lengths: utterance_lengthss, model.dialogue_lengths: dialogue_lengthss, model.labels:labs_t, model.clip :clip} )
And more specifically, this error here hints that it's a mismatch problem:
tensorflow.python.framework.errors_impl.InvalidArgumentError:
indices[317] = [317, -1] does not index into param shape [318,39,400]
[[{{node utterance_encoder/GatherNd}}]]
I considered maybe that running on a fresh install might result in a error-free run.
I am getting similar errors but also a whole list of warnings.
Please note I am running on windows 7 and using python 3.6.1.
Versions
I have tried the following tensorflow versions but with no success:
tf 1.15
tf 1.14
tf 1.13.1
tf 1.12
tf 1.11
tf 1.10
tf 1.10 with downgraded keras to 2.2.1
Steps
Installed python 3.6.1 (supported version of tensorflow). Installed for All Users. Set the Path. Installed in C:\Python36
pip3 install --user --upgrade tensorflow==1.15
pip3 install --user --upgrade pandas == 0.25.3
pip3 install --user --upgrade numpy == 1.17.5
Download the following: https://github.com/cmeaton/Hierarchical_BiLSTM-CRF_Encoder/tree/master/swda_parsed
Run the provided code
Result (Includes Many Warnings)
I think the following might be important:
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[317] = [317, -1] does not index into param shape [318,39,400]
[[{{node utterance_encoder/GatherNd}}]]
Full Trace
WARNING:tensorflow:From test.py:313: The name tf.reset_default_graph is deprecated. Please use tf.compat.v1.reset_default_graph instead.
WARNING:tensorflow:From test.py:256: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.
WARNING:tensorflow:From test.py:259: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.
2020-01-31 12:13:10.096283: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
WARNING:tensorflow:From test.py:119: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.
WARNING:tensorflow:From test.py:121: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
WARNING:tensorflow:From test.py:130: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.
WARNING:tensorflow:From test.py:137: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From test.py:147: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
WARNING:tensorflow:From test.py:150: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
WARNING:tensorflow:From D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
WARNING:tensorflow:From D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\rnn_cell_impl.py:958: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.add_weight` method instead.
WARNING:tensorflow:From D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\rnn_cell_impl.py:962: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
WARNING:tensorflow:From D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\rnn.py:244: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.
WARNING:tensorflow:From test.py:163: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
WARNING:tensorflow:From test.py:223: The name tf.train.AdagradOptimizer is deprecated. Please use tf.compat.v1.train.AdagradOptimizer instead.
WARNING:tensorflow:From D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\training\adagrad.py:76: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
WARNING:tensorflow:From test.py:261: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.
WARNING:tensorflow:From test.py:263: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.
WARNING:tensorflow:From test.py:265: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.
2020-01-31 12:13:16.563989: W tensorflow/core/framework/op_kernel.cc:1651] OP_REQUIRES failed at gather_nd_op.cc:47 : Invalid argument: indices[317] = [317, -1] does not index into param shape [318,39,400]
Traceback (most recent call last):
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1365, in _do_call
return fn(*args)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1350, in _run_fn
target_list, run_metadata)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[317] = [317, -1] does not index into param shape [318,39,400]
[[{{node utterance_encoder/GatherNd}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 314, in <module>
main()
File "test.py", line 274, in main
train_loss, train_accuracy, _ = sess.run([model.loss, model.accuracy,model.train_op], feed_dict = {model.word_ids: word_idss, model.utterance_lengths: utterance_lengthss, model.dialogue_lengths: dialogue_lengthss, model.labels:labs_t, model.clip :clip} )
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 956, in run
run_metadata_ptr)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
run_metadata)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[317] = [317, -1] does not index into param shape [318,39,400]
[[node utterance_encoder/GatherNd (defined at D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]
Original stack trace for 'utterance_encoder/GatherNd':
File "test.py", line 314, in <module>
main()
File "test.py", line 260, in main
model = DAModel()
File "test.py", line 155, in __init__
output = select(output, length) # [batch_size, dim]
File "test.py", line 114, in select
return tf.gather_nd(parameters, idx)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\util\dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\array_ops.py", line 4277, in gather_nd
return gen_array_ops.gather_nd(params, indices, name=name)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 3975, in gather_nd
"GatherNd", params=params, indices=indices, name=name)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "D:\Users\bakopme\AppData\Roaming\Python\Python36\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
Let's focus on the error:
Invalid argument: Incompatible shapes: [2,185] vs. [2,229]
The problem seems to be that an operation between two tensors fails, because their shapes are incompatible.
It's possible that the tensorflow version you've selected is less permissive than the one used by the author.
According to this issue, the author guesses he used tensorflow==1.8.
So first I would suggest you try to use this earlier version, or others before\after that (1.7, 1.9, 1.10 etc).
Also, earlier versions may not have the keras package integrated to them as it is today, so you may want to use a specific keras version as well.
For example according to this issue, what helped was to downgrade to keras==2.2.2.
If that doesn't help, maybe one of these will: 1, 2, 3, 4, 5, 6
I'm trying to develop a recurrent neural network in tensorflow 1.1.0 and i wrote a function that should return a LSTM.
def LSTM(x, num_units, num, num_layers=3):
cells = []
for i in range(num_layers):
cell = LSTMCell(num_units=num_units, state_is_tuple=True)
cell = DropoutWrapper(cell=cell, output_keep_prob=0.5)
cells.append(cell)
lstm = MultiRNNCell(cells=cells, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2]) # rendo il risultato una sequenza
last = tf.gather(val, int(val.get_shape()[0]) - 1) # prendo l'output dell'ultimo elemento
return last
This function actually works but if i try to reuse it more than one time i get the following error:
C:\ProgramData\Anaconda3\envs\obra\python.exe C:/Users/Simone/Desktop/Cobra/LSTM_Function_Filtro.py
Traceback (most recent call last):
File "C:/Users/Simone/Desktop/Cobra/LSTM_Function_Filtro.py", line 81, in <module>
Lstm2 = tf.nn.relu(tf.matmul(lyrs.LSTM(concat1, num_hidden, 1), W2) + B2)
File "C:\Users\Simone\Desktop\Cobra\Layers_OK.py", line 62, in LSTM
val, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\rnn.py", line 553, in dynamic_rnn
dtype=dtype)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\rnn.py", line 720, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2623, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2456, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2406, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\rnn.py", line 705, in _time_step
(output, new_state) = call_cell()
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\python\ops\rnn.py", line 691, in <lambda>
call_cell = lambda: cell(input_t, state)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\contrib\rnn\python\ops\core_rnn_cell_impl.py", line 953, in __call__
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\contrib\rnn\python\ops\core_rnn_cell_impl.py", line 713, in __call__
output, new_state = self._cell(inputs, state, scope)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\contrib\rnn\python\ops\core_rnn_cell_impl.py", line 398, in __call__
reuse=self._reuse) as unit_scope:
File "C:\ProgramData\Anaconda3\envs\obra\lib\contextlib.py", line 59, in __enter__
return next(self.gen)
File "C:\ProgramData\Anaconda3\envs\obra\lib\site-packages\tensorflow\contrib\rnn\python\ops\core_rnn_cell_impl.py", line 93, in _checked_scope
"the argument reuse=True." % (scope_name, type(cell).__name__))
ValueError: Attempt to have a second RNNCell use the weights of a variable scope that already has weights: 'rnn/multi_rnn_cell/cell_0/lstm_cell'; and the cell was not constructed as LSTMCell(..., reuse=True). To share the weights of an RNNCell, simply reuse it in your second calculation, or create a new one with the argument reuse=True.
Also i tried to add tf.get_variable_scope().reuse_variables() at the end of the for cycle, but i get the error
Variable rnn/multi_rnn_cell/cell_0/lstm_cell/weights does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
If I am not mistaken you want to share the parameters or create another RNN by using the same code. If so you can use tf.variable_scope as follows:
def LSTM(x, num_units, num_layers=3, reuse=False, scope="MultiRNNCell"):
with tf.variable_scope(name_or_scope=scope, reuse=reuse):
cells = []
for i in range(num_layers):
cell = tf.nn.rnn_cell.LSTMCell(num_units=num_units, state_is_tuple=True)
cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=0.5)
cells.append(cell)
lstm = tf.nn.rnn_cell.MultiRNNCell(cells=cells, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2]) # rendo il risultato una sequenza
last = tf.gather(val, int(val.get_shape()[0]) - 1) # prendo l'output dell'ultimo elemento
return last
At your first use you should pass reuse argument False so that tensorflow creates the variables. To share the parameters with another RNN then passing True is enough. If you want to create a new model then I suggest you passing a new scope name together with reuse=False. The following example runs should make it easier to follow. I created a dummy placeholder.
def list_parameters():
num_param = 0
for v in tf.global_variables():
print(v.name)
num_param += np.prod(v.get_shape().as_list())
print("# of parameters: " + str(num_param))
x = tf.placeholder(dtype=tf.float32,
shape=[32, 50, 100],
name='input_data')
lstm1 = LSTM(x, 64, 3, reuse=False, scope="MultiRNNCell")
list_parameters()
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/bias:0
# of parameters: 108288
lstm2 = LSTM(x, 64, 3, reuse=True, scope="MultiRNNCell")
list_parameters()
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/bias:0
# of parameters: 108288
Note that lstm1 and lstm2 are sharing parameters.
lstm3 = LSTM(x, 64, 3, reuse=False, scope="NewMultiRNNCell")
list_parameters()
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/kernel:0
MultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/bias:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/kernel:0
NewMultiRNNCell/rnn/multi_rnn_cell/cell_2/lstm_cell/bias:0
# of parameters: 216576
A new set of parameters are created for lstm3 because scope, and hence variable names are different. Finally, this post clearly explains variable naming.
I have implemented the following LSTM class in Tensorflow, where the unroll operation is inspired by the dynamic_rnn() implementation within Tensorflow:
class LSTM():
def __init__(self, dim_x, dim_h, batch_size):
self.batch_size = batch_size
self.dim_x = dim_x
self.dim_h = dim_h
self.W_x_h = normal([dim_x, 4*dim_h])
self.W_h_h = normal([dim_h, 4*dim_h])
self.b_h = zeros([4*dim_h])
self.h_0 = zeros([batch_size, dim_h])
self.c_0 = zeros([batch_size, dim_h])
def lstmStep(self, x_t, h_t_minus, c_t_minus):
lstm_mat = tf.matmul(x_t, self.W_x_h) + tf.matmul(h_t_minus, self.W_h_h) \
+ self.b_h
i_lin, f_lin, o_lin, g_lin = tf.split(1, 4, lstm_mat)
i_t = tf.sigmoid(i_lin); f_t = tf.sigmoid(f_lin)
o_t = tf.sigmoid(o_lin); g_t = tf.tanh(g_lin)
c_t = c_t_minus * f_t + i_t * g_t
h_t = o_t * tf.tanh(c_t)
return h_t, c_t
def lstmUnroll(self, in_batch):
seq_len = array_ops.shape(in_batch)[0]
in_batch_ta = tensor_array_ops.TensorArray(dtype = in_batch.dtype, size = seq_len)
in_batch_ta = in_batch_ta.unpack(in_batch)
h_arr = tensor_array_ops.TensorArray(dtype = in_batch.dtype, size = seq_len)
time = array_ops.constant(0, dtype=tf.int32)
inputs_got_shape = in_batch.get_shape().with_rank(3)
(const_time_steps, const_batch_size, const_depth) = inputs_got_shape.as_list()
def compute(time, h_t, c_t, h_arr_t):
x_t = in_batch_ta.read(time)
h_t, c_t = self.lstmStep(x_t, h_t, c_t)
h_arr_t = h_arr_t.write(time, h_t)
return [time+1, h_t, c_t, h_arr_t]
(_1, _2, _3, h_arr) = control_flow_ops.While(
cond=lambda time, _1, _2, _3: time < seq_len,
body=compute,
loop_vars=(time, self.h_0, self.c_0, h_arr),
parallel_iterations=32)
output = h_arr.pack()
return output
I define a graph using the LSTM with some cost function. The graph compiles properly, and I'm able to forward propagate using 'in_batch' which is of size [sequence_length, batch_size, input_dim]. 'sequence_length' can vary for different batches. However, when I use an optimizer(Adam) with the cost function, I get the following error message:
Traceback (most recent call last):
File "textToImage.py", line 351, in <module>
opt = tf.train.AdamOptimizer().minimize(temp)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 192, in minimize
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 297, in apply_gradients
update_ops.append(self._apply_dense(grad, var))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/adam.py", line 129, in _apply_dense
self._epsilon_t, grad, use_locking=self._use_locking).op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/gen_training_ops.py", line 81, in apply_adam
use_locking=use_locking, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2042, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1528, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/training_ops.py", line 72, in _ApplyAdamShape
grad_shape = op.inputs[9].get_shape().merge_with(v_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 541, in merge_with
self.assert_same_rank(other)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_shape.py", line 584, in assert_same_rank
"Shapes %s and %s must have the same rank" % (self, other))
ValueError: Shapes () and (1000, 512) must have the same rank
Here 1000 is 'dim_x' and 512 is 4*'dim_h', so the error is for 'W_x_h'. I have tried using '.set_shape()' for 'x_t', 'h_t', 'c_t' and 'output' in 'lstmUnroll()', still fails.
Any ideas to make it work with the optimizer?