CTC loss TensorFlow, "No valid path found" - python

I'm trying to use ctc_loss to predict some labels, but this is the first time I've used this function and I'm getting an error.
def bi_rnn(x_bi):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)
    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x_bi_shape = x_bi.get_shape().as_list()
    x_bi = tf.unstack(x_bi, x_bi_shape[2], 2)
    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(num_neurons, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(num_neurons, forget_bias=1.0)
    bi_outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x_bi, dtype=tf.float32)
    w_bi = init_weights([num_neurons * 2, num_classes])
    b_bi = init_bias([num_classes])
    bi_outputs = tf.squeeze(input=bi_outputs, axis=1)
    return tf.matmul(bi_outputs, w_bi) + b_bi
def sparse_tuple_from(sequences, dtype=np.int32):
    """Create a sparse representation of x.
    Args:
        sequences: a list of lists of type dtype where each element is a sequence
    Returns:
        A tuple with (indices, values, shape)
    """
    indices = []
    values = []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1] + 1], dtype=np.int64)
    return indices, values, shape
bi_output_1 = bi_rnn(vectors)
targets = tf.sparse_placeholder(tf.int32, name="targets_sparse_tensor")
bi_output_1_shape = bi_output_1.get_shape().as_list()
global_step = tf.Variable(0, trainable=False)
bi_output_1 = tf.reshape(bi_output_1, [bi_output_1_shape[0], batch_size, bi_output_1_shape[1]])
# 1d array of size [batch_size]
seq_len = tf.placeholder(tf.int32, [None])
loss = tf.nn.ctc_loss(labels=targets, inputs=bi_output_1, sequence_length=seq_len)
cost = tf.reduce_mean(loss)
optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9).minimize(cost, global_step=global_step)
Here is how I train the net:
with tf.Session() as sess:
    sess.run(init)
    for i in range(1, epochs):
        loss = 0
        start = 0
        for cont in range(int(n_samples / batch_size)):
            batch_x, batch_y = feature_set.next_batch_training_set(batch_size, start)
            batch_y = sparse_tuple_from(batch_y)
            seq_len_training = np.ones(1) * 7
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, targets: batch_y, seq_len: seq_len_training})
            loss += c
        print('Epoch:', i, ' loss:', loss)
with:
batch_size = 1
bi_output_1 = Tensor("Reshape:0", shape=(25, 1, 37), dtype=float32)
batch_x has shape : (1, 20, 100, 1)
batch_y has shape: (1, 7) (before being passed to sparse_tuple_from())
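For reference, here is what sparse_tuple_from() returns for that batch_y of shape (1, 7); the label values below are made up for illustration:
batch_y = [[1, 2, 3, 3, 4, 0, 2]]  # one sequence of 7 labels (values invented)
indices, values, shape = sparse_tuple_from(batch_y)
# indices -> [[0 0] [0 1] [0 2] [0 3] [0 4] [0 5] [0 6]]  (sequence n, position t) pairs
# values  -> [1 2 3 3 4 0 2]                              the flattened labels
# shape   -> [1 7]                                        (batch_size, max_label_length)
This tuple is what gets fed to the targets sparse placeholder.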
I got this error:
2017-11-23 18:50:14.033220: W C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\35\tensorflow\core\util\ctc\ctc_loss_calculator.cc:144] No valid path found.
and I don't know how to solve it. I tried to follow this example, but it didn't help.
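One thing I have noticed since (not verified yet): tf.nn.ctc_loss prints "No valid path found" when sequence_length is too short for any CTC alignment of the labels, since the number of usable time steps must be at least the label length plus one blank between each pair of repeated consecutive labels. Here the logits tensor has 25 time steps but seq_len is fed the label length (7). A minimal sketch of the change, assuming all 25 time steps are valid:
# Feed the number of time steps in the logits (25 here), not the label length (7)
seq_len_training = np.ones(batch_size) * 25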


Problem with dimension of placeholder in TensorFlow

I have a problem with a TensorFlow NN. I get the error "You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]".
I have tried different shapes for the placeholder, as well as for the numpy array in the feed dictionary. The code is given below.
import numpy as np
import tensorflow as tf

###################################
# data
###################################
N = 100
w_true = 5
b_true = 2
noise_scale = 0.1
x_np = np.random.rand(N, 1)
noise = np.random.normal(scale=noise_scale, size=(N, 1))
y_np = np.reshape(w_true * x_np + b_true + noise, (-1))

###########################
# Model
###########################
n_hidden = 10
with tf.name_scope("palceholders"):
    x = tf.placeholder(tf.float32, (None, 1))
    y = tf.placeholder(tf.float32, (None,))
    keep_prob = tf.placeholder(tf.float32)
with tf.name_scope("hidden-layer"):
    W = tf.Variable(tf.random_normal((1, n_hidden)))
    b = tf.Variable(tf.random_normal((n_hidden,)))
    x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
    # add dropout
    x_hidden = tf.nn.dropout(x_hidden, keep_prob)
with tf.name_scope("output"):
    W = tf.Variable(tf.random_normal((n_hidden, 1)))
    b = tf.Variable(tf.random_normal((1,)))
    y_logit = tf.matmul(x_hidden, W) + b
    y_one_prob = tf.sigmoid(y_logit)
    y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
    y_expand = tf.expand_dims(y, 1)
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
    l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
    train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
    tf.summary.scalar("loss", l)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter("tmp/full-train", tf.get_default_graph())

#########################
# train model
#########################
step = 0
n_epochs = 15
batch_size = 10
dropout_prob = 0.5
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        pos = 0
        while pos < N:
            batch_x = x_np[pos:pos + batch_size]
            batch_x = batch_x.reshape((-1, 1))
            print(batch_x.shape)
            batch_y = y_np[pos:pos + batch_size]
            feed_dict = {x: batch_x, y: batch_y, keep_prob: dropout_prob}
            _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
            print("epoch %d, step %d, loss %f" % (epoch, step, loss))
            train_writer.add_summary(summary, step)
            step += 1
            pos += batch_size
I get an error:
"InvalidArgumentError: You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]"

ValueError: Dimensions must be equal, but are 2 and 3799 for 'softmax_cross_entropy_with_logits_sg

I am developing a neural network model for classifying benign and malware APKs.
I have tried using the tf.squeeze() function, but after using it I am unable to use the optimizer.
def neural_network_model(data):
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weight']), hidden_1_layer['bias'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weight']), hidden_2_layer['bias'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weight']), hidden_3_layer['bias'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weight']) + output_layer['bias']
    return output

def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
The shapes of prediction and y must be the same; however, when I run the code, prediction has shape (3799, 2) whereas y has shape (1, 3799).
My remarks:
If your labels aren't one-hot encoded, you can use tf.nn.sparse_softmax_cross_entropy_with_logits() without converting them to a one-hot representation. Otherwise, tf.nn.softmax_cross_entropy_with_logits() accepts only one-hot encoded labels.
You can't pass numpy values as inputs to the loss function (or as inputs to anything except feed_dict in session.run()) if you're writing code in graph mode. Use placeholders instead.
The following example illustrates how to use placeholders and feed numpy arrays of data.
import numpy as np
import tensorflow as tf

# Dummy data with 3 classes for illustration
n_classes = 3
x_train = np.random.normal(size=(3799, 2))  # 3799 samples of size (2,) each
y_train = np.random.randint(low=0, high=n_classes, size=(1, 3799))

# Define placeholders here
x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.int32, shape=(1, None))

# Define your network here
w = tf.Variable(tf.random_normal(shape=[2, n_classes]), dtype=tf.float32)
b = tf.Variable(tf.zeros([n_classes, ]), dtype=tf.float32)
logits = tf.matmul(x, w) + b
labels = tf.squeeze(y)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=labels)
cost = tf.reduce_mean(xentropy)
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Training
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    cost_val = sess.run(cost, feed_dict={x: x_train, y: y_train})
    print(cost_val)  # 1.8630761
    sess.run(train_op, feed_dict={x: x_train, y: y_train})  # optimizer step
    cost_val = sess.run(cost, feed_dict={x: x_train, y: y_train})
    print(cost_val)  # 1.8619089
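If you want to keep tf.nn.softmax_cross_entropy_with_logits() instead, the labels must be converted to one-hot form first. A minimal sketch of that variant, reusing the names from the example above:
# One-hot variant: convert the integer labels, then use the dense-label loss
labels_one_hot = tf.one_hot(tf.squeeze(y), depth=n_classes)  # shape (3799, n_classes)
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_one_hot)
cost = tf.reduce_mean(xentropy)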

TensorFlow throws "Dimensions must be equal, but are 100 and 0 for 'MatMul' (op: 'MatMul') with input shapes: [0,100], [0,100]."

I am trying to learn TensorFlow from a tutorial, but I don't want to use the MNIST database, so that I can also learn dataset management in Python (I am new to it, and it's a hard learning curve going from C++/Java to it).
So, here is my code. I've tried printing shapes, values, and all sorts of things, but none of it seemed to help. Note: if I make x of shape [0, 100] and the weights [100, 0], the error from matmul goes away, but the result is of shape [0, 0] and cannot be added to the biases. I am 100% sure it's a newbie error, but I will appreciate any help. Thanks in advance.
import tensorflow as tf
import pandas as pd

data = pd.read_csv('trainingData.txt', sep="\t", header=None)
data.columns = ["in", "out"]
data_x = data.loc[:, "in"]
data_y = data.loc[:, "out"]

n_noduri_hl1 = 100
n_noduri_hl2 = 250
n_noduri_hl3 = 100
batch_size = 100

x = tf.placeholder("float", [0, 100])
y = tf.placeholder('float')

def Neural_Network(data):
    # input * weight + bias
    hidden_1 = {'weight': tf.Variable(tf.random_normal([0, n_noduri_hl1])),
                'biases': tf.Variable(tf.random_normal([n_noduri_hl1]))}
    hidden_2 = {'weight': tf.Variable(tf.random_normal([n_noduri_hl1, n_noduri_hl2])),
                'biases': tf.Variable(tf.random_normal([n_noduri_hl2]))}
    hidden_3 = {'weight': tf.Variable(tf.random_normal([n_noduri_hl2, n_noduri_hl3])),
                'biases': tf.Variable(tf.random_normal([n_noduri_hl3]))}
    output_layer = {'weight': tf.Variable(tf.random_normal([n_noduri_hl3, 1])),
                    'biases': tf.Variable(tf.random_normal([1]))}
    # computation
    print("data: ", data, "matmul: ", tf.matmul(data, hidden_1['weight']))
    l1 = tf.add(tf.matmul(data, hidden_1['weight']), hidden_1['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2['weight']), hidden_2['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3['weight']), hidden_3['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weight']) + output_layer['biases']
    return output

def get_next_batch(dataptr, batch_size, index):
    batch = dataptr.loc[index: index + batch_size]
    print(batch)
    return batch

def train(x):
    predictie = Neural_Network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predictie, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    epoci = 10
    index = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoca in range(epoci):
            loss = 0
            for _ in range(int(len(data) / batch_size)):
                ep_x = get_next_batch(data_x, batch_size, index)
                ep_y = get_next_batch(data_y, batch_size, index)
                index += batch_size
                _, c = sess.run([optimizer, cost], feed_dict={x: ep_x, y: ep_y})
                loss += c
            print('Epoch: ', epoca, " of ", epoci, " loss: ", loss)
        corect = tf.equal(tf.argmax(predictie, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(corect, 'float'))
        print('Accuracy: ', accuracy.eval({x: data.loc[:, "in"], y: data.loc[:, "out"]}))

train(x)
Instead of 0, your placeholder should have None for the first dimension (the batch dimension), and the following dimensions should describe the size of a single input vector or matrix.
For example, x = tf.placeholder("float", [None, 64, 64, 3]) would be the place holder for a batch of 64 x 64 pixel RGB colour images.
When performing 2D matrix multiplication, the number of columns of the first operand must match the number of rows of the second operand. This is just how matrix multiplication is defined.
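To make both points concrete, here is a small sketch (sizes chosen for illustration) of a placeholder with a None batch dimension feeding a matmul whose inner dimensions agree:
import tensorflow as tf
x = tf.placeholder("float", [None, 100])       # (batch, 100): batch size varies at run time
W = tf.Variable(tf.random_normal([100, 250]))  # columns of x (100) == rows of W (100)
b = tf.Variable(tf.random_normal([250]))
h = tf.nn.relu(tf.matmul(x, W) + b)            # result has shape (batch, 250)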

placeholder definitions for nd-array input in TensorFlow

I'm trying to build LSTM RNN based on this guide:
http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
My input is an ndarray of size 89102*39 (89102 rows, 39 features). There are 3 labels for the data: 0, 1, 2.
It seems like I'm having a problem with the placeholder definitions, but I'm not sure what it is.
My code is:
data = tf.placeholder(tf.float32, [None, 1000, 39])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(self.num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([self.num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))
And I'm getting the following error:
File , line 133, in execute_graph
    sess.run(minimize, {data: inp, target: out})
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 789, in run
    run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 975, in _run
    % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1000, 39) for Tensor 'Placeholder:0', which has shape '(1000, 89102, 39)'
Any idea what might be causing the problem?
As indicated here, the dynamic_rnn function takes batch inputs of shape
[batch_size, truncated_backprop_length, input_size]
In the link that you provided, the shape of the placeholder was
data = tf.placeholder(tf.float32, [None, 20,1])
This means that they chose truncated_backprop_length=20 and input_size=1.
Their data was the following 3D array:
[
array([[0],[0],[1],[0],[0],[1],[0],[1],[1],[0],[0],[0],[1],[1],[1],[1],[1],[1],[0],[0]]),
array([[1],[1],[0],[0],[0],[0],[1],[1],[1],[1],[1],[0],[0],[1],[0],[0],[0],[1],[0],[1]]),
.....
]
Based on your code, it seems that train_input is a 2D array rather than a 3D array. Hence, you need to transform it into a 3D array. To do that, you need to decide which parameters you want to use for truncated_backprop_length and input_size, and then define data appropriately.
For example, if you want truncated_backprop_length and input_size to be 39 and 1 respectively, you can do
import numpy as np
train_input=np.reshape(train_input,(len(train_input),39,1))
data = tf.placeholder(tf.float32, [None, 39,1])
I changed your code according to the above discussion and ran it on some random data that I generated. It runs without throwing an error. See the code below:
import tensorflow as tf
import numpy as np

num_hidden = 5
train_input = np.random.rand(89102, 39)
train_input = np.reshape(train_input, (len(train_input), 39, 1))
train_output = np.random.rand(89102, 3)

data = tf.placeholder(tf.float32, [None, 39, 1])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)

batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))

Splitting ndarray gives unexpected results (TensorFlow RNN tutorial)

I am following a tutorial on rnn's in TensorFlow but I have a question concerning the input formats.
They take raw_x (a one-hot vector) and basically first cut it up into pieces of length 200 (batch_size) to form data_x. That is fine.
Then they further cut data_x into pieces of length 5 (num_steps, or graph width) with:
for i in range(epoch_size):
    x = data_x[:, i * num_steps:(i + 1) * num_steps]
    y = data_y[:, i * num_steps:(i + 1) * num_steps]
    yield (x, y)
However, when I look at the data, the slices of x do not match data_x. The first one does, but then they diverge.
Am I misunderstanding the above code? I would like to understand how x is being created and what it is supposed to look like.
I had expected the second item to be 0 1 0 1 0.
Also, I thought an epoch is when you go through the data completely; from this it seems that they split the data into 1000 parts (epoch_size)?
If it helps, this is my full code. I am trying to figure out what is going on with x at line 48:
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt

# Global config variables
num_steps = 5  # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1

def gen_data(size=1000000):
    print('generating data')
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)

# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    print('generating batches')
    raw_x, raw_y = raw_data
    data_length = len(raw_x)
    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]
    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)

def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)

"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])

"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

"""
Definition of rnn_cell
This is very similar to the __call__ method on TensorFlow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

def rnn_cell(rnn_input, state):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat(axis=1, values=[rnn_input, state]), W) + b)

"""
Adding rnn_cells to graph
This is a simplified version of the "rnn" function from TensorFlow's API. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py
"""
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]

"""
Predictions, loss, training step
Losses and total_loss are similar to the "sequence_loss_by_example" and "sequence_loss"
functions, respectively, from TensorFlow's API. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py
"""
# logits and predictions
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]

# Turn our y placeholder into a list of labels
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]

# losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=label)
          for logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)

"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss / 100)
                    training_losses.append(training_loss / 100)
                    training_loss = 0
    return training_losses

training_losses = train_network(1, num_steps)
plt.plot(training_losses)
It seems the batches are actually transposed.
So the first elements of the x matrix (200 x 5) will match the first 5 elements of raw_x.
Then only in the next iteration will elements 5-10 of raw_x appear in the first elements (again) of x.
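Here is a small numpy sketch of what gen_batch does, with toy sizes (batch_size=2, num_steps=5, 20 data points) in place of the tutorial's, showing why consecutive slices of raw_x end up in the same row across successive yields rather than side by side in one x:
import numpy as np
raw_x = np.arange(20)  # stand-in for the raw data
batch_size, num_steps = 2, 5
batch_partition_length = len(raw_x) // batch_size  # 10
data_x = raw_x.reshape(batch_size, batch_partition_length)
# data_x[0] = [0..9], data_x[1] = [10..19]
for i in range(batch_partition_length // num_steps):  # epoch_size = 2
    print(data_x[:, i * num_steps:(i + 1) * num_steps])
# First yield:  [[ 0  1  2  3  4]   Second yield: [[ 5  6  7  8  9]
#                [10 11 12 13 14]]                 [15 16 17 18 19]]
# Row 0 continues with 5-9 only in the NEXT yield, which is why the slices
# look "transposed" relative to reading raw_x straight through.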
