SummaryWriter not outputting graph in TensorFlow [duplicate] - python

This question already has answers here:
Save Tensorflow graph for viewing in Tensorboard without summary operations
(5 answers)
Closed 5 years ago.
I am trying to use TensorBoard to analyse a graph in TensorFlow with SummaryWriter. However, TensorFlow is not writing a 'graph' folder with the graph information. Perhaps I am missing a command, or the call is not in the right place?
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
That is what I used. I think this (specifically the SummaryWriter call) may no longer work in TensorFlow 1.0.
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
logs_path = "./graph"
def gen_data(size=1000000):
    """Generate a random binary sequence and a binary sequence that depends on it.

    X is drawn uniformly from {0, 1}. Y[i] is 1 with probability
    ``threshold`` and 0 otherwise, where ``threshold`` starts at 0.5, gains
    0.5 when X[i-3] == 1 and loses 0.25 when X[i-8] == 1.

    Args:
        size: number of elements in each sequence.

    Returns:
        Tuple ``(X, Y)`` of 1-D integer arrays of length ``size``.
    """
    X = np.array(np.random.choice(2, size=(size,)))
    # np.roll(X, k)[i] == X[i - k], which reproduces the wrap-around that the
    # negative indices X[i-3] / X[i-8] give for the first few elements.
    threshold = np.full(size, 0.5)
    threshold[np.roll(X, 3) == 1] += 0.5
    threshold[np.roll(X, 8) == 1] -= 0.25
    # Y is 0 when the uniform draw exceeds the threshold, 1 otherwise.
    Y = (np.random.rand(size) <= threshold).astype(int)
    return X, Y
# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    """Yield (x, y) mini-batches for truncated backpropagation.

    The raw sequences are cut into ``batch_size`` contiguous partitions that
    are stacked as the rows of a matrix; that matrix is then sliced
    column-wise into windows of ``num_steps`` columns. Elements that do not
    fill a whole partition, or a whole window, are dropped.

    Args:
        raw_data: tuple ``(raw_x, raw_y)`` of equally long 1-D sequences.
        batch_size: number of rows in every yielded batch.
        num_steps: number of columns in every yielded batch.

    Yields:
        Tuples ``(x, y)`` of int32 arrays of shape ``(batch_size, num_steps)``.
    """
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    used = batch_size * batch_partition_length
    # Row i holds raw[i * batch_partition_length:(i + 1) * batch_partition_length].
    data_x = np.array(raw_x[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)
    data_y = np.array(raw_y[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)

    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)
def gen_epochs(n, num_steps):
    """Yield ``n`` batch generators, each built from freshly generated data.

    Args:
        n: number of epochs to produce.
        num_steps: window width passed through to ``gen_batch``.

    Yields:
        One ``gen_batch`` generator per epoch; the module-level ``batch_size``
        is used for the batch height.
    """
    for _ in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
Inputs
"""
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)
"""
RNN
"""
cell = tf.contrib.rnn.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)
"""
Predictions, loss, training step
"""
# Output projection: W and b live in the 'softmax' variable scope.
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
# One logits tensor and one softmax tensor per RNN output in rnn_outputs.
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]
loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
tf.scalar_summary("losses", losses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
# Not sure why this is not outputting a graph for tensorboard
# NOTE(review): tf.train.SummaryWriter appears to have been replaced by
# tf.summary.FileWriter as of TensorFlow 1.0 -- confirm against the installed version.
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    """Train the graph defined above and return the recorded average losses.

    Args:
        num_epochs: number of epochs requested from ``gen_epochs``.
        num_steps: window width passed to ``gen_epochs``.
        state_size: width of the zero state fed at the start of each epoch.
        verbose: when true, print the epoch index and the running averages.

    Returns:
        List with one entry per 100 training steps: the accumulated loss
        divided by 100.
    """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            # Each epoch starts from an all-zero state.
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                # The final state of this batch is fed in as the initial
                # state of the next one.
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 100 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses
training_losses = train_network(1,num_steps)
plt.plot(training_losses)
# tensorboard --logdir="my_graph"

This worked for me:
writer = tf.summary.FileWriter(logdir='logdir', graph=tf.get_default_graph())
writer.flush()

Related

tf.get_variable initializer Tensorflow

How important is it to choose the right value for the initializer in TensorFlow?
With this code:
a = tf.get_variable('a', initializer=0.1)
b = tf.get_variable('b', initializer=-3.0)
compared with:
a = tf.get_variable('a', initializer=0.1)
b = tf.get_variable('b', initializer=0.0)
Why, in the second example, doesn't TensorFlow manage to fit the data properly? Is there anything that can be done by changing num_epochs or learning_rate?
This is my code:
# TensorFlow Model
# Config
num_epochs = 2000
learning_rate = 0.0001
# /Config
# Creating the graph
ops.reset_default_graph()
tf.disable_v2_behavior()
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
a = tf.get_variable('a', initializer=0.1)
b = tf.get_variable('b', initializer=-3.0)
h = a * X + b
cost = tf.reduce_mean( (h - Y)**2 )
optimizer = tf.train.GradientDescentOptimizer(
learning_rate=learning_rate
).minimize(cost)
init = tf.global_variables_initializer()
# Running the Model
found_a = 0
found_b = 0
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # One gradient-descent step over the data fed in as x / y.
        _, costValue = sess.run(
            [optimizer, cost],
            feed_dict={
                X: x,
                Y: y,
            }
        )
        # Keep the latest parameter values so they outlive the session.
        found_a = a.eval()
        found_b = b.eval()
        if epoch % (num_epochs/10) == 0: # Every 10 percent
            print("... epoch: " + str(epoch))
            print(f"cost[{str(costValue)}] / a[{str(a.eval())}] / b[{str(b.eval())}]")
# Seing the obtained values in a plot
xrange = np.linspace(x.min(), x.max(), 2)
# Plot points
plt.plot(x, y, 'ro')
# Plot resulting function
plt.plot(xrange, xrange * found_a + found_b, 'b')
plt.show()

Problem with dimension of placeholder in tensorflow

I have a problem with a TensorFlow neural network. I get the error "You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]".
I have tried different shapes for the placeholder as well as for the NumPy array in the feed dictionary. The code is given below.
import numpy as np
import tensorflow as tf
###################################
# data
###################################
N = 100
w_true = 5
b_true = 2
noise_scale = 0.1
x_np = np.random.rand(N,1)
noise = np.random.normal(scale=noise_scale, size = (N,1))
y_np = np.reshape(w_true*x_np+b_true+noise,(-1))
###########################
# Model
###########################
n_hidden = 10
# Inputs fed at run time: features, targets and the dropout keep probability.
with tf.name_scope("palceholders"):
    x = tf.placeholder(tf.float32, (None,1))
    y = tf.placeholder(tf.float32, (None,))
    keep_prob = tf.placeholder(tf.float32)
with tf.name_scope("hidden-layer"):
    W = tf.Variable(tf.random_normal((1, n_hidden)))
    b = tf.Variable(tf.random_normal((n_hidden,)))
    x_hidden = tf.nn.relu(tf.matmul(x,W) + b)
    # add dropout
    x_hidden = tf.nn.dropout(x_hidden, keep_prob)
with tf.name_scope("output"):
    W = tf.Variable(tf.random_normal((n_hidden,1)))
    b = tf.Variable(tf.random_normal((1,)))
    y_logit = tf.matmul(x_hidden, W) + b
    y_one_prob = tf.sigmoid(y_logit)
    y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
    # y is rank 1; add a trailing axis so it lines up with y_logit.
    y_expand = tf.expand_dims(y,1)
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
    l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
    train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
    tf.summary.scalar("loss",l)
    merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter("tmp/full-train", tf.get_default_graph())
#########################
# train model
#########################
step = 0
n_epochs = 15
batch_size = 10
dropout_prob = 0.5
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        pos = 0
        # Walk through the data in consecutive slices of batch_size rows.
        while pos < N:
            batch_x = x_np[pos:pos+batch_size]
            batch_x = batch_x.reshape((-1,1))
            print(batch_x.shape)
            batch_y = y_np[pos:pos+batch_size]
            feed_dict = {x:batch_x, y:batch_y, keep_prob:dropout_prob}
            _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
            print("epoch %d, step %d, loss %f" % (epoch, step, loss))
            train_writer.add_summary(summary, step)
            step += 1
            pos += batch_size
I get an error:
"InvalidArgumentError: You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]"

ValueError: Cannot feed value of shape (256, 0) for Tensor 'Placeholder_2:0', which has shape '(?, 500)'

#Import TsnsorFlow
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Imort data
data = pd.read_csv("C:\\testdata1004.csv")
#Drop data variable
data = data.drop(['DATE'],1)
#Dimensions of dataset
n = data.shape[0]
p = data.shape[1]
#Make data a numpy array
data = data.values
#Training and test data
train_start = 0
train_end = int(np.floor(0.8*n))
test_start = train_end
test_end = n
data_train = data[np.arange(train_start, train_end), :]
data_test = data[np.arange(test_start, test_end), :]
#Scale data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(data_train)
data_train = scaler.transform(data_train)
data_test = scaler.transform(data_test)
#Build X and Y
X_train = data_train[:, 1:]
y_train = data_train[:, 0]
X_test = data_test[:, 1:]
y_test = data_test[:, 0]
#Define a and b as placeholders
a = tf.placeholder(dtype=tf.int8)
b = tf.placeholder(dtype=tf.int8)
#Define the addition of a and b
c = tf.add(a,b)
#Initialize the graph
graph = tf.Session()
#Run the graph
graph.run(c, feed_dict={a: 5, b: 4})
#Model architecture parameters
n_stocks = 500
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
n_target = 1
#Placeholder
X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks])
Y = tf.placeholder(dtype=tf.float32, shape=[None])
#Initializers
sigma = 1
weight_initializer = tf.variance_scaling_initializer(mode="fan_avg",distribution="uniform", scale=sigma)
bias_initializer = tf.zeros_initializer()
#Layer 1: Variables for hidden weights and biases
W_hidden_1 = tf.Variable(weight_initializer([n_stocks,n_neurons_1]))
bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1]))
#Layer 2: VAriables for hidden weights and biases
W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1,n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))
#Layer 3: VAriables for hidden weights and biases
W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2,n_neurons_3]))
bias_hidden_3 = tf.Variable(bias_initializer([n_neurons_3]))
#Layer 4: VAriables for hidden weights and biases
W_hidden_4 = tf.Variable(weight_initializer([n_neurons_3,n_neurons_4]))
bias_hidden_4 = tf.Variable(bias_initializer([n_neurons_4]))
#Output layer : Variables for output weights and biases
W_out = tf.Variable(weight_initializer([n_neurons_4,n_target]))
bias_out = tf.Variable(bias_initializer([n_target]))
#Hidden layer
hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1),bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1,W_hidden_2),bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2,W_hidden_3),bias_hidden_3))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3,W_hidden_4),bias_hidden_4))
#Output layer (must be transposed)
out = tf.transpose(tf.add(tf.matmul(hidden_4, W_out),bias_out))
#Cost function
mse = tf.reduce_mean(tf.squared_difference(out, Y))
#Optimizer
opt = tf.train.AdamOptimizer().minimize(mse)
#Make Session
net = tf.Session()
#Run initializer
net.run(tf.global_variables_initializer())
#Setup interactive plot
plt.ion()
fig = plt.figure()
ax1 = fig.add_subplot(111)
line1, = ax1.plot(y_test)
line2, = ax1.plot(y_test*0.5)
plt.show()
#Number of epochs and batch size
epochs = 10
batch_size = 256
for e in range(epochs):
    #Shuffle training data
    shuffle_indices = np.random.permutation(np.arange(len(y_train)))
    X_train = X_train[shuffle_indices]
    y_train = y_train[shuffle_indices]
    #Minibatch training
    for i in range(0, len(y_train) // batch_size) :
        start = i * batch_size
        batch_x = X_train[start:start + batch_size]
        batch_y = y_train[start:start + batch_size]
        #Run optimizer with batch
        net.run(opt, feed_dict={X:batch_x, Y: batch_y})
        #Show progress
        if np.mod(i,5) == 0:
            #Prediction
            pred = net.run(out, feed_dict={X: X_test})
            line2.set_ydata(pred)
            plt.title('Epoch ' + str(e) + ', Batch ' + str(i))
            file_name = 'img/epoch_' + str(e) + '_batch_' + str(i) + '.jpg'
            plt.savefig(file_name)
            plt.pause(0.01)
#Print final MSE after Training
mse_final = net.run(mse, feed_dict= {X: X_test, Y: y_test})
print(mse_final)
​
I implemented this TensorFlow code, but it doesn't work and I get an error.
This code is just an example that I have. I don't know why I get the error or how I can fix it. Could you tell me how to fix it? Thank you.
The None is for the batch size, but you then need another dimension for the length of the data you feed into Y. Try changing:
Y = tf.placeholder(dtype=tf.float32, shape=[None,1])

LSTM Loss remains the same after numerous iterations

Thanks for looking into this question! :)
I attempted to train an LSTM network to predict the next 10 days of Google stock prices from the past 30 days of prices. I trained the LSTM, but the loss barely decreased even after 200 iterations. I suspected that the issue might be the feed_dict in the tf Session, but I have not identified any problem with it (perhaps due to my superficial knowledge). It seems that the optimizer is reset on every iteration in the tf Session.
Would appreciate if I could seek advice on what might have gone wrong in the code, if my understanding on the usage of Optimizer has been wrong.
Thanks for your help!!
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import csv
import random
import tensorflow as tf
from tensorflow.contrib import rnn
# Define data reader
def read_data(fname):
    """Read a CSV file and return two of its columns.

    Args:
        fname: path of the CSV file. Every row needs at least eight columns,
            and column 7 must hold values that ``float`` can parse.

    Returns:
        Tuple ``(vol, chg)``: ``vol`` is column 6 as a NumPy array of
        strings, ``chg`` is column 7 as a list of floats.
    """
    # newline="" is what the csv module asks for when opening files.
    with open(fname, newline="") as f:
        rows = list(csv.reader(f))
    # Transpose so that each entry of `columns` is one CSV column.
    columns = np.transpose(np.array(rows))
    vol = columns[6]
    chg = [float(value) for value in columns[7]]
    return vol, chg
vol, training_data = read_data('GOOGL.csv')
training_data = training_data[0:300]
print("Loading training data..")
#Split data for learning
ratio_train = 0.70
ratio_valid = 0.90-ratio_train
ratio_test = 0.10 #fixed at 10% of dataset
# Parameters
learning_rate = 0.005
training_iters = 100
display_step = 1
x_size = 30
y_size = 5
n_hidden = 256
# Variables
x = tf.placeholder("float", [265, x_size])
y = tf.placeholder("float", [265, y_size])
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, y_size]))
}
biases = {
'out': tf.Variable(tf.random_normal([y_size]))
}
# Preprocess Data
def prod_data(data):
    """Cut a sequence into sliding input windows and their target windows.

    Window ``i`` pairs ``data[i:i + x_size]`` with the ``y_size`` elements
    starting at ``i + x_size + 1``; ``x_size`` and ``y_size`` are the
    module-level settings.

    Args:
        data: sliceable sequence of values.

    Returns:
        Tuple ``(x, y)`` of lists, each holding
        ``len(data) - x_size - y_size`` windows.
    """
    n_windows = len(data) - x_size - y_size
    x = [data[i:i + x_size] for i in range(n_windows)]
    # NOTE(review): targets start at i + x_size + 1, so the element at index
    # i + x_size is in neither window -- confirm this one-step gap is intended.
    y = [data[i + x_size + 1:i + x_size + 1 + y_size] for i in range(n_windows)]
    return x, y
a,b = prod_data(training_data)
# Define RNN architecture
def RNN(x, weights, biases):
    """Build a two-layer LSTM and project its last output to ``y_size`` values.

    Args:
        x: input tensor; it is reshaped to ``[-1, x_size]``.
        weights: dict whose ``'out'`` entry is the output projection matrix.
        biases: dict whose ``'out'`` entry is the output bias.

    Returns:
        Tensor ``matmul(last_output, weights['out']) + biases['out']``.
    """
    # Reshape x to [batch, x_size], then split it into x_size tensors of
    # shape [batch, 1], one per time step.
    x = tf.reshape(x, [-1, x_size])
    x = tf.split(x, x_size, 1)
    rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden), rnn.BasicLSTMCell(n_hidden)])
    outputs, _ = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)
    # The bias is added to the projected output, not to the weight matrix.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Loss and Optimizer
cost = tf.reduce_mean((pred-y)**2)
optimizer = tf.train.RMSPropOptimizer(learning_rate = learning_rate).minimize(cost)
# Initialization
init = tf.global_variables_initializer()
# Launch Tensor graph
with tf.Session() as sess:
    sess.run(init)
    step = 0
    loss_total = 0
    loss_coll = []
    end_offset = len(training_data)-y_size-x_size-1
    while step < training_iters:
        # Every iteration feeds the same full set of windows (a, b).
        _, loss, model_pred = sess.run([optimizer, cost, pred], \
                                       feed_dict={x: a, y: b})
        # Update total loss and accuracy
        loss_total += loss
        loss_coll.append(loss)
        if (step+1) % display_step == 0:
            print("Loss at step " + str(step) + " = " + str(loss))
            loss_total = 0
        step += 1
    print("Optimization Finished!")

Splitting ndarray gives unexpected results (TensorFlow RNN tutorial)

I am following a tutorial on rnn's in TensorFlow but I have a question concerning the input formats.
They take raw_x (a one-hot vector) and first cut it into 200 (batch_size) pieces to form data_x. That is fine.
Then they further cut data_x into pieces of length 5 (num_steps, or graph width) with:
for i in range(epoch_size):
x = data_x[:, i * num_steps:(i + 1) * num_steps]
y = data_y[:, i * num_steps:(i + 1) * num_steps]
yield (x, y)
However, if I look in the data, the slices of x do not match data_x. The first one does, but then they diverge.
Am I misunderstanding the above code? I would like to understand how x is being created or what it is supposed to look like.
I had expected the second item to be 0 1 0 1 0.
Also, I thought an epoch is when you go through the data completely, from this it seems that they split up the data in 1000 parts (epoch size)?
If it helps, this is my full code. I am trying to figure out what is going on in x at line 48:
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
def gen_data(size=1000000):
    """Generate a random binary sequence and a binary sequence that depends on it.

    X is drawn uniformly from {0, 1}. Y[i] is 1 with probability
    ``threshold`` and 0 otherwise, where ``threshold`` starts at 0.5, gains
    0.5 when X[i-3] == 1 and loses 0.25 when X[i-8] == 1.

    Args:
        size: number of elements in each sequence.

    Returns:
        Tuple ``(X, Y)`` of 1-D integer arrays of length ``size``.
    """
    print('generating data')
    X = np.array(np.random.choice(2, size=(size,)))
    # np.roll(X, k)[i] == X[i - k], which reproduces the wrap-around that the
    # negative indices X[i-3] / X[i-8] give for the first few elements.
    threshold = np.full(size, 0.5)
    threshold[np.roll(X, 3) == 1] += 0.5
    threshold[np.roll(X, 8) == 1] -= 0.25
    # Y is 0 when the uniform draw exceeds the threshold, 1 otherwise.
    Y = (np.random.rand(size) <= threshold).astype(int)
    return X, Y
# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    """Yield (x, y) mini-batches for truncated backpropagation.

    The raw sequences are cut into ``batch_size`` contiguous partitions that
    are stacked as the rows of a matrix; that matrix is then sliced
    column-wise into windows of ``num_steps`` columns. Row ``r`` of the
    first batch therefore holds the first ``num_steps`` elements of
    partition ``r``, not consecutive elements of the raw sequence.

    Args:
        raw_data: tuple ``(raw_x, raw_y)`` of equally long 1-D sequences.
        batch_size: number of rows in every yielded batch.
        num_steps: number of columns in every yielded batch.

    Yields:
        Tuples ``(x, y)`` of int32 arrays of shape ``(batch_size, num_steps)``.
    """
    print('generating batches')
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    used = batch_size * batch_partition_length
    # Row i holds raw[i * batch_partition_length:(i + 1) * batch_partition_length].
    data_x = np.array(raw_x[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)
    data_y = np.array(raw_y[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)

    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)
def gen_epochs(n, num_steps):
    """Yield ``n`` batch generators, each built from freshly generated data.

    Args:
        n: number of epochs to produce.
        num_steps: window width passed through to ``gen_batch``.

    Yields:
        One ``gen_batch`` generator per epoch; the module-level ``batch_size``
        is used for the batch height.
    """
    for _ in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)
"""
Definition of rnn_cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
# Create the cell's weight matrix and bias once; rnn_cell() below looks them
# up again from the same scope with reuse=True.
with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
def rnn_cell(rnn_input, state):
    """Apply one step of the RNN cell.

    Args:
        rnn_input: input tensor for this time step.
        state: state tensor from the previous time step.

    Returns:
        ``tanh(concat([rnn_input, state], axis=1) @ W + b)``, with W and b
        fetched from the 'rnn_cell' variable scope.
    """
    # reuse=True looks up the variables already created in this scope.
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat(axis=1, values=[rnn_input, state]), W) + b)
"""
Adding rnn_cells to graph
This is a simplified version of the "rnn" function from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py
"""
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
state = rnn_cell(rnn_input, state)
rnn_outputs.append(state)
final_state = rnn_outputs[-1]
"""
Predictions, loss, training step
Losses and total_loss are simlar to the "sequence_loss_by_example" and "sequence_loss"
functions, respectively, from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py
"""
#logits and predictions
# Output projection: W and b live in the 'softmax' variable scope.
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
# One logits tensor and one softmax tensor per RNN output in rnn_outputs.
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
# Turn our y placeholder into a list labels
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]
#losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit,labels=label) for \
logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    """Train the graph defined above and return the recorded average losses.

    Args:
        num_epochs: number of epochs requested from ``gen_epochs``.
        num_steps: window width passed to ``gen_epochs``.
        state_size: width of the zero state fed at the start of each epoch.
        verbose: when true, print the epoch index and the running averages.

    Returns:
        List with one entry per 100 training steps: the accumulated loss
        divided by 100.
    """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            # Each epoch starts from an all-zero state.
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                # The final state of this batch is fed in as the initial
                # state of the next one.
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 100 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses
training_losses = train_network(1,num_steps)
plt.plot(training_losses)
It seems the batches are effectively transposed.
The first row of the x matrix (200 x 5) matches the first 5 elements of raw_x.
Only in the next iteration do the next 5 elements of raw_x (elements 5-10) appear, again in the first row of x.

Categories

Resources