I am building several simple networks to predict bike rentals at 500 stations in the upcoming hour, given the rentals at all stations over the previous 24 hours. I am working with two architectures: one with a graph convolution (which amounts to updating each station with a learned linear combination of the other stations, at each hour) followed by an FNN layer for the prediction, and a second with a graph convolution -> LSTM -> FNN for the prediction.
Before I describe more: I am getting poorer performance from the model that includes the LSTM unit, which is confusing me.
See the two images linked at the bottom of the post for a description of each architecture. For each architecture I also add hourly metadata (weather, time, etc.) as extra features; these appear in red in the images and are not relevant to my question.
[Architecture 1: GCNN + FNN][1]
[Architecture 2: GCNN + LSTM + FNN][2]
Confusingly, the test RMSE for the first model is 3.46, while for the second model it is 3.57. Could someone please explain why the second isn't lower, since it seems to run the exact same process except for the additional LSTM unit?
Here are relevant snippets of my code for the GCNN+FNN model:
def gcnn_ddgf(hidden_layer, node_num, feature_in, horizon, learning_rate, beta, batch_size, early_stop_th, training_epochs, X_training, Y_training, X_val, Y_val, X_test, Y_test, scaler, display_step):
    n_output_vec = node_num * horizon  # length of output vector at the final layer
    early_stop_k = 0  # early stop patience
    best_val = 10000
    training_error = 0
    test_error = 0
    pred_Y = []

    tf.reset_default_graph()

    batch_size = batch_size
    early_stop_th = early_stop_th
    training_epochs = training_epochs

    # tf Graph input and output
    X = tf.placeholder(tf.float32, [None, node_num, feature_in])  # X is the input signal
    Y = tf.placeholder(tf.float32, [None, n_output_vec])  # y is the regression output

    # define dictionaries to store layers weight & bias
    weights_hidden = {}
    weights_A = {}
    biases = {}

    vec_length = feature_in
    weights_hidden['h1'] = tf.Variable(tf.random_normal([vec_length, hidden_layer], stddev=0.5))
    biases['b1'] = tf.Variable(tf.random_normal([1, hidden_layer], stddev=0.5))
    weights_A['A1'] = tf.Variable(tf.random_normal([node_num, node_num], stddev=0.5))
    weights_hidden['out'] = tf.Variable(tf.random_normal([hidden_layer, horizon], stddev=0.5))
    biases['bout'] = tf.Variable(tf.random_normal([1, horizon], stddev=0.5))

    # Construct model
    pred = gcn(X, weights_hidden, weights_A, biases, node_num, horizon)  # see below
    pred = scaler.inverse_transform(pred)
    Y_original = scaler.inverse_transform(Y)
    cost = tf.sqrt(tf.reduce_mean(tf.pow(pred - Y_original, 2)))

    # optimizer = tf.train.RMSPropOptimizer(learning_rate, decay).minimize(cost)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            avg_cost_sq = 0.
            num_train = X_training.shape[0]
            total_batch = int(num_train/batch_size)
            for i in range(total_batch):
                _, c = sess.run([optimizer, cost], feed_dict={X: X_training[i*batch_size:(i+1)*batch_size,],
                                                              Y: Y_training[i*batch_size:(i+1)*batch_size,]})
                avg_cost_sq += np.square(c) * batch_size  # / total_batch
            # rest part of training dataset
            if total_batch * batch_size != num_train:
                _, c = sess.run([optimizer, cost], feed_dict={X: X_training[total_batch*batch_size:num_train,],
                                                              Y: Y_training[total_batch*batch_size:num_train,]})
                avg_cost_sq += np.square(c) * (num_train - total_batch*batch_size)
            avg_cost = np.sqrt(avg_cost_sq / num_train)

            # validation
            c_val, = sess.run([cost], feed_dict={X: X_val, Y: Y_val})
            if c_val < best_val:
                # testing
                c_tes, preds, Y_true = sess.run([cost, pred, Y_original], feed_dict={X: X_test, Y: Y_test})
                best_val = c_val
                test_error = c_tes
                training_error = avg_cost
                pred_Y = preds
                early_stop_k = 0  # reset to 0

            # update early stopping patience
            if c_val >= best_val:
                early_stop_k += 1

            # threshold
            if early_stop_k == early_stop_th:
                break

            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "Training RMSE: ", "{:.9f}".format(avg_cost))
                print("Validation RMSE: ", c_val)
                print("Lowest test RMSE: ", test_error)

    print("epoch is ", epoch)
    print("training RMSE is ", training_error)
    print("Optimization Finished! the lowest validation RMSE is ", best_val)
    print("The test RMSE is ", test_error)

    return best_val, pred_Y, Y_true, test_error
# code that creates the model
def gcn(signal_in, weights_hidden, weights_A, biases, node_num, horizon):
    signal_in = tf.transpose(signal_in, [1, 0, 2])  # node_num, batch, feature_in
    feature_len = signal_in.shape[2]  # feature vector length at the node of the input graph
    signal_in = tf.reshape(signal_in, [node_num, -1])  # node_num, batch*feature_in

    Adj = 0.5*(weights_A['A1'] + tf.transpose(weights_A['A1']))
    Adj = normalize_adj(Adj)
    Z = tf.matmul(Adj, signal_in)  # node_num, batch*feature_in
    Z = tf.reshape(Z, [-1, int(feature_len)])  # node_num * batch, feature_in

    signal_output = tf.add(tf.matmul(Z, weights_hidden['h1']), biases['b1'])
    signal_output = tf.nn.relu(signal_output)  # node_num * batch, hidden_vec

    final_output = tf.add(tf.matmul(signal_output, weights_hidden['out']), biases['bout'])  # node_num * batch, horizon
    # final_output = tf.nn.relu(final_output)
    final_output = tf.reshape(final_output, [node_num, -1, horizon])  # node_num, batch, horizon
    final_output = tf.transpose(final_output, [1, 0, 2])  # batch, node_num, horizon
    final_output = tf.reshape(final_output, [-1, node_num*horizon])  # batch, node_num*horizon
    return final_output
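Both snippets call normalize_adj, which I haven't included here. For reference only, a typical implementation of such a function is the symmetric degree normalization used in GCNs; this is just an illustration of what that step usually looks like, not my exact code:

def normalize_adj(adj):
    # Illustrative sketch (assumed, not the original function): symmetric
    # normalization D^{-1/2} A D^{-1/2}, a common choice for graph convolutions.
    rowsum = tf.reduce_sum(adj, axis=1)
    d_inv_sqrt = tf.pow(rowsum, -0.5)
    d_inv_sqrt = tf.where(tf.is_inf(d_inv_sqrt), tf.zeros_like(d_inv_sqrt), d_inv_sqrt)
    d_mat = tf.diag(d_inv_sqrt)
    return tf.matmul(tf.matmul(d_mat, adj), d_mat)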
And the code for the GCNN+LSTM+FNN model:
def gcnn_ddgf_lstm(node_num, feature_in, learning_rate, beta, batch_size, early_stop_th, training_epochs, X_training,
                   Y_training, X_val, Y_val, X_test, Y_test, scaler, lstm_layer):
    n_output_vec = node_num  # length of output vector at the final layer
    early_stop_k = 0  # early stop patience
    display_step = 1  # frequency of printing results
    best_val = 10000
    training_error = 0
    test_error = 0
    predic_res = []

    tf.reset_default_graph()

    batch_size = batch_size
    early_stop_th = early_stop_th
    training_epochs = training_epochs

    # tf Graph input and output
    X = tf.placeholder(tf.float32, [None, node_num, feature_in])  # X is the input signal
    Y = tf.placeholder(tf.float32, [None, n_output_vec])  # y is the regression output
    lstm_cell = tf.nn.rnn_cell.LSTMCell(lstm_layer, state_is_tuple=True)

    # define dictionaries to store layers weight & bias
    weights_hidden = {}
    weights_A = {}
    biases = {}

    weights_A['A1'] = tf.Variable(tf.random_normal([node_num, node_num], stddev=0.5))
    weights_hidden['h1'] = tf.Variable(tf.random_normal([lstm_layer, node_num], stddev=0.5))
    biases['h1'] = tf.Variable(tf.random_normal([1, node_num], stddev=0.5))
    weights_hidden['out'] = tf.Variable(tf.random_normal([node_num, node_num], stddev=0.5))
    biases['bout'] = tf.Variable(tf.random_normal([1, node_num], stddev=0.5))

    # Construct model
    pred = gcn_lstm(X, weights_hidden, weights_A, biases, node_num, lstm_cell)
    # pred = scaler.inverse_transform(pred)
    # Y_original = scaler.inverse_transform(Y)
    cost = tf.sqrt(tf.reduce_mean(tf.pow(pred - Y, 2)))

    # optimizer = tf.train.RMSPropOptimizer(learning_rate, decay).minimize(cost)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            avg_cost_sq = 0.
            num_train = X_training.shape[0]
            total_batch = int(num_train/batch_size)
            for i in range(total_batch):
                _, c = sess.run([optimizer, cost], feed_dict={X: X_training[i*batch_size:(i+1)*batch_size,],
                                                              Y: Y_training[i*batch_size:(i+1)*batch_size,]})
                avg_cost_sq += np.square(c) * batch_size  # / total_batch
            # rest part of training dataset
            if total_batch * batch_size != num_train:
                _, c = sess.run([optimizer, cost], feed_dict={X: X_training[total_batch*batch_size:num_train,],
                                                              Y: Y_training[total_batch*batch_size:num_train,]})
                avg_cost_sq += np.square(c) * (num_train - total_batch*batch_size)
            avg_cost = np.sqrt(avg_cost_sq / num_train)

            # validation
            c_val, = sess.run([cost], feed_dict={X: X_val, Y: Y_val})
            if c_val < best_val:
                c_tes, preds = sess.run([cost, pred], feed_dict={X: X_test, Y: Y_test})
                best_val = c_val
                # save model
                # saver.save(sess, './bikesharing_gcnn_ddgf')
                test_error = c_tes
                training_error = avg_cost
                early_stop_k = 0  # reset to 0

            # update early stopping patience
            if c_val >= best_val:
                early_stop_k += 1

            # threshold
            if early_stop_k == early_stop_th:
                pred_Y = scaler.inverse_transform(preds)
                Y_true = scaler.inverse_transform(Y_test)
                test_err = tf.sqrt(tf.reduce_mean(tf.pow(pred_Y - Y_true, 2)))
                break

            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "Training RMSE: ", "{:.9f}".format(avg_cost))
                print("Validation RMSE: ", c_val)
                print("Lowest test RMSE: ", test_error)

    print("epoch is ", epoch)
    print("training RMSE is ", training_error)
    print("Optimization Finished! the lowest validation RMSE is ", best_val)
    print("The scaled test RMSE is ", test_error)

    return pred_Y, Y_true
def gcn_lstm(signal_in, weights_hidden, weights_A, biases, node_num, lstm_cell):
    signal_in = tf.transpose(signal_in, [1, 0, 2])  # node_num, batch, feature_in
    feature_len = signal_in.shape[2]  # feature vector length at the node of the input graph
    signal_in = tf.reshape(signal_in, [node_num, -1])  # node_num, batch*feature_in

    Adj = 0.5*(weights_A['A1'] + tf.transpose(weights_A['A1']))
    Adj = normalize_adj(Adj)
    Z = tf.matmul(Adj, signal_in)  # node_num, batch*feature_in
    Z = tf.reshape(Z, [node_num, -1, int(feature_len)])  # node_num, batch, feature_in
    Z = tf.transpose(Z, [1, 2, 0])  # batch, feature_in, node_num

    # init_state = cell.zero_state(batch_size, tf.float32)
    _, Z = tf.nn.dynamic_rnn(lstm_cell, Z, dtype=tf.float32)  # init_state?

    dense_output = tf.add(tf.matmul(Z[1], weights_hidden['h1']), biases['h1'])
    dense_output = tf.nn.relu(dense_output)

    final_output = tf.add(tf.matmul(dense_output, weights_hidden['out']), biases['bout'])  # batch, node_num
    return final_output
In particular, should I be wary that _, Z = tf.nn.dynamic_rnn(lstm_cell, Z, dtype=tf.float32) causes my variables defined elsewhere not to train?
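One way to check this, if it helps (a minimal sketch assuming the graph above has already been built and cost is the RMSE tensor), is to list which trainable variables actually receive a gradient from the loss:

grads = tf.gradients(cost, tf.trainable_variables())
for var, grad in zip(tf.trainable_variables(), grads):
    # A None gradient would mean the variable is disconnected from the loss.
    print(var.name, "has gradient" if grad is not None else "NO GRADIENT")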
Thanks a lot for any help :)
[1]: https://i.stack.imgur.com/MAO2t.png
[2]: https://i.stack.imgur.com/UDjHw.png
I resolved this.
I have three years of bike-use data to make the prediction, and was using roughly the last three months as my validation/test set. Those last few months were winter, with much lower bike use. I got the expected result (GCNN+LSTM outperforms GCNN, though not by much) once I shuffled my data before allocating it to the training/validation/test sets (with the 24-hour input sequences kept intact for the LSTM).
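Roughly what I mean, as a simplified sketch (not my exact code; names and split fractions are illustrative): each sample is already a complete 24-hour window, so whole samples can be shuffled before splitting, which mixes the seasons across the sets:

import numpy as np

def shuffled_split(X, Y, val_frac=0.15, test_frac=0.15, seed=42):
    # X: (num_samples, node_num, 24) input windows, Y: (num_samples, node_num) targets.
    # Shuffle whole windows, then split, so every set contains all seasons.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))
    n_test = int(len(X) * test_frac)
    n_val = int(len(X) * val_frac)
    test_idx = idx[:n_test]
    val_idx = idx[n_test:n_test + n_val]
    train_idx = idx[n_test + n_val:]
    return (X[train_idx], Y[train_idx]), (X[val_idx], Y[val_idx]), (X[test_idx], Y[test_idx])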
I'm working on an implementation of the word2vec architecture from scratch, but my model doesn't converge.
class SkipGramBatcher:
    def __init__(self, text):
        self.text = text.results

    def get_batches(self, batch_size):
        n_batches = len(self.text)//batch_size
        pairs = []

        for idx in range(0, len(self.text)):
            window_size = 5
            idx_neighbors = self._get_neighbors(self.text, idx, window_size)
            # one_hot_idx = self._to_one_hot(idx)
            # idx_pairs = [(one_hot_idx, self._to_one_hot(idx_neighbor)) for idx_neighbor in idx_neighbors]
            idx_pairs = [(idx, idx_neighbor) for idx_neighbor in idx_neighbors]
            pairs.extend(idx_pairs)

        for idx in range(0, len(pairs), batch_size):
            X = [pair[0] for pair in pairs[idx:idx+batch_size]]
            Y = [pair[1] for pair in pairs[idx:idx+batch_size]]
            yield X, Y

    def _get_neighbors(self, text, idx, window_size):
        text_length = len(text)
        start = max(idx-window_size, 0)
        end = min(idx+window_size+1, text_length)
        neighbors_words = set(text[start:end])
        return list(neighbors_words)

    def _to_one_hot(self, indexes):
        n_values = np.max(indexes) + 1
        return np.eye(n_values)[indexes]
I use the text8 corpus and have applied preprocessing techniques such as stemming, lemmatization and subsampling. I have also excluded English stop words and limited the vocabulary:
vocab_size = 20000
text_len = len(text)
test_text_len = int(text_len*0.15)
preprocessed_text = PreprocessedText(text,vocab_size)
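The subsampling mentioned above is the usual word2vec heuristic of randomly dropping very frequent words. A simplified sketch of that step (illustrative only, not my exact preprocessing code), assuming text is a list of integer token ids:

from collections import Counter
import numpy as np

def subsample(text, t=1e-5, seed=0):
    # Keep token w with probability min(1, sqrt(t / f(w))), where f(w) is its relative frequency.
    rng = np.random.RandomState(seed)
    counts = Counter(text)
    total = len(text)
    p_keep = {w: min(1.0, np.sqrt(t * total / c)) for w, c in counts.items()}
    return [w for w in text if rng.rand() < p_keep[w]]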
I use TensorFlow for the graph computation:
train_graph = tf.Graph()
with train_graph.as_default():
    inputs = tf.placeholder(tf.int32, [None], name='inputs')
    labels = tf.placeholder(tf.int32, [None, None], name='labels')

n_embedding = 300
with train_graph.as_default():
    embedding = tf.Variable(tf.random_uniform((vocab_size, n_embedding), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, inputs)
And I apply negative sampling:
# Number of negative labels to sample
n_sampled = 100
with train_graph.as_default():
    softmax_w = tf.Variable(tf.truncated_normal((vocab_size, n_embedding)))  # create softmax weight matrix here
    softmax_b = tf.Variable(tf.zeros(vocab_size), name="softmax_bias")  # create softmax biases here

    # Calculate the loss using negative sampling
    loss = tf.nn.sampled_softmax_loss(
        weights=softmax_w,
        biases=softmax_b,
        labels=labels,
        inputs=embed,
        num_sampled=n_sampled,
        num_classes=vocab_size)

    cost = tf.reduce_mean(loss)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
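Note that tf.nn.sampled_softmax_loss expects labels of shape [batch_size, num_true] (num_true defaults to 1), which is why the training loop below feeds np.array(batch_y)[:, None]. A tiny illustration:

import numpy as np

batch_y = [12, 7, 33]                     # example target word ids for a batch of 3
labels_feed = np.array(batch_y)[:, None]  # shape (3, 1), i.e. [batch_size, num_true]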
Finally, I train my model:
epochs = 10
batch_size = 64
avg_loss = []

with train_graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=train_graph) as sess:
    iteration = 1
    loss = 0
    sess.run(tf.global_variables_initializer())

    for e in range(1, epochs+1):
        batches = skip_gram_batcher.get_batches(batch_size)
        start = time.time()
        for batch_x, batch_y in batches:
            feed = {inputs: batch_x,
                    labels: np.array(batch_y)[:, None]}
            train_loss, _ = sess.run([cost, optimizer], feed_dict=feed)

            loss += train_loss

            if iteration % 100 == 0:
                end = time.time()
                print("Epoch {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Avg. Batch loss: {:.4f}".format(loss/iteration),
                      "{:.4f} sec/batch".format((end-start)/100))
                # loss = 0
                avg_loss.append(loss/iteration)
                start = time.time()

            iteration += 1
    save_path = saver.save(sess, "checkpoints/text8.ckpt")
But after running this model, my average batch loss doesn't decrease dramatically (see the attached loss plot).
I guess I have made a mistake somewhere. Any help is appreciated.
What makes you say "my average batch loss doesn't decrease dramatically"? The graph you've attached shows some (unlabeled) value decreasing significantly, and still decreasing with a steep slope at the end of the plotted data.
"Convergence" would show up as the improvement-in-loss first slowing, then stopping.
But if your loss is still noticeably dropping, just keep training! Using more epochs can be especially important on small datasets – like the tiny text8 you're using.
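If it helps, a rough way to check for that flattening (a sketch, assuming losses is the list of loss values you logged during training) is to compare the mean over the most recent stretch of iterations with the stretch before it:

import numpy as np

def still_improving(losses, window=50, tol=1e-3):
    # True if the mean loss over the last `window` logged values is still
    # noticeably lower than the mean over the previous `window` values.
    if len(losses) < 2 * window:
        return True  # not enough history to judge
    recent = np.mean(losses[-window:])
    previous = np.mean(losses[-2 * window:-window])
    return (previous - recent) / max(previous, 1e-12) > tol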
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv("FuelConsumption.csv")
df = df[['ENGINE SIZE','CYLINDERS', 'Mcity', 'Mhwy', 'Mcmb', 'McmbMPG', 'CO2']]
features = np.asanyarray(df[['ENGINE SIZE','CYLINDERS', 'Mcity', 'Mhwy', 'Mcmb', 'McmbMPG']])
label = np.asanyarray(df[['CO2']])
mu = np.mean(features,axis=0)
sigma = np.std(features,axis=0)
feature_normalized = (features - mu)/sigma
n_training_samples = feature_normalized.shape[0]
n_dim = feature_normalized.shape[1]
feature_reshaped = np.reshape(features,[n_training_samples,n_dim])
label_reshaped = np.reshape(label,[n_training_samples,1])
train_X, test_X, train_Y, test_Y = train_test_split(feature_reshaped, label_reshaped, shuffle=True, test_size=0.25, random_state=42)
print("shape of training input = ", train_X.shape)
print("shape of training output = ", train_Y.shape)
numFeatures = train_X.shape[1]
print("Number of features = ", numFeatures)
numLabels = train_Y.shape[1]
print("Number of labels = ", numLabels)
learning_rate = 0.01
training_epochs = 1000
X = tf.placeholder(tf.float32,[None,numFeatures])
Y = tf.placeholder(tf.float32,[None,numLabels])
W = tf.Variable(tf.ones([numFeatures,numLabels]))
B = tf.Variable(tf.ones([1,numLabels]))
init = tf.global_variables_initializer()
Y_model = tf.add(tf.matmul(X, W, name="apply_weights"), B, name="add_bias")
cost = tf.reduce_mean(tf.square(Y_model - Y))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
sess = tf.Session()
sess.run(init)
loss_values = []
train_data = []
for epoch in range(training_epochs):
    _, loss_val, a_val, b_val = sess.run([training_step, cost, W, B], feed_dict={X: train_X, Y: train_Y})
    loss_values.append(loss_val)
    if epoch % 20 == 0:
        print(epoch, loss_val, a_val, b_val)
    train_data.append([a_val, b_val])

plt.plot(loss_values, '--')
plt.show()
I am trying to predict CO2 emissions from multiple variables (cylinders, mileage, engine size, etc.) using linear regression, with the above code in TensorFlow. When I run the model, the loss value, weights and biases are updated for only about 20 iterations, and after that they become infinity (NaN). Is the problem with the code, or with the choice of cost function/optimizer?
I have plotted the loss values (plot omitted here).
If only engine size and cylinders are used as features, the result is good. Adding any of the other features (mileage in city, mileage on highway, combined mileage) leads to the problem described above. Is it a problem with the data itself? Please take a look at the scatter plot of mileage in city and on the highway (also omitted here).
I am currently facing an issue with the next_batch function. When I try to run the code I have below, I get this error:
AttributeError: 'str' object has no attribute 'nextBatch'
I suspect that the issue stems from the next_batch function, which I have tried to implement myself. I based the code for this logistic regression model on the TensorFlow example below:
https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/logistic_regression.py
I have based the code for the next_batch function on a previous StackOverflow post.
how to implement tensorflow's next_batch for own data
Thanks for the help in advance.
from __future__ import print_function
import tensorflow as tf
import os as os
from LogRegModel import csvToTrainTF, csvtoTestTF
dir_path = os.path.dirname(os.path.realpath(__file__))
filename1 = dir_path + "/TrainData2.csv"
filename2 = dir_path + "/TestData2.csv"
filenameTrain = 'TrainData2.tfrecords'
filenameTest = 'TestData2.tfrecords'
trainData = csvToTrainTF(filename1, filenameTrain)
testData = csvtoTestTF(filename2, filenameTest)
learning_rate = 0.01
training_epochs = 20
batch_size = 32
display_step = 1
x = tf.placeholder(tf.float32, [None, 9])
y = tf.placeholder(tf.float32, [None, 2])
W = tf.Variable(tf.zeros([9,2]))
b = tf.Variable(tf.zeros([2]))
pred = tf.nn.softmax(tf.matmul(x,W) + b)
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices = 1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    start = 0

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(992/batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = trainData.nextBatch(batch_size)  # Error is occurring in the train.next_batch call? Could you please tell me how to write out the function?
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c/total_batch
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    # I am not sure if this is the correct way to print out the accuracy. Could you please check this?
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: trainData[0:9], y: trainData[10]}))
Above is the code for the main function.
Below is the code for next_batch:
def next_batch(self, batch_size, shuffle=True):
    start = self._index_in_epoch
    if start == 0 and self._epochs_completed == 0:
        idx = np.arange(0, self._num_examples)  # get all possible indexes
        np.random.shuffle(idx)  # shuffle indexes
        self._data = self.data[idx]  # get list of `num` random samples

    # go to the next batch
    if start + batch_size > self._num_examples:
        self._epochs_completed += 1
        rest_num_examples = self._num_examples - start
        data_rest_part = self.data[start:self._num_examples]
        idx0 = np.arange(0, self._num_examples)  # get all possible indexes
        np.random.shuffle(idx0)  # shuffle indexes
        self._data = self.data[idx0]  # get list of `num` random samples

        start = 0
        self._index_in_epoch = batch_size - rest_num_examples  # avoid the case where the number of samples is not an integer multiple of batch_size
        end = self._index_in_epoch
        data_new_part = self._data[start:end]
        return np.concatenate((data_rest_part, data_new_part), axis=0)
    else:
        self._index_in_epoch += batch_size
        end = self._index_in_epoch
        return self._data[start:end]
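For reference, a next_batch method along these lines is normally attached to a small dataset object that keeps the features and labels as aligned numpy arrays, together with the bookkeeping fields used above (_data, _index_in_epoch, _epochs_completed). A simplified, self-contained sketch of such a wrapper (hypothetical names, not based on the csvToTrainTF code above):

import numpy as np

class DataSet:
    # Illustrative only: holds features/labels as aligned numpy arrays and
    # serves shuffled mini-batches, reshuffling at the end of each epoch.
    def __init__(self, features, labels):
        self._features = np.asarray(features, dtype=np.float32)
        self._labels = np.asarray(labels, dtype=np.float32)
        self._num_examples = self._features.shape[0]
        self._index_in_epoch = 0
        self._order = np.random.permutation(self._num_examples)

    def next_batch(self, batch_size):
        # Reshuffle and restart when the current epoch is exhausted.
        if self._index_in_epoch + batch_size > self._num_examples:
            self._order = np.random.permutation(self._num_examples)
            self._index_in_epoch = 0
        idx = self._order[self._index_in_epoch:self._index_in_epoch + batch_size]
        self._index_in_epoch += batch_size
        return self._features[idx], self._labels[idx]

With an object like this, batch_xs, batch_ys = trainData.next_batch(batch_size) returns aligned feature/label batches for the feed_dict.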
This question already has answers here: Save Tensorflow graph for viewing in Tensorboard without summary operations (5 answers)
Closed 5 years ago.
I am trying to use TensorBoard to analyse a TensorFlow graph with SummaryWriter. However, TensorFlow is not outputting a 'graph' folder with the information. Perhaps I am missing a command, or it is not in the right place?
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
is what I used. I think this may not work in TensorFlow 1.0 anymore (just the SummaryWriter command).
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
logs_path = "./graph"
def gen_data(size=1000000):
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)
# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]

    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)

def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
Inputs
"""
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)
"""
RNN
"""
cell = tf.contrib.rnn.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)
"""
Predictions, loss, training step
"""
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]
loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
tf.scalar_summary("losses", losses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
# Not sure why this is not outputting a graph for tensorboard
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        saved = gen_epochs(num_epochs, num_steps)
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0

    return training_losses

training_losses = train_network(1, num_steps)
plt.plot(training_losses)
# tensorboard --logdir="my_graph"
This worked for me:
writer = tf.summary.FileWriter(logdir='logdir', graph=tf.get_default_graph())
writer.flush()
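If you also want scalar summaries, note that tf.scalar_summary was likewise renamed in TensorFlow 1.0. A rough sketch of the full TF 1.x pattern, assuming it is added to the script above where total_loss is defined:

tf.summary.scalar("total_loss", total_loss)   # tf.scalar_summary -> tf.summary.scalar in TF 1.0
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(logdir='logdir', graph=tf.get_default_graph())

# inside the training loop:
# summary, _ = sess.run([merged, train_step], feed_dict={x: X, y: Y, init_state: training_state})
# writer.add_summary(summary, global_step=step)
writer.flush()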