Splitting ndarray gives unexpected results (TensorFlow RNN tutorial) - python

I am following a tutorial on RNNs in TensorFlow, but I have a question about the input format.
They take raw_x (the binary input sequence) and first cut it up into batch_size = 200 pieces to form data_x. That part is fine.
Then they further cut up data_x into pieces of length 5 (num_steps, or graph width) with:
for i in range(epoch_size):
    x = data_x[:, i * num_steps:(i + 1) * num_steps]
    y = data_y[:, i * num_steps:(i + 1) * num_steps]
    yield (x, y)
However, if I look at the data, the slices of x do not match data_x: the first one does, but then they diverge.
Am I misunderstanding the code above? I would like to understand how x is created and what it is supposed to look like.
I had expected the second item to be 0 1 0 1 0.
Also, I thought an epoch is one complete pass through the data, but from this it seems they split the data into 1000 parts (epoch_size)?
If it helps, my full code is below. I am trying to figure out what is going on in x at line 48:
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
def gen_data(size=1000000):
    print('generating data')
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)
# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    print('generating batches')
    raw_x, raw_y = raw_data
    data_length = len(raw_x)
    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]
    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)
def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)
"""
Definition of rnn_cell
This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

def rnn_cell(rnn_input, state):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat(axis=1, values=[rnn_input, state]), W) + b)
"""
Adding rnn_cells to graph
This is a simplified version of the "rnn" function from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py
"""
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]
"""
Predictions, loss, training step
Losses and total_loss are similar to the "sequence_loss_by_example" and "sequence_loss"
functions, respectively, from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py
"""
#logits and predictions
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
# Turn our y placeholder into a list of labels
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]
#losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=label) for
          logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x:X, y:Y, init_state:training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses
training_losses = train_network(1,num_steps)
plt.plot(training_losses)

It seems the batches are effectively transposed.
So the first row of the x matrix (200 x 5) matches the first 5 elements of raw_x.
Then only in the next iteration do elements 5-10 of raw_x show up, again in the first row of x.
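To make that concrete, here is a tiny sketch with made-up numbers (not the tutorial's data), assuming batch_size = 2 and num_steps = 3:

import numpy as np

raw_x = np.arange(12)                                # pretend input sequence: 0..11
batch_size, num_steps = 2, 3
batch_partition_length = len(raw_x) // batch_size    # 6
# each row of data_x is one contiguous chunk of raw_x
data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
for i in range(batch_size):
    data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
# data_x -> [[0 1 2 3 4 5]
#            [6 7 8 9 10 11]]
# the yielded x slices walk across the columns, num_steps at a time
for i in range(batch_partition_length // num_steps):
    print(data_x[:, i * num_steps:(i + 1) * num_steps])
# step 0 -> [[0 1 2], [6 7 8]]   row 0 continues raw_x[0:3], row 1 jumps to raw_x[6:9]
# step 1 -> [[3 4 5], [9 10 11]]

With the actual sizes (size = 1,000,000, batch_size = 200, num_steps = 5) this gives batch_partition_length = 5000 and epoch_size = 5000 // 5 = 1000, so the 1000 slices per epoch together still cover the whole dataset exactly once.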

Related

Denoising linear autoencoder learns to output a constant instead of denoising

I am trying to create a denoising autoencoder for 1D cyclic signals such as cos(x).
To build the dataset I pass in a list of cyclic functions, and for each generated example I roll random coefficients for every function in the list, so each generated signal is different yet cyclic, e.g. 0.856cos(x) - 1.3cos(0.1x).
Then I add noise and normalize the signal to lie in [0, 1).
Next I train my autoencoder on it, but it learns to output a constant (usually 0.5). My guess is that this happens because 0.5 is the usual mean value of the normalized functions, but this is not the result I am aiming for at all.
Below I provide the code for the autoencoder, the data generator, and the training loop, as well as two pictures depicting the problem I am having.
first example:
second example:
Linear autoencoder:
class LinAutoencoder(nn.Module):
    def __init__(self, in_channels, K, B, z_dim, out_channels):
        super(LinAutoencoder, self).__init__()
        self.in_channels = in_channels
        self.K = K  # number of samples per 2pi interval
        self.B = B  # how many intervals
        self.out_channels = out_channels
        encoder_layers = []
        decoder_layers = []
        encoder_layers += [
            nn.Linear(in_channels * K * B, 2*z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(2*z_dim, z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(z_dim, z_dim, bias=True),
            nn.ReLU()
        ]
        decoder_layers += [
            nn.Linear(z_dim, z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(z_dim, 2*z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(2*z_dim, out_channels * K * B, bias=True),
            nn.Tanh()
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        batch_size = x.shape[0]
        x_flat = torch.flatten(x, start_dim=1)
        enc = self.encoder(x_flat)
        dec = self.decoder(enc)
        res = dec.view((batch_size, self.out_channels, self.K * self.B))
        return res
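As a sanity check on the shapes, here is a minimal usage sketch (my own, not part of the original post), assuming the parameters listed further down and that L = in_channels = out_channels = 1:

import torch

model = LinAutoencoder(in_channels=1, K=512, B=2, z_dim=64, out_channels=1)
dummy = torch.randn(64, 1, 512 * 2)   # (batch_size, channels, K * B)
out = model(dummy)
print(out.shape)                       # torch.Size([64, 1, 1024])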
The data generator:
def lincomb_generate_data(batch_size, intervals, sample_length, functions, noise_type="gaussian", **kwargs) -> torch.tensor:
    channels = 1
    mul_term = 2 * np.pi / sample_length
    positions = np.arange(0, sample_length * intervals)
    x_axis = positions * mul_term
    X = np.tile(x_axis, (channels, 1))
    y = X
    Y = np.repeat(y[np.newaxis, :], batch_size, axis=0)
    if noise_type == "gaussian":
        # defaults to 0, 0.4
        noise_mean = kwargs.get("noise_mean", 0)
        noise_std = kwargs.get("noise_std", 0.4)
        noise = np.random.normal(noise_mean, noise_std, Y.shape)
    if noise_type == "uniform":
        # defaults to 0, 1
        noise_low = kwargs.get("noise_low", 0)
        noise_high = kwargs.get("noise_high", 1)
        noise = np.random.uniform(noise_low, noise_high, Y.shape)
    coef_lo = -2
    coef_hi = 2
    coef_mat = np.random.uniform(coef_lo, coef_hi, (batch_size, len(functions)))  # creating a matrix of coefficients
    coef_mat = np.where(np.abs(coef_mat) < 10**-1, 0, coef_mat)
    for i in range(batch_size):
        curr_res = np.zeros((channels, sample_length * intervals))
        for func_id, function in enumerate(functions):
            curr_func = functions[func_id]
            curr_coef = coef_mat[i][func_id]
            curr_res += curr_coef * curr_func(Y[i, :, :])
        Y[i, :, :] = curr_res
    clean = Y
    noisy = clean + noise
    # Normalizing
    clean -= clean.min(axis=2, keepdims=True)
    clean /= clean.max(axis=2, keepdims=True) + 1e-5  # avoiding zero division
    noisy -= noisy.min(axis=2, keepdims=True)
    noisy /= noisy.max(axis=2, keepdims=True) + 1e-5  # avoiding zero division
    clean = torch.from_numpy(clean)
    noisy = torch.from_numpy(noisy)
    return x_axis, clean, noisy
Training loop:
functions = [lambda x: np.cos(0.1*x),
             lambda x: np.cos(x),
             lambda x: np.cos(3*x)]
num_epochs = 200
lin_loss_list = []
criterion = torch.nn.MSELoss()
lin_optimizer = torch.optim.SGD(lin_model.parameters(), lr=0.01, momentum=0.9)
_, val_clean, val_noisy = util.lincomb_generate_data(batch_size, B, K, functions, noise_type="gaussian")
print("STARTED TRAINING")
for epoch in range(num_epochs):
    # generate data returns the x-axis used for plotting as well as the clean and noisy data
    _, t_clean, t_noisy = util.lincomb_generate_data(batch_size, B, K, functions, noise_type="gaussian")
    # ===================forward=====================
    lin_output = lin_model(t_noisy.float())
    lin_loss = criterion(lin_output.float(), t_clean.float())
    lin_loss_list.append(lin_loss.data)
    # ===================backward====================
    lin_optimizer.zero_grad()
    lin_loss.backward()
    lin_optimizer.step()
    val_lin_loss = F.mse_loss(lin_model(val_noisy.float()), val_clean.float())
print("DONE TRAINING")
Edit: here are the parameters that were requested:
L = 1
K = 512
B = 2
batch_size = 64
z_dim = 64
noise_mean = 0
noise_std = 0.4
The problem was that I didn't use nn.BatchNorm1d in my model, so I guess something went wrong during training (probably vanishing gradients).
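For anyone who lands here with the same symptom, a minimal sketch of what the described fix might look like, i.e. adding nn.BatchNorm1d after each linear layer of the encoder (this is my guess at the change, not the poster's actual code; the decoder would be handled the same way):

import torch.nn as nn

encoder_layers = [
    nn.Linear(in_channels * K * B, 2*z_dim, bias=True),
    nn.BatchNorm1d(2*z_dim),
    nn.ReLU(),
    nn.Linear(2*z_dim, z_dim, bias=True),
    nn.BatchNorm1d(z_dim),
    nn.ReLU(),
    nn.Linear(z_dim, z_dim, bias=True),
    nn.BatchNorm1d(z_dim),
    nn.ReLU()
]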

Problem with dimension of placeholder in tensorflow

I have a problem with a TensorFlow NN. I get the error "You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]".
I have tried different shapes for the placeholder as well as for the numpy array in the feed dictionary. The code is given below.
import numpy as np
import tensorflow as tf
###################################
# data
###################################
N = 100
w_true = 5
b_true = 2
noise_scale = 0.1
x_np = np.random.rand(N, 1)
noise = np.random.normal(scale=noise_scale, size=(N, 1))
y_np = np.reshape(w_true*x_np + b_true + noise, (-1))
###########################
# Model
###########################
n_hidden = 10
with tf.name_scope("palceholders"):
    x = tf.placeholder(tf.float32, (None, 1))
    y = tf.placeholder(tf.float32, (None,))
    keep_prob = tf.placeholder(tf.float32)
with tf.name_scope("hidden-layer"):
    W = tf.Variable(tf.random_normal((1, n_hidden)))
    b = tf.Variable(tf.random_normal((n_hidden,)))
    x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
    # add dropout
    x_hidden = tf.nn.dropout(x_hidden, keep_prob)
with tf.name_scope("output"):
    W = tf.Variable(tf.random_normal((n_hidden, 1)))
    b = tf.Variable(tf.random_normal((1,)))
    y_logit = tf.matmul(x_hidden, W) + b
    y_one_prob = tf.sigmoid(y_logit)
    y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
    y_expand = tf.expand_dims(y, 1)
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
    l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
    train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
    tf.summary.scalar("loss", l)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter("tmp/full-train", tf.get_default_graph())
#########################
# train model
#########################
step = 0
n_epochs = 15
batch_size = 10
dropout_prob = 0.5
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        pos = 0
        while pos < N:
            batch_x = x_np[pos:pos+batch_size]
            batch_x = batch_x.reshape((-1, 1))
            print(batch_x.shape)
            batch_y = y_np[pos:pos+batch_size]
            feed_dict = {x: batch_x, y: batch_y, keep_prob: dropout_prob}
            _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
            print("epoch %d, step %d, loss %f" % (epoch, step, loss))
            train_writer.add_summary(summary, step)
            step += 1
            pos += batch_size
I get an error:
"InvalidArgumentError: You must feed a value for placeholder tensor 'palceholders_5/Placeholder' with dtype float and shape [?,1]"

numpy squeeze side effects

I've trained a simple machine learning model, a polynomial regression. The pseudocode of the prediction function is as follows:
def f(x):
    """
    x is a np.ndarray of shape (m, )
    """
    # X is stacked of x ** 0, x ** 1, x ** 2, ..., x ** (n - 1) by rows
    # X is of shape (m, n)
    # m is the number of training examples
    X = generate(x)
    Y = np.dot(X, W)
    return Y
W holds the trained parameters. Here the shape of Y is (m, 1), but if I return Y.squeeze(), i.e. shape (m,), I get a very different standard deviation on the test set: about 70 for the former and 8 for the latter.
I use random initialisation, but I have trained and tested many times, and the std of the squeezed version is always much smaller, so I just wonder why.
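A plausible explanation (my own guess, not verified against the poster's data) is NumPy broadcasting inside evaluate: ys has shape (m,) while the returned Y has shape (m, 1), so ys - ys_pred silently becomes an (m, m) matrix instead of an element-wise difference. A small sketch:

import numpy as np

ys = np.array([1.0, 2.0, 3.0])             # shape (3,)
ys_pred = np.array([[1.1], [2.1], [3.1]])  # shape (3, 1), like the un-squeezed Y.T
print((ys - ys_pred).shape)                # (3, 3): every prediction compared with every target
print((ys - ys_pred.squeeze()).shape)      # (3,): element-wise, as intended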
The complete code is below so you can test it yourself. My questions are at line 90 and line 91:
# python: 3.5.2
# encoding: utf-8
# numpy: 1.14.1
import numpy as np
import matplotlib.pyplot as plt
def load_data(filename):
    xys = []
    with open(filename, 'r') as f:
        for line in f:
            xys.append(map(float, line.strip().split()))
    xs, ys = zip(*xys)
    return np.asarray(xs), np.asarray(ys)
def evaluate(ys, ys_pred):
    std = np.sqrt(np.mean(np.abs(ys - ys_pred) ** 2))
    return std
def linear_regression(x_train, y_train, n=2, learning_rate=0.0005, epochs=1000, l2=0, Print=False):
    """
    The target function is: y = b + w1 * x^1 + w2 * x^2 + ...
    also y = b + np.dot(w.T, x)
    :param x_train: np.ndarray
    :param y_train: np.ndarray
    :return: a trained model (as a function), trained by x_train and y_train
    """
    # get the number of training examples
    m = x_train.shape[0]
    # set and initialize parameters here
    # intercept
    b = np.float64(-10)
    # weights
    w = np.float64(np.random.randn(n, 1))
    # convert the x_train matrix to a design matrix
    X = np.zeros((n, m), dtype=np.float64)
    for i in range(n):
        X[i, :] = x_train ** (i + 1)
    X = np.float64(X)
    Y = np.float64(np.reshape(y_train, newshape=(1, m)))
    # if a plot of the training process is needed
    costs = []
    # train on the dataset
    for epoch in range(epochs):
        # compute the gradient of cost on w
        Z = b + np.dot(w.T, X)
        dZ = Z - Y
        dw = 1./m * np.dot(X, dZ.T)
        db = 1./m * np.squeeze(np.sum(dZ))
        # update the parameters; for w, "weight decay" is also applied
        w -= learning_rate * dw + l2 * w
        b -= learning_rate * db
        cost = np.squeeze(0.5/m * np.dot(dZ, dZ.T))
        costs.append(cost)
        if Print == True and epoch % 25 == 0:
            print("Cost after " + str(epoch) + " iterations: " + str(cost))
    # plot the costs
    if Print == True:
        plt.plot(costs)
        plt.show()

    def pred(x):
        assert type(x) is np.ndarray
        m = x.shape[0]
        # convert the x matrix to a design matrix
        X = np.zeros((n, m))
        for i in range(n):
            X[i, :] = x ** (i + 1)
        # to predict
        Y = b + np.dot(w.T, X)
        return Y.T
        # return Y.squeeze()

    return pred
if __name__ == '__main__':
    train_file = 'train.txt'
    test_file = 'test.txt'
    # load data
    x_train, y_train = load_data(train_file)
    x_test, y_test = load_data(test_file)
    print(x_train.shape)
    print(x_test.shape)
    # use a trained linear-regression model
    f = linear_regression(x_train, y_train, n=2, epochs=10000, Print=False, learning_rate=1e-8, l2=5e-2)
    # compute the predictions
    y_test_pred = f(x_test)
    # use the test set to evaluate the model
    std = evaluate(y_test, y_test_pred)
    print('the standard deviation: {:.1f}'.format(std))
    # show the result
    plt.plot(x_train, y_train, 'ro', markersize=3)
    plt.plot(x_test, y_test, 'k')
    plt.plot(x_test, y_test_pred)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Linear Regression')
    plt.legend(['train', 'test', 'pred'])
    plt.show()

Why does my TensorFlow Neural Network for XOR only have an accuracy of around 0.5?

I wrote a neural network in TensorFlow for the XOR input. I used one hidden layer with 2 units and softmax classification. The input is of the form <1, x_1, x_2, zero, one>, where
1 is the bias
x_1 and x_2 are between 0 and 1 for all the combinations {00, 01, 10, 11}, selected to be normally distributed around 0 or 1
zero: is 1 if the output is zero
one: is 1 if the output is one
The accuracy is always around 0.5. What has gone wrong? Is the architecture of the neural network wrong, or is there something wrong with the code?
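For concreteness, one training row for the input pair (0, 1) under this encoding would look roughly like the following (my own illustration, not taken from the code below):

import numpy as np

# <bias, x_1, x_2, zero, one>: x_1 drawn around 0, x_2 drawn around 1, XOR(0, 1) = 1
row_0_1 = np.array([1.0, 0.003, 0.998, 0.0, 1.0])
features, labels = row_0_1[0:3], row_0_1[3:5]   # the same split the feed_dict uses below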
import tensorflow as tf
import numpy as np
from random import randint

DEBUG = True

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, weight_hidden, weight_output):
    # [1,3] x [3,n_hiddent_units] = [1,n_hiddent_units]
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    # [1,n_hiddent_units] x [n_hiddent_units, 2] = [1,2]
    return hiddern_units_output
    #return tf.matmul(hiddern_units_output, weight_output)

def getHiddenLayerOutput(X, weight_hidden):
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    return hiddern_units_output

total_inputs = 100
zeros = tf.zeros([total_inputs, 1])
ones = tf.ones([total_inputs, 1])
around_zeros = tf.random_normal([total_inputs, 1], mean=0, stddev=0.01)
around_ones = tf.random_normal([total_inputs, 1], mean=1, stddev=0.01)
batch_size = 10
n_hiddent_units = 2
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 2])
weight_hidden = init_weights([3, n_hiddent_units])
weight_output = init_weights([n_hiddent_units, 2])
hiddern_units_output = getHiddenLayerOutput(X, weight_hidden)
py_x = model(X, weight_hidden, weight_output)
#cost = tf.square(Y - py_x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    trX_0_0 = sess.run(tf.concat([ones, around_zeros, around_zeros, ones, zeros], axis=1))
    trX_0_1 = sess.run(tf.concat([ones, around_zeros, around_ones, zeros, ones], axis=1))
    trX_1_0 = sess.run(tf.concat([ones, around_ones, around_zeros, zeros, ones], axis=1))
    trX_1_1 = sess.run(tf.concat([ones, around_ones, around_ones, ones, zeros], axis=1))
    trX = sess.run(tf.concat([trX_0_0, trX_0_1, trX_1_0, trX_1_1], axis=0))
    trX = sess.run(tf.random_shuffle(trX))
    print(trX)
    for i in range(10):
        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX) + 1, batch_size)):
            trY = tf.identity(trX[start:end, 3:5])
            trY = sess.run(tf.reshape(trY, [batch_size, 2]))
            sess.run(train_op, feed_dict={X: trX[start:end, 0:3], Y: trY})
        start_index = randint(0, (total_inputs*4) - batch_size)
        y_0 = sess.run(py_x, feed_dict={X: trX[start_index:start_index+batch_size, 0:3]})
        print("iteration :", i, " accuracy :", np.mean(np.absolute(trX[start_index:start_index+batch_size, 3:5] - y_0)), "\n")
Check the comments section for the updated code.
The problem was with the randomly assigned weights. Here is the modified version, obtained after a series of trial and error.

SummaryWriter not outputting graph in TensorFlow [duplicate]

This question already has answers here:
Save Tensorflow graph for viewing in Tensorboard without summary operations
(5 answers)
Closed 5 years ago.
I am trying to use TensorBoard to analyse a graph in TensorFlow with SummaryWriter. However, TensorFlow is not outputting a 'graph' folder with information. Perhaps I am missing a command, or it is not in the right place?
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
is what I used. I think this may not work for TensorFlow 1.0 anymore (at least the SummaryWriter command).
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt
# Global config variables
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
logs_path = "./graph"
def gen_data(size=1000000):
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)
# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    raw_x, raw_y = raw_data
    data_length = len(raw_x)
    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]
    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)
def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)
"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])
"""
Inputs
"""
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)
"""
RNN
"""
cell = tf.contrib.rnn.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)
"""
Predictions, loss, training step
"""
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]
loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
tf.scalar_summary("losses", losses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
# Not sure why this is not outputting a graph for tensorboard
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph());
"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        saved = gen_epochs(num_epochs, num_steps)
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x:X, y:Y, init_state:training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses
training_losses = train_network(1,num_steps)
plt.plot(training_losses)
# tensorboard --logdir="my_graph"
This worked for me:
writer = tf.summary.FileWriter(logdir='logdir', graph=tf.get_default_graph())
writer.flush()
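Note that the script above also still uses the pre-1.0 op tf.scalar_summary; if that line fails too, the TensorFlow 1.0 equivalents (my note, not part of the original answer) are:

tf.summary.scalar("total_loss", total_loss)   # replaces tf.scalar_summary(...)
merged = tf.summary.merge_all()               # replaces tf.merge_all_summaries()
writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())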
