Tensorflow: feeding placeholder in loop within loop fails - python

I am trying to repeatedly train a neural network at different sizes of hidden layer to determine how many neurons it should be. I wrote a net that works fine when doing one pass through. The code is:
import tensorflow as tf
import nn
def train(layers, data, folder = 'run1'):
input_layer_size, hidden_layer_size, num_labels = layers;
X, y, X_val, y_val = data;
X_placeholder = tf.placeholder(tf.float32, shape=(None, input_layer_size), name='X')
y_placeholder = tf.placeholder(tf.uint8, shape=(None, num_labels), name='y')
Theta1 = tf.Variable(nn.randInitializeWeights(input_layer_size, hidden_layer_size), name='Theta1')
bias1 = tf.Variable(nn.randInitializeWeights(hidden_layer_size, 1), name='bias1')
Theta2 = tf.Variable(nn.randInitializeWeights(hidden_layer_size, num_labels), name='Theta2')
bias2 = tf.Variable(nn.randInitializeWeights(num_labels, 1), name='bias2')
cost = nn.cost(X_placeholder, y_placeholder, Theta1, bias1, Theta2, bias2)
optimize = tf.train.GradientDescentOptimizer(0.6).minimize(cost)
accuracy, precision, recall, f1 = nn.evaluate(X_placeholder, y_placeholder, Theta1, bias1, Theta2, bias2)
cost_summary = tf.summary.scalar('cost', cost);
accuracy_summary = tf.summary.scalar('accuracy', accuracy);
precision_summary = tf.summary.scalar('precision', precision);
recall_summary = tf.summary.scalar('recall', recall);
f1_summary = tf.summary.scalar('f1', f1);
summaries = tf.summary.merge_all();
sess = tf.Session();
saver = tf.train.Saver()
init = tf.global_variables_initializer()
sess.run(init)
writer = tf.summary.FileWriter('./tmp/logs/' + folder, sess.graph)
NUM_STEPS = 20;
for step in range(NUM_STEPS):
sess.run(optimize, feed_dict={X_placeholder: X, y_placeholder: y});
if (step > 0) and ((step + 1) % 10 == 0):
summary = sess.run(summaries, feed_dict={X_placeholder: X_val, y_placeholder: y_val});
# writer.add_summary(summary, step);
print('Step', step + 1, 'of', NUM_STEPS);
save_path = saver.save(sess, './tmp/model_' + folder + '.ckpt')
print("Model saved in file: %s" % save_path)
sess.close();
When I put this call in a loop however, I make it through only the first iteration. It seems to fail during the second iteration the first time I hit this line:
summary = sess.run(summaries, feed_dict={X_placeholder: X_val, y_placeholder: y_val});
I get the error: InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'X' with dtype float
I logged both X and X_val right before feeding and they look exactly like they do on each preceding run. If I comment that second run part out it works great, but I kinda need my summaries...
My outer loop looks like this:
import train
import loadData
input_layer_size = 5513;
num_labels = 128;
data = loadData.load(input_layer_size, num_labels);
for hidden_layer_size in range(50, 500, 50):
train.train([input_layer_size, hidden_layer_size, num_labels], data, 'run' + str(hidden_layer_size))

Because you call the train function inside a loop then each time it runs it is creating a new copy of the placeholders. The first time it runs it is fine because there is only one copy. The second time it runs now you have duplicate placeholders. The solution is to separate out the code that builds the model from the code that runs the training.

Related

Is there a way to configure the output shape of a RNN?

I'm trying to create a RNN to guess what notes are being played on a piano, given a sound file of piano notes (WAV format). I'm currently cutting the WAV clips into ten-second chunks (2D), padding shorter sections to 10 seconds with zeroes so the input is all regular. However, when I pass in the clips to the RNN, it gives an output of one less dimension (1D) (when taking the last state - should I be taking the state series?).
I've created a simpler RNN to analyze single notes files (2D) and produce one output (1D), which has been successful. However, when trying to apply this same technique to full clips with multiple notes and notes starting/stopping it seems to break down, as I can't seem to change the output shape.
def weight_variable(shape):
initer = tf.truncated_normal_initializer(stddev=0.01)
return tf.get_variable('W', dtype=tf.float32, shape=shape, initializer=initer)
def bias_variable(shape):
initial = tf.constant(0., shape=shape, dtype=tf.float32)
return tf.get_variable('b', dtype=tf.float32,initializer=initial)
def RNN(x, weights, biases, timesteps, num_hidden):
x = tf.unstack(x, timesteps, 1)
# Define a rnn cell with tensorflow
lstm_cell = rnn.LSTMCell(num_hidden)
states_series, current_state = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
return tf.matmul(current_state[1], weights) + biases
# return [tf.matmul(temp,weights) + biases for temp in states_series]
# does this even make sense
# x is for data, y is for targets, shapes are [index, time, frequency], [index, time, output note (s)] respectively
x_train, x_valid, y_train, y_valid = load_data() # removed test
print("Size of:")
print("- Training-set:\t\t{}".format(y_train.shape[0]))
print("- Validation-set:\t{}".format(y_valid.shape[0]))
# print("- Test-set\t{}".format(len(y_test)))
learning_rate = 0.001 # The optimization initial learning rate
epochs = 1000 # Total number of training epochs
batch_size = 100 # Training batch size
display_freq = 100 # Frequency of displaying the training results
threshold = 0.7 # Threshold for determining a "note"
num_hidden_units = 15 # Number of hidden units of the RNN
# Placeholders for inputs (x) and outputs(y)
x = tf.placeholder(tf.float32, shape=(None, stepCount, num_input))
y = tf.placeholder(tf.float32, shape=(None, stepCount, n_classes))
# create weight matrix initialized randomly from N~(0, 0.01)
W = weight_variable(shape=[num_hidden_units, n_classes])
# create bias vector initialized as zero
b = bias_variable(shape=[n_classes])
output_logits = RNN(x, W, b, stepCount, num_hidden_units)
y_pred = tf.nn.softmax(output_logits)
# Define the loss function, optimizer, and accuracy, etc.
# (code removed, irrelevant)
# Creating the op for initializing all variables
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
global_step = 0
# Number of training iterations in each epoch
num_tr_iter = int(y_train.shape[0] / batch_size)
for epoch in range(epochs):
print('Training epoch: {}'.format(epoch + 1))
x_train, y_train = randomize(x_train, y_train)
for iteration in range(num_tr_iter):
global_step += 1
start = iteration * batch_size
end = (iteration + 1) * batch_size
x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
# Run optimization op (backprop)
feed_dict_batch = {x: x_batch, y: y_batch}
sess.run(optimizer, feed_dict=feed_dict_batch)
if iteration % display_freq == 0:
# Calculate and display the batch loss and accuracy
loss_batch, acc_batch = sess.run([loss, accuracy],
feed_dict=feed_dict_batch)
print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
format(iteration, loss_batch, acc_batch))
testLoss.append(loss_batch)
testAcc.append(acc_batch)
# Run validation after every epoch
feed_dict_valid = {x: x_valid[:1000].reshape((-1, stepCount, num_input)), y: y_valid[:1000]}
loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
print('---------------------------------------------------------')
print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
format(epoch + 1, loss_valid, acc_valid))
print('---------------------------------------------------------')
validLoss.append(loss_valid)
validAcc.append(acc_batch)
Currently, this is outputting a 1D array of predictions, which really does not make sense in my scenario, but I'm not sure how to change it (it should be outputting predictions for each timestep - i.e. predictions of what notes are playing at each moment in time).

tensorflow shuffle_batch and feed_dict error

This is the main part of my code.
I'm confused on function shuffle_batch and feed_dict.
In my code below, the features and labels I put into the function are "list".(I also tried "array" before.But it seems doesn't matter.)
What I want to do is make my testing data(6144,26) and training data(1024,13) into batch:(100,26) and (100,13),then set them as the feed_dict for the placeholders.
My questions are:
1.The outputs of the function tf.train.batch_shuffle are Tensors.But I can not put tensors in the feed_dict,right?
2.When I compiled the last two rows,error says,got shape [6144, 26], but wanted [6144] .I know it may be a dimension error,but how can I fix it.
Thanks a lot.
import tensorflow as tf
import scipy.io as sio
#import signal matfile
#[('label', (8192, 13), 'double'), ('clipped_DMT', (8192, 26), 'double')]
file = sio.loadmat('DMTsignal.mat')
#get array(clipped_DMT)
data_cDMT = file['clipped_DMT']
#get array(label)
data_label = file['label']
with tf.variable_scope('split_cDMT'):
cDMT_test_list = []
cDMT_training_list = []
for i in range(0,8192):
if i % 4 == 0:
cDMT_test_list.append(data_cDMT[i])
else:
cDMT_training_list.append(data_cDMT[i])
with tf.variable_scope('split_label'):
label_test_list = []
label_training_list = []
for i in range(0,8192):
if i % 4 == 0:
label_test_list.append(data_label[i])
else:
label_training_list.append(data_label[i])
#set parameters
n_features = cDMT_training.shape[1]
n_labels = label_training.shape[1]
learning_rate = 0.8
hidden_1 = 256
hidden_2 = 128
training_steps = 1000
BATCH_SIZE = 100
#set Graph input
with tf.variable_scope('cDMT_Inputs'):
X = tf.placeholder(tf.float32,[None, n_features],name = 'Input_Data')
with tf.variable_scope('labels_Inputs'):
Y = tf.placeholder(tf.float32,[None, n_labels],name = 'Label_Data')
#set variables
#Initialize both W and b as tensors full of zeros
with tf.variable_scope('layerWeights'):
h1 = tf.Variable(tf.random_normal([n_features,hidden_1]))
h2 = tf.Variable(tf.random_normal([hidden_1,hidden_2]))
w_out = tf.Variable(tf.random_normal([hidden_2,n_labels]))
with tf.variable_scope('layerBias'):
b1 = tf.Variable(tf.random_normal([hidden_1]))
b2 = tf.Variable(tf.random_normal([hidden_2]))
b_out = tf.Variable(tf.random_normal([n_labels]))
#create model
def neural_net(x):
layer_1 = tf.add(tf.matmul(x,h1),b1)
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,h2),b2))
out_layer = tf.add(tf.matmul(layer_2,w_out),b_out)
return out_layer
nn_out = neural_net(X)
#loss and optimizer
with tf.variable_scope('Loss'):
loss = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = nn_out,labels = Y)))
with tf.name_scope('Train'):
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.name_scope('Accuracy'):
correct_prediction = tf.equal(tf.argmax(nn_out,1),tf.argmax(Y,1))
#correct_prediction = tf.metrics.accuracy (labels = Y, predictions =nn_out)
acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
# Initialize
init = tf.global_variables_initializer()
# start computing & training
with tf.Session() as sess:
sess.run(init)
for step in range(training_steps):
#set batch
cmt_train_bat,label_train_bat = sess.run(tf.train.shuffle_batch([cDMT_training_list,label_training_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
cmt_test_bat,label_test_bat = sess.run(tf.train.shuffle_batch([cDMT_test_list,label_test_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
From the Session.run doc:
The optional feed_dict argument allows the caller to override the
value of tensors in the graph. Each key in feed_dict can be one of the
following types:
If the key is a tf.Tensor, the value may be a Python scalar, string,
list, or numpy ndarray that can be converted to the same dtype as that
tensor. Additionally, if the key is a tf.placeholder, the shape of the
value will be checked for compatibility with the placeholder.
...
So you are right: for X and Y (which are placeholders) you can't feed a tensor and tf.train.shuffle_batch is not designed to work with placeholders.
You can follow one of two ways:
get rid of placeholders and use tf.TFRecordReader in combination with tf.train.shuffle_batch, as suggested here. This way you'll have only tensors in your model and you won't need to "feed" anything additionally.
batch and shuffle the data yourself in numpy and feed into placeholders. This takes just several lines of code, so I find it easier, though both paths are valid.
Take also into account performance considerations.

Making predictions with tensorflow

I'm really a beginner with tensor flow and in all of this field, but I've seen all the lectures of Andrej Karpathy in CS231n class so I'm understanding the code.
So this is the code (not mine): https://github.com/nfmcclure/tensorflow_cookbook/tree/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction
# Implementing an RNN in TensorFlow
# ----------------------------------
#
# We implement an RNN in TensorFlow to predict spam/ham from texts
#
# https://github.com/nfmcclure/tensorflow_cookbook/blob/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction/02_implementing_rnn.py
import os
import re
import io
import glob
import requests
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from zipfile import ZipFile
from tensorflow.python.framework import ops
ops.reset_default_graph()
# Start a graph
sess = tf.Session()
# Set RNN parameters
epochs = 20
batch_size = 250
max_sequence_length = 25
rnn_size = 10
embedding_size = 50
min_word_frequency = 10
learning_rate = 0.0005
dropout_keep_prob = tf.placeholder(tf.float32)
# Download or open data
data_dir = 'temp'
data_file = 'text_data.txt'
if not os.path.exists(data_dir):
os.makedirs(data_dir)
if not os.path.isfile(os.path.join(data_dir, data_file)):
zip_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip'
r = requests.get(zip_url)
z = ZipFile(io.BytesIO(r.content))
file = z.read('SMSSpamCollection')
# Format Data
text_data = file.decode()
text_data = text_data.encode('ascii', errors='ignore')
text_data = text_data.decode().split('\n')
# Save data to text file
with open(os.path.join(data_dir, data_file), 'w') as file_conn:
for text in text_data:
file_conn.write("{}\n".format(text))
else:
# Open data from text file
text_data = []
with open(os.path.join(data_dir, data_file), 'r') as file_conn:
for row in file_conn:
text_data.append(row)
text_data = text_data[:-1]
text_data = [x.split('\t') for x in text_data if len(x) >= 1]
text_data = [x for x in text_data if len(x) > 1]
print([list(x) for x in zip(*text_data)])
[text_data_target, text_data_train] = [list(x) for x in zip(*text_data)]
# Create a text cleaning function
def clean_text(text_string):
text_string = re.sub(r'([^\s\w]|_|[0-9])+', '', text_string)
text_string = " ".join(text_string.split())
text_string = text_string.lower()
return (text_string)
# Clean texts
text_data_train = [clean_text(x) for x in text_data_train]
# Change texts into numeric vectors
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(max_sequence_length,
min_frequency=min_word_frequency)
text_processed = np.array(list(vocab_processor.fit_transform(text_data_train)))
# Shuffle and split data
text_processed = np.array(text_processed)
text_data_target = np.array([1 if x == 'ham' else 0 for x in text_data_target])
shuffled_ix = np.random.permutation(np.arange(len(text_data_target)))
x_shuffled = text_processed[shuffled_ix]
y_shuffled = text_data_target[shuffled_ix]
# Split train/test set
ix_cutoff = int(len(y_shuffled) * 0.80)
x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]
vocab_size = len(vocab_processor.vocabulary_)
print("Vocabulary Size: {:d}".format(vocab_size))
print("80-20 Train Test split: {:d} -- {:d}".format(len(y_train), len(y_test)))
# Create placeholders
x_data = tf.placeholder(tf.int32, [None, max_sequence_length])
y_output = tf.placeholder(tf.int32, [None])
# Create embedding
embedding_mat = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
embedding_output = tf.nn.embedding_lookup(embedding_mat, x_data)
# embedding_output_expanded = tf.expand_dims(embedding_output, -1)
# Define the RNN cell
# tensorflow change >= 1.0, rnn is put into tensorflow.contrib directory. Prior version not test.
if tf.__version__[0] >= '1':
cell = tf.contrib.rnn.BasicRNNCell(num_units=rnn_size)
else:
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=rnn_size)
output, state = tf.nn.dynamic_rnn(cell, embedding_output, dtype=tf.float32)
output = tf.nn.dropout(output, dropout_keep_prob)
# Get output of RNN sequence
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([rnn_size, 2], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[2]))
logits_out = tf.matmul(last, weight) + bias
# Loss function
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_out,
labels=y_output) # logits=float32, labels=int32
loss = tf.reduce_mean(losses)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits_out, 1), tf.cast(y_output, tf.int64)), tf.float32))
optimizer = tf.train.RMSPropOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
# Start training
for epoch in range(epochs):
# Shuffle training data
shuffled_ix = np.random.permutation(np.arange(len(x_train)))
x_train = x_train[shuffled_ix]
y_train = y_train[shuffled_ix]
num_batches = int(len(x_train) / batch_size) + 1
# TO DO CALCULATE GENERATIONS ExACTLY
for i in range(num_batches):
# Select train data
min_ix = i * batch_size
max_ix = np.min([len(x_train), ((i + 1) * batch_size)])
x_train_batch = x_train[min_ix:max_ix]
y_train_batch = y_train[min_ix:max_ix]
# Run train step
train_dict = {x_data: x_train_batch, y_output: y_train_batch, dropout_keep_prob: 0.5}
sess.run(train_step, feed_dict=train_dict)
# Run loss and accuracy for training
temp_train_loss, temp_train_acc = sess.run([loss, accuracy], feed_dict=train_dict)
train_loss.append(temp_train_loss)
train_accuracy.append(temp_train_acc)
# Run Eval Step
test_dict = {x_data: x_test, y_output: y_test, dropout_keep_prob: 1.0}
temp_test_loss, temp_test_acc = sess.run([loss, accuracy], feed_dict=test_dict)
test_loss.append(temp_test_loss)
test_accuracy.append(temp_test_acc)
print('Epoch: {}, Test Loss: {:.2}, Test Acc: {:.2}'.format(epoch + 1, temp_test_loss, temp_test_acc))
# Plot loss over time
epoch_seq = np.arange(1, epochs + 1)
plt.plot(epoch_seq, train_loss, 'k--', label='Train Set')
plt.plot(epoch_seq, test_loss, 'r-', label='Test Set')
plt.title('Softmax Loss')
plt.xlabel('Epochs')
plt.ylabel('Softmax Loss')
plt.legend(loc='upper left')
plt.show()
# Plot accuracy over time
plt.plot(epoch_seq, train_accuracy, 'k--', label='Train Set')
plt.plot(epoch_seq, test_accuracy, 'r-', label='Test Set')
plt.title('Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()
def findFiles(path): return glob.glob(path)
pred_array = "words"
pred_num = np.array(list(vocab_processor.fit_transform(pred_array)))
print(pred_num)
pred_output = tf.placeholder(tf.float32,[1,len(pred_array),max_sequence_length])
feed_dict = {pred_output: [pred_num]}
classification = sess.run(losses, feed_dict)
print(classification)
It's a RNN spam classifier, and It's working great (accept for the part I wrote at the end where I'm trying to create the predictions).
I'm just want to understand how to create a prediction function to this, something that looks like that:
def predict(text): # text is a string (my mail)
# Doing prediction stuff
return (top result) # ham or spam
The last few lines are my last try is giving me the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Also I tried to do something using Making predictions with a TensorFlow model, and I also read https://www.tensorflow.org/serving/serving_basic and every thing I've tried failed...
Since I'm just a beginner explanations are welcomed, but I'm not sure I'll know how to code it so can you please post the code answer too.
(Python 3.6 btw)
Thanks!
If you take a look at how the original code does the training and testing steps, specifically how they set up their train_dict and test_dict, you see that they feed values to each of the tensors defined as placeholder in the graph. Basically placeholders need to be given some value if they are going to be used in whatever calculation you are asking your network to do. Since you are looking for predictions from the network, you probably do not need to provide an expected output, but you will need to give it input data x_data, and a value for dropout_keep_prob. This should be dropout_keep_prob=1.0 for prediction.
You also want a prediction, not the loss of the network. The loss is basically a measure of how far your network's output is from what you expect, but since you are trying to predict something for new data you really just want to see what the network says it is. You can do this using the logits_out op directly, or we can add an op that converts your logits into a probability distribution over your classes. Either way you can look at the distribution to get an idea of how likely the network thinks your data falls into each category, or you can take the max value of this vector to just output the network's best guess.
So you might try something like:
prediction = tf.nn.softmax(logits_out)
feed_dict = {x_data: your_input_data, dropout_keep_prob: 1.0}
pred = sess.run(prediction, feed_dict)
best_guess = np.argmax(pred) # highest-rated class

Training LSTM network and predicting with different starting points

This is a simple example of using LSTM cell from tensor flow. I am generating a sin wave and training my network for ten periods and I'm trying to predict the eleventh period. The predictor values X are one epoch lag of the true y. After training, I save the session to the disk and I restore it at prediction time - this is typical of training and deploying models to production.
When I predict the last period, y_predicted is matching very well the true y.
If I try to predict the sin wave using an arbitrary starting point, (i.e. uncomment line 114)
test_data = test_data[16:]
such that the true values of y would be shifted by a quarter period, it seems like the LSTM prediction still starts at zero and it takes a couple of epochs to catch up with the true values, eventually matching the previous prediction. As a matter of fact it seems that the prediction in the second case is still a full sin wave instead of the 3/4 wave.
What is the reason why this is happening. If I implement a regressor I would like to use it starting with any point.
https://github.com/fbora/mytensorflow/issues/1
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
def sin_signal():
'''
generate a sin function
the train set is ten periods in length
the test set is one additional period
the return variable is in pandas format for easy plotting
'''
phase = np.arange(0, 2*np.pi*11, 0.1)
y = np.sin(phase)
data = pd.DataFrame.from_dict({'phase': phase, 'y':y})
# fill the last element by 0 - it's the end of the period anyways
data['X'] = data.y.shift(-1).fillna(0.0)
train_data = data[data.phase<=2*np.pi*10].copy()
test_data = data[data.phase>2*np.pi*10].copy()
return train_data, test_data
class lstm_model():
def __init__(self, size_x, size_y, num_units=32, num_layers=3, keep_prob=0.5):
# def single_unit():
# return rnn.DropoutWrapper(
# rnn.LSTMCell(num_units), output_keep_prob=keep_prob)
def single_unit():
return rnn.LSTMCell(num_units)
self.graph = tf.Graph()
with self.graph.as_default():
'''input place holders'''
self.X = tf.placeholder(tf.float32, [None, size_x], name='X')
self.y = tf.placeholder(tf.float32, [None, size_y], name='y')
'''network'''
cell = rnn.MultiRNNCell([single_unit() for _ in range(num_layers)])
X = tf.expand_dims(self.X, -1)
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0])-1)
weights = tf.Variable(tf.truncated_normal([num_units, size_y], 0.0, 1.0), name='weights')
bias = tf.Variable(tf.zeros(size_y), name='bias')
predicted_y = tf.nn.xw_plus_b(last, weights, bias, name='predicted_y')
'''optimizer'''
optimizer = tf.train.AdamOptimizer(name='adam_optimizer')
global_step = tf.Variable(0, trainable=False, name='global_step')
self.loss = tf.reduce_mean(tf.squared_difference(predicted_y, self.y), name='mse_loss')
self.train_op = optimizer.minimize(self.loss, global_step=global_step, name='training_op')
'''initializer'''
self.init_op = tf.global_variables_initializer()
class lstm_regressor():
def __init__(self):
if not os.path.isdir('./check_pts'):
os.mkdir('./check_pts')
#staticmethod
def get_shape(dataframe):
df_shape = dataframe.shape
num_rows = df_shape[0]
num_cols = 1 if len(df_shape)<2 else df_shape[1]
return num_rows, num_cols
def train(self, X_train, y_train, iterations):
train_pts, size_x = lstm_regressor.get_shape(X_train)
train_pts, size_y = lstm_regressor.get_shape(y_train)
model = lstm_model(size_x=size_x, size_y=size_y, num_units=32, num_layers=1)
with tf.Session(graph=model.graph) as sess:
sess.run(model.init_op)
saver = tf.train.Saver()
feed_dict={
model.X: X_train.values.reshape(-1, size_x),
model.y: y_train.values.reshape(-1, size_y)
}
for step in range(iterations):
_, loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)
if step%100==0:
print('step={}, loss={}'.format(step, loss))
saver.save(sess, './check_pts/lstm')
def predict(self, X_test):
test_pts, size_x = lstm_regressor.get_shape(X_test)
X_np = X_test.values.reshape(-1, size_x)
graph = tf.Graph()
with graph.as_default():
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
saver = tf.train.import_meta_graph('./check_pts/lstm.meta')
saver.restore(sess, './check_pts/lstm')
X = graph.get_tensor_by_name('X:0')
y_tf = graph.get_tensor_by_name('predicted_y:0')
y_np = sess.run(y_tf, feed_dict={X: X_np})
return y_np.reshape(test_pts)
def main():
train_data, test_data = sin_signal()
regressor = lstm_regressor()
regressor.train(train_data.X, train_data.y, iterations=1000)
# test_data = test_data[16:]
y_predicted = regressor.predict(test_data.X)
test_data['y_predicted'] = y_predicted
test_data[['y', 'y_predicted']].plot()
if __name__ == '__main__':
main()
I suspect that since you are starting your predictions at an arbitrary starting point in the future, there is a gap of values between what your model was trained on and what it is starting to see for predictions, and the State of your LSTM has not updated with the values in that gap?
*** UPDATE:
In your code, you have this:
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32)
and then during training this:
_, loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)
I would suggest feeding the initial State into dynamic_rnn and re-feeding the updated state at each training iteration, something like this:
inState = tf.placeholder(tf.float32, [YOUR_DIMENSIONS], name='inState')
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32, initial_state=inState)
And during training:
iState = np.zeros([YOUR_DIMENSIONS])
feed_dict={
model.X: X_train.values.reshape(-1, size_x),
model.y: y_train.values.reshape(-1, size_y),
inState: iState # feed initial value for state placeholder
}
_, loss, oState = sess.run([model.train_op, model.loss, model.state], feed_dict=feed_dict) # run one additional variable from the session
iState = oState # assign latest out-state to be re-fed as in-state
So, this way your model not only learns the parameters during training, but also keeps track of everything that it's seen during training in the State. NOW, you save this State with the rest of your session and use it during the prediction stage.
The small difficulty with this is that technically this State is a placeholder, so it won't be saved in the Graph automatically in my experience. So you create another variable manually at the end of training and assign the State to it; this way it is saved in the graph for later:
# make sure this variable is declared BEFORE the saver is declared
savedState = tf.get_variable('savedState', shape=[YOUR_DIMENSIONS])
# then, at the end of training:
assignOp = tf.assign(savedState, oState)
sess.run(assignOp)
# now save your graph
So now once you restore the Graph, if you want to start your predictions after some artificial gap, then somehow you still have to run your model through this gap so as to update the state. In my case, I just run one dummy prediction for the whole gap, just so as to update the state, and then you continue at your normal intervals from here.
Hope this helps...

Getting very high values in linear regression

I am trying to make a simple MLP to predict values of a pixel of an image - original blog .
Here's my earlier attempt using Keras in python - link
I've tried to do the same in tensorflow, but I am getting very large output values (~10^12) when they should be less than 1.
Here's my code:
import numpy as np
import cv2
from random import shuffle
import tensorflow as tf
'''
Image preprocessing
'''
image_file = cv2.imread("Mona Lisa.jpg")
h = image_file.shape[0]
w = image_file.shape[1]
preX = []
preY = []
for i in xrange(h):
for j in xrange(w):
preX.append([i,j])
preY.append(image_file[i,j,:].astype('float32')/255.0)
print preX[:5], preY[:5]
zipped = [i for i in zip(preX,preY)]
shuffle(zipped)
X_train = np.array([i for (i,j) in zipped]).astype('float32')
Y_train = np.array([j for (i,j) in zipped]).astype('float32')
print X_train[:10], Y_train[:10]
'''
Tensorflow code
'''
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
x = tf.placeholder(tf.float32, shape=[None,2])
y = tf.placeholder(tf.float32, shape=[None,3])
'''
Layers
'''
w1 = weight_variable([2,300])
b1 = bias_variable([300])
L1 = tf.nn.relu(tf.matmul(X_train,w1)+b1)
w2 = weight_variable([300,3])
b2 = bias_variable([3])
y_model = tf.matmul(L1,w2)+b2
'''
Training
'''
# criterion
MSE = tf.reduce_mean(tf.square(tf.sub(y,y_model)))
# trainer
train_op = tf.train.GradientDescentOptimizer(learning_rate = 0.01).minimize(MSE)
nb_epochs = 10
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
cost = 0
for i in range(nb_epochs):
sess.run(train_op, feed_dict ={x: X_train, y: Y_train})
cost += sess.run(MSE, feed_dict ={x: X_train, y: Y_train})
cost /= nb_epochs
print cost
'''
Prediction
'''
pred = sess.run(y_model,feed_dict = {x:X_train})*255.0
print pred[:10]
output_image = []
index = 0
h = image_file.shape[0]
w = image_file.shape[1]
for i in xrange(h):
row = []
for j in xrange(w):
row.append(pred[index])
index += 1
row = np.array(row)
output_image.append(row)
output_image = np.array(output_image)
output_image = output_image.astype('uint8')
cv2.imwrite('out_mona_300x3_tf.png',output_image)
First of all, I think that instead of running the train_op and then the MSE
you can run both ops in a list and reduce your computational cost significantly.
for i in range(nb_epochs):
cost += sess.run([MSE, train_op], feed_dict ={x: X_train, y: Y_train})
Secondly, I suggest always writing out your cost function so you can see what is going on during the training phase. Either manually print it out or use tensorboard to log your cost and plot it (you can find examples on the official tf page).
You can also monitor your weights to see that they aren't blowing up.
A few things you can try:
Reduce learning rate, add regularization to weights.
Check that your training set (pixels) really consist of the values that
you expect them to.
You give the input layer weights and the output layer weights the same names w and b, so it seems something goes wrong in the gradient-descent procedure. Actually I'm surprised tensorflow doesn't issue an error or at leas a warning (or am I missing something?)

Categories

Resources