Tensorflow apply_gradients throws error - python

I get the error:
TypeError: Input 'ref' of 'AssignAdd' Op requires l-value input
on the line apply_gradient_op = opt.apply_gradients(grads, global_step=stepNum) of the function train below.
def x1_x2_diff_net_v0():
    x = tf.placeholder(tf.float32, [None, 4])
    lb = tf.placeholder(tf.float32, [None, 2])
    # First fc layer
    with tf.variable_scope('fc1') as scope:
        w = tfu.get_weights([4, 100], name='fc1_w')
        b = tfu.get_bias([1, 100], name='fc1_b')
        fc1 = tf.nn.relu(tf.matmul(x, w) + b)
    # Prediction layer
    with tf.variable_scope('pred') as scope:
        w = tfu.get_weights([100, 2], name='pred_w')
        b = tfu.get_bias([1, 2], name='pred_b')
        pred = tf.nn.relu(tf.matmul(fc1, w) + b)
    # Define the loss
    loss = tf.nn.l2_loss(pred - lb, name='loss')
    return loss

def train(stepNum, initLr=0.01):
    g = tf.Graph()
    with g.as_default():
        loss = x1_x2_diff_net_v0()
        lr = tf.train.exponential_decay(initLr, stepNum, 100,
                                        0.1, staircase=True)
        for tv in tf.trainable_variables():
            print(tv.name)
        # Compute gradients.
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(loss)
        # Apply gradients.
        apply_gradient_op = opt.apply_gradients(grads, global_step=stepNum)
Any pointers on what might be going wrong? I took these snippets from the method train in the cifar10.py example file.

Oops! I was passing an integer as stepNum instead of a tf.Variable. It's resolved now. It would be great if the error messages were more intuitive.
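For reference, a minimal sketch of the fix (the variable name global_step is illustrative): apply_gradients increments the step counter in place with AssignAdd, so it must be a tf.Variable rather than a Python int.

import tensorflow as tf

# global_step must be a tf.Variable so apply_gradients can increment it
# in place; passing a plain Python int triggers the AssignAdd l-value error.
global_step = tf.Variable(0, name='global_step', trainable=False)

opt = tf.train.GradientDescentOptimizer(0.01)
grads = opt.compute_gradients(loss)  # loss built as in x1_x2_diff_net_v0()
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)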

Related

InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape

I have written the following code in PyCharm, which implements a fully connected layer (FCL) model in TensorFlow. The placeholder raises an invalid argument error. I specified the dtype, shape, and name of the placeholder, but I still get the error.
I want to produce a new Signal(1, 222) through the FCL model.
input Signal(1, 222) => output Signal(1, 222)
maxPredict: find the index with the highest value in the output signal.
calculateY: get the frequency array value corresponding to maxPredict.
loss: use the difference between trueY and calculateY as the loss.
loss = tf.abs(trueY - calculateY)
Code (where the error occurs)
x = tf.placeholder(dtype=tf.float32, shape=[1, 222], name='inputX')
ERROR
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'inputX' with dtype float and shape [1,222]
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'inputX' with dtype float and shape [1,222]
[[{{node inputX}} = Placeholder[dtype=DT_FLOAT, shape=[1,222], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
New Error Cases
I changed my code:
x = tf.placeholder(tf.float32, [None, 222], name='inputX')
Error Case 1
tensorFreq = tf.convert_to_tensor(basicFreq, tf.float32)
newY = tf.gather(tensorFreq, maxPredict) * 60
loss = tf.abs(y - tf.Variable(newY))
ValueError: initial_value must have a shape specified: Tensor("mul:0", shape=(?,), dtype=float32)
Error Case 2
tensorFreq = tf.convert_to_tensor(basicFreq, tf.float32)
newY = tf.gather(tensorFreq, maxPredict) * 60
loss = tf.abs(y - newY)
Traceback (most recent call last):
File "D:/PycharmProject/DetectionSignal/TEST_FCL_StackOverflow.py", line 127, in
trainStep = opt.minimize(loss)
File "C:\Users\Heewony\Anaconda3\envs\TSFW_pycharm\lib\site-packages\tensorflow\python\training\optimizer.py", line 407, in minimize
([str(v) for _, v in grads_and_vars], loss))
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables [tf.Variable 'Variable:0' shape=(222, 1024) dtype=float32_ref, tf.Variable 'Variable_1:0' shape=(1024,) dtype=float32_re, ......... tf.Variable 'Variable_5:0' shape=(222,) dtype=float32_ref] and loss Tensor("Abs:0", dtype=float32).
Development environment
OS Platform and Distribution: Windows 10 x64
TensorFlow installed from: Anaconda
TensorFlow version: 1.12.0
Python version: 3.6.7
Mobile device: N/A
Exact command to reproduce: N/A
GPU model and memory: NVIDIA GeForce GTX 1080 Ti
CUDA/cuDNN: 9.0/7.4
Model and Function
def Model_FCL(inputX):
    data = inputX  # input Signals

    # Fully Connected Layer 1
    flatConvh1 = tf.reshape(data, [-1, 222])
    fcW1 = tf.Variable(tf.truncated_normal(shape=[222, 1024], stddev=0.05))
    fcb1 = tf.Variable(tf.constant(0.1, shape=[1024]))
    fch1 = tf.nn.relu(tf.matmul(flatConvh1, fcW1) + fcb1)

    # Fully Connected Layer 2
    flatConvh2 = tf.reshape(fch1, [-1, 1024])
    fcW2 = tf.Variable(tf.truncated_normal(shape=[1024, 1024], stddev=0.05))
    fcb2 = tf.Variable(tf.constant(0.1, shape=[1024]))
    fch2 = tf.nn.relu(tf.matmul(flatConvh2, fcW2) + fcb2)

    # Output Layer
    fcW3 = tf.Variable(tf.truncated_normal(shape=[1024, 222], stddev=0.05))
    fcb3 = tf.Variable(tf.constant(0.1, shape=[222]))
    logits = tf.add(tf.matmul(fch2, fcW3), fcb3)
    predictY = tf.nn.softmax(logits)
    return predictY, logits

def loadMatlabData(fileName):
    contentsMat = sio.loadmat(fileName)
    dataInput = contentsMat['dataInput']
    dataLabel = contentsMat['dataLabel']
    dataSize = dataInput.shape
    dataSize = dataSize[0]
    return dataInput, dataLabel, dataSize

def getNextSignal(num, data, labels, WINDOW_SIZE, OUTPUT_SIZE):
    shuffleSignal = data[num]
    shuffleLabels = labels[num]
    # shuffleSignal = shuffleSignal.reshape(1, WINDOW_SIZE)
    # shuffleSignal = np.asarray(shuffleSignal, np.float32)
    return shuffleSignal, shuffleLabels

def getBasicFrequency():
    # basicFreq => shape(222)
    basicFreq = np.array([0.598436736688, 0.610649731314, ... 3.297508549096])
    return basicFreq
Graph
basicFreq = getBasicFrequency()
myGraph = tf.Graph()

with myGraph.as_default():
    # define placeholders for receiving the input data & output data
    x = tf.placeholder(dtype=tf.float32, shape=[1, 222], name='inputX')  # Signal size = [1, 222]
    y = tf.placeholder(tf.float32, name='trueY')  # Float value size = [1]

    print('inputzz ', x, y)
    print('Graph ', myGraph.get_operations())
    print('TrainVariable ', tf.trainable_variables())

    predictY, logits = Model_FCL(x)  # Predict Signal, size = [1, 222]
    maxPredict = tf.argmax(predictY, 1, name='maxPredict')  # Find max index of Predict Signal

    tensorFreq = tf.convert_to_tensor(basicFreq, tf.float32)
    newY = tf.gather(tensorFreq, maxPredict) * 60  # Find the value that corresponds to the Freq array index
    loss = tf.abs(y - tf.Variable(newY))  # Calculate absolute (true Y - predict Y)
    opt = tf.train.AdamOptimizer(learning_rate=0.0001)
    trainStep = opt.minimize(loss)

    print('Graph ', myGraph.get_operations())
    print('TrainVariable ', tf.trainable_variables())
Session
with tf.Session(graph=myGraph) as sess:
    sess.run(tf.global_variables_initializer())

    dataFolder = './'
    writer = tf.summary.FileWriter('./logMyGraph', sess.graph)
    startTime = datetime.datetime.now()

    numberSummary = 0
    accuracyTotalTrain = []
    for trainEpoch in range(1, 25 + 1):
        arrayTrain = []
        dataPPG, dataLabel, dataSize = loadMatlabData(dataFolder + "TestValues.mat")

        for i in range(dataSize):
            batchSignal, valueTrue = getNextSignal(i, dataPPG, dataLabel, 222, 222)
            _, lossPrint, valuePredict = sess.run([trainStep, loss, newY],
                                                  feed_dict={x: batchSignal, y: valueTrue})
            print('Train ', i, ' ', valueTrue, ' - ', valuePredict, ' Loss ', lossPrint)

            arrayTrain.append(lossPrint)
            writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='Loss', simple_value=float(lossPrint))]),
                               numberSummary)
            numberSummary += 1
        accuracyTotalTrain.append(np.mean(arrayTrain))
    print('Final Train : ', accuracyTotalTrain)

    sess.close()
It seems that the variable batchSignal has the wrong type or shape. It must be a numpy array of shape exactly [1, 222]. If you want to use batches of n × 222 examples, the placeholder x should have a shape of [None, 222] and the placeholder y a shape of [None].
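As a minimal sketch of the fix on the feeding side (assuming batchSignal arrives from the .mat file as a flat array of 222 floats):

import numpy as np

# Make the fed array match the placeholder's shape [1, 222] and dtype exactly.
batchSignal = np.asarray(batchSignal, dtype=np.float32).reshape(1, 222)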
By the way, consider using tf.layers.dense instead of explicitly initializing variables and implementing the layers yourself.
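For instance, the three hand-written layers could be reduced to something like this (a sketch only; the layer names are illustrative and the default initializers differ from the original truncated-normal setup):

def Model_FCL(inputX):
    # tf.layers.dense creates and tracks the weight and bias variables itself.
    fch1 = tf.layers.dense(inputX, 1024, activation=tf.nn.relu, name='fc1')
    fch2 = tf.layers.dense(fch1, 1024, activation=tf.nn.relu, name='fc2')
    logits = tf.layers.dense(fch2, 222, name='out')
    predictY = tf.nn.softmax(logits)
    return predictY, logits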
There are a few things to change.
Error Case 0. You don't need to reshape your flow between layers; you can use None in the first dimension to pass a dynamic batch size.
Error Case 1. You can use your newY directly as the output of the network; tf.Variable is only for defining weights or biases.
Error Case 2. It also seems that TensorFlow doesn't have a gradient implementation for either tf.abs() or tf.gather() here. For a regression problem, the mean squared error is often sufficient.
Here is how I would rewrite your code. I don't have your MATLAB part, so I can't debug the Python/MATLAB interface:
Model:
def Model_FCL(inputX):
    # Fully Connected Layer 1
    fcW1 = tf.get_variable('w1', shape=[222, 1024], initializer=tf.truncated_normal_initializer())
    fcb1 = tf.get_variable('b1', shape=[1024], initializer=tf.truncated_normal_initializer())
    # fcb1 = tf.get_variable('b1', shape=[1024], trainable=False, initializer=tf.constant_initializer(valueThatYouWant))  # if you want to keep your bias constant
    fch1 = tf.nn.relu(tf.matmul(inputX, fcW1) + fcb1, name='relu1')

    # Fully Connected Layer 2
    fcW2 = tf.get_variable('w2', shape=[1024, 1024], initializer=tf.truncated_normal_initializer())
    fcb2 = tf.get_variable('b2', shape=[1024], initializer=tf.truncated_normal_initializer())
    # fcb2 = tf.get_variable('b2', shape=[1024], trainable=False, initializer=tf.constant_initializer(valueThatYouWant))  # if you want to keep your bias constant
    fch2 = tf.nn.relu(tf.matmul(fch1, fcW2) + fcb2, name='relu2')

    # Output Layer
    fcW3 = tf.get_variable('w3', shape=[1024, 222], initializer=tf.truncated_normal_initializer())
    fcb3 = tf.get_variable('b3', shape=[222], initializer=tf.truncated_normal_initializer())
    # fcb3 = tf.get_variable('b3', shape=[222], trainable=False, initializer=tf.constant_initializer(valueThatYouWant))  # if you want to keep your bias constant
    logits = tf.add(tf.matmul(fch2, fcW3), fcb3)
    predictY = tf.nn.softmax(logits)  # I'm not sure it will learn if you do softmax and then abs/MSE
    return predictY, logits
Graph:
with myGraph.as_default():
    # define placeholders for the input data & output data
    # put None (dynamic batch size), not -1, in the first dimension so that you can change your batch size
    x = tf.placeholder(tf.float32, shape=[None, 222], name='inputX')  # Signal size = [None, 222]
    y = tf.placeholder(tf.float32, shape=[None], name='trueY')  # Float value size = [None]
    ...
    predictY, logits = Model_FCL(x)  # Predict Signal, size = [None, 222]
    maxPredict = tf.argmax(predictY, 1, name='maxPredict')  # Find max index of Predict Signal

    tensorFreq = tf.convert_to_tensor(basicFreq, tf.float32)
    newY = tf.gather(tensorFreq, maxPredict) * 60  # Find the value that corresponds to the Freq array index

    loss = tf.losses.mean_squared_error(labels=y, predictions=newY)  # maybe use MSE for a regression problem
    # loss = tf.abs(y - newY)  # Calculate absolute (true Y - predict Y); TensorFlow doesn't have a gradient implementation for tf.abs here
    opt = tf.train.AdamOptimizer(learning_rate=0.0001)
    trainStep = opt.minimize(loss)
If you are still getting the same error even after feeding the right numpy shape and maintaining the correct dtypes (np.int32 or np.float32) as suggested by the error message, then the following code should solve your problem:
# Print the placeholders and other nodes declared in the default graph,
# to see what is lingering in memory and causing the error.
print([n.name for n in tf.get_default_graph().as_graph_def().node])

# Reset the default graph so you can redeclare your placeholders and start over.
tf.reset_default_graph()
The problem could also be worked around by restarting the kernel for every debugging run, but that is not practical.

tf.nn.static_rnn - input must be a sequence

I'm trying to create a 2-layer LSTM (incl. dropout) but get an error message saying 'inputs must be a sequence'.
I use embeddings as the input and am not sure how to turn these into a sequence. Any explanations are greatly appreciated.
This is my graph definition:
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, n_steps], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform([vocab_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
    embedded_chars = tf.nn.embedding_lookup(embeddings_var, input_x)
    print(embedded_chars, 'embed')

def get_a_cell(lstm_size, keep_prob):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
    return drop

with tf.name_scope('lstm'):
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
    )
    lstm_outputs, state = tf.nn.static_rnn(cell=cell, inputs=embedded_chars, dtype=tf.float32)

with tf.name_scope('Fully_connected'):
    W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=n_classes))
    output = tf.nn.xw_plus_b(lstm_outputs, W, b)
    predictions = tf.argmax(output, 1, name='predictions')

with tf.name_scope('Loss'):
    # Cross-entropy loss and optimizer initialization
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)

with tf.name_scope('Accuracy'):
    # Accuracy metrics
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.nn.softmax(output)), input_y), tf.float32))

with tf.name_scope('num_correct'):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')
EDIT:
When changing static_rnn to dynamic_rnn, the error message changes to the following, failing on the bias (b) variable:
TypeError: 'int' object is not iterable
After I changed the bias term to this:
b = tf.Variable(tf.random_normal([n_classes]))
I get a new error message:
ValueError: Shape must be rank 2 but is rank 3 for 'Fully_connected/xw_plus_b/MatMul' (op: 'MatMul') with input shapes: [?,27,128], [128,6].
Let's assume you use tf.nn.dynamic_rnn. (In the tf.nn.static_rnn case, the first problem is that you don't give the input in the right format: tf.nn.static_rnn expects a sequence of tensors, i.e. a list of seq_len tensors each of shape [batch_size, input_size], not a single tensor of shape [batch_size, seq_len, dim], whereas tf.nn.dynamic_rnn handles such a tensor as input.)
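A minimal sketch of what static_rnn would need instead (assuming the shapes above; tf.unstack splits the embedded batch into a per-time-step list):

# embedded_chars: [batch_size, n_steps, EMBEDDING_DIM]
# static_rnn wants a list of n_steps tensors, each of shape [batch_size, EMBEDDING_DIM].
inputs_as_sequence = tf.unstack(embedded_chars, num=n_steps, axis=1)
lstm_outputs, state = tf.nn.static_rnn(cell=cell, inputs=inputs_as_sequence, dtype=tf.float32)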
I invite you to read the documentation of tf.nn.dynamic_rnn: for your classification problem you probably don't want lstm_outputs but state, which contains the last output of your RNN. lstm_outputs contains all the outputs, whereas here you are only interested in the last one (unless you want to do something like attention for classification, in which case you need all the outputs).
To get the last output, you basically need to do this:
lstm_outputs, state = tf.nn.dynamic_rnn(cell=cell,inputs=embedded_chars, dtype=tf.float32)
last_output = state[-1].h
state[-1] takes the state of the last cell, whose h field contains the last output; then pass last_output to your feed-forward network.
Full code (working, but computes the wrong accuracy; see comments):
n_classes = 6
n_steps = 27
num_hidden = 128
dropout_keep_prob = 0.5
vocab_size = 10000
EMBEDDING_DIM = 300
num_layers = 2

with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, n_steps], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform([vocab_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
    embedded_chars = tf.nn.embedding_lookup(embeddings_var, input_x)
    print(embedded_chars, 'embed')

def get_a_cell(lstm_size, keep_prob):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
    return drop

with tf.name_scope('lstm'):
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
    )
    lstm_outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=embedded_chars, dtype=tf.float32)
    last_output = state[-1].h

with tf.name_scope('Fully_connected'):
    W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[n_classes]))
    output = tf.nn.xw_plus_b(last_output, W, b)
    predictions = tf.argmax(output, 1, name='predictions')

with tf.name_scope('Loss'):
    # Cross-entropy loss and optimizer initialization
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)

with tf.name_scope('Accuracy'):
    # Accuracy metrics (this rounds the softmax output, which computes the
    # wrong accuracy, as noted above)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.nn.softmax(output)), input_y), tf.float32))

with tf.name_scope('num_correct'):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')

ValueError: Tensor("BN_1/moments/Squeeze:0", shape=(32, 256, 32), dtype=float32) must be from the same graph as Tensor

I'm trying to get started with TensorFlow in Python, building a simple CNN with batch normalization. But when I create a new graph to run, an exception occurs in the batch-norm code.
My key code is as follows:
# exception here
def batch_norm(x, beta, gamma, phase_train, scope='bn', decay=0.9, eps=1e-5):
    with tf.variable_scope(scope):
        batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train, mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
    return normed
training code:
# start training
output = conv2d_net()
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.002).minimize(loss)

predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
max_idx_p = tf.argmax(predict, 2)
max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
correct_pred = tf.equal(max_idx_p, max_idx_l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 0
    while True:
        batch_x, batch_y = get_next_batch(64)
        _, loss_ = sess.run([optimizer, loss],
                            feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75, train_phase: True})
        print(step, loss_)

        if step % 10 == 0 and step != 0:
            batch_x_test, batch_y_test = get_next_batch(100)
            acc = sess.run(accuracy,
                           feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1., train_phase: False})
            print("step %s,accuracy:%s" % (step, acc))

            if acc > 0.05:
                # stop training and save parameters in layer
                result_weights['wc1'] = weights['wc1'].eval(sess)
                ...
                break
        step += 1
Create new graph for exporting:
EXPORT_DIR = './model'
if os.path.exists(EXPORT_DIR):
    shutil.rmtree(EXPORT_DIR)

g = tf.Graph()
with g.as_default():
    x_2 = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT * IMAGE_WIDTH], name="input")
    x_image = tf.reshape(x_2, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    # fill trained parameters and create new cnn layers
    WC1 = tf.constant(result_weights['wc1'], name="WC1")
    ...

    # crash here!!!
    CONV1 = conv2d(WC1, BC1, x_image, tf.constant(0.0, shape=[32]),
                   tf.random_normal(shape=[32], mean=1.0, stddev=0.02), scope='BN_1')

    OUTPUT = tf.add(tf.matmul(FULL1, W_OUT), B_OUT)
    OUTPUT = tf.nn.sigmoid(OUTPUT, name="output")

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    graph_def = g.as_graph_def()
    tf.train.write_graph(graph_def, EXPORT_DIR, 'phone_model_graph.pb', as_text=True)
I create the new graph at the end. The exception suggests that it is using a parameter from the old training graph. How can this be explained?
Thank you very much!
I call batch_norm in the function conv2d. It seems that no tensor is passed to the new graph:
def conv2d(w, b, x, tf_constant, tf_random_normal, scope, keep_p=1., phase=tf.constant(False)):
    out = tf.nn.bias_add(tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME'), b)
    out = batch_norm(out, tf_constant, tf_random_normal, phase, scope=scope)
    out = tf.nn.relu(out)
    out = tf.nn.max_pool(out, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    out = tf.nn.dropout(out, keep_p)
    return out
I create the new graph at the end.
That's the key statement here: once you create a new graph, you can't use any tensor from the old graph in it. See a detailed explanation in this question. According to the stack trace, at least one of the tensors passed to batch_norm is defined before g.as_default(), which is why TensorFlow crashes. From your code snippets it's unclear exactly how batch_norm is called, so I can't say which one it is.
You can check this hypothesis by printing x.graph and g and checking whether these values differ. To avoid this problem, you can either do all the work inside one graph (the recommended way) or define the two graphs in different Python scopes, making it impossible to accidentally reuse the same Python variable in both graphs.
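A minimal sketch of that check (x here stands for whichever tensor is passed into batch_norm):

# If the tensor was created in the old default graph rather than in g,
# the two objects printed below will differ.
print(x.graph)
print(g)
print(x.graph is g)  # False confirms the hypothesis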

Some questions about the function run_epoch in ptb_word_lm.py of the TensorFlow RNN tutorial

TensorFlow RNN PTB language model tutorial: https://github.com/tensorflow/models/tree/master/tutorials/rnn/ptb
I have two questions about the run_epoch function in ptb_word_lm.py (CPU device only).
for step in range(model.input.epoch_size):
    feed_dict = {}
    for i, (c, h) in enumerate(model.initial_state):
        feed_dict[c] = state[i].c
        feed_dict[h] = state[i].h

    vals = session.run(fetches, feed_dict)
    cost = vals["cost"]
    state = vals["final_state"]
Question 1: Why do we need to create a feed_dict for the session here? I thought the PTBModel class already creates the initial state for the LSTM network:
cell = tf.contrib.rnn.MultiRNNCell(
    [cell for _ in range(config.num_layers)], state_is_tuple=True)
self._initial_state = cell.zero_state(config.batch_size, data_type())
state = self._initial_state

outputs = []
with tf.variable_scope("RNN"):
    for time_step in range(self.num_steps):
        if time_step > 0: tf.get_variable_scope().reuse_variables()
        (cell_output, state) = cell(inputs[:, time_step, :], state)
        outputs.append(cell_output)
Question 2: Why can session.run(fetches, feed_dict) return values here? When I try this in test code, it returns None:
import tensorflow as tf

# Model parameters
W = tf.Variable([.3], dtype=tf.float32)
b = tf.Variable([-.3], dtype=tf.float32)
# Model input and output
x = tf.placeholder(tf.float32)
linear_model = W * x + b
y = tf.placeholder(tf.float32)
# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
# train = optimizer.minimize(loss)
# training data
x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# training loop
init = tf.global_variables_initializer()
tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
train = optimizer.apply_gradients(zip(grads, tvars))

sess = tf.Session()
sess.run(init)  # reset values to wrong
for i in range(100):
    print(sess.run(train, {x: x_train, y: y_train}))
It just prints:
None
None
None
..
..
thank you!
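A minimal sketch of the difference, for comparison: the op returned by apply_gradients has no output value, so fetching it returns None, whereas run_epoch fetches tensors (cost, final_state) that do have values. Fetching the loss tensor alongside train would likewise return a number:

# 'train' is an operation with no output, so sess.run(train, ...) returns None.
# Fetching a tensor (here: loss) alongside it returns its value, as run_epoch does.
for i in range(100):
    _, loss_value = sess.run([train, loss], {x: x_train, y: y_train})
    print(loss_value)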

placeholders definition for nd-array input in tensorflow

I'm trying to build an LSTM RNN based on this guide:
http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
My input is an ndarray of size 89102*39 (89102 rows, 39 features). There are 3 labels for the data: 0, 1, 2.
It seems like I'm having a problem with the placeholder definitions, but I'm not sure what it is.
My code is:
data = tf.placeholder(tf.float32, [None, 1000, 39])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(self.num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([self.num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))
And I'm getting the following error:
File , line 133, in execute_graph
    sess.run(minimize, {data: inp, target: out})
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 789, in run
    run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 975, in _run
    % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1000, 39) for Tensor 'Placeholder:0', which has shape '(1000, 89102, 39)'
Any idea what might be causing the problem?
As indicated here, the dynamic_rnn function takes batch inputs of shape
[batch_size, truncated_backprop_length, input_size]
In the link that you provided, the shape of the placeholder was
data = tf.placeholder(tf.float32, [None, 20,1])
This means that they chose truncated_backprop_length=20 and input_size=1.
Their data was the following 3D array:
[
array([[0],[0],[1],[0],[0],[1],[0],[1],[1],[0],[0],[0],[1],[1],[1],[1],[1],[1],[0],[0]]),
array([[1],[1],[0],[0],[0],[0],[1],[1],[1],[1],[1],[0],[0],[1],[0],[0],[0],[1],[0],[1]]),
.....
]
Based on your code, it seems that train_input is a 2D array rather than a 3D array. Hence, you need to transform it into a 3D array. To do that, you need to decide which values to use for truncated_backprop_length and input_size, and then define data appropriately.
For example, if you want truncated_backprop_length and input_size to be 39 and 1 respectively, you can do:
import numpy as np
train_input = np.reshape(train_input, (len(train_input), 39, 1))
data = tf.placeholder(tf.float32, [None, 39, 1])
I changed your code according to the above discussion and ran it on some random data that I generated. It runs without throwing an error. See the code below:
import tensorflow as tf
import numpy as np
num_hidden = 5
train_input = np.random.rand(89102, 39)
train_input = np.reshape(train_input, (len(train_input), 39, 1))
train_output = np.random.rand(89102, 3)
data = tf.placeholder(tf.float32, [None, 39, 1])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))
