I've been struggling with this pet problem for a while now, so any help would be appreciated!
I have a CSV file with a few random columns, and a final column that's based on the sum of the last few values from the first column. I'm trying to use an LSTM model to capture this structure, i.e. to predict the last column given the first few.
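For context, the data is generated roughly like this (a simplified sketch; the number of columns and the window size here are placeholders, not the exact values I use):

import numpy as np
import pandas as pd

np.random.seed(0)
n_rows, n_cols, window = 1000, 5, 3  # placeholder sizes

# a few random feature columns
df = pd.DataFrame(np.random.rand(n_rows, n_cols),
                  columns=["x%d" % i for i in range(n_cols)])

# final column: rolling sum of the last `window` values of the first column
df["y"] = df["x0"].rolling(window).sum()
df = df.dropna()  # drop rows that don't have a full window yet
df.to_csv("toy_data.csv", index=False)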
Here's the model I've been using:
# Split into training and test data
train_input = train_input.reshape(m, n_input, 1)  # m is the number of rows, n_input is the number of input columns
NUM_EXAMPLES = int(m * training_size)
test_input = train_input[NUM_EXAMPLES:]
test_output = train_output[NUM_EXAMPLES:]
train_input = train_input[:NUM_EXAMPLES]
train_output = train_output[:NUM_EXAMPLES]
#
# Design model
#
data = tf.placeholder(tf.float32, [None, n_input, 1])
target = tf.placeholder(tf.float32, [None, n_classes])
num_hidden = 24
cell = tf.contrib.rnn.LSTMCell(num_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
no_of_batches = int(len(train_input) / batch_size)
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - {}".format(i))
    incorrect = sess.run(error, {data: test_input, target: test_output})
    print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()
I've tried several spreadsheets with random numbers, and I'm consistently getting an error rate of around 83%. On the other hand, this algorithm CAN learn when the target column is not sequential.
Thanks in advance!
I don't quite follow your point. Do you mean you have a CSV file like this?
x1 x2 x3 x4 ... xn
v11 v21 v31 v41 ... vn1
v12 v22 v32 v42 ... vn2
...
v1n v2n v3n v4n ... vnn
y1 y2 y3 y4 ... yn
And yn is based on sum(vn1 + ... + vnn)? Something like a * sum(V) + b?
I had a curious experience with Keras.
Info: input dataset shapes
16 features, 5000 observations
target variable: 1 dimension
Problem: Regression
While writing code for students I developed a toy network in plain TF, using the following code (I know it is not a complete example, but I hope it gives you enough information):
n1 = 15 # Number of neurons in layer 1
n2 = 15 # Number of neurons in layer 2
n3 = 15
nx = number_of_x_points
n_dim = nx
n4 = 1
stddev_f = 0.1
tf.set_random_seed(5)
W1 = tf.Variable(tf.random_normal([n1, n_dim], stddev=stddev_f))
b1 = tf.Variable(tf.constant(0.0, shape = [n1,1]) )
W2 = tf.Variable(tf.random_normal([n2, n1], stddev=stddev_f))
b2 = tf.Variable(tf.constant(0.0, shape = [n2,1]))
W3 = tf.Variable(tf.random_normal([n3,n2], stddev = stddev_f))
b3 = tf.Variable(tf.constant(0.0, shape = [n3,1]))
W4 = tf.Variable(tf.random_normal([n4,n3], stddev = stddev_f))
b4 = tf.Variable(tf.constant(0.0, shape = [n4,1]))
X = tf.placeholder(tf.float32, [nx, None]) # Inputs
Y = tf.placeholder(tf.float32, [1, None]) # Labels
Z1 = tf.nn.sigmoid(tf.matmul(W1, X) + b1) # n1 x n_dim * n_dim x n_obs = n1 x n_obs
Z2 = tf.nn.sigmoid(tf.matmul(W2, Z1) + b2) # n2 x n1 * n1 x n_obs = n2 x n_obs
Z3 = tf.nn.sigmoid(tf.matmul(W3, Z2) + b3)
Z4 = tf.matmul(W4, Z3) + b4
y_ = tf.sigmoid(Z4)
cost = tf.reduce_mean(tf.square(y_-Y))
learning_rate = 0.005
training_step = tf.train.AdamOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
training_epochs = 1000
cost_history = np.empty(shape=[1], dtype = float)
cost_meas_history = np.empty(shape=[1], dtype = float)
train_x = np.transpose(data)
train_y = np.transpose(targets)
cost_history = []
for epoch in range(training_epochs + 1):
    for i in range(0, train_x.shape[0], batch_size):
        x_batch = train_x[i:i + batch_size, :]
        y_batch = train_y[i:i + batch_size, :]
        sess.run(training_step, feed_dict={X: x_batch, Y: y_batch})
    cost_ = sess.run(cost, feed_dict={X: train_x, Y: train_y})
    cost_history = np.append(cost_history, cost_)
    if epoch % 5000 == 0:
        print("Reached epoch", epoch, "cost J =", cost_)
This code works quite well, and 1000 epochs take about 5 seconds on my laptop. I then built the same network in Keras with the following code:
layers = tf.keras.layers  # needed for the layers.Dense calls below

model = tf.keras.Sequential()
model.add(layers.Dense(15, input_dim=16, activation='sigmoid'))
model.add(layers.Dense(15, activation='sigmoid'))
model.add(layers.Dense(15, activation='sigmoid'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer=tf.train.AdamOptimizer(0.005),
              loss='mse',
              metrics=['mae'])
# Training phase
model.fit(train_x.transpose(), train_y.transpose() / 100.0, epochs=1000, batch_size=100, verbose=0)
This code takes 43 seconds. Does anyone have any idea why this is the case? I expected Keras to be slower, but not that much slower. What am I missing?
Thanks, Umberto
OK, I found the reason... it was my mistake. Due to a series of errors (programming after midnight...), I was comparing batch GD with mini-batch GD. My apologies to everyone, and thanks to the person who noticed my mistake today...
If someone thinks this should be deleted, that's fine with me.
Now Keras and plain TF are taking exactly the same time. Thanks everyone for reading.
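In case it helps anyone else, this is what the mix-up boils down to (a toy sketch, not my actual code): with full-batch GD there is one parameter update per epoch, while model.fit(..., batch_size=100) does one update per mini-batch, so the amount of work per epoch is very different.

n_obs, batch_size, epochs = 5000, 100, 1000

# full-batch GD: one parameter update per epoch
updates_full_batch = epochs * 1                      # 1,000 updates in total

# mini-batch GD (what model.fit(..., batch_size=100) does)
updates_mini_batch = epochs * (n_obs // batch_size)  # 50,000 updates in total

print(updates_full_batch, updates_mini_batch)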
Best, Umberto
I'm trying to build a model that classifies my data, and I want to look at the LOSS function:
loss = tf.reduce_mean(-(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y)))
But as of now I only get NaN for the prediction, and the loss function also prints NaN.
np_labels = np.array(labels)
np_labels = np_labels.reshape([np_labels.shape[0], 1])
features = 910
hidden_layer_nodes = 100
x = tf.placeholder(tf.float32, [None, features])
y_ = tf.placeholder(tf.float32, [None, 1])
W1 = tf.Variable(tf.truncated_normal([features,hidden_layer_nodes], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[hidden_layer_nodes]))
z1 = tf.add(tf.matmul(x,W1),b1)
a1 = tf.nn.relu(z1)
W2 = tf.Variable(tf.truncated_normal([hidden_layer_nodes,1], stddev=0.1))
b2 = tf.Variable(0.)
z2 = tf.matmul(a1,W2) + b2
y = 1 / (1.0 + tf.exp(-z2))
loss = tf.reduce_mean(-(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y)))
update = tf.train.AdamOptimizer(0.01).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(0, 50):
    sess.run(update, feed_dict={x: fvecs, y_: np_labels})
    print(sess.run(loss, feed_dict={x: fvecs, y_: np_labels}))
    # sess.run(update, feed_dict = {x:data_x, y_:data_y})
    # print(sess.run(loss, feed_dict={x: data_x, y_: data_y}))
print('prediction: ', y.eval(session=sess, feed_dict = {x:[[493.9, 702.6, .....
I want to print the loss.
Thanks
This is not a TensorFlow issue. It results from the very bad idea of implementing the loss function yourself.
import tensorflow as tf

z2 = tf.random_normal([8, 10]) * 20
y_ = tf.random_uniform([8, 1], minval=0, maxval=10, dtype=tf.float32)
y = 1 / (1.0 + tf.exp(-z2))
loss = tf.reduce_mean(-(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y)))

with tf.Session() as sess:
    print(sess.run(loss))  # will almost always fail (NaN/Inf)
It gives Inf simply because the log-sum-exp trick is missing, which makes your implementation fail due to numerical instability (a folklore example that produces overflows). Just run this code several times and you will get either NaN or Inf.
The solution would be:
replace y = tf.sigmoid(z2) (written above as 1 / (1.0 + tf.exp(-z2))) with y = tf.identity(z2) to just get the untransformed logits
replace loss = ... with loss = tf.nn.sigmoid_cross_entropy_with_logits(...) to use the numerically stable formulation
See the docs of sigmoid_cross_entropy_with_logits, which explicitly describe this issue.
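A minimal sketch of the stable version, using the same toy tensors as above (shapes adjusted so labels and logits match, and labels kept in [0, 1]):

import tensorflow as tf

z2 = tf.random_normal([8, 1]) * 20                  # raw logits, no sigmoid applied
y_ = tf.random_uniform([8, 1], minval=0, maxval=1)  # targets as probabilities in [0, 1]

# internally computes max(z, 0) - z*y + log(1 + exp(-|z|)), which never overflows
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=z2))

with tf.Session() as sess:
    print(sess.run(loss))  # finite every run, no NaN/Inf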
I am trying to build a deep network in TF, using Martin Gorner's video as a reference. I had some success with the shallow network example; however, the deep network's accuracy collapses for some reason after reaching around 98%.
The network recognises MNIST digits using a five-layer network. I am training with batches of 100 for 10000 iterations. The accuracy steadily increases until it reaches around 98%, then collapses completely to 9.8%.
Any ideas please?
"""Tensor flow character recognition of Numerals"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# layer 1 will have K = 200 neurons, and so on
K = 200
L = 100
M = 60
N = 30
# ----- Initialization -----
# the None will become the batch size of 100
# 28 by 28 grayscale images described by a single byte
X = tf.placeholder(tf.float32, [None, 784])
# training will require computing variables W and b
W1 = tf.Variable(tf.truncated_normal([28*28, K], stddev=0.1))
B1 = tf.Variable(tf.zeros([K]))
W2 = tf.Variable(tf.truncated_normal([K, L], stddev=0.1))
B2 = tf.Variable(tf.zeros([L]))
W3 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
B3 = tf.Variable(tf.zeros([M]))
W4 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
B4 = tf.Variable(tf.zeros([N]))
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.zeros([10]))
init = tf.global_variables_initializer()
# ----- Model -----
# the model Y = WX + b
# reshape is used to flatten the image into a 1D array of 784 locations
# -1 tells Python to figure out the reshape, as there's only one solution
#Y = tf.nn.softmax(tf.matmul(tf.reshape(X, [-1, 784]), W) + b)
Y1 = tf.nn.relu(tf.matmul(X, W1) + B1)
Y2 = tf.nn.relu(tf.matmul(Y1, W2) + B2)
Y3 = tf.nn.relu(tf.matmul(Y2, W3) + B3)
Y4 = tf.nn.relu(tf.matmul(Y3, W4) + B4)
Y5 = tf.nn.softmax(tf.matmul(Y4, W5) + B5)
# placeholder for correct answers
# e.g. correct answer for 2 will be [0 0 1 0 0 0 0 0 0 0 ]
Y_ = tf.placeholder(tf.float32, [None, 10])
# the loss function
cross_entropy = tf.reduce_sum(Y_ * tf.log(Y5)) * -1
# ----- Success Metrics -----
# calculate the % of correct answers found in batch
is_correct = tf.equal(tf.argmax(Y5, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
# ----- Training Step -----
# pick an optimizer and tell it to minimize the cross entropy loss function
optimizer = tf.train.GradientDescentOptimizer(0.003)
train_step = optimizer.minimize(cross_entropy)
# create the execution session
sess = tf.Session()
sess.run(init)
for i in range(10000):
    # load a batch of images from mnist
    batch_X, batch_Y = mnist.train.next_batch(100)
    train_data = {X: batch_X, Y_: batch_Y}
    # ----- Execution -----
    # train
    sess.run(train_step, feed_dict=train_data)
    # test for success
    a, c = sess.run([accuracy, cross_entropy], feed_dict=train_data)
    # this is only to display information
    if i % 100 == 0:
        # check for success on whole data set
        test_data = {X: mnist.test.images, Y_: mnist.test.labels}
        a, c = sess.run([accuracy, cross_entropy], feed_dict=test_data)
        print(a)
It is the accuracy on the validation set that collapses, right? If so, you may be dramatically overfitting. 98% is possibly the best you can achieve with a network of this capacity/structure.
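One quick way to check is to print training and test accuracy side by side in the existing display block (a sketch reusing the tensors already defined in your code; it evaluates on a 10k slice of the training set to keep it cheap):

if i % 100 == 0:
    train_acc = sess.run(accuracy, feed_dict={X: mnist.train.images[:10000],
                                              Y_: mnist.train.labels[:10000]})
    test_acc = sess.run(accuracy, feed_dict={X: mnist.test.images,
                                             Y_: mnist.test.labels})
    print("step %d: train %.3f, test %.3f" % (i, train_acc, test_acc))

If the training accuracy stays near 100% while the test accuracy collapses, overfitting is the likely explanation; if both collapse together, the problem lies elsewhere.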
I'm trying to build an LSTM RNN based on this guide:
http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
My input is an ndarray of size 89102*39 (89102 rows, 39 features). There are 3 labels for the data: 0, 1, 2.
It seems like I'm having a problem with the placeholder definitions, but I'm not sure what it is.
My code is:
data = tf.placeholder(tf.float32, [None, 1000, 39])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(self.num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([self.num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))
And I'm getting the following error:
File , line 133, in execute_graph
sess.run(minimize, {data: inp, target: out})
File "/usr/local/lib/python3.5/dist-
packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 975, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1000, 39) for Tensor 'Placeholder:0', which has shape '(1000, 89102, 39)'
Any idea what might be causing the problem?
As indicated here, the dynamic_rnn function takes batch inputs of shape
[batch_size, truncated_backprop_length, input_size]
In the link that you provided, the shape of the placeholder was
data = tf.placeholder(tf.float32, [None, 20,1])
This means that they chose truncated_backprop_length=20 and input_size=1.
Their data was the following 3D array:
[
array([[0],[0],[1],[0],[0],[1],[0],[1],[1],[0],[0],[0],[1],[1],[1],[1],[1],[1],[0],[0]]),
array([[1],[1],[0],[0],[0],[0],[1],[1],[1],[1],[1],[0],[0],[1],[0],[0],[0],[1],[0],[1]]),
.....
]
Based on your code, it seems that train_input is a 2D array and not a 3D array. Hence, you need to transform it into a 3D array. In order to do that, you need to decide which parameters you want to use for truncated_backprop_length and input_size. Afterwards, you need to define
data appropriately.
For example, if you want truncated_backprop_length and input_size to be 39 and 1 respectively, you can do
import numpy as np
train_input=np.reshape(train_input,(len(train_input),39,1))
data = tf.placeholder(tf.float32, [None, 39,1])
I changed your code according to the above discussion and ran it on some random data that I generated. It runs without throwing an error. See the code below:
import tensorflow as tf
import numpy as np
num_hidden=5
train_input=np.random.rand(89102,39)
train_input=np.reshape(train_input,(len(train_input),39,1))
train_output=np.random.rand(89102,3)
data = tf.placeholder(tf.float32, [None, 39, 1])
target = tf.placeholder(tf.float32, [None, 3])
cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1000
no_of_batches = int(len(train_input) / batch_size)
epoch = 5000
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    print("Epoch - ", str(i))
I wrote a neural network in TensorFlow for the XOR problem. I used 1 hidden layer with 2 units and softmax classification. The input is of the form <1, x_1, x_2, zero, one>, where
1 is the bias
x_1 and x_2 cover all the combinations {00, 01, 10, 11}, with values drawn from a normal distribution centered around 0 or 1
zero: is 1 if the output is zero
one: is 1 if the output is one
The accuracy is always around 0.5. What has gone wrong? Is the architecture of the neural network wrong, or is there something with the code?
import tensorflow as tf
import numpy as np
from random import randint
DEBUG=True
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(X, weight_hidden, weight_output):
    # [1,3] x [3,n_hiddent_units] = [1,n_hiddent_units]
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    # [1,n_hiddent_units] x [n_hiddent_units, 2] = [1,2]
    return hiddern_units_output
    #return tf.matmul(hiddern_units_output, weight_output)

def getHiddenLayerOutput(X, weight_hidden):
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    return hiddern_units_output
total_inputs = 100
zeros = tf.zeros([total_inputs,1])
ones = tf.ones([total_inputs,1])
around_zeros = tf.random_normal([total_inputs,1], mean=0, stddev=0.01)
around_ones = tf.random_normal([total_inputs,1], mean=1, stddev=0.01)
batch_size = 10
n_hiddent_units = 2
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 2])
weight_hidden = init_weights([3, n_hiddent_units])
weight_output = init_weights([n_hiddent_units, 2])
hiddern_units_output = getHiddenLayerOutput(X, weight_hidden)
py_x = model(X, weight_hidden, weight_output)
#cost = tf.square(Y - py_x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    trX_0_0 = sess.run(tf.concat([ones, around_zeros, around_zeros, ones, zeros], axis=1))
    trX_0_1 = sess.run(tf.concat([ones, around_zeros, around_ones, zeros, ones], axis=1))
    trX_1_0 = sess.run(tf.concat([ones, around_ones, around_zeros, zeros, ones], axis=1))
    trX_1_1 = sess.run(tf.concat([ones, around_ones, around_ones, ones, zeros], axis=1))
    trX = sess.run(tf.concat([trX_0_0, trX_0_1, trX_1_0, trX_1_1], axis=0))
    trX = sess.run(tf.random_shuffle(trX))
    print(trX)

    for i in range(10):
        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX) + 1, batch_size)):
            trY = tf.identity(trX[start:end, 3:5])
            trY = sess.run(tf.reshape(trY, [batch_size, 2]))
            sess.run(train_op, feed_dict={X: trX[start:end, 0:3], Y: trY})
        start_index = randint(0, (total_inputs * 4) - batch_size)
        y_0 = sess.run(py_x, feed_dict={X: trX[start_index:start_index + batch_size, 0:3]})
        print("iteration :", i, " accuracy :", np.mean(np.absolute(trX[start_index:start_index + batch_size, 3:5] - y_0)), "\n")
Check the comments section for the updated code
The problem was with the randomly assigned weights. Here is the gist of the modified version, obtained after a series of trial and error.
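A sketch of the kind of change, assuming the fix is essentially a larger scale in the weight initialization (the rest of the code stays the same; the exact values in the updated code referenced above may differ):

def init_weights(shape):
    # larger initial scale than the original stddev=0.01
    return tf.Variable(tf.random_normal(shape, stddev=1.0))

# if the hidden layer alone still cannot separate XOR, the output layer that was
# commented out in model() above would also need to be used:
#   return tf.matmul(hiddern_units_output, weight_output)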