Tensorflow streaming metrics are not working - python

I am trying to implement the algorithm here: https://arxiv.org/pdf/1702.02098.pdf
For some reason I always get 0.2 recall and 0 accuracy. Am I using the streaming metrics correctly? The documentation only gives pseudocode. Here is my code:
import tensorflow as tf
import numpy as np
from nltk.corpus import brown
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelBinarizer
from tensorflow.contrib.metrics import streaming_accuracy, streaming_recall

data = brown.tagged_sents()

tags = set()
for sent in data:
    for token, tag in sent:
        tags.add(tag)

label_processor = LabelBinarizer()
label_processor.fit(list(tags))

embedding_dim = 100
word2vec = Word2Vec(brown.sents(), size=embedding_dim, min_count=1)
embedding = word2vec.wv
del word2vec  # Saves RAM

test = 0.1
val = 0.1

data_length = len(data)
inds = np.random.permutation(np.arange(data_length))

test_inds = inds[:int(data_length*test)]
val_inds = inds[int(data_length*test):int(data_length*(val+test))]
train_inds = inds[int(data_length*(val+test)):]

val_x = []
val_y = []
for i in val_inds:
    x = []
    tags = []
    for token, tag in data[i]:
        x.append(embedding[token])
        tags.append(tag)

    x = np.array(x)
    x = x.reshape(x.shape[0], 1, x.shape[1], 1)
    y = np.array(label_processor.transform(tags))

    val_x.append(x)
    val_y.append(y)

val_x = np.concatenate(val_x, axis=0)
val_y = np.concatenate(val_y, axis=0)

test_x = []
test_y = []
for i in test_inds:
    x = []
    tags = []
    for token, tag in data[i]:
        x.append(embedding[token])
        tags.append(tag)

    x = np.array(x)
    x = x.reshape(x.shape[0], 1, x.shape[1], 1)
    y = np.array(label_processor.transform(tags))

    test_x.append(x)
    test_y.append(y)

test_x = np.concatenate(test_x, axis=0)
test_y = np.concatenate(test_y, axis=0)

learning_rate = 0.001
n_iter = 12000
display_step = 100
depth = 5

label_processor
n_classes = label_processor.classes_.shape[0]
dropout_prob = 0.50

x = tf.placeholder(tf.float32, [None, 1, embedding_dim, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32, [])
depth_tensor = tf.constant(depth, tf.float32)

def data_processor(data, train_inds, word2vec, label_processor, n_iter):
    inds = np.random.randint(len(train_inds), size=(n_iter))

    for i in inds:
        x = []
        tags = []
        for token, tag in data[train_inds[i]]:
            x.append(word2vec[token])
            tags.append(tag)

        x = np.array(x)
        x = x.reshape(x.shape[0], 1, x.shape[1], 1)
        y = np.array(label_processor.transform(tags))

        yield x, y

def model(x, y, weights, biases, dropout, depth_tensor):
    net = tf.nn.dropout(x, dropout)
    net = tf.nn.conv2d(net, weights['first'], strides=[1, 1, 1, 1], padding='SAME')
    net = tf.nn.bias_add(net, biases['first'])
    net = tf.nn.relu(net)

    net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
    preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))

    for i in range(1, depth):
        net = tf.nn.dropout(net, dropout)
        net = tf.nn.atrous_conv2d(net, weights['iterated'], rate=2**i, padding='SAME')
        net = tf.nn.bias_add(net, biases['iterated'])
        net = tf.nn.relu(net)

        net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
        preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
        cost += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))

    return preds, tf.divide(cost, depth_tensor)

weights = {'first': tf.Variable(tf.random_normal([1, 3, 1, 10])),
           'iterated': tf.Variable(tf.random_normal([1, 3, 10, 10])),
           'out': tf.Variable(tf.random_normal([embedding_dim*10, n_classes]))}

biases = {'first': tf.Variable(tf.random_normal([10])),
          'iterated': tf.Variable(tf.random_normal([10])),
          'out': tf.Variable(tf.random_normal([n_classes]))}

preds, cost = model(x, y, weights, biases, dropout, depth_tensor)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)

init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    i = 1
    for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
        sess.run(optimizer,
                 feed_dict={x: batch_x, y: batch_y,
                            dropout: dropout_prob})

        if i % display_step == 0:
            loss = sess.run(cost,
                            feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
            print("Iter:{}, Minibatch Loss:{:.6f}".format(i, loss))

        i += 1

    sess.run(init2)
    for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
        recall, accuracy = sess.run([update_recall, update_accuracy],
                                    feed_dict={x: batch_x, y: batch_y, dropout: 1})
        f1 = 2 * recall * accuracy / (recall + accuracy)

    print("Testing Accuracy:", accuracy, "Testing Recall:", recall, "Testing F1 Score:", f1)
And here is the part where I used the streaming metrics:
accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)

init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    i = 1
    for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
        sess.run(optimizer,
                 feed_dict={x: batch_x, y: batch_y,
                            dropout: dropout_prob})

        if i % display_step == 0:
            loss = sess.run(cost,
                            feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
            print("Iter:{}, Minibatch Loss:{:.6f}".format(i, loss))

        i += 1

    sess.run(init2)
    for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
        recall, accuracy = sess.run([update_recall, update_accuracy],
                                    feed_dict={x: batch_x, y: batch_y, dropout: 1})
        f1 = 2 * recall * accuracy / (recall + accuracy)

I believe it's because you have only run the update ops but never fetched the value ops.
The code below:

for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
    recall, accuracy = sess.run([update_recall, update_accuracy],
                                feed_dict={x: batch_x, y: batch_y, dropout: 1})
    f1 = 2 * recall * accuracy / (recall + accuracy)

should be:

for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
    _, recall_value, _, accuracy_value = sess.run([update_recall, recall, update_accuracy, accuracy],
                                                  feed_dict={x: batch_x, y: batch_y, dropout: 1})
    f1 = 2 * recall_value * accuracy_value / (recall_value + accuracy_value)
The tensors accuracy and recall hold the actual metric values, while the update_* ops are only used to update the internal counter variables from which those values are computed.
By the way, I haven't tested this code. Please let me know if it doesn't work as you expect.
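For reference, here is a minimal sketch of that fetch-vs-update pattern with streaming_accuracy (same import as above); the placeholder names and the batches iterable are purely illustrative and not part of the code above:

labels_ph = tf.placeholder(tf.float32, [None, n_classes])  # illustrative placeholders only
preds_ph = tf.placeholder(tf.float32, [None, n_classes])

# the streaming metric returns a value tensor and an update op
acc_value, acc_update = streaming_accuracy(predictions=preds_ph, labels=labels_ph)

with tf.Session() as sess:
    # streaming metrics keep their counters in local variables
    sess.run(tf.local_variables_initializer())
    for batch_labels, batch_preds in batches:  # `batches` is a hypothetical iterable
        # running the update op accumulates the internal counters for this batch
        sess.run(acc_update, feed_dict={labels_ph: batch_labels, preds_ph: batch_preds})
    # fetching the value tensor reads the metric aggregated over all batches seen so far
    final_accuracy = sess.run(acc_value)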

Related

ValueError: Cannot feed value of shape (1, 10712) for Tensor 'Placeholder:0', which has shape '(?, 784)'

It is actually my first time using TensorFlow for image recognition. While testing the code here I got this error and could not fix the problem for about 2 days now. I tried to reshape X and Y, but got the same result. My goal is to train a model to automatically and correctly read a printed image of a number.
Please help, and thank you in advance.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
from PIL import Image

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # y labels are oh-encoded

n_train = mnist.train.num_examples  # 55,000
n_validation = mnist.validation.num_examples  # 5000
n_test = mnist.test.num_examples  # 10,000

n_input = 784    # input layer (28x28 pixels)
n_hidden1 = 512  # 1st hidden layer
n_hidden2 = 256  # 2nd hidden layer
n_hidden3 = 128  # 3rd hidden layer
n_output = 10    # output layer (0-9 digits)

learning_rate = 1e-4
n_iterations = 1000
batch_size = 128
dropout = 0.5

X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_output])
keep_prob = tf.placeholder(tf.float32)

weights = {
    'w1': tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1)),
    'w2': tf.Variable(tf.truncated_normal([n_hidden1, n_hidden2], stddev=0.1)),
    'w3': tf.Variable(tf.truncated_normal([n_hidden2, n_hidden3], stddev=0.1)),
    'out': tf.Variable(tf.truncated_normal([n_hidden3, n_output], stddev=0.1)),
}

biases = {
    'b1': tf.Variable(tf.constant(0.1, shape=[n_hidden1])),
    'b2': tf.Variable(tf.constant(0.1, shape=[n_hidden2])),
    'b3': tf.Variable(tf.constant(0.1, shape=[n_hidden3])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_output]))
}

layer_1 = tf.add(tf.matmul(X, weights['w1']), biases['b1'])
layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
layer_drop = tf.nn.dropout(layer_3, keep_prob)
output_layer = tf.matmul(layer_3, weights['out']) + biases['out']

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=Y, logits=output_layer
    ))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_pred = tf.equal(tf.argmax(output_layer, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# train on mini batches
for i in range(n_iterations):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    sess.run(train_step, feed_dict={
        X: batch_x, Y: batch_y, keep_prob: dropout
    })

    # print loss and accuracy (per minibatch)
    if i % 100 == 0:
        minibatch_loss, minibatch_accuracy = sess.run(
            [cross_entropy, accuracy],
            feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0}
        )
        print(
            "Iteration",
            str(i),
            "\t| Loss =",
            str(minibatch_loss),
            "\t| Accuracy =",
            str(minibatch_accuracy)
        )

test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0})
print("\nAccuracy on test set:", test_accuracy)

img = np.invert(Image.open("C:\\Users\\ADEM\\Desktop\\msi_youssef\\PFE\\test\\numbers\\rafik1.png").convert('L')).ravel()
prediction = sess.run(tf.argmax(output_layer, 1), feed_dict={X: [img]})
print("Prediction for test image:", np.squeeze(prediction))

Compute validation loss in parallel with training loss

I am trying to compute the validation loss at each iteration, after the weights are computed on the training set. How can I use the resulting weight tensor to predict the values on the validation set?
I tried using two arrays to store the loss values at each step of the session.
X_tr, X_val, y_tr, y_val = train_test_split(train_set, y_train, test_size=0.2, random_state=42)

x = tf.placeholder(tf.float32, X_tr.shape, name = 'data')
y = tf.placeholder(tf.float32, y_tr.shape, name = 'labels')

W = tf.Variable(tf.zeros([len(train_set.columns),1]), dtype = tf.float32, name = 'weights')
b = tf.Variable(0, dtype = tf.float32, name = 'bias')

y_pred = tf.matmul(x, W) + b
loss = tf.reduce_mean(tf.square(y - y_pred), name = 'loss')

optimizer = tf.train.MomentumOptimizer(learning_rate = 0.0006, momentum = 0.90)
train_op = optimizer.minimize(loss)

losses_t, losses_v = [], []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        _, loss_t = sess.run([train_op, loss], feed_dict = {x: X_tr, y: y_tr})
        losses_t.append(loss_t)

        loss_v = sess.run(loss, feed_dict = {x: X_val, y: y_val})
        losses_v.append(loss_v)

        if(i % 20 == 0):
            print('Training loss is: ', loss_t)
            print('Validation loss is: ', loss_v)

    W_value, b_value = sess.run([W, b])
The error:
ValueError: Cannot feed value of shape (292, 220) for Tensor 'data_6:0', which has shape '(1166, 220)'
The problem was that I was feeding the same placeholders to both sess.run calls. Instead, I created new placeholders (matching the shapes of X_val and y_val) and a separate loss op for the validation set (X_val, y_val); a sketch of those definitions is given after the code below.
losses_t = []
losses_v = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        _, loss_t = sess.run([train_op, loss], feed_dict = {x: X_tr, y: y_tr})
        losses_t.append(loss_t)

        loss_v = sess.run(lossv, feed_dict = {xv: X_val, yv: y_val})
        losses_v.append(loss_v)

        if(i % 20 == 0):
            print('Training loss is: ', loss_t)
            print('Validation loss is: ', loss_v)

    W_value, b_value = sess.run([W, b])
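For completeness, the validation placeholders and loss used above (xv, yv, lossv) are not shown in the snippet; a minimal sketch of how they might be defined, assuming they mirror the training graph and reuse the same W and b:

# Sketch only: validation placeholders and a separate loss op reusing the trained W and b.
xv = tf.placeholder(tf.float32, X_val.shape, name='val_data')
yv = tf.placeholder(tf.float32, y_val.shape, name='val_labels')

yv_pred = tf.matmul(xv, W) + b
lossv = tf.reduce_mean(tf.square(yv - yv_pred), name='val_loss')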

tf.nn.in_top_k(logits,y,1) out of range error but equal actually

I'm building my first neural network, for binary classification, but I get an error when I try to evaluate the model with:
correct = tf.nn.in_top_k(logits,y,1)
where
the logits tensor (predictions) has shape [batch_size = 52, num_classes = 1] and type float32
the y tensor (targets) has shape [batch_size = 52] and type int32
I got this error:
targets[1] is out of range
[[{{node in_top_k/InTopKV2}}]]
After some debugging I understood that the values of my tensor y must be strictly less than num_classes, so the first value of y, which equals 1, is considered out of range even though the parameter num_classes = 1.
How can I allow my tensor values to be equal to num_classes rather than only strictly less than it? Or is there another way?
In my opinion, num_classes should equal 1 because it's a binary classification, so one output neuron is needed.
EDIT
Here's my full code:

import tensorflow as tf
import numpy as np

n_inputs = 28
n_hidden1 = 15
n_hidden2 = 5
n_outputs = 1

reset_graph()  # helper assumed to be defined elsewhere (resets the default graph)

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")  # None => any

def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.shape[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)  # n_inputs x n_neurons matrix of values close to 0
        W = tf.Variable(init, name="kernel")  # random weights
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        tf.cast(Z, tf.int32)
        if activation is not None:
            return activation(Z)
        else:
            return Z

def to_one_hot(y):
    n_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                       activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                       activation=tf.nn.relu)
logits = neuron_layer(hidden2, n_outputs, name="outputs")

xentropy = tf.keras.backend.binary_crossentropy(tf.to_float(y), logits)
loss = tf.reduce_mean(xentropy)

learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

correct = tf.nn.in_top_k(logits, y, 1)
labels_max = tf.reduce_max(y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 40
batch_size = 50

def shuffle_batch(X, y, batch_size):  # shuffle and split into batches (n_batches)
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

with tf.Session() as sess:
    init.run()
    X_temp, Y_temp = X_batch, y_batch
    feed_dict = {X: X_batch, y: y_batch}
    print("feed", feed_dict)
    print("\n y_batch :", y_batch, y_batch.dtype)
    print("\n X_batch :", X_batch, X_batch.dtype, X_batch.shape)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, Y_train, batch_size):
            y_batch = y_batch.astype(np.int32)
            X_batch = X_batch.astype(np.float32)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        #acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        #acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        #print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)
    save_path = saver.save(sess, "./my_model_final.ckpt")

    # some tests
    print("y eval :", y.eval(feed_dict={X: X_temp, y: Y_temp}).shape)
    y_one_hot = to_one_hot(y.eval(feed_dict={X: X_temp, y: Y_temp}))
    print("y_one_hot :", y_one_hot.shape)
    print("logits eval : ", logits.eval(feed_dict={X: X_temp, y: Y_temp}))
    #print(correct.eval(feed_dict={X: X_temp, y: Y_temp}))
    print(labels_max.eval(feed_dict={X: X_temp, y: Y_temp}))
As per the documentation here, tf.nn.in_top_k(predictions, targets, k) has arguments:
predictions: A Tensor of type float32. A batch_size x classes tensor.
targets: A Tensor. Must be one of the following types: int32, int64. A batch_size vector of class ids.
k: An int. Number of top elements to look at for computing precision.
Since you are performing binary classification, i.e., there are two classes, the shape of the logits tensor in your case should be (52, 2), while the shape of y should be (52,). Here, logits is basically a one-hot encoded tensor. This is the reason why you are getting the above error.
Consider the examples below:
Example 1:
res = tf.nn.in_top_k([[0,1], [1,0], [0,1], [1, 0], [0, 1]], [0, 1, 1, 1, 1], 1)
Here, the shape of logits is (5, 2) while y is (5,). If you do tf.reduce_max(y), you get 1, which is less than the number of classes and hence okay.
This works fine and outputs [False False True False True]
Example 2:
res = tf.nn.in_top_k([[0,1], [1,0], [0,1], [1, 0], [0, 1]], [0, 2, 1, 1, 1], 1)
If you do tf.reduce_max(y), you get 2, which is equal to the number of classes.
This raises an error: InvalidArgumentError: targets[1] is out of range
EDIT: In your code above, make the following modifications:
change n_outputs = 1 to n_outputs = 2
change sess.run(training_op,feed_dict={X: X_batch, y: y_batch}) to _, cost, acc = sess.run([training_op, loss, accuracy], feed_dict={X: X_batch, y: to_one_hot(y_batch)})
change correct = tf.nn.in_top_k(logits, y, 1) to correct = tf.nn.in_top_k(logits, tf.argmax(y, 1), 1)
Code (random data used):

n_inputs = 28
n_hidden1 = 15
n_hidden2 = 5
n_outputs = 2

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None, 2), name="y")  # None => any

def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.shape[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)  # n_inputs x n_neurons matrix of values close to 0
        W = tf.Variable(init, name="kernel")  # random weights
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        tf.cast(Z, tf.int32)
        if activation is not None:
            return activation(Z)
        else:
            return Z

def to_one_hot(y):
    n_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                       activation=tf.nn.relu)
hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                       activation=tf.nn.relu)
logits = neuron_layer(hidden2, n_outputs, name="outputs")

xentropy = tf.keras.backend.binary_crossentropy(tf.to_float(y), logits)
loss = tf.reduce_mean(xentropy)

learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

correct = tf.nn.in_top_k(logits, tf.argmax(y, 1), 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 1
X_train = np.random.rand(100, 28)
X_train = X_train.astype(np.float32)
Y_train = np.random.randint(low=0, high=2, size=100, dtype=np.int32)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        _, cost, corr, acc = sess.run([training_op, loss, correct, accuracy], feed_dict={X: X_train, y: to_one_hot(Y_train)})
        print(corr)
        print('Loss: {} Accuracy: {}'.format(cost, acc))

tensorflow GradientDescentOptimizer not updating variables?

I'm new to machine learning. I started with the simplest example: classifying MNIST handwritten images with softmax and gradient descent. By referencing some other examples, I came up with my own logistic regression below:
import tensorflow as tf
import numpy as np

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.float32(x_train / 255.0)
x_test = np.float32(x_test / 255.0)

X = tf.placeholder(tf.float32, [None, 28, 28])
Y = tf.placeholder(tf.uint8, [100])
XX = tf.reshape(X, [-1, 784])

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

def err(x, y):
    predictions = tf.matmul(x, W) + b
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.reshape(y, [-1, 1]), logits=predictions))
    # value = tf.reduce_mean(y * tf.log(predictions))
    # loss = -tf.reduce_mean(tf.one_hot(y, 10) * tf.log(predictions)) * 100.
    return loss

# cost = err(np.reshape(x_train[:100], (-1, 784)), y_train[:100])
cost = err(tf.reshape(X, (-1, 784)), Y)

optimizer = tf.train.GradientDescentOptimizer(0.005).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# temp = sess.run(tf.matmul(XX, W) + b, feed_dict={X: x_train[:100]})
temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)
# print(temp.dtype)
# print(type(temp))

for i in range(100):
    sess.run(optimizer, feed_dict={X: x_train[i * 100: 100 * (i + 1)], Y: y_train[i * 100: 100 * (i + 1)]})
    # sess.run(optimizer, feed_dict={X: x_train[: 100], Y: y_train[:100]})

temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)

sess.close()
I tried to run the optimizer for some iterations, feeding it training image data and labels. In my understanding, the variables W and b should be updated during the optimizer runs, so the model would produce different results before and after training. But with this code, the printed costs before and after the optimizer runs were the same. What could be wrong here?
You are initializing the weight matrix W with zeros, and as a result all parameters receive the same gradient value at each weight update. For weight initialization use tf.truncated_normal(), tf.random_normal(), tf.contrib.layers.xavier_initializer() or something else, but not zeros.
This is a similar question.
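A minimal sketch of that change for the code above (the stddev value is just an illustrative choice):

# Sketch only: small random initial weights instead of zeros; the zero bias can stay.
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.zeros([10]))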

Neural network for regression not learning in tensorflow

I've modified a TensorFlow example to fit my data, given here: data
But my neural network is not learning at all. I tried different numbers of hidden layers, learning rates and optimization functions, but it didn't help. My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np

filename_queue = tf.train.string_input_producer(["file0.csv"])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    x = np.zeros(shape=(1813,3))
    y = np.zeros(shape=(1813))

    for i in range(1813):
        # Retrieve a single instance:
        x1, y1 = sess.run([features, col4])
        x[i] = x1
        y[i] = y1

    coord.request_stop()
    coord.join(threads)

#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)

# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)

total_len = X_train.shape[0]

# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1

# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1

# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])

# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)

    # Output layer with linear activation
    out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
    return out_layer

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}

# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)

# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        print(total_batch)
        # Loop over all batches
        for i in range(total_batch-1):
            batch_x = X_train[i*batch_size:(i+1)*batch_size]
            batch_y = Y_train[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
                                                                   y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch

        # sample prediction
        label_value = batch_y
        estimate = p
        err = label_value-estimate
        print ("num batch:", total_batch)

        # Display logs per epoch step
        if epoch % display_step == 0:
            print ("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(avg_cost))
            print ("[*]----------------------------")
            for i in xrange(5):
                print ("label value:", label_value[i], \
                    "estimated value:", estimate[i])
            print ("[*]============================")

    print ("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and the result looks like this (label value = expected result):
[result image]
