tensorflow GradientDescentOptimizer not updating variables? - python

I'm new to machine learning. I started with the simplest example: classifying MNIST handwritten digits with softmax and gradient descent. By referencing some other examples, I came up with my own logistic regression below:
import tensorflow as tf
import numpy as np
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.float32(x_train / 255.0)
x_test = np.float32(x_test / 255.0)
X = tf.placeholder(tf.float32, [None, 28, 28])
Y = tf.placeholder(tf.uint8, [100])
XX = tf.reshape(X, [-1, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
def err(x, y):
    predictions = tf.matmul(x, W) + b
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.reshape(y, [-1, 1]), logits=predictions))
    # value = tf.reduce_mean(y * tf.log(predictions))
    # loss = -tf.reduce_mean(tf.one_hot(y, 10) * tf.log(predictions)) * 100.
    return loss
# cost = err(np.reshape(x_train[:100], (-1, 784)), y_train[:100])
cost = err(tf.reshape(X, (-1, 784)), Y)
optimizer = tf.train.GradientDescentOptimizer(0.005).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# temp = sess.run(tf.matmul(XX, W) + b, feed_dict={X: x_train[:100]})
temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)
# print(temp.dtype)
# print(type(temp))
for i in range(100):
    sess.run(optimizer, feed_dict={X: x_train[i * 100: 100 * (i + 1)], Y: y_train[i * 100: 100 * (i + 1)]})
    # sess.run(optimizer, feed_dict={X: x_train[: 100], Y: y_train[:100]})
temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)
sess.close()
I tried to run the optimizer for some iterations, feeding it the training images and labels. In my understanding, during the optimizer runs the variables W and b should be updated, so the model should produce different results before and after training. But with this code, the printed cost of the model was the same before and after running the optimizer. What could be wrong here?

You are initializing the weight matrix W with zeros, and as a result all parameters receive the same gradient value at each weight update. For weight initialization use tf.truncated_normal(), tf.random_normal(), tf.contrib.layers.xavier_initializer() or something else, but not zeros.
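For example, a minimal sketch of that change against the question's code (everything else stays the same; the 0.1 standard deviation is just an assumed starting point):
# Sketch: small random initialization instead of zeros
W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.zeros([10]))  # a zero bias is fine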
This is a similar question.

Related

Compute validation loss in parallel with training loss

I am trying to compute the validation loss at each iteration, after the weights have been computed on the training set. How can I use the resulting weight tensor to predict the values on the validation set?
I tried using two arrays to store the loss values at each step of the session.
X_tr, X_val, y_tr, y_val = train_test_split(train_set, y_train, test_size=0.2, random_state=42)
x = tf.placeholder(tf.float32, X_tr.shape, name = 'data')
y = tf.placeholder(tf.float32, y_tr.shape, name = 'labels')
W = tf.Variable(tf.zeros([len(train_set.columns),1]), dtype = tf.float32, name = 'weights')
b = tf.Variable(0, dtype = tf.float32, name = 'bias')
y_pred = tf.matmul(x, W) + b
loss = tf.reduce_mean(tf.square(y - y_pred), name = 'loss')
optimizer = tf.train.MomentumOptimizer(learning_rate = 0.0006, momentum = 0.90)
train_op = optimizer.minimize(loss)
losses_t, losses_v = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        _, loss_t = sess.run([train_op, loss], feed_dict = {x: X_tr, y: y_tr})
        losses_t.append(loss_t)
        loss_v = sess.run(loss, feed_dict = {x: X_val, y: y_val})
        losses_v.append(loss_v)
        if(i % 20 == 0):
            print('Training loss is: ', loss_t)
            print('Validation loss is: ', loss_v)
    W_value, b_value = sess.run([W, b])
The error:
ValueError: Cannot feed value of shape (292, 220) for Tensor 'data_6:0', which has shape '(1166, 220)'
The problem was that I was feeding the validation data into the same placeholders, whose shapes were fixed to the training set. Instead, I created new placeholders (matching the shapes of X_val and y_val) and a separate loss op for the validation set:
losses_t = []
losses_v = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(3000):
        _, loss_t = sess.run([train_op, loss], feed_dict = {x: X_tr, y: y_tr})
        losses_t.append(loss_t)
        loss_v = sess.run(lossv, feed_dict = {xv: X_val, yv: y_val})
        losses_v.append(loss_v)
        if(i % 20 == 0):
            print('Training loss is: ', loss_t)
            print('Validation loss is: ', loss_v)
    W_value, b_value = sess.run([W, b])
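For reference, a minimal sketch of how the validation placeholders and loss used above (xv, yv, lossv, which the snippet does not show) might be defined; the names come from the snippet, the shapes are an assumption, and the trained W and b are reused:
# Sketch only: validation-side ops sharing the trained variables
xv = tf.placeholder(tf.float32, X_val.shape, name = 'val_data')
yv = tf.placeholder(tf.float32, y_val.shape, name = 'val_labels')
yv_pred = tf.matmul(xv, W) + b          # same W and b as the training branch
lossv = tf.reduce_mean(tf.square(yv - yv_pred), name = 'val_loss')
An alternative is to give the original placeholders shape [None, n_features] (and a matching label shape) so a single loss op accepts both the training and validation splits.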

Neural network for regression not learning in tensorflow

I've modified a TensorFlow example to fit my data, given here: data
But my neural network is not learning at all. I tried different numbers of hidden layers, learning rates and optimization functions, but it didn't help. My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    x = np.zeros(shape=(1813,3))
    y = np.zeros(shape=(1813))
    for i in range(1813):
        # Retrieve a single instance:
        x1, y1 = sess.run([features, col4])
        x[i] = x1
        y[i] = y1
    coord.request_stop()
    coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
    return out_layer
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        print(total_batch)
        # Loop over all batches
        for i in range(total_batch-1):
            batch_x = X_train[i*batch_size:(i+1)*batch_size]
            batch_y = Y_train[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
                                                                   y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
            # sample prediction
            label_value = batch_y
            estimate = p
            err = label_value-estimate
            print ("num batch:", total_batch)
        # Display logs per epoch step
        if epoch % display_step == 0:
            print ("Epoch:", '%04d' % (epoch+1), "cost=", \
                "{:.9f}".format(avg_cost))
            print ("[*]----------------------------")
            for i in xrange(5):
                print ("label value:", label_value[i], \
                    "estimated value:", estimate[i])
            print ("[*]============================")
    print ("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and the result looks like this (label value = expected result):
result

Feeding timeseries data into Tensorflow for LSTM classifier training

I have a dataframe of shape (38307, 26) with a timestamp as the index.
I'm trying to implement an LSTM classifier, but I'm struggling to feed the data into the dataflow graph.
The final arrays I'm trying to feed have shapes X_train = (38307, 25) and y_train = (38307, 2).
I have added the code in case it helps:
# Parameters
learning_rate = 0.001
training_epochs = 100
batch_size = 128
display_step = 10
# Network Parameters
n_input = 25 # features= 25
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2 # Binary classification
# TF Graph input
x = tf.placeholder("float32", [None, n_steps, n_input])
y = tf.placeholder("float32", [None, n_classes])
# TF Weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = RNN(x, weights, biases)
# Initialize the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(len(X_train)/batch_size)
        X_batches = np.array_split(X_train, total_batch)
        Y_batches = np.array_split(y_train, total_batch)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = X_batches[i], Y_batches[i]
            # batch_y.shape = (batch_y.shape[0]), 1)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print(("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost)))
    print('Optimization finished')
    # Store session for analysis with TensorBoard
    writer = tf.summary.FileWriter("/tmp/test", sess.graph)
    # Test model
    print("Accuracy:", accuracy.eval({x: X_test, y: y_test}))
    global result
    result = tf.argmax(pred, 1).eval({x: X_test, y: y_test})
EDIT: the RNN function:
def RNN(x, weights, biases):
    # Prepare data shape to match 'rnn' function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)
    # x = tf.split(x, n_steps, 0) # Syntax change in newer versions
    # LSTM cell using rnn_cell from tensorflow.contrib
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Get LSTM cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Unfortunately, the most important part of your code is hidden in the RNN function.
Some tips to help you out: I guess you are trying to build a dynamic RNN (is that correct?). In that case, a common mistake I see is that people confuse the time-major and batch-major settings of these RNNs. In other words, is your input data [batch, time, variables] or [time, batch, variables]?
More about this can be found here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md
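To illustrate the batch-major vs. time-major point, here is a minimal sketch assuming a batch-major input like the x placeholder above ([batch, time, features]) and the TF 1.x API; it uses tf.nn.dynamic_rnn rather than the static rnn.rnn call from the question:
# Sketch only: dynamic RNN with batch-major input (time_major=False is the default)
lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32, time_major=False)
# outputs has shape [batch, time, n_hidden]; take the last timestep for the classifier
last = outputs[:, -1, :]
logits = tf.matmul(last, weights['out']) + biases['out']
If the data were arranged [time, batch, features] instead, time_major=True would be the matching setting (and the last-step slice would change accordingly).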

TensorFlow: Cannot feed value of shape when using CSV file as input

I'm a newbie to TensorFlow. I have been trying to feed MNIST in CSV form. Each row contains (label, pixel i-j, ...), where i = row and j = column. Then I am trying to train a model based on the tutorials on the TensorFlow website.
However, I got the error
ValueError: Cannot feed value of shape (28, 28) for Tensor u'Placeholder_12:0', which has shape '(?, 784)'
Could you please suggest me what is wrong with my code?
import tensorflow as tf
import numpy as np
x_file = open("mnist_train_100.csv",'r')
x_list = x_file.readlines()
x_file.close()
output_nodes = 10
for r in x_list:
    pixs = r.split(',')
    inp = (np.asfarray(pixs[1:])).reshape(28,28)
    targets = np.zeros(output_nodes)
    targets[int(pixs[0])] = 1
test_file = open("mnist_test_10.csv",'r')
test_list = test_file.readlines()
test_file.close()
for i in test_list:
    pixels = i.split(',')
    test_input = (np.asfarray(pixels[1:])).reshape(28,28)
    test_targets = np.zeros(output_nodes)
    test_targets[int(pixels[0])] = 1
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10])) #weight
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for i in range(1000):
    sess.run(train_step, feed_dict={x: inp, y_: targets})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: test_input, y_: test_targets}))
Thank you in advance.
test_input = (np.asfarray(pixels[1:])).reshape(28,28)
Drop the reshape. Your x is of shape:
x = tf.placeholder(tf.float32, [None, 784])
so a (28, 28) value cannot be fed in:
sess.run(accuracy, feed_dict={x: test_input, y_: test_targets})
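As a follow-up, one way to make the fed values match those placeholder shapes (a sketch only, assuming the CSV layout described in the question) is to stack all rows into 2-D arrays instead of keeping only one reshaped image:
# Sketch: build (N, 784) inputs and (N, 10) one-hot targets from the CSV rows
inputs, labels = [], []
for r in x_list:
    vals = r.split(',')
    inputs.append(np.asfarray(vals[1:]))      # one flat 784-pixel row
    one_hot = np.zeros(output_nodes)
    one_hot[int(vals[0])] = 1
    labels.append(one_hot)
inp = np.stack(inputs)        # shape (N, 784), matches x: [None, 784]
targets = np.stack(labels)    # shape (N, 10), matches y_: [None, 10]
The test rows can be prepared the same way before the accuracy evaluation.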

computation of test data in tensorflow tutorial

I was going through the TensorFlow tutorial:
https://www.tensorflow.org/versions/r0.9/tutorials/mnist/beginners/index.html
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10])) #weights
b = tf.Variable(tf.zeros([10])) #bias
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
Towards the very end, we pass the test data to the placeholders. y_ is the matrix containing the true values, and y is the matrix with the predicted values. My question is: when is y computed for the test data? The W matrix has been trained by backpropagation, but this trained matrix must be multiplied with the new input x (the test data) to give the prediction y. Where does this happen?
Normally I have seen sequential execution of code, and in the last few lines y isn't called explicitly.
accuracy depends on correct_prediction which depends on y.
So when you call sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}), y is computed before accuracy is computed. All of this happens inside the TensorFlow graph.
The TensorFlow graph is the same for train and test. The only difference is the data you feed to the placeholders x and y_.
y is computed here:
y = tf.nn.softmax(tf.matmul(x, W) + b) # Line 7
Specifically, what you are looking for is within that line:
tf.matmul(x, W) + b
the output of which is put through the softmax function to identify the class.
This is computed in each of the 1000 passes through the graph; each time, the variables W and b are updated by gradient descent, and y is computed and compared against y_ to determine the loss.
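To make that concrete, here is a minimal sketch (reusing the tutorial's names; the extra fetch is purely illustrative) that evaluates y explicitly alongside accuracy on the test set:
# Both fetches go through the same graph: y is evaluated with the trained W and b
# for the test images, and accuracy is then computed from it.
test_acc, test_pred = sess.run([accuracy, y],
                               feed_dict={x: mnist.test.images, y_: mnist.test.labels})
print(test_acc)          # the same number as the tutorial's final accuracy line
print(test_pred.shape)   # (10000, 10): one softmax row per test image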
