TensorFlow loss returns NaN - python

I'm trying to write my first simple neural net with my own data based on various tutorials and information. I'm stuck at the point where I think I prepared the model and I'm trying to run it, but when I want to find out the cost function changes in every epochs, it returns NaN.
My code is:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
df = pd.read_excel("mydataset.xlsx")
# Preparing the dataset, doing some stuff here
df2 = df.dropna(subset=['wl'])
df2 = df2.sample(frac=1)
df2_X = df2[['param1','param2','param3','param4','param5','param6','param7']]
df2_y = df2[['numerical_result_param']]
# Spliting the dataset...
train_X, test_X, train_y, test_y = df2_X[:210], df2_X[210:], df2_y[:210], df2_y[210:]
# Creating model:
X = tf.placeholder("float", shape=[None, train_X.shape[1]])
y = tf.placeholder("float", shape=[None, train_y.shape[1]])
hl_size = 256 # Number of neurons in hidden layer
weights = {
'hl': tf.Variable(tf.random_normal([train_X.shape[1], hl_size])),
'out': tf.Variable(tf.random_normal([hl_size, train_y.shape[1]]))
}
biases = {
'hl': tf.Variable(tf.random_normal([hl_size])),
'out': tf.Variable(tf.random_normal([train_y.shape[1]]))
}
def multilayer_perceptron(x):
hl_layer = tf.add(tf.matmul(x, weights['hl']), biases['hl'])
hl_layer = tf.nn.relu(hl_layer)
out_layer = tf.matmul(hl_layer, weights['out']) + biases['out']
return out_layer
logits = multilayer_perceptron(X)
hm_epochs = 100 # Number of epochs
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
optimizer = tf.train.AdamOptimizer(0.01).minimize(cost) # Training optimizer
# Running the session
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(hm_epochs):
epoch_loss = 0
_, c = sess.run([optimizer, cost], feed_dict={X: train_X, y: train_y})
epoch_loss += c
print('Epoch',epoch,'out of',hm_epochs,'loss:',epoch_loss)
And it returns:
Epoch 0 out of 100 loss: nan
Epoch 1 out of 100 loss: nan
etc.
I'd appreciate any help and ideas what I did wrong!

Related

Got very poor accuracy rate whenever I train any network

I am trying to train a network on Alabone dataset downloaded from "UCI machine learning repository" site. The dataset is look like:
M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
I have given the exact same column names as they have mentioned. But when I try to apply a neural network to train it, it always gives very poor accuracy rate about 50% just.
I am new in the field so I don't know if I am using a wrong Activation function?, or executing wrong code?, or didn't have preprocess the data well?.
So please help me to find the mistake I have done.
Here's my whole code:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
def read_dataset():
df = pd.read_csv("abalone.data.txt")
X = np.array(df.drop("Sex", 1))
y = np.array(df["Sex"])
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
Y = one_hot_encode(y)
# print(X.shape)
return X, Y
def one_hot_encode(label):
n_label = len(label)
n_unique_label = len(np.unique(label))
one_hot_encode = np.zeros((n_label, n_unique_label))
one_hot_encode[np.arange(n_label), label] = 1
return one_hot_encode
X, y = read_dataset()
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)
n_nodes_1 = 60
n_nodes_2 = 60
n_nodes_3 = 60
n_nodes_4 = 60
model_path = "C:\\Users\Kashif\Projects\DeepLearning-Tensorflow\Learnings\AlaboneDetection\AlaboneModel"
n_class = 3
input_size = X.shape[1]
x = tf.placeholder(tf.float32, [None, input_size])
y = tf.placeholder(tf.float32, [None, n_class])
def neural_network(x):
hidden_1 = {"weights": tf.Variable(tf.random_normal([input_size, n_nodes_1])),
"biases": tf.Variable(tf.random_normal([n_nodes_1]))}
hidden_2 = {"weights": tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
"biases": tf.Variable(tf.random_normal([n_nodes_2]))}
hidden_3 = {"weights": tf.Variable(tf.random_normal([n_nodes_2, n_nodes_3])),
"biases": tf.Variable(tf.random_normal([n_nodes_3]))}
hidden_4 = {"weights": tf.Variable(tf.random_normal([n_nodes_3, n_nodes_4])),
"biases": tf.Variable(tf.random_normal([n_nodes_4]))}
out_layer = {"weights": tf.Variable(tf.random_normal([n_nodes_4, n_class])),
"biases": tf.Variable(tf.random_normal([n_class]))}
# (input * weights) + biases
layer_1 = tf.add(tf.matmul(x, hidden_1["weights"]), hidden_1["biases"])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, hidden_2["weights"]), hidden_2["biases"])
layer_2 = tf.nn.relu(layer_2)
layer_3 = tf.add(tf.matmul(layer_2, hidden_3["weights"]), hidden_3["biases"])
layer_3 = tf.nn.relu(layer_3)
layer_4 = tf.add(tf.matmul(layer_3, hidden_4["weights"]), hidden_4["biases"])
layer_4 = tf.nn.relu(layer_4)
output = tf.matmul(layer_4, out_layer["weights"]) + out_layer["biases"]
return output
def train_neural_network(x):
prediction = neural_network(x)
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost_function)
init = tf.global_variables_initializer()
loss_trace = []
accuracy_trace = []
#saver = tf.train.Saver()
epochs = 1000
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
sess.run(optimizer, feed_dict={x: train_X, y: train_y})
loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
loss_trace.append(loss)
accuracy_trace.append(accuracy)
print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
#saver.save(sess, model_path)
print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)
train_neural_network(x)
And here's my last result of final three epochs and final accuracy result.
Epoch: 997 loss: 24.625622 accuracy: 0.518407662376534
Epoch: 998 loss: 22.168245 accuracy: 0.48757856929063154
Epoch: 999 loss: 21.896841 accuracy: 0.5001496557916791
Epoch: 1000 loss: 22.28085 accuracy: 0.4968572283747381
Final training result: loss: 22.28085 accuracy: 0.4968572283747381
Results on test dataset: loss: 23.206755 accuracy: 0.4688995215311005
I am new to tensorflow. Maybe you can try two things:
1.decreasing learning rate.such as 0.0001. because your loss is oscillation
2.increase the number of the layer. because your model maybe under-fitting.
If above ways can't solve your problem, you can print your data and check whether train_X and train_y is correct

Neural Network - ValueError: Cannot feed value of shape

I'm new in Python and Tensorflow . For the beginning I watched the MNIST tutorial and understood it so far.
But now I have to create a new Neural Network with numerical input_datas.
I got a dataset which delivers an input_data and v_data.
If I run input_data.shape -> (1000,25,4)
If I run v_data.shape -> (1000,2)
What I tried to do is to split the data for (Training + Validation) and Testing.
Training + Validation = 90% of train_data (90% of the input.pkl)
Testing data = the remaining 10%
And then I devided the 90% of the input_data in training and validation (70% training, 30% validation)
The network should correctly predict based on v_data, but I still get an error. See the code and the error below.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
X = tf.placeholder('float', [25, 4], name='X')
y = tf.placeholder('float', name='y')
# Weights Matrices
W1 = tf.Variable(tf.truncated_normal([4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
input_layer = {'weights': W1, 'biases': b1}
hidden_layer_1 = {'weights': W2, 'biases': b2}
hidden_layer_2 = {'weights': W3, 'biases': b3}
output_layer = {'weights': W4, 'biases': b4}
input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
input_layer_sum = tf.nn.relu(input_layer_sum)
hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
return output_layer_sum
# Train the Neural Network
def nn_train(X):
pred = nn_model(X)
pred = tf.identity(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
#saver = tf.train.Saver()
sess.run(init_op)
for epoch in range(epochs):
epoch_loss = 0.0
i = 0
while i < len(X_train_data):
start = i
end = i+batch_size
batch_x = np.array(X_train_data[start:end])
batch_y = np.array(y_train_data[start:end])
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
epoch_loss += c
i+= batch_size
epoch_errors.append(epoch_loss)
print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
print('Acc: ', accuracy.eval({X:X_test_data, y:y_test_data}))
if __name__ == "__main__":
nn_train(X)
The following error
ValueError: Cannot feed value of shape (100, 25, 4) for Tensor 'X:0', which has shape '(25, 4)'
occurs in line 105
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
The placeholder you have defined for the input X has the shape (25,4)
tf.placeholder('float', [25, 4], name='X')
But the input you are providing is of the shape (100, 25, 4) where 100 is your batch size.
Change the definition to
tf.placeholder('float', [None, 25, 4], name='X')
and the error should be gone. Here, None takes care of batch size, automatically.
Update: Sorry, I didn't go through the whole code. You code needs a few fixes.
The correct syntax for feeding data to a placeholder is:
X = tf.placeholder(tf.float32, [None, input_dim], name='X')
Now, if you are dealing with the images, your input_dim will be the length of the flattened array for one example i.e. If your image has dimension 25x4, the input_dim should be 25*4=100. It should be equal to the first dimension of your weight layer 1 here W1.
Also, before feeding your batch you will need to reshape it.
Below is the fixed code(changes are commented):
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
# input data should be of the shape (batch_size, flatten data for one example). Also, the correct shape of y"
X = tf.placeholder(tf.float32, [None, 25 * 4], name='X')
y = tf.placeholder(tf.float32, [None, classes] name='y')
# Weights Matrices. First dimension of W1 == second dimension of X
W1 = tf.Variable(tf.truncated_normal([25 * 4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
input_layer = {'weights': W1, 'biases': b1}
hidden_layer_1 = {'weights': W2, 'biases': b2}
hidden_layer_2 = {'weights': W3, 'biases': b3}
output_layer = {'weights': W4, 'biases': b4}
input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
input_layer_sum = tf.nn.relu(input_layer_sum)
hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
return output_layer_sum
# Train the Neural Network
def nn_train(X):
pred = nn_model(X)
pred = tf.identity(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
#saver = tf.train.Saver()
sess.run(init_op)
for epoch in range(epochs):
epoch_loss = 0.0
i = 0
while i < len(X_train_data):
start = i
end = i+batch_size
# reshape before feeding.
batch_x = np.array(X_train_data[start:end]).reshape(batch_size, 25 * 4)
batch_y = np.array(y_train_data[start:end]).reshape(batch_size, classes)
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
epoch_loss += c
i+= batch_size
epoch_errors.append(epoch_loss)
print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
print('Acc: ', accuracy.eval({X:X_test_data.reshape(-1, 25 * 4), y:y_test_data.reshape(-1, classes)}))
if __name__ == "__main__":
nn_train(X)
UPDATE: Sorry I posted the wrong error:
#Kumar, I changed the batch size to 30 (so 21*30 = 630). It prints now the epochs, but in some weird way:
Epoch 1 of 10 with loss: 1680690.2648780346
Epoch 2 of 10 with loss: 2382142.9208984375
Epoch 3 of 10 with loss: 4215628.857421875
Epoch 4 of 10 with loss: 9046892.295166016
Epoch 5 of 10 with loss: 23961644.453125
Epoch 6 of 10 with loss: 31733882.34375
Epoch 7 of 10 with loss: 46124696.609375
Epoch 8 of 10 with loss: 61760446.28125
Epoch 9 of 10 with loss: 89145610.59375
Epoch 10 of 10 with loss: 121249417.25
And I received a next error for:
print('Acc: ', accuracy.eval({X:X_test_data, y:y_test_data}))
ValueError: Cannot feed value of shape (270, 25, 4) for Tensor 'X:0', which has shape '(?, 100)'
Process finished with exit code 1

Inferior performance of Tensorflow compared to sklearn

I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Activation
layer_1_relu = tf.nn.relu(layer_1)
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(n_train/batch_size)
# Loop over all batches
ptr = 0
for i in range(total_batch):
next_ptr = ptr + batch_size
if next_ptr > len(x_train):
next_ptr = len(x_train)
batch_x, batch_y = x_train[ptr:next_ptr],y_train[ptr:next_ptr]
ptr += batch_size
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
print("Optimization Finished!")
print("Accuracy on training set: ", accuracy.eval({X:x_train,Y:y_train}))
print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))
print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a github repository. For this testing, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following is the result for the both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
I wonder if sklearn has done a magic implementation over tensorflow? Can anyone help answer?
Thank you very much.

Neural network for regression not learning in tensorflow

I've modified tensor flow example to fit on my data, given here: data
But my neural network is not learning at all, I tried to use different no. of hidden layers, learning rate and optimization functions, but it didn't help.My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
x = np.zeros(shape=(1813,3))
y = np.zeros(shape=(1813))
for i in range(1813):
# Retrieve a single instance:
x1, y1 = sess.run([features, col4])
x[i] = x1
y[i] = y1
coord.request_stop()
coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and result looks like that: (label value = expected result)
result

nan cost in tensorflow training perceptron

I am trying to train a single layer perceptron (basing my code on this) on the following data file in tensor flow:
1,1,0.05,-1.05
1,1,0.1,-1.1
....
where the last column is the label (function of 3 parameters) and the first three columns are the function argument. The code that reads the data and trains the model (I simplify it for readability):
import tensorflow as tf
... # some basics to read the data
example, label = read_file_format(filename_queue)
... # model construction and parameter setting
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
...
# calls a function which produces a prediction
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
_, c = sess.run([optimizer, cost], feed_dict={x: example.reshape(1,3), y: label.reshape(-1,1)})
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "Cost:",c)
but when I run it, something seems to be very wrong:
('Epoch:', '0001', 'Cost:', nan)
('Epoch:', '0002', 'Cost:', nan)
....
('Epoch:', '0015', 'Cost:', nan)
This is the complete code for the multilaye_perceptron function, etc:
# Parameters
learning_rate = 0.001
training_epochs = 15
display_step = 1
# Network Parameters
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_output])
# Create model
def multilayer_perceptron(x, weights, biases):
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_output]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_output]))
}
Is this one example at a time? I would go batches and increase batch size to 128 or similar, as long as you are getting nans.
When I am getting nans it is usually either of the three:
- batch size too small (in your case then just 1)
- log(0) somewhere
- learning rate too high and uncapped gradients

Categories

Resources