Related
I am trying to train a network on Alabone dataset downloaded from "UCI machine learning repository" site. The dataset is look like:
M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
I have given the exact same column names as they have mentioned. But when I try to apply a neural network to train it, it always gives very poor accuracy rate about 50% just.
I am new in the field so I don't know if I am using a wrong Activation function?, or executing wrong code?, or didn't have preprocess the data well?.
So please help me to find the mistake I have done.
Here's my whole code:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
def read_dataset():
df = pd.read_csv("abalone.data.txt")
X = np.array(df.drop("Sex", 1))
y = np.array(df["Sex"])
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
Y = one_hot_encode(y)
# print(X.shape)
return X, Y
def one_hot_encode(label):
n_label = len(label)
n_unique_label = len(np.unique(label))
one_hot_encode = np.zeros((n_label, n_unique_label))
one_hot_encode[np.arange(n_label), label] = 1
return one_hot_encode
X, y = read_dataset()
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)
n_nodes_1 = 60
n_nodes_2 = 60
n_nodes_3 = 60
n_nodes_4 = 60
model_path = "C:\\Users\Kashif\Projects\DeepLearning-Tensorflow\Learnings\AlaboneDetection\AlaboneModel"
n_class = 3
input_size = X.shape[1]
x = tf.placeholder(tf.float32, [None, input_size])
y = tf.placeholder(tf.float32, [None, n_class])
def neural_network(x):
hidden_1 = {"weights": tf.Variable(tf.random_normal([input_size, n_nodes_1])),
"biases": tf.Variable(tf.random_normal([n_nodes_1]))}
hidden_2 = {"weights": tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
"biases": tf.Variable(tf.random_normal([n_nodes_2]))}
hidden_3 = {"weights": tf.Variable(tf.random_normal([n_nodes_2, n_nodes_3])),
"biases": tf.Variable(tf.random_normal([n_nodes_3]))}
hidden_4 = {"weights": tf.Variable(tf.random_normal([n_nodes_3, n_nodes_4])),
"biases": tf.Variable(tf.random_normal([n_nodes_4]))}
out_layer = {"weights": tf.Variable(tf.random_normal([n_nodes_4, n_class])),
"biases": tf.Variable(tf.random_normal([n_class]))}
# (input * weights) + biases
layer_1 = tf.add(tf.matmul(x, hidden_1["weights"]), hidden_1["biases"])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, hidden_2["weights"]), hidden_2["biases"])
layer_2 = tf.nn.relu(layer_2)
layer_3 = tf.add(tf.matmul(layer_2, hidden_3["weights"]), hidden_3["biases"])
layer_3 = tf.nn.relu(layer_3)
layer_4 = tf.add(tf.matmul(layer_3, hidden_4["weights"]), hidden_4["biases"])
layer_4 = tf.nn.relu(layer_4)
output = tf.matmul(layer_4, out_layer["weights"]) + out_layer["biases"]
return output
def train_neural_network(x):
prediction = neural_network(x)
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost_function)
init = tf.global_variables_initializer()
loss_trace = []
accuracy_trace = []
#saver = tf.train.Saver()
epochs = 1000
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
sess.run(optimizer, feed_dict={x: train_X, y: train_y})
loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
loss_trace.append(loss)
accuracy_trace.append(accuracy)
print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
#saver.save(sess, model_path)
print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)
train_neural_network(x)
And here's my last result of final three epochs and final accuracy result.
Epoch: 997 loss: 24.625622 accuracy: 0.518407662376534
Epoch: 998 loss: 22.168245 accuracy: 0.48757856929063154
Epoch: 999 loss: 21.896841 accuracy: 0.5001496557916791
Epoch: 1000 loss: 22.28085 accuracy: 0.4968572283747381
Final training result: loss: 22.28085 accuracy: 0.4968572283747381
Results on test dataset: loss: 23.206755 accuracy: 0.4688995215311005
I am new to tensorflow. Maybe you can try two things:
1.decreasing learning rate.such as 0.0001. because your loss is oscillation
2.increase the number of the layer. because your model maybe under-fitting.
If above ways can't solve your problem, you can print your data and check whether train_X and train_y is correct
I'm new in Python and Tensorflow . For the beginning I watched the MNIST tutorial and understood it so far.
But now I have to create a new Neural Network with numerical input_datas.
I got a dataset which delivers an input_data and v_data.
If I run input_data.shape -> (1000,25,4)
If I run v_data.shape -> (1000,2)
What I tried to do is to split the data for (Training + Validation) and Testing.
Training + Validation = 90% of train_data (90% of the input.pkl)
Testing data = the remaining 10%
And then I devided the 90% of the input_data in training and validation (70% training, 30% validation)
The network should correctly predict based on v_data, but I still get an error. See the code and the error below.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
X = tf.placeholder('float', [25, 4], name='X')
y = tf.placeholder('float', name='y')
# Weights Matrices
W1 = tf.Variable(tf.truncated_normal([4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
input_layer = {'weights': W1, 'biases': b1}
hidden_layer_1 = {'weights': W2, 'biases': b2}
hidden_layer_2 = {'weights': W3, 'biases': b3}
output_layer = {'weights': W4, 'biases': b4}
input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
input_layer_sum = tf.nn.relu(input_layer_sum)
hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
return output_layer_sum
# Train the Neural Network
def nn_train(X):
pred = nn_model(X)
pred = tf.identity(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
#saver = tf.train.Saver()
sess.run(init_op)
for epoch in range(epochs):
epoch_loss = 0.0
i = 0
while i < len(X_train_data):
start = i
end = i+batch_size
batch_x = np.array(X_train_data[start:end])
batch_y = np.array(y_train_data[start:end])
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
epoch_loss += c
i+= batch_size
epoch_errors.append(epoch_loss)
print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
print('Acc: ', accuracy.eval({X:X_test_data, y:y_test_data}))
if __name__ == "__main__":
nn_train(X)
The following error
ValueError: Cannot feed value of shape (100, 25, 4) for Tensor 'X:0', which has shape '(25, 4)'
occurs in line 105
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
The placeholder you have defined for the input X has the shape (25,4)
tf.placeholder('float', [25, 4], name='X')
But the input you are providing is of the shape (100, 25, 4) where 100 is your batch size.
Change the definition to
tf.placeholder('float', [None, 25, 4], name='X')
and the error should be gone. Here, None takes care of batch size, automatically.
Update: Sorry, I didn't go through the whole code. You code needs a few fixes.
The correct syntax for feeding data to a placeholder is:
X = tf.placeholder(tf.float32, [None, input_dim], name='X')
Now, if you are dealing with the images, your input_dim will be the length of the flattened array for one example i.e. If your image has dimension 25x4, the input_dim should be 25*4=100. It should be equal to the first dimension of your weight layer 1 here W1.
Also, before feeding your batch you will need to reshape it.
Below is the fixed code(changes are commented):
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
# input data should be of the shape (batch_size, flatten data for one example). Also, the correct shape of y"
X = tf.placeholder(tf.float32, [None, 25 * 4], name='X')
y = tf.placeholder(tf.float32, [None, classes] name='y')
# Weights Matrices. First dimension of W1 == second dimension of X
W1 = tf.Variable(tf.truncated_normal([25 * 4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
input_layer = {'weights': W1, 'biases': b1}
hidden_layer_1 = {'weights': W2, 'biases': b2}
hidden_layer_2 = {'weights': W3, 'biases': b3}
output_layer = {'weights': W4, 'biases': b4}
input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
input_layer_sum = tf.nn.relu(input_layer_sum)
hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
return output_layer_sum
# Train the Neural Network
def nn_train(X):
pred = nn_model(X)
pred = tf.identity(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
#saver = tf.train.Saver()
sess.run(init_op)
for epoch in range(epochs):
epoch_loss = 0.0
i = 0
while i < len(X_train_data):
start = i
end = i+batch_size
# reshape before feeding.
batch_x = np.array(X_train_data[start:end]).reshape(batch_size, 25 * 4)
batch_y = np.array(y_train_data[start:end]).reshape(batch_size, classes)
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
epoch_loss += c
i+= batch_size
epoch_errors.append(epoch_loss)
print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
print('Acc: ', accuracy.eval({X:X_test_data.reshape(-1, 25 * 4), y:y_test_data.reshape(-1, classes)}))
if __name__ == "__main__":
nn_train(X)
UPDATE: Sorry I posted the wrong error:
#Kumar, I changed the batch size to 30 (so 21*30 = 630). It prints now the epochs, but in some weird way:
Epoch 1 of 10 with loss: 1680690.2648780346
Epoch 2 of 10 with loss: 2382142.9208984375
Epoch 3 of 10 with loss: 4215628.857421875
Epoch 4 of 10 with loss: 9046892.295166016
Epoch 5 of 10 with loss: 23961644.453125
Epoch 6 of 10 with loss: 31733882.34375
Epoch 7 of 10 with loss: 46124696.609375
Epoch 8 of 10 with loss: 61760446.28125
Epoch 9 of 10 with loss: 89145610.59375
Epoch 10 of 10 with loss: 121249417.25
And I received a next error for:
print('Acc: ', accuracy.eval({X:X_test_data, y:y_test_data}))
ValueError: Cannot feed value of shape (270, 25, 4) for Tensor 'X:0', which has shape '(?, 100)'
Process finished with exit code 1
I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Activation
layer_1_relu = tf.nn.relu(layer_1)
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(n_train/batch_size)
# Loop over all batches
ptr = 0
for i in range(total_batch):
next_ptr = ptr + batch_size
if next_ptr > len(x_train):
next_ptr = len(x_train)
batch_x, batch_y = x_train[ptr:next_ptr],y_train[ptr:next_ptr]
ptr += batch_size
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
print("Optimization Finished!")
print("Accuracy on training set: ", accuracy.eval({X:x_train,Y:y_train}))
print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))
print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a github repository. For this testing, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following is the result for the both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
I wonder if sklearn has done a magic implementation over tensorflow? Can anyone help answer?
Thank you very much.
I've modified tensor flow example to fit on my data, given here: data
But my neural network is not learning at all, I tried to use different no. of hidden layers, learning rate and optimization functions, but it didn't help.My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
x = np.zeros(shape=(1813,3))
y = np.zeros(shape=(1813))
for i in range(1813):
# Retrieve a single instance:
x1, y1 = sess.run([features, col4])
x[i] = x1
y[i] = y1
coord.request_stop()
coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and result looks like that: (label value = expected result)
result
I am trying to train a single layer perceptron (basing my code on this) on the following data file in tensor flow:
1,1,0.05,-1.05
1,1,0.1,-1.1
....
where the last column is the label (function of 3 parameters) and the first three columns are the function argument. The code that reads the data and trains the model (I simplify it for readability):
import tensorflow as tf
... # some basics to read the data
example, label = read_file_format(filename_queue)
... # model construction and parameter setting
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
...
# calls a function which produces a prediction
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
_, c = sess.run([optimizer, cost], feed_dict={x: example.reshape(1,3), y: label.reshape(-1,1)})
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "Cost:",c)
but when I run it, something seems to be very wrong:
('Epoch:', '0001', 'Cost:', nan)
('Epoch:', '0002', 'Cost:', nan)
....
('Epoch:', '0015', 'Cost:', nan)
This is the complete code for the multilaye_perceptron function, etc:
# Parameters
learning_rate = 0.001
training_epochs = 15
display_step = 1
# Network Parameters
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_output])
# Create model
def multilayer_perceptron(x, weights, biases):
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_output]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_output]))
}
Is this one example at a time? I would go batches and increase batch size to 128 or similar, as long as you are getting nans.
When I am getting nans it is usually either of the three:
- batch size too small (in your case then just 1)
- log(0) somewhere
- learning rate too high and uncapped gradients