Based on the layer function

def neuron_layer(X, n_neurons, name, activation_fn=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation_fn is not None:
            return activation_fn(Z)
        else:
            return Z
The following network for a binary classification problem is constructed:
n_hidden1 = 100
n_hidden2 = 120
n_outputs = 1 # single value prediction
n_inputs = X_test.shape[1]
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.float32, shape=(None), name="y")
layer1 = neuron_layer(X, n_hidden1, "layer1", activation_fn=tf.nn.relu)
layer2 = neuron_layer(layer1, n_hidden2, "layer2", activation_fn=tf.nn.relu)
prediction = neuron_layer(layer2, n_outputs, "output", activation_fn=tf.nn.sigmoid)
cost = tf.losses.log_loss(y, prediction)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
The training routine

learning_rate = 0.01
n_epochs = 20
batch_size = 60
num_rec = X_train.shape[0]
n_batches = int(np.ceil(num_rec / batch_size))
acc_test = 0.  # assign the result of accuracy testing to this variable

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = random_batch(X_train, Y_train, batch_size)
            _, opt = sess.run([optimizer, cost], feed_dict={X: X_batch, y: y_batch})
        loss, acc = sess.run([cost, accuracy], feed_dict={X: X_batch, y: y_batch})
        print("epoch " + str(epoch) + ", Loss= " + \
              "{:.6f}".format(loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
        print("Optimization Finished!")
    _, acc_test = sess.run([cost, accuracy], feed_dict={X: X_test, y: Y_test})
generates the following output:
epoch 0, Loss= -6.756775, Training Accuracy= 1.00000
Optimization Finished!
[. . .]
epoch 19, Loss= -6.769919, Training Accuracy= 1.00000
Optimization Finished!
and the accuracy on the test set, acc_test, is 1.0.
The batches are generated by
def random_batch(X_train, y_train, batch_size):
    np.random.seed(42)
    rnd_indices = np.random.randint(0, len(X_train), batch_size)
    X_batch = X_train[rnd_indices]
    y_batch = y_train[rnd_indices]
    return X_batch, y_batch
The input shapes are

print(X_batch.shape, y_batch.shape, X_test.shape, Y_test.shape)
>(60, 3) (60, 1) (2500, 3) (2500, 1)
Obviously, the accuracy on the training and test sets can't be correct. Where could the problem be in the network, training, or evaluation procedure?
The model is overfitting, which is why you are getting abnormally high accuracy in the initial epochs. To avoid overfitting, you can use regularization methods or enlarge the dataset by augmentation. Use ImageDataGenerator for augmentation; it will provide images to the model in batches. Try setting dropout to 0.2. Enable early stopping in the callbacks; it will terminate training when model performance degrades. Try playing with the patience value in early stopping.
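To illustrate those suggestions, here is a minimal Keras-style sketch with a 0.2 dropout after each hidden layer and early stopping with a patience setting. The layer sizes (100, 120, 1) and the 3 input features come from the question; the Keras API, the validation split, and the patience value are assumptions, not the asker's original graph code.

import tensorflow as tf

# Hypothetical Keras re-implementation of the two-hidden-layer binary classifier,
# with the dropout and early stopping suggested above.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100, activation="relu", input_shape=(3,)),
    tf.keras.layers.Dropout(0.2),          # suggested dropout rate
    tf.keras.layers.Dense(120, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Stop training when the validation loss stops improving; tune `patience` as needed.
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3,
                                              restore_best_weights=True)
# model.fit(X_train, Y_train, epochs=20, batch_size=60,
#           validation_split=0.2, callbacks=[early_stop])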
Related
I am trying linear regression on the Boston dataset. The MSE loss is nan from the first iteration. I tried altering the learning rate and batch_size, but to no avail.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

inputs = torch.from_numpy(Features).to(torch.float32)
targets = torch.from_numpy(target).to(torch.float32)
train_ds = TensorDataset(inputs, targets)
train_dl = DataLoader(train_ds, batch_size=5, shuffle=True)

model = nn.Linear(13, 1)
opt = optim.SGD(model.parameters(), lr=1e-5)
loss_fn = F.mse_loss

def fit(num_epochs, model, loss_fn, opt, train_dl):
    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        # Train with batches of data
        for xb, yb in train_dl:
            # 1. Generate predictions
            pred = model(xb)
            # 2. Calculate loss
            loss = loss_fn(pred, yb)
            # 3. Compute gradients
            loss.backward()
            # 4. Update parameters using gradients
            opt.step()
            # 5. Reset the gradients to zero
            opt.zero_grad()
        # Print the progress
        if (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {}'.format(epoch+1, num_epochs, loss.item()))

fit(100, model, loss_fn, opt, train_dl)
output
Pay attention to the following (the first two points are sketched after the example below):
- Use normalization: x = (x - x.mean()) / x.std()
- y_train / y_test have to have shape (-1, 1). Use y_train.view(-1, 1) (if y_train is a torch.Tensor or similar).
- (not your case, but for someone else) If you use torch.nn.MSELoss(reduction='sum') then you have to reduce the sum to a mean. It can be done with torch.nn.MSELoss() or in the training loop: l = loss(y_pred, y) / y.shape[0].
Example:
...
loss = torch.nn.MSELoss()
...
for epoch in range(num_epochs):
    for x, y in train_iter:
        y_pred = model(x)
        l = loss(y_pred, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print("epoch {} loss: {:.4f}".format(epoch + 1, l.item()))
I am trying to train a network on the Abalone dataset downloaded from the UCI Machine Learning Repository. The dataset looks like:
M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
I have given the exact same column names as they mention. But when I try to train a neural network on it, I always get a very poor accuracy rate of only about 50%.
I am new to the field, so I don't know if I am using the wrong activation function, executing wrong code, or not preprocessing the data well.
So please help me find the mistake I have made.
Here's my whole code:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def read_dataset():
    df = pd.read_csv("abalone.data.txt")
    X = np.array(df.drop("Sex", 1))
    y = np.array(df["Sex"])
    encoder = LabelEncoder()
    encoder.fit(y)
    y = encoder.transform(y)
    Y = one_hot_encode(y)
    # print(X.shape)
    return X, Y

def one_hot_encode(label):
    n_label = len(label)
    n_unique_label = len(np.unique(label))
    one_hot_encode = np.zeros((n_label, n_unique_label))
    one_hot_encode[np.arange(n_label), label] = 1
    return one_hot_encode

X, y = read_dataset()
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

n_nodes_1 = 60
n_nodes_2 = 60
n_nodes_3 = 60
n_nodes_4 = 60
model_path = "C:\\Users\Kashif\Projects\DeepLearning-Tensorflow\Learnings\AlaboneDetection\AlaboneModel"
n_class = 3
input_size = X.shape[1]

x = tf.placeholder(tf.float32, [None, input_size])
y = tf.placeholder(tf.float32, [None, n_class])

def neural_network(x):
    hidden_1 = {"weights": tf.Variable(tf.random_normal([input_size, n_nodes_1])),
                "biases": tf.Variable(tf.random_normal([n_nodes_1]))}
    hidden_2 = {"weights": tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
                "biases": tf.Variable(tf.random_normal([n_nodes_2]))}
    hidden_3 = {"weights": tf.Variable(tf.random_normal([n_nodes_2, n_nodes_3])),
                "biases": tf.Variable(tf.random_normal([n_nodes_3]))}
    hidden_4 = {"weights": tf.Variable(tf.random_normal([n_nodes_3, n_nodes_4])),
                "biases": tf.Variable(tf.random_normal([n_nodes_4]))}
    out_layer = {"weights": tf.Variable(tf.random_normal([n_nodes_4, n_class])),
                 "biases": tf.Variable(tf.random_normal([n_class]))}
    # (input * weights) + biases
    layer_1 = tf.add(tf.matmul(x, hidden_1["weights"]), hidden_1["biases"])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, hidden_2["weights"]), hidden_2["biases"])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, hidden_3["weights"]), hidden_3["biases"])
    layer_3 = tf.nn.relu(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, hidden_4["weights"]), hidden_4["biases"])
    layer_4 = tf.nn.relu(layer_4)
    output = tf.matmul(layer_4, out_layer["weights"]) + out_layer["biases"]
    return output

def train_neural_network(x):
    prediction = neural_network(x)
    cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost_function)
    init = tf.global_variables_initializer()
    loss_trace = []
    accuracy_trace = []
    # saver = tf.train.Saver()
    epochs = 1000
    with tf.Session() as sess:
        sess.run(init)
        for i in range(epochs):
            sess.run(optimizer, feed_dict={x: train_X, y: train_y})
            loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
            accuracy = np.mean(np.argmax(sess.run(prediction, feed_dict={x: train_X, y: train_y}), axis=1) == np.argmax(train_y, axis=1))
            loss_trace.append(loss)
            accuracy_trace.append(accuracy)
            print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
        # saver.save(sess, model_path)
        print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
        loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
        test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
        accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
        print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)

train_neural_network(x)
And here are the last few epochs and the final accuracy results:
Epoch: 997 loss: 24.625622 accuracy: 0.518407662376534
Epoch: 998 loss: 22.168245 accuracy: 0.48757856929063154
Epoch: 999 loss: 21.896841 accuracy: 0.5001496557916791
Epoch: 1000 loss: 22.28085 accuracy: 0.4968572283747381
Final training result: loss: 22.28085 accuracy: 0.4968572283747381
Results on test dataset: loss: 23.206755 accuracy: 0.4688995215311005
I am new to TensorFlow. Maybe you can try two things:
1. Decrease the learning rate, e.g. to 0.0001, because your loss is oscillating.
2. Increase the number of layers, because your model may be under-fitting.
If the above doesn't solve your problem, you can print your data and check whether train_X and train_y are correct. A small sketch of these suggestions is given below.
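For illustration, a minimal sketch of those suggestions applied to the question's train_neural_network; the 1e-4 value and the printed slices are only examples of the advice above, not tuned settings.

# Inside train_neural_network(x): pass an explicit, smaller learning rate to Adam.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost_function)

# Quick sanity check that the features and labels look reasonable before training.
print(train_X[:5])
print(train_y[:5])
print(train_X.shape, train_y.shape)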
I'm using this code as a base and wish to view the learning progress (the loss) within TensorBoard. After adding the writer and attempting to add_summary, I receive the AttributeError shown at the end of this post.
""" Convolutional Neural Network.
Build and train a convolutional neural network with TensorFlow.
This example is using the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""
from __future__ import division, print_function, absolute_import
import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Training Parameters
learning_rate = 0.001
num_steps = 200
batch_size = 128
display_step = 10
# Network Parameters
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
X = tf.placeholder(tf.float32, [None, num_input])
Y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# MNIST data input is a 1-D vector of 784 features (28*28 pixels)
# Reshape to match picture format [Height x Width x Channel]
# Tensor input become 4-D: [Batch Size, Height, Width, Channel]
x = tf.reshape(x, shape=[-1, 28, 28, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
# Output, class prediction
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, num_classes]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([num_classes]))
}
# Construct model
logits = conv_net(X, weights, biases, keep_prob)
prediction = tf.nn.softmax(logits)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(init)
for step in range(1, num_steps+1):
batch_x, batch_y = mnist.train.next_batch(batch_size)
# Run optimization op (backprop)
sess.run(train_op, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
if step % display_step == 0 or step == 1:
# Calculate batch loss and accuracy
loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
Y: batch_y,
keep_prob: 1.0})
print("Step " + str(step) + ", Minibatch Loss= " + \
"{:.4f}".format(loss) + ", Training Accuracy= " + \
"{:.3f}".format(acc))
print("Optimization Finished!")
# Calculate accuracy for 256 MNIST test images
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={X: mnist.test.images[:256],
Y: mnist.test.labels[:256],
keep_prob: 1.0}))
Running it works completely fine. I then added the writer variable, writing to the /tmp directory of my OS.
# Start training
with tf.Session() as sess:
    writer = tf.summary.FileWriter("/tmp/log/", graph=sess.graph)

    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            summary, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                    Y: batch_y,
                                                                    keep_prob: 1.0})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(summary) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))
            writer.add_summary(summary=summary, global_step=step)

    writer.flush()
    writer.close()

    print("Optimization Finished!")

    # Calculate accuracy for 256 MNIST test images
    print("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={X: mnist.test.images[:256],
                                      Y: mnist.test.labels[:256],
                                      keep_prob: 1.0}))
But that's when the error occurs. What exactly am I doing wrong, and what correction needs to be applied to fix this? I'm still quite new to TensorFlow, but I've used the summary writer before without this error.
I also used this tutorial as well in hopes of fixing this issue:
https://www.tensorflow.org/guide/summaries_and_tensorboard
Error, just so everyone knows the culprit:
Step 1, Minibatch Loss= 98733.8750, Training Accuracy= 0.102
Traceback (most recent call last):
File "convolutional_network_raw.py", line 137, in <module>
writer.add_summary(summary=summary, global_step=step)
File "/home/kyle/.conda/envs/csc/lib/python3.6/site-packages/tensorflow/python/summary/writer/writer.py", line 126, in add_summary
for value in summary.value:
AttributeError: 'numpy.float32' object has no attribute 'value'
The summary that goes into the file writer is a tf.Summary object. You need to first convert the loss value into a summary object, for example below (also change the variable in the print so you don't get an error):
loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                     Y: batch_y,
                                                     keep_prob: 1.0})
summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=loss)])
print("Step " + str(step) + ", Minibatch Loss= " + \
      "{:.4f}".format(loss) + ", Training Accuracy= " + \
      "{:.3f}".format(acc))
I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 300 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Activation
    layer_1_relu = tf.nn.relu(layer_1)
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
    return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_train/batch_size)
        # Loop over all batches
        ptr = 0
        for i in range(total_batch):
            next_ptr = ptr + batch_size
            if next_ptr > len(x_train):
                next_ptr = len(x_train)
            batch_x, batch_y = x_train[ptr:next_ptr], y_train[ptr:next_ptr]
            ptr += batch_size
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))

    print("Optimization Finished!")
    print("Accuracy on training set: ", accuracy.eval({X: x_train, Y: y_train}))
    print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))

print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a GitHub repository. For this test, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following are the results for both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
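For reference, a sketch of how the sklearn call could spell out that parameter match explicitly; the batch_size, learning_rate_init, and beta values below mirror the TensorFlow settings in the code above and are shown for illustration, not as the exact call used.

from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(
    solver='adam',              # same optimizer family as tf.train.AdamOptimizer
    alpha=1e-5,                 # L2 penalty, matching lambda_val
    hidden_layer_sizes=(300,),  # one hidden layer of 300 units
    max_iter=30,                # same number of epochs as training_epochs
    batch_size=200,             # same minibatch size
    learning_rate_init=1e-3,    # same step size as learning_rate
    beta_1=0.9, beta_2=0.999,   # Adam defaults, matching TensorFlow's defaults
)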
I wonder if sklearn has some magic in its implementation compared to TensorFlow? Can anyone help answer?
Thank you very much.
I'm building an MLP for a classification problem. The number of output classes is 8, and my input data consists of 81 columns of binary features and 2416 data points in total.
So far my NN has 3 hidden layers with 400, 300 and 200 nodes respectively, and the learning rate is set to 0.0001.
For training I wanted to implement batches with the following code:
number_of_examples = X_train.shape[0]
batch_size = 60
n_epochs = 10000
number_of_batches = X_train.shape[0] // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for i in range(number_of_batches):
            batch_start = (i - epoch) * batch_size % number_of_examples
            batch_stop = (i - epoch + 1) * batch_size % number_of_examples
            if (batch_stop < batch_start):
                batch_stop = number_of_examples
                X_batch = X_train[batch_start:number_of_examples, :]
                y_batch = y_train[batch_start:number_of_examples]
                data = np.column_stack((X_train, y_train))
                np.random.shuffle(data)
                X_train = data[:, :81]
                y_train = data[:, 81]
            else:
                X_batch = X_train[batch_start:batch_stop, :]
                y_batch = y_train[batch_start:batch_stop]
            sess.run(training_op, feed_dict={X: X_train, y: y_train})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_batch, y: y_batch})
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
When I executed the code I saw that the training accuracy was going up and down. At first I thought to decrease the learning rate in case it was too big for gradient descent to converge to the minimum, but the accuracy kept going up and down and the learning was also very slow (after 200 iterations the accuracy was between 30% and 35%).
I then removed the whole batch implementation and used the entire dataset, in the hope that I could isolate the problem, but the problem remained.
The code without batches is:
number_of_examples = X_train.shape[0]
batch_size = 60
n_epochs = 10000
number_of_batches = X_train.shape[0] // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        sess.run(training_op, feed_dict={X: X_train, y: y_train})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_train, y: y_train})
    acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
At this point I'm quite confused about what is going wrong. It is not the poor performance that troubles me so much as the unstable behavior of the accuracy.
Just to be complete: I use ReLU as the activation function on the hidden layers and softmax on the output. The loss function is cross entropy.
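For reference, a minimal sketch of the graph described here (three ReLU hidden layers of 400, 300 and 200 units, a softmax output over 8 classes, learning rate 0.0001); the tf.layers API, the plain gradient-descent optimizer, and the integer-label loss are assumptions, since the graph-construction code is not shown above.

import tensorflow as tf

n_inputs, n_outputs = 81, 8

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None,), name="y")

# Three hidden ReLU layers as described in the question.
hidden1 = tf.layers.dense(X, 400, activation=tf.nn.relu, name="hidden1")
hidden2 = tf.layers.dense(hidden1, 300, activation=tf.nn.relu, name="hidden2")
hidden3 = tf.layers.dense(hidden2, 200, activation=tf.nn.relu, name="hidden3")
logits = tf.layers.dense(hidden3, n_outputs, name="logits")

# Softmax + cross entropy over integer class labels.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)

# Optimizer choice is an assumption; the question only states the learning rate.
training_op = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(loss)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()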