get_reduce_mean is always 0 - python

I am trying to train an AI program that predicts stock values. Every single time, my cost is 0 and my test accuracy is 100%. I cannot seem to find what I am doing wrong.
placeholder1 = tf.placeholder(tf.float32, shape=[None, 3])

# trainers
dates_train = np.array(dates[0:8000]).astype(np.float32)
highPrice_train = np.array(highPrice[0:8000]).astype(np.float32)
print(dates_train[0][0])

# testers
dates_test = np.array(dates[8000:9564]).astype(np.float32)
highPrice_test = np.array(highPrice[8000:9564]).astype(np.float32)

def get_training_batch(n):
    n = min(n, 7999)
    idx = np.random.choice(7999, n)
    return dates_train[idx], highPrice_train[idx]

n_hidden_1 = 100
n_hidden_2 = 100

weights = {
    'h1': tf.Variable(tf.random_normal([3, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([1]))
}

layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(placeholder1, weights['h1']), biases['b1']))
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
y = tf.matmul(layer_2, weights['out']) + biases['out']

placeholder2 = tf.placeholder(tf.float32, shape=[None, 1])

print("Mean")
print(sum(highPrice) / len(highPrice))
mean = tf.reduce_mean(highPrice)
print(mean)

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=placeholder2))
print("Printing cross_entropy")
print(cross_entropy)

rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(rate).minimize(cross_entropy)
print(optimizer)

prediction = tf.nn.softmax(y)
print(prediction)

## Training
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(placeholder2, 1))
accuracy = 100 * tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy)

epochs = 1000
batch_size = 10

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

cost = []
accu = []
test_accu = []

for ep in range(epochs):
    x_feed, y_feed = get_training_batch(batch_size)
    y_feed = np.reshape(y_feed, [10, 1])
    _, cos, predictions, acc = sess.run([optimizer, cross_entropy, prediction, accuracy],
                                        feed_dict={placeholder1: x_feed, placeholder2: y_feed})
    highPrice_test = np.reshape(highPrice_test, [1564, 1])
    test_acc = accuracy.eval(feed_dict={placeholder1: dates_test, placeholder2: highPrice_test})
    cost.append(cos)
    accu.append(acc)
    test_accu.append(test_acc)
    if ep % (epochs // 10) == 0:
        print('[%d]: Cos: %.4f, Acc: %.1f%%, Test Acc: %.1f%%' % (ep, cos, acc, test_acc))

plt.plot(cost)
plt.title('cost')
plt.show()

plt.plot(accu)
plt.title('Train Accuracy')
plt.show()

plt.plot(test_accu)
plt.title('Test Accuracy')
plt.show()

index = 36
p = sess.run(prediction, feed_dict={placeholder1: dates_train[index:index + 1]})[0]
[0]: Cos: 0.0000, Acc: 100.0%, Test Acc: 100.0%
[100]: Cos: 0.0000, Acc: 100.0%, Test Acc: 100.0%
That is my output for every single run. I expect there to be a nonzero cost, and accuracy should not be 100%.

It seems the problem is that softmax_cross_entropy_with_logits_v2 needs more than one output class: see Cost function always returning zero for a binary classification in tensorflow. If I make highPrice 2-dimensional, it works.
As a side note, if I understand your problem correctly, you are trying to predict the exact stock price. A better way may be to just predict whether it goes up or down, so you can create categorical labels, say (up, no change, down).
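For instance, a minimal sketch of building such one-hot labels from a price series could look like this (my own illustration, not part of the original post; the helper name and the 0.1% threshold are assumptions):

def updown_labels(prices, threshold=0.001):
    # relative change between consecutive prices
    change = np.diff(prices) / prices[:-1]
    # hypothetical encoding: 0 = up, 1 = no change, 2 = down
    classes = np.where(change > threshold, 0,
                       np.where(change < -threshold, 2, 1))
    return np.eye(3)[classes]  # one-hot labels, shape [len(prices) - 1, 3]

The working version of your code with highPrice made 2-dimensional follows: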
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

y_dimensions = 2

placeholder1 = tf.placeholder(tf.float32, shape=[None, 3])

dates = np.array([pd.date_range('2012-10-01', periods=10000, freq='10min'),
                  pd.date_range('2012-10-01', periods=10000, freq='20min'),
                  pd.date_range('2012-10-01', periods=10000, freq='30min')]).T
highPrice = np.random.random((10000, y_dimensions)) * 100

# training set
dates_train = np.array(dates[0:8000]).astype(np.float32)
highPrice_train = np.array(highPrice[0:8000]).astype(np.float32)
print("dates train", dates_train[0])

# testing set
dates_test = np.array(dates[8000:9564]).astype(np.float32)
highPrice_test = np.array(highPrice[8000:9564]).astype(np.float32)

def get_training_batch(n):
    n = min(n, 7999)
    idx = np.random.choice(7999, n)  # create a size-n sample from range 7999
    # print("len batch:", len(idx))
    return dates_train[idx], highPrice_train[idx]

n_hidden_1 = 100
n_hidden_2 = 100

weights = {
    'h1': tf.Variable(tf.random_normal([3, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, y_dimensions]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([1]))
}

layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(placeholder1, weights['h1']), biases['b1']))
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
y = tf.matmul(layer_2, weights['out']) + biases['out']

placeholder2 = tf.placeholder(tf.float32, shape=[None, y_dimensions])

print("Mean:", sum(highPrice) / len(highPrice))
mean = tf.reduce_mean(highPrice)
print("TF mean:", mean)

# labels are high prices, logits are the model output
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=placeholder2))
print("cross_entropy:", cross_entropy)

rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(rate).minimize(cross_entropy)
print("optimizer:", optimizer)

prediction = tf.nn.softmax(y)
print("Prediction:", prediction)

## Training
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(placeholder2, 1))
accuracy = 100 * tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("accuracy:", accuracy)

epochs = 300
batch_size = 10

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

cost = []
accu = []
test_accu = []

for ep in range(epochs):
    x_feed, y_feed = get_training_batch(batch_size)
    y_feed = np.reshape(y_feed, [batch_size, y_dimensions])
    _, cos, predictions, acc = sess.run(
        [optimizer, cross_entropy, prediction, accuracy],
        feed_dict={placeholder1: x_feed, placeholder2: y_feed})
    highPrice_test = np.reshape(highPrice_test, [1564, y_dimensions])
    test_acc = accuracy.eval(
        feed_dict={placeholder1: dates_test, placeholder2: highPrice_test})
    # record history
    cost.append(cos)
    accu.append(acc)
    test_accu.append(test_acc)
    # report every epochs // 10 steps
    if ep % (epochs // 10) == 0:
        print('[%d]: Cos: %.4f, Acc: %.1f%%, Test Acc: %.1f%%' % (ep, cos, acc, test_acc))

plt.plot(cost)
plt.title('cost')
plt.show()

plt.plot(accu)
plt.title('Train Accuracy')
plt.show()

plt.plot(test_accu)
plt.title('Test Accuracy')
plt.show()

index = 78
p = sess.run(prediction,
             feed_dict={placeholder1: dates_train[index:index + 1]})[0]
print("final x input for prediction:", dates_train[index:index + 1])
print("final y prediction:", p)
Output:
[0]: Cos: 232.5091, Acc: 50.0%, Test Acc: 50.4%
[30]: Cos: 1119.8948, Acc: 70.0%, Test Acc: 49.6%
[60]: Cos: 554.2071, Acc: 50.0%, Test Acc: 50.4%
[90]: Cos: 668.4500, Acc: 60.0%, Test Acc: 50.4%
[120]: Cos: 1485.1707, Acc: 20.0%, Test Acc: 50.4%
[150]: Cos: 2667.8867, Acc: 50.0%, Test Acc: 50.4%
[180]: Cos: 806.8883, Acc: 50.0%, Test Acc: 50.4%
[210]: Cos: 105.7802, Acc: 50.0%, Test Acc: 49.6%
[240]: Cos: 2002.2031, Acc: 50.0%, Test Acc: 50.4%
[270]: Cos: 3357.0098, Acc: 20.0%, Test Acc: 50.4%

Related

Siamese network on MNIST dataset is not getting trained

I am training a Siamese network with contrastive loss on two classes of the MNIST dataset to identify whether two images are similar or not. Although the loss decreases in the beginning, it freezes later with an accuracy around 0.5.
The model is trained on pairs of images and a label (0.0 for different, 1.0 for identical). I used only two classes for simplicity (zeros and ones) and prepared the dataset so that it contains every pair of images. I've checked that the dataset is consistent (image pairs from dataset). I've also experimented with data normalization, different batch sizes, learning rates, initializations and regularization constants, with no luck.
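For reference, the loss implemented by simnet_loss below is the standard contrastive loss with margin $m = 1$ (my reading of the code, not stated explicitly in the question):

$$L(y, x_1, x_2) = y \, D(x_1, x_2)^2 + (1 - y)\,\max\bigl(0,\; m - D(x_1, x_2)\bigr)^2$$

where $D$ is the Euclidean distance between the 50-dimensional embeddings and $y = 1$ for identical pairs.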
This is the model:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

class Encoder(Model):
    """
    A network that finds a 50-dimensional representation of the input images
    so that the distances between them minimize the contrastive loss.
    """
    def __init__(self):
        super(Encoder, self).__init__(name='encoder')
        self.cv = Conv2D(32, (3, 3), activation='relu', padding='Same',
                         input_shape=(28, 28, 1),
                         kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.pool = MaxPooling2D((2, 2))
        self.flatten = Flatten()
        self.dense = Dense(50, activation=None,
                           kernel_regularizer=tf.keras.regularizers.l2(0.01))

    def call(self, inputs, training=None, mask=None):
        """ Forward pass for one image """
        x = self.cv(inputs)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.dense(x)
        return x

    @staticmethod
    def distance(difference):
        """ The D function from the paper which is used in the loss """
        distance = tf.sqrt(tf.reduce_sum(tf.pow(difference, 2), 0))
        return distance
The loss and accuracy:
def simnet_loss(target, x1, x2):
    difference = x1 - x2
    distance_vector = tf.map_fn(lambda x: Encoder.distance(x), difference)
    loss = tf.map_fn(lambda distance: target * tf.square(distance) +
                     (1.0 - target) * tf.square(tf.maximum(0.0, 1.0 - distance)),
                     distance_vector)
    average_loss = tf.reduce_mean(loss)
    return average_loss

def accuracy(y_true, y_pred):
    distance_vector = tf.map_fn(lambda x: Encoder.distance(x), y_pred)
    accuracy = tf.keras.metrics.binary_accuracy(y_true, distance_vector)
    return accuracy
Training:
def train_step(images, labels):
    with tf.GradientTape() as tape:
        x1, x2 = images[:, 0, :, :, :], images[:, 1, :, :, :]
        x1 = model(x1)
        x2 = model(x2)
        loss = simnet_loss(labels, x1, x2)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

model = Encoder()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

for epoch in range(n_epoch):
    epoch_loss = 0
    n_batches = int(x_train.shape[0] / batch_size)
    for indices in np.array_split(np.arange(x_train.shape[0]), indices_or_sections=n_batches):
        x = np.take(x_train, indices, axis=0)
        y = np.take(y_train, indices, axis=0)
        epoch_loss += train_step(x, y)
    epoch_loss = epoch_loss / n_batches
    accuracy = test_step(x_train, y_train)
    val_accuracy = test_step(x_test, y_test)
    tf.print("epoch:", epoch, "loss:", epoch_loss, "accuracy:", accuracy,
             "val_accuracy:", val_accuracy, output_stream=sys.stdout)
The code above produces:
epoch: 0 loss: 0.755419433 accuracy: 0.318898171 val_accuracy: 0.310316473
epoch: 1 loss: 0.270610392 accuracy: 0.369466901 val_accuracy: 0.360871345
epoch: 2 loss: 0.262594223 accuracy: 0.430587918 val_accuracy: 0.418002456
epoch: 3 loss: 0.258690506 accuracy: 0.428258181 val_accuracy: 0.427044809
epoch: 4 loss: 0.25654456 accuracy: 0.43497327 val_accuracy: 0.44800657
epoch: 5 loss: 0.255373538 accuracy: 0.444840342 val_accuracy: 0.454993844
epoch: 6 loss: 0.254594624 accuracy: 0.453885168 val_accuracy: 0.454171807

My model got a loss value of 0, but it just classifies all the input into the same class, what's wrong?

I trained this model to classify images in the Fashion-MNIST dataset. Before the weights have been trained, the loss value seems normal, but after the first epoch the loss drops to 0 and all input images are classified into class 0.
If regularization is added, the weights update more slowly, but eventually the result is the same: all images classified into class 0 with a loss value of 0.
import tensorflow as tf
from tensorflow import keras
import numpy as np

EPOCH = 10
BATCH_SIZE = 30
DATA_SIZE = 60000
REGULARIZER = 0.001

def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
    train_images = train_images / 255.0
    test_images = test_images / 255.0
    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))
    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.float32, (None, 1))

    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)
    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)
    y_spy = tf.nn.softmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
    # loss = tf.reduce_mean(ce) + tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(ce)

    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        print('current out loss: ', end='')
        print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
        pre = sess.run(predict, feed_dict={x: test_images})
        miss = pre - test_labels
        print('right number: ', end='')
        print((np.sum(miss == 0)))
        for epoch in range(EPOCH):
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _ = sess.run([train_step], feed_dict={x: train_images[start:end],
                                                      y_: train_labels[start:end]})
            print('epochs %d :' % epoch)
            print('current in loss: ', end='')
            print(sess.run(loss, feed_dict={x: train_images[start:end],
                                            y_: train_labels[start:end]}))
            print('current out loss: ', end='')
            print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
            miss = sess.run(predict, feed_dict={x: test_images}) - test_labels
            print('right number: ', end='')
            print((np.sum(miss == 0)))

if __name__ == "__main__":
    main()
Mistake 1: The loss function should be
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)
because this loss function expects flat (non-one-hot) integer labels. (Also change the y_ placeholder to int32.)
Mistake 2: The weights are initialized to very large values.
Also, GradientDescentOptimizer is a very slow optimizer here; use AdamOptimizer instead.
Fixed code:
import tensorflow as tf
from tensorflow import keras
import numpy as np

EPOCH = 10
BATCH_SIZE = 64
DATA_SIZE = 60000
REGULARIZER = 0.001

def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
    train_images = train_images / 255.0
    test_images = test_images / 255.0
    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))
    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.int32, (None, 1))

    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)
    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)
    y_spy = tf.nn.softmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)
    # loss = tf.reduce_mean(ce) + tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(ce)

    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        print('current out loss: ', end='')
        print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
        pre = sess.run(predict, feed_dict={x: test_images})
        miss = pre - test_labels
        print('right number: ', end='')
        print((np.sum(miss == 0)))
        for epoch in range(EPOCH):
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _ = sess.run([train_step], feed_dict={x: train_images[start:end],
                                                      y_: train_labels[start:end]})
            print('epochs %d :' % epoch)
            print('current in loss: ', end='')
            print(sess.run(loss, feed_dict={x: train_images[start:end],
                                            y_: train_labels[start:end]}))
            print('current out loss: ', end='')
            print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
            miss = sess.run(predict, feed_dict={x: test_images}) - test_labels
            print('right number: ', end='')
            print((np.sum(miss == 0)))
            miss = sess.run(predict, feed_dict={x: test_images})
            print(miss[0:10], test_labels[0:10])

if __name__ == "__main__":
    main()
Output (selective):
...
Sample predictions: [9 2 4 3 2 4 4 4 7 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [9 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [7 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [9 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Code with train/validation loss, train/validation accuracy, and shuffling of the training data each epoch:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

EPOCH = 30
BATCH_SIZE = 64
DATA_SIZE = 60000
REGULARIZER = 0.001

def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
    train_images = train_images / 255.0
    test_images = test_images / 255.0
    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))
    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.int32, (None, 1))

    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)
    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)
    loss = tf.reduce_mean(ce)
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        idx = np.arange(DATA_SIZE)
        for epoch in range(EPOCH):
            train_loss = list()
            train_predict = list()
            np.random.shuffle(idx)
            train_images = train_images[idx]
            train_labels = train_labels[idx]
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _, loss_, p_ = sess.run([train_step, loss, predict],
                                        feed_dict={x: train_images[start:end],
                                                   y_: train_labels[start:end]})
                train_loss.append(loss_)
                train_predict.extend(p_)
            test_loss, test_predict = sess.run([loss, predict],
                                               feed_dict={x: test_images, y_: judge_labels})
            print("Epoch: {}, Train Loss: {:.3f}, Test Loss: {:.3f},"
                  "Train Acc: {:.3f}, Test Acc: {:.3f}".format(
                      epoch + 1, np.mean(train_loss), test_loss,
                      accuracy_score(train_labels[0:len(train_predict)], train_predict),
                      accuracy_score(judge_labels, test_predict)))

if __name__ == "__main__":
    main()
Output:
....
Epoch: 27, Train Loss: 0.842, Test Loss: 1.015,Train Acc: 0.816, Test Acc: 0.798
Epoch: 28, Train Loss: 0.832, Test Loss: 0.880,Train Acc: 0.816, Test Acc: 0.806
Epoch: 29, Train Loss: 0.788, Test Loss: 0.886,Train Acc: 0.820, Test Acc: 0.805
Epoch: 30, Train Loss: 0.704, Test Loss: 0.742,Train Acc: 0.826, Test Acc: 0.815

Trouble with adding an extra layer to neural net in Tensorflow

I'm trying to add a second hidden layer to my neural net, training on the MNIST dataset. With only a single hidden layer the training works fine and the accuracy increases steadily.
When I try to add the second layer, the accuracy gets stuck at 0.117 each time I start training. I just can't figure out what I'm doing wrong here.
I've tried adding a sigmoid to my y with no luck.
XTrain = XTrain[0:10000, :]
YTrain = YTrain[0:10000]

K = len(set(YTrain))
N = len(YTrain)
M = 12  # hidden layer units
D = XTrain.shape[1]

tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])

# HIDDEN LAYER 1
W1 = tf.Variable(tf.random_normal([D, M], stddev=0.01))
b1 = tf.Variable(tf.random_normal([M], stddev=0.01))

# HIDDEN LAYER 2
W2 = tf.Variable(tf.random_normal([M, M], stddev=0.01))
b2 = tf.Variable(tf.random_normal([M], stddev=0.01))

# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M, K], stddev=0.01))
b3 = tf.Variable(tf.random_normal([K], stddev=0.01))

# MODEL
h1 = tf.nn.sigmoid(tf.matmul(tfX, W1) + b1)
h2 = tf.nn.sigmoid(tf.matmul(h1, W2) + b2)
y = tf.matmul(h2, W3) + b3

# Softmax and cross-entropy
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tfY,
        logits=y)
)

# Targets one-hot encoded
T = np.zeros((N, K))
for i in range(N):
    T[i, YTrain[i]] = 1

# Gradient descent
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(y, 1)

# Start session and initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# TRAIN
for i in range(10000):
    sess.run(train_op, feed_dict={tfX: XTrain, tfY: T})
    pred = sess.run(predict_op, feed_dict={tfX: XTrain, tfY: T})
    if i % 20 == 0:
        print("Accuracy:", np.mean(YTrain == pred))
When I start training the output looks like this:
Accuracy: 0.0991
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
I figured out a solution to the problem myself.
Apparently the initialization of the weights wasn't right. It works if I change the initialization to:
# HIDDEN LAYER 1
W1 = tf.Variable(tf.random_normal([D,M], stddev=1) / np.sqrt(D))
b1 = tf.Variable(tf.random_normal([M], stddev=1))
# HIDDEN LAYER 2
W2 = tf.Variable(tf.random_normal([M,M], stddev=1) / np.sqrt(M))
b2 = tf.Variable(tf.random_normal([M], stddev=1))
# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M,K], stddev=1) / np.sqrt(M))
b3 = tf.Variable(tf.random_normal([K], stddev=1))
Why this works I'm still not quite sure of; I would appreciate any answers and feedback.
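As a follow-up sketch (my own assumption, not part of the original answer): TensorFlow 1.x also ships initializers that apply this kind of fan-in scaling automatically, for example:

# Hypothetical alternative: let a built-in initializer handle the 1/sqrt(fan_in) scaling.
W1 = tf.get_variable("W1", shape=[D, M], initializer=tf.glorot_uniform_initializer())
b1 = tf.get_variable("b1", shape=[M], initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2", shape=[M, M], initializer=tf.glorot_uniform_initializer())
b2 = tf.get_variable("b2", shape=[M], initializer=tf.zeros_initializer())
W3 = tf.get_variable("W3", shape=[M, K], initializer=tf.glorot_uniform_initializer())
b3 = tf.get_variable("b3", shape=[K], initializer=tf.zeros_initializer())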

Got very poor accuracy rate whenever I train any network

I am trying to train a network on the Abalone dataset downloaded from the UCI Machine Learning Repository. The dataset looks like this:
M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
I have given the columns exactly the same names as mentioned there. But when I try to train a neural network on it, I always get a very poor accuracy rate of only about 50%.
I am new to the field, so I don't know whether I am using the wrong activation function, executing wrong code, or haven't preprocessed the data well.
So please help me find the mistake I have made.
Here's my whole code:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def read_dataset():
    df = pd.read_csv("abalone.data.txt")
    X = np.array(df.drop("Sex", 1))
    y = np.array(df["Sex"])
    encoder = LabelEncoder()
    encoder.fit(y)
    y = encoder.transform(y)
    Y = one_hot_encode(y)
    # print(X.shape)
    return X, Y

def one_hot_encode(label):
    n_label = len(label)
    n_unique_label = len(np.unique(label))
    one_hot_encode = np.zeros((n_label, n_unique_label))
    one_hot_encode[np.arange(n_label), label] = 1
    return one_hot_encode

X, y = read_dataset()
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)

n_nodes_1 = 60
n_nodes_2 = 60
n_nodes_3 = 60
n_nodes_4 = 60
model_path = "C:\\Users\Kashif\Projects\DeepLearning-Tensorflow\Learnings\AlaboneDetection\AlaboneModel"
n_class = 3
input_size = X.shape[1]

x = tf.placeholder(tf.float32, [None, input_size])
y = tf.placeholder(tf.float32, [None, n_class])

def neural_network(x):
    hidden_1 = {"weights": tf.Variable(tf.random_normal([input_size, n_nodes_1])),
                "biases": tf.Variable(tf.random_normal([n_nodes_1]))}
    hidden_2 = {"weights": tf.Variable(tf.random_normal([n_nodes_1, n_nodes_2])),
                "biases": tf.Variable(tf.random_normal([n_nodes_2]))}
    hidden_3 = {"weights": tf.Variable(tf.random_normal([n_nodes_2, n_nodes_3])),
                "biases": tf.Variable(tf.random_normal([n_nodes_3]))}
    hidden_4 = {"weights": tf.Variable(tf.random_normal([n_nodes_3, n_nodes_4])),
                "biases": tf.Variable(tf.random_normal([n_nodes_4]))}
    out_layer = {"weights": tf.Variable(tf.random_normal([n_nodes_4, n_class])),
                 "biases": tf.Variable(tf.random_normal([n_class]))}

    # (input * weights) + biases
    layer_1 = tf.add(tf.matmul(x, hidden_1["weights"]), hidden_1["biases"])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, hidden_2["weights"]), hidden_2["biases"])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, hidden_3["weights"]), hidden_3["biases"])
    layer_3 = tf.nn.relu(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, hidden_4["weights"]), hidden_4["biases"])
    layer_4 = tf.nn.relu(layer_4)
    output = tf.matmul(layer_4, out_layer["weights"]) + out_layer["biases"]
    return output

def train_neural_network(x):
    prediction = neural_network(x)
    cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost_function)
    init = tf.global_variables_initializer()
    loss_trace = []
    accuracy_trace = []
    # saver = tf.train.Saver()
    epochs = 1000
    with tf.Session() as sess:
        sess.run(init)
        for i in range(epochs):
            sess.run(optimizer, feed_dict={x: train_X, y: train_y})
            loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
            accuracy = np.mean(np.argmax(sess.run(prediction, feed_dict={x: train_X, y: train_y}), axis=1)
                               == np.argmax(train_y, axis=1))
            loss_trace.append(loss)
            accuracy_trace.append(accuracy)
            print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
        # saver.save(sess, model_path)
        print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
        loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
        test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
        accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
        print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)

train_neural_network(x)
And here are my results for the last few epochs and the final accuracy:
Epoch: 997 loss: 24.625622 accuracy: 0.518407662376534
Epoch: 998 loss: 22.168245 accuracy: 0.48757856929063154
Epoch: 999 loss: 21.896841 accuracy: 0.5001496557916791
Epoch: 1000 loss: 22.28085 accuracy: 0.4968572283747381
Final training result: loss: 22.28085 accuracy: 0.4968572283747381
Results on test dataset: loss: 23.206755 accuracy: 0.4688995215311005
I am new to TensorFlow, but maybe you can try two things:
1. Decrease the learning rate, e.g. to 0.0001, because your loss is oscillating.
2. Increase the number of layers, because your model may be under-fitting.
If the above doesn't solve your problem, print your data and check whether train_X and train_y are correct. A minimal sketch of the first suggestion is shown below.
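As an illustration only (my own sketch, not part of the original answer), the learning rate can be passed explicitly to the optimizer in train_neural_network:

# Assumption: AdamOptimizer currently uses its default learning rate of 0.001.
# Passing a smaller value, e.g. 0.0001, may reduce the oscillation in the loss.
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost_function)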

tf.metrics.accuracy and hand-written accuracy function give different results

I am trying to see how tf.metrics.accuracy works. I want to compare the per-batch accuracy results of the function given below
with tf.name_scope('Accuracy1'):
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy1 = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
with
with tf.name_scope('Accuracy2'):
    accuracy2, accuracy_op = tf.metrics.accuracy(labels=tf.argmax(y, 1), predictions=tf.argmax(predictions, 1))
A minimal working example is provided below:
import numpy as np
import pandas as pd
import tensorflow as tf
import math
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

num_steps = 28
num_inputs = 28
num_classes = 10
num_neurons = 128
num_layers = 3
batch_size = 500

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("graph_inputs"):
        X = tf.placeholder(tf.float32, [None, num_steps, num_inputs], name='input_placeholder')
        y = tf.placeholder(tf.float32, [None, num_classes], name='labels_placeholder')
        output_keep_prob = tf.placeholder_with_default(1.0, shape=(), name="output_dropout")

    def build_lstm_cell(num_neurons, output_keep_prob):
        """Returns a dropout-wrapped LSTM cell.
        See https://stackoverflow.com/a/44882273/2628369 for why this local function is necessary.
        Returns:
            tf.contrib.rnn.DropoutWrapper: The dropout-wrapped LSTM cell.
        """
        initializer = tf.contrib.layers.xavier_initializer()
        lstm_cell = tf.contrib.rnn.LSTMCell(num_units=num_neurons, initializer=initializer, forget_bias=1.0, state_is_tuple=True, name='LSTM_cell')
        lstm_cell_drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=output_keep_prob)
        return lstm_cell_drop

    with tf.name_scope("LSTM"):
        with tf.name_scope("Cell"):
            multi_layer_cell = tf.contrib.rnn.MultiRNNCell([build_lstm_cell(num_neurons, output_keep_prob) for _ in range(num_layers)], state_is_tuple=True)
        with tf.name_scope("Model"):
            outputs, states = tf.nn.dynamic_rnn(cell=multi_layer_cell, inputs=X, swap_memory=False, time_major=False, dtype=tf.float32)  # [batch_size, time_steps, num_neurons]

    with tf.name_scope("Graph_Outputs"):
        outputs = tf.transpose(outputs, [1, 0, 2])  # [num_timesteps, batch_size, num_neurons]
        outputs = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)  # [batch_size, num_neurons]

    with tf.variable_scope('Softmax'):
        logits = tf.layers.dense(inputs=outputs, units=num_classes, name="logits")  # [batch_size, num_classes]

    with tf.name_scope('Predictions'):
        predictions = tf.nn.softmax(logits, name="predictions")  # [batch_size, num_classes]

    with tf.name_scope('Accuracy1'):
        correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
        accuracy1 = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

    with tf.name_scope('Accuracy2'):
        accuracy2, accuracy_op = tf.metrics.accuracy(labels=tf.argmax(y, 1), predictions=tf.argmax(predictions, 1))

    with tf.name_scope('Loss'):
        xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy, name="loss")

    with tf.name_scope('Train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        trainer = optimizer.minimize(loss, name="training_op")

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    total_batch = mnist.train.num_examples // batch_size
    for batch in range(total_batch):
        tf.local_variables_initializer().run()
        xBatch, yBatch = mnist.train.next_batch(batch_size)
        xBatch = xBatch.reshape((batch_size, num_steps, num_inputs))
        sess.run(trainer, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        miniBatchAccuracy1 = sess.run(accuracy1, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        print('[hand-written] Batch {} accuracy: {}'.format(batch, miniBatchAccuracy1))
        accuracy_op_val = sess.run(accuracy_op, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        miniBatchAccuracy2 = sess.run(accuracy2)
        print("[tf.metrics.accuracy] Batch {} accuracy: {}".format(batch, miniBatchAccuracy2))
    sess.close()
I print the accuracy values of each batch using these two approaches, and they are different. Shouldn't the results be the same?
[hand-written] Batch 0 accuracy: 0.09600000083446503
[tf.metrics.accuracy] Batch 0 accuracy: 0.09399999678134918
[hand-written] Batch 1 accuracy: 0.1120000034570694
[tf.metrics.accuracy] Batch 1 accuracy: 0.07800000160932541
[hand-written] Batch 2 accuracy: 0.10199999809265137
[tf.metrics.accuracy] Batch 2 accuracy: 0.09600000083446503
[hand-written] Batch 3 accuracy: 0.12999999523162842
[tf.metrics.accuracy] Batch 3 accuracy: 0.12800000607967377
[hand-written] Batch 4 accuracy: 0.1379999965429306
[tf.metrics.accuracy] Batch 4 accuracy: 0.10199999809265137
[hand-written] Batch 5 accuracy: 0.16200000047683716
[tf.metrics.accuracy] Batch 5 accuracy: 0.1340000033378601
[hand-written] Batch 6 accuracy: 0.1340000033378601
[tf.metrics.accuracy] Batch 6 accuracy: 0.12600000202655792
[hand-written] Batch 7 accuracy: 0.12999999523162842
[tf.metrics.accuracy] Batch 7 accuracy: 0.16200000047683716
...
...
...
...
When measuring the accuracy in both cases, you are passing the dropout keep probability as 0.5. That is the reason they give two different values. Set the dropout value to 1.0 when evaluating and you should see similar values for both cases, as in the sketch below.
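A minimal sketch of that change (my own illustration, not part of the original answer), evaluating both metrics on the same forward pass with dropout disabled:

# Evaluate with dropout off (keep probability 1.0) so both metrics see the same predictions.
eval_feed = {X: xBatch, y: yBatch, output_keep_prob: 1.0}
miniBatchAccuracy1, _ = sess.run([accuracy1, accuracy_op], feed_dict=eval_feed)
miniBatchAccuracy2 = sess.run(accuracy2)
print('[hand-written] Batch {} accuracy: {}'.format(batch, miniBatchAccuracy1))
print('[tf.metrics.accuracy] Batch {} accuracy: {}'.format(batch, miniBatchAccuracy2))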
