Trouble with adding an extra layer to neural net in Tensorflow - python

I'm trying to add a second hidden layer to my neural net, training on the MNIST dataset. With only a single hidden layer the training works fine and the accuracy increases steadily.
When I try to add the second layer, the accuracy gets stuck at 0.117 every time I start training. I just can't figure out what I'm doing wrong here.
I've tried applying a sigmoid to my y, with no luck.
XTrain = XTrain[0:10000,:]
YTrain = YTrain[0:10000]
K = len(set(YTrain))
N = len(YTrain)
M = 12 #Hidden layer units
D = XTrain.shape[1]
tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])
# HIDDEN LAYER 1
W1 = tf.Variable(tf.random_normal([D,M], stddev=0.01))
b1 = tf.Variable(tf.random_normal([M], stddev=0.01))
# HIDDEN LAYER 2
W2 = tf.Variable(tf.random_normal([M,M], stddev=0.01))
b2 = tf.Variable(tf.random_normal([M], stddev=0.01))
# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M,K], stddev=0.01))
b3 = tf.Variable(tf.random_normal([K], stddev=0.01))
# MODEL
h1 = tf.nn.sigmoid(tf.matmul(tfX, W1) + b1)
h2 = tf.nn.sigmoid(tf.matmul(h1, W2) + b2)
y = tf.matmul(h2,W3) + b3
# Softmax and cross-entropy
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tfY,
        logits=y)
)
# Targets One-Hot encoded
T = np.zeros((N,K))
for i in range(N):
    T[i, YTrain[i]] = 1
#Gradient descent
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(y, 1)
# Start session and initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# TRAIN
for i in range(10000):
    sess.run(train_op, feed_dict={tfX: XTrain, tfY: T})
    pred = sess.run(predict_op, feed_dict={tfX: XTrain, tfY: T})
    if i % 20 == 0:
        print("Accuracy:", np.mean(YTrain == pred))
When I start training the output looks like this:
Accuracy: 0.0991
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127
Accuracy: 0.1127

I figured out a solution to the problem myself.
Apparently the initialization of the weights wasn't right. It works if I change the initialization to:
# HIDDEN LAYER 1
W1 = tf.Variable(tf.random_normal([D,M], stddev=1) / np.sqrt(D))
b1 = tf.Variable(tf.random_normal([M], stddev=1))
# HIDDEN LAYER 2
W2 = tf.Variable(tf.random_normal([M,M], stddev=1) / np.sqrt(M))
b2 = tf.Variable(tf.random_normal([M], stddev=1))
# OUTPUT LAYER
W3 = tf.Variable(tf.random_normal([M,K], stddev=1) / np.sqrt(M))
b3 = tf.Variable(tf.random_normal([K], stddev=1))
I'm still not quite sure why; I would appreciate any answers and feedback.
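For what it's worth, dividing by np.sqrt(fan_in) is essentially Xavier/Glorot-style scaling: it keeps the variance of each layer's pre-activations roughly constant, so the sigmoids neither saturate nor produce nearly identical outputs across units. One plausible reading of the failure is that with stddev=0.01 the second hidden layer's activations are almost constant, the output logits barely depend on the input, and gradient descent stalls near chance-level accuracy. As a hedged sketch (assuming a TF 1.x version that provides tf.glorot_uniform_initializer; shapes D, M, K as defined in the question), the same scaling can be obtained with the built-in initializer:
# Sketch only, not the original poster's code: use TF's built-in Glorot (Xavier)
# initializer instead of dividing tf.random_normal by np.sqrt(fan_in) by hand.
init = tf.glorot_uniform_initializer()

W1 = tf.Variable(init([D, M]))
b1 = tf.Variable(tf.zeros([M]))
W2 = tf.Variable(init([M, M]))
b2 = tf.Variable(tf.zeros([M]))
W3 = tf.Variable(init([M, K]))
b3 = tf.Variable(tf.zeros([K]))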

Related

Single loss with Multiple output model in TF.Keras

I use tensorflow's Dataset such that y is a dictionary of 6 tensors, which I all use in a single loss function that looks like this:
def CustomLoss():
    def custom_loss(y_true, y_pred):
        a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
        b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
        c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
        return a + b + c
    return custom_loss
And I have a model with 3 outputs of different shapes. When I compile the model and call the fit method, I get a ValueError:
model.compile(optimizer=optimizer, loss=CustomLoss())
model.fit(dataset, epochs=10)
ValueError: Found unexpected keys that do not correspond to any
Model output: dict_keys(['a_0', 'a_1', 'b_0', 'b_1', 'c_0', 'c_1']).
Expected: ['output_0', 'output_1', 'output_2']
where output_0, output_1, output_2 are the names of the output layers.
I figured that naming the output layers after the keys in the dataset should solve the issue, but the problem is that I have 6 tensors in the dataset and only 3 outputs. I'm aware I can assign a loss function to every output with a single ground-truth tensor from the dataset, but again I need to pass at least two tensors as ground truth.
So far I've used a custom training loop, but I'd rather use the fit method. I'm using tensorflow 2.3.1.
EDIT:
Example model:
inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)
model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])
Custom training loop:
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
for epoch in range(1, epochs+1):
    for batch, (images, labels) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            outputs = model(images, training=False)
            reg_loss = tf.reduce_sum(model.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        avg_loss.update_state(total_loss)
    print(f'Epoch {epoch}/{epochs} - Loss: {avg_loss.result().numpy()}')
    avg_loss.reset_states()
Minimal reproducible code:
import tensorflow as tf
def CustomLoss():
    def custom_loss(y_true, y_pred):
        a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
        b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
        b = tf.reduce_sum(b, axis=(1, 2, 3))
        c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
        c = tf.reduce_sum(c, axis=1)
        return a + b + c
    return custom_loss
dataset = tf.data.Dataset.from_tensors((
tf.random.uniform((256, 256, 3)),
{'a_0': [0., 1.], 'a_1': [1.], 'b_0': tf.random.uniform((8, 8, 256)), 'b_1': [1.], 'c_0': tf.random.uniform((64,)), 'c_1': [1.]}
))
dataset = dataset.batch(1)
inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)
model = tf.keras.Model(inputs=inputs, outputs=[x1, x2, x3])
optimizer = tf.keras.optimizers.Adam(1e-3)
model.compile(optimizer=optimizer, loss=CustomLoss())
model.fit(dataset, epochs=1)
Here is one approach for your case. We will still use a custom training loop, but we also take advantage of the convenient .fit method by customizing it. Please check the documentation for more details: Customizing what happens in fit().
Here is one simple demonstration, extending your reproducible code.
import tensorflow as tf
# data set
dataset = tf.data.Dataset.from_tensors((
tf.random.uniform((256, 256, 3)),
{'a_0': [0., 1.], 'a_1': [1.], 'b_0': tf.random.uniform((8, 8, 256)),
'b_1': [1.], 'c_0': tf.random.uniform((64,)), 'c_1': [1.]}
))
dataset = dataset.batch(1)
# custom loss
def loss(y_true, y_pred):
    a = tf.keras.losses.binary_crossentropy(y_true['a_0'], y_pred[0]) * y_true['a_1']
    b = tf.square(y_true['b_0'] - y_pred[1]) * y_true['b_1']
    b = tf.reduce_sum(b, axis=(1, 2, 3))
    c = tf.abs(y_true['c_0'] - y_pred[2]) * y_true['c_1']
    c = tf.reduce_sum(c, axis=1)
    return a + b + c
Custom Model
This basically overrides train_step, which runs repeatedly over each batch of data.
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
class custom_fit(tf.keras.Model):
    def train_step(self, data):
        images, labels = data
        with tf.GradientTape() as tape:
            outputs = self(images, training=True)  # forward pass
            reg_loss = tf.reduce_sum(self.losses)
            pred_loss = loss(labels, outputs)
            total_loss = tf.reduce_sum(pred_loss) + reg_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        avg_loss.update_state(total_loss)
        return {"loss": avg_loss.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [avg_loss]
Build Model
# model
inputs = x = tf.keras.layers.Input((256, 256, 3))
x = tf.keras.applications.ResNet50(include_top=False, weights=None)(x)
x1 = tf.keras.layers.Flatten()(x)
x1 = tf.keras.layers.Dense(2, name='output_1')(x1)
x2 = tf.keras.layers.Conv2D(256, 1, name='output_2')(x)
x3 = tf.keras.layers.Flatten()(x)
x3 = tf.keras.layers.Dense(64, name='output_3')(x3)
# simply pass the inputs and outputs to the custom model
custom_model = custom_fit(inputs=[inputs],
                          outputs=[x1, x2, x3])
Compile and Fit
custom_model.compile(optimizer='adam')
custom_model.fit(dataset, epochs=5, verbose=2)
Epoch 1/5
1/1 - 6s - loss: 73784.0078
Epoch 2/5
1/1 - 1s - loss: 64882.8984
Epoch 3/5
1/1 - 1s - loss: 54760.2500
Epoch 4/5
1/1 - 1s - loss: 47696.7031
Epoch 5/5
1/1 - 1s - loss: 40574.6328

get_reduce_mean is always 0

I am trying to train an AI program that predicts stock values. Every single time, my cost is 0 and my test accuracy is 100%. I cannot seem to find what I am doing wrong.
placeholder1 = tf.placeholder(tf.float32, shape=[None, 3])
#trainers
dates_train = np.array(dates[0:8000]).astype(np.float32)
highPrice_train = np.array(highPrice[0:8000]).astype(np.float32)
print(dates_train[0][0])
#testers
dates_test = np.array(dates[8000:9564]).astype(np.float32)
highPrice_test = np.array(highPrice[8000:9564]).astype(np.float32)
def get_training_batch(n):
    n = min(n, 7999)
    idx = np.random.choice(7999, n)
    return dates_train[idx], highPrice_train[idx]
n_hidden_1 = 100
n_hidden_2 = 100
weights = {
'h1' : tf.Variable(tf.random_normal([3, n_hidden_1])),
'h2' : tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
'out' : tf.Variable(tf.random_normal([n_hidden_2,1]))
}
biases = {
'b1' : tf.Variable(tf.random_normal([n_hidden_1])),
'b2' : tf.Variable(tf.random_normal([n_hidden_2])),
'out' : tf.Variable(tf.random_normal([1]))
}
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(placeholder1, weights['h1']), biases['b1']))
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
y = tf.matmul(layer_2,weights['out']) + biases['out']
placeholder2 = tf.placeholder(tf.float32,shape=[None,1])
print("Mean")
print(sum(highPrice)/len(highPrice))
mean = tf.reduce_mean(highPrice)
print(mean)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=placeholder2))
print("Printing cross_entropy")
print(cross_entropy)
rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(rate).minimize(cross_entropy)
print(optimizer)
prediction = tf.nn.softmax(y)
print(prediction)
##Training
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(placeholder2,1))
accuracy = 100 * tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy)
epochs = 1000
batch_size = 10
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
cost = []
accu = []
test_accu = []
for ep in range(epochs):
    x_feed, y_feed = get_training_batch(batch_size)
    y_feed = np.reshape(y_feed, [10, 1])
    _, cos, predictions, acc = sess.run([optimizer, cross_entropy, prediction, accuracy], feed_dict={placeholder1: x_feed, placeholder2: y_feed})
    highPrice_test = np.reshape(highPrice_test, [1564, 1])
    test_acc = accuracy.eval(feed_dict={placeholder1: dates_test, placeholder2: highPrice_test})
    cost.append(cos)
    accu.append(acc)
    test_accu.append(test_acc)
    if ep % (epochs // 10) == 0:
        print('[%d]: Cos: %.4f, Acc: %.1f%%, Test Acc: %.1f%%' % (ep, cos, acc, test_acc))
plt.plot(cost)
plt.title('cost')
plt.show()
plt.plot(accu)
plt.title('Train Accuracy')
plt.show()
plt.plot(test_accu)
plt.title('Test Accuracy')
plt.show()
index = 36
p = sess.run(prediction, feed_dict = {placeholder1:dates_train[index:index +1]})[0]
[0]: Cos: 0.0000, Acc: 100.0%, Test Acc: 100.0%
[100]: Cos: 0.0000, Acc: 100.0%, Test Acc: 100.0%
That is my output for every single test. I expect there to be a nonzero cost, and the accuracy should not be 100%.
It seems the problem is that softmax_cross_entropy_with_logits_v2 needs more than one output class (see: Cost function always returning zero for a binary classification in tensorflow). If I change highPrice to be two-dimensional, it works.
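To make that concrete: with a single output column, the softmax of that one logit is identically 1, so the cross-entropy is identically 0, and argmax over a single column trivially matches for predictions and labels, which is why the accuracy reads 100%. A small standalone check (TF 1.x assumed, made-up numbers, not part of the original code):
# Sketch only: softmax of a single logit is always 1.0, so the cross-entropy
# against any single-column label is always 0.
import tensorflow as tf

logits = tf.constant([[2.3], [-1.7], [0.4]])    # shape [batch, 1]
labels = tf.constant([[55.0], [12.0], [90.0]])  # arbitrary "prices", shape [batch, 1]

probs = tf.nn.softmax(logits)
xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)

with tf.Session() as sess:
    print(sess.run(probs))  # all ones
    print(sess.run(xent))   # all zeros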
As a side note, if I understand your problem correctly, you are trying to predict the exact stock price. A better way may be to just predict whether it is going up or down, so you can create categorical labels, say (up, no change, down).
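If you go that route, a minimal sketch of building (down, no change, up) labels from a price series might look like the following; the helper name and threshold are made up for illustration. The full modified example below instead uses y_dimensions = 2 with random placeholder data.
import numpy as np

# Hypothetical helper: turn a 1-D price series into 3-class one-hot labels
# (0 = down, 1 = no change, 2 = up) from the relative day-to-day change.
def make_direction_labels(prices, threshold=0.01):
    change = np.diff(prices) / prices[:-1]
    classes = np.ones(len(change), dtype=np.int64)  # default: "no change"
    classes[change > threshold] = 2                 # "up"
    classes[change < -threshold] = 0                # "down"
    return np.eye(3)[classes]                       # shape [len(prices) - 1, 3]

print(make_direction_labels(np.array([100.0, 101.5, 101.4, 99.0])))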
import tensorflow as tf
y_dimensions = 2
placeholder1 = tf.placeholder(tf.float32, shape=[None, 3])
dates = np.array([pd.date_range('2012-10-01', periods=10000, freq='10min'),
pd.date_range('2012-10-01', periods=10000, freq='20min'),
pd.date_range('2012-10-01', periods=10000,
freq='30min')]).T
highPrice = np.random.random((10000, y_dimensions)) * 100
# training set
dates_train = np.array(dates[0:8000]).astype(np.float32)
highPrice_train = np.array(highPrice[0:8000]).astype(np.float32)
print("dates train", dates_train[0])
# testing set
dates_test = np.array(dates[8000:9564]).astype(np.float32)
highPrice_test = np.array(highPrice[8000:9564]).astype(np.float32)
def get_training_batch(n):
    n = min(n, 7999)
    idx = np.random.choice(7999, n)  # create size n sample from range 7999
    #print("len batch:", len(idx))
    return dates_train[idx], highPrice_train[idx]
n_hidden_1 = 100
n_hidden_2 = 100
weights = {
'h1': tf.Variable(tf.random_normal([3, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, y_dimensions]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([1]))
}
layer_1 = tf.nn.sigmoid(
tf.add(tf.matmul(placeholder1, weights['h1']), biases['b1']))
layer_2 = tf.nn.sigmoid(
tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
y = tf.matmul(layer_2, weights['out']) + biases['out']
placeholder2 = tf.placeholder(tf.float32, shape=[None, y_dimensions])
print("Mean:", sum(highPrice) / len(highPrice))
mean = tf.reduce_mean(highPrice)
print("TF mean:", mean)
# labels are high prices, logits are model output
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits_v2(logits=y,
labels=placeholder2))
print("cross_entropy:", cross_entropy)
rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(rate).minimize(cross_entropy)
print("optimizer:", optimizer)
prediction = tf.nn.softmax(y)
print("Prediction:", prediction)
##Training
correct_prediction = tf.equal(tf.argmax(prediction, 1),
tf.argmax(placeholder2, 1))
accuracy = 100 * tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("accuracy:", accuracy)
epochs = 300
batch_size = 10
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
cost = []
accu = []
test_accu = []
for ep in range(epochs):
    x_feed, y_feed = get_training_batch(batch_size)
    y_feed = np.reshape(y_feed, [batch_size, y_dimensions])
    _, cos, predictions, acc = sess.run(
        [optimizer, cross_entropy, prediction, accuracy],
        feed_dict={placeholder1: x_feed, placeholder2: y_feed})
    highPrice_test = np.reshape(highPrice_test, [1564, y_dimensions])
    test_acc = accuracy.eval(
        feed_dict={placeholder1: dates_test, placeholder2: highPrice_test})
    # create history
    cost.append(cos)
    accu.append(acc)
    test_accu.append(test_acc)
    # every 10 epochs
    if ep % (epochs // 10) == 0:
        print('[%d]: Cos: %.4f, Acc: %.1f%%, Test Acc: %.1f%%' % (
            ep, cos, acc, test_acc))
plt.plot(cost)
plt.title('cost')
plt.show()
plt.plot(accu)
plt.title('Train Accuracy')
plt.show()
plt.plot(test_accu)
plt.title('Test Accuracy')
plt.show()
index = 78
p = sess.run(prediction,
feed_dict={placeholder1: dates_train[index:index + 1]})[0]
print("final x input for prediction:", dates_train[index:index + 1])
print("final y prediction:", p)
Output:
[0]: Cos: 232.5091, Acc: 50.0%, Test Acc: 50.4%
[30]: Cos: 1119.8948, Acc: 70.0%, Test Acc: 49.6%
[60]: Cos: 554.2071, Acc: 50.0%, Test Acc: 50.4%
[90]: Cos: 668.4500, Acc: 60.0%, Test Acc: 50.4%
[120]: Cos: 1485.1707, Acc: 20.0%, Test Acc: 50.4%
[150]: Cos: 2667.8867, Acc: 50.0%, Test Acc: 50.4%
[180]: Cos: 806.8883, Acc: 50.0%, Test Acc: 50.4%
[210]: Cos: 105.7802, Acc: 50.0%, Test Acc: 49.6%
[240]: Cos: 2002.2031, Acc: 50.0%, Test Acc: 50.4%
[270]: Cos: 3357.0098, Acc: 20.0%, Test Acc: 50.4%

tf.metrics.accuracy and hand-written accuracy function give different results

I am trying to see how tf.metrics.accuracy works. I want to compare batch accuracy results of the function given below
with tf.name_scope('Accuracy1'):
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy1 = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
with
with tf.name_scope('Accuracy2'):
    accuracy2, accuracy_op = tf.metrics.accuracy(labels=tf.argmax(y, 1), predictions=tf.argmax(predictions, 1))
A minimal working example is provided below:
import numpy as np
import pandas as pd
import tensorflow as tf
import math
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
num_steps=28
num_inputs = 28
num_classes = 10
num_neurons = 128
num_layers = 3
batch_size = 500
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("graph_inputs"):
        X = tf.placeholder(tf.float32, [None, num_steps, num_inputs], name='input_placeholder')
        y = tf.placeholder(tf.float32, [None, num_classes], name='labels_placeholder')
        output_keep_prob = tf.placeholder_with_default(1.0, shape=(), name="output_dropout")

    def build_lstm_cell(num_neurons, output_keep_prob):
        """Returns a dropout-wrapped LSTM-cell.
        See https://stackoverflow.com/a/44882273/2628369 for why this local function is necessary.
        Returns:
            tf.contrib.rnn.DropoutWrapper: The dropout-wrapped LSTM cell.
        """
        initializer = tf.contrib.layers.xavier_initializer()
        lstm_cell = tf.contrib.rnn.LSTMCell(num_units=num_neurons, initializer=initializer, forget_bias=1.0, state_is_tuple=True, name='LSTM_cell')
        lstm_cell_drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=output_keep_prob)
        return lstm_cell_drop

    with tf.name_scope("LSTM"):
        with tf.name_scope("Cell"):
            multi_layer_cell = tf.contrib.rnn.MultiRNNCell([build_lstm_cell(num_neurons, output_keep_prob) for _ in range(num_layers)], state_is_tuple=True)
        with tf.name_scope("Model"):
            outputs, states = tf.nn.dynamic_rnn(cell=multi_layer_cell, inputs=X, swap_memory=False, time_major=False, dtype=tf.float32)  # [batch_size, time_steps, num_neurons]

    with tf.name_scope("Graph_Outputs"):
        outputs = tf.transpose(outputs, [1, 0, 2])  # [num_timesteps, batch_size, num_neurons]
        outputs = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)  # [batch_size, num_neurons]
    with tf.variable_scope('Softmax'):
        logits = tf.layers.dense(inputs=outputs, units=num_classes, name="logits")  # [batch_size, num_classes]
    with tf.name_scope('Predictions'):
        predictions = tf.nn.softmax(logits, name="predictions")  # [batch_size, num_classes]
    with tf.name_scope('Accuracy1'):
        correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
        accuracy1 = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
    with tf.name_scope('Accuracy2'):
        accuracy2, accuracy_op = tf.metrics.accuracy(labels=tf.argmax(y, 1), predictions=tf.argmax(predictions, 1))
    with tf.name_scope('Loss'):
        xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy, name="loss")
    with tf.name_scope('Train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        trainer = optimizer.minimize(loss, name="training_op")

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    total_batch = mnist.train.num_examples // batch_size
    for batch in range(total_batch):
        tf.local_variables_initializer().run()
        xBatch, yBatch = mnist.train.next_batch(batch_size)
        xBatch = xBatch.reshape((batch_size, num_steps, num_inputs))
        sess.run(trainer, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        miniBatchAccuracy1 = sess.run(accuracy1, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        print('[hand-written] Batch {} accuracy: {}'.format(batch, miniBatchAccuracy1))
        accuracy_op_val = sess.run(accuracy_op, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})
        miniBatchAccuracy2 = sess.run(accuracy2)
        print("[tf.metrics.accuracy] Batch {} accuracy: {}".format(batch, miniBatchAccuracy2))
    sess.close()
I print the accuracy values for each batch using these two approaches, and they are different. Shouldn't the results be the same?
[hand-written] Batch 0 accuracy: 0.09600000083446503
[tf.metrics.accuracy] Batch 0 accuracy: 0.09399999678134918
[hand-written] Batch 1 accuracy: 0.1120000034570694
[tf.metrics.accuracy] Batch 1 accuracy: 0.07800000160932541
[hand-written] Batch 2 accuracy: 0.10199999809265137
[tf.metrics.accuracy] Batch 2 accuracy: 0.09600000083446503
[hand-written] Batch 3 accuracy: 0.12999999523162842
[tf.metrics.accuracy] Batch 3 accuracy: 0.12800000607967377
[hand-written] Batch 4 accuracy: 0.1379999965429306
[tf.metrics.accuracy] Batch 4 accuracy: 0.10199999809265137
[hand-written] Batch 5 accuracy: 0.16200000047683716
[tf.metrics.accuracy] Batch 5 accuracy: 0.1340000033378601
[hand-written] Batch 6 accuracy: 0.1340000033378601
[tf.metrics.accuracy] Batch 6 accuracy: 0.12600000202655792
[hand-written] Batch 7 accuracy: 0.12999999523162842
[tf.metrics.accuracy] Batch 7 accuracy: 0.16200000047683716
...
...
...
...
When measuring the accuracy in both cases, you are passing the dropout keep probability as 0.5. This is the reason it gives two different values. Set the dropout value to 1.0 and you should see similar values for both cases.
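A minimal sketch of the evaluation part of the loop with output_keep_prob fed as 1.0 (keeping the rest of the question's loop unchanged; this is an illustration of the point above, not a full rewrite):
# Sketch only: train with dropout, but evaluate both accuracy ops with the
# keep probability at 1.0 (the placeholder's default), so no units are dropped.
sess.run(trainer, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 0.5})

miniBatchAccuracy1 = sess.run(accuracy1,
                              feed_dict={X: xBatch, y: yBatch, output_keep_prob: 1.0})
sess.run(accuracy_op, feed_dict={X: xBatch, y: yBatch, output_keep_prob: 1.0})
miniBatchAccuracy2 = sess.run(accuracy2)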

Neural Network - ValueError: Cannot feed value of shape

I'm new to Python and TensorFlow. To start, I watched the MNIST tutorial and understood it so far.
But now I have to create a new neural network with numerical input data.
I got a dataset which delivers an input_data and v_data.
If I run input_data.shape -> (1000,25,4)
If I run v_data.shape -> (1000,2)
What I tried to do is split the data into (training + validation) and testing sets.
Training + validation = 90% of the train data (90% of input.pkl)
Testing data = the remaining 10%
And then I divided the 90% of the input data into training and validation (70% training, 30% validation).
The network should correctly predict based on v_data, but I still get an error. See the code and the error below.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
    input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
X = tf.placeholder('float', [25, 4], name='X')
y = tf.placeholder('float', name='y')
# Weights Matrices
W1 = tf.Variable(tf.truncated_normal([4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
    input_layer = {'weights': W1, 'biases': b1}
    hidden_layer_1 = {'weights': W2, 'biases': b2}
    hidden_layer_2 = {'weights': W3, 'biases': b3}
    output_layer = {'weights': W4, 'biases': b4}
    input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
    input_layer_sum = tf.nn.relu(input_layer_sum)
    hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
    hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
    hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
    hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
    output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
    return output_layer_sum
# Train the Neural Network
def nn_train(X):
    pred = nn_model(X)
    pred = tf.identity(pred)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        #saver = tf.train.Saver()
        sess.run(init_op)
        for epoch in range(epochs):
            epoch_loss = 0.0
            i = 0
            while i < len(X_train_data):
                start = i
                end = i + batch_size
                batch_x = np.array(X_train_data[start:end])
                batch_y = np.array(y_train_data[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
                epoch_loss += c
                i += batch_size
            epoch_errors.append(epoch_loss)
            print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
        correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
        print('Acc: ', accuracy.eval({X: X_test_data, y: y_test_data}))

if __name__ == "__main__":
    nn_train(X)
The following error
ValueError: Cannot feed value of shape (100, 25, 4) for Tensor 'X:0', which has shape '(25, 4)'
occurs in line 105
_, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
The placeholder you have defined for the input X has the shape (25,4)
tf.placeholder('float', [25, 4], name='X')
But the input you are providing is of the shape (100, 25, 4) where 100 is your batch size.
Change the definition to
tf.placeholder('float', [None, 25, 4], name='X')
and the error should be gone. Here, None takes care of the batch size automatically.
Update: Sorry, I didn't go through the whole code. Your code needs a few fixes.
The correct syntax for feeding data to a placeholder is:
X = tf.placeholder(tf.float32, [None, input_dim], name='X')
Now, if you are dealing with images, input_dim will be the length of the flattened array for one example, i.e. if your image has dimensions 25x4, input_dim should be 25*4 = 100. It should be equal to the first dimension of your weight matrix W1 here.
Also, before feeding your batch you will need to reshape it.
Below is the fixed code (changes are commented):
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Imports
import tensorflow as tf
import pickle as pkl
import numpy as np
# load data
with open('input.pkl', 'rb') as f:
    input_data = pkl.load(f)
f.close()
X_train, y_train = input_data
#split data into train / validation and test
X_input = X_train[0:900]
y_input = y_train[0:900]
#print (X_input.shape)
#print (y_input.shape)
X_train_data = X_input[0:630]
X_test_data = X_input[630:900]
y_train_data = y_input[0:630]
y_test_data = y_input[630:900]
# Variables
hidden_layer_1_nodes = 300
hidden_layer_2_nodes = 100
output_layer_nodes = 100
epochs = 10
classes = 2
epoch_errors = []
stddev = 0.035
learning_rate = 0.08
batch_size = 100
#print (X_train_data[0])
# TF Placeholders
# input data should be of the shape (batch_size, flattened data for one example). Also note the correct shape of y.
X = tf.placeholder(tf.float32, [None, 25 * 4], name='X')
y = tf.placeholder(tf.float32, [None, classes], name='y')
# Weights Matrices. First dimension of W1 == second dimension of X
W1 = tf.Variable(tf.truncated_normal([25 * 4, hidden_layer_1_nodes], stddev=stddev), name='W1')
W2 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes, hidden_layer_2_nodes], stddev=stddev), name='W2')
W3 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes, output_layer_nodes], stddev=stddev), name='W3')
W4 = tf.Variable(tf.truncated_normal([output_layer_nodes, classes], stddev=stddev), name='W4')
# Biases Vectors
b1 = tf.Variable(tf.truncated_normal([hidden_layer_1_nodes], stddev=stddev), name='b1')
b2 = tf.Variable(tf.truncated_normal([hidden_layer_2_nodes], stddev=stddev), name='b2')
b3 = tf.Variable(tf.truncated_normal([output_layer_nodes], stddev=stddev), name='b3')
b4 = tf.Variable(tf.truncated_normal([classes], stddev=stddev), name='b4')
# Define the Neural Network
def nn_model(X):
    input_layer = {'weights': W1, 'biases': b1}
    hidden_layer_1 = {'weights': W2, 'biases': b2}
    hidden_layer_2 = {'weights': W3, 'biases': b3}
    output_layer = {'weights': W4, 'biases': b4}
    input_layer_sum = tf.add(tf.matmul(X, input_layer['weights']), input_layer['biases'])
    input_layer_sum = tf.nn.relu(input_layer_sum)
    hidden_layer_1_sum = tf.add(tf.matmul(input_layer_sum, hidden_layer_1['weights']), hidden_layer_1['biases'])
    hidden_layer_1_sum = tf.nn.relu(hidden_layer_1_sum)
    hidden_layer_2_sum = tf.add(tf.matmul(hidden_layer_1_sum, hidden_layer_2['weights']), hidden_layer_2['biases'])
    hidden_layer_2_sum = tf.nn.relu(hidden_layer_2_sum)
    output_layer_sum = tf.add(tf.matmul(hidden_layer_2_sum, output_layer['weights']), output_layer['biases'])
    return output_layer_sum
# Train the Neural Network
def nn_train(X):
    pred = nn_model(X)
    pred = tf.identity(pred)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate).minimize(cost)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        #saver = tf.train.Saver()
        sess.run(init_op)
        for epoch in range(epochs):
            epoch_loss = 0.0
            i = 0
            while i < len(X_train_data):
                start = i
                end = i + batch_size
                # reshape before feeding.
                batch_x = np.array(X_train_data[start:end]).reshape(batch_size, 25 * 4)
                batch_y = np.array(y_train_data[start:end]).reshape(batch_size, classes)
                _, c = sess.run([optimizer, cost], feed_dict={X: batch_x, y: batch_y})
                epoch_loss += c
                i += batch_size
            epoch_errors.append(epoch_loss)
            print('Epoch ', epoch + 1, ' of ', epochs, ' with loss: ', epoch_loss)
        correct_result = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_result, 'float'))
        print('Acc: ', accuracy.eval({X: X_test_data.reshape(-1, 25 * 4), y: y_test_data.reshape(-1, classes)}))

if __name__ == "__main__":
    nn_train(X)
UPDATE: Sorry, I posted the wrong error.
@Kumar, I changed the batch size to 30 (so 21*30 = 630). It now prints the epochs, but in a somewhat weird way:
Epoch 1 of 10 with loss: 1680690.2648780346
Epoch 2 of 10 with loss: 2382142.9208984375
Epoch 3 of 10 with loss: 4215628.857421875
Epoch 4 of 10 with loss: 9046892.295166016
Epoch 5 of 10 with loss: 23961644.453125
Epoch 6 of 10 with loss: 31733882.34375
Epoch 7 of 10 with loss: 46124696.609375
Epoch 8 of 10 with loss: 61760446.28125
Epoch 9 of 10 with loss: 89145610.59375
Epoch 10 of 10 with loss: 121249417.25
And I received the following error for:
print('Acc: ', accuracy.eval({X:X_test_data, y:y_test_data}))
ValueError: Cannot feed value of shape (270, 25, 4) for Tensor 'X:0', which has shape '(?, 100)'
Process finished with exit code 1
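The fixed code above already flattens the test set before evaluation; feeding the raw (270, 25, 4) array instead would produce exactly this shape error against the (None, 100) placeholder. A minimal sketch of that evaluation line, under that assumption:
# Sketch: flatten the test examples to match X's (None, 25 * 4) shape before evaluating,
# exactly as in the fixed code above.
print('Acc: ', accuracy.eval({X: X_test_data.reshape(-1, 25 * 4),
                              y: y_test_data.reshape(-1, classes)}))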

Neural Network With One Hidden Layer Cannot Be Trained

I was trying to implement an NN with one hidden layer using TensorFlow to recognize MNIST handwritten digits. I was using the gradient descent method to train the NN. However, it seems that my training of the NN did not work at all, as the testing accuracy did not change during the training process.
Can anyone help me figure out what went wrong?
Here is my code.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
batch_size = 100
n_batch = mnist.train.num_examples // batch_size
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
#First layer of the NN
W1 = tf.Variable(tf.zeros([784,10]))
b1 = tf.Variable(tf.zeros([10]))
out1 = tf.nn.softmax(tf.matmul(x, W1) + b1)
#Second layer of the NN
W2 = tf.Variable(tf.zeros([10,10]))
b2 = tf.Variable(tf.zeros([10]))
prediction = tf.nn.softmax(tf.matmul(out1, W2) + b2)
loss = tf.reduce_mean(tf.square(y - prediction))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
init = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(prediction, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(101):
        for batch in range(n_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
        acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        print("Iter " + str(epoch) + ", Testing Accuracy " + str(acc))
Do not initialize your model with all zeros. If you do so, it is likely that the gradient at that point (in the parameter space) is also zero. This results in the gradient update being nonexistent, so your parameters simply will not change. To avoid that, use random initialization.
i.e.
Change
#First layer of the NN
W1 = tf.Variable(tf.zeros([784,10]))
b1 = tf.Variable(tf.zeros([10]))
out1 = tf.nn.softmax(tf.matmul(x, W1) + b1)
#Second layer of the NN
W2 = tf.Variable(tf.zeros([10,10]))
b2 = tf.Variable(tf.zeros([10]))
to
#First layer of the NN
W1 = tf.Variable(tf.truncated_normal([784,10], stddev=0.1))
b1 = tf.Variable(tf.truncated_normal([10], stddev=0.1))
out1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)
# out1 = tf.nn.softmax(tf.matmul(x, W1) + b1)
#Second layer of the NN
W2 = tf.Variable(tf.truncated_normal([10,10], stddev=0.1))
b2 = tf.Variable(tf.truncated_normal([10],stddev=0.1))
Now the model is able to train. You'll also see that I removed the softmax nonlinearity from the first layer and substituted it with a sigmoid. I did that because softmax layers impose a restriction on the output: they force the layer's output to add up to one (that's one reason softmax is often used in the very last layer, to give the final output a probability interpretation). This restriction caused the model to stop learning at 30% accuracy in a quick test. By using a sigmoid, the accuracy reached 89%, a much better performance.
Other examples of nonlinearities you could use in intermediate layers are listed below (a short sketch follows the list):
Hyperbolic tangent
ReLU
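As a hedged sketch (same shapes and variable names as above), the first layer with either of those activations would look like:
# Sketch only: the same hidden layer with tanh or ReLU instead of sigmoid.
out1 = tf.nn.tanh(tf.matmul(x, W1) + b1)   # hyperbolic tangent
# or
out1 = tf.nn.relu(tf.matmul(x, W1) + b1)   # ReLU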
