tensorflow mnist example with my own get_next_minibatch - python

I just started using tensorflow and I followed the tutorial example on MNIST dataset. It went well, I got like around 90% accuracy.
But after I replace the next_batch with my own version, the result was way worse than it used to be, usually 50%.
Instead of using the data Tensorflow downloaded and parsed, I download the dataset from this website. Using numpy to get what I want.
df = pd.read_csv('mnist_train.csv', header=None)
X = df.drop(0,1)
Y = df[0]
temp = np.zeros((Y.size, Y.max()+1))
temp[np.arange(Y.size),Y] = 1
np.save('X',X)
np.save('Y',temp)
do the same thing to the test data, then following the tutorial, nothing is changed
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
X = np.load('X.npy')
Y = np.load('Y.npy')
X_test = np.load('X_test.npy')
Y_test = np.load('Y_test.npy')
BATCHES = 1000
W = tf.Variable(tf.truncated_normal([784,10], stddev=0.1))
# W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
right here is my own get_mini_batch, I shuffle the original data's index, then every time I get 100 data out of it, which seems to be like the exact same thing example code does. The only difference is data I throw away some of the data in the tail.
pos = 0
idx = np.arange(X.shape[0])
np.random.shuffle(idx)
for _ in range(1000):
batch_xs, batch_ys = X[idx[range(pos,pos+BATCHES)],:], Y[idx[range(pos,pos+BATCHES)],]
if pos+BATCHES >= X.shape[0]:
pos = 0
idx = np.arange(X.shape[0])
np.random.shuffle(idx)
pos += BATCHES
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print(sess.run(accuracy, feed_dict={x: X_test, y_: Y_test}))
It confuses me why my version is way worse than the tutorial one.

Like lejilot said, we should normalize the data before we push it into the neural network.
See this post

Related

Why is my simple neural network not learning?

I am new to TensorFlow and neural networks. I am trying to build a neural network that can classify images in the CIFAR-10 dataset.
Here is my code:
import tensorflow as tf
import pickle
import numpy as np
import random
image_size= 32*32*3 # because 3 channels
n_classes = 10
lay1_size = 50
batch_size = 100
def unpickle(filename):
with open(filename,'rb') as f:
data = pickle.load(f, encoding='latin1')
x = data['data']
y = data['labels']
# shuffle the data
z = list(zip(x,y))
random.shuffle(z)
x, y = zip(*z)
x = x[:batch_size]
y = y[:batch_size]
# covert decimals to one hot arrays
y = np.eye(n_classes)[[y]]
return x, y
# set up network
def add_layer(inputs, in_size, out_size, activation_function=None):
W = tf.Variable(tf.random_normal([in_size, out_size]), dtype=tf.float32)
b = tf.Variable(tf.zeros([1,out_size]) + 0.1, dtype=tf.float32)
Wx_plus_b = tf.matmul(inputs, W) + b
if activation_function is None:
output = Wx_plus_b
else:
output = activation_function(Wx_plus_b)
return output
def compute_accuracy(v_xs, v_ys):
global prediction
y_pre = sess.run(prediction, feed_dict={xs:v_xs})
correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy, feed_dict={xs:v_xs, ys:v_ys})
return result
xs = tf.placeholder(tf.float32, [None,image_size])
ys = tf.placeholder(tf.float32)
lay1 = add_layer(xs, image_size, lay1_size, activation_function=tf.nn.tanh)
lay2 = add_layer(lay1, lay1_size, lay1_size, activation_function=tf.nn.tanh)
prediction = add_layer(lay2, lay1_size, n_classes, activation_function=tf.nn.softmax)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# run network
sess = tf.Session()
sess.run(tf.initialize_all_variables())
x_test, y_test = unpickle('test_batch')
for i in range(1000):
x_train, y_train = unpickle('data_batch_1')
sess.run(train_step, feed_dict={xs:x_train,ys:y_train})
if i % 50 == 0:
print(compute_accuracy(x_test, y_test))
sess.close()
I am using two hidden layers with 50 nodes in each layer. I am running 1,000 cycles, where in each cycle I shuffle data in the dataset and pick the first 100 images of that shuffle to train on.
I am consistently getting ~0.1 accuracy, the machine is not learning at all.
When I modify the code to use the MNIST dataset instead of the CIFAR-10 dataset I get ~0.87 accuracy.
I took code from an MNIST tutorial and am trying to modify it to classify CIFAR-10 data.
I can't figure out what's wrong here. How do I get my algorithm to learn?

Tensorflow - Testing a mnist neural net with my own images

I'm trying to write a script that will allow me to draw an image of a digit and then determine what digit it is with a model trained on MNIST.
Here is my code:
import random
import image
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
import scipy.ndimage
mnist = input_data.read_data_sets( "MNIST_data/", one_hot=True )
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize (cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for i in range( 1000 ):
batch_xs, batch_ys = mnist.train.next_batch( 1000 )
sess.run(train_step, feed_dict= {x: batch_xs, y_: batch_ys})
print ("done with training")
data = np.ndarray.flatten(scipy.ndimage.imread("im_01.jpg", flatten=True))
result = sess.run(tf.argmax(y,1), feed_dict={x: [data]})
print (' '.join(map(str, result)))
For some reason the results are always wrong but has a 92% accuracy when I use the standard testing method.
I think the problem might be how I encoded the image:
data = np.ndarray.flatten(scipy.ndimage.imread("im_01.jpg", flatten=True))
I tried looking in the tensorflow code for the next_batch() function to see how they did it, but I have no idea how I can compare against my approach.
The problem might be somewhere else too.
Any help to make the accuracy 80+% would be greatly appreciated.
I found my mistake: it encoded the reverse, blacks were at 255 instead of 0.
data = np.vectorize(lambda x: 255 - x)(np.ndarray.flatten(scipy.ndimage.imread("im_01.jpg", flatten=True)))
Fixed it.

How does one use the official Batch Normalization layer in TensorFlow?

I was trying to use batch normalization to train my Neural Networks using TensorFlow but it was unclear to me how to use the official layer implementation of Batch Normalization (note this is different from the one from the API).
After some painful digging on the their github issues it seems that one needs a tf.cond to use it properly and also a 'resue=True' flag so that the BN shift and scale variables are properly reused. After figuring that out I provided a small description of how I believe is the right way to use it here.
Now I have written a short script to test it (only a single layer and a ReLu, hard to make it smaller than this). However, I am not 100% sure how to test it. Right now my code runs with no error messages but returns NaNs unexpectedly. Which lowers my confidence that the code I gave in the other post might be right. Or maybe the network I have is weird. Either way, does someone know whats wrong? Here is the code:
import tensorflow as tf
# download and install the MNIST data automatically
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.layers.python.layers import batch_norm as batch_norm
def batch_norm_layer(x,train_phase,scope_bn):
bn_train = batch_norm(x, decay=0.999, center=True, scale=True,
is_training=True,
reuse=None, # is this right?
trainable=True,
scope=scope_bn)
bn_inference = batch_norm(x, decay=0.999, center=True, scale=True,
is_training=False,
reuse=True, # is this right?
trainable=True,
scope=scope_bn)
z = tf.cond(train_phase, lambda: bn_train, lambda: bn_inference)
return z
def get_NN_layer(x, input_dim, output_dim, scope, train_phase):
with tf.name_scope(scope+'vars'):
W = tf.Variable(tf.truncated_normal(shape=[input_dim, output_dim], mean=0.0, stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[output_dim]))
with tf.name_scope(scope+'Z'):
z = tf.matmul(x,W) + b
with tf.name_scope(scope+'BN'):
if train_phase is not None:
z = batch_norm_layer(z,train_phase,scope+'BN_unit')
with tf.name_scope(scope+'A'):
a = tf.nn.relu(z) # (M x D1) = (M x D) * (D x D1)
return a
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# placeholder for data
x = tf.placeholder(tf.float32, [None, 784])
# placeholder that turns BN during training or off during inference
train_phase = tf.placeholder(tf.bool, name='phase_train')
# variables for parameters
hiden_units = 25
layer1 = get_NN_layer(x, input_dim=784, output_dim=hiden_units, scope='layer1', train_phase=train_phase)
# create model
W_final = tf.Variable(tf.truncated_normal(shape=[hiden_units, 10], mean=0.0, stddev=0.1))
b_final = tf.Variable(tf.constant(0.1, shape=[10]))
y = tf.nn.softmax(tf.matmul(layer1, W_final) + b_final)
### training
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean( -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]) )
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
steps = 3000
for iter_step in xrange(steps):
#feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
batch_xs, batch_ys = mnist.train.next_batch(100)
# Collect model statistics
if iter_step%1000 == 0:
batch_xstrain, batch_xstrain = batch_xs, batch_ys #simualtes train data
batch_xcv, batch_ycv = mnist.test.next_batch(5000) #simualtes CV data
batch_xtest, batch_ytest = mnist.test.next_batch(5000) #simualtes test data
# do inference
train_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xs, y_:batch_ys, train_phase: False})
cv_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xcv, y_:batch_ycv, train_phase: False})
test_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xtest, y_:batch_ytest, train_phase: False})
def do_stuff_with_errors(*args):
print args
do_stuff_with_errors(train_error, cv_error, test_error)
# Run Train Step
sess.run(fetches=train_step, feed_dict={x: batch_xs, y_:batch_ys, train_phase: True})
# list of booleans indicating correct predictions
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
# accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels, train_phase: False}))
when I run it I get:
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
(2.3474066, 2.3498712, 2.3461707)
(0.49414295, 0.88536006, 0.91152304)
(0.51632041, 0.393666, nan)
0.9296
it used to be all the last ones were nan and now only a few of them. Is everything fine or am I paranoic?
I am not sure if this will solve your problem, the documentation for BatchNorm is not quite easy-to-use/informative, so here is a short recap on how to use simple BatchNorm:
First of all, you define your BatchNorm layer. If you want to use it after an affine/fully-connected layer, you do this (just an example, order can be different/as you desire):
...
inputs = tf.matmul(inputs, W) + b
inputs = tf.layers.batch_normalization(inputs, training=is_training)
inputs = tf.nn.relu(inputs)
...
The function tf.layers.batch_normalization calls variable-initializers. These are internal-variables and need a special scope to be called, which is in the tf.GraphKeys.UPDATE_OPS. As such, you must call your optimizer function as follows (after all layers have been defined!):
...
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
trainer = tf.train.AdamOptimizer()
updateModel = trainer.minimize(loss, global_step=global_step)
...
You can read more about it here. I know it's a little late to answer your question, but it might help other people coming across BatchNorm problems in tensorflow! :)
training =tf.placeholder(tf.bool, name = 'training')
lr_holder = tf.placeholder(tf.float32, [], name='learning_rate')
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
optimizer = tf.train.AdamOptimizer(learning_rate = lr).minimize(cost)
when defining the layers, you need to use the placeholder 'training'
batchNormal_layer = tf.layers.batch_normalization(pre_batchNormal_layer, training=training)

Cannot feed value of shape (500,) for Tensor 'x_17:0', which has shape '(?, 500)'

I'm just learning TensorFlow, so sorry if this is obvious. I've checked the documentation and experimented quite a bit and I just can't seem to get this to work.
def train_network():
OUT_DIMS = 1
FIN_SIZE = 500
x = tf.placeholder(tf.float32, [OUT_DIMS, FIN_SIZE], name="x")
w = tf.Variable(tf.zeros([FIN_SIZE, OUT_DIMS]), name="w")
b = tf.Variable(tf.zeros([OUT_DIMS]), name="b")
y = tf.tanh(tf.matmul(x, w) + b)
yhat = tf.placeholder(tf.float32, [None, OUT_DIMS])
cross_entropy = -tf.reduce_sum(yhat*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Launch the model
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for this_x, this_y in yield_financials():
sess.run(train_step, feed_dict={x: this_x,
yhat: this_y})
print(end=".")
sys.stdout.flush()
yield_financials() outputs an numpy array of 500 numbers and the number that I want it to guess. I've tried shuffling OUT_DIMS and FIN_SIZE around, I tried accumulating them into batches to more closely match what the tutorial looked like, I tried setting OUT_DIMS to 0, removing it entirely, and I tried replacing None with other numbers, but have not made any progress.
Try
this_x = np.reshape(this_x,(1, FIN_SIZE))
sess.run(train_step, feed_dict={x: this_x,
yhat: this_y})
I had the same problem and I solved this problem.I hope that it's helpful for u.
firstly,I transformed load data into :
train_data = np.genfromtxt(train_data1, delimiter=',')
train_label = np.transpose(train_label1, delimiter=',')
test_data = np.genfromtxt(test_data1, delimiter=',')
test_label = np.transpose(test_label1, delimiter=',')
Then,transformed trX, trY, teX, teY data into:
# convert the data
trX, trY, teX, teY = train_data,train_label, test_data, test_label
temp = trY.shape
trY = trY.reshape(temp[0], 1)
trY = np.concatenate((1-trY, trY), axis=1)
temp = teY.shape
teY = teY.reshape(temp[0], 1)
teY = np.concatenate((1-teY, teY), axis=1)
Finally,I transformed launching the graph in a session into:
with tf.Session() as sess:
# you need to initialize all variables
tf.initialize_all_variables().run()
for i in range(100):
sess.run(train_op, feed_dict={X: trX, Y: trY})
print(i, np.mean(np.argmax(teY, axis=1) == sess.run(predict_op, feed_dict={X: teX})))
That's all.

TensorFlow: parameters do not update when training

I'm implementing a classification model using TensorFlow
The problem that I'm facing is that my weights and error are not being updated when I run the training step. As a result, my network keeps returning the same results.
I've developed my model based on the MNIST example from the TensorFlow website.
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
#load dataset
dataset = np.loadtxt('char8k.txt', dtype='float', comments='#', delimiter=",")
Y = np.asmatrix( dataset[:,0] )
X = np.asmatrix( dataset[:,1:1201] )
m = 11527
labels = 26
# y is update to 11527x26
Yt = np.zeros((m,labels))
for i in range(0,m):
index = Y[0,i] - 1
Yt[i,index]= 1
Y = Yt
Y = np.asmatrix(Y)
#------------------------------------------------------------------------------
#graph settings
x = tf.placeholder(tf.float32, shape=[None, 1200])
y_ = tf.placeholder(tf.float32, shape=[None, 26])
Wtest = tf.Variable(tf.truncated_normal([1200,26], stddev=0.001))
W = tf.Variable(tf.truncated_normal([1200,26], stddev=0.001))
b = tf.Variable(tf.zeros([26]))
sess.run(tf.initialize_all_variables())
y = tf.nn.softmax(tf.matmul(x,W) + b)
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
Wtest = W
for i in range(10):
print("iteracao:")
print(i)
Xbatch = X[np.random.randint(X.shape[0],size=100),:]
Ybatch = Y[np.random.randint(Y.shape[0],size=100),:]
train_step.run(feed_dict={x: Xbatch, y_: Ybatch})
print("atualizacao de pesos")
print(Wtest==W)#monitora atualizaƧao dos pesos
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("precisao:Y")
print accuracy.eval(feed_dict={x: X, y_: Y})
print(" ")
print(" ")
The issue probably arises from how you initialize the weight matrix, W. If it is initialized to all zeroes, all of the neurons will follow the same gradient in each step, which leads to the network not training. Replacing the line
W = tf.Variable(tf.zeros([1200,26]))
...with something like
W = tf.Variable(tf.truncated_normal([1200,26], stddev=0.001))
...should cause it to start training.
This question on the CrossValidated site has a good explanation of why you should not initialize all of your weights to zero.

Categories

Resources