I am new to TensorFlow and neural networks. I am trying to build a neural network that can classify images in the CIFAR-10 dataset.
Here is my code:
import tensorflow as tf
import pickle
import numpy as np
import random

image_size = 32*32*3  # because 3 channels
n_classes = 10
lay1_size = 50
batch_size = 100

def unpickle(filename):
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
        x = data['data']
        y = data['labels']

        # shuffle the data
        z = list(zip(x, y))
        random.shuffle(z)
        x, y = zip(*z)
        x = x[:batch_size]
        y = y[:batch_size]

        # convert integer labels to one-hot arrays
        y = np.eye(n_classes)[[y]]

        return x, y

# set up network
def add_layer(inputs, in_size, out_size, activation_function=None):
    W = tf.Variable(tf.random_normal([in_size, out_size]), dtype=tf.float32)
    b = tf.Variable(tf.zeros([1, out_size]) + 0.1, dtype=tf.float32)
    Wx_plus_b = tf.matmul(inputs, W) + b
    if activation_function is None:
        output = Wx_plus_b
    else:
        output = activation_function(Wx_plus_b)
    return output

def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys})
    return result

xs = tf.placeholder(tf.float32, [None, image_size])
ys = tf.placeholder(tf.float32)

lay1 = add_layer(xs, image_size, lay1_size, activation_function=tf.nn.tanh)
lay2 = add_layer(lay1, lay1_size, lay1_size, activation_function=tf.nn.tanh)
prediction = add_layer(lay2, lay1_size, n_classes, activation_function=tf.nn.softmax)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
# train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# run network
sess = tf.Session()
sess.run(tf.initialize_all_variables())

x_test, y_test = unpickle('test_batch')
for i in range(1000):
    x_train, y_train = unpickle('data_batch_1')
    sess.run(train_step, feed_dict={xs: x_train, ys: y_train})
    if i % 50 == 0:
        print(compute_accuracy(x_test, y_test))
sess.close()
I am using two hidden layers with 50 nodes in each layer. I am running 1,000 cycles, where in each cycle I shuffle data in the dataset and pick the first 100 images of that shuffle to train on.
I am consistently getting ~0.1 accuracy; the network is not learning at all.
When I modify the code to use the MNIST dataset instead of the CIFAR-10 dataset I get ~0.87 accuracy.
I took code from an MNIST tutorial and am trying to modify it to classify CIFAR-10 data.
I can't figure out what's wrong here. How do I get my algorithm to learn?
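One difference from the MNIST tutorial setup is worth noting here (this is not part of the original question): the MNIST tutorial feeds pixel values already scaled to [0, 1], while the raw CIFAR-10 batches contain uint8 values in [0, 255]. A minimal sketch of scaling the unpickled data before feeding it in, reusing the names from the code above:

# Sketch only: scale the raw uint8 pixel values to [0, 1] before training,
# mirroring what the MNIST input pipeline already does.
x_train, y_train = unpickle('data_batch_1')
x_train = np.asarray(x_train, dtype=np.float32) / 255.0

x_test, y_test = unpickle('test_batch')
x_test = np.asarray(x_test, dtype=np.float32) / 255.0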
Related
I'm trying to create a neural network model for a Kaggle competition using the MNIST dataset. Currently, my code looks like this, since I am trying to capture certain metrics. However, I can't figure out how to turn this into an output file to submit.
Current code:
import time
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import numpy as np
from tensorflow.python.framework import ops
ops.reset_default_graph()

#requests.packages.urllib3.disable_warnings()
import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # Legacy Python that doesn't verify HTTPS certificates by default
    pass
else:
    # Handle target environment that doesn't support HTTPS verification
    ssl._create_default_https_context = _create_unverified_https_context

# Load training and testing data directly from TensorFlow
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Initialize metrics
metrics = {}

# Initialize metric names
names = ['Number of Hidden Layers', 'Nodes per Layer', 'Time in Seconds',
         'Training Set Accuracy', 'Test Set Accuracy']

# Set fixed parameters
n_epochs = 20
batch_size = 50
learning_rate = 0.01

# Function that creates batch generator used in training
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

# Start timer
start = time.process_time()

n_hidden = 300

# Reset the session
tf.reset_default_graph()
#ops.reset_default_graph()
tf.set_random_seed(2141)
#tf.random.set_seed(2141)
np.random.seed(9347)

# Set X and y placeholders
X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden, name="hidden2",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, 10, name="outputs")
    y_proba = tf.nn.softmax(logits)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})

# Record the clock time it takes
duration = time.process_time() - start

metrics['Model 1'] = [2, n_hidden, duration, acc_train, acc_test]

# Convert metrics dictionary to dataframe for display
results_summary = pd.DataFrame.from_dict(metrics, orient='index')
results_summary.columns = names

# Sort by model number
results_summary.reset_index(inplace=True)
results_summary.sort_values(by=['index'], axis=0, inplace=True)
results_summary.set_index(['index'], inplace=True)
results_summary.index.name = None

# Export to csv
results_summary.to_csv('results_summary.csv')

results_summary
I need to create an output that looks something like this in a CSV file:
   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      0
4        5      3
Would I have to recreate the whole thing in order to actually create "y_pred" when doing something like model.predict(X_test), or can I just reshape the existing code in some way to do this? Ideally, I would like to capture the predicted values and compare them to the true values using a confusion matrix.
I've tried the following but keep getting errors like AttributeError: module 'tensorflow.compat.v1' has no attribute 'run':
feed_dict = {X: X_test}
classification = tf.run(y_proba, feed_dict)
label = numpy.argmax(classification, axis=-1)
Thanks in advance.
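A minimal sketch (not from the original post) of the direction being asked about: since y_proba is just a tensor in the graph, it can be evaluated with sess.run inside the same with tf.Session() block used for training, and the argmax of each row written out in the ImageId/Label format. Everything below reuses names from the code above; the submission file name is only an example.

# Sketch only: run inside the training session, after the epoch loop.
probs = sess.run(y_proba, feed_dict={X: X_test})
y_pred = np.argmax(probs, axis=1)

submission = pd.DataFrame({
    'ImageId': np.arange(1, len(y_pred) + 1),  # Kaggle-style 1-based ids
    'Label': y_pred
})
submission.to_csv('submission.csv', index=False)

# The same y_pred can be compared with y_test for a confusion matrix,
# for example with pd.crosstab(y_test, y_pred).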
I am new to TensorFlow. I'm creating a simple fully connected neural network for image classification. The images have shape (-1, 224, 224, 3) and the labels have shape (-1, 2). However, the accuracy does not improve at all; it stays at 47% and does not change, even if I change the learning rate, the optimizer, or the test set. Any help will be greatly appreciated! Thank you!
import matplotlib.pyplot as plt
from util.MacOSFile import MacOSFile
import numpy as np
import _pickle as pickle
import tensorflow as tf

def pickle_load(file_path):
    with open(file_path, "rb") as f:
        return pickle.load(MacOSFile(f))

###hyperparameters###
batch_size = 32
iterations = 10

###loading training data start###
data = pickle_load('training.pickle')
x_train = []
y_train = []
for features, labels in data:
    x_train.append(features)
    y_train.append(labels)
x_train = np.array(x_train)
y_train = np.array(y_train)
###################################

###loading test data start###
data = pickle_load('testing.pickle')
x_test = []
y_test = []
for features, labels in data:
    x_test.append(features)
    y_test.append(labels)
x_test = np.array(x_test)
y_test = np.array(y_test)
###################################

###neural network###
x_s = tf.placeholder(tf.float32, [None, 224, 224, 3])
y_s = tf.placeholder(tf.float32, [None, 2])

x_image = tf.reshape(x_s, [-1, 150528])
W_1 = tf.Variable(tf.truncated_normal([150528, 8224]))
b_1 = tf.Variable(tf.zeros([8224]))
h_fc1 = tf.nn.relu(tf.matmul(x_image, W_1) + b_1)

W_2 = tf.Variable(tf.truncated_normal([8224, 1028]))
b_2 = tf.Variable(tf.zeros([1028]))
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_2) + b_2)

W_3 = tf.Variable(tf.truncated_normal([1028, 2]))
b_3 = tf.Variable(tf.zeros([2]))
prediction = tf.nn.softmax(tf.matmul(h_fc2, W_3) + b_3)

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_s, logits=prediction)
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)
init = tf.global_variables_initializer()
###neural network end###

with tf.Session() as sess:
    sess.run(init)
    train_sample_size = len(data)  # how many data points?
    max_batches_in_data = int(train_sample_size/batch_size)  # max number of batches possible; 623

    for iteration in range(iterations):
        print('Iteration ', iteration)
        epoch = int(iteration/max_batches_in_data)
        start_idx = (iteration - epoch*max_batches_in_data)*batch_size
        end_idx = (iteration + 1 - epoch*max_batches_in_data)*batch_size
        mini_x_train = x_train[start_idx: end_idx]
        mini_y_train = y_train[start_idx: end_idx]

        ##actual training is here
        sess.run(train_step, feed_dict={x_s: mini_x_train, y_s: mini_y_train})

        #test accuracy#
        y_pre = sess.run(prediction, feed_dict={x_s: x_train[:100]})
        correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(y_train[:100], 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        result = sess.run(accuracy, feed_dict={x_s: x_train[:100], y_s: y_train[:100]})
        print("Result: {0}".format(result))
I've made a few observations. Firstly, your code is a bit outdated: you don't have to set up fully connected layers manually, there is something built in for that: dense layers.
If you load in images, why don't you use convolutional layers as well?
I also recommend the Adam optimizer; just leave its parameters at their default values. I hope I helped a bit :)
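A minimal sketch (not part of the original answer) of what those suggestions could look like for the shapes in the question, (None, 224, 224, 3) images and 2-class one-hot labels; the filter counts, layer sizes, and variable names are illustrative only:

# Sketch only: small conv net built with tf.layers instead of hand-rolled matmuls.
x_s = tf.placeholder(tf.float32, [None, 224, 224, 3])
y_s = tf.placeholder(tf.float32, [None, 2])

net = tf.layers.conv2d(x_s, filters=32, kernel_size=3, activation=tf.nn.relu)
net = tf.layers.max_pooling2d(net, pool_size=2, strides=2)
net = tf.layers.conv2d(net, filters=64, kernel_size=3, activation=tf.nn.relu)
net = tf.layers.max_pooling2d(net, pool_size=2, strides=2)
net = tf.layers.flatten(net)
net = tf.layers.dense(net, 128, activation=tf.nn.relu)
logits = tf.layers.dense(net, 2)  # no softmax here; the loss below applies it

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_s, logits=logits))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)  # default parameters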
I'm currently trying to create a simple web application for interactive neural network training with Flask. What I'm struggling with is retrieving the weights of the hidden layers after a feedforward neural network has been trained; my goal is to build a real back-end for the TensorFlow Playground.
Consider the following weight initialisation:
# Weight initializations
tW1 = init_weights(shape=(n_features, hidden_nodes))
tW2 = init_weights(shape=(hidden_nodes, output_nodes))
How does one go about retrieving the trained values of tW1 and tW2 after training has completed in TensorFlow?
Here's a sample of the code:
def retrieve_data():
    """Retrieves the data - to be expanded for custom database access + S3 retrieval + URL"""
    result = pd.read_csv('snp_data.csv', parse_dates=['Date'], index_col=['Date'])
    return result

def get_columns(data, columns):
    features = data.ix[:, columns]
    return features

def preprocess(data):
    """Data preprocessing"""
    result = (data - data.mean()) / data.std(ddof=0)
    result = result.fillna(0)
    return result

def init_weights(shape):
    """ Weights initialization """
    weights = tf.random_normal(shape=shape, stddev=0.1)
    return tf.Variable(weights)

def forwardprop(X, w_1, w_2):
    """Forward propagation"""
    h = tf.nn.relu(tf.matmul(X, w_1))
    y_hat = tf.matmul(h, w_2)
    return y_hat

# #app.route('/train')
def train():
    data = retrieve_data()

    train_x = get_columns(data, columns=['Open', 'Close'])
    train_x = preprocess(data=train_x).as_matrix().astype(np.float32)
    train_x = train_x[:(len(train_x) - (len(train_x) % 32))]

    train_y = get_columns(data, columns=['Adj Close']).as_matrix().astype(np.float32)
    train_y = train_y[:(len(train_y) - (len(train_y) % 32))]

    # Number of input nodes
    n_features = train_x.shape[1]
    # Number of output nodes
    output_nodes = train_y.shape[1]
    # Number of hidden nodes
    hidden_nodes = 20

    # TF Placeholders for the inputs and outputs
    tx = tf.placeholder(tf.float32, shape=(None, n_features))
    ty = tf.placeholder(tf.float32, shape=(None, output_nodes))

    # Weight initializations
    tW1 = init_weights(shape=(n_features, hidden_nodes))
    tW2 = init_weights(shape=(hidden_nodes, output_nodes))

    # Forward propagation
    y_hat = forwardprop(tx, tW1, tW2)

    # Backward Propagation
    tMSE = tf.reduce_mean(tf.square(y_hat - ty))
    learning_rate = 0.001
    tOptimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    tOptimize = tOptimizer.minimize(tMSE)

    batch_size = 32
    n_epochs = 8

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for i_e in range(n_epochs):
            for i in range(0, train_x.shape[0], batch_size):
                batch_X = train_x[i:i + batch_size, ...]
                batch_y = train_y[i:i + batch_size]
                _, loss = sess.run([tOptimize, tMSE], feed_dict={tx: batch_X, ty: batch_y})
                print(i, loss)

    return 'Flask Dockerized'
This should be as simple as final_tW1, final_tW2 = sess.run([tW1, tW2]) after the for loop has completed. You don't need to feed anything because the variables maintain their own values that don't depend on placeholders.
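A minimal sketch of where that call could sit in the train() function above; the session block and the training loop are from the question, and converting the arrays with .tolist() for a JSON response is only an assumption about how the Flask route might use them:

with tf.Session() as sess:
    sess.run(init)
    for i_e in range(n_epochs):
        for i in range(0, train_x.shape[0], batch_size):
            batch_X = train_x[i:i + batch_size, ...]
            batch_y = train_y[i:i + batch_size]
            _, loss = sess.run([tOptimize, tMSE], feed_dict={tx: batch_X, ty: batch_y})

    # Variables hold their own values, so no feed_dict is needed here.
    final_tW1, final_tW2 = sess.run([tW1, tW2])

# final_tW1 and final_tW2 are now plain NumPy arrays; final_tW1.tolist() gives a
# JSON-serialisable nested list if the weights are to be returned from the route.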
I have a dataset with 5 columns; I am feeding in the first 3 columns as my inputs and the other 2 columns as my outputs. I have successfully executed the program, but I am not sure how to test the model by giving my own values as input and getting a predicted output from the model.
Can anyone please help me: how can I actually test the model with my own values after training is done? I am using TensorFlow in Python. I am able to display the testing accuracy, but how do I actually get a prediction if I pass some arbitrary input (here, I need to pass 3 input values to get 2 output values)?
Here is my code:
# Implementation of a simple MLP network with one hidden layer. Tested on the iris data set.
# Requires: numpy, sklearn>=0.18.1, tensorflow>=1.0
# NOTE: In order to make the code simple, we rewrite x * W_1 + b_1 = x' * W_1'
# where x' = [x | 1] and W_1' is the matrix W_1 appended with a new row with elements b_1's.
# Similarly, for h * W_2 + b_2
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd

RANDOM_SEED = 1000
tf.set_random_seed(RANDOM_SEED)

def init_weights(shape):
    """ Weight initialization """
    weights = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(weights)

def forwardprop(X, w_1, w_2):
    """
    Forward-propagation.
    IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
    """
    h = tf.nn.sigmoid(tf.matmul(X, w_1))  # The \sigma function
    yhat = tf.matmul(h, w_2)              # The \varphi function
    return yhat

def get_iris_data():
    """ Read the iris data set and split them into training and test sets """
    df = pd.read_csv("H:\MiniThessis\Sample.csv")
    train_X = np.array(df[df.columns[0:3]])
    train_Y = np.array(df[df.columns[3:]])
    print(train_X)

    # Convert into one-hot vectors
    #num_labels = len(np.unique(train_Y))
    #all_Y = np.eye(num_labels)[train_Y]  # One liner trick!
    #print()
    return train_test_split(train_X, train_Y, test_size=0.33, random_state=RANDOM_SEED)

def main():
    train_X, test_X, train_y, test_y = get_iris_data()

    # Layer's sizes
    x_size = train_X.shape[1]  # Number of input nodes: 4 features and 1 bias
    h_size = 256               # Number of hidden nodes
    y_size = train_y.shape[1]  # Number of outcomes (3 iris flowers)

    # Symbols
    X = tf.placeholder("float", shape=[None, x_size])
    y = tf.placeholder("float", shape=[None, y_size])

    # Weight initializations
    w_1 = init_weights((x_size, h_size))
    w_2 = init_weights((h_size, y_size))

    # Forward propagation
    yhat = forwardprop(X, w_1, w_2)
    predict = tf.argmax(yhat, axis=1)

    # Backward propagation
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
    updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

    # Run SGD
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for epoch in range(3):
        # Train with each example
        for i in range(len(train_X)):
            sess.run(updates, feed_dict={X: train_X[i: i + 1], y: train_y[i: i + 1]})

        train_accuracy = np.mean(np.argmax(train_y, axis=1) == sess.run(predict, feed_dict={X: train_X, y: train_y}))
        test_accuracy = np.mean(np.argmax(test_y, axis=1) == sess.run(predict, feed_dict={X: test_X, y: test_y}))

        print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
              % (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))

    correct_Prediction = tf.equal((tf.arg_max(predict, 1)), (tf.arg_max(y, 1)))
    best = sess.run([predict], feed_dict={X: np.array([[20.14, 46.93, 1014.66]])})
    #print(correct_Prediction)
    print(best)

    sess.close()

if __name__ == '__main__':
    main()
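A minimal sketch (not from the original post) of the direction being asked about: predict collapses the network output with argmax, so evaluating yhat instead returns the two raw output values for any row of three inputs. The names come from main() above, the sample input is the one already used in the question, and this has to run while the session is still open:

# Sketch only: query the trained network with an arbitrary 3-value input.
new_input = np.array([[20.14, 46.93, 1014.66]], dtype=np.float32)

raw_outputs = sess.run(yhat, feed_dict={X: new_input})         # shape (1, 2): the two output values
predicted_index = sess.run(predict, feed_dict={X: new_input})  # index of the larger output

print(raw_outputs)
print(predicted_index)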
I'm implementing a classification model using TensorFlow.
The problem that I'm facing is that my weights and error are not being updated when I run the training step. As a result, my network keeps returning the same results.
I've developed my model based on the MNIST example from the TensorFlow website.
import numpy as np
import tensorflow as tf

sess = tf.InteractiveSession()

#load dataset
dataset = np.loadtxt('char8k.txt', dtype='float', comments='#', delimiter=",")
Y = np.asmatrix(dataset[:, 0])
X = np.asmatrix(dataset[:, 1:1201])

m = 11527
labels = 26

# y is updated to 11527x26
Yt = np.zeros((m, labels))
for i in range(0, m):
    index = Y[0, i] - 1
    Yt[i, index] = 1

Y = Yt
Y = np.asmatrix(Y)

#------------------------------------------------------------------------------
#graph settings

x = tf.placeholder(tf.float32, shape=[None, 1200])
y_ = tf.placeholder(tf.float32, shape=[None, 26])

Wtest = tf.Variable(tf.truncated_normal([1200, 26], stddev=0.001))
W = tf.Variable(tf.truncated_normal([1200, 26], stddev=0.001))
b = tf.Variable(tf.zeros([26]))

sess.run(tf.initialize_all_variables())

y = tf.nn.softmax(tf.matmul(x, W) + b)
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

Wtest = W

for i in range(10):
    print("iteration:")
    print(i)
    Xbatch = X[np.random.randint(X.shape[0], size=100), :]
    Ybatch = Y[np.random.randint(Y.shape[0], size=100), :]
    train_step.run(feed_dict={x: Xbatch, y_: Ybatch})
    print("weight update:")
    print(Wtest == W)  # monitors whether the weights were updated
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("accuracy:")
    print(accuracy.eval(feed_dict={x: X, y_: Y}))
    print(" ")
    print(" ")
The issue probably arises from how you initialize the weight matrix, W. If it is initialized to all zeroes, all of the neurons will follow the same gradient in each step, which leads to the network not training. Replacing the line
W = tf.Variable(tf.zeros([1200,26]))
...with something like
W = tf.Variable(tf.truncated_normal([1200,26], stddev=0.001))
...should cause it to start training.
This question on the CrossValidated site has a good explanation of why you should not initialize all of your weights to zero.
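One further note, not part of the original answer: Wtest == W in the question compares the two tensor objects in the graph, not their current values, so it cannot show whether training changed anything. A minimal sketch of checking the values instead, reusing names from the question's code:

# Sketch only: read the weight values before and after one training step and
# compare the resulting NumPy arrays rather than the tf.Variable objects.
w_before = sess.run(W)
train_step.run(feed_dict={x: Xbatch, y_: Ybatch})
w_after = sess.run(W)

print("weights changed:", not np.array_equal(w_before, w_after))
print("max absolute change:", np.max(np.abs(w_after - w_before)))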