My tensorflow neural network accuracy does not change - python

I want to build a neural network for the Student Admissions dataset (admit, gre, gpa, rank).
I one-hot encoded admit and rank as follows:
one_hot_data = pd.concat([data, pd.get_dummies(data['rank'], prefix='rank')], axis=1)
one_hot_data = pd.concat([one_hot_data, pd.get_dummies(data['admit'], prefix='admit')], axis=1)
# Drop the original rank and admit columns
data = one_hot_data.drop(['rank', 'admit'], axis=1)
print(data.shape)
I split the data using train_test_split and scale it using minmax_scale.
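For reference, a minimal sketch of that split/scale step, assuming the one-hot DataFrame above; the target column names (admit_0, admit_1) and the 20% test size are illustrative assumptions, not taken from the question:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale

# The one-hot admit columns are the targets; everything else is a feature.
X = data.drop(['admit_0', 'admit_1'], axis=1).values.astype('float32')
y = data[['admit_0', 'admit_1']].values.astype('float32')

# Scale features to [0, 1], then hold out 20% for testing.
X = minmax_scale(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)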
My neural network is as follows:
n_features = X_train.shape[1]
n_labels = y_train.shape[1]

features = tf.placeholder(tf.float32, [None, n_features])
labels = tf.placeholder(tf.float32, [None, n_labels])

w = [
    tf.Variable(tf.random_normal((n_features, 16)), name='Weights_layer_0'),
    tf.Variable(tf.random_normal((16, 4)), name='Weights_layer_1'),
    tf.Variable(tf.random_normal((4, n_labels)), name='Weights_layer_2'),
]
n_layers = len(w)
b = [
    tf.Variable(tf.zeros(16), name='Bias_layer_0'),
    tf.Variable(tf.zeros(4), name='Bias_layer_1'),
    tf.Variable(tf.zeros(n_labels), name='Bias_layer_2'),
]

def neural_network(input, weights, biases):
    for i in range(n_layers-1):
        layer = tf.add(tf.matmul(input if i==0 else layer, weights[i]), biases[i])
        layer = tf.nn.relu(layer)
        # layer = tf.nn.dropout(layer, keep_prob=0.6)
    out_layer = tf.add(tf.matmul(layer, weights[-1]), biases[-1])
    return out_layer

loss_ = []
res = []

prediction = neural_network(features, w, b)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=prediction, labels=labels))
optim = tf.train.AdadeltaOptimizer(0.0001).minimize(loss)
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.device('/gpu'):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(10):
            for m, n in zip(X_train_batches, y_train_batches):
                _, l = sess.run([optim, loss], feed_dict={features: m, labels: n})
                loss_.append(l)
            acc = sess.run([accuracy], feed_dict={features: X_train, labels: y_train})
            print(i, acc)
        test_accuracy = sess.run(accuracy, feed_dict={features: X_test, labels: y_test})
        print(test_accuracy)
        res = sess.run(neural_network(features, w, b), feed_dict={features: X})
But the accuracy doesn't change:
0 [0.4857143]
1 [0.4857143]
2 [0.4857143]
3 [0.4857143]
4 [0.4857143]
5 [0.4857143]
6 [0.4857143]
7 [0.4857143]
8 [0.4857143]
9 [0.4857143]
10 [0.4857143]
0.5333333
and the loss barely changes:
[0.5546836, 0.5546756, 0.5546678, 0.55466014, 0.55465263, 0.5546452, 0.55463773, 0.55463034, 0.5546232, 0.5546159, 0.5546088, 0.5546016, 0.5545944, 0.5545874, 0.5545803, 0.5545734, 0.55456626, 0.5545592, 0.5545522, 0.5545452]
What is missing? Is my neural network correct? Full code

There may be many possible causes here (and we don't have your data), but, in my experience, a frequent mistake in such cases is initializing the weights with the default argument of stddev=1.0 in tf.random_normal() (see the docs), as you do here.
A stddev of 1.0 is a huge value, and it alone can make your NN go astray. Change it to stddev=0.01 for all your initial weights:
w = [
    tf.Variable(tf.random_normal((n_features, 16), stddev=0.01), name='Weights_layer_0'),
    tf.Variable(tf.random_normal((16, 4), stddev=0.01), name='Weights_layer_1'),
    tf.Variable(tf.random_normal((4, n_labels), stddev=0.01), name='Weights_layer_2'),
]
Other than that, as already suggested in the comments, a learning rate of 0.0001 seems way too small here (given how slowly the loss is decreasing); experiment with higher values (0.01 - 0.001).
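For concreteness, the learning-rate change might look like the line below; 0.01 is only a starting point to experiment with, not a tuned value, and the commented-out variance-scaled initializer is an optional alternative to a hand-picked small stddev (an extra suggestion, not part of the answer above):
# Keep the same optimizer, but with a larger learning rate than 0.0001.
optim = tf.train.AdadeltaOptimizer(0.01).minimize(loss)

# Optional alternative initialization for the first weight matrix:
# w0 = tf.get_variable('Weights_layer_0', shape=(n_features, 16),
#                      initializer=tf.contrib.layers.xavier_initializer())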

Related

Tensorflow's loss function returns NAN after changing RNN to LSTM cell

I am training a model to predict a time series using an RNN. The model trains without any issue. Here's the original code:
tf.reset_default_graph()
num_inputs = 1
num_neurons = 100
num_outputs = 1
learning_rate = 0.0001
num_train_iterations = 2000
batch_size = 1
X = tf.placeholder(tf.float32, [None, time_steps-1, num_inputs])
y = tf.placeholder(tf.float32, [None, time_steps-1, num_outputs])
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.contrib.rnn.BasicRNNCell(num_units=num_neurons, activation=tf.nn.relu),
    output_size=num_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    for iteration in range(num_train_iterations):
        elx, ely = next_batch(training_data, time_steps)
        sess.run(train, feed_dict={X: elx, y: ely})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: elx, y: ely})
            print(iteration, "\tMSE:", mse)
The problem comes when I change tf.contrib.rnn.BasicRNNCell to tf.contrib.rnn.BasicLSTMCell: training slows down enormously and the loss (the MSE variable) becomes NaN. My best guess is that MSE is the wrong loss function and that I should try cross entropy. I searched for similar code and found that tf.nn.softmax_cross_entropy_with_logits() could be the solution, but I still don't understand how to apply it to my problem.
Usually NaN occurs when your gradients blow up.
Here is some example code using a softmax cross-entropy loss. Give it a try.
# Output layer
logit = tf.add(tf.matmul(H1, w2), b2)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=Y)

# Cost
cost = tf.reduce_mean(cross_entropy)

# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Prediction
y_pred = tf.nn.softmax(logit)
pred = tf.argmax(y_pred, axis=1)
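Since the answer attributes the NaNs to exploding gradients, gradient clipping is another common remedy worth trying; this is an additional hedged sketch (not part of the answer above) that keeps the question's MSE loss and only changes how the training op is built:
# Clip gradients by their global norm before applying them, which often
# prevents an RNN/LSTM loss from blowing up into NaN.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
grads, variables = zip(*optimizer.compute_gradients(loss))
grads, _ = tf.clip_by_global_norm(grads, 5.0)  # 5.0 is an illustrative clip norm
train = optimizer.apply_gradients(zip(grads, variables))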

Accuracy very bad in tensorflow logistic regression

I am trying to write a program that predicts whether a tumor is malignant or benign.
The dataset is from: https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Prognostic%29
This is my code; my accuracy is at about 65%, which is not much better than a coin flip. Any help would be appreciated.
import tensorflow as tf
import pandas as pd
import numpy as np
df = pd.read_csv(r'D:\wholedesktop\logisticReal.txt')
df.drop(['id'], axis=1, inplace=True)
x_data = np.array(df.drop(['class'], axis=1))
x_data = x_data.astype(np.float64)
y = df['class']
y.replace(2, 0, inplace=True)
y.replace(4, 1, inplace=True)
y_data = np.array(y)
# y shape = 681,1
# x shape = 681,9
x = tf.placeholder(name='x', dtype=np.float32)
y = tf.placeholder(name='y', dtype=np.float32)
w = tf.Variable(dtype=np.float32, initial_value=np.random.random((9, 1)))
b = tf.Variable(dtype=np.float32, initial_value=np.random.random((1, 1)))
y_ = (tf.add(tf.matmul(x, w), b))
error = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_, labels=y))
goal = tf.train.GradientDescentOptimizer(0.05).minimize(error)
prediction = tf.round(tf.sigmoid(y_))
correct = tf.cast(tf.equal(prediction, y), dtype=np.float64)
accuracy = tf.reduce_mean(correct)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        sess.run(goal, feed_dict={x: x_data, y: y_data})
        print(i, sess.run(accuracy, feed_dict={x: x_data, y: y_data}))
    weight = sess.run(w)
    bias = sess.run(b)
    print(weight)
    print(bias)
Your neural network only has a single layer, so the best it can do is fit a straight line to your data that separates the different classes. This is vastly insufficient for a general (high-dimensional) data set. The power of (deep) neural networks lies in the connectivity between many layers of neurons. In your example, you could add more layers manually by passing the output of one matmul as the input to a new matmul with different weights and biases, or you could use the contrib.layers collection to make it more concise:
x = tf.placeholder(name='x', dtype=np.float32)
fc1 = tf.contrib.layers.fully_connected(inputs=x, num_outputs=16, activation_fn=tf.nn.relu)
fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=32, activation_fn=tf.nn.relu)
fc3 = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs=64, activation_fn=tf.nn.relu)
The trick is to pass the output of one layer as the input to the next layer. As you add more and more layers, your training accuracy will go up (possibly because of over-fitting; use dropout to remedy that). A sketch of how these layers plug into the rest of your graph follows.
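This is a hedged sketch only: the layer sizes come from the snippet above, while the single-logit output layer and loss mirror the question's sigmoid setup (note that y_data would also need to be reshaped to (-1, 1) to match the placeholder):
x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, 9])
y = tf.placeholder(name='y', dtype=tf.float32, shape=[None, 1])

fc1 = tf.contrib.layers.fully_connected(inputs=x, num_outputs=16, activation_fn=tf.nn.relu)
fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=32, activation_fn=tf.nn.relu)
fc3 = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs=64, activation_fn=tf.nn.relu)

# One linear output unit feeding the same sigmoid cross-entropy loss as before.
y_ = tf.contrib.layers.fully_connected(inputs=fc3, num_outputs=1, activation_fn=None)
error = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_, labels=y))
goal = tf.train.GradientDescentOptimizer(0.05).minimize(error)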

How to make Feed Forward NN more accurate?

I just finished writing my first ever neural network and it finally works, but it works really badly. I get about 0.37 accuracy. Any tips on how to make it more accurate? I have already tried different learning rates and different numbers of hidden-layer units, but I never get above 0.37 accuracy. I'm trying to classify data into one of 3 classes (0, 1 or 2), and I use a one-hot matrix as my Y. How could I improve my code?
X = data[1:, 2:]
m, n = X.shape
labels = data[1:, 1]
Y = np.zeros((m,3))
i = 0
for label in labels:
    if label == 0:
        Y[i, 0] = 1
    elif label == 1:
        Y[i, 1] = 1
    elif label == 2:
        Y[i, 2] = 1
    i += 1
slice_size = math.floor(m/5)
X_test = X[-slice_size:, :]
Y_test = Y[-slice_size:]
X_train = X[:slice_size, :]
Y_train = Y[:slice_size]
learning_rate = 0.00001
num_steps = 200
batch_size = 100
display_step = 2
n_nodes_hl1 = 5
n_nodes_hl2 = 5
n_nodes_hl3 = 5
n_classes = 3
n_inputs = 16
training_epochs = 500
x = tf.placeholder('float32', [None,n])
y = tf.placeholder('float32', [None, n_classes])
weights = {
    'h1': tf.Variable(tf.random_normal([n_inputs, n_nodes_hl1])),
    'h2': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
    'h3': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
    'out': tf.Variable(tf.random_normal([n_nodes_hl1, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_nodes_hl1])),
    'b2': tf.Variable(tf.random_normal([n_nodes_hl2])),
    'b3': tf.Variable(tf.random_normal([n_nodes_hl3])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
def neural_network(data):
    layer_1 = tf.add(tf.matmul(data, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    output = tf.matmul(layer_3, weights['out']) + biases['out']
    return output
logits = neural_network(x)
prediction = tf.nn.softmax(logits)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y_train, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(1, num_steps+1):
        x_step = np.asarray(X_train[step, :])
        y_step = np.asarray(Y_train[step])
        x_step = np.reshape(x_step, (1, n))
        y_step = np.reshape(y_step, (1, n_classes))
        sess.run(train_op, feed_dict={x: x_step, y: y_step})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy], feed_dict={x: x_step, y: y_step})
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))
    x_step_test = np.asarray(X_test)
    y_step_test = np.asarray(Y_test)
    x_step_test = np.reshape(x_step, (1, n))
    y_step_test = np.reshape(y_step, (1, n_classes))
    print("Optimization Finished!")
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: x_step_test, y: y_step_test}))
1.
x_step_test = np.asarray(X_test)
y_step_test = np.asarray(Y_test)
x_step_test = np.reshape(x_step, (1, n))
y_step_test = np.reshape(y_step, (1,n_classes))
Shouldn't this be:
x_step_test = np.asarray(X_test)
y_step_test = np.asarray(Y_test)
x_step_test = np.reshape(x_step_test, (1, n))
y_step_test = np.reshape(y_step_test, (1,n_classes))
Also, check how you are taking the batches; there might be a problem there.
Use train_test_split from sklearn.model_selection; it splits your train and test data after shuffling (a minimal sketch follows below). Not shuffling your data can create problems if the data has some pattern, e.g. if you have 99 data points where the first 33 are dogs, the next 33 are cats, and the last 33 are mice, your neural net will train only on 66 dog and cat images and won't learn to recognise mice.
Increase the learning rate; AdamOptimizer already decays the learning rate, so use something like 0.1 or 0.01.
I think the TensorFlow part is correct.
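A minimal sketch of the shuffled split suggested above, assuming X and Y are the full feature and one-hot label arrays built at the top of the question (the 20% test size mirrors the question's slice_size; random_state is illustrative):
from sklearn.model_selection import train_test_split

# train_test_split shuffles by default, so any ordering pattern in the data
# is broken up before splitting into train and test sets.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)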

Neural network can't seem to learn a simple relationship TensorFlow

I'm experimenting with TensorFlow (which seems amazing so far!) and I'm playing around with a toy example, a 1-class classification problem. I generate some features, and if the first feature is above a threshold then the example is "positive".
Full code here:
https://gist.github.com/tnbredillet/f136c2bc40815517e0aa1139bd2060ee
The problem is that the model seems unable to capture that simple relationship.
Of course I'm missing a lot of stuff (CV, regularization, batch normalization, hyperparameter tuning), to name a few.
But still, I would expect the model to figure that one out, right?
Maybe there's simply a bug in my code?
Would welcome any insights :-)
EDIT:
Data generating code:
num_examples = 100000
split = 0.2
num_features = 1
def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i-1][np.random.randint(num_features)] = 0
        hard = ceil(np.sum(features[i-1])) % 2
        easy = 0
        if features[i-1][0] > 3:
            easy = 1
        labels.append(easy)
    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df

def one_hot_encoding(train_df):
    # TODO: handle categorical feature one hot encoding.
    return 0, 0

def scale_data(train_df, test_df):
    categorical_columns, encoding = one_hot_encoding(train_df)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(train_df.drop(['labels'], axis=1))
    train_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(train_df.drop('labels', axis=1))),
            train_df['labels']
        ],
        axis=1,
    )
    test_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(test_df.drop('labels', axis=1))),
            test_df['labels']
        ],
        axis=1,
    )
    return train_df, test_df

def preprocess_data(train_df, test_df):
    all_dfs = [train_df, test_df]
    features = set()
    for df in all_dfs:
        features |= set(df.columns)
    for df in all_dfs:
        for f in features:
            if f not in df.columns:
                df[f] = 0.0
    for df in all_dfs:
        df.sort_index(axis=1, inplace=True)
    train_df, test_df = scale_data(train_df, test_df)
    train_df = shuffle(train_df).reset_index(drop=True)
    return train_df, test_df

def get_data(num_examples, split):
    train_df = generate_input_data(num_examples, num_features)
    test_df = generate_input_data(int(ceil(num_examples*split)), num_features)
    return preprocess_data(train_df, test_df)

def get_batch(df, batch_size, epoch):
    start = batch_size*epoch - batch_size
    end = batch_size*epoch
    if end > len(df):
        end = len(df)
    size = end - start
    batch_x = df.drop('labels', axis=1)[start:end].as_matrix()
    batch_y = df['labels'][start:end].as_matrix().reshape(size, 1)
    return batch_x, batch_y
And the network definition/training and evaluation:
train_df, test_df = get_data(num_examples, split)
n_hidden_1 = 8
n_hidden_2 = 4
learning_rate = 0.01
batch_size = 500
num_epochs = 200
display_epoch = 50
def neural_net(x):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

weights = {
    'h1': tf.Variable(tf.random_normal([num_features, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([1]))
}
X = tf.placeholder(tf.float32, shape=(None, num_features))
Y = tf.placeholder(tf.float32, shape=(None, 1))
logits = neural_net(X)
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
predictions = tf.sigmoid(logits)
predicted_class = tf.greater(predictions, 0.5)
correct = tf.equal(predicted_class, tf.equal(Y,1.0))
accuracy = tf.reduce_mean( tf.cast(correct, 'float') )
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(1, num_epochs + 1):
        batch_x, batch_y = get_batch(train_df, batch_size, epoch)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if epoch % display_epoch == 0 or epoch == 1:
            loss, acc, pred, fff = sess.run([loss_op, accuracy, predictions, logits],
                                            feed_dict={X: batch_x, Y: batch_y})
            c = ', '.join('{}={}'.format(*t) for t in zip(pred, batch_y))
            print("[{}] Batch loss={:.4f}, Accuracy={:.5f}, Logits vs labels= {}".format(epoch, loss, acc, c))
    print("Optimization Finished!")
    batch_x, batch_y = get_batch(test_df, batch_size, 1)
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y}))
final output:
[1] Batch loss=3.2160, Accuracy=0.41000
[50] Batch loss=0.6661, Accuracy=0.61800
[100] Batch loss=0.6472, Accuracy=0.65200
[150] Batch loss=0.6538, Accuracy=0.64000
[200] Batch loss=0.6508, Accuracy=0.64400
Optimization Finished!
('Testing Accuracy:', 0.63999999)
In this case it is not a machine learning algorithm problem, but a bug in your data generation that scrambles the relationship you intend. In this function:
def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i-1][np.random.randint(num_features)] = 0
        hard = ceil(np.sum(features[i-1])) % 2
        easy = 0
        if features[i-1][0] > 3:
            easy = 1
        labels.append(easy)
    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df
You are indexing features by i-1 to determine the label. However, xrange generates numbers starting from 0, so you don't need to subtract 1. In fact, when you do, the relationship becomes close to random and essentially unpredictable, so even though the rest of your model is OK, it won't be able to score well.
So you need to index by i instead, e.g. if features[i][0] > 3.
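A sketch of the corrected function, following the fix above of indexing by i (the unused hard label is omitted here; everything else matches the question's code):
def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in xrange(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10)
                        + np.random.rand(num_features))
        if np.random.randint(101) > 90:
            features[i][np.random.randint(num_features)] = 0
        # Label the example generated in this iteration, i.e. index by i, not i-1.
        easy = 1 if features[i][0] > 3 else 0
        labels.append(easy)
    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df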

LSTM-RNN: num_classes usage

I am using an LSTM RNN to detect whether a heartbeat is arrhythmic or not. So the output classes are [0, 1] and n_classes=2, but when this code is executed:
# Fit training using batch data
_, loss, acc = sess.run(
    [optimizer, cost, accuracy],
    feed_dict={
        x: batch_xs,
        y: batch_ys
    }
)
it gives the following error:
ValueError: Cannot feed value of shape (1, 1) for Tensor 'Placeholder_1:0', which has shape '(?, 2)'
Here is the whole code:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf # Version 1.0.0 (some previous versions are used in past commits)
from sklearn import metrics
import _pickle as cPickle
import os
import pandas as pd
import functions as f
[ml2_train_input,ml2_train_output,ml2_train_peaks,ml2_test_input,ml2_test_output,ml2_test_peaks]=f.get_ml2(0.5)
ml2_train_output=f.get_binary_output(ml2_train_output[:52500])
ml2_test_output=f.get_binary_output(ml2_test_output[:52500])
# Output classes to learn how to classify
LABELS = [0,1 ]
training_data_count = len(ml2_train_input[:52500]) # training series
test_data_count = len(ml2_test_input[:52500]) # testing series
n_input = 360 # 360 input parameters per timestep
# LSTM Neural Network's internal structure
n_hidden = 8 # Hidden layer num of features
n_classes = 2 # Total classes
# Training
learning_rate = 0.005
lambda_loss_amount = 0.0015
training_iters = training_data_count * 10 # Loop 10 times on the dataset
batch_size = 500
display_iter = 1000 # To show test set accuracy during training
X_test=np.array(ml2_test_input[:52500])
y_test=np.array(ml2_test_output[:52500])
# Some debugging info
print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")
def LSTM_RNN(_X, _weights, _biases):
    # Function returns a tensorflow LSTM (RNN) artificial neural network from given parameters.
    # Moreover, two LSTM cells are stacked which adds deepness to the neural network.
    # Note, some code of this notebook is inspired from a slightly different
    # RNN architecture used on another dataset; some of the credit goes to
    # "aymericdamien" under the MIT license.

    # (NOTE: This step could be greatly optimised by shaping the dataset once)
    # input shape: (batch_size, n_steps, n_input)
    # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    # _X = tf.reshape(_X, [-1, n_input])
    # new shape: (n_steps*batch_size, n_input)

    # Linear activation
    _X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(_X, 500, 0)
    # new shape: n_steps * (batch_size, n_hidden)

    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=None)
    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=None)
    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
    # Get LSTM cell output
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)

    # Get last time step's output feature for a "many to one" style classifier
    lstm_last_output = outputs[-1]

    # Linear activation
    return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']
def extract_batch_size(_train, step, batch_size):
    # Function to fetch a "batch_size" amount of data from "(X|y)_train" data.
    shape = list(_train.shape)
    shape[0] = batch_size
    batch_s = np.empty(shape)
    for i in range(batch_size):
        # Loop index
        index = ((step-1)*batch_size + i) % len(_train)
        batch_s[i] = _train[index]
    return batch_s

def one_hot(y_):
    # Function to encode output labels from number indexes,
    # e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]
    y_ = y_.reshape(len(y_))
    n_values = int(np.max(y_)) + 1
    return np.eye(n_values)[np.array(y_, dtype=np.int32)]  # Returns FLOATS
# Graph input/output
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Graph weights
weights = {
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),  # Hidden layer weights
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = LSTM_RNN(x, weights, biases)
# Loss, optimizer and evaluation
l2 = lambda_loss_amount * sum(
    tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
)  # L2 loss prevents this overkill neural network from overfitting the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# To keep track of training's performance
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []
# Launch the graph
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
sess.run(init)
X_train=np.array(ml2_train_input[:52500])
y_train=np.array(ml2_train_output[:52500])
step = 1
while step * batch_size <= training_iters:
    batch_xs = extract_batch_size(X_train, step, batch_size)
    batch_ys = one_hot(extract_batch_size(y_train, step, batch_size))

    # Fit training using batch data
    _, loss, acc = sess.run(
        [optimizer, cost, accuracy],
        feed_dict={
            x: batch_xs,
            y: batch_ys
        }
    )
    train_losses.append(loss)
    train_accuracies.append(acc)

    # Evaluate network only at some steps for faster training:
    if (step*batch_size % display_iter == 0) or (step == 1) or (step * batch_size > training_iters):
        # To not spam console, show training accuracy/loss in this "if"
        print("Training iter #" + str(step*batch_size) +
              ": Batch Loss = " + "{:.6f}".format(loss) +
              ", Accuracy = {}".format(acc))
        # Evaluation on the test set (no learning made here - just evaluation for diagnosis)
        loss, acc = sess.run(
            [cost, accuracy],
            feed_dict={
                x: X_test,
                y: one_hot(y_test)
            }
        )
        test_losses.append(loss)
        test_accuracies.append(acc)
        print("PERFORMANCE ON TEST SET: " +
              "Batch Loss = {}".format(loss) +
              ", Accuracy = {}".format(acc))
    step += 1
print("Optimization Finished!")
Please help!
I think you should convert your Y values to categorical (one-hot encoded) before feeding them; then it should work.
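A minimal sketch of that conversion with a fixed number of columns (one_hot_fixed is a hypothetical helper, not from the answer above; the question's own one_hot infers the depth from np.max(y_), which can yield fewer than n_classes columns and then no longer match the (?, 2) placeholder):
import numpy as np

def one_hot_fixed(y_, n_classes=2):
    # Always produce n_classes columns, even if a batch happens to contain
    # only one class, so the feed matches the (?, n_classes) placeholder.
    y_ = np.asarray(y_, dtype=np.int32).reshape(-1)
    return np.eye(n_classes)[y_]

batch_ys = one_hot_fixed(extract_batch_size(y_train, step, batch_size))
# ...and likewise one_hot_fixed(y_test) when evaluating on the test set.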
