Is there a way to configure the output shape of a RNN? - python

I'm trying to create a RNN to guess what notes are being played on a piano, given a sound file of piano notes (WAV format). I'm currently cutting the WAV clips into ten-second chunks (2D), padding shorter sections to 10 seconds with zeroes so the input is all regular. However, when I pass in the clips to the RNN, it gives an output of one less dimension (1D) (when taking the last state - should I be taking the state series?).
I've created a simpler RNN to analyze single notes files (2D) and produce one output (1D), which has been successful. However, when trying to apply this same technique to full clips with multiple notes and notes starting/stopping it seems to break down, as I can't seem to change the output shape.
def weight_variable(shape):
initer = tf.truncated_normal_initializer(stddev=0.01)
return tf.get_variable('W', dtype=tf.float32, shape=shape, initializer=initer)
def bias_variable(shape):
initial = tf.constant(0., shape=shape, dtype=tf.float32)
return tf.get_variable('b', dtype=tf.float32,initializer=initial)
def RNN(x, weights, biases, timesteps, num_hidden):
x = tf.unstack(x, timesteps, 1)
# Define a rnn cell with tensorflow
lstm_cell = rnn.LSTMCell(num_hidden)
states_series, current_state = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
return tf.matmul(current_state[1], weights) + biases
# return [tf.matmul(temp,weights) + biases for temp in states_series]
# does this even make sense
# x is for data, y is for targets, shapes are [index, time, frequency], [index, time, output note (s)] respectively
x_train, x_valid, y_train, y_valid = load_data() # removed test
print("Size of:")
print("- Training-set:\t\t{}".format(y_train.shape[0]))
print("- Validation-set:\t{}".format(y_valid.shape[0]))
# print("- Test-set\t{}".format(len(y_test)))
learning_rate = 0.001 # The optimization initial learning rate
epochs = 1000 # Total number of training epochs
batch_size = 100 # Training batch size
display_freq = 100 # Frequency of displaying the training results
threshold = 0.7 # Threshold for determining a "note"
num_hidden_units = 15 # Number of hidden units of the RNN
# Placeholders for inputs (x) and outputs(y)
x = tf.placeholder(tf.float32, shape=(None, stepCount, num_input))
y = tf.placeholder(tf.float32, shape=(None, stepCount, n_classes))
# create weight matrix initialized randomly from N~(0, 0.01)
W = weight_variable(shape=[num_hidden_units, n_classes])
# create bias vector initialized as zero
b = bias_variable(shape=[n_classes])
output_logits = RNN(x, W, b, stepCount, num_hidden_units)
y_pred = tf.nn.softmax(output_logits)
# Define the loss function, optimizer, and accuracy, etc.
# (code removed, irrelevant)
# Creating the op for initializing all variables
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
global_step = 0
# Number of training iterations in each epoch
num_tr_iter = int(y_train.shape[0] / batch_size)
for epoch in range(epochs):
print('Training epoch: {}'.format(epoch + 1))
x_train, y_train = randomize(x_train, y_train)
for iteration in range(num_tr_iter):
global_step += 1
start = iteration * batch_size
end = (iteration + 1) * batch_size
x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
# Run optimization op (backprop)
feed_dict_batch = {x: x_batch, y: y_batch}
sess.run(optimizer, feed_dict=feed_dict_batch)
if iteration % display_freq == 0:
# Calculate and display the batch loss and accuracy
loss_batch, acc_batch = sess.run([loss, accuracy],
feed_dict=feed_dict_batch)
print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
format(iteration, loss_batch, acc_batch))
testLoss.append(loss_batch)
testAcc.append(acc_batch)
# Run validation after every epoch
feed_dict_valid = {x: x_valid[:1000].reshape((-1, stepCount, num_input)), y: y_valid[:1000]}
loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
print('---------------------------------------------------------')
print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
format(epoch + 1, loss_valid, acc_valid))
print('---------------------------------------------------------')
validLoss.append(loss_valid)
validAcc.append(acc_batch)
Currently, this is outputting a 1D array of predictions, which really does not make sense in my scenario, but I'm not sure how to change it (it should be outputting predictions for each timestep - i.e. predictions of what notes are playing at each moment in time).

Related

Getting Error while running cost sigmoid cost function

I have a training data, with 1000 rows. I am using Tensorflow for training this data. Also trying to divide this into mini-batches of size 32. While Training the data, i am getting the error as mentioned below
InvalidArgumentError: Incompatible shapes: [1000] vs. [32]
[[{{node logistic_loss_1/mul}}]]
On the contrary, if i don't divide my training data into minibatches, or use a single minibatch of size 1000, the code works fine.
I have defined weights as tf.Variables and running the tensorflow session. See the code below
def sigmoid_cost(z,Y):
print("Entered Cost")
z = tf.squeeze(z)
Y = tf.cast(Y_train,tf.float64)
logits = tf.transpose(z)
labels = (Y)
print(logits.shape)
print(labels.shape)
return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,logits=logits))
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001,
num_epochs = 1500, minibatch_size = 32, print_cost = True):
hidden_layer = 4
m,n = X_train.shape
n_y = Y_train.shape[0]
X = tf.placeholder(tf.float64,shape=(None,n), name="X")
Y = tf.placeholder(tf.float64,shape=(None),name="Y")
parameters = init_params(n)
z4, parameters = fwd_model(X,parameters)
cost = sigmoid_cost(z4,Y)
num_minibatch = m/minibatch_size
print("Getting Minibatches")
num_minibatch = tf.cast(num_minibatch,tf.int32)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
print("Gradient Defination Done")
init = tf.global_variables_initializer()
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
sess.run(init_op)
for epoch in range(0,num_epochs):
minibatches = []
minibatches = minibatch(X_train,Y_train,minibatch_size)
minibatch_cost = 0
for i in range (0,len(minibatches)):
(X_m,Y_m) = minibatches[i]
Y_m = np.squeeze(Y_m)
print("Minibatch %d X shape Y Shape ",i, X_m.shape,Y_m.shape)
_ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: X_m, Y: Y_m})
print("Mini Batch Cost is ",minibatch_cost)
epoch_cost = minibatch_cost/num_minibatch
if print_cost == True and epoch % 100 == 0:
print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
print(epoch_cost)
For some reason, while running the cost function the size of either X or Y batch is being taken as 32, 100 or vice-versa. Any help would be appreciated.
I think you are getting above error because of Y = tf.cast(Y_train, tf.float64) line inside sigmoid_cost function. Here, Y_train has 1000 rows, but loss function is expecting 32(which is your batch size).
It should be Y = tf.cast(Y, tf.float64). Infact, there is no need to cast data type here as Y is already of type tf.float64. Check below line:
Y = tf.placeholder(tf.float64,shape=(None),name="Y")
That's why, when you were using a single minibatch of size 1000(full Y_train data), your code was working fine.

Get a prediction from Tensor Flow Model

I want to get predictions from my trained tensor flow model. The following is the code I have for training my model.
def train_model(self, train, test, learning_rate=0.0001, num_epochs=16, minibatch_size=32, print_cost=True, graph_filename='costs'):
# Ensure that model can be rerun without overwriting tf variables
ops.reset_default_graph()
# For reproducibility
tf.set_random_seed(42)
seed = 42
# Get input and output shapes
(n_x, m) = train.images.T.shape
n_y = train.labels.T.shape[0]
costs = []
# Create placeholders of shape (n_x, n_y)
X, Y = self.create_placeholders(n_x, n_y)
# Initialize parameters
parameters = self.initialize_parameters()
# Forward propagation
Z3 = self.forward_propagation(X, parameters)
# Cost function
cost = self.compute_cost(Z3, Y)
# Backpropagation (using Adam optimizer)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initialize variables
init = tf.global_variables_initializer()
# Start session to compute Tensorflow graph
with tf.Session() as sess:
# Run initialization
sess.run(init)
# Training loop
for epoch in range(num_epochs):
epoch_cost = 0.
num_minibatches = int(m / minibatch_size)
seed = seed + 1
for i in range(num_minibatches):
# Get next batch of training data and labels
minibatch_X, minibatch_Y = train.next_batch(minibatch_size)
# Execute optimizer and cost function
_, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X.T, Y: minibatch_Y.T})
# Update epoch cost
epoch_cost += minibatch_cost / num_minibatches
# Print the cost every epoch
if print_cost == True:
print("Cost after epoch {epoch_num}: {cost}".format(epoch_num=epoch, cost=epoch_cost))
costs.append(epoch_cost)
# Plot costs
plt.figure(figsize=(16,5))
plt.plot(np.squeeze(costs), color='#2A688B')
plt.xlim(0, num_epochs-1)
plt.ylabel("cost")
plt.xlabel("iterations")
plt.title("learning rate = {rate}".format(rate=learning_rate))
plt.savefig(graph_filename, dpi=300)
plt.show()
# Save parameters
parameters = sess.run(parameters)
print("Parameters have been trained!")
# Calculate correct predictions
correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
# Calculate accuracy on test set
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Train Accuracy:", accuracy.eval({X: train.images.T, Y: train.labels.T}))
print ("Test Accuracy:", accuracy.eval({X: test.images.T, Y: test.labels.T}))
return parameters
After training the model, I want to extract the prediction from the model.
So I add
print(sess.run(accuracy, feed_dict={X: test.images.T}))
But I am seeing the below error after running the above code:
InvalidArgumentError: You must feed a value for placeholder tensor 'Y'
with dtype float and shape [10,?]
[[{{node Y}} = Placeholderdtype=DT_FLOAT, shape=[10,?], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Any help is welcome..
The tensor accuracy is a function of the tensor correct_prediction, which in turn is a function of (among the rest) Y.
So you're correctly being told that you should feed values for that placeholder too.
I'm assuming Y hold your labels, so it should also make intuitive sense that your feed_dict would also contain the correct Y values.
Hope that helps.
Good luck!

Getting error in epoch_cost "operands could not be broadcast together with shapes (32,) (5,) (32,) " while applying minibatches in neural net

I am doing this project in which i applied minibatches in neural network and calculating epoch cost:-
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001, num_epochs = 1500,
minibatch_size = 32, print_cost = True):
ops.reset_default_graph()
tf.set_random_seed(1)
seed = 3
costs = []
(n_x, m) = X_train.shape
n_y = Y_train.shape[0]
#create placeholder
X, Y = create_placeholder(n_x, n_y)
# init parameter
parameters = init_parameter()
# forward prop
Z3 = forward_prop(X, parameters)
# compute cost
cost = compute_cost(Z3, Y)
# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate= learning_rate).minimize(cost)
# Initialize all variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(num_epochs):
epoch_cost = 0
num_minibatche = int(m/ minibatch_size)
seed = seed + 1
minibatches = random_mini_batches(X_train, Y_train,
minibatch_size, seed)
for minibatch in minibatches:
(minibatch_X, minibatch_Y) = minibatch
_, minibatch_cost = sess.run([optimizer, cost], feed_dict
= {X: minibatch_X, Y: minibatch_Y})
epoch_cost += minibatch_cost / num_minibatche
# Print the cost every epoch
if print_cost == True and epoch % 100 == 0:
print ("Cost after epoch " ,epoch, np.mean(epoch_cost))
if print_cost == True and epoch % 5 == 0:
costs.append(epoch_cost)
# plot the cost
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(learning_rate))
plt.show()
# save the parameters
parameters = sess.run(parameters)
print ("Parameters have been trained!")
# Calculate the correct predictions
correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
# Calculate accuracy on the test set
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
return parameters
SO when i run this code i am getting this error on line:-
---->epoch_cost += minibatch_cost / num_minibatche
---->ValueError: operands could not be broadcast together with shapes (32,) (5,) (32,)
I took minibatche_size = 32 and number of training examples = 1381
But i am totally confused why i am getting this error.
The code you've posted is missing a lot of parts, like the whole model() function, so it's difficult to debug. But based on just what we have, some things here that are supposed to be scalars are in fact, arrays.
epoch_cost starts out as a scalar zero with epoch_cost = 0. Then you add some value to it, then try to print np.mean( epoch_cost ). Why do you take the mean of a scalar? Looks like the code was different earlier, and the migration to a scalar epoch_cost was not successful.
It is easy to imagine that minibatch_cost is returned as an array from TensorFlow - one cost value for each member of the batch. In that case you would need to apply np.mean() right there, like
epoch_cost += np.mean( minibatch_cost ) / num_minibatche
Maybe even num_minibatche somehow became a vector. It comes from
num_minibatche = int(m/ minibatch_size)
and minibatch_size is supposedly 32, so that's all right. But m comes from
(n_x, m) = X_train.shape
and we know nothing of X_train. Maybe m somehow became a vector, and in turn num_minibatche too. You will need to print the value for num_minibatche once calculated and make sure it's what it's supposed to be.
Hope this helps. If you post the whole code, I can help you more.

tensorflow, mini-batch, tf.placeholder - read state of nodes at given iteration

I want to print the value of MSE at each epoch/batch combination. the code below reports the tensor object representing the mse instead of its value at each iteration:
print("Epoch", epoch, "Batch_Index", batch_index, "MSE:", mse)
Example line of output:
Epoch 0 Batch_Index 0 MSE: Tensor("mse_2:0", shape=(), dtype=float32)
I understand it is because MSE is referencing tf.placeholder nodes which by themselves do not have any data. But once I run the below code:
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
the data should be already available thus values for all nodes depending on that data should be accessible as well, I think requesting an evaluation of the MSE in the print statement results in error
print("Epoch", epoch, "Batch_Index", batch_index, "MSE:", mse.eval())
Output2:
InvalidArgumentError: You must feed a value for placeholder tensor 'X_2' with dtype float and shape [?,9]
...
This tells me that mse.eval() does not see the data defined in sess.run()
Why do we experience such behavior?
How should we change the code to make it report MSA at each specified iteration?
import numpy as np
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data] # ADD COLUMN OF 1s for BIAS!
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
n_epochs = 100
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
learning_rate = 0.01
def fetch_batch(epoch, batch_index, batch_size):
np.random.seed(epoch * n_batches + batch_index) # not shown in the book
indices = np.random.randint(m, size=batch_size) # not shown
X_batch = scaled_housing_data_plus_bias[indices] # not shown
y_batch = housing.target.reshape(-1, 1)[indices] # not shown
return X_batch, y_batch
with tf.Session() as sess:
sess.run(init)
for epoch in range(n_epochs):
for batch_index in range(n_batches):
X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
if (epoch % 50 == 0 and batch_index % 100 == 0):
print("Epoch", epoch, "Batch_Index", batch_index, "MSE:", mse)
best_theta = theta.eval()
best_theta
First, I think this kind of debugging and printing and stuff is much easier to do with eager execution enabled in tensorflow.
Without eager execution enabled, "print" in tensorflow will never print the dynamic value of a tensor; it'll only print the name of the tensor, which is rarely what you want. Instead, use tf.Print to inspect the runtime value of the tensor (by doing something like tensor = tf.Print(tensor, [tensor]) as tf.Print does not execute unless its output is used somewhere).
i made it work by modifying the print statement to the following:
print("Epoch", epoch, "Batch_Index", batch_index, "MSE:", mse.eval(feed_dict={X: scaled_housing_data_plus_bias, y: housing_target}))
moreover by referencing complete data set (not batches) i was able to test the generalization of the current batch-based model to the whole sample. It should be easy to extend it to test on the test and hold-out samples as training of the model progresses
i am afraid that such on-the-fly evaluation (even on batches) can have impact on performance of the model. I will do further tests of that.

LSTM-RNN: num_classes usage

I am using LSTM RNN to detect whether a heart beat is arrhythmic or not. So the output classes are:[0,1] and n_classes=2, but when this code is executed:
# Fit training using batch data
_, loss, acc = sess.run(
[optimizer, cost, accuracy],
feed_dict={
x: batch_xs,
y: batch_ys
}
)
It gives following error
ValueError: Cannot feed value of shape (1, 1) for Tensor 'Placeholder_1:0', which has shape '(?, 2)'
Here is the whole code:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf # Version 1.0.0 (some previous versions are used in past commits)
from sklearn import metrics
import _pickle as cPickle
import os
import pandas as pd
import functions as f
[ml2_train_input,ml2_train_output,ml2_train_peaks,ml2_test_input,ml2_test_output,ml2_test_peaks]=f.get_ml2(0.5)
ml2_train_output=f.get_binary_output(ml2_train_output[:52500])
ml2_test_output=f.get_binary_output(ml2_test_output[:52500])
# Output classes to learn how to classify
LABELS = [0,1 ]
training_data_count = len(ml2_train_input[:52500]) # training series
test_data_count = len(ml2_test_input[:52500]) # testing series
n_input = 360 # 360 input parameters per timestep
# LSTM Neural Network's internal structure
n_hidden = 8 # Hidden layer num of features
n_classes = 2 # Total classes
# Training
learning_rate = 0.005
lambda_loss_amount = 0.0015
training_iters = training_data_count * 10 # Loop 10 times on the dataset
batch_size = 500
display_iter = 1000 # To show test set accuracy during training
X_test=np.array(ml2_test_input[:52500])
y_test=np.array(ml2_test_output[:52500])
# Some debugging info
print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")
def LSTM_RNN(_X, _weights, _biases):
# Function returns a tensorflow LSTM (RNN) artificial neural network from given parameters.
# Moreover, two LSTM cells are stacked which adds deepness to the neural network.
# Note, some code of this notebook is inspired from an slightly different
# RNN architecture used on another dataset, some of the credits goes to
# "aymericdamien" under the MIT license.
# (NOTE: This step could be greatly optimised by shaping the dataset once
# input shape: (batch_size, n_steps, n_input)
# permute n_steps and batch_size
# Reshape to prepare input to hidden activation
#_X = tf.reshape(_X, [-1, n_input])
# new shape: (n_steps*batch_size, n_input)
# Linear activation
_X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
# Split data because rnn cell needs a list of inputs for the RNN inner loop
_X = tf.split(_X, 500,0)
# new shape: n_steps * (batch_size, n_hidden)
# Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,reuse=None)
lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,reuse=None)
lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
# Get LSTM cell output
outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
# Get last time step's output feature for a "many to one" style classifier,
# as in the image describing RNNs at the top of this page
lstm_last_output = outputs[-1]
# Linear activation
return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']
def extract_batch_size(_train, step, batch_size):
# Function to fetch a "batch_size" amount of data from "(X|y)_train" data.
shape = list(_train.shape)
shape[0] = batch_size
batch_s = np.empty(shape)
for i in range(batch_size):
# Loop index
index = ((step-1)*batch_size + i) % len(_train)
batch_s[i] = _train[index]
return batch_s
def one_hot(y_):
# Function to encode output labels from number indexes
# e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]
y_ = y_.reshape(len(y_))
n_values = int(np.max(y_)) + 1
return np.eye(n_values)[np.array(y_, dtype=np.int32)] # Returns FLOATS
# Graph input/output
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Graph weights
weights = {
'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
}
biases = {
'hidden': tf.Variable(tf.random_normal([n_hidden])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = LSTM_RNN(x, weights, biases)
# Loss, optimizer and evaluation
l2 = lambda_loss_amount * sum(
tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 loss prevents this overkill neural network to overfit the data
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# To keep track of training's performance
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []
# Launch the graph
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
sess.run(init)
X_train=np.array(ml2_train_input[:52500])
y_train=np.array(ml2_train_output[:52500])
step = 1
while step * batch_size <= training_iters:
batch_xs = extract_batch_size(X_train, step, batch_size)
batch_ys = one_hot(extract_batch_size(y_train, step, batch_size))
# Fit training using batch data
_, loss, acc = sess.run(
[optimizer, cost, accuracy],
feed_dict={
x: batch_xs,
y: batch_ys
}
)
train_losses.append(loss)
train_accuracies.append(acc)
# Evaluate network only at some steps for faster training:
if (step*batch_size % display_iter == 0) or (step == 1) or (step * batch_size > training_iters):
# To not spam console, show training accuracy/loss in this "if"
print("Training iter #" + str(step*batch_size) + \
": Batch Loss = " + "{:.6f}".format(loss) + \
", Accuracy = {}".format(acc))
# Evaluation on the test set (no learning made here - just evaluation for diagnosis)
loss, acc = sess.run(
[cost, accuracy],
feed_dict={
x: X_test,
y: one_hot(y_test)
}
)
test_losses.append(loss)
test_accuracies.append(acc)
print("PERFORMANCE ON TEST SET: " + \
"Batch Loss = {}".format(loss) + \
", Accuracy = {}".format(acc))
step += 1
print("Optimization Finished!")
Please help!
I feel you should convert your Y values to categorical (one-hot encoded) than it should work. So try to convert your Y values to categorical

Categories

Resources