I've copied a tensorflow code from Sirajology's github. It should load a .csv into a single layer neural net.
My question is, how and where do I put the .csv file into the code?
Also, I don't understand if the code will automatically split the .csv into train and test data, or if I need to do that with some different code before it is fed to the neural net?
I've been spending a lot of time with python and tensorflow and understand some basic concepts but am still a total newb. Any help is appreciated! Thanks!!!
#I have eliminated all code that is obviously irrelevant to the question
tf.app.flags.DEFINE_string('train', None,
'File containing the training data (labels & features).')
tf.app.flags.DEFINE_string('test', None,
'File containing the test data (labels & features).')
tf.app.flags.DEFINE_integer('num_epochs', 1,
'Number of examples to separate from the training '
'data for the validation set.')
tf.app.flags.DEFINE_boolean('verbose', False, 'Produce verbose output.')
FLAGS = tf.app.flags.FLAGS
# Extract numpy representations of the labels and features given rows consisting of:
# label, feat_0, feat_1, ..., feat_n
def extract_data(filename):
# Arrays to hold the labels and feature vectors.
labels = []
fvecs = []
# Iterate over the rows, splitting the label from the features. Convert labels
# to integers and features to floats.
for line in file(filename):
row = line.split(",")
labels.append(int(row[0]))
fvecs.append([float(x) for x in row[1:]])
# Convert the array of float arrays into a numpy float matrix.
fvecs_np = np.matrix(fvecs).astype(np.float32)
# Convert the array of int labels into a numpy array.
labels_np = np.array(labels).astype(dtype=np.uint8)
# Convert the int numpy array into a one-hot matrix.
labels_onehot = (np.arange(NUM_LABELS) == labels_np[:, None]).astype(np.float32)
# Return a pair of the feature matrix and the one-hot label matrix.
return fvecs_np,labels_onehot
def main(argv=None):
# Be verbose?
verbose = FLAGS.verbose
# Get the data.
train_data_filename = FLAGS.train
test_data_filename = FLAGS.test
# Extract it into numpy matrices.
train_data,train_labels = extract_data(train_data_filename)
test_data, test_labels = extract_data(test_data_filename)
# Get the shape of the training data.
train_size,num_features = train_data.shape
# Get the number of epochs for training.
num_epochs = FLAGS.num_epochs
# This is where training samples and labels are fed to the graph.
# These placeholder nodes will be fed a batch of training data at each
# training step using the {feed_dict} argument to the Run() call below.
x = tf.placeholder("float", shape=[None, num_features])
y_ = tf.placeholder("float", shape=[None, NUM_LABELS])
# For the test data, hold the entire dataset in one constant node.
test_data_node = tf.constant(test_data)
# Define and initialize the network.
# These are the weights that inform how much each feature contributes to
# the classification.
W = tf.Variable(tf.zeros([num_features,NUM_LABELS]))
b = tf.Variable(tf.zeros([NUM_LABELS]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
# Optimization.
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Evaluation.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Create a local session to run this computation.
with tf.Session() as s:
# Run all the initializers to prepare the trainable parameters.
tf.initialize_all_variables().run()
if verbose:
print ('Initialized!')
print
print ('Training.')
# Iterate and train.
for step in xrange(num_epochs * train_size // BATCH_SIZE):
if verbose:
print (step,)
offset = (step * BATCH_SIZE) % train_size
batch_data = train_data[offset:(offset + BATCH_SIZE), :]
batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
train_step.run(feed_dict={x: batch_data, y_: batch_labels})
if verbose and offset >= train_size-BATCH_SIZE:
print
# Give very detailed output.
if verbose:
print
print ('Weight matrix.')
print (s.run(W))
print
print ('Bias vector.')
print (s.run(b))
print
print ("Applying model to first test instance.")
first = test_data[:1]
print ("Point =", first)
print ("Wx+b = ", s.run(tf.matmul(first,W)+b))
print ("softmax(Wx+b) = ", s.run(tf.nn.softmax(tf.matmul(first,W)+b)))
print
print ("Accuracy:", accuracy.eval(feed_dict={x: test_data, y_: test_labels}))
if __name__ == '__main__':
tf.app.run()
It's expecting to receive it as argument in the terminal.
The lines below are checking for it:
tf.app.flags.DEFINE_string('train', None,
'File containing the training data (labels & features).')
tf.app.flags.DEFINE_string('test', None,
'File containing the test data (labels & features).')
tf.app.flags.DEFINE_integer('num_epochs', 1,
'Number of examples to separate from the training '
'data for the validation set.')
So, you just have to run it as:
python YourScript.py --train FileName.csv --test TestName.csv --num_epochs 5 --verbose True
Related
I'm training a network to classify audio. First I extract logmel-spectrograms from my audio data, save these in arrays and train my network using these. At each epoch I inference on my test data to get an accuracy estimate.
My training dataset is 24GB and test dataset is 6GB. Both are too large for the RAM. I found that I could extract the logmel-specs from my training data before running the network, save each minibatch in a pickle file, then load these one by one during training.
However, I use .eval() to get the accuracy from my my whole test data at once. This worked when I used smaller datasets as there was no need to split my data up into chunks using different pickle files. However, I'm now trying to figure out how to run the .eval() line or equivalent so that it provides accuracy for the whole test dataset, rather than the smaller chunks I've split it into. Is there a way I can get overall accuracy for my test data using pickle files or another method?
Here is the key component of code at the end where I think this can be done:
correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_input, 1))
test_accuracy = tf.reduce_mean(tf.cast(correct, 'float')) #changes correct to type: float
test_accuracy1 = test_accuracy.eval({features_input:X_test, labels_input:y_test})
test_accuracy_scores.append(test_accuracy1)
print('Test accuracy:', test_accuracy1)
Here is my entire codeblock for the network:
### Train NN, output results
r"""This uses the VGGish model definition within a larger model which adds two
layers on top, and then trains this larger model.
We input log-mel spectrograms (X_train) calculated above with associated labels
(y_train), and feed the batches into the model. Once the model is trained, it
is then executed on the test log-mel spectrograms (X_test) and the accuracy is output.
Alongside .csv file with the predictions for each 0.96s chunk and their true
class is also output for the test data. Column1 = the logit for the first class,
Column2 = the logit for the scond class etc. The final column is the true class.
"""
num_min_batches = len(os.listdir(pickle_files_dir))/2
os.chdir(scripts_directory)
def main(X):
with tf.Graph().as_default(), tf.Session() as sess:
# Define VGGish.
embeddings = vggish_slim.define_vggish_slim(training=FLAGS.train_vggish)
# Define a shallow classification model and associated training ops on top
# of VGGish.
with tf.variable_scope('mymodel'):
# Add a fully connected layer with 100 units. Add an activation function
# to the embeddings since they are pre-activation.
num_units = 100
fc = slim.fully_connected(tf.nn.relu(embeddings), num_units)
# Add a classifier layer at the end, consisting of parallel logistic
# classifiers, one per class. This allows for multi-class tasks.
logits = slim.fully_connected(
fc, _NUM_CLASSES, activation_fn=None, scope='logits')
tf.sigmoid(logits, name='prediction')
linear_out= slim.fully_connected(
fc, _NUM_CLASSES, activation_fn=None, scope='linear_out')
logits = tf.sigmoid(linear_out, name='logits')
# Add training ops.
with tf.variable_scope('train'):
global_step = tf.train.create_global_step()
# Labels are assumed to be fed as a batch multi-hot vectors, with
# a 1 in the position of each positive class label, and 0 elsewhere.
labels_input = tf.placeholder(
tf.float32, shape=(None, _NUM_CLASSES), name='labels')
# Cross-entropy label loss.
xent = tf.nn.sigmoid_cross_entropy_with_logits(
logits=logits, labels=labels_input, name='xent')
loss = tf.reduce_mean(xent, name='loss_op')
tf.summary.scalar('loss', loss)
# We use the same optimizer and hyperparameters as used to train VGGish.
optimizer = tf.train.AdamOptimizer(
learning_rate=vggish_params.LEARNING_RATE,
epsilon=vggish_params.ADAM_EPSILON)
train_op = optimizer.minimize(loss, global_step=global_step)
# Initialize all variables in the model, and then load the pre-trained
# VGGish checkpoint.
sess.run(tf.global_variables_initializer())
vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
# The training loop.
features_input = sess.graph.get_tensor_by_name(
vggish_params.INPUT_TENSOR_NAME)
validation_accuracy_scores = []
test_accuracy_scores = []
for epoch in range(num_epochs):
epoch_loss = 0
i=0
while i < num_min_batches:
#print('mini batch'+str(i))
X_pickle_file = pickle_files_dir + 'X_train_mini_batch_' + str(i)
with open(X_pickle_file, "rb") as fp: # Unpickling
batch_x = pickle.load(fp)
y_pickle_file = pickle_files_dir + 'y_train_mini_batch_' + str(i)
with open(y_pickle_file, "rb") as fp: # Unpickling
batch_y = pickle.load(fp)
_, c = sess.run([train_op, loss], feed_dict={features_input: batch_x, labels_input: batch_y})
epoch_loss += c
i+=1
#print no. of epochs and loss
print('Epoch', epoch+1, 'completed out of', num_epochs,', loss:',epoch_loss)
#note this adds a small computational cost
correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_input, 1))
test_accuracy = tf.reduce_mean(tf.cast(correct, 'float')) #changes correct to type: float
test_accuracy1 = test_accuracy.eval({features_input:X_test, labels_input:y_test})
test_accuracy_scores.append(test_accuracy1)
print('Test accuracy:', test_accuracy1)
if __name__ == '__main__':
tf.app.run()
I am trying to feed my model with batches of data incrementally as the dataset if very large. Building on the tutorial here I have written a data generator as below:
split_index = round(len(train) * 0.9)
shuffled_train = train.sample(frac=1)
df_train = shuffled_train.iloc[:split_index]
df_val = shuffled_train.iloc[split_index:]
# Convert validation set to fixed array
x_val = df_to_data(df_val)
y_val = df_val[classes].values
def data_generator(df, batch_size, gensim_embedding_model):
"""
Given a raw dataframe, generates infinite batches of FastText vectors.
"""
batch_i = 0 # Counter inside the current batch vector
batch_x = None # The current batch's x data
batch_y = None # The current batch's y data
while True: # Loop forever
df = df.sample(frac=1) # Shuffle df each epoch
for i, row in df.iterrows():
comment = row['comment_text']
if batch_x is None:
batch_x = np.zeros((batch_size, window_length, n_features), dtype='float32')
batch_y = np.zeros((batch_size, len(classes)), dtype='float32')
batch_x[batch_i] = text_to_vector(comment, gensim_embedding_model) # LINE A
batch_y[batch_i] = row[classes].values
batch_i += 1
if batch_i == batch_size:
# Ready to yield the batch
yield batch_x, batch_y
batch_x = None
batch_y = None
batch_i = 0
Where 'LINE A' calls a function that looks up words in 'comment' in a pre-trained embedding model (gensim_embedding_model) and populates the vector.
I understand that this creates an embedding representation of the batch, which is fed into the model to train incrementally, and replaces the Keras EmbeddingLayer which tries to fit the embedding representation of the entire dataset into memory.
However, the Keras EmbeddingLayer has a parameter 'trainable', which can control if the parameters (i.e., word embedding weights in this case) should be treated 'as-is' or only as initial to be tuned. But using a data generator, I do not see how it is possible to set this parameter (or equivalent).
Am I right that if data generator is used in such a way, then the embedding weights are only used as-is and will not be possible to be further tuned? I.e., this is equivalent to 'trainable=False'. But with this implementation it is not possible to have 'trainable=True'?
Thanks
i'm struggling to understand tensorflow, and I can't find good basic examples that don't rely on the MNIST dataset. I've tried to create a classification nn for some public datasets where they provide a number of (unknown) features, and a label for each sample. There's one where they provide around 90 features of audio analysis, and the year of publication as the label. (https://archive.ics.uci.edu/ml/datasets/yearpredictionmsd)
Needless to say, I didn't manage to train the network, and little could I do for understanding the provided features.
I'm now trying to generate artificial data, and try to train a network around it. The data are pairs of number (position), and the label is 1 if that position is inside a circle of radius r around an arbitrary point (5,5).
numrows=10000
circlex=5
circley=5
circler=3
data = np.random.rand(numrows,2)*10
labels = [ math.sqrt( math.pow(x-circlex, 2) + math.pow(y-circley, 2) ) for x,y in data ]
labels = list( map(lambda x: x<circler, labels) )
If tried many combinations of network shape, parameters, optimizers, learning rates, etc (I admit the math is not strong on this one), but eithere there's no convergence, or it sucks (70% accuracy on last test).
Current version (labels converted to one_hot encoding [1,0] and [0,1] (outside, inside).
# model creation
graph=tf.Graph()
with graph.as_default():
X = tf.placeholder(tf.float32, [None, 2] )
layer1 = tf.layers.dense(X, 2)
layer2 = tf.layers.dense(layer1, 2)
Y = tf.nn.softmax(layer2)
y_true = tf.placeholder(tf.float32, [None, 2] )
loss=tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y, labels=y_true) )
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
/ predictions.shape[0])
# training
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
for step in range(1000):
_, l, predictions = session.run([optimizer,loss,Y], feed_dict={X:data, y_true:labels})
if step % 100 == 0:
print("Loss at step %d: %f" % (step, l)
print("Accuracy %f" % accuracy(predictions, labels))
The acuracy in this example is around 70% (loss around 0.6).
The question is... what am I doing wrong?
UPDATE
I'm leaving the question as originally asked. Main lessons I learned:
Normalize your input data. The mean should be around 0, and the range ~ between -1 and 1.
Blue: normalized data, Red: raw input data as created above
Batch your input data. If the subsets used are random enough, it decreases the number of iterations needed without hurting accuracy too much.
Don't forget activation functions between layers :)
The input:
Plotting the synthetic data with two classes.
Output from the code above:
All outputs are classified as a single class and because of class imbalance, accuracy is high 70%.
Issues with the code
Even though there are two layers defined, no activation function defined between the two. So tf.softmax( ((x*w1)+b1) * w2 + b2) squashes down to a single layer. There is just a single hyperplane trying to separate this input and the hyperplane lies outside the input space, thats why you get all inputs classified as a single class.
Bug: Softmax is applied twice: on the logits as well as during entropy_loss.
The entire input is given as a single batch, instead of mini-batches.
Inputs need to be normalized.
Fixing the above issues and the output becomes:
The above output makes sense, as the model has two hidden layers and so we have two hyperplanes trying to separate the data. The final layer then combines these two hyperplanes in such a way to minimize error.
Increasing the hidden layer from 2 to 3:
With 3 hidden layers, we get 3 hyperplanes and we can see the final layer adjusts these hyperplanes to separate the data well.
Code:
# Normalize data
data = (data - np.mean(data)) /np.sqrt(np.var(data))
n_hidden = 3
batch_size = 128
# Feed batch data
def get_batch(inputX, inputY, batch_size):
duration = len(inputX)
for i in range(0,duration//batch_size):
idx = i*batch_size
yield inputX[idx:idx+batch_size], inputY[idx:idx+batch_size]
# Create the graph
tf.reset_default_graph()
graph=tf.Graph()
with graph.as_default():
X = tf.placeholder(tf.float32, [None, 2] )
layer1 = tf.layers.dense(X, n_hidden, activation=tf.nn.sigmoid)
layer2 = tf.layers.dense(layer1, 2)
Y = tf.nn.softmax(layer2)
y_true = tf.placeholder(tf.int32, [None] )
loss = tf.losses.sparse_softmax_cross_entropy(logits=layer2, labels=y_true)
optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(Y, 1),tf.argmax(tf.one_hot(y_true,2), 1)), tf.float32))
# training
with tf.Session(graph=graph) as session:
session.run(tf.global_variables_initializer())
for epoch in range(10):
acc_avg = 0.
loss_avg = 0.
for step in range(10000//batch_size):
for inputX, inputY in get_batch(data, labels, batch_size):
_, l, acc = session.run([optimizer,loss,accuracy], feed_dict={X:inputX, y_true:inputY})
acc_avg += acc
loss_avg += l
print("Loss at step %d: %f" % (step, loss_avg*batch_size/10000))
print("Accuracy %f" % (acc_avg*batch_size/10000))
#Get prediction
pred = session.run(Y, feed_dict={X:data})
# Plotting function
import matplotlib.pylab as plt
plt.scatter(data[:,0], data[:,1], s=20, c=np.argmax(pred,1), cmap='jet', vmin=0, vmax=1)
plt.show()
I have a dataset with 5 columns, I am feeding in first 3 columns as my Inputs and the other 2 columns as my outputs.
I have successfully executed the program but i am not sure how to test the model by giving my own values as input and getting a predicted output from the model.
Can anyone please help me, How can I actually test the model with my own value after training is done ?
I am using Tensorflow in Python. I am able to display accuracy of testing,but How do I actually predict with value if I pass some random input (here,I need to pass 3 input values to get 2 output values)
Here is my code:
# Implementation of a simple MLP network with one hidden layer. Tested on the iris data set.
# Requires: numpy, sklearn>=0.18.1, tensorflow>=1.0
# NOTE: In order to make the code simple, we rewrite x * W_1 + b_1 = x' * W_1'
# where x' = [x | 1] and W_1' is the matrix W_1 appended with a new row with elements b_1's.
# Similarly, for h * W_2 + b_2
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
RANDOM_SEED = 1000
tf.set_random_seed(RANDOM_SEED)
def init_weights(shape):
""" Weight initialization """
weights = tf.random_normal(shape, stddev=0.1)
return tf.Variable(weights)
def forwardprop(X, w_1, w_2):
"""
Forward-propagation.
IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
"""
h = tf.nn.sigmoid(tf.matmul(X, w_1)) # The \sigma function
yhat = tf.matmul(h, w_2) # The \varphi function
return yhat
def get_iris_data():
""" Read the iris data set and split them into training and test sets """
df = pd.read_csv("H:\MiniThessis\Sample.csv")
train_X = np.array(df[df.columns[0:3]])
train_Y = np.array(df[df.columns[3:]])
print(train_X)
# Convert into one-hot vectors
#num_labels = len(np.unique(train_Y))
#all_Y = np.eye(num_labels)[train_Y] # One liner trick!
#print()
return train_test_split(train_X, train_Y, test_size=0.33, random_state=RANDOM_SEED)
def main():
train_X, test_X, train_y, test_y = get_iris_data()
# Layer's sizes
x_size = train_X.shape[1] # Number of input nodes: 4 features and 1 bias
h_size = 256 # Number of hidden nodes
y_size = train_y.shape[1] # Number of outcomes (3 iris flowers)
# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
# Weight initializations
w_1 = init_weights((x_size, h_size))
w_2 = init_weights((h_size, y_size))
# Forward propagation
yhat = forwardprop(X, w_1, w_2)
predict = tf.argmax(yhat, axis=1)
# Backward propagation
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(3):
# Train with each example
for i in range(len(train_X)):
sess.run(updates, feed_dict={X: train_X[i: i + 1], y: train_y[i: i + 1]})
train_accuracy = np.mean(np.argmax(train_y, axis=1) == sess.run(predict, feed_dict={X: train_X, y: train_y}))
test_accuracy = np.mean(np.argmax(test_y, axis=1) ==sess.run(predict, feed_dict={X: test_X, y: test_y}))
print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
% (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))
correct_Prediction = tf.equal((tf.arg_max(predict,1)),(tf.arg_max(y,1)))
best = sess.run([predict], feed_dict={X: np.array([[20.14, 46.93, 1014.66]])})
#print(correct_Prediction)
print(best)
sess.close()
if __name__ == '__main__':
main()
Note, this answer takes some assumptions on the input, which you did not provide in your original question.
At this point I think your homework will require some more work to be finished. The 4th and 5th column are probably the petal width (a real value) and the Iris type (encoded as a category). This means that as a result you will probably need a real valued output (petal width) and a categorical prediction. This means that your softmax_cross_entry_with_logits will probably not play nicely with the petal width. Also you are applying an argmax as part of the prediction and this willl return the index with the highest value (in this case the petal width or the catergorically encoded value) which also does not make sense. So as a debugging aid why not start with:
print(sess.run([yhat], feed_dict={X: np.array([[20.14, 46.93, 1014.66]])})
This will print out yhat, (based upon the inputs [20.14, 46.93, 1014.66] (that's a massive flower, completely unlike anything inside the dataset) and will contain 2 outputs.
I'm struggling with solving this issue and I believe it is due to my data. I'm thinking about this as a few to many regression problem, but there could be a better approach in tensorflow.
Training Data
I have some data generated from a video sequence. For each frame of video I have a distribution of x,y positions for each cluster. There are 157,110 frames and 200,000 clusters. The frames and clusters are the inputs, which are integers and I think could be considered labels (I'll be using another network to learn the sequences of clusters later on). As each histogram is related to both a frame and clusterID, the input is not "one hot". The histograms (outputs) have 19+8 (x+y) bins where each count is rarely above 10, and could be normalized.
A subset of the training data is available here: The first two columns are the frame and clusterID (inputs) and the remaining 19+8 columns are the histograms (outputs).
What is the best network to learn to generate the appropriate histogram for a given frame/clusterID pair?
The following code is my current attempt using an MLP. It does not converge; in fact cost does not decrease at all. Is there something wrong in my implementation, or my choice of MLP, or a lack of scaling in my input data?
#!/usr/bin/python
# This program uses tensorflow to learn cluster probabilities and associate them with frame and cluster IDs
# Arguments
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("clusterProbabilityfile", help="CSV file containing cluster probabilities")
parser.add_argument("trainingIterations", type=int, help="CSV file containing cluster probabilities")
args = parser.parse_args()
# Imports for ML
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import dtypes
# Imports for loading CSV file
from tensorflow.python.platform import gfile
import csv
# Global vars
numInputUnits = 2;
numOutputUnits = 19+8
numHiddenUnits = (numOutputUnits-numInputUnits)/2
workingDirectory = args.clusterProbabilityfile.split('/')[0]+"/"
columnSplit = 2 # Column number that splits
# Shuffle training set
def shuffleTrainingSet(trainingSet):
trainingIndecies = np.arange(len(trainingSet.data)) # assumes len(data) == len(target)
np.random.shuffle(trainingIndecies) # shuffle indecies
data = trainingSet.data[trainingIndecies]
target = trainingSet.target[trainingIndecies]
training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
return training_set
# Load training data from CSV file, convert to numpy arrays and construct Dataset
# Modified from tf.contrib.learn.datasets.base.load_csv_without_header
# Should these be randomized???
with gfile.Open(args.clusterProbabilityfile) as csv_file:
data_file = csv.reader(csv_file)
data, target = [], []
for row in data_file:
target.append(row[columnSplit+1:]) # All elements past the split column.
data.append(row[:columnSplit]) # All elements before and including the split column.
target = np.array(target, dtype=int)
data = np.array(data, dtype=int)
training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
training_set = shuffleTrainingSet(training_set)
# Construct computation graph
# MLP approach (from https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/multilayer_perceptron.py)
# Single hidden layer!
inputVec = tf.placeholder(tf.float32, [None, numInputUnits])
outputVec = tf.placeholder(tf.float32, [None, numOutputUnits])
# Weights
hiddenWeights = tf.Variable(tf.random_normal([numInputUnits, numHiddenUnits])) # inputUnits -> hiddenUnits
outputWeights = tf.Variable(tf.random_normal([numHiddenUnits, numOutputUnits])) # hiddenUnits -> outputUnits
# Biases
hiddenBiases = tf.Variable(tf.random_normal([numHiddenUnits]))
outputBiases = tf.Variable(tf.random_normal([numOutputUnits]))
# Contruct MLP from layers
hiddenLayer = tf.add(tf.matmul(inputVec, hiddenWeights), hiddenBiases) # input * weight + bias = hidden
hiddenLayer = tf.nn.relu(hiddenLayer) # RELU Activation function for hidden layer.
outputLayer = tf.add(tf.matmul(hiddenLayer, outputWeights), outputBiases) # hidden * weight + bias = output
# loss and optimizer
#cross_entropy = -(outputVec * tf.log(outputLayer) + (1 - outputVec) * tf.log(1 - outputLayer))
#cost = tf.reduce_mean(cross_entropy)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(outputLayer, outputVec))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
# Compute graph
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for epoch in range(args.trainingIterations):
training_set = shuffleTrainingSet(training_set) # Reshuffle for each epoch.
epochCost = sess.run(cost, feed_dict={inputVec: training_set.data, outputVec: training_set.target})
print("{:d}\t{:f}".format(epoch, epochCost))
# Evaluate model
correct_prediction = tf.equal(tf.argmax(outputLayer,1), tf.argmax(outputVec,1)) # compare output layer with target output vector.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Cost:", sess.run(cost,feed_dict={inputVec: training_set.data, outputVec: training_set.target}))
print("Accuracy:", sess.run(accuracy,feed_dict={inputVec: training_set.data, outputVec: training_set.target}))