Inferior performance of Tensorflow compared to sklearn - python

I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Activation
layer_1_relu = tf.nn.relu(layer_1)
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(n_train/batch_size)
# Loop over all batches
ptr = 0
for i in range(total_batch):
next_ptr = ptr + batch_size
if next_ptr > len(x_train):
next_ptr = len(x_train)
batch_x, batch_y = x_train[ptr:next_ptr],y_train[ptr:next_ptr]
ptr += batch_size
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
print("Optimization Finished!")
print("Accuracy on training set: ", accuracy.eval({X:x_train,Y:y_train}))
print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))
print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a github repository. For this testing, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following is the result for the both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
I wonder if sklearn has done a magic implementation over tensorflow? Can anyone help answer?
Thank you very much.

Related

Neural network for regression not learning in tensorflow

I've modified tensor flow example to fit on my data, given here: data
But my neural network is not learning at all, I tried to use different no. of hidden layers, learning rate and optimization functions, but it didn't help.My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
x = np.zeros(shape=(1813,3))
y = np.zeros(shape=(1813))
for i in range(1813):
# Retrieve a single instance:
x1, y1 = sess.run([features, col4])
x[i] = x1
y[i] = y1
coord.request_stop()
coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and result looks like that: (label value = expected result)
result

Does anyone know how to allocate more memory for the graph? Tensorflow: "ValueError: GraphDef cannot be larger than 2GB."

I am learning how to do transfer learning from an already trained AlexNet on images. I am striping off the last fully connected layer in AlexNet and creating my own layer with the number of classes in my traffic signs labels. I don't want to retrain the layers previous to this new layer only the new layer itself.
I am having trouble finding the bug that is causing this error. I have searched the web for solutions, but I don't believe any of them apply.
Please take a moment and see if you can find where I am creating the extra nodes that are causing the Graph to grow. Thanks!
import pickle
import tensorflow as tf
from sklearn.model_selection import train_test_split
from alexnet import AlexNet
import os
import numpy as np
# TODO: Load traffic signs data.
# Load pickled data
# Where training and testing data is saved.
training_file = os.getcwd()+"/train.p"
print(training_file)
with open(training_file, mode='rb') as f:
train = pickle.load(f)
X_train, y_train = train['features'], train['labels']
print('Train data shape = ', X_train.shape)
X_train_original = np.copy(X_train)
# TODO: Split data into training and validation sets.
X_train, X_valid, y_train, y_valid = train_test_split(X_train,
y_train,
train_size=.8,
random_state=42)
# TODO: Define placeholders and resize operation.
nb_classes = 43
x = tf.placeholder(tf.float32, (None, 32, 32, 3), name='x')
y = tf.placeholder(tf.int32, (None), name='y')
one_hot_y = tf.one_hot(y, 43, name='one_hot_y')
resized = tf.image.resize_images(x, (227, 227))
print('Resized data shape = ', resized.shape)
# TODO: pass placeholder as first argument to `AlexNet`.
fc7 = AlexNet(resized, feature_extract=True)
# NOTE: `tf.stop_gradient` prevents the gradient from flowing backwards
# past this point, keeping the weights before and up to `fc7` frozen.
# This also makes training faster, less work to do!
fc7 = tf.stop_gradient(fc7)
# TODO: Add the final layer for traffic sign classification.
shape = (fc7.get_shape().as_list()[-1], nb_classes) # use this shape for the weight matrix
#fc8W = np.random.normal(0, .15, size=(shape[0], shape[1])).astype(np.float32)
#print(fc8W)
#fc8b = np.random.normal(0, .15, size=(shape[1])).astype(np.float32)
#print(fc8b)
mu = 0
sigma = 0.05
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2))
fc8b = tf.Variable(tf.zeros(nb_classes))
#logits = tf.matmul(fc7, fc8W) + fc8b; print(logits)
logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
#probs = tf.nn.softmax(logits)
# TODO: Define loss, training, accuracy operations.
# HINT: Look back at your traffic signs project solution, you may
# be able to reuse some the code.
rate = 0.001
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)
# TODO: Train and evaluate the feature extraction model.
#Shuffle data
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)
#Epochs for training and batch sizes defined.
EPOCHS = 10
BATCH_SIZE = 128
### Evaluation function.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#saver = tf.train.Saver()
def evaluate(X_data, y_data):
num_examples = len(X_data)
total_accuracy = 0
sess = tf.get_default_session()
for offset in range(0, num_examples, BATCH_SIZE):
batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
total_accuracy += (accuracy * len(batch_x))
return total_accuracy / num_examples
### Training function.
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
num_examples = len(X_train)
print("Training...")
print()
for i in range(EPOCHS):
X_train, y_train = shuffle(X_train, y_train)
for offset in range(0, num_examples, BATCH_SIZE):
end = offset + BATCH_SIZE
batch_x, batch_y = X_train[offset:end], y_train[offset:end]
sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
train_accuracy = evaluate(X_train, y_train)
validation_accuracy = evaluate(X_valid, y_valid)
print("EPOCH {} ...".format(i+1))
print('Training Accuracy = {:.3f}'.format(train_accuracy))
print("Validation Accuracy = {:.3f}".format(validation_accuracy))
print()

Feeding timeseries data into Tensorflow for LSTM classifier training

I have a dataframe of shape (38307, 26) with timestamp as index:
I'm trying to implement a LSTM classifier but I'm struggling to feed it into the DataFlow
The final arrays I'm trying to feed are of shape '(X_train = (38307, 25), y_train = (38307, 2))'
I have added the code in case
# Parametres
learning_rate = 0.001
training_epochs = 100
batch_size = 128
display_step = 10
# Network Parameters
n_input = 25 # features= 25
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2 # Binary classification
# TF Graph input
x = tf.placeholder("float32", [None, n_steps, n_input])
y = tf.placeholder("float32", [None, n_classes])
# TF Weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = RNN(x, weights, biases)
# Initialize the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(len(X_train)/batch_size)
X_batches = np.array_split(X_train, total_batch)
Y_batches = np.array_split(y_train, total_batch)
#Loop over all batches
for i in range(total_batch):
batch_x, batch_y = X_batches[i], Y_batches[i]
# batch_y.shape = (batch_y.shape[0]), 1)
# Run optimization op (backprop) and cost op(to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
#Display logs per epoch step
if epoch % display_step == 0:
print(("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost)))
print('Optimization finished')
# Store session for analysis with TensorBoard
writer = tf.summary.FileWriter("/tmp/test", sess.graph)
#Test model
print("Accuracy:", accuracy.eval({x: X_test, y: y_test}))
global result
result = tf.argmax(pred, 1).eval({x: X_test, y: y_test})
EDIT the RNN function:
def RNN(x, weights, biases):
# Prepare data shape to match 'rnn' function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required Shape: 'n_steps' tensors list of shape (batch size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# x = tf.split(x, n_steps, 0) # Syntax change this version
# LSTM tensorflow using rnn from tensorflow.contrib
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get LSTM cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
Unfortunately, the most important part of your code, is hidden in the RNN function.
Some tips to help you out: I guess you are trying to build a dynamic RNN... (is that correct? ) In that case, a common mistake I see is that people confuse the time major and batch major setting of these RNNs. In other words, is you input data [batch,time,variables], or [time,batch,variables].
More about this can be found here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md

Neural network with a CSV file in TensorFlow

I am using two tutorials to figure out how to take a CVS file of format:
feature1,feature2....feature20,label
feature1,feature2....feature20,label
...
and train a neural network on it. What I do in the code below is read in the CVS file and group 100 lines at a time into batches: x_batch and y_batch. Next, i try to have the NN learn in batches. However, I get the following error:
"ValueError: Cannot feed value of shape (99,) for Tensor 'Placeholder_1:0', which has shape '(?, 4)'"
I am wondering what i am doing wrong and what another approach might be.
import tensorflow as tf
filename_queue = tf.train.string_input_producer(["VOL_TRAIN.csv"])
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)
# Type information and column names based on the decoded CSV.
[[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[""]]
record_defaults = [[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0]]
in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15,in16,in17,in18,in19,in20,out = \
tf.decode_csv(csv_row, record_defaults=record_defaults)
# Turn the features back into a tensor.
features = tf.pack([in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15,in16,in17,in18,in19,in20])
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1
num_examples= 33500
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 20 # MNIST data input (img shape: 28*28)
n_classes = 4 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Output layer with linear activation
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
#tf.initialize_all_variables().run()
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(num_examples/batch_size)
# Loop over all batches
for i in range(total_batch):
batch_x = []
batch_y = []
for iteration in range(1, batch_size):
example, label = sess.run([features, out])
batch_x.append(example)
batch_y.append(label)
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("Optimization Finished!")
coord.request_stop()
coord.join(threads)
Your placeholder y specifies you input an array of unknown length, with arrays of length "n_classes" (which is 4). In your feed_dict you give the array batch_y, which is an array of length 99 (your batch_size) with numbers.
What you want to do is change your batch_y variable to have one-hot vectors as input. Please let me know if this works!

Tensorflow: Numpy equivalent of tf batches and reshape

I am trying to classify some images using Tensorflow using the LSTM method in image classification with one-hot encoding output and a softmax classifier at the last LSTM output. My dataset is CSV and had to research a lot in Numpy and Tensorflow on how to do some modifications. I'm still getting an error:
AttributeError: 'numpy.ndarray' object has no attribute 'next_batch'
which if you will see, i can't use next_batch(batch_size) along with my dataset and also the next tf.reshape needs to be replaced with its Numpy equivalent.
My question: How should I correct these 2 issues?
'''
Tensorflow LSTM classification of 16x30 images.
'''
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
from numpy import genfromtxt
from sklearn.cross_validation import train_test_split
import pandas as pd
'''
a Tensorflow LSTM that will sequentially input several lines from each single image
i.e. The Tensorflow graph will take a flat (1,480) features image as it was done in Multi-layer
perceptron MNIST Tensorflow tutorial, but then reshape it in a sequential manner with 16 features each and 30 time_steps.
'''
blaine = genfromtxt('./Desktop/Blaine_CSV_lstm.csv',delimiter=',') # CSV transform to array
target = [row[0] for row in blaine] # 1st column in CSV as the targets
data = blaine[:, 1:480] #flat feature vectors
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.05, random_state=42)
f=open('cs-training.csv','w') #1st split for training
for i,j in enumerate(X_train):
k=np.append(np.array(y_train[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
f=open('cs-testing.csv','w') #2nd split for test
for i,j in enumerate(X_test):
k=np.append(np.array(y_test[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
ss = pd.Series(y_train) #indexing series needed for later Pandas Dummies one-hot vectors
gg = pd.Series(y_test)
new_data = genfromtxt('cs-training.csv',delimiter=',') # Training data
new_test_data = genfromtxt('cs-testing.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in new_data])
y_train_onehot = pd.get_dummies(ss)
x_test=np.array([ i[1::] for i in new_test_data])
y_test_onehot = pd.get_dummies(gg)
# General Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Tensorflow LSTM Network Parameters
n_input = 16 # MNIST data input (img shape: 28*28)
n_steps = 30 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 20 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
x_train, y_train = new_data.next_batch(batch_size)
# Reshape data to get 30 seq of 16 elements
x_train = x_train.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: x_train, y: y_train})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: x_train, y: y_train})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: x_train, y: y_train})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
You can make your own function called next batch that given a numpy array and indices will return that slice of the numpy array for you.
def nextbatch(x,i,j):
return x[i:j,...]
You could also pass in what step you are in and maybe do modulo but this is the basic that will get it to work.
As for the resphape use:
x_train = np.reshape(x_train,(batch_size, n_steps, n_input))

Categories

Resources