I am trying to classify some images using Tensorflow using the LSTM method in image classification with one-hot encoding output and a softmax classifier at the last LSTM output. My dataset is CSV and had to research a lot in Numpy and Tensorflow on how to do some modifications. I'm still getting an error:
AttributeError: 'numpy.ndarray' object has no attribute 'next_batch'
which if you will see, i can't use next_batch(batch_size) along with my dataset and also the next tf.reshape needs to be replaced with its Numpy equivalent.
My question: How should I correct these 2 issues?
'''
Tensorflow LSTM classification of 16x30 images.
'''
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
from numpy import genfromtxt
from sklearn.cross_validation import train_test_split
import pandas as pd
'''
a Tensorflow LSTM that will sequentially input several lines from each single image
i.e. The Tensorflow graph will take a flat (1,480) features image as it was done in Multi-layer
perceptron MNIST Tensorflow tutorial, but then reshape it in a sequential manner with 16 features each and 30 time_steps.
'''
blaine = genfromtxt('./Desktop/Blaine_CSV_lstm.csv',delimiter=',') # CSV transform to array
target = [row[0] for row in blaine] # 1st column in CSV as the targets
data = blaine[:, 1:480] #flat feature vectors
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.05, random_state=42)
f=open('cs-training.csv','w') #1st split for training
for i,j in enumerate(X_train):
k=np.append(np.array(y_train[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
f=open('cs-testing.csv','w') #2nd split for test
for i,j in enumerate(X_test):
k=np.append(np.array(y_test[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
ss = pd.Series(y_train) #indexing series needed for later Pandas Dummies one-hot vectors
gg = pd.Series(y_test)
new_data = genfromtxt('cs-training.csv',delimiter=',') # Training data
new_test_data = genfromtxt('cs-testing.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in new_data])
y_train_onehot = pd.get_dummies(ss)
x_test=np.array([ i[1::] for i in new_test_data])
y_test_onehot = pd.get_dummies(gg)
# General Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
# Tensorflow LSTM Network Parameters
n_input = 16 # MNIST data input (img shape: 28*28)
n_steps = 30 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 20 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
x_train, y_train = new_data.next_batch(batch_size)
# Reshape data to get 30 seq of 16 elements
x_train = x_train.reshape((batch_size, n_steps, n_input))
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: x_train, y: y_train})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: x_train, y: y_train})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: x_train, y: y_train})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
You can make your own function called next batch that given a numpy array and indices will return that slice of the numpy array for you.
def nextbatch(x,i,j):
return x[i:j,...]
You could also pass in what step you are in and maybe do modulo but this is the basic that will get it to work.
As for the resphape use:
x_train = np.reshape(x_train,(batch_size, n_steps, n_input))
Related
I have trained model in tensorflow as follows :
batch_size = 128
graph = tf.Graph()
with graph.as_default():
# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, image_size * image_size))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_valid_dataset = tf.constant(valid_dataset)
tf_test_dataset = tf.constant(test_dataset)
# Variables.
weights = tf.Variable(
tf.truncated_normal([image_size * image_size, num_labels]))
biases = tf.Variable(tf.zeros([num_labels]))
# Training computation.
logits = tf.matmul(tf_train_dataset, weights) + biases
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(
tf.matmul(tf_valid_dataset, weights) + biases)
test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 3001
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print("Initialized")
for step in range(num_steps):
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
_, l, predictions = session.run(
[optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Validation accuracy: %.1f%%" % accuracy(
valid_prediction.eval(), valid_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
Now I want to use a single image as input which is going to be reshaped to same format as my training image and get prediction for 10 classes as probabilities. This question has been asked multiple times and I have hard time understating their solutions, one of the best answers is to use this code :
feed_dict = {x: [your_image]}
classification = tf.run(y, feed_dict)
print classification
What is x and y equivalent in my code? lets assume I pick one of images from test dataset to predict as:
img = train_dataset[678]
And I am expecting an array with 10 probabilities.
Let me answer to my own question:
First these lines of code must be changed, we have to use None instead of const batch size so we later can feed single image as input:
tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size * image_size),name="train_to_restore")
tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels))
and inside the session I used this code to feed new image to the model:
from skimage import io
img = io.imread('newimage.png', as_grey=True)
nx, ny = img.shape
img_flat = img.reshape(nx * ny)
IMG = np.reshape(img,(1,784))
answer = session.run(train_prediction, feed_dict={tf_train_dataset: IMG})
print(answer)
My images are 28*28 in training set so make sure your new image is also 28*28 the you have to flatten it to 1*784 and give it to your model and receive probabilities for prediction
You could also use tf.keras.utils.load_img. This allows you to import a single image and then have your model make a prediction on it.
This link will show you the arguments to pass in and what they mean:
https://www.tensorflow.org/api_docs/python/tf/keras/utils/load_img
Here's an example of it being used. Really all you have to do is change the file path from the tutorial:
https://www.tensorflow.org/tutorials/images/classification#predict_on_new_data
I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Activation
layer_1_relu = tf.nn.relu(layer_1)
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(n_train/batch_size)
# Loop over all batches
ptr = 0
for i in range(total_batch):
next_ptr = ptr + batch_size
if next_ptr > len(x_train):
next_ptr = len(x_train)
batch_x, batch_y = x_train[ptr:next_ptr],y_train[ptr:next_ptr]
ptr += batch_size
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
print("Optimization Finished!")
print("Accuracy on training set: ", accuracy.eval({X:x_train,Y:y_train}))
print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))
print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a github repository. For this testing, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following is the result for the both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
I wonder if sklearn has done a magic implementation over tensorflow? Can anyone help answer?
Thank you very much.
I've modified tensor flow example to fit on my data, given here: data
But my neural network is not learning at all, I tried to use different no. of hidden layers, learning rate and optimization functions, but it didn't help.My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
x = np.zeros(shape=(1813,3))
y = np.zeros(shape=(1813))
for i in range(1813):
# Retrieve a single instance:
x1, y1 = sess.run([features, col4])
x[i] = x1
y[i] = y1
coord.request_stop()
coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
and result looks like that: (label value = expected result)
result
I am learning how to do transfer learning from an already trained AlexNet on images. I am striping off the last fully connected layer in AlexNet and creating my own layer with the number of classes in my traffic signs labels. I don't want to retrain the layers previous to this new layer only the new layer itself.
I am having trouble finding the bug that is causing this error. I have searched the web for solutions, but I don't believe any of them apply.
Please take a moment and see if you can find where I am creating the extra nodes that are causing the Graph to grow. Thanks!
import pickle
import tensorflow as tf
from sklearn.model_selection import train_test_split
from alexnet import AlexNet
import os
import numpy as np
# TODO: Load traffic signs data.
# Load pickled data
# Where training and testing data is saved.
training_file = os.getcwd()+"/train.p"
print(training_file)
with open(training_file, mode='rb') as f:
train = pickle.load(f)
X_train, y_train = train['features'], train['labels']
print('Train data shape = ', X_train.shape)
X_train_original = np.copy(X_train)
# TODO: Split data into training and validation sets.
X_train, X_valid, y_train, y_valid = train_test_split(X_train,
y_train,
train_size=.8,
random_state=42)
# TODO: Define placeholders and resize operation.
nb_classes = 43
x = tf.placeholder(tf.float32, (None, 32, 32, 3), name='x')
y = tf.placeholder(tf.int32, (None), name='y')
one_hot_y = tf.one_hot(y, 43, name='one_hot_y')
resized = tf.image.resize_images(x, (227, 227))
print('Resized data shape = ', resized.shape)
# TODO: pass placeholder as first argument to `AlexNet`.
fc7 = AlexNet(resized, feature_extract=True)
# NOTE: `tf.stop_gradient` prevents the gradient from flowing backwards
# past this point, keeping the weights before and up to `fc7` frozen.
# This also makes training faster, less work to do!
fc7 = tf.stop_gradient(fc7)
# TODO: Add the final layer for traffic sign classification.
shape = (fc7.get_shape().as_list()[-1], nb_classes) # use this shape for the weight matrix
#fc8W = np.random.normal(0, .15, size=(shape[0], shape[1])).astype(np.float32)
#print(fc8W)
#fc8b = np.random.normal(0, .15, size=(shape[1])).astype(np.float32)
#print(fc8b)
mu = 0
sigma = 0.05
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2))
fc8b = tf.Variable(tf.zeros(nb_classes))
#logits = tf.matmul(fc7, fc8W) + fc8b; print(logits)
logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
#probs = tf.nn.softmax(logits)
# TODO: Define loss, training, accuracy operations.
# HINT: Look back at your traffic signs project solution, you may
# be able to reuse some the code.
rate = 0.001
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)
# TODO: Train and evaluate the feature extraction model.
#Shuffle data
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)
#Epochs for training and batch sizes defined.
EPOCHS = 10
BATCH_SIZE = 128
### Evaluation function.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#saver = tf.train.Saver()
def evaluate(X_data, y_data):
num_examples = len(X_data)
total_accuracy = 0
sess = tf.get_default_session()
for offset in range(0, num_examples, BATCH_SIZE):
batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
total_accuracy += (accuracy * len(batch_x))
return total_accuracy / num_examples
### Training function.
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
num_examples = len(X_train)
print("Training...")
print()
for i in range(EPOCHS):
X_train, y_train = shuffle(X_train, y_train)
for offset in range(0, num_examples, BATCH_SIZE):
end = offset + BATCH_SIZE
batch_x, batch_y = X_train[offset:end], y_train[offset:end]
sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
train_accuracy = evaluate(X_train, y_train)
validation_accuracy = evaluate(X_valid, y_valid)
print("EPOCH {} ...".format(i+1))
print('Training Accuracy = {:.3f}'.format(train_accuracy))
print("Validation Accuracy = {:.3f}".format(validation_accuracy))
print()
I have a dataframe of shape (38307, 26) with timestamp as index:
I'm trying to implement a LSTM classifier but I'm struggling to feed it into the DataFlow
The final arrays I'm trying to feed are of shape '(X_train = (38307, 25), y_train = (38307, 2))'
I have added the code in case
# Parametres
learning_rate = 0.001
training_epochs = 100
batch_size = 128
display_step = 10
# Network Parameters
n_input = 25 # features= 25
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2 # Binary classification
# TF Graph input
x = tf.placeholder("float32", [None, n_steps, n_input])
y = tf.placeholder("float32", [None, n_classes])
# TF Weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = RNN(x, weights, biases)
# Initialize the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(len(X_train)/batch_size)
X_batches = np.array_split(X_train, total_batch)
Y_batches = np.array_split(y_train, total_batch)
#Loop over all batches
for i in range(total_batch):
batch_x, batch_y = X_batches[i], Y_batches[i]
# batch_y.shape = (batch_y.shape[0]), 1)
# Run optimization op (backprop) and cost op(to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
#Display logs per epoch step
if epoch % display_step == 0:
print(("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost)))
print('Optimization finished')
# Store session for analysis with TensorBoard
writer = tf.summary.FileWriter("/tmp/test", sess.graph)
#Test model
print("Accuracy:", accuracy.eval({x: X_test, y: y_test}))
global result
result = tf.argmax(pred, 1).eval({x: X_test, y: y_test})
EDIT the RNN function:
def RNN(x, weights, biases):
# Prepare data shape to match 'rnn' function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required Shape: 'n_steps' tensors list of shape (batch size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# x = tf.split(x, n_steps, 0) # Syntax change this version
# LSTM tensorflow using rnn from tensorflow.contrib
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get LSTM cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
Unfortunately, the most important part of your code, is hidden in the RNN function.
Some tips to help you out: I guess you are trying to build a dynamic RNN... (is that correct? ) In that case, a common mistake I see is that people confuse the time major and batch major setting of these RNNs. In other words, is you input data [batch,time,variables], or [time,batch,variables].
More about this can be found here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md