Neural network for regression not learning in tensorflow - python

I've modified a TensorFlow example to fit my data, given here: data
But my neural network is not learning at all. I have tried different numbers of hidden layers, learning rates, and optimization functions, but it didn't help. My code is given below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn import datasets, linear_model
from sklearn import cross_validation
from sklearn import preprocessing
import numpy as np
filename_queue = tf.train.string_input_producer(["file0.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0], [0], [0], [0]]
col1, col2, col3, col4 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3])
with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    x = np.zeros(shape=(1813, 3))
    y = np.zeros(shape=(1813))
    for i in range(1813):
        # Retrieve a single instance:
        x1, y1 = sess.run([features, col4])
        x[i] = x1
        y[i] = y1
    coord.request_stop()
    coord.join(threads)
#standard_scaler = preprocessing.StandardScaler()
#x = standard_scaler.fit_transform(x)
# Split in test and train data
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(x, y, test_size=0.2)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 5
display_step = 1
# Network Parameters
n_hidden_1 = 50
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None, 3])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
    return out_layer
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
#reshape(pred, [-1])
tf.shape(pred)
tf.shape(y)
print("Prediction matrix:", pred)
print("Output matrix:", y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len / batch_size)
        print(total_batch)
        # Loop over all batches
        for i in range(total_batch - 1):
            batch_x = X_train[i * batch_size:(i + 1) * batch_size]
            batch_y = Y_train[i * batch_size:(i + 1) * batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x,
                                                                   y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
            # sample prediction
            label_value = batch_y
            estimate = p
            err = label_value - estimate
        print("num batch:", total_batch)
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=",
                  "{:.9f}".format(avg_cost))
            print("[*]----------------------------")
            for i in range(5):
                print("label value:", label_value[i],
                      "estimated value:", estimate[i])
            print("[*]============================")
    print("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(y))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
The result looks like this (label value = expected result): result

Related

tensorflow GradientDescentOptimizer not updating variables?

I'm new to machine learning. I started with the simplest example: classifying MNIST handwritten images with softmax and gradient descent. By referencing some other examples, I came up with my own logistic regression below:
import tensorflow as tf
import numpy as np
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.float32(x_train / 255.0)
x_test = np.float32(x_test / 255.0)
X = tf.placeholder(tf.float32, [None, 28, 28])
Y = tf.placeholder(tf.uint8, [100])
XX = tf.reshape(X, [-1, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
def err(x, y):
    predictions = tf.matmul(x, W) + b
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.reshape(y, [-1, 1]), logits=predictions))
    # value = tf.reduce_mean(y * tf.log(predictions))
    # loss = -tf.reduce_mean(tf.one_hot(y, 10) * tf.log(predictions)) * 100.
    return loss
# cost = err(np.reshape(x_train[:100], (-1, 784)), y_train[:100])
cost = err(tf.reshape(X, (-1, 784)), Y)
optimizer = tf.train.GradientDescentOptimizer(0.005).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# temp = sess.run(tf.matmul(XX, W) + b, feed_dict={X: x_train[:100]})
temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)
# print(temp.dtype)
# print(type(temp))
for i in range(100):
    sess.run(optimizer, feed_dict={X: x_train[i * 100: 100 * (i + 1)], Y: y_train[i * 100: 100 * (i + 1)]})
    # sess.run(optimizer, feed_dict={X: x_train[: 100], Y: y_train[:100]})
temp = sess.run(cost, feed_dict={X: x_train[:100], Y: y_train[:100]})
print(temp)
sess.close()
I tried running the optimizer for some iterations, feeding it training image data and labels. In my understanding, the variables W and b should be updated during the optimizer runs, so the model would produce different results before and after training. But with this code, the printed costs before and after the optimizer runs were the same. What could be wrong here?
You are initializing the weight matrix W with zeros, and as a result all parameters receive the same gradient value at each weight update. For weight initialization use tf.truncated_normal(), tf.random_normal(), tf.contrib.layers.xavier_initializer(), or something else, but not zeros.
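For example, a minimal version of that change could look like the following (the stddev value of 0.1 is an arbitrary choice for illustration, not taken from the question):

W = tf.Variable(tf.truncated_normal([784, 10], stddev=0.1))  # non-zero random init breaks the symmetry
b = tf.Variable(tf.zeros([10]))  # biases may stay at zero; only the weights need a non-zero start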
This is a similar question.

Inferior performance of Tensorflow compared to sklearn

I'm comparing the performance of Tensorflow with sklearn on two datasets:
A toy dataset in sklearn
MNIST dataset
Here is my code (Python):
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
from sklearn.datasets import load_digits
import numpy as np
# digits = load_digits()
# data = digits.data
# labels = digits.target
# convert to binary labels
# y = np.zeros((labels.shape[0],10))
# y[np.arange(labels.shape[0]),labels] = 1
x_train = mnist.train.images
y_train = mnist.train.labels
x_test = mnist.test.images
y_test = mnist.test.labels
n_train = mnist.train.images.shape[0]
# import pdb;pdb.set_trace()
# Parameters
learning_rate = 1e-3
lambda_val = 1e-5
training_epochs = 30
batch_size = 200
display_step = 1
# Network Parameters
n_hidden_1 = 300 # 1st layer number of neurons
n_input = x_train.shape[1] # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 300 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Activation
    layer_1_relu = tf.nn.relu(layer_1)
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_1_relu, weights['out']) + biases['out']
    return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) + lambda_val*tf.nn.l2_loss(weights['h1']) + lambda_val*tf.nn.l2_loss(weights['out'])
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_train / batch_size)
        # Loop over all batches
        ptr = 0
        for i in range(total_batch):
            next_ptr = ptr + batch_size
            if next_ptr > len(x_train):
                next_ptr = len(x_train)
            batch_x, batch_y = x_train[ptr:next_ptr], y_train[ptr:next_ptr]
            ptr += batch_size
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")
    print("Accuracy on training set: ", accuracy.eval({X: x_train, Y: y_train}))
    print("Accuracy on testing set:", accuracy.eval({X: x_test, Y: y_test}))
print("Experimenting sklearn...")
# now experiment with sklearn
from sklearn.datasets import load_digits
import numpy as np
from sklearn.neural_network import MLPClassifier
import time
# use MLP
t_start = time.time()
print('fitting MLP...')
clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(300,),max_iter=training_epochs)
clf.fit(x_train,y_train)
print('fitted MLP in {:.2f} seconds'.format(time.time() - t_start))
print('predicting...')
labels_predicted = clf.predict(x_test)
print('accuracy: {:.2f} %'.format(np.mean(np.argmax(y_test,axis=1) == np.argmax(labels_predicted,axis=1)) * 100))
The code is adapted from a GitHub repository. For this test, I'm using a traditional neural network (MLP) with only one hidden layer of size 300.
Following are the results for both datasets:
sklearn digits: ~83% (tensorflow), ~90% (sklearn)
MNIST: ~94% (tensorflow), ~97% (sklearn)
I'm using the same model for both libraries. All the parameters (number of hidden layers, number of hidden units, learning_rate, l2 regularization constant, number of training epochs, batch size) and optimization algorithms are the same (Adam optimizer, beta parameters for Adam optimizer, no momentum, etc).
I wonder whether sklearn has some magic in its implementation that TensorFlow lacks. Can anyone help explain the difference?
Thank you very much.

Tensorflow streaming metrics are not working

I am trying to implement the algorithm here: https://arxiv.org/pdf/1702.02098.pdf
For some reason I always get 0.2 recall and 0 accuracy. Am I using the streaming metrics correctly? The documentation only has pseudocode. Here is my code:
import tensorflow as tf
import numpy as np
from nltk.corpus import brown
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelBinarizer
from tensorflow.contrib.metrics import streaming_accuracy, streaming_recall
data = brown.tagged_sents()
tags = set()
for sent in data:
    for token, tag in sent:
        tags.add(tag)
label_processor = LabelBinarizer()
label_processor.fit(list(tags))
embedding_dim = 100
word2vec = Word2Vec(brown.sents(), size=embedding_dim, min_count=1)
embedding = word2vec.wv
del word2vec # Saves RAM
test = 0.1
val = 0.1
data_length = len(data)
inds = np.random.permutation(np.arange(data_length))
test_inds = inds[:int(data_length*test)]
val_inds = inds[int(data_length*test):int(data_length*(val+test))]
train_inds = inds[int(data_length*(val+test)):]
val_x = []
val_y = []
for i in val_inds:
    x = []
    tags = []
    for token, tag in data[i]:
        x.append(embedding[token])
        tags.append(tag)
    x = np.array(x)
    x = x.reshape(x.shape[0], 1, x.shape[1], 1)
    y = np.array(label_processor.transform(tags))
    val_x.append(x)
    val_y.append(y)
val_x = np.concatenate(val_x, axis=0)
val_y = np.concatenate(val_y, axis=0)
test_x = []
test_y = []
for i in test_inds:
    x = []
    tags = []
    for token, tag in data[i]:
        x.append(embedding[token])
        tags.append(tag)
    x = np.array(x)
    x = x.reshape(x.shape[0], 1, x.shape[1], 1)
    y = np.array(label_processor.transform(tags))
    test_x.append(x)
    test_y.append(y)
test_x = np.concatenate(test_x, axis=0)
test_y = np.concatenate(test_y, axis=0)
learning_rate = 0.001
n_iter = 12000
display_step = 100
depth = 5
label_processor
n_classes = label_processor.classes_.shape[0]
dropout_prob = 0.50
x = tf.placeholder(tf.float32, [None, 1, embedding_dim, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32, [])
depth_tensor = tf.constant(depth, tf.float32)
def data_processor(data, train_inds, word2vec, label_processor, n_iter):
    inds = np.random.randint(len(train_inds), size=(n_iter))
    for i in inds:
        x = []
        tags = []
        for token, tag in data[train_inds[i]]:
            x.append(word2vec[token])
            tags.append(tag)
        x = np.array(x)
        x = x.reshape(x.shape[0], 1, x.shape[1], 1)
        y = np.array(label_processor.transform(tags))
        yield x, y
def model(x, y, weights, biases, dropout, depth_tensor):
    net = tf.nn.dropout(x, dropout)
    net = tf.nn.conv2d(net, weights['first'], strides=[1, 1, 1, 1], padding='SAME')
    net = tf.nn.bias_add(net, biases['first'])
    net = tf.nn.relu(net)
    net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
    preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))
    for i in range(1, depth):
        net = tf.nn.dropout(net, dropout)
        net = tf.nn.atrous_conv2d(net, weights['iterated'], rate=2**i, padding='SAME')
        net = tf.nn.bias_add(net, biases['iterated'])
        net = tf.nn.relu(net)
        net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
        preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
        cost += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))
    return preds, tf.divide(cost, depth_tensor)
weights = {'first': tf.Variable(tf.random_normal([1, 3, 1, 10])),
           'iterated': tf.Variable(tf.random_normal([1, 3, 10, 10])),
           'out': tf.Variable(tf.random_normal([embedding_dim*10, n_classes]))}
biases = {'first': tf.Variable(tf.random_normal([10])),
          'iterated': tf.Variable(tf.random_normal([10])),
          'out': tf.Variable(tf.random_normal([n_classes]))}
preds, cost = model(x, y, weights, biases, dropout, depth_tensor)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)
init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    i = 1
    for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
        sess.run(optimizer,
                 feed_dict={x: batch_x, y: batch_y,
                            dropout: dropout_prob})
        if i % display_step == 0:
            loss = sess.run(cost,
                            feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
            print("Iter:{}, Minibatch Loss:{:.6f}".format(i, loss))
        i += 1
    sess.run(init2)
    for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
        recall, accuracy = sess.run([update_recall, update_accuracy],
                                    feed_dict={x: batch_x, y: batch_y, dropout: 1})
    f1 = 2 * recall * accuracy / (recall + accuracy)
    print("Testing Accuracy:", accuracy, "Testing Recall:", recall, "Testing F1 Score:", f1)
And here is the part where I used the streaming metrics:
accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)
init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    i = 1
    for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
        sess.run(optimizer,
                 feed_dict={x: batch_x, y: batch_y,
                            dropout: dropout_prob})
        if i % display_step == 0:
            loss = sess.run(cost,
                            feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
            print("Iter:{}, Minibatch Loss:{:.6f}".format(i, loss))
        i += 1
    sess.run(init2)
    for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
        recall, accuracy = sess.run([update_recall, update_accuracy],
                                    feed_dict={x: batch_x, y: batch_y, dropout: 1})
    f1 = 2 * recall * accuracy / (recall + accuracy)
I believe it's because you only run the update ops but never fetch the value ops.
The code below:
for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
    recall, accuracy = sess.run([update_recall, update_accuracy],
                                feed_dict={x: batch_x, y: batch_y, dropout: 1})
f1 = 2 * recall * accuracy / (recall + accuracy)
Should be:
for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
    _, recall_value, _, accuracy_value = sess.run([update_recall, recall, update_accuracy, accuracy],
                                                  feed_dict={x: batch_x, y: batch_y, dropout: 1})
f1 = 2 * recall_value * accuracy_value / (recall_value + accuracy_value)
The tensors accuracy and recall hold the actual metric values; the update_* ops are only used to update the internal counter variables from which the final metric is calculated.
By the way, I have not tested this code. Please let me know if it doesn't work as you expected.
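Since the update ops only accumulate the internal counters, another pattern consistent with this explanation (a sketch, untested against the asker's data) is to run only the update ops per batch and fetch the final metric values once after the loop:

for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
    # Run only the update ops here; they accumulate the streaming counters.
    sess.run([update_recall, update_accuracy],
             feed_dict={x: batch_x, y: batch_y, dropout: 1})
# Fetch the accumulated metric values once, after all validation batches.
recall_value, accuracy_value = sess.run([recall, accuracy])
f1 = 2 * recall_value * accuracy_value / (recall_value + accuracy_value)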

Does anyone know how to allocate more memory for the graph? Tensorflow: "ValueError: GraphDef cannot be larger than 2GB."

I am learning how to do transfer learning from an AlexNet that has already been trained on images. I am stripping off the last fully connected layer in AlexNet and creating my own layer with the number of classes in my traffic sign labels. I don't want to retrain the layers before this new layer, only the new layer itself.
I am having trouble finding the bug that is causing this error. I have searched the web for solutions, but I don't believe any of them apply.
Please take a moment and see if you can find where I am creating the extra nodes that are causing the Graph to grow. Thanks!
import pickle
import tensorflow as tf
from sklearn.model_selection import train_test_split
from alexnet import AlexNet
import os
import numpy as np
# TODO: Load traffic signs data.
# Load pickled data
# Where training and testing data is saved.
training_file = os.getcwd()+"/train.p"
print(training_file)
with open(training_file, mode='rb') as f:
    train = pickle.load(f)
X_train, y_train = train['features'], train['labels']
print('Train data shape = ', X_train.shape)
X_train_original = np.copy(X_train)
# TODO: Split data into training and validation sets.
X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                      y_train,
                                                      train_size=.8,
                                                      random_state=42)
# TODO: Define placeholders and resize operation.
nb_classes = 43
x = tf.placeholder(tf.float32, (None, 32, 32, 3), name='x')
y = tf.placeholder(tf.int32, (None), name='y')
one_hot_y = tf.one_hot(y, 43, name='one_hot_y')
resized = tf.image.resize_images(x, (227, 227))
print('Resized data shape = ', resized.shape)
# TODO: pass placeholder as first argument to `AlexNet`.
fc7 = AlexNet(resized, feature_extract=True)
# NOTE: `tf.stop_gradient` prevents the gradient from flowing backwards
# past this point, keeping the weights before and up to `fc7` frozen.
# This also makes training faster, less work to do!
fc7 = tf.stop_gradient(fc7)
# TODO: Add the final layer for traffic sign classification.
shape = (fc7.get_shape().as_list()[-1], nb_classes) # use this shape for the weight matrix
#fc8W = np.random.normal(0, .15, size=(shape[0], shape[1])).astype(np.float32)
#print(fc8W)
#fc8b = np.random.normal(0, .15, size=(shape[1])).astype(np.float32)
#print(fc8b)
mu = 0
sigma = 0.05
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2))
fc8b = tf.Variable(tf.zeros(nb_classes))
#logits = tf.matmul(fc7, fc8W) + fc8b; print(logits)
logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
#probs = tf.nn.softmax(logits)
# TODO: Define loss, training, accuracy operations.
# HINT: Look back at your traffic signs project solution, you may
# be able to reuse some the code.
rate = 0.001
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)
# TODO: Train and evaluate the feature extraction model.
#Shuffle data
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)
#Epochs for training and batch sizes defined.
EPOCHS = 10
BATCH_SIZE = 128
### Evaluation function.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#saver = tf.train.Saver()
def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
        total_accuracy += (accuracy * len(batch_x))
    return total_accuracy / num_examples
### Training function.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)
    print("Training...")
    print()
    for i in range(EPOCHS):
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
        train_accuracy = evaluate(X_train, y_train)
        validation_accuracy = evaluate(X_valid, y_valid)
        print("EPOCH {} ...".format(i+1))
        print('Training Accuracy = {:.3f}'.format(train_accuracy))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()

Feeding timeseries data into Tensorflow for LSTM classifier training

I have a dataframe of shape (38307, 26) with a timestamp as the index.
I'm trying to implement an LSTM classifier, but I'm struggling to feed the data into the TensorFlow graph.
The final arrays I'm trying to feed have shapes X_train = (38307, 25) and y_train = (38307, 2).
I have added the code in case it helps:
# Parametres
learning_rate = 0.001
training_epochs = 100
batch_size = 128
display_step = 10
# Network Parameters
n_input = 25 # features= 25
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2 # Binary classification
# TF Graph input
x = tf.placeholder("float32", [None, n_steps, n_input])
y = tf.placeholder("float32", [None, n_classes])
# TF Weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = RNN(x, weights, biases)
# Initialize the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(len(X_train) / batch_size)
        X_batches = np.array_split(X_train, total_batch)
        Y_batches = np.array_split(y_train, total_batch)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = X_batches[i], Y_batches[i]
            # batch_y.shape = (batch_y.shape[0]), 1)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print(("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost)))
    print('Optimization finished')
    # Store session for analysis with TensorBoard
    writer = tf.summary.FileWriter("/tmp/test", sess.graph)
    # Test model
    print("Accuracy:", accuracy.eval({x: X_test, y: y_test}))
    global result
    result = tf.argmax(pred, 1).eval({x: X_test, y: y_test})
EDIT: here is the RNN function:
def RNN(x, weights, biases):
    # Prepare data shape to match 'rnn' function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)
    # x = tf.split(x, n_steps, 0)  # Syntax change in this version
    # LSTM tensorflow using rnn from tensorflow.contrib
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Get LSTM cell output
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Unfortunately, the most important part of your code is hidden in the RNN function.
Some tips to help you out: I guess you are trying to build a dynamic RNN (is that correct?). In that case, a common mistake I see is that people confuse the time-major and batch-major setting of these RNNs. In other words: is your input data shaped [batch, time, variables] or [time, batch, variables]?
More about this can be found here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md
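For illustration, here is a minimal sketch of the two conventions with tf.nn.dynamic_rnn; the shapes and cell size are placeholders for illustration, not taken from the question:

import tensorflow as tf

n_steps, n_input, n_hidden = 28, 25, 128

# Batch-major input: shape [batch, time, variables] (the default, time_major=False)
x_batch_major = tf.placeholder(tf.float32, [None, n_steps, n_input])
with tf.variable_scope("batch_major"):
    cell_bm = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
    outputs_bm, _ = tf.nn.dynamic_rnn(cell_bm, x_batch_major, dtype=tf.float32)
# outputs_bm has shape [batch, time, n_hidden]

# Time-major input: shape [time, batch, variables] (pass time_major=True)
x_time_major = tf.placeholder(tf.float32, [n_steps, None, n_input])
with tf.variable_scope("time_major"):
    cell_tm = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
    outputs_tm, _ = tf.nn.dynamic_rnn(cell_tm, x_time_major, dtype=tf.float32, time_major=True)
# outputs_tm has shape [time, batch, n_hidden]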
