I'm a real beginner with TensorFlow and with this whole field, but I've watched all of Andrej Karpathy's lectures from the CS231n class, so I understand the code.
So this is the code (not mine): https://github.com/nfmcclure/tensorflow_cookbook/tree/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction
# Implementing an RNN in TensorFlow
# ----------------------------------
#
# We implement an RNN in TensorFlow to predict spam/ham from texts
#
# https://github.com/nfmcclure/tensorflow_cookbook/blob/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction/02_implementing_rnn.py
import os
import re
import io
import glob
import requests
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from zipfile import ZipFile
from tensorflow.python.framework import ops
ops.reset_default_graph()
# Start a graph
sess = tf.Session()
# Set RNN parameters
epochs = 20
batch_size = 250
max_sequence_length = 25
rnn_size = 10
embedding_size = 50
min_word_frequency = 10
learning_rate = 0.0005
dropout_keep_prob = tf.placeholder(tf.float32)
# Download or open data
data_dir = 'temp'
data_file = 'text_data.txt'
if not os.path.exists(data_dir):
os.makedirs(data_dir)
if not os.path.isfile(os.path.join(data_dir, data_file)):
zip_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip'
r = requests.get(zip_url)
z = ZipFile(io.BytesIO(r.content))
file = z.read('SMSSpamCollection')
# Format Data
text_data = file.decode()
text_data = text_data.encode('ascii', errors='ignore')
text_data = text_data.decode().split('\n')
# Save data to text file
with open(os.path.join(data_dir, data_file), 'w') as file_conn:
for text in text_data:
file_conn.write("{}\n".format(text))
else:
# Open data from text file
text_data = []
with open(os.path.join(data_dir, data_file), 'r') as file_conn:
for row in file_conn:
text_data.append(row)
text_data = text_data[:-1]
text_data = [x.split('\t') for x in text_data if len(x) >= 1]
text_data = [x for x in text_data if len(x) > 1]
print([list(x) for x in zip(*text_data)])
[text_data_target, text_data_train] = [list(x) for x in zip(*text_data)]
# Create a text cleaning function
def clean_text(text_string):
text_string = re.sub(r'([^\s\w]|_|[0-9])+', '', text_string)
text_string = " ".join(text_string.split())
text_string = text_string.lower()
return (text_string)
# Clean texts
text_data_train = [clean_text(x) for x in text_data_train]
# Change texts into numeric vectors
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(max_sequence_length,
min_frequency=min_word_frequency)
text_processed = np.array(list(vocab_processor.fit_transform(text_data_train)))
# Shuffle and split data
text_processed = np.array(text_processed)
text_data_target = np.array([1 if x == 'ham' else 0 for x in text_data_target])
shuffled_ix = np.random.permutation(np.arange(len(text_data_target)))
x_shuffled = text_processed[shuffled_ix]
y_shuffled = text_data_target[shuffled_ix]
# Split train/test set
ix_cutoff = int(len(y_shuffled) * 0.80)
x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]
vocab_size = len(vocab_processor.vocabulary_)
print("Vocabulary Size: {:d}".format(vocab_size))
print("80-20 Train Test split: {:d} -- {:d}".format(len(y_train), len(y_test)))
# Create placeholders
x_data = tf.placeholder(tf.int32, [None, max_sequence_length])
y_output = tf.placeholder(tf.int32, [None])
# Create embedding
embedding_mat = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
embedding_output = tf.nn.embedding_lookup(embedding_mat, x_data)
# embedding_output_expanded = tf.expand_dims(embedding_output, -1)
# Define the RNN cell
# TensorFlow change >= 1.0: rnn is put into the tensorflow.contrib directory. Prior versions not tested.
if tf.__version__[0] >= '1':
cell = tf.contrib.rnn.BasicRNNCell(num_units=rnn_size)
else:
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=rnn_size)
output, state = tf.nn.dynamic_rnn(cell, embedding_output, dtype=tf.float32)
output = tf.nn.dropout(output, dropout_keep_prob)
# Get output of RNN sequence
output = tf.transpose(output, [1, 0, 2])
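# output now has shape [max_sequence_length, batch_size, rnn_size]; the gather below picks out the last time step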
last = tf.gather(output, int(output.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([rnn_size, 2], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[2]))
logits_out = tf.matmul(last, weight) + bias
# Loss function
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_out,
labels=y_output) # logits=float32, labels=int32
loss = tf.reduce_mean(losses)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits_out, 1), tf.cast(y_output, tf.int64)), tf.float32))
optimizer = tf.train.RMSPropOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
# Start training
for epoch in range(epochs):
# Shuffle training data
shuffled_ix = np.random.permutation(np.arange(len(x_train)))
x_train = x_train[shuffled_ix]
y_train = y_train[shuffled_ix]
num_batches = int(len(x_train) / batch_size) + 1
# TODO: calculate the number of generations exactly
for i in range(num_batches):
# Select train data
min_ix = i * batch_size
max_ix = np.min([len(x_train), ((i + 1) * batch_size)])
x_train_batch = x_train[min_ix:max_ix]
y_train_batch = y_train[min_ix:max_ix]
# Run train step
train_dict = {x_data: x_train_batch, y_output: y_train_batch, dropout_keep_prob: 0.5}
sess.run(train_step, feed_dict=train_dict)
# Run loss and accuracy for training
temp_train_loss, temp_train_acc = sess.run([loss, accuracy], feed_dict=train_dict)
train_loss.append(temp_train_loss)
train_accuracy.append(temp_train_acc)
# Run Eval Step
test_dict = {x_data: x_test, y_output: y_test, dropout_keep_prob: 1.0}
temp_test_loss, temp_test_acc = sess.run([loss, accuracy], feed_dict=test_dict)
test_loss.append(temp_test_loss)
test_accuracy.append(temp_test_acc)
print('Epoch: {}, Test Loss: {:.2}, Test Acc: {:.2}'.format(epoch + 1, temp_test_loss, temp_test_acc))
# Plot loss over time
epoch_seq = np.arange(1, epochs + 1)
plt.plot(epoch_seq, train_loss, 'k--', label='Train Set')
plt.plot(epoch_seq, test_loss, 'r-', label='Test Set')
plt.title('Softmax Loss')
plt.xlabel('Epochs')
plt.ylabel('Softmax Loss')
plt.legend(loc='upper left')
plt.show()
# Plot accuracy over time
plt.plot(epoch_seq, train_accuracy, 'k--', label='Train Set')
plt.plot(epoch_seq, test_accuracy, 'r-', label='Test Set')
plt.title('Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()
def findFiles(path): return glob.glob(path)
pred_array = "words"
pred_num = np.array(list(vocab_processor.fit_transform(pred_array)))
print(pred_num)
pred_output = tf.placeholder(tf.float32,[1,len(pred_array),max_sequence_length])
feed_dict = {pred_output: [pred_num]}
classification = sess.run(losses, feed_dict)
print(classification)
It's an RNN spam classifier, and it's working great (except for the part I wrote at the end, where I'm trying to create the predictions).
I just want to understand how to create a prediction function for this, something that looks like this:
def predict(text): # text is a string (my mail)
# Doing prediction stuff
return (top result) # ham or spam
The last few lines are my latest attempt, and they give me the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Also I tried to do something using Making predictions with a TensorFlow model, and I also read https://www.tensorflow.org/serving/serving_basic, and everything I've tried failed...
Since I'm just a beginner, explanations are welcome, but I'm not sure I'll know how to code it, so please post the code answer too.
(Python 3.6 btw)
Thanks!
If you take a look at how the original code does the training and testing steps, specifically how they set up their train_dict and test_dict, you see that they feed values to each of the tensors defined as placeholder in the graph. Basically placeholders need to be given some value if they are going to be used in whatever calculation you are asking your network to do. Since you are looking for predictions from the network, you probably do not need to provide an expected output, but you will need to give it input data x_data, and a value for dropout_keep_prob. This should be dropout_keep_prob=1.0 for prediction.
You also want a prediction, not the loss of the network. The loss is basically a measure of how far your network's output is from what you expect, but since you are trying to predict something for new data you really just want to see what the network says it is. You can do this using the logits_out op directly, or you can add an op that converts your logits into a probability distribution over your classes. Either way you can look at the distribution to get an idea of how likely the network thinks your data falls into each category, or you can take the argmax of this vector to just output the network's best guess.
So you might try something like:
prediction = tf.nn.softmax(logits_out)
feed_dict = {x_data: your_input_data, dropout_keep_prob: 1.0}
pred = sess.run(prediction, feed_dict)
best_guess = np.argmax(pred) # highest-rated class
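Tying that back to the predict(text) signature from the question, a minimal sketch could look like the following. It assumes the trained sess, vocab_processor, clean_text, x_data, dropout_keep_prob, and the prediction op above are all still in scope, and it uses the same 1 = 'ham' / 0 = 'spam' encoding that text_data_target used during training.
def predict(text):  # text is a raw string (e.g. an email body)
    # Clean and vectorize the string the same way the training data was processed.
    cleaned = clean_text(text)
    # transform() (not fit_transform) reuses the vocabulary learned during training
    # and pads/truncates to max_sequence_length.
    text_num = np.array(list(vocab_processor.transform([cleaned])))
    # Keep every unit active at prediction time.
    probs = sess.run(prediction, feed_dict={x_data: text_num, dropout_keep_prob: 1.0})
    # Labels were encoded as 1 = 'ham', 0 = 'spam' when building text_data_target.
    return 'ham' if np.argmax(probs[0]) == 1 else 'spam'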
Related
I'm trying to create a neural network model for a Kaggle competition using the MNIST dataset. Currently my code looks like this, since I am trying to capture certain metrics. However, I can't seem to figure out how to turn this into an output to submit.
Current:
import time
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import numpy as np
from tensorflow.python.framework import ops
ops.reset_default_graph()
#requests.packages.urllib3.disable_warnings()
import ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
else:
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context
# Load training and testing data directly from TensorFlow
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# Initialize metrics
metrics = {}
# Initialize metric names
names = ['Number of Hidden Layers', 'Nodes per Layer', 'Time in Seconds',
'Training Set Accuracy', 'Test Set Accuracy']
# Set fixed parameters
n_epochs = 20
batch_size = 50
learning_rate = 0.01
# Function that creates batch generator used in training
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
yield X_batch, y_batch
# Start timer
start = time.process_time()
n_hidden = 300
# Reset the session
tf.reset_default_graph()
#ops.reset_default_graph()
tf.set_random_seed(2141)
#tf.random.set_seed(2141)
np.random.seed(9347)
# Set X and y placeholders
X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
activation=tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, n_hidden, name="hidden2",
activation=tf.nn.relu)
logits = tf.layers.dense(hidden2, 10, name="outputs")
y_proba = tf.nn.softmax(logits)
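# y_proba holds the per-class probabilities; evaluating it on new inputs gives the predictions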
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.Session() as sess:
tf.global_variables_initializer().run()
for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
# Record the clock time it takes
duration = time.process_time() - start
metrics['Model 1'] = [2, n_hidden, duration, acc_train, acc_test]
# Convert metrics dictionary to dataframe for display
results_summary = pd.DataFrame.from_dict(metrics, orient='index')
results_summary.columns = names
# Sort by model number
results_summary.reset_index(inplace=True)
results_summary.sort_values(by=['index'], axis=0, inplace=True)
results_summary.set_index(['index'], inplace=True)
results_summary.index.name = None
# Export to csv
results_summary.to_csv('results_summary.csv')
results_summary
I need to create an output that looks something like this in a CSV file:
ImageId Label
0 1 2
1 2 0
2 3 9
3 4 0
4 5 3
Would I have to recreate the whole thing in order to actually create "y_pred" when doing something like model.predict(X_test), or can I just reshape the existing code in some way to do this? Ideally, I would like to capture the predicted values and compare them to the true values using a confusion matrix.
I've tried the following but keep getting errors like AttributeError: module 'tensorflow.compat.v1' has no attribute 'run':
feed_dict = {X: X_test}
classification = tf.run(y_proba, feed_dict)
label = numpy.argmax(classification, axis=-1)
Thanks in advance.
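For reference, a minimal sketch of how predictions could be pulled out of the graph above: there is no tf.run; the y_proba op has to be evaluated inside a live session (for example inside the existing with tf.Session() as sess: block, after the training loop). The ImageId numbering below simply mirrors the sample layout shown above.
# Inside the existing `with tf.Session() as sess:` block, after the training loop:
probs = y_proba.eval(feed_dict={X: X_test})   # per-class probabilities, shape (n_samples, 10)
y_pred = np.argmax(probs, axis=1)             # predicted digit for each test image

# Kaggle-style submission; ImageId is assumed to start at 1, as in the sample above.
submission = pd.DataFrame({'ImageId': np.arange(1, len(y_pred) + 1), 'Label': y_pred})
submission.to_csv('submission.csv', index=False)
From there, y_pred can also be compared against y_test with a confusion matrix (e.g. sklearn.metrics.confusion_matrix, assuming scikit-learn is available).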
I succeeded in building a linear regression neural network with 1 input and 1 output.
I am now building a linear regression neural network with 5 inputs and 1 output.
Here is the formula:
y = 3e + d^2 + 9c + 11b^6 + a + 19
However, no matter how many neurons, epochs, and hidden layers I use, I cannot get a good prediction.
The predicted outputs always fall within a small range, while there is large variance among the expected outputs.
[Figure: Predicted output vs Expected output]
I guess it may be because of the choice of activation function, loss function and optimizer.
If not, a multi-input neural network may need to be built in a different way.
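For illustration only, a dataset following this formula could be generated like the sketch below (hypothetical; the real testB_200.csv is not reproduced here, and the column layout is only assumed to match). Note that the 11b^6 term alone can dominate and spread y over a very wide range, which is consistent with the large variance in the expected outputs:
import numpy as np
import pandas as pd

# Hypothetical stand-in for testB_200.csv: 200 rows of (a, b, c, d, e) and the target y.
rng = np.random.default_rng(0)
a, b, c, d, e = rng.uniform(-2, 2, size=(5, 200))
y = 3*e + d**2 + 9*c + 11*b**6 + a + 19
df = pd.DataFrame({'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'y': y})  # target assumed to be the last column, as in the code below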
Here is my code:
import torch
import torch.nn as nn #neural network model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from pickle import dump
#Load datasets
dataset = pd.read_csv('testB_200.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1:].values
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
print(X_scaler.fit(X))
print(Y_scaler.fit(Y))
X = X_scaler.transform(X)
Y = Y_scaler.transform(Y)
#save the scaler
dump(X_scaler, open('X_scaler.pkl', 'wb'))
dump(Y_scaler, open('Y_scaler.pkl', 'wb'))
train = int((len(dataset)+1)*0.8)
test = train + 1
print(train)
print(test)
x_temp_train = X[:train]
y_temp_train = Y[:train]
x_temp_test = X[test:]
y_temp_test = Y[test:]
X_train = torch.FloatTensor(x_temp_train)
Y_train = torch.FloatTensor(y_temp_train)
X_test = torch.FloatTensor(x_temp_test)
Y_test = torch.FloatTensor(y_temp_test)
D_in = 5 # D_in is input features
H = 12 # H is hidden dimension
H2 =8
H3 =4
D_out = 1 # D_out is output features.
#Define a Artifical Neural Network model
class Net(nn.Module):
#------------------3 hidden Layers------------------------------
def __init__(self, D_in, H, H2, H3, D_out):
super(Net, self).__init__()
self.linear1 = nn.Linear(D_in, H)
self.linear2 = nn.Linear(H, H2)
self.linear3 = nn.Linear(H2, H3)
self.linear4 = nn.Linear(H3, D_out)
def forward(self, x):
#activation function should be used here e.g: hidden = F.relu(...)
h_relu = self.linear1(x).clamp(min=0) #min=0 is like ReLU
middle = self.linear2(h_relu).clamp(min=0)
middle2 = self.linear3(middle).clamp(min=0)
prediction = self.linear4(middle2)
return prediction
model = Net(D_in, H, H2, H3, D_out)
print(model)
#Define a Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.2) #2e-7, lr=learning rate=0.2
#Training model
inputs = Variable(X_train)
outputs = Variable(Y_train)
inputs_val = Variable(X_test)
outputs_val = Variable(Y_test)
loss_values = []
val_values = []
epoch = []
epoch_value=25
for i in range(epoch_value):
for phase in ['train', 'val']:
if phase == 'train':
#print('train loss')
model.train() # Set model to training mode
prediction = model(inputs)
loss = criterion(prediction, outputs)
#print(loss)
loss_values.append(loss.item())
optimizer.zero_grad() #zero the parameter gradients
epoch.append(i)
loss.backward() #compute gradients(dloss/dx)
optimizer.step() #updates the parameters
elif phase == 'val':
#print('validation loss')
model.eval() # Set model to evaluate mode
prediction_val = model(inputs_val)
loss_val = criterion(prediction_val, outputs_val)
#print(loss_val)
val_values.append(loss_val.item())
optimizer.zero_grad() #zero the parameter gradients
torch.save(model.state_dict(), 'formula2.pth') #save model
#Plot train_loss vs validation loss
plt.plot(epoch,loss_values)
plt.plot(epoch, val_values)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train','validation'], loc='upper left')
plt.show()
#plot prediciton vs expected value
prediction_val = prediction_val.detach().numpy()
prediction_val = Y_scaler.inverse_transform(prediction_val)
#print('predict')
#print(prediction_val)
Y_test = Y_scaler.inverse_transform(Y_test)
#print('test')
#print(Y_test)
plt.plot(Y_test)
plt.plot(prediction_val)
plt.legend(['expected','predict'], loc='upper left')
plt.show()
[Figure: Model Loss vs Validation Loss]
[Figure: Validation vs Expected outputs]
Thanks for your time.
This is a simple example of using an LSTM cell from TensorFlow. I am generating a sine wave and training my network on ten periods, and I'm trying to predict the eleventh period. The predictor values X are a one-step lag of the true y. After training, I save the session to disk and restore it at prediction time - this is typical of training and deploying models to production.
When I predict the last period, y_predicted is matching very well the true y.
If I try to predict the sine wave using an arbitrary starting point (i.e. uncommenting line 114),
test_data = test_data[16:]
so that the true values of y are shifted by a quarter period, the LSTM prediction still seems to start at zero, and it takes a while to catch up with the true values, eventually matching the previous prediction. In fact, the prediction in the second case still looks like a full sine wave instead of the 3/4 wave.
Why is this happening? If I implement a regressor, I would like to be able to use it starting from any point.
https://github.com/fbora/mytensorflow/issues/1
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
def sin_signal():
'''
generate a sin function
the train set is ten periods in length
the test set is one additional period
the return variable is in pandas format for easy plotting
'''
phase = np.arange(0, 2*np.pi*11, 0.1)
y = np.sin(phase)
data = pd.DataFrame.from_dict({'phase': phase, 'y':y})
# fill the last element by 0 - it's the end of the period anyways
data['X'] = data.y.shift(-1).fillna(0.0)
train_data = data[data.phase<=2*np.pi*10].copy()
test_data = data[data.phase>2*np.pi*10].copy()
return train_data, test_data
class lstm_model():
def __init__(self, size_x, size_y, num_units=32, num_layers=3, keep_prob=0.5):
# def single_unit():
# return rnn.DropoutWrapper(
# rnn.LSTMCell(num_units), output_keep_prob=keep_prob)
def single_unit():
return rnn.LSTMCell(num_units)
self.graph = tf.Graph()
with self.graph.as_default():
'''input place holders'''
self.X = tf.placeholder(tf.float32, [None, size_x], name='X')
self.y = tf.placeholder(tf.float32, [None, size_y], name='y')
'''network'''
cell = rnn.MultiRNNCell([single_unit() for _ in range(num_layers)])
X = tf.expand_dims(self.X, -1)
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0])-1)
weights = tf.Variable(tf.truncated_normal([num_units, size_y], 0.0, 1.0), name='weights')
bias = tf.Variable(tf.zeros(size_y), name='bias')
predicted_y = tf.nn.xw_plus_b(last, weights, bias, name='predicted_y')
'''optimizer'''
optimizer = tf.train.AdamOptimizer(name='adam_optimizer')
global_step = tf.Variable(0, trainable=False, name='global_step')
self.loss = tf.reduce_mean(tf.squared_difference(predicted_y, self.y), name='mse_loss')
self.train_op = optimizer.minimize(self.loss, global_step=global_step, name='training_op')
'''initializer'''
self.init_op = tf.global_variables_initializer()
class lstm_regressor():
def __init__(self):
if not os.path.isdir('./check_pts'):
os.mkdir('./check_pts')
@staticmethod
def get_shape(dataframe):
df_shape = dataframe.shape
num_rows = df_shape[0]
num_cols = 1 if len(df_shape)<2 else df_shape[1]
return num_rows, num_cols
def train(self, X_train, y_train, iterations):
train_pts, size_x = lstm_regressor.get_shape(X_train)
train_pts, size_y = lstm_regressor.get_shape(y_train)
model = lstm_model(size_x=size_x, size_y=size_y, num_units=32, num_layers=1)
with tf.Session(graph=model.graph) as sess:
sess.run(model.init_op)
saver = tf.train.Saver()
feed_dict={
model.X: X_train.values.reshape(-1, size_x),
model.y: y_train.values.reshape(-1, size_y)
}
for step in range(iterations):
_, loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)
if step%100==0:
print('step={}, loss={}'.format(step, loss))
saver.save(sess, './check_pts/lstm')
def predict(self, X_test):
test_pts, size_x = lstm_regressor.get_shape(X_test)
X_np = X_test.values.reshape(-1, size_x)
graph = tf.Graph()
with graph.as_default():
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
saver = tf.train.import_meta_graph('./check_pts/lstm.meta')
saver.restore(sess, './check_pts/lstm')
X = graph.get_tensor_by_name('X:0')
y_tf = graph.get_tensor_by_name('predicted_y:0')
y_np = sess.run(y_tf, feed_dict={X: X_np})
return y_np.reshape(test_pts)
def main():
train_data, test_data = sin_signal()
regressor = lstm_regressor()
regressor.train(train_data.X, train_data.y, iterations=1000)
# test_data = test_data[16:]
y_predicted = regressor.predict(test_data.X)
test_data['y_predicted'] = y_predicted
test_data[['y', 'y_predicted']].plot()
if __name__ == '__main__':
main()
I suspect that since you are starting your predictions at an arbitrary point in the future, there is a gap between the values your model was trained on and the values it starts to see at prediction time, and the state of your LSTM has not been updated with the values in that gap.
*** UPDATE:
In your code, you have this:
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32)
and then during training this:
_, loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)
I would suggest feeding the initial State into dynamic_rnn and re-feeding the updated state at each training iteration, something like this:
inState = tf.placeholder(tf.float32, [YOUR_DIMENSIONS], name='inState')
val, state = tf.nn.dynamic_rnn(cell, X, time_major=True, dtype=tf.float32, initial_state=inState)
And during training:
iState = np.zeros([YOUR_DIMENSIONS])
feed_dict={
model.X: X_train.values.reshape(-1, size_x),
model.y: y_train.values.reshape(-1, size_y),
inState: iState # feed initial value for state placeholder
}
_, loss, oState = sess.run([model.train_op, model.loss, model.state], feed_dict=feed_dict) # run one additional variable from the session
iState = oState # assign latest out-state to be re-fed as in-state
So, this way your model not only learns the parameters during training, but also keeps track of everything that it's seen during training in the State. NOW, you save this State with the rest of your session and use it during the prediction stage.
The small difficulty with this is that technically this State is a placeholder, so it won't be saved in the Graph automatically in my experience. So you create another variable manually at the end of training and assign the State to it; this way it is saved in the graph for later:
# make sure this variable is declared BEFORE the saver is declared
savedState = tf.get_variable('savedState', shape=[YOUR_DIMENSIONS])
# then, at the end of training:
assignOp = tf.assign(savedState, oState)
sess.run(assignOp)
# now save your graph
So now once you restore the Graph, if you want to start your predictions after some artificial gap, then somehow you still have to run your model through this gap so as to update the state. In my case, I just run one dummy prediction for the whole gap, just so as to update the state, and then you continue at your normal intervals from here.
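To make that concrete, here is a rough sketch of the prediction side under this scheme (hypothetical; tensor names follow the snippets above, X_gap_then_test is a stand-in for the gap rows plus the points you actually want predicted, and the exact state shape depends on your cell configuration):
with tf.Session() as sess:
    # Restore the trained graph and its variables (including savedState).
    saver = tf.train.import_meta_graph('./check_pts/lstm.meta')
    saver.restore(sess, './check_pts/lstm')
    graph = tf.get_default_graph()
    X = graph.get_tensor_by_name('X:0')
    inState = graph.get_tensor_by_name('inState:0')
    y_tf = graph.get_tensor_by_name('predicted_y:0')
    # Read back the state that was assigned to savedState at the end of training.
    iState = sess.run(graph.get_tensor_by_name('savedState:0'))
    # Running over the gap rows first advances the state; the later rows give the real predictions.
    y_np = sess.run(y_tf, feed_dict={X: X_gap_then_test, inState: iState})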
Hope this helps...
I am trying to repeatedly train a neural network with different hidden layer sizes to determine how many neurons it should have. I wrote a net that works fine on a single pass through. The code is:
import tensorflow as tf
import nn
def train(layers, data, folder = 'run1'):
input_layer_size, hidden_layer_size, num_labels = layers;
X, y, X_val, y_val = data;
X_placeholder = tf.placeholder(tf.float32, shape=(None, input_layer_size), name='X')
y_placeholder = tf.placeholder(tf.uint8, shape=(None, num_labels), name='y')
Theta1 = tf.Variable(nn.randInitializeWeights(input_layer_size, hidden_layer_size), name='Theta1')
bias1 = tf.Variable(nn.randInitializeWeights(hidden_layer_size, 1), name='bias1')
Theta2 = tf.Variable(nn.randInitializeWeights(hidden_layer_size, num_labels), name='Theta2')
bias2 = tf.Variable(nn.randInitializeWeights(num_labels, 1), name='bias2')
cost = nn.cost(X_placeholder, y_placeholder, Theta1, bias1, Theta2, bias2)
optimize = tf.train.GradientDescentOptimizer(0.6).minimize(cost)
accuracy, precision, recall, f1 = nn.evaluate(X_placeholder, y_placeholder, Theta1, bias1, Theta2, bias2)
cost_summary = tf.summary.scalar('cost', cost);
accuracy_summary = tf.summary.scalar('accuracy', accuracy);
precision_summary = tf.summary.scalar('precision', precision);
recall_summary = tf.summary.scalar('recall', recall);
f1_summary = tf.summary.scalar('f1', f1);
summaries = tf.summary.merge_all();
sess = tf.Session();
saver = tf.train.Saver()
init = tf.global_variables_initializer()
sess.run(init)
writer = tf.summary.FileWriter('./tmp/logs/' + folder, sess.graph)
NUM_STEPS = 20;
for step in range(NUM_STEPS):
sess.run(optimize, feed_dict={X_placeholder: X, y_placeholder: y});
if (step > 0) and ((step + 1) % 10 == 0):
summary = sess.run(summaries, feed_dict={X_placeholder: X_val, y_placeholder: y_val});
# writer.add_summary(summary, step);
print('Step', step + 1, 'of', NUM_STEPS);
save_path = saver.save(sess, './tmp/model_' + folder + '.ckpt')
print("Model saved in file: %s" % save_path)
sess.close();
When I put this call in a loop, however, I only make it through the first iteration. It seems to fail during the second iteration, the first time I hit this line:
summary = sess.run(summaries, feed_dict={X_placeholder: X_val, y_placeholder: y_val});
I get the error: InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'X' with dtype float
I logged both X and X_val right before feeding and they look exactly like they do on each preceding run. If I comment that second run part out it works great, but I kinda need my summaries...
My outer loop looks like this:
import train
import loadData
input_layer_size = 5513;
num_labels = 128;
data = loadData.load(input_layer_size, num_labels);
for hidden_layer_size in range(50, 500, 50):
train.train([input_layer_size, hidden_layer_size, num_labels], data, 'run' + str(hidden_layer_size))
Because you call the train function inside a loop, each time it runs it creates a new copy of the placeholders in the same default graph. The first time it runs, this is fine because there is only one copy. The second time it runs, you have duplicate placeholders, and tf.summary.merge_all() picks up the summaries from both copies, so the old model's unfed placeholders get pulled into the summary run. The solution is to separate the code that builds the model from the code that runs the training.
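A quick way to confirm the diagnosis, as a minimal sketch, is to clear the default graph at the top of train() so each call builds its placeholders (and whatever merge_all() collects) in a fresh graph; the cleaner long-term fix is still to build the model once and only re-run the training:
def train(layers, data, folder='run1'):
    # Start from an empty default graph on every call, so tf.summary.merge_all()
    # only sees this run's summaries and no stale placeholders get pulled into sess.run.
    tf.reset_default_graph()
    input_layer_size, hidden_layer_size, num_labels = layers
    X_placeholder = tf.placeholder(tf.float32, shape=(None, input_layer_size), name='X')
    y_placeholder = tf.placeholder(tf.uint8, shape=(None, num_labels), name='y')
    # ... rest of the original function unchanged ...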
I am new to TensorFlow and neural networks. I am trying to build a neural network that can classify images in the CIFAR-10 dataset.
Here is my code:
import tensorflow as tf
import pickle
import numpy as np
import random
image_size= 32*32*3 # because 3 channels
n_classes = 10
lay1_size = 50
batch_size = 100
def unpickle(filename):
with open(filename,'rb') as f:
data = pickle.load(f, encoding='latin1')
x = data['data']
y = data['labels']
# shuffle the data
z = list(zip(x,y))
random.shuffle(z)
x, y = zip(*z)
x = x[:batch_size]
y = y[:batch_size]
# convert class labels to one-hot arrays
y = np.eye(n_classes)[[y]]
return x, y
# set up network
def add_layer(inputs, in_size, out_size, activation_function=None):
W = tf.Variable(tf.random_normal([in_size, out_size]), dtype=tf.float32)
b = tf.Variable(tf.zeros([1,out_size]) + 0.1, dtype=tf.float32)
Wx_plus_b = tf.matmul(inputs, W) + b
if activation_function is None:
output = Wx_plus_b
else:
output = activation_function(Wx_plus_b)
return output
def compute_accuracy(v_xs, v_ys):
global prediction
y_pre = sess.run(prediction, feed_dict={xs:v_xs})
correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy, feed_dict={xs:v_xs, ys:v_ys})
return result
xs = tf.placeholder(tf.float32, [None,image_size])
ys = tf.placeholder(tf.float32)
lay1 = add_layer(xs, image_size, lay1_size, activation_function=tf.nn.tanh)
lay2 = add_layer(lay1, lay1_size, lay1_size, activation_function=tf.nn.tanh)
prediction = add_layer(lay2, lay1_size, n_classes, activation_function=tf.nn.softmax)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# run network
sess = tf.Session()
sess.run(tf.initialize_all_variables())
x_test, y_test = unpickle('test_batch')
for i in range(1000):
x_train, y_train = unpickle('data_batch_1')
sess.run(train_step, feed_dict={xs:x_train,ys:y_train})
if i % 50 == 0:
print(compute_accuracy(x_test, y_test))
sess.close()
I am using two hidden layers with 50 nodes in each layer. I am running 1,000 cycles, where in each cycle I shuffle data in the dataset and pick the first 100 images of that shuffle to train on.
I am consistently getting ~0.1 accuracy; the machine is not learning at all.
When I modify the code to use the MNIST dataset instead of the CIFAR-10 dataset I get ~0.87 accuracy.
I took code from an MNIST tutorial and am trying to modify it to classify CIFAR-10 data.
I can't figure out what's wrong here. How do I get my algorithm to learn?