I'm a beginner in TensorFlow and I found this neural network for binary classification, which is giving me decent results. After I run the session, how can I save the model? I have already tried the approaches from the official website, but nothing is working.
class AnnMLP():
    """Sigmoid MLP (27 -> 60 -> 60 -> 90 -> 1) for binary classification."""

    def train(self, X_input, y_input, test, range_iteration, learning_rate):
        """Build the graph, train it with gradient descent and optionally predict.

        Args:
            X_input: training features fed to the ``[None, 27]`` placeholder.
            y_input: training labels fed to the ``[None, 1]`` placeholder.
            test: when truthy, evaluate on ``X_test`` / ``y_test`` after training.
            range_iteration: number of gradient-descent steps.
            learning_rate: step size handed to the optimizer.

        Returns:
            The thresholded predictions for ``X_test`` when ``test`` is truthy,
            otherwise ``None``.
        """
        # Use the graph-mode API through tf.compat.v1 everywhere: the original
        # mixed tf.compat.v1.placeholder with tf.Session / tf.train.*, which
        # only exist under the TF 1.x namespace.
        # NOTE(review): under TF 2.x, tf.compat.v1.disable_eager_execution()
        # must have been called before placeholders can be created.
        tf1 = tf.compat.v1

        X = tf1.placeholder(tf.float32, [None, 27])
        Y = tf1.placeholder(tf.float32, [None, 1])

        # input
        W1 = tf.Variable(tf.random.normal([27, 60], seed=0), name='weight1')
        b1 = tf.Variable(tf.random.normal([60], seed=0), name='bias1')
        layer1 = tf.nn.sigmoid(tf.matmul(X, W1) + b1)
        # NOTE(review): Dropout is called without a `training` flag, so whether
        # it is active at evaluation time depends on the Keras learning phase.
        layer1 = keras.layers.Dropout(rate=0.4)(layer1)

        # hidden1
        W2 = tf.Variable(tf.random.normal([60, 60], seed=0), name='weight2')
        b2 = tf.Variable(tf.random.normal([60], seed=0), name='bias2')
        layer2 = tf.nn.sigmoid(tf.matmul(layer1, W2) + b2)
        layer2 = keras.layers.Dropout(rate=0.4)(layer2)

        # hidden2
        W3 = tf.Variable(tf.random.normal([60, 90], seed=0), name='weight3')
        b3 = tf.Variable(tf.random.normal([90], seed=0), name='bias3')
        layer3 = tf.nn.sigmoid(tf.matmul(layer2, W3) + b3)
        layer3 = keras.layers.Dropout(rate=0.4)(layer3)

        # output
        W4 = tf.Variable(tf.random.normal([90, 1], seed=0), name='weight4')
        b4 = tf.Variable(tf.random.normal([1], seed=0), name='bias4')
        logits = tf.matmul(layer3, W4) + b4
        hypothesis = tf.nn.sigmoid(logits)

        cost_i = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y)
        cost = tf.reduce_mean(cost_i)

        # Named train_op so it does not shadow this method's own name.
        train_op = tf1.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(cost)

        prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)
        correct_prediction = tf.equal(prediction, Y)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

        print("\n============Processing============")
        test_predict = None  # stays None when no test evaluation is requested
        with tf1.Session() as sess:
            sess.run(tf1.global_variables_initializer())
            train_feed = {X: X_input, Y: y_input}
            for step in range(range_iteration):
                sess.run(train_op, feed_dict=train_feed)
                if step % 1000 == 0:
                    loss, acc = sess.run([cost, accuracy], feed_dict=train_feed)
                    print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))
            if test:
                # NOTE(review): X_test / y_test are not parameters of this
                # method; they must exist in the enclosing module scope.
                test_predict = sess.run(prediction, feed_dict={X: X_test, Y: y_test})
        return test_predict
TensorFlow lets you save and restore both entire models and checkpoints. For more details, see the link here.
Tensorflow < 2
# Create the Saver once every variable of the model has been defined.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Session.run() requires at least one fetch; initialise the variables
    # before doing any work with them.
    sess.run(tf.global_variables_initializer())
    # Do some work with the model.
    # Save the variables to disk.
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in path: %s" % save_path)
Tensorflow > 2
# Build a fresh model instance and fit it on the training set.
n_epochs = 10
model = create_model()
model.fit(train_images, train_labels, epochs=n_epochs)
# Write the whole model to a single HDF5 file.
model.save('my_model.h5')
TensorFlow lets you save and restore both entire models and checkpoints.
Tensorflow < 2
# Create the Saver once every variable of the model has been defined.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Session.run() requires at least one fetch; initialise the variables
    # before doing any work with them.
    sess.run(tf.global_variables_initializer())
    # Do some work with the model.
    # Save the variables to disk.
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in path: %s" % save_path)
Tensorflow>2
# Create and train a new model instance.
model = create_model()
model.fit(train_images, train_labels, epochs=10)
# Save the entire model to a HDF5 file (the call was missing its closing
# parenthesis).
model.save('my_model.h5')
Related
I want to see test accuracy in tensorboard, but it seems I get accuracy with training data. I print test accuracy on console, and it is showing about 70%, but in tensorboard, the curve showed accuracy is growing and finally almost 100%.
This is my code:
def train_crack_captcha_cnn(is_train, checkpoint_dir):
    """Train the ResNet-v2-50 two-class model, or evaluate a saved checkpoint.

    Args:
        is_train: when truthy, run the training loop and log to TensorBoard;
            otherwise restore the latest checkpoint and print its accuracy.
        checkpoint_dir: directory used both for saving and for looking up
            checkpoints.

    The ``test_accuracy`` summary is evaluated on the validation data only.
    Previously it was part of ``merge_all()`` and therefore computed on every
    *training* batch, which is why TensorBoard showed training accuracy.
    """
    global max_acc
    X = tf.placeholder(tf.float32, [None, dr.ROWS, dr.COLS, dr.CHANNELS])
    Y = tf.placeholder(tf.float32, [None, 1, 1, 2])
    output, end_points = resnet_v2_50(X, num_classes=2)
    global_steps = tf.Variable(1, trainable=False)
    learning_rate = tf.train.exponential_decay(0.001, global_steps, 100, 0.9)
    with tf.device('/device:GPU:0'):
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=output))
        # To speed up training the learning rate starts large and decays slowly.
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
            loss, global_step=global_steps)
        predict = tf.argmax(output, axis=3)
        l = tf.argmax(Y, axis=3)
        correct_pred = tf.equal(predict, l)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    ## tensorboard
    # Kept out of the per-step merge so it is only run with validation data.
    test_accuracy_summary = tf.summary.scalar('test_accuracy', accuracy)
    # Built once here; calling merge_all() inside the loop added a new op to
    # the graph on every iteration.
    train_summaries = tf.summary.merge([
        tf.summary.scalar("loss", loss),
        tf.summary.scalar("learning_rate", learning_rate),
    ])

    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if is_train:
            writer = tf.summary.FileWriter("/tmp/cnn_log/log", graph=sess.graph)
            sess.run(tf.global_variables_initializer())
            step_value = sess.run(global_steps)
            while step_value < 100000:
                step_value = sess.run(global_steps)
                batch_x, batch_y = get_next_batch()
                result, _, _loss = sess.run(
                    [train_summaries, optimizer, loss],
                    feed_dict={X: batch_x, Y: batch_y})
                writer.add_summary(result, step_value)
                print('step : {} loss : {}'.format(step_value, _loss))

                # Compute the validation accuracy every 20 steps.
                if step_value % 20 == 0:
                    acc_summary, acc = sess.run(
                        [test_accuracy_summary, accuracy],
                        feed_dict={X: validation, Y: validation_labels})
                    writer.add_summary(acc_summary, step_value)
                    print('accuracy : {}'.format(acc))
                    # Save the model whenever the accuracy beats max_acc.
                    if acc > max_acc:
                        # Convert so max_acc does not alias the numpy value.
                        max_acc = float(acc)
                        saver.save(sess, checkpoint_dir + "/" + str(step_value) + '-' + str(acc) + "/model.ckpt", global_step=global_steps)
                        ##### predict #####
                        # predict_y = sess.run(output, feed_dict={X: test})
                        # data = pd.DataFrame([i for i in range(1, len(predict_y) + 1)], columns = ['id'])
                        # predict_y = np.argmax(predict_y, axis = 3)
                        # predict_y = np.reshape(predict_y,(-1))
                        # print(predict_y)
                        # predict_y = pd.Series(predict_y, name='label')
                        # data['label'] = predict_y
                        # data.to_csv("gender_submission.csv" + str(step), index=False)
                        ##### end #####
            writer.close()
        else:
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                acc = sess.run(accuracy, feed_dict={X: validation, Y: validation_labels})
                print('accuracy : {}'.format(acc))
I add accuracy into tensorboard like this:
tf.summary.scalar('test_accuracy', accuracy)
Every 20 steps I compute the accuracy on the test data and print the result to the console, but it is not the same as the value shown in TensorBoard.
Why?
I am trying to make a character recognition classifier for bangla alphabets. The images are size of 50x50. There are in total of 50 classes. Using the below CNN model to train but I am encountering this error: "ValueError: Validation size should be between 0 and 0. Received: 5000."
How do I resolve this?
MODEL
# Python 3.6.0
# tensorflow 1.1.0
import os
import os.path as path
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.examples.tutorials.mnist import input_data
MODEL_NAME = 'mnist_convnet'
NUM_STEPS = 3000
BATCH_SIZE = 16
def model_input(input_node_name, keep_prob_node_name):
    """Create the three placeholders the network is fed through.

    Returns the flattened-image placeholder (named ``input_node_name``), the
    dropout keep-probability placeholder (named ``keep_prob_node_name``) and
    the unnamed label placeholder, in that order.
    """
    flat_pixels = 50 * 50
    n_classes = 50
    images = tf.placeholder(tf.float32, shape=[None, flat_pixels],
                            name=input_node_name)
    keep = tf.placeholder(tf.float32, name=keep_prob_node_name)
    labels = tf.placeholder(tf.float32, shape=[None, n_classes])
    return images, keep, labels
def build_model(x, keep_prob, y_, output_node_name):
    """Build the 3-conv / 2-dense classifier and its training ops.

    Args:
        x: flattened image placeholder, reshaped here to 50x50x1.
        keep_prob: dropout keep probability applied after the first dense layer.
        y_: label placeholder compared against the 50-way output.
        output_node_name: graph name given to the softmax output.

    Returns:
        ``(train_step, loss, accuracy, merged_summary_op)``.
    """
    x_image = tf.reshape(x, [-1, 50, 50, 1])
    # 50*50*1
    conv1 = tf.layers.conv2d(x_image, 64, 3, 1, 'same', activation=tf.nn.relu)
    # 50*50*64
    pool1 = tf.layers.max_pooling2d(conv1, 2, 2, 'same')
    # 25*25*64  ('same' pooling with stride 2 rounds up: ceil(50 / 2))
    conv2 = tf.layers.conv2d(pool1, 128, 3, 1, 'same', activation=tf.nn.relu)
    # 25*25*128
    pool2 = tf.layers.max_pooling2d(conv2, 2, 2, 'same')
    # 13*13*128
    conv3 = tf.layers.conv2d(pool2, 256, 3, 1, 'same', activation=tf.nn.relu)
    # 13*13*256
    pool3 = tf.layers.max_pooling2d(conv3, 2, 2, 'same')
    # 7*7*256
    # The flatten size must match pool3 (7*7*256). The previous 4*4*256 was
    # left over from a 28x28 input and does not divide the per-image element
    # count, so the batch dimension came out wrong.
    flatten = tf.reshape(pool3, [-1, 7 * 7 * 256])
    fc = tf.layers.dense(flatten, 1024, activation=tf.nn.relu)
    dropout = tf.nn.dropout(fc, keep_prob)
    logits = tf.layers.dense(dropout, 50)
    outputs = tf.nn.softmax(logits, name=output_node_name)

    # loss
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

    # train step
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # accuracy
    correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()

    return train_step, loss, accuracy, merged_summary_op
def train(x, keep_prob, y_, train_step, loss, accuracy,
          merged_summary_op, saver):
    """Run NUM_STEPS training steps, checkpoint the model and print test accuracy.

    The graph definition is written to ``out/<MODEL_NAME>.pbtxt`` and the
    checkpoint to ``out/<MODEL_NAME>.chkp``; summaries go to ``logs/``.
    """
    print("training start...")

    # NOTE(review): read_data_sets loads the MNIST files; the placeholders
    # built for this model expect 50*50 inputs and 50 classes, so a custom
    # input pipeline is needed for the real dataset.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init_op)

        tf.train.write_graph(sess.graph_def, 'out',
                             MODEL_NAME + '.pbtxt', True)

        # op to write logs to Tensorboard
        summary_writer = tf.summary.FileWriter('logs/',
                                               graph=tf.get_default_graph())
        try:
            for step in range(NUM_STEPS):
                batch = mnist.train.next_batch(BATCH_SIZE)
                if step % 100 == 0:
                    # Dropout is disabled (keep_prob 1.0) while measuring.
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0], y_: batch[1], keep_prob: 1.0})
                    print('step %d, training accuracy %f' % (step, train_accuracy))
                _, summary = sess.run([train_step, merged_summary_op],
                                      feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
                summary_writer.add_summary(summary, step)
        finally:
            # Flush pending events and release the log file handle.
            summary_writer.close()

        saver.save(sess, 'out/' + MODEL_NAME + '.chkp')

        test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images,
                                                 y_: mnist.test.labels,
                                                 keep_prob: 1.0})
        print('test accuracy %g' % test_accuracy)

    print("training finished!")
def export_model(input_node_names, output_node_name):
    """Freeze the checkpointed graph and write an inference-optimised copy.

    Reads ``out/<MODEL_NAME>.pbtxt`` and ``out/<MODEL_NAME>.chkp``, writes
    ``out/frozen_<MODEL_NAME>.pb`` and then ``out/opt_<MODEL_NAME>.pb``.
    """
    graph_txt = 'out/' + MODEL_NAME + '.pbtxt'
    checkpoint = 'out/' + MODEL_NAME + '.chkp'
    frozen_pb = 'out/frozen_' + MODEL_NAME + '.pb'
    optimized_pb = 'out/opt_' + MODEL_NAME + '.pb'

    freeze_graph.freeze_graph(
        graph_txt, None, False, checkpoint, output_node_name,
        "save/restore_all", "save/Const:0", frozen_pb, True, "")

    input_graph_def = tf.GraphDef()
    with tf.gfile.Open(frozen_pb, "rb") as frozen_file:
        serialized = frozen_file.read()
        input_graph_def.ParseFromString(serialized)

    output_graph_def = optimize_for_inference_lib.optimize_for_inference(
        input_graph_def, input_node_names, [output_node_name],
        tf.float32.as_datatype_enum)

    with tf.gfile.FastGFile(optimized_pb, "wb") as optimized_file:
        optimized_file.write(output_graph_def.SerializeToString())

    print("graph saved!")
def main():
    """Build the model, train it and export the frozen graph into ``out/``."""
    out_dir = 'out'
    if not path.exists(out_dir):
        os.mkdir(out_dir)

    input_node_name, keep_prob_node_name, output_node_name = (
        'input', 'keep_prob', 'output')

    x, keep_prob, y_ = model_input(input_node_name, keep_prob_node_name)
    ops = build_model(x, keep_prob, y_, output_node_name)
    train_step, loss, accuracy, merged_summary_op = ops
    # The Saver is created only after build_model has defined the variables.
    saver = tf.train.Saver()

    train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)

    export_model([input_node_name, keep_prob_node_name], output_node_name)


if __name__ == '__main__':
    main()
ERROR
ValueError Traceback (most recent call last)
<ipython-input-2-2015e0ea466d> in <module>()
136
137 if __name__ == '__main__':
--> 138 main()
<ipython-input-2-2015e0ea466d> in main()
131 saver = tf.train.Saver()
132
--> 133 train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
134
135 export_model([input_node_name, keep_prob_node_name], output_node_name)
<ipython-input-2-2015e0ea466d> in train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
67 print("training start...")
68
---> 69 mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
70
71 init_op = tf.global_variables_initializer()
/anaconda3/envs/nlpTFnltk/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py in read_data_sets(train_dir, fake_data, one_hot, dtype, reshape, validation_size)
247 raise ValueError(
248 'Validation size should be between 0 and {}. Received: {}.'
--> 249 .format(len(train_images), validation_size))
250
251 validation_images = train_images[:validation_size]
ValueError: Validation size should be between 0 and 0. Received: 5000.
You're using the MNIST tutorial code, which is calling read_data_sets from here; note that validation_size of 5000 comes from that function's default parameters. It's expecting to get data from the following files:
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
Normally it would try to download those files if it doesn't find them, but the fact that you're getting a validation_size of 0 suggests it isn't doing so. This wouldn't help you anyway, since you don't want to use the MNIST data.
Even if you rename your train and test files to match the above filenames, your code won't work because the MNIST code is also calling extract_labels, which has a default parameter num_classes=10 while you want this to be 50. Your best bet is probably to get rid of the MNIST import completely and read about how to set up an input pipeline; it's not difficult compared to the stuff you've done already.
I've trained my NN in Tensorflow and saved the model like this:
def neural_net(x):
    """Map ``x`` through a 195-unit sigmoid layer to a 6-unit linear output."""
    hidden = tf.layers.dense(inputs=x, units=195, activation=tf.nn.sigmoid)
    return tf.layers.dense(inputs=hidden, units=6)
# Load the space-separated feature and target tables.
train_x = pd.read_csv("data_x.csv", sep=" ")
train_y = pd.read_csv("data_y.csv", sep=" ")
train_x = train_x / 6 - 0.5

# The first 90% of the rows are used for training, the rest for testing.
train_size = 0.9
train_cnt = int(floor(train_x.shape[0] * train_size))
x_train, x_test = train_x.iloc[0:train_cnt].values, train_x.iloc[train_cnt:].values
y_train, y_test = train_y.iloc[0:train_cnt].values, train_y.iloc[train_cnt:].values

x = tf.placeholder("float", [None, 386])
y = tf.placeholder("float", [None, 6])
nn_output = neural_net(x)
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y, predictions=nn_output))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

training_epochs = 5000
display_step = 1000
batch_size = 30
# Fed on every step below although the network above never reads it.
keep_prob = tf.placeholder("float")

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)
        for batch_x, batch_y in zip(x_batches, y_batches):
            step_feed = {x: batch_x, y: batch_y, keep_prob: 0.8}
            _, c = sess.run([optimizer, cost], feed_dict=step_feed)
    # The global_step value is appended to the checkpoint file name.
    saver.save(sess, 'trained_model', global_step=1000)
Now I want to use the trained model in a different file. Of course there are many many examples of restoring and saving the model, I went through lots of them. Still I couldn't make any of them work, there is always some kind of error. So this is my restore file, could you please help me to make it restore the saved model?
# import_meta_graph rebuilds the saved graph in the default graph and returns
# a Saver for its variables.
saver = tf.train.import_meta_graph('trained_model-1000.meta')
graph = tf.get_default_graph()
# The training script gave neither the input placeholder nor the output layer
# an explicit name, so they have to be fetched by auto-generated names.
# NOTE(review): 'Placeholder:0' / 'dense_1/BiasAdd:0' assume the placeholder
# `x` and the two dense layers were created first in the saved graph; confirm
# with [op.name for op in graph.get_operations()].
x = graph.get_tensor_by_name('Placeholder:0')
nn_output = graph.get_tensor_by_name('dense_1/BiasAdd:0')
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('./'))
    # Fetch a real tensor from the graph; the original fetched an empty list.
    y_pred = sess.run(nn_output, feed_dict={x: input_values})
E.g. this attempt gave me the error "The session graph is empty. Add operations to the graph before calling run()." So what operation should I add to the graph and how? I don't know what that operation should be in my model... I don't understand this whole concept of saving/restoring in Tensorflow. Or should I do the restoring completely differently? Thanks in advance!
Forgive me if I am wrong but tf.train.Saver() only saves the variable values not the graph itself. This means that if you want to load the model in a different file you need to rebuild the graph or somehow load the graph as well. Tensorflow documentation states:
The tf.train.Saver object not only saves variables to checkpoint files, it also restores variables. Note that when you restore variables from a file you do not have to initialize them beforehand.
Consider the following example:
One file that saves the model:
# Two zero-initialised variables to demonstrate saving.
v1 = tf.get_variable("v1", shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer=tf.zeros_initializer)

# Ops that modify the variables so the saved values differ from the initial ones.
inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

# Op that initialises every global variable.
init_op = tf.global_variables_initializer()

# Saver covering all variables defined so far.
saver = tf.train.Saver()

# Launch the graph: initialise, modify, then write the variables to disk.
with tf.Session() as sess:
    sess.run(init_op)
    # Do some work with the model.
    sess.run(inc_v1.op)
    sess.run(dec_v2.op)
    # Save the variables to disk.
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in file: %s" % save_path)
The other file that loads the previously saved model:
tf.reset_default_graph()

# Re-declare the variables with the names and shapes used when saving; no
# initializer is given because the values come from the checkpoint.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])

# Saver covering the variables declared above.
saver = tf.train.Saver()

# Launch the graph and load the variable values from disk.
with tf.Session() as sess:
    # Restore variables from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Check the values of the variables
    v1_value = sess.run(v1)
    v2_value = sess.run(v2)
    print("v1 : %s" % v1_value)
    print("v2 : %s" % v2_value)
output = sess.run(nn_output, feed_dict={ x: batch_x, keep_prob: 0.8 })
Here nn_output is the name of the output tensor of the last layer of your network. You can save your variables using:
saver = tf.train.Saver([nn_output])
saver.save(sess, 'my_test_model',global_step=1000) # save every 1000 steps
and therefore in your code:
out_layer = tf.layers.dense(inputs=layer_1, units=6)
should be :
out_layer = tf.layers.dense(inputs=layer_1, units=6, name='nn_output')
To restore:
with tf.Session() as sess:
    # import_meta_graph needs the ".meta" file itself; saving with
    # global_step=1000 produced "my_test_model-1000.meta".
    saver = tf.train.import_meta_graph('my_test_model-1000.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))
Now you should have access to that node of the graph. If the name is not specified, it is difficult to recover that particular layer.
You can now use the tf.saved_model.builder.SavedModelBuilder function.
The main lines for the saving:
builder = tf.saved_model.builder.SavedModelBuilder(graph_location)
builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])
builder.save()
A code to save the model :
...
def main(_):
    """Train the MNIST network and export it as a SavedModel and a checkpoint."""
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)  # an unknown model

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = "tmp/"
    print('Saving graph to: %s' % graph_location)
    builder = tf.saved_model.builder.SavedModelBuilder(graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    train_writer.close()
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

        # The builder snapshots the session's variable values at the moment of
        # this call, so it must run after training; calling it right after the
        # initializer exported the untrained weights.
        builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])
        saver.save(sess, "tmp/my_checkpoint.ckpt", global_step=0)

    builder.save()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
`
A code to restore the model :
import tensorflow as tf
# Restore the saved weights.
export_dir = "tmp"
# The context manager releases the session once the listing is done.
with tf.Session() as sess:
    tf.saved_model.loader.load(sess, ["cnn_mnist"], export_dir)
    # trainable_var = tf.trainable_variables()
    trainable_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    for var in trainable_var:
        print(var.name)
This question is old. But if someone else is struggling with doing predictions with a trained model (with TF 1.x) this code might help.
Pay attention that
Your network construction/defining code must be executed before the Saver() instance is created. Otherwise you get the error: ValueError: No variables to save. In the code below the LeNet(x) method constructs the network for input placeholder x.
You should not initialize the variables in the session. Because obviously you are loading them from the saved model.
# All the network construction code (e.g. defining the variables and layers)
# must be executed before the Saver() object is created. Otherwise you get
# the error: ValueError: No variables to save.
logits = LeNet(x)
saver = tf.train.Saver()

# randrange excludes its upper bound; randint(0, len(X_train)) could return
# len(X_train) itself and raise IndexError on the lookup below.
index = random.randrange(len(X_train))
image = X_train[index].squeeze()
label = y_train[index]
print("Label: ", label)
plt.figure(figsize=(1, 1))
plt.imshow(image, cmap="gray")
plt.show()

with tf.Session() as sess:
    # No variable initializer is run: the values come from the checkpoint.
    saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
    logits_output = sess.run(logits, feed_dict={x: image.reshape((1, 32, 32, 1))})
    logits_output = logits_output.squeeze()
    # Softmax with the maximum subtracted first so np.exp cannot overflow;
    # the resulting probabilities are mathematically unchanged.
    exp_logits = np.exp(logits_output - np.max(logits_output))
    pred_output = exp_logits / np.sum(exp_logits)
    print("Logits: ", logits_output)
    print("Prediction output:", pred_output)
    print("Predicted Label: ", np.argmax(pred_output))
I'm getting this error:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value rnn/output_projection_wrapper/bias
[[Node: rnn/output_projection_wrapper/bias/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](rnn/output_projection_wrapper/bias)]]
This is my code:
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

# The wrapper projects the 100 recurrent units down to n_outputs per step.
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),
    output_size=n_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

learning_rate = 0.001
loss = tf.reduce_mean(tf.square(outputs - y))  # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_iterations = 1500
batch_size = 50

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    saver.save(sess, "./my_time_series_model")  # not shown in the book

with tf.Session() as sess:
    # Variable values live in a session, not in the graph. A new session
    # starts with every variable uninitialized, so load the trained values
    # from the checkpoint written above before running the model.
    saver.restore(sess, "./my_time_series_model")
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})
How can I fix this?
Here, the problem occurs in the second session, because you didn't initialize the variables in that session. It's better to define only one session per graph (re-initializing the variables in a new session would overwrite the trained values).
sess_config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=True)
sess = tf.Session(config=sess_config)
sess.run(init)

# use this session for all computations
for iteration in range(n_iterations):
    X_batch, y_batch = next_batch(batch_size, n_steps)
    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    if iteration % 100 == 0:
        # `sess` is not installed as the default session (no `with` block),
        # so Tensor.eval() would fail; evaluate through the session instead.
        mse = sess.run(loss, feed_dict={X: X_batch, y: y_batch})
        print(iteration, "\tMSE:", mse)

saver.save(sess, "./my_time_series_model")  # not shown in the book

X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
y_pred = sess.run(outputs, feed_dict={X: X_new})
I'm trying to restore a model in Tensorflow which I've trained. The problem is that it does not seem like the weights are properly restored.
For the training I've got the weights and biases defined as:
# Weight matrices, keyed by layer, each drawn from a normal distribution.
W = dict([
    ('h1', tf.Variable(tf.random_normal([n_inputs, n_hidden_1]), name='wh1')),
    ('h2', tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name='wh2')),
    ('o', tf.Variable(tf.random_normal([n_hidden_2, n_classes]), name='wo')),
])
# Bias vectors for the same three layers.
b = dict([
    ('b1', tf.Variable(tf.random_normal([n_hidden_1]), name='bh1')),
    ('b2', tf.Variable(tf.random_normal([n_hidden_2]), name='bh2')),
    ('o', tf.Variable(tf.random_normal([n_classes]), name='bo')),
])
Then I do some training on my own custom 2D image dataset and save the model by calling the tf.saver
saver = tf.train.Saver()
saver.save(sess, 'tf.model')
Later I want to restore that model with the exact same weights, so I build the model as before (also with the random_normal initialization) and call the tf.saver.restore
saver = tf.train.import_meta_graph('tf.model.meta')
saver.restore(sess, tf.train.latest_checkpoint('./'))
Now, if i call:
temp = sess.run(W['h1'][0][0])
print temp
I get random values, and not the restored value of the weight.
I've drawn a blank on this one, can somebody point me in the right direction?
FYI, I've tried (without) luck to simply declare the tf.Variables, but I keep getting:
ValueError: initial_value must be specified.
even though Tensorflow themselves state that it should be possible to simply declare with no initial value (https://www.tensorflow.org/programmers_guide/variables part: Restoring Values)
Update 1
When I, as suggested, run
all_vars = tf.global_variables()
for v in all_vars:
print v.name
I get the following output:
wh1:0
wh2:0
wo:0
bh1:0
bh2:0
bo:0
wh1:0
wh2:0
wo:0
bh1:0
bh2:0
bo:0
beta1_power:0
beta2_power:0
wh1/Adam:0
wh1/Adam_1:0
wh2/Adam:0
wh2/Adam_1:0
wo/Adam:0
wo/Adam_1:0
bh1/Adam:0
bh1/Adam_1:0
bh2/Adam:0
bh2/Adam_1:0
bo/Adam:0
bo/Adam_1:0
Which shows that the variables indeed is read. However invoking
print sess.run("wh1:0")
Results in the error: Attempting to use uninitialized value wh1
So with the help of you guys, I ended up dividing the saving and restoring parts of my program into two files, to ensure that no unwanted variables were initialized.
Train and Save routines fnn.py
def build(self, topology):
    """
    Builds the topology of the model

    `topology` is a 4-item sequence: number of inputs, units in hidden layer
    1, units in hidden layer 2 and number of classes. Placeholders, weights,
    the prediction op and the cost are stored on `self`, and the input and
    output tensors are added to graph collections so they can be fetched
    again after the meta graph is imported.
    """
    # Sanity check
    assert len(topology) == 4

    n_inputs = topology[0]
    n_hidden_1 = topology[1]
    n_hidden_2 = topology[2]
    n_classes = topology[3]

    # Sanity check
    assert self.img_h * self.img_w == n_inputs

    # Instantiate TF Placeholders
    self.x = tf.placeholder(tf.float32, [None, n_inputs], name='x')
    self.y = tf.placeholder(tf.float32, [None, n_classes], name='y')

    self.W = {
        'h1': tf.Variable(tf.random_normal([n_inputs, n_hidden_1]), name='wh1'),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name='wh2'),
        'o': tf.Variable(tf.random_normal([n_hidden_2, n_classes]), name='wo')
    }
    self.b = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1]), name='bh1'),
        'b2': tf.Variable(tf.random_normal([n_hidden_2]), name='bh2'),
        'o': tf.Variable(tf.random_normal([n_classes]), name='bo')
    }

    # Create model
    self.l1 = tf.nn.sigmoid(tf.add(tf.matmul(self.x, self.W['h1']), self.b['b1']))
    self.l2 = tf.nn.sigmoid(tf.add(tf.matmul(self.l1, self.W['h2']), self.b['b2']))
    logits = tf.add(tf.matmul(self.l2, self.W['o']), self.b['o'])

    # Define predict operation
    self.predict_op = tf.argmax(logits, 1)
    probs = tf.nn.softmax(logits, name='probs')

    # Define cost function
    # Keyword arguments are used because the positional order of labels and
    # logits is easy to swap and TF 1.x rejects positional calls here.
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits))

    # Adding these to collection so we can restore them again
    tf.add_to_collection('inputs', self.x)
    tf.add_to_collection('inputs', self.y)
    tf.add_to_collection('outputs', logits)
    tf.add_to_collection('outputs', probs)
    tf.add_to_collection('outputs', self.predict_op)
def train(self, X, Y, n_epochs=10, learning_rate=0.001, logs_path=None):
    """
    Trains the Model

    `X` and `Y` are graph tensors that yield a batch each time they are run
    in the session. The model is saved to `self.model_file` after the last
    epoch. `logs_path` is accepted but not used here.
    """
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)

    costs = []

    # Instantiate TF Saver (after the optimizer, so its slots are covered too)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # start the threads used for reading files
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # Compute total number of batches
            total_batch = int(self.get_num_examples() / self.batch_size)

            # start training
            for epoch in range(n_epochs):
                for i in range(total_batch):
                    batch_xs, batch_ys = sess.run([X, Y])

                    # run the training step with feed of images
                    _, cost = sess.run([self.optimizer, self.cost],
                                       feed_dict={self.x: batch_xs,
                                                  self.y: batch_ys})
                    costs.append(cost)
                    # print(...) with one argument works on Python 2 and 3.
                    print("step %d" % (epoch * total_batch + i))
                print("Epoch %d" % epoch)

            saver.save(sess, self.model_file)

            temp = sess.run(self.W['h1'][0][0])
            print(temp)

            if self.visu:
                plt.plot(costs)
                plt.show()
        finally:
            # finalize: stop the reader threads even if training raised
            coord.request_stop()
            coord.join(threads)
Predict routine fnn_eval.py:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    g = tf.get_default_graph()

    # restore the model
    self.saver = tf.train.import_meta_graph(self.model_file)
    self.saver.restore(sess, tf.train.latest_checkpoint('./tfmodels/fnn/'))

    wh1 = g.get_tensor_by_name("wh1:0")
    # print(...) with one argument works on Python 2 and 3.
    print(sess.run(wh1[0][0]))

    # Fetch the tensors that the training code stored in graph collections.
    x, y = tf.get_collection('inputs')
    logits, probs, predict_op = tf.get_collection('outputs')

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    predictions = []

    try:
        print(Y.eval())

        for i in range(1):  # range(self.get_num_examples()):
            batch_xs = sess.run(X)
            # Reshape batch_xs if only a single image is given
            # (numpy is 4D: batch_size * height * width * channels)
            batch_xs = np.reshape(batch_xs, (-1, self.img_w * self.img_h))
            prediction, probabilities, logit = sess.run(
                [predict_op, probs, logits], feed_dict={x: batch_xs})
            predictions.append(prediction[0])
    finally:
        # finalize: stop the reader threads even if prediction raised
        coord.request_stop()
        coord.join(threads)
I guess the problem might be caused by creating a new variable when you restore the model, not getting the already existed variable. I tried this code
saver = tf.train.import_meta_graph('./model.ckpt-10.meta')
w1 = None
for v in tf.global_variables():
    print(v.name)
# Demonstrates the problem: get_variable does not look up the imported
# 'wh1'; the second listing below shows an extra variable.
w1 = tf.get_variable('wh1', [])
init = tf.global_variables_initializer()
sess.run(init)
saver.restore(sess, './model.ckpt-10')
for v in tf.global_variables():
    print(v.name)
and clearly you can see the output that it creates a new variable called wh1_1:0.
If you try this
w1 = None
# Pick the already existing variable out of the graph by name instead of
# creating another one.
for v in tf.global_variables():
    print(v.name)
    if v.name == 'wh1:0':
        w1 = v
init = [tf.global_variables_initializer(), tf.local_variables_initializer()]
sess.run(init)
# Restoring after the initializers leaves the checkpoint values in place.
saver.restore(sess, './model.ckpt-10')
for v in tf.global_variables():
    print(v.name)
temp = sess.run(w1)
print(temp[0][0])
There will be no problem.
Tensorflow suggests that it is better to use tf.variable_scope() (link) like this
scope_name = "foo"
var_shape = [1]
# First entry creates the variable under the scope.
with tf.variable_scope(scope_name):
    v = tf.get_variable("v", var_shape)
# Re-entering with reuse=True hands back the existing variable.
with tf.variable_scope(scope_name, reuse=True):
    v1 = tf.get_variable("v", var_shape)
assert v1 == v
I met the same problem when saving a model in the saved_model format. Anyone using the function add_meta_graph_and_variables to save a model for serving should be careful about this parameter: "legacy_init_op: Legacy support for op or group of ops to execute after the restore op upon a load."
You want to pass in a var_list to the Saver.
In your case, the variable list would come from your W and b dictionaries: var_list = list(W.values())+list(b.values()). Then, to restore the model, pass in var_list to the Saver: saver = tf.train.Saver(var_list=var_list).
Next, you need to get your checkpoint state: model = tf.train.get_checkpoint_state(<your saved model directory>). After that you can restore the trained weights.
# Restrict the Saver to the model's own weights and biases.
var_list = list(W.values()) + list(b.values())
saver = tf.train.Saver(var_list=var_list)
# get_checkpoint_state lives in tf.train; tf.get_checkpoint_state does not exist.
model = tf.train.get_checkpoint_state('./model/')
with tf.Session() as sess:
    saver.restore(sess, model.model_checkpoint_path)
    # Now use the pretrained weights
#Now use the pretrained weights