How to debug a Python program that freezes on one line? - python

When I run my code, it just stays on the line image_batch, label_batch = sess.run([test_images, test_labels]) without any error message. It hangs there and never moves on.
Here is my code:
# coding=utf-8
from color_1 import read_and_decode, get_batch, get_test_batch
import color_inference
import cv2
import os
import time
import numpy as np
import tensorflow as tf
import color_train
import math
batch_size=128
num_examples = 10000
crop_size=56
def evaluate():
    """Restore the latest checkpoint and print top-1 precision on 'val.tfrecords'.

    Builds the input pipeline and inference graph, restores the most recent
    checkpoint found under ``color_train.MODEL_SAVE_PATH`` and evaluates one
    batch. Prints a message and returns early when no checkpoint exists.
    """
    image_holder = tf.placeholder(tf.float32, [batch_size, 56, 56, 3], name='x-input')
    label_holder = tf.placeholder(tf.int32, [batch_size], name='y-input')
    test_image, test_label = read_and_decode('val.tfrecords')
    test_images, test_labels = get_test_batch(test_image, test_label, batch_size, crop_size)
    y = color_inference.inference(image_holder)
    # float() keeps ceil() meaningful under Python 2, where int / int truncates.
    num_iter = int(math.ceil(float(num_examples) / batch_size))
    true_count = 0
    total_sample_count = num_iter * batch_size
    top_k_op = tf.nn.in_top_k(y, label_holder, 1)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(color_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # Checkpoint files are named "<prefix>-<global_step>".
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, os.path.join(color_train.MODEL_SAVE_PATH, ckpt_name))
            print('Loading success, global_step is %s' % global_step)
            # NOTE(review): no queue runners are started in this function. If
            # get_test_batch() is queue-based (it lives in color_1, not shown),
            # the next call blocks forever -- this is the hang being asked about.
            image_batch, label_batch = sess.run([test_images, test_labels])
            predictions = sess.run([top_k_op],
                                   feed_dict={image_holder: image_batch,
                                              label_holder: label_batch})
            true_count += np.sum(predictions)
            print("Count is:%g" % true_count)
            # NOTE(review): only one batch is evaluated, yet the denominator
            # covers num_iter batches -- confirm whether a loop was intended.
            precision = true_count * 1.0 / total_sample_count
            # Format explicitly; print() does not interpolate extra arguments.
            print("After %s training step,the prediction is :%g" % (global_step, precision))
        else:
            print('No checkpoint file found')
            return
def main(argv=None):
    """Entry point handed to tf.app.run(); ``argv`` is accepted but unused."""
    evaluate()


if __name__ == '__main__':
    tf.app.run()
My previous question is similar to this one, but the code is a little different; you may find something useful in that question.

It seems like you are not starting the queue runners / initializing the variables properly. I have seen similar behavior with my models when I forgot to do that.
When this is the case you most likely get stuck at the line
image_batch, label_batch = sess.run([test_images, test_labels])
because the threads that pull data from the tfrecords have not been started.
Before you create your session, set up an op for initializing the variables and a thread coordinator:
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
coord = tf.train.Coordinator()
then at the very start of your session, before pulling any data from the tfrecords you run the op and start the queue runners:
sess.run(init_op)
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# main loop goes here, like training and evaluating

Related

Tensorboard tf events file is too large despite having very few steps

I have pasted part of the code (all the part that refers to the tensorboard part) below. I am only recording the loss scalar variable and only adding the summary once for one epoch. I ran it for a total of 3 epochs. That should ideally be a very small tfevents file. However, the tfevents file is 1.3GB. I am not sure what is causing the file to be so big.
Happy to share the rest of the code if needed
def do_training(update_op, loss, summary_op):
    """Train until the input iterator is exhausted, logging once per epoch.

    Args:
        update_op: op that applies one optimisation step.
        loss: scalar loss tensor fetched alongside ``update_op``.
        summary_op: merged summary op written to the event file once per epoch.

    Relies on the module-level names ``logs_path``, ``save_path``, ``X_train``
    and ``batch_size`` (defined outside this snippet).
    """
    # NOTE(review): the whole graph is serialized into the events file here. If
    # the dataset embeds its data in the graph as constants, that could explain
    # a very large file -- TODO confirm.
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    saver = tf.train.Saver()
    # Integer steps per epoch; max() guards against a modulo by zero when the
    # dataset is smaller than one batch.
    steps_per_epoch = max(1, X_train.shape[0] // batch_size)
    loss_value = None  # stays None if the iterator yields nothing
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        try:
            step = 0
            while True:
                if step % steps_per_epoch == 0:
                    # Fetch the summary in the SAME run as the update: a separate
                    # sess.run(summary_op) would pull another batch from the
                    # iterator and log a loss unrelated to this step.
                    _, loss_value, summary = sess.run((update_op, loss, summary_op))
                    writer.add_summary(summary, global_step=step)
                    print('Step {} with loss {}'.format(step, loss_value))
                else:
                    _, loss_value = sess.run((update_op, loss))
                step += 1
        except tf.errors.OutOfRangeError:
            # we're through the dataset
            pass
        finally:
            writer.close()
        saver.save(sess, save_path)
        print('Final loss: {}'.format(loss_value))
def serial_training(model_fn, dataset):
    """Build loss, summary and optimiser ops for ``dataset`` and run do_training.

    Args:
        model_fn: callable taking a zero-argument function that returns the next
            batch, and returning the scalar loss tensor.
        dataset: object providing ``make_one_shot_iterator()``.
    """
    iterator = dataset.make_one_shot_iterator()
    loss = model_fn(lambda: iterator.get_next())
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0002)
    global_step = tf.train.get_or_create_global_step()
    update_op = optimizer.minimize(loss, global_step=global_step)
    do_training(update_op, loss, summary_op)
tf.reset_default_graph()
serial_training(training_model,training_dataset(epochs=3,batch_size=batch_size))

How to predict values with a trained Tensorflow model

I've trained my NN in Tensorflow and saved the model like this:
def neural_net(x):
    """Return the output of a two-layer dense network applied to ``x``.

    One hidden layer of 195 sigmoid units followed by a linear layer of 6 units.
    """
    layer_1 = tf.layers.dense(inputs=x, units=195, activation=tf.nn.sigmoid)
    out_layer = tf.layers.dense(inputs=layer_1, units=6)
    return out_layer
train_x = pd.read_csv("data_x.csv", sep=" ")
train_y = pd.read_csv("data_y.csv", sep=" ")
train_x = train_x / 6 - 0.5
train_size = 0.9
train_cnt = int(floor(train_x.shape[0] * train_size))
x_train = train_x.iloc[0:train_cnt].values
y_train = train_y.iloc[0:train_cnt].values
x_test = train_x.iloc[train_cnt:].values
y_test = train_y.iloc[train_cnt:].values
x = tf.placeholder("float", [None, 386])
y = tf.placeholder("float", [None, 6])
nn_output = neural_net(x)
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y, predictions=nn_output))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
training_epochs = 5000
display_step = 1000
batch_size = 30
keep_prob = tf.placeholder("float")
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(training_epochs):
total_batch = int(len(x_train) / batch_size)
x_batches = np.array_split(x_train, total_batch)
y_batches = np.array_split(y_train, total_batch)
for i in range(total_batch):
batch_x, batch_y = x_batches[i], y_batches[i]
_, c = sess.run([optimizer, cost],
feed_dict={
x: batch_x,
y: batch_y,
keep_prob: 0.8
})
saver.save(sess, 'trained_model', global_step=1000)
Now I want to use the trained model in a different file. Of course there are many many examples of restoring and saving the model, I went through lots of them. Still I couldn't make any of them work, there is always some kind of error. So this is my restore file, could you please help me to make it restore the saved model?
saver = tf.train.import_meta_graph('trained_model-1000.meta')
y_pred = []
with tf.Session() as sess:
saver.restore(sess, tf.train.latest_checkpoint('./'))
sess.run([y_pred], feed_dict={x: input_values})
E.g. this attempt gave me the error "The session graph is empty. Add operations to the graph before calling run()." So what operation should I add to the graph and how? I don't know what that operation should be in my model... I don't understand this whole concept of saving/restoring in Tensorflow. Or should I do the restoring completely differently? Thanks in advance!
Forgive me if I am wrong but tf.train.Saver() only saves the variable values not the graph itself. This means that if you want to load the model in a different file you need to rebuild the graph or somehow load the graph as well. Tensorflow documentation states:
The tf.train.Saver object not only saves variables to checkpoint files, it also restores variables. Note that when you restore variables from a file you do not have to initialize them beforehand.
Consider the following example:
One file that saves the model:
# Create some variables.
v1 = tf.get_variable("v1", shape=[3], initializer = tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer = tf.zeros_initializer)
inc_v1 = v1.assign(v1+1)
dec_v2 = v2.assign(v2-1)
# Add an op to initialize the variables.
init_op = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Later, launch the model, initialize the variables, do some work, and save the
# variables to disk.
with tf.Session() as sess:
sess.run(init_op)
# Do some work with the model.
inc_v1.op.run()
dec_v2.op.run()
# Save the variables to disk.
save_path = saver.save(sess, "/tmp/model.ckpt")
print("Model saved in file: %s" % save_path)
The other file that loads the previously saved model:
tf.reset_default_graph()
# Create some variables.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
# Later, launch the model, use the saver to restore variables from disk, and
# do some work with the model.
with tf.Session() as sess:
# Restore variables from disk.
saver.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Check the values of the variables
print("v1 : %s" % v1.eval())
print("v2 : %s" % v2.eval())
output = sess.run(nn_output, feed_dict={ x: batch_x, keep_prob: 0.8 })
Where nn_output is the name of the output variable of the last layer of your network. You can save your variable using:
saver = tf.train.Saver([nn_output])
saver.save(sess, 'my_test_model',global_step=1000) # save every 1000 steps
and therefore in your code:
out_layer = tf.layers.dense(inputs=layer_1, units=6)
should be :
out_layer = tf.layers.dense(inputs=layer_1, units=6, name='nn_output')
To restore:
with tf.Session() as sess:
    # saver.save(sess, 'my_test_model', global_step=1000) writes the graph to
    # 'my_test_model-1000.meta', so that is the file to import.
    saver = tf.train.import_meta_graph('my_test_model-1000.meta')
    saver.restore(sess, tf.train.latest_checkpoint('./'))
Now you should have access to that node of the graph. If the name is not specified, it is difficult to recover that particular layer.
You can now use the tf.saved_model.builder.SavedModelBuilder function.
The main lines for the saving:
builder = tf.saved_model.builder.SavedModelBuilder(graph_location)
builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])
builder.save()
A code to save the model :
...
def main(_):
    """Train the MNIST model, then export it as a SavedModel and a checkpoint.

    Uses ``FLAGS``, ``input_data`` and ``deepnn`` from outside this snippet.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)  # an unknown model

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    graph_location = "tmp/"
    print('Saving graph to: %s' % graph_location)
    builder = tf.saved_model.builder.SavedModelBuilder(graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

        # Variable values are written when add_meta_graph_and_variables() is
        # called, so call it after training to export the trained weights
        # rather than the freshly initialised ones.
        builder.add_meta_graph_and_variables(sess, ["cnn_mnist"])
        builder.save()
        saver.save(sess, "tmp/my_checkpoint.ckpt", global_step=0)
if __name__ == '__main__':
    # Parse --data_dir here and forward any unrecognised arguments to tf.app.run.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
A code to restore the model :
import tensorflow as tf

# Load the weights from the exported SavedModel
export_dir = "tmp"
sess = tf.Session()
tf.saved_model.loader.load(sess, ["cnn_mnist"], export_dir)
#trainable_var = tf.trainable_variables()
trainable_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
for var in trainable_var:
    print(var.name)
This question is old. But if someone else is struggling with doing predictions with a trained model (with TF 1.x) this code might help.
Pay attention that
Your network construction/defining code must be executed before the Saver() instance is created. Otherwise you get the error: ValueError: No variables to save. In the code below the LeNet(x) method constructs the network for input placeholder x.
You should not initialize the variables in the session. Because obviously you are loading them from the saved model.
# All the network construction code (e.g. defining the variables and
# layers) must be executed before the creation of the Saver() object.
# Otherwise you get the error: ValueError: No variables to save.
logits = LeNet(x)
saver = tf.train.Saver()

# randint() includes both endpoints, so the upper bound must be len - 1;
# otherwise the index can run one past the end of X_train.
index = random.randint(0, len(X_train) - 1)
image = X_train[index].squeeze()
label = y_train[index]
print("Label: ", label)

plt.figure(figsize=(1,1))
plt.imshow(image, cmap="gray")
plt.show()

with tf.Session() as sess:
    # Variables are restored from the checkpoint, so no initializer is run.
    saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
    logits_output = sess.run(logits, feed_dict={x: image.reshape((1, 32, 32, 1))})
    logits_output = logits_output.squeeze()
    # Softmax; subtracting the max leaves the result unchanged mathematically
    # but avoids overflow in exp() for large logits.
    exp_logits = np.exp(logits_output - np.max(logits_output))
    pred_output = exp_logits / np.sum(exp_logits)
    print("Logits: ", logits_output)
    print("Prediction output:", pred_output)
    print("Predicted Label: ", np.argmax(pred_output))

tensorflow calculations without retraining

I want to train a model once in tensorflow, and then want to use the trained model for predicting some functions. Before we get into specifics, lets define a couple of functions ...
def runTF(func, inpDict):
    """Evaluate ``func`` once in a fresh session and return the result.

    Args:
        func: tensor/op (or list of them) to fetch.
        inpDict: feed dict passed to ``sess.run``.

    Note: every call runs the global variables initializer in a new session,
    so variable values produced in any other session are not used here.
    """
    # The context manager closes the session even if sess.run raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run(func, feed_dict=inpDict)
and
def optTF(opt, res, others, inpDict, nSteps, printSteps=50):
    """Run ``opt`` for ``nSteps`` steps in a fresh session, recording values.

    Args:
        opt: optimiser op run once per step.
        res: tensor whose value is recorded after every step.
        others: list of additional tensors recorded after every step.
        inpDict: feed dict used for every ``sess.run`` call.
        nSteps: number of optimisation steps.
        printSteps: the step index is printed every ``printSteps`` steps.

    Returns:
        A pair ``(res_values, other_values)``: the per-step values of ``res``
        and the per-step lists of values of ``others``.
    """
    # Local names chosen so they do not shadow the stdlib modules ``re``/``os``.
    res_values, other_values = [], []
    # The context manager closes the session even if a run raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(nSteps):
            # First run the optimizer ...
            sess.run(opt, feed_dict=inpDict)
            # ... then record everything we want to keep.
            fetched = sess.run([res] + others, feed_dict=inpDict)
            res_values.append(fetched[0])
            other_values.append(fetched[1:])
            if (i % printSteps) == 0:
                print('{:5d}'.format(i))
    return res_values, other_values
Here are a couple of steps for what I am doing ...
A. Generating some data
N = 500
features = 2
nSteps = 1000
X = np.array([np.random.random(N), np.random.random(N)])
data = [X.T, X[0].reshape(-1, 1)]
B. create a simple linear model
d = tf.placeholder(shape = np.shape(data[0]), dtype = tf.float32, name='d') # input layer
dOut = tf.placeholder(shape = np.shape(data[1]), dtype = tf.float32, name='dOut') # output layer
W = tf.Variable( np.random.randn(features, 1), dtype = tf.float32, name='W')
b = tf.Variable( np.zeros((1,1)), dtype = tf.float32, name='b')
result = tf.matmul(d, W)+b
cost = tf.reduce_mean((dOut - result)**2)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.1).minimize(cost)
C. Lets run the optimiser
inpDict = {d: data[0], dOut:data[1]}
ot = optTF(optimizer, result, [W, cost], inpDict, 200, 50)
Here, I have checked the result, and see that it is what I want. So the optimiser is working fine. The model has been optimised. Now, I want to make a prediction with some other data. So I do ...
r = runTF(result, inpDict)
This new result is not what I would expect from the same trained model.
Now, as long as I stay within the same tf.Session(), we are OK. However, I do want to be able to make predictions even when I am done with a session. So my question is: how do I use a model that was trained in one session in a different session?
Note, the entire thing is something that I did in a different session?
Edited:
I edited the two functions to incorporate the saving ...
def runTF(func, inpDict, modelFile=None):
    """Evaluate ``func`` in a fresh session, optionally restoring a checkpoint.

    Args:
        func: tensor/op (or list of them) to fetch.
        inpDict: feed dict passed to ``sess.run``.
        modelFile: when not None, passed to ``tf.train.get_checkpoint_state``
            to look up a checkpoint to restore before evaluating.

    Returns:
        The value(s) of ``func``.
    """
    saver = tf.train.Saver() if modelFile is not None else None
    # The context manager closes the session even if a run raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if modelFile is not None:
            # NOTE(review): get_checkpoint_state() expects the DIRECTORY that
            # holds the 'checkpoint' file. The call site shown with this snippet
            # passes a checkpoint prefix ('models/temp.ckpt'), for which it
            # returns None and nothing is restored.
            ckpt = tf.train.get_checkpoint_state(modelFile)
            print(modelFile, ckpt)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Session restored')
        return sess.run(func, feed_dict=inpDict)
and
def optTF(opt, res, others, inpDict, nSteps, printSteps=50, modelFile='models/temp.ckpt'):
    """Run ``opt`` for ``nSteps`` steps, record values, then save a checkpoint.

    Args:
        opt: optimiser op run once per step.
        res: tensor whose value is recorded after every step.
        others: list of additional tensors recorded after every step.
        inpDict: feed dict used for every ``sess.run`` call.
        nSteps: number of optimisation steps.
        printSteps: the step index is printed every ``printSteps`` steps.
        modelFile: checkpoint path handed to ``saver.save`` after training.

    Returns:
        A pair ``(res_values, other_values)``: the per-step values of ``res``
        and the per-step lists of values of ``others``.
    """
    # Local names chosen so they do not shadow the stdlib modules ``re``/``os``.
    res_values, other_values = [], []
    saver = tf.train.Saver()
    # The context manager closes the session even if a run raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(nSteps):
            # First run the optimizer ...
            sess.run(opt, feed_dict=inpDict)
            # ... then record everything we want to keep.
            fetched = sess.run([res] + others, feed_dict=inpDict)
            res_values.append(fetched[0])
            other_values.append(fetched[1:])
            if (i % printSteps) == 0:
                print('{:5d}'.format(i))
        path = saver.save(sess, modelFile)
        print('Model saved in: {}'.format(path))
    return res_values, other_values
And running the model as:
ot = optTF(optimizer, result, [cost]+weights+biases, inpDict, 200, 50)
r = runTF([result], inpDict, 'models/temp.ckpt')
Still nothing! I checked that:
The value of ckpt is None
The models folder has the following files:
checkpoint
temp.ckpt.index
temp.ckpt.data-00000-of-00001
temp.ckpt.meta
You need to save and restore the session you are creating and training. As in
init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(init)
if restore:
ckpt = tf.train.get_checkpoint_state(save_path)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
else:
# ... training code omitted ...
saver.save(sess, save_path)
Also check out https://www.tensorflow.org/programmers_guide/saved_model if you want to create a full model with an estimator instead of just one session.

Why does this tensorflow code crash?

I've built a toy model for image classification. The program is loosely structured like the cifar10 tutorial. Training starts fine, but eventually the program crashes. I've finalized the graph just in case somewhere ops were being added to it, and in tensorboard it looks great, but without fail it eventually freezes and forces a hard restart (or long wait for an eventual reboot). The exit makes it seem like a GPU memory issue, but the model is small and should fit. If I allocate the full GPU memory (which gives another 4gb), it will still crash.
The data are 256x256x3 images and labels stored in a tfrecords file. The training function code looks like:
def train():
    """Build the training graph and run 5540 training steps with summaries.

    Uses ``distorted_inputs``, ``inference``, ``loss``, ``training``,
    ``BATCH_SIZE`` and ``ROOT`` from the surrounding module.
    """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        train_images_batch, train_labels_batch = distorted_inputs(batch_size=BATCH_SIZE)
        train_logits = inference(train_images_batch)
        train_batch_loss = loss(train_logits, train_labels_batch)
        train_op = training(train_batch_loss, global_step, 0.1)
        merged = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)
        sess = tf.Session(config=sess_config)
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(ROOT, 'logs', 'train'), sess.graph)
        init = tf.global_variables_initializer()
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Finalize the graph the session actually runs. tf.Graph().finalize()
        # would create a new, empty graph and finalize that instead, leaving
        # the training graph free to grow.
        sess.graph.finalize()
        try:
            for i in range(5540):
                start_time = time.time()
                summary, _, batch_loss = sess.run([merged, train_op, train_batch_loss])
                duration = time.time() - start_time
                train_summary_writer.add_summary(summary, i)
                if i % 10 == 0:
                    msg = 'batch: {} loss: {:.6f} time: {:.8} sec/batch'.format(
                        i, batch_loss, str(duration))
                    print(msg)
        finally:
            # Stop the input threads and release resources even if a step fails.
            coord.request_stop()
            coord.join(threads)
            train_summary_writer.close()
            sess.close()
The loss and training op are cross_entropy and the adam optimizer respectively:
def loss(logits, labels):
    """Return the mean sparse softmax cross-entropy of ``logits`` vs ``labels``.

    The mean is also added to the 'losses' graph collection.
    """
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    xentropy_mean = tf.reduce_mean(xentropy, name='cross_entropy')
    tf.add_to_collection('losses', xentropy_mean)
    return xentropy_mean
def training(loss, global_step, learning_rate):
    """Return an Adam train op that minimises ``loss`` and bumps ``global_step``."""
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
And the batches are generated with
def distorted_inputs(batch_size):
    """Return shuffled, randomly flipped (images, labels) batches.

    Records are read from 'data/train.tfrecords'.

    Args:
        batch_size: number of examples per batch.

    Returns:
        The ``(images_batch, labels_batch)`` pair from ``_generate_batch``.
    """
    filename_queue = tf.train.string_input_producer(
        ['data/train.tfrecords'], num_epochs=None)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'label': tf.FixedLenFeature([], tf.int64),
                  'image': tf.FixedLenFeature([], tf.string)})
    label = features['label']
    label = tf.cast(label, tf.int32)
    image = tf.decode_raw(features['image'], tf.uint8)
    # Scale bytes to [-0.5, 0.5].
    image = (tf.cast(image, tf.float32) / 255) - 0.5
    image = tf.reshape(image, shape=[256, 256, 3])
    # data augmentation
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_flip_left_right(image)
    print('filling the queue with {} images '
          'before starting to train'.format(MIN_QUEUE_EXAMPLES))
    # Honour the caller's batch_size instead of the module-level BATCH_SIZE.
    return _generate_batch(image, label, MIN_QUEUE_EXAMPLES, batch_size)
and
def _generate_batch(image, label,
                    min_queue_examples=MIN_QUEUE_EXAMPLES,
                    batch_size=BATCH_SIZE):
    """Return shuffled batches of ``image``/``label`` and add an image summary.

    Args:
        image: single-example image tensor.
        label: single-example label tensor.
        min_queue_examples: passed as ``min_after_dequeue`` to ``shuffle_batch``.
        batch_size: number of examples per batch.

    Returns:
        ``(images_batch, labels_batch)`` from ``tf.train.shuffle_batch``.
    """
    images_batch, labels_batch = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size,
        # Size the queue from the batch_size argument, not the global default.
        num_threads=12, capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
    tf.summary.image('images', images_batch)
    return images_batch, labels_batch
What am I missing?
So I resolved this. Here's the solution in case it's useful to someone else. TL,DR: it's a hardware issue.
Specifically, it's a PCIe bus error, the same error as that with the most votes here. Possibly this is caused by message signalled interrupts being incompatible with the PLX switches, as suggested here. Also in that thread is what resolved the issue, setting kernel parameter pci=nommconf to disable the msi's.
Between Tensorflow, Torch, and Theano, tf is the only deep learning framework that triggers this issue. Why, I'm not sure.

Transform map to mapPartition using pyspark

I am trying to load a tensorflow model from disk and predicting the values.
Code
def get_value(row):
    """Load the saved model and return ``(rowkey, prediction)`` for one row.

    Args:
        row: pair whose first item is the row key and second the input vector.

    The model is re-imported from ``checkpoint_file`` on every call.
    ``allow_soft_placement`` and ``log_device_placement`` come from the
    surrounding module.
    """
    print("**********************************************")
    graph = tf.Graph()
    rowkey = row[0]
    checkpoint_file = "/home/sahil/Desktop/Relation_Extraction/data/1485336002/checkpoints/model-300"
    print("Loading model................................")
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        # Using the session as a context manager makes it the default session
        # and closes it on exit, so sessions do not pile up across rows.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            input_x = graph.get_operation_by_name("X_train").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            batch_predictions = sess.run(predictions, {input_x: [row[1]], dropout_keep_prob: 1.0})
            print(batch_predictions)
            return (rowkey, batch_predictions)
I have a RDD which consists of a tuple (rowkey, input_vector). I want to use the loaded model to predict the score/class of the input.
Code to call get_value()
result = data_rdd.map(lambda iter: get_value(iter))
result.foreach(print)
The problem is that every time map calls get_value, the model is loaded again for each tuple, which takes a lot of time.
I am thinking of loading the model using mapPartitions and then use map to call get_value function.
I have no clue how to convert the code to mapPartitions so that the TensorFlow model is loaded only once per partition, reducing the running time.
Thanks in advance.
I am not sure if I get your question correctly, but we can optimise your code a bit here.
graph = tf.Graph()
checkpoint_file = "/home/sahil/Desktop/Relation_Extraction/data/1485336002/checkpoints/model-300"
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=allow_soft_placement,
log_device_placement=log_device_placement)
sess = tf.Session(config=session_conf)
s = sess.as_default()
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
input_x = graph.get_operation_by_name("X_train").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
predictions = graph.get_operation_by_name("output/predictions").outputs[0]
session_pickle = cPickle.dumps(sess)
def get_value(key, vector, session_pickle):
    """Return ``(key, prediction)`` for ``vector`` using an unpickled session.

    Args:
        key: row key, returned unchanged.
        vector: input vector fed to the model as a batch of one.
        session_pickle: pickled session produced by ``cPickle.dumps(sess)``.

    Uses ``predictions``, ``input_x`` and ``dropout_keep_prob`` from the
    enclosing module.
    """
    # NOTE(review): this assumes a tf.Session survives pickling, which is not
    # expected to work -- TODO confirm. Only unpickle data you created yourself.
    sess = cPickle.loads(session_pickle)
    rowkey = key
    batch_predictions = sess.run(predictions, {input_x: [vector], dropout_keep_prob: 1.0})
    print(batch_predictions)
    return (rowkey, batch_predictions)
result = data_rdd.map(lambda (key, row): get_value(key=key, vector = row , session_pickle = session_pickle))
result.foreach(print)
So you can serialize your tensorflow session. Though I haven't tested your code here. Run this and leave a comment.
I guess that the below code is a huge improvement as it uses mapPartitions.
Code
def predict(rows):
    """Yield ``(key, prediction)`` for each row, loading the model only once.

    Written for ``mapPartitions``: the model is imported once per call and
    reused for every row of the partition.

    Args:
        rows: iterable of pairs ``(key, input_vector)``.

    ``allow_soft_placement`` and ``log_device_placement`` come from the
    surrounding module.
    """
    graph = tf.Graph()
    checkpoint_file = "/home/sahil/Desktop/Relation_Extraction/data/1485336002/checkpoints/model-300"
    print("Loading model................................")
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        # The context manager makes the session the default one and closes it
        # once the generator has finished with the partition.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            print("**********************************************")
            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("X_train").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            # Predict one row at a time
            for row in rows:
                X_test = [row[1]]
                batch_predictions = sess.run(
                    predictions, {input_x: X_test, dropout_keep_prob: 1.0})
                yield (row[0], batch_predictions)
result = data_rdd.mapPartitions(lambda iter: predict(iter))
result.foreach(print)

Categories

Resources