I am exporting a SavedModel that takes a string placeholder as the input tensor. I injected a subgraph to preprocess this string tensor so that it can be passed into the model, using py_func to perform my Python string operations on the tensor.
Here, input_text is the input tensor in the SavedModel signature. I created another placeholder, input_ints, whose default value is initialized with the result of running py_func on input_text. I initially defined input_ints directly as the py_func output (input_ints = tf.py_func(preprocess, [input_text], tf.int64)), but then tf.nn.dynamic_rnn would not accept a tensor with an unspecified shape.
# Create the graph object
with tf.name_scope('inputs'):
    input_text = tf.placeholder(tf.string, name="input_text")
    input_ints = tf.placeholder_with_default(
        tf.py_func(preprocess, [input_text], tf.int64), shape=[None, None])

def lstm_cell():
    # Your basic LSTM cell
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=tf.get_variable_scope().reuse)
    # Add dropout to the cell
    return tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

# def create_rnn():
with tf.name_scope("Embeddings"):
    embedding = tf.Variable(tf.random_uniform((vocab_size, embed_size), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, input_ints)

with tf.name_scope("RNN_layers"):
    cell = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(lstm_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)

with tf.name_scope("RNN_forward"):
    outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

with tf.name_scope('predictions'):
    predictions = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.sigmoid)
Now using the above implementation, I can export the model properly but when restoring the model, I get the following error:
2017-11-23 17:29:14.600184: W tensorflow/core/framework/op_kernel.cc:1192] Unknown: KeyError: 'pyfunc_0'
Traceback (most recent call last):
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
File "/Users/sakibarrahman/anaconda/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.UnknownError: KeyError: 'pyfunc_0'
[[Node: inputs/PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_INT64], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_inputs/input_text_0_0)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "neural_load_model.py", line 85, in <module>
result = sess.run(output_tensor, {input_tensor: "Charter Communications, Inc. (CHTR) Stock Rating Reaffirmed by Goldman Sachs Group, Inc. (The)"})
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnknownError: KeyError: 'pyfunc_0'
[[Node: inputs/PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_INT64], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_inputs/input_text_0_0)]]
Caused by op 'inputs/PyFunc', defined at:
File "neural_load_model.py", line 74, in <module>
model = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], import_path)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/saved_model/loader_impl.py", line 216, in load
saver = tf_saver.import_meta_graph(meta_graph_def_to_load, **saver_kwargs)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1698, in import_meta_graph
**kwargs)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/meta_graph.py", line 656, in import_scoped_meta_graph
producer_op_list=producer_op_list)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 313, in import_graph_def
op_def=op_def)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Users/sakibarrahman/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
UnknownError (see above for traceback): KeyError: 'pyfunc_0'
[[Node: inputs/PyFunc = PyFunc[Tin=[DT_STRING], Tout=[DT_INT64], token="pyfunc_0", _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_inputs/input_text_0_0)]]
I have looked at this issue posted on GitHub, but I am not sure how to implement it. Also, I am just loading the model and passing in a string as input, not using 'freeze_graph'.
My code for saving the model:
saver = tf.train.Saver()

#Define new functions
def preprocess(text):
.
.
.
tf.reset_default_graph()
.
.
.
#Define new placeholder with default value initialized with py_func that was not in the original model graph
with tf.name_scope('inputs'):
    input_text = tf.placeholder(tf.string, name="input_text")
    input_ints = tf.placeholder_with_default(
        tf.py_func(preprocess, [input_text], tf.int64), shape=[None, None])
.
.
.
#Define placeholders and ops that I need and were in the original graph

saver = tf.train.Saver()

#Serving the model
with tf.Session() as sess:
    #Restore from old checkpoint
    saver.restore(sess, import_path)

    print('Exporting trained model to %s' % (export_path))
    builder = saved_model_builder.SavedModelBuilder(export_path)

    original_assets_directory = export_path + '/assets'
    original_assets_filename = "vocabulary.pickle"
    original_assets_filepath = write_vocab(original_assets_directory,
                                           original_assets_filename)

    # Set up the assets collection.
    assets_filepath = tf.constant(original_assets_filepath)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath)
    filename_tensor = tf.Variable(
        original_assets_filename,
        name="vocab_tensor",
        trainable=False,
        collections=[])
    assign_filename_op = filename_tensor.assign(original_assets_filename)

    # Build the signature_def_map.
    classification_inputs = utils.build_tensor_info(input_text)
    classification_outputs_classes = utils.build_tensor_info(predictions)

    classification_signature = signature_def_utils.build_signature_def(
        inputs={signature_constants.CLASSIFY_INPUTS: classification_inputs},
        outputs={
            signature_constants.CLASSIFY_OUTPUT_CLASSES:
                classification_outputs_classes,
        },
        method_name=signature_constants.CLASSIFY_METHOD_NAME)

    legacy_init_op = tf.group(
        tf.tables_initializer(), name='legacy_init_op')

    #add the sigs to the servable
    builder.add_meta_graph_and_variables(
        sess, [tag_constants.SERVING],
        signature_def_map={
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                classification_signature
        },
        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
        legacy_init_op=tf.group(assign_filename_op))

    print("added meta graph and variables")

    builder.save()
    print("model saved")
My code for loading the model. Not defining the function or the placeholders leads to the 'pyfunc_0' error:
#Define preprocess function
def preprocess(text_bin):

#Define new placeholders
with tf.name_scope('inputs'):
    input_text = tf.placeholder(tf.string, name="input_text")
    input_ints = tf.placeholder_with_default(
        tf.py_func(preprocess, [input_text], tf.int64), shape=[None, None])

with tf.Session(graph=tf.Graph()) as sess:
    # restore saved model
    model = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], import_path)
    print("model restored")
    loaded_graph = tf.get_default_graph()

    # get necessary tensors by name
    input_tensor_name = model.signature_def[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].inputs[signature_constants.CLASSIFY_INPUTS].name
    input_tensor = loaded_graph.get_tensor_by_name(input_tensor_name)
    output_tensor_name = model.signature_def[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs[signature_constants.CLASSIFY_OUTPUT_CLASSES].name
    output_tensor = loaded_graph.get_tensor_by_name(output_tensor_name)

    result = sess.run(output_tensor, {input_tensor: "Some String"})
    print(result)
Update:
Defining the functions and placeholders when loading the SavedModel seems to work. However, I don't know why they are not being added to the graph prior to using the builder to save the model.
It looks like your model has a custom layer. You can follow the model code to find it, and then define that function before loading the graph. Also, the order of the function definitions is important.
The preprocess function that was being used was not actually part of the graph, so py_func() wouldn't know which function to call when loading the SavedModel. There is currently no easy way to do preprocessing within the TensorFlow Serving flow. It either has to be done on the client side before calling the model, or a custom op has to be created so that it can be part of the model. The other alternative is to create a custom servable.
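As a workaround consistent with this answer, the preprocessing can be done on the client side and the resulting integer IDs fed straight into the input_ints placeholder_with_default, so the PyFunc default value is never evaluated. A minimal sketch, assuming preprocess can also be called as a plain Python function returning a [1, seq_len] int64 array, that import_path and output_tensor_name are defined as in the loading code above, and that the placeholder_with_default kept its default op name inside the 'inputs' scope (adjust the tensor name if yours differs):

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], import_path)
    graph = tf.get_default_graph()

    # Feed the integer tensor directly; the py_func default input is never run.
    input_ints = graph.get_tensor_by_name("inputs/PlaceholderWithDefault:0")
    output_tensor = graph.get_tensor_by_name(output_tensor_name)

    ids = preprocess("Some String")  # plain Python preprocessing on the client
    result = sess.run(output_tensor, {input_ints: ids})
    print(result)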
Related
I am trying to implement ELMo embeddings via TensorFlow in a neural network. Here is a code snippet of my network:
def get_elmo_embeds_model():
    input_text = tf.keras.layers.Input(shape=(1,), dtype=tf.string)
    embedding = tf.keras.layers.Lambda(ELMoEmbedding, output_shape=(1024, ))(input_text)
    print(embedding.shape)
    conv_1d_layer = tf.keras.layers.Conv1D(256, 5, activation='relu')(embedding)
    max_pool_1 = tf.keras.layers.MaxPooling1D(5)(conv_1d_layer)
    x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(256, name="BiLSTM"))(max_pool_1)
    dropout_2 = tf.keras.layers.Dropout(0.2)(x)
    flatten_1 = tf.keras.layers.Flatten()(dropout_2)
    pred = tf.keras.layers.Dense(1, activation='sigmoid')(flatten_1)
    model = tf.keras.models.Model(inputs=[input_text], outputs=pred)
    return model

text_only_model = get_elmo_embeds_model()
text_only_model.compile(loss='binary_crossentropy', optimizer='adam',
                        metrics=['accuracy', precision_mat, recall_mat, f1_mat])
text_only_model.summary()

with tf.Session() as session:
    K.set_session(session)
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    history = text_only_model.fit_generator(text_elmo_train, epochs=EPOCHS,
                                            validation_steps=VALIDATION_STEPS,
                                            steps_per_epoch=STEPS_PER_EPOCH,
                                            validation_data=text_elmo_valid)
When running this model, I am getting the following error:
steps_per_epoch=STEPS_PER_EPOCH,validation_data = text_elmo_valid)
File "/home/.local/lib/python3.7/site-packages/keras/engine/training_v1.py", line 796, in fit
use_multiprocessing=use_multiprocessing)
File "/home/.local/lib/python3.7/site-packages/keras/engine/training_generator_v1.py", line 586, in fit
steps_name='steps_per_epoch')
File "/home/.local/lib/python3.7/site-packages/keras/engine/training_generator_v1.py", line 306, in model_iteration
steps_name='validation_steps')
File "/home/.local/lib/python3.7/site-packages/keras/engine/training_generator_v1.py", line 252, in model_iteration
batch_outs = batch_function(*batch_data)
File "/home/.local/lib/python3.7/site-packages/keras/engine/training_v1.py", line 1152, in test_on_batch
outputs = self.test_function(inputs) # pylint: disable=not-callable
File "/home/.local/lib/python3.7/site-packages/keras/backend.py", line 4187, in __call__
run_metadata=self.run_metadata)
File "/home/.conda/envs/test_multimod/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1485, in __call__
run_metadata_ptr)
tensorflow.python.framework.errors_impl.UnimplementedError: TensorArray has size zero, but element shape [?,256] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
I have checked the data being passed in and it has no null values, but this error still occurs while running this function.
def create_dataset(csv_path, vocab):
    dataset = tf.data.TextLineDataset(csv_path).skip(1)
    dataset = dataset.map(lambda sentence: tf.string_split([sentence]).values)
    dataset = dataset.map(lambda tokens: (vocab.lookup(tokens), tf.size(tokens)))
    return dataset
Above is the function I am using to look up tokens in the table.
Below I am trying to pad the sentences with a padding ID taken from the lookup table:
def input_fn(sentence, labels, id_pad):
    dataset = tf.data.Dataset.zip((sentence, labels))
    padded_shapes = ((tf.TensorShape([None]),
                      tf.TensorShape([])),
                     tf.TensorShape([]))
    padded_value = ((id_pad, 0),
                    "")
    dataset = (dataset
               .padded_batch(128, padded_shapes=padded_shapes, padding_values=padded_value)
               .prefetch(1))
    iterator = dataset.make_initializable_iterator()
    ((sentence, sentence_length), (label)) = iterator.get_next()
    init_op = iterator.initializer
    inputs = {
        'sentence': sentence,
        'sentence_length': sentence_length,
        'label': label,
        'init_op': init_op
    }
    return inputs
Below I am creating and running the session:
vocab = tf.contrib.lookup.index_table_from_file('data\\vocab.txt', num_oov_buckets=1)
sentence_data = create_dataset('data\\csv\\amazon_feature.csv', vocab)
label_data = tf.data.TextLineDataset('data\\csv\\amazon_label.csv').skip(1)
id_pad = vocab.lookup(tf.constant('<PAD>'))
input = input_fn(sentence_data, label_data, id_pad)

with tf.Session() as sess:
    sess.run([input['init_op'], tf.tables_initializer(), tf.global_variables_initializer()])
The error stack trace is:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Table not initialized.
[[Node: string_to_index_Lookup/hash_table_Lookup = LookupTableFindV2[Tin=DT_STRING, Tout=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"](string_to_index/hash_table, Const, string_to_index/hash_table/Const)]]
During handling of the above exception, another exception occurred:
You should run tables_initializer first, then run the others, like below:
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    ...
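Applied to the code above, a minimal sketch (assuming the same input dictionary returned by input_fn) runs the table initializer in its own call before the iterator initializer, since id_pad and the dataset's lookup both require the table to already exist:

with tf.Session() as sess:
    # Initialize the vocabulary table first; vocab.lookup() and id_pad depend on it.
    sess.run(tf.tables_initializer())
    sess.run(tf.global_variables_initializer())
    # Only now initialize the iterator, which evaluates id_pad as a padding value.
    sess.run(input['init_op'])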
I'm trying to use TensorFlow to do some classification with the tf.contrib.layers package, and I've run into a problem I can't quite figure out. As far as I can tell from examples (e.g. this and its tutorial), everything with the graph is handled by the API. I can download and run the same code in my environment perfectly well.
However, when I run my code, I get an error that my global step is not from the same graph as my loss, which seems bizarre: ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32). The error occurs during the construction of the train_op.
Here's my TensorFlow code (I do have some other code for handling the loading of the data, but it doesn't use anything from TensorFlow). Sorry that the code is sort of messy right now: I've been tearing it apart trying to figure this error out.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
import data  # my data loading module


def train(training_file, vocab_path, hidden_units=[10, 20, 10], estimator=tf.contrib.learn.DNNClassifier):
    """
    Given a training CSV file, train a Tensorflow neural network
    """
    training_set = data.load(training_file)

    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)

    training_data = tf.one_hot(training_set.data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
    training_targets = tf.constant(np.array(training_set.targets, dtype=np.int32))

    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
    classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
    return classifier


def model_fn(features, targets, mode, params, hidden_units):
    if len(hidden_units) <= 0:
        raise ValueError("Hidden units must be a iterable of ints of length >= 1")

    # Define the network
    network = tf.contrib.layers.relu(features, hidden_units[0])
    for i in range(1, len(hidden_units)):
        network = tf.contrib.layers.relu(network, hidden_units[i])

    # Flatten the network
    network = tf.reshape(network, [-1, hidden_units[-1] * data.DOC_LENGTH])

    # Add dropout to enhance feature use
    network = tf.layers.dropout(inputs=network, rate=0.5, training=mode == tf.contrib.learn.ModeKeys.TRAIN)

    # Calculate the logits
    logits = tf.contrib.layers.fully_connected(network, 15)

    loss = None
    train_op = None

    if mode != tf.contrib.learn.ModeKeys.INFER:
        targets = tf.cast(tf.one_hot(targets, 15, 1, 0), dtype=tf.float32)
        loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=targets)

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        # This train_op causes the error
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    return model_fn_lib.ModelFnOps(mode=mode, predictions=predictions, loss=loss, train_op=train_op)


def main(unusedargv):
    # ... parses arguments
    classifier = train(args.train_data, args.vocab)
    print(evaluate(classifier, args.train_data))
    print(evaluate(classifier, args.test_data))


if __name__ == "__main__":
    tf.app.run()
Here's the full stack trace:
File "categorize.py", line 126, in main
classifier = train(args.train_data, args.vocab)
File "categorize.py", line 39, in train
classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 280, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 426, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 934, in _train_model
model_fn_ops = self._call_legacy_get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1003, in _call_legacy_get_train_ops
train_ops = self._get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1162, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1133, in _call_model_fn
model_fn_results = self._model_fn(features, labels, **kwargs)
File "categorize.py", line 37, in <lambda>
classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
File "categorize.py", line 73, in model_fn
learning_rate=0.01)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/optimizers.py", line 152, in optimize_loss
with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 82, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1410, in variable_scope
g = ops._get_graph_from_inputs(values) # pylint: disable=protected-access
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3968, in _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3907, in _assert_same_graph
"%s must be from the same graph as %s." % (item, original_item))
ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32).
The context of the two functions is different, so you need to use tf.Graph() in the calling function to set the default graph, as follows.
def train(...):
    with tf.Graph().as_default():
        ...
        ...
        training_data = tf.one_hot(training_set.data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
        training_targets = tf.constant(np.array(training_set.targets, dtype=np.int32))
        classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
        classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
        return classifier
I figured out the problem! This may be specific to the Estimator interface, but basically I needed to move my TensorFlow tensor definitions into the Estimator's input_fn: the Estimator builds its own graph when fit() is called, so tensors created outside of it (like my tf.one_hot call in train()) end up in a different graph than the loss. I ended up making a method to do this, but it also worked when I defined the tensors in the lambda:
def train(training_file, vocab_path, hidden_units=[10, 20, 10]):
    """
    Given a training CSV file, train a Tensorflow neural network
    """
    training_set = data.load(training_file)

    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)

    # Note not defining the variables here
    training_data = training_set.data
    training_targets = np.array(training_set.targets, dtype=np.int32)

    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))

    # Note the variable definition here
    classifier.fit(
        input_fn=lambda: (
            tf.one_hot(training_data, len(vocab.vocabulary_._mapping), dtype=tf.float32),
            tf.constant(training_targets)),
        steps=2000)
    return classifier
I am working on Ubuntu 14.04. I wrote code for recognition of letters with TensorFlow v0.11, using the LeNet5 model.
My source code:
import PIL
import numpy
import tensorflow as tf
# from tensorflow.examples.tutorials.mnist import input_data
import Input as input_data
from tensorflow.python.framework.importer import import_graph_def
from Resize import Resize_img
# these functions to optimize the accurancy of the mnist training
#from imp_image import imp_img
import scipy.misc


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# ============================================================ End Functions part

# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

class MNIST:
    def __init__(self):
        # Open the compuation session
        self.sess = tf.InteractiveSession()
        # Load the network
        self.Deep_Network()

    def Deep_Network(self):
        # nodes for the input images and target output classes.
        # supervised classifier
        self.x = tf.placeholder(tf.float32, shape=[None, 784])
        self.y_ = tf.placeholder(tf.float32, shape=[None, 10])

        # First convolutionanal Layer =====================================
        # It will consist of convolution, followed by max pooling
        # The convolutional will compute 32 features for each 5x5 patch.
        self.W_conv1 = weight_variable([5, 5, 1, 32])
        self.b_conv1 = bias_variable([32])

        # To apply the layer, we first reshape x to a 4d tensor,
        # with the second and third dimensions corresponding to image width and height,
        # and the final dimension corresponding to the number of color channels.
        self.x_image = tf.reshape(self.x, [-1, 28, 28, 1])

        # We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, and finally max pool.
        self.h_conv1 = tf.nn.relu(conv2d(self.x_image, self.W_conv1) + self.b_conv1)
        self.h_pool1 = max_pool_2x2(self.h_conv1)

        # Second Convolutional Layer =====================================
        # In order to build a deep network, we stack several layers of this type.
        # The second layer will have 64 features for each 5x5 patch.
        self.W_conv2 = weight_variable([5, 5, 32, 64])
        self.b_conv2 = bias_variable([64])
        self.h_conv2 = tf.nn.relu(conv2d(self.h_pool1, self.W_conv2) + self.b_conv2)
        self.h_pool2 = max_pool_2x2(self.h_conv2)

        # Densely Connected Layer
        # Now that the image size has been reduced to 7x7, we add a fully-connected layer with 1024 neurons
        # to allow processing on the entire image. We reshape the tensor from the pooling layer into
        # a batch of vectors, multiply by a weight matrix, add a bias, and apply a ReLU.
        self.W_fc1 = weight_variable([7 * 7 * 64, 1024])
        self.b_fc1 = bias_variable([1024])
        self.h_pool2_flat = tf.reshape(self.h_pool2, [-1, 7 * 7 * 64])
        self.h_fc1 = tf.nn.relu(
            tf.matmul(self.h_pool2_flat, self.W_fc1) + self.b_fc1)  # ReLu Computes rectified linear: max(features, 0).

        # Dropout
        self.keep_prob = tf.placeholder(tf.float32)
        self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

        # Readout Layer ========================================
        # Finally, we add a softmax layer, just like for the one layer softmax regression above.
        self.W_fc2 = weight_variable([1024, 10])
        self.b_fc2 = bias_variable([10])
        self.y_conv = tf.nn.softmax(tf.matmul(self.h_fc1_drop, self.W_fc2) + self.b_fc2)

        self.cross_entropy = -tf.reduce_sum(self.y_ * tf.log(self.y_conv))
        self.correct_prediction = tf.equal(tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

    def Prediction(self, imageName):
        # Load the trained model
        ' Restore the model '
        'here i should create the model saver'
        Saved_model_dir = '/home/brm17/Desktop/PFE/'

        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(Saved_model_dir)

        'verifie if the saved model exists or not!'
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            print '# No saved model found!'
            exit()  # exit the prgm

        # image_test = 'number-3.jpg'
        ResizedImage = Resize_img(imageName)
        ImageInput = ResizedImage.mnist_image_input.reshape(1, -1)

        print 'Predection > ', tf.argmax(self.y_conv, 1).eval(feed_dict={self.x: ImageInput, self.keep_prob: 1.0})
        # print("test accuracy %g"%accuracy.eval(feed_dict={x: myTestImg, y_: myLabel, keep_prob: 1.0}))


def main():
    image = '/home/brm17/Desktop/PFE/n2.jpeg'
    model = MNIST()
    model.Prediction(image)

if __name__ == "__main__":
    main()
If I run this code, it prints this error:
brm17#Brahim:~/Desktop/PFE$ python LeNet5.py
Traceback (most recent call last):
File "LeNet5.py", line 137, in <module>
model.Prediction(image)
File "LeNet5.py", line 120, in Prediction
saver.restore(self.sess, ckpt.model_checkpoint_path)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 1129, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 710, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 908, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 958, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 978, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.NotFoundError: Tensor name "Variable_1" not found in checkpoint files /home/brm17/Desktop/PFE/MNISTmodel-20000
[[Node: save/restore_slice_1 = RestoreSlice[dt=DT_FLOAT, preferred_shard=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/restore_slice_1/tensor_name, save/restore_slice_1/shape_and_slice)]]
Caused by op u'save/restore_slice_1', defined at:
File "LeNet5.py", line 137, in <module>
model.Prediction(image)
File "LeNet5.py", line 115, in Prediction
saver = tf.train.Saver()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 861, in __init__
restore_sequentially=restore_sequentially)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 519, in build
filename_tensor, vars_to_save, restore_sequentially, reshape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 272, in _AddRestoreOps
values = self.restore_op(filename_tensor, vs, preferred_shard)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 187, in restore_op
preferred_shard=preferred_shard)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/io_ops.py", line 203, in _restore_slice
preferred_shard, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 359, in _restore_slice
preferred_shard=preferred_shard, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2317, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
self._traceback = _extract_stack()
What is the problem and how can I resolve it?
I think the problem is as follows:
You created a network and saved it.
You then changed the network but did not remove the saved checkpoint.
Now you try to reload your network from the old version, but a new variable you created does not exist in it.
Can you try to either:
Remove the saved state of your network and retrain it, or
Remove the saving and loading of your network and see if this works?
You can remove the state of your network by removing the checkpoint file in your folder /home/brm17/Desktop/PFE/.
Edit: I read your code thoroughly, and the problem is that if there is no checkpoint you never start retraining your network. Maybe start by writing the training code before saving, loading, and changing your network.
Good luck, and let me know if this works!
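If it helps to see exactly which variable is missing, here is a small diagnostic sketch (not part of the original answer; it assumes the checkpoint path shown in the error message and that the MNIST graph has already been built) that compares the checkpoint contents with the current graph:

import tensorflow as tf

# Hypothetical path taken from the error message; adjust to your checkpoint.
ckpt_path = '/home/brm17/Desktop/PFE/MNISTmodel-20000'

# Variable names stored in the checkpoint file.
reader = tf.train.NewCheckpointReader(ckpt_path)
saved_vars = set(reader.get_variable_to_shape_map().keys())

# Variable names defined in the current graph (build the MNIST model first).
graph_vars = set(v.op.name for v in tf.all_variables())

print('Only in checkpoint:', sorted(saved_vars - graph_vars))
print('Only in graph:', sorted(graph_vars - saved_vars))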
I have a bug and I cannot find the reason. Here is the code:
with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    images = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, 33, 33, 1])
    labels = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, 21, 21, 1])

    logits = inference(images)
    losses = loss(logits, labels)
    train_op = train(losses, global_step)

    saver = tf.train.Saver(tf.all_variables())
    summary_op = tf.merge_all_summaries()

    init = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
        start_time = time.time()

        data_batch, label_batch = SRCNN_inputs.next_batch(np_data, np_label,
                                                          FLAGS.batch_size)

        _, loss_value = sess.run([train_op, losses], feed_dict={images: data_batch, labels: label_batch})

        duration = time.time() - start_time


def next_batch(np_data, np_label, batchsize,
               training_number=NUM_EXAMPLES_PER_EPOCH_TRAIN):
    perm = np.arange(training_number)
    np.random.shuffle(perm)
    data = np_data[perm]
    label = np_label[perm]

    data_batch = data[0:batchsize, :]
    label_batch = label[0:batchsize, :]

    return data_batch, label_batch
where np_data is the whole set of training samples read from an HDF5 file, and the same for np_label.
After I run the code, I get an error like this:
2016-07-07 11:16:36.900831: step 0, loss = 55.22 (218.9 examples/sec; 0.585 sec/batch)
Traceback (most recent call last):
File "<ipython-input-1-19672e1f8f12>", line 1, in <module>
runfile('/home/kang/Documents/work_code_PC1/tf_SRCNN/SRCNN_train.py', wdir='/home/kang/Documents/work_code_PC1/tf_SRCNN')
File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 685, in runfile
execfile(filename, namespace)
File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 85, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "/home/kang/Documents/work_code_PC1/tf_SRCNN/SRCNN_train.py", line 155, in <module>
train_test()
File "/home/kang/Documents/work_code_PC1/tf_SRCNN/SRCNN_train.py", line 146, in train_test
summary_str = sess.run(summary_op)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [128,33,33,1]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[128,33,33,1], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: truediv/_74 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_56_truediv", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'Placeholder', defined at:
So, it shows a result for step 0, which means that the data has been fed into the placeholders.
But why does the error about feeding the placeholder appear the next time around?
When I comment out the line summary_op = tf.merge_all_summaries(), the code works fine. Why is that?
When I comment out the line summary_op = tf.merge_all_summaries(), the code works fine. Why is that?
summary_op is an operation. If there exists (and this is true in your case) a summary operation related to the result of another operation that depends on the values of the placeholders, you have to feed the graph the required values.
So your line summary_str = sess.run(summary_op) needs the dictionary of values to feed.
Usually, instead of re-executing the operations just to log the values, you run the operations and the summary_op in a single call.
Do something like
if step % LOGGING_TIME_STEP == 0:
    _, loss_value, summary_str = sess.run([train_op, losses, summary_op], feed_dict={images: data_batch, labels: label_batch})
else:
    _, loss_value = sess.run([train_op, losses], feed_dict={images: data_batch, labels: label_batch})