Running different models in one script in Tensorflow 1.9 - python

I have a very simple model which consists of one tf.Variable(), and here is the whole code:
import tensorflow as tf
save_path="model1/model1.ckpt"
num_input = 2
n_nodes_hl1 = 2
with tf.variable_scope("model1"):
    hidden_1_layer = {
        'weights': tf.Variable(tf.random_normal([num_input, n_nodes_hl1]), name='Weight1')
    }

def train_model():
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        save_model(sess)

def save_model(sess):
    saver = tf.train.Saver(tf.global_variables(), save_path)
    saver.save(sess, save_path)

def load_model(sess):
    saver = tf.train.Saver(tf.global_variables(), save_path)
    saver.restore(sess, save_path)

def run_model():
    print("model1 running...")
    with tf.Session() as sess:
        load_model(sess)
        x = sess.run(hidden_1_layer)
        print(x)

#train_model()
The second model is exactly the same, but with every "model1" renamed to "model2". Both models are trained and saved, and each works perfectly on its own. Now I want to test them using the following script:
import model1 as m1
import model2 as m2
m1.run_model()
m2.run_model()
And here I got an error message:
NotFoundError (see above for traceback): Key model2/Weight2 not found in checkpoint
So it looks like the imports add all variables to a common graph (even though they are in separate variable scopes), and the restore then fails because the model2 variable cannot be found in the model1 checkpoint.
Can anyone solve my problem?
Is it possible in Tensorflow to run a few different models in one script?
EDIT - PROBLEM SOLVED
The solution is very easy: create a separate graph for each model. This means that every tensor you declare or compute must live inside that graph, and you must also pass the graph to the session, like tf.Session(graph=self.graph).
The whole example is below:
import tensorflow as tf
save_path="model1/model1.ckpt"
class model1:
    num_input = 2
    n_nodes_hl1 = 2

    def __init__(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            with tf.variable_scope("model1"):
                self.hidden_1_layer = {
                    'weights': tf.Variable(tf.random_normal([self.num_input, self.n_nodes_hl1]), name='Weight1')
                }

    def train_model(self):
        with self.graph.as_default():
            init = tf.global_variables_initializer()
        with tf.Session(graph=self.graph) as sess:
            sess.run(init)
            self.save_model(sess)

    def save_model(self, sess):
        with self.graph.as_default():
            saver = tf.train.Saver(tf.global_variables())
        saver.save(sess, save_path)

    def load_model(self, sess):
        with self.graph.as_default():
            saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, save_path)

    def run_model(self):
        print("model1 running...")
        with tf.Session(graph=self.graph) as sess:
            self.load_model(sess)
            x = sess.run(self.hidden_1_layer)
            print(x)

Ah, the common "I want to use several models" question! Just make sure that you reset the graph after each model:
tf.reset_default_graph()
Your code would look like:
import tensorflow as tf
import model1 as m1
m1.run_model()
tf.reset_default_graph()
import model2 as m2
m2.run_model()
Why? The moment you create a variable in TensorFlow using tf.Variable, that variable is added to the default graph. If you import both models one after the other, you have just created all of their variables in the default graph! This is by far the easiest solution. Think of the default graph as a blackboard: you can draw your fancy ML model on it, but you need to wipe it clean before reusing it!
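To see this blackboard behaviour for yourself, here is a minimal sketch (the variable below is made up purely for illustration):
import tensorflow as tf

# Creating a variable registers it in the default graph.
w = tf.Variable(tf.zeros([2, 2]), name='demo_w')
print(len(tf.global_variables()))   # -> 1

# Wiping the blackboard discards those variables.
tf.reset_default_graph()
print(len(tf.global_variables()))   # -> 0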
NOTE: If you are wondering, the alternative is to create a separate graph for each of the models, but it is much more worrisome and I only recommend it for cases where you must have both models loaded at the same time.
EXTRA: Encapsulating your model in a Tensorflow class
A fancier way to do it while avoiding several graphs (seriously, it is horrible!) is to encapsulate the whole model in a class. Thus, your code would look like this:
import tensorflow as tf
class model():
    def __init__(self, new_save_path):
        self.num_input = 2
        self.n_nodes_hl1 = 2
        self.save_path = new_save_path
        tf.reset_default_graph()
        with tf.variable_scope("model1"):
            self.hidden_1_layer = {
                'weights': tf.Variable(tf.random_normal([self.num_input,
                                                          self.n_nodes_hl1]), name='Weight1')
            }
        self.saver = tf.train.Saver(tf.global_variables())
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def save_model(self):
        self.saver.save(self.sess, self.save_path)

    def load_model(self):
        self.saver.restore(self.sess, self.save_path)

    def run_model(self):
        print("model1 running...")
        self.load_model()
        x = self.sess.run(self.hidden_1_layer)
        print(x)
        #self.train_model()
This way you could simply do:
from model import model

m1 = model('model1/model1.ckpt')   # These two lines could be put into one:
m1.run_model()                     # m1 = model('model1/model1.ckpt').run_model()
m2 = model('model2/model2.ckpt')
m2.run_model()
You still want it in a for loop?
from model import model

model_file_list = ['model1/model1.ckpt', 'model2/model2.ckpt']
for model_file in model_file_list:
    m = model(model_file).run_model()
    # Run tests, print stuff, save stuff here!

Related

How to export a tf model for serving directly from session (without creating a tf checkpoint) to minimize export time

I wanted to share my findings on how to export a TF model for serving directly from a session, without creating a model checkpoint. My use case requires minimal time to create a pb file, so I wanted to obtain a model.pb file directly from the session, skipping the checkpoint step.
Most examples online (and the documentation) cover the common case of creating a model checkpoint and loading it in order to create a tf-serving (pb) file. That approach is fine, of course, when export time is not an issue.
import os

import tensorflow as tf
from tensorflow.python.framework import importer

output_path = '/export_directory'  # be sure to create it before export
input_ops = ['name/s_of_model_input/s']
output_ops = ['name/s_of_model_output/s']

session = tf.compat.v1.Session()

def get_ops_dict(ops, graph, name='op_'):
    out_dict = dict()
    for i, op in enumerate(ops):
        out_dict[name + str(i)] = tf.compat.v1.saved_model.build_tensor_info(graph.get_tensor_by_name(op + ':0'))
    return out_dict

def add_meta_graph(pbtxt_tmp_path, graph_def):
    with tf.Graph().as_default() as graph:
        importer.import_graph_def(graph_def, name="")
        os.unlink(pbtxt_tmp_path)

        # used to rename model inputs/outputs
        inputs_dict = get_ops_dict(input_ops, graph, name='input_')
        outputs_dict = get_ops_dict(output_ops, graph, name='output_')

        prediction_signature = (
            tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
                inputs=inputs_dict,
                outputs=outputs_dict,
                method_name=tf.saved_model.PREDICT_METHOD_NAME))

        legacy_init_op = tf.group(tf.compat.v1.tables_initializer(), name='legacy_init_op')

        builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(output_path + '/export')
        builder.add_meta_graph_and_variables(
            session,
            tags=[tf.saved_model.SERVING],
            signature_def_map={
                tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_signature},
            legacy_init_op=legacy_init_op)
        builder.save()
        return prediction_signature

def export_model(session, output_path, output_ops):
    graph_def = session.graph_def
    tf.io.write_graph(graph_or_graph_def=graph_def, logdir=output_path,
                      name='model.pbtxt', as_text=False)
    frozen_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
        session, graph_def, output_ops)
    prediction_signature = add_meta_graph(output_path + '/model.pbtxt', frozen_graph_def)
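For context, a minimal usage sketch, assuming input_ops and output_ops above have been set to the real tensor names of your graph and the model has already been built and trained inside session:
# Writes output_path + '/model.pbtxt' and a SavedModel under output_path + '/export'.
export_model(session, output_path, output_ops)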

NotFoundError: Tensor name "prediction/InceptionResnetV2/AuxLogits/Conv2d_1b_1x1/BatchNorm/beta" not found in checkpoint files

I am trying to run the Inception ResNet V2 TensorFlow model with the architecture and the checkpoint inception_resnet_v2_2016_08_30.ckpt. My code predicts the probability of each class for a given image.
I tried to structure the TensorFlow code using a class, following the awesome blog here. But I get this error:
NotFoundError (see above for traceback): Tensor name "prediction/InceptionResnetV2/AuxLogits/Conv2d_1b_1x1/BatchNorm/beta" not found in checkpoint files inception_resnet_v2_2016_08_30.ckpt.
My failing code is as follows.
from inception_resnet_v2 import *
import functools
import inception_preprocessing
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf
from scipy.misc import imread

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

def doublewrap(function):
    """
    A decorator decorator, allowing the decorator to be used without
    parentheses if no arguments are provided. All arguments must be optional.
    """
    @functools.wraps(function)
    def decorator(*args, **kwargs):
        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
            return function(args[0])
        else:
            return lambda wrapee: function(wrapee, *args, **kwargs)
    return decorator

@doublewrap
def define_scope(function, scope=None, *args, **kwargs):
    """
    A decorator for functions that define TensorFlow operations. The wrapped
    function will only be executed once. Subsequent calls to it will directly
    return the result so that operations are added to the graph only once.
    The operations added by the function live within a tf.variable_scope(). If
    this decorator is used with arguments, they will be forwarded to the
    variable scope. The scope name defaults to the name of the wrapped
    function.
    """
    attribute = '_cache_' + function.__name__
    name = scope or function.__name__
    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            with tf.variable_scope(name, *args, **kwargs):
                setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return decorator

class Inception(object):
    def __init__(self, image):
        self.image = image
        self.process_data  # build the preprocessing ops
        self.prediction    # build the prediction ops

    @define_scope
    def process_data(self):
        image_size = inception_resnet_v2.default_image_size
        image = inception_preprocessing.preprocess_image(self.image, image_size, image_size, is_training=False)
        image1 = tf.expand_dims(image, 0)
        return image1

    @define_scope
    def prediction(self):
        '''Creates the Inception Resnet V2 model.'''
        arg_scope = inception_resnet_v2_arg_scope()
        with tf.contrib.slim.arg_scope(arg_scope):
            logits, end_points = inception_resnet_v2(self.process_data, is_training=False)
        probabilities = tf.nn.softmax(logits)
        return probabilities

def main():
    tf.reset_default_graph()
    image = tf.placeholder(tf.float32, [None, None, 3])
    model = Inception(image)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, 'inception_resnet_v2_2016_08_30.ckpt')
        probabilities = sess.run(model.prediction, feed_dict={image: data})
        print(probabilities)

if __name__ == '__main__':
    data = imread('ILSVRC2012_test_00000003.JPEG', mode='RGB').astype(np.float)
    main()
However, if I don't structure the code with a class as above, it runs successfully.
The following is the code which ran without errors.
from inception_resnet_v2 import *
import inception_preprocessing
import os
import numpy as np
import tensorflow as tf
from scipy.misc import imread

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
slim = tf.contrib.slim

tf.reset_default_graph()

# prepare data
data = imread('ILSVRC2012_test_00000003.JPEG', mode='RGB').astype(np.float)
image = tf.placeholder(tf.float32, [None, None, 3])

# pre-processing image
image_size = inception_resnet_v2.default_image_size
processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
processed_image = tf.expand_dims(processed_image, 0)

# Creates the Inception Resnet V2 model.
arg_scope = inception_resnet_v2_arg_scope()
with slim.arg_scope(arg_scope):
    logits, end_points = inception_resnet_v2(processed_image, is_training=False)
probabilities = tf.nn.softmax(logits)

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, './inception_resnet_v2_2016_08_30.ckpt')
    print(sess.run(probabilities, feed_dict={image: data}))
Any help would be appreciated!
The decorator wraps the Inception network into a variable scope named after the function, prediction in this case. As a result, the variable names in the checkpoint don't match up with variable names in the graph anymore.
To verify this, you can change tf.variable_scope() to tf.name_scope() in the decorator. In most use cases, this should also not influence the rest of your program.
If you need the variable scope, you can pass a dict into tf.train.Saver() that maps variable names in the checkpoint to variable objects in the graph.
It's also possible to automate this by reading the variable names in the checkpoint using tf.python.pywrap_tensorflow.NewCheckpointReader().
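A minimal sketch of that idea, assuming the only mismatch is the extra "prediction/" prefix introduced by the decorator's variable scope:
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow

reader = pywrap_tensorflow.NewCheckpointReader('inception_resnet_v2_2016_08_30.ckpt')
ckpt_names = set(reader.get_variable_to_shape_map().keys())

# Map each checkpoint name to the graph variable that carries the extra
# "prediction/" scope prefix added by the decorator.
var_map = {}
for var in tf.global_variables():
    ckpt_name = var.op.name.replace('prediction/', '', 1)
    if ckpt_name in ckpt_names:
        var_map[ckpt_name] = var

saver = tf.train.Saver(var_list=var_map)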

Parallel fitting of multiple Keras Models on single GPU

I'm trying to fit multiple small Keras models in parallel on a single GPU. For various reasons I need to get them out of a list and train them one step at a time. Since I had no luck with the standard multiprocessing module, I use pathos.
What I tried to do is something like this:
from pathos.multiprocessing import ProcessPool as Pool
import tensorflow as tf
import keras.backend as K

def multiprocess_step(self, model):
    K.set_session(sess)
    with sess.graph.as_default():
        model = step(model, sess)
        return model

def step(model, sess):
    K.set_session(sess)
    with sess.graph.as_default():
        model.fit(x=data['X_train'], y=data['y_train'],
                  batch_size=batch_size,
                  validation_data=(data['X_test'], data['y_test']),
                  verbose=verbose,
                  shuffle=True,
                  initial_epoch=self.step_num - 1)
        return model

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = "0"

sess = tf.Session(config=config)
K.set_session(sess)

with sess.graph.as_default():
    pool = Pool(8).map
    model_list = pool(multiprocess_step, model_list)
but whatever I try, I keep getting an error claiming that the models don't seem to be on the same graph...
ValueError: Tensor("training/RMSprop/Variable:0", shape=(25, 352), dtype=float32_ref) must be from the same graph as Tensor("RMSprop/rho/read:0", shape=(), dtype=float32).
The exception originates in the model.fit() line, so I must have done something wrong with assigning the session graph, even though I tried to set it in every possible location.
Does anyone have experience with something similar?
The following was suggested on the Keras issue tracker. I'm not sure about the relative merits of the approach compared to using multiprocessing.
in_1 = Input()
lstm_1 = LSTM(...)(in_1)
out_1 = Dense(...)(lstm_1)
in_2 = Input()
lstm_2 = LSTM(...)(in_2)
out_2 = Dense(...)(lstm_2)
model_1 = Model(input=in_1, output=out_1)
model_2 = Model(input=in_2, output=out_2)
model = Model(input = [in_1, in_2], output = [out_1, out_2])
model.compile(...)
model.fit(...)
model_1.predict(...)
model_2.predict(...)
Assuming the Keras backend is set to TensorFlow, you can use the code below to load multiple models into separate graphs and invoke them in parallel.
import os
import tensorflow as tf
from keras.models import model_from_json

def model1(dir_model):
    model = os.path.join(dir_model, 'model.json')
    dir_weights = os.path.join(dir_model, 'model.h5')
    graph1 = tf.Graph()
    with graph1.as_default():
        session1 = tf.Session(graph=graph1, config=config)  # config is a tf.ConfigProto defined elsewhere
        with session1.as_default():
            with open(model, 'r') as data:
                model_json = data.read()
            model_1 = model_from_json(model_json)
            model_1.load_weights(dir_weights)
    return model_1, session1, graph1

def model_2(dir_model):
    model = os.path.join(dir_model, 'model.json')
    dir_weights = os.path.join(dir_model, 'model.h5')
    graph2 = tf.Graph()
    with graph2.as_default():
        session2 = tf.Session(graph=graph2, config=config)
        with session2.as_default():
            with open(model, 'r') as data:
                model_json = data.read()
            model_2 = model_from_json(model_json)
            model_2.load_weights(dir_weights)
    return model_2, session2, graph2
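A small sketch of calling these loaders (the directory paths are made up):
model_1, session1, graph1 = model1('/path/to/model1_dir')
model_2, session2, graph2 = model_2('/path/to/model2_dir')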
To invoke a specific model, do the following. For a model 1 prediction:
K.set_session(session1)
with graph1.as_default():
    img_pred[img_name] = patch_dict[np.argmax(np.squeeze(model_1.predict(img_invoke)))]
and for model 2 it is the same:
K.set_session(session2)
with graph2.as_default():
    img_pred[img_name] = patch_dict[np.argmax(np.squeeze(model_2.predict(img_invoke)))]

Variable not created when restore graph with import_meta_graph?

I trained a model with the TensorFlow tutorials, and now I am trying to restore its graph and run it:
import tensorflow as tf
import reader
from ptb_word_lm import PTBInput, PTBModel, get_config, run_epoch

def main(_):
    checkpoint_path = "/Users/roger/data/ptb_out"
    checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    raw_data = reader.ptb_raw_data("/Users/roger/data/simple-examples/small_data")
    train_data, valid_data, test_data, _ = raw_data
    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1
    with tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        saver = tf.train.import_meta_graph(checkpoint_path + ".meta")
        saver.restore(session, checkpoint_path)
        with tf.name_scope("Test"):
            test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                mtest = PTBModel(is_training=False, config=eval_config,
                                 input_=test_input)
        test_perplexity = run_epoch(session, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)

if __name__ == "__main__":
    tf.app.run()
However, I find that the variable Model/embedding, which is created here, is not restored from the graph. So I get an error like this:
ValueError: Variable Model/embedding does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
So how can I restore the model correctly?
I think, since you set reuse=True in your variable scope, it tries to find that variable instead of creating it when you call PTBModel(). If you use get_variable() with reuse=True in a scope, it will never create a variable.
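A tiny self-contained sketch of the behaviour described above (the variable names and shapes are made up):
import tensorflow as tf

with tf.variable_scope("Model"):
    w = tf.get_variable("embedding", shape=[10, 4])    # created here

with tf.variable_scope("Model", reuse=True):
    w_again = tf.get_variable("embedding")             # found and reused

with tf.variable_scope("Model", reuse=True):
    # Raises ValueError: "other_embedding" does not exist and reuse=True
    # forbids creating it.
    missing = tf.get_variable("other_embedding", shape=[10, 4])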

Tensorflow load graph into specific scope

I'm loading a pretrained network into TensorFlow using the methods below, which live in a Network class (hence the calls to self.xyz). First define_model() is called, then I initialize other variables and optimizers, and then load_model() is called.
However, despite using tf.variable_scope(self.name), the variables from the graph are loaded into the generic space of variables. This is problematic because I have two instances of this class that each load the same network, and I want to separate them out into different scopes.
How can I load the variables into a specific scope?
P.S. Feel free to correct me on any errors in my code!
def load_model(self):
    with tf.variable_scope(self.name) as scope:
        self.saver.restore(self.sess, self.model_path)
        print("Loaded model from {}".format(self.model_path))

def define_model(self):
    with tf.variable_scope(self.name) as scope:
        self.saver = tf.train.import_meta_graph(self.model_path + '.meta')
        print("Loaded model from {}".format(self.model_path + '.meta'))
        graph = tf.get_default_graph()
        self.inputs = []
        inp_names = ['i_hand1:0', 'i_hand2:0', 'i_flop1:0', 'i_flop2:0', 'i_flop3:0',
                     'i_turn:0', 'i_river:0', 'i_other:0', 'i_allowed_mod:0', 'keras_learning_phase:0']
        for inp in inp_names:
            self.inputs.append(tf.get_default_graph().get_tensor_by_name(inp))
        self.outputs = tf.get_default_graph().get_tensor_by_name("Tanh:0")
        self.add_output_conversions()

        all_vars = tf.trainable_variables()
        for var in all_vars:
            self.var[var.name] = var
I think your problem can be solved by passing an extra argument:
self.saver = tf.train.import_meta_graph(self.model_path + '.meta', import_scope=self.name)
Here's the reference
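For illustration, a minimal sketch of loading the same meta graph twice into two different scopes (the checkpoint path and scope names are made up, and the meta graph is assumed to contain a saver):
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # import_scope prefixes everything that gets loaded, so the two copies
    # of the network end up with distinct variable and tensor names.
    saver_a = tf.train.import_meta_graph('model/model.ckpt.meta', import_scope='net_a')
    saver_b = tf.train.import_meta_graph('model/model.ckpt.meta', import_scope='net_b')

    with tf.Session(graph=graph) as sess:
        saver_a.restore(sess, 'model/model.ckpt')
        saver_b.restore(sess, 'model/model.ckpt')
        # Tensors are now addressable as, e.g., 'net_a/Tanh:0' and 'net_b/Tanh:0'.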
