Tensorflow: model wrapper that can release GPU resources

Tensorflow: model wrapper that can release GPU resources - python

Here is a wrapper for tensorflow .pb frozen model (imagenet classification):
import tensorflow as tf
import numpy as np
import cv2
from numba import cuda
class ModelWrapper():
def __init__(self, model_filepath):
self.graph_def = self.load_graph_def(model_filepath)
self.graph = self.load_graph(self.graph_def)
self.set_inputs_and_outputs()
self.sess = tf.Session(graph=self.graph)
print(self.__class__.__name__, 'call __init__') #
def load_graph_def(self, model_filepath):
# Expects frozen graph in .pb format
with tf.gfile.GFile(model_filepath, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
return graph_def
def load_graph(self, graph_def):
with tf.Graph().as_default() as graph:
tf.import_graph_def(graph_def, name="")
return graph
def set_inputs_and_outputs(self):
input_list = []
for op in self.graph.get_operations(): # tensorflow.python.framework.ops.Operation
if op.type == "Placeholder":
input_list.append(op.name)
print('Inputs:', input_list)
all_name_list = []
input_name_list = []
for node in self.graph_def.node: # tensorflow.core.framework.node_def_pb2.NodeDef
all_name_list.append(node.name)
input_name_list.extend(node.input)
output_list = list(set(all_name_list) - set(input_name_list))
print('Outputs:', output_list)
self.inputs = []
self.input_tensor_names = [name + ":0" for name in input_list]
for input_tensor_name in self.input_tensor_names:
self.inputs.append(self.graph.get_tensor_by_name(input_tensor_name))
self.outputs = []
self.output_tensor_names = [name + ":0" for name in output_list]
for output_tensor_name in self.output_tensor_names:
self.outputs.append(self.graph.get_tensor_by_name(output_tensor_name))
input_dim_list = []
for op in self.graph.get_operations(): # tensorflow.python.framework.ops.Operation
if op.type == "Placeholder":
bs = op.get_attr('shape').dim[0].size
h = op.get_attr('shape').dim[1].size
w = op.get_attr('shape').dim[2].size
c = op.get_attr('shape').dim[3].size
input_dim_list.append([bs, h, w ,c])
assert len(input_dim_list) == 1
_, self.input_img_h, self.input_img_w, _ = input_dim_list[0]
def predict(self, img):
h, w, c = img.shape
if h != self.input_img_h or w != self.input_img_w:
img = cv2.resize(img, (self.input_img_w, self.input_img_h))
batch = img[np.newaxis, ...]
feed_dict = {self.inputs[0]: batch}
outputs = self.sess.run(self.outputs, feed_dict=feed_dict) # (1, 1001)
output = outputs[0]
return output
def __del__(self):
print(self.__class__.__name__, 'call __del__') #
import time #
time.sleep(3) #
cuda.close()
What I'm trying to do is to clean up GPU memory after I don't need model anymore, in this example I just create and delete model in the loop, but in real life it can be several different models.
wget https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
tar -xvzf inception_v3_2016_08_28_frozen.pb.tar.gz
rm -f imagenet_slim_labels.txt
rm -f inception_v3_2016_08_28_frozen.pb.tar.gz
import os
import time
import tensorflow as tf
import numpy as np
from model_wrapper import ModelWrapper
MODEL_FILEPATH = './inception_v3_2016_08_28_frozen.pb'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
def create_and_delete_in_loop():
for i in range(10):
print('-'*60)
print('i:', i)
model = ModelWrapper(MODEL_FILEPATH)
input_batch = np.zeros((model.input_img_h, model.input_img_w, 3), np.uint8)
y_pred = model.predict(input_batch)
print('y_pred.shape', y_pred.shape)
print('np.argmax(y_pred)', np.argmax(y_pred))
del model
if __name__ == "__main__":
create_and_delete_in_loop()
print('START WAITING')
time.sleep(10)
print('END OF THE PROGRAM!')
Output:
------------------------------------------------------------
i: 0
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
y_pred.shape (1, 1001)
np.argmax(y_pred) 112
ModelWrapper call __del__
------------------------------------------------------------
i: 1
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
Segmentation fault (core dumped)
What is the proper way of releasing GPU memory?

TL;DR Run your function as a new process+ .
tf.reset_default_graph() is not guaranteed to release memory#. When a process dies, all the memory it was given (including your GPU Memory) will be released. Not only does this help keep things neatly organized, but also, you can analyze how much CPU, GPU, RAM, GPU Memory each process consumes.
For example, if you had these functions,
def train_model(x, y, params):
model = ModelWrapper(params.filepath)
model.fit(x, y, epochs=params.epochs)
def predict_model(x, params):
model = ModelWrapper(params.filepath)
y_pred = model.predict(x)
print(y_pred.shape)
You can use it like,
import multiprocessing
for i in range(8):
print(f"Training Model {i} from {params.filepath}")
process_train = multiprocessing.Process(train_model, args=(x_train, y_train, params))
process_train.start()
process_train.join()
print("Predicting")
process_predict = multiprocessing.Process(predict_model, args=(x_train, params))
process_predict.start()
process_predict.join()
This way python fires a new process for your tasks, which can run with their own memory.
Bonus Tip: You can also choose to run them in parallel if you have many CPUs and GPUs available: you just need to call process_train.join() after the loop in that case. If you had eight GPUs, you can use this parent script to serve parameters, while each of the individual processes shall run on a different GPU.
# I tried a variety of things, separately and together, before I started using processes,
tf.reset_default_graph()
K.clear_session()
cuda.select_device(0); cuda.close()
model = get_new_model() # overwrite
model = None
del model
gc.collect()
+ I also considered using threads, subprocess.Popen, but I was satisfied with multiprocessing since it offered full decoupling that made it a lot easier to manage and allocate resources.

Related

Program modify load vgg16.ckpt file method (original Linux to windows)

I install the Faste-RCNN script from here, this version is modified to fit python3. After following all the instructions I still got an error as below:
Loading initial model weights from ./data/imagenet_weights/vgg16.ckpt
Unable to open table file .\data\imagenet_weights\vgg16.ckpt: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
2022-02-25 09:30:16.917233: W C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\util\tensor_slice_reader.cc:95] Could not open .\data\imagenet_weights\vgg16.ckpt: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator? Traceback (most recent call last):
File "D:/Object detection/FRCNNPY3.6/train.py", line 218, in <module>
train.train()
File "D:/Object detection/FRCNNPY3.6/train.py", line 123, in train
variables_to_restore = self.net.get_variables_to_restore(variables, var_keep_dic)
File "D:\Object detection\FRCNNPY3.6\lib\nets\vgg16.py", line 66, in get_variables_to_restore
if v.name.split(':')[0] in var_keep_dic:
TypeError: argument of type 'NoneType' is not iterable
I was trying to solve the " 'NoneType' is not iterable " error, but still no progress.
Of course, the file be stored as vgg16.ckpt type and the path is D:\Object detection\FRCNNPY3.6\data\imagenet_weights.
From the error message I find out this line "Loading initial model weights from ./data/imagenet_weights/vgg16.ckpt" shows the path where it loading, the direction of slash is wrong for windows, this form is used in Linux, but I don't know where can I modify it.
The complete program as below:
import time
import numpy as np
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
import lib.config.config as cfg
from lib.datasets import roidb as rdl_roidb
from lib.datasets.factory import get_imdb
from lib.datasets.imdb import imdb as imdb2
from lib.layer_utils.roi_data_layer import RoIDataLayer
from lib.nets.vgg16 import vgg16
from lib.utils.timer import Timer
try:
import cPickle as pickle
except ImportError:
import pickle
import os
def get_training_roidb(imdb):
"""Returns a roidb (Region of Interest database) for use in training."""
if True:
print('Appending horizontally-flipped training examples...')
imdb.append_flipped_images()
print('done')
print('Preparing training data...')
rdl_roidb.prepare_roidb(imdb)
print('done')
return imdb.roidb
def combined_roidb(imdb_names):
"""
Combine multiple roidbs
"""
def get_roidb(imdb_name):
imdb = get_imdb(imdb_name)
print('Loaded dataset `{:s}` for training'.format(imdb.name))
imdb.set_proposal_method("gt")
print('Set proposal method: {:s}'.format("gt"))
roidb = get_training_roidb(imdb)
return roidb
roidbs = [get_roidb(s) for s in imdb_names.split('+')]
roidb = roidbs[0]
if len(roidbs) > 1:
for r in roidbs[1:]:
roidb.extend(r)
tmp = get_imdb(imdb_names.split('+')[1])
imdb = imdb2(imdb_names, tmp.classes)
else:
imdb = get_imdb(imdb_names)
return imdb, roidb
class Train:
def __init__(self):
# Create network
if cfg.FLAGS.network == 'vgg16':
self.net = vgg16(batch_size=cfg.FLAGS.ims_per_batch)
else:
raise NotImplementedError
self.imdb, self.roidb = combined_roidb("voc_2007_trainval")
self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
self.output_dir = cfg.get_output_dir(self.imdb, 'default')
def train(self):
# Create session
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
sess = tf.Session(config=tfconfig)
with sess.graph.as_default():
tf.set_random_seed(cfg.FLAGS.rng_seed)
layers = self.net.create_architecture(sess, "TRAIN", self.imdb.num_classes, tag='default')
loss = layers['total_loss']
lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
momentum = cfg.FLAGS.momentum
optimizer = tf.train.MomentumOptimizer(lr, momentum)
gvs = optimizer.compute_gradients(loss)
# Double bias
# Double the gradient of the bias if set
if cfg.FLAGS.double_bias:
final_gvs = []
with tf.variable_scope('Gradient_Mult'):
for grad, var in gvs:
scale = 1.
if cfg.FLAGS.double_bias and '/biases:' in var.name:
scale *= 2.
if not np.allclose(scale, 1.0):
grad = tf.multiply(grad, scale)
final_gvs.append((grad, var))
train_op = optimizer.apply_gradients(final_gvs)
else:
train_op = optimizer.apply_gradients(gvs)
# We will handle the snapshots ourselves
self.saver = tf.train.Saver(max_to_keep=100000)
# Write the train and validation information to tensorboard
# writer = tf.summary.FileWriter(self.tbdir, sess.graph)
# valwriter = tf.summary.FileWriter(self.tbvaldir)
# Load weights
# Fresh train directly from ImageNet weights
print('Loading initial model weights from {:s}'.format(cfg.FLAGS.pretrained_model))
variables = tf.global_variables()
# Initialize all variables first
sess.run(tf.variables_initializer(variables, name='init'))
var_keep_dic = self.get_variables_in_checkpoint_file(cfg.FLAGS.pretrained_model)
# Get the variables to restore, ignorizing the variables to fix
variables_to_restore = self.net.get_variables_to_restore(variables, var_keep_dic)
restorer = tf.train.Saver(variables_to_restore)
restorer.restore(sess, cfg.FLAGS.pretrained_model)
print('Loaded.')
# Need to fix the variables before loading, so that the RGB weights are changed to BGR
# For VGG16 it also changes the convolutional weights fc6 and fc7 to
# fully connected weights
self.net.fix_variables(sess, cfg.FLAGS.pretrained_model)
print('Fixed.')
sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
last_snapshot_iter = 0
timer = Timer()
iter = last_snapshot_iter + 1
last_summary_time = time.time()
while iter < cfg.FLAGS.max_iters + 1:
# Learning rate
if iter == cfg.FLAGS.step_size + 1:
# Add snapshot here before reducing the learning rate
# self.snapshot(sess, iter)
sess.run(tf.assign(lr, cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))
timer.tic()
# Get training data, one batch at a time
blobs = self.data_layer.forward()
# Compute the graph without summary
try:
rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(sess, blobs, train_op)
except Exception:
# if some errors were encountered image is skipped without increasing iterations
print('image invalid, skipping')
continue
timer.toc()
iter += 1
# Display training information
if iter % (cfg.FLAGS.display) == 0:
print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
'>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
(iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
print('speed: {:.3f}s / iter'.format(timer.average_time))
if iter % cfg.FLAGS.snapshot_iterations == 0:
self.snapshot(sess, iter )
def get_variables_in_checkpoint_file(self, file_name):
try:
reader = pywrap_tensorflow.NewCheckpointReader(file_name)
var_to_shape_map = reader.get_variable_to_shape_map()
return var_to_shape_map
except Exception as e: # pylint: disable=broad-except
print(str(e))
if "corrupted compressed block contents" in str(e):
print("It's likely that your checkpoint file has been compressed "
"with SNAPPY.")
def snapshot(self, sess, iter):
net = self.net
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
# Store the model snapshot
filename = 'vgg16_faster_rcnn_iter_{:d}'.format(iter) + '.ckpt'
# filename = os.path.join(self.output_dir, filename)
filename = os.path.join(self.output_dir, filename)
self.saver.save(sess, filename)
print('Wrote snapshot to: {:s}'.format(filename))
# Also store some meta information, random state, etc.
nfilename = 'vgg16_faster_rcnn_iter_{:d}'.format(iter) + '.pkl'
nfilename = os.path.join(self.output_dir, nfilename)
# current state of numpy random
st0 = np.random.get_state()
# current position in the database
cur = self.data_layer._cur
# current shuffled indeces of the database
perm = self.data_layer._perm
# Dump the meta info
with open(nfilename, 'wb') as fid:
pickle.dump(st0, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(cur, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(perm, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(iter, fid, pickle.HIGHEST_PROTOCOL)
return filename, nfilename
if __name__ == '__main__':
train = Train()
train.train()

Tensorflow inference too slow when loading multiple models

I finetuned two Mobilenet models on diferent datasets based on the tensorflow object_detection API example from here. When I use eager mode (tf.executing_eagerly() is True) using only one model then the inference runs at 0.036 seconds per image. When I load two models Keras required to convert to graph mode (tf.executing_eagerly() is False) and the inference runs at 1.8 seconds per image. What I'm doing wrong?
def inference(pipeline_config, checkpoint_path):
print('Building model and restoring weights', flush=True)
num_classes = 3
# Load pipeline config and build a detection model.
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
detection_model = model_builder.build(
model_config=model_config, is_training=False)
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(checkpoint_path).expect_partial()
# Run model through a dummy image so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 320, 320, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')
return detection_model
def get_model_detection_function(detection_model):
"""Get a tf.function for detection."""
# Again, uncomment this decorator if you want to run inference eagerly
#tf.function
def detect(input_tensor):
"""Run detection on an input image.
Args:
input_tensor: A [1, height, width, 3] Tensor of type tf.float32.
Note that height and width can be anything since the image will be
immediately resized according to the needs of the model within this
function.
Returns:
A dict containing 3 Tensors (`detection_boxes`, `detection_classes`,
and `detection_scores`).
"""
preprocessed_image, shapes = detection_model.preprocess(input_tensor)
prediction_dict = detection_model.predict(preprocessed_image, shapes)
return detection_model.postprocess(prediction_dict, shapes)
return detect
def mainProcess():
print('Loading model 1...')
g1 = tf.Graph()
s1 = tf.compat.v1.Session(graph=g1)
with g1.as_default(), s1.as_default():
detection_model_1 = inference('config_1/pipeline.config', 'Checkpoint_1/ckpt-1')
detect_fn_1 = get_model_detection_function(detection_model_1)
s1.run(tf.compat.v1.global_variables_initializer())
print('Loading model 2...')
g2 = tf.Graph()
s2 = tf.compat.v1.Session(graph=g2)
with g2.as_default():
detection_model_2 = inference('config_2/pipeline.config', 'Checkpoint_2/ckpt-1')
detect_fn_2 = get_model_detection_function(detection_model_2)
s2.run(tf.compat.v1.global_variables_initializer())
for i, f in enumerate(listdir('images_dir/')):
...
... read the image
...
with g1.as_default():
with s1.as_default():
sec = time.time()
input_tensor = tf.convert_to_tensor(test_img, dtype=tf.float32)
detections = detect_fn_1(input_tensor)
detections = s1.run(detections)
curr = time.time()
print("Finished iterating in: " + str(curr - sec) + " seconds")
# the same for detection_model_2
For eager mode with only one model the mainProcess is:
def mainProcess():
print('Loading model...')
detection_model_1 = inference('config_1/pipeline.config', 'Checkpoint_1/ckpt-1')
detect_fn_1 = get_model_detection_function(detection_model_1)
for i, f in enumerate(listdir('images_dir/')):
...
... read the image
...
sec = time.time()
input_tensor = tf.convert_to_tensor(test_img, dtype=tf.float32)
detections = detect_fn_1(input_tensor)
print(detections['detection_boxes'][0].numpy())
print(detections['detection_scores'][0].numpy())
curr = time.time()
print("Finished iterating in: " + str(curr - sec) + " seconds")

Running inference on InceptionV3 network twice bring totally different results

When i calculate inception score, i got NaN most of the time.
Trying to investigate why it happen i found that running the network twice on the same images can lead for some of the images to totally different results (difference greater than 0.9 while the maximum difference can be 1), the images which got high difference changed from run to run.
My GPU is 2080ti, i use Ubuntu with tensorflow=1.13.1.
i try to change drivers, tensorflow version, run form docker, the same problem happen all the time.
I have another server at the university which has the same GPU (2080ti), and when i try to run there the problem disappear.
Thanks for the help.
my script
# Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import tarfile
import numpy as np
from six.moves import urllib
import tensorflow as tf
import sys
import warnings
from scipy import linalg
MODEL_DIR = '/tmp/imagenet'
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
softmax = None
pool3 = None
# Call this function with list of images. Each of elements should be a
# numpy array with values ranging from 0 to 255.
def get_features(images):
assert ((images.shape[3]) == 3)
assert (np.max(images) > 10)
assert (np.min(images) >= 0.0)
images = images.astype(np.float32)
bs = 100
sess = tf.get_default_session()
preds = []
for inp in np.array_split(images, round(images.shape[0] / bs)):
sys.stdout.write(".")
sys.stdout.flush()
pred = sess.run(softmax, {'InputTensor:0': inp})
preds.append(pred)
preds = np.concatenate(preds, 0)
return preds
# This function is called automatically.
def _init_inception():
global softmax
global pool3
if not os.path.exists(MODEL_DIR):
os.makedirs(MODEL_DIR)
filename = DATA_URL.split('/')[-1]
filepath = os.path.join(MODEL_DIR, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
with tf.gfile.GFile(os.path.join(
MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
# Import model with a modification in the input tensor to accept arbitrary
# batch size.
input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3],
name='InputTensor')
_ = tf.import_graph_def(graph_def, name='inception_v3',
input_map={'ExpandDims:0': input_tensor})
# Works with an arbitrary minibatch size.
pool3 = tf.get_default_graph().get_tensor_by_name('inception_v3/pool_3:0')
ops = pool3.graph.get_operations()
for op_idx, op in enumerate(ops):
if 'inception_v3' in op.name:
for o in op.outputs:
shape = o.get_shape()
shape = [s.value for s in shape]
new_shape = []
for j, s in enumerate(shape):
if s == 1 and j == 0:
new_shape.append(None)
else:
new_shape.append(s)
o.set_shape(tf.TensorShape(new_shape))
w = tf.get_default_graph().get_operation_by_name("inception_v3/softmax/logits/MatMul").inputs[1]
logits = tf.matmul(tf.squeeze(pool3, [1, 2]), w)
softmax = tf.nn.softmax(logits)
_init_inception()
if __name__ =='__main__':
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
with tf.Session() as sess:
preds1 = get_features(x_train)
preds2 = get_features(x_train)
print(abs(preds1-preds2).max())

resave tf1.x saved_model.pb into new tf2.0 saved_model.pb

I have an old trained tf1.x model (let it be Model1), constructed with Placeholders, tf.contrib and so on. I can use this model by restoring graph from .ckpt checkpoint in tf.Session (in tf1.x).
I resolved the easiest way to use the Model1 is to export it:
# tf1.x code
tf.saved_model.simple_save(sess, saved_Model1_path,
inputs={'input':'Placeholder:0'}, outputs={'output':'.../Sigmoid:0'})
I can use the obtained saved_model.pb even in tf2.0:
# tf2.0 code
Model1 = tf.saved_model.load(saved_Model1_path)
out = Model1.signatures['serving_default'](tf.convert_to_tensor(data))['output'].numpy()
out = Model1.signatures['serving_default'].prune('Placeholder:0', '.../Sigmoid:0')(data)
out = Model1.prune('Placeholder:0', '.../Sigmoid:0')(data)
Now imagine, I have a pre/post processing written with tf2.0 tf.function.
I wish the construction of preprocessing -> Model1-> postprocessing to be exported in a single saved_model.pb in tf2.0.
And here come the problems due to saved_model.pb of Model1 utilizes tf.Placeholders (smth like this, I'm not an expert here).
At the same time, I can easily build saved_model.pb from other tf2.0 exported model:
import os
import tensorflow as tf
assert tf.__version__[0] == '2'
class M1(tf.Module):
def __init__(self):
super(M1, self).__init__()
self.v = tf.Variable(2.)
#tf.function(input_signature=[tf.TensorSpec([], tf.float32)])
def M1_func(self, x):
return x * self.v
# build some saved_model.pb
m1 = M1()
path_1 = './save1'
path_to_save = os.path.realpath(path_1)
tf.saved_model.save(m1, path_to_save)
# load built saved_model.pb and check it works
m1 = tf.saved_model.load(path_1)
assert 6 == m1.M1_func(3.).numpy()
# build other saved_model.pb using first saved_model.pb as a part of computing graph
class M2(tf.Module):
def __init__(self):
super(M2, self).__init__()
self.run = m1
self.v = tf.Variable(3.)
#tf.function(input_signature=[tf.TensorSpec([], tf.float32)])
def M2_func(self, x):
return self.run.M1_func(x) * self.v
m2 = M2()
path_2 = './save2'
path_to_save = os.path.realpath(path_2)
tf.saved_model.save(m2, path_to_save)
m2 = tf.saved_model.load(path_2)
assert 18 == m2.M2_func(3.).numpy()
But when I'm trying to do the same, except replacing first saved_model.pb from tf2.0 saving on tf1.x saving, It doesn't work:
# save first saved_model.pb with tf1.x
import tensorflow as tf
assert tf.__version__[0] == '1'
inp = tf.placeholder(shape=[],dtype=tf.float32)
a = tf.Variable(1.5)
out = a*inp
sess = tf.Session()
sess.run(tf.global_variables_initializer())
assert 7.5 == out.eval({inp:5.}, sess)
path_3 = './save3'
path_to_save = os.path.realpath(path_3)
tf.saved_model.simple_save(sess, path_to_save, inputs={'input': inp}, outputs={'output': out})
Now switch to tf2.0 and try to build new saved_model.pb with first one as a part of a computing graph:
import os
import tensorflow as tf
assert tf.__version__[0] == '2'
path_3 = './save3'
path_to_save = os.path.realpath(path_3)
m1 = tf.saved_model.load(path_to_save)
class M2(tf.Module):
def __init__(self):
super(M2, self).__init__()
self.run = m1.signatures['serving_default'].prune('Placeholder:0', 'mul:0')
self.v = tf.Variable(3.)
#tf.function(input_signature=[tf.TensorSpec([], tf.float32)])
def M2_func(self, x):
return self.run(x) * self.v
m2 = M2()
assert 22.5 == m2.M2_func(5.) # ofc eager execution works
# now save M2 to saved_model.pb and check it works (it does not)
path_4 = './save4'
path_to_save = os.path.realpath(path_4)
tf.saved_model.save(m2, path_to_save)
m2 = tf.saved_model.load(path_4)
m2.M2_func(5.) # error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value StatefulPartitionedCall/StatefulPartitionedCall/Variable
[[{{node StatefulPartitionedCall/StatefulPartitionedCall/Variable/read}}]] [Op:__inference_restored_function_body_207]
Function call stack:
restored_function_body
So the question is: how to save this architecture in a single saved_model.pb in tf2.0
preprocessing (tf2.0 #tf.function) -> Model1 (saved_model.pb created in tf1.x) -> postprocessing (tf2.0 #tf.function)

The problem was solved. Look at this exporting function and how to use it. This function implementation accepts a single input tensor name and a list of out tensor names.
import tensorflow as tf
def export_tf1(session, in_tnsr_fullname, out_tnsrS_fullname, export_dir='./export'):
assert isinstance(in_tnsr_fullname, str)
assert all([isinstance(out_tnsr_fullname, str) for out_tnsr_fullname in out_tnsrS_fullname])
in_tnsr_name = in_tnsr_fullname.split(':')[0]
out_tnsrS_name = [out_tnsr_fullname.split(':')[0] for out_tnsr_fullname in out_tnsrS_fullname]
graph_def = tf.graph_util.convert_variables_to_constants(session, session.graph.as_graph_def(), out_tnsrS_name)
tf.reset_default_graph()
outs = tf.import_graph_def(graph_def, name="", return_elements=out_tnsrS_fullname)
g = outs[0].graph
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
with tf.Session(graph=g) as sess:
input_signatures = {in_tnsr_name: g.get_tensor_by_name(in_tnsr_fullname)}
output_signatures = {}
for out_tnsr_name, out_tnsr_fullname in zip(out_tnsrS_name, out_tnsrS_fullname):
output_signatures[out_tnsr_name] = g.get_tensor_by_name(out_tnsr_fullname)
signature = tf.saved_model.signature_def_utils.predict_signature_def(input_signatures, output_signatures)
builder.add_meta_graph_and_variables(
sess,
[tf.saved_model.tag_constants.SERVING],
{tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature},
clear_devices=True
)
builder.save()
How to use the exporting function to receive .pb from tf1_ckpt checkpoint:
import tensorflow as tf
assert tf.__version__[0] == '1'
g = tf.get_default_graph()
sess = tf.Session(graph=g)
ckpt_tf1_path = 'some_directory/name.ckpt' # just an example
tf.train.Saver().restore(sess, ckpt_tf1_path)
input_tensor_name = 'x_tnsr:0' # just an example
out_tensor_name = 'y_tnsr:0' # just an example
export_tf1(sess, input_tensor_name, [out_tensor_name], export_dir)
How to reuse .pb from tf1_ckpt in .pb with tf2.0:
import tensorflow as tf
assert tf.__version__[0] == '2'
class Export(tf.Module):
def __init__(self):
super(Export, self).__init__()
tf1_saved_model_directory = 'directory/saved_model' # just an example
self.tf1_model = tf.saved_model.load(tf1_saved_model_directory)
input_tensor_name = 'x_tnsr:0' # just an example
out_tensor_name = 'y_tnsr:0' # just an example
self.tf1_model = self.tf1_model.prune(input_tensor_name, out_tensor_name)
#tf.function
def __call__(self, x):
out = self.tf1_model(x)
return out
export_dir = './saved_model'
tf.saved_model.save(Export(), export_dir)

Broadcasting a keras model with pyspark [duplicate]

I am using Caffe to do image classification, can I am using MAC OS X, Pyhton.
Right now I know how to classify a list of images using Caffe with Spark python, but if I want to make it faster, I want to use Spark.
Therefore, I tried to apply the image classification on each element of an RDD, the RDD created from a list of image_path. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
image = caffe.io.load_image(image_path)
transformed_image = transformer.preprocess('data', image)
net.blobs['data'].data[...] = transformed_image
output = net.forward()
output_prob = output['prob'][0]
pred = output_prob.argmax()
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(labels_file, str, delimiter='\t')
lb = labels[pred]
image_name = image_path.split(images_folder_path)[1]
result_str = 'image: '+image_name+' prediction: '+str(pred)+' label: '+lb
return result_str
This this the code generates Caffe parameters and apply the classify_image method on each element of the RDD:
def main():
sys.path.insert(0, caffe_root + 'python')
caffe.set_mode_cpu()
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,
model_weights,
caffe.TEST)
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu)
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
net.blobs['data'].reshape(50,
3,
227, 227)
image_list= []
for image_path in glob.glob(images_folder_path+'*.jpg'):
image_list.append(image_path)
images_rdd = sc.parallelize(image_list)
transformer_bc = sc.broadcast(transformer)
net_bc = sc.broadcast(net)
image_predictions = images_rdd.map(lambda image_path: classify_image(image_path, transformer_bc, net_bc))
print image_predictions
if __name__ == '__main__':
main()
As you can see, here I tried to broadcast the caffe parameters, transformer_bc = sc.broadcast(transformer), net_bc = sc.broadcast(net)
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I am doing the broadcast, the error was :
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know, is there any way I can classify images using Caffe and Spark but also take advantage of Spark?

When you work with complex, non-native objects initialization has to moved directly to the workers for example with singleton module:
net_builder.py:
import cafe
net = None
def build_net(*args, **kwargs):
... # Initialize net here
return net
def get_net(*args, **kwargs):
global net
if net is None:
net = build_net(*args, **kwargs)
return net
main.py:
import net_builder
sc.addPyFile("net_builder.py")
def classify_image(image_path, transformer, *args, **kwargs):
net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. It can be done either manually or using SparkFiles mechanism.
On a side note you should take a look at the SparkNet package.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tensorflow: model wrapper that can release GPU resources - python

Related

Program modify load vgg16.ckpt file method (original Linux to windows)

Tensorflow inference too slow when loading multiple models

Running inference on InceptionV3 network twice bring totally different results

resave tf1.x saved_model.pb into new tf2.0 saved_model.pb

Broadcasting a keras model with pyspark [duplicate]

Categories

Resources