I am loading a TensorFlow model from a .pb file. I want to change the weights of all the layers. I am able to extract the weights, but I am not able to change them.
I converted the graph_def to a TensorFlow graph, but even then I cannot assign a new value to the weights, because the weights are stored in a tensor of type "Const".
b = graph_tf.get_tensor_by_name("Variable_1:0")
tf.assign(b, np.ones((1,1,64,64)))
I am getting the following error:
AttributeError: 'Tensor' object has no attribute 'assign'
Please provide a way to resolve this issue. Thanks in advance.
Here is one way you can achieve something like that. You want to replace some constant operations with variables initialized to the value of those operations, so you can first extract those constant values and then create the graph with the variables initialized to them. See the example below.
import tensorflow as tf
# Example graph
with tf.Graph().as_default():
    inp = tf.placeholder(tf.float32, [None, 3], name='Input')
    w = tf.constant([[1.], [2.], [3.]], tf.float32, name='W')
    out = tf.squeeze(inp @ w, 1, name='Output')
    gd = tf.get_default_graph().as_graph_def()

# Extract weight values
with tf.Graph().as_default():
    w, = tf.graph_util.import_graph_def(gd, return_elements=['W:0'])
    # Get the constant weight values
    with tf.Session() as sess:
        w_val = sess.run(w)
    # Alternatively, since it is a constant,
    # you can get the values from the operation attribute directly
    w_val = tf.make_ndarray(w.op.get_attr('value'))

# Make new graph
with tf.Graph().as_default():
    # Make variables initialized with stored values
    w = tf.Variable(w_val, name='W')
    init_op = tf.global_variables_initializer()
    # Import graph
    inp, out = tf.graph_util.import_graph_def(
        gd, input_map={'W:0': w},
        return_elements=['Input:0', 'Output:0'])
    # Change value operation
    w_upd = w[2].assign([5.])
    # Test
    with tf.Session() as sess:
        sess.run(init_op)
        print(sess.run(w))
        # [[1.]
        #  [2.]
        #  [3.]]
        sess.run(w_upd)
        print(sess.run(w))
        # [[1.]
        #  [2.]
        #  [5.]]
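Since your graph comes from a .pb file rather than being built inline, you can obtain the GraphDef by parsing that file instead of calling as_graph_def(). A minimal sketch, assuming a frozen graph at the hypothetical path 'model.pb':

import tensorflow as tf

# Sketch: parse a frozen graph file into a GraphDef; the resulting `gd`
# can then be passed to tf.graph_util.import_graph_def with an input_map
# as shown above ('model.pb' is a placeholder path).
gd = tf.GraphDef()
with tf.gfile.GFile('model.pb', 'rb') as f:
    gd.ParseFromString(f.read())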
Related
I want to use my pretrained separable convolution (which is part of a bigger module) in another separable convolution in another model.
In the trained module I tried
with tf.variable_scope('sep_conv_ker' + str(input_shape[-1])):
    sep_conv2d = tf.reshape(
        tf.layers.separable_conv2d(inputs_flatten, input_shape[-1],
                                   [1, input_shape[-2]],
                                   trainable=trainable),
        [inputs_flatten.shape[0], 1, input_shape[-1], INNER_LAYER_WIDTH])
and
all_variables = tf.trainable_variables()
scope1_variables = tf.contrib.framework.filter_variables(all_variables, include_patterns=['sep_conv_ker'])
sep_conv_weights_saver = tf.train.Saver(scope1_variables, sharded=True, max_to_keep=20)
Inside the session I call
sep_conv_weights_saver.save(sess,os.path.join(LOG_DIR + MODEL_SPEC_LOG_DIR,
"init_weights",MODEL_SPEC_SUFFIX + 'epoch_' + str(epoch) + '.ckpt'))
But I cannot understand when and how I should load the weights into the separable convolution in the other module; it has a different name and a different scope.
Furthermore, as I'm using a predefined tf.layers function, does that mean I need to access each individual weight in the new graph and assign it?
My current solution doesn't work, and I think the weights are somehow being initialized after the assignment. Furthermore, loading a whole new graph just for a few weights seems weird, doesn't it?
###IN THE OLD GRAPH###
all_variables = tf.trainable_variables()
scope1_variables = tf.contrib.framework.filter_variables(all_variables, include_patterns=['sep_conv_ker'])
vars = dict((var.op.name.split("/")[-1] + str(idx), var) for idx,var in enumerate(scope1_variables))
sep_conv_weights_saver = tf.train.Saver(vars, sharded=True, max_to_keep=20)
In the new graph there is a function that basically takes the variables from the old graph and assigns them; loading the meta_graph is redundant.
def load_pretrained(sess):
    sep_conv2d_vars = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                       if ("sep_conv_ker" in var.op.name)]
    var_dict = dict((var.op.name.split("/")[-1] + str(idx), var)
                    for idx, var in enumerate(sep_conv2d_vars))
    new_saver = tf.train.import_meta_graph(
        tf.train.latest_checkpoint('log/train/sep_conv_ker/global_neighbors40/init_weights') + '.meta')
    # saver = tf.train.Saver(var_list=var_dict)
    new_saver.restore(sess,
                      tf.train.latest_checkpoint('log/train/sep_conv_ker/global_neighbors40/init_weights'))
    graph = tf.get_default_graph()
    sep_conv2d_trained = dict(("".join(var.op.name.split("/")[-2:]), var)
                              for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                              if ("sep_conv_ker_init" in var.op.name))
    for var in sep_conv2d_vars:
        tf.assign(var, sep_conv2d_trained["".join(var.op.name.split("/")[-2:])])
You need to make sure that the variables have the same names in the checkpoint file and in the graph where you load them. You can write a script that will convert the variable names.
With tf.contrib.framework.list_variables(ckpt), you can find out which variables of which shapes are in the checkpoint and create corresponding variables with the new names (I believe you can write a regex that will fix the names) and the correct shapes.
Then you load the original values with tf.contrib.framework.load_checkpoint(ckpt) and create assign ops tf.assign(var, loaded) that assign the saved values to the variables with the new names.
Run the assign ops in a session.
Save the new variables.
Minimum example:
Original model (variables in scope "regression"):
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 3])
regression = tf.layers.dense(x, 1, name="regression")
session = tf.Session()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.save(session, './model')
Renaming script:
import tensorflow as tf
assign_ops = []
reader = tf.contrib.framework.load_checkpoint("./model")
for name, shape in tf.contrib.framework.list_variables("./model"):
    new_name = name.replace("regression/", "foo/bar/")
    new_var = tf.get_variable(new_name, shape)
    assign_ops.append(tf.assign(new_var, reader.get_tensor(name)))
session = tf.Session()
saver = tf.train.Saver(tf.trainable_variables())
session.run(assign_ops)
saver.save(session, './model-renamed')
Model where you load the renamed variables (the same variables in scope "foo/bar"):
import tensorflow as tf
with tf.variable_scope("foo"):
x = tf.placeholder(tf.float32, [None, 3])
regression = tf.layers.dense(x, 1, name="bar")
session = tf.Session()
session.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.restore(session, './model-renamed')
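As an optional sanity check (just a sketch, reusing the paths from the example above), you can list the variables in the renamed checkpoint to confirm the new names and shapes are what the loading graph expects:

import tensorflow as tf

# Sketch: confirm the renamed checkpoint now contains "foo/bar/..." variables.
for name, shape in tf.contrib.framework.list_variables("./model-renamed"):
    print(name, shape)  # expected something like: foo/bar/kernel [3, 1], foo/bar/bias [1]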
What's contained in a TF checkpoint? Estimators for example store a separate file that contains the GraphDef proto and you can basically do a tf.import_graph_def(), then create a tf.train.Saver() and restore a checkpoint into the graph. Now if you have another GraphDef describing a completely different graph that just happens to share the exact same variable names together with matching variable dimensions, will you be able to load the checkpoint into that graph? In other words, is it just a variable name to value mapping or does it assume something else about a graph that would be checked during loading? What if you try to load a checkpoint into a graph that is a subset of the original graph (i.e. tensor dimensions and names match, but some names are missing)?
When do people start reading the documentation (?):
https://www.tensorflow.org/mobile/prepare_models
These are different concepts. You can load just the weights as long as the shapes match. If there is a mismatch you just get:
Restoring from checkpoint failed. This is most likely due to a
mismatch between the current graph and the graph from the checkpoint.
Please ensure that you have not altered the graph expected based on
the checkpoint.
However, you can tweak a non-trivial case, where the graph is completely different:
import tensorflow as tf
import numpy as np

test_data = np.arange(4).reshape(1, 2, 2, 1)

# a simple graph and everything is fine
input = tf.placeholder(dtype=tf.float32, shape=[1, 2, 2, 1])
output = tf.layers.conv2d(input, 3, kernel_size=1, name='test', use_bias=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(output, {input: test_data}))
    saver = tf.train.Saver()
    save_path = saver.save(sess, "/tmp/model.ckpt")

print(tf.trainable_variables())

# reset previous elements
tf.reset_default_graph()

# a new graph
input = tf.placeholder(dtype=tf.float32, shape=[1, 2, 2, 1])
# and wait: this is completely different, but same name and shape
W = tf.get_variable('test/kernel', shape=[1, 1, 1, 3])
# but the graph has different operations
output = input + W

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, "/tmp/model.ckpt")
    print(sess.run(output, {input: test_data}))
In my case I got:
# 1st version (original graph)
[[[[-0. -0. -0. ]
[-0.08429337 -1.0156475 -0.42691123]]
[[-0.16858673 -2.031295 -0.85382247]
[-0.2528801 -3.0469427 -1.2807337 ]]]]
# 2nd version (altered graph)
[[[[-0.08429337 -1.0156475 -0.42691123]
[ 0.91570663 -0.01564753 0.57308877]]
[[ 1.9157066 0.98435247 1.5730888 ]
[ 2.9157066 1.9843525 2.5730886 ]]]]
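To see directly that a checkpoint is essentially a name-to-value mapping, you can inspect it with a checkpoint reader. A minimal sketch, reusing the /tmp/model.ckpt path saved above:

import tensorflow as tf

# Sketch: a checkpoint is basically a mapping from variable names to saved
# tensors; the Saver matches these names (and shapes) against the graph on restore.
reader = tf.train.NewCheckpointReader("/tmp/model.ckpt")
for name, shape in reader.get_variable_to_shape_map().items():
    print(name, shape)            # e.g. test/kernel [1, 1, 1, 3]
    print(reader.get_tensor(name))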
I am writing TensorFlow code and get an error when I try to run it with variables.
The base code is
import tensorflow as tf
import numpy as np

graph = tf.Graph()

with graph.as_default():
    with tf.name_scope("variables"):
        # keep track of how many times the model has been run
        global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name="global_step")
        # keep track of sum of all outputs over time
        total_output = tf.Variable(0, dtype=tf.float32, trainable=False, name="total_output")

    with tf.name_scope("transformation"):
        # separate input layer
        with tf.name_scope("input"):
            # create input placeholder which takes in a vector
            a = tf.placeholder(tf.float32, shape=[None], name="input_placeholder_A")
        # separate the middle layer
        with tf.name_scope("middle"):
            b = tf.reduce_prod(a, name="product_b")
            c = tf.reduce_sum(a, name="sum_c")
        # separate the output layer
        with tf.name_scope("output"):
            output = tf.add(b, c, name="output")

    # separate the update layer and store the variables
    with tf.name_scope("update"):
        update_total = total_output.assign(output)
        increment_step = global_step.assign_add(1)

    # now create namescope summaries and store these in the summary
    with tf.name_scope("summaries"):
        avg = tf.divide(update_total, tf.cast(increment_step, tf.float32), name="average")
        # create summary for output node
        tf.summary.scalar("output_summary", output)
        tf.summary.scalar("total_summary", update_total)
        tf.summary.scalar("average_summary", avg)

    with tf.name_scope("global_ops"):
        init = tf.initialize_all_variables()
        merged_summaries = tf.summary.merge_all()

sess = tf.Session(graph=graph)
writer = tf.summary.FileWriter('./improved_graph', graph)
sess.run(init)

def run_graph(input_tensor):
    feed_dict = {a: input_tensor}
    _, step, summary = sess.run([output, increment_step, merged_summaries],
                                feed_dict=feed_dict)
    writer.add_summary(summary, global_step=step)
when I try to run the above code
run_graph([2,8])
I get the error
InvalidArgumentError                      Traceback (most recent call last)
InvalidArgumentError (see above for traceback): You must feed a value for
placeholder tensor 'transformation_2/input/input_placeholder_A' with dtype
float and shape [?]
    [[Node: transformation_2/input/input_placeholder_A = Placeholder[dtype=DT_FLOAT, shape=[?],
    _device="/job:localhost/replica:0/task:0/device:CPU:0"]]]
I do not understand what I am doing wrong here, since the code has been corrected for the version of TensorFlow installed.
Your placeholder a is defined as being of type float32, but [2, 8] contains int values.
run_graph([2., 8.]) or run_graph(np.array([2, 8], dtype=np.float32)) should work.
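Alternatively, you can make the conversion explicit inside run_graph so callers can pass ints or floats. Just a sketch, reusing a, sess, output, increment_step, merged_summaries and writer as defined in the question above:

import numpy as np

# Sketch: cast the input to float32 before feeding, matching the placeholder dtype.
def run_graph(input_tensor):
    feed_dict = {a: np.asarray(input_tensor, dtype=np.float32)}
    _, step, summary = sess.run([output, increment_step, merged_summaries],
                                feed_dict=feed_dict)
    writer.add_summary(summary, global_step=step)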
I have read through previous threads. My data are in the form of an array fed to a placeholder. Trying to convert the data to a tensor before feeding produces a different (inverse) error message. Other solutions similarly do not seem to work in this situation. Here is minimal code.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans
X = tf.placeholder(tf.float32, shape=[None, 10], name="X")
data = np.random.randn(2,10)
def lump(X):
    # Build KMeans graph
    kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                    use_mini_batch=True)
    (all_scores, cluster_idx, scores, cluster_centers_initialized, cluster_centers_var, init_op,
     train_op) = kmeans.training_graph()
    cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
    avg_distance = tf.reduce_mean(scores)
    return cluster_idx, scores
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    idx, d = sess.run(lump, feed_dict={X: data})
Correct, you can't evaluate just lump, because it's a function (returning tensors), not a tensor or an op. You probably meant to do something like this:
cluster_idx, scores = lump(X)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    idx, d = sess.run([cluster_idx, scores], feed_dict={X: data})
Note that lump() is invoked before tf.global_variables_initializer(), because it defines new variables in the graph, so they must be initialized.
The code still fails, because lump is clearly not finished and has issues with dimensions, but it is the right way to evaluate something in a session.
I have written a program with TensorFlow that identifies a number of figures in an image. The model is trained with one function and then used with another function to label the figures. The training has been done on my computer and the resulting model uploaded to AWS along with the solve function.
On my computer it works well, but when I create a Lambda in AWS it behaves strangely and starts giving different answers with the same test data.
The model in the solve function is this:
# Recreate neural network from model file generated during training
# input
x = tf.placeholder(tf.float32, [None, size_of_image])
# weights
W = tf.Variable(tf.zeros([size_of_image, num_chars]))
# biases
b = tf.Variable(tf.zeros([num_chars]))
The solve function code to label the figures is this:
for testi in range(captcha_letters_num):
    # load model from file
    saver = tf.train.import_meta_graph(model_path + '.meta',
                                       clear_devices=True)
    saver.restore(sess, model_path)
    # Data to label
    test_x = np.asarray(char_imgs[testi], dtype=np.float32)
    predict_op = model(test_x, W, b)
    op = sess.run(predict_op, feed_dict={x: test_x})
    # find max probability from the probability distribution returned by softmax
    max_probability = op[0][0]
    max_probability_index = -1
    for i in range(num_chars):
        if op[0][i] > max_probability:
            max_probability = op[0][i]
            max_probability_index = i
    # append it to final output
    final_text += char_map_list[max_probability_index]
    # Reset the model so it can be used again
    tf.reset_default_graph()
With the same test data it gives different answers, and I don't know why.
Solved!
What I finally did was keep the Session outside the loop and initialize the variables once. After the loop ends, I reset the graph.
saver = tf.train.Saver()
sess = tf.Session()

# Initialize variables
sess.run(tf.global_variables_initializer())
.
.
.
# passing each of the 5 characters through the NNet
for testi in range(captcha_letters_num):
    # Data to label
    test_x = np.asarray(char_imgs[testi], dtype=np.float32)
    predict_op = model(test_x, W, b)
    op = sess.run(predict_op, feed_dict={x: test_x})
    # find max probability from the probability distribution returned by softmax
    max_probability = op[0][0]
    max_probability_index = -1
    for i in range(num_chars):
        if op[0][i] > max_probability:
            max_probability = op[0][i]
            max_probability_index = i
    # append it to final output
    final_text += char_map_list[max_probability_index]

# Reset the model so it can be used again
tf.reset_default_graph()
sess.close()