Tensorflow reuse variables in different name scope

Tensorflow reuse variables in different name scope - python

I have a problem reusing variables across different name scopes. The code below keeps the source embedding and the target embedding in two separate spaces. What I want is to put source and target in the same space, reusing the variables in the lookup table.
def decode_first_word(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    """Apply bidirectional encoding to the source-side inputs and decode the first word.

    Returns the list produced by ``decode_next_word`` with the concatenated
    encoder hidden tensor appended as its last element.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Source_Side'):
            source_embedding_tensor = self._src_lookup_table(source_vocab_id_tensor)

    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(
            source_embedding_tensor, source_mask_tensor)

    with tf.name_scope('Decoding_Layer_First'):
        # First step: decoder hidden state, previous target word and target
        # mask are all passed as None.
        rvals = self.decode_next_word(
            source_concated_hidden_tensor, source_mask_tensor,
            None, None, None, scope, reuse)

    return rvals + [source_concated_hidden_tensor]
def decode_next_word(self, enc_concat_hidden, src_mask, cur_dec_hidden,
                     cur_trg_wid, trg_mask=None, scope=None, reuse=False,
                     src_side_pre_act=None):
    """Apply one-step decoding.

    Only the target-side embedding lookup is shown in this excerpt.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Target_Side'):
            # Without a previous target word id there is nothing to embed.
            # `is not None` is an identity test, so it cannot be affected by
            # an overloaded `==` on the id object.
            cur_trg_wemb = None
            if cur_trg_wid is not None:
                cur_trg_wemb = self._trg_lookup_table(cur_trg_wid)
I want to change them as follows, so that there is only one embedding node in the whole graph:
def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    """Encode the source side and decode the first word using the shared lookup table.

    Returns the list produced by ``decode_next_word_shared_embedding`` with the
    concatenated encoder hidden tensor appended as its last element.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Bi_Side'):
            source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)

    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(
            source_embedding_tensor, source_mask_tensor)

    with tf.name_scope('Decoding_Layer_First'):
        rvals = self.decode_next_word_shared_embedding(
            source_concated_hidden_tensor, source_mask_tensor,
            None, None, None, scope, reuse)

    return rvals + [source_concated_hidden_tensor]
def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden,
                                      cur_trg_wid, trg_mask=None, scope=None, reuse=False,
                                      src_side_pre_act=None):
    """Apply one-step decoding, embedding the target word with the shared table.

    Only the embedding lookup is shown in this excerpt.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        # Without a previous target word id there is nothing to embed.
        # `is not None` is an identity test, so it cannot be affected by an
        # overloaded `==` on the id object.
        cur_trg_wemb = None
        if cur_trg_wid is not None:
            with tf.variable_scope('Bi_Side'):
                cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)
How to achieve this?

I solved it by using a dictionary to save the weight matrix of the embedding. The hint came from https://www.tensorflow.org/versions/r0.12/how_tos/variable_scope/

One of the solutions is to save the variable_scope instance and reuse it.
def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    """Encode the source side and decode the first word, sharing one embedding scope.

    The variable scope that is active while the source embedding is looked up
    is captured and handed to ``decode_next_word_shared_embedding`` so the
    target side can re-enter it with ``reuse=True``.

    Returns the list produced by ``decode_next_word_shared_embedding`` with the
    concatenated encoder hidden tensor appended as its last element.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Bi_Side'):
            source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)
            # Capture the scope object while 'Bi_Side' is still the current scope.
            shared_variable_scope = tf.get_variable_scope()

    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(
            source_embedding_tensor, source_mask_tensor)

    with tf.name_scope('Decoding_Layer_First'):
        # Positional order follows decode_next_word_shared_embedding:
        # (enc_concat_hidden, src_mask, cur_dec_hidden, shared_variable_scope,
        #  cur_trg_wid, trg_mask, scope, reuse). The captured scope must be
        # passed here, otherwise every later argument is shifted by one.
        rvals = self.decode_next_word_shared_embedding(
            source_concated_hidden_tensor, source_mask_tensor,
            None, shared_variable_scope, None, None, scope, reuse)

    # No trailing comma: the result is a list, not a 1-tuple wrapping a list.
    return rvals + [source_concated_hidden_tensor]
def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden, shared_variable_scope,
                                      cur_trg_wid, trg_mask=None, scope=None, reuse=False,
                                      src_side_pre_act=None):
    """Apply one-step decoding, reusing the embedding variables of a captured scope.

    shared_variable_scope: a variable scope object captured earlier; it is
        re-entered with ``reuse=True`` for the target-side lookup.

    Only the embedding lookup is shown in this excerpt.
    """
    with tf.variable_scope('Target_Side'):
        # Without a previous target word id there is nothing to embed.
        # `is not None` is an identity test, so it cannot be affected by an
        # overloaded `==` on the id object.
        cur_trg_wemb = None
        if cur_trg_wid is not None:
            with tf.variable_scope(shared_variable_scope, reuse=True):
                cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)
And this is my demo code:
# Demo: a variable created under Word_Embedding_Layer/Bi_Side is fetched again
# from inside an unrelated scope by re-entering the captured scope object.
with tf.variable_scope('Word_Embedding_Layer'):
    with tf.variable_scope('Bi_Side'):
        v = tf.get_variable('bi_var', [1], dtype=tf.float32)
        # Capture the scope while 'Bi_Side' is still the current scope.
        reuse_scope = tf.get_variable_scope()

with tf.variable_scope('Target_side'):
    # some other codes.
    # Entering a captured scope object (rather than a string) with reuse=True
    # makes get_variable return the existing variable; see the printed names.
    with tf.variable_scope(reuse_scope, reuse=True):
        w = tf.get_variable('bi_var', [1], dtype=tf.float32)

print(v.name)
print(w.name)
# Identity is the property being demonstrated: both names refer to one object.
assert v is w
Output:
Word_Embedding_Layer/Bi_Side/bi_var:0
Word_Embedding_Layer/Bi_Side/bi_var:0

Related

How to get the symbolic gradient in Tensorflow 2.x

I want to port the example from https://web.casadi.org/blog/tensorflow/, which was written for TensorFlow 1 with CasADi, to TensorFlow 2. I have adapted the code, but I still had to call tf.disable_v2_behavior() to get it working.
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
    """CasADi callback that evaluates TensorFlow graph tensors through a session."""

    def __init__(self, t_in, t_out, session, opts=None):
        """
        t_in: list of inputs (tensorflow placeholders)
        t_out: list of outputs (tensors dependent on those placeholders)
        session: a tensorflow session
        opts: optional dict of options passed on to ``construct``
        """
        ca.Callback.__init__(self)
        assert isinstance(t_in, list)
        self.t_in = t_in
        assert isinstance(t_out, list)
        self.t_out = t_out
        # Build a fresh dict when no options are given; a `{}` default in the
        # signature would be a single object shared by every call.
        self.construct("TensorFlowEvaluator", {} if opts is None else opts)
        self.session = session
        self.refs = []

    def get_n_in(self):
        return len(self.t_in)

    def get_n_out(self):
        return len(self.t_out)

    def get_sparsity_in(self, i):
        return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())

    def get_sparsity_out(self, i):
        return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())

    def eval(self, arg):
        # Associate each tensorflow input with the numerical argument passed by CasADi
        feed = {t: arg[i].toarray() for i, t in enumerate(self.t_in)}
        # Evaluate the tensorflow expressions
        return self.session.run(self.t_out, feed_dict=feed)

    # Vanilla tensorflow offers just the reverse mode AD
    def has_reverse(self, nadj):
        return nadj == 1

    def get_reverse(self, nadj, name, inames, onames, opts):
        # Construct tensorflow placeholders for the reverse seeds
        adj_seed = [
            tf.placeholder(shape=self.sparsity_out(i).shape, dtype=tf.float64)
            for i in range(self.n_out())
        ]
        # Construct the reverse tensorflow graph through 'gradients'
        grad = tf.gradients(self.t_out, self.t_in, grad_ys=adj_seed)
        # Create another TensorFlowEvaluator object
        callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.session)
        # Make sure you keep a reference to it
        self.refs.append(callback)
        # Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects.
        # The symbolic seeds get their own name so they do not shadow the
        # TensorFlow placeholders created above.
        nominal_in = self.mx_in()
        nominal_out = self.mx_out()
        adj_seed_mx = self.mx_out()
        return ca.Function(
            name,
            nominal_in + nominal_out + adj_seed_mx,
            callback.call(nominal_in + adj_seed_mx),
            inames,
            onames,
        )
if __name__ == "__main__":
    # TensorFlow side: y = sin(a) @ b built from placeholders.
    a = tf.placeholder(shape=(2, 2), dtype=tf.float64)
    b = tf.placeholder(shape=(2, 1), dtype=tf.float64)
    y = tf.matmul(tf.sin(a), b)

    with tf.Session() as session:
        f_tf = TensorFlowEvaluator([a, b], [y], session)

        # From here on a, b and y are rebound to CasADi symbols.
        a = ca.MX.sym("a", 2, 2)
        # Give the second symbol its own name instead of reusing "a".
        b = ca.MX.sym("b", 2, 1)
        y = f_tf(a, b)
        # Pure-CasADi reference for the same expression.
        yref = ca.mtimes(ca.sin(a), b)

        f = ca.Function('f', [a, b], [ca.jacobian(y, a)])
        fref = ca.Function('f', [a, b], [ca.jacobian(yref, a)])

        print(f(ca.DM([[1, 2], [3, 4]]), ca.DM([[1], [3]])))
        print(fref(ca.DM([[1, 2], [3, 4]]), ca.DM([[1], [3]])))
Now I want to write this purely using TensorFlow 2.x. Eager execution is enabled by default, so I was thinking of using @tf.function to calculate the gradient:
@tf.function
def f_k(input_dat):
    """Return ``sin(input_dat[0]) @ input_dat[1]`` and its gradients w.r.t. the inputs."""
    y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
    grads = tf.gradients([y], input_dat)
    # grads = tape.gradient([y], input_dat)
    tf.print('tf >>', grads)
    print('print >>', grads)
    return y, grads
Here is the updated code at the moment:
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
    """CasADi callback wrapping a TensorFlow 2 callable (work-in-progress port)."""

    def __init__(self, t_in, t_out, model, opts=None):
        """
        t_in: list of inputs (TensorFlow variables)
        t_out: list of outputs; element 0 is called with the list of inputs
        model: stored on the instance, not used by the methods shown here
        opts: optional dict of options passed on to ``construct``
        """
        ca.Callback.__init__(self)
        assert isinstance(t_in, list)
        self.t_in = t_in
        assert isinstance(t_out, list)
        self.t_out = t_out
        # Build a fresh dict when no options are given; a `{}` default in the
        # signature would be a single object shared by every call.
        self.construct("TensorFlowEvaluator", {} if opts is None else opts)
        self.refs = []
        self.model = model

    def get_n_in(self):
        return len(self.t_in)

    def get_n_out(self):
        return len(self.t_out)

    def get_sparsity_in(self, i):
        tensor_shape = self.t_in[i].get_shape().as_list()
        return Sparsity.dense(tensor_shape[0], tensor_shape[1])

    def get_sparsity_out(self, i):
        # NOTE(review): the output shape is hard-coded to 2x1 regardless of i.
        return Sparsity.dense(2, 1)

    def eval(self, arg):
        # Associate each tensorflow input with the numerical argument passed by CasADi
        print(arg)
        updated_t = [tf.Variable(arg[i].toarray()) for i, _ in enumerate(self.t_in)]
        # Evaluate the tensorflow expressions
        target = self.t_out[0]
        if not tf.is_tensor(target):
            # A non-tensor target is called and the first element of its
            # result is used.
            ret = target(updated_t)[0].numpy()
        else:
            ret = target(updated_t).numpy()
        return [ca.DM(ret)]

    # Vanilla tensorflow offers just the reverse mode AD
    def has_reverse(self, nadj):
        return nadj == 1

    def get_reverse(self, nadj, name, inames, onames, opts):
        initializer = tf.random_normal_initializer(mean=1., stddev=2.)
        adj_seed = [
            tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64))
            for i in range(self.n_out())
        ]
        tf.config.run_functions_eagerly(False)
        # Debug output; each label now matches the value printed next to it.
        print("=============== self.t_out========", self.t_out)
        print("=============== self.t_in========", self.t_in)
        # grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
        out_, grad = self.t_out[0](self.t_in)
        print("============== grad========", grad)
        # Create another TensorFlowEvaluator object
        callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
        # Make sure you keep a reference to it
        self.refs.append(callback)
        # Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
        nominal_in = self.mx_in()
        nominal_out = self.mx_out()
        adj_seed_mx = self.mx_out()
        return ca.Function(
            name,
            nominal_in + nominal_out + adj_seed_mx,
            callback.call(nominal_in + adj_seed_mx),
            inames,
            onames,
        )
if __name__ == "__main__":
    initializer = tf.random_normal_initializer(mean=1., stddev=2.)
    a = tf.Variable(initializer(shape=(2, 2), dtype=tf.float64))
    b = tf.Variable(initializer(shape=(2, 1), dtype=tf.float64))

    @tf.function
    def f_k(input_dat):
        """Return ``sin(input_dat[0]) @ input_dat[1]`` and its gradients w.r.t. the inputs."""
        y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
        grads = tf.gradients([y], input_dat)
        # grads = tape.gradient([y], input_dat)
        tf.print('tf >>', grads)
        print('print >>', grads)
        return y, grads

    f_tf = TensorFlowEvaluator([a, b], [f_k], None)

    # From here on a, b and y are rebound to CasADi symbols.
    a = ca.MX.sym("a", 2, 2)
    # Give the second symbol its own name instead of reusing "a".
    b = ca.MX.sym("b", 2, 1)
    y = f_tf(a, b)
    # Pure-CasADi reference for the same expression.
    yref = ca.mtimes(ca.sin(a), b)

    f = ca.Function('f', [a, b], [ca.jacobian(y, a)])
    fref = ca.Function('f', [a, b], [ca.jacobian(yref, a)])

    print(fref(ca.DM([[1, 2], [3, 4]]), ca.DM([[1], [3]])))
    print(f(ca.DM([[1, 2], [3, 4]]), ca.DM([[1], [3]])))
Problem:
In the get_reverse method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed), I get symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>] in Tensorflow 1.
However, in TensorFlow 2 I always get numerical results. I can access the graph through self.t_out[0].get_concrete_function(self.t_in).graph (similar to here), but what I get from it is not callable.
What would be the better way to get the symbolic gradient like in Tensorflow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad should return symbolic form of the gradient rather than numerical evaluation

Updating Unrolled GAN to TF2

I am trying to implement the Unrolled GAN model as described here, with example code. However, it was implemented using TF1, and I have been doing my best to update it but I am relatively new to python and TF (only been using it for the past ~6 months).
The line(s) that I cannot seem to make work (for the moment, there may be more) is this one:
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
These both return empty lists, and I cannot see what I am missing. Even without specifying a scope, the get_collection() returns []. Earlier, we define both generator and discriminator as scopes like so:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
Is there a problem with the definition of the scope?
Here is my updated code in full, in case there is maybe something I missed elsewhere:
%pylab inline
from collections import OrderedDict
import tensorflow as tf
import tensorflow_probability as tfp
ds = tfp.distributions
# slim = tf.contrib.slim
import tf_slim as slim
from keras.optimizers import Adam
try:
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy
generate_movie = True
except:
print("Warning: moviepy not found.")
generate_movie = False
def remove_original_op_attributes(graph):
    """Remove _original_op attribute from all operations in a graph."""
    for op in graph.get_operations():
        op._original_op = None


def graph_replace(*args, **kwargs):
    """Monkey patch graph_replace so that it works with TF 1.0"""
    # Use the compat.v1 accessor, matching the other tf.compat.v1 calls in
    # this script.
    remove_original_op_attributes(tf.compat.v1.get_default_graph())
    # NOTE(review): _graph_replace is not defined or imported in the code
    # shown here; confirm where it is expected to come from.
    return _graph_replace(*args, **kwargs)
def extract_update_dict(update_ops):
    """Extract variables and their new values from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variable values to their updated value

    Raises:
        ValueError: if an op is neither an Assign nor an AssignAdd.
    """
    name_to_var = {v.name: v for v in tf.compat.v1.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        # Input 0 of the op is the variable being written, input 1 the operand.
        var = name_to_var[update.op.inputs[0].name]
        value = update.op.inputs[1]
        op_type = update.op.type
        if op_type == 'Assign':
            updates[var.value()] = value
        elif op_type == 'AssignAdd':
            updates[var.value()] = var + value
        else:
            # The message previously referenced an undefined name `update_op`,
            # which raised NameError instead of this ValueError.
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % op_type)
    return updates
def sample_mog(batch_size, n_mixture=8, std=0.01, radius=1.0):
    """Sample ``batch_size`` points from a mixture of Gaussians centred on a circle.

    ``n_mixture`` angles are taken evenly from 0 to 2*pi (endpoints included),
    each component is a diagonal Gaussian with scale ``std`` at distance
    ``radius`` from the origin, and all components share the same logits.
    """
    angles = np.linspace(0, 2 * np.pi, n_mixture)
    centers_x = radius * np.sin(angles)
    centers_y = radius * np.cos(angles)
    mixing = ds.Categorical(tf.zeros(n_mixture))
    components = [
        ds.MultivariateNormalDiag([cx, cy], [std, std])
        for cx, cy in zip(centers_x.ravel(), centers_y.ravel())
    ]
    mixture = ds.Mixture(mixing, components)
    return mixture.sample(batch_size)
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    """Build the generator under the "generator" variable scope.

    ``n_layer`` fully connected tanh layers of width ``n_hidden`` are followed
    by a linear layer of width ``output_dim``.
    """
    hidden_sizes = [n_hidden] * n_layer
    with tf.compat.v1.variable_scope("generator"):
        hidden = slim.stack(z, slim.fully_connected, hidden_sizes, activation_fn=tf.nn.tanh)
        return slim.fully_connected(hidden, output_dim, activation_fn=None)
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
    """Build the discriminator under the "discriminator" variable scope.

    ``n_layer`` fully connected tanh layers of width ``n_hidden`` are followed
    by a single linear output unit. ``reuse`` is forwarded to the variable scope.
    """
    hidden_sizes = [n_hidden] * n_layer
    with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
        hidden = slim.stack(x, slim.fully_connected, hidden_sizes, activation_fn=tf.nn.tanh)
        return slim.fully_connected(hidden, 1, activation_fn=None)
params = dict(
batch_size=512,
disc_learning_rate=1e-4,
gen_learning_rate=1e-3,
beta1=0.5,
epsilon=1e-8,
max_iter=25000,
viz_every=5000,
z_dim=256,
x_dim=2,
unrolling_steps=5,
)
tf.compat.v1.reset_default_graph()
data = sample_mog(params['batch_size'])
noise = ds.Normal(tf.zeros(params['z_dim']),
tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
# with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=1.4)): ## old
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.keras.initializers.Orthogonal(gain=1.4)):
samples = generator(noise, output_dim=params['x_dim'])
real_score = discriminator(data)
fake_score = discriminator(samples, reuse=True)
# Saddle objective
loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(real_score, dtype=tf.float32), labels=tf.cast(tf.ones_like(real_score), dtype=tf.float32)) +
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(fake_score, dtype=tf.float32), labels=tf.cast(tf.zeros_like(fake_score), dtype=tf.float32)))
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
# updates = d_opt.get_updates(disc_vars, [], loss) ## old
updates = d_opt.get_updates(loss, [])
d_train_op = tf.group(*updates, name="d_train_op")
### I HAVE NOT UPDATED BEYOND THIS POINT ###
# Unroll optimization of the discrimiantor
if params['unrolling_steps'] > 0:
# Get dictionary mapping from variables to their update value after one optimization step
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in xrange(params['unrolling_steps'] - 1):
# Compute variable updates given the previous iteration's updated variable
cur_update_dict = graph_replace(update_dict, cur_update_dict)
# Final unrolled loss uses the parameters at the last time step
unrolled_loss = graph_replace(loss, cur_update_dict)
else:
unrolled_loss = loss
# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

The implementation of get_collection:
def get_collection(key, scope=None):
    """Wrapper for `Graph.get_collection()` using the default graph.

    See `tf.Graph.get_collection` for more details.

    Args:
      key: The key for the collection. For example, the `GraphKeys` class
        contains many standard names for collections.
      scope: (Optional.) If supplied, the resulting list is filtered to include
        only items whose `name` attribute matches using `re.match`. Items
        without a `name` attribute are never returned if a scope is supplied,
        and the choice of `re.match` means that a `scope` without special
        tokens filters by prefix.

    Returns:
      The list of values in the collection with the given `name`, or an empty
      list if no value has been added to that collection. The list contains
      the values in the order under which they were collected.

    @compatibility(eager)
    Collections are not supported when eager execution is enabled.
    @end_compatibility
    """
    default_graph = get_default_graph()
    return default_graph.get_collection(key, scope)
It looks like in this code, key and scope arguments are swapped. If you provide "generator" or "discriminator" as the key with no scope i.e;
gen_vars = tf.compat.v1.get_collection("generator")
disc_vars = tf.compat.v1.get_collection("discriminator")
You should get results (I was able to reproduce locally with TensorFlow 2.2.0). The only issue I could not quite identify is that, when a scope is provided, the function returns an empty list again, regardless of the scope value. For example, tf.compat.v1.GraphKeys.GLOBAL_VARIABLES should return everything, but that is not the case:
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) # returns []
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) # returns []
disc_vars = tf.compat.v1.get_collection('generator') # returns a list of tensors
Update
It looks like even creating the variables in the context manager doesn't add them to the graph collection. I had to call tf.compat.v1.add_to_collection('generator', x) and tf.compat.v1.add_to_collection('discriminator', log_d) in the respective functions to get those results.
Update #2
I searched around and it doesn't appear there's a context manager which enables you to add variables declared within it to a Tensorflow collection. For the sake of completeness of this answer though, I have implemented one:
from contextlib import contextmanager
@contextmanager
def collection_scope(collection_name):
    """Context manager that adds eager tensors bound in the caller to a collection.

    On exit, the local variables of the frame using the ``with`` statement are
    inspected and every ``EagerTensor`` among them that is not also a local of
    that frame's own caller is added to ``collection_name``.
    """
    import inspect
    from tensorflow.python.framework.ops import EagerTensor

    collection = tf.compat.v1.get_collection_ref(collection_name)
    yield
    # this is a bit of a hack, but it works...
    # Two frames up from this generator: past the contextmanager machinery,
    # to the function that contains the `with` block.
    f = inspect.currentframe().f_back.f_back
    # only take variables which were declared within the context manager
    inner_refs = {val.ref() for val in f.f_locals.values() if isinstance(val, EagerTensor)}
    outer_refs = {val.ref() for val in f.f_back.f_locals.values() if isinstance(val, EagerTensor)}
    collection.extend(inner_refs - outer_refs)
You can then drop this in your functions in place of the variable scope (tf.compat.v1.variable_scope) context managers. For example, instead of:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
Do the following:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with collection_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
With this change, all tensors declared within the scope of the context manager will be added to the collection "generator" - tf.compat.v1.get_collection('generator') will return the correct list of tensors.

Tensorflow ExponentialMovingAverage with PartitionedVariable

I'm trying to use tf.train.ExponentialMovingAverage with a PartitionedVariable.
I use a custom_getter to create an EMA version of the graph.
If I don't use a partitioner to create my variable the following code works as expected : after setting the variable to zero, with a decay of 1, the EMA version of this variable keeps the original value.
However, if I use a partitioner, I run into the following issues:
- With TF 1.12, the ema_getter is unable to find the average of the PartitionedVariable, so the two variables end up being the same object.
- With TF 1.15, I get an AttributeError: 'PartitionedVariable' object has no attribute 'experimental_ref'
Here is my code
import tensorflow as tf
import numpy as np
def ema_getter(ema):
    """Return a custom getter that substitutes a variable with its EMA shadow.

    ema: object whose ``average(var)`` returns the shadow variable for ``var``,
        or ``None`` when it has none.
    """
    def _ema_getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)
        # Test for None explicitly: "no shadow" is signalled by None, and the
        # truth value of a variable object is not a reliable stand-in for that.
        if ema_var is None:
            tf.logging.warning(f"Unable to find EMA of {name}")
            return var
        return ema_var
    return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum)) # 20.0
print(sess.run(var_ema_sum)) # 20.0
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum)) # 0.0
print(sess.run(var_ema_sum)) # should be 20.0

My understanding so far is that the PartitionedVariable does not act as a standard Variable but is merely a shell for a list of other Variable.
The custom_getter needs to take this into account and manually retrieve and reconstruct a PartitionedVariable using the EMA versions of the original PartitionedVariable's variables.
However, this seems hacky: it relies on .__class__ because I could not find a clean way to import PartitionedVariable, and it accesses the private attribute _partitions.
Sharing my current fix here
import tensorflow as tf
import numpy as np
def ema_getter(ema):
    """Return a custom getter that substitutes a variable with its EMA shadow.

    A ``PartitionedVariable`` is rebuilt from the shadows of its partitions.
    When a shadow is missing, a warning is logged and the original variable is
    returned unchanged.

    ema: object whose ``average(var)`` returns the shadow variable for ``var``,
        or ``None`` when it has none.
    """
    def _ema_getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        # Manually reconstruct if PartitionedVariable. The class is matched by
        # name and re-instantiated through var.__class__ because it is not
        # imported here.
        if var.__class__.__name__ == "PartitionedVariable":
            ema_vs = [ema.average(v) for v in var]
            if any(shadow is None for shadow in ema_vs):
                # At least one partition has no shadow; do not build a
                # PartitionedVariable out of None entries.
                ema_var = None
            else:
                ema_var = var.__class__(
                    name=var.name,
                    shape=var.shape,
                    dtype=var.dtype,
                    variable_list=ema_vs,
                    partitions=var._partitions,
                )
        else:
            ema_var = ema.average(var)
        # Test for None explicitly rather than relying on the truth value of
        # the variable object.
        if ema_var is None:
            tf.logging.warning(f"Unable to find EMA of {name}")
            return var
        return ema_var
    return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))

Getting access to class's variables inside another class's def

I'm trying to override the __str__ method in the Person class:
class Person(object):
    """Container for 25 named body keypoints, each defaulting to None."""

    def __init__(self, Nose=None, Neck=None, RShoulder=None, RElbow=None, RWrist=None,
                 LShoulder=None, LElbow=None, LWrist=None, MidHip=None, RHip=None,
                 RKnee=None, RAnkle=None, LHip=None, LKnee=None, LAnkle=None,
                 REye=None, LEye=None, REar=None, LEar=None, LBigToe=None,
                 LSmallToe=None, LHeel=None, RBigToe=None, RSmallToe=None, RHeel=None):
        self.Nose = Nose
        self.Neck = Neck
        self.RShoulder = RShoulder
        self.RElbow = RElbow
        self.RWrist = RWrist
        self.LShoulder = LShoulder
        self.LElbow = LElbow
        self.LWrist = LWrist
        self.MidHip = MidHip
        self.RHip = RHip
        self.RKnee = RKnee
        self.RAnkle = RAnkle
        self.LHip = LHip
        self.LKnee = LKnee
        self.LAnkle = LAnkle
        self.REye = REye
        self.LEye = LEye
        self.REar = REar
        self.LEar = LEar
        self.LBigToe = LBigToe
        self.LSmallToe = LSmallToe
        self.LHeel = LHeel
        self.RBigToe = RBigToe
        self.RSmallToe = RSmallToe
        self.RHeel = RHeel

    def __str__(self):
        # Format string kept exactly as posted: it labels only Nose and Neck
        # and lists the remaining values comma-separated.
        return 'Nose = %s\nNeck = \n%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' % (
            self.Nose, self.Neck, self.RShoulder, self.RElbow, self.RWrist,
            self.LShoulder, self.LElbow, self.LWrist, self.MidHip, self.RHip,
            self.RKnee, self.RAnkle, self.LHip, self.LKnee, self.LAnkle,
            self.REye, self.LEye, self.REar, self.LEar, self.LBigToe,
            self.LSmallToe, self.LHeel, self.RBigToe, self.RSmallToe, self.RHeel)
And I want to find a more elegant way to return a string that looks like this:
Nose = something
Neck = something
...
...
...

Question: elegant way to return a string which will look like ...
You can use the built-in vars function to get the __dict__ of the class variable and format it using .format(... and .join(....
Reference:
vars([object])
Return the __dict__ attribute for a module, class, instance, or any other object with a __dict__ attribute.
.format(value[, format_spec])
Convert a value to a “formatted” representation, as controlled by a standard formatting syntax that is used by most built-in types: Format Specification Mini-Language.
<str>.join(iterable)
Return a string which is the concatenation of the strings in iterable.
class Person:
    """Keypoint container whose string form lists one ``name = value`` per line."""

    # Attribute names accepted as keyword arguments, in output order.
    # Extend this tuple to support more keypoints.
    _KEYPOINTS = ('Nose', 'Neck', 'RShoulder')

    def __init__(self, **kwargs):
        # Keypoints that are not supplied default to None.
        for keypoint in self._KEYPOINTS:
            setattr(self, keypoint, kwargs.get(keypoint))

    def __str__(self):
        # vars(self) is the instance __dict__, which keeps insertion order.
        return '\n'.join('{} = {}'.format(k, v) for k, v in vars(self).items())
p = Person(Nose=1, Neck=1)
print(p)
Output:
Nose = 1
Neck = 1
RShoulder = None
Tested with Python: 3.6

Error when referencing a class within its init method

I am putting the NMT TensorFlow code inside a main class. The code base has two classes, 'Encoder' and 'Decoder'. Each is referenced in its own `__init__` method. However, this raises an error: 'Undefined named Encoder'.
class TranslationModel(ModelBase):
    """Translation model holding its configuration and nested Encoder/Decoder classes."""

    # Evaluated once, when the class body is executed.
    pathToZip = tf.keras.utils.get_file(
        'spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', extract=True)
    pathToFile = os.path.dirname(pathToZip) + "/spa-eng/spa.txt"

    def __init__(self,
                 batchSize=64,
                 bufferSize=None,
                 numberOfBatches=None,
                 units=1024,
                 vocabInputSize=None,
                 vocabTargetSize=None,
                 optimizer=None,
                 dataSetPath=None,
                 inputTensor=None,
                 targetTensor=None,
                 inputLanguage=None,
                 targetLanguage=None,
                 maxLengthInput=None,
                 maxLengthTarget=None,
                 embeddingDimension=256, *arg, **kwargs):
        """Store the configuration and forward remaining arguments to ModelBase.

        optimizer: when None, a new ``tf.train.AdamOptimizer()`` is created for
            this instance.
        """
        # Store the values that were passed in; these five were previously
        # overwritten with constants (64 / None), ignoring the arguments.
        self.batchSize = batchSize
        self.bufferSize = bufferSize
        self.numberOfBatches = numberOfBatches
        self.units = units
        self.vocabInputSize = vocabInputSize
        self.vocabTargetSize = vocabTargetSize
        # Created per instance: a default written in the signature is built
        # once at definition time and shared by every model.
        self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
        self.dataSetPath = dataSetPath
        self.targetTensor = targetTensor
        self.inputTensor = inputTensor
        self.inputLanguage = inputLanguage
        self.targetLanguage = targetLanguage
        self.maxLengthInput = maxLengthInput
        self.maxLengthTarget = maxLengthTarget
        self.embeddingDimension = embeddingDimension
        super().__init__(*arg, **kwargs)

    # OTHER FUNCTIONS HERE

    class Encoder(tf.keras.Model):
        def __init__(self, vocabSize, embeddingDimension, encoderUnits, batchSize):
            # Zero-argument super() needs no class name. The bare name
            # `Encoder` is not visible from inside this method, because a
            # class body is not an enclosing scope for its methods; the
            # nested class is only reachable as TranslationModel.Encoder.
            super().__init__()
            # Other code here

    class Decoder(tf.keras.Model):
        def __init__(self, vocabSize, embeddingDimension, dec_units, batchSize):
            # super() takes a class object, never the class name as a string.
            super().__init__()
            # Other code

This happens because a class defined inside another class has to be referenced through the outer class: OuterClass.InnerClass.
A bare InnerClass will not resolve.
In your case that is TranslationModel.Encoder. In Python 3 you can also call super().__init__() with no arguments, which avoids naming the class at all.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tensorflow reuse variables in different name scope - python

I solved it by using a dictionary to save the weight matrix of the embedding. The hint came from https://www.tensorflow.org/versions/r0.12/how_tos/variable_scope/

Related

How to get the symbolic gradient in Tensorflow 2.x

Updating Unrolled GAN to TF2

Tensorflow ExponentialMovingAverage with PartitionedVariable

Getting access to class's variables inside another class's def

Error when referencing a class within its init method

Categories

Resources

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tensorflow reuse variables in different name scope - python

I solved it by using a dictionary to save the weight matrix of the embedding. The hint came from https://www.tensorflow.org/versions/r0.12/how_tos/variable_scope/

Related

How to get the symbolic gradient in Tensorflow 2.x

Updating Unrolled GAN to TF2

Tensorflow ExponentialMovingAverage with PartitionedVariable

Getting access to class's variables inside another class's def

Error when referencing a class within its __init__ method

Categories

Resources

Error when referencing a class within its init method