Tensorflow reuse variables in different name scope - python

I have a problem reusing a variable across different name scopes. The code below keeps the source embedding and the target embedding in two separate spaces. What I want is to put source and target in the same space, reusing the variables of the lookup table.
''' Applying bidirectional encoding for source-side inputs and first-word decoding.
'''
def decode_first_word(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
with tf.name_scope('Word_Embedding_Layer'):
with tf.variable_scope('Source_Side'):
source_embedding_tensor = self._src_lookup_table(source_vocab_id_tensor)
with tf.name_scope('Encoding_Layer'):
source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(\
source_embedding_tensor, source_mask_tensor)
with tf.name_scope('Decoding_Layer_First'):
rvals = self.decode_next_word(source_concated_hidden_tensor, source_mask_tensor, \
None, None, None, scope, reuse)
return rvals + [source_concated_hidden_tensor]
''' Applying one-step decoding.
'''
def decode_next_word(self, enc_concat_hidden, src_mask, cur_dec_hidden, \
cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
src_side_pre_act=None):
with tf.name_scope('Word_Embedding_Layer'):
with tf.variable_scope('Target_Side'):
cur_trg_wemb = None
if None == cur_trg_wid:
pass
else:
cur_trg_wemb = self._trg_lookup_table(cur_trg_wid)
I want to change them as follows, so that there is only one embedding node in the whole graph:
def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
with tf.name_scope('Word_Embedding_Layer'):
with tf.variable_scope('Bi_Side'):
source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)
with tf.name_scope('Encoding_Layer'):
source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(\
source_embedding_tensor, source_mask_tensor)
with tf.name_scope('Decoding_Layer_First'):
rvals = self.decode_next_word_shared_embedding(source_concated_hidden_tensor, source_mask_tensor, \
None, None, None, scope, reuse)
return rvals + [source_concated_hidden_tensor]
def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden, \
cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
src_side_pre_act=None):
with tf.name_scope('Word_Embedding_Layer'):
cur_trg_wemb = None
if None == cur_trg_wid:
pass
else:
with tf.variable_scope('Bi_Side'):
cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)
How to achieve this?

I solved it by using a dictionary to save the weight matrix of the embedding, following a hint from https://www.tensorflow.org/versions/r0.12/how_tos/variable_scope/

One of the solutions is to save the variable_scope instance and reuse it.
def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    """Bi-encode the source side and decode the first target word.

    The source ids are embedded with the shared lookup table inside the
    'Bi_Side' variable scope. That scope object is captured and passed on
    to decode_next_word_shared_embedding so the target side can re-enter
    it with reuse=True instead of creating a second embedding.

    Returns:
        The values returned by decode_next_word_shared_embedding with the
        encoder's concatenated hidden tensor appended.
    """
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Bi_Side'):
            source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)
            # Capture the scope while still inside it, so that it refers to
            # 'Bi_Side' rather than to the enclosing scope.
            shared_variable_scope = tf.get_variable_scope()
    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(
            source_embedding_tensor, source_mask_tensor)
    with tf.name_scope('Decoding_Layer_First'):
        # Positional order follows the decoder's signature:
        # (enc_concat_hidden, src_mask, cur_dec_hidden, shared_variable_scope,
        #  cur_trg_wid, trg_mask, scope, reuse).
        rvals = self.decode_next_word_shared_embedding(
            source_concated_hidden_tensor, source_mask_tensor,
            None, shared_variable_scope, None, None, scope, reuse)
    # No trailing comma here: the caller expects a flat list, not a 1-tuple.
    return rvals + [source_concated_hidden_tensor]
def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden, shared_variable_scope, \
cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
src_side_pre_act=None):
with tf.variable_scope('Target_Side'):
cur_trg_wemb = None
if None == cur_trg_wid:
pass
else:
with tf.variable_scope(shared_variable_scope, reuse=True):
cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)
And this is my demo code:
# Create the variable once inside Word_Embedding_Layer/Bi_Side and keep a
# handle on that scope object so it can be re-entered later.
with tf.variable_scope('Word_Embedding_Layer'):
    with tf.variable_scope('Bi_Side'):
        v = tf.get_variable('bi_var', [1], dtype=tf.float32)
        reuse_scope = tf.get_variable_scope()
with tf.variable_scope('Target_side'):
    # some other codes.
    # Entering a captured scope object (instead of a string name) jumps back
    # to that scope's own name, ignoring the enclosing 'Target_side'.
    with tf.variable_scope(reuse_scope, reuse=True):
        w = tf.get_variable('bi_var', [1], dtype=tf.float32)
print(v.name)
print(w.name)
# Identity check: with reuse=True get_variable hands back the very same
# variable object, and `is` stays a plain bool even where `==` is overloaded.
assert v is w
Output:
Word_Embedding_Layer/Bi_Side/bi_var:0
Word_Embedding_Layer/Bi_Side/bi_var:0

Related

How to get the symbolic gradient in Tensorflow 2.x

I want to port the example at https://web.casadi.org/blog/tensorflow/, which was written for Tensorflow 1 with CasADi, to Tensorflow 2. I have adapted the code, but I still had to call tf.disable_v2_behavior() to get it working.
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
def __init__(self,t_in,t_out,session, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
session: a tensorflow session
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.session = session
self.refs = []
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self,i):
return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())
def get_sparsity_out(self,i):
return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())
def eval(self,arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
d = dict((v,arg[i].toarray()) for i,v in enumerate(self.t_in))
# Evaluate the tensorflow expressions
ret = self.session.run(self.t_out,feed_dict=d)
return ret
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self,nadj,name,inames,onames,opts):
# Construct tensorflow placeholders for the reverse seeds
adj_seed = [tf.placeholder(shape=self.sparsity_out(i).shape,dtype=tf.float64) for i in range(self.n_out())]
# Construct the reverse tensorflow graph through 'gradients'
grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in+adj_seed,grad,self.session)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name,nominal_in+nominal_out+adj_seed,callback.call(nominal_in+adj_seed),inames,onames)
if __name__=="__main__":
a = tf.placeholder(shape=(2,2),dtype=tf.float64)
b = tf.placeholder(shape=(2,1),dtype=tf.float64)
y = tf.matmul(tf.sin(a), b)
with tf.Session() as session:
f_tf = TensorFlowEvaluator([a,b], [y], session)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Now I want to write this purely in Tensorflow 2.x. Since eager execution is enabled by default, I was thinking of using @tf.function to calculate the gradient:
@tf.function
def f_k(input_dat):
    """Compute sin(input_dat[0]) @ input_dat[1] and its gradients.

    Args:
        input_dat: indexable pair of tensors/variables; element 0 is passed
            through tf.sin and matrix-multiplied with element 1.

    Returns:
        A tuple (y, grads): y is the matmul result and grads is what
        tf.gradients returns for y with respect to input_dat.
    """
    y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
    # tf.gradients is only usable in a graph context, which the
    # @tf.function decorator provides.
    grads = tf.gradients([y], input_dat)
    # grads = tape.gradient([y], input_dat)
    tf.print('tf >>', grads)
    # NOTE(review): a plain Python print inside a tf.function presumably runs
    # only while the function is being traced - confirm against the TF docs.
    print('print >>', grads)
    return y, grads
Here is the updated code at the moment:
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
def __init__(self, t_in, t_out, model, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.refs = []
self.model = model
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self, i):
tesnor_shape = self.t_in[i].get_shape().as_list()
return Sparsity.dense(tesnor_shape[0], tesnor_shape[1])
# return Sparsity.dense(4, 1)
def get_sparsity_out(self, i):
return Sparsity.dense(2, 1)
def eval(self, arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
print(arg)
# d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
updated_t = []
for i,v in enumerate(self.t_in):
updated_t.append(tf.Variable(arg[i].toarray()))
# Evaluate the tensorflow expressions
if not tf.is_tensor(self.t_out[0]):
ret = self.t_out[0](updated_t)[0].numpy()
else:
ret = self.t_out[0](updated_t).numpy()
return [ca.DM(ret)]
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self, nadj, name, inames, onames, opts):
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
tf.config.run_functions_eagerly(False)
print("=============== self.t_in========", self.t_out)
print("=============== self.t_out========", self.t_in)
# grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
out_, grad = self.t_out[0](self.t_in)
print("============== grad========", grad)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)
if __name__=="__main__":
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
a = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
b = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))
#tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
f_tf = TensorFlowEvaluator([a,b], [f_k], None)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Problem:
In the get_reverse method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed), I get symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>] in Tensorflow 1.
However, in Tensorflow 2, I always get numerical results. I can access the graph, e.g. via self.t_out[0].get_concrete_function(self.t_in).graph (similar to the approach linked here), but its contents are not callable.
What would be the better way to get the symbolic gradient like in Tensorflow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad should return symbolic form of the gradient rather than numerical evaluation

Updating Unrolled GAN to TF2

I am trying to implement the Unrolled GAN model as described here, with example code. However, it was implemented using TF1, and I have been doing my best to update it but I am relatively new to python and TF (only been using it for the past ~6 months).
The line(s) that I cannot seem to make work (for the moment, there may be more) is this one:
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
These both return empty lists, and I cannot see what I am missing. Even without specifying a scope, the get_collection() returns []. Earlier, we define both generator and discriminator as scopes like so:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
Is there a problem with the definition of the scope?
Here is my updated code in full, in case there is maybe something I missed elsewhere:
%pylab inline
from collections import OrderedDict
import tensorflow as tf
import tensorflow_probability as tfp
ds = tfp.distributions
# slim = tf.contrib.slim
import tf_slim as slim
from keras.optimizers import Adam
# moviepy is optional: when it cannot be imported, movie generation is
# switched off instead of aborting the notebook.
try:
    from moviepy.video.io.bindings import mplfig_to_npimage
    import moviepy.editor as mpy
    generate_movie = True
except ImportError:
    # Catch only import failures so that unrelated errors (including
    # KeyboardInterrupt) are not silently swallowed.
    print("Warning: moviepy not found.")
    generate_movie = False
def remove_original_op_attributes(graph):
"""Remove _original_op attribute from all operations in a graph."""
for op in graph.get_operations():
op._original_op = None
def graph_replace(*args, **kwargs):
"""Monkey patch graph_replace so that it works with TF 1.0"""
remove_original_op_attributes(tf.get_default_graph())
return _graph_replace(*args, **kwargs)
def extract_update_dict(update_ops):
    """Extract variables and their new values from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variable values to their updated value

    Raises:
        ValueError: if an op in update_ops is neither Assign nor AssignAdd.
        KeyError: if an op's first input is not the name of a global variable.
    """
    # Index the global variables by name so each op's target can be looked up.
    name_to_var = {v.name: v for v in tf.compat.v1.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        # inputs[0] is the variable being written, inputs[1] the operand.
        var_name = update.op.inputs[0].name
        var = name_to_var[var_name]
        value = update.op.inputs[1]
        if update.op.type == 'Assign':
            updates[var.value()] = value
        elif update.op.type == 'AssignAdd':
            updates[var.value()] = var + value
        else:
            # The original referenced an undefined name here, which would have
            # raised NameError instead of the intended ValueError.
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
    return updates
def sample_mog(batch_size, n_mixture=8, std=0.01, radius=1.0):
thetas = np.linspace(0, 2 * np.pi, n_mixture)
xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
cat = ds.Categorical(tf.zeros(n_mixture))
comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
data = ds.Mixture(cat, comps)
return data.sample(batch_size)
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
params = dict(
batch_size=512,
disc_learning_rate=1e-4,
gen_learning_rate=1e-3,
beta1=0.5,
epsilon=1e-8,
max_iter=25000,
viz_every=5000,
z_dim=256,
x_dim=2,
unrolling_steps=5,
)
tf.compat.v1.reset_default_graph()
data = sample_mog(params['batch_size'])
noise = ds.Normal(tf.zeros(params['z_dim']),
tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
# with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=1.4)): ## old
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.keras.initializers.Orthogonal(gain=1.4)):
samples = generator(noise, output_dim=params['x_dim'])
real_score = discriminator(data)
fake_score = discriminator(samples, reuse=True)
# Saddle objective
loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(real_score, dtype=tf.float32), labels=tf.cast(tf.ones_like(real_score), dtype=tf.float32)) +
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(fake_score, dtype=tf.float32), labels=tf.cast(tf.zeros_like(fake_score), dtype=tf.float32)))
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
# updates = d_opt.get_updates(disc_vars, [], loss) ## old
updates = d_opt.get_updates(loss, [])
d_train_op = tf.group(*updates, name="d_train_op")
### I HAVE NOT UPDATED BEYOND THIS POINT ###
# Unroll optimization of the discrimiantor
if params['unrolling_steps'] > 0:
# Get dictionary mapping from variables to their update value after one optimization step
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in xrange(params['unrolling_steps'] - 1):
# Compute variable updates given the previous iteration's updated variable
cur_update_dict = graph_replace(update_dict, cur_update_dict)
# Final unrolled loss uses the parameters at the last time step
unrolled_loss = graph_replace(loss, cur_update_dict)
else:
unrolled_loss = loss
# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
The implementation of get_collection:
def get_collection(key, scope=None):
    """Wrapper for `Graph.get_collection()` using the default graph.

    See `tf.Graph.get_collection`
    for more details.

    Args:
      key: The key for the collection. For example, the `GraphKeys` class contains
        many standard names for collections.
      scope: (Optional.) If supplied, the resulting list is filtered to include
        only items whose `name` attribute matches using `re.match`. Items without
        a `name` attribute are never returned if a scope is supplied and the
        choice of `re.match` means that a `scope` without special tokens filters
        by prefix.

    Returns:
      The list of values in the collection with the given `name`, or
      an empty list if no value has been added to that collection. The
      list contains the values in the order under which they were
      collected.

    @compatibility(eager)
    Collections are not supported when eager execution is enabled.
    @end_compatibility
    """
    return get_default_graph().get_collection(key, scope)
It looks like in this code, the key and scope arguments are swapped. If you provide "generator" or "discriminator" as the key with no scope, i.e.:
gen_vars = tf.compat.v1.get_collection("generator")
disc_vars = tf.compat.v1.get_collection("discriminator")
You should get results (I was able to reproduce locally with Tensorflow 2.2.0). The only issue I could not quite identify is, when providing scope, the function returns an empty list again, regardless of the scope value you provide. For example, tf.compat.v1.GLOBAL_VARIABLES should return everything, but that is not the case:
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) # returns []
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) # returns []
disc_vars = tf.compat.v1.get_collection('generator') # returns a list of tensors
Update
It looks like even creating the variables in the context manager doesn't add them to the graph collection. I had to call tf.compat.v1.add_to_collection('generator', x) and tf.compat.v1.add_to_collection('discriminator', log_d) in the respective functions to get those results.
Update #2
I searched around and it doesn't appear there's a context manager which enables you to add variables declared within it to a Tensorflow collection. For the sake of completeness of this answer though, I have implemented one:
from contextlib import contextmanager
@contextmanager
def collection_scope(collection_name):
    """Context manager that adds tensors created in its body to a collection.

    After the `with` body finishes, the local variables of the frame that
    contains the `with` statement are scanned for EagerTensor values; those
    that are not also locals of that frame's caller have their `.ref()`
    appended to the collection named `collection_name`.

    Args:
        collection_name: name of the collection to extend.
    """
    import inspect
    from tensorflow.python.framework.ops import EagerTensor

    collection = tf.compat.v1.get_collection_ref(collection_name)
    yield
    # this is a bit of a hack, but it works...
    # NOTE(review): walking up two frames assumes one intermediate frame
    # (the context manager's exit machinery) between this generator and the
    # code containing the `with` statement - confirm on your interpreter.
    f = inspect.currentframe().f_back.f_back
    # only take variables which were declared within the context manager
    tf_variables = {val.ref() for val in f.f_locals.values() if isinstance(val, EagerTensor)} - \
        {val.ref() for val in f.f_back.f_locals.values() if isinstance(val, EagerTensor)}
    collection.extend(tf_variables)
You can then drop this in your functions in place of the variable scope (tf.compat.v1.variable_scope) context managers. For example, instead of:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
Do the following:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with collection_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
With this change, all tensors declared within the scope of the context manager will be added to the collection "generator" - tf.compat.v1.get_collection('generator') will return the correct list of tensors.

Tensorflow ExponentialMovingAverage with PartitionedVariable

I'm trying to use tf.train.ExponentialMovingAverage with a PartitionedVariable.
I use a custom_getter to create an EMA version of the graph.
If I don't use a partitioner to create my variable the following code works as expected : after setting the variable to zero, with a decay of 1, the EMA version of this variable keeps the original value.
However, if I use a partitioner, I have the following issues
With TF 1.12, the ema_getter is unable to find the average of the PartitionedVariable, hence the two variables are the same object.
With TF 1.15, I get an AttributeError: 'PartitionedVariable' object has no attribute 'experimental_ref'.
Here is my code
import tensorflow as tf
import numpy as np
def ema_getter(ema):
    """Build a custom getter that swaps a variable for its moving average.

    Args:
        ema: object exposing `average(var)`; here a
            tf.train.ExponentialMovingAverage.

    Returns:
        A function with the custom_getter signature
        (getter, name, *args, **kwargs). It returns the EMA counterpart of
        the fetched variable, or the variable itself (after logging a
        warning) when `ema.average` returns None.
    """
    def _ema_getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        ema_var = ema.average(var)
        # Compare against None explicitly: the truth value of a variable-like
        # object is not a reliable "was it found" signal.
        if ema_var is None:
            tf.logging.warning(f"Unable to find EMA of {name}")
            return var
        return ema_var
    return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum)) # 20.0
print(sess.run(var_ema_sum)) # 20.0
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum)) # 0.0
print(sess.run(var_ema_sum)) # should be 20.0
My understanding so far is that the PartitionedVariable does not act as a standard Variable but is merely a shell for a list of other Variable.
The custom_getter needs to take this into account and manually retrieve and reconstruct a PartitionedVariable using the EMA versions of the original PartitionedVariable's variables.
However, this seems hacky -- using a .__class__ because I could not find a way to import PartitionedVariable in a clean way, or accessing the private attribute _partitions for ex.
Sharing my current fix here
import tensorflow as tf
import numpy as np
def ema_getter(ema):
    """Build a custom getter that swaps a variable for its moving average.

    Partitioned variables are handled shard by shard: each shard is replaced
    by its EMA counterpart and a new object of the same class is rebuilt
    around them.

    Args:
        ema: object exposing `average(var)`; here a
            tf.train.ExponentialMovingAverage.

    Returns:
        A function with the custom_getter signature
        (getter, name, *args, **kwargs). It returns the EMA version of the
        fetched variable, or the variable itself (after logging a warning)
        when no EMA is available.
    """
    def _ema_getter(getter, name, *args, **kwargs):
        var = getter(name, *args, **kwargs)
        # Manually reconstruct if PartitionedVariable
        if var.__class__.__name__ == "PartitionedVariable":
            ema_vs = [ema.average(v) for v in var]
            if any(v is None for v in ema_vs):
                # At least one shard has no EMA: fall back to the original
                # instead of rebuilding around a None entry.
                ema_var = None
            else:
                ema_var = var.__class__(
                    name=var.name,
                    shape=var.shape,
                    dtype=var.dtype,
                    variable_list=ema_vs,
                    partitions=var._partitions,
                )
        else:
            ema_var = ema.average(var)
        # Compare against None explicitly rather than relying on truthiness.
        if ema_var is None:
            tf.logging.warning(f"Unable to find EMA of {name}")
            return var
        return ema_var
    return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))

Getting access to class's variables inside another class's def

I'm trying to override the __str__ method in the Person() class:
'''class Person(object):
def __init__(self, Nose = None, Neck = None, RShoulder = None, RElbow = None, RWrist = None, LShoulder = None, LElbow = None, LWrist = None, MidHip = None, RHip = None, RKnee = None, RAnkle = None, LHip = None, LKnee = None, LAnkle = None, REye = None, LEye = None, REar = None, LEar = None, LBigToe = None, LSmallToe = None, LHeel = None, RBigToe = None, RSmallToe = None, RHeel = None):
self.Nose = Nose
self.Neck = Neck
self.RShoulder = RShoulder
self.RElbow = RElbow
self.RWrist = RWrist
self.LShoulder = LShoulder
self.LElbow = LElbow
self.LWrist = LWrist
self.MidHip = MidHip
self.RHip = RHip
self.RKnee = RKnee
self.RAnkle = RAnkle
self.LHip = LHip
self.LKnee = LKnee
self.LAnkle = LAnkle
self.REye = REye
self.LEye = LEye
self.REar = REar
self.LEar = LEar
self.LBigToe = LBigToe
self.LSmallToe = LSmallToe
self.LHeel = LHeel
self.RBigToe = RBigToe
self.RSmallToe = RSmallToe
self.RHeel = RHeel
def __str__(self):
return 'Nose = %s\nNeck = \n%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s'%(self.Nose,self.Neck,self.RShoulder,self.RElbow,self.RWrist,self.LShoulder,self.LElbow,self.LWrist,self.MidHip,self.RHip,self.RKnee,self.RAnkle,self.LHip,self.LKnee,self.LAnkle,self.REye,self.LEye,self.REar,self.LEar,self.LBigToe,self.LSmallToe,self.LHeel,self.RBigToe,self.RSmallToe,self.RHeel)'''
And I want to find a more elegant way to return a string that looks like this:
Nose = something
Neck = something
...
...
...
Question: elegant way to return a string which will look like ...
You can use the built-in vars function to get the __dict__ of the class variable and format it using .format(... and .join(....
Reference:
vars([object])
Return the __dict__ attribute for a module, class, instance, or any other object with a __dict__ attribute.
.format(value[, format_spec])
Convert a value to a “formatted” representation, as controlled by a standard formatting syntax that is used by most built-in types: Format Specification Mini-Language.
<str>.join(iterable)
Return a string which is the concatenation of the strings in iterable.
class Person:
    """Body keypoints with a line-per-attribute string representation."""

    def __init__(self, **kwargs):
        # dict.get already defaults to None, so missing keypoints stay None.
        self.Nose = kwargs.get('Nose')
        self.Neck = kwargs.get('Neck')
        self.RShoulder = kwargs.get('RShoulder')

    def __str__(self):
        """Return one 'name = value' line per instance attribute."""
        # vars(self) is the instance __dict__, iterated in assignment order.
        return '\n'.join('{} = {}'.format(k, v) for k, v in vars(self).items())
p = Person(Nose=1, Neck=1)
print(p)
Output:
Nose = 1
Neck = 1
RShoulder = None
Tested with Python: 3.6

Error when referencing a class within its __init__ method

I am putting the (NMT Tensorflow code) inside a main class. The code base has two classes - 'Encoder' and 'Decoder'. They are referenced in their respective 'init' methods. However it raises an error - 'Undefined named Encoder'.
class TranslationModel(ModelBase):
pathToZip = tf.keras.utils.get_file('spa-eng.zip', origin='http://download.tensorflow.org/data/spa-eng.zip', extract=True)
pathToFile = os.path.dirname(pathToZip)+"/spa-eng/spa.txt"
def __init__(self,
batchSize = 64,
bufferSize = None,
numberOfBatches = None,
units = 1024,
vocabInputSize = None,
vocabTargetSize = None,
optimizer = tf.train.AdamOptimizer(),
dataSetPath = None,
inputTensor = None,
targetTensor = None,
inputLanguage = None,
targetLanguage = None,
maxLengthInput = None,
maxLengthTarget = None,
embeddingDimension = 256, *arg, **kwargs):
self.batchSize = 64
self.bufferSize = None
self.numberOfBatches = None
self.units = units
self.vocabInputSize = None
self.vocabTargetSize = None
self.optimizer = optimizer
self.dataSetPath = dataSetPath
self.targetTensor = targetTensor
self.inputTensor = inputTensor
self.inputLanguage = inputLanguage
self.targetLanguage = targetLanguage
self.maxLengthInput = maxLengthInput
self.maxLengthTarget = maxLengthTarget
self.embeddingDimension = embeddingDimension
super().__init__(*arg, **kwargs)
#OTHER FUNCTIONS HERE
class Encoder(tf.keras.Model):
def __init__(self, vocabSize, embeddingDimension, encoderUnits, batchSize):
super(Encoder, self).__init__() # Raises error - 'Undefined named Encoder'
#Other code here
class Decoder(tf.keras.Model):
def __init__(self, vocabSize, embeddingDimension, dec_units, batchSize):
super('Decoder', self).__init__() # Raises error - 'Undefined named Decoder'
## Other code
It's because when a class is defined inside another class and you want to refer to it, you have to qualify it as OuterClass.InnerClass.
It won't work if you just use InnerClass.
In your case it's TranslationModel.Encoder.

Categories

Resources