Tensorflow ExponentialMovingAverage with PartitionedVariable - python

I'm trying to use tf.train.ExponentialMovingAverage with a PartitionedVariable.
I use a custom_getter to create an EMA version of the graph.
If I don't use a partitioner to create my variable the following code works as expected : after setting the variable to zero, with a decay of 1, the EMA version of this variable keeps the original value.
However, if I use a partitioner, I have the following issues
tf1.12 the ema_getter is unable to find the average of the PartitionedVariable hence the two variables are the same object
tf.1.15 I get an AttributeError: 'PartitionedVariable' object has no attribute 'experimental_ref'
Here is my code
import tensorflow as tf
import numpy as np
def ema_getter(ema):
def _ema_getter(getter, name, *args, **kwargs):
var = getter(name, *args, **kwargs)
ema_var = ema.average(var)
if not ema_var:
tf.logging.warning(f"Unable to find EMA of {name}")
return ema_var if ema_var else var
return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum)) # 20.0
print(sess.run(var_ema_sum)) # 20.0
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum)) # 0.0
print(sess.run(var_ema_sum)) # should be 20.0

My understanding so far is that the PartitionedVariable does not act as a standard Variable but is merely a shell for a list of other Variable.
The custom_getter needs to take this into account and manually retrieve and reconstruct a ParitionedVariable using the ema versions of the original PartitionedVariable variables.
However, this seems hacky -- using a .__class__ because I could not find a way to import PartitionedVariable in a clean way, or accessing the private attribute _partitions for ex.
Sharing my current fix here
import tensorflow as tf
import numpy as np
def ema_getter(ema):
def _ema_getter(getter, name, *args, **kwargs):
var = getter(name, *args, **kwargs)
# Manually reconstruct if PartitionedVariable
if var.__class__.__name__ == "PartitionedVariable":
ema_vs = [ema.average(v) for v in var]
ema_var = var.__class__(
name=var.name,
shape=var.shape,
dtype=var.dtype,
variable_list=ema_vs,
partitions=var._partitions,
)
else:
ema_var = ema.average(var)
if not ema_var:
tf.logging.warning(f"Unable to find EMA of {name}")
return ema_var if ema_var else var
return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum))
print(sess.run(var_ema_sum))

Related

How to get the symbolic gradient in Tensorflow 2.x

I want to convert https://web.casadi.org/blog/tensorflow/ , which was written in Tensorflow 1 with casadi, using Tensorflow 2. I have changed the code yet tf.disable_v2_behavior() had to be done to get it working.
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
def __init__(self,t_in,t_out,session, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
session: a tensorflow session
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.session = session
self.refs = []
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self,i):
return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())
def get_sparsity_out(self,i):
return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())
def eval(self,arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
d = dict((v,arg[i].toarray()) for i,v in enumerate(self.t_in))
# Evaluate the tensorflow expressions
ret = self.session.run(self.t_out,feed_dict=d)
return ret
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self,nadj,name,inames,onames,opts):
# Construct tensorflow placeholders for the reverse seeds
adj_seed = [tf.placeholder(shape=self.sparsity_out(i).shape,dtype=tf.float64) for i in range(self.n_out())]
# Construct the reverse tensorflow graph through 'gradients'
grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in+adj_seed,grad,self.session)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name,nominal_in+nominal_out+adj_seed,callback.call(nominal_in+adj_seed),inames,onames)
if __name__=="__main__":
a = tf.placeholder(shape=(2,2),dtype=tf.float64)
b = tf.placeholder(shape=(2,1),dtype=tf.float64)
y = tf.matmul(tf.sin(a), b)
with tf.Session() as session:
f_tf = TensorFlowEvaluator([a,b], [y], session)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Now I want to write this purely using Tensorflow 2.x. Eager execution is enabled by default I was thinking to use #tf.function to calculate the gradient,
#tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
Here is the update the code at the moment,
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
def __init__(self, t_in, t_out, model, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.refs = []
self.model = model
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self, i):
tesnor_shape = self.t_in[i].get_shape().as_list()
return Sparsity.dense(tesnor_shape[0], tesnor_shape[1])
# return Sparsity.dense(4, 1)
def get_sparsity_out(self, i):
return Sparsity.dense(2, 1)
def eval(self, arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
print(arg)
# d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
updated_t = []
for i,v in enumerate(self.t_in):
updated_t.append(tf.Variable(arg[i].toarray()))
# Evaluate the tensorflow expressions
if not tf.is_tensor(self.t_out[0]):
ret = self.t_out[0](updated_t)[0].numpy()
else:
ret = self.t_out[0](updated_t).numpy()
return [ca.DM(ret)]
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self, nadj, name, inames, onames, opts):
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
tf.config.run_functions_eagerly(False)
print("=============== self.t_in========", self.t_out)
print("=============== self.t_out========", self.t_in)
# grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
out_, grad = self.t_out[0](self.t_in)
print("============== grad========", grad)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)
if __name__=="__main__":
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
a = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
b = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))
#tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
f_tf = TensorFlowEvaluator([a,b], [f_k], None)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Problem:
In the get_reverse method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed), I get symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>] in Tensorflow 1.
However, in Tensorflow 2, I always get numerical results. I can access the graph but those are not callable. self.t_out[0].get_concrete_function(self.t_in).graph similar to here
What would be the better way to get the symbolic gradient like in Tensorflow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad should return symbolic form of the gradient rather than numerical evaluation

Updating Unrolled GAN to TF2

I am trying to implement the Unrolled GAN model as described here, with example code. However, it was implemented using TF1, and I have been doing my best to update it but I am relatively new to python and TF (only been using it for the past ~6 months).
The line(s) that I cannot seem to make work (for the moment, there may be more) is this one:
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
These both return empty lists, and I cannot see what I am missing. Even without specifying a scope, the get_collection() returns []. Earlier, we define both generator and discriminator as scopes like so:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
Is there a problem with the definition of the scope?
Here is my updated code in full, in case there is maybe something I missed elsewhere:
%pylab inline
from collections import OrderedDict
import tensorflow as tf
import tensorflow_probability as tfp
ds = tfp.distributions
# slim = tf.contrib.slim
import tf_slim as slim
from keras.optimizers import Adam
try:
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy
generate_movie = True
except:
print("Warning: moviepy not found.")
generate_movie = False
def remove_original_op_attributes(graph):
"""Remove _original_op attribute from all operations in a graph."""
for op in graph.get_operations():
op._original_op = None
def graph_replace(*args, **kwargs):
"""Monkey patch graph_replace so that it works with TF 1.0"""
remove_original_op_attributes(tf.get_default_graph())
return _graph_replace(*args, **kwargs)
def extract_update_dict(update_ops):
"""Extract variables and their new values from Assign and AssignAdd ops.
Args:
update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()
Returns:
dict mapping from variable values to their updated value
"""
name_to_var = {v.name: v for v in tf.compat.v1.global_variables()}
updates = OrderedDict()
for update in update_ops:
var_name = update.op.inputs[0].name
var = name_to_var[var_name]
value = update.op.inputs[1]
if update.op.type == 'Assign':
updates[var.value()] = value
elif update.op.type == 'AssignAdd':
updates[var.value()] = var + value
else:
raise ValueError("Update op type (%s) must be of type Assign or AssignAdd"%update_op.op.type)
return updates
def sample_mog(batch_size, n_mixture=8, std=0.01, radius=1.0):
thetas = np.linspace(0, 2 * np.pi, n_mixture)
xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
cat = ds.Categorical(tf.zeros(n_mixture))
comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
data = ds.Mixture(cat, comps)
return data.sample(batch_size)
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
params = dict(
batch_size=512,
disc_learning_rate=1e-4,
gen_learning_rate=1e-3,
beta1=0.5,
epsilon=1e-8,
max_iter=25000,
viz_every=5000,
z_dim=256,
x_dim=2,
unrolling_steps=5,
)
tf.compat.v1.reset_default_graph()
data = sample_mog(params['batch_size'])
noise = ds.Normal(tf.zeros(params['z_dim']),
tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
# with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=1.4)): ## old
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.keras.initializers.Orthogonal(gain=1.4)):
samples = generator(noise, output_dim=params['x_dim'])
real_score = discriminator(data)
fake_score = discriminator(samples, reuse=True)
# Saddle objective
loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(real_score, dtype=tf.float32), labels=tf.cast(tf.ones_like(real_score), dtype=tf.float32)) +
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(fake_score, dtype=tf.float32), labels=tf.cast(tf.zeros_like(fake_score), dtype=tf.float32)))
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
# updates = d_opt.get_updates(disc_vars, [], loss) ## old
updates = d_opt.get_updates(loss, [])
d_train_op = tf.group(*updates, name="d_train_op")
### I HAVE NOT UPDATED BEYOND THIS POINT ###
# Unroll optimization of the discrimiantor
if params['unrolling_steps'] > 0:
# Get dictionary mapping from variables to their update value after one optimization step
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in xrange(params['unrolling_steps'] - 1):
# Compute variable updates given the previous iteration's updated variable
cur_update_dict = graph_replace(update_dict, cur_update_dict)
# Final unrolled loss uses the parameters at the last time step
unrolled_loss = graph_replace(loss, cur_update_dict)
else:
unrolled_loss = loss
# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
The implementation of get_collection:
def get_collection(key, scope=None):
"""Wrapper for `Graph.get_collection()` using the default graph.
See `tf.Graph.get_collection`
for more details.
Args:
key: The key for the collection. For example, the `GraphKeys` class contains
many standard names for collections.
scope: (Optional.) If supplied, the resulting list is filtered to include
only items whose `name` attribute matches using `re.match`. Items without
a `name` attribute are never returned if a scope is supplied and the
choice or `re.match` means that a `scope` without special tokens filters
by prefix.
Returns:
The list of values in the collection with the given `name`, or
an empty list if no value has been added to that collection. The
list contains the values in the order under which they were
collected.
#compatibility(eager)
Collections are not supported when eager execution is enabled.
#end_compatibility
"""
return get_default_graph().get_collection(key, scope)
It looks like in this code, key and scope arguments are swapped. If you provide "generator" or "discriminator" as the key with no scope i.e;
gen_vars = tf.compat.v1.get_collection("generator")
disc_vars = tf.compat.v1.get_collection("discriminator")
You should get results (I was able to reproduce locally with Tensorflow 2.2.0). The only issue I could not quite identify is, when providing scope, the function returns an empty list again, regardless of the scope value you provide. For example, tf.compat.v1.GLOBAL_VARIABLES should return everything, but that is not the case:
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) # returns []
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) # returns []
disc_vars = tf.compat.v1.get_collection('generator') # returns a list of tensors
Update
It looks like even creating the variables in the context manager doesn't add them to the graph collection. I had to call tf.compat.v1.add_to_collection('generator', x) and tf.compat.v1.add_to_collection('discriminator', log_d) in the respective functions to get those results.
Update #2
I searched around and it doesn't appear there's a context manager which enables you to add variables declared within it to a Tensorflow collection. For the sake of completeness of this answer though, I have implemented one:
from contextlib import contextmanager
#contextmanager
def collection_scope(collection_name):
import inspect
from tensorflow.python.framework.ops import EagerTensor
collection = tf.compat.v1.get_collection_ref(collection_name)
yield
# this is a bit of a hack, but it works...
f = inspect.currentframe().f_back.f_back
# only take variables which were declared within the context manager
tf_variables = set([val.ref() for val in f.f_locals.values() if isinstance(val, EagerTensor)]) - \
set([val.ref() for val in f.f_back.f_locals.values() if isinstance(val, EagerTensor)])
collection.extend(tf_variables)
You can then drop this in your functions in place of the variable scope (tf.compat.v1.variable_scope) context managers. For example, instead of:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
Do the following:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with collection_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
With this change, all tensors declared within the scope of the context manager will be added to the collection "generator" - tf.compat.v1.get_collection('generator') will return the correct list of tensors.

NameError: name 'train_gen' is not defined

I'm new to python and tensorflow. I'm now testing Improved WGAN code from https://github.com/igul222/improved_wgan_training
After adjusting the code to python 3.6, it still gives "NameError: name 'train_gen' is not defined" when I ran it, although there wasn't warning from pylint.
Can anyone help me with it?
The version of python I'm using is 3.6. There were many syntax differences from 2.7. I've already changed a lot to make it work. And I am running Tensorflow in a virtual environment. Still couldn't figure out this one.
import os, sys
sys.path.append(os.getcwd())
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets
import tensorflow as tf
import tflib as lib
import tflib.ops.linear
import tflib.ops.conv2d
import tflib.ops.batchnorm
import tflib.ops.deconv2d
import tflib.save_images
import tflib.mnist
import tflib.plot
MODE = 'wgan-gp' # dcgan, wgan, or wgan-gp
DIM = 64 # Model dimensionality
BATCH_SIZE = 50 # Batch size
CRITIC_ITERS = 5 # For WGAN and WGAN-GP, number of critic iters per gen iter
LAMBDA = 10 # Gradient penalty lambda hyperparameter
ITERS = 200000 # How many generator iterations to train for
OUTPUT_DIM = 784 # Number of pixels in MNIST (28*28)
lib.print_model_settings(locals().copy())
def LeakyReLU(x, alpha=0.2):
return tf.maximum(alpha*x, x)
def ReLULayer(name, n_in, n_out, inputs):
output = lib.ops.linear.Linear(
name+'.Linear',
n_in,
n_out,
inputs,
initialization='he'
)
return tf.nn.relu(output)
def LeakyReLULayer(name, n_in, n_out, inputs):
output = lib.ops.linear.Linear(
name+'.Linear',
n_in,
n_out,
inputs,
initialization='he'
)
return LeakyReLU(output)
def Generator(n_samples, noise=None):
if noise is None:
noise = tf.random_normal([n_samples, 128])
output = lib.ops.linear.Linear('Generator.Input', 128, 4*4*4*DIM, noise)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN1', [0], output)
output = tf.nn.relu(output)
output = tf.reshape(output, [-1, 4*DIM, 4, 4])
output = lib.ops.deconv2d.Deconv2D('Generator.2', 4*DIM, 2*DIM, 5, output)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN2', [0,2,3], output)
output = tf.nn.relu(output)
output = output[:,:,:7,:7]
output = lib.ops.deconv2d.Deconv2D('Generator.3', 2*DIM, DIM, 5, output)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN3', [0,2,3], output)
output = tf.nn.relu(output)
output = lib.ops.deconv2d.Deconv2D('Generator.5', DIM, 1, 5, output)
output = tf.nn.sigmoid(output)
return tf.reshape(output, [-1, OUTPUT_DIM])
def Discriminator(inputs):
output = tf.reshape(inputs, [-1, 1, 28, 28])
output = lib.ops.conv2d.Conv2D('Discriminator.1',1,DIM,5,output,stride=2)
output = LeakyReLU(output)
output = lib.ops.conv2d.Conv2D('Discriminator.2', DIM, 2*DIM, 5, output, stride=2)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Discriminator.BN2', [0,2,3], output)
output = LeakyReLU(output)
output = lib.ops.conv2d.Conv2D('Discriminator.3', 2*DIM, 4*DIM, 5, output, stride=2)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Discriminator.BN3', [0,2,3], output)
output = LeakyReLU(output)
output = tf.reshape(output, [-1, 4*4*4*DIM])
output = lib.ops.linear.Linear('Discriminator.Output', 4*4*4*DIM, 1, output)
return tf.reshape(output, [-1])
real_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM])
fake_data = Generator(BATCH_SIZE)
disc_real = Discriminator(real_data)
disc_fake = Discriminator(fake_data)
gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')
if MODE == 'wgan':
gen_cost = -tf.reduce_mean(disc_fake)
disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
gen_train_op = tf.train.RMSPropOptimizer(
learning_rate=5e-5
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.RMSPropOptimizer(
learning_rate=5e-5
).minimize(disc_cost, var_list=disc_params)
clip_ops = []
for var in lib.params_with_name('Discriminator'):
clip_bounds = [-.01, .01]
clip_ops.append(
tf.assign(
var,
tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])
)
)
clip_disc_weights = tf.group(*clip_ops)
elif MODE == 'wgan-gp':
gen_cost = -tf.reduce_mean(disc_fake)
disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
alpha = tf.random_uniform(
shape=[BATCH_SIZE,1],
minval=0.,
maxval=1.
)
differences = fake_data - real_data
interpolates = real_data + (alpha*differences)
gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
gradient_penalty = tf.reduce_mean((slopes-1.)**2)
disc_cost += LAMBDA*gradient_penalty
gen_train_op = tf.train.AdamOptimizer(
learning_rate=1e-4,
beta1=0.5,
beta2=0.9
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(
learning_rate=1e-4,
beta1=0.5,
beta2=0.9
).minimize(disc_cost, var_list=disc_params)
clip_disc_weights = None
elif MODE == 'dcgan':
gen_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_fake,
tf.ones_like(disc_fake)
))
disc_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_fake,
tf.zeros_like(disc_fake)
))
disc_cost += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_real,
tf.ones_like(disc_real)
))
disc_cost /= 2.
gen_train_op = tf.train.AdamOptimizer(
learning_rate=2e-4,
beta1=0.5
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(
learning_rate=2e-4,
beta1=0.5
).minimize(disc_cost, var_list=disc_params)
clip_disc_weights = None
# For saving samples
fixed_noise = tf.constant(np.random.normal(size=(128, 128)).astype('float32'))
fixed_noise_samples = Generator(128, noise=fixed_noise)
def generate_image(frame, true_dist):
samples = session.run(fixed_noise_samples)
lib.save_images.save_images(
samples.reshape((128, 28, 28)),
'samples_{}.png'.format(frame)
)
# Dataset iterator
train_gen, dev_gen, test_gen = lib.mnist.load(BATCH_SIZE, BATCH_SIZE)
def inf_train_gen():
while True:
for images, targets in train_gen():
yield images
# Train loop
with tf.Session() as session:
session.run(tf.initialize_all_variables())
gen = inf_train_gen()
for iteration in range(ITERS):
start_time = time.time()
if iteration > 0:
_ = session.run(gen_train_op)
if MODE == 'dcgan':
disc_iters = 1
else:
disc_iters = CRITIC_ITERS
for i in range(disc_iters):
_data = gen.__next__()
_disc_cost, _ = session.run(
[disc_cost, disc_train_op],
feed_dict={real_data: _data}
)
if clip_disc_weights is not None:
_ = session.run(clip_disc_weights)
lib.plot.plot('train disc cost', _disc_cost)
lib.plot.plot('time', time.time() - start_time)
# Calculate dev loss and generate samples every 100 iters
if iteration % 100 == 99:
dev_disc_costs = []
for images,_ in dev_gen():
_dev_disc_cost = session.run(
disc_cost,
feed_dict={real_data: images}
)
dev_disc_costs.append(_dev_disc_cost)
lib.plot.plot('dev disc cost', np.mean(dev_disc_costs))
generate_image(iteration, _data)
# Write logs every 100 iters
if (iteration < 5) or (iteration % 100 == 99):
lib.plot.flush()
lib.plot.tick()
This is the section containing the error name.
# Dataset iterator
train_gen, dev_gen, test_gen = lib.mnist.load(BATCH_SIZE, BATCH_SIZE)
def inf_train_gen():
while True:
for images, targets in train_gen():
yield images
And here is the error.
Traceback (most recent call last):
File "<stdin>", line 13, in <module>
File "<stdin>", line 3, in inf_train_gen
NameError: name 'train_gen' is not defined
Attempt 1:
I believe it's just because you are saying for images, targets in train_gen(): when you should be saying for images, targets in train_gen:
In a nutshell, the brackets suggest you are calling a function, which leads Python to raise the exception NameError: name 'train_gen' is not defined because there is no function with the name train_gen defined.
In the future, your code should be minimal, because you have pasted an enormous amount of code which makes it very hard to debug/see what you're doing.
Attempt 2:
Upon second review of the code (this is a good reason why you need to make your examples as small as possible), I have realised that is is possible you are maybe importing this code from elsewhere?
When you are making the first assignment to train_gen this is outside the function scope. It is possible then that when you go to call the function train_gen is no longer defined, which is why you get your error. This can occur for a number of reasons. After having reviewed the code a little there are various issues I can see (bad practice mostly).
It is generally not a good idea to use global variables within a function as you have in inf_train_gen, if a function needs an argument to run properly, then it should be passed as an argument. This is because if we have a problem with a variable (as we do now) we can normally see where this variable comes from and what uses it, but if all function rely on the globally scoped variable, any number of functions could delete it, change it, etc.
Right now I have no idea what has happened to the variable train_gen, I would suggest printing out the variable at different intervals and seeing if you can see which function call is causing issues and in the future stay away from globally scoped variables unless absolutely necessary, it makes it near-impossible to debug.

Understanding how a TensorFlow model as a class and a TensorFlow Session interact

I have been using TensorFlow for a reasonable length of time now. and believed I had a thorough understanding of how a TensorFlow graph works and executes within a session. However, I have written all of my TensorFlow models in a script-like fashion as such:
import tensorflow as tf
import DataWorker
import Constants
x = tf.placeholder(tf.float32, [None, Constants.sequenceLength, DataWorker.numFeatures])
y = tf.placeholder(tf.float32, [None, 1])
xTensors = tf.unstack(x, axis=1) # [seqLength tensors of shape (batchSize, numFeatures)]
W = tf.Variable(tf.random_normal([Constants.numHidden, 1])) # Weighted matrix
b = tf.Variable(tf.random_normal([1])) # Bias
cell = tf.contrib.rnn.BasicLSTMCell(Constants.numHidden, forget_bias=Constants.forgetBias)
outputs, finalState = tf.nn.static_rnn(cell, xTensors, dtype=tf.float32)
# predictions = [tf.add(tf.matmul(output, W), b) for output in outputs] # List of predictions after each time step
prediction = tf.add(tf.matmul(outputs[-1], W), b) # Prediction after final time step
prediction = tf.tanh(prediction) # Activation
mse = tf.losses.mean_squared_error(predictions=prediction, labels=y) # Mean loss over entire batch
accuracy = tf.reduce_mean(1 - (tf.abs(y - prediction) / DataWorker.labelRange)) # Accuracy over entire batch
optimiser = tf.train.AdamOptimizer(Constants.learningRate).minimize(mse) # Backpropagation
with tf.Session() as session:
session.run(tf.global_variables_initializer())
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
dict = {x: batchX, y: batchY}
session.run(optimiser, dict)
if batchNum % 1000 == 0 or epochComplete:
batchLoss = session.run(mse, dict)
batchAccuracy = session.run(accuracy, dict)
print("Iteration:", batchNum)
print(batchLoss)
print(str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
testAccuracy = session.run(accuracy, {x: testX, y: testY})
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
But now, for practicality and readability, I want to implement my model as a class, but have encountered many problems with initializing my variables, etc.
This is the closest I have got to implementing the above example using my own LSTM class
Model.py
import tensorflow as tf
import Constants
import DataWorker # Remove this dependency
class LSTM():
"""docstring."""
def __init__(self,
inputDimensionList,
outputDimensionList,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias
):
"""docstring."""
self.batchInputs = tf.placeholder(tf.float32, [None] + inputDimensionList)
self.batchLabels = tf.placeholder(tf.float32, [None] + outputDimensionList)
self.weightedMatrix = tf.Variable(tf.random_normal([numHidden] + outputDimensionList))
self.biasMatrix = tf.Variable(tf.random_normal(outputDimensionList))
self.cell = tf.contrib.rnn.BasicLSTMCell(numHidden, forget_bias=forgetBias)
self.numLayers = numLayers
self.numHidden = numHidden
self.learningRate = learningRate
self.forgetBias = forgetBias
self.batchDict = {}
self.batchInputTensors = None
self.batchOutputs = None # All needed as instance variables?
self.batchFinalStates = None
self.batchPredictions = None
self.batchLoss = None
self.batchAccuracy = None
self.initialised = False
self.session = tf.Session()
# Take in activation, loss and optimiser FUNCTIONS as args
def execute(self, command):
"""docstring."""
return self.session.run(command, self.batchDict)
def setBatchDict(self, inputs, labels):
"""docstring."""
self.batchDict = {self.batchInputs: inputs, self.batchLabels: labels}
self.batchInputTensors = tf.unstack(self.batchInputs, axis=1)
def processBatch(self):
"""docstring."""
self.batchOutputs, self.batchFinalState = tf.nn.static_rnn(self.cell, self.batchInputTensors, dtype=tf.float32)
pred = tf.tanh(tf.add(tf.matmul(self.batchOutputs[-1], self.weightedMatrix), self.biasMatrix))
mse = tf.losses.mean_squared_error(predictions=pred, labels=self.batchLabels)
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
if not self.initialised:
self.session.run(tf.global_variables_initializer())
self.initialised = True
with tf.variable_scope("model") as scope:
if self.initialised:
scope.reuse_variables()
self.execute(optimiser)
self.batchPredictions = self.execute(pred)
self.batchLoss = self.execute(tf.losses.mean_squared_error(predictions=self.batchPredictions, labels=self.batchLabels))
self.batchAccuracy = self.execute(tf.reduce_mean(1 - (tf.abs(self.batchLabels - self.batchPredictions) / DataWorker.labelRange)))
return self.batchPredictions, self.batchLabels, self.batchLoss, self.batchAccuracy
def kill(self):
"""docstring."""
self.session.close()
This class is quite messy, especially processBatch() as I have just been trying to get it to work before refining it.
I then run my model here:
Main.py
import DataWorker
import Constants
from Model import LSTM
inputDim = [Constants.sequenceLength, DataWorker.numFeatures]
outputDim = [1]
lstm = LSTM(inputDimensionList=inputDim, outputDimensionList=outputDim)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
lstm.setBatchDict(batchX, batchY)
batchPredictions, batchLabels, batchLoss, batchAccuracy = lstm.runBatch()
if batchNum % 1000 == 0 or epochComplete:
print("Iteration:", batchNum)
print("Pred:", batchPredictions[-1], "\tLabel:", batchLabels[-1])
print("Loss:", batchLoss)
print("Accuracy:", str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
lstm.setBatchDict(testX, testY)
_, _, _, testAccuracy = lstm.runBatch()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
lstm.kill()
A single passthrough of the graph is executed fine, when all the variables are initialized, but it is on the second iteration where I get the error
ValueError: Variable rnn/basic_lstm_cell/kernel/Adam/ already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
I Googled this problem and learned that using scope.reuse_variables() should stop it trying to initialize the AdamOptimizer a second time, but cleary this isn't working how I have implemented it. How can I fix this issue?
As a side note, is my method of creating the TensorFlow session as an instance variable within my LSTM class acceptable, or should I create the session in Main and then pass it into the LSTM instance?
In general I wrap anything that creates variables under the hood with tf.make_template when doing object oriented model building.
However, you should avoid adding ops to the graph in a training loop, which looks like it's happening here. They will build up and cause problems, and likely give you incorrect results. Instead, define the graph (with inputs from tf.data, placeholders, or queues) and only loop over a session.run call. Even better, structure your code as an Estimator and this will be enforced.

tensorflow embeddings don't exist after first RNN example

I've setup a print statement and I've noticed that for the first batch when feeding an RNN, the embeddings exist, but after the second batch they don't and I get the following error:
ValueError: Variable RNNLM/RNNLM/Embedding/Adam_2/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
Here is my code for generating the embeddings:
def add_embedding(self):
with tf.device('/gpu:0'):
embedding = tf.get_variable("Embedding", [len(self.vocab), self.config.embed_size])
e_x = tf.nn.embedding_lookup(embedding, self.input_placeholder)
inputs = [tf.squeeze(s, [1]) for s in tf.split(1, self.config.num_steps, e_x)]
return inputs
Here is how the model is seutp, this is where I suspect the problem lies
def model(self, inputs):
with tf.variable_scope("input_drop"):
inputs_drop = [tf.nn.dropout(i, self.dropout_placeholder) for i in inputs]
with tf.variable_scope("RNN") as scope:
self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size], tf.float32)
state = self.initial_state
states = []
for t, e in enumerate(inputs_drop):
print "t is {0}".format(t)
if t > 0:
scope.reuse_variables()
H = tf.get_variable("Hidden", [self.config.hidden_size, self.config.hidden_size])
I = tf.get_variable("I", [self.config.embed_size, self.config.hidden_size])
b_1 = tf.get_variable("b_1", (self.config.hidden_size,))
state = tf.sigmoid(tf.matmul(state, H) + tf.matmul(e, I) + b_1)
states.append(state)
with tf.variable_scope("output_dropout"):
rnn_outputs = [tf.nn.dropout(o, self.dropout_placeholder) for o in states]
return rnn_outputs
The issue arises when I get to the loss function, defined as follows
def add_training_op(self, loss):
opt = tf.train.AdamOptimizer(self.config.lr)
train_op = opt.minimize(loss)
return train_op
EDIT: Here is some updated code to help everyone out
def __init__(self, config):
self.config = config
self.load_data(debug=False)
self.add_placeholders()
self.inputs = self.add_embedding()
self.rnn_outputs = self.add_model(self.inputs)
self.outputs = self.add_projection(self.rnn_outputs)
self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs]
output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
self.calculate_loss = self.add_loss_op(output)
self.train_step = self.add_training_op(self.calculate_loss)
Here are the other methods here, pertaining to add_projection and calculate_loss so we can rule them out.
def add_loss_op(self, output):
weights = tf.ones([self.config.batch_size * self.config.num_steps], tf.int32)
seq_loss = tf.python.seq2seq.sequence_loss(
[output],
tf.reshape(self.labels_placeholder, [-1]),
weights
)
tf.add_to_collection('total_loss', seq_loss)
loss = tf.add_n(tf.get_collection('total_loss'))
return loss
def add_projection(self, rnn_outputs):
with tf.variable_scope("Projection", initializer=tf.contrib.layers.xavier_initializer()) as scope:
U = tf.get_variable("U", [self.config.hidden_size, len(self.vocab)])
b_2 = tf.get_variable("b_2", [len(self.vocab)])
outputs = [tf.matmul(x, U) + b_2 for x in rnn_outputs]
return outputs
def train_RNNLM():
config = Config()
gen_config = deepcopy(config)
gen_config.batch_size = gen_config.num_steps = 1
with tf.variable_scope('RNNLM') as scope:
model = RNNLM_Model(config)
# This instructs gen_model to reuse the same variables as the model above
scope.reuse_variables()
gen_model = RNNLM_Model(gen_config)
init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as session:
best_val_pp = float('inf')
best_val_epoch = 0
session.run(init)
for epoch in xrange(config.max_epochs):
print 'Epoch {}'.format(epoch)
start = time.time()
###
train_pp = model.run_epoch(
session, model.encoded_train,
train_op=model.train_step)
valid_pp = model.run_epoch(session, model.encoded_valid)
print 'Training perplexity: {}'.format(train_pp)
print 'Validation perplexity: {}'.format(valid_pp)
if valid_pp < best_val_pp:
best_val_pp = valid_pp
best_val_epoch = epoch
saver.save(session, './ptb_rnnlm.weights')
if epoch - best_val_epoch > config.early_stopping:
break
print 'Total time: {}'.format(time.time() - start)
Seems that the code is trying to create a new Adam Variable in each batch.
Possible that the add_training_op is called twice?
Also, the snippet of def add_training_op is incomplete since there is no return statement.
The problem turned out to be the following line of code:
model = RNNLM_Model(config)
# This instructs gen_model to reuse the same variables as the model above
scope.reuse_variables()
gen_model = RNNLM_Model(gen_config)
It turns out that the second model was an issue by using reuse_variables(). By removing this line by issues went away.

Categories

Resources