I am trying to implement the Unrolled GAN model as described here, with example code. However, it was implemented in TF1, and I have been doing my best to update it, but I am relatively new to Python and TF (I have only been using them for the past ~6 months).
The lines that I cannot seem to make work (for the moment; there may be more) are these:
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
These both return empty lists, and I cannot see what I am missing. Even without specifying a scope, get_collection() returns []. Earlier, we define both the generator and the discriminator under scopes like so:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
Is there a problem with the definition of the scope?
Here is my updated code in full, in case there is maybe something I missed elsewhere:
%pylab inline
from collections import OrderedDict
import tensorflow as tf
import tensorflow_probability as tfp
ds = tfp.distributions
# slim = tf.contrib.slim
import tf_slim as slim
from keras.optimizers import Adam
try:
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy
generate_movie = True
except:
print("Warning: moviepy not found.")
generate_movie = False
def remove_original_op_attributes(graph):
"""Remove _original_op attribute from all operations in a graph."""
for op in graph.get_operations():
op._original_op = None
def graph_replace(*args, **kwargs):
    """Monkey patch graph_replace so that it works with TF 1.0"""
    # NOTE: _graph_replace is assumed to be bound elsewhere to the original
    # graph_replace (tf.contrib.graph_editor in TF1); it is not defined in this snippet.
    remove_original_op_attributes(tf.compat.v1.get_default_graph())
    return _graph_replace(*args, **kwargs)
def extract_update_dict(update_ops):
"""Extract variables and their new values from Assign and AssignAdd ops.
Args:
update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()
Returns:
dict mapping from variable values to their updated value
"""
name_to_var = {v.name: v for v in tf.compat.v1.global_variables()}
updates = OrderedDict()
for update in update_ops:
var_name = update.op.inputs[0].name
var = name_to_var[var_name]
value = update.op.inputs[1]
if update.op.type == 'Assign':
updates[var.value()] = value
elif update.op.type == 'AssignAdd':
updates[var.value()] = var + value
else:
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
return updates
def sample_mog(batch_size, n_mixture=8, std=0.01, radius=1.0):
thetas = np.linspace(0, 2 * np.pi, n_mixture)
xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
cat = ds.Categorical(tf.zeros(n_mixture))
comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
data = ds.Mixture(cat, comps)
return data.sample(batch_size)
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
params = dict(
batch_size=512,
disc_learning_rate=1e-4,
gen_learning_rate=1e-3,
beta1=0.5,
epsilon=1e-8,
max_iter=25000,
viz_every=5000,
z_dim=256,
x_dim=2,
unrolling_steps=5,
)
tf.compat.v1.reset_default_graph()
data = sample_mog(params['batch_size'])
noise = ds.Normal(tf.zeros(params['z_dim']),
tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
# with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=1.4)): ## old
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.keras.initializers.Orthogonal(gain=1.4)):
samples = generator(noise, output_dim=params['x_dim'])
real_score = discriminator(data)
fake_score = discriminator(samples, reuse=True)
# Saddle objective
loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(real_score, dtype=tf.float32), labels=tf.cast(tf.ones_like(real_score), dtype=tf.float32)) +
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(fake_score, dtype=tf.float32), labels=tf.cast(tf.zeros_like(fake_score), dtype=tf.float32)))
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
# updates = d_opt.get_updates(disc_vars, [], loss) ## old Keras signature: get_updates(params, constraints, loss)
updates = d_opt.get_updates(loss, [])  # newer Keras signature is get_updates(loss, params); disc_vars may belong in params here
d_train_op = tf.group(*updates, name="d_train_op")
### I HAVE NOT UPDATED BEYOND THIS POINT ###
# Unroll optimization of the discriminator
if params['unrolling_steps'] > 0:
# Get dictionary mapping from variables to their update value after one optimization step
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in xrange(params['unrolling_steps'] - 1):
# Compute variable updates given the previous iteration's updated variable
cur_update_dict = graph_replace(update_dict, cur_update_dict)
# Final unrolled loss uses the parameters at the last time step
unrolled_loss = graph_replace(loss, cur_update_dict)
else:
unrolled_loss = loss
# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
The implementation of get_collection:
def get_collection(key, scope=None):
"""Wrapper for `Graph.get_collection()` using the default graph.
See `tf.Graph.get_collection`
for more details.
Args:
key: The key for the collection. For example, the `GraphKeys` class contains
many standard names for collections.
scope: (Optional.) If supplied, the resulting list is filtered to include
only items whose `name` attribute matches using `re.match`. Items without
a `name` attribute are never returned if a scope is supplied and the
choice of `re.match` means that a `scope` without special tokens filters
by prefix.
Returns:
The list of values in the collection with the given `name`, or
an empty list if no value has been added to that collection. The
list contains the values in the order under which they were
collected.
@compatibility(eager)
Collections are not supported when eager execution is enabled.
@end_compatibility
"""
return get_default_graph().get_collection(key, scope)
It looks like the key and scope arguments are swapped in this code. If you provide "generator" or "discriminator" as the key with no scope, i.e.:
gen_vars = tf.compat.v1.get_collection("generator")
disc_vars = tf.compat.v1.get_collection("discriminator")
You should get results (I was able to reproduce this locally with TensorFlow 2.2.0). The only issue I could not quite identify is that, when providing a scope, the function returns an empty list again, regardless of the scope value you provide. For example, tf.compat.v1.GraphKeys.GLOBAL_VARIABLES should return everything, but that is not the case:
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) # returns []
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) # returns []
disc_vars = tf.compat.v1.get_collection('discriminator') # returns a list of tensors
Update
It looks like even creating the variables in the context manager doesn't add them to the graph collection. I had to call tf.compat.v1.add_to_collection('generator', x) and tf.compat.v1.add_to_collection('discriminator', log_d) in the respective functions to get those results.
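For reference, here is that workaround in context (a sketch: tf.compat.v1.add_to_collection simply registers the given tensor under the chosen key, which the variable scope alone evidently did not do):
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    with tf.compat.v1.variable_scope("generator"):
        h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
        x = slim.fully_connected(h, output_dim, activation_fn=None)
        tf.compat.v1.add_to_collection('generator', x)  # manual registration of the output tensor
        return x
with the analogous tf.compat.v1.add_to_collection('discriminator', log_d) call inside discriminator.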
Update #2
I searched around and it doesn't appear there's a context manager which enables you to add variables declared within it to a Tensorflow collection. For the sake of completeness of this answer though, I have implemented one:
from contextlib import contextmanager
@contextmanager
def collection_scope(collection_name):
import inspect
from tensorflow.python.framework.ops import EagerTensor
collection = tf.compat.v1.get_collection_ref(collection_name)
yield
# this is a bit of a hack, but it works...
f = inspect.currentframe().f_back.f_back
# only take variables which were declared within the context manager
tf_variables = set([val.ref() for val in f.f_locals.values() if isinstance(val, EagerTensor)]) - \
set([val.ref() for val in f.f_back.f_locals.values() if isinstance(val, EagerTensor)])
collection.extend(tf_variables)
You can then drop this in your functions in place of the variable scope (tf.compat.v1.variable_scope) context managers. For example, instead of:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
Do the following:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with collection_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
With this change, all tensors declared within the scope of the context manager will be added to the collection "generator" - tf.compat.v1.get_collection('generator') will return the correct list of tensors.
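A quick sanity check of the context manager (a sketch; note it must be used inside a function, since the frame inspection looks one level above the with block, and it only captures eager tensors):
def build():
    with collection_scope('generator'):
        x = tf.constant([[1.0, 2.0]])  # any eager tensor created in this scope is captured
    return x
build()
print(tf.compat.v1.get_collection('generator'))  # reflects the tensor created inside the scope (stored as .ref() wrappers)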
Related
I am trying to get to grips with PyTorch, and I wanted to reproduce this code in PyTorch:
https://github.com/andy-psai/MountainCar_ActorCritic/blob/master/RL%20Blog%20FINAL%20MEDIUM%20code%2002_12_19.ipynb
The problem is that the following error is returned:
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
A similar question suggested using zero_grad() again after the optimizer step, but this hasn't resolved the issue.
I've included the entire code below, so hopefully it is reproducible.
Any advice would be much appreciated.
import gym
import os
import os.path as osp
import time
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
env = gym.envs.make("MountainCarContinuous-v0")
# Value function
class Value(nn.Module):
def __init__(self, dim_states):
super(Value, self).__init__()
self.net = nn.Sequential(
nn.Linear(dim_states, 400),
nn.ReLU(),
nn.Linear(400,400),
nn.ReLU(),
nn.Linear(400, 1)
)
self.optimizer = optim.Adam(self.parameters(), lr = 1e-3)
self.criterion = nn.MSELoss()
def forward(self, state):
return self.net(torch.from_numpy(state).float())
def compute_return(self, output, target):
self.optimizer.zero_grad()
loss = self.criterion(output, target)
loss.backward()
self.optimizer.step()
self.optimizer.zero_grad()
# Policy network
class Policy(nn.Module):
def __init__(self, dim_states, env):
super(Policy, self).__init__()
self.hidden1 = nn.Linear(dim_states, 40)
self.hidden2 = nn.Linear(40, 40)
self.mu = nn.Linear(40, 1)
self.sigma = nn.Linear(40,1)
self.env = env
self.optimizer = optim.Adam(self.parameters(), lr = 2e-5)
def forward(self, state):
state = torch.from_numpy(state).float()
x = F.relu(self.hidden1(state))
x = F.relu(self.hidden2(x))
mu = self.mu(x)
sigma = F.softmax(self.sigma(x), dim=-1)
action_dist = Normal(mu, sigma)
action_var = action_dist.rsample()
action_var = torch.clip(action_var,
self.env.action_space.low[0],
self.env.action_space.high[0])
return action_var, action_dist
def compute_return(self, action, dist, td_error):
self.optimizer.zero_grad()
loss_actor = -dist.log_prob(action)*td_error
loss_actor.backward()
self.optimizer.step()
self.optimizer.zero_grad()
# Normalise the state space
import sklearn
import sklearn.preprocessing
state_space_samples = np.array(
[env.observation_space.sample() for x in range(10000)])
scaler = sklearn.preprocessing.StandardScaler()
scaler.fit(state_space_samples)
# Normaliser
def scale_state(state):
scaled = scaler.transform([state])
return scaled
##################################
# Parameters
lr_actor = 0.00002
lr_critic = 0.001
actor = Policy(2, env)
critic = Value(2)
# Training loop params
gamma = 0.99
num_episodes = 300
episode_history = []
for episode in range(num_episodes):
# Receive initial state from E
state = env.reset()
reward_total = 0
steps = 0
done = False
while not done:
action, dist = actor(state)
# print(np.squeeze(action))
next_state, reward, done, _ = env.step(
np.array([action.item()]))
if episode % 50 == 0:
env.render()
steps += 1
reward_total += reward
# TD Target
target = reward + gamma * np.squeeze(critic(next_state), axis=0)
td_error = target - np.squeeze(critic(state), axis=0)
# Update actor
actor.compute_return(action, dist, td_error)
# Update critic
critic.compute_return(np.squeeze(critic(state), axis=0), target)
episode_history.append(reward_total)
print(f"Episode: {episode}, N Steps: {steps}, Cumulative reward {reward_total}")
if np.mean(episode_history[-100:]) > 90 and len(episode_history) > 101:
print("Solved")
print(f"Mean cumulative reward over 100 episodes {np.mean(episode_history[-100:])}")
The problem lies in this snippet. When you create the target variable, there is a forward pass through the critic, which builds a computation graph with critic(next_state) as a leaf node, making target part of that graph (you can check this by printing target, which will show grad_fn=<AddBackward0>). Then, when you call critic.compute_return(critic_out, target), a new computation graph is generated, and passing in target (which is part of the previous graph) causes the RuntimeError.
The solution is to call detach() on critic(next_state); this detaches target from the computation graph so it is no longer part of it (again, check by printing target):
target = reward + gamma * np.squeeze(critic(next_state).detach(), axis=0)
td_error = target - np.squeeze(critic(state), axis=0)
# Update actor
actor.compute_return(action, dist, td_error)
# Update critic
critic_out = np.squeeze(critic(state), axis=0)
print(critic_out)
critic.compute_return(critic_out, target)
I want to convert https://web.casadi.org/blog/tensorflow/, which was written with TensorFlow 1 and CasADi, to TensorFlow 2. I have updated the code, but I had to call tf.disable_v2_behavior() to get it working:
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
def __init__(self,t_in,t_out,session, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
session: a tensorflow session
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.session = session
self.refs = []
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self,i):
return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())
def get_sparsity_out(self,i):
return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())
def eval(self,arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
d = dict((v,arg[i].toarray()) for i,v in enumerate(self.t_in))
# Evaluate the tensorflow expressions
ret = self.session.run(self.t_out,feed_dict=d)
return ret
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self,nadj,name,inames,onames,opts):
# Construct tensorflow placeholders for the reverse seeds
adj_seed = [tf.placeholder(shape=self.sparsity_out(i).shape,dtype=tf.float64) for i in range(self.n_out())]
# Construct the reverse tensorflow graph through 'gradients'
grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in+adj_seed,grad,self.session)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name,nominal_in+nominal_out+adj_seed,callback.call(nominal_in+adj_seed),inames,onames)
if __name__=="__main__":
a = tf.placeholder(shape=(2,2),dtype=tf.float64)
b = tf.placeholder(shape=(2,1),dtype=tf.float64)
y = tf.matmul(tf.sin(a), b)
with tf.Session() as session:
f_tf = TensorFlowEvaluator([a,b], [y], session)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Now I want to write this purely using TensorFlow 2.x. Since eager execution is enabled by default, I was thinking of using @tf.function to calculate the gradient:
@tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
Here is the updated code at the moment:
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
def __init__(self, t_in, t_out, model, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.refs = []
self.model = model
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self, i):
        tensor_shape = self.t_in[i].get_shape().as_list()
        return Sparsity.dense(tensor_shape[0], tensor_shape[1])
# return Sparsity.dense(4, 1)
def get_sparsity_out(self, i):
return Sparsity.dense(2, 1)
def eval(self, arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
print(arg)
# d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
updated_t = []
for i,v in enumerate(self.t_in):
updated_t.append(tf.Variable(arg[i].toarray()))
# Evaluate the tensorflow expressions
if not tf.is_tensor(self.t_out[0]):
ret = self.t_out[0](updated_t)[0].numpy()
else:
ret = self.t_out[0](updated_t).numpy()
return [ca.DM(ret)]
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self, nadj, name, inames, onames, opts):
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
tf.config.run_functions_eagerly(False)
print("=============== self.t_in========", self.t_out)
print("=============== self.t_out========", self.t_in)
# grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
out_, grad = self.t_out[0](self.t_in)
print("============== grad========", grad)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)
if __name__=="__main__":
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
a = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
b = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))
    @tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
f_tf = TensorFlowEvaluator([a,b], [f_k], None)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Problem:
In the get_reverse method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in, grad_ys=adj_seed), I get the symbolic form, e.g., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>], in TensorFlow 1.
However, in TensorFlow 2 I always get numerical results. I can access the graph via self.t_out[0].get_concrete_function(self.t_in).graph, similar to here, but its tensors are not callable.
What would be the better way to get the symbolic gradient like in TensorFlow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad should return the symbolic form of the gradient rather than a numerical evaluation.
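For what it's worth, the closest TF2 construction I am aware of (a sketch, assuming the forward computation can be re-expressed inside the traced function) recomputes the forward pass under a tf.GradientTape inside a tf.function and feeds the adjoint seed through output_gradients; the concrete function obtained from it is a callable graph, which seems to be the nearest analogue of the symbolic tensors tf.gradients returned in TensorFlow 1:
@tf.function
def rev_fn(a, b, adj_seed):
    # The TF2 analogue of tf.gradients(y, [a, b], grad_ys=adj_seed)
    with tf.GradientTape() as tape:
        tape.watch([a, b])  # a and b may be plain tensors rather than Variables
        y = tf.matmul(tf.sin(a), b)
    return tape.gradient(y, [a, b], output_gradients=adj_seed)

# A graph-backed callable, built without running the computation numerically:
# cf = rev_fn.get_concrete_function(
#     tf.TensorSpec((2, 2), tf.float64),
#     tf.TensorSpec((2, 1), tf.float64),
#     tf.TensorSpec((2, 1), tf.float64))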
I am trying to run a NEAT algorithm using this Python implementation. This is the original file from the library that is relevant to my question:
from neat.graphs import feed_forward_layers
class FeedForwardNetwork(object):
def __init__(self, inputs, outputs, node_evals):
self.input_nodes = inputs
self.output_nodes = outputs
self.node_evals = node_evals
self.values = dict((key, 0.0) for key in inputs + outputs)
def activate(self, inputs):
if len(self.input_nodes) != len(inputs):
raise RuntimeError("Expected {0:n} inputs, got {1:n}".format(len(self.input_nodes), len(inputs)))
for k, v in zip(self.input_nodes, inputs):
self.values[k] = v
for node, act_func, agg_func, bias, response, links in self.node_evals:
node_inputs = []
for i, w in links:
node_inputs.append(self.values[i] * w)
s = agg_func(node_inputs)
self.values[node] = act_func(bias + response * s)
return [self.values[i] for i in self.output_nodes]
    @staticmethod
def create(genome, config):
""" Receives a genome and returns its phenotype (a FeedForwardNetwork). """
# Gather expressed connections.
connections = [cg.key for cg in genome.connections.values() if cg.enabled]
layers = feed_forward_layers(config.genome_config.input_keys, config.genome_config.output_keys, connections)
node_evals = []
for layer in layers:
for node in layer:
inputs = []
node_expr = [] # currently unused
for conn_key in connections:
inode, onode = conn_key
if onode == node:
cg = genome.connections[conn_key]
inputs.append((inode, cg.weight))
node_expr.append("v[{}] * {:.7e}".format(inode, cg.weight))
ng = genome.nodes[node]
aggregation_function = config.genome_config.aggregation_function_defs.get(ng.aggregation)
activation_function = config.genome_config.activation_defs.get(ng.activation)
node_evals.append((node, activation_function, aggregation_function, ng.bias, ng.response, inputs))
return FeedForwardNetwork(config.genome_config.input_keys, config.genome_config.output_keys, node_evals)
Since I evaluate the performance of my neural networks on a large dataset, I wanted to speed up the activate method using numba's JIT. In order not to fall back into numba's object mode, I had to update the implementation of the activate method (and hence also the fields of the FeedForwardNetwork class) to use only datatypes supported by numba. This is what I came up with (create is the same as before):
from neat.graphs import feed_forward_layers
from neat.six_util import itervalues
import numba
from numba import jit, njit
from numba.typed import List, Dict
import numpy as np
import math
@jit(nopython=True)
def activate(input_nodes, output_nodes, node_evals_node, node_evals_bias, node_evals_resp, node_evals_ins_nodes, node_evals_ins_conns, values, inputs):
for i in range(input_nodes.size):
values[input_nodes[i]] = inputs[i]
for node in range(len(node_evals_node)):
s = 0
for pred in range(len(node_evals_ins_nodes[node])):
s += values[node_evals_ins_nodes[node][pred]] * node_evals_ins_conns[node][pred]
values[node_evals_node[node]] = math.tanh(node_evals_bias[node] + node_evals_resp[node] * s)
return [values[output_nodes[i]] for i in range(output_nodes.size)]
class FeedForwardNetwork(object):
def __init__(self, inputs, outputs, node_evals):
self.input_nodes = np.array(inputs)
self.output_nodes = np.array(outputs)
# NODE_EVALS decomposition
self.node_evals_node = np.reshape(np.array(node_evals)[:, 0:1], (len(node_evals),)).astype(np.int64)
self.node_evals_bias = np.reshape(np.array(node_evals)[:, 3:4], (len(node_evals),)).astype(np.float64)
self.node_evals_resp = np.reshape(np.array(node_evals)[:, 4:5], (len(node_evals),)).astype(np.float64)
temp = np.array(node_evals)[:, 5:6]
self.node_evals_ins_nodes = List()
self.node_evals_ins_conns = List()
for node in range(temp.size):
l = List()
m = List()
for predecessor in range(len(temp[node])):
l.append(temp[0][node][predecessor][0])
m.append(temp[0][node][predecessor][1])
self.node_evals_ins_nodes.append(l)
self.node_evals_ins_conns.append(m)
self.values = Dict()
# Set types of dict
self.values[0] = float(1)
self.values.pop(0)
This is the code in which I call the create and activate methods:
def eval_single_genome(genome, config, thread_id, result):
net = neat.nn.FeedForwardNetwork.create(genome, config)
error_sum = 0
for i, row in PRICES.iterrows():
prediction = feed_forward.activate(net.input_nodes, net.output_nodes, net.node_evals_node, net.node_evals_bias, net.node_evals_resp, net.node_evals_ins_nodes, net.node_evals_ins_conns, net.values, np.array([0]))
error_sum += (prediction - PRICES.iloc[i]['open']) ** 2
result[thread_id] = error_sum
The code compiles and runs without errors or warnings, which (as far as I understand) indicates that numba should be able to optimize my implementation. But adding or removing the @jit(nopython=True) decorator doesn't change the runtime at all.
Did I overlook something? Or is there just nothing that numba can improve in my case?
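One thing worth checking (a hedged diagnostic sketch, not part of the original code): numba compiles on the first call, so any timing must exclude that call, and the dispatcher's signature lists show whether nopython compilation actually happened. If the per-call work is small, dispatch overhead and typed List/Dict indirection can also swallow any gains.
# Warm-up call pays the JIT compilation cost
activate(net.input_nodes, net.output_nodes, net.node_evals_node,
         net.node_evals_bias, net.node_evals_resp,
         net.node_evals_ins_nodes, net.node_evals_ins_conns,
         net.values, np.array([0]))
print(activate.signatures)           # non-empty list => a compiled version exists
print(activate.nopython_signatures)  # non-empty list => it compiled in nopython mode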
I'm trying to use tf.train.ExponentialMovingAverage with a PartitionedVariable.
I use a custom_getter to create an EMA version of the graph.
If I don't use a partitioner to create my variable, the following code works as expected: after setting the variable to zero, with a decay of 1, the EMA version of the variable keeps the original value.
However, if I use a partitioner, I have the following issues:
- TF 1.12: the ema_getter is unable to find the average of the PartitionedVariable, hence the two variables are the same object
- TF 1.15: I get an AttributeError: 'PartitionedVariable' object has no attribute 'experimental_ref'
Here is my code
import tensorflow as tf
import numpy as np
def ema_getter(ema):
def _ema_getter(getter, name, *args, **kwargs):
var = getter(name, *args, **kwargs)
ema_var = ema.average(var)
if not ema_var:
tf.logging.warning(f"Unable to find EMA of {name}")
return ema_var if ema_var else var
return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
print(sess.run(var_sum)) # 20.0
print(sess.run(var_ema_sum)) # 20.0
sess.run(tf.assign(var, tf.zeros_like(var)))
sess.run(ema_op)
print(sess.run(var_sum)) # 0.0
print(sess.run(var_ema_sum)) # should be 20.0
My understanding so far is that a PartitionedVariable does not act as a standard Variable but is merely a shell around a list of other Variables.
The custom_getter needs to take this into account and manually retrieve and reconstruct a PartitionedVariable using the EMA versions of the original PartitionedVariable's variables.
However, this seems hacky: using .__class__ because I could not find a clean way to import PartitionedVariable, and accessing the private attribute _partitions, for example.
Sharing my current fix here:
import tensorflow as tf
import numpy as np
def ema_getter(ema):
def _ema_getter(getter, name, *args, **kwargs):
var = getter(name, *args, **kwargs)
# Manually reconstruct if PartitionedVariable
if var.__class__.__name__ == "PartitionedVariable":
ema_vs = [ema.average(v) for v in var]
ema_var = var.__class__(
name=var.name,
shape=var.shape,
dtype=var.dtype,
variable_list=ema_vs,
partitions=var._partitions,
)
else:
ema_var = ema.average(var)
if not ema_var:
tf.logging.warning(f"Unable to find EMA of {name}")
return ema_var if ema_var else var
return _ema_getter
if __name__ == "__main__":
use_partitioner = True
var = tf.get_variable(
name='var',
shape=[10, 2],
initializer=tf.ones_initializer(),
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
var_sum = tf.reduce_sum(var)
ema = tf.train.ExponentialMovingAverage(1.0)
variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)
ema_op = ema.apply(variables)
with tf.variable_scope(tf.get_variable_scope(), reuse=True, custom_getter=ema_getter(ema)):
var_ema = tf.get_variable(
name='var',
shape=[10, 2],
partitioner=tf.fixed_size_partitioner(2, axis=0) if use_partitioner else None
)
print(f"EMA variable name: {var_ema.name}")
var_ema_sum = tf.reduce_sum(var_ema)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(ema_op)
        print(sess.run(var_sum))  # 20.0
        print(sess.run(var_ema_sum))  # 20.0
        sess.run(tf.assign(var, tf.zeros_like(var)))
        sess.run(ema_op)
        print(sess.run(var_sum))  # 0.0
        print(sess.run(var_ema_sum))  # 20.0, now that the EMA variables are found
I have been using TensorFlow for a reasonable length of time now and believed I had a thorough understanding of how a TensorFlow graph works and executes within a session. However, I have written all of my TensorFlow models in a script-like fashion, like so:
import tensorflow as tf
import DataWorker
import Constants
x = tf.placeholder(tf.float32, [None, Constants.sequenceLength, DataWorker.numFeatures])
y = tf.placeholder(tf.float32, [None, 1])
xTensors = tf.unstack(x, axis=1) # [seqLength tensors of shape (batchSize, numFeatures)]
W = tf.Variable(tf.random_normal([Constants.numHidden, 1])) # Weighted matrix
b = tf.Variable(tf.random_normal([1])) # Bias
cell = tf.contrib.rnn.BasicLSTMCell(Constants.numHidden, forget_bias=Constants.forgetBias)
outputs, finalState = tf.nn.static_rnn(cell, xTensors, dtype=tf.float32)
# predictions = [tf.add(tf.matmul(output, W), b) for output in outputs] # List of predictions after each time step
prediction = tf.add(tf.matmul(outputs[-1], W), b) # Prediction after final time step
prediction = tf.tanh(prediction) # Activation
mse = tf.losses.mean_squared_error(predictions=prediction, labels=y) # Mean loss over entire batch
accuracy = tf.reduce_mean(1 - (tf.abs(y - prediction) / DataWorker.labelRange)) # Accuracy over entire batch
optimiser = tf.train.AdamOptimizer(Constants.learningRate).minimize(mse) # Backpropagation
with tf.Session() as session:
session.run(tf.global_variables_initializer())
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
dict = {x: batchX, y: batchY}
session.run(optimiser, dict)
if batchNum % 1000 == 0 or epochComplete:
batchLoss = session.run(mse, dict)
batchAccuracy = session.run(accuracy, dict)
print("Iteration:", batchNum)
print(batchLoss)
print(str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
testAccuracy = session.run(accuracy, {x: testX, y: testY})
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
But now, for practicality and readability, I want to implement my model as a class, and I have encountered many problems with initializing my variables, etc.
This is the closest I have got to implementing the above example using my own LSTM class.
Model.py
import tensorflow as tf
import Constants
import DataWorker # Remove this dependency
class LSTM():
"""docstring."""
def __init__(self,
inputDimensionList,
outputDimensionList,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias
):
"""docstring."""
self.batchInputs = tf.placeholder(tf.float32, [None] + inputDimensionList)
self.batchLabels = tf.placeholder(tf.float32, [None] + outputDimensionList)
self.weightedMatrix = tf.Variable(tf.random_normal([numHidden] + outputDimensionList))
self.biasMatrix = tf.Variable(tf.random_normal(outputDimensionList))
self.cell = tf.contrib.rnn.BasicLSTMCell(numHidden, forget_bias=forgetBias)
self.numLayers = numLayers
self.numHidden = numHidden
self.learningRate = learningRate
self.forgetBias = forgetBias
self.batchDict = {}
self.batchInputTensors = None
self.batchOutputs = None # All needed as instance variables?
self.batchFinalStates = None
self.batchPredictions = None
self.batchLoss = None
self.batchAccuracy = None
self.initialised = False
self.session = tf.Session()
# Take in activation, loss and optimiser FUNCTIONS as args
def execute(self, command):
"""docstring."""
return self.session.run(command, self.batchDict)
def setBatchDict(self, inputs, labels):
"""docstring."""
self.batchDict = {self.batchInputs: inputs, self.batchLabels: labels}
self.batchInputTensors = tf.unstack(self.batchInputs, axis=1)
def processBatch(self):
"""docstring."""
self.batchOutputs, self.batchFinalState = tf.nn.static_rnn(self.cell, self.batchInputTensors, dtype=tf.float32)
pred = tf.tanh(tf.add(tf.matmul(self.batchOutputs[-1], self.weightedMatrix), self.biasMatrix))
mse = tf.losses.mean_squared_error(predictions=pred, labels=self.batchLabels)
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
if not self.initialised:
self.session.run(tf.global_variables_initializer())
self.initialised = True
with tf.variable_scope("model") as scope:
if self.initialised:
scope.reuse_variables()
self.execute(optimiser)
self.batchPredictions = self.execute(pred)
self.batchLoss = self.execute(tf.losses.mean_squared_error(predictions=self.batchPredictions, labels=self.batchLabels))
self.batchAccuracy = self.execute(tf.reduce_mean(1 - (tf.abs(self.batchLabels - self.batchPredictions) / DataWorker.labelRange)))
return self.batchPredictions, self.batchLabels, self.batchLoss, self.batchAccuracy
def kill(self):
"""docstring."""
self.session.close()
This class is quite messy, especially processBatch(), as I have just been trying to get it to work before refining it.
I then run my model here:
Main.py
import DataWorker
import Constants
from Model import LSTM
inputDim = [Constants.sequenceLength, DataWorker.numFeatures]
outputDim = [1]
lstm = LSTM(inputDimensionList=inputDim, outputDimensionList=outputDim)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
lstm.setBatchDict(batchX, batchY)
        batchPredictions, batchLabels, batchLoss, batchAccuracy = lstm.processBatch()
if batchNum % 1000 == 0 or epochComplete:
print("Iteration:", batchNum)
print("Pred:", batchPredictions[-1], "\tLabel:", batchLabels[-1])
print("Loss:", batchLoss)
print("Accuracy:", str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
lstm.setBatchDict(testX, testY)
_, _, _, testAccuracy = lstm.processBatch()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
lstm.kill()
A single pass through the graph executes fine, once all the variables are initialized, but it is on the second iteration that I get the error:
ValueError: Variable rnn/basic_lstm_cell/kernel/Adam/ already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
I Googled this problem and learned that using scope.reuse_variables() should stop it from trying to initialize the AdamOptimizer a second time, but clearly this isn't working the way I have implemented it. How can I fix this issue?
As a side note, is my method of creating the TensorFlow session as an instance variable within my LSTM class acceptable, or should I create the session in Main and then pass it into the LSTM instance?
In general, I wrap anything that creates variables under the hood with tf.make_template when doing object-oriented model building.
However, you should avoid adding ops to the graph in a training loop, which looks like what is happening here. They will build up and cause problems, and will likely give you incorrect results. Instead, define the graph once (with inputs from tf.data, placeholders, or queues) and only loop over a session.run call. Even better, structure your code as an Estimator, and this will be enforced.
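To make that concrete, here is a minimal sketch of the suggested restructuring, with names assumed from the question's code: every op is created exactly once in __init__, so the Adam slot variables are only created once (and the reuse error cannot occur), and the loop body reduces to session.run calls.
class LSTM:
    def __init__(self, inputDimensionList, outputDimensionList,
                 numHidden=Constants.numHidden,
                 learningRate=Constants.learningRate,
                 forgetBias=Constants.forgetBias):
        self.batchInputs = tf.placeholder(tf.float32, [None] + inputDimensionList)
        self.batchLabels = tf.placeholder(tf.float32, [None] + outputDimensionList)
        W = tf.Variable(tf.random_normal([numHidden] + outputDimensionList))
        b = tf.Variable(tf.random_normal(outputDimensionList))
        cell = tf.contrib.rnn.BasicLSTMCell(numHidden, forget_bias=forgetBias)
        outputs, _ = tf.nn.static_rnn(cell, tf.unstack(self.batchInputs, axis=1), dtype=tf.float32)
        self.prediction = tf.tanh(tf.add(tf.matmul(outputs[-1], W), b))
        self.mse = tf.losses.mean_squared_error(predictions=self.prediction, labels=self.batchLabels)
        self.optimiser = tf.train.AdamOptimizer(learningRate).minimize(self.mse)  # built exactly once
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())

    def processBatch(self, inputs, labels):
        # No graph construction here: only run the already-built ops.
        feed = {self.batchInputs: inputs, self.batchLabels: labels}
        _, loss = self.session.run([self.optimiser, self.mse], feed)
        return loss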