Related
I want to convert https://web.casadi.org/blog/tensorflow/ , which was written in Tensorflow 1 with casadi, using Tensorflow 2. I have changed the code yet tf.disable_v2_behavior() had to be done to get it working.
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
def __init__(self,t_in,t_out,session, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
session: a tensorflow session
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.session = session
self.refs = []
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self,i):
return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())
def get_sparsity_out(self,i):
return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())
def eval(self,arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
d = dict((v,arg[i].toarray()) for i,v in enumerate(self.t_in))
# Evaluate the tensorflow expressions
ret = self.session.run(self.t_out,feed_dict=d)
return ret
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self,nadj,name,inames,onames,opts):
# Construct tensorflow placeholders for the reverse seeds
adj_seed = [tf.placeholder(shape=self.sparsity_out(i).shape,dtype=tf.float64) for i in range(self.n_out())]
# Construct the reverse tensorflow graph through 'gradients'
grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in+adj_seed,grad,self.session)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name,nominal_in+nominal_out+adj_seed,callback.call(nominal_in+adj_seed),inames,onames)
if __name__=="__main__":
a = tf.placeholder(shape=(2,2),dtype=tf.float64)
b = tf.placeholder(shape=(2,1),dtype=tf.float64)
y = tf.matmul(tf.sin(a), b)
with tf.Session() as session:
f_tf = TensorFlowEvaluator([a,b], [y], session)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Now I want to write this purely using Tensorflow 2.x. Eager execution is enabled by default I was thinking to use #tf.function to calculate the gradient,
#tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
Here is the update the code at the moment,
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
def __init__(self, t_in, t_out, model, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.refs = []
self.model = model
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self, i):
tesnor_shape = self.t_in[i].get_shape().as_list()
return Sparsity.dense(tesnor_shape[0], tesnor_shape[1])
# return Sparsity.dense(4, 1)
def get_sparsity_out(self, i):
return Sparsity.dense(2, 1)
def eval(self, arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
print(arg)
# d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
updated_t = []
for i,v in enumerate(self.t_in):
updated_t.append(tf.Variable(arg[i].toarray()))
# Evaluate the tensorflow expressions
if not tf.is_tensor(self.t_out[0]):
ret = self.t_out[0](updated_t)[0].numpy()
else:
ret = self.t_out[0](updated_t).numpy()
return [ca.DM(ret)]
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self, nadj, name, inames, onames, opts):
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
tf.config.run_functions_eagerly(False)
print("=============== self.t_in========", self.t_out)
print("=============== self.t_out========", self.t_in)
# grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
out_, grad = self.t_out[0](self.t_in)
print("============== grad========", grad)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)
if __name__=="__main__":
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
a = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
b = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))
#tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
f_tf = TensorFlowEvaluator([a,b], [f_k], None)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Problem:
In the get_reverse method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed), I get symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>] in Tensorflow 1.
However, in Tensorflow 2, I always get numerical results. I can access the graph but those are not callable. self.t_out[0].get_concrete_function(self.t_in).graph similar to here
What would be the better way to get the symbolic gradient like in Tensorflow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad should return symbolic form of the gradient rather than numerical evaluation
I am trying to implement the Unrolled GAN model as described here, with example code. However, it was implemented using TF1, and I have been doing my best to update it but I am relatively new to python and TF (only been using it for the past ~6 months).
The line(s) that I cannot seem to make work (for the moment, there may be more) is this one:
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
These both return empty lists, and I cannot see what I am missing. Even without specifying a scope, the get_collection() returns []. Earlier, we define both generator and discriminator as scopes like so:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
Is there a problem with the definition of the scope?
Here is my updated code in full, in case there is maybe something I missed elsewhere:
%pylab inline
from collections import OrderedDict
import tensorflow as tf
import tensorflow_probability as tfp
ds = tfp.distributions
# slim = tf.contrib.slim
import tf_slim as slim
from keras.optimizers import Adam
try:
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy
generate_movie = True
except:
print("Warning: moviepy not found.")
generate_movie = False
def remove_original_op_attributes(graph):
"""Remove _original_op attribute from all operations in a graph."""
for op in graph.get_operations():
op._original_op = None
def graph_replace(*args, **kwargs):
"""Monkey patch graph_replace so that it works with TF 1.0"""
remove_original_op_attributes(tf.get_default_graph())
return _graph_replace(*args, **kwargs)
def extract_update_dict(update_ops):
"""Extract variables and their new values from Assign and AssignAdd ops.
Args:
update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()
Returns:
dict mapping from variable values to their updated value
"""
name_to_var = {v.name: v for v in tf.compat.v1.global_variables()}
updates = OrderedDict()
for update in update_ops:
var_name = update.op.inputs[0].name
var = name_to_var[var_name]
value = update.op.inputs[1]
if update.op.type == 'Assign':
updates[var.value()] = value
elif update.op.type == 'AssignAdd':
updates[var.value()] = var + value
else:
raise ValueError("Update op type (%s) must be of type Assign or AssignAdd"%update_op.op.type)
return updates
def sample_mog(batch_size, n_mixture=8, std=0.01, radius=1.0):
thetas = np.linspace(0, 2 * np.pi, n_mixture)
xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
cat = ds.Categorical(tf.zeros(n_mixture))
comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
data = ds.Mixture(cat, comps)
return data.sample(batch_size)
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope("generator"):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
def discriminator(x, n_hidden=128, n_layer=2, reuse=False):
with tf.compat.v1.variable_scope("discriminator", reuse=reuse):
h = slim.stack(x, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
log_d = slim.fully_connected(h, 1, activation_fn=None)
return log_d
params = dict(
batch_size=512,
disc_learning_rate=1e-4,
gen_learning_rate=1e-3,
beta1=0.5,
epsilon=1e-8,
max_iter=25000,
viz_every=5000,
z_dim=256,
x_dim=2,
unrolling_steps=5,
)
tf.compat.v1.reset_default_graph()
data = sample_mog(params['batch_size'])
noise = ds.Normal(tf.zeros(params['z_dim']),
tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
# with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=1.4)): ## old
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.keras.initializers.Orthogonal(gain=1.4)):
samples = generator(noise, output_dim=params['x_dim'])
real_score = discriminator(data)
fake_score = discriminator(samples, reuse=True)
# Saddle objective
loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(real_score, dtype=tf.float32), labels=tf.cast(tf.ones_like(real_score), dtype=tf.float32)) +
tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.cast(fake_score, dtype=tf.float32), labels=tf.cast(tf.zeros_like(fake_score), dtype=tf.float32)))
gen_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
# updates = d_opt.get_updates(disc_vars, [], loss) ## old
updates = d_opt.get_updates(loss, [])
d_train_op = tf.group(*updates, name="d_train_op")
### I HAVE NOT UPDATED BEYOND THIS POINT ###
# Unroll optimization of the discrimiantor
if params['unrolling_steps'] > 0:
# Get dictionary mapping from variables to their update value after one optimization step
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in xrange(params['unrolling_steps'] - 1):
# Compute variable updates given the previous iteration's updated variable
cur_update_dict = graph_replace(update_dict, cur_update_dict)
# Final unrolled loss uses the parameters at the last time step
unrolled_loss = graph_replace(loss, cur_update_dict)
else:
unrolled_loss = loss
# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
The implementation of get_collection:
def get_collection(key, scope=None):
"""Wrapper for `Graph.get_collection()` using the default graph.
See `tf.Graph.get_collection`
for more details.
Args:
key: The key for the collection. For example, the `GraphKeys` class contains
many standard names for collections.
scope: (Optional.) If supplied, the resulting list is filtered to include
only items whose `name` attribute matches using `re.match`. Items without
a `name` attribute are never returned if a scope is supplied and the
choice or `re.match` means that a `scope` without special tokens filters
by prefix.
Returns:
The list of values in the collection with the given `name`, or
an empty list if no value has been added to that collection. The
list contains the values in the order under which they were
collected.
#compatibility(eager)
Collections are not supported when eager execution is enabled.
#end_compatibility
"""
return get_default_graph().get_collection(key, scope)
It looks like in this code, key and scope arguments are swapped. If you provide "generator" or "discriminator" as the key with no scope i.e;
gen_vars = tf.compat.v1.get_collection("generator")
disc_vars = tf.compat.v1.get_collection("discriminator")
You should get results (I was able to reproduce locally with Tensorflow 2.2.0). The only issue I could not quite identify is, when providing scope, the function returns an empty list again, regardless of the scope value you provide. For example, tf.compat.v1.GLOBAL_VARIABLES should return everything, but that is not the case:
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) # returns []
gen_vars = tf.compat.v1.get_default_graph().get_collection('generator', tf.compat.v1.GraphKeys.GLOBAL_VARIABLES) # returns []
disc_vars = tf.compat.v1.get_collection('generator') # returns a list of tensors
Update
It looks like even creating the variables in the context manager doesn't add them to the graph collection. I had to call tf.compat.v1.add_to_collection('generator', x) and tf.compat.v1.add_to_collection('discriminator', log_d) in the respective functions to get those results.
Update #2
I searched around and it doesn't appear there's a context manager which enables you to add variables declared within it to a Tensorflow collection. For the sake of completeness of this answer though, I have implemented one:
from contextlib import contextmanager
#contextmanager
def collection_scope(collection_name):
import inspect
from tensorflow.python.framework.ops import EagerTensor
collection = tf.compat.v1.get_collection_ref(collection_name)
yield
# this is a bit of a hack, but it works...
f = inspect.currentframe().f_back.f_back
# only take variables which were declared within the context manager
tf_variables = set([val.ref() for val in f.f_locals.values() if isinstance(val, EagerTensor)]) - \
set([val.ref() for val in f.f_back.f_locals.values() if isinstance(val, EagerTensor)])
collection.extend(tf_variables)
You can then drop this in your functions in place of the variable scope (tf.compat.v1.variable_scope) context managers. For example, instead of:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with tf.compat.v1.variable_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
Do the following:
def generator(z, output_dim=2, n_hidden=128, n_layer=2):
with collection_scope('generator'):
h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.tanh)
x = slim.fully_connected(h, output_dim, activation_fn=None)
return x
With this change, all tensors declared within the scope of the context manager will be added to the collection "generator" - tf.compat.v1.get_collection('generator') will return the correct list of tensors.
Here, I have LSTM Autoencoder written in Keras. I want to convert the code to Chainer.
import numpy as np
from keras.layers import Input, GRU
from keras.models import Model
input_feat = Input(shape=(30, 2000))
l = GRU( 100, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(input_feat)
l = GRU(2000, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(l)
model = Model(input_feat, l)
model.compile(optimizer="RMSprop", loss="mean_squared_error")
feat = np.load("feat.npy")
model.fit(feat, feat[:, ::-1, :], epochs=200, batch_size=250)
feat is numpy whose dimension is (269, 30, 2000). I could run above code and the result was reasonable. I had written below Chainer code.
import numpy as np
from chainer import Chain, Variable, optimizers
import chainer.functions as F
import chainer.links as L
class GRUAutoEncoder(Chain):
def __init__(self):
super().__init__()
with self.init_scope():
self.encode = L.GRU(2000, 100)
self.decode = L.GRU(100, 2000)
def __call__(self, h, mode):
if mode == "encode":
h = F.tanh(self.encode(h))
return h
if mode == "decode":
h = F.tanh(self.decode(h))
return h
def reset(self):
self.encode.reset_state()
self.decode.reset_state()
def main():
feat = np.load("feat.npy") #(269, 30, 2000)
gru_autoencoder = GRUAutoEncoder()
optimizer = optimizers.RMSprop(lr=0.01).setup(gru_autoencoder)
N = len(feat)
batch_size = 250
for epoch in range(200):
index = np.random.randint(0, N-batch_size+1)
input_splices = feat[index:index+batch_size] #(250, 30, 2000)
#Encoding
input_vector = np.zeros((30, batch_size, 2000), dtype="float32")
h = []
for i in range(frame_rate):
input_vector[i] = input_splices[:, i, :] #(250, 1, 2000)
tmp = Variable(input_vector[i])
h.append(gru_autoencoder(tmp, "encode")) #(250, 100)
#Decoding
output_vector = []
for i in range(frame_rate):
tmp = h[i]
output_vector.append(gru_autoencoder(tmp, "decode"))
x = input_vector[0]
t = output_vector[0]
for i in range(len(output_vector)):
x = F.concat((x,input_vector[i]), axis=1)
t = F.concat((t,output_vector[i]), axis=1)
loss = F.mean_squared_error(x, t)
gru_autoencoder.cleargrads()
loss.backward()
optimizer.update()
gru_autoencoder.reset()
if __name__ == "__main__":
main()
But the result of above code was not reasonable. I think the Chainer code has something wrong but I cannot find where it is.
In Keras code,
model.fit(feat, feat[:, ::-1, :])
So, I tried to reverse output_vector in Chainer code,
output_vector.reverse()
but the result was not still reasonable.
.. note: This answer is a translation of [Japanese SO].(https://ja.stackoverflow.com/questions/52162/keras%E3%81%AE%E3%82%B3%E3%83%BC%E3%83%89%E3%82%92chainer%E3%81%AB%E6%9B%B8%E3%81%8D%E6%8F%9B%E3%81%88%E3%81%9F%E3%81%84lstm-autoencoder%E3%81%AE%E5%AE%9F%E8%A3%85/52213#52213)
You should avoid using L.GRU and should use L.NStepGRU, because for L.GRU you have to write "recurrence-aware" code. In other words, you have to apply L.GRU multiple times to one timeseries, therefore "batch" must be treated with great care. L.NStepGRU (with n_layers=1) wraps the batch-processing, so it would be user-friendly.
An instance of L.StepGRU takes two input arguments: one is initial state, and the other is a list of timeserieses, which composes a batch. Conventionally, the initial state is None.
Therefore, the whole answer for your question is as follows.
### dataset.py
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
N_SAMPLES = 269
N_TIMESERIES = 30
N_DIMS = 2000
def __init__(self):
super().__init__()
self.data = np.random.randn(self.N_SAMPLES, self.N_TIMESERIES, self.N_DIMS) \
.astype(np.float32)
def __len__(self):
return self.N_SAMPLES
def get_example(self, i):
return self.data[i, :, :]
### model.py
import chainer
from chainer import links as L
from chainer import functions as F
from chainer.link import Chain
class MyModel(Chain):
N_IN_CHANNEL = 2000
N_HIDDEN_CHANNEL = 100
N_OUT_CHANNEL = 2000
def __init__(self):
super().__init__()
self.encoder = L.NStepGRU(n_layers=1, in_size=self.N_IN_CHANNEL, out_size=self.N_HIDDEN_CHANNEL, dropout=0)
self.decoder = L.NStepGRU(n_layers=1, in_size=self.N_HIDDEN_CHANNEL, out_size=self.N_OUT_CHANNEL, dropout=0)
def to_gpu(self, device=None):
self.encoder.to_gpu(device)
self.decoder.to_gpu(device)
def to_cpu(self):
self.encoder.to_cpu()
self.decoder.to_cpu()
#staticmethod
def flip_list(source_list):
return [F.flip(source, axis=1) for source in source_list]
def __call__(self, source_list):
"""
.. note:
This implementation makes use of "auto-encoding"
by avoiding redundant copy in GPU device.
In the typical implementation, this function should receive
both of ``source_list`` and ``target_list``.
"""
target_list = self.flip_list(source_list)
_, h_list = self.encoder(hx=None, xs=source_list)
_, predicted_list = self.decoder(hx=None, xs=h_list)
diff_list = [F.mean_squared_error(target, predicted).reshape((1,)) for target, predicted in zip(target_list, predicted_list)]
loss = F.sum(F.concat(diff_list, axis=0))
chainer.report({'loss': loss}, self)
return loss
### converter.py (referring examples/seq2seq/seq2seq.py)
from chainer.dataset import to_device
def convert(batch, device):
"""
.. note:
batch must be list(batch_size) of array
"""
if device is None:
return batch
else:
return [to_device(device, x) for x in batch]
### train.py
from chainer.iterators import SerialIterator
from chainer.optimizers import RMSprop
from chainer.training.updaters import StandardUpdater
from chainer.training.trainer import Trainer
dataset = MyDataset()
BATCH_SIZE = 32
iterator = SerialIterator(dataset, BATCH_SIZE)
model = MyModel()
optimizer = RMSprop()
optimizer.setup(model)
updater = StandardUpdater(iterator, optimizer, convert, device=0)
trainer = Trainer(updater, (100, 'iteration'))
from chainer.training.extensions import snapshot_object
trainer.extend(snapshot_object(model, "model_iter_{.updater.iteration}"), trigger=(10, 'iteration'))
from chainer.training.extensions import LogReport, PrintReport, ProgressBar
trainer.extend(LogReport(['epoch', 'iteration', 'main/loss'], (1, 'iteration')))
trainer.extend(PrintReport(['epoch', 'iteration', 'main/loss']), trigger=(1, 'iteration'))
trainer.extend(ProgressBar(update_interval=1))
trainer.run()
I'm trying to solve the 'BipedalWalker-v2' problem from Open AI, by using python and Tensorflow. In order to solve it I'm implementing an episodic policy gradient algorithms. Because the 'BipedalWalker-v2' actions are continuous my policy is approximated by a multivariate Gaussian distribution. The mean of this distribution is approximated using a fully connected neural network. My neural network has the following layers: [input:24,hidden:5,hidden:5,output:4]. My problem is that when I train the agent, the training process gets slower and slower until it almost freeze. My guess is that I'm misusing sess.run, I'm not feeding the batches in an efficient way. But is just a guess. My question is: Is my guess correct? if it is correct, how can I improve it? and if it is something else, what it is? I'm not looking for a literal solution I just want to get some lights about how to improve the training.
Thanks in advance,
my computer is a Inspiron 15 7000 Gaming, GeForce nvidia gtx 1050, 8 gb ram,cpu: I5
My CODE:
Libraries:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import gym
import matplotlib.pyplot as plt
Agent class:
class agent_episodic_continuous_action():
def __init__(self, lr, s_size,a_size,batch_size,dist_type):
self.stuck = False
self.gamma = 0.99
self.dist_type = dist_type
self.is_brain_present = False
self.s_size = s_size
self.batch_size=batch_size
self.state_in= tf.placeholder(shape=[None,s_size],dtype=tf.float32)
self.a_size=a_size
self.reward_holder = tf.placeholder(shape=[None],dtype=tf.float32)
self.cov = tf.eye(a_size)
self.reduction = 0.01
if a_size > 1:
self.action_holder = tf.placeholder(shape=[None,a_size],dtype=tf.float32)
else:
self.action_holder = tf.placeholder(shape=[None],dtype=tf.float32)
self.gradient_holders = []
self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)
def save_model(self,path,sess):
self.saver.save(sess, path)
def load_model(self,path,sess):
self.saver.restore(sess, path)
def create_brain(self,hidd_layer,hidd_layer_act_fn,output_act_fn):
self.is_brain_present = True
hidden_output=slim.stack(self.state_in,slim.fully_connected,hidd_layer,activation_fn=hidd_layer_act_fn)
self.output = slim.fully_connected(hidden_output,self.a_size,activation_fn=output_act_fn,biases_initializer=None)
def create_pi_dist(self):
if self.dist_type == "normal":
# amplify= tf.pow(slim.fully_connected(self.output,1,activation_fn=None,biases_initializer=None),2)
mean= self.output
#cov =tf.eye(self.a_size,batch_shape=[self.batch_size])*amplify
normal = tf.contrib.distributions.MultivariateNormalFullCovariance(
loc=mean,
covariance_matrix=self.cov*self.reduction)
self.dist = normal
def create_loss(self):
self.loss = -tf.reduce_mean(tf.log(self.dist.prob(self.action_holder))*self.reward_holder)
def get_gradients_holder(self):
for idx,var in enumerate(self.tvars):
placeholder = tf.placeholder(tf.float32,name=str(idx)+'_holder')
self.gradient_holders.append(placeholder)
def sample_action(self,sess,state):
sample_action= sess.run(self.dist.sample(),feed_dict={self.state_in:state})
return sample_action
def calculate_loss_gradient(self):
self.gradients = tf.gradients(self.loss,self.tvars)
def update_weights(self):
self.update_batch = self.optimizer.apply_gradients(zip(self.gradients,self.tvars))
return self.update_batch
def memorize_data(self,episode,first):
if first:
self.episode_history = episode
self.stuck = False
else:
self.episode_history = np.vstack((self.episode_history,episode))
def shuffle_memories(self):
np.random.shuffle(self.episode_history)
def create_graph_connections(self):
if self.is_brain_present:
self.create_pi_dist()
self.create_loss()
self.tvars = tf.trainable_variables()
self.calculate_loss_gradient()
self.saver = tf.train.Saver()
self.update_weights()
else:
print("initialize brain first")
self.init = tf.global_variables_initializer()
def memory_batch_generator(self):
total=self.episode_history.shape[0]
amount_of_batches= int(total/self.batch_size)
for i in range(amount_of_batches+1):
if i < amount_of_batches:
top=(i+1)*self.batch_size
bottom =i*self.batch_size
yield (self.episode_history[bottom:top,0:self.s_size],self.episode_history[bottom:top,self.s_size:self.s_size+self.a_size],self.episode_history[bottom:top,self.s_size+self.a_size:self.s_size+self.a_size+1],self.episode_history[bottom:top,self.s_size+self.a_size+1:])
else:
yield (self.episode_history[top:,0:self.s_size],self.episode_history[top:,self.s_size:self.s_size+self.a_size],self.episode_history[top:,self.s_size+self.a_size:self.s_size+self.a_size+1],self.episode_history[top:,self.s_size+self.a_size+1:])
def train_with_current_memories(self,sess):
self.sess = sess
for step_sample_batch in self.memory_batch_generator():
sess.run(self.update_weights(), feed_dict={self.state_in:step_sample_batch[0],self.action_holder:step_sample_batch[1],self.reward_holder:step_sample_batch[2].reshape([step_sample_batch[2].shape[0]])})
def get_returns(self):
self.episode_history[:,self.s_size+self.a_size:self.s_size+self.a_size+1] = self.discount_rewards(self.episode_history[:,self.s_size+self.a_size:self.s_size+self.a_size+1])
def discount_rewards(self,r):
""" take 1D float array of rewards and compute discounted reward """
discounted_r = np.zeros_like(r)
running_add = 0
for t in reversed(range(0, r.size)):
running_add = running_add * self.gamma + r[t]
discounted_r[t] = running_add
return discounted_r
def prob_action(self,sess,action,state):
prob = sess.run(self.dist.prob(action),feed_dict={self.state_in:state})
return prob
def check_movement(self):
ep_back = 5
jump = 3
threshold = 3
if len(self.episode_history) > ep_back*2:
difference = sum(abs(self.episode_history[-ep_back:-1,:]-self.episode_history[-ep_back-jump:-1-jump,:]).flatten())
print(difference)
if difference < threshold:
self.stuck = True
def print_last_n_returns(self,n):
if len(self.episode_history[:,self.s_size+self.a_size:self.s_size+self.a_size+1])>n:
n_returns = sum(self.episode_history[-n:,self.s_size+self.a_size:self.s_size+self.a_size+1])/float(n)
print(n_returns)
return n_returns
Training loops:
tf.reset_default_graph()
agent_2= agent_episodic_continuous_action(1e-2,s_size=24,a_size=4,batch_size=30,dist_type="normal")
agent_2.create_brain([5,5],tf.nn.relu,None)
agent_2.create_graph_connections()
env = gym.make('BipedalWalker-v2')
with tf.Session() as sess:
sess.run(agent_2.init)
for i in range(200):
s = env.reset()
d = False
a=agent_2.sample_action(sess,[s])[0]
print(a)
if None in a:
print("None in a! inside for")
print(s)
s1,r,d,_ = env.step(a)
episode = np.hstack((s,a,r,s1))
agent_2.memorize_data(episode=episode,first=True)
count = 0
while not d:
count = count + 1
s = s1
a=agent_2.sample_action(sess,[s])[0]
s1,r,d,_ = env.step(a)
episode = np.hstack((s,a,r,s1))
# env.render()
agent_2.memorize_data(episode=episode,first=False)
# print(s1)
if count % 5 == 0 :
agent_2.check_movement()
if agent_2.stuck:
d = True
agent_2.get_returns()
agent_2.print_last_n_returns(20)
agent_2.shuffle_memories()
agent_2.train_with_current_memories(sess)
env.close()
For each batch of 30 samples I execute Agent.update_weights()
def update_weights(self):
self.update_batch = self.optimizer.apply_gradients(zip(self.gradients,self.tvars))
When I execute:
def train_with_current_memories(self,sess):
self.sess = sess
for step_sample_batch in self.memory_batch_generator():
sess.run(self.update_weights(), feed_dict={self.state_in:step_sample_batch[0],self.action_holder:step_sample_batch[1],self.reward_holder:step_sample_batch[2].reshape([step_sample_batch[2].shape[0]])})
Or maybe this sluggishness is an expected behavior.
The code was slowing down after each iteration because the graph was getting bigger at each iteration. This is because I was creating new graph elements inside the iteration loop.
during each iteration the following function was being called:
def update_weights(self):
self.update_batch = self.optimizer.apply_gradients(zip(self.gradients,self.tvars))
return self.update_batch
This function was creating a new element to the graph.
The best way to avoid "graph leaking" is to add the line
sess.graph.finalize()
as soon as you create your session. In this way, if there is a graph leaking, Tensorflow will raise an exception.
I'm trying to use one_vs_one composition of decision trees for multiclass classification. The problem is, when I pass different object weights to a classifier, the result stays the same.
Do I misunderstand something with weights, or do they just work incorrectly?
Thanks for your replies!
Here is my code:
class AdaLearner(object):
def __init__(self, in_base_type, in_multi_type):
self.base_type = in_base_type
self.multi_type = in_multi_type
def train(self, in_features, in_labels):
model = AdaBoost(self.base_type, self.multi_type)
model.learn(in_features, in_labels)
return model
class AdaBoost(object):
CLASSIFIERS_NUM = 100
def __init__(self, in_base_type, in_multi_type):
self.base_type = in_base_type
self.multi_type = in_multi_type
self.classifiers = []
self.weights = []
def learn(self, in_features, in_labels):
labels_number = len(set(in_labels))
self.weights = self.get_initial_weights(in_labels)
for iteration in xrange(AdaBoost.CLASSIFIERS_NUM):
classifier = self.multi_type(self.base_type())
self.classifiers.append(classifier.train(in_features,
in_labels,
weights=self.weights))
answers = []
for obj in in_features:
answers.append(self.classifiers[-1].apply(obj))
err = self.compute_weighted_error(in_labels, answers)
print err
if abs(err - 0.) < 1e-6:
break
alpha = 0.5 * log((1 - err)/err)
self.update_weights(in_labels, answers, alpha)
self.normalize_weights()
def apply(self, in_features):
answers = {}
for classifier in self.classifiers:
answer = classifier.apply(in_features)
if answer in answers:
answers[answer] += 1
else:
answers[answer] = 1
ranked_answers = sorted(answers.iteritems(),
key=lambda (k,v): (v,k),
reverse=True)
return ranked_answers[0][0]
def compute_weighted_error(self, in_labels, in_answers):
error = 0.
w_sum = sum(self.weights)
for ind in xrange(len(in_labels)):
error += (in_answers[ind] != in_labels[ind]) * self.weights[ind] / w_sum
return error
def update_weights(self, in_labels, in_answers, in_alpha):
for ind in xrange(len(in_labels)):
self.weights[ind] *= exp(in_alpha * (in_answers[ind] != in_labels[ind]))
def normalize_weights(self):
w_sum = sum(self.weights)
for ind in xrange(len(self.weights)):
self.weights[ind] /= w_sum
def get_initial_weights(self, in_labels):
weight = 1 / float(len(in_labels))
result = []
for i in xrange(len(in_labels)):
result.append(weight)
return result
As you can see, it is just a simple AdaBoost (I instantiated it with in_base_type = tree_learner, in_multi_type = one_against_one) and it worked the same way no matter how many base classifiers were engaged. It just acted as one multiclass decision tree.
Then I've made a hack. I chose a random sample of objects on the each iteration with respect to their weights and trained classifiers with a random subset of objects without any weights. And that worked as it was supposed to.
The default tree criterion, namely information gain, does not take the weights into account. If you know of a formula which would do it, I'll implement it.
In the meanwhile, using neg_z1_loss will do it correctly. By the way, there was a slight bug in that implementation, so you will need to use the most current github master.