Loss becomes NaN in a mixture model on a hybrid Pareto distribution - Python

My goal is to compute the loss function in this paper.
For this loss, we need to compute functions that depend on our estimators:
import numpy as np
import tensorflow as tf
# lambertw is not defined in the post; an implementation that works on tensors
# (e.g. tensorflow_probability.math.lambertw) is assumed.

def z_func(xi):
    return tf.divide((1 + xi) ** 2, 2 * np.pi)

def gamma_cal(xi):
    val = tf.math.divide(lambertw(z_func(xi)), 2)
    val = 1 + 0.5 * (1 + tf.math.erf(tf.math.sqrt(val)))
    return val

def beta_cal(xi, sigma):
    z = z_func(xi)
    n = tf.multiply(sigma, tf.math.add(xi, 1))
    d = tf.math.sqrt(lambertw(z))
    return tf.math.divide(n, d)

def alpha_cal(xi, mu, sigma):
    a = tf.math.sqrt(lambertw(z_func(xi)))
    a = tf.multiply(sigma, a)
    a = tf.math.add(a, mu)
    return a
and these are the normal and Pareto density functions:
def pdf_normal(y, mu, sigma, gamma):
    value = -tf.subtract(y, mu) ** 2
    value = tf.math.divide(value, 2 * sigma ** 2)
    n = tf.math.exp(value)
    d = tf.math.multiply(gamma, tf.math.sqrt(2 * np.pi) * sigma)
    value = tf.math.divide(n, d)
    return tf.where(tf.is_nan(value), tf.zeros_like(value), value)

def pareto_xi_pos(y, xi, alpha, beta, gamma):
    value = tf.subtract(y, alpha)
    value = tf.math.multiply(xi, value)
    value = tf.math.divide(value, beta)
    value = tf.math.add(value, 1)
    expo = tf.math.divide(-1 - xi, xi)
    n = tf.math.pow(value, expo)
    d = tf.math.multiply(gamma, beta)
    value = tf.math.divide(n, d)
    return tf.where(tf.math.less_equal(y, alpha), tf.zeros_like(value), value)
and this is the loss function that I try to minimize:
def calc_pdf(y, mu, sigma, xi, gamma, alpha, beta):
    mask = tf.math.less_equal(y, alpha)
    mask = tf.cast(mask, tf.keras.backend.floatx())
    mask2 = tf.math.greater(y, alpha)
    mask2 = tf.cast(mask2, tf.keras.backend.floatx())
    normal_mask = tf.multiply(mask, pdf_normal(y, mu, sigma, gamma))
    pareto_mask = tf.multiply(mask2, pareto_xi_pos(y, xi, alpha, beta, gamma))
    return normal_mask + pareto_mask

def mdn_loss(y_true, y_pred):
    # m is the number of mixture components, defined elsewhere in the code
    mu, sigma, xi, pi = tf.split(y_pred, num_or_size_splits=[m, m, m, m], axis=-1, name='mdn_coef_split')
    gamma = gamma_cal(xi)
    beta = beta_cal(xi, sigma)
    alpha = alpha_cal(xi, mu, sigma)
    out = calc_pdf(y_true, mu, sigma, xi, gamma, alpha, beta)
    out = tf.multiply(out, pi)
    out = tf.reduce_sum(out, 1)
    out = -tf.math.log(out + 1e-10)
    return tf.reduce_mean(out)
The problem is that during training my loss becomes NaN, and I don't understand why.
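One thing worth checking (a guess, not something confirmed by the post): tf.where does not prevent the discarded branch from being computed, so tf.math.pow in pareto_xi_pos is still evaluated on samples that fall in the Gaussian region, where its base 1 + xi*(y - alpha)/beta can be negative and the result is NaN. The forward value gets masked out, but in the backward pass the zero gradient from tf.where is multiplied by that NaN partial derivative, and 0 * NaN is NaN, which then poisons the weights. A minimal sketch of the usual "double where" workaround, reusing the names from the question:

def pareto_xi_pos_safe(y, xi, alpha, beta, gamma):
    # Illustrative only: same density as pareto_xi_pos, but the base fed to
    # tf.math.pow is replaced by 1.0 wherever the mask is False, so neither the
    # forward pass nor the gradient ever sees a non-positive base.
    mask = tf.math.greater(y, alpha)
    base = 1.0 + xi * (y - alpha) / beta
    safe_base = tf.where(mask, base, tf.ones_like(base))
    dens = tf.math.pow(safe_base, (-1.0 - xi) / xi) / (gamma * beta)
    return tf.where(mask, dens, tf.zeros_like(dens))

tf.debugging.check_numerics on the intermediate tensors (or printing them for a failing batch) can confirm where the first NaN appears; xi estimates approaching 0, which make the exponent (-1 - xi)/xi and the Lambert-W terms blow up, would be another thing to rule out.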

Related

Reproducing Hamilton 1989 Markov Switching Model in PyMC3

I am trying to understand Hamilton's 1989 Markov-switching autoregression model, and I am attempting to reproduce its results with a Bayesian twist. I wrote a number of PyMC3 models using Eric Ma's tutorial about HMMs in PyMC3, and the latest iteration can be found below.
Without autoregression, the model converges to mu values close to Hamilton's (1.16 and -0.36) and realistic transition probabilities.
However, when autoregression is added, the model fails to converge to coefficients close to Hamilton's results. The transition probabilities are fit especially poorly.
What am I missing?
# %%
import pymc3 as pm
import theano.tensor as tt
import theano.tensor.slinalg as sla # theano-wrapped scipy linear algebra
import theano.tensor.nlinalg as nla # theano-wrapped numpy linear algebra
import theano
theano.config.gcc.cxxflags = "-Wno-c++11-narrowing"
import pandas as pd
import matplotlib.pyplot as plt
dta = pd.read_stata('https://www.stata-press.com/data/r14/rgnp.dta').iloc[1:]
dta.index = pd.DatetimeIndex(dta.date, freq='QS')
dta_hamilton = dta.rgnp
# Plot the data
dta_hamilton.plot(title='Growth rate of Real GNP', figsize=(12,3))
# %%
fig, ax = plt.subplots(figsize=(12, 4))
#plt.plot(np.round(trace["hmm_states"].mean(axis=0)), label="inferred")
plt.plot(dta_hamilton.values, label="true")
# %%
def solve_equilibrium(n_states, p_transition):
    A = tt.dmatrix('A')
    A = tt.eye(n_states) - p_transition + tt.ones(shape=(n_states, n_states))
    p_equilibrium = pm.Deterministic("p_equilibrium", sla.solve(A.T, tt.ones(shape=(n_states))))
    return p_equilibrium
class HMMStates(pm.Categorical):
    def __init__(self, p_transition, p_equilibrium, n_states, *args, **kwargs):
        """You can ignore this section for the time being."""
        super(pm.Categorical, self).__init__(*args, **kwargs)
        self.p_transition = p_transition
        self.p_equilibrium = p_equilibrium
        # This is needed
        self.k = n_states
        # This is only needed because discrete distributions must define a mode.
        self.mode = tt.cast(0, dtype='int64')

    def logp(self, x):
        """Focus your attention here!"""
        p_eq = self.p_equilibrium
        # Broadcast out the transition probabilities,
        # so that we can broadcast the calculation
        # of log-likelihoods
        p_tr = self.p_transition[x[:-1]]
        # the logp of the initial state evaluated against the equilibrium probabilities
        initial_state_logp = pm.Categorical.dist(p_eq).logp(x[0])
        # the logp of the rest of the states.
        x_i = x[1:]
        ou_like = pm.Categorical.dist(p_tr).logp(x_i)
        transition_logp = tt.sum(ou_like)
        return initial_state_logp + transition_logp
# %%
class HamiltonEmissions(pm.Continuous):
    def __init__(self, states, phi, sigma, mu, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.states = states
        self.phi = phi
        self.mu = mu
        self.sigma = sigma  # variance

    def logp(self, x):
        """
        x: observations
        """
        states = self.states
        sigma = self.sigma[states]
        mu = self.mu[states]
        phi = self.phi
        z = x - mu  # Centered version of x
        ar_mean = \
            phi[0] * z[0:-4] + \
            phi[1] * z[1:-3] + \
            phi[2] * z[2:-2] + \
            phi[3] * z[3:-1]
        ar_like = tt.sum(pm.Normal.dist(mu=ar_mean + mu[4:], sigma=sigma[4:]).logp(x[4:]))
        boundary_like = pm.Normal.dist(mu=0, sigma=sigma[:4]).logp(x[:4])
        return ar_like + boundary_like
# %%
n_states = 2
with pm.Model() as model:
    # Priors for transition matrix
    p_transition = pm.Dirichlet("p_transition",
                                a=tt.ones((n_states, n_states)),
                                shape=(n_states, n_states))
    # Solve for the equilibrium state
    p_equilibrium = solve_equilibrium(n_states, p_transition)
    # HMM state
    hmm_states = HMMStates(
        "hmm_states",
        p_transition=p_transition,
        p_equilibrium=p_equilibrium,
        n_states=n_states,
        shape=(len(dta_hamilton),)
    )
    # Prior for mu and sigma
    mu = pm.Normal("mu", mu=0, sigma=1, shape=(n_states,))
    sigma = pm.Exponential("sigma", lam=2, shape=(n_states,))
    phi = pm.Normal("phi", 0, 0.5, shape=(4,))
    # Observed emission likelihood
    obs = HamiltonEmissions(
        "emission",
        states=hmm_states,
        mu=mu,
        sigma=sigma,
        phi=phi,
        observed=dta_hamilton
    )
# %%
with model:
    start = pm.find_MAP()
    step1 = pm.Metropolis(vars=[mu, sigma, phi, p_transition, emission])
    step2 = pm.BinaryGibbsMetropolis(vars=[hmm_states])
    trace = pm.sample(2500, cores=1, chains=2, step=[step1, step2], tune=1500)
# %%
import arviz as az
az.plot_trace(trace, var_names=["p_transition"])
The pymc3-hmm package provides a forward-filter backward-sample sampler. That might work better for your problem.

Using PyMC3 to compute ODE parameter posterior: Bad initial energy error

I am trying to sample the posterior of an ODE's parameters, using a likelihood whose mean is the logarithm of the ODE solution for a particular choice of parameter and initial value. This is based on the tutorial found here. I can replicate the tutorial, but I can't make my own model work. My model's ODE is:
dQ(t)/dt = (1/K)*(R(t) - Q(t))
where R(t) is based on rainfall data that I input.
I am assigning priors to the noise standard deviation sigma, the initial value Q(0), and the parameter K.
Any help on how to overcome the error would be much appreciated :)
This is my code:
from scipy.integrate import odeint
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
import theano
from theano import *
import pymc3 as pm
import theano.tensor as tt

THEANO_FLAGS = 'optimizer=fast_compile'
theano.config.exception_verbosity = 'high'
theano.config.floatX = 'float64'

n_states = 1
n_odeparams = 1
n_ivs = 1

class LinearReservoirModel(object):
    def __init__(self, n_states, n_odeparams, n_ivs, net_rainfall_data, y0=None):
        self._n_states = n_states
        self._n_odeparams = n_odeparams
        self._n_ivs = n_ivs
        self._y0 = y0
        self._nr = net_rainfall_data

    def simulate(self, parameters, times):
        return self._simulate(parameters, times, self._nr, False)

    def simulate_with_sensitivities(self, parameters, times):
        return self._simulate(parameters, times, self._nr, True)

    def _simulate(self, parameters, times, net_rainfall_data, sensitivities):
        k, q0 = [x for x in parameters]

        # Interpolate net_rainfall
        nr_int = interp1d(times, net_rainfall_data, fill_value="extrapolate", kind='slinear')

        def r(q, time, k, nrint):
            return (nrint(time) - q) * (1. / k)

        if sensitivities:
            def jac(k):
                ret = np.zeros((self._n_states, self._n_states))
                ret[0, 0] = (-1. / k)
                return ret

            def dfdp(x, t, k, nrint):
                ret = np.zeros((self._n_states,
                                self._n_odeparams + self._n_ivs))
                ret[0, 0] = (-1. / (k ** 2)) * (nrint(t) - x)
                return ret

            def rhs(q_and_dqdp, t, k, nrint):
                q = q_and_dqdp[0:self._n_states]
                dqdp = q_and_dqdp[self._n_states:].reshape((self._n_states,
                                                            self._n_odeparams + self._n_ivs))
                dqdt = r(q, t, k, nrint)
                # print('jacobian',jac(q))
                # print('dqdp',dqdp)
                # print('dfdp',dfdp(q,t,nrint))
                d_dqdp_dt = jac(k) * dqdp + dfdp(q, t, k, nrint)  # CHANGED CODE HERE np.matmul(jac(q), dqdp) + dfdp(q,t,nrint)
                return np.concatenate((dqdt, d_dqdp_dt.reshape(-1)))

            y0 = np.zeros((n_states * (n_odeparams + n_ivs)) + n_states)  # CHANGED CODE HERE 2*
            y0[2] = 1.  # \frac{\partial [X]}{\partial Xt0} at t==0, and same below for Y
            y0[0:n_states] = q0
            result = odeint(rhs, y0, times, (k, nr_int), rtol=1e-6, atol=1e-5)
            values = result[:, 0:self._n_states]
            dvalues_dp = result[:, self._n_states:].reshape((len(times),
                                                             self._n_states,
                                                             self._n_odeparams + self._n_ivs))
            return values, dvalues_dp
        else:
            q = odeint(r, q0, times, args=(k, nr_int), rtol=1e-6, atol=1e-5)
            q_flat = [item for sublist in q for item in sublist]
            return q_flat
q = [0.01, 0.084788051,0.289827287,0.487426902,0.623592162,0.855202214,0.901709887,0.87936577,0.857067839,0.775516564,0.701725939,0.675138958,0.68101658,0.64644605,0.701305112,0.747128907,0.676039744,0.668502137,0.731464651,0.766588801]
nr = [1.618666063,0.0001,4.405308823,0.394073731,3.392555321,2.733285785,0.0001,1.31186209,0.0001,0.0001,0.0001,0.83074128,0.646141131,0.0001,2.405660466,0.0001,0.0001,1.174002978,1.481146447,0.73244669]
ode_model = LinearReservoirModel(n_states, n_odeparams, n_ivs, nr)
class ODEGradop(theano.Op):
    def __init__(self, numpy_vsp):
        self._numpy_vsp = numpy_vsp

    def make_node(self, x, g):
        x = theano.tensor.as_tensor_variable(x)
        g = theano.tensor.as_tensor_variable(g)
        node = theano.Apply(self, [x, g], [g.type()])
        return node

    def perform(self, node, inputs_storage, output_storage):
        x = inputs_storage[0]
        g = inputs_storage[1]
        out = output_storage[0]
        out[0] = self._numpy_vsp(x, g)  # get the numerical VSP

class ODEop(theano.Op):
    def __init__(self, state, numpy_vsp):
        self._state = state
        self._numpy_vsp = numpy_vsp

    def make_node(self, x):
        x = theano.tensor.as_tensor_variable(x)
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs_storage, output_storage):
        x = inputs_storage[0]
        out = output_storage[0]
        out[0] = self._state(x)  # get the numerical solution of ODE states

    def grad(self, inputs, output_grads):
        x = inputs[0]
        g = output_grads[0]
        grad_op = ODEGradop(self._numpy_vsp)  # pass the VSP when asked for gradient
        grad_op_apply = grad_op(x, g)
        return [grad_op_apply]

class solveCached(object):
    def __init__(self, times, n_params, n_outputs):
        self._times = times
        self._n_params = n_params
        self._n_outputs = n_outputs
        self._cachedParam = np.zeros(n_params)
        self._cachedSens = np.zeros((len(times), n_outputs, n_params))
        self._cachedState = np.zeros((len(times), n_outputs))

    def __call__(self, x):
        if np.all(x == self._cachedParam):
            state, sens = self._cachedState, self._cachedSens
        else:
            state, sens = ode_model.simulate_with_sensitivities(x, times)
        return state, sens
times = np.arange(0, len(q))  # number of measurement points (see below)
cached_solver = solveCached(times, n_odeparams + n_ivs, n_states)

def state(x):
    State, Sens = cached_solver(np.array(x, dtype=np.float64))
    cached_solver._cachedState, cached_solver._cachedSens, cached_solver._cachedParam = State, Sens, x
    return State.reshape((len(State),))

def numpy_vsp(x, g):
    numpy_sens = cached_solver(np.array(x, dtype=np.float64))[1].reshape((n_states * len(times), len(x)))
    return numpy_sens.T.dot(g)

# Define the data matrix
Q = np.vstack((q))

# Now instantiate the theano custom ODE op
my_ODEop = ODEop(state, numpy_vsp)
# The probabilistic model
with pm.Model() as LR_model:
    # Priors for unknown model parameters
    k = pm.Uniform('k', lower=0.01, upper=10)
    # Priors for initial conditions and noise level
    q0 = pm.Lognormal('q0', mu=np.log(1.2), sd=1)
    sigma = pm.Lognormal('sigma', mu=-1, sd=1, shape=1)
    # Forward model
    all_params = pm.math.stack([k, q0], axis=0)
    ode_sol = my_ODEop(all_params)
    forward = ode_sol.reshape(Q.shape)
    # log_forward = pm.math.log(forward)
    # log_forward_print = tt.printing.Print('log_forward')(log_forward.shape)
    # tt.printing.Print('sigma')(sigma.shape)
    # Likelihood
    Q_obs = pm.Lognormal('Q_obs', mu=pm.math.log(forward), sd=sigma, observed=Q)
    print(LR_model.check_test_point())
    # Y_obs_print = tt.printing.Print('Y_obs')(Y_obs)
    trace = pm.sample(n_init=1500, tune=1000, chains=1, init='adapt_diag')

trace['diverging'].sum()
If you run the code above you should be able to reproduce the following error:
Traceback (most recent call last):
File "examples/myexample.py", line 195, in <module>
trace = pm.sample(1500, tune=1000, chains=1, init='adapt_diag')
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 457, in sample
trace = _sample_many(**sample_args)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 503, in _sample_many
step=step, random_seed=random_seed[i], **kwargs)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 544, in _sample
for it, strace in enumerate(sampling):
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/tqdm/std.py", line 1091, in __iter__
for obj in iterable:
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 633, in _iter_sample
point, states = step.step(point)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
apoint, stats = self.astep(array)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 144, in astep
raise SamplingError("Bad initial energy")
pymc3.exceptions.SamplingError: Bad initial energy
PyMC3 Version: 3.7
Theano Version: 1.0.4
Python Version: 3.6.5
Operating system: macOS Catalina (v10.15.1)
How did you install PyMC3: pip (managed in a pyenv virtualenv)
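A generic first step for this error (a suggestion, not something taken from the post): "Bad initial energy" means the model's log-probability at the sampler's starting point is -inf or NaN. The check_test_point() call already in the model block prints the per-variable log-probabilities at that point; a -inf or nan entry (for example on Q_obs, which would happen if the forward ODE solution is non-positive where the Lognormal likelihood is evaluated) identifies the offending term.

with LR_model:
    print(LR_model.test_point)          # the starting values the sampler will use
    print(LR_model.check_test_point())  # per-variable logp at those values; look for -inf or nan

If Q_obs turns out to be the problematic entry, tightening the priors or passing explicit testval arguments to k and q0 so that the initial forward solution stays strictly positive would be a reasonable next experiment.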

Soft actor critic with discrete action space

I'm trying to implement the soft actor-critic algorithm for a discrete action space, and I have trouble with the loss function.
Here is the link from SAC with continuous action space:
https://spinningup.openai.com/en/latest/algorithms/sac.html
I do not know what I'm doing wrong.
The problem is that the network does not learn anything on the CartPole environment.
The full code on github: https://github.com/tk2232/sac_discrete/blob/master/sac_discrete.py
Here is my idea of how to calculate the loss for discrete actions.
Value Network
class ValueNet:
    def __init__(self, sess, state_size, hidden_dim, name):
        self.sess = sess
        with tf.variable_scope(name):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='value_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='value_targets')
            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            self.values = Dense(units=1, activation=None)(x)
            optimizer = tf.train.AdamOptimizer(0.001)
            loss = 0.5 * tf.reduce_mean((self.values - tf.stop_gradient(self.targets)) ** 2)
            self.train_op = optimizer.minimize(loss, var_list=_params(name))

    def get_value(self, s):
        return self.sess.run(self.values, feed_dict={self.states: s})

    def update(self, s, targets):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.targets: targets})
In the Q network I gather the values of the collected actions.
Example
q_out = [[0.5533, 0.4444], [0.2222, 0.6666]]
collected_actions = [0, 1]
gather = [[0.5533], [0.6666]]
gather function
def gather_tensor(params, idx):
    idx = tf.stack([tf.range(tf.shape(idx)[0]), idx[:, 0]], axis=-1)
    params = tf.gather_nd(params, idx)
    return params
Q Network
class SoftQNetwork:
    def __init__(self, sess, state_size, action_size, hidden_dim, name):
        self.sess = sess
        with tf.variable_scope(name):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='q_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='q_targets')
            self.actions = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='q_actions')
            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            x = Dense(units=action_size, activation=None)(x)
            self.q = tf.reshape(gather_tensor(x, self.actions), shape=(-1, 1))
            optimizer = tf.train.AdamOptimizer(0.001)
            loss = 0.5 * tf.reduce_mean((self.q - tf.stop_gradient(self.targets)) ** 2)
            self.train_op = optimizer.minimize(loss, var_list=_params(name))

    def update(self, s, a, target):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.actions: a, self.targets: target})

    def get_q(self, s, a):
        return self.sess.run(self.q, feed_dict={self.states: s, self.actions: a})
Policy Net
class PolicyNet:
    def __init__(self, sess, state_size, action_size, hidden_dim):
        self.sess = sess
        with tf.variable_scope('policy_net'):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='policy_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='policy_targets')
            self.actions = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='policy_actions')
            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            self.logits = Dense(units=action_size, activation=None)(x)
            dist = Categorical(logits=self.logits)
            optimizer = tf.train.AdamOptimizer(0.001)
            # Get action
            self.new_action = dist.sample()
            self.new_log_prob = dist.log_prob(self.new_action)
            # Calc loss
            log_prob = dist.log_prob(tf.squeeze(self.actions))
            loss = tf.reduce_mean(tf.squeeze(self.targets) - 0.2 * log_prob)
            self.train_op = optimizer.minimize(loss, var_list=_params('policy_net'))

    def get_action(self, s):
        action = self.sess.run(self.new_action, feed_dict={self.states: s[np.newaxis, :]})
        return action[0]

    def get_next_action(self, s):
        next_action, next_log_prob = self.sess.run([self.new_action, self.new_log_prob], feed_dict={self.states: s})
        return next_action.reshape((-1, 1)), next_log_prob.reshape((-1, 1))

    def update(self, s, a, target):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.actions: a, self.targets: target})
Update function
def soft_q_update(batch_size, frame_idx):
    gamma = 0.99
    alpha = 0.2
    state, action, reward, next_state, done = replay_buffer.sample(batch_size)
    action = action.reshape((-1, 1))
    reward = reward.reshape((-1, 1))
    done = done.reshape((-1, 1))

    # Q_target
    v_ = value_net_target.get_value(next_state)
    q_target = reward + (1 - done) * gamma * v_

    # V_target
    next_action, next_log_prob = policy_net.get_next_action(state)
    q1 = soft_q_net_1.get_q(state, next_action)
    q2 = soft_q_net_2.get_q(state, next_action)
    q = np.minimum(q1, q2)
    v_target = q - alpha * next_log_prob

    # Policy_target
    q1 = soft_q_net_1.get_q(state, action)
    q2 = soft_q_net_2.get_q(state, action)
    policy_target = np.minimum(q1, q2)
Since the algorithm is generic to both discrete and continuous policies, the key idea is that we need a discrete distribution that is reparametrizable. Then the extension involves minimal code modification from the continuous SAC: just change the policy distribution class.
There is one such distribution: the GumbelSoftmax distribution. PyTorch does not have this built in, so I simply extend it from a close cousin which has the right rsample() and add a correct log-prob calculation method. With the ability to calculate a reparametrized action and its log prob, SAC works beautifully for discrete actions with minimal extra code, as seen below.
def calc_log_prob_action(self, action_pd, reparam=False):
    '''Calculate log_probs and actions with option to reparametrize from paper eq. 11'''
    samples = action_pd.rsample() if reparam else action_pd.sample()
    if self.body.is_discrete:  # this is straightforward using GumbelSoftmax
        actions = samples
        log_probs = action_pd.log_prob(actions)
    else:
        mus = samples
        actions = self.scale_action(torch.tanh(mus))
        # paper Appendix C. Enforcing Action Bounds for continuous actions
        log_probs = (action_pd.log_prob(mus) - torch.log(1 - actions.pow(2) + 1e-6).sum(1))
    return log_probs, actions

# ... for discrete actions, the GumbelSoftmax distribution
class GumbelSoftmax(distributions.RelaxedOneHotCategorical):
    '''
    A differentiable Categorical distribution using the reparametrization trick with Gumbel-Softmax
    Explanation http://amid.fish/assets/gumbel.html
    NOTE: use this in place of PyTorch's RelaxedOneHotCategorical distribution since its log_prob is not working right (returns positive values)
    Papers:
    [1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables (Maddison et al., 2017)
    [2] Categorical Reparametrization with Gumbel-Softmax (Jang et al., 2017)
    '''

    def sample(self, sample_shape=torch.Size()):
        '''Gumbel-softmax sampling. Note rsample is inherited from RelaxedOneHotCategorical'''
        u = torch.empty(self.logits.size(), device=self.logits.device, dtype=self.logits.dtype).uniform_(0, 1)
        noisy_logits = self.logits - torch.log(-torch.log(u))
        return torch.argmax(noisy_logits, dim=-1)

    def log_prob(self, value):
        '''value is one-hot or relaxed'''
        if value.shape != self.logits.shape:
            value = F.one_hot(value.long(), self.logits.shape[-1]).float()
            assert value.shape == self.logits.shape
        return - torch.sum(- value * F.log_softmax(self.logits, -1), -1)
And here are the LunarLander results: SAC learns to solve it really fast.
The full implementation code is in SLM Lab at https://github.com/kengz/SLM-Lab/blob/master/slm_lab/agent/algorithm/sac.py
The SAC benchmark results on Roboschool (continuous) and LunarLander (discrete) are shown here: https://github.com/kengz/SLM-Lab/pull/399
There is a paper about SAC with discrete action spaces. It says that SAC for discrete action spaces doesn't need re-parametrization tricks like Gumbel-softmax; instead, it needs a few modifications. Please refer to the paper for more details.
Paper /
Author's implementation (without code for Atari) /
Reproduction (with code for Atari)
I hope it helps you.
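For reference, a minimal PyTorch sketch of the modification that paper describes (the function and variable names are illustrative, not taken from any of the linked implementations): with a finite action set, the expectations in the soft value and policy objectives can be computed exactly from the action probabilities, so no reparametrized sampling is needed.

import torch
import torch.nn.functional as F

def discrete_sac_targets(logits, q1, q2, alpha):
    """logits: [B, A] policy logits; q1, q2: [B, A] Q-values for every action; alpha: temperature."""
    probs = F.softmax(logits, dim=-1)
    log_probs = F.log_softmax(logits, dim=-1)
    q_min = torch.min(q1, q2)
    # Soft state value: an exact expectation over the discrete actions.
    v_target = (probs * (q_min - alpha * log_probs)).sum(dim=-1, keepdim=True)
    # Policy loss: E_{a~pi}[alpha * log pi(a|s) - Q(s, a)], again as an exact sum.
    policy_loss = (probs * (alpha * log_probs - q_min.detach())).sum(dim=-1).mean()
    return v_target, policy_loss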
This repo may be helpful. Its description says it contains an implementation of SAC for a discrete action space in PyTorch: there is a file with the SAC algorithm for continuous action spaces and a file with SAC adapted for discrete action spaces.
PyTorch 1.8 has RelaxedOneHotCategorical, which supports re-parameterized sampling using the Gumbel-softmax trick.
import torch
import torch.nn as nn
from torch.distributions import RelaxedOneHotCategorical

class Policy(nn.Module):
    def __init__(self, input_dims, hidden_dims, actions):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(input_dims, hidden_dims), nn.SELU(inplace=True),
                                 nn.Linear(hidden_dims, hidden_dims), nn.SELU(inplace=True),
                                 nn.Linear(hidden_dims, actions))  # one logit per action

    def forward(self, state):
        logits = torch.log_softmax(self.mlp(state), dim=-1)
        return RelaxedOneHotCategorical(logits=logits, temperature=torch.ones(1) * 1.0)

>>> policy = Policy(4, 16, 2)
>>> a_dist = policy(torch.randn(8, 4))
>>> a_dist.rsample()
tensor([[0.0353, 0.9647],
        [0.1348, 0.8652],
        [0.1110, 0.8890],
        [0.4956, 0.5044],
        [0.6941, 0.3059],
        [0.6126, 0.3874],
        [0.2932, 0.7068],
        [0.0498, 0.9502]], grad_fn=<ExpBackward>)

Precompute tensorflow tensors for GPflow model

I want to include a sparse Gaussian process model (from the GPflow library) in another project. The problem is that I can't call the prediction function for several inputs at once, but have to call it sequentially. I've checked the predictive function predict_f in the SGPR class (https://github.com/GPflow/GPflow/blob/master/gpflow/models/sgpr.py) and found that I could precompute a lot of things in advance. Thus, I made a child class of SGPR, wrote a precompute method, and modified the predictive function:
@params_as_tensors
def precompute(self):
    p_num_inducing = len(self.feature)
    p_err = self.Y - self.mean_function(self.X)
    p_Kuf = self.feature.Kuf(self.kern, self.X)
    p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    p_sigma = tf.sqrt(self.likelihood.variance)
    self.p_L = tf.cholesky(p_Kuu)
    p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
    p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
    self.p_LB = tf.cholesky(p_B)
    p_Aerr = tf.matmul(p_A, p_err)
    self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
@params_as_tensors
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    Kus = self.feature.Kuf(self.kern, Xnew)
    tmp1 = tf.matrix_triangular_solve(self.p_L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(self.p_LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, self.p_c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
But when I run the code, there is no difference in speed. I suppose that tensorflow executes all the expressions only when I call predict_f, but I have no idea how to explicitly precompute some tensors. Hope tensorflow gurus can help me, thanks in advance!
I've found a silly but straightforward solution to this problem. Here is a wrapper for the SGPR class that precomputes some common matrices and then uses them for predictions.
import gpflow
from gpflow.models import GPModel, SGPR
from gpflow.decors import params_as_tensors, autoflow
from gpflow import settings
from gpflow.params import Parameter, DataHolder
import tensorflow as tf

class fastSGPR(SGPR, GPModel):
    def __init__(self, X_tr, Y_tr, kernel, Zp):
        gpflow.models.SGPR.__init__(self, X_tr, Y_tr, kern=kernel, Z=Zp)
        print("Model has been initialized")

    @autoflow()
    @params_as_tensors
    def precompute(self):
        print("Precomputing required tensors...")
        p_num_inducing = len(self.feature)
        p_err = self.Y - self.mean_function(self.X)
        p_Kuf = self.feature.Kuf(self.kern, self.X)
        p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        p_sigma = tf.sqrt(self.likelihood.variance)
        self.p_L = tf.cholesky(p_Kuu)
        p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
        p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
        self.p_LB = tf.cholesky(p_B)
        p_Aerr = tf.matmul(p_A, p_err)
        self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
        print("Tensors have been precomputed")
        return self.p_L, self.p_LB, self.p_c

    @autoflow((settings.float_type, [None, None]), (settings.float_type, [None, None]), (settings.float_type, [None, None]), (settings.float_type, [None, None]))
    def predict_f(self, Xnew, L, LB, c):
        """
        Compute the mean and variance of the latent function(s) at the points
        Xnew.
        """
        return self._build_predict(Xnew, L, LB, c)

    @params_as_tensors
    def _build_predict(self, Xnew, L, LB, c, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew. For a derivation of the terms in here, see the associated SGPR
        notebook.
        """
        Kus = self.feature.Kuf(self.kern, Xnew)
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
                  - tf.matmul(tmp1, tmp1, transpose_a=True)
            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
                  - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.stack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var

    def assign(self, params):
        params1 = dict(params)
        params1["fastSGPR/kern/variance"] = params1.pop("SGPR/kern/variance")
        params1["fastSGPR/kern/lengthscales"] = params1.pop("SGPR/kern/lengthscales")
        params1["fastSGPR/likelihood/variance"] = params1.pop("SGPR/likelihood/variance")
        params1["fastSGPR/feature/Z"] = params1.pop("SGPR/feature/Z")
        SGPR.assign(self, params1)
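A short usage sketch of the wrapper (the data, kernel, and inducing points below are made up for illustration; they are not from the original post):

import numpy as np
import gpflow

X = np.random.rand(100, 1)
Y = np.sin(3 * X) + 0.1 * np.random.randn(100, 1)
Z = X[::10].copy()  # inducing points

model = fastSGPR(X, Y, gpflow.kernels.RBF(1), Z)
L, LB, c = model.precompute()                # @autoflow compiles and runs the graph once, returning numpy arrays
Xnew = np.random.rand(5, 1)
mean, var = model.predict_f(Xnew, L, LB, c)  # repeated calls reuse the precomputed factors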

Python milk library: object weights issue

I'm trying to use one_vs_one composition of decision trees for multiclass classification. The problem is, when I pass different object weights to a classifier, the result stays the same.
Do I misunderstand something about the weights, or do they just work incorrectly?
Thanks for your replies!
Here is my code:
from math import log, exp

class AdaLearner(object):
    def __init__(self, in_base_type, in_multi_type):
        self.base_type = in_base_type
        self.multi_type = in_multi_type

    def train(self, in_features, in_labels):
        model = AdaBoost(self.base_type, self.multi_type)
        model.learn(in_features, in_labels)
        return model

class AdaBoost(object):
    CLASSIFIERS_NUM = 100

    def __init__(self, in_base_type, in_multi_type):
        self.base_type = in_base_type
        self.multi_type = in_multi_type
        self.classifiers = []
        self.weights = []

    def learn(self, in_features, in_labels):
        labels_number = len(set(in_labels))
        self.weights = self.get_initial_weights(in_labels)
        for iteration in xrange(AdaBoost.CLASSIFIERS_NUM):
            classifier = self.multi_type(self.base_type())
            self.classifiers.append(classifier.train(in_features,
                                                     in_labels,
                                                     weights=self.weights))
            answers = []
            for obj in in_features:
                answers.append(self.classifiers[-1].apply(obj))
            err = self.compute_weighted_error(in_labels, answers)
            print err
            if abs(err - 0.) < 1e-6:
                break
            alpha = 0.5 * log((1 - err) / err)
            self.update_weights(in_labels, answers, alpha)
            self.normalize_weights()

    def apply(self, in_features):
        answers = {}
        for classifier in self.classifiers:
            answer = classifier.apply(in_features)
            if answer in answers:
                answers[answer] += 1
            else:
                answers[answer] = 1
        ranked_answers = sorted(answers.iteritems(),
                                key=lambda (k, v): (v, k),
                                reverse=True)
        return ranked_answers[0][0]

    def compute_weighted_error(self, in_labels, in_answers):
        error = 0.
        w_sum = sum(self.weights)
        for ind in xrange(len(in_labels)):
            error += (in_answers[ind] != in_labels[ind]) * self.weights[ind] / w_sum
        return error

    def update_weights(self, in_labels, in_answers, in_alpha):
        for ind in xrange(len(in_labels)):
            self.weights[ind] *= exp(in_alpha * (in_answers[ind] != in_labels[ind]))

    def normalize_weights(self):
        w_sum = sum(self.weights)
        for ind in xrange(len(self.weights)):
            self.weights[ind] /= w_sum

    def get_initial_weights(self, in_labels):
        weight = 1 / float(len(in_labels))
        result = []
        for i in xrange(len(in_labels)):
            result.append(weight)
        return result
As you can see, it is just a simple AdaBoost (I instantiated it with in_base_type = tree_learner, in_multi_type = one_against_one), and it worked the same way no matter how many base classifiers were engaged. It just acted as one multiclass decision tree.
Then I made a hack: on each iteration I chose a random sample of objects with respect to their weights and trained the classifiers on that subset without any weights. And that worked as it was supposed to.
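In case it helps, here is roughly what that resampling step looks like (an illustrative sketch using plain Python lists, matching the code above; weights are the current boosting weights):

import numpy as np

def weighted_resample(in_features, in_labels, weights):
    # Draw a bootstrap sample of the objects with probability proportional to
    # their boosting weights; the base classifier is then trained on this
    # sample without any explicit weights.
    n = len(in_labels)
    p = np.asarray(weights, dtype=float)
    p /= p.sum()
    idx = np.random.choice(n, size=n, replace=True, p=p)
    return [in_features[i] for i in idx], [in_labels[i] for i in idx]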
The default tree criterion, namely information gain, does not take the weights into account. If you know of a formula which would do it, I'll implement it.
In the meantime, using neg_z1_loss will do it correctly. By the way, there was a slight bug in that implementation, so you will need to use the most current github master.
