I want to include a Sparse Gaussian Process model (from the GPflow library) in another project. The problem is that I can't call the prediction function for several inputs at once; I have to call it sequentially. I checked the predictive function predict_f in the SGPR class (https://github.com/GPflow/GPflow/blob/master/gpflow/models/sgpr.py) and found that I could precompute a lot of things in advance. So I made a child class of SGPR, wrote a precompute method, and modified the predictive function:
@params_as_tensors
def precompute(self):
    p_num_inducing = len(self.feature)
    p_err = self.Y - self.mean_function(self.X)
    p_Kuf = self.feature.Kuf(self.kern, self.X)
    p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    p_sigma = tf.sqrt(self.likelihood.variance)
    self.p_L = tf.cholesky(p_Kuu)
    p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
    p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
    self.p_LB = tf.cholesky(p_B)
    p_Aerr = tf.matmul(p_A, p_err)
    self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
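In equation form, the quantities cached by precompute are (a summary of what the code above computes, with $\sigma^2$ the likelihood variance and $m(\cdot)$ the mean function):

$$
L = \operatorname{chol}(K_{uu}), \qquad
A = \sigma^{-1} L^{-1} K_{uf}, \qquad
L_B = \operatorname{chol}(A A^\top + I), \qquad
c = \sigma^{-1} L_B^{-1} A \,(y - m(X)),
$$

so a prediction at new points $X_*$ only needs the cross-covariance $K_{u*}$:

$$
\mu_* = \big(L_B^{-1} L^{-1} K_{u*}\big)^\top c + m(X_*), \qquad
\Sigma_* = k(X_*, X_*) + \big(L_B^{-1} L^{-1} K_{u*}\big)^\top \big(L_B^{-1} L^{-1} K_{u*}\big) - \big(L^{-1} K_{u*}\big)^\top \big(L^{-1} K_{u*}\big).
$$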
@params_as_tensors
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    Kus = self.feature.Kuf(self.kern, Xnew)
    tmp1 = tf.matrix_triangular_solve(self.p_L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(self.p_LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, self.p_c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
But when I run the code, there is no difference in speed. I suppose that TensorFlow executes all the expressions only when I call predict_f, but I have no idea how to explicitly precompute some tensors. I hope the TensorFlow gurus can help me; thanks in advance!
I've found a silly but straightforward solution to this problem. Here is a wrapper for the SGPR class that precomputes some common matrices and then uses them for predictions.
from gpflow.models import GPModel, SGPR
from gpflow.decors import params_as_tensors, autoflow
from gpflow import settings
from gpflow.params import Parameter, DataHolder
import tensorflow as tf
class fastSGPR(SGPR, GPModel):
    def __init__(self, X_tr, Y_tr, kernel, Zp):
        SGPR.__init__(self, X_tr, Y_tr, kern=kernel, Z=Zp)
        print("Model has been initialized")
    @autoflow()
    @params_as_tensors
    def precompute(self):
        print("Precomputing required tensors...")
        p_num_inducing = len(self.feature)
        p_err = self.Y - self.mean_function(self.X)
        p_Kuf = self.feature.Kuf(self.kern, self.X)
        p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        p_sigma = tf.sqrt(self.likelihood.variance)
        self.p_L = tf.cholesky(p_Kuu)
        p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
        p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
        self.p_LB = tf.cholesky(p_B)
        p_Aerr = tf.matmul(p_A, p_err)
        self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
        print("Tensors have been precomputed")
        return self.p_L, self.p_LB, self.p_c
    @autoflow((settings.float_type, [None, None]), (settings.float_type, [None, None]),
              (settings.float_type, [None, None]), (settings.float_type, [None, None]))
    def predict_f(self, Xnew, L, LB, c):
        """
        Compute the mean and variance of the latent function(s) at the points
        Xnew.
        """
        return self._build_predict(Xnew, L, LB, c)
    @params_as_tensors
    def _build_predict(self, Xnew, L, LB, c, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew. For a derivation of the terms in here, see the associated SGPR
        notebook.
        """
        Kus = self.feature.Kuf(self.kern, Xnew)
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
                  - tf.matmul(tmp1, tmp1, transpose_a=True)
            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
                  - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.stack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
    def assign(self, params):
        params1 = dict(params)
        params1["fastSGPR/kern/variance"] = params1.pop("SGPR/kern/variance")
        params1["fastSGPR/kern/lengthscales"] = params1.pop("SGPR/kern/lengthscales")
        params1["fastSGPR/likelihood/variance"] = params1.pop("SGPR/likelihood/variance")
        params1["fastSGPR/feature/Z"] = params1.pop("SGPR/feature/Z")
        SGPR.assign(self, params1)
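A rough sketch of how the wrapper is then used (GPflow 1.x assumed; the data, kernel and optimizer here are illustrative placeholders):

import numpy as np
import gpflow

X_train, Y_train = np.random.rand(500, 2), np.random.rand(500, 1)
Z_init = X_train[::25].copy()

model = fastSGPR(X_train, Y_train, kernel=gpflow.kernels.RBF(2), Zp=Z_init)
gpflow.train.ScipyOptimizer().minimize(model)       # fit hyperparameters as usual

L, LB, c = model.precompute()                       # heavy solves run once, returned as numpy arrays
for X_batch in np.array_split(np.random.rand(200, 2), 10):
    mean, var = model.predict_f(X_batch, L, LB, c)  # cheap repeated predictions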
I am trying to understand Hamilton's 1989 Markov-Switching Autoregression model and attempt to reproduce its results with a Bayesian twist. I wrote a number of PyMC3 models using Eric Ma's tutorial about HMMs in PyMC3, and the latest iteration can be found below.
Without autoregression, the model converges to mu values close to Hamilton's (1.16 and -0.36) and realistic transition probabilities.
However, when autoregression is added, the model fails to converge and does not fit coefficients close to Hamilton's results. The transition probabilities are fit especially poorly.
What am I missing?
# %%
import pymc3 as pm
import theano.tensor as tt
import theano.tensor.slinalg as sla  # theano-wrapped scipy linear algebra
import theano.tensor.nlinalg as nla  # theano-wrapped numpy linear algebra
import theano
theano.config.gcc.cxxflags = "-Wno-c++11-narrowing"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

dta = pd.read_stata('https://www.stata-press.com/data/r14/rgnp.dta').iloc[1:]
dta.index = pd.DatetimeIndex(dta.date, freq='QS')
dta_hamilton = dta.rgnp

# Plot the data
dta_hamilton.plot(title='Growth rate of Real GNP', figsize=(12, 3))
# %%
fig, ax = plt.subplots(figsize=(12, 4))
#plt.plot(np.round(trace["hmm_states"].mean(axis=0)), label="inferred")
plt.plot(dta_hamilton.values, label="true")
# %%
def solve_equilibrium(n_states, p_transition):
    A = tt.dmatrix('A')
    A = tt.eye(n_states) - p_transition + tt.ones(shape=(n_states, n_states))
    p_equilibrium = pm.Deterministic("p_equilibrium", sla.solve(A.T, tt.ones(shape=(n_states))))
    return p_equilibrium
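This is the usual linear-system trick for the stationary distribution: the equilibrium probabilities satisfy

$$ \pi^\top P = \pi^\top, \qquad \sum_i \pi_i = 1, $$

which can be folded into the single system

$$ \big(I - P + \mathbf{1}\mathbf{1}^\top\big)^\top \pi = \mathbf{1}, $$

and that is exactly what the sla.solve(A.T, ...) call above computes.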
class HMMStates(pm.Categorical):
    def __init__(self, p_transition, p_equilibrium, n_states, *args, **kwargs):
        """You can ignore this section for the time being."""
        super(pm.Categorical, self).__init__(*args, **kwargs)
        self.p_transition = p_transition
        self.p_equilibrium = p_equilibrium
        # This is needed
        self.k = n_states
        # This is only needed because discrete distributions must define a mode.
        self.mode = tt.cast(0, dtype='int64')

    def logp(self, x):
        """Focus your attention here!"""
        p_eq = self.p_equilibrium
        # Broadcast out the transition probabilities,
        # so that we can broadcast the calculation
        # of log-likelihoods
        p_tr = self.p_transition[x[:-1]]

        # the logp of the initial state evaluated against the equilibrium probabilities
        initial_state_logp = pm.Categorical.dist(p_eq).logp(x[0])

        # the logp of the rest of the states.
        x_i = x[1:]
        ou_like = pm.Categorical.dist(p_tr).logp(x_i)
        transition_logp = tt.sum(ou_like)
        return initial_state_logp + transition_logp
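The logp above is the standard factorization of a Markov chain's joint probability: the initial state is scored against the equilibrium distribution and every later state against the transition row of its predecessor,

$$ \log p(s_{1:T}) = \log \pi_{s_1} + \sum_{t=2}^{T} \log P_{s_{t-1},\, s_t}. $$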
# %%
class HamiltonEmissions(pm.Continuous):
    def __init__(self, states, phi, sigma, mu, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.states = states
        self.phi = phi
        self.mu = mu
        self.sigma = sigma  # variance

    def logp(self, x):
        """
        x: observations
        """
        states = self.states
        sigma = self.sigma[states]
        mu = self.mu[states]
        phi = self.phi

        z = x - mu  # Centered version of x
        ar_mean = \
            phi[0] * z[0:-4] + \
            phi[1] * z[1:-3] + \
            phi[2] * z[2:-2] + \
            phi[3] * z[3:-1]
        ar_like = tt.sum(pm.Normal.dist(mu=ar_mean + mu[4:], sigma=sigma[4:]).logp(x[4:]))

        boundary_like = pm.Normal.dist(mu=0, sigma=sigma[:4]).logp(x[:4])
        return ar_like + boundary_like
# %%
n_states = 2

with pm.Model() as model:
    # Priors for transition matrix
    p_transition = pm.Dirichlet("p_transition",
                                a=tt.ones((n_states, n_states)),
                                shape=(n_states, n_states))

    # Solve for the equilibrium state
    p_equilibrium = solve_equilibrium(n_states, p_transition)

    # HMM state
    hmm_states = HMMStates(
        "hmm_states",
        p_transition=p_transition,
        p_equilibrium=p_equilibrium,
        n_states=n_states,
        shape=(len(dta_hamilton),)
    )

    # Prior for mu and sigma
    mu = pm.Normal("mu", mu=0, sigma=1, shape=(n_states,))
    sigma = pm.Exponential("sigma", lam=2, shape=(n_states,))
    phi = pm.Normal("phi", 0, 0.5, shape=(4,))

    # Observed emission likelihood
    obs = HamiltonEmissions(
        "emission",
        states=hmm_states,
        mu=mu,
        sigma=sigma,
        phi=phi,
        observed=dta_hamilton
    )
# %%
with model:
    start = pm.find_MAP()
    step1 = pm.Metropolis(vars=[mu, sigma, phi, p_transition, emission])
    step2 = pm.BinaryGibbsMetropolis(vars=[hmm_states])
    trace = pm.sample(2500, cores=1, chains=2, step=[step1, step2], tune=1500)
# %%
import arviz as az
az.plot_trace(trace, var_names=["p_transition"])
The pymc3-hmm package provides a forward-filter backward-sample sampler. That might work better for your problem.
I added a print op to the GRU source code because I want to debug the inputs of the GRU and some operations inside it, but it prints nothing.
Does tf.print not work inside the GRU source code?
I hope someone can give me some suggestions.
Thank you very much!
def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    import tensorflow as tf
    print_GRU = tf.print(inputs)  # <<<<<<<<<<<<<<<<<< add print OP HERE
    with tf.control_dependencies([print_GRU]):
        gate_inputs = math_ops.matmul(
            array_ops.concat([inputs, state], 1), self._gate_kernel)
    # gate_inputs = math_ops.matmul(
    #     array_ops.concat([inputs, state], 1), self._gate_kernel)

    gate_inputs = nn_ops.bias_add(gate_inputs, self._gate_bias)
    value = math_ops.sigmoid(gate_inputs)
    r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

    r_state = r * state
    candidate = math_ops.matmul(
        array_ops.concat([inputs, r_state], 1), self._candidate_kernel)
    candidate = nn_ops.bias_add(candidate, self._candidate_bias)

    c = self._activation(candidate)
    new_h = u * state + (1 - u) * c
    return new_h, new_h
Inside call, use this line:
tf.py_function(func=tf.print, inp=[inputs], Tout=[])
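For example, it would sit inside the same call method like this (a sketch of the placement only; the rest of the GRU code stays as in the question):

def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    import tensorflow as tf
    # wrap the eager tf.print in a py_function op, as suggested above
    tf.py_function(func=tf.print, inp=[inputs], Tout=[])

    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, state], 1), self._gate_kernel)
    # ... rest of the GRU computation unchanged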
My goal is to compute the loss function in this paper.
For this loss, we need to compute functions that depend on our estimators:
import numpy as np
import tensorflow as tf
# lambertw is assumed to be a TF-compatible Lambert W implementation
# (e.g. tfp.math.lambertw from TensorFlow Probability)

def z_func(xi):
    return tf.divide((1 + xi) ** 2, 2 * np.pi)

def gamma_cal(xi):
    val = tf.math.divide(lambertw(z_func(xi)), 2)
    val = 1 + 0.5 * (1 + tf.math.erf(tf.math.sqrt(val)))
    return val

def beta_cal(xi, sigma):
    z = z_func(xi)
    n = tf.multiply(sigma, tf.math.add(xi, 1))
    d = tf.math.sqrt(lambertw(z))
    return tf.math.divide(n, d)

def alpha_cal(xi, mu, sigma):
    a = tf.math.sqrt(lambertw(z_func(xi)))
    a = tf.multiply(sigma, a)
    a = tf.math.add(a, mu)
    return a
And this part is the normal and Pareto distribution functions:
def pdf_normal(y, mu, sigma, gamma):
    value = -tf.subtract(y, mu) ** 2
    value = tf.math.divide(value, 2 * sigma ** 2)
    n = tf.math.exp(value)
    d = tf.math.multiply(gamma, tf.math.sqrt(2 * np.pi) * sigma)
    value = tf.math.divide(n, d)
    return tf.where(tf.is_nan(value), tf.zeros_like(value), value)

def pareto_xi_pos(y, xi, alpha, beta, gamma):
    value = tf.subtract(y, alpha)
    value = tf.math.multiply(xi, value)
    value = tf.math.divide(value, beta)
    value = tf.math.add(value, 1)
    expo = tf.math.divide(-1 - xi, xi)
    n = tf.math.pow(value, expo)
    d = tf.math.multiply(gamma, beta)
    value = tf.math.divide(n, d)
    return tf.where(tf.math.less_equal(y, alpha), tf.zeros_like(value), value)
And this is the loss function that I try to minimize:
def calc_pdf(y, mu, sigma, xi, gamma, alpha, beta):
    mask = tf.math.less_equal(y, alpha)
    mask = tf.cast(mask, tf.keras.backend.floatx())
    mask2 = tf.math.greater(y, alpha)
    mask2 = tf.cast(mask2, tf.keras.backend.floatx())
    normal_mask = tf.multiply(mask, pdf_normal(y, mu, sigma, gamma))
    pareto_mask = tf.multiply(mask2, pareto_xi_pos(y, xi, alpha, beta, gamma))
    return normal_mask + pareto_mask

def mdn_loss(y_true, y_pred):
    mu, sigma, xi, pi = tf.split(y_pred, num_or_size_splits=[m, m, m, m], axis=-1, name='mdn_coef_split')
    gamma = gamma_cal(xi)
    beta = beta_cal(xi, sigma)
    alpha = alpha_cal(xi, mu, sigma)
    out = calc_pdf(y_true, mu, sigma, xi, gamma, alpha, beta)
    out = tf.multiply(out, pi)
    out = tf.reduce_sum(out, 1)
    out = -tf.math.log(out + 1e-10)
    return tf.reduce_mean(out)
The problem is that during training my loss becomes NaN, and I don't understand why.
I'm trying to implement the soft actor-critic algorithm for a discrete action space, and I have trouble with the loss function.
Here is the link from SAC with continuous action space:
https://spinningup.openai.com/en/latest/algorithms/sac.html
I do not know what I'm doing wrong.
The problem is that the network does not learn anything on the CartPole environment.
The full code on github: https://github.com/tk2232/sac_discrete/blob/master/sac_discrete.py
Here is my idea of how to calculate the loss for discrete actions.
Value Network
class ValueNet:
    def __init__(self, sess, state_size, hidden_dim, name):
        self.sess = sess

        with tf.variable_scope(name):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='value_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='value_targets')

            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            self.values = Dense(units=1, activation=None)(x)

            optimizer = tf.train.AdamOptimizer(0.001)

            loss = 0.5 * tf.reduce_mean((self.values - tf.stop_gradient(self.targets)) ** 2)
            self.train_op = optimizer.minimize(loss, var_list=_params(name))

    def get_value(self, s):
        return self.sess.run(self.values, feed_dict={self.states: s})

    def update(self, s, targets):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.targets: targets})
In the Q network I gather the values of the collected actions.
Example
q_out = [[0.5533, 0.4444], [0.2222, 0.6666]]
collected_actions = [0, 1]
gather = [[0.5533], [0.6666]]
gather function
def gather_tensor(params, idx):
    idx = tf.stack([tf.range(tf.shape(idx)[0]), idx[:, 0]], axis=-1)
    params = tf.gather_nd(params, idx)
    return params
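A quick sanity check of gather_tensor against the example above (a sketch, using a TF1 session):

q_out = tf.constant([[0.5533, 0.4444], [0.2222, 0.6666]])
collected_actions = tf.constant([[0], [1]], dtype=tf.int32)
with tf.Session() as sess:
    print(sess.run(gather_tensor(q_out, collected_actions)))  # -> [0.5533 0.6666]

The Q network below then reshapes this 1-D result to shape (-1, 1).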
Q Network
class SoftQNetwork:
    def __init__(self, sess, state_size, action_size, hidden_dim, name):
        self.sess = sess

        with tf.variable_scope(name):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='q_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='q_targets')
            self.actions = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='q_actions')

            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            x = Dense(units=action_size, activation=None)(x)
            self.q = tf.reshape(gather_tensor(x, self.actions), shape=(-1, 1))

            optimizer = tf.train.AdamOptimizer(0.001)

            loss = 0.5 * tf.reduce_mean((self.q - tf.stop_gradient(self.targets)) ** 2)
            self.train_op = optimizer.minimize(loss, var_list=_params(name))

    def update(self, s, a, target):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.actions: a, self.targets: target})

    def get_q(self, s, a):
        return self.sess.run(self.q, feed_dict={self.states: s, self.actions: a})
Policy Net
class PolicyNet:
    def __init__(self, sess, state_size, action_size, hidden_dim):
        self.sess = sess

        with tf.variable_scope('policy_net'):
            self.states = tf.placeholder(dtype=tf.float32, shape=[None, state_size], name='policy_states')
            self.targets = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='policy_targets')
            self.actions = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='policy_actions')

            x = Dense(units=hidden_dim, activation='relu')(self.states)
            x = Dense(units=hidden_dim, activation='relu')(x)
            self.logits = Dense(units=action_size, activation=None)(x)
            dist = Categorical(logits=self.logits)

            optimizer = tf.train.AdamOptimizer(0.001)

            # Get action
            self.new_action = dist.sample()
            self.new_log_prob = dist.log_prob(self.new_action)

            # Calc loss
            log_prob = dist.log_prob(tf.squeeze(self.actions))
            loss = tf.reduce_mean(tf.squeeze(self.targets) - 0.2 * log_prob)
            self.train_op = optimizer.minimize(loss, var_list=_params('policy_net'))

    def get_action(self, s):
        action = self.sess.run(self.new_action, feed_dict={self.states: s[np.newaxis, :]})
        return action[0]

    def get_next_action(self, s):
        next_action, next_log_prob = self.sess.run([self.new_action, self.new_log_prob], feed_dict={self.states: s})
        return next_action.reshape((-1, 1)), next_log_prob.reshape((-1, 1))

    def update(self, s, a, target):
        self.sess.run(self.train_op, feed_dict={self.states: s, self.actions: a, self.targets: target})
Update function
def soft_q_update(batch_size, frame_idx):
    gamma = 0.99
    alpha = 0.2

    state, action, reward, next_state, done = replay_buffer.sample(batch_size)
    action = action.reshape((-1, 1))
    reward = reward.reshape((-1, 1))
    done = done.reshape((-1, 1))

    # Q_target
    v_ = value_net_target.get_value(next_state)
    q_target = reward + (1 - done) * gamma * v_

    # V_target
    next_action, next_log_prob = policy_net.get_next_action(state)
    q1 = soft_q_net_1.get_q(state, next_action)
    q2 = soft_q_net_2.get_q(state, next_action)
    q = np.minimum(q1, q2)
    v_target = q - alpha * next_log_prob

    # Policy_target
    q1 = soft_q_net_1.get_q(state, action)
    q2 = soft_q_net_2.get_q(state, action)
    policy_target = np.minimum(q1, q2)
Since the algorithm is generic to both discrete and continuous policy, the key idea is that we need a discrete distribution that is reparametrizable. Then the extension should involve minimal code modification from the continuous SAC --- by just changing the policy distribution class.
There is one such distribution — the GumbelSoftmax distribution. PyTorch does not have this built-in, so I simply extend it from a close cousin which has the right rsample() and add a correct log prob calculation method. With the ability to calculate a reparametrized action and its log prob, SAC works beautifully for discrete actions with minimal extra code, as seen below.
def calc_log_prob_action(self, action_pd, reparam=False):
    '''Calculate log_probs and actions with option to reparametrize from paper eq. 11'''
    samples = action_pd.rsample() if reparam else action_pd.sample()
    if self.body.is_discrete:  # this is straightforward using GumbelSoftmax
        actions = samples
        log_probs = action_pd.log_prob(actions)
    else:
        mus = samples
        actions = self.scale_action(torch.tanh(mus))
        # paper Appendix C. Enforcing Action Bounds for continuous actions
        log_probs = (action_pd.log_prob(mus) - torch.log(1 - actions.pow(2) + 1e-6).sum(1))
    return log_probs, actions
# ... for discrete action, GumbelSoftmax distribution
class GumbelSoftmax(distributions.RelaxedOneHotCategorical):
    '''
    A differentiable Categorical distribution using reparametrization trick with Gumbel-Softmax
    Explanation http://amid.fish/assets/gumbel.html
    NOTE: use this in place of PyTorch's RelaxedOneHotCategorical distribution since its log_prob is not working right (returns positive values)
    Papers:
    [1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables (Maddison et al, 2017)
    [2] Categorical Reparametrization with Gumbel-Softmax (Jang et al, 2017)
    '''

    def sample(self, sample_shape=torch.Size()):
        '''Gumbel-softmax sampling. Note rsample is inherited from RelaxedOneHotCategorical'''
        u = torch.empty(self.logits.size(), device=self.logits.device, dtype=self.logits.dtype).uniform_(0, 1)
        noisy_logits = self.logits - torch.log(-torch.log(u))
        return torch.argmax(noisy_logits, dim=-1)

    def log_prob(self, value):
        '''value is one-hot or relaxed'''
        if value.shape != self.logits.shape:
            value = F.one_hot(value.long(), self.logits.shape[-1]).float()
            assert value.shape == self.logits.shape
        return - torch.sum(- value * F.log_softmax(self.logits, -1), -1)
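A hypothetical usage sketch (the policy network and temperature value are illustrative, not part of the code above):

logits = policy_net(states)                                  # [batch, n_actions] logits from any network
action_pd = GumbelSoftmax(temperature=torch.tensor(1.0), logits=logits)
action = action_pd.rsample()                                 # relaxed one-hot sample, differentiable
log_prob = action_pd.log_prob(action)                        # [batch], used in the SAC policy/value targets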
And here are the LunarLander results. SAC learns to solve it really fast.
The full implementation code is in SLM Lab at https://github.com/kengz/SLM-Lab/blob/master/slm_lab/agent/algorithm/sac.py
The SAC benchmark results on Roboschool (continuous) and LunarLander (discrete) are shown here: https://github.com/kengz/SLM-Lab/pull/399
There is a paper about SAC with discrete action spaces. It says that SAC for discrete action spaces doesn't need re-parametrization tricks like Gumbel-Softmax; instead, SAC needs some modifications. Please refer to the paper for more details.
Paper /
Author's implementation (without code for Atari) /
Reproduction (with code for Atari)
I hope it helps you.
This repo may be helpful. Its description says that it contains an implementation of SAC for discrete action spaces in PyTorch. There is one file with the SAC algorithm for continuous action spaces and another with SAC adapted for discrete action spaces.
PyTorch 1.8 has RelaxedOneHotCategorical; this supports reparameterized sampling using the Gumbel-Softmax trick.
import torch
import torch.nn as nn
from torch.distributions import RelaxedOneHotCategorical

class Policy(nn.Module):
    def __init__(self, input_dims, hidden_dims, actions):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(input_dims, hidden_dims), nn.SELU(inplace=True),
                                 nn.Linear(hidden_dims, hidden_dims), nn.SELU(inplace=True),
                                 nn.Linear(hidden_dims, actions))

    def forward(self, state):
        logits = torch.log_softmax(self.mlp(state), dim=-1)
        return RelaxedOneHotCategorical(logits=logits, temperature=torch.ones(1) * 1.0)
>>> policy = Policy(4, 16, 2)
>>> a_dist = policy(torch.randn(8, 4))
>>> a_dist.rsample()
tensor([[0.0353, 0.9647],
        [0.1348, 0.8652],
        [0.1110, 0.8890],
        [0.4956, 0.5044],
        [0.6941, 0.3059],
        [0.6126, 0.3874],
        [0.2932, 0.7068],
        [0.0498, 0.9502]], grad_fn=<ExpBackward>)
I have a question regarding my RNN implementation.
I have the following code
def one_step(x_t, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    h_t = T.tanh(
        theano.dot(x_t, W_ih) +
        theano.dot(h_tm1, W_hh) +
        b_h
    )
    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]

# NOTE: dtype, x, cost, updates, s_ and t_ are defined elsewhere in the full script
n_hid = 3
n_in = 1
n_out = 1

W_hh_values = np.array(np.random.uniform(size=(n_hid, n_hid), low=-.01, high=.01), dtype=dtype)
W_hh2_values = np.array(np.random.uniform(size=(n_hid, n_hid), low=-.01, high=.01), dtype=dtype)
h0_value = np.array(np.random.uniform(size=(n_hid), low=-.01, high=.01), dtype=dtype)
b_h_value = np.array(np.random.uniform(size=(n_hid), low=-.01, high=.01), dtype=dtype)
b_h2_value = np.array(np.random.uniform(size=(n_hid), low=-.01, high=.01), dtype=dtype)
W_ih_values = np.array(np.random.uniform(size=(n_in, n_hid), low=-.01, high=.01), dtype=dtype)
W_ho_values = np.array(np.random.uniform(size=(n_hid, n_out), low=-.01, high=.01), dtype=dtype)
b_o_value = np.array(np.random.uniform(size=(n_out), low=-.01, high=.01), dtype=dtype)

# parameters of the rnn
b_h = theano.shared(b_h_value)
b_h2 = theano.shared(b_h2_value)
h0 = theano.shared(h0_value)
W_ih = theano.shared(W_ih_values)
W_hh = theano.shared(W_hh_values)
W_hh2 = theano.shared(W_hh2_values)
W_ho = theano.shared(W_ho_values)
b_o = theano.shared(b_o_value)

params = [W_ih, W_hh, b_h, W_ho, b_o, h0]

# target values
t = T.matrix(dtype=dtype)

# hidden and outputs of the entire sequence
[h_vals, y_vals], _ = theano.scan(fn=one_step,
                                  sequences=dict(input=x, taps=10),
                                  outputs_info=[h0, None],  # corresponds to the return type of one_step
                                  non_sequences=[W_ih, W_hh, b_h, W_ho, b_o]
                                  )

learn_rnn_fn = theano.function([],
                               outputs=cost,
                               updates=updates,
                               givens={x: s_,
                                       t: t_}
                               )
Now, after training, I can of course predict outputs like this:
test_rnn_fn = theano.function([],
                              outputs=y_vals,
                              givens={x: s_2}
                              )
However, this runs the network in predictive mode (i.e. take X steps of input and predict an output). I would like to run it in generative mode, meaning that I want to start from an initial state, have the RNN run for an arbitrary number of steps, and feed its output back in as input.
How could I do this?
Thanks!
You can run scan for an arbitrary number of steps using the n_steps parameter. To pass y_t on to the next step -- and assuming x_t.shape == y_t.shape -- you can pass outputs_info=[h0, x0] to scan (where x0 holds the initial input) and change the step function's signature to one_step(h_tm1, y_tm1, W_ih, W_hh, b_h, W_ho, b_o).
More complex termination criteria can be created with theano.scan_module.until(), but those are a question of design, not implementation. See here for examples.
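A rough sketch of the resulting generative scan, assuming x0 is a shared variable (or constant) holding the initial input and that y_t has the same shape as x_t (x0 and the n_steps value are illustrative):

def one_step_gen(h_tm1, y_tm1, W_ih, W_hh, b_h, W_ho, b_o):
    # the previous output is fed back in as the current input
    h_t = T.tanh(theano.dot(y_tm1, W_ih) + theano.dot(h_tm1, W_hh) + b_h)
    y_t = theano.dot(h_t, W_ho) + b_o
    return [h_t, y_t]

[h_gen, y_gen], _ = theano.scan(fn=one_step_gen,
                                outputs_info=[h0, x0],  # initial hidden state and initial "output"
                                non_sequences=[W_ih, W_hh, b_h, W_ho, b_o],
                                n_steps=200)

generate_fn = theano.function([], y_gen)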