I am starting on Bayesian Statistics using the book Probabilistic Programming and Bayesian Methods for Hackers. I realized that the code examples there are based on pymc which has been deprecated in favor of pymc3. I am trying to figure out how to port the code into pymc3 code, but am running into issues there.
I was wondering if someone could help port the following code snippet to pymc3:
import numpy as np
import pymc as pm
count_data = np.loadtxt("txtdata.csv")
n_count_data = len(count_data)
alpha = 1.0 / count_data.mean()
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)
#pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
out = np.zeros(n_count_data)
out[:tau] = lambda_1 # lambda before tau is lambda1
out[tau:] = lambda_2 # lambda after (and including) tau is lambda2
return out
observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)
lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]
I was able to do it partially, but then I am running into trouble at the line
#pm.deterministic
My partial code (for what it is worth):
import numpy as np
import pymc3 as pm
from matplotlib import pyplot as plt
count_data = np.loadtxt("txtdata.csv")
n_count_data = len(count_data)
alpha = 1.0 / count_data.mean()
basic_model = pm.Model()
with basic_model:
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)
#pm.deterministics
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
out = np.zeros(n_count_data)
out[:tau] = lambda_1 # lambda before tau is lambda1
out[tau:] = lambda_2 # lambda after (and including) tau is lambda2
return out
observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
How do I get this to work in pymc3?
Related
I want to find the parameters of a Weibull distribution by minimizing the parameters using Kullbak-Leibler method. I found a code here which did the same thing. I replaced the Normal distributions in the original code by the Weibull distributions. I do not know why I get “Nan” parameters and “Nan” Kullback-Leibler divergence value. Can anyone please help?
import numpy as np
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import seaborn as sns
sns.set()
from scipy.stats import weibull_min
learning_rate = 0.001
epochs = 100
x = np.arange(0, 2000,0.001)
p_pdf=weibull_min.pdf(x, 1.055,0, 468).reshape(1, -1)
p = tf.placeholder(tf.float64, shape=p_pdf.shape)
alpha = tf.Variable(np.zeros(1))
beta = tf.Variable(np.eye(1))
weibull=(beta / alpha) * ((x / alpha)**(beta - 1)) * tf.exp(-((x / alpha)**beta))
q = weibull
kl_divergence = tf.reduce_sum(tf.where(p == 0, tf.zeros(p_pdf.shape, tf.float64), p * tf.log(p / q)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(kl_divergence)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
history = []
alphas = []
betas = []
for i in range(epochs):
sess.run(optimizer, { p: p_pdf })
if i % 10 == 0:
history.append(sess.run(kl_divergence, { p: p_pdf }))
alphas.append(sess.run(alpha)[0])
betas.append(sess.run(beta)[0][0])
for a, b in zip(alphas, betas):
q_pdf =weibull_min.pdf(x, b,0,a)
plt.plot(x, q_pdf.reshape(-1, 1), c='red')
plt.title('KL(P||Q) = %1.3f' % history[-1])
plt.plot(x, p_pdf.reshape(-1, 1), linewidth=3)
plt.show()
plt.plot(history)
plt.show()
sess.close()
Try initialising your alphas to not be 0. Perhaps initialise to np.ones(1) instead.
If you use an alpha of zero you will get a nan with scipy.
from scipy.stats import weibull_min
weibull_min.pdf(100, 0, 0, 2.), weibull_min.pdf(100, 1, 0, 2.)
(nan, 9.643749239819589e-23)
i'm studying gaussian process regression, and i'm trying to use the built-in functions from scikit-learn, and also trying to impement a custom function for doing so.
This is the code when using scikit-learn:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor as gpr
from sklearn.gaussian_process.kernels import RBF,WhiteKernel,ConstantKernel as C
from scipy.optimize import minimize
import scipy.stats as s
X = np.linspace(0,10,10).reshape(-1,1) # Input Values
Y = 2*X + np.sin(X) # Function
v = 1
kernel = v*RBF() + WhiteKernel() #Defining kernel
gp = gpr(kernel=kernel,n_restarts_optimizer=50).fit(X,Y) #fitting the process to get optimized
hyperparameter
gp.kernel_ #Hyperparameters optimized by the GPR function in scikit-learn
Out[]: 14.1**2 * RBF(length_scale=3.7) + WhiteKernel(noise_level=1e-05) #result
And this is the code i wrote manually:
def marglike(par,X,Y): #defining log-marginal-likelihood
# print(par)
l,var,sigma_n = par
n = len(X)
dist_X = (X - X.T)**2
# print(dist_X)
k = var*np.exp(-(1/(2*(l**2)))*dist_X)
inverse = np.linalg.inv(k + (sigma_n**2)*np.eye(len(k)))
ml = (1/2)*np.dot(np.dot(Y.T,inverse),Y) + (1/2)*np.log(np.linalg.det(k +
(sigma_n**2)*np.eye(len(k)))) + (n/2)*np.log(2*np.pi)
return ml
b= [0.0005,100]
bnd = [b,b,b] #bounds used for "minimize" function
start = np.array([1.1,1.6,0.05]) #initial hyperparameters values
re = minimize(marglike,start,args=(X,Y),method="L-BFGS-B",options = {'disp':True},bounds=bnd) #the
method used is the same as the one used by scikit-learn
re.x #Hyperparameter results
Out[]: array([3.55266484e+00, 9.99986210e+01, 5.00000000e-04])
As you can see, the hyperparameter i got from the 2 methods are different, but yet i used the same data(X,Y) and same minimization method.
Could somebody help me to understand why and maybe how to get same results ?!
As suggested by San Mason, adding noise actually works! Otherwise, while you do it manually (in the custom code), set the initial noise to reasonably low and have multiple restarts with different initializations then you will get values close by. By the way, noiseless data seems to be creating a stationary ridge in the space of hyperparameters (like Fig. 1.6 in Surrogates GP book). Note that scikit-learn noise is sigma_n^2 for your custom function. Below are the snippets of noisy and noise-less cases.
Noise-less case
scikit-learn
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor as gpr
from sklearn.gaussian_process.kernels import RBF,WhiteKernel,ConstantKernel as C
from scipy.optimize import minimize
import scipy.stats as s
X = np.linspace(0,10,10).reshape(-1,1) # Input Values
Y = 2*X + np.sin(X) #+ np.random.normal(10)# Function
v = 1
kernel = v*RBF() + WhiteKernel() #Defining kernel
gp = gpr(kernel=kernel,n_restarts_optimizer=50).fit(X,Y) #fitting the process to get optimized
# hyperparameter
gp.kernel_ #Hyperparameters optimized by the GPR function in scikit-learn
# Out[]: 14.1**2 * RBF(length_scale=3.7) + WhiteKernel(noise_level=1e-05) #result
custom function
def marglike(par,X,Y): #defining log-marginal-likelihood
# print(par)
l,std,sigma_n = par
n = len(X)
dist_X = (X - X.T)**2
# print(dist_X)
k = std**2*np.exp(-(dist_X/(2*(l**2)))) + (sigma_n**2)*np.eye(n)
inverse = np.linalg.inv(k)
ml = (1/2)*np.dot(np.dot(Y.T,inverse),Y) + (1/2)*np.log(np.linalg.det(k)) + (n/2)*np.log(2*np.pi)
return ml[0,0]
b= [10**-5,10**5]
bnd = [b,b,b] #bounds used for "minimize" function
start = [1,1,10**-5] #initial hyperparameters values
re = minimize(fun=marglike,x0=start,args=(X,Y),method="L-BFGS-B",options = {'disp':True},bounds=bnd) #the
# method used is the same as the one used by scikit-learn
re.x[1], re.x[0], re.x[2]**2
# Output - (9.920690495739379, 3.5657912350017575, 1.0000000000000002e-10)
Noisy case
scikit-learn
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor as gpr
from sklearn.gaussian_process.kernels import RBF,WhiteKernel,ConstantKernel as C
from scipy.optimize import minimize
import scipy.stats as s
X = np.linspace(0,10,10).reshape(-1,1) # Input Values
Y = 2*X + np.sin(X) + np.random.normal(size=10).reshape(10,1)*0.1 # Function
v = 1
kernel = v*RBF() + WhiteKernel() #Defining kernel
gp = gpr(kernel=kernel,n_restarts_optimizer=50).fit(X,Y) #fitting the process to get optimized
# hyperparameter
gp.kernel_ #Hyperparameters optimized by the GPR function in scikit-learn
# Out[]: 10.3**2 * RBF(length_scale=3.45) + WhiteKernel(noise_level=0.00792) #result
Custom function
def marglike(par,X,Y): #defining log-marginal-likelihood
# print(par)
l,std,sigma_n = par
n = len(X)
dist_X = (X - X.T)**2
# print(dist_X)
k = std**2*np.exp(-(dist_X/(2*(l**2)))) + (sigma_n**2)*np.eye(n)
inverse = np.linalg.inv(k)
ml = (1/2)*np.dot(np.dot(Y.T,inverse),Y) + (1/2)*np.log(np.linalg.det(k)) + (n/2)*np.log(2*np.pi)
return ml[0,0]
b= [10**-5,10**5]
bnd = [b,b,b] #bounds used for "minimize" function
start = [1,1,10**-5] #initial hyperparameters values
re = minimize(fun=marglike,x0=start,args=(X,Y),method="L-BFGS-B",options = {'disp':True},bounds=bnd) #the
# method used is the same as the one used by scikit-learn
re.x[1], re.x[0], re.x[2]**2
# Output - (10.268943740577331, 3.4462604625225106, 0.007922681239535326)
Right now I do something like this, and im wonedring if there are better ways.
import numpy as np
from scipy import integrate
from sklearn.mixture import GaussianMixture as GMM
model = GMM(n, covariance_type = "full").fit(X)
def cdf(x):
return integrate.quad(lambda t: np.exp(model.score(t)), -inf, x)[0]
The CDF of mixed Gaussian distributions with CDF of F_1,F_2,F_3...,and weights of ω_1,ω_2,ω_3..., equals to F_mixed = ω_1 * F_1 + ω_2 * F_2 + ω_3 * F_3 + ... Therefore, the answer is:
from scipy.stats import norm
weights = [0.163, 0.131, 0.486, 0.112, 0.107]
means = [45.279, 55.969, 49.315, 53.846, 61.953]
covars = [0.047, 1.189, 3.632, 0.040, 0.198]
def mix_norm_cdf(x, weights, means, covars):
mcdf = 0.0
for i in range(len(weights)):
mcdf += weights[i] * norm.cdf(x, loc=means[i], scale=covars[i])
return mcdf
print(mix_norm_cdf(50, weights, means, covars))
output
0.442351546658755
I am generating a time series that has a drastic change in the middle.
import numpy as np
size = 120
x1 = np.random.randn(size)
x2 = np.random.randn(size) * 4
x = np.hstack([x1, x2])
This series of x looks like this:
The goal is now to use PyMC3 to estimate the posterior distribution of the time when the change occurred (switchpoint). This should occur around the index 120. I've used the following code;
from pymc3 import Model, Normal, HalfNormal, DiscreteUniform
basic_model = Model()
with basic_model:
mu1 = Normal('mu1', mu=0, sd=10)
mu2 = Normal('mu2', mu=0, sd=10)
sigma1 = HalfNormal('sigma1', sd=2)
sigma2 = HalfNormal('sigma2', sd=2)
tau = DiscreteUniform('tau', 0, 240)
# get likelihoods
y1 = Normal('y1', mu=mu1, sd=sigma1, observed=x[:tau])
y2 = Normal('y2', mu=mu2, sd=sigma2, observed=x[tau:])
Doing this gives an error that I cannot use tau to slice the array. What would be the approach to solve this in PyMC? It seems like I'll need the slicing to be done by something stochastic in PyMC.
Turns out PyMC3 has a switch model. Let t be the variable for time.
import pymc3 as pm
basic_model = pm.Model()
with basic_model:
mu1 = pm.Normal('mu1', mu=0, sd=10)
mu2 = pm.Normal('mu2', mu=0, sd=10)
sigma1 = pm.HalfNormal('sigma1', sd=2)
sigma2 = pm.HalfNormal('sigma2', sd=2)
switchpoint = pm.DiscreteUniform('switchpoint', t.min(), t.max())
tau_mu = pm.switch(t >= switchpoint, mu1, mu2)
tau_sigma = pm.switch(t >= switchpoint, sigma1, sigma2)
y = pm.Normal('y1', mu=tau_mu, sd=tau_sigma, observed=x)
Say I have a random collection of (X,Y) points:
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt
import scipy
x = np.array(range(0,50))
y = np.random.uniform(low=0.0, high=40.0, size=200)
y = map((lambda a: a[0] + a[1]), zip(x,y))
plt.scatter(x,y)
and that I fit simple linear regression:
std = 20.
tau=1/(std**2)
alpha = pm.Normal('alpha', mu=0, tau=tau)
beta = pm.Normal('beta', mu=0, tau=tau)
sigma = pm.Uniform('sigma', lower=0, upper=20)
y_est = alpha + beta * x
likelihood = pm.Normal('y', mu=y_est, tau=1/(sigma**2), observed=True, value=y)
model = pm.Model([likelihood, alpha, beta, sigma, y_est])
mcmc = pm.MCMC(model)
mcmc.sample(40000, 15000)
How can I get the distribution or the statistics of y_est[0], y_est[1], y_est[2].. (note that these variables correspond to the y values estimated for each input x value.
In PyMC 2, if you are interested in the trace of a deterministic, you should wrap the deterministic in a Lambda object (or decorate a function with #deterministic). In your case, this would be:
y_est = Lambda('y_est', lambda a=alpha, b=beta: a + b * x)
You should then be able to call the summary method or plot the node, just like a Stochastic.
BTW, you do not need to instantiate a Model object, as MCMC already does that for you. All you need is:
mcmc = pm.MCMC([likelihood, alpha, beta, sigma, y_est])
or even more concisely:
mcmc = pm.MCMC(vars())
Following #Chris' advice, the following works:
x = pm.Uniform('x', lower=xmin, upper=xmax)
alpha = pm.Normal('alpha', mu=0, tau=tau)
beta = pm.Normal('beta', mu=0, tau=tau)
sigma = pm.Uniform('sigma', lower=0, upper=20)
# The deterministic:
y_gen = pm.Lambda('y_gen', lambda a=alpha, x=x, b=beta: a + b * x)
And then draw samples from it as follows:
mcmc = pm.MCMC([x, y_gen])
mcmc.sample(n_total_samples, n_burn_in)
x_trace = mcmc.trace('x')[:]
y_trace = mcmc.trace('y_gen')[:]