I am lost in the documentation of the pymcmcstat Python package. I managed to plot the parameter distributions, but for the Bayes factor I need to calculate the integral of the likelihood over the parameter space for each model.
I followed this video. Each model has a different model function with different parameters. According to this link, I am supposed to compare the model evidences for model selection. All I have in hand are the post-burn-in chain results: the distribution of each parameter, the chain of sum-of-squares errors (SSE), and the variances. How do I compare the models with the MCMC chain results I have?
Where do I go from here?
Here is my code for one model; for each model, test_modelfun is changed and the chain results are saved for later comparison of the different models:
# imports needed by the snippet below
import math
import cmath
import numpy as np
from numpy import pi
from pymcmcstat.MCMC import MCMC

# Data related lines: input omega (x) and output fm (y)
x = np.array([76.29395, 152.5879, 305.1758, 610.3516, 1220.703, 2441.406, 4882.813, 9765.625, 19531.25, 39062.5, 78125, 156250, 312500, 625000])
y = np.array([155.6412886 -63.3826188j , 113.9114436 -79.90544719j, 64.97809441-77.65152741j, 26.87482243-57.38474656j, 7.44462341-34.02438426j, 2.32954856-16.17918216j, 2.30747953 -6.72487436j, 3.39658859 -2.72444011j, 4.0084345 -1.2029167j , 4.25877486 -0.70276446j, 4.11761329 -0.69591231j, 3.83339489 -0.65244854j, 3.47289164 -0.6079278j , 3.07027319 -0.14914359j])
## define the transfer function model calculated with the theta parameters
def test_modelfun(xdata, theta):
    omega = np.asarray(xdata).squeeze()  # frequency input passed in by pymcmcstat
    K, alpha_0, alpha_1, Tp_1, Tp_2, Tz_1 = 10**theta[0], 10**theta[1], 10**theta[2], 10**theta[3], 10**theta[4], 10**theta[5]
    # magnitude terms of the poles and zero
    Pz_0 = omega**alpha_0
    Pz_1 = np.sqrt((Tp_1**2)*(omega**(2*alpha_1)) + 2*Tp_1*(omega**alpha_1)*math.cos(alpha_1*pi/2) + 1)
    Pz_2 = np.sqrt((Tp_2**2)*(omega**(2*alpha_1)) + 2*Tp_2*(omega**alpha_1)*math.cos(alpha_1*pi/2) + 1)
    Zz_1 = np.sqrt((Tz_1**2)*(omega**(2*alpha_1)) + 2*Tz_1*(omega**alpha_1)*math.cos(alpha_1*pi/2) + 1)
    # phase terms of the poles and zero
    Pp_0 = np.full(len(omega), -pi*alpha_0/2)
    Pp_1 = np.array([math.atan((Tp_1*(omega[i]**alpha_1)*math.sin(pi*alpha_1/2))/(1 + Tp_1*(omega[i]**alpha_1)*math.cos(pi*alpha_1/2))) for i in range(len(omega))])
    Pp_2 = np.array([math.atan((Tp_2*(omega[i]**alpha_1)*math.sin(pi*alpha_1/2))/(1 + Tp_2*(omega[i]**alpha_1)*math.cos(pi*alpha_1/2))) for i in range(len(omega))])
    Zp_1 = np.array([math.atan((Tz_1*(omega[i]**alpha_1)*math.sin(pi*alpha_1/2))/(1 + Tz_1*(omega[i]**alpha_1)*math.cos(pi*alpha_1/2))) for i in range(len(omega))])
    # combine magnitude and phase into the complex response
    Z_est = (K*Zz_1)/(Pz_0*Pz_1*Pz_2)
    P_est = Zp_1 + Pp_0 - Pp_1 - Pp_2
    RX_est = np.array([cmath.rect(Z_est[i], P_est[i]) for i in range(len(omega))])
    return RX_est
def modelfun(xdata, theta):
    ymodel = test_modelfun(xdata, theta)
    Zest = 20*np.log10(np.abs(ymodel))
    return Zest
## define the sum-of-squares function for the error used in evaluating the likelihood function L(Fobs(i)|q)
def test_ssfun(theta, data):
    xdata = data.xdata[0].squeeze()  # pymcmcstat stores the data as 2-D columns
    ydata = data.ydata[0].squeeze()
    ymodel = test_modelfun(xdata, theta)
    # mean of the squared residuals over real and imaginary parts
    return (1/len(xdata))*np.sum((np.real(ydata) - np.real(ymodel))**2 + (np.imag(ydata) - np.imag(ymodel))**2)
    # sumsquares = sum((ymodel[:,0]-ydata[:,0])**2)
## simulation settings
itr = 50.0e4
verb = 1
wbar = 1
## initialize the MCMC object and add the data
mcstat = MCMC()
mcstat.data.add_data_set(x, y)
## add model parameters
mcstat.parameters.add_model_parameter(name='th_1',theta0=1, minimum=-2,maximum=3) #m_k, M_k = -2, 3
mcstat.parameters.add_model_parameter(name='th_2',theta0=-1, minimum=-4,maximum=0) #m_a0, M_a0 = -4, 0
mcstat.parameters.add_model_parameter(name='th_3',theta0=-1, minimum=-3,maximum=0) #m_a1, M_a1 = -3, 0
mcstat.parameters.add_model_parameter(name='th_4',theta0=-4, minimum=-9,maximum=0) #m_p1, M_p1 = -9, 0
mcstat.parameters.add_model_parameter(name='th_5',theta0=-4, minimum=-9,maximum=0) #m_p2, M_p2 = -9, 0
mcstat.parameters.add_model_parameter(name='th_6',theta0=-4, minimum=-9,maximum=0) #m_z1, M_z1 = -9, 0
## define simulation options: mh=metropolis-hastings, am=adaptive metropolis, dr=delayed rejection, dram=dr+am
mcstat.simulation_options.define_simulation_options(nsimu=int(itr), updatesigma=1, method='dr', adaptint=100, verbosity=verb, waitbar=wbar)
## define model settings
mcstat.model_settings.define_model_settings(sos_function=test_ssfun)
mcstat.run_simulation()
## extract results
results=mcstat.simulation_results.results
chain = results['chain']# chain for each parameter sampled during the simulation
s2chain = results['s2chain']# chain for the error variances; if updatesigma=0 then s2chain is an empty list
sschain = results['sschain']# chain for the sum-of-squares error calculated with each parameter set in the chain
names = results['names']
burnin = int(itr/2)
## display chain statistics
mcstat.chainstats(chain[burnin:,:],results)
mcpl = mcstat.mcmcplot
figcp = mcpl.plot_chain_panel(chain, names, figsizeinches = (7,6))
axes = figcp.get_axes()
for ii, ax in enumerate(axes):
    ch = chain[:, ii]
    ax.plot([burnin, burnin], [ch.min(), ch.max()], 'r')
figpd = mcpl.plot_density_panel(chain[burnin:,:], names, figsizeinches=(7,6))
figpc = mcpl.plot_pairwise_correlation_panel(chain[burnin:,:], names, figsizeinches = (7,6))
mcstat.PI.setup_prediction_interval_calculation(results=results, data=mcstat.data, modelfunction=modelfun, burnin=burnin)
mcstat.PI.generate_prediction_intervals(calc_pred_int=True, waitbar=False)
fg, ax = mcstat.PI.plot_prediction_intervals(adddata=True, plot_pred_int=True, figsizeinches = (7,5), data_display=dict(color='k'))
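For reference, here is my current attempt at turning these chains into a (log) model evidence so that I can form Bayes factors. This is only a sketch based on my own reading, not something from the pymcmcstat docs: it assumes a Gaussian likelihood rebuilt from sschain and s2chain, and it uses the harmonic-mean estimator, which I understand is numerically unstable, so I am not sure it is the right approach:
from scipy.special import logsumexp
n = len(x)  # number of data points
ss = sschain[burnin:, 0]  # note: my test_ssfun returns SSE/n, hence the n* factor below
s2 = s2chain[burnin:, 0]
# Gaussian log-likelihood of the data for each post-burn-in sample
loglike = -0.5*n*np.log(2*np.pi*s2) - 0.5*n*ss/s2
# harmonic-mean estimate of the log-evidence (known to be high-variance)
log_evidence = -(logsumexp(-loglike) - np.log(len(loglike)))
# repeating this per model, the Bayes factor of model 1 over model 2 would be
# np.exp(log_evidence_1 - log_evidence_2)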
I am doing an optimization for designing a seismic-resistant structural system using Abaqus.
I intend to use Gekko for this purpose, but it seems I am making a mistake in writing the suitable syntax for this task.
"Objective" is the name of the subroutine responsible for creating the numerical Abaqus model, analyzing it, processing the results, and calculating the penalty function.
"Objective" returns the cost of the building summed with the penalty function, which is to be minimized.
This is the error message:
#error: Insufficient Data
CSV Read Error : number of data rows must be >= 2 for dynamic problems
Data Points Identified: 1
STOPPING. . .
Traceback (most recent call last):
File "C:\temp\AK\24-Gekko\opti1.10.3.0.py", line 721, in
m.solve()
File "C:\Users\amjad\AppData\Roaming\Python\Python38\site-packages\gekko\gekko.py", line 2140, in solve
raise Exception(apm_error)
Exception: #error: Insufficient Data
CSV Read Error : number of data rows must be >= 2 for dynamic problems
Data Points Identified: 1
STOPPING. . .
This is the main code of the optimization process:
'''
m = GEKKO(remote=False)
Alfa1 = m.Const(1.25)
Alfa2 = m.Const(0.3)
Alfa3 = m.Const(1.25)
UCS = m.Const(1000.) # $/Ton of steel
UCC = m.Const(67.) # $/m3 of concrete
UCCF = m.Const(20.) # $/m2 of column wood framework
UCBF = m.Const(28.) # $/m2 of beam wood framework
UCWF = m.Const(20.) # $/m2 of wall wood framework
GammaS = m.Const(7850)
GammaC = m.Const(2500)
cover = m.Const(10.)
f_c = m.Const(30.)
fy = m.Const(400.)
Est = m.Const(200000.)
eu_c = m.Const(0.003)
bw = m.Const(700.)
fai = 6.
#defining material parameters
Mats = {
    "CDP30.0":("mm",GammaC.value*1.e-12,0.18,0.,[f_c.value,0.0015,0.4,100],[f_c.value/10., 0.001, 1.0],35,0.1,1.12,0.667,0.1,1.0,0.0),
    "C30.0"  :("mm",GammaC.value*1.e-12,0.18,0.,f_c.value),
    "S400.0" :("mm",GammaS.value*1.e-12,0.3,0.,fy.value),
}
#defining section's locations within building members and lengths of members
Sections = {
    "C11":[[("A0-A1","A1-A2","D0-D1","D1-D2"),750.],[],[],[]],
    "C22":[[("A2-A3","A3-A4","A4-A5","D2-D3","D3-D4","D4-D5"),750.],[],[],[]],
    "C33":[[("B0-B1","C0-C1","B1-B2","C1-C2","B2-B3","C2-C3","B3-B4","C3-C4","B4-B5","C4-C5",),750.],[],[],[]],
    "BB1":[[("A1-B1","C1-D1","A2-B2","C2-D2","A3-B3","C3-D3","A4-B4","C4-D4","A5-B5","C5-D5",),700.],[],[]],
    "WW1":[[("B0-C1","B1-C2","B2-C3","B3-C4","B4-C5",),750.],[],[]],
}
#creating materials of the model
MatText = []
for i in Mats:
    if i[1:3]=="DP": CDPs(i,Mats[i][0],Mats[i][1],Mats[i][2],Mats[i][3],Mats[i][4],Mats[i][5],Mats[i][6],Mats[i][7],Mats[i][8],Mats[i][9],Mats[i][10],Mats[i][11],Mats[i][12])
    elif i[0]=="C": EPPs(i,Mats[i][0],Mats[i][1],Mats[i][2],Mats[i][3],Mats[i][4])
    elif i[0]=="S": EPPs(i,Mats[i][0],Mats[i][1],Mats[i][2],Mats[i][3],Mats[i][4])
#objective function initializing
FX = 0.
Vars = []
for i in [x for x in Sections if x[0]=="C"]:
    a = m.Var(value=125,lb=100,ub=250,integer=False)
    b = m.Var(value=125,lb=100,ub=250,integer=False)
    Us = m.Var(value=0.02,lb=0.01,ub=0.045,integer=False)
    rebarS = rectRC(a.value,b.value,Us.value,cover.value,fai,"C")
    Sections[i][1] = [a.value,b.value,Us.value,rebarS]
    As = np.pi*fai**2./4*(4+2*rebarS[1][0]+2*rebarS[2][0])
    #calculating the cost of a Column section
    Sections[i][2] = m.Intermediate(Alfa1*As/(1000**3)*GammaS*UCS+a*b/(1000*1000)*UCC+2*(a+b)/(1000)*UCCF)
    Sections[i][3] = PMinteraction(eu_c.value,cover.value,a.value,b.value,f_c.value,fy.value,Est.value,(2+rebarS[1][0])*np.pi*fai**2/4,(2+rebarS[1][0])*np.pi*fai**2/4)
    FX += len(Sections[i][0][0])*Sections[i][0][1]/1000*Sections[i][2]
    Vars.append((i,Sections[i][1][:3]))
for i in [x for x in Sections if x[0]=="B"]:
    a = m.Var(value=125,lb=100,ub=250,integer=False)
    b = m.Var(value=125,lb=100,ub=250,integer=False)
    Us = m.Var(value=0.02,lb=0.01,ub=0.045)
    rebarS = rectRC(a.value,b.value,Us.value,cover.value,fai,"B")
    Sections[i][1] = [a.value,b.value,Us.value,rebarS]
    As = np.pi*fai**2./4*(4+2*rebarS[1][0]+2*rebarS[2][0])
    #calculating the cost of a Beam section
    Sections[i][2] = m.Intermediate((2*Alfa2*As/(1000**3)+(1-2*Alfa2)*As/(1000**3))*GammaS*UCS+a*b/(1000*1000)*UCC+(a+2*b)/(1000)*UCBF)
    print(Sections[i][2])
    FX += len(Sections[i][0][0])*Sections[i][0][1]/1000*Sections[i][2]
    Vars.append((i,Sections[i][1][:3]))
for i in [x for x in Sections if x[0]=="W"]:
    a = m.Var(value=125,lb=100,ub=250,integer=False)
    b = bw.value
    Us = m.Var(value=0.009,lb=0.007,ub=0.01)
    rebarS = rectRC(a.value,b.value,Us.value,cover.value,fai,"W")
    Sections[i][1] = [a.value,b.value,Us.value,rebarS]
    As = np.pi*fai**2./4*(4+2*rebarS[1][0]+2*rebarS[2][0])
    #calculating the cost of a Wall section
    Sections[i][2] = m.Intermediate(Alfa3*As/(1000**3)*GammaS*UCS+a*b/(1000*1000)*UCC+2*b/(1000)*UCWF)
    print(Sections[i][2])
    FX += len(Sections[i][0][0])*Sections[i][0][1]/1000*Sections[i][2]
    Vars.append((i,Sections[i][1][:3]))
#modifying the objective function by a reference value
FX = FX/ReferenceFX * 1.
m.Minimize(Objective(Vars))
m.options.SOLVER = 1
m.options.IMODE = 6
m.solve()
'''
Gekko solution modes are described in more detail in the documentation. The current mode is IMODE=6, which expects differential and algebraic equations. In this mode you must define the time points for the solution, such as:
m.time=[0,0.1,0.2,0.5,1.0,1.5,2.0]
If it is a steady-state problem (no differential equations), then it should be IMODE=3 for steady-state optimization.
m.options.IMODE=3
There is currently no definition for PMinteraction. If this is an Abaqus model call, it will need to be replaced by a suitable model approximation that Gekko can compile into byte-code. Some options are cspline (1D), bspline (2D), or machine learning models (for higher-dimensional functions).
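Here is a minimal sketch of the cspline route; the sampled data, bounds, and variable names are placeholders, and it assumes the Abaqus response has been evaluated offline on a 1-D grid beforehand:
from gekko import GEKKO
import numpy as np
m = GEKKO(remote=False)
# placeholder samples of the external (Abaqus) response, computed offline
x_data = np.linspace(100, 250, 10)
y_data = (x_data - 180)**2/1000 + 5.0
xv = m.Var(value=125, lb=100, ub=250)
yv = m.Var()
m.cspline(xv, yv, x_data, y_data)  # cubic-spline surrogate Gekko can differentiate
m.Minimize(yv)
m.options.IMODE = 3  # steady-state optimization
m.solve(disp=False)
print(xv.value[0], yv.value[0])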
I am trying to reconstruct the approximations and details at all levels using the inverse stationary wavelet transform from the PyWavelets (pywt) package in Python. My code is the following:
import numpy as np
import pywt
import matplotlib.pyplot as plt

def UDWT(Btotal, wname, Lps, Hps, edge_eff):
    Br = Btotal[0]; Bt = Btotal[1]; Bn = Btotal[2]
    ## Set parameters needed for UDWT
    samplelength = len(Br)
    # If the length of the data is odd, make the sample even-numbered by
    # dropping one point
    if np.mod(samplelength, 2) > 0:
        Br = Br[0:-1]
        Bt = Bt[0:-1]
        Bn = Bn[0:-1]
        samplelength = len(Br)
    # Edge extension mode set to periodic extension by default with this
    # routine in the Rice toolbox. pads is the number of points needed to
    # extend the sample to the next power of 2
    pads = 2**(np.ceil(np.log2(abs(samplelength)))) - samplelength
    ## Do the UDWT decomposition and reconstruction
    keep_all = {}
    for m in range(3):
        # Gets the data size up to the next power of 2 due to UDWT restrictions.
        # Although periodic extension is used for the wavelet edge handling, we
        # extend the data sample to the next power of 2 with a constant value.
        if m == 0:
            y = np.pad(Br, pad_width=int(pads/2), constant_values=np.nan)
        elif m == 1:
            y = np.pad(Bt, pad_width=int(pads/2), constant_values=np.nan)
        else:
            y = np.pad(Bn, pad_width=int(pads/2), constant_values=np.nan)
        # Decompose the signal using the UDWT
        nlevel = min(pywt.swt_max_level(y.shape[-1]), 8)  # level of decomposition, upper limit 8
        Coeff = pywt.swt(y, wname, nlevel)  # list of approximation and detail coefficient
        # pairs, in order similar to the wavedec function:
        # [(cAn, cDn), ..., (cA2, cD2), (cA1, cD1)]
        # Assign approximations to swa and details to swd
        swa = np.zeros((len(y), nlevel))
        swd = np.zeros((len(y), nlevel))
        for o in range(nlevel):
            swa[:, o] = Coeff[o][0]
            swd[:, o] = Coeff[o][1]
        # Reconstruct all the approximations and details at all levels.
        # Note: A and D must be separate arrays; the earlier version aliased
        # both to the same mzero array, which corrupted the reconstruction.
        A = np.zeros(np.shape(swd))
        D = np.zeros(np.shape(swd))
        coeffs_inverse = list(zip(swa.T, np.zeros(np.shape(swd)).T))
        invers_res = pywt.iswt(coeffs_inverse, wname)
        for pp in range(nlevel):
            swcfs = np.zeros(np.shape(swd))
            swcfs[:, pp] = swd[:, pp]
            # zero approximations at every level (a single-row zero array would
            # silently truncate the zip to one level)
            coeffs_inverse2 = list(zip(np.zeros(np.shape(swd)).T, swcfs.T))
            D[:, pp] = pywt.iswt(coeffs_inverse2, wname)
        for jjj in range(nlevel-1, -1, -1):
            if jjj == nlevel-1:
                A[:, jjj] = invers_res
            else:
                A[:, jjj] = A[:, jjj+1] + D[:, jjj+1]
        # *************************************************************************
        # VERY IMPORTANT: LINEAR PHASE SHIFT CORRECTION
        # *************************************************************************
        # Correct for the linear phase shift in the wavelet coefficients at each
        # level. No need to do this for the low-pass approximations as they will
        # be reconstructed and the shift will automatically be reversed. The
        # formula for the shift has been taken from Walden's paper and verified.
        # *************************************************************************
        for j in range(1, nlevel+1):
            shiftfac = Hps*(2**(j-1))
            for l in range(1, j):
                shiftfac = int(shiftfac + Lps*(2**(l-2))*((l-2) >= 0))
            swd[:, j-1] = np.roll(swd[:, j-1], shiftfac)
        flds = {"A": A.T,
                "D": D.T,
                "swd": swd.T,
                }
        Btot = ['Br', 'Bt', 'Bn']  # used just to name the fields
        keep_all[Btot[m]] = flds
    # 1) Put all the results together into a dict structure
    Apr = {}
    Swd = {}
    pads = int(pads)
    names = ['Br', 'Bt', 'Bn']
    for kk in range(3):
        A = keep_all[names[kk]]['A']
        # trim the padding off the columns (A.shape[1]; len(A) counted rows)
        Apr[names[kk]] = A[:, int(pads/2):A.shape[1]-int(pads/2)]
        swd = keep_all[names[kk]]['swd']
        Swd[names[kk]] = swd[:, int(pads/2):swd.shape[1]-int(pads/2)]
    # Returns the filter list for the current wavelet
    wavelet = pywt.Wavelet(wname)
    [h_0, h_1, _, _] = wavelet.inverse_filter_bank
    filterlength = len(h_0)
    if edge_eff:
        # 2) Get rid of the edge effects; to keep the edges skip this section
        for j in range(1, nlevel+1):
            extra = int((2**(j-2))*filterlength)  # TODO: justify this equation
            for m in range(3):
                # for approximations
                Apr[names[m]][j-1][0:extra] = np.nan
                Apr[names[m]][j-1][-extra:-1] = np.nan
                # for details
                Swd[names[m]][j-1][0:extra] = np.nan
                Swd[names[m]][j-1][-extra:-1] = np.nan
    return Apr, Swd, pads, nlevel
aa = np.sin(np.linspace(0, 2*np.pi, 100000)) + 0.05*np.random.rand(100000)
bb = np.cos(np.linspace(0, 2*np.pi, 100000)) + 0.05*np.random.rand(100000)
cc = np.cos(np.linspace(0, 4*np.pi, 100000)) + 0.05*np.random.rand(100000)
Btotal = [aa, bb, cc]
wname = 'coif2'
Lps = 7  # low-pass filter phase shift for level 1 Coiflet2
Hps = 4  # high-pass filter phase shift for level 1 Coiflet2
edge_eff = False  # keep the edges so the comparison below is over the full series
Apr, Swd, pads, nlevel = UDWT(Btotal, wname, Lps, Hps, edge_eff)
### Add the details at all levels to the highest-level approximation
### to compare with the original time series (the equation shown on the website)
new = Swd['Br'][0]
for i in range(1, nlevel):
    new = Swd['Br'][i] + new
sig = Apr['Br'][-1] + new
### Now plot to compare ###
## Reconstructed signal 1
plt.plot(sig)
### Second way to get the reconstructed signal:
### first-level details added to the approximations
plt.plot(Apr['Br'][-1] + Swd['Br'][-1])
### Original signal
plt.plot(aa)
I am trying to follow the procedure described on this website:
http://matlab.izmiran.ru/help/toolbox/wavelet/ch01_i24.html
However, the reconstructed time series does not seem to match the original exactly, as the plot comparison shows.
Any help?
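For what it's worth, a bare swt/iswt round trip without my padding and shift corrections does reconstruct a signal exactly (a minimal check, assuming a power-of-2 length so no padding is needed), so I suspect the mismatch comes from my extra processing:
import numpy as np
import pywt
t = np.linspace(0, 2*np.pi, 4096)  # power-of-2 length avoids padding
sig_in = np.sin(t) + 0.05*np.random.rand(4096)
coeffs = pywt.swt(sig_in, 'coif2', level=3)
sig_out = pywt.iswt(coeffs, 'coif2')
print(np.allclose(sig_in, sig_out))  # True: the round trip is exact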
I am trying to set up a hierarchical linear regression model using PYMC3. In my particular case, I want to see whether postal codes provide a meaningful structure for other features. Suppose I use the following mock data:
import pandas as pd
import numpy as np
import pymc3 as pm
data = pd.DataFrame({"postalcode": np.floor(np.random.uniform(low=10, high=99, size=1000)),
"x": np.random.normal(size=1000),
"y": np.random.normal(size=1000)})
data["postalcode"] = data["postalcode"].astype(int)
I generate postal codes from 10 to 99, as well as a normally distributed feature x and a target value y. Now I set up my indices for postal code level 1 and level 2:
def create_pc_index(level):
    pc = data["postalcode"].astype(str).str[0:level]
    unique_pc = pc.unique()
    pc_dict = dict(zip(unique_pc, range(0, len(unique_pc))))
    return pc_dict, pc.apply(lambda x: pc_dict[x]).values
pc1_dict, pc1_index = create_pc_index(1)
pc2_dict, pc2_index = create_pc_index(2)
Using the first digit of the postal code as the hierarchical attribute works fine:
number_of_samples = 1000
x = data["x"]
y = data["y"]
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=0.5, shape=1)
    mu_i = pm.Normal("mu_i", 5, sd=25, shape=1)
    intercept = pm.Normal('Intercept', mu_i, sd=1, shape=len(pc1_dict))
    mu_s = pm.Normal("mu_x", 0, sd=3, shape=1)
    x_coeffs = pm.Normal("x", mu_s, 1, shape=len(pc1_dict))
    mean = intercept[pc1_index] + x_coeffs[pc1_index] * x
    likelihood_mean = pm.Deterministic("mean", mean)
    likelihood = pm.Normal('y', mu=likelihood_mean, sd=sigma, observed=y)
    trace = pm.sample(number_of_samples)
burned_trace = trace[number_of_samples//2:]
However, if I want to add a second level to my hierarchy (in this case only on the intercept, ignoring x for the moment), I run into shape problems:
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=0.5, shape=1)
    mu_i_level_1 = pm.Normal("mu_i", 0, sd=25, shape=1)
    mu_i_level_2 = pm.Normal("mu_i_level_2", mu_i_level_1, sd=1, shape=len(pc1_dict))
    intercept = pm.Normal('Intercept', mu_i_level_2[pc1_index], sd=1, shape=len(pc2_dict))
    mu_s = pm.Normal("mu_x", 0, sd=3, shape=1)
    x_coeffs = pm.Normal("x", mu_s, 1, shape=len(pc1_dict))
    mean = intercept[pc2_index] + x_coeffs[pc1_index] * x
    likelihood_mean = pm.Deterministic("mean", mean)
    likelihood = pm.Normal('y', mu=likelihood_mean, sd=sigma, observed=y)
    trace = pm.sample(number_of_samples)
burned_trace = trace[number_of_samples//2:]
The error message is:
operands could not be broadcast together with shapes (89,) (1000,)
How do I model multiple levels in my regression correctly? Is this just an issue with the correct shape size or is there a more fundamental error on my part?
Thanks in advance!
I don't think intercept can have a shape of len(pc2_dict) (89) while its mu, mu_i_level_2[pc1_index], has one entry per observation (1000), which is exactly the broadcast error you see. The contradiction is here:
intercept = pm.Normal('Intercept', mu_i_level_2[pc1_index], sd=1, shape=len(pc2_dict))
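A sketch of one possible fix (my own suggestion, untested on your data): index the level-1 means by the parent of each level-2 group rather than by each observation, so that mu and shape agree. The parent_of_pc2 mapping below is a helper I introduce for illustration, and it relies on pc2_dict preserving insertion order:
import numpy as np
# parent of each unique 2-digit prefix = index of its first digit
parent_of_pc2 = np.array([pc1_dict[code[0]] for code in pc2_dict])
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=0.5, shape=1)
    mu_i_level_1 = pm.Normal("mu_i", 0, sd=25, shape=1)
    mu_i_level_2 = pm.Normal("mu_i_level_2", mu_i_level_1, sd=1, shape=len(pc1_dict))
    # one intercept per 2-digit code, centered on its 1-digit parent
    intercept = pm.Normal('Intercept', mu_i_level_2[parent_of_pc2], sd=1, shape=len(pc2_dict))
    mu_s = pm.Normal("mu_x", 0, sd=3, shape=1)
    x_coeffs = pm.Normal("x", mu_s, 1, shape=len(pc1_dict))
    mean = intercept[pc2_index] + x_coeffs[pc1_index] * x
    likelihood = pm.Normal('y', mu=mean, sd=sigma, observed=y)
    trace = pm.sample(number_of_samples)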
In the PyMC3 example for multilevel linear regression (the example is here, with the radon data set from Gelman et al. (2007)), the intercepts (for different counties) and slopes (for apartments with and without basements) each have a Normal prior. How can I model them together with a multivariate normal prior, so that I can examine the correlation between them?
The hierarchical model given in the example is like this:
with pm.Model() as hierarchical_model:
    # Hyperpriors for group nodes
    mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
    sigma_a = pm.HalfCauchy('sigma_a', 5)
    mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
    sigma_b = pm.HalfCauchy('sigma_b', 5)
    # Intercept for each county, distributed around group mean mu_a.
    # Above we just set mu and sd to a fixed value while here we
    # plug in a common group distribution for all a and b (which are
    # vectors of length n_counties).
    a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
    # Slope for each county, distributed around group mean mu_b
    b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
    # Model error
    eps = pm.HalfCauchy('eps', 5)
    radon_est = a[county_idx] + b[county_idx] * data.floor.values
    # Data likelihood
    radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
    hierarchical_trace = pm.sample(2000)
And I'm trying to make some changes to the priors:
import theano.tensor as T

with pm.Model() as correlation_model:
    # Hyperpriors for group nodes
    mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
    mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
    # here I want to model a and b together,
    # borrowing some code from a multivariate normal model,
    # but the code does not work
    sigma = pm.HalfCauchy('sigma', 5, shape=2)
    C_triu = pm.LKJCorr('C_triu', n=2, p=2)
    C = T.fill_diagonal(C_triu[np.zeros((2,2), 'int')], 1)
    cov = pm.Deterministic('cov', T.nlinalg.matrix_dot(sigma, C, sigma))
    tau = pm.Deterministic('tau', T.nlinalg.matrix_inverse(cov))
    a, b = pm.MvNormal('mu', mu=(mu_a, mu_b), tau=tau,
                       shape=(n_counties, n_counties))
    # Model error
    eps = pm.HalfCauchy('eps', 5)
    radon_est = a[county_idx] + b[county_idx] * data.floor.values
    # Data likelihood
    radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
    correlation_trace = pm.sample(2000)
Here is the error message I got:
File "<ipython-input-108-ce400c54cc39>", line 14, in <module>
tau = pm.Deterministic('tau', T.nlinalg.matrix_inverse(cov))
File "/home/olivier/anaconda3/lib/python3.5/site-packages/theano/gof/op.py", line 611, in __call__
node = self.make_node(*inputs, **kwargs)
File "/home/olivier/anaconda3/lib/python3.5/site-packages/theano/tensor/nlinalg.py", line 73, in make_node
assert x.ndim == 2
AssertionError
Clearly I've made some mistakes about the covariance matrix, but I'm new to pymc3 and completely new to theano so have no idea how to fix it. I gather this should be a rather common use case so maybe there have been some examples on it? I just can't find them.
The full replicable code and data can be seen on the example page (link given above). I didn't include it here because it's too long and also I thought those familiar with pymc3 are very likely already quite familiar with it:)
You forgot to add one line when creating the covariance matrix, and you mis-specified the shape of the MvNormal. Your model should look something like this:
import theano.tensor as tt

with pm.Model() as correlation_model:
    mu = pm.Normal('mu', mu=0., sd=10, shape=2)
    sigma = pm.HalfCauchy('sigma', 5, shape=2)
    C_triu = pm.LKJCorr('C_triu', n=2, p=2)
    C = tt.fill_diagonal(C_triu[np.zeros((2,2), 'int')], 1.)
    sigma_diag = tt.nlinalg.diag(sigma)  # this line
    cov = tt.nlinalg.matrix_dot(sigma_diag, C, sigma_diag)
    tau = tt.nlinalg.matrix_inverse(cov)
    ab = pm.MvNormal('ab', mu=mu, tau=tau, shape=(n_counties, 2))
    eps = pm.HalfCauchy('eps', 5)
    radon_est = ab[:, 0][county_idx] + ab[:, 1][county_idx] * data.floor.values
    radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
    trace = pm.sample(2000)
Notice that alternatively, you can evaluate the correlation of the intercept and the slope from the posterior of hierarchical_model. You can use a frequentist method or build another Bayesian model that takes the result of hierarchical_model as its observed data. Maybe this could be faster.
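For the frequentist route, the check can be as simple as correlating the per-county posterior means (a sketch, assuming the a and b draws from hierarchical_trace above):
import numpy as np
chain = hierarchical_trace[100:]
a_mean = chain['a'].mean(axis=0)  # posterior mean intercept per county
b_mean = chain['b'].mean(axis=0)  # posterior mean slope per county
print(np.corrcoef(a_mean, b_mean)[0, 1])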
EDIT
If you want to evaluate the correlation of two variables from the posterior, you can do something like this:
chain = hierarchical_trace[100:]
x_0 = chain['mu_a']
x_1 = chain['mu_b']
X = np.vstack((x_0, x_1)).T
and then you can run the following model:
with pm.Model() as correlation:
    mu = pm.Normal('mu', mu=0., sd=10, shape=2)
    sigma = pm.HalfCauchy('sigma', 5, shape=2)
    C_triu = pm.LKJCorr('C_triu', n=2, p=2)
    C = tt.fill_diagonal(C_triu[np.zeros((2,2), 'int')], 1.)
    sigma_diag = tt.nlinalg.diag(sigma)
    cov = tt.nlinalg.matrix_dot(sigma_diag, C, sigma_diag)
    tau = tt.nlinalg.matrix_inverse(cov)
    yl = pm.MvNormal('yl', mu=mu, tau=tau, shape=(2, 2), observed=X)
    trace = pm.sample(5000, pm.Metropolis())
You can replace x_0 and x_1 according to your needs. For example you may want to do:
x_0 = np.random.normal(chain['mu_a'], chain['sigma_a'])
x_1 = np.random.normal(chain['mu_b'], chain['sigma_b'])
I'm trying to write some Bayesian probit code using data augmentation. I can get it to work if I loop over the rows of the output matrix, but I'd like to vectorize it and do it all in one shot (presumably that's faster).
import numpy as np
from numpy import random
import statsmodels.api as sm
from scipy import stats
from scipy.stats import norm, truncnorm
##################################
### Create some simulated data ###
num_leg = 50
num_bills = 20
a = np.random.uniform(-1,1,num_bills).reshape(num_bills, 1)
b = np.random.uniform(-2,2,num_bills).reshape(num_bills, 1)
x = np.random.standard_normal(num_leg).reshape(num_leg, 1)
ystar_base = a + np.dot(b,x.T)
epsilon = np.random.standard_normal(num_leg * num_bills).reshape(num_bills, num_leg)
ystar = ystar_base + epsilon
y = 1*(ystar >0)
### Initialize some stuff I need ###
avec = [0]*num_bills  # these are bill parameters
bvec = [0]*num_bills
betavec = [np.matrix(list(zip(avec, bvec)))]
xvec = [0]*num_leg  # these are legislator parameters
x_ones = np.ones(num_leg)  # intercept column
def init_y(mat):  # initialize a latent y matrix
    if mat == 1:
        return truncnorm.rvs(0, 10000)
    else:
        return truncnorm.rvs(-10000, 0)
vectorize_y = np.vectorize(init_y)
latent_y = np.matrix(vectorize_y(y))
burn = 500  # how long to run the MCMC
runs = 500
### define the functions ###
def sample_params(xnow, ynow):  # This is the function I'd like to vectorize
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(ynow) == list:
        ynow = np.array(ynow)
    ynow = ynow.T  # reshape(ynow.shape[0],1)
    sigma = np.linalg.inv(np.dot(xnow.T, xnow))  ### This is the line that produces an error ###
    xy = np.dot(xnow.T, ynow)
    mu = np.dot(sigma, xy)  # this is just (x'x)inv x'y
    return np.random.multivariate_normal(np.array(mu).flatten(), sigma)
vecparams = np.vectorize(sample_params)
def get_mu(xnow, bnow):  # getting the updated mean to draw the latent ys
    if type(xnow) == list:
        xnow = np.array(xnow)
    if type(bnow) == list:
        bnow = np.array(bnow)
    mu = np.dot(xnow, bnow.T)
    mu = np.matrix(mu)
    return mu
def sample_y(mu, ynow):  # generate latent y matrix
    if ynow == 1:
        a, b = (0 - mu), (10000 - mu)
    else:
        a, b = (-10000 - mu), (0 - mu)
    return truncnorm.rvs(a, b)
vector_sample = np.vectorize(sample_y) # I'd like to be able to do something like this
### Here's the MCMC loop with the internal loop over rows (bills)
for i in range(burn+runs):
    this_beta = []
    this_x = []
    this_y = []
    ex = np.column_stack((x_ones, x))  # design matrix, the same for every bill
    for j in range(num_bills):  # I'd like to get rid of this loop
        newbeta = sample_params(ex, latent_y[j])
        this_beta.append(newbeta)
    #this_beta = vecparams(ex, latent_y[:,])  # and call the vectorized function here
    betavec.append(this_beta)
    # Note, I can vectorize the latent outputs easily enough here
    mean = get_mu(ex, betavec[-1])
    latent_y = np.matrix(vector_sample(mean, np.matrix(y).T).T.reshape(latent_y.shape[0], latent_y.shape[1]))
### Now a bit of code to check whether I've recovered what I want ###
test_beta = [list(zip(*z)) for z in betavec[burn:]]
test_a = np.array([z[0] for z in test_beta])
test_b = np.array([z[1] for z in test_beta])
amean = test_a.sum(axis=0)/float(runs)
bmean = test_b.sum(axis=0)/float(runs)
print('a mean')
print(np.corrcoef([amean, np.array(a).flatten()]))
print()
print('b mean')
print(np.corrcoef([bmean, np.array(b).flatten()]))
If I comment out the loop and use the commented-out lines just above, I get the following error at the line I indicated earlier (the one that defines sigma):
LinAlgError: 0-dimensional array given. Array must be at least two-dimensional
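For completeness, this is the kind of vectorization I am after (my own sketch, assuming the same design matrix X applies to every bill): since sigma = (X'X)^-1 is shared across bills, all posterior means can be computed in one matrix product and all draws taken at once through a single Cholesky factor:
# vectorized replacement for the loop over bills (sketch)
X = np.column_stack((x_ones, x.flatten()))  # (num_leg, 2) design matrix
sigma = np.linalg.inv(X.T @ X)  # shared (2, 2) posterior covariance
L = np.linalg.cholesky(sigma)
Y = np.asarray(latent_y).T  # (num_leg, num_bills) latent outcomes
mu_all = sigma @ X.T @ Y  # (2, num_bills) posterior means
Z = np.random.standard_normal((num_bills, 2))
betas = mu_all.T + Z @ L.T  # (num_bills, 2): one (a, b) draw per bill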