statsmodels.tsa.api-0.9.0 ZeroDivisionError: division by zero - python

Code:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels as sm
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
print('statsmodels.__version__', sm.__version__)
df = pd.DataFrame([
[547.184518, 256.990247, 237.709566, 465.214791, 1479.401737],
], columns=['point_4', 'point_5', 'point_6', 'point_7', 'point_8'], index=['000001.XSHE'])
fit2 = SimpleExpSmoothing(df.loc['000001.XSHE']).fit(smoothing_level=0.6, optimized=False)
fcast1 = fit2.forecast(1)
Error:
statsmodels.__version__ 0.9.0
/opt/conda/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py:221: ValueWarning: An unsupported index was provided and will be ignored when e.g. forecasting.
' ignored when e.g. forecasting.', ValueWarning)
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-4-a742c2be4f46> in <module>
12 ], columns=['point_4', 'point_5', 'point_6', 'point_7', 'point_8'], index=['000001.XSHE'])
13
---> 14 fit2 = SimpleExpSmoothing(df.loc['000001.XSHE']).fit(smoothing_level=0.6, optimized=False)
15 fcast1 = fit2.forecast(1)
/opt/conda/lib/python3.6/site-packages/statsmodels/tsa/holtwinters.py in fit(self, smoothing_level, optimized)
814 [1] Hyndman, Rob J., and George Athanasopoulos. Forecasting: principles and practice. OTexts, 2014.
815 """
--> 816 return super(SimpleExpSmoothing, self).fit(smoothing_level=smoothing_level, optimized=optimized)
817
818
/opt/conda/lib/python3.6/site-packages/statsmodels/tsa/holtwinters.py in fit(self, smoothing_level, smoothing_slope, smoothing_seasonal, damping_slope, optimized, use_boxcox, remove_bias, use_basinhopping)
592 smoothing_seasonal=gamma, damping_slope=phi,
593 initial_level=l0, initial_slope=b0, initial_seasons=s0,
--> 594 use_boxcox=use_boxcox, lamda=lamda, remove_bias=remove_bias)
595 hwfit._results.mle_retvals = opt
596 return hwfit
/opt/conda/lib/python3.6/site-packages/statsmodels/tsa/holtwinters.py in _predict(self, h, smoothing_level, smoothing_slope, smoothing_seasonal, initial_level, initial_slope, damping_slope, initial_seasons, use_boxcox, lamda, remove_bias)
733 k = m * seasoning + 2 * trending + 2 + 1 * damped
734 aic = self.nobs * np.log(sse / self.nobs) + (k) * 2
--> 735 aicc = aic + (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
736 bic = self.nobs * np.log(sse / self.nobs) + (k) * np.log(self.nobs)
737 resid = data - fitted[:-h - 1]
ZeroDivisionError: division by zero
SimpleExpSmoothing is used for forecasting time-series data. My input data is valid, so it should output forecast data without error.
If I remove point_8 column from the DataFrame, then the error disappears.
Do you know why it throws ZeroDivisionError?

Related

TypeError: expected dtype object, got 'numpy.dtype[float32]' when running StatsForecast

I followed the 'Forecast with ARIMA and ETS' tutorial (https://nixtla.github.io/statsforecast/examples/getting_started_with_auto_arima_and_ets.html) exactly, but my Jupyter Notebook (Anaconda) showed the following error.
"TypeError: expected dtype object, got 'numpy.dtype[float32]'"
Why do I get the error? Can you give me solutions for this?
Thanks in advance.
import numpy as np
import pandas as pd
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, ETS, Naive #Imports the models you will use
from statsforecast.utils import AirPassengersDF
Y_df = AirPassengersDF
Y_df.head()
unique_id ds y
0 1.0 1949-01-31 112.0
1 1.0 1949-02-28 118.0
2 1.0 1949-03-31 132.0
3 1.0 1949-04-30 129.0
4 1.0 1949-05-31 121.0
Y_train_df = Y_df[Y_df.ds<='1959-12-31'] # 132 monthly observations for train
Y_test_df = Y_df[Y_df.ds>'1959-12-31'] # 12 monthly observations for test
season_length = 12 # Monthly data
horizon = len(Y_test_df) # Predict the lenght of the test df
# Include the models you imported
models = [
AutoARIMA(season_length=season_length),
ETS(season_length=season_length),
Naive()
]
# Instantiate the StatsForecast class as sf
sf = StatsForecast(
df=Y_train_df,
models=models,
freq='M',
n_jobs=-1
)
# Forecast for the defined horizon
Y_hat_df = sf.forecast(horizon)
Y_hat_df.head()
Then I ran the code, but I got the following error.
TypeError Traceback (most recent call last)
<ipython-input-10-a9ee1bd8ce20> in <module>
18
19 # Forecast for the defined horizon
---> 20 Y_hat_df = sf.forecast(horizon)
21
22 Y_hat_df.head()
~\spyder\lib\site-packages\statsforecast\core.py in forecast(self, h, df, X_df, level, fitted, sort_df)
668 X, level = self._parse_X_level(h=h, X=X_df, level=level)
669 if self.n_jobs == 1:
--> 670 res_fcsts = self.ga.forecast(
671 models=self.models,
672 h=h,
~\spyder\lib\site-packages\statsforecast\core.py in forecast(self, models, h, fallback_model, fitted, X, level, verbose)
197 )
198 else:
--> 199 raise error
200 cols_m = [
201 key
~\spyder\lib\site-packages\statsforecast\core.py in forecast(self, models, h, fallback_model, fitted, X, level, verbose)
183 kwargs["level"] = level
184 try:
--> 185 res_i = model.forecast(
186 h=h, y=y_train, X=X_train, X_future=X_f, fitted=fitted, **kwargs
187 )
~\spyder\lib\site-packages\statsforecast\models.py in forecast(self, y, h, X, X_future, level, fitted)
306 """
307 with np.errstate(invalid="ignore"):
--> 308 mod = auto_arima_f(
309 x=y,
310 d=self.d,
~\spyder\lib\site-packages\statsforecast\arima.py in auto_arima_f(x, d, D, max_p, max_q, max_P, max_Q, max_order, max_d, max_D, start_p, start_q, start_P, start_Q, stationary, seasonal, ic, stepwise, nmodels, trace, approximation, method, truncate, xreg, test, test_kwargs, seasonal_test, seasonal_test_kwargs, allowdrift, allowmean, blambda, biasadj, parallel, num_cores, period)
1785 D = 0
1786 elif D is None:
-> 1787 D = nsdiffs(
1788 xx, period=m, test=seasonal_test, max_D=max_D, **seasonal_test_kwargs
1789 )
~\spyder\lib\site-packages\statsforecast\arima.py in nsdiffs(x, test, alpha, period, max_D, **kwargs)
1608 while dodiff and D < max_D:
1609 D += 1
-> 1610 x = diff(x, period, 1)
1611 if is_constant(x):
1612 return D
~\spyder\lib\site-packages\statsforecast\arima.py in diff(x, lag, differences)
583 def diff(x, lag, differences):
584 if x.ndim == 1:
--> 585 y = diff1d(x, lag, differences)
586 nan_mask = np.isnan(y)
587 elif x.ndim == 2:
TypeError: expected dtype object, got 'numpy.dtype[float32]'
I am supposed to get the output below.
unique_id ds AutoARIMA ETS Naive
1.0 1960-01-31 424.160156 406.651276 405.0
1.0 1960-02-29 407.081696 401.732910 405.0
1.0 1960-03-31 470.860535 456.289642 405.0
1.0 1960-04-30 460.913605 440.870514 405.0
1.0 1960-05-31 484.900879 440.333923 405.0
Well, I found a solution for this error: I updated numba from version 0.50.1 to 0.55.0, and then everything ran successfully. It looks like StatsForecast needs numba 0.55.0 and llvmlite 0.38.0. Thanks.
conda install -c numba numba=0.55.0
conda install -c numba llvmlite=0.38.0

Strange behaviour of SymPy's rsolve with fractional coefficients

I have the following issue with Sympy when trying to solve difference (recursive) equations. When the free terms are integers, everything is OK, e.g.
y = sym.Function('y')
t = sym.symbols('t',integer=True)
f = y(t)- 1/3*y(t-1) - 9
eq_sol = sym.rsolve(f,y(t))
eq_sol
But if a coefficient is a decimal or a fraction, such as here (9 changes to 9.1),
y = sym.Function('y')
t = sym.symbols('t',integer=True)
f = y(t)- 1/3*y(t-1) - 9.1
eq_sol = sym.rsolve(f,y(t))
eq_sol
an error appears:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_27564/4249747763.py in <module>
2 t = sym.symbols('t',integer=True)
3 f = y(t)- 1/3*y(t-1) - 9.1
----> 4 eq_sol = sym.rsolve(f,y(t))
5 eq_sol
~\Anaconda3\lib\site-packages\sympy\solvers\recurr.py in rsolve(f, y, init)
741 h_part[int(result[k])].append(coeff)
742 continue
--> 743 raise ValueError(
744 "'%s(%s + k)' expected, got '%s'" % (y.func, n, h))
745 for k in h_part:
ValueError: 'y(t + k)' expected, got '9.10000000000000'
I would be grateful to those who could provide a hint.
Thank you very much, Oscar Benjamin! Replacing the floating-point constant with an exact fraction did the trick:
y = sym.Function('y')
t = sym.symbols('t',integer=True)
f = y(t)- 1/3*y(t-1) - Fraction(9,2) # different number here, not 9.1
eq_sol = sym.rsolve(f,y(t))
eq_sol

Can't differentiate wrt numpy arrays of dtype int64?

I am new to NumPy. Today, when I used it for linear regression, I got the error shown below:
KeyError Traceback (most recent call
last)
~/anaconda3/lib/python3.6/site-packages/autograd/numpy/numpy_extra.py
in new_array_node(value, tapes)
84 try:
---> 85 return array_dtype_mappings[value.dtype](value, tapes)
86 except KeyError:
KeyError: dtype('int64')
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last)
<ipython-input-4-aebe8f7987b0> in <module>()
24 return cost/float(np.size(y))
25
---> 26 weight_h, cost_h = gradient_descent(least_squares, alpha,
max_its, w)
27
28 # a)
<ipython-input-2-1b74c4f818f4> in gradient_descent(g, alpha, max_its,
w)
12 for k in range(max_its):
13 # evaluate the gradient
---> 14 grad_eval = gradient(w)
15
16 # take gradient descent step
~/anaconda3/lib/python3.6/site-packages/autograd/core.py in
gradfun(*args, **kwargs)
19 #attach_name_and_doc(fun, argnum, 'Gradient')
20 def gradfun(*args,**kwargs):
---> 21 return
backward_pass(*forward_pass(fun,args,kwargs,argnum))
22 return gradfun
23
~/anaconda3/lib/python3.6/site-packages/autograd/core.py in
forward_pass(fun, args, kwargs, argnum)
57 tape = CalculationTape()
58 arg_wrt = args[argnum]
---> 59 start_node = new_node(safe_type(getval(arg_wrt)),
[tape])
60 args = list(args)
61 args[argnum] = merge_tapes(start_node, arg_wrt)
~/anaconda3/lib/python3.6/site-packages/autograd/core.py in
new_node(value, tapes)
185 def new_node(value, tapes=[]):
186 try:
--> 187 return Node.type_mappings[type(value)](value, tapes)
188 except KeyError:
189 return NoDerivativeNode(value, tapes)
~/anaconda3/lib/python3.6/site-packages/autograd/numpy/numpy_extra.py
in new_array_node(value, tapes)
85 return array_dtype_mappings[value.dtype](value, tapes)
86 except KeyError:
---> 87 raise TypeError("Can't differentiate wrt numpy arrays
of dtype {0}".format(value.dtype))
88 Node.type_mappings[anp.ndarray] = new_array_node
89
TypeError: Can't differentiate wrt numpy arrays of dtype int64
I really have no idea what happened. I guess it might be related to the structure of arrays in NumPy. Or did I forget to install a package? Below is my original code.
# import statements
datapath = 'datasets/'
from autograd import numpy as np
# import automatic differentiator to compute gradient module
from autograd import grad
# gradient descent function
def gradient_descent(g,alpha,max_its,w):
# compute gradient module using autograd
gradient = grad(g)
# run the gradient descent loop
weight_history = [w] # weight history container
cost_history = [g(w)] # cost function history container
for k in range(max_its):
# evaluate the gradient
grad_eval = gradient(w)
# take gradient descent step
w = w - alpha*grad_eval
# record weight and cost
weight_history.append(w)
cost_history.append(g(w))
return weight_history,cost_history
# load in dataset
csvname = datapath + 'kleibers_law_data.csv'
data = np.loadtxt(csvname,delimiter=',')
# get input and output of dataset
x = data[:-1,:]
y = data[-1:,:]
x = np.log(x)
y = np.log(y)
#Data Initiation
alpha = 0.01
max_its = 1000
w = np.array([0,0])
#linear model
def model(x, w):
a = w[0] + np.dot(x.T, w[1:])
return a.T
def least_squares(w):
cost = np.sum((model(x,w)-y)**2)
return cost/float(np.size(y))
weight_h, cost_h = gradient_descent(least_squares, alpha, max_its, w)
# a)
k = np.linspace(-5.5, 7.5, 250)
y = weight_h[max_its][0] + k*weight_h[max_its][1]
plt.figure()
plt.plot(x, y, label='Linear Line', color='g')
plt.xlabel('log of mass')
plt.ylabel('log of metabolic rate')
plt.title("Answer Of a")
plt.legend()
plt.show()
# b)
w0 = weight_h[max_its][0]
w1 = weight_h[max_its][1]
print("Nonlinear relationship between the body mass x and the metabolic
rate y is " /
+ str(w0) + " + " + "log(xp)" + str(w1) + " = " + "log(yp)")
# c)
x2 = np.log(10)
Kj = np.exp(w0 + w1*x2)*1000/4.18
print("It needs " + str(Kj) + " calories")
Could someone help me to figure it out? Thanks a lot.
Here are the important parts of your error:
---> 14 grad_eval = gradient(w)
...
TypeError: Can't differentiate wrt numpy arrays of dtype int64
Your gradient function is saying it doesn't like to differentiate arrays of ints, which makes some sense, since it probably wants more precision than an int can give. You probably need them to be doubles or floats. For a simple solution to this, I believe you can just change your initializer from:
w = np.array([0,0])
which is going to automatically cast those 0s as ints, to:
w = np.array([0.0,0.0])
Those decimals after the 0 will let it know you want floats. There are other ways to tell it what kind of array you want (https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.array.html), but this is a simple way.

Python rpy2 - nls regression RRuntimeError

I am trying to do some nls regression using R within Python. I am stuck on an RRuntimeError that is well outside my expertise; I have struggled for a few days to get it to work, so I would appreciate some help.
This is my csv of data:
http://www.sharecsv.com/s/4cdd4f832b606d6616260f9dc0eedf38/ratedata.csv
This is my code:
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
dfData = pd.read_csv('C:\\Users\\nick\\Desktop\\ratedata.csv')
rdf = pandas2ri.py2ri(dfData)
a = 0.5
b = 1.1
count = rdf.rx(True, 'Trials')
rates = rdf.rx(True, 'Successes')
base = importr('base', robject_translations={'with': '_with'})
stats = importr('stats', robject_translations={'format_perc': '_format_perc'})
my_formula = stats.as_formula('rates ~ 1-(1/(10^(a * count ^ (b-1))))')
d = ro.ListVector({'a': a, 'b': b})
fit = stats.nls(my_formula, weights=count, start=d)
Everything is compiling apart from:
fit = stats.nls(my_formula, weights=count, start=d)
I am getting the following traceback:
---------------------------------------------------------------------------
RRuntimeError Traceback (most recent call last)
<ipython-input-12-3f7fcd7d7851> in <module>()
6 d = ro.ListVector({'a': a, 'b': b})
7
----> 8 fit = stats.nls(my_formula, weights=count, start=d)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\rpy2\robjects\functions.py in __call__(self, *args, **kwargs)
176 v = kwargs.pop(k)
177 kwargs[r_k] = v
--> 178 return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
179
180 pattern_link = re.compile(r'\\link\{(.+?)\}')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\rpy2\robjects\functions.py in __call__(self, *args, **kwargs)
104 for k, v in kwargs.items():
105 new_kwargs[k] = conversion.py2ri(v)
--> 106 res = super(Function, self).__call__(*new_args, **new_kwargs)
107 res = conversion.ri2ro(res)
108 return res
RRuntimeError: Error in (function (formula, data = parent.frame(), start, control = nls.control(), :
parameters without starting value in 'data': rates, count
I would be eternally thankful if anyone can see where I am going wrong, or can offer advice. All I want is the two numbers from that formula back in Python so I can use those to construct some confidence intervals.
Thank you
Consider incorporating all your formula variables into a single dataframe and using the data argument. The as_formula call looks in the R environment, but rates and count are in the Python scope. Hence, keep all items in the same object. Then run your nls with either the Pandas dataframe or the R dataframe:
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
base = importr('base', robject_translations={'with': '_with'})
stats = importr('stats', robject_translations={'format_perc': '_format_perc'})
a = 0.05
b = 1.1
d = ro.ListVector({'a': a, 'b': b})
dfData = pd.read_csv('Input.csv')
dfData['count'] = dfData['Trials'].astype('float')
dfData['rates'] = dfData['Successes'] / dfData['Trials']
dfData['a'] = a
dfData['b'] = b
pandas2ri.activate()
rdf = pandas2ri.py2ri(dfData)
my_formula = stats.as_formula('rates ~ 1-(1/(10^(a * count ^ (b-1))))')
# WITH PANDAS DATAFRAME
fit = stats.nls(formula=my_formula, data=dfData, weights=dfData['count'], start=d)
print(fit)
# WITH R DATAFRAME
fit = stats.nls(formula=my_formula, data=rdf, weights=rdf.rx(True, 'count'), start=d)
print(fit)
Alternatively, you can use robjects.globalenv and omit the data argument:
ro.globalenv['rates'] = dfData['rates']
ro.globalenv['count'] = dfData['count']
ro.globalenv['a'] = dfData['a']
ro.globalenv['b'] = dfData['b']
fit = stats.nls(formula=my_formula, weights=dfData['count'], start=d)
print(fit)
# Nonlinear regression model
# model: rates ~ 1 - (1/(10^(a * count^(b - 1))))
# data: parent.frame()
# a b
# 0.01043 1.24943
# weighted residual sum-of-squares: 14.37
# Number of iterations to convergence: 6
# Achieved convergence tolerance: 9.793e-07
# To return parameters
num = fit.rx('m')[0].names.index('getPars')
obj = fit.rx('m')[0][num]()
print(obj[0])
# 0.010425686223717435
print(obj[1])
# 1.2494303314553932
Equivalently in R:
dfData <- read.csv('Input.csv')
a <- .05
b <- 1.1
d <- list(a=a, b=b)
dfData$count <- dfData$Trials
dfData$rates <- dfData$Successes / dfData$Trials
dfData$a <- a
dfData$b <- b
my_formula <- stats::as.formula("rates ~ 1-(1/(10^(a * count ^ (b-1))))")
fit <- stats::nls(my_formula, data=dfData, weights=dfData$count, start=d)
print(fit)
# Nonlinear regression model
# model: rates ~ 1 - (1/(10^(a * count^(b - 1))))
# data: dfData
# a b
# 0.01043 1.24943
# weighted residual sum-of-squares: 14.37
# Number of iterations to convergence: 6
# Achieved convergence tolerance: 9.793e-07
# To return parameters
fit$m$getPars()['a']
# 0.01042569
fit$m$getPars()['b']
# 1.24943

Converting a mixture of gaussians to PyMC3

I am trying to learn PyMC3, and I want to make a simple mixture-of-Gaussians example. I found this example and want to convert it to PyMC3, but I'm currently getting an error when trying to plot the traceplot.
n1 = 500
n2 = 200
n = n1+n2
mean1 = 21.8
mean2 = 42.0
precision = 0.1
sigma = np.sqrt(1 / precision)
# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2
data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)
data = np.concatenate([data1 , data2])
#np.random.shuffle(data)
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2 guassians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)
with pm.Model() as model:
#priors
p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2
ber = pm.Bernoulli( "ber", p = p) # produces 1 with proportion p.
precision = pm.Gamma('precision', alpha=0.1, beta=0.1)
mean1 = pm.Normal( "mean1", 0, 0.01 ) #better to use normals versus Uniforms (unless you are certain the value is truncated at 0 and 200
mean2 = pm.Normal( "mean2", 0, 0.01 )
mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)
process = pm.Normal('process', mu=mean, tau=precision, observed=data)
# inference
step = pm.Metropolis()
trace = pm.sample(10000, step)
pm.traceplot(trace)
Error:
sigma1: 3.16227766017
sigma2: 1.69030850946
[-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
44 step = pm.Metropolis()
45 trace = pm.sample(10000, step)
---> 46 pm.traceplot(trace)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
70 ax[i, 0].set_xlim(mind - .5, maxd + .5)
71 else:
---> 72 kdeplot_op(ax[i, 0], d)
73 ax[i, 0].set_title(str(v))
74 ax[i, 0].grid(grid)
/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
94 for i in range(data.shape[1]):
95 d = data[:, i]
---> 96 density = kde.gaussian_kde(d)
97 l = np.min(d)
98 u = np.max(d)
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
186
187 self.d, self.n = self.dataset.shape
--> 188 self.set_bandwidth(bw_method=bw_method)
189
190 def evaluate(self, points):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
496 raise ValueError(msg)
497
--> 498 self._compute_covariance()
499
500 def _compute_covariance(self):
/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
507 self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
508 bias=False))
--> 509 self._data_inv_cov = linalg.inv(self._data_covariance)
510
511 self.covariance = self._data_covariance * self.factor**2
/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
381 inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
382 if info > 0:
--> 383 raise LinAlgError("singular matrix")
384 if info < 0:
385 raise ValueError('illegal value in %d-th argument of internal '
LinAlgError: singular matrix
Thanks to Fonnesbeck for answering this on the github issue tracker:
https://github.com/pymc-devs/pymc3/issues/452
here is the updated code:
with pm.Model() as model:
#priors
p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2
ber = pm.Bernoulli( "ber", p = p, shape=len(data)) # produces 1 with proportion p.
sigma = pm.Uniform('sigma', 0, 100)
precision = sigma**-2
mean = pm.Normal( "mean", 0, 0.01, shape=2 )
mu = pm.Deterministic('mu', mean[ber])
process = pm.Normal('process', mu=mu, tau=precision, observed=data)
with model:
step1 = pm.Metropolis([p, sigma, mean])
step2 = pm.BinaryMetropolis([ber])
trace = pm.sample(10000, [step1, step2])
You need to use BinaryMetropolis when inferring a Bernoulli random variable.
And an even simpler and quicker version is as follows:
with pm.Model() as model2:
p = pm.Beta( "p", 1., 1.)
means = pm.Uniform('mean', 15, 60, shape=2)
sigma = pm.Uniform('sigma', 0, 20, testval=5)
process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)
with model2:
step = pm.Metropolis()
trace = pm.sample(10000, step=step)
I know this issue is old, but I am trying different examples of PyMC3 usage to get used to modeling in PyMC3. The answer as given above does not work in the current version 1.0 of PyMC3 (it does not distinguish the two means correctly). The minimum changes I had to make in order to get it to work were the following:
1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])
Just in case anybody else is having a similar problem.

Categories

Resources