Negative binomial model cannot find starting position to sample - python

I am having difficulty running a PyMC3 model when the observed data is discrete. Oddly, if the observed data contains the value zero (0.), the model will run.
I've read other posts that suggest using
start = pm.find_MAP(fmin=scipy.optimize.fmin_powell) but that does not resolve the issue.
pymc3.__version__ = '3.0'
theano.__version__ = '0.7.0.dev-RELEASE'
numpy.__version__ = '1.8.0rc1'
Python 2.7.10
See the IPython notebook.
The code and error are below.
import pymc3 as pm

data = [6.0, 12.0, 12.0, 46.0, 5.0, 11.0, 11.0, 39.0, 4.0, 10.0, 25.0, 11.0, 8.0, 5.0, 10.0, 2.0, 30.0, 21.0]

with pm.Model() as model:
    alpha = pm.Uniform('alpha', lower=0, upper=100)
    mu = pm.Uniform('mu', lower=0, upper=100)

    y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha)
    y_est = pm.NegativeBinomial('y_est',
                                mu=mu,
                                alpha=alpha,
                                observed=data)

    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(20000, step, start, progressbar=True)
The error I get is:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-b9f2264fccfc> in <module>()
14 observed=data)
15
---> 16 start = pm.find_MAP()
17
18 step = pm.Metropolis()
/Library/Python/2.7/site-packages/pymc3/tuning/starting.pyc in find_MAP(start, vars, fmin, return_raw, disp, model, *args, **kwargs)
79 if 'fprime' in getargspec(fmin).args:
80 r = fmin(logp_o, bij.map(
---> 81 start), fprime=grad_logp_o, disp=disp, *args, **kwargs)
82 else:
83 r = fmin(logp_o, bij.map(start), disp=disp, *args, **kwargs)
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
775 'return_all': retall}
776
--> 777 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
778
779 if full_output:
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
830 else:
831 grad_calls, myfprime = wrap_function(fprime, args)
--> 832 gfk = myfprime(x0)
833 k = 0
834 N = len(x0)
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
279 def function_wrapper(*wrapper_args):
280 ncalls[0] += 1
--> 281 return function(*(wrapper_args + args))
282
283 return ncalls, function_wrapper
/Library/Python/2.7/site-packages/pymc3/tuning/starting.pyc in grad_logp_o(point)
74
75 def grad_logp_o(point):
---> 76 return nan_to_num(-dlogp(point))
77
78 # Check to see if minimization function actually uses the gradient
/Library/Python/2.7/site-packages/pymc3/blocking.pyc in __call__(self, x)
117
118 def __call__(self, x):
--> 119 return self.fa(self.fb(x))
/Library/Python/2.7/site-packages/pymc3/model.pyc in __call__(self, state)
397
398 def __call__(self, state):
--> 399 return self.f(**state)
400
401 class LoosePointFunc(object):
/Library/Python/2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
862 node=self.fn.nodes[self.fn.position_of_error],
863 thunk=thunk,
--> 864 storage_map=getattr(self.fn, 'storage_map', None))
865 else:
866 # old-style linkers raise their own exceptions
/Library/Python/2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
312 # extra long error message in that case.
313 pass
--> 314 reraise(exc_type, exc_value, exc_trace)
315
316
/Library/Python/2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
850 t0_fn = time.time()
851 try:
--> 852 outputs = self.fn()
853 except Exception:
854 if hasattr(self.fn, 'position_of_error'):
ValueError: Input dimension mis-match. (input[0].shape[0] = 1, input[4].shape[0] = 18)
Apply node that caused the error: Elemwise{Composite{Switch(i0, i1, Switch(i2, Switch(i3, i1, i4), i1))}}(TensorConstant{(1,) of 0}, TensorConstant{(1,) of 0}, Elemwise{mul,no_inplace}.0, InplaceDimShuffle{x}.0, TensorConstant{[ 6. 12... 30. 21.]})
Toposort index: 33
Inputs types: [TensorType(int8, vector), TensorType(int8, (True,)), TensorType(int8, (True,)), TensorType(int8, (True,)), TensorType(float64, vector)]
Inputs shapes: [(1,), (1,), (1,), (1,), (18,)]
Inputs strides: [(1,), (1,), (1,), (1,), (8,)]
Inputs values: [array([0], dtype=int8), array([0], dtype=int8), array([1], dtype=int8), array([0], dtype=int8), 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(Elemwise{Composite{Switch(i0, i1, Switch(i2, Switch(i3, i1, i4), i1))}}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
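Not a verified fix, but one workaround commonly suggested for discrete likelihoods is to pass the observations as an integer NumPy array (NegativeBinomial is a discrete distribution) and to let the sampler choose its own starting point instead of calling find_MAP. A minimal sketch under those assumptions:

import numpy as np
import pymc3 as pm

# Sketch of a possible workaround (not verified against this exact setup):
# cast the observations to integers and skip find_MAP.
data = np.array([6, 12, 12, 46, 5, 11, 11, 39, 4, 10, 25, 11, 8, 5, 10, 2, 30, 21])

with pm.Model() as model:
    alpha = pm.Uniform('alpha', lower=0, upper=100)
    mu = pm.Uniform('mu', lower=0, upper=100)
    y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=data)

    step = pm.Metropolis()
    trace = pm.sample(20000, step=step, progressbar=True)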

Related

Traceback Error with Scipy Optimize Jac: IndexError: index 1 is out of bounds for axis 0 with size 1

I'm taking the Coursera Machine Learning course by Andrew Ng. I'm working on an exercise to optimize the cost function for logistic regression. I was able to do it correctly in Octave, but I get errors when implementing it in Python.
Here's my code:
# Define Sigmoid Function
def sig(z):
    return 1/(1+np.exp(-z))

# Initialize Theta
theta = np.zeros((X.shape[1], 1))

# Find Gradient
def find_cost_grad(theta, X, y):
    h = sig(X.dot(theta))
    m = X.shape[0]
    theta_s = theta[1:]
    J = (-1/m) * (y.T.dot(np.log(h)) + (1-y).T.dot(np.log(1-h))) + lamb/(2*m) * np.sum(theta_s**2)
    grad1 = (1/m)*((h-y).T.dot(X)).flatten()
    grad2 = (theta*lamb/m).flatten()
    grad2[0] = 0
    grad = grad1+grad2
    return J, grad

find_cost_grad(theta, X, y)

## Use BFGS and L-BFGS optimization
from scipy.optimize import minimize

bfgs = minimize(find_cost, theta, (X, y), method='L-BFGS-B', jac=True)
final_theta2 = bfgs['x']
bfgs
I got the following error:
621 **options)
622 elif meth == 'l-bfgs-b':
--> 623 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
624 callback=callback, **options)
625 elif meth == 'tnc':
~\anaconda3\lib\site-packages\scipy\optimize\lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
304 iprint = disp
305
--> 306 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
307 bounds=new_bounds,
308 finite_diff_rel_step=finite_diff_rel_step)
~\anaconda3\lib\site-packages\scipy\optimize\optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
259 # ScalarFunction caches. Reuse of fun(x) during grad
260 # calculation reduces overall function evaluations.
--> 261 sf = ScalarFunction(fun, x0, args, grad, hess,
262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
~\anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
138
139 self._update_fun_impl = update_fun
--> 140 self._update_fun()
141
142 # Gradient evaluation
~\anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in _update_fun(self)
231 def _update_fun(self):
232 if not self.f_updated:
--> 233 self._update_fun_impl()
234 self.f_updated = True
235
~\anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in update_fun()
135
136 def update_fun():
--> 137 self.f = fun_wrapped(self.x)
138
139 self._update_fun_impl = update_fun
~\anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in fun_wrapped(x)
132 # Overwriting results in undefined behaviour because
133 # fun(self.x) will change self.x, with the two no longer linked.
--> 134 return fun(np.copy(x), *args)
135
136 def update_fun():
~\anaconda3\lib\site-packages\scipy\optimize\optimize.py in __call__(self, x, *args)
72 def __call__(self, x, *args):
73 """ returns the the function value """
---> 74 self._compute_if_needed(x, *args)
75 return self._value
76
~\anaconda3\lib\site-packages\scipy\optimize\optimize.py in _compute_if_needed(self, x, *args)
67 self.x = np.asarray(x).copy()
68 fg = self.fun(x, *args)
---> 69 self.jac = fg[1]
70 self._value = fg[0]
71
IndexError: index 1 is out of bounds for axis 0 with size 1
I checked that the gradient returned is a row vector (1x28). The find_cost_grad function runs fine when I run it by itself.
However, it causes errors when I run it within the minimize function. I've been looking for a solution but can't find one anywhere.
Please help.
Thanks!
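For reference, the traceback shows SciPy indexing fg[1] on whatever the objective returns, which is consistent with passing a function that returns only the cost while jac=True promises a (cost, gradient) pair. A hedged sketch of the call signature that jac=True expects, assuming find_cost_grad is the intended objective and X, y, and lamb are defined as above (with y flattened to 1-D so the shapes line up):

import numpy as np
from scipy.optimize import minimize

# With jac=True, SciPy expects a single callable returning (cost, gradient);
# x0 should be 1-D so the returned gradient has a matching shape.
theta0 = np.zeros(X.shape[1])
res = minimize(find_cost_grad, theta0, args=(X, y.ravel()),
               method='L-BFGS-B', jac=True)
final_theta = res.x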

Minimise a function in Python

Could you help me with how to minimize a function in Python? Specifically, it's the cost function for a logistic model. Below is my cost function; I know it is correct because I checked the answer.
def g(z):
    h = 1 / (1 + numpy.exp(-z))
    return h

theta1_ravel = theta1.ravel().T
theta2_ravel = theta2.ravel().T
theta_conca = numpy.concatenate((theta1_ravel, theta2_ravel))

lambada = 1

def computecost(theta_conca):
    theta1 = numpy.reshape(theta_conca[0:10025], (25, 401))
    theta2 = numpy.reshape(theta_conca[10025:10285], (10, 26))

    a1 = X_1
    a2 = g(X_1 * theta1.T)  # (5000, 25)
    a2 = numpy.column_stack((numpy.ones((m, 1)), a2))
    a3 = g(a2 * theta2.T)  # (5000, 10)

    reg_term = (lambada / (2*m)) * (numpy.sum(numpy.power(theta1[:, 1:], 2)) + numpy.sum(numpy.power(theta2[:, 1:], 2)))
    J = -(1/m) * (numpy.sum(numpy.multiply(Y, numpy.log(a3)) + numpy.multiply((1 - Y), numpy.log(1 - a3)))) + reg_term

    return J

J = computecost(theta_conca)
J
And then I try to minimize it and get theta by using scipy.optimize.minimize, but it's not working.
theta_random = numpy.random.rand(m, 1)
theta_random
scipy.optimize.minimize(computecost, theta_random)
This is the error notification:
ValueError Traceback (most recent call last)
<ipython-input-36-05051c7a9e1f> in <module>
1 theta_random = numpy.random.rand(m, 1)
2 theta_random
----> 3 scipy.optimize.minimize(computecost, theta_random)
/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
592 return _minimize_cg(fun, x0, args, jac, callback, **options)
593 elif meth == 'bfgs':
--> 594 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
595 elif meth == 'newton-cg':
596 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
996 else:
997 grad_calls, myfprime = wrap_function(fprime, args)
--> 998 gfk = myfprime(x0)
999 k = 0
1000 N = len(x0)
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
325 def function_wrapper(*wrapper_args):
326 ncalls[0] += 1
--> 327 return function(*(wrapper_args + args))
328
329 return ncalls, function_wrapper
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in approx_fprime(xk, f, epsilon, *args)
755
756 """
--> 757 return _approx_fprime_helper(xk, f, epsilon, args=args)
758
759
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in _approx_fprime_helper(xk, f, epsilon, args, f0)
689 """
690 if f0 is None:
--> 691 f0 = f(*((xk,) + args))
692 grad = numpy.zeros((len(xk),), float)
693 ei = numpy.zeros((len(xk),), float)
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
325 def function_wrapper(*wrapper_args):
326 ncalls[0] += 1
--> 327 return function(*(wrapper_args + args))
328
329 return ncalls, function_wrapper
<ipython-input-33-3761d25f71af> in computecost(theta_conca)
14 def computecost(theta_conca):
15
---> 16 theta1 = numpy.reshape(theta_conca[0:10025], (25, 401))
17 theta2 = numpy.reshape(theta_conca[10025:10285], (10, 26))
18
<__array_function__ internals> in reshape(*args, **kwargs)
/usr/lib/python3/dist-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
299 [5, 6]])
300 """
--> 301 return _wrapfunc(a, 'reshape', newshape, order=order)
302
303
/usr/lib/python3/dist-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
59
60 try:
---> 61 return bound(*args, **kwds)
62 except TypeError:
63 # A TypeError occurs if the object does have such a method in its
ValueError: cannot reshape array of size 5000 into shape (25,401)
How can I implement this? I'm sorry the code looks like a mess, but I don't think I should cut anything out. I can provide the entire code and data if you need to try it on your machine.
Thank you.
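For reference, the ValueError complains that the array being reshaped has 5000 elements, while computecost slices the parameter vector as if it had 25*401 + 10*26 = 10285 entries, so the initial guess appears to be the wrong size. A sketch of a starting point whose length matches that layout, assuming computecost and the data are defined as above:

import numpy
import scipy.optimize

# computecost unpacks 25*401 + 10*26 = 10285 parameters, so x0 must be a flat
# 1-D vector of exactly that length, not an (m, 1) column of length m = 5000.
theta0 = numpy.random.rand(25 * 401 + 10 * 26)
result = scipy.optimize.minimize(computecost, theta0)

With 10285 parameters and no analytic gradient, the default finite-difference gradient will be slow; supplying jac= would presumably help, but that is a separate issue from the reshape error.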

statsmodels GLM Negative Binomial autoregressive covariance type

I am trying to use a statsmodels GLM with a Negative Binomial family to model time series data. The model works fine with the default covariance type (nonrobust), but I would like to use the cov_type 'hac-groupsum'.
Here is the link to the options for cov_type:
https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLSResults.get_robustcov_results.html
However, I keep getting an error when I define the cov_kwds. For the time keyword I have tried using the index of X, the Time variable of X, and a list, array, and DataFrame of integers from 1 to 73.
Here is the code that works
neg_bin = sm.GLM(y, X,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])).fit()
print(neg_bin.summary())
And here is the code that gives me an error when using a DataFrame of integers:
time = np.array(range(1, 74))
time = pd.DataFrame(time, columns=['Time'])

neg_bin = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])
                 ).fit(cov_kwds={'time': time, 'maxlags': 5},
                       cov_type='hac-groupsum')
print(neg_bin.summary())
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-107-cf6c22076bb1> in <module>
4 )).fit(cov_kwds = {'time' : time
5 ,'maxlags': 5}
----> 6 ,cov_type = 'hac-groupsum')
7 print(neg_bin.summary())
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, full_output, disp, max_start_irls, **kwargs)
1025 return self._fit_irls(start_params=start_params, maxiter=maxiter,
1026 tol=tol, scale=scale, cov_type=cov_type,
-> 1027 cov_kwds=cov_kwds, use_t=use_t, **kwargs)
1028 else:
1029 self._optim_hessian = kwargs.get('optim_hessian')
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in _fit_irls(self, start_params, maxiter, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
1188 self.scale,
1189 cov_type=cov_type, cov_kwds=cov_kwds,
-> 1190 use_t=use_t)
1191
1192 glm_results.method = "IRLS"
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in __init__(self, model, params, normalized_cov_params, scale, cov_type, cov_kwds, use_t)
1472 cov_kwds = {}
1473 get_robustcov_results(self, cov_type=cov_type, use_self=True,
-> 1474 use_t=use_t, **cov_kwds)
1475
1476 #cached_data
~\anaconda3\lib\site-packages\statsmodels\base\covtype.py in get_robustcov_results(self, cov_type, use_t, **kwds)
336 if adjust_df:
337 # need to find number of groups
--> 338 tt = (np.nonzero(time[1:] < time[:-1])[0] + 1)
339 self.n_groups = n_groups = len(tt) + 1
340 res.cov_params_default = sw.cov_nw_groupsum(self, maxlags, time,
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self, other)
773 if not self._indexed_same(other):
774 raise ValueError(
--> 775 "Can only compare identically-labeled DataFrame objects"
776 )
777 new_data = dispatch_to_series(self, other, op, str_rep)
ValueError: Can only compare identically-labeled DataFrame objects
Here is the error I get when I replace time with X.index:
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-109-77dde8cd9c9d> in <module>
4 )).fit(cov_kwds = {'time' : X.index
5 ,'maxlags': 5}
----> 6 ,cov_type = 'hac-groupsum')
7 print(neg_bin.summary())
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, full_output, disp, max_start_irls, **kwargs)
1025 return self._fit_irls(start_params=start_params, maxiter=maxiter,
1026 tol=tol, scale=scale, cov_type=cov_type,
-> 1027 cov_kwds=cov_kwds, use_t=use_t, **kwargs)
1028 else:
1029 self._optim_hessian = kwargs.get('optim_hessian')
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in _fit_irls(self, start_params, maxiter, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
1188 self.scale,
1189 cov_type=cov_type, cov_kwds=cov_kwds,
-> 1190 use_t=use_t)
1191
1192 glm_results.method = "IRLS"
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in __init__(self, model, params, normalized_cov_params, scale, cov_type, cov_kwds, use_t)
1472 cov_kwds = {}
1473 get_robustcov_results(self, cov_type=cov_type, use_self=True,
-> 1474 use_t=use_t, **cov_kwds)
1475
1476 #cached_data
~\anaconda3\lib\site-packages\statsmodels\base\covtype.py in get_robustcov_results(self, cov_type, use_t, **kwds)
340 res.cov_params_default = sw.cov_nw_groupsum(self, maxlags, time,
341 weights_func=weights_func,
--> 342 use_correction=use_correction)
343 res.cov_kwds['description'] = descriptions['HAC-Groupsum']
344 else:
~\anaconda3\lib\site-packages\statsmodels\stats\sandwich_covariance.py in cov_nw_groupsum(results, nlags, time, weights_func, use_correction)
859 '''
860
--> 861 xu, hessian_inv = _get_sandwich_arrays(results)
862
863 #S_hac = S_nw_panel(xw, weights, groupidx)
~\anaconda3\lib\site-packages\statsmodels\stats\sandwich_covariance.py in _get_sandwich_arrays(results, cov_type)
238 elif hasattr(results.model, 'score_obs'):
239 xu = results.model.score_obs(results.params)
--> 240 hessian_inv = np.linalg.inv(results.model.hessian(results.params))
241 else:
242 xu = results.model.wexog * results.wresid[:, None]
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in hessian(self, params, scale, observed)
579
580 factor = self.hessian_factor(params, scale=scale, observed=observed)
--> 581 np.multiply(self.exog.T, factor, out=tmp.T)
582 return -tmp.T.dot(self.exog)
583
UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
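For reference, the first traceback fails where get_robustcov_results evaluates time[1:] < time[:-1], a comparison pandas refuses between differently labeled DataFrames, and the second fails because the design matrix appears to be integer-typed. A sketch that passes time as a plain 1-D integer array and casts the exog to float (variable names as in the question, so this is untested against the actual data):

import numpy as np
import statsmodels.api as sm

# Pass `time` as a 1-D NumPy array so the internal time[1:] < time[:-1]
# comparison works, and make the design matrix float-typed so the Hessian
# computation can write float results into it.
time = np.arange(1, 74)
X_float = np.asarray(X, dtype=float)

neg_bin = sm.GLM(
    y, X_float,
    family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0]),
).fit(cov_type='hac-groupsum', cov_kwds={'time': time, 'maxlags': 5})
print(neg_bin.summary())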

Python Symfit: bound issue in chained minimization

I am tackling an optimization problem, and to do so I chain three optimizers.
symfit is imported as sft, and sympy as sy.
[EDIT] The code below is a minimal example of my situation that produces the same error message.
k, a = sft.parameters('k, a')  # parameters to be optimized
k.min = 0.01
k.max = 1
a.min, a.max = 0.01, 1
L = sft.Parameter('L', value=5, fixed=True)  # this parameter is known,
                                             # therefore I don't want it to move

# variables
x = sft.Variable('x')
A = sft.Variable('A')
P = sft.Variable('P')

# model
model_dict = {
    sy.Derivative(A, x): k * A - P**a / L,
    sy.Derivative(P, x): - k * (P**2) / L
}

odemodel = sft.ODEModel(model_dict, initial={x: 0.,
                                             A: 0,
                                             P: 0})

# some mock data (inspired by the tBuLi symfit docs)
x = np.linspace(0, 20, 40)
mock_data = odemodel(x=x, k=0.1, a=0.08, L=5)._asdict()
sigma_data = 0.5
np.random.seed(42)
for var in mock_data:
    mock_data[var] += np.random.normal(0, sigma_data, size=len(x))

fit = sft.Fit(odemodel, x=x,
              A=mock_data[A], P=mock_data[P],
              minimizer=[DifferentialEvolution, LBFGSB, BasinHopping])
fit_result = fit.execute()
The following error message pops up:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-ea4a7a6e9a8e> in <module>
34 A = mock_data[A], P = mock_data[P],
35 minimizer = [DifferentialEvolution, LBFGSB, BasinHopping]) #DifferentialEvolution #BasinHopping
---> 36 fit_result = fit.execute()
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\fit.py in execute(self, **minimize_options)
577 :return: FitResults instance
578 """
--> 579 minimizer_ans = self.minimizer.execute(**minimize_options)
580 minimizer_ans.covariance_matrix = self.covariance_matrix(
581 dict(zip(self.model.params, minimizer_ans._popt))
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\minimizers.py in execute(self, **minimizer_kwargs)
270 for minimizer, kwargs in zip(self.minimizers, bound_arguments.arguments.values()):
271 minimizer.initial_guesses = next_guess
--> 272 ans = minimizer.execute(**kwargs)
273 next_guess = list(ans.params.values())
274 answers.append(ans)
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py in wrapped_func(*args, **kwargs)
421 else:
422 bound_args.arguments[param.name] = param.default
--> 423 return func(*bound_args.args, **bound_args.kwargs)
424 return wrapped_func
425
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\minimizers.py in execute(self, **minimize_options)
408 if jacobian is None:
409 jacobian = self.wrapped_jacobian
--> 410 return super(ScipyGradientMinimize, self).execute(jacobian=jacobian, **minimize_options)
411
412 def scipy_constraints(self, constraints):
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\minimizers.py in execute(self, **minimize_options)
428 def execute(self, **minimize_options):
429 return super(ScipyBoundedMinimizer, self).execute(bounds=self.bounds,
--> 430 **minimize_options)
431
432
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py in wrapped_func(*args, **kwargs)
421 else:
422 bound_args.arguments[param.name] = param.default
--> 423 return func(*bound_args.args, **bound_args.kwargs)
424 return wrapped_func
425
C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\minimizers.py in execute(self, bounds, jacobian, hessian, constraints, **minimize_options)
353 jac=jacobian,
354 hess=hessian,
--> 355 **minimize_options
356 )
357 return self._pack_output(ans)
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
608 elif meth == 'l-bfgs-b':
609 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
--> 610 callback=callback, **options)
611 elif meth == 'tnc':
612 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, **unknown_options)
275 bounds = [(None, None)] * n
276 if len(bounds) != n:
--> 277 raise ValueError('length of x0 != length of bounds')
278 # unbounded variables must use None, not +-inf, for optimizer to work properly
279 bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds]
ValueError: length of x0 != length of bounds
As this (long) error message shows, it is the second minimizer, LBFGSB, that is causing trouble, but I have no idea how to overcome this.
In my full code, when I use only one minimizer, the program runs forever and I cannot figure out why. This is why a single minimizer does not seem to be enough and I want to chain them. I think this is due to the complexity of the problem: two coupled ODEs with 7 parameters to optimize, and an initial guess performed with InteractiveGuess (a very nice tool, by the way).
Thanks in advance for your help!
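For reference, the ValueError says the LBFGSB stage receives a bounds list whose length differs from its starting vector, which can happen when a fixed parameter is counted in one of the two but not the other as the chained minimizers hand results to each other. One workaround to try, sketched under that assumption, is to drop the fixed Parameter entirely and substitute its known value directly into the ODE model (here x_data, A_data and P_data stand for the arrays built in the question):

import numpy as np
import symfit as sft
import sympy as sy
from symfit.core.minimizers import DifferentialEvolution, LBFGSB, BasinHopping

# Hypothetical workaround: use the known value of L as a plain constant in the
# model instead of a fixed Parameter, so every minimizer in the chain sees the
# same number of free parameters and bounds.
L_value = 5.0

k, a = sft.parameters('k, a')
k.min, k.max = 0.01, 1
a.min, a.max = 0.01, 1

x = sft.Variable('x')
A = sft.Variable('A')
P = sft.Variable('P')

model_dict = {
    sy.Derivative(A, x): k * A - P**a / L_value,
    sy.Derivative(P, x): -k * P**2 / L_value,
}
odemodel = sft.ODEModel(model_dict, initial={x: 0., A: 0, P: 0})

fit = sft.Fit(odemodel, x=x_data, A=A_data, P=P_data,
              minimizer=[DifferentialEvolution, LBFGSB, BasinHopping])
fit_result = fit.execute()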

Implementing logistic regression -- why does this not converge?

I am adapting existing implementations of logistic regression, but I can't figure out what I am doing wrong.
Here is my implementation:
from scipy.optimize import fmin_bfgs
import numpy as np
import pandas as pd

# With help from http://stackoverflow.com/questions/13794754/logistic-regression-using-scipy
# as well as https://bryantravissmith.com/2015/12/29/implementing-logistic-regression-from-scratch-part-2-python-code/

def sigma(features, weights):
    """returns sigma(<w,x>)"""
    return 1 / (1 + np.exp(-features.dot(weights)))

def log_likelihood(weights, features, labels):
    """calculates -ln p(t|w)"""
    s = sigma(features, weights)
    #s += 1e-24  # pseudocount to prevent logs of 0
    t = labels * np.log(s + 1e-24)
    t2 = (1 - labels) * (np.log((1 - s) + 1e-24))
    ll = (t + t2).sum()
    print -ll
    return -ll

def gradient_log_likelihood(weights, features, labels):
    """calculates the gradient (Jacobian) of the log likelihood"""
    error = labels - sigma(features, weights)
    grad = (error * features).sum(axis=0)
    return grad.reshape(grad.shape[0], 1)
Here is a sample dataset:
labels = np.array([0, 1, 1]).reshape(3, 1)
df = pd.DataFrame.from_dict({'a': [1,2,3], 'b': [2,3,4], 'c': [6,7,8]})
n, m = df.shape
weights = np.zeros(m + 1).reshape(m + 1, 1) # zero vector of starting weights
# add the intercept column
features = np.ones((n, m + 1)) # make matrix with all 1's
features[:,1:] = df # replace the 1's in all columns after column 0 with actual data
If I run each of these functions individually on the initial weight vector, they work. But once I try to optimize, I get a shape error:
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
ValueError Traceback (most recent call last)
<ipython-input-26-34c3cde48ac4> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
845 else:
846 grad_calls, myfprime = wrap_function(fprime, args)
--> 847 gfk = myfprime(x0)
848 k = 0
849 N = len(x0)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
287 def function_wrapper(*wrapper_args):
288 ncalls[0] += 1
--> 289 return function(*(wrapper_args + args))
290
291 return ncalls, function_wrapper
<ipython-input-3-9678bc972b41> in gradient_log_likelihood(weights, features, labels)
2 """calculates the gradient (Jacobian) of the log likelihood"""
3 error = labels - sigma(features, weights)
----> 4 grad = (error * features).sum(axis=0)
5 return grad.reshape(grad.shape[0], 1)
6
ValueError: operands could not be broadcast together with shapes (3,3) (3,4)
The problem is that somehow this line:
error = (labels - sigma(features, weights))
converts error from a 3 x 1 vector into a 3 x 3 matrix.
Note that if you print error and run gradient_log_likelihood(weights, features, labels) directly, you get:
[[-0.5]
[ 0.5]
[ 0.5]]
And if you run the optimization, you get:
[[-0.5 -0.5 -0.5]
[ 0.5 0.5 0.5]
[ 0.5 0.5 0.5]]
in addition to the ValueError. This is because labels - sigma(features, weights) changes the shape.
You can investigate why, but if you hack around it by pulling out the first column, error = (labels - sigma(features, weights)).T[0].reshape(3,1), you get the same result when you run gradient_log_likelihood(weights, features, labels) directly, but a new error in the optimization function.
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
6.23832462504
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-135-d7e8b04daeba> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
863 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
864 _line_search_wolfe12(f, myfprime, xk, pk, gfk,
--> 865 old_fval, old_old_fval)
866 except _LineSearchError:
867 # Line search failed to find a better solution.
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)
697 ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
698 old_fval, old_old_fval,
--> 699 **kwargs)
700
701 if ret[0] is None:
/Library/Python/2.7/site-packages/scipy/optimize/linesearch.pyc in line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, args, c1, c2, amax, amin, xtol)
95 return np.dot(gval[0], pk)
96
---> 97 derphi0 = np.dot(gfk, pk)
98
99 stp, fval, old_fval = scalar_search_wolfe1(
ValueError: shapes (4,1) and (4,1) not aligned: 1 (dim 1) != 4 (dim 0)
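For reference, fmin_bfgs flattens x0 into a 1-D array before calling the objective and gradient, so the (3, 1) labels column broadcasts against the 1-D output of sigma and inflates to (3, 3), and the (4, 1) gradient later fails in the line search. A sketch of the same setup with everything kept 1-D, which sidesteps both shape errors; the function names here are reworked for the sketch, not the original ones:

import numpy as np
import pandas as pd
from scipy.optimize import fmin_bfgs

def sigma(features, weights):
    # logistic function of <w, x>; returns a 1-D array of length n
    return 1 / (1 + np.exp(-features.dot(weights)))

def neg_log_likelihood(weights, features, labels):
    s = sigma(features, weights)
    return -(labels * np.log(s + 1e-24) + (1 - labels) * np.log(1 - s + 1e-24)).sum()

def neg_gradient(weights, features, labels):
    # gradient of the negative log-likelihood, 1-D of length m + 1
    error = labels - sigma(features, weights)
    return -features.T.dot(error)

labels = np.array([0, 1, 1])                  # 1-D, not (3, 1)
df = pd.DataFrame.from_dict({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [6, 7, 8]})
n, m = df.shape
features = np.ones((n, m + 1))
features[:, 1:] = df
weights = np.zeros(m + 1)                     # 1-D starting weights

optimized = fmin_bfgs(neg_log_likelihood, x0=weights, args=(features, labels),
                      gtol=1e-4, fprime=neg_gradient)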
