Scipy optimize function: matrixes not aligned - python

EDIT: The data set is the MNIST data set from the Homework of Week 4 of Andrew Ng's Machine Learning Course
I've checked the question on scipy optimize but I still couldn't figure out what is wrong with my code. I am trying to optimize theta for the oneVsAll question on the Andrew Ng coursera course.
Here is the relevant code
def sigmoid(x):
    """Apply the logistic function 1 / (1 + e**-v) to every item of ``x``.

    ``x`` is iterated in pure Python and the result is a plain ``list`` with
    one entry per item, so whatever array shape ``x`` had (for example a
    column vector) is not carried over to the return value.
    """
    return [1 / (1 + math.exp(-item)) for item in x]
def hypothesis(x, theta):
    """Return ``sigmoid(x . theta)`` converted to a NumPy array.

    NOTE(review): the shape of the result is whatever ``np.array`` makes of
    the value returned by ``sigmoid``; it is not forced to be a column vector.
    """
    return np.array(sigmoid(np.dot(x, theta)))
def costFunction(theta, x, y, lamba_):
    """Logistic-regression cost (negative mean log-likelihood) for ``theta``.

    Parameters: ``theta`` is the parameter vector, ``x`` the design matrix
    with one row per sample, ``y`` the 0/1 labels (assumed to be an (m, 1)
    column - TODO confirm), and ``lamba_`` the regularisation strength, which
    is accepted for signature compatibility but not used here.

    Returns ``-summ[0] / m`` where ``summ`` is the summed log-likelihood.
    """
    # Take the sample count from the argument, not from a module-level ``X``.
    m = x.shape[0]
    h = hypothesis(x, theta)  # evaluated once and reused for both terms
    part1 = np.dot(y.T, np.log(h).reshape(m, 1))
    part2 = np.dot((np.ones((m, 1)) - y).T, np.log(1 - h).reshape(m, 1))
    summ = part1 + part2
    return -summ[0] / m
def gradientVect(theta, x, y, lambda_):
    """Gradient of the regularised logistic-regression cost, flattened to 1-D.

    Parameters: ``theta`` is the parameter vector (reshaped to (n, 1) here),
    ``x`` the (m, n) design matrix, ``y`` the 0/1 labels and ``lambda_`` the
    regularisation strength. The first component (bias) is not regularised.

    NOTE(review): ``beta`` is only an (m, 1) column when ``hypothesis``
    returns an (m, 1) array; otherwise ``- y`` broadcasts and the flattened
    result no longer has length n.
    """
    # Shapes come from the argument rather than a module-level ``X``.
    m, n = x.shape
    theta = theta.reshape(n, 1)
    beta = hypothesis(x, theta) - y
    grad = np.dot(x.T, beta) / m
    # The penalty term (lambda / m) * theta is added to the gradient;
    # multiplying by it would zero the gradient whenever lambda_ == 0.
    grad[1:] = grad[1:] + theta[1:] * lambda_ / m
    return grad.flatten()
from scipy import optimize
def optimizeTheta(x, y, nLabels, lambda_):
    """Fit one logistic-regression classifier per label (one-vs-all).

    For each label ``i`` in ``0 .. nLabels - 1`` the targets are binarised as
    ``(y == i) * 1`` and ``costFunction`` is minimised with ``gradientVect``
    as its Jacobian (at most 50 iterations). Each optimisation result is
    printed.

    Returns an array with one row of fitted parameters per label.
    """
    n = x.shape[1]  # number of parameters, taken from the data passed in
    result = []
    for i in np.arange(0, nLabels):
        theta = np.zeros(n)  # minimize expects a 1-D starting point
        res = optimize.minimize(costFunction, theta,
                                args=(x, (y == i) * 1, lambda_), method=None,
                                jac=gradientVect, options={'maxiter': 50})
        print(res)
        result.append(res.x)
    return np.array(result)
but running
optimizeTheta(X, y, 10, 0) # X shape = 401, 500
Gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-247-e0e6e4c1eddd> in <module>()
3 n = X.shape[1]
4
----> 5 optimizeTheta(X, y, 10, 0)
<ipython-input-246-0a15e9f4769a> in optimizeTheta(x, y, nLabels, lambda_)
54 theta = np.zeros((n,1))
55 res = optimize.minimize(costFunction, x0 = theta, args=(x, (y == i)*1, lambda_), method=None,
---> 56 jac=gradientVect, options={'maxiter':50})
57 print(res)
58 return result
//anaconda/lib/python3.5/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
439 return _minimize_cg(fun, x0, args, jac, callback, **options)
440 elif meth == 'bfgs':
--> 441 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
442 elif meth == 'newton-cg':
443 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
//anaconda/lib/python3.5/site-packages/scipy/optimize/optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
859 gnorm = vecnorm(gfk, ord=norm)
860 while (gnorm > gtol) and (k < maxiter):
--> 861 pk = -numpy.dot(Hk, gfk)
862 try:
863 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
ValueError: shapes (401,401) and (2005000,) not aligned: 401 (dim 1) != 2005000 (dim 0)
And I can't figure out why the shapes are not aligned.
Thanks!

So I realized what was wrong with my question.
The problem was that the sigmoid function returned a Python list rather than a NumPy array, which changed the shapes used in the subsequent matrix multiplications. The new sigmoid function is
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Uses ``np.exp``, so an array argument yields an array of the same shape.
    """
    return 1 / (1 + np.exp(-z))

Related

dimension error with scipy optimize.minimize

I want to minimise the objective function: objective_function_2() subject to the inequality constraints 69 < T(x) < 71. The objective of the optimisation is to find the set of Fourier series parameters [c0, c1, c2, wavelength] that minimises the objective function (curve fitting problem). fx() is the function that calculates the residuals between the data and the estimated values of the Fourier series. T(x) is the function calculate_tightness() that calculates the range of the fourier series estimated at each iteration.
The problem is that I don't understand why I get this error message: ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
objective_function_2() produces an n-vector of residuals, and calculate_tightness() returns a single scalar value.
Does anyone have any idea why this is happening?
import numpy as np
from scipy.optimize import LinearConstraint, NonlinearConstraint, BFGS, minimize
ref_fld = np.array([-479.9024323 , -469.80142114, -459.70040999, -449.59939883,
-439.49838768, -429.39737652, -419.29636536, -409.19535421,
-399.09434305, -388.9933319 , -378.89232074, -368.79130958,
-358.69029843, -348.58928727, -338.48827611, -328.38726496,
-318.2862538 , -308.18524265, -298.08423149, -287.98322033,
-277.88220918, -267.78119802, -257.68018686, -247.57917571,
-237.47816455, -227.3771534 , -217.27614224, -207.17513108,
-197.07411993, -186.97310877, -176.87209762, -166.77108646,
-156.6700753 , -146.56906415, -136.46805299, -126.36704183,
-116.26603068, -106.16501952, -96.06400837, -85.96299721,
-75.86198605, -65.7609749 , -55.65996374, -45.55895258,
-35.45794143, -25.35693027, -15.25591912, -5.15490796,
4.9461032 , 15.04711435, 25.14812551, 35.24913667,
45.35014782, 55.45115898, 65.55217013, 75.65318129,
85.75419245, 95.8552036 , 105.95621476, 116.05722591,
126.15823707, 136.25924823, 146.36025938, 156.46127054,
166.5622817 , 176.66329285, 186.76430401, 196.86531516,
206.96632632, 217.06733748, 227.16834863, 237.26935979,
247.37037095, 257.4713821 , 267.57239326, 277.67340441,
287.77441557, 297.87542673, 307.97643788, 318.07744904,
328.1784602 , 338.27947135, 348.38048251, 358.48149366,
368.58250482, 378.68351598, 388.78452713, 398.88553829,
408.98654944, 419.0875606 , 429.18857176, 439.28958291,
449.39059407, 459.49160523, 469.59261638, 479.69362754,
489.79463869, 499.89564985, 509.99666101, 520.09767216])
fld = np.array([-300.41522506, -120.9280477 , -274.77413647, 494.45656622,
-44.00495929, -479.90233432, 58.55913797, -326.056248 ,
84.20018256, 443.17449743])
flr = np.array([-13.20752855, 38.56985419, 44.28484794, -51.64708478,
-10.50558888, -49.95878419, -53.88137785, -12.73304144,
-54.2792669 , -7.59544309])
def fourier_series(x, c0, c1, c2, w):
    """Evaluate a one-term Fourier series and return its arctangent in degrees.

    Parameters
    ----------
    x : array_like
        Positions at which the series is evaluated (converted to float).
    c0, c1, c2 : float
        Constant, cosine and sine coefficients.
    w : float
        Wavelength; the angular frequency used is ``2 * pi / w``.

    Returns
    -------
    numpy.ndarray
        ``rad2deg(arctan(c0 + c1*cos(2*pi*x/w) + c2*sin(2*pi*x/w)))``.
    """
    # NOTE(review): fx() compares this result with tan(deg2rad(y)) and
    # calculate_splot() applies rad2deg(arctan(.)) to it a second time -
    # confirm whether the raw series value was meant to be returned.
    x = np.asarray(x, dtype=float)
    phase = 2 * np.pi / w * x
    v = c0 + c1 * np.cos(phase) + c2 * np.sin(phase)
    return np.rad2deg(np.arctan(v))
def calculate_splot(ref_fold_frame, popt):
    """Return ``rad2deg(arctan(.))`` of the Fourier series on ``ref_fold_frame``.

    ``popt`` is unpacked as ``(c0, c1, c2, w)`` into ``fourier_series``.
    """
    return np.rad2deg(np.arctan(fourier_series(ref_fold_frame, *popt)))
def calculate_tightness(theta):
    """Return a scalar "tightness" for the curve defined by ``theta``.

    The curve is evaluated on the module-level ``ref_fld`` grid; its maximum
    and minimum (in degrees) are mapped through ``arctan(deg2rad(.))`` and the
    result is ``180 - rad2deg(2 * tan((amax - amin) / 2))``.
    """
    curve = calculate_splot(ref_fld, theta)
    amax = np.arctan(np.deg2rad(curve.max()))
    amin = np.arctan(np.deg2rad(curve.min()))
    return 180 - np.rad2deg(2 * np.tan((amax - amin) / 2))
def fx(theta, x, y):
    """Residuals for least-squares fitting: ``tan(deg2rad(y))`` minus the model.

    ``theta`` is unpacked as ``(c0, c1, c2, w)`` into ``fourier_series``; the
    subtraction is element-wise, so an array ``x`` yields an array result.
    """
    observed = np.tan(np.deg2rad(y))
    return observed - fourier_series(x, *theta)
def objective_function_2(theta):
    """Objective passed to ``minimize``: residuals plus the tightness term.

    Uses the module-level ``fld`` (positions) and ``flr`` (observations).

    NOTE(review): ``fx`` returns one residual per sample, so this returns an
    array, whereas ``scipy.optimize.minimize`` expects a scalar objective.
    Reducing the residuals first (for example a sum of squares) would give a
    scalar - confirm the intended reduction.
    """
    x = fld
    y = flr
    return fx(theta, x, y) + calculate_tightness(theta)
Cx = NonlinearConstraint(calculate_tightness, 69, 71, jac='2-point', hess=BFGS())
x0 = [0, 1, 1, 500]
res = minimize(objective_function_2, x0, constraints=[Cx], method='trust-constr',
options={'verbose': 1}, )
Error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_91/1210121354.py in <module>
1 Cx = NonlinearConstraint(calculate_tightness, 69, 71, jac='2-point', hess=BFGS()) #setup_optimisation_constraints(constraints_matrix, [60, -1e-2], [71, 1e-4], linear=False)
2 x0 = theta[recovery<1][0]
----> 3 res = minimize(objective_function_2, x0, constraints=[Cx], method='trust-constr',
4
5 options={'verbose': 1}, )
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
632 constraints, callback=callback, **options)
633 elif meth == 'trust-constr':
--> 634 return _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
635 bounds, constraints,
636 callback=callback, **options)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py in _minimize_trustregion_constr(fun, x0, args, grad, hess, hessp, bounds, constraints, xtol, gtol, barrier_tol, sparse_jacobian, callback, maxiter, verbose, finite_diff_rel_step, initial_constr_penalty, initial_tr_radius, initial_barrier_parameter, initial_barrier_tolerance, factorization_method, disp)
507
508 elif method == 'tr_interior_point':
--> 509 _, result = tr_interior_point(
510 objective.fun, objective.grad, lagrangian_hess,
511 n_vars, canonical.n_ineq, canonical.n_eq,
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in tr_interior_point(fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac, x0, fun0, grad0, constr_ineq0, jac_ineq0, constr_eq0, jac_eq0, stop_criteria, enforce_feasibility, xtol, state, initial_barrier_parameter, initial_tolerance, initial_penalty, initial_trust_radius, factorization_method)
302 s0 = np.maximum(-1.5*constr_ineq0, np.ones(n_ineq))
303 # Define barrier subproblem
--> 304 subprob = BarrierSubproblem(
305 x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac,
306 barrier_parameter, tolerance, enforce_feasibility,
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in __init__(self, x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac, barrier_parameter, tolerance, enforce_feasibility, global_stop_criteria, xtol, fun0, grad0, constr_ineq0, jac_ineq0, constr_eq0, jac_eq0)
51 self.xtol = xtol
52 self.fun0 = self._compute_function(fun0, constr_ineq0, s0)
---> 53 self.grad0 = self._compute_gradient(grad0)
54 self.constr0 = self._compute_constr(constr_ineq0, constr_eq0, s0)
55 self.jac0 = self._compute_jacobian(jac_eq0, jac_ineq0, s0)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in _compute_gradient(self, g)
137
138 def _compute_gradient(self, g):
--> 139 return np.hstack((g, -self.barrier_parameter*np.ones(self.n_ineq)))
140
141 def _compute_jacobian(self, J_eq, J_ineq, s):
<__array_function__ internals> in hstack(*args, **kwargs)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/numpy/core/shape_base.py in hstack(tup)
343 return _nx.concatenate(arrs, 0)
344 else:
--> 345 return _nx.concatenate(arrs, 1)
346
347
<__array_function__ internals> in concatenate(*args, **kwargs)
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
SciPy/NumPy/Python version information
1.7.1 1.21.3 sys.version_info(major=3, minor=8, micro=10, releaselevel='final', serial=0)

Fitting data to Faddeeva function using python's optimize.leastsq() and optimize.curve_fit

Hello Stackoverflow community,
I am trying to fit data to a Faddeeva function (scipy.special.wofz) using Python's optimize.leastsq() or optimize.curve_fit(). The fit parameters are the following two: z1 and z2. They are complex, whereas the independent variable (time) and the output of the function (meas_data) are purely real numbers. This is my first attempt to fit the data:
import numpy as np
from scipy import optimize
from scipy import special
meas_data = np.loadtxt('directory')
time = np.loadtxt('directory')
def test(params, time):
    """Model value ``Re[w(i*z1*sqrt(t)) * w(i*z2*sqrt(t))]`` (w = Faddeeva).

    ``params`` holds ``(z1, z2)``; ``time`` is the independent variable.
    """
    z1 = params[0]
    z2 = params[1]
    root_t = np.sqrt(time)
    # ``np.complex`` was only an alias of the builtin ``complex`` and has been
    # removed from NumPy, so the builtin is used directly.
    # NOTE(review): ``complex(0, v)`` accepts scalars only; with an array
    # ``time`` it raises TypeError. ``1j * v`` is the array-safe equivalent.
    a = complex(0, z1 * root_t)
    b = complex(0, z2 * root_t)
    c = special.wofz(a)
    d = special.wofz(b)
    return np.real(c * d)
def test_error(params, time, t_error):
    """Residual for ``optimize.leastsq``: model value minus ``t_error``.

    ``t_error`` is the data the model is compared against (the caller passes
    the measured values in this position).
    """
    return test(params, time) - t_error
initial_guess = (300+200j, 300-200j)
params_fit, cov_x, infodict, mesg, ier = optimize.leastsq(test_error, initial_guess, args = (time, meas_data), full_output = True)
My second attempt looks like :
import numpy as np
from scipy import optimize
from scipy import special
meas_data = np.loadtxt('directory')
time = np.loadtxt('directory')
def test(time, z1, z2):
    """Model for ``optimize.curve_fit``: ``Re[w(i*z1*sqrt(t)) * w(i*z2*sqrt(t))]``.

    ``time`` is the independent variable, ``z1`` and ``z2`` the fit
    parameters; ``w`` is the Faddeeva function ``special.wofz``.
    """
    root_t = np.sqrt(time)
    # ``np.complex`` was only an alias of the builtin ``complex`` and has been
    # removed from NumPy, so the builtin is used directly.
    # NOTE(review): ``complex(0, v)`` accepts scalars only; with an array
    # ``time`` it raises TypeError. ``1j * v`` is the array-safe equivalent.
    a = complex(0, z1 * root_t)
    b = complex(0, z2 * root_t)
    c = special.wofz(a)
    d = special.wofz(b)
    return np.real(c * d)
popt, pcov = optimize.curve_fit(test, time, meas_data)
For both cases, I get a similar error message:
for the first attempt:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-13-9286b2981692> in <module>()
22
23 initial_guess = (300+200j, 300-200j)
---> 24 params_fit, cov_x, infodict, mesg, ier = optimize.leastsq(test_error, initial_guess, args = (time, msd), full_output = True)
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
375 if not isinstance(args, tuple):
376 args = (args,)
--> 377 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
378 m = shape[0]
379 if n > m:
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
24 def _check_func(checker, argname, thefunc, x0, args, numinputs,
25 output_shape=None):
---> 26 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
27 if (output_shape is not None) and (shape(res) != output_shape):
28 if (output_shape[0] != 1):
<ipython-input-13-9286b2981692> in test_error(params, time, t_error)
19
20 def test_error(params, time, t_error):
---> 21 return test(params, time) - t_error
22
23 initial_guess = (z1, z2)
<ipython-input-13-9286b2981692> in test(params, time)
10 b = z2*np.sqrt(time)
11
---> 12 a = np.complex(0, a)
13 b = np.complex(0, b)
14
TypeError: only length-1 arrays can be converted to Python scalars
and for the second attempt:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-8-8f631a7ede54> in <module>()
16 return np.real(c*d)
17
---> 18 popt, pcov = optimize.curve_fit(test, time, msd)
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
674 # Remove full_output from kwargs, otherwise we're passing it in twice.
675 return_full = kwargs.pop('full_output', False)
--> 676 res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
677 popt, pcov, infodict, errmsg, ier = res
678 cost = np.sum(infodict['fvec'] ** 2)
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
375 if not isinstance(args, tuple):
376 args = (args,)
--> 377 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
378 m = shape[0]
379 if n > m:
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
24 def _check_func(checker, argname, thefunc, x0, args, numinputs,
25 output_shape=None):
---> 26 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
27 if (output_shape is not None) and (shape(res) != output_shape):
28 if (output_shape[0] != 1):
/Users/tthalheim/anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py in func_wrapped(params)
453 if weights is None:
454 def func_wrapped(params):
--> 455 return func(xdata, *params) - ydata
456 else:
457 def func_wrapped(params):
<ipython-input-8-8f631a7ede54> in test(time, z1, z2)
7 b = z2*np.sqrt(time)
8
----> 9 a = np.complex(0, a)
10 b = np.complex(0, b)
11
TypeError: only length-1 arrays can be converted to Python scalars
The data I am using for fitting are times in the range of 10e-6 to 10e-2 and measurement data in the range of 10e-19 to 10e-16. Both test functions work when they are used to calculate individual values for known z1 and z2. I suspect the problem is that Python's fitting routines cannot handle complex values during their calculation?
I would be very happy, if someone could help me fixing this problem.
The third comment by PRMoureu on my question fixed the problem.

Implementing logistic regression -- why does this not converge?

I am adapting existing implementations of logistic regression, but I can't figure out what I am doing wrong.
Here is my implementation:
from scipy.optimize import fmin_bfgs
import numpy as np
import pandas as pd
# With help from http://stackoverflow.com/questions/13794754/logistic-regression-using-scipy
# as well as https://bryantravissmith.com/2015/12/29/implementing-logistic-regression-from-scratch-part-2-python-code/
def sigma(features, weights):
    """Return the logistic function of ``features . weights``.

    The output has the shape of ``features.dot(weights)``: a 1-D ``weights``
    gives a 1-D result, a column ``weights`` gives a column.
    """
    activation = features.dot(weights)
    return 1 / (1 + np.exp(-activation))
def log_likelihood(weights, features, labels):
    """Return the negative log-likelihood ``-ln p(t|w)`` and print it.

    A pseudocount of 1e-24 is added inside both logarithms to prevent
    taking the log of 0.
    """
    eps = 1e-24
    s = sigma(features, weights)
    t = labels * np.log(s + eps)
    t2 = (1 - labels) * np.log((1 - s) + eps)
    ll = (t + t2).sum()
    # Function-call form: the bare ``print -ll`` statement only works on
    # Python 2 and raises TypeError on Python 3.
    print(-ll)
    return -ll
def gradient_log_likelihood(weights, features, labels):
    """Return ``sum((labels - sigma) * features)`` over samples as a column.

    NOTE(review): the optimiser hands ``weights`` over as a 1-D array, which
    makes ``sigma(...)`` 1-D; subtracting it from an (n, 1) ``labels``
    broadcasts to (n, n) and the product below then fails.
    NOTE(review): the optimiser also works with 1-D gradients, while this
    returns an (n_features, 1) column.
    NOTE(review): ``log_likelihood`` returns the *negative* log-likelihood,
    but this expression is the gradient of the positive one - confirm sign.
    """
    error = labels - sigma(features, weights)
    grad = (error * features).sum(axis=0)
    return grad.reshape(grad.shape[0], 1)
Here is a sample dataset:
labels = np.array([0, 1, 1]).reshape(3, 1)
df = pd.DataFrame.from_dict({'a': [1,2,3], 'b': [2,3,4], 'c': [6,7,8]})
n, m = df.shape
weights = np.zeros(m + 1).reshape(m + 1, 1) # zero vector of starting weights
# add the intercept column
features = np.ones((n, m + 1)) # make matrix with all 1's
features[:,1:] = df # replace the 1's in all columns after column 0 with actual data
If I run each of these methods individually on the beginning weight vector, they run. But once I try to optimize, I get a shape error:
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
ValueError Traceback (most recent call last)
<ipython-input-26-34c3cde48ac4> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
845 else:
846 grad_calls, myfprime = wrap_function(fprime, args)
--> 847 gfk = myfprime(x0)
848 k = 0
849 N = len(x0)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
287 def function_wrapper(*wrapper_args):
288 ncalls[0] += 1
--> 289 return function(*(wrapper_args + args))
290
291 return ncalls, function_wrapper
<ipython-input-3-9678bc972b41> in gradient_log_likelihood(weights, features, labels)
2 """calculates the gradient (Jacobian) of the log likelihood"""
3 error = labels - sigma(features, weights)
----> 4 grad = (error * features).sum(axis=0)
5 return grad.reshape(grad.shape[0], 1)
6
ValueError: operands could not be broadcast together with shapes (3,3) (3,4)
The problem is that somehow this line:
error = (labels - sigma(features, weights))
Converts error from a 3 x 1 vector into a 3 x 3 matrix.
Note that if you print error and run gradient_log_likelihood(weights, features, labels), you get output:
[[-0.5]
[ 0.5]
[ 0.5]]
And if you run the optimization, you get:
[[-0.5 -0.5 -0.5]
[ 0.5 0.5 0.5]
[ 0.5 0.5 0.5]]
in addition to the ValueError. This is because labels - sigma(features, weights) changes the shape.
You can investigate why, but if you hacked around it you can just pull the first column out, error = (labels - sigma(features, weights)).T[0].reshape(3,1) which gives you the same solution when you run gradient_log_likelihood(weights, features, labels) but you get a new error in the optimization function.
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
6.23832462504
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-135-d7e8b04daeba> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
863 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
864 _line_search_wolfe12(f, myfprime, xk, pk, gfk,
--> 865 old_fval, old_old_fval)
866 except _LineSearchError:
867 # Line search failed to find a better solution.
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)
697 ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
698 old_fval, old_old_fval,
--> 699 **kwargs)
700
701 if ret[0] is None:
/Library/Python/2.7/site-packages/scipy/optimize/linesearch.pyc in line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, args, c1, c2, amax, amin, xtol)
95 return np.dot(gval[0], pk)
96
---> 97 derphi0 = np.dot(gfk, pk)
98
99 stp, fval, old_fval = scalar_search_wolfe1(
ValueError: shapes (4,1) and (4,1) not aligned: 1 (dim 1) != 4 (dim 0)

Logistic regression: objects are not aligned

I am trying to do logistic regression on this dataset from Andrew Ng's machine learning class on Coursera.
The idea is that we have a cost function, which we need to minimize to find the parameters theta.
import numpy as np
from scipy.optimize import fmin_bfgs
data = np.loadtxt('ex2data1.txt',delimiter=",")
m,n = data.shape
X = np.array(np.column_stack((np.ones(m),data[:,:-1])))
y = np.array(data[:,2].reshape(m,1))
theta = np.array(np.zeros(n).reshape(n,1))
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
    return 1 / (1 + np.exp(-z))
def hypothesis(X, theta):
    """Return ``sigmoid(X . theta)``; the shape follows ``X.dot(theta)``."""
    return sigmoid(X.dot(theta))
def cost(theta):
    """Logistic-regression cost for ``theta`` on the module-level ``X``, ``y``.

    Prints ``theta.shape`` on every call as a debugging aid.

    NOTE(review): ``y`` is an (m, 1) column, so the value returned is a small
    array rather than a plain scalar.
    """
    print(theta.shape)  # function-call form works on Python 2 and 3
    h = hypothesis(X, theta)
    total = (-y.T.dot(np.log(h)) - (1 - y).T.dot(np.log(1 - h))) / m
    return total
def gradient(theta):
    """Gradient of ``cost`` for ``theta``, flattened to 1-D.

    NOTE(review): ``fmin_bfgs`` passes ``theta`` as a 1-D array, so ``h`` is
    1-D and ``h - y`` broadcasts against the (m, 1) ``y`` to (m, m); the
    flattened result then has the wrong length. Reshaping ``theta`` to a
    column first avoids this.
    """
    h = hypothesis(X, theta)
    grad = ((h - y).T.dot(X)).T / m
    return grad.flatten()
def fmin():
    """Minimise ``cost`` with BFGS from an all-zero start and return theta.

    The starting point is built as an (n, 1) column of zeros.
    """
    initial_theta = np.zeros(n).reshape(n, 1)
    theta = fmin_bfgs(cost, initial_theta, fprime=gradient)
    return theta
print fmin()
I am getting ValueError: Objects are not aligned but I have checked the shapes of all entities and still can't figure it out. Here is the traceback:
---> 32 theta=fmin_bfgs(cost,initial_theta,fprime=gradient)
33
/usr/lib/python2.7/dist-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
775 'return_all': retall}
776
--> 777 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
778
779 if full_output:
/usr/lib/python2.7/dist-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
844 gnorm = vecnorm(gfk, ord=norm)
845 while (gnorm > gtol) and (k < maxiter):
--> 846 pk = -numpy.dot(Hk, gfk)
847 try:
848 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
ValueError: objects are not aligned
I modified your code, it can get the same result as LogisticRegression in sklearn with c=inf:
import numpy as np
from scipy.optimize import fmin_bfgs
import io
data = np.loadtxt('ex2data1.txt',delimiter=",")
m,n = data.shape
X = np.array(np.column_stack((np.ones(m),data[:,:-1])))
y = np.array(data[:,2].reshape(m,1))
theta = np.array(np.zeros(n).reshape(n,1))
def sigmoid(z):
    """Element-wise logistic function; preserves the shape of ``z``."""
    return 1 / (1 + np.exp(-z))
def hypothesis(X, theta):
    """Predicted probabilities ``sigmoid(X . theta)`` for each row of ``X``."""
    return sigmoid(X.dot(theta))
def cost(theta):
    """Scalar logistic-regression cost on the module-level ``X``, ``y``, ``m``.

    Returns ``np.inf`` instead of NaN so the optimiser treats a point where
    the cost cannot be evaluated as arbitrarily bad.
    """
    h = hypothesis(X, theta)
    total = (-y.T.dot(np.log(h)) - (1 - y).T.dot(np.log(1 - h))) / m
    r = total[0]  # unwrap the one-element result
    if np.isnan(r):
        return np.inf
    return r
def gradient(theta):
    """Gradient of ``cost`` with respect to ``theta``, returned as 1-D."""
    # Reshape to a column so that h has the same (m, 1) shape as y and the
    # subtraction below does not broadcast.
    theta = theta.reshape(-1, 1)
    h = hypothesis(X, theta)
    grad = ((h - y).T.dot(X)).T / m
    return grad.flatten()
def fmin():
    """Minimise ``cost`` with BFGS from a 1-D all-zero start; return theta."""
    initial_theta = np.zeros(n)
    theta = fmin_bfgs(cost, initial_theta, fprime=gradient)
    return theta
theta = fmin()

IndexError: too many indices working with Pandas Dataframe

OK so here's my code for a multi-classification task using one-vs-all logistic regression with some regularization. I've been struggling with this for the past 2 days, I don't know why it doesn't work.
import pandas as pd
import numpy as np
import scipy.optimize as sp
Data = pd.read_csv(Location,
sep=';',
dtype = np.float64,
header = None)
X = Data.ix[:,0:1]
y = Data.ix[:,2:]
y.columns = [0]
def sigmoid(z):
    """Logistic function ``1.0 / (1.0 + exp(-z))``, applied element-wise."""
    return 1.0 / (1.0 + np.exp(-z))
def lrCostFunction(theta, X, y, lambd):
    """Regularised logistic-regression cost; the first parameter is excluded
    from the penalty.

    NOTE(review): the optimiser passes ``theta`` as a 1-D array, for which
    ``theta[0, 0]`` raises ``IndexError: too many indices``; ``theta[0]`` is
    the 1-D equivalent.
    NOTE(review): ``.ix`` has been removed from pandas; ``.iloc`` / ``.loc``
    are the replacements - confirm which indexing is intended.
    """
    m, n = X.shape
    h = sigmoid(X.dot(theta))  # evaluated once, used in both terms
    J = -(y.T.dot(np.log(h)) + (1 - y).T.dot(np.log(1 - h))) / m
    J = J + (theta.T.dot(theta) - np.power(theta[0, 0], 2)) * lambd / (2 * m)
    return J.ix[0, 0]
def Gradient(theta, X, y, lambd):
    """Gradient of ``lrCostFunction``, returned as a flat Python list.

    The regularisation term ``lambd * theta / m`` is added to the rows
    selected by ``.ix[1:(n-1), :]``.

    NOTE(review): ``theta.ix`` requires a pandas object, but the optimiser
    passes a NumPy array - this line presumably fails as well.
    NOTE(review): ``.ix`` has been removed from pandas; confirm whether the
    slice end was meant to be inclusive when porting to ``.iloc`` / ``.loc``.
    """
    m, n = X.shape
    grad = X.T.dot(sigmoid(X.dot(theta)) - y) / m
    grad.ix[1:(n - 1), :] = grad.ix[1:(n - 1), :] + lambd * theta.ix[1:(n - 1), :] / m
    return grad.values.flatten().tolist()
def oneVsAll(X, y, num_labels, lambd):
    """Train logistic-regression classifiers with TNC and collect their
    parameters, one row of ``all_theta`` per classifier.

    A column of ones is prepended to ``X`` as the intercept feature.

    NOTE(review): ``range(0, num_labels - 1)`` leaves the last row of
    ``all_theta`` at zero - confirm whether every label should be trained.
    NOTE(review): ``y`` is passed unchanged on every iteration; a one-vs-all
    scheme normally binarises it per class ``c`` - confirm label encoding.
    NOTE(review): ``.ix`` has been removed from pandas.
    """
    m, n = X.shape
    all_theta = pd.DataFrame(data=[[0 for col in range(n + 1)] for row in range(num_labels)])
    ones = pd.DataFrame(data=[1 for i in range(X.shape[0])])
    X = pd.concat([ones, X], axis=1)
    for c in range(0, num_labels - 1):
        initial_theta = pd.DataFrame(data=[0 for i in range(n + 1)])
        res = sp.minimize(fun=lrCostFunction,
                          x0=initial_theta,
                          args=(X, y, lambd),
                          method='TNC',
                          jac=Gradient)
        # minimize returns an OptimizeResult; the fitted parameters are in
        # its ``x`` attribute, which is what belongs in the table.
        all_theta.ix[c, :] = res.x
    return all_theta
oneVsAll(X, y, 4, 0.1)
And it says :
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-27-b18648b06674> in <module>()
1 theta = pd.DataFrame(data = [0 for i in range(X.shape[1])])
----> 2 oneVsAll(X, y, 4, 0.1)
<ipython-input-26-ba0f7093d1f6> in oneVsAll(X, y, num_labels, lambd)
10 args = (X,y,lambd),
11 method = 'TNC',
---> 12 jac = Gradient)
13 all_theta.ix[c,:] = theta
14 return all_theta
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
381 elif meth == 'tnc':
382 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
--> 383 **options)
384 elif meth == 'cobyla':
385 return _minimize_cobyla(fun, x0, args, constraints, **options)
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, **unknown_options)
396 offset, messages, maxCGit, maxfun,
397 eta, stepmx, accuracy, fmin, ftol,
--> 398 xtol, pgtol, rescale, callback)
399
400 funv, jacv = func_and_grad(x)
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in func_and_grad(x)
358 else:
359 def func_and_grad(x):
--> 360 f = fun(x, *args)
361 g = jac(x, *args)
362 return f, g
<ipython-input-24-5f31e87e00da> in lrCostFunction(theta, X, y, lambd)
2 m , n = X.shape
3 J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m
----> 4 J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m);
5 return J.ix[0,0]
IndexError: too many indices
I don't know anything about the math, but the error is coming from this code:
theta[0,0]
Theta is a 1d array, so you'd need to index it as theta[0], unless there was some reason you were expecting it to be 2d?

Categories

Resources