I am adapting existing implementations of logistic regression, but I can't figure out what I am doing wrong.
Here is my implementation:
from scipy.optimize import fmin_bfgs
import numpy as np
import pandas as pd
# With help from http://stackoverflow.com/questions/13794754/logistic-regression-using-scipy
# as well as https://bryantravissmith.com/2015/12/29/implementing-logistic-regression-from-scratch-part-2-python-code/
def sigma(features, weights):
"""returns sigma(<w,x>)"""
return 1 / (1 + np.exp(-features.dot(weights)))
def log_likelihood(weights, features, labels):
"""calculates -ln p(t|w)"""
s = sigma(features, weights)
#s += 1e-24 # pseudocount to prevent logs of 0
t = labels * np.log(s + 1e-24)
t2 = (1 - labels) * (np.log((1 - s) + 1e-24))
ll = (t + t2).sum()
print -ll
return -ll
def gradient_log_likelihood(weights, features, labels):
"""calculates the gradient (Jacobian) of the log likelihood"""
error = labels - sigma(features, weights)
grad = (error * features).sum(axis=0)
return grad.reshape(grad.shape[0], 1)
Here is a sample dataset:
labels = np.array([0, 1, 1]).reshape(3, 1)
df = pd.DataFrame.from_dict({'a': [1,2,3], 'b': [2,3,4], 'c': [6,7,8]})
n, m = df.shape
weights = np.zeros(m + 1).reshape(m + 1, 1) # zero vector of starting weights
# add the intercept column
features = np.ones((n, m + 1)) # make matrix with all 1's
features[:,1:] = df # replace the 1's in all columns after column 0 with actual data
If I run each of these methods individually on the beginning weight vector, they run. But once I try to optimize, I get a shape error:
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
ValueError Traceback (most recent call last)
<ipython-input-26-34c3cde48ac4> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-4, fprime=gradient_log_likelihood)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
845 else:
846 grad_calls, myfprime = wrap_function(fprime, args)
--> 847 gfk = myfprime(x0)
848 k = 0
849 N = len(x0)
/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
287 def function_wrapper(*wrapper_args):
288 ncalls[0] += 1
--> 289 return function(*(wrapper_args + args))
290
291 return ncalls, function_wrapper
<ipython-input-3-9678bc972b41> in gradient_log_likelihood(weights, features, labels)
2 """calculates the gradient (Jacobian) of the log likelihood"""
3 error = labels - sigma(features, weights)
----> 4 grad = (error * features).sum(axis=0)
5 return grad.reshape(grad.shape[0], 1)
6
ValueError: operands could not be broadcast together with shapes (3,3) (3,4)
The problem is that somehow this line:
error = (labels - sigma(features, weights))
Converts error from a 3 x 1 vector into a 3 x 3 matrix.
Note that if you print error and run gradient_log_likelihood(weights, features, labels), you get output:
[[-0.5]
[ 0.5]
[ 0.5]]
And if you run the optimization, you get:
[[-0.5 -0.5 -0.5]
[ 0.5 0.5 0.5]
[ 0.5 0.5 0.5]]
in addition to the ValueError. This is because labels - sigma(features, weights) changes the shape.
You can investigate why, but if you hacked around it you can just pull the first column out, error = (labels - sigma(features, weights)).T[0].reshape(3,1) which gives you the same solution when you run gradient_log_likelihood(weights, features, labels) but you get a new error in the optimization function.
optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
6.23832462504
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-135-d7e8b04daeba> in <module>()
----> 1 optimized = fmin_bfgs(log_likelihood, x0=weights, args=(features, labels), gtol=1e-3, fprime=gradient_log_likelihood)
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
791 'return_all': retall}
792
--> 793 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
794
795 if full_output:
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
863 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
864 _line_search_wolfe12(f, myfprime, xk, pk, gfk,
--> 865 old_fval, old_old_fval)
866 except _LineSearchError:
867 # Line search failed to find a better solution.
/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)
697 ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
698 old_fval, old_old_fval,
--> 699 **kwargs)
700
701 if ret[0] is None:
/Library/Python/2.7/site-packages/scipy/optimize/linesearch.pyc in line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, args, c1, c2, amax, amin, xtol)
95 return np.dot(gval[0], pk)
96
---> 97 derphi0 = np.dot(gfk, pk)
98
99 stp, fval, old_fval = scalar_search_wolfe1(
ValueError: shapes (4,1) and (4,1) not aligned: 1 (dim 1) != 4 (dim 0)
Related
I want to minimise the objective function: objective_function_2() subject to the inequality constraints 69 < T(x) < 71. The objective of the optimisation is to find the set of Fourier series parameters [c0, c1, c2, wavelength] that minimises the objective function (curve fitting problem). fx() is the function that calculates the residuals between the data and the estimated values of the Fourier series. T(x) is the function calculate_tightness() that calculates the range of the fourier series estimated at each iteration.
The problem is that I don't understand why I get this error message: ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
the objective_function_2() produce an n-vector of residuals and calculate_tightness() one scalar value.
Does anyone have any idea why this is happening?
import numpy as np
from scipy.optimize import LinearConstraint, NonlinearConstraint, BFGS, minimize
ref_fld = np.array([-479.9024323 , -469.80142114, -459.70040999, -449.59939883,
-439.49838768, -429.39737652, -419.29636536, -409.19535421,
-399.09434305, -388.9933319 , -378.89232074, -368.79130958,
-358.69029843, -348.58928727, -338.48827611, -328.38726496,
-318.2862538 , -308.18524265, -298.08423149, -287.98322033,
-277.88220918, -267.78119802, -257.68018686, -247.57917571,
-237.47816455, -227.3771534 , -217.27614224, -207.17513108,
-197.07411993, -186.97310877, -176.87209762, -166.77108646,
-156.6700753 , -146.56906415, -136.46805299, -126.36704183,
-116.26603068, -106.16501952, -96.06400837, -85.96299721,
-75.86198605, -65.7609749 , -55.65996374, -45.55895258,
-35.45794143, -25.35693027, -15.25591912, -5.15490796,
4.9461032 , 15.04711435, 25.14812551, 35.24913667,
45.35014782, 55.45115898, 65.55217013, 75.65318129,
85.75419245, 95.8552036 , 105.95621476, 116.05722591,
126.15823707, 136.25924823, 146.36025938, 156.46127054,
166.5622817 , 176.66329285, 186.76430401, 196.86531516,
206.96632632, 217.06733748, 227.16834863, 237.26935979,
247.37037095, 257.4713821 , 267.57239326, 277.67340441,
287.77441557, 297.87542673, 307.97643788, 318.07744904,
328.1784602 , 338.27947135, 348.38048251, 358.48149366,
368.58250482, 378.68351598, 388.78452713, 398.88553829,
408.98654944, 419.0875606 , 429.18857176, 439.28958291,
449.39059407, 459.49160523, 469.59261638, 479.69362754,
489.79463869, 499.89564985, 509.99666101, 520.09767216])
fld = np.array([-300.41522506, -120.9280477 , -274.77413647, 494.45656622,
-44.00495929, -479.90233432, 58.55913797, -326.056248 ,
84.20018256, 443.17449743])
flr = np.array([-13.20752855, 38.56985419, 44.28484794, -51.64708478,
-10.50558888, -49.95878419, -53.88137785, -12.73304144,
-54.2792669 , -7.59544309])
def fourier_series(x, c0, c1, c2, w):
"""
Parameters
----------
x
c0
c1
c2
w
Returns
-------
"""
v = np.array(x.astype(float))
# v.fill(c0)
v = c0 + c1 * np.cos(2 * np.pi / w * x) + c2 * np.sin(2 * np.pi / w * x)
return np.rad2deg(np.arctan(v))
def calculate_splot(ref_fold_frame, popt):
return np.rad2deg(np.arctan(fourier_series(ref_fold_frame, *popt)))
def calculate_tightness(theta):
curve = calculate_splot(ref_fld, theta)
amax = np.arctan(np.deg2rad(curve.max()))
amin = np.arctan(np.deg2rad(curve.min()))
return 180 - np.rad2deg(2*np.tan((amax - amin) / 2))
def fx(theta, x, y):
# function to calculate residuals for optimisation (least squares)
return np.tan(np.deg2rad(y)) - fourier_series(x, *theta)
def objective_function_2(theta):
x = fld
y = flr
return fx(theta, x, y) + calculate_tightness(theta)
Cx = NonlinearConstraint(calculate_tightness, 69, 71, jac='2-point', hess=BFGS())
x0 = [0, 1, 1, 500]
res = minimize(objective_function_2, x0, constraints=[Cx], method='trust-constr',
options={'verbose': 1}, )
Error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_91/1210121354.py in <module>
1 Cx = NonlinearConstraint(calculate_tightness, 69, 71, jac='2-point', hess=BFGS()) #setup_optimisation_constraints(constraints_matrix, [60, -1e-2], [71, 1e-4], linear=False)
2 x0 = theta[recovery<1][0]
----> 3 res = minimize(objective_function_2, x0, constraints=[Cx], method='trust-constr',
4
5 options={'verbose': 1}, )
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
632 constraints, callback=callback, **options)
633 elif meth == 'trust-constr':
--> 634 return _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
635 bounds, constraints,
636 callback=callback, **options)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/minimize_trustregion_constr.py in _minimize_trustregion_constr(fun, x0, args, grad, hess, hessp, bounds, constraints, xtol, gtol, barrier_tol, sparse_jacobian, callback, maxiter, verbose, finite_diff_rel_step, initial_constr_penalty, initial_tr_radius, initial_barrier_parameter, initial_barrier_tolerance, factorization_method, disp)
507
508 elif method == 'tr_interior_point':
--> 509 _, result = tr_interior_point(
510 objective.fun, objective.grad, lagrangian_hess,
511 n_vars, canonical.n_ineq, canonical.n_eq,
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in tr_interior_point(fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac, x0, fun0, grad0, constr_ineq0, jac_ineq0, constr_eq0, jac_eq0, stop_criteria, enforce_feasibility, xtol, state, initial_barrier_parameter, initial_tolerance, initial_penalty, initial_trust_radius, factorization_method)
302 s0 = np.maximum(-1.5*constr_ineq0, np.ones(n_ineq))
303 # Define barrier subproblem
--> 304 subprob = BarrierSubproblem(
305 x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac,
306 barrier_parameter, tolerance, enforce_feasibility,
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in __init__(self, x0, s0, fun, grad, lagr_hess, n_vars, n_ineq, n_eq, constr, jac, barrier_parameter, tolerance, enforce_feasibility, global_stop_criteria, xtol, fun0, grad0, constr_ineq0, jac_ineq0, constr_eq0, jac_eq0)
51 self.xtol = xtol
52 self.fun0 = self._compute_function(fun0, constr_ineq0, s0)
---> 53 self.grad0 = self._compute_gradient(grad0)
54 self.constr0 = self._compute_constr(constr_ineq0, constr_eq0, s0)
55 self.jac0 = self._compute_jacobian(jac_eq0, jac_ineq0, s0)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/scipy/optimize/_trustregion_constr/tr_interior_point.py in _compute_gradient(self, g)
137
138 def _compute_gradient(self, g):
--> 139 return np.hstack((g, -self.barrier_parameter*np.ones(self.n_ineq)))
140
141 def _compute_jacobian(self, J_eq, J_ineq, s):
<__array_function__ internals> in hstack(*args, **kwargs)
/mnt/c/Users/rcha0044/LoopPhD/LOOP_ENV/env_18/lib/python3.8/site-packages/numpy/core/shape_base.py in hstack(tup)
343 return _nx.concatenate(arrs, 0)
344 else:
--> 345 return _nx.concatenate(arrs, 1)
346
347
<__array_function__ internals> in concatenate(*args, **kwargs)
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
SciPy/NumPy/Python version information
1.7.1 1.21.3 sys.version_info(major=3, minor=8, micro=10, releaselevel='final', serial=0)
Could you help on how to minimize a function in Python? Particularly, it's the logistic function. Below is my cost function, I got it correctly because I checked the answer.
def g(z):
h = 1 / (1 + numpy.exp(-z))
return h
theta1_ravel = theta1.ravel().T
theta2_ravel = theta2.ravel().T
theta_conca = numpy.concatenate((theta1_ravel, theta2_ravel))
lambada = 1
def computecost(theta_conca):
theta1 = numpy.reshape(theta_conca[0:10025], (25, 401))
theta2 = numpy.reshape(theta_conca[10025:10285], (10, 26))
a1 = X_1
a2 = g(X_1 * theta1.T) # (5000, 25)
a2 = numpy.column_stack((numpy.ones((m, 1)), a2))
a3 = g(a2 * theta2.T) # (5000, 10)
reg_term = (lambada / (2*m)) * (numpy.sum(numpy.power(theta1[:, 1:], 2)) + numpy.sum(numpy.power(theta2[:, 1:], 2)))
J = -(1/m) * (numpy.sum(numpy.multiply(Y, numpy.log(a3)) + numpy.multiply((1 - Y), numpy.log(1 - a3)))) + reg_term
return J
J = computecost(theta_conca)
J
And then I try to minimize it, get theta by using "scipy.optimize.minimize" but it's not working.
theta_random = numpy.random.rand(m, 1)
theta_random
scipy.optimize.minimize(computecost, theta_random)
This is the error notification:
ValueError Traceback (most recent call last)
<ipython-input-36-05051c7a9e1f> in <module>
1 theta_random = numpy.random.rand(m, 1)
2 theta_random
----> 3 scipy.optimize.minimize(computecost, theta_random)
/usr/lib/python3/dist-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
592 return _minimize_cg(fun, x0, args, jac, callback, **options)
593 elif meth == 'bfgs':
--> 594 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
595 elif meth == 'newton-cg':
596 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
996 else:
997 grad_calls, myfprime = wrap_function(fprime, args)
--> 998 gfk = myfprime(x0)
999 k = 0
1000 N = len(x0)
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
325 def function_wrapper(*wrapper_args):
326 ncalls[0] += 1
--> 327 return function(*(wrapper_args + args))
328
329 return ncalls, function_wrapper
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in approx_fprime(xk, f, epsilon, *args)
755
756 """
--> 757 return _approx_fprime_helper(xk, f, epsilon, args=args)
758
759
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in _approx_fprime_helper(xk, f, epsilon, args, f0)
689 """
690 if f0 is None:
--> 691 f0 = f(*((xk,) + args))
692 grad = numpy.zeros((len(xk),), float)
693 ei = numpy.zeros((len(xk),), float)
/usr/lib/python3/dist-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
325 def function_wrapper(*wrapper_args):
326 ncalls[0] += 1
--> 327 return function(*(wrapper_args + args))
328
329 return ncalls, function_wrapper
<ipython-input-33-3761d25f71af> in computecost(theta_conca)
14 def computecost(theta_conca):
15
---> 16 theta1 = numpy.reshape(theta_conca[0:10025], (25, 401))
17 theta2 = numpy.reshape(theta_conca[10025:10285], (10, 26))
18
<__array_function__ internals> in reshape(*args, **kwargs)
/usr/lib/python3/dist-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
299 [5, 6]])
300 """
--> 301 return _wrapfunc(a, 'reshape', newshape, order=order)
302
303
/usr/lib/python3/dist-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
59
60 try:
---> 61 return bound(*args, **kwds)
62 except TypeError:
63 # A TypeError occurs if the object does have such a method in its
ValueError: cannot reshape array of size 5000 into shape (25,401)
How can I implement it? I'm sorry that the code looks like a mess but I think I shouldn't cut off something. I'll give you the entire code and data if you need to try on your device.
Thank you.
I'm struggling to get scipy.minimize to work for an optimization parameter that's an array, where I'm only looking at a part of the array inside the objective function.
import numpy as np
from scipy.optimize import minimize
n = 5
X_true = np.random.normal(size=(n,n))
X_guess = np.random.normal(size=(n,n))
indices = np.triu_indices(n)
def mean_square_error(X):
return ((X.flatten() - X_true.flatten()) ** 2).mean()
def mean_square_error_over_indices(X):
return ((X[indices].flatten() - X_true[indices].flatten()) ** 2).mean()
# works fine
print(mean_square_error(X_guess))
# works fine
print(mean_square_error_over_indices(X_guess))
# works fine (flatten is necessary inside the objective function)
print(minimize(mean_square_error, X_guess).x)
# IndexError
print(minimize(mean_square_error_over_indices, X_guess).x)
The traceback:
IndexError Traceback (most recent call last)
<ipython-input-1-08d40604e22a> in <module>
20 print(minimize(mean_square_error, X_guess).x) # works fine
21
---> 22 print(minimize(mean_square_error_over_indices, X_guess).x) # error
C:\Anaconda\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
593 return _minimize_cg(fun, x0, args, jac, callback, **options)
594 elif meth == 'bfgs':
--> 595 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
596 elif meth == 'newton-cg':
597 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
C:\Anaconda\lib\site-packages\scipy\optimize\optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
968 else:
969 grad_calls, myfprime = wrap_function(fprime, args)
--> 970 gfk = myfprime(x0)
971 k = 0
972 N = len(x0)
C:\Anaconda\lib\site-packages\scipy\optimize\optimize.py in function_wrapper(*wrapper_args)
298 def function_wrapper(*wrapper_args):
299 ncalls[0] += 1
--> 300 return function(*(wrapper_args + args))
301
302 return ncalls, function_wrapper
C:\Anaconda\lib\site-packages\scipy\optimize\optimize.py in approx_fprime(xk, f, epsilon, *args)
728
729 """
--> 730 return _approx_fprime_helper(xk, f, epsilon, args=args)
731
732
C:\Anaconda\lib\site-packages\scipy\optimize\optimize.py in _approx_fprime_helper(xk, f, epsilon, args, f0)
662 """
663 if f0 is None:
--> 664 f0 = f(*((xk,) + args))
665 grad = numpy.zeros((len(xk),), float)
666 ei = numpy.zeros((len(xk),), float)
C:\Anaconda\lib\site-packages\scipy\optimize\optimize.py in function_wrapper(*wrapper_args)
298 def function_wrapper(*wrapper_args):
299 ncalls[0] += 1
--> 300 return function(*(wrapper_args + args))
301
302 return ncalls, function_wrapper
<ipython-input-1-08d40604e22a> in mean_square_error_over_indices(X)
11
12 def mean_square_error_over_indices(X):
---> 13 return ((X[indices].flatten() - X_true[indices].flatten()) ** 2).mean()
14
15
IndexError: too many indices for array
Based on the docs scipy.optimize.minimize accepts 1d arrays, so you are right about using "flatten()" but you should also use it for the initial guess that you pass to minimize()`. Here my suggestion to solve your problem:
import numpy as np
from scipy.optimize import minimize
# init
n = 5
x_true = np.random.normal(size=(n,n))
x_guess = np.random.normal(size=(n,n))
indices = np.triu_indices(n)
# flatten initial values for minimize
guess_x0 = x_guess.flatten()
guess_indeices_x0 = x_guess[indices].flatten()
# define objective funcs
mse = lambda x: ((x - x_true.flatten()) ** 2).mean()
mse_over_indices = lambda x: ((x - x_true[indices].flatten()) ** 2).mean()
# works fine
print("MSE: %5f" % mse(guess_x0))
print("MSE for indices: %5f" % mse_over_indices(guess_indeices_x0))
# works fine (flatten is necessary inside the objective function)
print("Result 1:", minimize(mse, guess_x0).x)
print("Result 2:", minimize(mse_over_indices, guess_indeices_x0).x)
Output:
MSE: 2.763674
MSE for indices: 3.192139
Result 1: [-1.2828193 0.49468516 -0.99500157 -0.47284983 1.6380719 -0.33051017
0.13769163 -0.23920633 -0.87430572 0.63945803 1.38327467 0.8484247
0.31888506 -1.15764468 1.06891773 -0.28372002 1.34104286 1.21024251
-0.11020374 1.37024001 1.08940389 1.82391261 0.32469148 0.64567877
0.54364199]
Result 2: [-1.28281964 0.49468503 -0.99500147 -0.47284976 1.63807209 0.13769154
-0.23920624 -0.87430606 0.63945812 0.31888521 -1.15764475 1.06891776
-0.11020373 1.37024006 0.54364213]
EDIT: The data set is the MNIST data set from the Homework of Week 4 of Andrew Ng's Machine Learning Course
I've checked the question on scipy optimize but I still couldn't figure out what is wrong with my code. I am trying to optimize theta for the oneVsAll question on the Andrew Ng coursera course.
Here is the relevant code
def sigmoid(x):
a = []
for item in x:
a.append(1/(1+math.exp(-item)))
return a
def hypothesis(x, theta):
return np.array(sigmoid(np.dot(x, theta)))
def costFunction(theta, x, y, lamba_):
m = X.shape[0]
part1 = np.dot(y.T, np.log(hypothesis(x, theta)).reshape(m,1))
part2 = np.dot((np.ones((m,1)) - y).T, np.log( 1 - hypothesis(x, theta)).reshape(m,1))
summ = (part1 + part2)
return -summ[0]/m
def gradientVect(theta, x, y, lambda_):
n = X.shape[1]
m = X.shape[0]
gradient = []
theta = theta.reshape(n,1)
beta = hypothesis(x, theta) - y
reg = theta[1:] * lambda_/m
grad = np.dot(X.T, beta) * 1./m
grad[1:] = grad[1:] * reg
return grad.flatten()
from scipy import optimize
def optimizeTheta(x, y, nLabels, lambda_):
for i in np.arange(0, nLabels):
theta = np.zeros((n,1))
res = optimize.minimize(costFunction, theta, args=(x, (y == i)*1, lambda_), method=None,
jac=gradientVect, options={'maxiter':50})
print(res)
return result
but running
optimizeTheta(X, y, 10, 0) # X shape = 401, 500
Gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-247-e0e6e4c1eddd> in <module>()
3 n = X.shape[1]
4
----> 5 optimizeTheta(X, y, 10, 0)
<ipython-input-246-0a15e9f4769a> in optimizeTheta(x, y, nLabels, lambda_)
54 theta = np.zeros((n,1))
55 res = optimize.minimize(costFunction, x0 = theta, args=(x, (y == i)*1, lambda_), method=None,
---> 56 jac=gradientVect, options={'maxiter':50})
57 print(res)
58 return result
//anaconda/lib/python3.5/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
439 return _minimize_cg(fun, x0, args, jac, callback, **options)
440 elif meth == 'bfgs':
--> 441 return _minimize_bfgs(fun, x0, args, jac, callback, **options)
442 elif meth == 'newton-cg':
443 return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
//anaconda/lib/python3.5/site-packages/scipy/optimize/optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
859 gnorm = vecnorm(gfk, ord=norm)
860 while (gnorm > gtol) and (k < maxiter):
--> 861 pk = -numpy.dot(Hk, gfk)
862 try:
863 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
ValueError: shapes (401,401) and (2005000,) not aligned: 401 (dim 1) != 2005000 (dim 0)
And I can't figure out why the shapes are not aligned.
Thanks!
So I realized what was wrong with my question.
The problem was the sigmoid function returning a list and not an integer and therefore it messed up the matrixes multiplications afterwards. The new sigmoid function is
def sigmoid(z):
return(1 / (1 + np.exp(-z)))
I am having difficulties running a PYMC3 model when the observed data is discrete. Oddly, if the observed data contains the value zero (0.), the model will run.
I've read in other posts that that suggest using
start = pm.find_MAP(fmin=scipy.optimize.fmin_powell) but that does not resolve the issue.
pymc3.__version__ = '3.0'
theano.__version__ = '0.7.0.dev-RELEASE'
numpy.__version__ = '1.8.0rc1'
Python 2.7.10
See iPython notebook
The code and error are below.
import pymc3 as pm
data = [6.0,12.0,12.0,46.0,5.0,11.0,11.0,39.0,4.0,10.0,25.0,11.0,8.0,5.0,10.0,2.0,30.0,21.0]
with pm.Model() as model:
alpha = pm.Uniform('alpha', lower=0, upper=100)
mu = pm.Uniform('mu', lower=0, upper=100)
y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha)
y_est = pm.NegativeBinomial('y_est',
mu=mu,
alpha=alpha,
observed=data)
start = pm.find_MAP()
step = pm.Metropolis()
trace = pm.sample(20000, step, start, progressbar=True)
The error I get is:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-b9f2264fccfc> in <module>()
14 observed=data)
15
---> 16 start = pm.find_MAP()
17
18 step = pm.Metropolis()
/Library/Python/2.7/site-packages/pymc3/tuning/starting.pyc in find_MAP(start, vars, fmin, return_raw, disp, model, *args, **kwargs)
79 if 'fprime' in getargspec(fmin).args:
80 r = fmin(logp_o, bij.map(
---> 81 start), fprime=grad_logp_o, disp=disp, *args, **kwargs)
82 else:
83 r = fmin(logp_o, bij.map(start), disp=disp, *args, **kwargs)
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
775 'return_all': retall}
776
--> 777 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)
778
779 if full_output:
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
830 else:
831 grad_calls, myfprime = wrap_function(fprime, args)
--> 832 gfk = myfprime(x0)
833 k = 0
834 N = len(x0)
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args)
279 def function_wrapper(*wrapper_args):
280 ncalls[0] += 1
--> 281 return function(*(wrapper_args + args))
282
283 return ncalls, function_wrapper
/Library/Python/2.7/site-packages/pymc3/tuning/starting.pyc in grad_logp_o(point)
74
75 def grad_logp_o(point):
---> 76 return nan_to_num(-dlogp(point))
77
78 # Check to see if minimization function actually uses the gradient
/Library/Python/2.7/site-packages/pymc3/blocking.pyc in __call__(self, x)
117
118 def __call__(self, x):
--> 119 return self.fa(self.fb(x))
/Library/Python/2.7/site-packages/pymc3/model.pyc in __call__(self, state)
397
398 def __call__(self, state):
--> 399 return self.f(**state)
400
401 class LoosePointFunc(object):
/Library/Python/2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
862 node=self.fn.nodes[self.fn.position_of_error],
863 thunk=thunk,
--> 864 storage_map=getattr(self.fn, 'storage_map', None))
865 else:
866 # old-style linkers raise their own exceptions
/Library/Python/2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
312 # extra long error message in that case.
313 pass
--> 314 reraise(exc_type, exc_value, exc_trace)
315
316
/Library/Python/2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
850 t0_fn = time.time()
851 try:
--> 852 outputs = self.fn()
853 except Exception:
854 if hasattr(self.fn, 'position_of_error'):
ValueError: Input dimension mis-match. (input[0].shape[0] = 1, input[4].shape[0] = 18)
Apply node that caused the error: Elemwise{Composite{Switch(i0, i1, Switch(i2, Switch(i3, i1, i4), i1))}}(TensorConstant{(1,) of 0}, TensorConstant{(1,) of 0}, Elemwise{mul,no_inplace}.0, InplaceDimShuffle{x}.0, TensorConstant{[ 6. 12... 30. 21.]})
Toposort index: 33
Inputs types: [TensorType(int8, vector), TensorType(int8, (True,)), TensorType(int8, (True,)), TensorType(int8, (True,)), TensorType(float64, vector)]
Inputs shapes: [(1,), (1,), (1,), (1,), (18,)]
Inputs strides: [(1,), (1,), (1,), (1,), (8,)]
Inputs values: [array([0], dtype=int8), array([0], dtype=int8), array([1], dtype=int8), array([0], dtype=int8), 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(Elemwise{Composite{Switch(i0, i1, Switch(i2, Switch(i3, i1, i4), i1))}}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.