My code implements an active learning algorithm, using L-BFGS optimization. I want to optimize four parameters: alpha, beta, W and gamma.
However, when I run the code below, I get an error:
optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0 = x0, args = (X,Y,Z), fprime = func_grad)
File "C:\Python27\lib\site-packages\scipy\optimize\lbfgsb.py", line 188, in fmin_l_bfgs_b
**opts)
File "C:\Python27\lib\site-packages\scipy\optimize\lbfgsb.py", line 311, in _minimize_lbfgsb
isave, dsave)
_lbfgsb.error: failed in converting 7th argument ``g' of _lbfgsb.setulb to C/Fortran array
0-th dimension must be fixed to 22 but got 4
My code is:
# -*- coding: utf-8 -*-
import numpy as np
import scipy as sp
import scipy.stats as sps
num_labeler = 3
num_instance = 5
X = np.array([[1,1,1,1],[2,2,2,2],[3,3,3,3],[4,4,4,4],[5,5,5,5]])
Z = np.array([1,0,1,0,1])
Y = np.array([[1,0,1],[0,1,0],[0,0,0],[1,1,1],[1,0,0]])
W = np.array([[1,1,1,1],[2,2,2,2],[3,3,3,3]])
gamma = np.array([1,1,1,1,1])
alpha = np.array([1,1,1,1])
beta = 1
para = np.array([1,1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3,1,1,1,1,1])
def get_params(para):
# extract parameters from 1D parameter vector
assert len(para) == 22
alpha = para[0:4]
beta = para[4]
W = para[5:17].reshape(3, 4)
gamma = para[17:]
return alpha, beta, gamma, W
def log_p_y_xz(yit,zi,sigmati): #log P(y_it|x_i,z_i)
return np.log(sps.norm(zi,sigmati).pdf(yit))#tested
def log_p_z_x(alpha,beta,xi): #log P(z_i=1|x_i)
return -np.log(1+np.exp(-np.dot(alpha,xi)-beta))#tested
def sigma_eta_ti(xi, w_t, gamma_t): # 1+exp(-w_t x_i -gamma_t)^-1
return 1/(1+np.exp(-np.dot(xi,w_t)-gamma_t)) #tested
def df_alpha(X,Y,Z,W,alpha,beta,gamma):#df/dalpha
return np.sum((2/(1+np.exp(-np.dot(alpha,X[i])-beta))-1)*np.exp(-np.dot(alpha,X[i])-beta)*X[i]/(1+np.exp(-np.dot(alpha,X[i])-beta))**2 for i in range (num_instance))
#tested
def df_beta(X,Y,Z,W,alpha,beta,gamma):#df/dbeta
return np.sum((2/(1+np.exp(-np.dot(alpha,X[i])-beta))-1)*np.exp(-np.dot(alpha,X[i])-beta)/(1+np.exp(-np.dot(alpha,X[i])-beta))**2 for i in range (num_instance))
def df_w(X,Y,Z,W,alpha,beta,gamma):#df/sigma * sigma/dw
return np.sum(np.sum((-3)*(Y[i][t]**2-(-np.log(1+np.exp(-np.dot(alpha,X[i])-beta)))*(2*Y[i][t]-1))*(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**4)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))*X[i]+(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**2)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))*X[i]for t in range(num_labeler)) for i in range (num_instance))
def df_gamma(X,Y,Z,W,alpha,beta,gamma):#df/sigma * sigma/dgamma
return np.sum(np.sum((-3)*(Y[i][t]**2-(-np.log(1+np.exp(-np.dot(alpha,X[i])-beta)))*(2*Y[i][t]-1))*(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**4)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))+(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**2)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))for t in range(num_labeler)) for i in range (num_instance))
def func(para, *args):
alpha, beta, gamma, W = get_params(para)
#args
X = args [0]
Y = args[1]
Z = args[2]
return np.sum(np.sum(log_p_y_xz(Y[i][t], Z[i], sigma_eta_ti(X[i],W[t],gamma[t]))+log_p_z_x(alpha, beta, X[i]) for t in range(num_labeler)) for i in range (num_instance))
#tested
def func_grad(para, *args):
alpha, beta, gamma, W = get_params(para)
#args
X = args [0]
Y = args[1]
Z = args[2]
#gradients
d_f_a = df_alpha(X,Y,Z,W,alpha,beta,gamma)
d_f_b = df_beta(X,Y,Z,W,alpha,beta,gamma)
d_f_w = df_w(X,Y,Z,W,alpha,beta,gamma)
d_f_g = df_gamma(X,Y,Z,W,alpha,beta,gamma)
return np.array([d_f_a, d_f_b,d_f_w,d_f_g])
x0 = np.concatenate([np.ravel(alpha), np.ravel(beta), np.ravel(W), np.ravel(gamma)])
optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0 = x0, args = (X,Y,Z), fprime = func_grad)
I am not sure what the problem is. Maybe func_grad causes it? Could anyone have a look? Thanks.
You need to be taking the derivative of func with respect to each of the elements in your concatenated array of alpha, beta, w, gamma parameters, so func_grad ought to return a single 1D array of the same length as x0 (i.e. 22). Instead it returns a jumble of two arrays and two scalar floats nested inside an np.object array:
In [1]: func_grad(x0, X, Y, Z)
Out[1]:
array([array([ 0.00681272, 0.00681272, 0.00681272, 0.00681272]),
0.006684719133999417,
array([-0.01351227, -0.01351227, -0.01351227, -0.01351227]),
-0.013639910534587798], dtype=object)
Part of the problem is that np.array([d_f_a, d_f_b,d_f_w,d_f_g]) is not concatenating those objects into a single 1D array since some are numpy arrays and some are Python floats. That part is easily solved by using np.hstack([d_f_a, d_f_b,d_f_w,d_f_g]) instead.
However, the combined size of these objects is still only 10, whereas the output of func_grad needs to be a 22-long vector. You will need to take another look at your df_* functions. In particular, W is a (3, 4) array, but df_w only returns a (4,) vector, and gamma is a (5,) vector whereas df_gamma only returns a scalar.
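As a sketch of the shape bookkeeping (assuming each df_* function is first rewritten to return the gradient for its whole parameter block, which is not shown here), func_grad could assemble its return value like this:

def func_grad(para, *args):
    alpha, beta, gamma, W = get_params(para)
    X, Y, Z = args
    d_f_a = df_alpha(X, Y, Z, W, alpha, beta, gamma)  # expected shape (4,)
    d_f_b = df_beta(X, Y, Z, W, alpha, beta, gamma)   # scalar
    d_f_w = df_w(X, Y, Z, W, alpha, beta, gamma)      # expected shape (3, 4), one row per labeler
    d_f_g = df_gamma(X, Y, Z, W, alpha, beta, gamma)  # expected shape (5,)
    grad = np.concatenate([np.ravel(d_f_a), np.ravel(d_f_b),
                           np.ravel(d_f_w), np.ravel(d_f_g)])
    assert grad.shape == (22,)  # must match the length of x0
    return grad

The layout has to match x0 exactly (alpha, then beta, then W flattened row by row, then gamma) so that each gradient entry lines up with its parameter.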
I wrote a simple class in Python to compute spherical harmonic basis functions and corresponding coefficients for a function defined on a sphere. See below.
import numpy as np
from scipy.special import sph_harm
import scipy.integrate as integrate
class SphHarmBasis():
def __init__(self, n_coeffs=16):
self._n_coeffs = n_coeffs
self.basis = self.sph_harm_basis()
''' Compute real spherical harmonic basis function '''
def get_sph_harm_function(self, l, m):
def basis_function(theta, phi):
Y = sph_harm(abs(m), l, phi, theta)
if m < 0:
Y = np.sqrt(2) * (-1)**m * Y.imag
elif m > 0:
Y = np.sqrt(2) * (-1)**m * Y.real
return Y.real
return basis_function
''' Get a specified number of basis functions '''
def sph_harm_basis(self):
basis_functions = []
dimension = 0
l, m = 0, 0
while dimension < self._n_coeffs:
while m <= l:
basis_functions.append(self.get_sph_harm_function(l, m))
m += 1
dimension += 1
l += 1
m = -l
return basis_functions
''' Compute spherical harmonic coefficients '''
def sph_harm_coeff(self, Y, f):
def integrand(phi, theta):
return f(theta, phi) * Y(theta, phi) * np.sin(theta)
return integrate.dblquad(integrand, 0., np.pi, lambda x : 0., lambda x : 2*np.pi)[0]
''' Get spherical harmonic coefficients for a function in a basis '''
def sph_harm_transform(self, f, basis=None):
if basis is None:
basis = self.basis
coeffs = []
for Y in basis:
coeffs.append(self.sph_harm_coeff(Y, f))
return coeffs
''' Reconstruct a function from basis and corresponding coefficients'''
def sph_harm_reconstruct(self, coeffs, basis=None):
if basis is None:
basis = self.basis
return lambda theta, phi : np.dot(coeffs, [f(theta, phi) for f in basis])
And you can use it like this:
def my_sphere_function(theta, phi):
return np.sin(theta+phi)
my_basis = SphHarmBasis(n_coeffs=25)
# encode your function in the desired basis and record the coefficients
my_coeffs = my_basis.sph_harm_transform(my_sphere_function)
# reconstruct your function at a point
point = (np.pi, np.pi/8)
my_basis.sph_harm_reconstruct(my_coeffs)(*point), my_sphere_function(*point)
My problem is that it doesn't seem to be very accurate.
For example, running the following test code reports a mean absolute error of 0.4933463836715332.
import matplotlib.pyplot as plt
my_reconstr_function = my_basis.sph_harm_reconstruct(my_coeffs)
pts = np.linspace(0, 2*np.pi)
pts_2d = np.reshape(np.stack(np.meshgrid(pts,pts), axis=-1), (-1, 2))
actual = []
approx = []
pts_total = 0
for n, pt in enumerate(pts_2d):
f_actual = my_sphere_function(*pt)
f_approx = my_reconstr_function(*pt)
actual.append(f_actual)
approx.append(f_approx)
pts_total += abs(f_approx - f_actual)
print(pts_total / len(pts_2d))
plt.subplot(121)
plt.imshow(np.reshape(np.repeat(actual, 3), (50, 50, 3)))
plt.subplot(122)
plt.imshow(np.reshape(np.repeat(approx, 3), (50, 50, 3)))
Actual vs. reconstructed images
On the left is an image representation of the actual function, and on the right is the reconstructed function.
Where is my error? Changing the number of coefficients doesn't seem to affect much.
My problem was that I was testing samples outside the domain of the spherical harmonic basis functions. I specified the correct integration bounds, as the polar angle must be in the interval [0, pi] while the azimuthal angle belongs to [0, 2pi]. But for my test example, pts_2d was in the interval [0, 2pi]^2. All is well!
Reconstruction comparison after fixing the test code
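A minimal sketch of the corrected sampling grid, assuming the only change needed is restricting theta to [0, pi] and phi to [0, 2*pi] before building pts_2d:

theta_pts = np.linspace(0, np.pi, 50)     # polar angle domain
phi_pts = np.linspace(0, 2*np.pi, 50)     # azimuthal angle domain
pts_2d = np.reshape(np.stack(np.meshgrid(theta_pts, phi_pts), axis=-1), (-1, 2))
# each row of pts_2d is now a valid (theta, phi) pair for my_sphere_function
# and my_reconstr_function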
I am trying to build a linear regression model and find optimal values using the fmin_cg optimizer.
I have two functions for this job: linear_reg_cost, which is the cost function, and linear_reg_grad, which is the gradient of the cost function. Both functions take the same arguments.
def hypothesis(x,theta):
return np.dot(x,theta)
Cost function:
def linear_reg_cost(x_flatten, y, theta_flatten, lambda_, num_of_features,num_of_samples):
x = x_flatten.reshape(num_of_samples, num_of_features)
theta = theta_flatten.reshape(n,1)
loss = hypothesis(x,theta)-y
regularizer = lambda_*np.sum(theta[1:,:]**2)/(2*m)
j = np.sum(loss ** 2)/(2*m)
return j
Gradient function:
def linear_reg_grad(x_flatten, y, theta_flatten, lambda_, num_of_features,num_of_samples):
x = x_flatten.reshape(num_of_samples, num_of_features)
m,n = x.shape
theta = theta_flatten.reshape(n,1)
new_theta = np.zeros(shape=(theta.shape))
loss = hypothesis(x,theta)-y
gradient = np.dot(x.T,loss)
new_theta[0:,:] = gradient/m
new_theta[1:,:] = gradient[1:,:]/m + lambda_*(theta[1:,]/m)
return new_theta
and fmin_cg:
theta = np.ones(n)
from scipy.optimize import fmin_cg
new_theta = fmin_cg(f=linear_reg_cost, x0=theta, fprime=linear_reg_grad,args=(x.flatten(), y, lambda_, m,n))
Note: I flatten x as input and reshape it back into a matrix inside the cost and gradient functions.
The output error:
<ipython-input-98-b29c1b8f6e58> in linear_reg_grad(x_flatten, y, theta_flatten, lambda_, num_of_features, num_of_samples)
1 def linear_reg_grad(x_flatten, y, theta_flatten, lambda_,num_of_features, num_of_samples):
----> 2 x = x_flatten.reshape(num_of_samples, num_of_features)
3 m,n = x.shape
4 theta = theta_flatten.reshape(n,1)
5 new_theta = np.zeros(shape=(theta.shape))
ValueError: cannot reshape array of size 2 into shape (2,12)
Note: x.shape = (12,2), y.shape = (12,1), theta.shape = (2,). So num_of_features = 2 and num_of_samples = 12. But the error shows that my input x is being passed instead of theta. Why does this happen even when I explicitly assigned args in fmin_cg? And how should I solve this problem?
Thanks for any advice
All of your implementations are correct, but you have a small mistake.
Be careful to pass the arguments in the correct order to both of your functions.
Your problem is the order of num_of_features and num_of_samples. You can swap their positions in linear_reg_grad and linear_reg_cost, or alternatively change the order in the args argument of scipy.optimize.fmin_cg.
The second important thing is that the first argument of the function passed to fmin_cg is the variable you want to update at each iteration and find the optimal value for. So in your solution, the first parameter of your cost and gradient functions must be theta, not x, which is your input.
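A minimal sketch of the fix, keeping the rest of your implementation as it is: make theta_flatten the first parameter of both functions and pass the remaining values through args in the matching order (the regularized return value below is my assumption, since your snippet computed the regularizer but never used it).

def linear_reg_cost(theta_flatten, x_flatten, y, lambda_, num_of_features, num_of_samples):
    x = x_flatten.reshape(num_of_samples, num_of_features)
    theta = theta_flatten.reshape(num_of_features, 1)
    loss = hypothesis(x, theta) - y
    # the original snippet computed the regularizer but did not add it to the cost;
    # including it here assumes a regularized cost was intended
    regularizer = lambda_ * np.sum(theta[1:, :]**2) / (2 * num_of_samples)
    return np.sum(loss**2) / (2 * num_of_samples) + regularizer

# linear_reg_grad needs the same reordered signature; fmin_cg also expects the
# gradient as a flat vector, so return new_theta.flatten() there
new_theta = fmin_cg(f=linear_reg_cost, x0=theta, fprime=linear_reg_grad,
                    args=(x.flatten(), y, lambda_, n, m))  # n = num_of_features, m = num_of_samples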
I want to optimize a function by varying the parameters where two of the parameters are actually arrays. I've tried to do
...
# initial parameters
params0 = np.array([p1, p2, ... , p_array1, p_array2])
p_min = minimize(myfunc, params0, args)
...
where the pj's are scalars and p_array1 and p_array2 are arrays of the same length, but this gave me an error saying
ValueError: setting an array element with a sequence.
I've also tried passing p_array1 and p_array2 as scalars into myfunc and then creating predetermined arrays from those two inside myfunc (e.g. setting p_array1 = p_array1*np.arange(6) and similarly for p_array2), which eliminates the error, but I don't want them to be predetermined -- instead I want 'minimize' to figure out what they should be.
Is there any way that I can utilize one of Scipy's optimization functions without getting this error while still keeping p_array1 and p_array2 as arrays and not scalars?
EDIT
Sorry for being very broad but here is my code:
NOTE: 'myfunc' here is actually norm_residual.
import pandas as pd
import numpy as np
from scipy.integrate import odeint
def f(yvec, t, a, b, c, d, M, theta):
# the system of ODEs to be solved
x, y = yvec
dydt = [ a*x - b*y**2 + 1, -c*x - d*x*y + np.sum(M * np.cos(theta*t)) ]
return dydt
ni = 3 # the number of periodic forcing functions to add to the DE system
M = 0.56*np.random.rand(ni) # the initial amplitudes of forcing functions
theta = np.pi/6*np.arange(ni) # the initial coefficients of the forcing functions
# initialize the parameters
params0 = [0.75, 0.23, 1.0, 0.2, M, theta]
# grabbing the data to be used later
data = pd.read_csv('data.csv')
y_data = data['Y']
N = y_data.shape[0] #20
t = np.linspace(0, N, N) # array of t values to integrate over
yvec0 = [0.3, 0.34] # initial conditions for x and y respectively
def norm_residual(params, *args):
"""
Computes the L^2 norm of the residual of y and the data (y as defined above).
Input: params = array of parameters (scalars or arrays) for the DE system
args = other arguments to pass into the function f or to use
to compute the residual.
Output: err = L^2 error of the solution vector (scalar).
"""
data, yvec0, t = args
a, b, c, d, M, theta = params
sol = odeint(f, yvec0, t, args=(a, b, c, d, M, theta))
x = sol[:, 0]; y = sol[:, 1]
res = data - y
err = np.linalg.norm(res, 2)
return err
from scipy.optimize import minimize
p_min = minimize(norm_residual, params0, args=(y_data, yvec0, t))
print(p_min)
And the traceback
Traceback (most recent call last):
File "model_ex_1.py", line 62, in <module>
p_min = minimize(norm_residual, params0, args=(y_anom, yvec0, t))
File "/usr/lib/python2.7/dist-packages/scipy/optimize/_minimize.py", line 354, in minimize
x0 = np.asarray(x0)
File "/usr/lib/python2.7/dist-packages/numpy/core/numeric.py", line 482, in asarray
return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
You cannot put a list in a numpy array if the other elements are scalars.
>>> import numpy as np
>>> foo_array = np.array([1,2,3,[5,6,7]])
Traceback (most recent call last):
File "<pyshell#1>", line 1, in <module>
foo_array = np.array([1,2,3,[5,6,7]])
ValueError: setting an array element with a sequence.
It would be helpful if you posted myfunc, but you can do this -
def foo():
return [p0,p1,p2..pn]
params0 = numpy.array([foo(), p_array1, p_array2])
p_min = minimize(myfunc, params0, args)
OR from Multiple variables in SciPy's optimize.minimize
import scipy.optimize as optimize
def f(params):
# print(params) # <-- you'll see that params is a NumPy array
a, b, c = params # <-- for readability you may wish to assign names to the component variables
return a**2 + b**2 + c**2
initial_guess = [1, 1, 1]
result = optimize.minimize(f, initial_guess)
if result.success:
fitted_params = result.x
print(fitted_params)
else:
raise ValueError(result.message)
I figured it out! The solution that I found to work was to change
params0 = [0.75, 0.23, 1.0, 0.2, M, theta]
in line 6 to
params0 = np.array([ 0.75, 0.23, 1.0, 0.2, *M, *theta], dtype=np.float64)
and in my function definition of my system of ODEs to be solved, instead of having
def f(yvec, t, a, b, c, d, M, theta):
x, y = yvec
dydt = [ a*x - b*y**2 + 1, -c*x - d*x*y + np.sum(M * np.cos(theta*t)) ]
return dydt
I now have
def f(yvec, t, myparams):
x, y = yvec
a, b, c, d = myparams[:4]
ni = (myparams[4:].shape[0])//2 # halved b/c M and theta are of the same shape
M = myparams[4:ni+4]
theta = myparams[ni+4:]
dydt = [ a*x - b*y**2 + 1, -c*x - d*x*y + np.sum(M * np.cos(theta*t)) ]
return dydt
NOTE: I had to add "dtype=np.float64" for 'params0' because I was getting the error
AttributeError: 'numpy.float64' object has no attribute 'cos'
when I did not have it there and it appears that 'cos' does not know how to handle 'ndarray' objects. The workaround can be found here.
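For completeness, here is my guess at the matching change on the calling side (not spelled out above): since params is now one flat vector, norm_residual can pass it straight through to odeint instead of unpacking six named values.

def norm_residual(params, *args):
    data, yvec0, t = args
    sol = odeint(f, yvec0, t, args=(params,))  # f unpacks a, b, c, d, M, theta itself
    y = sol[:, 1]
    res = data - y
    return np.linalg.norm(res, 2)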
Thanks everyone for the suggestions!
I want to fit the function f(x) = b + a / x to my data set. For that I found that scipy's least_squares from optimize was suitable.
My code is as follows:
x = np.asarray(range(20,401,20))
y contains distances that I calculated; it is an array of length 20. Here are just random numbers as an example:
y = np.random.rand(20)
Initial guesses of the params a and b:
params = np.array([1,1])
Function to minimize
def funcinv(x):
return params[0]/x+params[1]
res = least_squares(funinv, params, args=(x, y))
Error given:
return np.atleast_1d(fun(x, *args, **kwargs))
TypeError: funinv() takes 1 positional argument but 3 were given
How can I fit my data?
To add a little clarity: there are two related problems:
Minimizing a function
Fitting a model to data
Fitting a model to observed data means finding the model parameters that minimize some measure of error between the model's output and the observed data.
The least_squares method just minimizes the following function with respect to x (x can be a vector):
F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
(rho is a loss function and the default is rho(x) = x, so don't mind it for now)
least_squares(func, x0) expects that call to func(x) will return a vector [a1, a2, a3, ...] for which a sum of squares will be computed: S = 0.5 * (a1^2 + a2^2 + a3^2 + ...).
least_squares will tweak x0 to minimize S.
Thus, in order to use it to fit a model to data, one must construct a function that measures the error between the model and the actual data - the residuals - and then minimize that residuals function. In your case you can write it as follows:
import numpy as np
from scipy.optimize import least_squares
x = np.asarray(range(20,401,20))
y = np.random.rand(20)
params = np.array([1,1])
def funcinv(x, a, b):
return b + a/x
def residuals(params, x, data):
# evaluates function given vector of params [a, b]
# and return residuals: (observed_data - model_data)
a, b = params
func_eval = funcinv(x, a, b)
return (data - func_eval)
res = least_squares(residuals, params, args=(x, y))
This gives a result:
print(res)
...
message: '`gtol` termination condition is satisfied.'
nfev: 4
njev: 4
optimality: 5.6774618339971994e-10
status: 1
success: True
x: array([ 6.89518618, 0.37118815])
However, since the residuals function is pretty much the same every time (res = observed_data - model_data), there is a shortcut in scipy.optimize called curve_fit: curve_fit(func, xdata, ydata, p0). curve_fit builds the residuals function automatically, so you can simply write:
import numpy as np
from scipy.optimize import curve_fit
x = np.asarray(range(20,401,20))
y = np.random.rand(20)
params = np.array([1,1])
def funcinv(x, a, b):
return b + a/x
res = curve_fit(funcinv, x, y, params)
print(res) # ... array([ 6.89518618, 0.37118815]), ...
I am coding an algorithm for active learning, using the L-BFGS algorithm from scipy.optimize. I need to optimize four parameters: alpha, beta, W and gamma.
However, it does not work; I get the error
optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0=np.array(alpha,beta,W,gamma), fprime=func_grad)
ValueError: only 2 non-keyword arguments accepted
Note that, in the last line of the code, x0 is the initial guess for the four parameters. If I change it to x0=np.array((alpha,beta,W,gamma),dtype=float), I get the error
ValueError: setting an array element with a sequence.
I am not sure why the error happens.
from sys import argv
import numpy as np
import scipy as sp
import pandas as pd
import scipy.stats as sps
num_labeler = 3
num_instance = 5
X = np.array([[1,1,1,1],[2,2,2,2],[3,3,3,3],[4,4,4,4],[5,5,5,5]])
Z = np.array([1,0,1,0,1])
Y = np.array([[1,0,1],[0,1,0],[0,0,0],[1,1,1],[1,0,0]])
W = np.array([[1,1,1,1],[2,2,2,2],[3,3,3,3]])
gamma = np.ones(5)
alpha = np.ones(4)
beta = 1
def log_p_y_xz(yit,zi,sigmati): #log P(y_it|x_i,z_i)
return np.log(sps.norm(zi,sigmati).pdf(yit))#tested
def log_p_z_x(alpha,beta,xi): #log P(z_i=1|x_i)
return -np.log(1+np.exp(-np.dot(alpha,xi)-beta))#tested
def sigma_eta_ti(xi, w_t, gamma_t): # 1+exp(-w_t x_i -gamma_t)^-1
return 1/(1+np.exp(-np.dot(xi,w_t)-gamma_t)) #tested
def df_alpha(X,Y,Z,W,alpha,beta,gamma):#df/dalpha
return np.sum((2/(1+np.exp(-np.dot(alpha,X[i])-beta))-1)*np.exp(-np.dot(alpha,X[i])-beta)*X[i]/(1+np.exp(-np.dot(alpha,X[i])-beta))**2 for i in range (num_instance))
def df_beta(X,Y,Z,W,alpha,beta,gamma):#df/dbeta
return np.sum((2/(1+np.exp(-np.dot(alpha,X[i])-beta))-1)*np.exp(-np.dot(alpha,X[i])-beta)/(1+np.exp(-np.dot(alpha,X[i])-beta))**2 for i in range (num_instance))
def df_w(X,Y,Z,W,alpha,beta,gamma):#df/sigma * sigma/dw
return np.sum(np.sum((-3)*(Y[i][t]**2-(-np.log(1+np.exp(-np.dot(alpha,X[i])-beta)))*(2*Y[i][t]-1))*(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**4)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))*X[i]+(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**2)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))*X[i]for t in range(num_labeler)) for i in range (num_instance))
def df_gamma(X,Y,Z,W,alpha,beta,gamma):#df/sigma * sigma/dgamma
return np.sum(np.sum((-3)*(Y[i][t]**2-(-np.log(1+np.exp(-np.dot(alpha,X[i])-beta)))*(2*Y[i][t]-1))*(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**4)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))+(1/(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))**2)*(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t])))*(1-(1/(1+np.exp(-np.dot(X[i],W[t])-gamma[t]))))for t in range(num_labeler)) for i in range (num_instance))
def func(para, *args):
#the function to minimize
#parameters
alpha = para[0]#alpha should be an array
beta = para[1]
W = para[2]
gamma = para[3]
#args
X = args [0]
Y = args[1]
Z = args[2]
return np.sum(np.sum(log_p_y_xz(Y[i][t], Z[i], sigma_eta_ti(X[i],W[t],gamma[t]))+log_p_z_x(alpha, beta, X[i]) for t in range(num_labeler)) for i in range (num_instance))
def func_grad(para, *args):
#para have 4 values
alpha = para[0]#alpha should be an array
beta = para[1]
W = para[2]
gamma = para[3]
#args
X = args [0]
Y = args[1]
Z = args[2]
#gradients
d_f_a = df_alpha(X,Y,Z,W,alpha,beta,gamma)
d_f_b = df_beta(X,Y,Z,W,alpha,beta,gamma)
d_f_w = df_w(X,Y,Z,W,alpha,beta,gamma)
d_f_g = df_gamma(X,Y,Z,W,alpha,beta,gamma)
return np.array([d_f_a, d_f_b,d_f_w,d_f_g])
optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0 =np.array(alpha,beta,W,gamma), fprime = func_grad)
The scipy optimization routines can only optimize a one-dimensional vector of parameters. It looks like you're trying to optimize a tuple of values which contains a mix of scalars, vectors, and matrices.
What you'll want to do is flatten all the relevant parameter values into a one-dimensional array, and have your func unwrap and set these appropriately.
Edit: I'd proceed by creating a convenience function which extracts your parameters; for example:
def get_params(para):
# extract parameters from 1D parameter vector
assert len(para) == 22
alpha = para[0:4]
beta = para[4]
W = para[5:17].reshape(3, 4)
gamma = para[17:]
return alpha, beta, gamma, W
def func(para, *args):
#the function to minimize
#parameters
alpha, beta, gamma, W = get_params(para)
#args
X = args [0]
Y = args[1]
Z = args[2]
return np.sum(np.sum(log_p_y_xz(Y[i][t], Z[i], sigma_eta_ti(X[i],W[t],gamma[t]))+log_p_z_x(alpha, beta, X[i]) for t in range(num_labeler)) for i in range (num_instance))
def func_grad(para, *args):
#para have 4 values
alpha, beta, gamma, W = get_params(para)
#args
X = args [0]
Y = args[1]
Z = args[2]
#gradients
d_f_a = df_alpha(X,Y,Z,W,alpha,beta,gamma)
d_f_b = df_beta(X,Y,Z,W,alpha,beta,gamma)
d_f_w = df_w(X,Y,Z,W,alpha,beta,gamma)
d_f_g = df_gamma(X,Y,Z,W,alpha,beta,gamma)
return np.array([d_f_a, d_f_b,d_f_w,d_f_g])
x0 = np.concatenate([np.ravel(alpha), np.ravel(beta), np.ravel(W), np.ravel(gamma)])
optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0=x0, fprime=func_grad)
It doesn't run because your func() and func_grad() require additional arguments which are not specified in your code snippet, but this change solves the specific problem you were asking about.
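For reference, a sketch of the call once those extra arguments are supplied (matching the args used in the follow-up question above; the exact tuple depends on what func and func_grad expect):

optimLogitLBFGS = sp.optimize.fmin_l_bfgs_b(func, x0=x0, args=(X, Y, Z), fprime=func_grad)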