dimensions are wrong when function called through lambda - python

I have issues with the following code snippet, where I optimize a function (minimizing the volatility).
from scipy import optimize as sco
import numpy as np

def risk_measure(covMatrix, weights):
    risk = np.dot(weights, np.dot(covMatrix, weights))
    return risk

prescribed_esg = 6  # ESG score between 0 and 10, used as threshold in the esg_constraint

# Covariance and return matrix
V = np.matrix([[84.76695659, 20.8854772, 20.62182415, 74.73652696, 14.35995947],
               [20.8854772, 35.22429277, 12.95439707, 32.22912903, 12.96449085],
               [20.62182415, 12.95439707, 44.02079739, 38.73627316, 9.46608475],
               [74.73652696, 32.22912903, 38.73627316, 178.86640813, 33.40281336],
               [14.35995947, 12.96449085, 9.46608475, 33.40281336, 32.38514103]])
R = np.matrix([[-0.32264539, -0.08469428, 1.27628749, -0.23207085, 0.21012106]]).T

# Mean ESG score of each company
esgarr = np.matrix([[8.24336898, 4.6373262, 8.30657754, 4.65406417, 3.43620321]]).T

# Bounds and constraints
N = len(R)  # number of instruments
bounds = ((-10, 10),) * N  # allow shorting; bounds of stocks
constraints = {'type': 'eq', 'fun': lambda weights: weights.sum() - 1}
esg_constraint = {'type': 'eq', 'fun': lambda weights: np.dot(weights, esgarr) - prescribed_esg}

esgmvp = sco.minimize(lambda x: risk_measure(V, x),              # function to be minimized
                      N * [1 / N],                               # initial guess
                      bounds=bounds,                             # boundary conditions
                      constraints=[constraints, esg_constraint]) # equality constraints

esgmvp_weights = list(esgmvp['x'])
esgmvp_risk = esgmvp['fun']
esgmvp_esg = np.dot(esgmvp_weights, esgarr)
With the error message
<ipython-input-252-0d6bf5d30ccf> in risk_measure(covMatrix, weights)
3
4 def risk_measure(covMatrix, weights):
----> 5 risk = np.dot(weights, np.dot(covMatrix, weights))
6 return risk
7
<__array_function__ internals> in dot(*args, **kwargs)
ValueError: shapes (5,) and (1,5) not aligned: 5 (dim 0) != 1 (dim 0)
I am able to get results if I create a standalone matrix of weights, such as
weights = np.matrix([[1, 1, 1, 1, 1]])
risk = np.dot(weights, np.dot(V, weights.T))
but this does not work when transposing in my original function.

The following solved it
V = np.squeeze(np.asarray(V))
esg_constraint = {'type': 'eq', 'fun': lambda weights: np.dot(weights, esgarr).sum() - prescribed_esg}
I also edited the function:
def risk_measure(covMatrix, weights):
    risk = np.dot(weights.T, np.dot(covMatrix, weights))
    return risk
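Some context on why this works (my explanation, not part of the original post): scipy.optimize.minimize passes weights to the objective and constraint functions as a 1-D ndarray of shape (5,), but V and esgarr are np.matrix objects, which are always 2-D. np.dot(V, weights) therefore comes back as a (1, 5) matrix, and the outer np.dot cannot align (5,) with (1, 5), which is exactly the ValueError above. Converting to plain ndarrays keeps everything 1-D; a minimal sketch, reusing V and esgarr from the snippet above (V_arr, esg_arr and w are illustrative names):
import numpy as np

V_arr = np.squeeze(np.asarray(V))          # (5, 5) ndarray instead of np.matrix
esg_arr = np.squeeze(np.asarray(esgarr))   # (5,) ndarray

def risk_measure(covMatrix, weights):
    # w^T C w works directly for 1-D weight vectors
    return np.dot(weights, np.dot(covMatrix, weights))

w = np.full(5, 1 / 5)                      # the equal-weight initial guess
print(risk_measure(V_arr, w), np.dot(w, esg_arr))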

Related

Python: ValueError: Input must be 1- or 2-d

I have this code to estimate a model using a Tobit regression in Python. The code is split into three parts: data definition, the estimator builder, and estimation.
import numpy as np
from scipy.optimize import minimize

# define the dependent variable and independent variables
X = data.iloc[:, 1:]
y = data.iloc[:, 0]

# Add a column of ones to the independent variables for the constant term
X = np.c_[np.ones(X.shape[0]), X]

# Define the likelihood function for the Tobit model
def likelihood(params, y, X, lower, upper):
    beta = params[:-1]
    sigma = params[-1]
    mu = X @ beta
    prob = (1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((y - mu) / sigma)**2))
    prob[y < lower] = 0
    prob[y > upper] = 0
    return -np.log(prob).sum()

# Set the initial values for the parameters and the lower and upper bounds for censoring
params_init = np.random.normal(size=X.shape[1] + 1)
bounds = [(None, None) for i in range(X.shape[1])] + [(1e-10, None)]

# Perform the MLE estimation
res = minimize(likelihood, params_init, args=(y, X, 0, 100), bounds=bounds, method='L-BFGS-B')

# Extract the estimated parameters and their standard errors
params = res.x
stderr = np.sqrt(np.diag(res.hess_inv))

# Print the results
print(f'Coefficients: {params[:-1]}')
print(f'Standard Errors: {stderr[:-1]}')
print(f'Sigma: {params[-1]:.4f}')
Why am I getting this error message?
Thank you.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-245-5f39f416cc07> in <module>
31 # Extract the estimated parameters and their standard errors
32 params = res.x
---> 33 stderr = np.sqrt(np.diag(res.hess_inv))
34
35 # Print the results
/opt/anaconda3/lib/python3.8/site-packages/numpy/core/overrides.py in diag(*args, **kwargs)
/opt/anaconda3/lib/python3.8/site-packages/numpy/lib/twodim_base.py in diag(v, k)
307 return diagonal(v, k)
308 else:
--> 309 raise ValueError("Input must be 1- or 2-d.")
310
311
ValueError: Input must be 1- or 2-d.
EDIT: If you want to look at the kind of data I'm dealing with, you can simulate it using these lines of code I just wrote:
import numpy as np
import pandas as pd

data = pd.DataFrame()

# Append 'interview probabilities' for individuals with and without disabilities
interview_prob_disabled = np.random.normal(38.63, 28.72, 619)
interview_prob_enabled = np.random.normal(44.27, 28.19, 542)
interview_prob = np.append(interview_prob_disabled, interview_prob_enabled)

# Clip the variable so it is neither negative nor above 100, and round it to integers
interview_prob = np.clip(interview_prob, 0, 100)
interview_prob = np.round(interview_prob)

# Add the 'interview probabilities' variable to the dataframe
data['Interview Probabilities'] = interview_prob

# Add other variables such as age, gender, employment status, education, etc.
data['Age'] = np.random.randint(18, 65, size=len(interview_prob))
data['Gender'] = np.random.choice(['Male', 'Female'], size=len(interview_prob))
data['Employment Status'] = np.random.choice(['Employed', 'Unemployed', 'Retired'], size=len(interview_prob))
data['Education Level'] = np.random.choice(['High School', 'College', 'Vocational', 'Graduate School'], size=len(interview_prob))

# Add a 'disability status' variable as a dummy
data['Disability Status'] = np.append(np.repeat('Disabled', 619), np.repeat('Non-disabled', 542))

# Encode the categorical variables numerically
data['Gender'] = data['Gender'].map({'Male': 0, 'Female': 1})
data['Employment Status'] = data['Employment Status'].map({'Employed': 0, 'Unemployed': 1, 'Retired': 2})
data['Education Level'] = data['Education Level'].map({'High School': 0, 'College': 1, 'Vocational': 2, 'Graduate School': 3})
data['Disability Status'] = data['Disability Status'].map({'Disabled': 1, 'Non-disabled': 0})

# Print the df
data
The problem is that your solver, L-BFGS-B, returns a LbfgsInvHessProduct object (a linear operator) from .hess_inv instead of a NumPy array (which a solver such as BFGS would give).
One solution to your problem would be to use res.hess_inv.todense() instead.
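Assuming the rest of the script stays the same, a minimal sketch of that fix would be:
# res.hess_inv is an LbfgsInvHessProduct (a scipy LinearOperator), so materialize it first
stderr = np.sqrt(np.diag(res.hess_inv.todense()))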

How to constrain the weight of characteristic variables in regression

I am facing a problem where, for a data sample (let's say 10 continuous variables and one dependent variable), I need to fit a model for prediction. I would like to constrain the weights of all the variables to within a particular bound, like abs(0.2), meaning no weight should be greater than 0.2 or less than -0.2. I tried lasso and ridge regression in sklearn.linear_model (and also ElasticNet) to control the weights, but it did not work well: there were always one or two extremely large weights, and when I used a large alpha the R-squared showed the model was really bad. I tried to write my own method, but I could only constrain the sum of the weights, not each individual weight. SVR gives a pretty close answer, but I still want to ask whether there are good choices for multiple regression with self-defined constraints.
import numpy as np
from scipy.optimize import shgo

def my_general_linear_model_func(A1, b1):
    num_x = np.shape(A1)[1]

    def my_func(x):
        ls = 0.5 * (b1 - np.dot(A1, x))**2
        result = np.sum(ls)
        return result

    def g1(x):
        return np.sum(x)      # sum of X >= 0

    def g2(x):
        return 1 - np.sum(x)  # sum of X <= 1

    cons = ({'type': 'ineq', 'fun': g1},
            {'type': 'ineq', 'fun': g2})

    x0 = np.zeros(num_x)
    bnds = [(0, 1)]
    for i in range(num_x - 1):
        bnds.append((0, 1))

    res1 = shgo(my_func,
                bounds=bnds,
                constraints=cons)
    return res1

A1 = np.array([[0.12, 5.96, 3.14], [0.68, 7.89, 4.56]])
b1 = np.array([3, 5])
my_general_linear_model_func(A1, b1)
The result:
fun: 0.07651391974288956
funl: array([0.07651392, 0.11079534, 0.2564125 ])
message: 'Optimization terminated successfully.'
nfev: 53
nit: 2
nlfev: 49
nlhev: 0
nljev: 12
success: True
x: array([1.12339358e-16, 5.62146099e-02, 9.43785390e-01])
xl: array([[1.12339358e-16, 5.62146099e-02, 9.43785390e-01],
[3.90241087e-01, 5.00000000e-01, 1.09758913e-01],
[5.00000000e-01, 5.00000000e-01, 0.00000000e+00]])
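The per-weight cap asked about maps directly onto the bounds argument used above: each coefficient gets its own (lower, upper) pair. A minimal sketch (my illustration, not from the original post) that keeps every weight within abs(0.2) for the same toy data:
import numpy as np
from scipy.optimize import shgo

A1 = np.array([[0.12, 5.96, 3.14], [0.68, 7.89, 4.56]])
b1 = np.array([3, 5])
num_x = A1.shape[1]

def my_func(x):
    return np.sum(0.5 * (b1 - np.dot(A1, x))**2)

bnds = [(-0.2, 0.2)] * num_x   # each individual weight constrained to [-0.2, 0.2]
res = shgo(my_func, bounds=bnds)
print(res.x)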

PYTHON : IndexError: index 2 is out of bounds for axis 0 with size 2

This was my code initially:
Here X is the array of data points with dimensions (m x n), where m is the number of data points to predict and n is the number of features without the bias term.
y contains the data labels and has shape (m,).
lambda_ is the regularization term.
import numpy as np
from scipy import optimize

def oneVsAll(X, y, num_labels, lambda_):
    # used to find the optimal parameters theta for each label against the others
    # X (m, n)
    # y (m,)
    # num_labels : possible number of labels
    # lambda_ : regularization param
    # all_theta : trained params for logistic reg for each class,
    #             hence (k, n+1) where k is #labels and n+1 is #features with bias
    m, n = X.shape
    all_theta = np.array((num_labels, n + 1))
    X = np.concatenate([np.ones((m, 1)), X], axis=1)
    for k in np.arange(num_labels):
        # y == k will generate an array with the shape of y, with 1 only at indices
        # whose value equals k and 0 elsewhere
        initial_theta = np.zeros(n + 1)
        options = {"maxiter": 50}
        res = optimize.minimize(lrCostFunction,
                                initial_theta, args=(X, y == k, lambda_),
                                jac=True, method='CG',
                                options=options)
        all_theta[k] = res.x
    return all_theta

lambda_ = 0.1
all_theta = oneVsAll(X, y, num_labels, lambda_)
The error I got was :
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-45-f9501694361e> in <module>()
1 lambda_ = 0.1
----> 2 all_theta = oneVsAll(X,y,num_labels,lambda_)
<ipython-input-44-05a9b582ccaf> in oneVsAll(X, y, num_labels, lambda_)
20 jac = True,method = 'CG',
21 options = options)
---> 22 all_theta[k] = res.x
23 return all_theta
ValueError: setting an array element with a sequence.
Then after debugging, I changed the code to :
from scipy import optimize

def oneVsAll(X, y, num_labels, lambda_):
    # used to find the optimal parameters theta for each label against the others
    # X (m, n)
    # y (m,)
    # num_labels : possible number of labels
    # lambda_ : regularization param
    # all_theta : trained params for logistic reg for each class,
    #             hence (k, n+1) where k is #labels and n+1 is #features with bias
    m, n = X.shape
    all_theta = np.array((num_labels, n + 1), dtype="object")
    X = np.concatenate([np.ones((m, 1)), X], axis=1)
    for k in np.arange(num_labels):
        # y == k will generate an array with the shape of y, with 1 only at indices
        # whose value equals k and 0 elsewhere
        initial_theta = np.zeros(n + 1)
        options = {"maxiter": 50}
        res = optimize.minimize(lrCostFunction,
                                initial_theta, args=(X, y == k, lambda_),
                                jac=True, method='CG',
                                options=options)
        all_theta[k] = res.x
    return all_theta
Now the error I am getting is :
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-47-f9501694361e> in <module>()
1 lambda_ = 0.1
----> 2 all_theta = oneVsAll(X,y,num_labels,lambda_)
<ipython-input-46-383fc22e26cc> in oneVsAll(X, y, num_labels, lambda_)
20 jac = True,method = 'CG',
21 options = options)
---> 22 all_theta[k] = res.x
23 return all_theta
IndexError: index 2 is out of bounds for axis 0 with size 2
How can I correct this?
You create all_theta by running:
all_theta = np.array((num_labels, n+1), dtype="object")
This instruction actually creates an array containing just 2 elements (its shape is (2,)), holding the two values you passed, whereas you probably intended to pass the shape of the array to be created.
Change this instruction to:
all_theta = np.empty((num_labels,n+1))
Specification of dtype (in my opinion) is not necessary.
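A quick illustration of the difference (my example, not part of the original answer):
import numpy as np

a = np.array((3, 5))     # a 1-D array holding the two values 3 and 5 -> shape (2,)
b = np.empty((3, 5))     # an uninitialized 3 x 5 array               -> shape (3, 5)
print(a.shape, b.shape)  # (2,) (3, 5)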

Optimization using scipy

I am trying to build an efficient frontier as in the Markowitz problem.
I have written the code below, but I get the error "ValueError: Objective function must return a scalar". I have tested 'fun' with some values; for example, I entered the following in the console:
W = np.ones([n])/n # start optimization with equal weights
cov_matrix = returns.cov()
fun = 0.5*np.dot(np.dot(W, cov_matrix), W) # variance of the portfolio
fun
The output is 0.00015337622774133828, which is a scalar.
I don't know what might be wrong. Any help is appreciated.
Code:
from scipy.optimize import minimize
import pandas as pd
import numpy as np
from openpyxl import load_workbook

wb = load_workbook('path/Assets_3.xlsx')  # in this workbook there is data for returns.

# The next lines clean the unnecessary first column and first row.
ws = wb.active
df = pd.DataFrame(ws.values)
df1 = df.drop(0, axis=1)
df1 = df1.drop(0)
df1 = df1.astype(float)

rf = 0.05
r_bar = 0.05
returns = df1.copy()

def efficient_frontier(rf, r_bar, returns):
    n = len(returns.transpose())
    W = np.ones([n]) / n                          # start optimization with equal weights
    exp_ret = returns.mean()
    cov_matrix = returns.cov()
    fun = 0.5 * np.dot(np.dot(W, cov_matrix), W)  # variance of the portfolio
    cons = ({'type': 'eq', 'fun': lambda W: sum(W) - 1.},
            {'type': 'ineq', 'fun': lambda W: np.dot(exp_ret, W) - r_bar})
    bnds = [(0., 1.) for i in range(n)]           # weights between 0..1
    res = minimize(fun, W, (returns, cov_matrix, rf),
                   method='SLSQP', bounds=bnds, constraints=cons)
    return res

x = efficient_frontier(rf, r_bar, returns)
x
Some Data
1 2 3
1 0.060206 0.005781 0.001117
2 0.006463 -0.007390 0.001133
3 -0.003211 -0.015730 0.001167
4 0.044227 -0.006250 0.001225
5 -0.040571 -0.006910 0.001292
6 -0.007900 -0.006160 0.001208
7 0.068702 0.013836 0.001300
8 0.039286 0.009854 0.001350
9 0.012457 -0.007950 0.001358
10 -0.013758 0.001021 0.001283
11 -0.002616 -0.013600 0.001300
12 0.059004 -0.006090 0.001442
13 0.015566 0.002818 0.001308
14 -0.036454 0.001395 0.001283
15 0.058899 0.011072 0.001325
16 -0.043086 0.017070 0.001308
17 0.023156 -0.003350 0.001392
18 0.063705 0.000301 0.001417
19 0.017628 -0.001960 0.001508
20 -0.014567 -0.006990 0.001525
21 -0.007191 -0.013000 0.001425
22 -0.000815 0.014773 0.001450
23 0.046493 -0.001540 0.001542
24 0.051832 -0.008580 0.001742
25 -0.007151 0.001177 0.001633
26 -0.018196 -0.008680 0.001642
27 -0.013513 -0.008810 0.001675
28 -0.026493 -0.010510 0.001825
29 -0.003249 -0.014750 0.001800
30 0.001222 0.022258 0.001758
This code is a mess and while I can show you something which runs, that does not mean anything.
You will see convergence to your starting point, whatever that means for your task! It's a strong indicator that something is still very wrong (it might be the underlying theory)!
Some additional remarks:
scipy's optimizers are built to work with numpy arrays, not pandas DataFrame or Series objects!
the only things in your original question that hinted at pandas usage were the variable name df and returns.cov(), which does not exist for numpy arrays!
rf is never used anywhere!
there are multiple things in minimize's args which are not used!
it does not feel like a problem one should use scipy's optimizers for! (but it's possible; here we are paying for numerical differentiation, for example)
cvxpy would probably be a much better approach (cleaner, faster, more accurate) if I interpret the problem correctly (I did not analyze it much)
but the same rules apply: some Python knowledge is needed!
Code:
from scipy.optimize import minimize
import numpy as np
import pandas as pd

rf = 0.05
r_bar = 0.05
returns = pd.DataFrame(np.random.randn(30, 3), columns=list('ABC'))  # PANDAS DF

cov_matrix = returns.cov().to_numpy()  # use PANDAS one last time
                                       # but result = np.array!
returns = returns.to_numpy()           # From now on: np-only!

def fun(x, returns, cov_matrix, rf):
    return 0.5 * np.dot(np.dot(x, cov_matrix), x)

def efficient_frontier(rf, r_bar, returns):
    n = len(returns.transpose())
    W = np.ones([n]) / n                                       # start optimization with equal weights
    exp_ret = returns.mean()
    cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1.},   # let's use numpy here
            {'type': 'ineq', 'fun': lambda x: np.dot(exp_ret, x) - r_bar})
    bnds = [(0., 1.) for i in range(n)]                        # weights between 0..1
    res = minimize(fun, W, (returns, cov_matrix, rf),
                   method='SLSQP', bounds=bnds, constraints=cons)
    return res

x = efficient_frontier(rf, r_bar, returns)
print(x)
Output:
A B C
A 0.813375 -0.001370 0.173901
B -0.001370 1.482756 0.380514
C 0.173901 0.380514 1.285936
fun: 0.2604530793556774
jac: array([ 0.32863522, 0.62063321, 0.61345008])
message: 'Optimization terminated successfully.'
nfev: 35
nit: 7
njev: 3
status: 0
success: True
x: array([ 0.33333333, 0.33333333, 0.33333333])
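For completeness, a rough sketch of the cvxpy formulation mentioned in the remarks above (my illustration under the same data assumptions; it reuses cov_matrix, returns, and r_bar from the code, and recomputes exp_ret per asset since the constraint needs a vector there):
import cvxpy as cp
import numpy as np

exp_ret = returns.mean(axis=0)   # per-asset mean returns (assumption for this sketch)
n = cov_matrix.shape[0]

w = cp.Variable(n)
objective = cp.Minimize(0.5 * cp.quad_form(w, cov_matrix))   # portfolio variance
constraints = [cp.sum(w) == 1,                               # fully invested
               exp_ret @ w >= r_bar,                         # minimum expected return
               w >= 0,
               w <= 1]
cp.Problem(objective, constraints).solve()
print(w.value)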

Having trouble with scipy Minimize function, it is giving me odd results

Created an objective function
Added constraints
The problem is that no matter what initial guess I use, the minimize function just keeps returning that number. For example, if I use 15 for the initial guess, the solver will not try any other number and says the answer is 15. I'm sure there is an issue with the code but I am not sure where.
CODE BELOW:
from scipy.optimize import minimize
import numpy as np
import pandas as pd

#----------------------------------------------------
#--------           Create Function       -----------
#----------------------------------------------------
def MovingAverage(Input, N, test=0):
    # Create data frame
    df = pd.DataFrame(Input, columns=['Revenue'])
    # Add columns
    df['CummSum'] = df['Revenue'].cumsum()
    df['Mavg'] = df['Revenue'].rolling(int(N)).mean()  # pd.rolling_mean in older pandas
    df['Error'] = df['Revenue'] - df['Mavg']
    df['MFE'] = (df['Error']).mean()
    df['MAD'] = np.fabs(df['Error']).mean()
    df['MSE'] = np.sqrt(np.square(df['Error']).mean())
    df['TS'] = np.sum(df['Error']) / df['MAD']
    print(N, df.MAD[0])
    if test == 0:
        return df.MAD[0]
    else:
        return df

#----------------------------------------------------
#--------              Input              -----------
#----------------------------------------------------
data = [1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]

#----------------------------------------------------
#--------              SOLVER             -----------
#----------------------------------------------------
## Objective Function
fun = lambda x: MovingAverage(data, x[0])

## Constraints
cons = ({'type': 'ineq', 'fun': lambda x: x[0] - 2},          # N >= 2
        {'type': 'ineq', 'fun': lambda x: len(data) - x[0]})  # N <= len(data)

## Bounds (not sure what this is yet)
bnds = (None, None)

## Solver
res = minimize(fun, 15, method='SLSQP', bounds=bnds, constraints=cons)

##print(res)
##print(res.status)
##print(res.success)
##print(res.njev)
##print(res.nfev)
##print(res.fun)
##for i in res.x:
##    print(i)
##print(res.message)
##for i in res.jac:
##    print(i)
##print(res.nit)

# print final results
result = MovingAverage(data, res.x[0], 1)
print(result)
List of possible values:
2 = 0.142857142857,
3 = 0.25641025641,
4 = 0.333333333333,
5 = 0.363636363636,
6 = 0.333333333333,
7 = 0.31746031746,
8 = 0.3125,
9 = 0.31746031746,
10 = 0.333333333333,
11 = 0.363636363636,
12 = 0.416666666667,
13 = 0.487179487179,
14 = 0.571428571429,
15 = 0.666666666667
Your function is piecewise constant between integer input values, as seen in the plot below (plotted in steps of 0.1 on the x axis):
So the derivative is zero at almost all points, and that's why a gradient-based minimization method will return any given initial point as a local minimum.
To rescue the situation, you could think about using interpolation in the objective function to get intermediate function values for non-integer input values. If you combine this with a gradient-based minimization, it might find some point around 8 as a local minimum when starting at 15.
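A minimal sketch of that interpolation idea (my addition, not from the original answer), reusing MovingAverage and data from the question:
import numpy as np
from scipy.interpolate import interp1d
from scipy.optimize import minimize

Ns = np.arange(2, 16)                                   # the feasible integer window sizes
mads = np.array([MovingAverage(data, n) for n in Ns])   # objective sampled at the integers
smooth = interp1d(Ns, mads, kind='cubic')               # smooth surrogate of the objective

res = minimize(lambda x: smooth(x[0]), x0=[15.0],
               bounds=[(2, 15)], method='L-BFGS-B')
print(res.x)  # with luck this moves downhill toward the dip around N = 8 instead of staying at 15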
