I have been attempting to calibrate my model, but I am running into issues with the scipy.optimize module. I have tried various SciPy optimizers, but they all return the error "TypeError: can only concatenate tuple (not "list") to tuple". Does anyone know how to resolve this issue? Thank you for your time.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from numba import jit, njit, float64
from scipy.optimize import fmin_slsqp
i=complex(0,1)
sigma, kappa, theta, volvol, rho = 0.1, 0.1, 0.1, 0.1, 0.1
params=[sigma, kappa, theta, volvol, rho]
strikes=[4650,4655,4660,4665,4670]
maturities=[1/48,2/48,3/48,1/12,5/48]
marketPrices=[70.00,66.70,63.50,60.35,57.30,82.50,79.20,76.0,72.80,69.70,92.65,89.35,86.10,82.90,79.75,101.60,98.30,95.10,91.90,88.75,109.85,106.60,103.35,100.20,97.00]
marketPrices=np.array(marketPrices)
rates=[0.05,0.05,0.05,0.05,0.05]
St=4662
np.shape(marketPrices)
#jit
def fHeston(s, St, K, r, T, sigma, kappa, theta, volvol, rho):
    # To be used a lot
    prod = rho * sigma * i * s
    # Calculate d
    d1 = (prod - kappa)**2
    d2 = (sigma**2) * (i*s + s**2)
    d = np.sqrt(d1 + d2)
    # Calculate g
    g1 = kappa - prod - d
    g2 = kappa - prod + d
    g = g1/g2
    # Calculate first exponential
    exp1 = np.exp(np.log(St) * i * s) * np.exp(i * s * r * T)
    exp2 = 1 - g * np.exp(-d * T)
    exp3 = 1 - g
    mainExp1 = exp1 * np.power(exp2/exp3, -2 * theta * kappa/(sigma**2))
    # Calculate second exponential
    exp4 = theta * kappa * T/(sigma**2)
    exp5 = volvol/(sigma**2)
    exp6 = (1 - np.exp(-d * T))/(1 - g * np.exp(-d * T))
    mainExp2 = np.exp((exp4 * g1) + (exp5 * g1 * exp6))
    return (mainExp1 * mainExp2)
#jit(forceobj=True)
def priceHestonMid(St, K, r, T, sigma, kappa, theta, volvol, rho):
    P, iterations, maxNumber = 0, 1000, 100
    ds = maxNumber/iterations
    element1 = 0.5 * (St - K * np.exp(-r * T))
    # Calculate the complex integral
    # Using j instead of i to avoid confusion
    for j in range(1, iterations):
        s1 = ds * (2*j + 1)/2
        s2 = s1 - i
        numerator1 = fHeston(s2, St, K, r, T, sigma, kappa, theta, volvol, rho)
        numerator2 = K * fHeston(s1, St, K, r, T, sigma, kappa, theta, volvol, rho)
        denominator = np.exp(np.log(K) * i * s1) * i * s1
        P = P + ds * (numerator1 - numerator2)/denominator
    element2 = P/np.pi
    return np.real((element1 + element2))
# vectorify
def strikematurePriceHestonMid(St, W, r, Q, sigma, kappa, theta, volvol, rho):
    stuff = []
    volsur = []
    e = 0
    p = 0
    for p in range(5):
        for e in range(5):
            stuff.append(priceHestonMid(St, W[e], r, Q[p], sigma, kappa, theta, volvol, rho))
            #volsur[e][p]=stuff[4*p::4*p+4]
            #print(priceHestonMid(St, W[e], r, Q[p], sigma, kappa, theta, volvol, rho))
    volsur = np.reshape(stuff, (5, 5))
    stuff = np.array(stuff)
    return stuff
def calibratorHeston(St, initialValues=[0.5, 0.5, 0.5, 0.5, -0.5],
                     lowerBounds=[1e-2, 1e-2, 1e-2, 1e-2, -1],
                     upperBounds=[10, 10, 10, 10, 0]):
    objectiveFunctionHeston = ((marketPrices) - (strikematurePriceHestonMid(St, strikes,
                                                                            rates[0],
                                                                            maturities,
                                                                            sigma,
                                                                            kappa,
                                                                            theta,
                                                                            volvol,
                                                                            rho))).sum()
    result = fmin_slsqp(objectiveFunctionHeston, initialValues, args=params)
    return result
calibratorHeston(4662)
UPDATE:
I was able to figure out how to get it done. I am still not sure why it was not working before, but nonetheless I got it working with SciPy. Thank you all.
from scipy.optimize import minimize
def objectiveFunctionHeston(x, St, strikes, rates, maturities):
    objective = ((marketPrices) - (strikematurePriceHestonMid(St, strikes,
                                                              rates,
                                                              maturities,
                                                              sigma=x[0],
                                                              kappa=x[1],
                                                              theta=x[2],
                                                              volvol=x[3],
                                                              rho=x[4]))) / marketPrices
    objective = np.square(np.dot(objective, objective))
    return objective

bounds = ((1e-2, 5), (1e-2, 8), (1e-2, 10), (1e-2, 10), (-1, 1))
res = minimize(objectiveFunctionHeston, method='SLSQP',
               x0=[sigma, kappa, theta, volvol, rho],
               args=(St, strikes, rates[0], maturities),
               bounds=bounds, tol=1e-20,
               options={"maxiter": 1000})
print(res)
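In hindsight, the original fmin_slsqp call likely failed for two separate reasons: objectiveFunctionHeston was computed once as a number (using the global sigma, kappa, ...) instead of being passed as a function of the parameter vector, and args was given as a Python list, while the SciPy optimizers concatenate args onto an internal tuple, which is exactly what raises "can only concatenate tuple (not "list") to tuple". A minimal sketch of how the same calibration could be written for fmin_slsqp, assuming the functions defined above are in scope (the name objectiveSLSQP is only illustrative):

def objectiveSLSQP(x, St, strikes, rate, maturities):
    # x = [sigma, kappa, theta, volvol, rho]; squared relative pricing error
    model = strikematurePriceHestonMid(St, strikes, rate, maturities, *x)
    return np.sum(((marketPrices - model) / marketPrices) ** 2)

x0 = [0.5, 0.5, 0.5, 0.5, -0.5]
resultSLSQP = fmin_slsqp(objectiveSLSQP, x0,
                         args=(St, strikes, rates[0], maturities))  # a tuple, not a list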
Related
I have two parameters I'd like to maximise the log likelihood for and get the optimal parameter values. The code here solves an ODE system, from which I will use two trajectories as x and y data:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import math
# Total population, N.
N = 1
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 0.001, 0
# Everyone else, S0, is susceptible to infection initially.
U0 = N - I0 - R0
J0 = I0
Lf0, Ls0 = 0, 0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
beta, gamma = 8, 0.4
mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
u, v, w = 0.88, 0.083, 0.0006
t = np.linspace(0, 500, 500+1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
    U, Lf, Ls, I, R, cInc = y
    b = (mu * (U + Lf + Ls + R)) + (muTB * I)
    lamda = beta * I
    clamda = 0.2 * lamda
    dU = b - ((lamda + mu) * U)
    dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
    dLs = (u * Lf) - ((w + clamda + mu) * Ls)
    dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
    dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
    cI = w*Ls + v*Lf + (rho * R)
    return dU, dLf, dLs, dI, dR, cI
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
U, Lf, Ls, I, R, cInc = solve.T
My next function simply calculates the log likelihood when values for beta and gamma are passed in, which is what I'm trying to optimise:
def loglik(beta, gamma):
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T  # get trajectories
    muPrev, sigmaPrev = I[-1]*100000, 40  # I (prevalence)
    muInc, sigmaInc = (cInc[1:] - cInc[:-1])[-1]*100000, 30  # cInc (incidence)
    n = 10000
    # logPrev = np.random.lognormal(np.log((muPrev**2) / (muPrev**2 + sigmaPrev**2)**0.5), (np.log(1 + (sigmaPrev**2 / muPrev**2)))**0.5, n)  # lognormal
    # logInc = np.random.lognormal(np.log((muInc**2) / (muInc**2 + sigmaInc**2)**0.5), (np.log(1 + (sigmaInc**2 / muInc**2)))**0.5, n)  # lognormal
    xPrev = I[-1]*100000  # value of x in formula for log of pdf
    xInc = (cInc[1:] - cInc[:-1])[-1]*100000  # value of x in formula for log of pdf
    logmuPrev = np.log((muPrev**2) / (muPrev**2 + sigmaPrev**2)**0.5)  # lognormal params
    logsdPrev = (np.log(1 + (sigmaPrev**2 / muPrev**2)))**0.5
    logmuInc = np.log((muInc**2) / (muInc**2 + sigmaInc**2)**0.5)  # lognormal params
    logsdInc = (np.log(1 + (sigmaInc**2 / muInc**2)))**0.5
    L_prev = -0.5*((np.log(xPrev) - logmuPrev) / logsdPrev)**2 - np.log(xPrev * logsdPrev * (2*math.pi)**0.5)  # log of pdf for prev and inc
    L_inc = -0.5*((np.log(xInc) - logmuInc) / logsdInc)**2 - np.log(xInc * logsdInc * (2*math.pi)**0.5)
    logsum = L_prev + L_inc  # summing logs
    return np.exp(logsum)  # exp for likelihood
Finally, I try to optimise, but my arrays have different shapes so I cannot optimise this. How do I fix this so I can correctly maximise the log likelihood?
x = I[:-1]
y = cInc[1:] - cInc[:-1]
lik_model = minimize(loglik, 8, 0.4, method='L-BFGS-B')
----edit----
I have managed to get minimize to run, but it just returns the 8 and 0.4 values I fed into it.
results:
fun: 0.00013295432301190784
hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
jac: array([-5.75331835e-08, 6.87479045e-07])
message: 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
nfev: 3
nit: 0
njev: 1
status: 0
success: True
x: array([8. , 0.4])
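A note on what may be happening here, under the assumption that the goal is maximum likelihood: scipy.optimize.minimize expects the objective to take a single parameter vector, and it minimizes, so the usual pattern is to wrap both parameters into one array and return the negative log-likelihood. Working on the log scale also avoids the nearly flat surface produced by np.exp(logsum), which is one plausible reason L-BFGS-B satisfies its gradient tolerance immediately (nit: 0) and hands back the starting point. A minimal sketch, assuming loglik is modified to return logsum itself rather than np.exp(logsum):

def negloglik(params):
    beta, gamma = params              # single parameter vector, as minimize expects
    return -loglik(beta, gamma)       # negate: minimizing this maximizes the log likelihood

lik_model = minimize(negloglik, x0=np.array([8, 0.4]), method='L-BFGS-B',
                     bounds=[(1e-6, 30), (1e-6, 2)])  # illustrative bounds to keep the rates positive
print(lik_model.x)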
My code solves an ODE system using two randomly generated parameter values drawn from a list. I put this in a for loop to generate some 50 outputs using those parameter values. Inside the for loop I use an if statement to set conditions: if the calculated values fall within a range, an acceptance message is printed, otherwise a rejection message is printed. What I actually want is for this information (the output values and the corresponding parameter values used to generate them) to be stored in memory, for example in a list. However, my approach either saves only the last value, or saves nothing at all; and depending on where I position the code within the loops, it ends up printing various elements 50 times in a row, which I do not want. My code is this:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
beta_samples = np.random.uniform(0, 30, 50)
gamma_samples = np.random.uniform(0, 2, 50)
for i, j in zip(beta_samples, gamma_samples):
    # Total population, N.
    N = 1
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 0.001, 0
    # Everyone else, S0, is susceptible to infection initially.
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    beta, gamma = i, j
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.88, 0.083, 0.0006
    t = np.linspace(0, 500, 500+1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
        U, Lf, Ls, I, R, cInc = y
        b = (mu * (U + Lf + Ls + R)) + (muTB * I)
        lamda = beta * I
        clamda = 0.2 * lamda
        dU = b - ((lamda + mu) * U)
        dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
        dLs = (u * Lf) - ((w + clamda + mu) * Ls)
        dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
        dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
        cI = w*Ls + v*Lf + (rho * R)
        return dU, dLf, dLs, dI, dR, cI

    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T

    if 320 < I[-1]*100000 < 480 and 240 < (cInc[1:] - cInc[:-1])[-1]*100000 < 360:
        acc = [320 < I[-1]*100000 < 480]
        acc.append(320 < I[-1]*100000 < 480)
        print('for beta of', beta, 'and gamma of', gamma, 'pprevalence is ', I[-1]*100000, 'incidence is ', (cInc[1:] - cInc[:-1])[-1]*100000)
    else:
        rejected.append(beta_samples)
        print('values of', beta, 'and gamma of', gamma, 'rejected')
Where is my code going wrong? I simply want one list of the values that satisfy the 'if' condition and another list of those that fall under the 'else' condition.
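A minimal sketch of the usual pattern, assuming deriv, the initial conditions and the fixed rates are defined once before the loop (as in the snippets above): create the two lists once, outside the loop, and append one record per iteration. Re-creating a list inside the loop (acc = [...]) or appending to a list that was never initialised (rejected) is what loses the earlier entries.

accepted = []   # (beta, gamma, prevalence, incidence) for runs that meet the criteria
rejected = []

for beta, gamma in zip(beta_samples, gamma_samples):
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t,
                   args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    prevalence = I[-1] * 100000
    incidence = (cInc[1:] - cInc[:-1])[-1] * 100000
    record = (beta, gamma, prevalence, incidence)
    if 320 < prevalence < 480 and 240 < incidence < 360:
        accepted.append(record)
        print('accepted: beta', beta, 'gamma', gamma, 'prevalence', prevalence, 'incidence', incidence)
    else:
        rejected.append(record)
        print('rejected: beta', beta, 'gamma', gamma)

print(len(accepted), 'accepted,', len(rejected), 'rejected')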
I wish to estimate the parameters beta and gamma in this model by minimization. However, my observed data isn't in the form of a time series: the values I want to fit are the equilibrium values of two trajectories, namely the points where I (prevalence) and J_diff (incidence) settle at 0.4 and 0.3 respectively. My code is as follows:
def peak_infections(x):
    # Total population, N.
    N = 1
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 0.001, 0
    # Everyone else, S0, is susceptible to infection initially.
    beta = x[0]
    gamma = x[1]
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/years).
    beta, gamma = 15, 2/5
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.083, 0.88, 0.0006
    # A grid of time points
    times = np.arange(0, 20, 2.5)

    def deriv(y, times, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
        U, Lf, Ls, I, R, cInc = y
        b = (mu * (U + Lf + Ls + R)) + (muTB * I)
        lamda = beta * I
        clamda = 0.2 * lamda
        dU = b - ((lamda + mu) * U)
        dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
        dLs = (u * Lf) - ((w + clamda + mu) * Ls)
        dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
        dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
        cI = w*Ls + v*Lf + (rho * R)
        return dU, dLf, dLs, dI, dR, cI

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), times, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    return I

def residual(x):
    # Total population, N.
    StartingPop = 1
    prev = 0.4/StartingPop
    return np.sum((peak_infections(x) - prev) ** 2)
x0 = [12, 0.4] #estimates for beta and gamma starting point
res = minimize(residual, x0, method="Nelder-Mead", options={'fatol':1e-04}).x
print(res)
However, when I run the minimization as res, it simply returns the initial estimates in x0 that I gave it. How do I correct this code so that the residual function is optimised for the point where I and J_diff reach their equilibrium values of 0.4 and 0.3?
You are overwriting your input arguments inside the function peak_infections: beta and gamma are assigned the values of x[0] and x[1], respectively, but a few lines later they are reassigned to 15 and 2/5, so no matter what you pass to the function, the result is the same. Just delete the line that assigns them 15 and 2/5 and you will get a result.
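A minimal sketch of that fix, plus one possible way to also target the incidence equilibrium. This second part goes beyond the answer above and assumes J_diff means the increments of cInc, with deriv available at module level as in the earlier snippets; the helper name equilibria is only illustrative.

def equilibria(x):
    beta, gamma = x[0], x[1]          # keep the values passed in; do not reassign them
    N = 1
    I0, R0 = 0.001, 0
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.083, 0.88, 0.0006
    times = np.arange(0, 20, 2.5)
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), times,
                   args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    J_diff = cInc[1:] - cInc[:-1]
    return I[-1], J_diff[-1]          # late-time values as a proxy for the equilibrium

def residual(x):
    I_eq, J_eq = equilibria(x)
    return (I_eq - 0.4)**2 + (J_eq - 0.3)**2

res = minimize(residual, [12, 0.4], method="Nelder-Mead", options={'fatol': 1e-4}).x
print(res)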
I have searched for and referenced code for fitting the SIR model from others on this website, but the fit is very poor. Is there something wrong with my data, or with my approach? And how should I make predictions with this SIR model given new data?
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate, optimize
import pandas as pd
y_total = [0.0, 0.0010131712259371835, 0.0035460992907801418, 0.00911854103343465,
0.008611955420466059, 0.021783181357649443, 0.00911854103343465, 0.07852077001013172, 0.4397163120567376,
0.21681864235055726, 0.232016210739615, 0.5278622087132725, 0.13576494427558258, 0.2988855116514691, 0.37436676798378926,
0.4209726443768997, 0.544579533941236, 0.7254305977710233, 1.0, 0.7740628166160081, 0.43617021276595747, 0.48226950354609927]
x_total = range(0,22)
ydata = np.array(y_total, dtype=float)
xdata = np.array(x_total, dtype=float)
# I0 + S0 + R0 is always 1 regardless of "value"
I0 = 0.3
S0 = 1 - I0
R0 = 0
def sir_model(y, x, beta, gamma):
    S = -beta * y[0] * y[1] / N
    R = gamma * y[1]
    I = -(S + R)
    return S, I, R

def fit_odeint(x, beta, gamma):
    return integrate.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:,1]
N = 1.0
I0 = ydata[0]
S0 = N - I0
R0 = 0.0
popt, pcov = optimize.curve_fit(fit_odeint, xdata, ydata)
fitted = fit_odeint(xdata, *popt)
plt.plot(xdata, ydata, 'o')
plt.plot(xdata, fitted)
plt.show()
Make the initial infected number also a variable, which is easily done by shifting the computation of the initial state into the target function:
def sir_model(y, x, beta, gamma):
    N = sum(y)
    S = -beta * y[0] * y[1] / N
    R = gamma * y[1]
    I = -(S + R)
    return S, I, R

def fit_odeint(x, beta, gamma, I0):
    # I0 + S0 + R0 is always 1 regardless of "value"
    S0 = 1 - I0
    R0 = 0
    return integrate.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:,1]
popt, pcov = optimize.curve_fit(fit_odeint, xdata, ydata, p0=(1/5, 1/8, 0.1))
There were some other changes, especially adding an initial guess to curve_fit. This still produces a warning about some difficulty in odeint, but a result is reached anyway, with popt = [0.36714402, 0.04176973, 0.01311579], i.e. 1/popt = [2.72372678, 23.94078424, 76.2439491]. Changing the initial guess to values close to this, (1/3, 1/24, 0.05), eliminates the warning.
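Once popt is available, predicting for new inputs is just a matter of evaluating the fitted model on a new time grid; a short sketch (the grid below is only illustrative):

beta_fit, gamma_fit, I0_fit = popt
t_new = np.linspace(0, 40, 200)   # e.g. extend beyond the 22 observed points
predicted = fit_odeint(t_new, beta_fit, gamma_fit, I0_fit)

plt.plot(xdata, ydata, 'o', label='data')
plt.plot(t_new, predicted, label='fitted SIR prediction')
plt.legend()
plt.show()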
Psi in the Carr-Madan method of option pricing is the Fourier transform of the option price, expressed in terms of the characteristic function.
The function psi should return a scalar as opposed to a vector, which is what is causing my problem, so I am looking for suggestions for an alternative implementation.
import numpy as np
from scipy.special import gamma
param = [24.79, 94.45, 95.79, 0.2495, 0]
alpha = 1.1
lnS = 4.142976
r = 0.05
Sigma = 0.25
scale = 1
FFT_N = int(np.power(2,15))
uvec = np.linspace(1,FFT_N,FFT_N)
vj = (uvec-1) * 0.5
Tmt = 1/12
def psi(CF, vj, alpha, lnS, T=Tmt):
    u = vj - (alpha*1j + 1j)
    denom = alpha**2 + alpha - Sigma**2 + vj * 2 * alpha * 1j + 1j * vj
    return CF(u, lnS, Tmt)/denom

CF = lambda u, lnS, T: cf_log_cgmy(u=u, lnS=lnS, T=Tmt, mu=r, half_etasq=param[4],
                                   C=param[0], G=param[1], M=param[2], Y=param[3])

def cf_log_cgmy(u, lnS, T, mu, half_etasq, C, G, M, Y):
    omega = -C*gamma(-Y)*(np.power(M-1,Y)-np.power(M,Y)+np.power(G+1,Y)-np.power(G,Y))
    phi_CGMY = C*T*gamma(-Y)*(np.power(M-1j*u,Y)-np.power(M,Y)+np.power(G+1j*u,Y)-np.power(G,Y))
    phi = 1j*u*(lnS + (mu+omega-half_etasq)*T) + phi_CGMY - half_etasq*np.power(u,2)
    return np.exp(scale*phi)
psi(CF, vj, alpha, lnS, T=Tmt)
The psi function corresponds to equation (6) on page 64 of the paper linked below:
https://engineering.nyu.edu/sites/default/files/2018-08/CarrMadan2_0.pdf
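For reference, equation (6) of Carr and Madan (with phi_T the characteristic function of the log price, alpha the damping factor and r the risk-free rate) reads, as far as I can tell,

\psi_T(v) = \frac{e^{-rT}\,\phi_T\big(v - (\alpha + 1)i\big)}{\alpha^2 + \alpha - v^2 + i\,(2\alpha + 1)\,v}

In the FFT pricing step this is evaluated on the whole grid of v values at once, so a vectorised psi that returns an array of the same shape as vj is what the method expects. It may also be worth checking the denominator in the code above, where Sigma**2 appears in the place the formula has v^2.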