Maximising log likelihood for two parameters - python

I have two parameters whose values I'd like to find by maximising the log likelihood. The code below solves an ODE system, from which I will use two trajectories as my x and y data:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import math
# Total population, N.
N = 1
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 0.001, 0
# Everyone else, S0, is susceptible to infection initially.
U0 = N - I0 - R0
J0 = I0
Lf0, Ls0 = 0, 0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
beta, gamma = 8, 0.4
mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
u, v, w = 0.88, 0.083, 0.0006
t = np.linspace(0, 500, 500+1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
    U, Lf, Ls, I, R, cInc = y
    b = (mu * (U + Lf + Ls + R)) + (muTB * I)
    lamda = beta * I
    clamda = 0.2 * lamda
    dU = b - ((lamda + mu) * U)
    dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
    dLs = (u * Lf) - ((w + clamda + mu) * Ls)
    dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
    dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
    cI = w*Ls + v*Lf + (rho * R)
    return dU, dLf, dLs, dI, dR, cI
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
U, Lf, Ls, I, R, cInc = solve.T
My next function calculates the log likelihood for given values of beta and gamma, which are the parameters I'm trying to optimise:
def loglik(beta, gamma):
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T  # get trajectories
    muPrev, sigmaPrev = I[-1]*100000, 40  # I (prevalence)
    muInc, sigmaInc = (cInc[1:] - cInc[:-1])[-1]*100000, 30  # cInc (incidence)
    n = 10000
    # logPrev = np.random.lognormal(np.log((muPrev**2) / (muPrev**2 + sigmaPrev**2)**0.5), (np.log(1 + (sigmaPrev**2 / muPrev**2)))**0.5, n)  # lognormal
    # logInc = np.random.lognormal(np.log((muInc**2) / (muInc**2 + sigmaInc**2)**0.5), (np.log(1 + (sigmaInc**2 / muInc**2)))**0.5, n)  # lognormal
    xPrev = I[-1]*100000  # value of x in formula for log of pdf
    xInc = (cInc[1:] - cInc[:-1])[-1]*100000  # value of x in formula for log of pdf
    logmuPrev = np.log((muPrev**2) / (muPrev**2 + sigmaPrev**2)**0.5)  # lognormal params
    logsdPrev = (np.log(1 + (sigmaPrev**2 / muPrev**2)))**0.5
    logmuInc = np.log((muInc**2) / (muInc**2 + sigmaInc**2)**0.5)  # lognormal params
    logsdInc = (np.log(1 + (sigmaInc**2 / muInc**2)))**0.5
    L_prev = -0.5*((np.log(xPrev) - logmuPrev) / logsdPrev)**2 - np.log(xPrev * logsdPrev * (2*math.pi)**0.5)  # log of pdf for prev and inc
    L_inc = -0.5*((np.log(xInc) - logmuInc) / logsdInc)**2 - np.log(xInc * logsdInc * (2*math.pi)**0.5)
    logsum = L_prev + L_inc  # summing logs
    return np.exp(logsum)  # exp for likelihood
Finally, I try to optimise, but my arrays have different shapes so the optimisation fails. How do I fix this so that I can correctly maximise the log likelihood?
x = I[:-1]
y = cInc[1:] - cInc[:-1]
lik_model = minimize(loglik, 8, 0.4, method='L-BFGS-B')
----edit----
I have managed to get minimize to run, but it just returns the 8 and 0.4 starting values I fed into it.
Results:
fun: 0.00013295432301190784
hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
jac: array([-5.75331835e-08, 6.87479045e-07])
message: 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
nfev: 3
nit: 0
njev: 1
status: 0
success: True
x: array([8. , 0.4])
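Two things are worth checking before expecting minimize to move away from the starting point. First, in minimize(loglik, 8, 0.4, method='L-BFGS-B') the 0.4 is interpreted as args, not as a second starting value: minimize expects a single objective function taking one parameter vector. Second, minimize minimises, so to maximise the likelihood the objective should return the negative log likelihood rather than np.exp(logsum). Note also that muPrev and muInc are computed from the same model run as xPrev and xInc, so the likelihood never compares the model output against fixed observed values, which may not be what you intend. Below is a minimal sketch under those assumptions; obsPrev, obsInc, the lognorm_logpdf helper name, and the bounds are hypothetical placeholders, not part of the original code:
def negloglik(params):
    beta, gamma = params
    sol = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t,
                 args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = sol.T
    modPrev = I[-1] * 100000                      # model prevalence at the final time point
    modInc = (cInc[1:] - cInc[:-1])[-1] * 100000  # model incidence at the final time point

    obsPrev, obsInc = 400, 300   # hypothetical observed targets -- replace with your data
    sdPrev, sdInc = 40, 30       # uncertainties used in the question

    def lognorm_logpdf(x, m, s):
        # lognormal log-pdf parameterised by mean m and sd s, as in the question
        logmu = np.log(m**2 / np.sqrt(m**2 + s**2))
        logsd = np.sqrt(np.log(1 + s**2 / m**2))
        return -0.5 * ((np.log(x) - logmu) / logsd)**2 - np.log(x * logsd * np.sqrt(2 * math.pi))

    # log likelihood of the observed values given the model output
    ll = lognorm_logpdf(obsPrev, modPrev, sdPrev) + lognorm_logpdf(obsInc, modInc, sdInc)
    return -ll  # negated so that minimising it maximises the likelihood

res = minimize(negloglik, x0=[8, 0.4], method='L-BFGS-B',
               bounds=[(0.1, 30), (0.01, 2)])  # illustrative bounds, an assumption
print(res.x, res.fun)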

Related

Plotting two curves that start at different times

Using matplotlib I wish to plot one curve that starts at time 0 and runs for, say, 500 units of time, and then another curve that starts after the first curve has flatlined and runs for another 500 units of time. My code currently produces a plot in which both curves start at time 0; I want the red curve to begin at around time 500.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import math
# Total population, N.
N = 1
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 0.001, 0
# Everyone else, S0, is susceptible to infection initially.
U0 = N - I0 - R0
J0 = I0
Lf0, Ls0 = 0, 0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
beta, gamma = 8, 0.4
int_gamma = 0.8
mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
u, v, w = 0.88, 0.083, 0.0006
t = np.linspace(0, 1000, 1000+1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
    U, Lf, Ls, I, R, cInc = y
    b = (mu * (U + Lf + Ls + R)) + (muTB * I)
    lamda = beta * I
    clamda = 0.2 * lamda
    dU = b - ((lamda + mu) * U)
    dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
    dLs = (u * Lf) - ((w + clamda + mu) * Ls)
    dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
    dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
    cI = w*Ls + v*Lf + (rho * R)
    return dU, dLf, dLs, dI, dR, cI
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
U, Lf, Ls, I, R, cInc = solve.T
# The SIR model differential equations.
def derivint(y, t, N, beta, int_gamma, mu, muTB, sigma, rho, u, v, w):
    U, Lf, Ls, I, R, cInc = y
    b = (mu * (U + Lf + Ls + R)) + (muTB * I)
    lamda = beta * I
    clamda = 0.2 * lamda
    dU = b - ((lamda + mu) * U)
    dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
    dLs = (u * Lf) - ((w + clamda + mu) * Ls)
    dI = w*Ls + v*Lf - ((int_gamma + muTB + sigma) * I) + (rho * R)
    dR = ((int_gamma + sigma) * I) - ((rho + clamda + mu) * R)
    cI = w*Ls + v*Lf + (rho * R)
    return dU, dLf, dLs, dI, dR, cI
# Integrate the SIR equations over the time grid, t.
solveint = odeint(derivint, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, int_gamma, mu, muTB, sigma, rho, u, v, w))
Uint, Lfint, Lsint, Iint, Rint, cIncint = solveint.T
J_diff = cInc[1:] - cInc[:-1]
J_diffint = cIncint[1:] - cIncint[:-1]
#J_diff = np.diff(cInc)
fig = plt.figure(facecolor='w')
ax = fig.add_subplot(111, facecolor='#dddddd', axisbelow=True)
#ax.plot(t, U*100000, 'black', alpha=1, lw=2, label='uninfected')
#ax.plot(t, Lf/100000, 'r', alpha=1, lw=2, label='latent fast')
#ax.plot(t, Ls/100000, 'black', alpha=1, lw=2, label='latent slow')
#ax.plot(t, I*100000, 'green', alpha=1, lw=2, label='infected')
#ax.plot(t, R*100000, 'red', alpha=1, lw=2, label='recovered')
ax.plot(t[1:], J_diff*100000, 'blue', alpha=1, lw=2, label='incidence')
ax.plot(t[1:], J_diffint*100000, 'red', alpha=1, lw=2, label='intervention incidence')
#ax.plot(t, cInc, 'red', alpha=1, lw=2, label='Prevalence')
ax.set_xlabel('Time in years')
ax.set_ylabel('Number')
ax.grid(b=True, which='major', c='w', lw=2, ls='-')
legend = ax.legend()
legend.get_frame().set_alpha(0.5)
plt.show()
You could simply add 500 to the x values of the second curve:
ax.plot(t[1:]+500, J_diffint*100000, 'red', alpha=1, lw=2, label='intervention incidence')
Output: (plot with the red intervention-incidence curve now beginning at around t = 500)
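If the intention is for the intervention curve to continue from whatever state the first run has reached (rather than shifting the same 0-to-1000 curve along the x axis), another option is to integrate the second system from the final state of the first solution and offset its time axis when plotting. A rough sketch, reusing deriv, derivint and the parameters from the question; the 500-unit phase lengths are an assumption based on the description:
t1 = np.linspace(0, 500, 501)   # first phase: no intervention
t2 = np.linspace(0, 500, 501)   # second phase: intervention (plotted with an offset)

solve1 = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t1,
                args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
# start the intervention run from the state reached at the end of the first run
solve2 = odeint(derivint, solve1[-1], t2,
                args=(N, beta, int_gamma, mu, muTB, sigma, rho, u, v, w))

J_diff1 = np.diff(solve1[:, 5])  # incidence before the intervention (cInc is column 5)
J_diff2 = np.diff(solve2[:, 5])  # incidence after the intervention

fig, ax = plt.subplots()
ax.plot(t1[1:], J_diff1 * 100000, 'blue', lw=2, label='incidence')
ax.plot(t2[1:] + t1[-1], J_diff2 * 100000, 'red', lw=2, label='intervention incidence')
ax.set_xlabel('Time in years')
ax.set_ylabel('Number')
ax.legend()
plt.show()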

Storing the output from a for loop into a list

My code solves an ODE system using pairs of randomly generated parameter values drawn from two lists. I put this in a for loop to generate some 50 outputs using those parameter values. Inside the for loop I use an if statement to set conditions, so that if the calculated values fall within a range an acceptance message is printed, and otherwise a rejection message is printed. However, I also want this information (the output values, and the corresponding parameter values used to generate them) to be stored, for example in a list. My current approach either only saves the last value, or saves nothing at all, and depending on how I position code within the loops it ends up printing various elements 50 times in a row, which I do not want. My code is this:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize

beta_samples = np.random.uniform(0, 30, 50)
gamma_samples = np.random.uniform(0, 2, 50)

for i, j in zip(beta_samples, gamma_samples):
    # Total population, N.
    N = 1
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 0.001, 0
    # Everyone else, S0, is susceptible to infection initially.
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    beta, gamma = i, j
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.88, 0.083, 0.0006
    t = np.linspace(0, 500, 500+1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
        U, Lf, Ls, I, R, cInc = y
        b = (mu * (U + Lf + Ls + R)) + (muTB * I)
        lamda = beta * I
        clamda = 0.2 * lamda
        dU = b - ((lamda + mu) * U)
        dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
        dLs = (u * Lf) - ((w + clamda + mu) * Ls)
        dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
        dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
        cI = w*Ls + v*Lf + (rho * R)
        return dU, dLf, dLs, dI, dR, cI

    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T

    if 320 < I[-1]*100000 < 480 and 240 < (cInc[1:] - cInc[:-1])[-1]*100000 < 360:
        acc = [320 < I[-1]*100000 < 480]
        acc.append(320 < I[-1]*100000 < 480)
        print('for beta of', beta, 'and gamma of', gamma, 'prevalence is ', I[-1]*100000, 'incidence is ', (cInc[1:] - cInc[:-1])[-1]*100000)
    else:
        rejected.append(beta_samples)
        print('values of', beta, 'and gamma of', gamma, 'rejected')
Where is my code going wrong? I simply want one list of the values that fall under the 'if' condition, and another of those that fall under the 'else' condition.
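A minimal sketch of the accepted/rejected bookkeeping, assuming deriv, t and the model constants are defined as in the question: the key points are to create the lists once before the loop, and to append the scalar values for the current iteration (not the whole beta_samples array) inside it.
# assumes deriv, t, N, I0, R0, U0, J0, Lf0, Ls0, mu, muTB, sigma, rho, u, v, w
# are defined as in the question
accepted = []   # (beta, gamma, prevalence, incidence) for runs inside the target ranges
rejected = []   # (beta, gamma) for runs outside them

beta_samples = np.random.uniform(0, 30, 50)
gamma_samples = np.random.uniform(0, 2, 50)

for beta, gamma in zip(beta_samples, gamma_samples):
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), t,
                   args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    prevalence = I[-1] * 100000
    incidence = (cInc[1:] - cInc[:-1])[-1] * 100000

    if 320 < prevalence < 480 and 240 < incidence < 360:
        accepted.append((beta, gamma, prevalence, incidence))
        print('beta', beta, 'gamma', gamma, 'accepted: prevalence', prevalence, 'incidence', incidence)
    else:
        rejected.append((beta, gamma))
        print('beta', beta, 'gamma', gamma, 'rejected')

print(len(accepted), 'accepted,', len(rejected), 'rejected')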

Estimating parameters with scipy minimize with unorthodox observed data

I wish to estimate the parameters beta and gamma in this model using scipy's minimize. However, my observed data isn't in the form of a time series: the values I want to fit to are the equilibrium values of two trajectories, namely the points where I (prevalence) and J_diff (incidence) settle at 0.4 and 0.3 respectively. My code is as follows:
def peak_infections(x):
    # Total population, N.
    N = 1
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 0.001, 0
    # Everyone else, S0, is susceptible to infection initially.
    beta = x[0]
    gamma = x[1]
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/years).
    beta, gamma = 15, 2/5
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.083, 0.88, 0.0006
    # A grid of time points
    times = np.arange(0, 20, 2.5)

    def deriv(y, times, N, beta, gamma, mu, muTB, sigma, rho, u, v, w):
        U, Lf, Ls, I, R, cInc = y
        b = (mu * (U + Lf + Ls + R)) + (muTB * I)
        lamda = beta * I
        clamda = 0.2 * lamda
        dU = b - ((lamda + mu) * U)
        dLf = (lamda*U) + ((clamda)*(Ls + R)) - ((u + v + mu) * Lf)
        dLs = (u * Lf) - ((w + clamda + mu) * Ls)
        dI = w*Ls + v*Lf - ((gamma + muTB + sigma) * I) + (rho * R)
        dR = ((gamma + sigma) * I) - ((rho + clamda + mu) * R)
        cI = w*Ls + v*Lf + (rho * R)
        return dU, dLf, dLs, dI, dR, cI

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), times, args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    return I

def residual(x):
    # Total population, N.
    StartingPop = 1
    prev = 0.4/StartingPop
    return np.sum((peak_infections(x) - prev) ** 2)

x0 = [12, 0.4]  # estimates for beta and gamma starting point
res = minimize(residual, x0, method="Nelder-Mead", options={'fatol':1e-04}).x
print(res)
However, when I attempt the minimization as res, it simply returns the initial estimates in x0 that I gave it. How do I correct this code so that the residual function captures the requirement that I and J_diff reach their equilibrium values of 0.4 and 0.3?
You are overwriting the input arguments of the function peak_infections. beta and gamma are assigned the values of x[0] and x[1], respectively, but a few lines later they are reassigned to 15 and 2/5, so no matter what you pass to the function the result is the same. Just delete the line where you assign them the values 15 and 2/5 and you will get a result.
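For illustration, a rough sketch of how the fitting might look once that line is removed and the incidence target is included alongside the prevalence target. The deriv function is assumed to be available at module level exactly as in the question; the 0.4 and 0.3 targets are taken from the question, while the longer time grid (so the system can actually approach equilibrium) and the simulate helper name are assumptions, not a definitive implementation:
def simulate(x):
    # assumes deriv(...) from the question is defined at module level
    beta, gamma = x
    N = 1
    I0, R0 = 0.001, 0
    U0 = N - I0 - R0
    J0 = I0
    Lf0, Ls0 = 0, 0
    mu, muTB, sigma, rho = 1/80, 1/6, 1/6, 0.03
    u, v, w = 0.083, 0.88, 0.0006
    times = np.linspace(0, 500, 501)   # long horizon so the trajectories settle (assumption)

    solve = odeint(deriv, (U0, Lf0, Ls0, I0, R0, J0), times,
                   args=(N, beta, gamma, mu, muTB, sigma, rho, u, v, w))
    U, Lf, Ls, I, R, cInc = solve.T
    J_diff = cInc[1:] - cInc[:-1]
    return I[-1], J_diff[-1]            # equilibrium prevalence and incidence

def residual(x):
    prev_target, inc_target = 0.4, 0.3  # targets quoted in the question
    prev, inc = simulate(x)
    return (prev - prev_target)**2 + (inc - inc_target)**2

res = minimize(residual, [12, 0.4], method="Nelder-Mead", options={'fatol': 1e-4})
print(res.x)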

SciPy optimization

I have been attempting to calibrate my model, but I am running into issues with the scipy.optimize module. I have tried various scipy optimizers, but they all return the error "TypeError: can only concatenate tuple (not "list") to tuple". Does anyone know how to resolve this issue? Thank you for your time.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from numba import jit, njit, float64
from scipy.optimize import fmin_slsqp

i = complex(0, 1)
sigma, kappa, theta, volvol, rho = 0.1, 0.1, 0.1, 0.1, 0.1
params = [sigma, kappa, theta, volvol, rho]
strikes = [4650, 4655, 4660, 4665, 4670]
maturities = [1/48, 2/48, 3/48, 1/12, 5/48]
marketPrices = [70.00, 66.70, 63.50, 60.35, 57.30,
                82.50, 79.20, 76.0, 72.80, 69.70,
                92.65, 89.35, 86.10, 82.90, 79.75,
                101.60, 98.30, 95.10, 91.90, 88.75,
                109.85, 106.60, 103.35, 100.20, 97.00]
marketPrices = np.array(marketPrices)
rates = [0.05, 0.05, 0.05, 0.05, 0.05]
St = 4662
np.shape(marketPrices)

#jit
def fHeston(s, St, K, r, T, sigma, kappa, theta, volvol, rho):
    # To be used a lot
    prod = rho * sigma * i * s
    # Calculate d
    d1 = (prod - kappa)**2
    d2 = (sigma**2) * (i*s + s**2)
    d = np.sqrt(d1 + d2)
    # Calculate g
    g1 = kappa - prod - d
    g2 = kappa - prod + d
    g = g1/g2
    # Calculate first exponential
    exp1 = np.exp(np.log(St) * i * s) * np.exp(i * s * r * T)
    exp2 = 1 - g * np.exp(-d * T)
    exp3 = 1 - g
    mainExp1 = exp1 * np.power(exp2/exp3, -2 * theta * kappa/(sigma**2))
    # Calculate second exponential
    exp4 = theta * kappa * T/(sigma**2)
    exp5 = volvol/(sigma**2)
    exp6 = (1 - np.exp(-d * T))/(1 - g * np.exp(-d * T))
    mainExp2 = np.exp((exp4 * g1) + (exp5 * g1 * exp6))
    return (mainExp1 * mainExp2)

#jit(forceobj=True)
def priceHestonMid(St, K, r, T, sigma, kappa, theta, volvol, rho):
    P, iterations, maxNumber = 0, 1000, 100
    ds = maxNumber/iterations
    element1 = 0.5 * (St - K * np.exp(-r * T))
    # Calculate the complex integral
    # Using j instead of i to avoid confusion
    for j in range(1, iterations):
        s1 = ds * (2*j + 1)/2
        s2 = s1 - i
        numerator1 = fHeston(s2, St, K, r, T, sigma, kappa, theta, volvol, rho)
        numerator2 = K * fHeston(s1, St, K, r, T, sigma, kappa, theta, volvol, rho)
        denominator = np.exp(np.log(K) * i * s1) * i * s1
        P = P + ds * (numerator1 - numerator2)/denominator
    element2 = P/np.pi
    return np.real((element1 + element2))

# vectorify
def strikematurePriceHestonMid(St, W, r, Q, sigma, kappa, theta, volvol, rho):
    stuff = []
    volsur = []
    e = 0
    p = 0
    for p in range(5):
        for e in range(5):
            stuff.append(priceHestonMid(St, W[e], r, Q[p], sigma, kappa, theta, volvol, rho))
            #volsur[e][p]=stuff[4*p::4*p+4]
            #print(priceHestonMid(St, W[e], r, Q[p], sigma, kappa, theta, volvol, rho))
    volsur = np.reshape(stuff, (5, 5))
    stuff = np.array(stuff)
    return stuff

def calibratorHeston(St, initialValues=[0.5, 0.5, 0.5, 0.5, -0.5],
                     lowerBounds=[1e-2, 1e-2, 1e-2, 1e-2, -1],
                     upperBounds=[10, 10, 10, 10, 0]):
    objectiveFunctionHeston = ((marketPrices) - (strikematurePriceHestonMid(St, strikes,
                                                                            rates[0],
                                                                            maturities,
                                                                            sigma,
                                                                            kappa,
                                                                            theta,
                                                                            volvol,
                                                                            rho))).sum()
    result = fmin_slsqp(objectiveFunctionHeston, initialValues, args=params)
    return result

calibratorHeston(4662)
UPDATE:
I was able to figure out how to get it done. I am still not sure why it was not working before, but I got it working with SciPy's minimize. Thank you all.
from scipy.optimize import minimize

def objectiveFunctionHeston(x, St, strikes, rates, maturities):
    objective = ((marketPrices) - (strikematurePriceHestonMid(St, strikes,
                                                              rates,
                                                              maturities,
                                                              sigma=x[0],
                                                              kappa=x[1],
                                                              theta=x[2],
                                                              volvol=x[3],
                                                              rho=x[4])))/marketPrices
    objective = np.square(np.dot(objective, objective))
    return objective

bounds = ((1e-2, 5), (1e-2, 8), (1e-2, 10), (1e-2, 10), (-1, 1))
res = minimize(objectiveFunctionHeston, method='SLSQP',
               x0=[sigma, kappa, theta, volvol, rho],
               args=(St, strikes, rates[0], maturities),
               bounds=bounds, tol=1e-20,
               options={"maxiter": 1000})
print(res)
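A likely reason the first attempt failed: fmin_slsqp expects a callable objective of the form f(x, *args), where x is the vector of parameters being optimised and args is a tuple of extra fixed arguments. In the original calibratorHeston, objectiveFunctionHeston was the result of evaluating the pricing expression once (a plain float, not a function), and args=params passed the parameters to be calibrated as if they were fixed extra arguments; passing them as a list rather than a tuple may also be what triggers the tuple-concatenation TypeError, depending on the SciPy version. A rough sketch of the same calibration written for fmin_slsqp, kept close to the working minimize version above (the objectiveHeston name and the bounds are illustrative assumptions):
def objectiveHeston(x, St, strikes, rate, maturities):
    # x holds the five Heston parameters being calibrated
    model = strikematurePriceHestonMid(St, strikes, rate, maturities,
                                       x[0], x[1], x[2], x[3], x[4])
    diff = (marketPrices - model) / marketPrices
    return np.dot(diff, diff)   # sum of squared relative pricing errors

result = fmin_slsqp(objectiveHeston,
                    x0=[0.5, 0.5, 0.5, 0.5, -0.5],
                    args=(St, strikes, rates[0], maturities),   # fixed data, as a tuple
                    bounds=[(1e-2, 10), (1e-2, 10), (1e-2, 10), (1e-2, 10), (-1, 0)])
print(result)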

Solving system of coupled differential equations using Runge-Kutta in python

This Python code can solve a single, uncoupled differential equation:
import numpy as np
import matplotlib.pyplot as plt
import numba
import time

start_time = time.clock()

#numba.jit()
# A sample differential equation "dy / dx = (x - y**2)/2"
def dydx(x, y):
    return ((x - y**2)/2)

# Finds value of y for a given x using step size h
# and initial value y0 at x0.
def rungeKutta(x0, y0, x, h):
    # Count number of iterations using step size or
    # step height h
    n = (int)((x - x0)/h)
    # Iterate for number of iterations
    y = y0
    for i in range(1, n + 1):
        # Apply Runge-Kutta formulas to find the next value of y
        k1 = h * dydx(x0, y)
        k2 = h * dydx(x0 + 0.5 * h, y + 0.5 * k1)
        k3 = h * dydx(x0 + 0.5 * h, y + 0.5 * k2)
        k4 = h * dydx(x0 + h, y + k3)
        # Update next value of y
        y = y + (1.0 / 6.0)*(k1 + 2 * k2 + 2 * k3 + k4)
        # Update next value of x
        x0 = x0 + h
    return y

def dplot(start, end, steps):
    Y = list()
    for x in np.linspace(start, end, steps):
        Y.append(rungeKutta(x0, y, x, h))
    plt.plot(np.linspace(start, end, steps), Y)
    print("Execution time:", time.clock() - start_time, "seconds")
    plt.show()

start, end = 0, 10
steps = end * 100
x0 = 0
y = 1
h = 0.002
dplot(start, end, steps)
This code can solve the differential equation:
dydx = (x - y**2)/2
Now I have a system of coupled differential equations:
dydt = (x - y**2)/2
dxdt = x*3 + 3*y
How can I implement these two as a system of coupled differential equations in the above code? Is there a more general way to handle a system of n coupled differential equations?
With the help of others, I got to this:
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
import numba
import time

start_time = time.clock()

a = 1
b = 1
c = 1
d = 1

# Equations:
#numba.jit()
# du/dt = V(u, t)
def V(u, t):
    x, y, vx, vy = u
    return np.array([vy, vx, a*x + b*y, c*x + d*y])

def rk4(f, u0, t0, tf, n):
    t = np.linspace(t0, tf, n+1)
    u = np.array((n+1)*[u0])
    h = t[1] - t[0]
    for i in range(n):
        k1 = h * f(u[i], t[i])
        k2 = h * f(u[i] + 0.5 * k1, t[i] + 0.5*h)
        k3 = h * f(u[i] + 0.5 * k2, t[i] + 0.5*h)
        k4 = h * f(u[i] + k3, t[i] + h)
        u[i+1] = u[i] + (k1 + 2*(k2 + k3) + k4) / 6
    return u, t

u, t = rk4(V, np.array([1., 0., 1., 0.]), 0., 10., 100000)
x, y, vx, vy = u.T
# plt.plot(t, x, t, y)
plt.semilogy(t, x, t, y)
plt.grid('on')
print("Execution time:", time.clock() - start_time, "seconds")
plt.show()
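To connect this back to the two equations asked about, the same rk4 routine works with a two-component state vector; only the right-hand-side function changes. A short sketch, assuming the second equation was meant as dx/dt = x*3 + 3*y and using x(0) = 0, y(0) = 1 as illustrative initial values; the time span is kept short because x grows quickly:
# state vector u = [x, y]; returns [dx/dt, dy/dt]
def F(u, t):
    x, y = u
    dxdt = x*3 + 3*y
    dydt = (x - y**2) / 2
    return np.array([dxdt, dydt])

u, t = rk4(F, np.array([0., 1.]), 0., 2., 2000)
x, y = u.T
plt.plot(t, x, t, y)
plt.show()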
