Illogical parameters returned by scipy.curve_fit - python

I'm modelling a ball falling through a fluid in Python and fitting the model function to a set of data points, with the damping coefficients (a and b) and the density of the fluid as the fit parameters. The fitted value for the fluid density comes back negative, and I can't see what is wrong in the code. My code is below:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import curve_fit
#%%Parameters and constants
m = 0.1 #mass of object in kg
g = 9.81 #acceleration due to gravity in m/s**2
rho = 700 #density of object in kg/m**3
v0 = 0 #velocity at t=0
y0 = 0 #position at t=0
V = m / rho #volume in cubic meters
r = ((3/4)*(V/np.pi))**(1/3) #radius of the sphere
asample = 0.0001 #sample value for a
bsample = 0.0001 #sample value for b
#%%Defining integrating function
## function => y'' = g*(1-(rhof/rho))-((a/m)y'+(b/m)y'**2)
## y' = v
## v' = g*(1-rhof/rho)-((a/m)v+(b/m)v**2)
def sinkingball(Y, time, a, b, rhof):
    return [Y[1],(1/m)*(V*g*(rho-rhof)-a*Y[1]-b*(Y[1]**2))]
def balldepth(time, a, b, rhof):
    solutions = odeint(sinkingball, [y0,v0], time, args=(a, b, rhof))
    return solutions[:,0]
time = np.linspace(0,15,151)
# imported some experimental values and named the array data
a, b, rhof = curve_fit(balldepth, time, data, p0=(asample, bsample, 100))[0]
print(a,b,rhof)

Providing the output you actually get would be helpful, and the comment about time not being used by sinkingball() is worth following.
You might find lmfit (https://lmfit.github.io/lmfit-py) useful. This provides a higher-level interface to curve-fitting that allows, among other things, placing bounds on parameters so that they can remain physically sensible. I think your problem would translate from curve_fit to lmfit as:
from lmfit import Model
def balldepth(time, a, b, rhof):
    solutions = odeint(sinkingball, [y0,v0], time, args=(a, b, rhof))
    return solutions[:,0]
# create a model based on the model function "balldepth"
ballmodel = Model(balldepth)
# create parameters, which will be named using the names of the
# function arguments, and provide initial values
params = ballmodel.make_params(a=0.001, b=0.001, rhof=100)
# you wanted rhof to **not** vary in the fit:
params['rhof'].vary = False
# set min/max values on `a` and `b`:
params['a'].min = 0
params['b'].min = 0
# run the fit
result = ballmodel.fit(data, params, time=time)
# print out full report of results
print(result.fit_report())
# get / print out best-fit parameters:
for parname, param in result.params.items():
print("%s = %f +/- %f" % (parname, param.value, param.stderr))


how to correct leastsq() missing 1 required positional argument: 'x0'

A= df4["v_Acc"] #Output of the model
a=1.5 # Acceleration parameter
v=df4["v_Vel"] # Current speed
vo=20 # desired speed
s=df4["Space_Headway"] # Actual distance
so=2.0 # minimum distance gap
del_v=df4["v_Vel"]-df4["leader_Vel"] # relative speed =V_f - V_l
b=2.0 # comfortable deceleration
T=1 # time gap
delta=4 # Acceleration Exponent
import math
sx = so + v*T + (v*del_v)/(2* math.pow(a*b,0.5)) # desired distance
def residual(a,vo,so,b,T,A,v,del_v,s,delta):
    model = a(1-((v/vo)**delta- ((sx/s)**2)))
    return model-A
from scipy.optimize import leastsq
out = leastsq(residual, args=(a,vo,so,b,T,A,v,del_v,s,delta) )
I am trying to calibrate the IDM model with the NGSIM data. I was trying to find the values of the parameters a, b, T, so, vo using the initial guesses shown. However, I am not able to find the error in my least-squares method. What am I doing wrong here?
v=df4["v_Vel"].to_numpy()
s=df4["Space_Headway"].to_numpy()
del_v=(df4["v_Vel"]-df4["leader_Vel"]).to_numpy()
A= df4["v_Acc"].to_numpy()
def fcn2min(params, v,s,del_v, A):
    vo = params['vo']
    T = params['T']
    so = params['so']
    a = params['a']
    b = params['b']
    sx = so + v*T + (v*del_v)/(2*(math.sqrt(a*b)))
    model = a*(1-((v/vo)**4- ((sx/s)**2)))
    return (model-A)
from lmfit import Minimizer, Parameters, report_fit
params = Parameters()
params.add('vo', value=12,min=10,max=30)
params.add('T', value=1.2,min=1,max=5)
params.add('so', value=2.5,min=2,max=6)
params.add('a', value=2.1,min=2,max=6)
params.add('b', value=0.5,min=0.5,max=4)
minner = Minimizer(fcn2min, params, fcn_args=(v,s,del_v,A))
res=minner.minimize()
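The fitted values can then be inspected with report_fit (imported above), or read directly from the result. A short usage sketch:
# summary of best-fit values, uncertainties and correlations
report_fit(res)
# individual best-fit values
print(res.params['a'].value, res.params['b'].value, res.params['T'].value,
      res.params['so'].value, res.params['vo'].value)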

Python curve fitting with constraints

I have been looking into Python curve fitting with constraints. One option is to use the lmfit module; another is to use penalization to enforce the constraints. I have the following code, in which I am trying to enforce a+b=3.6 as the constraint. In other words, y=3.6 when x=1, and x is always >=1 in my case.
import numpy as np
import scipy.optimize as sio
def func(x, a, b, c):
    return a+b*x**c
x = [1, 2, 4, 8, 16]
y = [3.6, 3.96, 4.31, 5.217, 6.842]
lb = np.ones(3, dtype=float)
ub = np.ones(3, dtype=float)*10.
popt, pcov = sio.curve_fit(func, x, y)
print(popt)
Ideally, I would like to use the lmfit approach; I spent a good amount of time trying to understand the examples but could not succeed. Can someone help with an example?
If I understand your question correctly, you want to model some data with
def func(x, a, b, c):
    return a+b*x**c
and for a particular set of data you want to impose the constraint that a+b=3.6. You could, just "hardwire that in", changing the function to be
def func2(x, b, c):
    a = 3.6 - b
    return a+b*x**c
and now you have a model function with only two variables: b, and c.
That would not be very flexible but would work.
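For example, with the x and y lists above, the hardwired version could be fit with curve_fit directly (the initial values for b and c here are just illustrative guesses):
# two-parameter fit; a is implied by the constraint a = 3.6 - b
popt, pcov = sio.curve_fit(func2, x, y, p0=(1.6, 0.5))
b_fit, c_fit = popt
print(b_fit, c_fit, 3.6 - b_fit)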
Using lmfit gives back some of that flexibility. To do a completely unconstrained fit, you would say
from lmfit import Model
mymodel = Model(func)
params = mymodel.make_params(a=2, b=1.6, c=0.5)
result = mymodel.fit(y, params, x=x)
(Just as an aside: scipy.optimize.curve_fit permits you to not specify initial values for the parameters and implicitly sets them to 1 without telling you. This is a terrible mis-feature - always give initial values).
If you do want to impose the constraint a+b=3.6, you could then do
params['a'].expr = '3.6-b'
result2 = mymodel.fit(y, params, x=x)
print(result2.fit_report())
When I do that with the data you provided, this prints (note that it reports 2 variables, not 3):
[[Model]]
Model(func)
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 34
# data points = 5
# variables = 2
chi-square = 0.01066525
reduced chi-square = 0.00355508
Akaike info crit = -26.7510142
Bayesian info crit = -27.5321384
[[Variables]]
a: 3.28044833 +/- 0.04900625 (1.49%) == '3.6-b'
b: 0.31955167 +/- 0.04900626 (15.34%) (init = 1.6)
c: 0.86901253 +/- 0.05281279 (6.08%) (init = 0.5)
[[Correlations]] (unreported correlations are < 0.100)
C(b, c) = -0.994
Your code hinted at using (but did not actually use) upper and lower bounds for the parameter values. Those are also possible with lmfit, as with
params['b'].min = 1
params['b'].max = 10
and so forth. I'm not sure you need them here, and would caution against trying to set bounds too tightly.

Solving an Involved Set of Coupled Differential Equations

I am trying to solve a set of complicated differential equations in Python. These equations contain five functions (defined in the function 'ODEs' in the code below) that depend on a variable n (the Greek name is eta; I used n and eta interchangeably as variable names). The coupled differential equations also contain a function (called a) which depends on a parameter t. Since n (or eta) is also a function of t, my first goal was to express, again numerically, the function a as a function of n (eta). To do that I solved a less involved pair of differential equations, defined in the function 'coupledODE'. I got plots of a(t) and n(t) and used interpolation to build a model relating a to n, i.e. a numerical estimate of a(n). I called this interpolation model 'f_quad' (f for function, quad for quadratic interpolation).
Now, the original five differential equations actually contain a'(n)/a(n), where ' is derivative with respect to n. I numerically found an interpolator for this as well and called it 'deriv_quad'.
Now in 'ODEs' I am modelling the five differential equations, and as you can see in the code, the function body uses the interpolators 'f_quad' and 'deriv_quad' with argument n; these represent a(n) and a'(n) respectively. I then use the odeint function to solve these differential equations numerically, but I get the error message:
'A value in x_new is above the interpolation range.' The traceback points to the line 'f = odeint(ODEs, initials, n_new, atol=1.0e-8, rtol=1.0e-6)'.
I am not sure why this happened; I used the same eta values that I used when finding the interpolators 'f_quad' and 'deriv_quad'. Can anyone help me get rid of this error?
Here is the code:
import numpy as np
from scipy.misc import derivative
from scipy.integrate import odeint
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
import math
def coupledODE(x,t):
    #define important constants
    m = 0.315
    r = 0.0000926
    d = 0.685
    H_0 = 67.4
    a = x[0]
    eta = x[1]
    dndt = a**(-1)
    dadt = H_0 * (m*a**(-1) + r*a**(-2) + d*a**2)**(1/2)
    return [dadt, dndt]
#initial conditions
x0 = [1e-7, 1e-8]
t = np.linspace(0,0.01778301154,7500)
x = odeint(coupledODE, x0, t, atol=1.0e-8, rtol=1.0e-6) #vector of the functions a(t), n(t)
a = x[:,0]
n = x[:,1] #Eta; n is the greek letter eta
plt.semilogx(t,a)
plt.xlabel('time')
plt.ylabel('a(t)')
plt.show()
plt.semilogx(t,n)
plt.xlabel('time')
plt.ylabel('Eta')
plt.show()
plt.plot(n,a)
plt.xlabel('Eta')
plt.ylabel('a(t)')
plt.show()
##############################################################################
# Calculate the Derivative a' (i.e. da/d(eta) Numerically
##############################################################################
derivative_values = []
for i in range(7499):
    numerator = x[i+1,0] - x[i,0]
    denominator = x[i+1,1] - x[i,1]
    ratio = numerator / denominator
    derivative_values.append(ratio)
x_axis = n
x_axis = np.delete(x_axis, -1)
plt.plot(x_axis,derivative_values)
plt.xlabel('Eta')
plt.ylabel('Derivative of a')
plt.show()
##############################################################################
#Interpolation
##############################################################################
#Using quadratic interpolation
f_quad = interp1d(n, a, kind = 'quadratic')
deriv_quad = interp1d(x_axis, derivative_values, kind = 'quadratic')
n_new = np.linspace(1.0e-8, 0.0504473, num = 20000, endpoint = True)
plt.plot(n_new, f_quad(n_new))
plt.xlabel('Eta')
plt.ylabel('a')
plt.show()
plt.plot(n_new, deriv_quad(n_new))
plt.xlabel('Eta')
plt.ylabel('Derivative of a')
plt.show()
#print(x[0,1])
#print(eta_new[0])
#print(deriv_quad(1e-8))
##############################################################################
# The Main Coupled Equations
##############################################################################
def ODEs(x,n):
    fourPiG_3 = 1
    CDM_0 = 1
    Rad_0 = 1
    k = 0.005
    Phi = x[0]
    Theta1 = x[1]
    iSpeed = x[2]
    DeltaC = x[3]
    Theta0 = x[4]
    dPhi_dn = (fourPiG_3*((CDM_0*DeltaC)/deriv_quad(n) + (4*Rad_0)/(f_quad(n)*deriv_quad(n)))
               - (k**2*Phi*f_quad(n))/deriv_quad(n) - (deriv_quad(n)*Phi)/f_quad(n))
    dTheta1_dn = (k/3)*(Theta0 - Phi)
    diSpeed_dn = -k*Phi - (deriv_quad(n)/f_quad(n))*iSpeed
    dDeltaC_dn = -k*iSpeed - 3*dPhi_dn
    dTheta0_dn = -k*Theta1 - dPhi_dn
    return [dPhi_dn, dTheta1_dn, diSpeed_dn, dDeltaC_dn, dTheta0_dn]
hub_0 = deriv_quad(1e-8)/f_quad(1e-8)
#Now the initial conditions
Phi_k_0 = 1.0e-5
Theta1_k_0 = -1*Phi_k_0/hub_0 #Ask about the k
iSpeed_k_0 = 3*Theta1_k_0
DeltaC_k_0 = 1.5 * Phi_k_0
Theta0_k_0 = 0.5 * Phi_k_0
initials = [Phi_k_0, Theta1_k_0, iSpeed_k_0, DeltaC_k_0, Theta0_k_0]
####Error Happens Here ####
f = odeint(ODEs, initials, n_new, atol=1.0e-8, rtol=1.0e-6)
Phi = f[:,0]
Theta1 = f[:,1]
iSpeed = f[:,2]
DeltaC = f[:,3]
Theta0 = f[:,4]
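One way to see where the error comes from is to compare the range that n_new is evaluated over with the range of eta values the interpolators were actually built on. A diagnostic sketch (the clipped grid at the end is only an illustration, not part of the original code):
# Diagnostic sketch: the interpolators are only valid inside the span of
# eta values produced by the first integration, and deriv_quad loses the
# final point, so its range is slightly smaller still.
print('eta from the first integration:   ', n.min(), n.max())
print('range of the deriv_quad grid:     ', x_axis.min(), x_axis.max())
print('eta grid passed to the main ODEs: ', n_new.min(), n_new.max())
# One option is to keep the evaluation grid strictly inside both ranges
# (interp1d also accepts bounds_error=False, fill_value="extrapolate",
# but extrapolating a quadratic interpolant can behave badly).
n_new_safe = np.linspace(n.min(), min(n.max(), x_axis.max()),
                         num=20000, endpoint=True)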

Difference in result between fmin and fminsearch in Matlab and Python

My objective is to perform an Inverse Laplace Transform on some decay data (NMR T2 decay via CPMG). For that, we were provided with the CONTIN algorithm. This algorithm was adapted to Matlab by Iari-Gabriel Marino, and it works very well. I want to adapt this code into Python. The core of the problem is with scipy.optimize.fmin, which is not minimizing the mean square deviation (MSD) in any way similar to Matlab's fminsearch. The latter results in a good minimization, while the former doesn't.
I have gone through line by line of my adapted code in Python, and the original Matlab. I checked every matrix and every output. I used this to identify that the critical point is in fmin. I also tried scipy.optimize.minimize and other minimization algorithms, but none gave even remotely satisfactory results.
I have made two MWEs, one for Python and one for Matlab, to make this reproducible for everyone. The example data were obtained from the documentation of the Matlab function. Apologies for the long code, but I don't really know how to shorten it without sacrificing readability and clarity; I tried to have the lines match as closely as possible. I am using Python 3.7.3, scipy 1.3.0, numpy 1.16.2, and Matlab R2018b on Windows 8.1. It's a relatively recent Anaconda install (<2 months old).
My code:
import numpy as np
from scipy.optimize import fmin
import matplotlib.pyplot as plt
def msd(g, y, A, alpha, R, w, constraints):
""" msd: mean square deviation. This is the function to be minimized by fmin"""
if 'zero_at_extremes' in constraints:
g[0] = 0
g[-1] = 0
if 'g>0' in constraints:
g = np.abs(g)
r = np.diff(g, axis=0, n=2)
yfit = A # g
# Sum of weighted square residuals
VAR = np.sum(w * (y - yfit) ** 2)
# Regularizor
REG = alpha ** 2 * np.sum((r - R # g) ** 2)
# output to be minimized
return VAR + REG
# Objective: match this distribution
g0 = np.array([0, 0, 10.1625, 25.1974, 21.8711, 1.6377, 7.3895, 8.736, 1.4256, 0, 0]).reshape((-1, 1))
s0 = np.logspace(-3, 6, len(g0)).reshape((-1, 1))
t = np.linspace(0.01, 500, 100).reshape((-1, 1))
sM, tM = np.meshgrid(s0, t)
A = np.exp(-tM / sM)
np.random.seed(1)
# Creates data from the initial distribution with some random noise.
data = (A @ g0) + 0.07 * np.random.rand(t.size).reshape((-1, 1))
# Parameters and function start
alpha = 1E-2 # regularization parameter
s = np.logspace(-3, 6, 20).reshape((-1, 1)) # x of the ILT
g0 = np.ones(s.size).reshape((-1, 1)) # guess of y of ILT
y = data # noisy data
options = {'maxiter':1e8, 'maxfun':1e8} # for the fmin function
constraints=['g>0', 'zero_at_extremes'] # constraints for the MSD function
R=np.zeros((len(g0) - 2, len(g0)), order='F') # Regularizor
w=np.ones(y.reshape(-1, 1).size).reshape((-1, 1)) # Weights
sM, tM = np.meshgrid(s, t, indexing='xy')
A = np.exp(-tM/sM)
g0 = g0 * y.sum() / (A @ g0).sum() # Makes a "better guess" for the distribution, according to algorithm
print('msd of input data:\n', msd(g0, y, A, alpha, R, w, constraints))
for i in range(5): # Just for testing. If this is extremely high, ~1000, it's still bad.
    g = fmin(func=msd,
             x0=g0,
             args=(y, A, alpha, R, w, constraints),
             **options,
             disp=True)[:, np.newaxis]
    msdfit = msd(g, y, A, alpha, R, w, constraints)
    if 'zero_at_extremes' in constraints:
        g[0] = 0
        g[-1] = 0
    if 'g>0' in constraints:
        g = np.abs(g)
    g0 = g
print('New guess', g)
print('Final msd of g', msdfit)
# Visualize the fit
plt.plot(s, g, label='Initial approximation')
plt.plot(np.logspace(-3, 6, 11), np.array([0, 0, 10.1625, 25.1974, 21.8711, 1.6377, 7.3895, 8.736, 1.4256, 0, 0]), label='Distribution to match')
plt.xscale('log')
plt.legend()
plt.show()
Matlab:
% Objective: match this distribution
g0 = [0 0 10.1625 25.1974 21.8711 1.6377 7.3895 8.736 1.4256 0 0]';
s0 = logspace(-3,6,length(g0))';
t = linspace(0.01,500,100)';
[sM,tM] = meshgrid(s0,t);
A = exp(-tM./sM);
rng(1);
% Creates data from the initial distribution with some random noise.
data = A*g0 + 0.07*rand(size(t));
% Parameters and function start
alpha = 1e-2; % regularization parameter
s = logspace(-3,6,20)'; % x of the ILT
g0 = ones(size(s)); % initial guess of y of ILT
y = data; % noisy data
options = optimset('MaxFunEvals',1e8,'MaxIter',1e8); % constraints for fminsearch
constraints = {'g>0','zero_at_the_extremes'}; % constraints for MSD
R = zeros(length(g0)-2,length(g0));
w = ones(size(y(:)));
[sM,tM] = meshgrid(s,t);
A = exp(-tM./sM);
g0 = g0*sum(y)/sum(A*g0); % Makes a "better guess" for the distribution
disp('msd of input data:')
disp(msd(g0, y, A, alpha, R, w, constraints))
for k = 1:5
    [g,msdfit] = fminsearch(@msd,g0,options,y,A,alpha,R,w,constraints);
    if ismember('zero_at_the_extremes',constraints)
        g(1) = 0;
        g(end) = 0;
    end
    if ismember('g>0',constraints)
        g = abs(g);
    end
    g0 = g;
end
disp('New guess')
disp(g)
disp('Final msd of g')
disp(msdfit)
% Visualize the fit
semilogx(s, g)
hold on
semilogx(logspace(-3,6,11), [0 0 10.1625 25.1974 21.8711 1.6377 7.3895 8.736 1.4256 0 0])
legend('First approximation', 'Distribution to match')
hold off
function out = msd(g,y,A,alpha,R,w,constraints)
    % msd: The mean square deviation; this is the function
    % that has to be minimized by fminsearch
    % Constraints and any 'a priori' knowledge
    if ismember('zero_at_the_extremes',constraints)
        g(1) = 0;
        g(end) = 0;
    end
    if ismember('g>0',constraints)
        g = abs(g); % must be g(i)>=0 for each i
    end
    r = diff(diff(g(1:end))); % second derivative of g
    yfit = A*g;
    % Sum of weighted square residuals
    VAR = sum(w.*(y-yfit).^2);
    % Regularizor
    REG = alpha^2 * sum((r-R*g).^2);
    % Output to be minimized
    out = VAR+REG;
end
[Plot: the optimization in Python]
[Plot: the optimization in Matlab]
I have checked the output of MSD of g0 before starting, and both give the value of 2651. After minimization, Python goes up, to 4547, and Matlab goes down to 0.1381.
I think the problem is one of the following: it's in my implementation, i.e. I am using fmin wrong, or there's some other step I got wrong, but I can't figure out what. The fact that the MSD increases when it should decrease under a minimization function is damning. Reading the documentation, Matlab's implementation differs from scipy's: Matlab uses the Nelder-Mead method described by Lagarias et al. (per its documentation), while scipy uses the original Nelder-Mead. Maybe that matters significantly? Or perhaps my initial guess is simply too bad for scipy's algorithm?
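For reference, scipy's Nelder-Mead stopping tolerances and a dimension-adaptive simplex can be set explicitly through scipy.optimize.minimize. A hedged sketch of how that call might look, with g0, y and w flattened so that y - A @ g stays one-dimensional inside msd:
from scipy.optimize import minimize

# Nelder-Mead with explicit tolerances; adaptive=True rescales the simplex
# parameters with the problem dimension (20 variables here) and needs a
# reasonably recent scipy.
res = minimize(msd, g0.ravel(),
               args=(y.ravel(), A, alpha, R, w.ravel(), constraints),
               method='Nelder-Mead',
               options={'maxiter': int(1e6), 'maxfev': int(1e6),
                        'xatol': 1e-10, 'fatol': 1e-10, 'adaptive': True})
g = res.x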
So, quite a long time since I posted this, but I wanted to share what I ended up learning and doing.
The Inverse Laplace Transform for CPMG data is a bit of a misnomer, and it's more properly called just inversion. The general problem is solving a Fredholm integral of the first kind. One way of doing this is the Tikhonov regularization method. Turns out, you can describe this problem quite easily using numpy, and solve it with a scipy package, so I don't have to "reinvent" the wheel with this.
I used the solution shown in this post, and the names here reflect that solution.
from scipy.optimize import nnls  # non-negative least squares solver

def tikhonov_regularized_inversion(
    kernel: np.ndarray, alpha: float, data: np.ndarray
) -> np.ndarray:
    data = data.reshape(-1, 1)
    I = alpha * np.eye(*kernel.shape)
    C = np.concatenate([kernel, I], axis=0)
    d = np.concatenate([data, np.zeros_like(data)])
    x, _ = nnls(C, d.flatten())
    return x
Here, kernel is a matrix containing all the possible exponential decay curves, and my solution judges the contribution of each decay curve in the data I received. First, I stack my data as a column, then pad it with zeros, creating the vector d. I then stack my kernel on top of a diagonal matrix containing the regularization parameter alpha along the diagonal, of the same size as the kernel. Last, I call the convenient nnls, a non-negative least-squares solver in scipy.optimize. I use it because there's no reason for a negative contribution, only no contribution.
This solved my problem, it's quick and convenient.
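A short usage sketch, reusing the names from the earlier example (s is the grid of candidate decay times, t the acquisition times, data the noisy decay, and 1e-2 the regularization strength):
# build the kernel: one candidate exponential decay per column
sM, tM = np.meshgrid(s, t)
kernel = np.exp(-tM / sM)
g = tikhonov_regularized_inversion(kernel, alpha=1e-2, data=data)

plt.plot(s.ravel(), g)
plt.xscale('log')
plt.xlabel('decay time')
plt.ylabel('contribution')
plt.show()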

Simultaneous numerical fit of two equations using Numpy least square method

I am trying to fit the two equations below using Python's leastsq method, but I am not sure whether this is the right approach. The first equation contains an incomplete gamma function, while the second one is slightly more complex: along with an exponential, it contains a term that is obtained from a separate fitting formula.
J_mg = T_incomplete(hw/T_mag)
J_nmg = e^(-hw/T)*g(w,T)
Here g is a function of w and T and is calculated using a given fitting formula.
I am following the steps outlined in this question.
Here is what I have done
import numpy as np
from scipy.optimize import leastsq
from scipy.special import gammaincc
from scipy.special import gamma
from matplotlib.pyplot import plot
# generating data
NPTS = 10
hw = np.linspace(0.5, 10, NPTS)
j1 = np.linspace(0.001,10,NPTS)
j2 = np.linspace(0.003,10,NPTS)
T_mag = np.linspace(0.3,0.5,NPTS)
#defining functions
def calc_gaunt_factor(hw,T):
    fitting_coeff= np.loadtxt('fitting_coeff.txt', skiprows=1)
    #T is in KeV
    #K_b = 8.6173303(50)e-5 eV/K
    g = 0
    gamma = 0.0136/T
    theta= hw/T
    A= (np.log10(gamma**2) +0.5)*0.4
    B= (np.log10(theta)+1.5)*0.4
    for i in range(11):
        for j in range(11):
            g_ij = fitting_coeff[i][j]*(A**i)*(B**j)
            g = g_ij+g
    return g
def j_w_mag(hw,T_mag):
    order= 0.001
    return np.sqrt(1/T_mag)*gamma(order)*gammaincc(order,hw/T_mag)
def j_w_nonmag(hw,T):
    gamma = 0.0136/T
    theta= hw/T
    return np.sqrt(1/T)*np.exp((-hw)/T)*calc_gaunt_factor(hw,T)
def residual_func(T,T_mag,hw,j1,j2):
    err_unmag = np.nan_to_num(j1 - j_w_nonmag(hw,T))
    err_mag = np.nan_to_num(j2 - j_w_mag(hw,T_mag))
    err= np.concatenate((err_unmag, err_mag))
    return err
par_init = np.array([.35])
best, cov, info, message, ler = leastsq(residual_func,par_init,args=(T_mag,hw,j1,j2),full_output=True)
print("Best-Fit Parameters:")
print("T=%s" %(best[0]))
I am getting a weird value for my fitting parameter T. Is this the right approach? Thanks.
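For reference, leastsq treats the first argument of the residual function as the parameter vector, so if T_mag were also a scalar to be fitted (rather than the fixed array above), the usual pattern is to pack both temperatures into that vector. A minimal sketch under that assumption:
def residual_both(p, hw, j1, j2):
    # p packs all fit parameters; unpack them here
    T, T_mag = p
    err_unmag = np.nan_to_num(j1 - j_w_nonmag(hw, T))
    err_mag = np.nan_to_num(j2 - j_w_mag(hw, T_mag))
    return np.concatenate((err_unmag, err_mag))

p_init = np.array([0.35, 0.4])
best, cov, info, message, ier = leastsq(residual_both, p_init,
                                        args=(hw, j1, j2), full_output=True)
print("T = %s, T_mag = %s" % (best[0], best[1]))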
