Fitting two Gaussians with Python

I'm working on the analysis of some data. Theoretically, there should be two Gaussians that overlap more or less. I found that fitting works best if you fit not the two Gaussians themselves but their cumulative distribution function. That fit actually seems to work rather well, but when I go back to the density representation of the data it looks somehow weird. The attached pictures speak for themselves. Any idea what goes wrong there?
Here is my code:
import numpy as np
import scipy
import scipy.special
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import leastsq
from scipy.special import erf

def fitfunction(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return amp_ratio * 0.5 * (1 + erf((Bins - mu)/np.sqrt(2*sigma1**2))) + (1 - amp_ratio) * 0.5 * (1 + erf((Bins - (mu + Delta))/np.sqrt(2*sigma2**2)))

def errorfunction(params, Reale_werte, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    if (amp_ratio > 0) and (amp_ratio < 1):
        return (Reale_werte - fitfunction(params, Bins))
    else:
        return (Reale_werte - fitfunction(params, Bins))*100

def Gaussians(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return amp_ratio/np.sqrt(2*np.pi*sigma1*sigma1) * np.exp(-((Bins-mu)**2) / np.sqrt(2*np.pi*sigma1*sigma1)) + (1-amp_ratio)/np.sqrt(2*np.pi*sigma2*sigma2) * np.exp(-((Bins-(mu + Delta))**2) / np.sqrt(2*np.pi*sigma2*sigma2))

def Gaussian1(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return amp_ratio/np.sqrt(2*np.pi*sigma1*sigma1) * np.exp(-((Bins-mu)**2) / np.sqrt(2*np.pi*sigma1*sigma1))

def Gaussian2(params, Bins):
    amp_ratio, sigma1, sigma2, mu, Delta = params
    return (1-amp_ratio)/np.sqrt(2*np.pi*sigma2*sigma2) * np.exp(-((Bins-(mu + Delta))**2) / np.sqrt(2*np.pi*sigma2*sigma2))

params = 0.25, 1, 10, 1, 5
params_init = 0.75, 0.8, 2.5, 1.2, 4
Bins = np.linspace(-4, 18, 1024)
data = Gaussians(params, Bins)

# Cumulative sum of the density, normalised so the last value is 1
summe = np.zeros_like(Bins)
for i in range(Bins.shape[0]-1):
    summe[i+1] = summe[i] + data[i]
summe = summe/summe[Bins.shape[0]-1]

params_result = leastsq(errorfunction, params_init, args=(summe, Bins))
plt.plot(Bins, fitfunction(params_result[0], Bins))
plt.plot(Bins, summe, 'x')
plt.savefig("Distribution.png")
plt.show()

print(params_result[0])
plt.plot(Bins, Gaussians(params_result[0], Bins))
plt.plot(Bins, data, 'x')
plt.savefig("Gaussian.png")
plt.show()
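One thing worth checking, and a plausible cause of the odd-looking density plot: in Gaussians, Gaussian1 and Gaussian2 above, the exponent divides by np.sqrt(2*np.pi*sigma*sigma), whereas the standard normal density divides the squared deviation by 2*sigma**2. A minimal sketch of the textbook form, for comparison:

import numpy as np

def gaussian_pdf(x, mu, sigma):
    # Standard normal density: the exponent uses 2*sigma**2;
    # the sqrt(2*pi*sigma**2) factor belongs only in the prefactor.
    return np.exp(-(x - mu)**2 / (2 * sigma**2)) / np.sqrt(2 * np.pi * sigma**2)

def two_gaussians(params, x):
    amp_ratio, sigma1, sigma2, mu, delta = params
    return (amp_ratio * gaussian_pdf(x, mu, sigma1)
            + (1 - amp_ratio) * gaussian_pdf(x, mu + delta, sigma2))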

I was wondering whether kernel density estimation works in your case:
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

density = gaussian_kde(x)  # x is your data
xgrid = np.linspace(x.min(), x.max(), 1024)
plt.plot(xgrid, density(xgrid))
plt.show()
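A self-contained sketch of the same idea, with a synthetic bimodal sample standing in for the real data (the sample itself is an assumption, for illustration only):

import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
# Synthetic bimodal sample: two overlapping Gaussian components
x = np.concatenate([rng.normal(1.0, 1.0, 2000), rng.normal(6.0, 3.0, 6000)])

density = gaussian_kde(x)  # bandwidth chosen by Scott's rule by default
xgrid = np.linspace(x.min(), x.max(), 1024)
plt.plot(xgrid, density(xgrid))
plt.show()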

Related

Multiprocess not recognizing defined function

I'm trying to fit a simple Gaussian model to some data with the Markov chain Monte Carlo package emcee while using parallel processing. I am using multiprocess instead of multiprocessing because I am working in a Jupyter notebook, where multiprocessing often runs into problems. Currently the code reports that one of my functions is not defined, even though it was defined earlier; it does not seem to be carried over once the parallelization starts. Specifically, the error occurs on the very last line, saying that "log_prior" is not defined. Any thoughts on how to fix this would be much appreciated.
import numpy as np
import emcee
from scipy.optimize import minimize
import matplotlib.pyplot as plt

np.random.seed(123)

# Choose the "true" parameters.
mu_true = 5
sig_true = 0.5
f_true = 0.534

# Generate some synthetic data from the model.
N = 500
x = np.sort(10 * np.random.rand(N))
yerr = 0.03 + 0.05 * np.random.rand(N)
y = np.exp(-np.power(x - mu_true, 2.) / (2 * np.power(sig_true, 2.)))
y += 0.5 * np.abs(f_true * y) * np.random.randn(N)
y += yerr * np.random.randn(N)

def log_likelihood(theta, x, y, yerr):
    mu, sig, log_f = theta
    model = np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
    sigma2 = yerr**2 + model**2 * np.exp(2 * log_f)
    return -0.5 * np.sum((y - model) ** 2 / sigma2 + np.log(sigma2))

np.random.seed(42)
nll = lambda *args: -log_likelihood(*args)
initial = np.array([mu_true, sig_true, np.log(f_true)]) + 0.1 * np.random.randn(3)
soln = minimize(nll, initial, args=(x, y, yerr))
mu_ml, sig_ml, log_f_ml = soln.x

def log_prior(theta):
    mu, sig, log_f = theta
    if 2 < mu < 10 and 0.0 < sig < 5 and -10.0 < log_f < 1.0:
        return 0.0
    return -np.inf

def log_probability(theta, x, y, yerr):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta, x, y, yerr)

import time
from multiprocess import Pool

pos = soln.x + 0.2 * np.random.randn(32, 3)
nwalkers, ndim = pos.shape
nsteps = 10000

if __name__ == "__main__":
    with Pool() as pool:
        sampler = emcee.EnsembleSampler(
            nwalkers, ndim, log_probability, args=(x, y, yerr), pool=pool)
        sampler.run_mcmc(pos, nsteps, progress=True)
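One plausible explanation (an assumption on my part, since the full traceback isn't shown): with the spawn start method, worker processes re-import __main__ and do not inherit names defined interactively in notebook cells, so log_prior never exists on the workers. A common workaround is to move the model functions into an importable module, e.g. a hypothetical mcmc_model.py, and import log_probability from it before creating the pool:

# mcmc_model.py -- hypothetical module, importable by worker processes
import numpy as np

def log_prior(theta):
    mu, sig, log_f = theta
    if 2 < mu < 10 and 0.0 < sig < 5 and -10.0 < log_f < 1.0:
        return 0.0
    return -np.inf

def log_likelihood(theta, x, y, yerr):
    mu, sig, log_f = theta
    model = np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))
    sigma2 = yerr**2 + model**2 * np.exp(2 * log_f)
    return -0.5 * np.sum((y - model) ** 2 / sigma2 + np.log(sigma2))

def log_probability(theta, x, y, yerr):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta, x, y, yerr)

Then, in the notebook, replace the cell-local definitions with "from mcmc_model import log_probability" so the workers can resolve the same function by import.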

Fit a curve with a SIR model and predict

I have searched for and reused code for fitting the SIR model from others on this website, but the fit is very poor. Is there something wrong with my data, or is it something else? And how should I predict from this SIR model given new data?
import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate, optimize
import pandas as pd

y_total = [0.0, 0.0010131712259371835, 0.0035460992907801418, 0.00911854103343465,
           0.008611955420466059, 0.021783181357649443, 0.00911854103343465, 0.07852077001013172, 0.4397163120567376,
           0.21681864235055726, 0.232016210739615, 0.5278622087132725, 0.13576494427558258, 0.2988855116514691, 0.37436676798378926,
           0.4209726443768997, 0.544579533941236, 0.7254305977710233, 1.0, 0.7740628166160081, 0.43617021276595747, 0.48226950354609927]
x_total = range(0, 22)
ydata = np.array(y_total, dtype=float)
xdata = np.array(x_total, dtype=float)

# I0 + S0 + R0 is always 1 regardless of the value of I0
I0 = 0.3
S0 = 1 - I0
R0 = 0

def sir_model(y, x, beta, gamma):
    S = -beta * y[0] * y[1] / N
    R = gamma * y[1]
    I = -(S + R)
    return S, I, R

def fit_odeint(x, beta, gamma):
    return integrate.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:,1]

N = 1.0
I0 = ydata[0]
S0 = N - I0
R0 = 0.0

popt, pcov = optimize.curve_fit(fit_odeint, xdata, ydata)
fitted = fit_odeint(xdata, *popt)
plt.plot(xdata, ydata, 'o')
plt.plot(xdata, fitted)
plt.show()
Make the initial infected number a fit variable as well, which is easily done by shifting the computation of the initial state into the target function:
def sir_model(y, x, beta, gamma):
    N = sum(y)
    S = -beta * y[0] * y[1] / N
    R = gamma * y[1]
    I = -(S + R)
    return S, I, R

def fit_odeint(x, beta, gamma, I0):
    # I0 + S0 + R0 is always 1 regardless of the value of I0
    S0 = 1 - I0
    R0 = 0
    return integrate.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:,1]

popt, pcov = optimize.curve_fit(fit_odeint, xdata, ydata, (1/5, 1/8, 0.1))
There were some other changes, most importantly adding an initial guess to curve_fit. This still produces a warning about some difficulty in odeint, but a result is reached anyway, with popt = [0.36714402, 0.04176973, 0.01311579] and 1/popt = [2.72372678, 23.94078424, 76.2439491]. Changing the initial guess to values close to this result, (1/3, 1/24, 0.05), eliminates the warning.
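Putting those pieces together, a consolidated sketch of the fit with the improved initial guess (1/3, 1/24, 0.05):

import numpy as np
import matplotlib.pyplot as plt
from scipy import integrate, optimize

y_total = [0.0, 0.0010131712259371835, 0.0035460992907801418, 0.00911854103343465,
           0.008611955420466059, 0.021783181357649443, 0.00911854103343465, 0.07852077001013172,
           0.4397163120567376, 0.21681864235055726, 0.232016210739615, 0.5278622087132725,
           0.13576494427558258, 0.2988855116514691, 0.37436676798378926, 0.4209726443768997,
           0.544579533941236, 0.7254305977710233, 1.0, 0.7740628166160081, 0.43617021276595747,
           0.48226950354609927]
ydata = np.array(y_total, dtype=float)
xdata = np.arange(len(y_total), dtype=float)

def sir_model(y, x, beta, gamma):
    N = sum(y)
    S = -beta * y[0] * y[1] / N
    R = gamma * y[1]
    I = -(S + R)
    return S, I, R

def fit_odeint(x, beta, gamma, I0):
    # S0 + I0 + R0 is always 1
    S0 = 1 - I0
    R0 = 0
    return integrate.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:, 1]

popt, pcov = optimize.curve_fit(fit_odeint, xdata, ydata, p0=(1/3, 1/24, 0.05))
plt.plot(xdata, ydata, 'o', label='data')
plt.plot(xdata, fit_odeint(xdata, *popt), label='fit')
plt.legend()
plt.show()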

Why doesn't my Gaussian PDF integrate to one?

My normal PDF:
import numpy as np
import scipy.integrate

def gaussian(x, mu=0, sigma=1):
    return 1/(np.sqrt(2*np.pi*sigma)) * np.exp(-(x-mu)**2 / (0.5*sigma)**2)

Integrating over the entire support:
scipy.integrate.quad(gaussian, -np.inf, np.inf)
returns
(0.3535533905932738, 1.4635936470160148e-11)
I know I messed up somewhere in the PDF, but I've been staring at it for an hour and I can't see it.
Your gaussian function is incorrect. It should be:
def gaussian(x, mu=0, sigma=1):
    return (1/(sigma * np.sqrt(2 * np.pi))) * np.exp((-(x-mu)**2) / (2 * sigma ** 2))
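A quick check that the corrected density integrates to one:

import numpy as np
import scipy.integrate

def gaussian(x, mu=0, sigma=1):
    return (1/(sigma * np.sqrt(2 * np.pi))) * np.exp((-(x-mu)**2) / (2 * sigma ** 2))

print(scipy.integrate.quad(gaussian, -np.inf, np.inf))
# approximately (1.0, ~1e-08): the value and quad's error estimate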

I want to use a Dirac delta as a function of time to solve four coupled differential equations. I am not getting it right

I want to use a Dirac delta as a function of time in a Python code that solves four coupled differential equations with solve_ivp. I am not getting it right: I am not able to define the Dirac delta function. Please help me if anyone can.
import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate
from pylab import *
from qutip import *
import scipy.special as sp
import scipy.linalg as la
from scipy.integrate import solve_ivp
import math
import cmath
from sympy import DiracDelta, diff, pi
from scipy import signal

omg1 = 1
omg2 = 1.1
Omg = 1
Omegar = 10
beta = 0.1
Mu = 0.5
epsilon = 0.01
V = 1
K = 0.5
g = 1
m = 1
hcut = 1
A = 0.001
p = [omg1, omg2, Omg, Omegar, beta, Mu, K, g, m]

#------------------------------------------------------------------------------
#####----------------Initial conditions, packed in w0--------------------------
##### IMPORTANT NOTE:
##### Please feed initial values with a complex part even if it's zero
# y1 & y2 are first derivatives of x1 and x2
x1 = 1
x2 = 0
#------------------------------------------------------------------------------
z1 = 0+0j
z2 = 1+0j
#A = 0
w0 = [x1, x2, z1, z2]

####----------------Function model passed to the ode solver--------------------
def f(t):
    result = 0
    for i in range(-25, 25, 1):
        result = result + 1.0*DiracDelta(t - (i+1)*2*np.pi)

def vectorfield(t, w, omg1, omg2, Omg, Omegar, beta, Mu, K, g, m):
    x1, x2, z1, z2 = w
    #result = 0
    #for i in range(-10,10,1):
    #    result = result + 1.0*DiracDelta(t-(i+1)*np.pi)
    field = [((g*np.sqrt(2*(x1)/(m*Omegar))*np.sin(x2))*(-0.5* abs(z2)**2 * np.cos(np.pi/3) + 0.5* np.conj(z1)* z2* np.sin(np.pi/3) + 0.5*
              abs(z1)**2*np.cos(np.pi/3) + 0.5* np.conj(z2)* z1* np.sin(np.pi/3))-K*np.sin(x2)*f(t)),
             ((-g/np.sqrt((2*(x1)*m*Omegar))*np.cos(x2))*(-0.5* abs(z2)**2 * np.cos(np.pi/3) + 0.5* np.conj(z1)* z2* np.sin(np.pi/3) + 0.5*
              abs(z1)**2*np.cos(np.pi/3) + 0.5* np.conj(z2)* z1* np.sin(np.pi/3))+x1),
             ((z1*(0.5*Omg+0.5*g*np.sqrt(2*x1/(m*Omegar))*np.cos(x2)*np.cos(np.pi/3))+0.5*z2*g*np.sqrt(2*x1/(m*Omegar))*np.cos(x2)*np.sin(np.pi/3)) * -1j * (1/hcut) * (1/(cmath.sqrt(abs(z1)**2 + abs(z2)**2)))),
             ((z2*(-0.5*Omg+0.5*g*np.sqrt(2*x1/(m*Omegar))*np.cos(x2)*np.cos(np.pi/3))+0.5*g*np.sqrt(2*x1/(m*Omegar))*np.cos(x2)*z1*np.sin(np.pi/3)) * -1j * (1/hcut) * (1/(cmath.sqrt(abs(z1)**2 + abs(z2)**2))))]
    #field1 = np.array(field, dtype='complex_')
    #print(abs(z1)**2 + abs(z2)**2)
    print(z2)
    return field

duration = 50
# time points
t = np.linspace(0, duration, 100)
abserr = 1.0e-10
relerr = 1.0e-6
#solution = odeint(vectorfield, w0, t, args=(p,))
solution = solve_ivp(vectorfield, [0, duration], w0, t_eval=t, args=(p), atol=abserr,
                     rtol=relerr)

lw = 1
plot1 = plt.figure(1)
plt.style.use('seaborn-darkgrid')
plt.xlabel('time(t)')
plt.grid(True)
####----------------Plotting the oscillator dynamics---------------------------
plt.plot(t, solution.y[0,:], 'b', label='I', linewidth=lw)
plt.plot(t, solution.y[1,:], 'r', label='$\Theta$', linewidth=lw)
plt.plot(t, solution.y[2,:], 'g', label='x1(t)', linewidth=lw)
plt.plot(t, solution.y[3,:], 'orange', label='x2(t)', linewidth=lw)
plt.legend()

expect_1 = np.absolute(solution.y[2,:])**2 - np.absolute(solution.y[3,:])**2
plot2 = plt.figure(2)
plt.xlabel('time(t)')
#plt.plot(t, result, 'm', label='z_expect2', linewidth=lw)
plt.plot(t, solution.y[0,:], 'b', label='I', linewidth=lw)
plt.legend()
plt.show()
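Two things stand out: f above never returns result, so it yields None inside vectorfield, and sympy's DiracDelta is a symbolic object that cannot be evaluated at the floating-point times solve_ivp supplies. A common numerical stand-in (a sketch, not the only approach) replaces each delta by a narrow unit-area Gaussian pulse and caps the solver's step size so the pulses are not stepped over:

import numpy as np

def dirac_comb(t, width=1e-3, n=25):
    # Approximate sum_k delta(t - 2*pi*k) by narrow Gaussians of unit area
    centers = 2 * np.pi * np.arange(-n, n + 1)
    return np.sum(np.exp(-(t - centers)**2 / (2 * width**2))) / (width * np.sqrt(2 * np.pi))

# Use dirac_comb(t) in place of f(t) inside vectorfield, and pass
# max_step=width/2 to solve_ivp so the integrator resolves each pulse.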

How do I put a constraint on SciPy curve fit?

I'm trying to fit the distribution of some experimental values with a custom probability density function. Obviously, the integral of the resulting function should always be equal to 1, but the results of a simple scipy.optimize.curve_fit(function, dataBincenters, dataCounts) never satisfy this condition.
What is the best way to solve this problem?
You can define your own residuals function that includes a penalization term, as detailed in the code below, where it is known beforehand that the integral over the interval must be 2. If you test without the penalization, you will see that you get the conventional curve_fit result:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit, leastsq
from scipy.integrate import quad

x = np.linspace(0, np.pi, 100)
y = np.sin(x) + (0. + np.random.rand(len(x))*0.4)

def func1(x, a0, a1, a2, a3):
    return a0 + a1*x + a2*x**2 + a3*x**3

# here you include the penalization factor
def residuals(p, x, y):
    integral = quad(func1, 0, np.pi, args=(p[0], p[1], p[2], p[3]))[0]
    penalization = abs(2. - integral)*10000
    return y - func1(x, p[0], p[1], p[2], p[3]) - penalization

popt1, pcov1 = curve_fit(func1, x, y)
popt2, pcov2 = leastsq(func=residuals, x0=(1., 1., 1., 1.), args=(x, y))
y_fit1 = func1(x, *popt1)
y_fit2 = func1(x, *popt2)

plt.scatter(x, y, marker='.')
plt.plot(x, y_fit1, color='g', label='curve_fit')
plt.plot(x, y_fit2, color='y', label='constrained')
plt.legend()
plt.xlim(-0.1, 3.5)
plt.ylim(0, 1.4)
print('Exact integral:', quad(np.sin, 0, np.pi)[0])
print('Approx integral1:', quad(func1, 0, np.pi, args=(popt1[0], popt1[1], popt1[2], popt1[3]))[0])
print('Approx integral2:', quad(func1, 0, np.pi, args=(popt2[0], popt2[1], popt2[2], popt2[3]))[0])
plt.show()

# Exact integral: 2.0
# Approx integral1: 2.60068579748
# Approx integral2: 2.00001911981
Other related questions:
SciPy LeastSq Goodness of Fit Estimator
Here is an almost identical snippet which makes use only of curve_fit:
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
import scipy.integrate as integr

x = np.linspace(0, np.pi, 100)
y = np.sin(x) + (0. + np.random.rand(len(x))*0.4)

def Func(x, a0, a1, a2, a3):
    return a0 + a1*x + a2*x**2 + a3*x**3

# modified function definition with penalization
def FuncPen(x, a0, a1, a2, a3):
    integral = integr.quad(Func, 0, np.pi, args=(a0, a1, a2, a3))[0]
    penalization = abs(2. - integral)*10000
    return a0 + a1*x + a2*x**2 + a3*x**3 + penalization

popt1, pcov1 = opt.curve_fit(Func, x, y)
popt2, pcov2 = opt.curve_fit(FuncPen, x, y)
y_fit1 = Func(x, *popt1)
y_fit2 = Func(x, *popt2)

plt.scatter(x, y, marker='.')
plt.plot(x, y_fit2, color='y', label='constrained')
plt.plot(x, y_fit1, color='g', label='curve_fit')
plt.legend()
plt.xlim(-0.1, 3.5)
plt.ylim(0, 1.4)
print('Exact integral:', integr.quad(np.sin, 0, np.pi)[0])
print('Approx integral1:', integr.quad(Func, 0, np.pi, args=(popt1[0], popt1[1], popt1[2], popt1[3]))[0])
print('Approx integral2:', integr.quad(Func, 0, np.pi, args=(popt2[0], popt2[1], popt2[2], popt2[3]))[0])
plt.show()

# Exact integral: 2.0
# Approx integral1: 2.66485028754
# Approx integral2: 2.00002116217
Following the example above, here is a more general way to add arbitrary constraints, using scipy.optimize.minimize:
from scipy.optimize import minimize
from scipy.integrate import quad
import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(0, np.pi, 100)
y = np.sin(x) + (0. + np.random.rand(len(x))*0.4)

def func_to_fit(x, params):
    return params[0] + params[1] * x + params[2] * x ** 2 + params[3] * x ** 3

def constr_fun(params):
    intgrl, _ = quad(func_to_fit, 0, np.pi, args=(params,))
    return intgrl - 2

def func_to_minimise(params, x, y):
    y_pred = func_to_fit(x, params)
    return np.sum((y_pred - y) ** 2)

# Do the parameter fitting
# without constraints
res1 = minimize(func_to_minimise, x0=np.random.rand(4), args=(x, y))
params1 = res1.x

# with constraints
cons = {'type': 'eq', 'fun': constr_fun}
res2 = minimize(func_to_minimise, x0=np.random.rand(4), args=(x, y), constraints=cons)
params2 = res2.x

y_fit1 = func_to_fit(x, params1)
y_fit2 = func_to_fit(x, params2)

plt.scatter(x, y, marker='.')
plt.plot(x, y_fit2, color='y', label='constrained')
plt.plot(x, y_fit1, color='g', label='curve_fit')
plt.legend()
plt.xlim(-0.1, 3.5)
plt.ylim(0, 1.4)
plt.show()

print(f"Constraint violation: {constr_fun(params2)}")
# Constraint violation: -2.9179325622408214e-10
If you are able to normalise your probability fitting function in advance, then you can use this information to constrain the fit. A very simple example is fitting a Gaussian to data. If one fits the following three-parameter (A, mu, sigma) Gaussian, it is unnormalised in general:

f(x) = A * exp(-(x - mu)**2 / (2 * sigma**2))

However, if one instead enforces the normalisation condition on A,

A = 1 / (sigma * sqrt(2 * pi)),

then the Gaussian has only two parameters and is automatically normalised.
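A minimal sketch of that two-parameter fit (the p0 starting values here are an assumption):

import numpy as np
from scipy.optimize import curve_fit

def normalised_gaussian(x, mu, sigma):
    # Amplitude fixed at 1/(sigma*sqrt(2*pi)), so the integral is 1 by construction
    return np.exp(-(x - mu)**2 / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))

# x, y: bin centres and normalised counts of your histogram
# (mu_fit, sigma_fit), _ = curve_fit(normalised_gaussian, x, y, p0=(0.0, 1.0))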
You could ensure that your fitted probability distribution is normalised via numerical integration. For example, assuming that you have data x and y, and that you have defined an unnormalised_function(x, a, b) with parameters a and b for your probability distribution, defined on the interval x1 to x2 (which could be infinite):
from scipy.optimize import curve_fit
from scipy.integrate import quad

# Define a numerically normalised function
def normalised_function(x, a, b):
    normalisation, _ = quad(lambda x: unnormalised_function(x, a, b), x1, x2)
    return unnormalised_function(x, a, b)/normalisation

# Do the parameter fitting
fitted_parameters, _ = curve_fit(normalised_function, x, y)
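For concreteness, a hypothetical unnormalised_function plugged into the same pattern (the function, its support, and the starting values are illustrative only):

import numpy as np

x1, x2 = -np.inf, np.inf  # support of the distribution

def unnormalised_function(x, a, b):
    # Hypothetical example: an un-normalised Gaussian with centre a and width b
    return np.exp(-(x - a)**2 / (2 * b**2))

# With normalised_function defined as above:
# fitted_parameters, _ = curve_fit(normalised_function, x, y, p0=(0.0, 1.0))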
