I'd like to make a Gaussian Fit for some data that has a rough gaussian fit. I'd like the information of data peak (A), center position (mu), and standard deviation (sigma), along with 95% confidence intervals for these values.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import norm
# gaussian function
def gaussian_func(x, A, mu, sigma):
return A * np.exp( - (x - mu)**2 / (2 * sigma**2))
# generate toy data
x = np.arange(50)
y = [ 97.04421053, 96.53052632, 96.85684211, 96.33894737, 96.85052632,
96.30526316, 96.87789474, 96.75157895, 97.05052632, 96.73473684,
96.46736842, 96.23368421, 96.22526316, 96.11789474, 96.41263158,
96.32631579, 96.33684211, 96.44421053, 96.48421053, 96.49894737,
97.30105263, 98.58315789, 100.07368421, 101.43578947, 101.92210526,
102.26736842, 101.80421053, 101.91157895, 102.07368421, 102.02105263,
101.35578947, 99.83578947, 98.28, 96.98315789, 96.61473684,
96.82947368, 97.09263158, 96.82105263, 96.24210526, 95.95578947,
95.84210526, 95.67157895, 95.83157895, 95.37894737, 95.25473684,
95.32842105, 95.45684211, 95.31578947, 95.42526316, 95.30526316]
plt.scatter(x,y)
# initial_guess_of_parameters
# この値はソルバーとかで求めましょう.
parameter_initial = np.array([652, 2.9, 1.3])
# estimate optimal parameter & parameter covariance
popt, pcov = curve_fit(gaussian_func, x, y, p0=parameter_initial)
# plot result
xd = np.arange(x.min(), x.max(), 0.01)
estimated_curve = gaussian_func(xd, popt[0], popt[1], popt[2])
plt.plot(xd, estimated_curve, label="Estimated curve", color="r")
plt.legend()
plt.savefig("gaussian_fitting.png")
plt.show()
# estimate standard Error
StdE = np.sqrt(np.diag(pcov))
# estimate 95% confidence interval
alpha=0.025
lwCI = popt + norm.ppf(q=alpha)*StdE
upCI = popt + norm.ppf(q=1-alpha)*StdE
# print result
mat = np.vstack((popt,StdE, lwCI, upCI)).T
df=pd.DataFrame(mat,index=("A", "mu", "sigma"),
columns=("Estimate", "Std. Error", "lwCI", "upCI"))
print(df)
Data Plot with Fitted Curve
The data peak and center position seems correct, but the standard deviation is off. Any input is greatly appreciated.
Your scatter indeed looks similar to a gaussian distribution, but it is not centered around zero. Given the specifics of the Gaussian function it will therefor be hard to nicely fit a Gaussian distribution to the data the way you gave us. I would therefor propose by starting with demeaning the x series:
x = np.arange(0, 50) - 24.5
Next I would add one additional parameter to your gaussian function, the offset. Since the regular Gaussian function will always have its tails close to zero it is impossible to otherwise nicely fit your scatterplot:
def gaussian_function(x, A, mu, sigma, offset):
return A * np.exp(-np.power((x - mu)/sigma, 2.)/2.) + offset
Next you should define an error_loss_function to minimise:
def error_loss_function(params):
gaussian = gaussian_function(x, params[0], params[1], params[2], params[3])
errors = gaussian - y
return sum(np.power(errors, 2)) # You can also pick a different error loss function!
All that remains is fitting our curve now:
fit = scipy.optimize.minimize(fun=error_loss_function, x0=[2, 0, 0.2, 97])
params = fit.x # A: 6.57592661, mu: 1.95248855, sigma: 3.93230503, offset: 96.12570778
xd = np.arange(x.min(), x.max(), 0.01)
estimated_curve = gaussian_function(xd, params[0], params[1], params[2], params[3])
plt.plot(xd, estimated_curve, label="Estimated curve", color="b")
plt.legend()
plt.show(block=False)
Hopefully this helps. Looks like a fun project, let me know if my answer is not clear.
Related
I am trying to fit a gaussian to a discrete potential using the astropy.modeling package. Although I assign a negative amplitude to the gaussian, it returns a null gaussian, i.e, with zero amplitude everywhere:
plot(c[0], pot_x, label='Discrete potential')
plot(c[0], g(c[0]), label='Gaussian fit')
legend()
I have the following code lines to perform the fitting:
g_init = models.Gaussian1D(amplitude=-1., mean=0, stddev=1.)
fit_g = fitting.LevMarLSQFitter()
g = fit_g(g_init, c[0], pot_x)
Where
c[0] = array([13.31381488, 13.31944489, 13.32507491, 13.33070493, 13.33633494,
13.34196496, 13.34759498, 13.35322499, 13.35885501, 13.36448503,
13.37011504, 13.37574506, 13.38137507, 13.38700509, 13.39263511,
13.39826512, 13.40389514, 13.40952516, 13.41515517, 13.42078519])
pot_x = array([ -1.72620157, -3.71811187, -6.01282809, -6.98874144,
-8.36645166, -14.31787771, -23.3688849 , -26.14679496,
-18.85970983, -10.73888697, -7.10763373, -5.81176637,
-5.44146953, -5.37165105, -4.6454408 , -2.90307138,
-1.66250349, -1.66096343, -1.8188269 , -1.41980552])
Does anyone have an ideia what the problem might be?
Solved: I just had to assign a mean that is in the range of the domain, like 13.35.
As I am not familiar with Astropy, I used scipy. The code below provides the following outpt:
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
x = np.asarray([13.31381488, 13.31944489, 13.32507491, 13.33070493, 13.33633494,
13.34196496, 13.34759498, 13.35322499, 13.35885501, 13.36448503,
13.37011504, 13.37574506, 13.38137507, 13.38700509, 13.39263511,
13.39826512, 13.40389514, 13.40952516, 13.41515517, 13.42078519])
y = -np.asarray([ -1.72620157, -3.71811187, -6.01282809, -6.98874144,
-8.36645166, -14.31787771, -23.3688849 , -26.14679496,
-18.85970983, -10.73888697, -7.10763373, -5.81176637,
-5.44146953, -5.37165105, -4.6454408 , -2.90307138,
-1.66250349, -1.66096343, -1.8188269 , -1.41980552])
mean = sum(x * y) / sum(y)
sigma = np.sqrt(sum(y * (x - mean)**2) / sum(y))
def Gauss(x, a, x0, sigma):
return a * np.exp(-(x - x0)**2 / (2 * sigma**2))
popt,pcov = curve_fit(Gauss, x, y, p0=[max(y), mean, sigma])
plt.plot(x, y, 'b+:', label='data')
plt.plot(x, Gauss(x, *popt), 'r-', label='fit')
plt.legend()
By simplicity, I reused this answer. I am not entirely certain about the mean and sigma definition, as I am not used to fitting a Gaussian on a 2D dataset. However, it doesn't really matter as it is simply used to define an approximatation used to start the curve_fit algorithm.
I recently got a script running to fit a gaussian to my absorption profile with help of SO. My hope was that things would work fine if I simply replace the Gauss function by a Voigt one, but this seems not to be the case. I think mainly due to the fact that it is a shifted voigt.
Edit: The profiles are absorption lines that vary in optical thickness. In practice they will be a mix between optically thick and thin features. Like the bottom part in this diagram. The current data will be more like the top image, but maybe the bottom is already flattened a bit. (And we only see the left side of the profile, a bit beyond the center)
For a Gauss it looks like this and as predicted the bottom seems to be less deep than the fit wants it to be, but still quite close. The profile itself should still be a voigt though. But now I realize that the central points might throw off the fit. So maybe a weight should be added based on wing position?
I'm mostly wondering if the shifted function could be mis-defined or if its my starting values.
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
from scipy.special import wofz
x = np.arange(13)
xx = xx = np.linspace(0, 13, 100)
y = np.array([19699.959 , 21679.445 , 21143.195 , 20602.875 , 16246.769 ,
11635.25 , 8602.465 , 7035.493 , 6697.0337, 6510.092 ,
7717.772 , 12270.446 , 16807.81 ])
# weighted arithmetic mean (corrected - check the section below)
#mean = 2.4
sigma = 2.4
gamma = 2.4
def Gauss(x, y0, a, x0, sigma):
return y0 + a * np.exp(-(x - x0)**2 / (2 * sigma**2))
def Voigt(x, x0, y0, a, sigma, gamma):
#sigma = alpha / np.sqrt(2 * np.log(2))
return y0 + a * np.real(wofz((x - x0 + 1j*gamma)/sigma/np.sqrt(2))) / sigma /np.sqrt(2*np.pi)
popt, pcov = curve_fit(Voigt, x, y, p0=[8, np.max(y), -(np.max(y)-np.min(y)), sigma, gamma])
#p0=[8, np.max(y), -(np.max(y)-np.min(y)), mean, sigma])
plt.plot(x, y, 'b+:', label='data')
plt.plot(xx, Voigt(xx, *popt), 'r-', label='fit')
plt.legend()
plt.show()
I may be misunderstanding the model you're using, but I think you would need to include some sort of constant or linear background.
To do that with lmfit (which has Voigt, Gaussian, and many other models built in, and tries very hard to make these interchangeable), I would suggest starting with something like this:
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import GaussianModel, VoigtModel, LinearModel, ConstantModel
x = np.arange(13)
xx = np.linspace(0, 13, 100)
y = np.array([19699.959 , 21679.445 , 21143.195 , 20602.875 , 16246.769 ,
11635.25 , 8602.465 , 7035.493 , 6697.0337, 6510.092 ,
7717.772 , 12270.446 , 16807.81 ])
# build model as Voigt + Constant
## model = GaussianModel() + ConstantModel()
model = VoigtModel() + ConstantModel()
# create parameters with initial values
params = model.make_params(amplitude=-1e5, center=8,
sigma=2, gamma=2, c=25000)
# maybe place bounds on some parameters
params['center'].min = 2
params['center'].max = 12
params['amplitude'].max = 0.
# do the fit, print out report with results
result = model.fit(y, params, x=x)
print(result.fit_report())
# plot data, best fit, fit interpolated to `xx`
plt.plot(x, y, 'b+:', label='data')
plt.plot(x, result.best_fit, 'ko', label='fitted points')
plt.plot(xx, result.eval(x=xx), 'r-', label='interpolated fit')
plt.legend()
plt.show()
And, yes, you can simply replace VoigtModel() with GaussianModel() or LorentzianModel() and redo the fit and compare the fit statistics to see which model is better.
For the Voigt model fit, the printed report would be
[[Model]]
(Model(voigt) + Model(constant))
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 41
# data points = 13
# variables = 4
chi-square = 17548672.8
reduced chi-square = 1949852.54
Akaike info crit = 191.502014
Bayesian info crit = 193.761811
[[Variables]]
amplitude: -173004.338 +/- 30031.4068 (17.36%) (init = -100000)
center: 8.06574198 +/- 0.16209266 (2.01%) (init = 8)
sigma: 1.96247322 +/- 0.23522096 (11.99%) (init = 2)
c: 23800.6655 +/- 1474.58991 (6.20%) (init = 25000)
gamma: 1.96247322 +/- 0.23522096 (11.99%) == 'sigma'
fwhm: 7.06743644 +/- 0.51511574 (7.29%) == '1.0692*gamma+sqrt(0.8664*gamma**2+5.545083*sigma**2)'
height: -18399.0337 +/- 2273.61672 (12.36%) == '(amplitude/(max(2.220446049250313e-16, sigma*sqrt(2*pi))))*wofz((1j*gamma)/(max(2.220446049250313e-16, sigma*sqrt(2)))).real'
[[Correlations]] (unreported correlations are < 0.100)
C(amplitude, c) = -0.957
C(amplitude, sigma) = -0.916
C(sigma, c) = 0.831
C(center, c) = -0.151
Note that by default gamma is constrained to be the same value as sigma. This constraint can be lifted and gamma made to vary independently with params['gamma'].set(expr=None, vary=True, min=1.e-9). I think that you may not have enough data points in this data set to robustly and independently determine gamma.
The plot for that fit would look like this:
I managed to get something, but not very satisfying. If you remove the offset as a parameter and add 20000 directly in the Voigt function, with starting values [8, 126000, 0.71, 2] (the particular values don't' matter much) you'll get something like
Now, the fit produces a value for gamma which is negative which I cannot really justify. I would expect gamma to always be positive, but maybe I'm wrong and it's completely fine.
One thing you could try is to mirror your data so that its a "positive" peak (and while at it removing the background) and/or normalize the values. That might help you in the convergence.
I have no idea why when using the offset as a parameter the solver has problems finding an optimum. Maybe you need a different optimizer routine.
Maybe it'll be a better option to use the lmfit package that it's a wrapper over scipy to fit nonlinear functions with many prebuilt lineshapes. There is even an example of fitting a Voigt profile.
I have a problem with fitting a custom function using scipy.optimize in Python and I do not know, why that is happening. I generate data from centered and normalized binomial distribution (Gaussian curve) and then fit a curve. The expected outcome is in the picture when I plot my function over the fitted data. But when I do the fitting, it fails.
I'm convinced it is a pythonic thing because it should give the parameter a = 1 (that's how I define it) and it gives it but then the fit is bad (see picture). However, if I change sigma to 0.65*sigma in:
p_halfg, p_halfg_cov = optimize.curve_fit(lambda x, a:piecewise_half_gauss(x, a, sigma = 0.65*sigma_fit), x, y, p0=[1])
, it gives almost perfect fit (a is then 5/3, as predicted by math). Those fits should be the same and they are not!
I give more comments bellow. Could you please tell me what is happening and where the problem could be?
Plot with a=1 and sigma = sigma_fit
Plot with sigma = 0.65*sigma_fit
I generate data from normalized binomial distribution (I can provide my code but the values are more important now). It is a distribution with N = 10 and p = 0.5 and I'm centering it and taking only the right side of the curve. Then I'm fitting it with my half-gauss function, which should be the same distribution as binomial if its parameter a = 1 (and the sigma is equal to the sigma of the distribution, sqrt(np(1-p)) ). Now the problem is first that it does not fit the data as shown in the picture despite getting the correct value of parameter a.
Notice weird stuff... if I set sigma = 3* sigma_fit, I get a = 1/3 and a very bad fit (underestimate). If I set it to 0.2*sigma_fit, I get also a bad fit and a = 1/0.2 = 5 (overestimate). And so on. Why? (btw. a=1/sigma so the fitting procedure should work).
import numpy as np
import matplotlib.pyplot as plt
import math
pi = math.pi
import scipy.optimize as optimize
# define my function
sigma_fit = 1
def piecewise_half_gauss(x, a, sigma=sigma_fit):
"""Half of normal distribution curve, defined as gaussian centered at 0 with constant value of preexponential factor for x < 0
Arguments: x values as ndarray whose numbers MUST be float type (use linspace or np.arange(start, end, step, dtype=float),
a as a parameter of width of the distribution,
sigma being the deviation, second moment
Returns: Half gaussian curve
Ex:
>>> piecewise_half_gauss(5., 1)
array(0.04839414)
>>> x = np.linspace(0,10,11)
... piecewise_half_gauss(x, 2, 3)
array([0.06649038, 0.06557329, 0.0628972 , 0.05867755, 0.05324133,
0.04698531, 0.04032845, 0.03366645, 0.02733501, 0.02158627,
0.01657952])
>>> piecewise_half_gauss(np.arange(0,11,1, dtype=float), 1, 2.4)
array([1.66225950e-01, 1.52405153e-01, 1.17463281e-01, 7.61037856e-02,
4.14488078e-02, 1.89766470e-02, 7.30345854e-03, 2.36286717e-03,
6.42616248e-04, 1.46914868e-04, 2.82345875e-05])
"""
return np.piecewise(x, [x >= 0, x < 0],
[lambda x: np.exp(-x ** 2 / (2 * ((a * sigma) ** 2))) / (np.sqrt(2 * pi) * sigma * a),
lambda x: 1 / (np.sqrt(2 * pi) * sigma)])
# Create normalized data for binomial distribution Bin(N,p)
n = 10
p = 0.5
x = np.array([0., 1., 2., 3., 4., 5.])
y = np.array([0.25231325, 0.20657662, 0.11337165, 0.0417071 , 0.01028484,
0.00170007])
# Get the estimate for sigma parameter
sigma_fit = (n*p*(1-p))**0.5
# Get fitting parameters
p_halfg, p_halfg_cov = optimize.curve_fit(lambda x, a:piecewise_half_gauss(x, a, sigma = sigma_fit), x, y, p0=[1])
print(sigma_fit, p_halfg, p_halfg_cov)
## Plot the result
# unpack fitting parameters
a = np.float64(p_halfg)
# unpack uncertainties in fitting parameters from diagonal of covariance matrix
#da = [np.sqrt(p_halfg_cov[j,j]) for j in range(p_halfg.size)] # if we fit more parameters
da = np.float64(np.sqrt(p_halfg_cov[0]))
# create fitting function from fitted parameters
f_fit = np.linspace(0, 10, 50)
y_fit = piecewise_half_gauss(f_fit, a)
# Create figure window to plot data
fig = plt.figure(1, figsize=(10,10))
plt.scatter(x, y, color = 'r', label = 'Original points')
plt.plot(f_fit, y_fit, label = 'Fit')
plt.xlabel('My x values')
plt.ylabel('My y values')
plt.text(5.8, .25, 'a = {0:0.5f}$\pm${1:0.6f}'.format(a, da))
plt.legend()
However, if I plot it manually, it fits EXACTLY!
plt.scatter(x, y, c = 'r', label = 'Original points')
plt.plot(np.linspace(0,5,50), piecewise_half_gauss(np.linspace(0,5,50), 1, sigma_fit), label = 'Fit')
plt.legend()
EDIT -- solved:
it is a plotting problem, need to use
y_fit = piecewise_half_gauss(f_fit, a, sigma = 0.6*sigma_fit)
The problem was in plotting and fitting the parameters -- if I fit it with different sigma, I also need to change it in the plotting section when I generate y_fit:
# Get fitting parameters
p_halfg, p_halfg_cov = optimize.curve_fit(lambda x, a:piecewise_half_gauss(x, a, sigma = 0.6*sigma_fit), x, y, p0=[1])
...
y_fit = piecewise_half_gauss(f_fit, a, sigma = 0.6*sigma_fit)
My goal is to create a dataset of random points whose histogram looks like an exponential decay function and then plot an exponential decay function through those points.
First I tried to create a series of random numbers (but did not do so successfully since these should be points, not numbers) from an exponential distribution.
from pylab import *
from scipy.optimize import curve_fit
import random
import numpy as np
import pandas as pd
testx = pd.DataFrame(range(10)).astype(float)
testx = testx[0]
for i in range(1,11):
x = random.expovariate(15) # rate = 15 arrivals per second
data[i] = [x]
testy = pd.DataFrame(data).T.astype(float)
testy = testy[0]; testy
plot(testx, testy, 'ko')
The result could look something like this.
And then I define a function to draw a line through my points:
def func(x, a, e):
return a*np.exp(-a*x)+e
popt, pcov = curve_fit(f=func, xdata=testx, ydata=testy, p0 = None, sigma = None)
print popt # parameters
print pcov # covariance
plot(testx, testy, 'ko')
xx = np.linspace(0, 15, 1000)
plot(xx, func(xx,*popt))
plt.show()
What I'm looking for is: (1) a more elegant way to create an array of random numbers from an exponential (decay) distribution and (2) how to test that my function is indeed going through the data points.
I would guess that the following is close to what you want. You can generate some random numbers drawn from an exponential distribution with numpy,
data = numpy.random.exponential(5, size=1000)
You can then create a histogram of them using numpy.hist and draw the histogram values into a plot. You may decide to take the middle of the bins as position for the point (this assumption is of course wrong, but gets the more valid the more bins you use).
Fitting works as in the code from the question. You will then find out that our fit roughly finds the parameter used for the data generation (in this case below ~5).
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
data = np.random.exponential(5, size=1000)
hist,edges = np.histogram(data,bins="auto",density=True )
x = edges[:-1]+np.diff(edges)/2.
plt.scatter(x,hist)
func = lambda x,beta: 1./beta*np.exp(-x/beta)
popt, pcov = curve_fit(f=func, xdata=x, ydata=hist)
print(popt)
xx = np.linspace(0, x.max(), 101)
plt.plot(xx, func(xx,*popt), ls="--", color="k",
label="fit, $beta = ${}".format(popt))
plt.legend()
plt.show()
I think you are actually asking about a regression problem, which is what Praveen was suggesting.
You have a bog standard exponential decay that arrives at the y-axis at about y=0.27. Its equation is therefore y = 0.27*exp(-0.27*x). I can model gaussian error around the values of this function and plot the result using the following code.
import matplotlib.pyplot as plt
from math import exp
from scipy.stats import norm
x = range(0, 16)
Y = [0.27*exp(-0.27*_) for _ in x]
error = norm.rvs(0, scale=0.05, size=9)
simulated_data = [max(0, y+e) for (y,e) in zip(Y[:9],error)]
plt.plot(x, Y, 'b-')
plt.plot(x[:9], simulated_data, 'r.')
plt.show()
print (x[:9])
print (simulated_data)
Here's the plot. Notice that I save the output values for subsequent use.
Now I can calculate the nonlinear regression of the exponential decay values, contaminated with noise, on the independent variable, which is what curve_fit does.
from math import exp
from scipy.optimize import curve_fit
import numpy as np
def model(x, p):
return p*np.exp(-p*x)
x = list(range(9))
Y = [0.22219001972988275, 0.15537454187341937, 0.15864069451825827, 0.056411162886672819, 0.037398831058143338, 0.10278251869912845, 0.03984605649260467, 0.0035360087611421981, 0.075855255999424692]
popt, pcov = curve_fit(model, x, Y)
print (popt[0])
print (pcov)
The bonus is that, not only does curve_fit calculate an estimate for the parameter — 0.207962159793 — it also offers an estimate for this estimate's variance — 0.00086071 — as an element of pcov. This would appear to be a fairly small value, given the small sample size.
Here's how to calculate the residuals. Notice that each residual is the difference between the data value and the value estimated from x using the parameter estimate.
residuals = [y-model(_, popt[0]) for (y, _) in zip(Y, x)]
print (residuals)
If you wanted to further 'test that my function is indeed going through the data points' then I would suggest looking for patterns in the residuals. But discussions like this might be beyond what's welcomed on stackoverflow: Q-Q and P-P plots, plots of residuals vs y or x, and so on.
I agree with the solution of #ImportanceOfBeingErnes, but I'd like to add a (well known?) general solution for distributions. If you have a distribution function f with integral F (i.e. f = dF / dx) then you get the required distribution by mapping random numbers with inv F i.e. the inverse function of the integral. In case of the exponential function, the integral is, again, an exponential and the inverse is the logarithm. So it can be done like this:
import matplotlib.pyplot as plt
import numpy as np
from random import random
def gen( a ):
y=random()
return( -np.log( y ) / a )
def dist_func( x, a ):
return( a * np.exp( -a * x) )
data = [ gen(3.14) for x in range(20000) ]
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.hist(data, bins=80, normed=True, histtype="step")
ax.plot(np.linspace(0,5,150), dist_func( np.linspace(0,5,150), 3.14 ) )
plt.show()
How can I find and plot a LOWESS curve that looks like the following using Python?
I'm aware of the LOWESS implementation in statsmodels, but it doesn't seem to be able to give me 95% confidence interval lines that I can shade between. Seaborn has a method that calls the statsmodels implementation, but it can't plot the confidence intervals.
Other StackOverflow answers give code to draw a LOESS/LOWESS line, but none with a confidence interval. Can anyone assist with this? Is anyone aware of an existing implementation that would enable me to do this?
Thanks in advance.
I found a link here is useful, and I put code below:
def lowess(x, y, f=1./3.):
"""
Basic LOWESS smoother with uncertainty.
Note:
- Not robust (so no iteration) and
only normally distributed errors.
- No higher order polynomials d=1
so linear smoother.
"""
# get some paras
xwidth = f*(x.max()-x.min()) # effective width after reduction factor
N = len(x) # number of obs
# Don't assume the data is sorted
order = np.argsort(x)
# storage
y_sm = np.zeros_like(y)
y_stderr = np.zeros_like(y)
# define the weigthing function -- clipping too!
tricube = lambda d : np.clip((1- np.abs(d)**3)**3, 0, 1)
# run the regression for each observation i
for i in range(N):
dist = np.abs((x[order][i]-x[order]))/xwidth
w = tricube(dist)
# form linear system with the weights
A = np.stack([w, x[order]*w]).T
b = w * y[order]
ATA = A.T.dot(A)
ATb = A.T.dot(b)
# solve the syste
sol = np.linalg.solve(ATA, ATb)
# predict for the observation only
yest = A[i].dot(sol)# equiv of A.dot(yest) just for k
place = order[i]
y_sm[place]=yest
sigma2 = (np.sum((A.dot(sol) -y [order])**2)/N )
# Calculate the standard error
y_stderr[place] = np.sqrt(sigma2 *
A[i].dot(np.linalg.inv(ATA)
).dot(A[i]))
return y_sm, y_stderr
import numpy as np
import matplotlib.pyplot as plt
# make some data
x = 5*np.random.random(100)
y = np.sin(x) * 3*np.exp(-x) + np.random.normal(0, 0.2, 100)
order = np.argsort(x)
#run it
y_sm, y_std = lowess(x, y, f=1./5.)
# plot it
plt.plot(x[order], y_sm[order], color='tomato', label='LOWESS')
plt.fill_between(x[order], y_sm[order] - 1.96*y_std[order],
y_sm[order] + 1.96*y_std[order], alpha=0.3, label='LOWESS uncertainty')
plt.plot(x, y, 'k.', label='Observations')
plt.legend(loc='best')
#run it
y_sm, y_std = lowess(x, y, f=1./5.)
# plot it
plt.plot(x[order], y_sm[order], color='tomato', label='LOWESS')
plt.fill_between(x[order], y_sm[order] - y_std[order],
y_sm[order] + y_std[order], alpha=0.3, label='LOWESS uncertainty')
plt.plot(x, y, 'k.', label='Observations')
plt.legend(loc='best')