I am very new to Gaussian processes and to Python as well.
I am trying to produce a very simple Gaussian process regression for a 3D model.
I have a very simple Python code for a function:
import numpy as np

# Squared-exponential kernel: params[0] is the amplitude, params[1] the inverse lengthscale.
def exponential_cov(x, y, params):
    return params[0] * np.exp(-0.5 * params[1] * np.subtract.outer(x, y)**2)

# Posterior mean and covariance of the GP at x_new, conditioned on observations (x, y).
def conditional(x_new, x, y, params):
    B = exponential_cov(x_new, x, params)
    C = exponential_cov(x, x, params)
    A = exponential_cov(x_new, x_new, params)
    mu = np.linalg.inv(C).dot(B.T).T.dot(y)
    sigma = A - B.dot(np.linalg.inv(C).dot(B.T))
    return (mu.squeeze(), sigma.squeeze())
import matplotlib.pyplot as plt
# GP PRIOR
tu = [1, 10]
Si_tu = exponential_cov(0, 0, tu)
xpts = np.arange(-5, 5, step=0.01)
plt.errorbar(xpts, np.zeros(len(xpts)), yerr=Si_tu, capsize=0, color='#95daed', alpha=0.5, label='error') #error
plt.plot(xpts, np.zeros(len(xpts)), linestyle='dashed', color='#3105b2', linewidth=2.5, label='mu'); #mu
# GP FOR 1ST POINT
x = [1.]
y = np.sin(x)+np.cos(np.sqrt(15)*x)
Si_1 = exponential_cov(x, x, tu)
def predict(x, data, kernel, params, sigma, t):
    # Kernel vector between the test point and each training point.
    k = [kernel(x, y, params) for y in data]
    Sinv = np.linalg.inv(sigma)
    y_pred = np.dot(k, Sinv).dot(t)
    sigma_new = kernel(x, x, params) - np.dot(k, Sinv).dot(k)
    return y_pred, sigma_new
x_pred = np.linspace(-5, 5, 1000) #change step here!!
print "x_pred="
print(x_pred)
predictions = [predict(i, x, exponential_cov, tu, Si_1, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print "y_pred ="
print(y_pred )
print "sigmas ="
print(sigmas )
# GP FOR 2ND POINT
m, s = conditional([-1], x, y, tu)
y2 = np.sin(-1)+np.cos(np.sqrt(15)*(-1))
x.append(-1)
y=np.append(y,y2)
Si_2 = exponential_cov(x, x, tu)
predictions = [predict(i, x, exponential_cov, tu, Si_2, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print "y_pred ="
print(y_pred )
print "sigmas ="
print(sigmas )
By using this code I get very nice fitting results for the function np.sin(x) + np.cos(np.sqrt(15) * x), but what I really want to do is try the same Gaussian process for the function Z = np.sin(2*X) * np.cos(2*Y) / 2.
I know that the idea is basically the same, but I cannot adapt my Python code to the [x, y] input needed to obtain Z.
I would really appreciate your help, hints, or links!
In your code the input of the function is 1-D, while the new function is 2-D, so you have to change the covariance function accordingly. For example, you can use an ARD (automatic relevance determination) kernel, with one lengthscale per input dimension; see the kernel cookbook for reference. Alternatively, you can use an isotropic kernel for the 2-D input: just make sure you choose a suitable distance function (e.g. the L2 norm) together with a single lengthscale.
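To make that concrete, here is a minimal sketch (my own, not from the question) of an isotropic squared-exponential kernel over 2-D inputs, keeping the params convention [amplitude, inverse lengthscale] from the code above; the name exponential_cov_2d and the use of scipy.spatial.distance.cdist are choices of this sketch:

import numpy as np
from scipy.spatial.distance import cdist

# Isotropic squared-exponential kernel for 2-D inputs:
# X1 is (n, 2), X2 is (m, 2); params = [amplitude, inverse lengthscale].
def exponential_cov_2d(X1, X2, params):
    d2 = cdist(X1, X2, 'sqeuclidean')  # pairwise squared L2 distances
    return params[0] * np.exp(-0.5 * params[1] * d2)

# Condition on a few observations of Z = sin(2x) * cos(2y) / 2, mirroring conditional() above.
tu = [1, 10]
X_train = np.random.uniform(-2, 2, size=(10, 2))
z_train = np.sin(2 * X_train[:, 0]) * np.cos(2 * X_train[:, 1]) / 2

C = exponential_cov_2d(X_train, X_train, tu) + 1e-9 * np.eye(len(X_train))  # jitter for stability
X_new = np.array([[0.5, -0.5]])
B = exponential_cov_2d(X_new, X_train, tu)
A = exponential_cov_2d(X_new, X_new, tu)
mu = B.dot(np.linalg.solve(C, z_train))        # posterior mean at X_new
sigma = A - B.dot(np.linalg.solve(C, B.T))     # posterior covariance at X_new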
I am trying to fit a function to extract parameters from a binary 2D grating in Python.
Here is my code, which runs but does not deliver a proper output:
import numpy as np
import pylab as plt
from scipy.optimize import curve_fit

def grid(X, Y, P, FS):
    """
    Function to calculate Z(X, Y) of a binary grating with
    period P and feature size FS.
    input:
        X, Y (np.array): from numpy meshgrid, the domain of the function
        P (float, int): period of the grating
        FS (float, int): size of the grating features
    output:
        Z (np.array): binary height profile of the grating containing 0 and 1,
        same shape as X and Y
    """
    Z = np.ones_like(X)
    Z[X % P > FS] = 0
    Z[Y % P > FS] = 0
    return Z
# domain of the example
x = np.arange(0, 500)
y = np.arange(0, 500)
X, Y = np.meshgrid(x, y)
# plot of the example grating
Z = grid(X, Y, 93, 42)
plt.contourf(X, Y, Z)
plt.show()
# here starts the fit
# np.ravel is used in combination with scipy.optimize.curve_fit like in every example I found online
# goal: find the values of P and FS used to generate Z
xdata = np.vstack((X.ravel(), Y.ravel()))
ydata = Z.ravel()
def _grid(xdata, P, FS):
    """
    Helper function to call grid(X, Y, P, FS) with the flattened input used
    for the curve_fit.
    Returns the result of Z in the same flattened manner.
    """
    # unpack x, y and generate the meshgrid
    x, y = xdata
    x = np.unique(x)
    y = np.unique(y)
    X, Y = np.meshgrid(x, y)
    # call the original function and return the flattened result
    res = grid(X, Y, P, FS)
    return res.ravel()
# try to fit the parameters
popt, pcov = curve_fit(_grid, xdata, ydata, p0=[90, 40])
print(popt)
print(pcov)
Does someone maybe spot the problem? Or is there a better way, or a programming language better suited, to do this simple fit?
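One hedged observation, not from the original post: curve_fit relies on numerical gradients, and grid() is piecewise constant in P and FS, so the estimated gradient is zero almost everywhere and the optimizer cannot move away from p0. Since both parameters are small integers here, a brute-force scan is one workable illustration (the search ranges below are my own guesses around p0=[90, 40]):

# Brute-force scan over integer (P, FS) candidates, counting mismatched pixels;
# assumes grid(), X, Y and Z from the question are already defined.
best = None
for P in range(80, 101):
    for FS in range(30, 51):
        err = np.count_nonzero(grid(X, Y, P, FS) != Z)
        if best is None or err < best[0]:
            best = (err, P, FS)
print(best)  # for the example grating this should recover (0, 93, 42)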
I am trying to create a Gaussian fit by using scipy.optimize's curve_fit.
My y data have a Poisson error, so I need to integrate these uncertainties into my curve fit, but I don't know how.
At first I created a function fit_gauss which worked without the error in y. Now I am trying to modify this code.
That's what I've got:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import uncertainties.unumpy as unp

x = x_data  # data are imported from file
y = y_data
y_un = unp.uarray(y, np.sqrt(y))
print("DATA - Gauss")

# Define Gauss function
def f_gauss(x, a, x0, sigma):
    return a * np.exp(-(x - x0)**2 / (2 * sigma**2))

# Define fitting function
def fit_gauss(x, y, title, path):
    n = len(x)
    mean = sum(x * y) / n
    w = [0] * len(x)
    for i in range(len(x)):
        w[i] = y[i] * (x[i] - mean)**2
    sigma = (sum(w) / sum(y))**(1 / 2)
    # sigma = (sum(y * (x - mean)**2) / sum(y))**(1/2)
    gopt, gcov = curve_fit(
        f_gauss,
        x, y,
        p0=[max(y), mean, sigma]
    )  # trying to use curve fit
    gerrors = np.sqrt(np.diag(gcov))
    unparams_gauss = unp.uarray(gopt, gerrors)
    print(f"""
    {title}
    Mean: {mean}
    Sigma: {sigma}
    a={unparams_gauss[0]}
    x0={unparams_gauss[1]}
    sigma={unparams_gauss[2]}
    """)
    # plotting
    plt.title(title)
    plt.plot(x, y, "k", label=f"{title}")
    plt.plot(x, f_gauss(x, *gopt), "r--", label="Gauß Fit")
    plt.legend(loc="best")
    plt.savefig(path)
    plt.close()

fit_gauss(x, y_un, "Cs-137", "plots/gauss_fit.pdf")
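For what it's worth: curve_fit cannot digest a uarray directly, but it does accept per-point uncertainties through its sigma argument (with absolute_sigma=True for absolute errors). A minimal sketch of a weighted fit, reusing f_gauss and the plain x, y arrays from above (the p0 heuristic is copied from fit_gauss):

y_err = np.sqrt(y)  # Poisson: the standard deviation of a count is its square root
mean0 = sum(x * y) / len(x)                      # same starting values as in fit_gauss
sigma0 = (sum(y * (x - mean0)**2) / sum(y))**0.5
gopt, gcov = curve_fit(
    f_gauss, x, y,
    p0=[max(y), mean0, sigma0],
    sigma=y_err,             # per-point 1-sigma uncertainties
    absolute_sigma=True,     # treat sigma as absolute errors, not relative weights
)
gerrors = np.sqrt(np.diag(gcov))  # 1-sigma parameter uncertainties from the covariance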
I am having some trouble plotting a polynomial with confidence bounds. Here's my code:
import matplotlib.pyplot as plt
import numpy as np
X = np.array([-5.965215369881319, -40.41538208207736, -15.584956679988448, -6.073510488327594, -11.784890508714675, -7.754674907524617, -17.482364474520395, 2.4347468437246667, -16.133111795228572, -15.815302670890363, 5.9730059659614305, -19.249139823392717, 4.044936045002517, -7.102200416593474, 5.035187269390729, -23.543269648523623, -12.593308808761405, -21.08859785268947, -24.712871361819676, 2.4347468437246667, -21.028901691001877, -15.815302670890363, 7.208054914877421, -29.6589088548177])
Y = np.array([-2.6822693448184607, -23.168555618313547, -3.6166894384329806, -3.5137320916685866, -3.770179381108618, -12.788411352407874, -15.698803377485447, 1.9978332067376703, -11.838042662997829, -8.377671546754629, 8.109573809406804, -14.749849913813343, 2.8160696371542833, -3.3810722874645083, 5.560322978176329, -16.710386872172883, -6.795050134412731, -9.855604995547115, -25.386715163603533, 1.9978332067376703, -11.828949808296766, -8.402106796338003, 7.631911984593458, -18.155638519731614])
#
plt.plot(X, Y, '.')
poly_degree = 5
sd_cutoff = 1 # 2 keeps everything
coeffs = np.polyfit(X, Y, poly_degree)
poly_eqn = np.poly1d(coeffs)
Y_hat = poly_eqn(X)
delta = Y - Y_hat
sd_p = np.std(delta)
ok = abs(delta) < sd_p * sd_cutoff
plt.scatter(X, Y, color=np.where(ok, 'k', 'r'))
plt.fill_between(
    X,
    Y_hat - sd_p * sd_cutoff,
    Y_hat + sd_p * sd_cutoff,
    color='#00000020')
plt.plot(X, Y_hat)
Why does my polynomial look so strange?
https://imgur.com/a/hf4gY3P
Since the linked question does not provide a sorting solution, here is code that will sort the X-Y pairs:
...
ind = np.argsort(X)
X = X[ind]
Y = Y[ind]
...
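For completeness, a minimal end-to-end sketch (my own recombination of the question's code with the sort above; poly_degree and sd_cutoff are as defined in the question):

ind = np.argsort(X)
X_s, Y_s = X[ind], Y[ind]          # sort the X-Y pairs once, up front

coeffs = np.polyfit(X_s, Y_s, poly_degree)
Y_hat = np.poly1d(coeffs)(X_s)
delta = Y_s - Y_hat
sd_p = np.std(delta)

plt.scatter(X_s, Y_s, color=np.where(abs(delta) < sd_p * sd_cutoff, 'k', 'r'))
plt.fill_between(X_s, Y_hat - sd_p * sd_cutoff, Y_hat + sd_p * sd_cutoff, color='#00000020')
plt.plot(X_s, Y_hat)               # now drawn left to right, without zig-zagging
plt.show()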
I am learning ML with Python, and I read the code below in a book.
x, y = np.array(x), np.array(y)
x = (x - x.mean()) / x.std()
x0 = np.linspace(-2, 4, 100)
def get_model(deg):
    return lambda input_x=x0: np.polyval(np.polyfit(x, y, deg), input_x)

def get_cost(deg, input_x, input_y):
    return 0.5 * ((get_model(deg)(input_x) - input_y) ** 2).sum()
I'm not sure why, in the get_cost function, the author applies get_model(deg) to input_x, which is x. In my understanding, the get_model(deg) function already returns the predicted y based on x0.
When I typed get_model(4) to try to understand what was happening, it returned <function __main__.get_model.<locals>.<lambda>>. To my surprise, it didn't return the predicted y based on x0 but a function?! I was totally confused.
When I typed get_model(4)(x), it returned the predicted y based on x. I don't get it. Could someone please help me figure this out?
The function get_model(deg) does not, as you noticed, return predictions, but a model for predicting.
If you execute get_model(1), it will return a linear model, which allows you to fit your values with a linear function:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.gcf()
fig.set_size_inches(10, 5)
x = np.linspace(-2, 4, 200)
y = x**2
y += np.random.rand(len(x)) * 10
x0 = x

def get_model(deg):
    return lambda input_x=x0: np.polyval(np.polyfit(x, y, deg), input_x)
linear_model = get_model(1)
plt.scatter(x, y)
plt.scatter(x, linear_model(), c='red')
plt.show()
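To make the closure mechanics explicit (this snippet is my addition, not from the book): get_model(deg) only builds and returns a lambda, and nothing is evaluated until that lambda is called; input_x merely defaults to x0:

model = get_model(1)   # builds and returns a lambda; no prediction happens yet
print(model)           # <function get_model.<locals>.<lambda> at 0x...>
y_at_x0 = model()      # evaluates the fitted polynomial at the default grid x0
y_at_x = model(x)      # evaluates it at the training points x instead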
If you want to try another model, you can do this by changing the degree of the model:
plt.scatter(x, y)
plt.scatter(x, get_model(2)(), c='red')
plt.scatter(x, get_model(19)(), c='yellow')
plt.show()
I hope this helps you understand the code a bit better.
I am just wondering if there is an easy way to fit Gaussian/Lorentzian profiles to 10 peaks and extract the FWHM, and also to determine the position of the FWHM on the x-values. The complicated way is to separate the peaks, fit the data, and extract the FWHM.
Data is [https://drive.google.com/file/d/0B6sUnnbyNGuOT2RZb2UwYXU4dlE/view?usp=sharing].
Any advice greatly appreciated. Thanks.
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt

data = np.loadtxt('data.txt', delimiter=',')
x, y = data
plt.plot(x, y)
plt.show()

def func(x, *params):
    y = np.zeros_like(x)
    print len(params)
    for i in range(0, len(params), 3):
        ctr = params[i]
        amp = params[i+1]
        wid = params[i+2]
        y = y + amp * np.exp(-((x - ctr)/wid)**2)

guess = [0, 60000, 80, 1000, 60000, 80]
for i in range(12):
    guess += [60+80*i, 46000, 25]

popt, pcov = curve_fit(func, x, y, p0=guess)
print popt
fit = func(x, *popt)
plt.plot(x, y)
plt.plot(x, fit, 'r-')
plt.show()
Traceback (most recent call last):
  File "C:\Users\test.py", line 33, in <module>
    popt, pcov = curve_fit(func, x, y, p0=guess)
  File "C:\Python27\lib\site-packages\scipy\optimize\minpack.py", line 533, in curve_fit
    res = leastsq(func, p0, args=args, full_output=1, **kw)
  File "C:\Python27\lib\site-packages\scipy\optimize\minpack.py", line 368, in leastsq
    shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
  File "C:\Python27\lib\site-packages\scipy\optimize\minpack.py", line 19, in _check_func
    res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
  File "C:\Python27\lib\site-packages\scipy\optimize\minpack.py", line 444, in _general_function
    return function(xdata, *params) - ydata
TypeError: unsupported operand type(s) for -: 'NoneType' and 'float'
This requires a non-linear fit. A good tool for this is scipy's curve_fit function.
To use curve_fit, we need a model function, call it func, that takes x and our (guessed) parameters as arguments and returns the corresponding values for y. As our model, we use a sum of gaussians:
from scipy.optimize import curve_fit
import numpy as np

def func(x, *params):
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        ctr = params[i]      # peak center
        amp = params[i+1]    # peak amplitude
        wid = params[i+2]    # e-folding width
        y = y + amp * np.exp(-((x - ctr)/wid)**2)
    return y
Now, let's create an initial guess for our parameters. This guess starts with peaks at x=0 and x=1,000 with amplitude 60,000 and e-folding widths of 80. Then, we add candidate peaks at x=60, 140, 220, ... with amplitude 46,000 and width of 25:
guess = [0, 60000, 80, 1000, 60000, 80]
for i in range(12):
    guess += [60+80*i, 46000, 25]
Now, we are ready to perform the fit:
popt, pcov = curve_fit(func, x, y, p0=guess)
fit = func(x, *popt)
To see how well we did, let's plot the actual y values (solid black curve) and the fit (dashed red curve) against x; the resulting plot (not reproduced here) shows that the fit is fairly good.
Complete working code
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt

data = np.loadtxt('data.txt', delimiter=',')
x, y = data
plt.plot(x, y)
plt.show()

def func(x, *params):
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        ctr = params[i]
        amp = params[i+1]
        wid = params[i+2]
        y = y + amp * np.exp(-((x - ctr)/wid)**2)
    return y

guess = [0, 60000, 80, 1000, 60000, 80]
for i in range(12):
    guess += [60+80*i, 46000, 25]

popt, pcov = curve_fit(func, x, y, p0=guess)
print(popt)
fit = func(x, *popt)

plt.plot(x, y)
plt.plot(x, fit, 'r-')
plt.show()
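Since the question also asks for the FWHM: with this parameterization, amp * np.exp(-((x - ctr)/wid)**2) falls to half its maximum where ((x - ctr)/wid)**2 = ln 2, so FWHM = 2 * wid * sqrt(ln 2). A small sketch (my addition, not part of the answer above) that extracts it from popt:

# FWHM of each fitted component, plus the x positions of the half-maximum crossings.
for i in range(0, len(popt), 3):
    ctr, amp, wid = popt[i], popt[i+1], popt[i+2]
    fwhm = 2 * wid * np.sqrt(np.log(2))
    print(ctr, fwhm, ctr - fwhm/2, ctr + fwhm/2)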
@john1024's answer is good, but requires a manual process to generate the initial guess. Here's an easy way to automate the starting guess; replace the relevant 3 lines of john1024's code with the following:
import scipy.signal

Npks = 14  # number of expected peaks (not defined in the original snippet; set it to your data)
i_pk = scipy.signal.find_peaks_cwt(y, widths=range(3, len(x)//Npks))
DX = (np.max(x) - np.min(x)) / float(Npks)  # starting guess for component width
guess = np.ravel([[x[i], y[i], DX] for i in i_pk])  # starting guess for (ctr, amp, width) for each component
IMHO it is always advisable to plot the residual (data - model) in problems such as this. You will also want to look at the ChiSq of the fit.
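A minimal sketch of that residual check, reusing x, y, fit and popt from the code above (the unweighted chi-square here is my simplification; divide by per-point variances if measurement errors are known):

residual = y - fit                  # data - model
chisq = np.sum(residual**2)         # unweighted chi-square
dof = len(y) - len(popt)            # degrees of freedom
print(chisq, chisq / dof)

plt.plot(x, residual, '.')
plt.axhline(0, color='k')
plt.xlabel('x')
plt.ylabel('data - model')
plt.show()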