I am trying to fit some data with a Gaussian. The data come from a lateral flow image, and the fitted line (red) does not cover the data. Please check my code. In the code, x is just an index; y is the real data.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
y = np.array([2.22097081, 2.24776432, 2.35519896, 2.43780396, 2.49708355,
2.54224971, 2.58350984, 2.62965057, 2.68644093, 2.75454015,
2.82912617, 2.90423835, 2.97921199, 3.05864617, 3.14649922,
3.2430853 , 3.3471892 , 3.45919857, 3.58109399, 3.71275641,
3.84604379, 3.94884214, 3.94108998, 3.72148453, 3.28407665,
2.7651018 ])
x = np.linspace(1,np.mean(y),len(y))
n = len(x)
mean = sum(x*y)/n
sigma = np.sqrt(sum(y*(x-mean)**2)/n)
def gaus(x,a,x0,sigma):
    return a*np.exp(-(x-x0)**2/(2*sigma**2))/(sigma*np.sqrt(2*np.pi))
popt,pcov = curve_fit(gaus,x,y,p0=[1,mean,sigma])
plt.figure()
plt.plot(x,y,'b+:',label='data')
plt.plot(x,gaus(x,*popt),'ro:',label='fit')
plt.legend()
plt.xlabel('Index')
plt.ylabel('Row Mean')
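No answer is attached above, but two likely culprits stand out: the x grid is defined oddly (the text says x is just an index, yet the code builds it with np.linspace(1, np.mean(y), len(y))), and the moment estimates divide by n instead of by the sum of the weights. A hedged sketch of the usual fixes, reusing y, np and curve_fit from the question above (the offset parameter c is my addition, since the data sit on a baseline near 2.2 rather than decaying to zero):
x = np.arange(len(y))                        # x really is just the index
w = y - y.min()                              # crude baseline removal for the moment estimates
mean = np.sum(x * w) / np.sum(w)             # weighted mean, normalised by the sum of weights
sigma = np.sqrt(np.sum(w * (x - mean)**2) / np.sum(w))

def gaus(x, a, x0, sigma, c):
    # Gaussian plus a constant offset c, because y does not decay to zero
    return a * np.exp(-(x - x0)**2 / (2 * sigma**2)) + c

popt, pcov = curve_fit(gaus, x, y, p0=[y.max() - y.min(), mean, sigma, y.min()])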
I am trying to fit a curve for a set of points using numpy and scipy libraries but am getting a closed curve as shown below.
Could anyone let me know how to fit a curve without closing curve?
The code I followed is:
import numpy as np
from scipy.interpolate import splprep, splev
import matplotlib.pyplot as plt
coords = np.array([(3,8),(3,9),(4,10),(5,11),(6,11), (7,13), (9,13),(10,14),(11,14),(12,14),(14,16),(16,17),(17,18),(18,18),(19,18), (20,19),
(21,19),(22,20),(23,20),(24,21),(26,21),(27,21),(28,21),(30,21),(32,20),(33,20),(32,17),(33,16),(33,15),(34,12), (34,10),(33,10),
(33,9),(33,8),(33,6),(34,6),(34,5)])
tck, u = splprep(coords.T, u=None, s=0.0, per=1)
u_new = np.linspace(u.min(), u.max(), 1000)
x_new, y_new = splev(u_new, tck, der=0)
plt.plot(coords[:,1], coords[:,0], 'ro')
plt.plot(y_new, x_new, 'b--')
plt.show()
Output: [plot omitted: the fitted spline closes back on itself]
I need the output without the first and last points being joined.
Thank you.
Just set the per parameter to 0 in scipy.interpolate.splprep:
tck, u = splprep(coords.T, u=None, s=0.0, per=0)
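(A non-zero per tells splprep to treat the data points as periodic, which is what forces the curve to close; per defaults to 0, so simply dropping the argument works as well.)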
I have experimental data with relative uncertainties, and I would like to fit a fourth-degree polynomial to it using a model in logarithmic coordinates. The issue I'm facing is how to get an uncertainty on the fit from the uncertainties in the experimental data. I have no idea how to do this; any suggestion would be appreciated.
Thank you.
import numpy as np
import matplotlib.pyplot as plt
x = [121.78, 244.69, 344.278, 411.1165, 778.904, 867.38, 964.079, 1112.076, 1212.948, 1299.142, 1408.013]
y = [2.98E-03, 2.18E-03, 1.64E-03, 1.79E-03, 8.09E-04, 7.54E-04, 6.80E-04, 6.11E-04, 5.83E-04, 5.49E-04, 5.01E-04]
error_abs = [4.72E-05, 3.64E-05, 2.63E-05, 3.38E-05, 1.38E-05, 1.50E-05, 1.15E-05, 1.06E-05, 1.66E-05, 1.51E-05, 8.41E-06] # y_err
# x and y in logarithms
t1 = np.log(x)
t2 = np.log(y)
l = np.linspace(np.min(t1),np.max(t1),1000)
p,cov = np.polyfit(t1,t2,4, cov=True)
fit = np.polyval(p,l)
plt.errorbar(x, y, yerr=error_abs, lw=0.5, capsize=2, capthick=0.5, linestyle='none', marker='s',markersize=3, label='efficiency', color='orange')
plt.plot(np.exp(l),np.exp(fit), label ='Fit')
plt.show()
print(np.sqrt(np.diag(cov)))
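No answer is attached above; one common approach (a sketch, not the only way) is to weight the fit by the log-space uncertainties and then propagate the parameter covariance through the polynomial to get a 1-sigma band on the curve. Reusing x, y, error_abs, t1, t2 and l from the code above:
# uncertainties transform as d(ln y) = dy / y
t2_err = np.array(error_abs) / np.array(y)

# np.polyfit applies weights as w*(y - f), so for Gaussian errors use w = 1/sigma
# (note: by default np.polyfit rescales cov by the reduced chi-square;
#  pass cov='unscaled' to keep the raw weighted covariance)
p, cov = np.polyfit(t1, t2, 4, w=1.0/t2_err, cov=True)

# propagate: var(fit) = diag(V @ cov @ V.T), with V the Vandermonde design matrix
V = np.vander(l, 5)            # columns l**4 ... l**0, matching polyval's ordering
fit = V @ p
fit_err = np.sqrt(np.einsum('ij,jk,ik->i', V, cov, V))

plt.plot(np.exp(l), np.exp(fit), label='fit')
plt.fill_between(np.exp(l), np.exp(fit - fit_err), np.exp(fit + fit_err),
                 alpha=0.3, label='1-sigma band')
plt.legend()
plt.show()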
I have some data I'm trying to model with lmfit's Model.
Specifically, I'm measuring superconducting resistors. I'm trying to fit the experimental data (resistance vs. temperature) to a model which incorporates the critical temperature Tc (material dependent), the resistance below Tc (nominally 0), and the resistance above Tc (structure dependent).
Here's a simplified version (with simulated data) of the code I'm using to plot my data, along with the output plot.
I'm not getting any errors but, as you can see, I'm also not getting a fit that matches my data.
What am I doing wrong? This is my first time using lmfit and Model, so I may be making a newbie mistake. I thought I was following the lmfit example but, as I said, I'm obviously doing something wrong.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from lmfit import Model
def main():
    x = np.linspace(0, 12, 50)
    x_ser = pd.Series(x)  # Simulated temperature data
    y1 = [0] * 20
    y2 = [10] * 30
    y1_ser = pd.Series(y1)  # Simulated resistance data below Tc
    y2_ser = pd.Series(y2)  # Simulated resistance data above Tc
    # note: Series.append was removed in pandas 2.0;
    # pd.concat([y1_ser, y2_ser], ignore_index=True) is the modern equivalent
    y_ser = y1_ser.append(y2_ser, ignore_index=True)
    xcrit_model = Model(data_equation)
    params = xcrit_model.make_params(y1_guess=0, y2_guess=12, xcrit_guess=9)
    print('params: {}'.format(params))
    result = xcrit_model.fit(y_ser, params, x=x_ser)
    print(result.fit_report())
    plt.plot(x_ser, y_ser, 'bo', label='simulated data')
    plt.plot(x_ser, result.init_fit, 'k.', label='initial fit')
    plt.plot(x_ser, result.best_fit, 'r:', label='best fit')
    plt.legend()
    plt.show()

def data_equation(x, y1_guess, y2_guess, xcrit_guess):
    x_lt_xcrit = x[x < xcrit_guess]
    x_ge_xcrit = x[x >= xcrit_guess]
    y1 = [y1_guess] * x_lt_xcrit.size
    y1_ser = pd.Series(data=y1)
    y2 = [y2_guess] * x_ge_xcrit.size
    y2_ser = pd.Series(data=y2)
    y = y1_ser.append(y2_ser, ignore_index=True)
    return y

if __name__ == '__main__':
    main()
lmfit (and basically all similar solvers) works with continuous variables, investigating how tiny changes in the parameter values affect the fit.
But your xcrit_guess parameter is used only as a discrete cutoff: if its value changes from 9.0000 to 9.00001, the fit will not change at all.
So, basically, don't do:
x_lt_xcrit = x[x < xcrit_guess]
x_ge_xcrit = x[x >= xcrit_guess]
Instead, you should use a smooth sigmoidal step function. In fact, lmfit has one of these built in, so you might try something like this (note: there is no point in converting numpy arrays to pandas Series; the code will just turn them back into numpy arrays anyway):
import numpy as np
from lmfit.models import StepModel
import matplotlib.pyplot as plt
x = np.linspace(0, 12, 50)
y = 9.5*np.ones(len(x))
y[:26] = 0.0
y = y + np.random.normal(size=len(y), scale=0.0002)
xcrit_model = StepModel(form='erf')
params = xcrit_model.make_params(amplitude=4, center=5, sigma=1)
result = xcrit_model.fit(y, params, x=x)
print(result.fit_report())
plt.plot(x, y, 'bo', label='simulated data')
plt.plot(x, result.init_fit, 'k', label='initial fit')
plt.plot(x, result.best_fit, 'r:', label='best fit')
plt.legend()
plt.show()
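(For reference: with form='erf', lmfit's StepModel is amplitude * (1 + erf((x - center)/sigma)) / 2, so in the superconductor picture center plays the role of Tc, amplitude the normal-state resistance, and sigma the width of the transition.)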
I was wondering if there's a way to find tangents to a curve from discrete data.
For example:
x = np.linspace(-100,100,100001)
y = np.sin(x)
so here the x values lie on a fixed grid, but what if we want to find the tangent at something like x = 67.875?
I've been trying to figure out if numpy.interp would work, but so far no luck.
I also found a couple of similar examples, such as this one, but haven't been able to apply the techniques to my case :(
I'm new to Python and don't entirely know how everything works yet, so any help would be appreciated...
This is what I get:
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-100,100,10000)
y = np.sin(x)
tck, u = interpolate.splprep([y])
ti = np.linspace(-100,100,10000)
dydx = interpolate.splev(ti,tck,der=1)
plt.plot(x,y)
plt.plot(ti,dydx[0])
plt.show()
There is a comment in this answer which tells you that there is a difference between splrep and splprep. For the 1D case you have here, splrep is completely sufficient.
You may also want to limit your x range a bit to be able to see the oscillations.
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-15,15,1000)
y = np.sin(x)
tck = interpolate.splrep(x,y)
dydx = interpolate.splev(x,tck,der=1)
plt.plot(x,y)
plt.plot(x,dydx, label="derivative")
plt.legend()
plt.show()
While this is how the code above would be made runnable, it does not yet provide a tangent. For the tangent you only need the derivative at a single point, plus the point itself: the tangent at x0 is the line y = y0 + dydx*(x - x0). So you need to have the equation of a tangent somewhere and actually use it; this is more of a math question.
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-15,15,1000)
y = np.sin(x)
tck = interpolate.splrep(x,y)
x0 = 7.3
y0 = interpolate.splev(x0,tck)
dydx = interpolate.splev(x0,tck,der=1)
tngnt = lambda x: dydx*x + (y0-dydx*x0)
plt.plot(x,y)
plt.plot(x0,y0, "or")
plt.plot(x,tngnt(x), label="tangent")
plt.legend()
plt.show()
It should be noted that you do not need to use splines at all if the points you have are dense enough. In that case obtaining the derivative is just taking the differences between the nearest points.
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-15,15,1000)
y = np.sin(x)
x0 = 7.3
i0 = np.argmin(np.abs(x-x0))
x1 = x[i0:i0+2]
y1 = y[i0:i0+2]
dydx, = np.diff(y1)/np.diff(x1)
tngnt = lambda x: dydx*x + (y1[0]-dydx*x1[0])
plt.plot(x,y)
plt.plot(x1[0],y1[0], "or")
plt.plot(x,tngnt(x), label="tangent")
plt.legend()
plt.show()
The result will be visually identical to the one above.
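A small addition beyond the original answer: for dense data, np.gradient computes central differences over the whole array in one call, which avoids the two-point slicing. Reusing x, y and x0 from the block above:
dydx_all = np.gradient(y, x)        # central differences in the interior, one-sided at the ends
i0 = np.argmin(np.abs(x - x0))      # index of the grid point closest to x0
tngnt = lambda xx: dydx_all[i0]*(xx - x[i0]) + y[i0]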
I am trying to estimate the probability density function of my data. In my case, the data is a satellite image with shape 8200 x 8100.
Below I present the code for the PDF (the function is_outlier is borrowed from another answer posted here). As you can see, the PDF in figure 1 is far too dense; I guess this is due to the thousands of pixels the satellite image is composed of, and it is very ugly.
My question is: how can I plot a PDF that is not so dense, something like the one shown in figure 2?
lst = 'satellite_img.tif'  # placeholder: load the image into a numpy array here, e.g. with rasterio or PIL
lst_flat = lst.flatten()   # create a 1D array of pixel values
#the function below removes the outliers
def is_outlier(points, thres=3.5):
    if len(points.shape) == 1:
        points = points[:,None]
    median = np.median(points, axis=0)
    diff = np.sum((points - median)**2, axis=-1)
    diff = np.sqrt(diff)
    med_abs_deviation = np.median(diff)
    # 0.6745 scales the MAD so the modified z-score is comparable to a standard z-score
    modified_z_score = 0.6745 * diff / med_abs_deviation
    return modified_z_score > thres
lst_flat = np.r_[lst_flat]
lst_flat_filtered = lst_flat[~is_outlier(lst_flat)]
fit = stats.norm.pdf(lst_flat_filtered, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.plot(lst_flat_filtered, fit)
plt.hist(lst_flat_filtered, bins=30, density=True)  # 'normed' was removed in newer matplotlib; use density
plt.show()
[Figure 1: the dense, scribbled PDF plot. Figure 2: an example of the desired smooth PDF curve.]
The issue is that the x values in the PDF plot are not sorted, so the plotted line jumps back and forth between points in arbitrary order, creating the mess you see.
Two options:
Don't plot the line, just plot points (not great if you have lots of points, but it will confirm whether what I said above is right):
plt.plot(lst_flat_filtered, fit, 'bo')
Sort the lst_flat_filtered array before calculating the PDF and plotting it:
lst_flat = np.r_[lst_flat]
lst_flat_filtered = np.sort(lst_flat[~is_outlier(lst_flat)]) # Changed this line
fit = stats.norm.pdf(lst_flat_filtered, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.plot(lst_flat_filtered, fit)
Here are some minimal examples showing these behaviours:
Reproducing your problem:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lst_flat_filtered = np.random.normal(7, 5, 1000)
fit = stats.norm.pdf(lst_flat_filtered, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.hist(lst_flat_filtered, bins=30, density=True)
plt.plot(lst_flat_filtered, fit)
plt.show()
Plotting points
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lst_flat_filtered = np.random.normal(7, 5, 1000)
fit = stats.norm.pdf(lst_flat_filtered, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.hist(lst_flat_filtered, bins=30, density=True)
plt.plot(lst_flat_filtered, fit, 'bo')
plt.show()
Sorting the data
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lst_flat_filtered = np.sort(np.random.normal(7, 5, 1000))
fit = stats.norm.pdf(lst_flat_filtered, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.hist(lst_flat_filtered, bins=30, density=True)
plt.plot(lst_flat_filtered, fit)
plt.show()
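A third option, my addition rather than part of the answer above, is to evaluate the fitted PDF on an evenly spaced grid instead of on the (unsorted) data points; the line is then smooth regardless of the data order:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lst_flat_filtered = np.random.normal(7, 5, 1000)
# evaluate the PDF on a regular grid spanning the data range
x_grid = np.linspace(lst_flat_filtered.min(), lst_flat_filtered.max(), 200)
fit = stats.norm.pdf(x_grid, np.mean(lst_flat_filtered), np.std(lst_flat_filtered))
plt.hist(lst_flat_filtered, bins=30, density=True)
plt.plot(x_grid, fit)
plt.show()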