I am new to Python and am trying to fit a curve to a dataset using the following code. The actual data consists of two columns: predicted market price and actual market price. I tried to use scipy.optimize.curve_fit(), but the result shows many lines plotted on top of each other. Any help is appreciated.
# import the necessary modules and define a func.
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
def func(x, a, b, c):
    """Power-law model ``a * x**b + c`` used as the fit function.

    ``x`` is the independent variable; ``a``, ``b`` and ``c`` are the
    parameters that ``curve_fit`` adjusts.
    """
    return a * x ** b + c
# my data: predicted (x) and actual (y) market prices, paired by position
pred_data = [3.0,1.0,1.0,7.0,6.0,1.0,7.0,4.0,9.0,3.0,5.0,5.0,2.0,6.0,8.0]
actu_data =[ 3.84,1.55,1.15,7.56,6.64,1.09,7.12,4.17,9.45,3.12,5.37,5.65,1.92,6.27,7.63]
# fit func to the data; popt receives the fitted values of a, b, c
popt, pcov = curve_fit(func, pred_data, actu_data)
# evaluate the fitted model at the original x-values, in their original order
yaj = func(pred_data, popt[0],popt[1], popt[2])
# plot the data
plt.plot(pred_data,actu_data, 'ro', label = 'Data')
# NOTE: 'b--' joins consecutive points with a line; pred_data is not sorted,
# so the dashed line is drawn back and forth between the x-values
plt.plot(pred_data,yaj,'b--', label = 'Best fit')
plt.legend()
plt.show()
SciPy doesn't produce multiple lines; the strange output is caused by the way you pass your unsorted data to matplotlib, which connects the points in the order given. Sort your x-values and you get the desired output:
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
def func(x, a, b, c):
    """Power-law model ``a * x**b + c`` used as the fit function.

    ``x`` is the independent variable; ``a``, ``b`` and ``c`` are the
    parameters that ``curve_fit`` adjusts.
    """
    return a * x ** b + c
# Sample data: predicted (x) and actual (y) values, paired by position
pred_data = [3.0, 1.0, 1.0, 7.0, 6.0, 1.0, 7.0, 4.0, 9.0, 3.0, 5.0, 5.0, 2.0, 6.0, 8.0]
actu_data = [3.84, 1.55, 1.15, 7.56, 6.64, 1.09, 7.12, 4.17, 9.45, 3.12, 5.37, 5.65, 1.92, 6.27, 7.63]

# Fit the model parameters to the data
popt, pcov = curve_fit(func, pred_data, actu_data)

# Evaluate the fitted model on the x-values in ascending order, so the
# fit line is drawn from left to right
x_ascending = sorted(pred_data)
yaj = func(x_ascending, *popt)

# Raw points as red dots, fitted model as a dashed blue line
plt.plot(pred_data, actu_data, 'ro', label='Data')
plt.plot(x_ascending, yaj, 'b--', label='Best fit')
plt.legend()
plt.show()
A better way is of course to define an evenly-spaced high resolution array for your x-values and calculate the fit for this array to have a smoother representation of your fit function:
from scipy.optimize import curve_fit
import numpy as np
from matplotlib import pyplot as plt
def func(x, a, b, c):
    """Power-law model ``a * x**b + c`` used as the fit function.

    ``x`` is the independent variable; ``a``, ``b`` and ``c`` are the
    parameters that ``curve_fit`` adjusts.
    """
    return a * x ** b + c
# Sample data: predicted (x) and actual (y) values, paired by position
pred_data = [3.0, 1.0, 1.0, 7.0, 6.0, 1.0, 7.0, 4.0, 9.0, 3.0, 5.0, 5.0, 2.0, 6.0, 8.0]
actu_data = [3.84, 1.55, 1.15, 7.56, 6.64, 1.09, 7.12, 4.17, 9.45, 3.12, 5.37, 5.65, 1.92, 6.27, 7.63]

# Fit the model parameters to the data
popt, pcov = curve_fit(func, pred_data, actu_data)

# Evaluate the fitted model on 1000 evenly spaced points spanning the
# data range, so the curve is drawn smoothly
x_low, x_high = min(pred_data), max(pred_data)
xaj = np.linspace(x_low, x_high, 1000)
yaj = func(xaj, *popt)

# Raw points as red dots, fitted model as a dashed blue line
plt.plot(pred_data, actu_data, 'ro', label='Data')
plt.plot(xaj, yaj, 'b--', label='Best fit')
plt.legend()
plt.show()
Related
I am trying to fit an exponential function of the form y = e^(-b/x)in python. The data I'm using looks like this
But when I try to plot I'm getting this error
"RuntimeWarning: overflow encountered in exp
if __name__ == '__main__':"
Here is my code
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as mpl
def func(x, a, b, c):
    """Fitting function ``a * exp(-b / x)``.

    ``x`` is the independent variable; ``a`` and ``b`` are the fit
    parameters. ``c`` is accepted but does not appear in the returned
    expression.
    """
    return a * np.exp(-1 * b / x)
# Load the data file, skipping its first row
data = np.loadtxt("S005_CP_0011_N20.dat", skiprows=1)
# Split the array column-wise into two equal parts and use the first
# column of each part as x and y
# NOTE(review): presumably the file has exactly two columns — confirm
xData, yData = np.hsplit(data,2)
x = xData[:,0]
y = yData[:,0]
# Fit func to the data; no initial guess (p0) is passed
popt, pcov = curve_fit(func, x, y)
# Only the fitted curve is plotted, evaluated at the data x-values
mpl.plot(x, func(x, *popt), label="Fitted Curve")
mpl.legend(loc='upper left')
mpl.show()
Could someone please help to rectify the code?
So I'm trying to get an exponential curve for some COVID data, but I can't seem to get my curve_fit function to show any sort of curve whatsoever. It's so bad it perfectly overlaps the regression line seaborn generated in my graph.
I've tried making both my date and case data smaller/bigger before throwing it into the curve_fit function, but I still either get a similar line and/or an Optimization error. I even tried calculating my function manually but that was (naturally) also way off.
#Plot scatter plot for total case count
x = df_sb['date_ordinal']
y1 = df_sb['totalcountconfirmed']
# y2 is assigned here but not used in the rest of this snippet
y2 = df_sb['totalcountdeaths']
plt.figure(figsize=(14,10))
ax = plt.subplot(1,1,1)
# Plot scatter plot along with linear regression line
sns.regplot(x='date_ordinal', y='totalcountconfirmed', data=df_sb)
# Formatting axes
ax.set_xlim(x.min() - 1, x.max() + 10)
ax.set_ylim(0, y1.max() + 1)
ax.set_xlabel('Date')
# Convert the ordinal tick positions back to dates for the tick labels
labels = [dt.date.fromordinal(int(item)) for item in ax.get_xticks()]
ax.set_xticklabels(labels)
plt.xticks(rotation = 45)
plt.ylabel("Total Confirmed Cases")
# Exponential Curve
from scipy.optimize import curve_fit
from scipy.special import expit
# Same two columns as above, as NumPy arrays for curve_fit
x_data = df_sb['date_ordinal'].to_numpy()
Y_data = df_sb['totalcountconfirmed'].to_numpy()
def func(x, a, b, c):
    """Model ``a * expit(-b * x) + c`` passed to ``curve_fit``.

    ``expit`` is SciPy's logistic sigmoid, so this is a sigmoid in ``x``
    scaled by ``a`` and shifted by ``c``.
    """
    return a * expit(-b * x) + c
# Fit func to the data, allowing up to 10000 function evaluations;
# no initial guess (p0) is passed
popt, pcov = curve_fit(func, x_data, Y_data, maxfev=10000)
a, b, c = popt
# Evaluate the fitted model at the data x-values and draw it on the
# current axes
fit_y = func(x_data, a, b, c)
plt.plot(x_data, fit_y)
plt.legend(['Total Cases (Linear)','Total Cases (Exponential)'])
# Inserting Significant Date Labels
add_sig_dates(df_sb, 'totalcountconfirmed')
plt.show()
Although you did not give access to the data, just by looking at the plot I'm pretty sure you mean
def func(x, a, b, c):
    """Exponential model ``a * exp(-b * x) + c``.

    ``x`` is the independent variable; ``a``, ``b`` and ``c`` are the
    fit parameters.
    """
    return a * np.exp(-b * x) + c
instead of
def func(x, a, b, c):
    """Model ``a * expit(-b * x) + c``.

    ``expit`` is SciPy's logistic sigmoid, so this is a sigmoid in ``x``
    rather than a plain exponential.
    """
    return a * expit(-b * x) + c
Since it's an exponential fit, I think you should provide initial guess for parameters in order to achieve good results. This can be done with the p0 argument.
For example:
p0 = [2 ,1, 0] # placeholder starting values for (a, b, c); illustrative only, not good guesses
# pass the starting values to the optimizer through the p0 argument
popt, pcov = curve_fit(func, x_data, Y_data, maxfev=10000, p0=p0)
I have a function: f(theta) = a+b*cos(theta - c) as well as sampled data. I'd like to find the coefficients a, b, and c that minimize mean square error. Any idea if there's an efficient way to do this in python?
EDIT:
import numpy as np
from scipy.optimize import curve_fit
#definition of the function
def myfunc(x, a, b, c):
    """Return ``a + b * cos(x - c)``.

    ``a`` is the offset, ``b`` the amplitude and ``c`` the phase shift.
    The cosine is evaluated with ``np.cos``, which works in radians.
    """
    return a + b * np.cos(x - c)
#sample data
# x values are passed to myfunc as given; the x-axis label below says degrees
x_data = [0, 60, 120, 180, 240, 300]
y_data = [25, 40, 70, 30, 10, 15]
#the actual curve fitting procedure, a, b, c are stored in popt
popt, _pcov = curve_fit(myfunc, x_data, y_data)
print(popt)
# the fitted c, converted with np.degrees
print(np.degrees(popt[2]))
#the rest is just a graphic representation of the data points and the fitted curve
from matplotlib import pyplot as plt
#x_fit = np.linspace(-1, 6, 1000)
# the fitted curve is evaluated only at the six data x-values
y_fit = myfunc(x_data, *popt)
plt.plot(x_data, y_data, "ro")
plt.plot(x_data, y_fit, "b")
plt.xlabel(r'$\theta$ (degrees)');
plt.ylabel(r'$f(\theta)$');
# NOTE: neither plot call above sets a label, so the legend has no entries
plt.legend()
plt.show()
Here is a picture showing how the curve doesn't really fit the points. It seems like the amplitude should be higher. The local mins and maxes appear to be in the right places.
scipy.optimize.curve_fit makes it really easy to fit data points to your custom function:
import numpy as np
from scipy.optimize import curve_fit
#definition of the function
def myfunc(x, a, b, c):
    """Return ``a + b * cos(x - c)``.

    ``a`` is the offset, ``b`` the amplitude and ``c`` the phase shift.
    The cosine is evaluated with ``np.cos``, which works in radians.
    """
    return a + b * np.cos(x - c)
# Synthetic sample: five points generated from a known cosine
x_data = np.arange(5)
y_data = 2.34 + 1.23 * np.cos(x_data + .23)

# Fit myfunc to the sample; popt holds the fitted a, b, c
popt, _pcov = curve_fit(myfunc, x_data, y_data)
print(popt)

# Everything below only visualizes the data points and the fitted curve
from matplotlib import pyplot as plt

# Dense x grid so the fitted curve is drawn smoothly
x_fit = np.linspace(-1, 6, 1000)
y_fit = myfunc(x_fit, *popt)

# Legend entry for the curve lists the fitted parameter values
curve_label = "fitted curve\na = {}\nb = {}\nc = {}".format(*popt)
plt.plot(x_data, y_data, "ro", label="data points")
plt.plot(x_fit, y_fit, "b", label=curve_label)
plt.legend()
plt.show()
Output:
[ 2.34 1.23 -0.23]
Edit:
Your question update introduces several problems. First, your x-values are in degrees, while np.cos expects values in radians. Therefore, we should convert the values with np.deg2rad. The reverse function is np.rad2deg.
Second, it is a good idea to fit for different frequencies as well, let's introduce an additional parameter for that.
Third, fits are usually quite sensitive to initial guesses. You can provide a parameter p0 in scipy for that.
Fourth, you changed the resolution of the fitted curve to the low resolution of your data points, hence it looks so undersampled. If we address all these problems:
import numpy as np
from scipy.optimize import curve_fit
#sample data (x in degrees)
x_data = [0, 60, 120, 180, 240, 300]
y_data = [25, 40, 70, 30, 10, 15]


#definition of the function with additional frequency value d
def myfunc(x, a, b, c, d):
    """Return ``a + b * cos(d * deg2rad(x) - c)``.

    ``x`` is converted from degrees to radians before use. ``a`` is the
    offset, ``b`` the amplitude, ``c`` the phase shift (in radians) and
    ``d`` the frequency factor applied to the converted ``x``.
    """
    return a + b * np.cos(d * np.deg2rad(x) - c)
# Starting values for a, b, c, d: the mean of y for both offset and
# amplitude, zero phase shift, unit frequency
y_mean = np.average(y_data)
p_initial = [y_mean, y_mean, 0, 1]

# Run the fit from those starting values; popt holds the fitted a, b, c, d
popt, _pcov = curve_fit(myfunc, x_data, y_data, p0=p_initial)
print(popt)

# The phase shift c is fitted in radians; report it in degrees
phase_shift = popt[2]
print(np.rad2deg(phase_shift))

# Visualize the data points and the fitted curve
from matplotlib import pyplot as plt

# 1000 evenly spaced x-values over the data range for a smooth curve
x_start = np.min(x_data)
x_stop = np.max(x_data)
x_fit = np.linspace(x_start, x_stop, 1000)
y_fit = myfunc(x_fit, *popt)

plt.plot(x_data, y_data, "ro", label="data")
plt.plot(x_fit, y_fit, "b", label="fit")
plt.xlabel(r'$\theta$ (degrees)')
plt.ylabel(r'$f(\theta)$')
plt.legend()
plt.show()
we get this output:
[34.31293761 26.92479369 2.20852009 1.18144319]
126.53888003953764
I have a question that I have been struggling with for days now.
How do I calculate the (95%) confidence band of a fit?
Fitting curves to data is the everyday job of every physicist, so I think this should be implemented somewhere, but I can't find an implementation, nor do I know how to do it mathematically.
The only thing I found is seaborn that does a nice job for linear least-square.
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
# Synthetic data: the line y = x plus Gaussian noise with standard deviation 3
x = np.linspace(0, 10)
y = 3*np.random.randn(x.size) + x  # one noise sample per x value

data = {'x': x, 'y': y}
frame = pd.DataFrame(data, columns=['x', 'y'])

# Linear regression plot with a 95% confidence band.
# x, y and data are passed by keyword: current seaborn releases no longer
# accept them positionally.
sns.lmplot(x='x', y='y', data=frame, ci=95)
plt.savefig("confidence_band.pdf")
But this is just linear least squares. When I want to fit e.g. a saturation curve like $y = a\,(1 - e^{b x})$, I'm screwed.
Sure, I can calculate the t-distribution from the std-error of a least-square method like scipy.optimize.curve_fit but that is not what I'm searching for.
Thanks for any help!!
You can achieve this easily using StatsModels module.
Also see this example and this answer.
Here is an answer for your question:
import numpy as np
from matplotlib import pyplot as plt
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
# Synthetic data: the line y = x plus Gaussian noise with standard deviation 3
x = np.linspace(0, 10)
y = 3*np.random.randn(x.size) + x  # one noise sample per x value

# Design matrix with an added constant column, then an ordinary
# least-squares fit
X = sm.add_constant(x)
res = sm.OLS(y, X).fit()

# Per-observation summary table at the 95% level (alpha = 0.05); the
# variable names below describe which columns are picked out
st, data, ss2 = summary_table(res, alpha=0.05)
fittedvalues = data[:, 2]
predict_mean_se = data[:, 3]
predict_mean_ci_low, predict_mean_ci_upp = data[:, 4:6].T
predict_ci_low, predict_ci_upp = data[:, 6:8].T

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, 'o', label="data")
# Plot against the 1-D x, not the design matrix X: X has two columns
# (constant and x), so plotting against it draws an extra, spurious line
# for the constant column in every call.
ax.plot(x, fittedvalues, 'r-', label='OLS')
ax.plot(x, predict_ci_low, 'b--')
ax.plot(x, predict_ci_upp, 'b--')
ax.plot(x, predict_mean_ci_low, 'g--')
ax.plot(x, predict_mean_ci_upp, 'g--')
ax.legend(loc='best')
plt.show()
kmpfit's confidence_band() calculates the confidence band for non-linear least squares. Here for your saturation curve:
from pylab import *
from kapteyn import kmpfit
def model(p, x):
    """Saturation curve ``a * (1 - exp(b * x))``.

    ``p`` is the parameter pair ``(a, b)``; ``x`` is the independent
    variable.
    """
    a, b = p
    return a*(1-np.exp(b*x))
# Synthetic data: the model with a = 1, b = -0.4 plus Gaussian noise
# with standard deviation 0.1
x = np.linspace(0, 10, 100)
y = .1*np.random.randn(x.size) + model([1, -.4], x)

# Least-squares fit starting from the initial guess a = 0.1, b = -0.1
fit = kmpfit.simplefit(model, [.1, -.1], x, y)
a, b = fit.params

# Partial derivatives of the model with respect to a and b, evaluated
# at the fitted parameters
dfdp = [1-np.exp(b*x), -a*x*np.exp(b*x)]
yhat, upper, lower = fit.confidence_band(x, dfdp, 0.95, model)

scatter(x, y, marker='.', color='#0000ba')
# Band edges in red, fitted curve in green (drawn last)
for curve, color in ((upper, 'r'), (lower, 'r'), (yhat, 'g')):
    plot(x, curve, c=color, lw=2)
savefig('kmpfit confidence bands.png', bbox_inches='tight')
The dfdp are the partial derivatives ∂f/∂p of the model f = a*(1-e^(b*x)) with respect to each parameter p (i.e., a and b), see my answer to a similar question for background links. And here the output:
Hi I'm attempting to produce a fit for each of my three exponential decays. I am not successful with producing a satisfactory fit. This is what I get: http://i.imgur.com/Nx44wsS.jpg
Any help is greatly appreciated. My code is below.
import pylab as plb
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import numpy as np
import scipy as sp
from scipy.optimize import curve_fit
from matplotlib import rc
# Font setup: sans-serif family, with Helvetica listed as the sans-serif face
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
# Turn on the text.usetex setting
rc('text', usetex=True)
# Load the data file, skipping its first two rows
data = plb.loadtxt('data.csv',skiprows=2)
# Columns 4, 5 and 6 are the series plotted further down as
# 'Polished', 'Rough' and 'Lacquered'; column 0 is the x-axis,
# labelled "Time (min)" in the plot
yp = data[:,4]
yr = data[:,5]
yl = data[:,6]
x = data[:,0]
def func(x,a,b,c):
    """Exponential decay model ``a * exp(-b * x) + c``.

    ``a`` is the amplitude, ``b`` the decay rate and ``c`` the constant
    offset that the curve approaches for large ``x``.
    """
    return a*np.exp(-b*x) + c
# Fit the decay model to the 'Lacquered' series; no initial guess (p0)
# is passed
popt, pcov = curve_fit(func, x, yl, maxfev=20000)
a, b, c = popt
print(a)
print(b)
print(c)
print(func(x, a, b, c))

# Evaluate the fitted model on a dense grid so the curve is drawn
# smoothly, instead of only at the measured x-values
xf = np.linspace(0, 70, 100)
yf = func(xf, a, b, c)

plt.clf()
plt.plot(xf, yf, 'r-', label="Fitted Curve")
plt.plot(x, yp, 'bo', label='Polished')
plt.plot(x, yr, 'ro', label='Rough')
plt.plot(x, yl, 'go', label='Lacquered')
plt.legend()
plt.ylabel("Temperature (K)")
plt.xlabel("Time (min)")
plt.show()
Nonlinear fits are difficult and the trick is that you have to provide a reasonable initial guess.
Here is a version of your code which does two fits, one with an approximate initial guess and one with the default initial guess:
import pylab as plb
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import scipy as sp
from scipy.optimize import curve_fit
from matplotlib import rc
import numpy as np
# Font setup: sans-serif family, with Helvetica listed as the sans-serif face
rc('font', **{'family':'sans-serif', 'sans-serif':['Helvetica']})
# Turn on the text.usetex setting
# NOTE(review): presumably requires a working TeX installation — confirm
rc('text', usetex=True)
# Fake data: a noise-free exponential decay sampled every 2 units on [0, 70)
x = np.arange(0, 70., 2.)
yl = 300 + 63*np.exp(-x/35.)


def func(x, a, b, c):
    """Exponential decay model ``a * exp(-b * x) + c``.

    ``a`` is the amplitude, ``b`` the decay rate and ``c`` the constant
    offset that the curve approaches for large ``x``.
    """
    return a*np.exp(-b*x) + c
# Fit 1: start from an approximate initial guess for (a, b, c)
popt, pcov = curve_fit(func, x, yl, p0=(40, 0.012, 250), maxfev=20000)
a, b, c = popt
print('a=', a, 'b=', b, 'c=', c)
print('func=', func(x, a, b, c))

# Fit 2: no initial guess, so curve_fit falls back to its default start
popt2, pcov2 = curve_fit(func, x, yl, p0=None, maxfev=20000)
a2, b2, c2 = popt2
print('a2=', a2, 'b2=', b2, 'c2=', c2)
print('func=', func(x, a2, b2, c2))

# Evaluate both fitted models on a dense grid so the curves are drawn
# smoothly, instead of only at the sampled x-values
xf = np.linspace(0, 70, 100)

plt.clf()
plt.plot(xf, func(xf, *popt), 'r-', label="Fitted Curve")
plt.plot(xf, func(xf, *popt2), 'b-', label='Fit w/o guess')
plt.plot(x, yl, 'go', label='Lacquered')
plt.legend()
plt.ylabel("Temperature (K)")
plt.xlabel("Time (min)")
plt.show()
And here are the resulting fits:
As you can see, the fit with a reasonable initial guess does very well (red line). If you don't provide an initial guess, scipy assumes 1 for all parameters and that works poorly (blue line).