Related
My specific issue is that I cannot seem to get my data to converted to floating points. I have data and simply want to fit a robust curve using my model equation:
y = a * e^(-b*z)
This cookbook is my reference: click
Below is my attempt. I am getting this:
TypeError: 'data type not understood'
which I believe is because my columns are strings, so I tried pd.Series.astype().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import least_squares
for i in range(1):
def model(z, a, b):
y = a * np.exp(-b * z)
return y
data = pd.read_excel('{}.xlsx'.format(600+i), names = ['EdGnd','380','395','412','443','465','490','510','520','532','555','560','565','589','625','665','670','683','694','710','Temp','z','EdZTemp','Tilt','Roll','EdZVin'])
data.dropna(axis = 0, how = 'any')
data.astype('float')
np.dtype(data)
data.plot.scatter('z','380')
def fun(x, z, y):
return x[0] * np.exp(-x[1] * z) - y
x0 = np.ones(3)
rbst1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1, args=('z', 'ed380'))
y_robust = model('z', *rbst1.x)
plt.plot('z', y_robust, label='robust lsq')
plt.xlabel('$z$')
plt.ylabel('$Ed$')
plt.legend();
I think the problem is that you pass 'z' in args which is a string and can therefore not be used in the multiplication.
Below is some code using curve_fit which uses least_squares but might be slightly easier to use:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
# your model definition
def model(z, a, b):
return a * np.exp(-b * z)
# your input data
x = np.array([20, 30, 40, 50, 60])
y = np.array([5.4, 4.0, 3.0, 2.2, 1.6])
# do the fit with some initial values
popt, pcov = curve_fit(model, x, y, p0=(5, 0.1))
# prepare some data for a plot
xx = np.linspace(20, 60, 1000)
yy = model(xx, *popt)
plt.plot(x, y, 'o', xx, yy)
plt.title('Exponential Fit')
plt.show()
This will plot
You could try to adapt this code for your needs.
If you want to use f_scale you can use:
popt, pcov = curve_fit(model, x, y, p0=(5, 0.1), method='trf', f_scale=0.1)
See the documentation:
kwargs
Keyword arguments passed to leastsq for method='lm' or least_squares otherwise.
If you have an unbound problem, by default method='lm' is used which uses leastsq which does not accept f_scale as a keyword. Therefore, we can use method='trf' which then uses least_squares which accepts f_scale.
I am new to python and was trying to fit dataset distribution using the following code. The actual data is a list that contains two columns- predicted market price and actual market price. And I was trying to use scipy.curve_fit() but it gave me many lines plotted at the same place. Any help is appreciated.
# import the necessary modules and define a func.
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
def func(x, a, b, c):
return a * x** b + c
# my data
pred_data = [3.0,1.0,1.0,7.0,6.0,1.0,7.0,4.0,9.0,3.0,5.0,5.0,2.0,6.0,8.0]
actu_data =[ 3.84,1.55,1.15,7.56,6.64,1.09,7.12,4.17,9.45,3.12,5.37,5.65,1.92,6.27,7.63]
popt, pcov = curve_fit(func, pred_data, actu_data)
#adjusting y
yaj = func(pred_data, popt[0],popt[1], popt[2])
# plot the data
plt.plot(pred_data,actu_data, 'ro', label = 'Data')
plt.plot(pred_data,yaj,'b--', label = 'Best fit')
plt.legend()
plt.show()
Scipy doesn't produce multiple lines, the strange output is caused by the way you present your unsorted data to matplotlib. Sort your x-values and you get the desired output:
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
def func(x, a, b, c):
return a * x** b + c
# my data
pred_data = [3.0,1.0,1.0,7.0,6.0,1.0,7.0,4.0,9.0,3.0,5.0,5.0,2.0,6.0,8.0]
actu_data =[ 3.84,1.55,1.15,7.56,6.64,1.09,7.12,4.17,9.45,3.12,5.37,5.65,1.92,6.27,7.63]
popt, pcov = curve_fit(func, pred_data, actu_data)
#adjusting y
yaj = func(sorted(pred_data), *popt)
# plot the data
plt.plot(pred_data,actu_data, 'ro', label = 'Data')
plt.plot(sorted(pred_data),yaj,'b--', label = 'Best fit')
plt.legend()
plt.show()
A better way is of course to define an evenly-spaced high resolution array for your x-values and calculate the fit for this array to have a smoother representation of your fit function:
from scipy.optimize import curve_fit
import numpy as np
from matplotlib import pyplot as plt
def func(x, a, b, c):
return a * x** b + c
# my data
pred_data = [3.0,1.0,1.0,7.0,6.0,1.0,7.0,4.0,9.0,3.0,5.0,5.0,2.0,6.0,8.0]
actu_data =[ 3.84,1.55,1.15,7.56,6.64,1.09,7.12,4.17,9.45,3.12,5.37,5.65,1.92,6.27,7.63]
popt, pcov = curve_fit(func, pred_data, actu_data)
xaj = np.linspace(min(pred_data), max(pred_data), 1000)
yaj = func(xaj, *popt)
# plot the data
plt.plot(pred_data,actu_data, 'ro', label = 'Data')
plt.plot(xaj, yaj,'b--', label = 'Best fit')
plt.legend()
plt.show()
When attempting to plot an exponential curve to a set of data:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib import pylab
import numpy as np
from scipy.optimize import curve_fit
x = np.array([30,40,50,60])
y = np.array([0.027679854,0.055639098,0.114814815,0.240740741])
def exponenial_func(x, a, b, c):
return a*np.exp(-b*x)+c
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, 1e-6, 1))
xx = np.linspace(10,60,1000)
yy = exponenial_func(xx, *popt)
plt.plot(x,y,'o', xx, yy)
pylab.title('Exponential Fit')
ax = plt.gca()
fig = plt.gcf()
plt.xlabel(r'Temperature, C')
plt.ylabel(r'1/Time, $s^-$$^1$')
plt.show()
Graph for the above code:
However when I add the data point 20 (x) and 0.015162344 (y):
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib import pylab
import numpy as np
from scipy.optimize import curve_fit
x = np.array([20,30,40,50,60])
y = np.array([0.015162344,0.027679854,0.055639098,0.114814815,0.240740741])
def exponenial_func(x, a, b, c):
return a*np.exp(-b*x)+c
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, 1e-6, 1))
xx = np.linspace(20,60,1000)
yy = exponenial_func(xx, *popt)
plt.plot(x,y,'o', xx, yy)
pylab.title('Exponential Fit')
ax = plt.gca()
fig = plt.gcf()
plt.xlabel(r'Temperature, C')
plt.ylabel(r'1/Time, $s^-$$^1$')
plt.show()
The above code generates the error
'RuntimeError: Optimal parameters not found: Number of calls to
function has reached maxfev = 800.'
If maxfev is set to maxfev = 1300
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, 1e-6, 1),maxfev=1300)
The graph is plotted but does not fit the curve correctly. Graph from above code change, maxfev = 1300:
I think this is because points 20 and 30 a too close to each other? For comparison, excel plots the data like this:
How can I plot this curve correctly?
From your data it is obvious that you need a positive exponent, therefore, b needs to be negative as you use a*np.exp(-b*x) + c as the underlying model. However, you start with a positive initial value for b which most likely causes the issues.
If you change
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, 1e-6, 1))
to
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, -1e-6, 1))
it works fine and gives the expected outcome.
Alternatively, you could also change your equation to
return a*np.exp(b*x) + c
and start with the same initial values as you had.
Here is the entire code:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
def exponenial_func(x, a, b, c):
return a*np.exp(b*x)+c
x = np.array([20, 30, 40, 50, 60])
y = np.array([0.015162344, 0.027679854, 0.055639098, 0.114814815, 0.240740741])
popt, pcov = curve_fit(exponenial_func, x, y, p0=(1, 1e-6, 1))
xx = np.linspace(20, 60, 1000)
yy = exponenial_func(xx, *popt)
# please check whether that is correct
r2 = 1. - sum((exponenial_func(x, *popt) - y) ** 2) / sum((y - np.mean(y)) ** 2)
plt.plot(x, y, 'o', xx, yy)
plt.title('Exponential Fit')
plt.xlabel(r'Temperature, C')
plt.ylabel(r'1/Time, $s^-$$^1$')
plt.text(30, 0.15, "equation:\n{:.4f} exp({:.4f} x) + {:.4f}".format(*popt))
plt.text(30, 0.1, "R^2:\n {}".format(r2))
plt.show()
I am trying to fit some data using the following code:
xdata = [0.03447378, 0.06894757, 0.10342136, 0.13789514, 0.17236893,
0.20684271, 0.24131649, 0.27579028, 0.31026407, 0.34473785,
0.37921163, 0.41368542, 0.44815921, 0.48263299]
ydata = [ 2.5844 , 2.87449, 3.01929, 3.10584, 3.18305, 3.24166,
3.28897, 3.32979, 3.35957, 3.39193, 3.41662, 3.43956,
3.45644, 3.47135]
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c, d):
return a + b*x - c*np.exp(-d*x)
popt, pcov = curve_fit(func, xdata, ydata))
plt.figure()
plt.plot(xdata, ydata, 'ko', label="Original Noised Data")
plt.plot(xdata, func(xdata, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.show()
The curve is not being fitted:
Data fit with straight line - should be curve
What should I be doing to correctly fit the data?
It looks like the optimizer is getting stuck in a local minimum, or perhaps just a very flat area of the objective function. A better fit can be found by tweaking the initial guess of the parameters that is used by curve_fit. For example, I get a reasonable-looking fit with p0=[1, 1, 1, 2.0] (the default is [1, 1, 1, 1]):
Here's the modified version of your script that I used:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c, d):
return a + b*x - c*np.exp(-d*x)
xdata = np.array([0.03447378, 0.06894757, 0.10342136, 0.13789514, 0.17236893,
0.20684271, 0.24131649, 0.27579028, 0.31026407, 0.34473785,
0.37921163, 0.41368542, 0.44815921, 0.48263299])
ydata = np.array([ 2.5844 , 2.87449, 3.01929, 3.10584, 3.18305, 3.24166,
3.28897, 3.32979, 3.35957, 3.39193, 3.41662, 3.43956,
3.45644, 3.47135])
p0 = [1, 1, 1, 2.0]
popt, pcov = curve_fit(func, xdata, ydata, p0=p0)
print(popt)
plt.figure()
plt.plot(xdata, ydata, 'ko', label="Original Noised Data")
plt.plot(xdata, func(xdata, *popt), 'r-', label="Fitted Curve")
plt.legend(loc='best')
plt.show()
The printed output is:
[ 3.13903988 0.71827903 0.97047248 15.40936232]
Please try to be more specific with the issue you're having.
Two things I noticed that will prevent your code from working as it is:
line 15 (the curve_fit() call), there is an additional right paranthesis at the end of the line
xdata is a python list, so this won't work once you try to multiply it with a parameter in func, i.e. turn it into a numpy array with
xdata = np.array(xdata)
If you fix these two issues, the fit should work.
Edit: Warren is of course right - fixing the above issues still will get you started in a wrong minimum.
Hi I'm attempting to produce a fit for each of my three exponential decays. I am not successful with producing a satisfactory fit. This is what I get: http://i.imgur.com/Nx44wsS.jpg
Any help is greatly appreciated. My code is below.
import pylab as plb
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import scipy as sp
from scipy.optimize import curve_fit
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
data = plb.loadtxt('data.csv',skiprows=2)
yp = data[:,4]
yr = data[:,5]
yl = data[:,6]
x = data[:,0]
def func(x,a,b,c):
return a*np.exp(-b*x) + c
popt, pcov = curve_fit(func, x, yl,maxfev=20000)
a = popt[0]
b = popt[1]
c = popt[2]
print a
print b
print c
print func(x,a,b,c)
xf = np.linspace(0,70,100)
yf = a*np.exp(-b*x) + c
plt.clf()
plt.plot(x,yf,'r-', label="Fitted Curve")
plt.plot(x,func(x,*popt))
plt.plot(x,yp,'bo',label='Polished')
plt.plot(x,yr,'ro',label='Rough')
plt.plot(x,yl,'go',label='Lacquered')
plt.legend()
plt.ylabel("Temperature (K)")
plt.xlabel("Time (min)")
plt.show()
Nonlinear fits are difficult and the trick is that you have to provide a reasonable initial guess.
Here is a version of your code which does two fits, one with an approximate initial guess and one with the default initial guess:
import pylab as plb
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import scipy as sp
from scipy.optimize import curve_fit
from matplotlib import rc
import numpy as np
rc('font', **{'family':'sans-serif', 'sans-serif':['Helvetica']})
rc('text', usetex=True)
# Fake data
x = np.arange(0, 70., 2.)
yl = 300 + 63*np.exp(-x/35.)
def func(x, a, b, c):
return a*np.exp(-b*x) + c
popt, pcov = curve_fit(func, x, yl, p0=(40, 0.012, 250), maxfev=20000)
a, b, c = popt
print 'a=', a, 'b=', b, 'c=', c
print 'func=', func(x, a, b, c)
popt2, pcov2 = curve_fit(func, x, yl, p0=None, maxfev=20000)
a2, b2, c2 = popt2
print 'a2=', a2, 'b2=', b2, 'c2=', c2
print 'func=', func(x, a2, b2, c2)
xf = np.linspace(0, 70, 100)
yf = a*np.exp(-b*x) + c
plt.clf()
plt.plot(x, yf, 'r-', label="Fitted Curve")
plt.plot(x, func(x, *popt))
plt.plot(x, func(x, *popt2), 'b-', label='Fit w/o guess')
plt.plot(x, yl, 'go', label='Lacquered')
plt.legend()
plt.ylabel("Temperature (K)")
plt.xlabel("Time (min)")
plt.show()
And here are the resulting fits:
As you can see, the fit with a reasonable initial guess does very well (red line). If you don't provide an initial guess, scipy assumes 1 for all parameters and that works poorly (blue line).