I am new to Python. I am trying to take a small data frame, plot it, and also use curve_fit to obtain the values of some parameters.
######Fitting Using Data Frame######
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit

# Data in a dictionary
data = {'keV': [22.16, 32.19, 8.05, 17.48, 13.39, 44.47, 5],
        'ToT': [31.68, 39.87, 10.67, 26.38, 21.4, 53.56, 0]}

# Create the dataframe and extract the underlying arrays
df = pd.DataFrame(data)
xdata = df['keV'].values
ydata = df['ToT'].values

# Mathematical model to fit
def main_test(x, a, b, c, t):
    return a*x + b - c/(x - t)

# Guesses for the fit parameters (defined but not passed to curve_fit below)
guess = [1, 1, 100, 150]
n = len(xdata)

# Empty array that will hold the model predictions
y = np.empty(n)

c, cov = curve_fit(main_test, xdata, ydata)
# THE MOST IMPORTANT PART OF THE CODE: GET THE PARAMETER VALUES
print(c)

for sample in range(n):
    # Populate y with the model predictions
    y[sample] = main_test(xdata[sample], *c)

plt.figure(figsize=(6, 4))
plt.scatter(xdata, ydata)
plt.plot(xdata, y, 'r.')
plt.show()
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 1000.
This was a matter of finding good starting parameters. Here is a graphical solver using your data and equation, with scipy's differential_evolution genetic algorithm module used to determine initial parameter estimates for curve_fit(). That scipy module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, requiring bounds within which to search. As the search bounds can be generous, I first tried search bounds of +/- 100.0 for all of the parameters and that worked.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

keV = [22.16, 32.19, 8.05, 17.48, 13.39, 44.47, 5]
ToT = [31.68, 39.87, 10.67, 26.38, 21.4, 53.56, 0]

# rename data to re-use previous example code
xData = numpy.array(keV)
yData = numpy.array(ToT)

# mathematical model
def func(x, a, b, c, t):
    return a*x + b - c/(x - t)

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)

def generate_Initial_Parameters():
    parameterBounds = []
    parameterBounds.append([-100.0, 100.0])  # search bounds for a
    parameterBounds.append([-100.0, 100.0])  # search bounds for b
    parameterBounds.append([-100.0, 100.0])  # search bounds for c
    parameterBounds.append([-100.0, 100.0])  # search bounds for t

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# differential_evolution searches within the given bounds
# (by default it finishes with a local-minimizer polishing step)
geneticParameters = generate_Initial_Parameters()

# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best-fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('keV')  # X axis data label
    axes.set_ylabel('ToT')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I can't manage to fit my points with an equation; the fit just plots a horizontal line.
I have the impression that it comes from the initial parameters, but I don't know what to put there.
I took this piece of code from another forum. At first everything worked well, but when I enter new data there is a problem.
Can someone help me, please?
import numpy as np, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

xData = np.array([.26, .35, .36, .37, .42, .46, .48, .54, .59, .72, .74, .83, .88, 1.04, 1.10, 1.12, 1.48])
yData = np.array([27.40, 29.96, 27.50, 28.20, 32.47, 31.52, 31.00, 34.93, 32.80, 35.84, 39.50, 40.00, 41.35, 41.50, 42.79, 41.71, 46.23])

def func(x, a, b, Offset):  # Sigmoid A With Offset from zunzun.com
    return 1.0 / (1.0 + np.exp(-a * (x - b))) + Offset

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return np.sum((yData - val) ** 2.0)

# generate initial parameter values
geneticParameters = [0, 0, 0]

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Parameters', fittedParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = np.square(absError)  # squared errors
MSE = np.mean(SE)  # mean squared errors
RMSE = np.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = np.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # plt.close('all')  # clean up after using pyplot

graphWidth = 400
graphHeight = 300
ModelAndScatterPlot(graphWidth, graphHeight)
The most probable cause of the failure is that you chose a three-parameter (a, b, Offset) sigmoid model instead of a four-parameter model. Data whose range runs from 27.4 to 46.23 can never be fitted by a function whose range is 0 to 1, even with an offset.
Another possible cause of difficulty is the initial guess of the parameter values used to start the iterative process of the non-linear regression.
In order to avoid those difficulties we will use a non-iterative method. Note that its symbols and notations are not the same as in your code. (The worked calculation and its numerical results appeared as images in the original answer and are not reproduced here.) You can use the resulting numerical values of the parameters as initial values in your non-linear regression software.
Note: the general principle of the non-iterative method is explained in the paper https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales
The calculation is a simplified version of its application to the four-parameter logistic regression.
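For reference, here is a minimal sketch of the four-parameter version of your sigmoid, fitted with ordinary curve_fit (this is the iterative route, not the non-iterative method above). The starting guesses are my own assumption, simply read off the data, and convergence will depend on them:

import numpy as np
from scipy.optimize import curve_fit

xData = np.array([.26, .35, .36, .37, .42, .46, .48, .54, .59, .72,
                  .74, .83, .88, 1.04, 1.10, 1.12, 1.48])
yData = np.array([27.40, 29.96, 27.50, 28.20, 32.47, 31.52, 31.00, 34.93,
                  32.80, 35.84, 39.50, 40.00, 41.35, 41.50, 42.79, 41.71, 46.23])

def sigmoid4(x, a, b, amplitude, offset):
    # amplitude and offset let the 0..1 sigmoid reach the data range (~27 to ~46)
    return amplitude / (1.0 + np.exp(-a * (x - b))) + offset

# rough guesses read off the data: span ~20, baseline ~27, midpoint near mid-x
p0 = [1.0, 0.8, 20.0, 27.0]
fittedParameters, pcov = curve_fit(sigmoid4, xData, yData, p0=p0)
print('a, b, amplitude, offset:', fittedParameters)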
I want to optimize a dose-response curve (4-parameter logistic) using a data set. I need to use the Powell algorithm, and therefore I have to use optimize.minimize() instead of curve_fit or least squares.
I wrote the following code:
import numpy as np
from scipy.optimize import minimize

ydata = np.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917])
xdata = np.array([40, 100, 250, 400, 600, 800, 1150, 1400])

initParams = [2.4, 0.2, 600.0, 1.0]

def logistic(params):
    A = params[0]
    B = params[1]
    C = params[2]
    D = params[3]
    logistic4 = ((A - D) / (1.0 + ((xdata / C) ** B))) + D
    sse = np.sum(np.square(ydata - logistic4))
    print sse

results = minimize(logistic, initParams, method='Powell')
print results
Theoretically, this minimizes the SSE between the experimental and theoretical data sets, iterating the 4 initially entered parameters with the Powell algorithm.
Practically, it does not work: it starts, and the last error in a fairly long list is
TypeError: unsupported operand type(s) for -: 'NoneType' and 'NoneType'
Any ideas on how to code this?
The TypeError arises because logistic() prints sse but never returns it, so minimize() receives None from every function evaluation; the objective function must return the value to be minimized. Here is a graphical Python solver for your data and equation; it uses minimize() with 'Powell' and also has a commented-out call to curve_fit(). I could not get a good fit with the initial parameter estimates that you supplied, so those are commented out here and replaced with my own values. My equation search confirms that this is an excellent equation for modeling this data set.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import minimize

xData = numpy.array([40, 100, 250, 400, 600, 800, 1150, 1400], dtype=float)
yData = numpy.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917], dtype=float)

def func(xdata, A, B, C, D):
    return ((A - D) / (1.0 + ((xdata / C) ** B))) + D

# minimize() requires a function to be minimized, unlike curve_fit()
def SSE(inParameters):  # function to minimize, here sum of squared errors
    predictions = func(xData, *inParameters)
    errors = predictions - yData
    return numpy.sum(numpy.square(errors))

#initialParameters = numpy.array([2.4, 0.2, 600.0, 1.0])
initialParameters = numpy.array([3.0, -1.5, 500.0, 0.1])

# curve fit the data with curve_fit()
#fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

# curve fit the data with minimize()
resultObject = minimize(SSE, initialParameters, method='Powell')
fittedParameters = resultObject.x

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Shouldn't the correct Hill equation used in the function func use the term (C/x)**B rather than (x/C)**B, where x = dose, C = IC50, and B is the Hill coefficient?
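Side note: the two forms are mathematically equivalent up to the sign of B, since (C/x)**B == (x/C)**(-B), so either parameterization fits the same family of curves, just with a sign-flipped Hill coefficient. A quick numeric check, using arbitrary illustrative values:

import numpy as np

x, C, B = 200.0, 600.0, 1.5  # arbitrary illustrative values
# (C/x)**B equals (x/C)**(-B), so the forms differ only in the sign of B
print(np.isclose((C / x) ** B, (x / C) ** (-B)))  # True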
For calibration purposes I am making N measurements of water flow, each of which is time-intensive. I want to reduce the number of measurements. It sounds like this is part of feature selection, as I am reducing the number of columns I have. BUT - I also need to predict the measurements I will be dropping.
Here is a sample of the data:
SerialNumber val speed
0 193604048 1.350254 105.0
1 193604048 1.507517 3125.0
2 193604048 1.455142 525.0
6 193604048 1.211184 12.8
7 193604048 1.238835 20.0
For each serial number I have a complete set of speed-val measurements. Ideally I would like a model whose output is the vector of all N val measurements, but it seems the options are all neural networks, which I am trying to avoid for now. Are there any other options?
If I feed this data into a regression model, how do I differentiate between each serialNumber dataset?
To make sure my goal is clear - I want to learn from the historical N-measurement data I have and find which speed-val pairs I can drop while still accurately predicting all N output values.
Thank you!
I tried to find the simplest equation that would give a good fit to the example data you posted, and from my equation search the Harris Yield Density equation, "y = 1.0 / (a + b * pow(x, c))", is a good candidate. Here is a graphical Python fitter using that equation and your data, with initial parameter estimates for the non-linear fitter calculated directly from the data max and min values. Note that SerialNumber itself is unrelated to the data and would not be used in regressions.
My hope is that you might find this equation generally useful in your work, and it might be possible that, after performing similar regressions on several different data sets, the parameters a, b, and c are very similar in all cases - that is the best outcome. If your measurement accuracy is high, I personally would expect that with this three-parameter equation it should be possible to use a minimum of four data points per calibration, with max, min and two other well-spaced points along the expected calibration curve.
Note that here the fitted parameters a = -1.91719091e-03, b = 1.11357103e+00, and c = -1.51294798e+01 yield RMSE = 3.191 and R-squared = 0.9999.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

xData = numpy.array([1.350254, 1.507517, 1.455142, 1.211184, 1.238835])
yData = numpy.array([105.0, 3125.0, 525.0, 12.8, 20.0])

def func(x, a, b, c):  # Harris yield density equation
    return 1.0 / (a + b * numpy.power(x, c))

initialParameters = numpy.array([0.0, min(xData), -10.0 * max(xData)])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_title('Harris Yield Density Equation')  # title
    axes.set_xlabel('Val')  # X axis data label
    axes.set_ylabel('Speed')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
UPDATE: using reversed X and Y
Per the comments, here is a graphical fitter for a three-parameter Mixed Power and Exponential equation, "a * pow(x, b) * exp(c * x)", with X and Y reversed from the previous code. Here the fitted parameters a = 1.05910664e+00, b = 5.26304345e-02, and c = -2.25604946e-05 yield RMSE = 0.0003602 and R-squared = 0.9999.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

xData = numpy.array([105.0, 3125.0, 525.0, 12.8, 20.0])
yData = numpy.array([1.350254, 1.507517, 1.455142, 1.211184, 1.238835])

def func(x, a, b, c):  # mixed power and exponential equation
    return a * numpy.power(x, b) * numpy.exp(c * x)

initialParameters = [1.0, 0.01, -0.01]

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_title('Mixed Power and Exponential Equation')  # title
    axes.set_xlabel('Speed')  # X axis data label
    axes.set_ylabel('Val')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have a scatter plot with only 5 data points, to which I would like to fit a curve. I have tried both polyfit and the following code, but neither is able to produce a curve with so few data points.
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

def func(x, a, b, c):
    return a * np.exp(-b * x) + c

# xdata and ydata hold the five measured points
plt.plot(xdata, ydata, ".", label="Data")
optimizedParameters, pcov = opt.curve_fit(func, xdata, ydata)
plt.plot(xdata, func(xdata, *optimizedParameters), label="fit")
Attached is an example of the plot, along with an example of the kind of curve I am trying to produce (apologies for the bad drawing). Thanks!
Here is an example graphical Python fitter using the data in your comment, fitting to a Polytrope type of equation. In this example there is no need to take logs of the data. Here the X axis is plotted on a decade logarithmic scale. Please note that the data in the example code is in the form of floating point numbers.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

xData = numpy.array([7e-09, 9e-09, 1e-08, 2e-8, 1e-6])
yData = numpy.array([790.0, 870.0, 2400.0, 2450.0, 3100.0])

def func(x, a, b, offset):  # polytrope equation from zunzun.com
    return a / numpy.power(x, b) + offset

# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData), 1000)
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.xscale('log')  # comment this out for default linear scaling
    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
You would have to choose what you want to fit the curve to. From the look of your drawing, it seems you are trying to shape it to be somewhat logarithmic.
Here's a picture of a logarithmic regression (see the credits below):
A logarithmic regression follows the form y = A + B ln(x).
This is essentially linear regression where, instead of fitting y vs. x,
we fit y vs. ln(x).
So you may just take the natural log of the x-values of the points in your dataset and run a linear regression algorithm on it. The resulting coefficients are then A and B for y = A + B ln(x), as in the sketch below.
Picture Credits:
http://mathworld.wolfram.com/LeastSquaresFittingLogarithmic.html
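For instance, a minimal sketch of that log-linear approach, using the sample data posted earlier in this thread:

import numpy as np

x = np.array([7e-09, 9e-09, 1e-08, 2e-08, 1e-06])
y = np.array([790.0, 870.0, 2400.0, 2450.0, 3100.0])

# fit y = A + B*ln(x) by ordinary least squares on ln(x);
# np.polyfit returns [slope, intercept] for degree 1
B, A = np.polyfit(np.log(x), y, 1)
print('A =', A, 'B =', B)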
Edit: As James Phillips pointed out in his answer, it is also possible to model the curve in the form y = A*x^(-B) + C, since with so few points it cannot be determined whether the graph has a horizontal asymptote or keeps growing while decelerating. Many curves are possible (for instance y = A*B^(-x) + C could be another one), but you need to choose what to model the data after.
The exponential function does not fit your data well. Consider another modeling function.
Given
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt
%matplotlib inline
x_samp = np.array([7e-09, 9e-09, 1e-08, 2e-8, 1e-6])
y_samp = np.array([790, 870, 2400, 2450, 3100])
def func(x, a, b):
    """Return a logarithmic result."""
    return a + b*np.log(x)

def func2(x, a, b, c):
    """Return a 'power law' result."""
    return a/np.power(x, b) + c
Code
From @Allan Lago's logarithmic model:
# REGRESSION ------------------------------------------------------------------
x_lin = np.linspace(x_samp.min(), x_samp.max(), 50)
w, _ = opt.curve_fit(func, x_samp, y_samp)
print("Estimated Parameters", w)
# Model
y_model = func(x_lin, *w)
# PLOT ------------------------------------------------------------------------
# Visualize data and fitted curves
plt.plot(x_samp, y_samp, "ko", label="Data")
plt.plot(x_lin, y_model, "k--", label="Fit")
plt.xticks(np.arange(0, x_samp.max(), x_samp.max()/2))
plt.title("Least squares regression")
plt.legend(loc="upper left")
Estimated Parameters [8339.61062739 367.6992259 ]
Using @James Phillips' "Polytrope" model:
# REGRESSION ------------------------------------------------------------------
p0 = [1, 1, 1]
w, _ = opt.curve_fit(func2, x_samp, y_samp, p0=p0)
print("Estimated Parameters", w)
# Model
y_model = func2(x_lin, *w)
# PLOT ------------------------------------------------------------------------
# Visualize data and fitted curves
plt.plot(x_samp, y_samp, "ko", label="Data")
plt.plot(x_lin, y_model, "k--", label="Fit")
plt.xticks(np.arange(0, x_samp.max(), x_samp.max()/2))
plt.title("Least squares regression")
plt.legend()
Estimated Parameters [-3.49305043e-10 1.57259788e+00 3.05801283e+03]
I have a 2D plot and it seems quadratic. I got it from a data set obtained numerically from a calculation. I know the equation can be obtained by fitting the data sets; Python appears to have drawn a smooth curve through the data points automatically. I need to print the equation of the fitted curve.
I solved Y for different X and obtained two arrays, X and Y. Then I plotted them:
plt.plot(X,Y)
plt.xlabel('X')
plt.ylabel('Y')
plt.savefig('YvsX.png', format='png', dpi=1000)
plt.show()
and obtained the plot shown below (image omitted). I need to print the equation of this plot.
I extracted data points from your plot and performed an equation search, which turned up "y = a * exp(b/x) + Offset" as a likely candidate equation. Here is a graphical fitter using the extracted data and this equation, with initial parameter estimates for scipy's curve_fit() solver provided by scipy's differential_evolution genetic algorithm module. That module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, requiring bounds within which to search. In this example I used bounds of the maximum and minimum data values, and those search bounds worked well in this case.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# extracted from plot
xData = numpy.array([365.731, 377.548, 392.909, 409.453, 428.360, 447.267, 473.264, 498.079, 521.713, 545.347, 573.707, 598.522, 629.246, 669.423, 695.420, 736.779, 772.230, 823.042, 858.493, 883.308, 915.214, 951.846, 986.115, 1029.837, 1059.379, 1105.465, 1155.096, 1204.726, 1251.994])
yData = numpy.array([-0.306, -0.576, -0.969, -1.276, -1.766, -2.147, -2.503, -2.883, -3.177, -3.398, -3.705, -3.963, -4.196, -4.515, -4.662, -4.871, -5.055, -5.300, -5.374, -5.496, -5.582, -5.705, -5.803, -5.914, -5.987, -6.098, -6.208, -6.331, -6.368])

def func(x, a, b, Offset):  # from the zunzun.com "function finder"
    return a * numpy.exp(b/x) + Offset

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)

def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)
    minData = min(minX, minY)
    maxData = max(maxX, maxY)

    parameterBounds = []
    parameterBounds.append([minData, maxData])  # search bounds for a
    parameterBounds.append([minData, maxData])  # search bounds for b
    parameterBounds.append([minData, maxData])  # search bounds for Offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# differential_evolution searches within the given bounds
# (by default it finishes with a local-minimizer polishing step)
geneticParameters = generate_Initial_Parameters()

# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best-fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Here is an example of how to find a quadratic fit:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
points = np.array([(0, 2), (1, 3), (2, 6), (3, 11), (4, 18), (5, 27)])
# get x and y vectors
x = points[:,0]
y = points[:,1]
# calculate polynomial
z = np.polyfit(x, y, 2)
z
Out:
array([1.0000000e+00, 4.2765887e-15, 2.0000000e+00])
which implies y = x**2 + 4.2765887e-15*x + 2, i.e. essentially y = x**2 + 2.
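As a follow-up, np.poly1d can render those coefficients as a printable polynomial, which addresses the "print the equation" part of the question:

import numpy as np

# same fit as above
z = np.polyfit([0, 1, 2, 3, 4, 5], [2, 3, 6, 11, 18, 27], 2)
p = np.poly1d(z)
print(p)  # prints the fitted polynomial (exponents appear on the line above)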