I'm interesting in mimic Excel's LOGEST function in Python but have no idea where to start.
Here is a graphical fitter using LOGEST as described in
https://support.office.com/en-us/article/logest-function-f27462d8-3657-4030-866b-a272c1d18b4b
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])
# LOGEST from https://support.office.com/en-us/article/logest-function-f27462d8-3657-4030-866b-a272c1d18b4b
def func(x, b, m):
y = b * m**x
return y
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
You could do this taking the log and doing a linear regression or you could fit to an exponential function. Here I show both solutions using scipy.stats.linregress and scipy.optimize.curve_fit, respectively.
Here is the example from the documentation on the function LOGEST in Excel from Microsoft:
Method using linregress:
from scipy.stats import linregress
import math
x = months = [11, 12, 13, 14, 15, 16]
y = units = [33100, 47300, 69000, 102000, 150000, 220000]
slope, intercept, r_value, p_value, std_err = linregress(
x,
list(map(math.log, y)),
)
print('m', math.exp(slope))
print('b', math.exp(intercept))
Output:
m 1.4632756281161756
b 495.3047701587278
Method using curve_fit:
from scipy.optimize import curve_fit
def f(x, b, m):
return b * m ** x
popt, pcov = curve_fit(f, x, y)
print('m', popt[1])
print('b', popt[0])
Output:
m 1.4678382448967822
b 473.717820465515
Related
def gaus(x,a,x0,sigma):
return a*np.exp(-(x-x0)**2/(2*sigma**2))
times, amplitudes = openFile("../datafiles/number_of_counts.txt")
mean = sum(np.array(times)*np.array(amplitudes))/sum(amplitudes)
sigma = np.sqrt(sum(np.array(amplitudes)*(np.array(times)-mean)**2)/sum(amplitudes))
params,pcov = curve_fit(gaus,times, amplitudes,p0=[max(amplitudes),mean,sigma])
plt.plot(times, amplitudes)
plt.plot(times ,gaus(np.array(times),params[0],params[1],params[2]),'r', label="fitted curve")
plt.ylabel("Coincidents")
plt.title("Coincident plot")
plt.legend()
plt.show()
My gaussian fit doesn't work properly, but looks like a soft curve, instead of for fitting to the sharp peak, I assume I have some super silly error in my script, but not sure what. Someone who can see it?
Your data has a constant offset of about 3750, but your gaus model function cannot account for that, so you are fitting a normal distribution with offset 0.
It needs one more parameter:
def gaus(x, a, x0, sigma, c):
return a * np.exp(-(x - x0)**2 / (2 * sigma**2)) + c
Then:
offset_guess = 3750 # maybe calculate it from the data as well
params, pcov = curve_fit(
gaus, times, amplitudes,
p0=[max(amplitudes), mean, sigma, offset_guess])
plt.plot(times, gaus(np.array(times), params[0], params[1], params[2], params[3]), ...)
Result:
>>> print(params)
[1545.00193331 -20.45639132 -43.28484495 3792.41050636]
I extracted data from the plot for analysis, and found that with the extracted data a Weibull peak plus offset gave me a better fit than a Gaussian peak with offset. Here is a graphical Python fitter with the extracted data and a Weibull peak plus offset equation, you should be able to substitute in the actual data and run it directly. Note the simple determination of initial parameter estimates.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([4.914e+03, 5.600e+03, 7.886e+03, 8.571e+03, 1.063e+04, 1.154e+04, 1.269e+04, 1.566e+04, 1.634e+04, 1.817e+04, 1.886e+04, 2.114e+04, 2.389e+04, 2.526e+04, 2.754e+04, 3.051e+04, 3.257e+04, 3.417e+04, 3.554e+04, 3.669e+04, 4.011e+04, 4.240e+04, 4.491e+04, 4.583e+04, 4.697e+04, 4.880e+04, 4.994e+04, 5.154e+04, 5.246e+04, 5.474e+04, 5.634e+04, 5.886e+04, 6.091e+04, 6.366e+04, 6.731e+04, 7.051e+04, 7.257e+04, 7.394e+04, 7.691e+04, 7.851e+04, 7.966e+04, 8.103e+04, 8.240e+04, 8.514e+04, 8.720e+04, 8.834e+04, 8.949e+04, 9.109e+04, 9.223e+04, 9.360e+04, 9.566e+04, 9.726e+04, 9.909e+04, 1.005e+05, 1.014e+05, 1.030e+05, 1.059e+05, 1.073e+05, 1.089e+05, 1.101e+05, 1.119e+05, 1.130e+05, 1.139e+05, 1.162e+05, 1.178e+05, 1.190e+05, 1.203e+05, 1.222e+05, 1.233e+05, 1.247e+05, 1.281e+05, 1.299e+05, 1.309e+05, 1.329e+05, 1.341e+05, 1.357e+05, 1.370e+05, 1.382e+05, 1.395e+05, 1.407e+05, 1.430e+05, 1.439e+05])
yData = numpy.array([3.300e+03, 8.100e+03, 6.100e+03, 1.010e+04, 9.700e+03, 7.300e+03, 7.500e+03, 6.900e+03, 8.100e+03, 3.900e+03, 5.700e+03, 4.900e+03, 4.500e+03, 8.300e+03, 4.100e+03, 8.100e+03, 5.300e+03, 8.100e+03, 6.700e+03, 1.130e+04, 9.300e+03, 6.300e+03, 9.500e+03, 8.900e+03, 1.490e+04, 6.300e+03, 1.190e+04, 6.300e+03, 7.700e+03, 1.310e+04, 9.500e+03, 1.590e+04, 1.050e+04, 1.930e+04, 4.890e+04, 7.350e+04, 5.230e+04, 5.130e+04, 2.350e+04, 1.950e+04, 1.010e+04, 1.510e+04, 9.500e+03, 9.500e+03, 6.900e+03, 6.900e+03, 1.210e+04, 6.300e+03, 7.700e+03, 5.700e+03, 1.410e+04, 8.700e+03, 1.390e+04, 4.900e+03, 7.500e+03, 4.900e+03, 9.500e+03, 5.300e+03, 9.300e+03, 6.300e+03, 1.250e+04, 4.300e+03, 7.700e+03, 6.900e+03, 9.700e+03, 8.500e+03, 1.130e+04, 5.300e+03, 5.100e+03, 1.700e+03, 8.700e+03, 7.300e+03, 6.300e+03, 2.100e+03, 3.100e+03, 7.100e+03, 4.900e+03, 6.100e+03, 3.700e+03, 9.300e+03, 5.500e+03, 5.700e+03])
def func(x, a, b, c, offset): # Weibull peak with offset from zunzun.com
return a * numpy.exp(-0.5 * numpy.power(numpy.log(x/b) / c, 2.0)) + offset
a_est = max(yData)
b_est = max(yData)
c_est = 1.0
offset_est = min(yData)
initialParameters = numpy.array([a_est, b_est, c_est, offset_est])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData), 500)
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I want to optimize a dose-response curve (4 parameter logistic) using a data set. I need to use the Powell algorithm, therefore, I have to use optimize.minimize() instead of curve_fit or least square.
I wrote the following code:
import numpy as np
from scipy.optimize import minimize
ydata = np.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917])
xdata = np.array([40, 100, 250, 400, 600, 800, 1150, 1400])
initParams = [2.4, 0.2, 600.0, 1.0]
def logistic(params):
A = params[0]
B = params[1]
C = params[2]
D = params[3]
logistic4 = ((A-D)/(1.0+((xdata/C)**B))) + D
sse = np.sum(np.square(ydata-logistic4))
print sse
results = minimize(logistic, initParams, method='Powell')
print results
Theoretically, this minimizes the sse of the experimental and theoretical data sets iterating the 4 parameters initially entered using the Powell algorithm.
Practically, it does not work: it starts and the last error, in a fairly long list, is
TypeError: unsupported operand type(s) for -: 'NoneType' and 'NoneType'.
Any ideas on how to code this?
Here is a graphical Python solver for your data and equation, it uses minimize() with 'Powell' and also has a commented-out call to curve_fit. I could not get a good fit with the initial parameter estimates that you supplied, so those are commented out here and replaced with my own values. My equation search confirms that this is an excellent equation to use in modeling this data set.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import minimize
xData = numpy.array([40, 100, 250, 400, 600, 800, 1150, 1400], dtype=float)
yData = numpy.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917], dtype=float)
def func(xdata, A, B, C, D):
return ((A-D)/(1.0+((xdata/C)**B))) + D
# minimize() requires a function to be minimized, unlike curve_fit()
def SSE(inParameters): # function to minimize, here sum of squared errors
predictions = func(xData, *inParameters)
errors = predictions - yData
return numpy.sum(numpy.square(errors))
#initialParameters = numpy.array([2.4, 0.2, 600.0, 1.0])
initialParameters = numpy.array([3.0, -1.5, 500.0, 0.1])
# curve fit the data with curve_fit()
#fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
# curve fit the data with minimize()
resultObject = minimize(SSE, initialParameters, method='Powell')
fittedParameters = resultObject.x
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Shouldn't the correct Hill equation used in the function "func" use the term (C/x)**B rather than (x/C)**B where x=dose, C=IC50, and B is Hill coefficient?
I would like to see the regression equation for a polynomial regression in python.
I am new to python, in R the analogous command I am looking for is "summary." I have tried the print function in python.
x = (LIST)
y = (LIST)
x = x[:, np.newaxis]
y = y[:, np.newaxis]
poly = PolynomialFeatures(degree=2)
x_poly = poly.fit_transform(x)
poly.fit(x_poly,y)
lin = LinearRegression()
lin.fit(x_poly,y)
y_poly_pred = lin.predict(x_poly)
print(lin)
print(poly)
print(lin.predict)
print(poly.fit_transform)
I would like the output to give me the ax^2 + bx + c equation, or at least the info to figure out that equation. Instead, I get (below) for my 4 print statements.
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False,
order='C')
<bound method LinearModel.predict of LinearRegression(copy_X=True,
fit_intercept=True, n_jobs=None, normalize=False)>
<bound method TransformerMixin.fit_transform of
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False,
order='C')>
Here is an example graphical polynomial fitter using numpy.polyfit for fitting and numpy.polyval for evaluation. This example has eight data points, and making polynomialOrder = 7 shows Runge's phenomenon rather nicely.
import numpy, matplotlib
import matplotlib.pyplot as plt
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
polynomialOrder = 2 # example quadratic
# curve fit the test data
fittedParameters = numpy.polyfit(xData, yData, polynomialOrder)
print('Fitted Parameters:', fittedParameters)
modelPredictions = numpy.polyval(fittedParameters, xData)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = numpy.polyval(fittedParameters, xModel)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have done my linear regression and the best fit line, but would like to have also a line connecting the real points (the ones in blue) to the predicted points (the ones i red x) representing the predictions error, or the so called residuals. The plot should look in a similar way:
And what I have until now is:
# draw the plot
xx=X[:,np.newaxis]
yy=y[:,np.newaxis]
slr=LinearRegression()
slr.fit(xx,yy)
y_pred=slr.predict(xx)
plt.scatter(xx,yy)
plt.plot(xx,y_pred,'r')
plt.plot(X,y_pred,'rx') #add the prediction points
plt.show()
Thank you very much in advance!
Here is example code with the vertical lines
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])
def func(x, a, b): # simple linear example
return a * x + b
initialParameters = numpy.array([1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
# now add individual line for each point
for i in range(len(xData)):
lineXdata = (xData[i], xData[i]) # same X
lineYdata = (yData[i], modelPredictions[i]) # different Y
plt.plot(lineXdata, lineYdata)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have four features and a dependent(X). I want to plot a graph with the predicted regression line and the feature values. I went through the documentation but I can't figure out how to represent everything in a scatter plot.
Here is some example code to get you started, it fits a simple quadratic and scatterplots the raw data and fitted curve along with calculation of RMSE and R-squared. The example uses a non-linear fit in case you would like to try fitting non-linear equations.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.stats
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7])
def func(x, a, b, c): # simple quadratic example
return (a * numpy.square(x)) + b * x + c
initialParameters = numpy.array([1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)