How to see results (equation) of a Regression in Python? - python

I would like to see the regression equation for a polynomial regression in python.
I am new to Python; in R, the analogous command I am looking for is "summary". I have tried the print function in Python.
# Placeholder inputs: (LIST) stands in for the asker's actual data.
x = (LIST)
y = (LIST)
# Indexing with np.newaxis adds a second axis
# (assumes x and y are 1-D NumPy arrays, not plain lists — TODO confirm).
x = x[:, np.newaxis]
y = y[:, np.newaxis]
# Expand x into degree-2 polynomial feature columns.
poly = PolynomialFeatures(degree=2)
x_poly = poly.fit_transform(x)
# NOTE(review): this refits poly on the already-expanded x_poly; the
# regression below is fitted on x_poly directly, so this call looks
# unnecessary — confirm before relying on poly afterwards.
poly.fit(x_poly,y)
# Ordinary linear regression on the expanded feature columns.
lin = LinearRegression()
lin.fit(x_poly,y)
y_poly_pred = lin.predict(x_poly)
# These four prints show the estimator objects and bound methods themselves
# (see the output quoted below), not any fitted values.
print(lin)
print(poly)
print(lin.predict)
print(poly.fit_transform)
I would like the output to give me the ax^2 + bx + c equation, or at least the info to figure out that equation. Instead, I get (below) for my 4 print statements.
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False,
order='C')
<bound method LinearModel.predict of LinearRegression(copy_X=True,
fit_intercept=True, n_jobs=None, normalize=False)>
<bound method TransformerMixin.fit_transform of
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False,
order='C')>

Here is an example graphical polynomial fitter using numpy.polyfit for fitting and numpy.polyval for evaluation. This example has eight data points, and making polynomialOrder = 7 shows Runge's phenomenon rather nicely.
import numpy, matplotlib
import matplotlib.pyplot as plt
# Example data set: eight (x, y) observations.
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

polynomialOrder = 2 # example quadratic

# curve fit the test data
# polyfit returns the coefficients highest power first, so for order 2 the
# fitted equation is  y = p[0]*x**2 + p[1]*x + p[2]
fittedParameters = numpy.polyfit(xData, yData, polynomialOrder)
print('Fitted Parameters:', fittedParameters)

# evaluate the fitted polynomial at the observed x values
modelPredictions = numpy.polyval(fittedParameters, xData)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  Using MSE rather than the
# variance of the residuals keeps the value correct even when the residuals
# do not average to zero.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as diamond markers with the fitted polynomial over it.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData and fittedParameters.
    """
    size_in_inches = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size_in_inches, dpi=100)
    plot_area = figure.add_subplot(111)

    # sample the fitted polynomial across the observed x range
    x_low, x_high = min(xData), max(xData)
    curve_x = numpy.linspace(x_low, x_high)
    curve_y = numpy.polyval(fittedParameters, curve_x)

    plot_area.plot(xData, yData, 'D')  # raw data first
    plot_area.plot(curve_x, curve_y)  # then the model line

    plot_area.set_xlabel('X Data')
    plot_area.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # clean up after using pyplot


# requested figure dimensions
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Related

How do I code dose-response (4PL) curve fitting with optimize.minimize()

I want to optimize a dose-response curve (4 parameter logistic) using a data set. I need to use the Powell algorithm, therefore, I have to use optimize.minimize() instead of curve_fit or least square.
I wrote the following code:
import numpy as np
from scipy.optimize import minimize
# Measured responses (ydata) and the matching doses (xdata).
ydata = np.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917])
xdata = np.array([40, 100, 250, 400, 600, 800, 1150, 1400])
# Starting guesses; logistic() unpacks them in the order A, B, C, D.
initParams = [2.4, 0.2, 600.0, 1.0]
# Objective passed to minimize(): evaluates the 4-parameter logistic on the
# module-level xdata and sums the squared differences from ydata.
def logistic(params):
A = params[0]
B = params[1]
C = params[2]
D = params[3]
logistic4 = ((A-D)/(1.0+((xdata/C)**B))) + D
sse = np.sum(np.square(ydata-logistic4))
# NOTE(review): the SSE is only printed and never returned, so this function
# yields None — most likely the source of the NoneType TypeError quoted
# below.  (Python 2 print-statement syntax.)
print sse
results = minimize(logistic, initParams, method='Powell')
print results
Theoretically, this minimizes the sse of the experimental and theoretical data sets iterating the 4 parameters initially entered using the Powell algorithm.
Practically, it does not work: it starts and the last error, in a fairly long list, is
TypeError: unsupported operand type(s) for -: 'NoneType' and 'NoneType'.
Any ideas on how to code this?
Here is a graphical Python solver for your data and equation. It uses minimize() with 'Powell' and also has a commented-out call to curve_fit. I could not get a good fit with the initial parameter estimates that you supplied, so those are commented out here and replaced with my own values. My equation search confirms that this is an excellent equation to use in modeling this data set.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import minimize
xData = numpy.array([40, 100, 250, 400, 600, 800, 1150, 1400], dtype=float)
yData = numpy.array([0.1879, 0.4257, 0.80975, 1.3038, 1.64305, 1.94055, 2.21605, 2.3917], dtype=float)
def func(xdata, A, B, C, D):
    """Evaluate the four-parameter logistic (A-D)/(1 + (xdata/C)**B) + D."""
    ratio_term = (xdata / C) ** B
    span = A - D
    return (span / (1.0 + ratio_term)) + D
# minimize() requires a function to be minimized, unlike curve_fit()
def SSE(inParameters): # function to minimize, here sum of squared errors
    """Return the sum of squared differences between func() and yData.

    inParameters is unpacked positionally into func after the module-level
    xData.
    """
    squared = numpy.square(func(xData, *inParameters) - yData)
    return numpy.sum(squared)
# the asker's original starting estimates, kept for reference
#initialParameters = numpy.array([2.4, 0.2, 600.0, 1.0])
initialParameters = numpy.array([3.0, -1.5, 500.0, 0.1])

# curve fit the data with curve_fit()
#fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

# curve fit the data with minimize()
resultObject = minimize(SSE, initialParameters, method='Powell')
fittedParameters = resultObject.x # the optimised A, B, C, D

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  Using MSE rather than the
# variance of the residuals keeps the value correct even when the optimiser
# stops at a point where the residuals do not average to zero.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the dose-response data as diamond markers with the fitted curve.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData, func and fittedParameters.
    """
    size_in_inches = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size_in_inches, dpi=100)
    plot_area = figure.add_subplot(111)

    # sample the fitted equation across the observed x range
    x_low, x_high = min(xData), max(xData)
    curve_x = numpy.linspace(x_low, x_high)
    curve_y = func(curve_x, *fittedParameters)

    plot_area.plot(xData, yData, 'D')  # raw data first
    plot_area.plot(curve_x, curve_y)  # then the model line

    plot_area.set_xlabel('X Data')
    plot_area.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # clean up after using pyplot


# requested figure dimensions
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Shouldn't the correct Hill equation used in the function "func" use the term (C/x)**B rather than (x/C)**B where x=dose, C=IC50, and B is Hill coefficient?

How to mimic Excel's LOGEST function in Python

I'm interested in mimicking Excel's LOGEST function in Python but have no idea where to start.
Here is a graphical fitter using LOGEST as described in
https://support.office.com/en-us/article/logest-function-f27462d8-3657-4030-866b-a272c1d18b4b
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])
# LOGEST from https://support.office.com/en-us/article/logest-function-f27462d8-3657-4030-866b-a272c1d18b4b
def func(x, b, m):
    """Exponential model fitted by LOGEST: b multiplied by m raised to x."""
    return b * m**x
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0])

# curve fit the test data; fittedParameters follows func's order (b, m)
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  The model b * m**x has no
# additive offset, so its residuals need not average to zero; the variance
# of the residuals would therefore understate the error.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the data as diamond markers with the fitted exponential over it.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData, func and fittedParameters.
    """
    size_in_inches = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size_in_inches, dpi=100)
    plot_area = figure.add_subplot(111)

    # sample the fitted equation across the observed x range
    x_low, x_high = min(xData), max(xData)
    curve_x = numpy.linspace(x_low, x_high)
    curve_y = func(curve_x, *fittedParameters)

    plot_area.plot(xData, yData, 'D')  # raw data first
    plot_area.plot(curve_x, curve_y)  # then the model line

    plot_area.set_xlabel('X Data')
    plot_area.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # clean up after using pyplot


# requested figure dimensions
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
You could do this taking the log and doing a linear regression or you could fit to an exponential function. Here I show both solutions using scipy.stats.linregress and scipy.optimize.curve_fit, respectively.
Here is the example from the documentation on the function LOGEST in Excel from Microsoft:
Method using linregress:
from scipy.stats import linregress
import math
# example data from the LOGEST documentation
x = months = [11, 12, 13, 14, 15, 16]
y = units = [33100, 47300, 69000, 102000, 150000, 220000]

# Regress ln(y) on x; exponentiating the slope and intercept afterwards
# gives m and b of  y = b * m**x.
slope, intercept, r_value, p_value, std_err = linregress(
    x, [math.log(value) for value in y]
)

print('m', math.exp(slope))
print('b', math.exp(intercept))
Output:
m 1.4632756281161756
b 495.3047701587278
Method using curve_fit:
from scipy.optimize import curve_fit
def f(x, b, m):
    """Exponential model  y = b * m**x  handed to curve_fit."""
    growth = m ** x
    return b * growth
# Non-linear least-squares fit; popt follows f's parameter order (b, m).
popt, pcov = curve_fit(f, x, y)

# report m before b
for label, position in (('m', 1), ('b', 0)):
    print(label, popt[position])
Output:
m 1.4678382448967822
b 473.717820465515

Linear regression with defined intercept

I have a DataFrame (df) with two columns and three rows.
Column X = [137,270,344]
Column Y = [51, 121, 136]
I want to get the slope of the linear regression considering the intercept = 0.
I have tried adding a point (0,0), but it doesn't work.
EX.
Column X = [0, 137,270,344]
Column Y = [0, 51, 121, 136]
The code that I am using.
Code:
# Pull both columns out as floats before regressing Y on X.
# (Plain ASCII quotes are required; typographic quotes are a SyntaxError.)
X = df["Column X"].astype(float)
Y = df["Column Y"].astype(float)

slope, intercept, r_value, p_value, std_err = stats.linregress(X, Y)

# store the intercept and the slope (coefficient) under their matching names
intercept_desv = intercept
coef_desv = slope
I expected the intercept to be 0, but it is less than 0.
In standard linear regression, all data points implicitly have a weight of 1.0. In any software that allows linear regression using weights, the regression can effectively be made to pass through any single point - such as the origin - by assigning that data point an extremely large weight. Numpy's polyfit() allows weights. Here is a graphing example with your data using this technique to make the fitted line pass through the 0,0 point.
import numpy, matplotlib
import matplotlib.pyplot as plt
# The asker's three observations plus an added (0, 0) point.
xData = numpy.array([0.0, 137.0, 270.0, 344.0])
yData = numpy.array([0.0, 51.0, 121.0, 136.0])

weights = numpy.array([1.0E10, 1.0, 1.0, 1.0]) # heavily weight the 0,0 point
#weights = None # use this for "no weights"

polynomialOrder = 1 # example straight line

# curve fit the test data
# polyfit returns the coefficients highest power first: [slope, intercept]
fittedParameters = numpy.polyfit(xData, yData, polynomialOrder, w=weights)
print('Fitted Parameters:', fittedParameters)

modelPredictions = numpy.polyval(fittedParameters, xData)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  A line forced through the
# origin leaves residuals that do not average to zero, so the variance of
# the residuals would understate the error and inflate R-squared.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()

# the first data point is x = 0, so this shows how close the line is to the origin
print('Predicted value at x=0:', modelPredictions[0])
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the data as diamond markers with the fitted straight line over it.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData and fittedParameters.
    """
    size_in_inches = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size_in_inches, dpi=100)
    plot_area = figure.add_subplot(111)

    # sample the fitted polynomial across the observed x range
    x_low, x_high = min(xData), max(xData)
    curve_x = numpy.linspace(x_low, x_high)
    curve_y = numpy.polyval(fittedParameters, curve_x)

    plot_area.plot(xData, yData, 'D')  # raw data first
    plot_area.plot(curve_x, curve_y)  # then the model line

    plot_area.set_xlabel('X Data')
    plot_area.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # clean up after using pyplot


# requested figure dimensions
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Python Linear Regression, best fit line with residuals

I have done my linear regression and plotted the best-fit line, but I would also like a line connecting each real point (the blue ones) to its predicted point (the red x markers), representing the prediction error, i.e. the residual. The plot should look similar to this:
And what I have until now is:
# draw the plot
# Indexing with np.newaxis adds a second axis before fitting
# (assumes X and y are 1-D NumPy arrays — TODO confirm).
xx=X[:,np.newaxis]
yy=y[:,np.newaxis]
# Fit a linear regression and predict at the same x values.
slr=LinearRegression()
slr.fit(xx,yy)
y_pred=slr.predict(xx)
# Scatter the observations, then draw the predictions as a red line.
plt.scatter(xx,yy)
plt.plot(xx,y_pred,'r')
plt.plot(X,y_pred,'rx') #add the prediction points
plt.show()
Thank you very much in advance!
Here is example code with the vertical lines
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])
def func(x, a, b): # simple linear example
    """Straight-line model: slope a times x, plus intercept b."""
    slope_term = a * x
    return slope_term + b
# starting estimates for a and b
initialParameters = numpy.array([1.0, 1.0])

# curve fit the test data; fittedParameters follows func's order (a, b)
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

# predictions at the observed x values (also used to draw the residual lines)
modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  Using MSE rather than the
# variance of the residuals keeps the value correct if func is swapped for
# a model whose residuals do not average to zero.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot data, fitted line, and one vertical residual segment per point.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData, func, fittedParameters and modelPredictions.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    # now add an individual line for each point: same X, running from the
    # observed Y to the predicted Y.  Drawn on `axes` explicitly so the
    # segments always land on this figure rather than on whichever axes
    # pyplot currently considers active.
    for xValue, observedY, predictedY in zip(xData, yData, modelPredictions):
        axes.plot((xValue, xValue), (observedY, predictedY))

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Scatterplot for multiple regression in matplotlib

I have four features and a dependent variable (X). I want to plot a graph with the predicted regression line and the feature values. I went through the documentation, but I can't figure out how to represent everything in a scatter plot.
Here is some example code to get you started, it fits a simple quadratic and scatterplots the raw data and fitted curve along with calculation of RMSE and R-squared. The example uses a non-linear fit in case you would like to try fitting non-linear equations.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.stats
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7])
def func(x, a, b, c): # simple quadratic example
    """Quadratic model: a times x squared, plus b times x, plus c."""
    quadratic_term = a * numpy.square(x)
    linear_term = b * x
    return quadratic_term + linear_term + c
# starting estimates for a, b and c
initialParameters = numpy.array([1.0, 1.0, 1.0])

# curve fit the test data; fittedParameters follows func's order (a, b, c)
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  MSE and var(yData) carry the same 1/n
# factor, so their ratio is SS_res / SS_tot.  Using MSE rather than the
# variance of the residuals keeps the value correct if func is swapped for
# a non-linear model whose residuals do not average to zero.
Rsquared = 1.0 - (MSE / numpy.var(yData))

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the data as diamond markers with the fitted quadratic over it.

    graphWidth and graphHeight are divided by 100.0 to give the figure size
    passed to pyplot, and the figure is created with dpi=100.  Reads the
    module-level xData, yData, func and fittedParameters.
    """
    size_in_inches = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=size_in_inches, dpi=100)
    plot_area = figure.add_subplot(111)

    # sample the fitted equation across the observed x range
    x_low, x_high = min(xData), max(xData)
    curve_x = numpy.linspace(x_low, x_high)
    curve_y = func(curve_x, *fittedParameters)

    plot_area.plot(xData, yData, 'D')  # raw data first
    plot_area.plot(curve_x, curve_y)  # then the model line

    plot_area.set_xlabel('X Data')
    plot_area.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # clean up after using pyplot


# requested figure dimensions
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Categories

Resources