Related
I have looked into the example on this website: https://scipython.com/blog/plotting-the-decision-boundary-of-a-logistic-regression-model/
I understand how they plot the decision boundary for a linear feature vector. But how would I plot the decision boundary if I apply
from sklearn.preprocessing import PolynomialFeatures
...
poly = PolynomialFeatures(degree = 3, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
to get a curved decision boundary? (I know it doesn't make a lot of sense for the example on the website, but it may be easier to talk about it.)
I have tried to plot the resulting polynomial decision boundary by overlaying the polynomial plot but only got weird results like this:
So how could I do a curved decision boundary plot?
The edited code:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
plt.rc('text', usetex=True)
plt.figure(dpi=1200)
pts = np.loadtxt(r'C:\Users\stefa\OneDrive\Desktop\linpts.txt')
X = pts[:,:2]
Y = pts[:,2].astype('int')
poly = PolynomialFeatures(degree = 2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
# Retrieve the model parameters.
b = clf.intercept_[0]
w1, w2, w3, w4, w5 = clf.coef_.T
# In[]
def PolyCoefficients(x, coeffs):
    """ Returns a polynomial for ``x`` values for the ``coeffs`` provided.
    The coefficients must be in ascending order (``x**0`` to ``x**o``).
    """
    o = len(coeffs)
    print(f'# This is a polynomial of order {o}.')
    y = 0
    for i in range(o):
        y += coeffs[i]*x**i
    return y
x = np.linspace(0, 9, 100)
coeffs = [b, w1, w2, w3, w4, w5]
plt.plot(x, PolyCoefficients(x, coeffs))
plt.show()
# In[]
# Calculate the intercept and gradient of the decision boundary.
c = -b/w2
m = -w1/w2
# Plot the data and the classification with the decision boundary.
xmin, xmax = -1, 2
ymin, ymax = -1, 2.5
xd = np.array([xmin, xmax])
yd = m*xd + c
#plt.plot(xd, yd, 'k', lw=1, ls='--')
plt.plot(x, PolyCoefficients(x, coeffs))
plt.fill_between(xd, yd, ymin, color='tab:blue', alpha=0.2)
plt.fill_between(xd, yd, ymax, color='tab:orange', alpha=0.2)
plt.scatter(*X[Y==0].T, s=8, alpha=0.5)
plt.scatter(*X[Y==1].T, s=8, alpha=0.5)
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.ylabel(r'$x_2$')
plt.xlabel(r'$x_1$')
plt.show()
Let me generate some demo data.
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
X = np.random.normal(size=(1000, 2))
Y = ((X[:,0] - X[:,1] + 0.4*X[:,0]*X[:,1] + 0.7*X[:,0]**2 - 0.8*X[:,1]**2 +
np.random.normal(scale=0.1, size=(1000,))) >= 0).astype(int)
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
Apart from the randomness, the data looks something like this.
Train the model like you did.
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
print(poly.powers_)
#[[1 0]
# [0 1]
# [2 0]
# [1 1]
# [0 2]]
This tells us that the features are ordered as: x1, x2, x1^2, x1*x2, x2^2.
So collect the coefficients and the intercept and give them intuitive names.
w1, w2, w11, w12, w22 = clf.coef_[0]
b = clf.intercept_[0]
By definition, the decision boundary is the set of (x1, x2) for which the probability is even between the two classes. Mathematically, these are the solutions to:
b + w1*x1 + w2*x2 + w11*x1^2 + w12*x1*x2 + w22*x2^2 = 0
If we fix x1, then this is a quadratic equation in x2, which we can solve analytically. The following function does this job.
def boundary(x1):
    # returns x2 on the boundary for a given x1
    # we solve the quadratic equation
    #   a x^2 + b x + c = 0
    #   --> x = (-b +- sqrt(b^2 - 4ac)) / 2a
    a_ = w22
    b_ = w2 + w12 * x1
    c_ = b + w1*x1 + w11*x1**2
    tmp = b_**2 - 4*a_*c_
    if tmp < 0:
        return None
    ans = [(-b_ + tmp**0.5) / (2*a_), (-b_ - tmp**0.5) / (2*a_)]
    ans.sort()  # smaller first
    return ans
# compute the boundaries
xs = np.linspace(X[:,0].min(), X[:,0].max(), num=100)
ys_1 = []
ys_2 = []
for x1 in xs:
    tmp = boundary(x1)
    if tmp is None:
        ys_1.append(None)
        ys_2.append(None)
    else:
        ys_1.append(tmp[0])  # smaller boundary
        ys_2.append(tmp[1])  # larger boundary
Now that we have the boundaries as data, we can visualize them easily.
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
plt.plot(xs, ys_1, c="green")
plt.plot(xs, ys_2, c="gray")
# if ys contains None, need to skip them
plt.fill_between(xs, ys_1, ys_2, color='tab:blue', alpha=0.2)
plt.fill_between(xs, min(ys_1), ys_1, color='tab:orange', alpha=0.2)
plt.fill_between(xs, ys_2, max(ys_2), color='tab:orange', alpha=0.2)
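Since min() and max() will fail on lists that contain None, here is a small sketch of my own (not part of the original answer) for the masking mentioned in the comment above: convert the lists to float arrays (None becomes NaN) and index with a validity mask.
ys_1_arr = np.array([np.nan if v is None else v for v in ys_1])
ys_2_arr = np.array([np.nan if v is None else v for v in ys_2])
valid = ~np.isnan(ys_1_arr)  # the same x1 values are invalid for both curves
plt.plot(xs[valid], ys_1_arr[valid], c="green")
plt.plot(xs[valid], ys_2_arr[valid], c="gray")
plt.fill_between(xs[valid], ys_1_arr[valid], ys_2_arr[valid], color='tab:blue', alpha=0.2)
plt.fill_between(xs[valid], np.nanmin(ys_1_arr), ys_1_arr[valid], color='tab:orange', alpha=0.2)
plt.fill_between(xs[valid], ys_2_arr[valid], np.nanmax(ys_2_arr), color='tab:orange', alpha=0.2)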
Notice that the boundaries can be computed explicitly here only because the model is quadratic; more general, complex classifiers need different approaches.
An easier, generally applicable approach is to create dummy data containing various combinations of the variables, let the classifier predict them, and plot the points coloured by the predicted class.
xs = np.linspace(X[:,0].min(), X[:,0].max(), num=100)
ys = np.linspace(X[:,1].min(), X[:,1].max(), num=100)
newX = []
for x1 in xs:
    for x2 in ys:
        newX.append((x1, x2))
newX = np.array(newX)
p = clf.predict(poly.transform(newX))
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
flg = (p > 0)
plt.scatter(newX[flg,0], newX[flg,1], alpha=0.02, c="tab:blue", marker="s", s=20)
plt.scatter(newX[~flg,0], newX[~flg,1], alpha=0.02, c="tab:orange", marker="s", s=20)
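As a sketch of mine (not part of the original answer), the same grid-prediction idea can be written more compactly with np.meshgrid and contourf, which colours the regions directly instead of scattering dummy points:
xx, yy = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 200),
                     np.linspace(X[:, 1].min(), X[:, 1].max(), 200))
zz = clf.predict(poly.transform(np.c_[xx.ravel(), yy.ravel()])).reshape(xx.shape)
plt.contourf(xx, yy, zz, levels=[-0.5, 0.5, 1.5],
             colors=["tab:orange", "tab:blue"], alpha=0.2)  # class 0 orange, class 1 blue
plt.scatter(X[Y > 0, 0], X[Y > 0, 1], alpha=0.3, marker="o")
plt.scatter(X[Y <= 0, 0], X[Y <= 0, 1], alpha=0.3, marker="x")
plt.show()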
With coeffs = [b, w1, w2, w3, w4, w5], the output of your PolyCoefficients function is a 5th order polynomial in a single variable x:
coeffs[0]*x^0 + coeffs[1]*x^1 + coeffs[2]*x^2 + coeffs[3]*x^3 + coeffs[4]*x^4 + coeffs[5]*x^5
Instead, what you need is the 2nd order polynomial in the two features x1 and x2 (specified by your parameter degree = 2 inside the sklearn.preprocessing.PolynomialFeatures object), which is really the following:
coeffs[0] + coeffs[1]*x1 + coeffs[2]*x2 + coeffs[3]*x1^2 + coeffs[4]*x1*x2 + coeffs[5]*x2^2
This formula is valid as long as you are using two features x1 and x2; otherwise you would need x1, x2, ..., xN and all the terms that derive from (x1 + x2 + ... + xN)^2.
You can find more details and examples here.
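To tie this back to the original question, here is a minimal sketch, assuming clf, poly, X and Y are the fitted objects and data from the edited code above: evaluate clf.decision_function on a grid of points passed through the polynomial features, and plot its zero contour, which is the curved decision boundary.
import numpy as np
import matplotlib.pyplot as plt

x1g, x2g = np.meshgrid(np.linspace(-1, 2, 300), np.linspace(-1, 2.5, 300))
grid = np.c_[x1g.ravel(), x2g.ravel()]
# decision_function returns b + w . phi(x) for the polynomial features phi(x)
z = clf.decision_function(poly.transform(grid)).reshape(x1g.shape)
plt.contour(x1g, x2g, z, levels=[0], colors='k', linestyles='--')  # curved boundary
plt.scatter(*X[Y == 0].T, s=8, alpha=0.5)
plt.scatter(*X[Y == 1].T, s=8, alpha=0.5)
plt.show()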
I need to plot the decision boundary for KNN without using sklearn. I have implemented the classifier but I am not able to plot the decision boundary. The plot should be as described in the book "The Elements of Statistical Learning: Data Mining, Inference, and Prediction" (Second Edition) by Trevor Hastie, Robert Tibshirani & Jerome Friedman (ElemStatLearn). The plot required is shown below:
KNN k=15 classifier Original
So, far I have been able to plot only the image below:
KNN k=15 classifier Plot produced so far
I have calculated the grid points and the predictions on those points. I also tried to find boundary points by checking where the prediction differs from the prediction at the previous grid point, and then sorted those points. But when I plot them, they don't look like the required plot.
import numpy as np
from numpy import arange, meshgrid, hstack

def get_grid(X):
    # Creating grids for decision surface
    ## Define bounds of the surface
    min1, max1 = X[:, 0].min() - 0.2, X[:, 0].max() + 0.2
    min2, max2 = X[:, 1].min() - 0.2, X[:, 1].max() + 0.2
    ## Define the x and y points
    x1grid = arange(min1, max1, 0.1)
    x2grid = arange(min2, max2, 0.1)
    ## Create all of the lines and rows of the grid
    xx, yy = meshgrid(x1grid, x2grid)
    ## Flatten each grid to a vector
    r1, r2 = xx.flatten(), yy.flatten()
    r1, r2 = r1.reshape((len(r1), 1)), r2.reshape((len(r2), 1))
    ## Horizontally stack vectors to create x1, x2 input for the model
    grid_X = hstack((r1, r2))
    return grid_X
X, y = data[:, :-1], data[:, -1].astype(int)
# Custom class defined
model = KNNClassifier(num_neighbors = 5)
model.fit(X, y)
y_pred = model.predict(X)
grid_X = get_grid(X)
grid_yhat = model.predict(grid_X)
boundary = []
for i in range(1, len(grid_X)):
    if grid_yhat[i] != grid_yhat[i-1]:
        boundary.append((grid_X[i] + grid_X[i-1]) * 0.5)
boundary_x = [b[0] for b in boundary]
boundary_y = [b[1] for b in boundary]
order = np.argsort(boundary_x)
boundary_x = np.array(boundary_x)[order]
boundary_y = np.array(boundary_y)[order]
from matplotlib.pyplot import figure, axis, scatter, plot, show

def plot_decision_surface(X, y, boundary_X, boundary_y, grid_X, grid_yhat):
    figure(figsize=(10,10))
    axis('off')
    # Plot the ground truth data points in the 2D feature space
    X_pos, X_neg = split_X(X, y)
    scatter(X_pos[:, 0], X_pos[:, 1], facecolors='none', edgecolors='orange', marker='o', linewidth=3, s=60)
    scatter(X_neg[:, 0], X_neg[:, 1], facecolors='none', edgecolors='blue', marker='o', linewidth=3, s=60)
    grid_pos, grid_neg = split_X(grid_X, grid_yhat)
    # Plot and color the grid of x, y values with class
    scatter(grid_pos[:, 0], grid_pos[:, 1], color='orange', marker='.', linewidth=0.05)
    scatter(grid_neg[:, 0], grid_neg[:, 1], color='blue', marker='.', linewidth=0.05)
    # Plot the decision boundary for the classification
    scatter(boundary_X, boundary_y, color='k')
    plot(boundary_X, boundary_y, color='k')
    # Plot Info
    show()
plot_decision_surface(X, y, boundary_X, boundary_y, grid_X, grid_yhat)
Failed attempt to plot the boundary is shown below:
Failed attempt to plot the boundary
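A sketch of an alternative (my own, assuming get_grid's 0.1 step and that the custom KNNClassifier predicts class labels 0/1): rather than tracing and sorting boundary points by hand, reshape the grid predictions back onto the mesh and let plt.contour draw the boundary line.
import numpy as np
import matplotlib.pyplot as plt

min1, max1 = X[:, 0].min() - 0.2, X[:, 0].max() + 0.2
min2, max2 = X[:, 1].min() - 0.2, X[:, 1].max() + 0.2
xx, yy = np.meshgrid(np.arange(min1, max1, 0.1), np.arange(min2, max2, 0.1))
zz = grid_yhat.reshape(xx.shape)   # grid_X was flattened from this same mesh
plt.contourf(xx, yy, zz, levels=[-0.5, 0.5, 1.5],
             colors=['tab:blue', 'tab:orange'], alpha=0.3)
plt.contour(xx, yy, zz, levels=[0.5], colors='k')  # the decision boundary
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolors='k')
plt.show()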
I am trying to implement GP regression using Poisson likelihood.
I followed the example in GPy by doing
poisson_likelihood = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X=X, Y=Y, likelihood=poisson_likelihood, inference_method=laplace_inf, kernel=kernel)
m.optimize()
# for plotting
pred_points = np.linspace(300,800,1000)[:, None]
#Predictive GP for log intensity mean and variance
f_mean, f_var = m._raw_predict(pred_points)
f_upper, f_lower = f_mean + 2*np.sqrt(f_var), f_mean - 2.*np.sqrt(f_var)
pb.figure(figsize=(10, 13))
pb.plot(pred_points, np.exp(f_mean), color='blue', lw=2)
pb.fill_between(pred_points[:,0], np.exp(f_lower[:,0]), np.exp(f_upper[:,0]), color='blue', alpha=.1)
pb.errorbar(Xc.flatten(), Yc.flatten(), dyc, fmt='.', color='k',markersize=8,alpha=1.0, label='Data')
When I tried to do the same using GPflow, I implemented in the following way
poisson_likelihood = gpflow.likelihoods.Poisson()
m = gpflow.models.VGP((X, Y), kernel=k, likelihood=poisson_likelihood, num_latent_gps=1)
opt = gpflow.optimizers.Scipy()
opt_logs = opt.minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=100))
# for plotting
xx = np.linspace(300, 800, 100).reshape(100, 1)
mean, var = m.predict_f(xx)
plt.plot(X, Y, "kx", mew=2)
plt.plot(xx, np.exp(mean), "C0", lw=2)
plt.fill_between(
xx[:, 0],
np.exp(mean[:, 0] - 1.96 * np.sqrt(var[:, 0])),
np.exp(mean[:, 0] + 1.96 * np.sqrt(var[:, 0])),
color="C0",
alpha=0.2,
)
When I implemented this using GPflow, the hyperparameters did not move from their initialized values.
Also, I am getting very different results; am I doing something wrong?
Result with GPflow
Result with GPy
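One thing worth checking (my suggestion, not from the question): print the optimizer result and the model summary before and after optimization to confirm whether the hyperparameters really stayed at their initial values.
from gpflow.utilities import print_summary

print_summary(m)   # kernel / likelihood parameter values for the fitted model
print(opt_logs)    # SciPy OptimizeResult: success flag, message, number of iterations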
I am trying to visualise an SVM with code from https://scikit-learn.org/stable/auto_examples/svm/plot_iris_svc.html and want to be able to specify the colours for each class. For that I create a custom colormap using LinearSegmentedColormap.from_list. This works as intended for 6 classes or fewer, but for more than 6 classes the colours of the contour plot are often wrong.
How can I specify the colours for more than 6 classes?
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from sklearn import svm, preprocessing
# create dummy data
use = (
((1,9),(.2,.1),'b'),
((4,0),(.2,.4),'r'),
((1,5),(.2,.3),'g'),
((6,3),(.3,.2),'m'),
((5,6),(.1,.2),'c'),
((4,2),(.1,.1),'xkcd:orange'),
((5,4),(.3,.2),'xkcd:peach'),
((3,1),(.1,.4),'xkcd:bright pink'),
((2,1),(.2,.1),'xkcd:crimson'),
)
sx = [np.random.normal(loc=u[0][0], scale=u[1][0], size=(20,)) for u in use]
sy = [np.random.normal(loc=u[0][1], scale=u[1][1], size=(20,)) for u in use]
X = np.array([[ix[i], iy[i]] for ix, iy in zip(sx, sy) for i in range(20)])
y = np.array([i for i in range(len(use)) for n in range(20)])
# scale the data
Scaler = preprocessing.StandardScaler().fit(X)
X = Scaler.transform(X)
# color map
cm = LinearSegmentedColormap.from_list('use', [u[2] for u in use], N=len(use))
def make_meshgrid(x, y, h=.02):
    """Create a mesh of points to plot in
    Parameters
    ----------
    x: data to base x-axis meshgrid on
    y: data to base y-axis meshgrid on
    h: stepsize for meshgrid, optional
    Returns
    -------
    xx, yy : ndarray
    """
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    """Plot the decision boundaries for a classifier.
    Parameters
    ----------
    ax: matplotlib axes object
    clf: a classifier
    xx: meshgrid ndarray
    yy: meshgrid ndarray
    params: dictionary of params to pass to contourf, optional
    """
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z/Z.max()
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out
# we create an instance of SVM and fit our data.
C = 1.0 # SVM regularization parameter
models = (svm.SVC(kernel='linear', C=C, decision_function_shape='ovo'),
svm.SVC(kernel='sigmoid', C=C, decision_function_shape='ovo'),
svm.SVC(kernel='rbf', gamma=0.7, C=C, decision_function_shape='ovo'),
svm.SVC(kernel='poly', degree=3, gamma='auto', C=C, decision_function_shape='ovo'))
models = (clf.fit(X, y) for clf in models)
# title for the plots
titles = ('SVC with linear kernel',
'SVC with sigmoid kernel',
'SVC with RBF kernel',
'SVC with polynomial (degree 3) kernel')
# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
# plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)
for clf, title, ax in zip(models, titles, sub.flatten()):
    plot_contours(ax, clf, xx, yy, cmap=cm, alpha=0.7)
    ax.scatter(X0, X1, c=y, cmap=cm, s=20, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
plt.show()
One has to specify the levels as a list-like object to enforce colours like this. The list of levels should contain $n+1$ entries marking the borders between the classes, where $n$ is the number of classes. Since the classes here equal range(len(use)), this is [i - .5 for i in range(len(use) + 1)], so one can use the following to get the desired output:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from sklearn import svm, preprocessing
# create dummy data
use = (
((1,9),(.2,.1),'b'),
((4,0),(.2,.4),'r'),
((1,5),(.2,.3),'g'),
((6,3),(.3,.2),'m'),
((5,6),(.1,.2),'c'),
((4,2),(.1,.1),'xkcd:orange'),
((5,4),(.3,.2),'xkcd:peach'),
((3,1),(.1,.4),'xkcd:bright pink'),
((2,1),(.2,.1),'xkcd:crimson'),
)
sx = [np.random.normal(loc=u[0][0], scale=u[1][0], size=(20,)) for u in use]
sy = [np.random.normal(loc=u[0][1], scale=u[1][1], size=(20,)) for u in use]
X = np.array([[ix[i], iy[i]] for ix, iy in zip(sx, sy) for i in range(20)])
y = np.array([i for i in range(len(use)) for n in range(20)])
# scale the data
Scaler = preprocessing.StandardScaler().fit(X)
X = Scaler.transform(X)
# color map
cm = LinearSegmentedColormap.from_list('use', [u[2] for u in use], N=len(use))
def make_meshgrid(x, y, h=.02):
    """Create a mesh of points to plot in
    Parameters
    ----------
    x: data to base x-axis meshgrid on
    y: data to base y-axis meshgrid on
    h: stepsize for meshgrid, optional
    Returns
    -------
    xx, yy : ndarray
    """
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    """Plot the decision boundaries for a classifier.
    Parameters
    ----------
    ax: matplotlib axes object
    clf: a classifier
    xx: meshgrid ndarray
    yy: meshgrid ndarray
    params: dictionary of params to pass to contourf, optional
    """
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out
# we create an instance of SVM and fit our data.
C = 1.0 # SVM regularization parameter
models = (svm.SVC(kernel='linear', C=C, decision_function_shape='ovo'),
svm.SVC(kernel='sigmoid', C=C, decision_function_shape='ovo'),
svm.SVC(kernel='rbf', gamma=0.7, C=C, decision_function_shape='ovo'),
svm.SVC(kernel='poly', degree=3, gamma='auto', C=C, decision_function_shape='ovo'))
models = (clf.fit(X, y) for clf in models)
# title for the plots
titles = ('SVC with linear kernel',
'SVC with sigmoid kernel',
'SVC with RBF kernel',
'SVC with polynomial (degree 3) kernel')
# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
# plt.subplots_adjust(wspace=0.4, hspace=0.4)
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)
for clf, title, ax in zip(models, titles, sub.flatten()):
    plot_contours(ax, clf, xx, yy,
                  cmap=cm, alpha=0.7,
                  levels=[i - .5 for i in range(len(use) + 1)])
    ax.scatter(X0, X1, c=y, cmap=cm, s=20, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
plt.show()
I am trying to plot the hyperplane for the model I trained with LinearSVC and sklearn. Note that I am working with natural languages; before fitting the model I extracted features with CountVectorizer and TfidfTransformer.
Here the classifier:
from sklearn.svm import LinearSVC
from sklearn import svm
clf = LinearSVC(C=0.2).fit(X_train_tf, y_train)
Then I tried to plot as suggested on the Scikit-learn website:
# get the separating hyperplane
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (clf.intercept_[0]) / w[1]
# plot the parallels to the separating hyperplane that pass through the
# support vectors
b = clf.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])
# plot the line, the points, and the nearest vectors to the plane
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
s=80, facecolors='none')
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
plt.axis('tight')
plt.show()
This example uses svm.SVC(kernel='linear'), while my classifier is LinearSVC. Therefore, I get this error:
AttributeError Traceback (most recent call last)
<ipython-input-39-6e231c530d87> in <module>()
7 # plot the parallels to the separating hyperplane that pass through the
8 # support vectors
----> 9 b = clf.support_vectors_[0]
10 yy_down = a * xx + (b[1] - a * b[0])
11 b = clf.support_vectors_[-1]
AttributeError: 'LinearSVC' object has no attribute 'support_vectors_'
How can I successfully plot the hyperplane of my LinearSVC classifier?
What about leaving out the support vectors, since support_vectors_ is not defined for a LinearSVC?
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
np.random.seed(0)
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20
fig, ax = plt.subplots()
clf2 = svm.LinearSVC(C=1).fit(X, Y)
# get the separating hyperplane
w = clf2.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (clf2.intercept_[0]) / w[1]
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx2, yy2 = np.meshgrid(np.arange(x_min, x_max, .2),
np.arange(y_min, y_max, .2))
Z = clf2.predict(np.c_[xx2.ravel(), yy2.ravel()])
Z = Z.reshape(xx2.shape)
ax.contourf(xx2, yy2, Z, cmap=plt.cm.coolwarm, alpha=0.3)
ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.coolwarm, s=25)
ax.plot(xx,yy)
ax.axis([x_min, x_max,y_min, y_max])
plt.show()
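If you also want the dashed margin lines without support vectors, here is a sketch (my assumption, reusing clf2, X, Y and the axis limits from above) that places them where the decision function w·x + b equals ±1:
w = clf2.coef_[0]
b = clf2.intercept_[0]
xs = np.linspace(-5, 5)
plt.plot(xs, -(w[0] * xs + b) / w[1], 'k-')       # decision boundary: w.x + b = 0
plt.plot(xs, -(w[0] * xs + b + 1) / w[1], 'k--')  # margin: w.x + b = -1
plt.plot(xs, -(w[0] * xs + b - 1) / w[1], 'k--')  # margin: w.x + b = +1
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.coolwarm, s=25)
plt.axis([x_min, x_max, y_min, y_max])
plt.show()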