How to plot a Python 3-dimensional level set? - python

I have some trouble plotting the image which is in my head.
I want to visualize the Kernel-trick with Support Vector Machines. So I made some two-dimensional data consisting of two circles (an inner and an outer circle) which should be separated by a hyperplane. Obviously this isn't possible in two dimensions - so I transformed them into 3D. Let n be the number of samples. Now I have an (n,3)-array (3 columns, n rows) X of data points and an (n,1)-array y with labels. Using sklearn I get the linear classifier via
# Linear-kernel support vector classifier, fitted on the 3D points X with labels y.
clf = svm.SVC(kernel='linear', C=1000)
clf.fit(X, y)
I already plot the data points as scatter plot via
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
Now I want to plot the separating hyperplane as a surface plot. My problem here is the missing explicit representation of the hyperplane, because the decision function only yields an implicit hyperplane via decision_function = 0. Therefore I need to plot the level set (of level 0) of a 4-dimensional object.
Since I'm not a python expert I would appreciate if somebody could help me out! And I know that this isn't really the "style" of using a SVM but I need this image as an illustration for my thesis.
Edit: my current "code"
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs, make_circles
from tikzplotlib import save as tikz_save
plt.close('all')
# Build 100 points from two noisy pairs of concentric circles (50 samples each).
X, y = make_circles(n_samples=50, factor=0.5, random_state=4, noise=.05)
X2, y2 = make_circles(n_samples=50, factor=0.2, random_state=5, noise=.08)
X = np.append(X, X2, axis=0)
y = np.append(y, y2, axis=0)
# Shift X by +1 along both axes (towards [0, 2] x [0, 2]); any coordinate that is
# still negative is clamped to 0.01, which keeps the product under the square
# root of the 3D map below non-negative.
X = X + 1
X[X < 0] = 0.01
clf = svm.SVC(kernel='rbf', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function on the axes the scatter call just drew on
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create a 30 x 30 grid over the current axis limits to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary (level 0) and margins (levels -1 and +1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])
# plot support vectors as hollow circles
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
################## KERNEL TRICK - 3D ##################
# Map every 2D point (x1, x2) to (x1**2, x2**2, sqrt(2 * x1 * x2)).
trans_X = np.column_stack([X[:, 0]**2, X[:, 1]**2, np.sqrt(2 * X[:, 0] * X[:, 1])])
fig = plt.figure()
ax = plt.axes(projection="3d")
# creating scatter plot
ax.scatter3D(trans_X[:, 0], trans_X[:, 1], trans_X[:, 2], c=y, cmap=plt.cm.Paired)
clf2 = svm.SVC(kernel='linear', C=1000)
clf2.fit(trans_X, y)
# `ax` is already the 3D axes created above, so it is reused directly instead of
# calling plt.gca(projection='3d'), whose keyword form newer Matplotlib rejects.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
zlim = ax.get_zlim()
### from here i don't know what to do ###
# Coarse 3 x 3 x 3 grid over the axes limits, with the decision function
# evaluated at every grid point.
xx = np.linspace(xlim[0], xlim[1], 3)
yy = np.linspace(ylim[0], ylim[1], 3)
zz = np.linspace(zlim[0], zlim[1], 3)
ZZ, YY, XX = np.meshgrid(zz, yy, xx)
xyz = np.vstack([XX.ravel(), YY.ravel(), ZZ.ravel()]).T
Z = clf2.decision_function(xyz).reshape(XX.shape)
# Unfinished attempt at drawing the level set (left disabled):
#ax.contour(XX, YY, ZZ, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--','-','--'])
Desired Output
I want to get something like that.
In general I want to reconstruct what they do in this article, especially "Non-linear transformations".

Part of your question is addressed in this question on linear-kernel SVM. It's a partial answer, because only linear kernels can be represented this way, i.e. thanks to hyperplane coordinates accessible via the estimator when using linear kernel.
Another solution is to find the isosurface with marching_cubes
This solution involves installing the scikit-image toolkit (https://scikit-image.org), which makes it possible to find an isosurface of a given value from the mesh grid of the 3D coordinates (here I used 0, the level at which the decision function changes sign, i.e. the decision boundary).
In the code below (copied from yours), I implement the idea for any kernel (in the example, I used the RBF kernel), and the output is shown beneath the code. Please consider my footnote about 3D plotting with matplotlib, which may be another issue in your case.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from skimage import measure
from sklearn.datasets import make_blobs, make_circles
from tikzplotlib import save as tikz_save
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
plt.close('all')
# Build 100 points from two noisy pairs of concentric circles (50 samples each).
X, y = make_circles(n_samples=50, factor=0.5, random_state=4, noise=.05)
X2, y2 = make_circles(n_samples=50, factor=0.2, random_state=5, noise=.08)
X = np.append(X, X2, axis=0)
y = np.append(y, y2, axis=0)
# Shift X by +1 along both axes (towards [0, 2] x [0, 2]); any coordinate that is
# still negative is clamped to 0.01, which keeps the product under the square
# root of the 3D map below non-negative.
X = X + 1
X[X < 0] = 0.01
clf = svm.SVC(kernel='rbf', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function on the axes the scatter call just drew on
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create a 30 x 30 grid over the current axis limits to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary (level 0) and margins (levels -1 and +1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])
# plot support vectors as hollow circles
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
################## KERNEL TRICK - 3D ##################
# Map every 2D point (x1, x2) to (x1**2, x2**2, sqrt(2 * x1 * x2)).
trans_X = np.column_stack([X[:, 0]**2, X[:, 1]**2, np.sqrt(2 * X[:, 0] * X[:, 1])])
fig = plt.figure()
ax = plt.axes(projection="3d")
# creating scatter plot
ax.scatter3D(trans_X[:, 0], trans_X[:, 1], trans_X[:, 2], c=y, cmap=plt.cm.Paired)
clf2 = svm.SVC(kernel='rbf', C=1000)
clf2.fit(trans_X, y)
# `ax` is already the 3D axes created above; read the limits the scatter set.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
zlim = ax.get_zlim()
# Evaluate the decision function on a regular 50 x 50 x 50 grid over the axes box.
xx = np.linspace(xlim[0], xlim[1], 50)
yy = np.linspace(ylim[0], ylim[1], 50)
zz = np.linspace(zlim[0], zlim[1], 50)
# indexing='ij' makes array axes 0/1/2 run along x/y/z, so they line up with the
# (dx, dy, dz) spacing handed to marching cubes below.
XX, YY, ZZ = np.meshgrid(xx, yy, zz, indexing='ij')
xyz = np.vstack([XX.ravel(), YY.ravel(), ZZ.ravel()]).T
Z = clf2.decision_function(xyz).reshape(XX.shape)
# find the level-0 isosurface (the decision boundary) with marching cubes
dx = xx[1] - xx[0]
dy = yy[1] - yy[0]
dz = zz[1] - zz[0]
verts, faces, _, _ = measure.marching_cubes(Z, 0, spacing=(dx, dy, dz), step_size=2)
# The vertices are measured from the first grid point; ADD the grid origin to
# express them in data coordinates.
verts = verts + np.array([xlim[0], ylim[0], zlim[0]])
# add the triangles as a semi-transparent Poly3DCollection
mesh = Poly3DCollection(verts[faces])
mesh.set_facecolor('g')
mesh.set_edgecolor('none')
mesh.set_alpha(0.3)
ax.add_collection3d(mesh)
ax.view_init(20, -45)
plt.savefig('kerneltrick')
Running the code produces the following image with Matplotlib, where the green semi-transparent surface represents the non-linear decision boundary.
Footnote: 3D plotting with matplotlib
Note that Matplotlib 3D is not able to manage the "depth" of objects in some cases, because it can conflict with the zorder of those objects. This is why the hyperplane sometimes looks as if it were plotted "on top of" the points, even though it should be "behind" them. This issue is a known bug discussed in the matplotlib 3d documentation and in this answer.
If you want to have better rendering results, you may want to use Mayavi, as recommended by the Matplotlib developers, or any other 3D Python plotting library.

Related

How to plot my own logistic regression decision boundaries and SKlearn's ones on the same figure

I have an assignment in which I need to compare my own multi-class logistic regression and the built-in SKlearn one.
As part of it, I need to plot the decision boundaries of each, on the same figure (for 2,3, and 4 classes separately).
This is my model's decision boundaries for 3 classes:
Made with this code:
# Bounding box of the data, padded by 0.5 on every side.
x1_min, x1_max = X[:, 0].min() - .5, X[:, 0].max() + .5
x2_min, x2_max = X[:, 1].min() - .5, X[:, 1].max() + .5
# Regular grid over that box (np.linspace defaults to 50 samples per axis).
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]
# One contour per weight vector, drawn where the predicted probability is 0.5.
for w in ws:
    probs = ol.predict_prob(grid, w).reshape(xx1.shape)
    plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='green')
where
ol - is my own logistic regression model
ws - the current weights
That's how I tried to plot the Sklearn boundaries:
# Draw, for every class i, the line w[0]*x + w[1]*y + b = 0 solved for y.
for i in range(len(clf.coef_)):
    w = clf.coef_[i]
    a = -w[0] / w[1]
    xx = np.linspace(x1_min, x1_max)
    # NOTE(review): the intercept index is fixed at 0, so every line is drawn with
    # the first class's intercept; the per-class value would be clf.intercept_[i].
    yy = a * xx - (clf.intercept_[0]) / w[1]
    plt.plot(xx, yy, 'k-')
Resulting
I understand that it's due to the 1dim vs 2dim grids, but I can't understand how to solve it.
I also tried to use the built-in DecisionBoundaryDisplay but I couldn't figure out how to plot it with my boundaries + it doesn't plot only the lines but also the whole background is painted in the corresponding color.
A couple fixes:
Change clf.intercept_[0] to clf.intercept_[i], so that each class's line uses its own intercept.
If the xlimits and ylimits in the plot look strange, you can constrain them.
# Restrict the view to the x1/x2 min-max window instead of letting the plotted lines set it.
ax.set_xlim([x1_min, x1_max])
ax.set_ylim([x2_min, x2_max])
MRE:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
# Three 2D blobs and a figure with two side-by-side axes.
X, y = make_blobs(n_features=2, centers=3, random_state=42)
fig, ax = plt.subplots(nrows=1, ncols=2)
# Per-feature data range, widened by 0.5 on each side.
x1_min, x2_min = X.min(axis=0) - 0.5
x1_max, x2_max = X.max(axis=0) + 0.5
def draw_coef_lines(clf, X, y, ax, title):
    """Plot the line ``w . x + b = 0`` of every class of a fitted linear model.

    Parameters
    ----------
    clf : fitted estimator exposing ``coef_`` (one row of two weights per
        class) and ``intercept_`` (one value per class).
    X : array whose first two columns are the plotted features.
    y : class labels, used to colour the scattered points.
    ax : Matplotlib axes to draw on.
    title : title set on ``ax``.
    """
    # The plotting window is derived from X itself (data range padded by 0.5), so
    # the function does not depend on module-level variables.
    x1_min, x1_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    x2_min, x2_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx = np.linspace(x1_min, x1_max)
    for w, b in zip(clf.coef_, clf.intercept_):
        # w[0]*x + w[1]*y + b = 0  ->  y = -(w[0] / w[1]) * x - b / w[1]
        slope = -w[0] / w[1]
        ax.plot(xx, slope * xx - b / w[1], 'k-')
    ax.scatter(X[:, 0], X[:, 1], c=y)
    # Clamp the view: steep lines would otherwise stretch the y-axis.
    ax.set_xlim([x1_min, x1_max])
    ax.set_ylim([x2_min, x2_max])
    ax.set_title(title)
# Fit the default model and the explicit one-vs-rest variant on the same data.
clf1 = LogisticRegression()
clf1.fit(X, y)
clf2 = LogisticRegression(multi_class="ovr")
clf2.fit(X, y)
# One panel per model.
for model, panel, label in ((clf1, ax[0], "Multinomial"), (clf2, ax[1], "OneVsRest")):
    draw_coef_lines(model, X, y, panel, label)
plt.show()

How to have multiple categorical markers on a scatterplot

I want to train logistic regression model, and after that create a plot which shows boundary lines, but in specific way.
My work so far
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from matplotlib.colors import ListedColormap
# Light colours for the predicted regions, bold colours for the training points.
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target
# Create an instance of Logistic Regression Classifier and fit the data.
logreg = LogisticRegression(C=1e5)
logreg.fit(X, Y)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
# xx, yy and Z all have the same shape, so the shading mode is requested explicitly.
plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto')
# Plot also the training points (one marker type for all classes)
plt.scatter(X[:, 0], X[:, 1], c=Y, marker='x', edgecolors='k', cmap=cmap_bold)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.show()
However I find it very unreadable. I want to have other markers for each classification and legend in left upper corner. Just like in the image below :
Do you have any idea how can I change that ? I played with marker ='s', marker='x', but those change all points on scatter plot, instead of one specific classification.
Since you are plotting with categorical values, you can just plot each class separately:
# Replace this
# plt.scatter(X[:, 0], X[:,1], c=Y, marker='x',edgecolors='k', cmap=cmap_bold)
# with this: one scatter call per class, each with its own marker and colour
markers = 'sxo'
for m, i in zip(markers, np.unique(Y)):
    mask = Y == i
    plt.scatter(X[mask, 0], X[mask, 1], c=cmap_bold.colors[i],
                marker=m, edgecolors='k', label=i)
# The label= passed to every scatter call above supplies the legend entries.
plt.legend()
Output:
I find it easier to create a dataframe from X & Y, and then plot the data points with seaborn.scatterplot.
seaborn is a high-level api for matplotlib
As shown in How to extract the boundary values from k-nearest neighbors predict, the dataframe columns can be used to specify all data for fitting, and x and y min and max.
load and setup the data
import numpy as np
import matplotlib.pyplot as plt  # version 3.3.1
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from matplotlib.colors import ListedColormap
# The code below refers to these packages as `sns` and `pd`, so they are
# imported under those aliases.
import seaborn as sns  # version 0.11.0
import pandas as pd  # version 1.1.3
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
# seaborn.scatterplot palette parameter takes a list
palette = ['#FF0000', '#00FF00', '#0000FF']
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target
# add X & Y to dataframe
df = pd.DataFrame(X, columns=iris.feature_names[:2])
df['label'] = Y
# map the number values to the species name and add it to the dataframe
species_map = dict(enumerate(iris.target_names))
df['species'] = df.label.map(species_map)
# Create an instance of Logistic Regression Classifier and fit the data.
logreg = LogisticRegression(C=1e5)
logreg.fit(X, Y)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plot the data
# Background: predicted class of every mesh point, in the light colours.
plt.figure(num=1, figsize=(8, 6))
plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto')
# Foreground: the training points drawn with seaborn; colour and marker shape
# both follow the 'species' column.
point_style = dict(hue='species', style='species', edgecolor='k',
                   alpha=0.5, palette=palette, s=70)
sns.scatterplot(data=df, x='sepal length (cm)', y='sepal width (cm)', **point_style)
# change legend location ('upper left' is the named form of location code 2)
plt.legend(title='Species', loc='upper left')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
# plt.xticks(())
# plt.yticks(())
plt.show()
alpha=0.5 is used with sns.scatterplot, to show that some values of 'versicolor' and 'virginica' overlap.
If the species label is desired for the legend, instead of the name, change hue='species' to hue='label'.
You need to change a single call to plt.scatter to one call for each marker type, since matplotlib does not allow passing multiple marker types as it does with color.
The plot code becomes something like
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points: split the samples by class so that every class
# gets its own scatter call, and with it its own marker.
X0 = X[Y == 0]
X1 = X[Y == 1]
X2 = X[Y == 2]
# The colours follow the class order of cmap_light (red, green, blue), so each
# point matches the region predicted for its class.
plt.scatter(X0[:, 0], X0[:, 1], marker='s', color="red")
plt.scatter(X1[:, 0], X1[:, 1], marker='x', color="green")
plt.scatter(X2[:, 0], X2[:, 1], marker='o', color="blue")
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.show()
where you individually set the marker type and color of each class. You can also create a list for the marker type and another for the color and use a loop.

Problem in plotting support vector contours in Jupyter

I am following this script from ScikitLearn to plot the margins and the hyperplane for SVC. I'm executing each line in a different cell and am following the same order, i.e. cell 1 for line 1, cell 2 for line 2, and so on. When I finally get to the plotting part, say on cell k(which is the last cell in my notebook that holds the last 7 lines of the code), I get UserWarning: No contour levels were found within the data range. and no plot as mentioned in the link given above shows up.
However, when I execute all the lines in the same cell, the code works as expected. What am I doing wrong here?
The code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs
# we create 40 separable points (two blobs, fixed seed)
X, y = make_blobs(n_samples=40, centers=2, random_state=6)
# fit the model, don't regularize for illustration purposes
clf = svm.SVC(kernel='linear', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function
# NOTE(review): gca() returns the *current* axes, so the limits read below only
# describe the data if the figure drawn by plt.scatter above is still current.
# Presumably that is not the case when these lines run in separate notebook
# cells; fresh axes would have default limits that do not cover the data.
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create a 30 x 30 grid over those limits to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary (level 0) and margins (levels -1 and +1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
linestyles=['--', '-', '--'])
# plot support vectors as hollow circles
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
linewidth=1, facecolors='none', edgecolors='k')
plt.show()

Issues with plotting the decision boundaries for the Iris Dataset with KNearestNeighbors

I am trying to plot the decision boundaries for the KNeighborsClassifier in Scikit-learn for the Iris dataset. But, the graphs that I am getting don't make much sense to me.
I would expect the boundary between the dark blue and light blue lines go in the direction for the green line that I drew on the picture.
The code that I used to generate it can be found below. It was inspired by Plot the decision boundaries of a VotingClassifier.
What am I missing or not understanding?
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 14:22:05 2020
#author: KamKam
Plotting the decision boundaries for KNearestNeighbours.
"""
# Import required modules.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from matplotlib.colors import ListedColormap
n_neighbors = [1, 3, 9]
# Load the iris dataset.
iris = datasets.load_iris()
X = iris.data[:, 2:4]  # Keep only columns 2 and 3 (petal length, petal width).
y = iris.target
# Set up the data such that it can be inserted into one plot.
# Count the number of each target that are in the dataset.
ylen = y.shape[0]
unique, counts = np.unique(y, return_counts=True)
# Create empty arrays for each of the targets. We only require them to have 2
# features because we are only plotting in 2D.
X0 = np.zeros((counts[0], 2))
X1 = np.zeros((counts[1], 2))
X2 = np.zeros((counts[2], 2))
countX0, countX1, countX2 = 0, 0, 0  # Next free row in each of the X arrays.
# Insert data into the newly created arrays.
for i in range(ylen):
    if y[i] == 0:
        X0[countX0, :] = X[i, :]
        countX0 += 1
    elif y[i] == 1:
        X1[countX1, :] = X[i, :]
        countX1 += 1
    else:
        X2[countX2, :] = X[i, :]
        countX2 += 1
h = 0.02  # Step size of the mesh.
plotCount = 0  # Counter for each of the plots that we will be creating.
# Create colour maps.
cmap_light = ListedColormap(['orange', 'cyan', 'cornflowerblue'])
cmap_bold = ListedColormap(['darkorange', 'c', 'darkblue'])
# Initialize plotting. Close all the currently open plots, initialize the
# figure and subplot commands
plt.close('all')
fig, axs = plt.subplots(1, 3)
axs = axs.ravel()
for j in n_neighbors:
    # Create the instance of the Neighbours classifier and fit the data.
    knn = KNeighborsClassifier(n_neighbors=j)
    knn.fit(X, y)
    # Plot the decision boundary. For that, we will assign a color for each
    # point in the mesh [x_min, x_max]x[y_min, y_max]
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    axs[plotCount].pcolormesh(xx, yy, Z, cmap=cmap_bold)
    # Plot the training points.
    axs[plotCount].scatter(X0[:, 0], X0[:, 1], c='k', marker='o',
                           label=iris.target_names[0])
    axs[plotCount].scatter(X1[:, 0], X1[:, 1], c='r', marker='o',
                           label=iris.target_names[1])
    # NOTE(review): this call pairs the x-values of class 1 (X1) with the
    # y-values of class 2 (X2), so the third class is not drawn at its true
    # positions; X2[:, 0] would be the matching x-values.
    axs[plotCount].scatter(X1[:, 0], X2[:, 1], c='y', marker='o',
                           label=iris.target_names[2])
    # Column 0 of X (x-axis) is petal length, column 1 (y-axis) is petal width.
    axs[plotCount].set_xlabel('Petal Length')
    axs[plotCount].set_ylabel('Petal Width')
    axs[plotCount].legend()
    axs[plotCount].set_title('n_neighbours = ' + str(j))
    plotCount += 1
fig.suptitle('Petal Width vs Length')
plt.show()
The introduction of the arrays X0, X1 and X2 seems to complicate things too much and makes it hard to make the code more Pythonic.
Some things that should be avoided in Python:
the superfluous variable plotCount only to iterate through the axes, can be left out and replaced by for j, ax in zip(n_neighbors, axs).
The contents of X0, X1 and X2 can be obtained directly via `X[:, 0][y == y_val], X[:, 1][y == y_val]`, which moreover makes it easy to write the scatter plots in one loop. You can read more about numpy's advanced indexing in this doc.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from matplotlib.colors import ListedColormap
n_neighbors = [1, 3, 9]
# Load the iris dataset.
iris = datasets.load_iris()
X = iris.data[:, 2:4]  # Keep only columns 2 and 3 (petal length, petal width).
y = iris.target
h = 0.02  # Step size of the mesh.
# Create the colour map used for the predicted regions.
cmap_bold = ListedColormap(['darkorange', 'c', 'darkblue'])
# Initialize plotting. Close all the currently open plots, initialize the
# figure and subplot commands
plt.close('all')
fig, axs = plt.subplots(1, 3)
axs = axs.ravel()
# The evaluation mesh depends only on the data, not on the number of
# neighbours, so it is built once: the data range padded by one step h.
x_min, x_max = X[:, 0].min() - h, X[:, 0].max() + h
y_min, y_max = X[:, 1].min() - h, X[:, 1].max() + h
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
mesh_points = np.c_[xx.ravel(), yy.ravel()]
for j, ax in zip(n_neighbors, axs):
    # Create the instance of the Neighbours classifier and fit the data.
    knn = KNeighborsClassifier(n_neighbors=j)
    knn.fit(X, y)
    # Plot the decision regions: predict a class for each mesh point and
    # colour the mesh accordingly.
    Z = knn.predict(mesh_points).reshape(xx.shape)
    ax.pcolormesh(xx, yy, Z, cmap=cmap_bold)
    # Plot the training points, one scatter call per class via a boolean mask.
    for y_val, (color, name) in enumerate(zip(['k', 'r', 'y'], iris.target_names)):
        ax.scatter(X[:, 0][y == y_val], X[:, 1][y == y_val], c=color, marker='o', label=name)
    # Column 0 of X (x-axis) is petal length, column 1 (y-axis) is petal width.
    ax.set_xlabel('Petal Length')
    ax.set_ylabel('Petal Width')
    ax.legend()
    ax.set_title(f'n_neighbours = {j}')
fig.suptitle('Petal Width vs Length')
plt.show()

How to plot SVM decision boundary in sklearn Python?

Using SVM with sklearn library, I would like to plot the data with each labels representing its color. I don't want to color the points but filling area with colors.
I have now :
d_pred, d_train_std, d_test_std, l_train, l_test
d_pred are the labels predicted.
I would plot d_pred with d_train_std with shape : (70000,2) where X-axis are the first column and Y-Axis the second column.
Thank you.
You cannot visualize the decision surface for a lot of features. This is because the dimensions will be too many and there is no way to visualize an N-dimensional surface.
However, you can use 2 features and plot nice decision surfaces as follows.
I have also written an article about this here:
https://towardsdatascience.com/support-vector-machines-svm-clearly-explained-a-python-tutorial-for-classification-problems-29c539f3ad8?source=friends_link&sk=80f72ab272550d76a0cc3730d7c8af35
Case 1: 2D plot for 2 features and using the iris dataset
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
iris = datasets.load_iris()
# Features: only the first two columns; targets: the class indices.
X, y = iris.data[:, :2], iris.target
def make_meshgrid(x, y, h=.02):
    """Build a regular 2D grid covering the range of ``x`` and ``y``.

    The grid spans ``[x.min() - 1, x.max() + 1)`` by ``[y.min() - 1, y.max() + 1)``
    with step ``h`` along both axes.

    Parameters
    ----------
    x, y : arrays providing the data range along each axis.
    h : grid step size.

    Returns
    -------
    xx, yy : coordinate matrices as returned by ``np.meshgrid``.
    """
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    return xx, yy
def plot_contours(ax, clf, xx, yy, **params):
    """Draw the class predicted by ``clf`` over a grid as filled contours.

    Parameters
    ----------
    ax : Matplotlib axes to draw on.
    clf : fitted classifier; its ``predict`` is called on the grid points.
    xx, yy : coordinate matrices of the grid (same shape).
    **params : forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # predict() expects one row per point, so the grid is flattened to (n, 2)
    # and the predictions are folded back into the grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out
model = svm.SVC(kernel='linear')
clf = model.fit(X, y)
fig, ax = plt.subplots()
# title for the plots
title = ('Decision surface of linear SVC ')
# Set-up grid for plotting.
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)
plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
points = ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
ax.set_ylabel('y label here')
ax.set_xlabel('x label here')
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(title)
# None of the artists carries a label, so a bare ax.legend() has nothing to
# show; build one legend entry per class from the scatter's colour mapping.
ax.legend(*points.legend_elements(), title='Classes')
plt.show()
Case 2: 3D plot for 3 features and using the iris dataset
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from mpl_toolkits.mplot3d import Axes3D
iris = datasets.load_iris()
X = iris.data[:, :3]  # we only take the first three features.
Y = iris.target
# make it a binary classification problem: keep classes 0 and 1 only
keep = np.logical_or(Y == 0, Y == 1)
X = X[keep]
Y = Y[keep]
model = svm.SVC(kernel='linear')
clf = model.fit(X, Y)


# The separating plane is the set of points p with np.dot(clf.coef_[0], p) + b = 0.
def z(x, y):
    """Return the third coordinate of the separating plane above ``(x, y)``."""
    w = clf.coef_[0]
    return (-clf.intercept_[0] - w[0] * x - w[1] * y) / w[2]


# Span the plane over the range of the first two features, so that it is drawn
# where the data actually lies rather than over a fixed interval.
x, y = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 30),
                   np.linspace(X[:, 1].min(), X[:, 1].max(), 30))
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# class 0 as blue circles, class 1 as red squares
ax.plot3D(X[Y == 0, 0], X[Y == 0, 1], X[Y == 0, 2], 'ob')
ax.plot3D(X[Y == 1, 0], X[Y == 1, 1], X[Y == 1, 2], 'sr')
ax.plot_surface(x, y, z(x, y))
ax.view_init(30, 60)
plt.show()
It can be difficult to get the function in 3D. An easy way to get a visualization is to get a large amount of points that cover your point space and run them through your learned function (my_model.predict), keep the points that hit inside the function, and visualize them. The more you add the more defined the boundary will be.
Here's my code that does what @Christian Tuchez describes:
# NOTE(review): this snippet originally spelled the array name "1_test", which is
# not a valid Python identifier; X_test is assumed to be the array of sample
# points to classify -- confirm against the surrounding code.
outputs = my_clf.predict(X_test)
# save the index of every point whose predicted label is 1
hits = [i for i, label in enumerate(outputs) if label == 1]
This saves the index of all the points that hit in the function (saved in the "hits" list). You can probably accomplish this without a loop, I just found it easiest for me.
Then to display just those points, you'd write something like this:
ax.scatter(1_test[hits[:], 0], 1_test[hits[:], 1], 1_test[hits[:], 2], c="cyan", s=2, edgecolor=None)

Categories

Resources