I have some trouble plotting the image which is in my head.
I want to visualize the Kernel-trick with Support Vector Machines. So I made some two-dimensional data consisting of two circles (an inner and an outer circle) which should be separated by a hyperplane. Obviously this isn't possible in two dimensions - so I transformed them into 3D. Let n be the number of samples. Now I have an (n,3)-array (3 columns, n rows) X of data points and an (n,1)-array y with labels. Using sklearn I get the linear classifier via
# Build the linear-kernel SVM (C=1000) and train it on X, y in one expression;
# fit() hands back the fitted estimator itself.
clf = svm.SVC(C=1000, kernel='linear').fit(X, y)
I already plot the data points as a scatter plot via
# Scatter the first two feature columns, coloured by class label.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y, s=30, cmap=plt.cm.Paired)
Now I want to plot the separating hyperplane as a surface plot. My problem here is the missing explicit representation of the hyperplane, because the decision function only yields an implicit hyperplane via decision_function = 0. Therefore I need to plot the level set (of level 0) of a 4-dimensional object.
Since I'm not a Python expert, I would appreciate it if somebody could help me out! And I know that this isn't really the "style" of using an SVM, but I need this image as an illustration for my thesis.
Edit: my current "code"
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs, make_circles
from tikzplotlib import save as tikz_save
# Question script: draws a 2D RBF-SVM decision boundary, then lifts the data
# to 3D and fits a linear SVM there. The 3D surface plot itself is left
# unfinished (see the marker further down).
plt.close('all')
# Two make_circles draws of n_samples=50 each are concatenated row-wise below,
# giving one combined data set with labels in y.
#X, y = make_blobs(n_samples=40, centers=2, random_state=6)
X, y = make_circles(n_samples=50, factor=0.5, random_state=4, noise=.05)
X2, y2 = make_circles(n_samples=50, factor=0.2, random_state=5, noise=.08)
X = np.append(X,X2, axis=0)
y = np.append(y,y2, axis=0)
# shift X by +1 in both coordinates (towards [0,2]x[0,2])
X = np.array([[item[0] + 1, item[1] + 1] for item in X])
# Any entry that is still negative after the shift is replaced by 0.01
# (presumably so the square root in the 3D transform below stays real - confirm).
X[X<0] = 0.01
# 2D classifier with an RBF kernel, fitted on the shifted points.
clf = svm.SVC(kernel='rbf', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function
# The axis limits are read back from the scatter plot so the grid covers
# exactly the visible region.
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create a 30 x 30 grid on which to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
# One row per grid point, columns ordered (x, y) like the training data.
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary (level 0, solid) and margins (levels -1 and 1, dashed)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--','-','--'])
# plot support vectors as hollow black circles
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
linewidth=1, facecolors='none', edgecolors='k')
################## KERNEL TRICK - 3D ##################
# Each 2D point (x1, x2) is mapped to (x1**2, x2**2, sqrt(2*x1*x2)).
trans_X = np.array([[item[0]**2, item[1]**2, np.sqrt(2*item[0]*item[1])] for item in X])
fig = plt.figure()
ax = plt.axes(projection ="3d")
# creating scatter plot of the transformed points
ax.scatter3D(trans_X[:,0],trans_X[:,1],trans_X[:,2], c = y, cmap=plt.cm.Paired)
# Linear-kernel SVM on the 3D data.
clf2 = svm.SVC(kernel='linear', C=1000)
clf2.fit(trans_X, y)
# NOTE(review): `ax` already refers to the 3D axes created above; passing
# keyword arguments to plt.gca() is reported to fail on newer Matplotlib
# releases - confirm against the installed version.
ax = plt.gca(projection='3d')
xlim = ax.get_xlim()
ylim = ax.get_ylim()
zlim = ax.get_zlim()
### from here i don't know what to do ###
# Only 3 samples per axis, i.e. a 3 x 3 x 3 grid of 27 evaluation points.
xx = np.linspace(xlim[0], xlim[1], 3)
yy = np.linspace(ylim[0], ylim[1], 3)
zz = np.linspace(zlim[0], zlim[1], 3)
ZZ, YY, XX = np.meshgrid(zz, yy, xx)
xyz = np.vstack([XX.ravel(), YY.ravel(), ZZ.ravel()]).T
# Decision-function values on the 3D grid; nothing below draws them yet.
Z = clf2.decision_function(xyz).reshape(XX.shape)
#ax.contour(XX, YY, ZZ, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--','-','--'])
Desired Output
I want to get something like that.
In general I want to reconstruct what they do in this article, especially "Non-linear transformations".
Part of your question is addressed in this question on linear-kernel SVM. It's a partial answer, because only linear kernels can be represented this way, i.e. thanks to hyperplane coordinates accessible via the estimator when using linear kernel.
Another solution is to find the isosurface with marching_cubes
This solution involves installing the scikit-image toolkit (https://scikit-image.org), which makes it possible to find an isosurface of a given value (here I used 0, since it represents the distance to the hyperplane) from the mesh grid of the 3D coordinates.
In the code below (copied from yours), I implement the idea for any kernel (in the example, I used the RBF kernel), and the output is shown beneath the code. Please consider my footnote about 3D plotting with matplotlib, which may be another issue in your case.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from skimage import measure
from sklearn.datasets import make_blobs, make_circles
from tikzplotlib import save as tikz_save
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
# Answer script: plots the 2D RBF-SVM decision boundary, lifts the data to 3D,
# fits a second SVM there and renders its zero-level decision surface as a
# semi-transparent mesh extracted with marching cubes.
plt.close('all')

# Two noisy concentric-circle data sets of 50 points each, stacked row-wise
# into a single two-class problem.
#X, y = make_blobs(n_samples=40, centers=2, random_state=6)
X, y = make_circles(n_samples=50, factor=0.5, random_state=4, noise=.05)
X2, y2 = make_circles(n_samples=50, factor=0.2, random_state=5, noise=.08)
X = np.append(X, X2, axis=0)
y = np.append(y, y2, axis=0)

# Shift X by +1 in both coordinates (towards [0,2]x[0,2]); entries that are
# still negative are set to 0.01 so the square root used below stays real.
X = X + 1
X[X < 0] = 0.01

clf = svm.SVC(kernel='rbf', C=1000)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)

# plot the decision function over the visible region of the scatter plot
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# create a 30 x 30 grid to evaluate the model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)

# plot decision boundary (level 0) and margins (levels -1 and 1)
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])

# plot support vectors
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')

################## KERNEL TRICK - 3D ##################
# Lift every 2D point (x1, x2) to (x1**2, x2**2, sqrt(2*x1*x2)).
# NOTE(review): the usual degree-2 polynomial feature map has sqrt(2)*x1*x2 as
# its third component; the expression from the question is kept unchanged here
# - confirm which one is intended.
trans_X = np.column_stack([
    X[:, 0]**2,
    X[:, 1]**2,
    np.sqrt(2 * X[:, 0] * X[:, 1]),
])

fig = plt.figure()
ax = plt.axes(projection="3d")

# creating scatter plot
ax.scatter3D(trans_X[:, 0], trans_X[:, 1], trans_X[:, 2], c=y, cmap=plt.cm.Paired)

clf2 = svm.SVC(kernel='rbf', C=1000)
clf2.fit(trans_X, y)

# `ax` is already the 3D axes created above, so it is reused directly:
# plt.gca() no longer accepts keyword arguments such as projection='3d'.
# (The explicit-plane lambda built from clf2.coef_ was dropped: it was never
# called, and coef_ only exists for a linear kernel.)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
zlim = ax.get_zlim()

# 50 x 50 x 50 evaluation grid spanning the visible box.
xx = np.linspace(xlim[0], xlim[1], 50)
yy = np.linspace(ylim[0], ylim[1], 50)
zz = np.linspace(zlim[0], zlim[1], 50)

# indexing='ij' makes array axes 0/1/2 correspond to x/y/z. With the default
# 'xy' indexing the first two axes are swapped, which would mis-scale the
# marching-cubes vertices below.
XX, YY, ZZ = np.meshgrid(xx, yy, zz, indexing='ij')
xyz = np.vstack([XX.ravel(), YY.ravel(), ZZ.ravel()]).T
Z = clf2.decision_function(xyz).reshape(XX.shape)

# find the isosurface decision_function == 0 with marching cubes
dx = xx[1] - xx[0]
dy = yy[1] - yy[0]
dz = zz[1] - zz[0]
# marching_cubes replaces the removed marching_cubes_lewiner (Lewiner is its
# default method); `spacing` converts voxel indices into data-unit offsets.
verts, faces, _, _ = measure.marching_cubes(Z, 0, spacing=(dx, dy, dz), step_size=2)
# Vertices are offsets from the grid's first sample, so the grid origin must
# be ADDED to land in data coordinates.
verts = verts + np.array([xlim[0], ylim[0], zlim[0]])

# add as Poly3DCollection
mesh = Poly3DCollection(verts[faces])
mesh.set_facecolor('g')
mesh.set_edgecolor('none')
mesh.set_alpha(0.3)
ax.add_collection3d(mesh)
ax.view_init(20, -45)
plt.savefig('kerneltrick')
Running the code produces the following image with Matplotlib, where the green semi-transparent surface represents the non-linear decision boundary.
Footnote: 3D plotting with matplotlib
Note that Matplotlib 3D is not able to manage the "depth" of objects in some cases, because it can conflict with the zorder of those objects. This is the reason why the hyperplane sometimes looks as if it is plotted "on top of" the points, even though it should be "behind" them. This issue is a known bug discussed in the matplotlib 3d documentation and in this answer.
If you want to have better rendering results, you may want to use Mayavi, as recommended by the Matplotlib developers, or any other 3D Python plotting library.
I cannot find a curve that fits the data (lists 'chi' and 'm'). I used polyfit to generate the curve, but it was not enough to capture the behavior of the points.
The code below produces a plot that shows the discrepancy between the data and the fit.
import matplotlib.pyplot as plt
import numpy as np
chi = [159.227326193538,157.045536099339,154.874421083320,152.714227953804,150.565205206850,148.427603026261,146.301673283577,144.187669538078,142.085847036787,139.996462714462,137.919775193605,135.856044784456,133.805533484994,131.768504980940,129.745224645753,127.735959540633,125.740978414520,123.760551704092,121.794951533770,119.844451715712,117.909327749816,115.989856823722,114.086317812809,112.198991280194,110.328159476736,108.474106341033,106.637117499424,104.817480265986,103.015483642536,101.231418318633,99.4655766715733,97.7182527663948,95.9897423558747,94.2803428805298,92.5903534686167,90.9200749361326,89.2698097868135,87.6398622121363,86.0305380913169,84.4421449913117,82.8749921668166,81.3293905602669,79.8056528018393,78.3040932094484,76.8250277887500,75.3687742331392,73.9356519237512,72.5259819294609,71.1400870068830,69.7782916003724,68.4409218420233,67.1283055516702,65.8407722368873,64.5786530929887,63.3422810030283,62.1319905377998,60.9481179558368,59.7910012034130,58.6609799145416,57.5583954109757,56.4835907022086,55.4369104854728,54.4187011457414,53.4293107557267,52.4690890758814,51.5383875543978,50.6375593272080,49.7669592179839,48.9269437381375,48.1178710868206,47.3401011509247,46.5939955050811,45.8799174116612,45.1982318207762,44.5493053702771,43.9335063857545,43.3512048805394,42.8027725557022,42.2885828000534,41.8090106901432,41.3644329902617,40.9552281524389,40.5817763164445,40.2444593097885,39.9436606477201,39.6797655332288,39.4531608570438,39.2642351976343,39.1133788212092,39.0009836817171,38.9274434208471,38.8931533680273,38.8985105404262,38.9439136429520,39.0297630682529,39.1564608967166,39.3244108964711,39.5340185233838,39.7856909210623,40.0798369208539,40.4168670418459,40.7971934908652,41.2212301624788,41.6893926389935,42.2020981904556,42.7597657746519,43.3628160371087,44.0116713110920,44.7067556176079,45.4484946654022,46.2373158509606,47.0736482585089,47.9579226600125,48.8905715151762,49.8720289714460,50.9027308640062,51.9831147157818,53.1
136197374377,54.2946868273783,55.5267585717480,56.8102792444312,58.1456948070521,59.5334529089743,60.9740028873018,62.4677957668786,64.0152842602876,65.6169227678529,67.2731673776373,68.9844758654438,70.7513076948157,72.5741240170354,74.4533876711260,76.3895631838499,78.3831167697092,80.4345163309464,82.5442314575433,84.7127334272220,86.9404952054444,89.2279914454118,91.5756984880661,93.9840943620883,96.4536587839001,98.9848731576614,101.578220575274,104.234185816379,106.953255348357,109.735917326327,112.582661593151,115.493979679428,118.470364803498,121.512311871442,124.620317477080,127.794879901969,131.036499115411,134.345676774445,137.722916223849,141.168722496142,144.683602311584,148.268064078173,151.922617891649,155.647775535488,159.444050480909,163.311957886871,167.252014600072,171.264739154948,175.350651773679,179.510274366181,183.744130530113,188.052745550870,192.436646401591,196.896361743152,201.432421924170,206.045358981001,210.735706637743,215.504000306232,220.350777086043,225.276575764494,230.281936816639,235.367402405274,240.533516380936,245.780824281900,251.109873334181,256.521212451534,262.015392235454,267.592964975176,273.254484647676,279.000506917667,284.831589137604,290.748290347682,296.751171275834,302.840794337735,309.017723636798,315.282524964177,321.635765798766,328.078015307199,334.609844343848,341.231825450827,347.944532857988,354.748542482925,361.644431930971,368.632780495196]
m=[-1,-0.990000000000000,-0.980000000000000,-0.970000000000000,-0.960000000000000,-0.950000000000000,-0.940000000000000,-0.930000000000000,-0.920000000000000,-0.910000000000000,-0.900000000000000,-0.890000000000000,-0.880000000000000,-0.870000000000000,-0.860000000000000,-0.850000000000000,-0.840000000000000,-0.830000000000000,-0.820000000000000,-0.810000000000000,-0.800000000000000,-0.790000000000000,-0.780000000000000,-0.770000000000000,-0.760000000000000,-0.750000000000000,-0.740000000000000,-0.730000000000000,-0.720000000000000,-0.710000000000000,-0.700000000000000,-0.690000000000000,-0.680000000000000,-0.670000000000000,-0.660000000000000,-0.650000000000000,-0.640000000000000,-0.630000000000000,-0.620000000000000,-0.610000000000000,-0.600000000000000,-0.590000000000000,-0.580000000000000,-0.570000000000000,-0.560000000000000,-0.550000000000000,-0.540000000000000,-0.530000000000000,-0.520000000000000,-0.510000000000000,-0.500000000000000,-0.490000000000000,-0.480000000000000,-0.470000000000000,-0.460000000000000,-0.450000000000000,-0.440000000000000,-0.430000000000000,-0.420000000000000,-0.410000000000000,-0.400000000000000,-0.390000000000000,-0.380000000000000,-0.370000000000000,-0.360000000000000,-0.350000000000000,-0.340000000000000,-0.330000000000000,-0.320000000000000,-0.310000000000000,-0.300000000000000,-0.290000000000000,-0.280000000000000,-0.270000000000000,-0.260000000000000,-0.250000000000000,-0.240000000000000,-0.230000000000000,-0.220000000000000,-0.210000000000000,-0.200000000000000,-0.190000000000000,-0.180000000000000,-0.170000000000000,-0.160000000000000,-0.150000000000000,-0.140000000000000,-0.130000000000000,-0.120000000000000,-0.110000000000000,-0.100000000000000,-0.0900000000000000,-0.0800000000000000,-0.0700000000000000,-0.0599999999999999,-0.0499999999999999,-0.0400000000000000,-0.0300000000000000,-0.0200000000000000,-0.0100000000000000,0,0.0100000000000000,0.0200000000000000,0.0300000000000000,0.0400000000000000,0.0499999999999999,0.05999
99999999999,0.0700000000000000,0.0800000000000000,0.0900000000000000,0.100000000000000,0.110000000000000,0.120000000000000,0.130000000000000,0.140000000000000,0.150000000000000,0.160000000000000,0.170000000000000,0.180000000000000,0.190000000000000,0.200000000000000,0.210000000000000,0.220000000000000,0.230000000000000,0.240000000000000,0.250000000000000,0.260000000000000,0.270000000000000,0.280000000000000,0.290000000000000,0.300000000000000,0.310000000000000,0.320000000000000,0.330000000000000,0.340000000000000,0.350000000000000,0.360000000000000,0.370000000000000,0.380000000000000,0.390000000000000,0.400000000000000,0.410000000000000,0.420000000000000,0.430000000000000,0.440000000000000,0.450000000000000,0.460000000000000,0.470000000000000,0.480000000000000,0.490000000000000,0.500000000000000,0.510000000000000,0.520000000000000,0.530000000000000,0.540000000000000,0.550000000000000,0.560000000000000,0.570000000000000,0.580000000000000,0.590000000000000,0.600000000000000,0.610000000000000,0.620000000000000,0.630000000000000,0.640000000000000,0.650000000000000,0.660000000000000,0.670000000000000,0.680000000000000,0.690000000000000,0.700000000000000,0.710000000000000,0.720000000000000,0.730000000000000,0.740000000000000,0.750000000000000,0.760000000000000,0.770000000000000,0.780000000000000,0.790000000000000,0.800000000000000,0.810000000000000,0.820000000000000,0.830000000000000,0.840000000000000,0.850000000000000,0.860000000000000,0.870000000000000,0.880000000000000,0.890000000000000,0.900000000000000,0.910000000000000,0.920000000000000,0.930000000000000,0.940000000000000,0.950000000000000,0.960000000000000,0.970000000000000,0.980000000000000,0.990000000000000,1]
# Degree-40 polynomial fitted with chi as abscissa and m as ordinate, wrapped
# in a callable.
poly = np.polyfit(chi, m, deg=40)
fit_fn = np.poly1d(poly)

# One figure, one axes: raw data in red, fitted values in blue, both drawn
# against chi on the vertical axis.
f, ax = plt.subplots()
ax.plot(m, chi, 'r-', label='data')
ax.plot(fit_fn(chi), chi, 'b-', label='adjust')
ax.set(xlabel='$m$', ylabel='$\chi^2$')
plt.legend()
plt.show()
plt.close()
The problem is that you mixed the x and the y coordinates while fitting and plotting the fit. Since m is the x-coordinate (independent variable) and chi is the y-coordinate (dependent variable), pass them in the right order. The lines modified are indicated by a comment #
# Fit chi as a function of m: m is the independent variable (x) and chi the
# dependent one (y), so they go to polyfit - and to the plot calls - in that
# order.
poly = np.polyfit(m, chi, deg = 4) # <-----
fit_fn = np.poly1d(poly)
f = plt.figure()
ax = f.add_subplot(111)
ax.plot(m, chi, 'rx', label = 'data') # <---- Just used x to plot symbols
ax.plot(m, fit_fn(m), 'b-', lw=2, label = 'adjust') # <-----
ax.set_xlabel('$m$')
# Raw string: '\c' is not a valid Python escape sequence and triggers a
# warning in a normal string literal; the rendered label is unchanged.
ax.set_ylabel(r'$\chi^2$')
plt.legend()
plt.show()
plt.close()
I am using Python to plot, and my code is:
import matplotlib.pyplot as plt
import numpy as np
# Data to be plotted: one entry per cell configuration (14 in total).
# x holds the bar positions 1..14 and x_test the matching tick labels.
x = list(range(1, 15))
x_test = [
    'grid50', 'grid100', 'grid150', 'grid250', 'grid500', 'grid750',
    'NN5', 'NN10', 'NN15', 'NN20', 'NN50', 'NN100',
    'CB', 'CBG',
]
# Classification result per configuration (drawn as red bars).
clf = [
    0.58502, 0.60799, 0.60342, 0.59629, 0.56464, 0.53757,
    0.62567, 0.63429, 0.63583, 0.63239, 0.63315, 0.63156,
    0.60630, 0.52755,
]
# Hit rate per configuration (drawn as the green line).
hitrate = [
    0.80544, 0.89422, 0.94029, 0.98379, 0.99413, 0.99921,
    0.99478, 0.99961, 0.99997, 0.99980, 0.99899, 0.99991,
    0.88435, 1.0,
]
# Obfuscation level per configuration (drawn as the blue line).
level = [
    23.04527, 9.90955, 4.35757, 1.46438, 0.51277, 0.15071,
    1.30057, 0.00016, 0.00001, 0.00021, 0.00005, 0.00004,
    6.38019, 0,
]
# Wide 20x7 figure with a single axes; the subplot area stops at 80% of the
# figure width, leaving the right-hand strip free.
fig, ax = plt.subplots(figsize=(20, 7))
fig.subplots_adjust(right=0.8)
# this is the function to put annotation on bars
def autolabel(rects):
    """Write the matching ``clf`` value just above the centre of each bar.

    ``rects`` is iterated in order; the i-th bar is labelled with ``clf[i]``
    (read from module scope). The text is horizontally centred on the bar and
    anchored by its bottom edge at 1.02 times the bar height, via ``plt.text``.
    """
    for idx, bar in enumerate(rects):
        label_y = 1.02 * bar.get_height()
        centre_x = bar.get_x() + bar.get_width() / 2.
        plt.text(centre_x, label_y, '%s' % (clf[idx]), ha='center', va='bottom')
def _annotate_points(target_axes, positions, values):
    """Label every (position, value) pair with str(value), 0.02 above the point."""
    for pos, val in zip(positions, values):
        target_axes.annotate(str(val), xy=(pos, val + 0.02))


# Tick labels for the bar positions (applied to the current axes).
plt.xticks(x, x_test)

# --- red bars: classification results on the primary axes ---
ins1 = ax.bar(x, clf, color='Red', align='center', label='classification results')
ax.set_ylabel('classification results', color='Red')
ax.tick_params(axis='y', colors='Red')
ax.set_ylim(0, 1.5)
autolabel(ins1)

# --- green line: hitrate on a twinned y-axis, each point annotated ---
ax2 = ax.twinx()
ins2 = ax2.plot(x, hitrate, marker='o', color='Green', linewidth=3.0, label='hitrate')[0]
ax2.set_ylabel('hitrate', color='Green')
ax2.tick_params(axis='y', colors='Green')
ax2.set_ylim(0, 1.5)
_annotate_points(ax2, x, hitrate)

# --- blue line: obfuscation level on a second twinned y-axis whose right
# spine is pushed outwards to x = 1.1 in axes coordinates ---
ax3 = ax.twinx()
axes = [ax, ax2, ax3]
ax3.spines['right'].set_position(('axes', 1.1))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
ins3 = ax3.plot(x, level, marker='^', color='Blue', linewidth=3.0, label='obfuscation level')[0]
ax3.set_ylabel('obfuscation level', color='Blue')
ax3.tick_params(axis='y', colors='Blue')
ax3.set_ylim(0, 25)
_annotate_points(ax3, x, level)

# Shared x-axis cosmetics, combined legend, grid, and display.
ax.set_xlabel('Cell Configurations')
ax.set_xlim(0, 15)
ax.set_title('benchmark')
ax.legend([ins1, ins2, ins3], ['clf', 'hit', 'level'])
plt.grid()
plt.show()
And I got a figure like :
The problem is that some of the numbers are not placed where they can be read clearly, and I don't know whether there is a method to put the annotations automatically in a blank area. Any ideas?