SVM implementation not classifying correctly - Python

I am building an SVM in Python. However, my implementation is generating the wrong plane. I think it has something to do with my parameters (Lagrange multipliers) being so small, but I am not sure. I think I am doing the convex optimization right. Maybe my data isn't in the right format. I based my code on these tutorials: http://tullo.ch/articles/svm-py/ and http://www.mblondel.org/journal/2010/09/19/support-vector-machines-in-python/.
Here is my code:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import cvxopt
from matplotlib import cm
from cvxopt import matrix, solvers
# http://www.tristanfletcher.co.uk/SVM%20Explained.pdf
class SVM:
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def findParameters(self, X, y):
        # solves: min 1/2 x^T P x + q^T x  subject to  Gx <= h, Ax = b
        # the y's are the label vector
        # put everything into cvxopt form
        """P = cvxopt.matrix(np.outer(self.y, self.y) * self.gramMatrix())
        q = cvxopt.matrix((np.ones(len(self.y))).T)
        #G =
        #h =
        limits = np.asarray(self.y)
        A = cvxopt.matrix(limits.T)
        # generates a matrix of zeros
        b = cvxopt.matrix(np.zeros(len(self.y)))
        # actually compute
        param = cvxopt.solvers.qp(P, q, G, h, A, b)"""
        n_samples, n_features = X.shape
        K = self.gramMatrix(X)
        P = cvxopt.matrix(np.outer(y, y) * K)
        q = cvxopt.matrix(-1 * np.ones(n_samples))
        Gtry = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
        htry = cvxopt.matrix(np.zeros(n_samples))
        A = cvxopt.matrix(y, (1, n_samples))
        b = cvxopt.matrix(0.0)
        param = cvxopt.solvers.qp(P, q, Gtry, htry, A, b)
        array = param['x']
        return array
    def WB_calculator(self, X, y):
        # calculates the w vector
        yi = self.y
        X = np.asarray(X)
        y = np.asarray(y)
        important = self.findParameters(X, y)
        print("these are parameters")
        print(important)
        firstsum = [0 for _ in range(X.shape[1])]  # one entry per feature
        for point in range(0, len(important)):
            liste = X[point] * important[point] * yi[point]
            firstsum = [x + y for x, y in zip(firstsum, liste)]
        # this part calculates the bias
        # this is a very naive implementation of the bias
        b = 0
        for i in range(0, len(important)):
            b = b + (yi[i] - np.dot(firstsum, X[i]))
        avgB = b / len(important)
        answer = (firstsum, avgB)
        print("w vector")
        print(firstsum)
        return answer
    def polynomialK(self, u, v, b):
        return (np.dot(u, v) + b) ** 2
    # Gaussian kernel function
    def gaussianK(self, v1, v2, sigma):
        return np.exp(-np.linalg.norm(v1 - v2, 2) ** 2 / (2. * sigma ** 2))
    # computes the Gram matrix for all points included in the data
    # this is basically a matrix of dot products
    def gramMatrix(self, X):
        gramMatrix = []
        data = np.asarray(self.X)
        for x in data:
            row = []
            for y in data:
                row.append(np.dot(x, y))
            gramMatrix.append(row)
        return gramMatrix
    def determineAcceptance(self, point, X, y):
        # I'm not sure if this is the proper bound; let's check
        cutoff = self.WB_calculator(X, y)
        if np.dot(cutoff[0], point) + cutoff[1] > 0:
            print("You got in")
        elif np.dot(cutoff[0], point) + cutoff[1] < 0:
            print("Study")
    # plots the plane and the points
    def Graph(self, X, y):
        important_stuff = self.WB_calculator(X, y)
        weights = important_stuff[0]
        c = important_stuff[1]
        # here we actually graph the function
        graphable = X.T
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        xs = graphable[0]
        ys = graphable[1]
        zs = graphable[2]
        colors = self.y
        ax.scatter(xs, ys, zs, c=colors)
        ax.set_xlabel("A")
        ax.set_ylabel("B")
        ax.set_zlabel("C")
        # this changes the orientation and look of the surface
        ax.view_init(azim=180 + 40, elev=22)
        X = np.arange(-2, 2, 0.25)
        Y = np.arange(-2, 2, 0.25)
        X, Y = np.meshgrid(X, Y)
        Z = (-weights[0] * X - weights[1] * Y - c) / weights[2]
        surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm,
                               linewidth=0, antialiased=True)
        plt.show()

# list of points to test
a = [[-.1, -.1, -.1], [-.2, -.2, -.2], [.15, .15, .15], [.9, .9, .9], [.95, .95, .95]]
check = np.asarray(a)
b = [.01, .01, .01, 1, 1]
bigger = np.asarray(b)
d = SVM(a, b)
print(d.gramMatrix(check)[0])
print("parameters ya")
print(d.findParameters(check, bigger))
print(d.WB_calculator(check, bigger))
d.Graph(check, bigger)
d.determineAcceptance([.01, .01, .01], check, bigger)
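One thing worth double-checking about the data format: the dual problem being solved here (maximize sum(alpha) - 1/2 * sum alpha_i alpha_j y_i y_j K(x_i, x_j) subject to sum(alpha_i * y_i) = 0) is derived for labels y_i in {-1, +1}. The labels above are 0.01 and 1, so the equality constraint is not the usual one and the resulting multipliers can come out very small. A minimal sketch of relabeling, assuming -1/+1 labels are what is intended:

import numpy as np
import cvxopt

raw_labels = np.asarray([.01, .01, .01, 1, 1])   # the labels used above
y = np.where(raw_labels > 0.5, 1.0, -1.0)        # the SVM dual assumes labels in {-1, +1}
A = cvxopt.matrix(y, (1, len(y)), 'd')           # 'd' forces double precision, which cvxopt expects
print(y)  # [-1. -1. -1.  1.  1.]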

Related

How can I draw a quadratic function?

I have a question concerning numpy.
I loaded my data from a CSV file with pandas and fit it with sklearn, but the plotted graph looks wrong.
I want to plot a gentle curve of the form "y = a * x**2 + b * x + c".
How can I change my code to get the quadratic function?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

data_list = pd.read_csv(r'과제3_data_list.csv')
type_list = list(["B2V","B3V","B5V","B8V","B9V","A0V","A2V","A3V","A7V","F0V","F3V","F7V","G0V","G8V","K0V","K2V","K3V","K4V","K5V","K6V"])

def N(x):
    # drops NaN entries
    y = list()
    for i in range(len(x)):
        if np.isnan(x[i]) == False:
            y.insert(i, x[i])
    return y

Type = np.array(type_list).reshape(-1, 1)
standard = np.array(N(np.array(data_list["NO"]))).reshape(-1, 1)
standard_mb = np.array(N(np.array(data_list["mb"]))).reshape(-1, 1)
standard_size = np.array(N(np.array(data_list["mean"]))).reshape(-1, 1)
star_number = np.array(N(np.array(data_list["number"]))).reshape(-1, 1)
star_mbmv = np.array(N(np.array(data_list["mb-mv"]))).reshape(-1, 1)
star_mv = np.array(N(np.array(data_list["mv"]))).reshape(-1, 1)
evas_mbmv = np.array(N(np.array(data_list["evas_mb-mv"]))).reshape(-1, 1)
evas_Mv = np.array(N(np.array(data_list["evas_Mv"]))).reshape(-1, 1)

Linear = LinearRegression()
standard_size_sorted = sorted(standard_size)
Poly = PolynomialFeatures(degree=2, include_bias=False)
x_poly = Poly.fit_transform(standard_size)
Linear.fit(x_poly, standard_mb)
y = Linear.predict(x_poly)

def Poly_func(x):
    y = Linear.coef_[0][0] * x + Linear.coef_[0][1] * x ** 2 + Linear.intercept_
    return y

plt.scatter(standard_size, standard_mb)  # this is just the x, y data
plt.plot(standard_size, Poly_func(standard_size))  # this plot has the trouble
plt.show()
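One likely cause of the weird graph: plt.plot connects points in the order they are given, and standard_size is unsorted, so the quadratic gets drawn as a zig-zag. A minimal sketch, assuming the Linear fit and Poly_func above, that evaluates the polynomial on a sorted, evenly spaced grid to get a gentle curve:

import numpy as np
import matplotlib.pyplot as plt

# evenly spaced, sorted x values spanning the data range
xs = np.linspace(standard_size.min(), standard_size.max(), 200).reshape(-1, 1)
plt.scatter(standard_size, standard_mb)  # the raw data
plt.plot(xs, Poly_func(xs), 'r')         # smooth curve y = a*x**2 + b*x + c
plt.show()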

How to automatically scale the plot canvas

I have a plot with a fixed 10x10 extent:
y, x = np.mgrid[-10:10:100j, -10:10:100j]
When I draw small figures, all is good. But when coordinates go beyond the grid, the plot is cut off. How can I make the plot canvas zoom automatically?
def f(x, a, b):
    return x**3 + a*x + b

def show_plot(plt, A, B, P, Q):  # A=-7, B=10, P=4, Q=8
    y, x = np.mgrid[-10:10:100j, -10:10:100j]
    xp = P
    yp = np.sqrt(f(xp, A, B))
    xq = Q
    yq = np.sqrt(f(xq, A, B))
    m = 0.0
    if P == Q:
        # slope of the tangent line at P
        xx = 3*pow(xp, 2)
        m = (xx + A)/(2*yp)
    else:
        # slope of the secant line through P and Q
        m = (yp - yq)/(xp - xq)
    xr = m*m - xp - xq
    yr1 = yp + m*(xr - xp)
    yr2 = yq + m*(xr - xq)
    plt.contour(x, y, y**2 - f(x, A, B), levels=[0])
    b = -xp*m + yp
    poly = np.poly1d([-1, m**2, 2*m*b + 3, b**2 - 5])
    x = np.roots(poly)
    y = np.sqrt(f(x, A, B))
    x = np.linspace(-5, 5)
    plt.plot(x, m*x + b)
    coordinates = [('P', xp, yp), ('Q', xq, yq), ('-R', xr, -yr1)]
    for x in coordinates:
        plt.plot(x[1], x[2], 'ro-')
        plt.annotate(x[0], xy=(x[1] - 0.2, x[2] + 0.2), horizontalalignment='right',
                     verticalalignment='top', family='fantasy')
    plt.plot([xr, xr], [yr1, -yr1], 'g-')
    plt.grid(True)
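One approach, sketched below with made-up data (ax.relim, ax.autoscale_view, and ax.margins are standard matplotlib): add the artists first, then let the axes recompute their limits from the data instead of relying on the fixed -10..10 grid:

import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
xs = np.linspace(-5, 15, 100)                  # hypothetical data that leaves the 10x10 area
ax.plot(xs, 0.5 * xs + 3, 'g-')
ax.plot([4, 8, 12], [9.0, 23.0, 40.0], 'ro')   # hypothetical P, Q, R markers
ax.margins(0.1)       # request 10% padding around the data
ax.relim()            # recompute data limits from everything drawn so far
ax.autoscale_view()   # rescale the axes to fit those limits
plt.show()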

'float' has no attribute dtype

I have a set of data to which I have fit a normal distribution using numpy and scipy. But when I try to plot the pdf, I get the error in the title.
I have tried to change the dtype of Z, but that did not work. Any suggestions would help. Thanks.
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
import random
from sympy.matrices import Matrix
from sympy import symbols, pprint, N
from scipy.stats import multivariate_normal
from target import true_target_trajectory, target_posiion

def plot_gaussian(X, Y, Z):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(X, Y, Z)
    plt.show()

def covariance(x, y):
    sigma1 = np.std(x, dtype=np.float64)
    sigma2 = np.std(y, dtype=np.float64)
    cov = np.matrix([[sigma1, sigma1*sigma2], [sigma1*sigma2, sigma2]])
    min_eig = np.min(np.real(np.linalg.eigvals(cov)))
    if min_eig < 0:
        cov -= 10*min_eig * np.eye(*cov.shape)
    return cov

def gaussian(x, mu, cov):
    rv = multivariate_normal(mu, cov)
    return rv.pdf(x)

#plot_gaussian()
vin = 300
qin = 9
x = []
y = []
time = np.linspace(0, 2*np.pi, 100)
for t in time:
    cc = target_posiion(vin, qin, t)
    x.append(cc.T[0])
    y.append(cc.T[1])

mu = np.array([np.mean(x), np.mean(y)])
cov = covariance(x, y)
X, Y = np.meshgrid(x, y)
pos = np.dstack((X, Y))
Z = gaussian(pos, mu, cov)
plot_gaussian(X, Y, Z)
I tried to reproduce the issue with x = np.linspace(-1, 3, 100) and y = np.linspace(0, 4, 100), but that did not give any error and I got the bell curve as expected.
So I am attaching the code for target_posiion (from target.py):
import random
import numpy as np
from sympy.vector.coordsysrect import CoordSys3D
from sympy.physics.mechanics import dynamicsymbols
from sympy import symbols, sin, pprint, Derivative, Identity, N
from sympy.matrices import Matrix, BlockMatrix, block_collapse

C = CoordSys3D('C')
i, j, k = C.base_vectors()

def evaluate_matrix(m, v_in, q_in, tk):
    w, t = symbols('w t')
    v0, q = symbols('v0 q')
    params = {v0: v_in, q: q_in, t: tk}
    return Matrix([[N(m[0].subs(params)), N(m[1].subs(params))]]).T

def true_target_trajectory(v_in, q_in, tk):
    w, t = symbols('w t')
    v0, q, A = symbols('v0 q A')
    r, v, a, x, y = dynamicsymbols('r v a x y')
    A = (v0**2)/q
    w = q/(2*v0)
    x = A*sin(w*t)*i
    y = A*sin(2*w*t)*j
    r = x + y
    r_m = Matrix(r.to_matrix(C)[:2])
    v = Derivative(r, t).doit()
    v_m = Matrix(v.to_matrix(C)[:2])
    a = Derivative(v, t).doit()
    a_m = Matrix(a.to_matrix(C)[:2])
    x_k = BlockMatrix([[r_m.T, v_m.T, a_m.T]]).T
    I = Identity(2)
    H = BlockMatrix([[I, I, I]])
    z = evaluate_matrix(block_collapse(H*x_k), v_in, q_in, tk)
    return z

def target_posiion(v_in, q_in, tk):
    sigma = 50
    u_k = Matrix([[random.gauss(0, 1), random.gauss(0, 1)]]).T
    z = true_target_trajectory(v_in, q_in, tk)
    z_c_k = z + sigma*u_k
    return z_c_k
The problem
The problem is that your x and y are lists of type sympy.core.numbers.Float, not regular Python float. Numpy doesn't know how to convert Sympy numeric types, so meshgrid ends up returning X and Y arrays of dtype=object. Down the line, this ends up screwing up the call to ax.plot_surface.
The fix
Just convert x and y to standard Numpy arrays of np.float64 before you pass them into meshgrid:
X, Y = np.meshgrid(np.array(x).astype(float), np.array(y).astype(float))
Once you do that, everything should be fine.
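For reference, a minimal sketch of the dtype problem in isolation, using sympy.Float directly instead of target_posiion:

import numpy as np
from sympy import Float

x = [Float(1.0), Float(2.0), Float(3.0)]  # sympy numbers, like the ones target_posiion returns
X, Y = np.meshgrid(x, x)
print(X.dtype)    # object -- this is what trips up plot_surface

xf = np.array(x).astype(float)            # the fix: convert to plain float64 first
Xf, Yf = np.meshgrid(xf, xf)
print(Xf.dtype)   # float64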

How to calculate error in Polynomial Linear Regression?

I am trying to calculate the error rate on the training data I'm using.
I believe I'm calculating the error incorrectly. The formula is:
E(w) = (1/N) * sum_{n=1..N} (t_n - y(x_n))^2
where y is calculated as:
y(x) = w_0 + w_1*x + w_2*x^2 + ... + w_M*x^M
I am calculating this in the function fitPoly(M), on the errTrain line. I believe I am incorrectly calculating y(x_n), but I don't know what else to do.
Below is a Minimal, Complete, and Verifiable example.
import numpy as np
import matplotlib.pyplot as plt

dataTrain = [[2.362761180904257019e-01, -4.108125266714775847e+00],
             [4.324296163702689988e-01, -9.869308732049049127e+00],
             [6.023323504115264404e-01, -6.684279243433971729e+00],
             [3.305079685397107614e-01, -7.897042003779912278e+00],
             [9.952423271981121200e-01, 3.710086310489402628e+00],
             [8.308127402955634011e-02, 1.828266768673480147e+00],
             [1.855495407116576345e-01, 1.039713135916495501e+00],
             [7.088332047815845138e-01, -9.783208407540947560e-01],
             [9.475723071629885697e-01, 1.137746192425550085e+01],
             [2.343475721257285427e-01, 3.098019704040922750e+00],
             [9.338350584099475160e-02, 2.316408265530458976e+00],
             [2.107903139601833287e-01, -1.550451474833406396e+00],
             [9.509966727520677843e-01, 9.295029459100994984e+00],
             [7.164931165416982273e-01, 1.041025972594300075e+00],
             [2.965557300301902011e-03, -1.060607693351102121e+01]]

def strip(L, xt):
    ret = []
    for i in L:
        ret.append(i[xt])
    return ret

x1 = strip(dataTrain, 0)
y1 = strip(dataTrain, 1)

# HELP HERE
def getY(m, w, D):
    y = w[0]
    y += np.sum(w[1:] * D[:m])
    return y
# HELP ABOVE

def dataMatrix(X, M):
    Z = []
    for x in range(len(X)):
        row = []
        for m in range(M + 1):
            row.append(X[x][0] ** m)
        Z.append(row)
    return Z

def fitPoly(M):
    t = []
    for i in dataTrain:
        t.append(i[1])
    w, _, _, _ = np.linalg.lstsq(dataMatrix(dataTrain, M), t)
    w = w[::-1]
    errTrain = np.sum(np.subtract(t, getY(M, w, x1)) ** 2) / len(x1)
    print('errTrain: %s' % (errTrain))
    return [w, errTrain]

#fitPoly(8)
def plotPoly(w):
    plt.ylim(-15, 15)
    x, y = zip(*dataTrain)
    plt.plot(x, y, 'bo')
    xw = np.arange(0, 1, .001)
    yw = np.polyval(w, xw)
    plt.plot(xw, yw, 'r')

#plotPoly(fitPoly(3)[0])
def bestPoly():
    m = 0
    plt.figure(1)
    plt.xlim(0, 16)
    plt.ylim(0, 250)
    plt.xlabel('M')
    plt.ylabel('Error')
    plt.suptitle('Question 3: training and test error')
    while m < 16:
        plt.figure(0)
        plt.subplot(4, 4, m + 1)
        plotPoly(fitPoly(m)[0])
        plt.figure(1)
        plt.plot(fitPoly(m)[1])
        #plt.plot(fitPoly(m)[2])
        m += 1
    plt.figure(3)
    plt.xlabel('t')
    plt.ylabel('x')
    plt.suptitle('Question 3: best-fitting polynomial (degree = 8)')
    plotPoly(fitPoly(8)[0])
    print('Best M: %d\nBest w: %s\nTraining error: %s' % (8, fitPoly(8)[0], fitPoly(8)[1]))

bestPoly()
Updated: This solution uses numpy's np.interp, which connects the points as a kind of "best fit" line. We then use your error function to find the difference between this interpolated line and the predicted y values for each polynomial degree.
import numpy as np
import matplotlib.pyplot as plt
import itertools

dataTrain = [
    [2.362761180904257019e-01, -4.108125266714775847e+00],
    [4.324296163702689988e-01, -9.869308732049049127e+00],
    [6.023323504115264404e-01, -6.684279243433971729e+00],
    [3.305079685397107614e-01, -7.897042003779912278e+00],
    [9.952423271981121200e-01, 3.710086310489402628e+00],
    [8.308127402955634011e-02, 1.828266768673480147e+00],
    [1.855495407116576345e-01, 1.039713135916495501e+00],
    [7.088332047815845138e-01, -9.783208407540947560e-01],
    [9.475723071629885697e-01, 1.137746192425550085e+01],
    [2.343475721257285427e-01, 3.098019704040922750e+00],
    [9.338350584099475160e-02, 2.316408265530458976e+00],
    [2.107903139601833287e-01, -1.550451474833406396e+00],
    [9.509966727520677843e-01, 9.295029459100994984e+00],
    [7.164931165416982273e-01, 1.041025972594300075e+00],
    [2.965557300301902011e-03, -1.060607693351102121e+01]
]

data = np.array(dataTrain)
data = data[data[:, 0].argsort()]
X, y = data[:, 0], data[:, 1]

fig, ax = plt.subplots(4, 4)
indices = list(itertools.product([0, 1, 2, 3], repeat=2))
for i, loc in enumerate(indices, start=1):
    xx = np.linspace(X.min(), X.max(), 1000)
    yy = np.interp(xx, X, y)
    w = np.polyfit(X, y, i)
    y_pred = np.polyval(w, xx)
    ax[loc].scatter(X, y)
    ax[loc].plot(xx, y_pred)
    ax[loc].plot(xx, yy, 'r--')
    error = np.square(yy - y_pred).sum() / X.shape[0]
    print(error)
plt.show()
This prints out:
2092.19807848
1043.9400277
1166.94550318
252.238810889
225.798905379
155.785478366
125.662973726
143.787869281
6553.66570273
10805.6609259
15577.8686283
13536.1755299
108074.871771
213513916823.0
472673224393.0
1.01198058355e+12
Visually, it plots a 4x4 grid: the data as a scatter, the fitted polynomial for each degree, and the interpolated line in red dashes.
From here, it's just a matter of saving those errors to a list and finding the minimum.
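For example, a small sketch of that, reusing dataTrain from the question:

import numpy as np

data = np.array(dataTrain)            # dataTrain as defined in the question
data = data[data[:, 0].argsort()]
X, y = data[:, 0], data[:, 1]
xx = np.linspace(X.min(), X.max(), 1000)
yy = np.interp(xx, X, y)              # the interpolated reference line

errors = []
for degree in range(1, 17):
    w = np.polyfit(X, y, degree)
    y_pred = np.polyval(w, xx)
    errors.append(np.square(yy - y_pred).sum() / X.shape[0])

best_degree = 1 + int(np.argmin(errors))   # degrees start at 1
print(best_degree, errors[best_degree - 1])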
I may contribute:
def pol_y(x, w):
    y = 0
    power = 0
    for i in w:
        y += i * (x ** power)
        power += 1
    return y
The M is included implicitly because it is the final index of w. So if w = [0, 0, 1], then pol_y(x, w) is the same as f(x) = x^2.
If you want to map the first column of dataTrain:
get_Y = [pol_y(i, w) for i in x1]
The error may then be calculated by:
vec_error = [(y1[i] - get_Y[i]) ** 2 for i in range(0, len(y1))]
train_error = np.sum(vec_error) / len(y1)
Hope this helps.
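For comparison, a minimal sketch of the same training-error computation using numpy's built-in helpers, assuming x1 and y1 as defined in the question:

import numpy as np

# x1, y1 are the two columns of dataTrain, as in the question
for M in range(1, 9):
    w = np.polyfit(x1, y1, M)     # coefficients, highest degree first
    y_pred = np.polyval(w, x1)    # y(x_n) for every training point
    errTrain = np.mean((np.array(y1) - y_pred) ** 2)
    print('M=%d errTrain=%.3f' % (M, errTrain))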

Python: heat density plot in a disk

My goal is to make a density heat map of a sphere in 2D. The plotting code below the line works when I use rectangular domains. However, I am trying to use the code for a circular domain. The radius of the sphere is 1. The code I have so far is:
from pylab import *
import numpy as np
from matplotlib.colors import LightSource
from numpy.polynomial.legendre import leggauss, legval

xi = 0.0
xf = 1.0
numx = 500
yi = 0.0
yf = 1.0
numy = 500

def f(x):
    if 0 <= x <= 1:
        return 100
    if -1 <= x <= 0:
        return 0

deg = 1000
xx, w = leggauss(deg)
L = np.polynomial.legendre.legval(xx, np.identity(deg))
integral = (L * (f(x) * w)[None, :]).sum(axis=1)
c = (np.arange(1, 500) + 0.5) * integral[1:500]

def r(x, y):
    return np.sqrt(x ** 2 + y ** 2)

theta = np.arctan2(y, x)
x, y = np.linspace(0, 1, 500000)

def T(x, y):
    return (sum(r(x, y) ** l * c[:, None] *
                np.polynomial.legendre.legval(xx, identity(deg)) for l in range(1, 500)))
T(x, y) should equal the sum over l of the coefficients c_l, times the radius r(x, y) raised to the l-th power, times the Legendre polynomial P_l evaluated at cos(theta); that is, T(x, y) = sum_l c_l * r(x, y)**l * P_l(cos(theta)).
In "python: integrating a piecewise function" I learned how to use the Legendre polynomials in a summation, but that method is slightly different, and for the plotting I need a function T(x, y).
This is the plotting code.
densityinterpolation = 'bilinear'
densitycolormap = cm.jet
densityshadedflag = False
densitybarflag = True
gridflag = True
plotfilename = 'laplacesphere.eps'

x = arange(xi, xf, (xf - xi) / (numx - 1))
y = arange(yi, yf, (yf - yi) / (numy - 1))
X, Y = meshgrid(x, y)
z = T(X, Y)

if densityshadedflag:
    ls = LightSource(azdeg=120, altdeg=65)
    rgb = ls.shade(z, densitycolormap)
    im = imshow(rgb, extent=[xi, xf, yi, yf], cmap=densitycolormap)
else:
    im = imshow(z, extent=[xi, xf, yi, yf], cmap=densitycolormap)
im.set_interpolation(densityinterpolation)
if densitybarflag:
    colorbar(im)
grid(gridflag)
show()
I made the plot in Mathematica as a reference for what my end goal is.
If you set the values outside of the disk domain (or whichever domain you want) to float('nan'), those points will be ignored when plotting, leaving them white.
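A minimal sketch of that masking, with a stand-in function in place of the question's T(x, y):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

x = np.linspace(-1, 1, 500)
y = np.linspace(-1, 1, 500)
X, Y = np.meshgrid(x, y)
Z = np.cos(5 * np.sqrt(X**2 + Y**2))   # stand-in for T(X, Y)
Z[X**2 + Y**2 > 1.0] = float('nan')    # NaN outside the unit disk; imshow leaves it blank
im = plt.imshow(Z, extent=[-1, 1, -1, 1], cmap=cm.jet)
plt.colorbar(im)
plt.show()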
