I am trying to calculate the error rate of the training data I'm using.
I believe I'm calculating the error incorrectly. The formula is the mean squared training error,
E(w) = (1/N) * sum_{n=1..N} (y(x_n) - t_n)^2,
and y is calculated as the polynomial y(x) = w_0 + w_1*x + ... + w_M*x^M = sum_{m=0..M} w_m * x^m.
I am calculating this in the function fitPoly(M) at line 49. I believe I am incorrectly calculating y(x(n)), but I don't know what else to do.
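For reference, this is how I read those two formulas in numpy terms (just a sketch with toy numbers, not my actual code below; note that np.polyval expects the highest-order coefficient first):
import numpy as np
w = [1.0, -2.0, 0.5]                          # toy w_0, w_1, w_2 (so M = 2)
x_n = np.array([0.1, 0.4, 0.9])               # toy training inputs
t_n = np.array([0.8, 0.3, -0.2])              # toy training targets
y_of_x = np.polyval(w[::-1], x_n)             # y(x_n) for every n
E = np.sum((y_of_x - t_n) ** 2) / len(x_n)    # the error formula above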
Below is the Minimal, Complete, and Verifiable example.
import numpy as np
import matplotlib.pyplot as plt
dataTrain = [[2.362761180904257019e-01, -4.108125266714775847e+00],
[4.324296163702689988e-01, -9.869308732049049127e+00],
[6.023323504115264404e-01, -6.684279243433971729e+00],
[3.305079685397107614e-01, -7.897042003779912278e+00],
[9.952423271981121200e-01, 3.710086310489402628e+00],
[8.308127402955634011e-02, 1.828266768673480147e+00],
[1.855495407116576345e-01, 1.039713135916495501e+00],
[7.088332047815845138e-01, -9.783208407540947560e-01],
[9.475723071629885697e-01, 1.137746192425550085e+01],
[2.343475721257285427e-01, 3.098019704040922750e+00],
[9.338350584099475160e-02, 2.316408265530458976e+00],
[2.107903139601833287e-01, -1.550451474833406396e+00],
[9.509966727520677843e-01, 9.295029459100994984e+00],
[7.164931165416982273e-01, 1.041025972594300075e+00],
[2.965557300301902011e-03, -1.060607693351102121e+01]]
def strip(L, xt):
ret = []
for i in L:
ret.append(i[xt])
return ret
x1 = strip(dataTrain, 0)
y1 = strip(dataTrain, 1)
# HELP HERE
def getY(m, w, D):
y = w[0]
y += np.sum(w[1:] * D[:m])
return y
# HELP ABOVE
def dataMatrix(X, M):
Z = []
for x in range(len(X)):
row = []
for m in range(M + 1):
row.append(X[x][0] ** m)
Z.append(row)
return Z
def fitPoly(M):
t = []
for i in dataTrain:
t.append(i[1])
w, _, _, _ = np.linalg.lstsq(dataMatrix(dataTrain, M), t)
w = w[::-1]
errTrain = np.sum(np.subtract(t, getY(M, w, x1)) ** 2)/len(x1)
print('errTrain: %s' % (errTrain))
return([w, errTrain])
#fitPoly(8)
def plotPoly(w):
plt.ylim(-15, 15)
x, y = zip(*dataTrain)
plt.plot(x, y, 'bo')
xw = np.arange(0, 1, .001)
yw = np.polyval(w, xw)
plt.plot(xw, yw, 'r')
#plotPoly(fitPoly(3)[0])
def bestPoly():
m = 0
plt.figure(1)
plt.xlim(0, 16)
plt.ylim(0, 250)
plt.xlabel('M')
plt.ylabel('Error')
plt.suptitle('Question 3: training and Test error')
while m < 16:
plt.figure(0)
plt.subplot(4, 4, m + 1)
plotPoly(fitPoly(m)[0])
plt.figure(1)
plt.plot(fitPoly(m)[1])
#plt.plot(fitPoly(m)[2])
m+= 1
plt.figure(3)
plt.xlabel('t')
plt.ylabel('x')
plt.suptitle('Question 3: best-fitting polynomial (degree = 8)')
plotPoly(fitPoly(8)[0])
print('Best M: %d\nBest w: %s\nTraining error: %s' % (8, fitPoly(8)[0], fitPoly(8)[1], ))
bestPoly()
Updated: This solution uses numpy's np.interp, which connects the sorted points with piecewise-linear interpolation (a kind of reference "best fit" curve). We then use your error function to measure the difference between this interpolated line and the predicted y values for each polynomial degree.
import numpy as np
import matplotlib.pyplot as plt
import itertools
dataTrain = [
[2.362761180904257019e-01, -4.108125266714775847e+00],
[4.324296163702689988e-01, -9.869308732049049127e+00],
[6.023323504115264404e-01, -6.684279243433971729e+00],
[3.305079685397107614e-01, -7.897042003779912278e+00],
[9.952423271981121200e-01, 3.710086310489402628e+00],
[8.308127402955634011e-02, 1.828266768673480147e+00],
[1.855495407116576345e-01, 1.039713135916495501e+00],
[7.088332047815845138e-01, -9.783208407540947560e-01],
[9.475723071629885697e-01, 1.137746192425550085e+01],
[2.343475721257285427e-01, 3.098019704040922750e+00],
[9.338350584099475160e-02, 2.316408265530458976e+00],
[2.107903139601833287e-01, -1.550451474833406396e+00],
[9.509966727520677843e-01, 9.295029459100994984e+00],
[7.164931165416982273e-01, 1.041025972594300075e+00],
[2.965557300301902011e-03, -1.060607693351102121e+01]
]
data = np.array(dataTrain)
data = data[data[:, 0].argsort()]
X,y = data[:, 0], data[:, 1]
fig,ax = plt.subplots(4, 4)
indices = list(itertools.product([0,1,2,3], repeat=2))
for i,loc in enumerate(indices, start=1):
xx = np.linspace(X.min(), X.max(), 1000)
yy = np.interp(xx, X, y)
w = np.polyfit(X, y, i)
y_pred = np.polyval(w, xx)
ax[loc].scatter(X, y)
ax[loc].plot(xx, y_pred)
ax[loc].plot(xx, yy, 'r--')
error = np.square(yy - y_pred).sum() / X.shape[0]
print(error)
plt.show()
This prints out:
2092.19807848
1043.9400277
1166.94550318
252.238810889
225.798905379
155.785478366
125.662973726
143.787869281
6553.66570273
10805.6609259
15577.8686283
13536.1755299
108074.871771
213513916823.0
472673224393.0
1.01198058355e+12
Visually, it plots out this:
From here, it's just a matter of saving those errors to a list and finding the minimum.
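For instance, collecting them in a list as the loop runs (a sketch that reuses X, y and indices from above):
errors = []
for i, loc in enumerate(indices, start=1):
    xx = np.linspace(X.min(), X.max(), 1000)
    yy = np.interp(xx, X, y)
    w = np.polyfit(X, y, i)
    y_pred = np.polyval(w, xx)
    errors.append(np.square(yy - y_pred).sum() / X.shape[0])
best_degree = int(np.argmin(errors)) + 1   # degrees start at 1 in the loop
print('best degree:', best_degree, 'with error', errors[best_degree - 1])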
I may contribute:
def pol_y(x, w):
    # w holds the coefficients from lowest to highest order: w[0] + w[1]*x + ...
    y = 0
    for power, coeff in enumerate(w):
        y += coeff * (x ** power)
    return y
The M is included implicitly because it is the final index of w. So if w = [0, 0, 1], then pol_y(x, w) is the same as f(x) = x^2.
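If you prefer numpy, the same evaluation is available as np.polyval, which expects the highest-order coefficient first, so the list has to be reversed:
import numpy as np
w = [0, 0, 1]                                   # represents f(x) = x^2
assert pol_y(3.0, w) == np.polyval(w[::-1], 3.0) == 9.0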
If you want to map the first column of dataTrain:
get_Y = [pol_y(i, w) for i in x1]
The error may then be calculated by
vec_error = [(y1[i] - get_Y[i])**2 for i in range(len(y1))]
train_error = np.sum(vec_error) / len(y1)
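Or, equivalently, with numpy arrays:
train_error = np.mean((np.array(y1) - np.array(get_Y)) ** 2)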
Hope this helps.
Related
I have to do a second-degree interpolation using an already existing code, changing its values to mine, but for some reason when I go ahead and graph the interpolation, the function suddenly stops (it is not continuous). Can someone help me figure out what's wrong? I believe it has something to do with line 43 (evaluation on new data points), but I am not sure.
Source code:
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-poster')
%matplotlib inline
def divided_diff(x, y):
'''
function to calculate the divided
differences table
'''
n = len(y)
coef = np.zeros([n, n])
# the first column is y
coef[:,0] = y
for j in range(1,n):
for i in range(n-j):
coef[i][j] = \
(coef[i+1][j-1] - coef[i][j-1]) / (x[i+j]-x[i])
return coef
def newton_poly(coef, x_data, x):
'''
evaluate the newton polynomial
at x
'''
n = len(x_data) - 1
p = coef[n]
for k in range(1,n+1):
p = coef[n-k] + (x -x_data[n-k])*p
return p
x = np.array([-5, -1, 0, 2])
y = np.array([-2, 6, 1, 3])
# get the divided difference coef
a_s = divided_diff(x, y)[0, :]
# evaluate on new data points
x_new = np.arange(-5, 2.1, .1)
y_new = newton_poly(a_s, x, x_new)
plt.figure(figsize = (12, 8))
plt.plot(x, y, 'bo')
plt.plot(x_new, y_new)
My code (adjusted for data points (0,0);(6.4,1.9);(10.6,4.3)):
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-poster')
%matplotlib inline
def divided_diff(x, y):
'''
function to calculate the divided
differences table
'''
n = len(y)
coef = np.zeros([n, n])
# the first column is y
coef[:,0] = y
for j in range(1,n):
for i in range(n-j):
coef[i][j] = \
(coef[i+1][j-1] - coef[i][j-1]) / (x[i+j]-x[i])
return coef
def newton_poly(coef, x_data, x):
'''
evaluate the newton polynomial
at x
'''
n = len(x_data) - 1
p = coef[n]
for k in range(1,n+1):
p = coef[n-k] + (x -x_data[n-k])*p
return p
x = np.array([0, 6.4, 10.6])
y = np.array([0, 1.9, 4.3])
# get the divided difference coef
a_s = divided_diff(x, y)[0, :]
# evaluate on new data points
x_new = np.arange(-5, 2.1, .1)
y_new = newton_poly(a_s, x, x_new)
plt.figure(figsize = (12, 8))
plt.plot(x, y, 'bo')
plt.plot(x_new, y_new)
I am trying to make my own implementation of a simple neural network to classify points. I heard about a specific type of activation function that I am interested in testing, the Gaussian. I do not just want to use ReLUs or sigmoids; I am trying to build a network that takes as input about 300 x and y values, then in the first layer computes the Gaussian function on these values with about 50 neurons, each of which has a separate x and y value as its mean (I will keep the sigma constant). Mathematically I anticipate this to look like
exp(- [(x - Mx)^2 + (y - My)^2] / (2 * sigma^2)) / sqrt(2 * pi * sigma^2)
then I will perform a weighted sum of these terms over all the neurons in the first layer, add a bias, and pass it through a sigmoid to get my prediction. I will perform this step for each training example and get a list of predictions. I think that I do the forward propagation correctly, but I will include the code for that in case someone can spot an obvious error in my implementation. Then I perform the back-propagation. I have tested my updating of the weights and bias, and I believe that they are not the problem. I think that there is something wrong with my implementation of the gradient for the means, however, because they always cluster to a single point, which clearly does not minimize the cost function. I have already tried using a couple of different data sets and varying some hyperparameters, all to no avail. Can anyone figure out what the problem is?
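In vectorized form, the layer I have in mind looks roughly like this (just a sketch using the shapes from my code below: X0 is (2, m), Mx and My are (n1,), W2 is (1, n1)):
import numpy as np
def forward_sketch(X0, Mx, My, W2, b2, sigma):
    # squared distance of every example to every neuron's mean, shape (n1, m)
    sq = (X0[0][None, :] - Mx[:, None]) ** 2 + (X0[1][None, :] - My[:, None]) ** 2
    # Gaussian activations of the first layer
    X1 = np.exp(-sq / (2 * sigma ** 2)) / np.sqrt(2 * np.pi * sigma ** 2)
    # weighted sum plus bias, squashed through a sigmoid
    X2 = 1.0 / (1.0 + np.exp(-(np.dot(W2, X1) + b2)))
    return X1, X2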
Here is my code.
# libraries
import matplotlib.patches as patches
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pdb
# functions
def gaussian(sq_error, sigma):
return ((1/np.sqrt(2*np.pi*sigma**2))) * np.exp(-(sq_error)/(2*sigma**2))
def calc_X1(X0, Mx, My, m, sigma):
X1 = [] # shape will be (10, m)
for ex in range(0, m):
sq_error = (X0[0][ex] - Mx) **2 + (X0[1][ex] - My) **2
X1.append(gaussian(sq_error, sigma))
X1 = np.array(X1)
return X1.T
def sigmoid(Z):
return 1 / (1 + np.exp(-Z))
def calc_X2(W2, X1, b2):
return sigmoid(np.dot(W2, X1) + b2)
def cost(X2, Y, m):
return -1/m * ( np.dot(Y, np.log(X2.T)) + np.dot(1-Y, np.log(1-X2.T))) [0]
def calc_dZ2(X2, Y):
return X2 - Y
def calc_dM(dZ2, W2, X1, sigma, M, m, xOrY, X0):
cur_dM = np.zeros(M.shape)
for i in range(0, m):
# pdb.set_trace()
cur_dM += dZ2[0][i] * float(np.dot(W2, X1.T[i])) * 1/sigma**2 * (X0[xOrY][i] - M)
return cur_dM / m
def train_correct(X2, Y, m):
ct = 0
for i in range(0, m):
if np.round(X2[0][i]) == Y[i]:
ct += 1
return ct / m
# graphing functions
def plot_train_data(X, Y, m, ax):
for ex in range(0, m):
xCur = X[0][ex]
yCur = X[1][ex]
if Y[ex] == 1:
color=(1, 0, 0)
else:
color=(0,0,1)
ax.scatter(xCur, yCur, c=color)
def probability_hash(pr):
return (float(pr), float(np.round(pr)), float(1-pr))
def probability_hash_1d(pr):
return float(pr)
def plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-5, 5]
boundsy = [-5, 5]
samples = [10, 10]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
for x in np.linspace(boundsx[0], boundsx[1], samples[0]):
for y in np.linspace(boundsy[0], boundsy[1], samples[1]):
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
# ax.add_patch(patches.Rectangle((x, y), width, height, facecolor=probability_hash(X2_cur)))
ax.scatter(x, y, c=probability_hash(X2_cur))
def cool_plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-2, 2]
boundsy = [-2, 2]
samples = [50, 50]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
heats = []
xs = np.linspace(boundsx[0], boundsx[1], samples[0])
ys = np.linspace(boundsy[0], boundsy[1], samples[1])
for x in xs:
heats.append([])
for y in ys:
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
heats[-1].append(probability_hash_1d(X2_cur))
# xticks = []
# yticks = []
# for i in range(0, len(xs)):
# if i % 3 == 0:
# xticks.append(round(xs[i], 2))
# for i in range(0, len(ys)):
# if i % 3 == 0:
# yticks.append(round(ys[i], 2))
xticks = []
yticks = []
sns.heatmap(heats, ax=ax, cbar=True, xticklabels=xticks, yticklabels=yticks)
def plot_m(Mx, My, n1, ax):
for i in range(0, n1):
ax.scatter(Mx[i], My[i], c="k")
# initialize parameters
file = "data/disk2.csv"
df = pd.read_csv(file)
sigma = 2
itterations = 10000
learning_rate = 0.9
n0 = 2 # DO NOT CHANGE, formality
X0 = np.row_stack((df["0"], df["1"])) # shape is (2, m)
Y = np.array(df["2"])
m = len(Y)
n1 = 50
Mx = np.random.randn(n1)
My = np.random.randn(n1)
X1 = calc_X1(X0, Mx, My, m, sigma)
n2 = 1 # DO NOT CHANGE, formality
small_number = 0.01
W2 = np.random.randn(1, n1) * small_number
b2 = 0
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
Js = []
itters = []
fig = plt.figure()
plotGap = 200
for i in range(0, itterations):
# forward propogation
X1 = calc_X1(X0, Mx, My, m, sigma)
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
if i % plotGap == 0:
fig.clear()
costAx = fig.add_subplot(311)
plotAx = fig.add_subplot(312)
pointsAx = fig.add_subplot(313)
cool_plot_boundary(Mx, My, sigma, W2, b2, plotAx)
# plot_boundary(Mx, My, sigma, W2, b2, plotAx)
plot_train_data(X0, Y, m, pointsAx)
Js.append(J)
itters.append(i)
costAx.plot(itters, Js, c="k")
print("cost = " + str(J) + "\ttraining correct = " + str(train_correct(X2, Y, m)))
plot_m(Mx, My, n1, pointsAx)
plt.pause(0.1)
# back propogation
dZ2 = calc_dZ2(X2, Y)
dW2 = np.dot(dZ2, X1.T) / m
db2 = np.sum(dZ2) / m
dMx = calc_dM(dZ2, W2, X1, sigma, Mx, m, 0, X0)
dMy = calc_dM(dZ2, W2, X1, sigma, My, m, 1, X0)
b2 -= learning_rate * db2
W2 -= learning_rate * dW2
Mx -= learning_rate * dMx
My -= learning_rate * dMy
For data I have a csv with a bunch of point locations and labels. You can use this code to generate a similar csv. (Make sure you have a folder called data in the folder you run this from).
# makes data in R2 to learn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
n = 2
# number of examples
m = 300
X = []
Y = []
# hyperparameters for data
rApprox = 1
error = 0.4
noise = 0.1
name = "data/disk2"
plt.cla()
for ex in range(0, m):
xCur = np.random.randn(2)
X.append(xCur)
if abs(np.linalg.norm(xCur) + np.random.randn()*noise - rApprox) < error:
Y.append(1)
color="r"
else:
Y.append(0)
color="b"
plt.scatter(xCur[0], xCur[1], c=color)
if abs(np.random.randn()) < 0.01:
plt.pause(0.1)
plt.pause(1)
plt.savefig(name + ".png")
X = np.array(X)
Y = np.array(Y)
df = pd.DataFrame(X)
df[2] = Y
df.to_csv(name + ".csv", index=False)
Thanks for your help.
Substitute this function for your calc_dM function. You must be careful when multiplying; it is not enough that the dimensions work out.
def calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1):
# pdb.set_trace()
X0x_big = np.dot(np.ones((n1, 1)), X0[0].reshape(1, m))
X0y_big = np.dot(np.ones((n1, 1)), X0[1].reshape(1, m))
Mx_big = np.dot(Mx.reshape(n1, 1), np.ones((1, m)))
My_big = np.dot(My.reshape(n1, 1), np.ones((1, m)))
W2_big = np.dot(W2.reshape(n1, 1), np.ones((1, m)))
dZ2_big = np.dot(np.ones((n1, 1)), dZ2.reshape(1, m))
dxTemp = np.multiply(np.multiply(np.multiply((X0x_big - Mx_big), X1), W2_big), dZ2_big)
dyTemp = np.multiply(np.multiply(np.multiply((X0y_big - My_big), X1), W2_big), dZ2_big)
return (np.sum(dxTemp, axis=1)/m, np.sum(dyTemp, axis=1)/m)
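In other words, this computes the chain-rule gradient dJ/dMx_k = (1/m) * sum_i dZ2_i * W2_k * X1_{k,i} * (x_i - Mx_k) for every mean at once (the 1/sigma^2 factor is a constant scale that can be folded into the learning rate), and it replaces the two calc_dM calls in the training loop:
dMx, dMy = calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1)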
I implemented logistic regression and used it on a data set. (This is an exercise from Week 3 of Coursera's ML course, which normally uses MATLAB and Octave; I'm doing it in Python, so this isn't cheating.)
I started with the implementation in sklearn to classify the data set used in week three of this course (http://pastie.org/10872959). Here is a small, reproducible example for anyone to try out what I used (it relies only on numpy and sklearn):
It takes the data set, splits it into the feature matrix and the output vector, and then constructs 27 more features from the original 2 (i.e., all products x1^(i-j) * x2^j for degrees 1 through 6). I then use logistic regression in sklearn, but this does not give the desired contour plot (please see below).
from sklearn.linear_model import LogisticRegression as expit
import numpy as np
def thetaFunc(y, theta, x):
deg = 6
spot = 0
sum = 0
for i in range(1, deg + 1):
for j in range(i + 1):
sum += theta[spot] * x**(i - j) * y**(j)
spot += 1
return sum
def constructVariations(X, deg):
features = np.zeros((len(X), 27))
spot = 0
for i in range(1, deg + 1):
for j in range(i + 1):
features[:, spot] = X[:,0]**(i - j) * X[:,1]**(j)
spot += 1
return features
if __name__ == '__main__':
data = np.loadtxt("ex2points.txt", delimiter = ",")
X,Y = np.split(data, [len(data[0,:]) - 1], 1)
    X = constructVariations(X, 6)
oneArray = np.ones((len(X),1))
X = np.hstack((oneArray, X))
trial = expit(solver = 'sag')
trial = trial.fit(X = X,y = np.ravel(Y))
print(trial.coef_)
# everything below has been edited in
from matplotlib import pyplot as plt
txt = open("RegLogTheta", "r").read()
txt = txt.split()
theta = np.array(txt, float)
x = np.linspace(-1, 1.5, 100)
y = np.linspace(-1,1.5,100)
z = np.empty((100,100))
xx,yy = np.meshgrid(x,y)
for i in range(len(x)):
for j in range(len(y)):
z[i][j] = thetaFunc(yy[i][j], theta, xx[i][j])
plt.contour(xx,yy,z, levels = [0])
plt.show()
Here are the coefficients of the generic feature terms: http://pastie.org/10872957 (i.e., the coefficients of the 28 terms constructed above), and the contour it generates:
One potential source of error is that I'm misinterpreting the 7 x 4 coefficient matrix stored in trial._coeff. I believe that these 28 values are the coefficients of the 28 "variations" above, and I've tried mapping the coefficients to the variations both column-wise and row-wise. By column-wise, I mean that [:][0] gets mapped to the first 7 variations, [:][1] to the next 7, and so on; my function constructVariations explains how the variations are systematically created. Now the API maintains that an array of shape (n_classes, n_features) is stored in trial._coeff, so should I infer that fit classified the data into four classes? Or have I run through this problem poorly in another way?
Update
My interpretation (and/or use) of the weights must be at fault:
Instead of relying on the prediction built into sklearn, I tried myself to calculate the (x, y) values that set the following to 1/2: sigmoid(theta^T * features(x, y)) = 1 / (1 + exp(-theta^T * features(x, y))).
The values of theta are those found from printing trial._coeff, and x and y are scalars. Those x, y are then plotted to give the contour.
The code I used (but did not originally include) attempts to do this. What is wrong with the math behind it?
One potential source of error is that I'm misinterpreting the 7 x 4 coefficient matrix stored in trial._coeff
This matrix is not 7x4, it is 1x28 (check print(trial.coef_.shape)). One coefficient for each of your 28 features (27 returned by constructVariations and 1 added manually).
so should I infer that fit classified the data into four classes?
No, you misinterpreted the array; it has a single row (for binary classification there is no point in having two).
Or have I run through this problem poorly in another way?
The code is fine, the interpretation is not. In particular, see the actual decision boundary from your model (plotted by calling "predict" and drawing a contour):
from sklearn.linear_model import LogisticRegression as expit
import numpy as np
def constructVariations(X, deg):
features = np.zeros((len(X), 27))
spot = 0
for i in range(1, deg + 1):
for j in range(i + 1):
features[:, spot] = X[:,0]**(i - j) * X[:,1]**(j)
spot += 1
return features
if __name__ == '__main__':
data = np.loadtxt("ex2points.txt", delimiter = ",")
X,Y = np.split(data, [len(data[0,:]) - 1], 1)
rawX = np.copy(X)
X = constructVariations(X, 6)
oneArray = np.ones((len(X),1))
X = np.hstack((oneArray, X))
trial = expit(solver = 'sag')
trial = trial.fit(X = X,y = np.ravel(Y))
print(trial.coef_)
from matplotlib import pyplot as plt
h = 0.01
x_min, x_max = rawX[:, 0].min() - 1, rawX[:, 0].max() + 1
y_min, y_max = rawX[:, 1].min() - 1, rawX[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
data = constructVariations(np.c_[xx.ravel(), yy.ravel()], 6)
oneArray = np.ones((len(data),1))
data = np.hstack((oneArray, data))
Z = trial.predict(data)
Z = Z.reshape(xx.shape)
plt.figure()
plt.scatter(rawX[:, 0], rawX[:, 1], c=Y, linewidth=0, s=50)
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.show()
Update
In the code provided you forgot (in the visualization) that you added a column of "1"s to your data representation, thus your thetas are one "off": theta[0] is a bias, theta[1] is related to your 0th variable, etc.
def thetaFunc(y, theta, x):
deg = 6
spot = 0
sum = theta[spot]
spot += 1
for i in range(1, deg + 1):
for j in range(i + 1):
sum += theta[spot] * x**(i - j) * y**(j)
spot += 1
return sum
You also forgot about the intercept term from LogisticRegression itself, thus
xx,yy = np.meshgrid(x,y)
for i in range(len(x)):
for j in range(len(y)):
z[i][j] = thetaFunc(yy[i][j], theta, xx[i][j])
z -= trial.intercept_
(image generated using the fixed version of your code, given in full below)
import numpy as np
from sklearn.linear_model import LogisticRegression as expit
def thetaFunc(y, theta, x):
deg = 6
spot = 0
sum = theta[spot]
spot += 1
for i in range(1, deg + 1):
for j in range(i + 1):
sum += theta[spot] * x**(i - j) * y**(j)
spot += 1
return np.exp(-sum)
def constructVariations(X, deg):
features = np.zeros((len(X), 27))
spot = 0
for i in range(1, deg + 1):
for j in range(i + 1):
features[:, spot] = X[:,0]**(i - j) * X[:,1]**(j)
spot += 1
return features
if __name__ == '__main__':
data = np.loadtxt("ex2points.txt", delimiter = ",")
X,Y = np.split(data, [len(data[0,:]) - 1], 1)
X = constructVariations(X, 6)
rawX = np.copy(X)
oneArray = np.ones((len(X),1))
X = np.hstack((oneArray, X))
trial = expit(solver = 'sag')
trial = trial.fit(X = X,y = np.ravel(Y))
from matplotlib import pyplot as plt
theta = trial.coef_.ravel()
x = np.linspace(-1, 1.5, 100)
y = np.linspace(-1,1.5,100)
z = np.empty((100,100))
xx,yy = np.meshgrid(x,y)
for i in range(len(x)):
for j in range(len(y)):
z[i][j] = thetaFunc(yy[i][j], theta, xx[i][j])
z -= trial.intercept_
plt.contour(xx,yy,z > 1,cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(rawX[:, 0], rawX[:, 1], c=Y, linewidth=0, s=50)
plt.show()
I am building an SVM in Python. However, my implementation is generating the wrong plane. I think it has something to do with my parameters (Lagrange multipliers) being so small, but I am not sure. I think I am doing the convex optimization right. Maybe my data isn't in the right format. I based my code on these tutorials: http://tullo.ch/articles/svm-py/ and http://www.mblondel.org/journal/2010/09/19/support-vector-machines-in-python/.
Here is my code and output:
import numpy
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import cvxopt
from matplotlib import cm
from cvxopt import matrix, solvers
from itertools import izip
#http://www.tristanfletcher.co.uk/SVM%20Explained.pdf
class SVM:
def __init__(self,X,y):
self.X = X
self.y = y
def findParameters(self,X,y):
# min 1/2 x^T P x + q^T x
#Ax = b
#y's are answer vectors
#put in cvxopt
#
"""P = cvxopt.matrix(np.outer(self.y,self.y)* self.gramMatrix())
q = cvxopt.matrix((numpy.ones(len(self.y))).T)
#G =
#h =
limits = np.asarray(self.y)
A = cvxopt.matrix(limits.T)
#genrates matrix of zzeros
b = cvxopt.matrix(numpy.zeros(len(self.y)))
# actually comp
param = cvxopt.solvers.qp(P,q,G,h,A,b);"""
n_samples, n_features = X.shape
K = self.gramMatrix(X)
P = cvxopt.matrix(np.outer(y, y) * K)
q = cvxopt.matrix(-1 * np.ones(n_samples))
Gtry = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
htry = cvxopt.matrix(np.zeros(n_samples))
A = cvxopt.matrix(y, (1, n_samples))
b = cvxopt.matrix(0.0)
param = cvxopt.solvers.qp(P, q, Gtry, htry, A, b)
array = param['x']
return array
def WB_calculator(self,X,y):
#calculates w vector
yi = self.y
X = np.asarray(X)
y = np.asarray(y)
important = self.findParameters(X,y)
print("these are parameters")
print(important)
firstsum = [0 for x in range(0,len(y))]
for point in range(0,len(important)):
liste = X[point]*important[point]*yi[point]
firstsum = [x + y for x, y in zip(firstsum,liste)]
#this part calculates bias
#this is a very naive implementation of bias
#xstuff is the x_coordinate vector we find this by transpose
b = 0
for i in range(0,len(important)):
b = b+ (yi[i]- np.dot(firstsum,X[i]))
avgB = b/len(important)
answer = (firstsum , avgB)
print("w vector")
print(firstsum)
return answer
def polynomialK(self,u,v,b):
return (np.dot(u,v)+b)**2
    # Gaussian kernel function
def gaussianK(self,v1, v2, sigma):
return np.exp(-norm(v1-v2, 2)**2/(2.*sigma**2))
    # computes the Gram matrix given all points included in the data
    # this is basically a matrix of dot products
def gramMatrix(self,X):
gramMatrix = []
data = np.asarray(self.X)
dataTran = data
#print(dataTran)
for x in dataTran:
row = []
#print(row)
for y in dataTran:
row.append(np.dot(x,y))
gramMatrix.append(row)
#print(row)
return gramMatrix
def determineAcceptance(self,point,X,y):
        # I'm not sure if this is the proper bounding, let's check
cutoff = self.WB_calculator(X,y)
if(np.dot(cutoff[0],point)+cutoff[1] >0):
print("You got in")
elif(np.dot(cutoff[0],point)+cutoff[1]<0):
print("Study")
# plots plane and points
def Graph(self,X,y):
important_stuff = self.WB_calculator(X,y)
weights = important_stuff[0]
c = important_stuff[1]
        # here we actually graph the function
graphable = X.T
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
xs = graphable[0]
ys = graphable[1]
zs = graphable[2]
colors = self.y
ax.scatter(xs,ys,zs,c=colors)
ax.set_xlabel("A")
ax.set_ylabel("B")
ax.set_zlabel("C")
#this changes orientation and look of surface
ax.view_init(azim = 180+40,elev = 22)
X = np.arange(-2, 2, 0.25)
Y = np.arange(-2, 2, 0.25)
X, Y = np.meshgrid(X, Y)
Z = ((-weights[0]*X + -weights[1]*Y - c)/(weights[2]))
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm,
linewidth=0, antialiased=True)
plt.show()
#list of points to test
a = [[-.1,-.1,-.1],[-.2,-.2,-.2],[.15,.15,.15],[.9,.9,.9],[.95,.95,.95]]
check = np.asarray(a)
b = [.01,.01,.01,1,1]
bigger =np.asarray(b)
d = SVM(a,b)
print(d.gramMatrix(check)[0])
print("parameters ya")
print(d.findParameters(check,bigger))
print(d.WB_calculator(check,bigger))
d.Graph(check,bigger)
d.determineAcceptance([.01,.01,.01],check,bigger)
First of all, thank you everybody for the amazing work on Stack Overflow... you have helped me out quite a few times already. Regarding my problem: I have a series of vectors in the format (VectorX, VectorY, StartingpointX, StartingpointY)
data = [(-0.15304757819399128, -0.034405679205349315, -5.42877197265625, 53.412933349609375), (-0.30532995491023485, -0.21523935094046465, -63.36669921875, 91.832427978515625), (-0.15872430479453215, -0.077999419482978283, -67.805389404296875, 81.001983642578125), (-0.36415549211687903, -0.33757147194808113, -59.015228271484375, 82.976226806640625), (0.0, 0.0, 0.0, 0.0), (-0.052973530805275004, 0.098212384392411423, 19.02667236328125, -13.72125244140625), (-0.34318724086483599, 0.17123742336019632, 80.0394287109375, 108.58499145507812), (0.19410169197834648, -0.17635303976555861, -55.603790283203125, -76.298828125), (-0.38774018337716143, -0.0824692384322816, -44.59942626953125, 68.402496337890625), (0.062202543524108478, -0.37219011831012949, -79.828826904296875, -10.764404296875), (-0.56582988168383963, 0.14872365390732512, 39.67657470703125, 97.303192138671875), (0.12496832467900276, -0.12216653754859408, 24.65948486328125, -30.92584228515625)]
When I plot the vector field it looks like this:
import numpy as np
import matplotlib.pyplot as plt
def main():
# Format Data...
numdata = len(data)
x = np.zeros(numdata)
y = np.zeros(numdata)
u = np.zeros(numdata)
    v = np.zeros(numdata)
    z = np.zeros(numdata)  # vector lengths, used to color the scatter below
for i,el in enumerate(data):
x[i] = el[2]
y[i] = el[3]
# length of vector
        z[i] = np.sqrt(el[0]**2 + el[1]**2)
u[i] = el[0]
v[i] = el[1]
# Plot
plt.quiver(x,y,u,v )
# showing the length with color
plt.scatter(x, y, c=z)
plt.show()
main()
I want to create a polynomial function to fit a continuous vector field over the whole area. After some research I found the following functions for fitting polynomials in two dimensions. The problem is that they only accept a single scalar value to fit at each point.
def polyfit2d(x, y, z, order=3):
ncols = (order + 1)**2
G = np.zeros((x.size, ncols))
ij = itertools.product(range(order+1), range(order+1))
for k, (i,j) in enumerate(ij):
G[:,k] = x**i * y**j
m, _, _, _ = np.linalg.lstsq(G, z)
return m
def polyval2d(x, y, m):
order = int(np.sqrt(len(m))) - 1
ij = itertools.product(range(order+1), range(order+1))
z = np.zeros_like(x)
for a, (i,j) in zip(m, ij):
z += a * x**i * y**j
return z
Also, when I tried to fit the one-dimensional length of the vectors, the values returned from polyval2d were completely off. Does anybody know a method to get a fitted function that will return a vector (x, y) for any point in the grid?
Thank you!
A polynomial to fit a 2-d vector field will be two bivariate polynomials - one for the x-component and one for the y-component. In other words, your final polynomial fitting will look something like:
P(x,y) = ( x + x*y, 1 + x + y )
So you will have to call polyfit2d twice. Here is an example:
import numpy as np
import itertools
def polyfit2d(x, y, z, order=3):
ncols = (order + 1)**2
G = np.zeros((x.size, ncols))
ij = itertools.product(range(order+1), range(order+1))
for k, (i,j) in enumerate(ij):
G[:,k] = x**i * y**j
m, _, _, _ = np.linalg.lstsq(G, z)
return m
def fmt1(x,i):
if i == 0:
return ""
elif i == 1:
return x
else:
return x + '^' + str(i)
def fmt2(i,j):
if i == 0:
return fmt1('y',j)
elif j == 0:
return fmt1('x',i)
else:
return fmt1('x',i) + fmt1('y',j)
def fmtpoly2(m, order):
for (i,j), c in zip(itertools.product(range(order+1), range(order+1)), m):
yield ("%f %s" % (c, fmt2(i,j)))
xs = np.array([ 0, 1, 2, 3] )
ys = np.array([ 0, 1, 2, 3] )
zx = np.array([ 0, 2, 6, 12])
zy = np.array([ 1, 3, 5, 7])
mx = polyfit2d(xs, ys, zx, 2)
print "x-component(x,y) = ", ' + '.join(fmtpoly2(mx,2))
my = polyfit2d(xs, ys, zy, 2)
print "y-component(x,y) = ", ' + '.join(fmtpoly2(my,2))
In this example our vector field is:
at (0,0): (0,1)
at (1,1): (2,3)
at (2,2): (6,5)
at (3,3): (12,7)
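Once you have mx and my, the fitted vector at any point in the grid is just both fits evaluated there with polyval2d (the corrected version given below), for example:
xq, yq = np.array([1.5]), np.array([1.5])   # any query point(s), as float arrays
vx = polyval2d(xq, yq, mx)                  # x-component of the fitted field
vy = polyval2d(xq, yq, my)                  # y-component of the fitted field
print(vx, vy)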
Also, I think I found a bug in polyval2d - this version gives more accurate results:
def polyval2d(x, y, m):
order = int(np.sqrt(len(m))) - 1
ij = itertools.product(range(order+1), range(order+1))
z = np.zeros_like(x)
for a, (i,j) in zip(m, ij):
z = z + a * x**i * y**j
return z
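The difference matters when x (and therefore np.zeros_like(x)) has an integer dtype: with z += ..., the float products are forced back into the integer accumulator (truncating, or raising a casting error on newer numpy versions), whereas z = z + ... promotes the result to float. A tiny illustration:
import numpy as np
x = np.array([0, 1, 2, 3])     # integer dtype
z = np.zeros_like(x)           # also integer dtype
z = z + 0.5 * x                # promotes to float
print(z.dtype, z)              # float64 [0.  0.5 1.  1.5]
# z += 0.5 * x would instead try to write floats into the int array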