I want to implement subgradient and stochastic gradient descent using a cost function, calculate the number of iterations it takes to find a perfect classifier for the data, and also find the weights (w) and bias (b). The dataset is four-dimensional.
This is my cost function (the perceptron criterion):

J(w, b) = sum over m of max(0, -Ym*(w·Xm + b))

I have taken the derivative of the cost function: each misclassified sample (one with -Ym*(w·Xm + b) >= 0) contributes -Ym*Xm to the gradient with respect to w and -Ym to the gradient with respect to b.
When I run my code I get a lot of errors; can someone please help?
Here is my code in Python:
import numpy as np

learn_rate = 1
w = np.zeros((4,1))
b = 0
M = 1000
data = '/Users/labuew/Desktop/dataset.data'

# calculating the gradient
def cal_grad_w(data, w, b):
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -Ym[i]*(w*Xm+b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym[i]*Xm*tmp
        sum = sum + value
    return sum

def cal_grad_b(data, w, b):
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -Ym*(w*Xm+b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym[i]*x*tmp
        sum = sum + value
    return sum

if __name__ == '__main__':
    counter = 0
    while 1:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        if dw == 0 and db == 0:
            break
        w = w - learn_rate*dw
        b = b - learn_rate*dw
        print(counter, w, b)
Are you missing the numpy load function?
data = np.load('/Users/labuew/Desktop/dataset.data')
It looks like you're doing the numerics on the string.
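For example, indexing that string as if it were an array fails immediately:

data = '/Users/labuew/Desktop/dataset.data'
data[0,:]   # TypeError: string indices must be integers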
Also:

Ym = sample[-1]
Xm = sample[0:4]
Also, doesn't 4 dimensions imply that Ym = Xm[3]? Or is your data rank 2, with the second rank being dimension 5? Note that [0:4] includes the fourth element, i.e.

z = [1,2,3,4]
z[0:4]   # -> [1,2,3,4]

This would be my best guess. I'm taking a few educated guesses about your data format.
import numpy as np

learn_rate = 1
w = np.zeros((1,4))
b = 0
M = 1000
# Possible format
#data = np.load('/Users/labuew/Desktop/dataset.data')
# Assumed format
data = np.ones((1000,5))

# calculating the gradient
def cal_grad_w(data, w, b):
    sum = 0
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]       # label in the last column
        Xm = sample[0:4]      # first four columns are the features
        if -1*Ym*(np.matmul(w, Xm.reshape(4,1)) + b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym*Xm*tmp
        sum = sum + value
    return sum.reshape(1,4)

def cal_grad_b(data, w, b):
    sum = 0
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -1*Ym*(np.matmul(w, Xm.reshape(4,1)) + b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym*tmp
        sum = sum + value
    return sum

if __name__ == '__main__':
    counter = 0
    while 1:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        if not dw.any() and db == 0:   # stop once every component of the gradient is zero
            break
        w = w - learn_rate*dw
        b = b - learn_rate*db
        print([counter, w, b])
Put in dummy data because I don't know the format.
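As a side note, here is a minimal vectorized sketch of the same gradients (assuming, as above, that data has shape (1000, 5) with the label in the last column):

def cal_grads(data, w, b):
    X = data[:, 0:4]                  # (M, 4) feature matrix
    Y = data[:, -1]                   # (M,) labels
    margins = Y * (X @ w.reshape(4,) + b)
    mis = margins <= 0                # -Ym*(w·Xm + b) >= 0, i.e. the cost is active
    dw = (Y[mis, None] * X[mis]).sum(axis=0).reshape(1, 4)
    db = Y[mis].sum()
    return dw, db

It computes in one pass what the two loops above compute sample by sample.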
I need to solve a whole range of 8x8 and 9x9 matrices, so I thought I could build a Python program to make the whole thing easier.
So far I have managed to create:
from __future__ import division
import numpy as np

def solveEqns(A,v):
    def lu( A ):
        #Factor A into LU by Gaussian elimination with scaled partial pivoting
        n, m = np.shape( A )
        if n != m:
            print "Error: input matrix is not square"
            return None
        # Generate initial index vector
        p = range( n )
        # Determine the largest (in magnitude) element in each row. These
        # factors are used to scale the pivot elements for comparison purposes
        # when deciding which row to use as a pivot row.
        s = [0] * n
        for i in xrange( n ):
            smax = 0.0
            for j in xrange( n ):
                smax = max( smax, abs( A[i][j] ) )
            s[i] = smax
        # Begin Gaussian elimination.
        for k in xrange( n - 1 ):
            # Find the remaining row with the largest scaled pivot.
            rmax = 0.0
            for i in xrange( k, n ):
                r = abs( A[p[i][k]] / s[p[i]] )
                if r > rmax:
                    rmax = r
                    j = i
            # Row j has the largest scaled pivot, so "swap" that row with the
            # current row (row k). The swap is not actually done by copying rows,
            # but by swapping two entries in an index vector.
            p[j], p[k] = ( p[k], p[j] )
            # Now carry out the next elimination step as usual, except for the
            # added complication of the index vector.
            for i in xrange( k + 1, n ):
                xmult = A[p[i],k] / A[p[k],k]
                A[p[i],k] = xmult
                for j in xrange( k + 1, n ):
                    A[p[i],j] = A[p[i],j] - xmult * A[p[k],j]
        # All done, return factored matrix A and permutation vector p
        return ( A, p )

    def solve( A, p, b ):
        #Solves Ax = b given an LU factored matrix A and permutation vector p
        n, m = np.shape( A )
        if n != m:
            print "Error: input matrix is not square"
            return None
        # Forward solve
        x = np.zeros( n )
        for k in xrange( n - 1 ):
            for i in xrange( k + 1, n ):
                b[p[i]] = b[p[i]] - A[p[i],k] * b[p[k]]
        # Backward solve
        for i in xrange( n - 1, -1, -1 ):
            sum = b[p[i]]
            for j in xrange( i + 1, n ):
                sum = sum - A[p[i],j] * x[j]
            x[i] = sum / A[p[i],i]
        # All done, return solution vector
        return x

    lu(A)
    return solve(A,p,v)

def circuit():
    A = np.array([[1,0,0,0,0,8,0,0,0],[0,1,0,0,5,0,0,0,0],[0,1,0,0,5,0,0,0,0],[0,0,0,1,-1,1,0,0,0],[0,0,1,0,0,0,1,-1,0],[0,0,1,0,0,0,1,0,-1],[0,1,0,0,-1,0,0,0,1],[1,0,0,0,0,-1,0,1,0],[1,-1,0,1,0,0,0,0,0]])
    v = np.array([9,-12,-0.5,0,0,0,0,0,0])
    I = solveEqns(A,v)
    return I
to solve the 9x9 matrix A at the end. This is one of the easier ones I need to solve, so I can solve it outside of Python to check whether the results coming through are accurate.
I'm getting a traceback error on line 26:
Traceback (most recent call last):
  File "<ipython-input-110-6daf773db1e3>", line 1, in <module>
    solveEqns(A,b)
  File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 65, in solveEqns
    lu(A)
  File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 26, in lu
    r = abs( A[p[i][k]] / s[p[i]] )
TypeError: 'int' object has no attribute '__getitem__'
I can't figure out why it's not pulling a number through from the matrix.
Any help would be greatly appreciated.
Thanks
Sam
The traceback comes from the line r = abs( A[p[i][k]] / s[p[i]] ): p is a plain list of ints, so p[i] is an int, and p[i][k] then tries to index into that int, which raises the TypeError. You presumably meant A[p[i],k]. A minimal reproduction of the failure:
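p = range(9)   # a list of ints (Python 2)
p[3]           # -> 3, an int
p[3][0]        # TypeError: 'int' object has no attribute '__getitem__'

That aside, you might use Gauss elimination with scaled row pivoting; the code is shown below.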
import numpy as np

def gauss_pivot(a, b, tol=1.0e-12):
    """
    x = gauss_pivot(a,b,tol=1.0e-12).
    Solves [a]{x} = {b} by Gauss elimination with
    scaled row pivoting.
    """
    a = np.copy(a)
    b = np.copy(b)
    n = len(b)
    assert np.all(np.shape(a) == (n,n))  # check that a is a square matrix
    # Set up scale factors
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(np.abs(a[i,:]))  # largest element of each row
    for k in range(0, n-1):  # pivot row
        # Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) + k  # row with the largest scaled pivot for column k
        if abs(a[p,k]) < tol:
            raise Exception("Matrix is singular")
        if p != k:  # swap the current row with the row holding the largest scaled pivot
            a[[k,p],:] = a[[p,k],:]
            b[k], b[p] = b[p], b[k]
            s[k], s[p] = s[p], s[k]
        # Elimination phase of matrix a
        for i in range(k+1, n):
            if a[i,k] != 0.0:  # skip if a[i,k] is already zero
                lam = a[i,k]/a[k,k]
                a[i,k:n] = a[i,k:n] - lam*a[k,k:n]
                b[i] = b[i] - lam*b[k]
    if abs(a[n-1,n-1]) < tol:
        raise Exception("Matrix is singular")
    # Back substitution phase
    x = np.zeros_like(b)
    x[n-1] = b[n-1]/a[n-1,n-1]
    for k in range(n-2, -1, -1):
        x[k] = (b[k] - np.dot(a[k,k+1:n], x[k+1:n]))/a[k,k]
    return x
a = np.random.randn(100,100)*10
b = np.random.randn(100)*10
x = gauss_pivot(a,b)
if np.allclose(np.dot(a,x), b):
    print("x is the correct solution")
If you want the code to perform faster, you can drop x and substitute the solution into b, so that upon return b contains the solution. You can also slightly modify the elimination phase so that elements of a below the diagonal are not zeroed, since they are irrelevant during the back-substitution phase. The code then becomes:
import numpy as np

def gauss_pivot(a, b, tol=1.0e-12):
    """
    x = gauss_pivot(a,b,tol=1.0e-12).
    Solves [a]{x} = {b} by Gauss elimination with
    scaled row pivoting; the solution is returned in b.
    """
    a = np.copy(a)
    b = np.copy(b)
    n = len(b)
    assert np.all(np.shape(a) == (n,n))  # check that a is a square matrix
    # Set up scale factors
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(np.abs(a[i,:]))  # largest element of each row
    for k in range(0, n-1):  # pivot row
        # Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) + k  # row with the largest scaled pivot for column k
        if abs(a[p,k]) < tol:
            raise Exception("Matrix is singular")
        if p != k:  # swap the current row with the row holding the largest scaled pivot
            a[[k,p],:] = a[[p,k],:]
            b[k], b[p] = b[p], b[k]
            s[k], s[p] = s[p], s[k]
        # Elimination phase; entries below the diagonal are left untouched
        for i in range(k+1, n):
            if a[i,k] != 0.0:  # skip if a[i,k] is already zero
                lam = a[i,k]/a[k,k]
                a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
                b[i] = b[i] - lam*b[k]
    if abs(a[n-1,n-1]) < tol:
        raise Exception("Matrix is singular")
    # Back substitution phase; the solution overwrites b
    b[n-1] = b[n-1]/a[n-1,n-1]
    for k in range(n-2, -1, -1):
        b[k] = (b[k] - np.dot(a[k,k+1:n], b[k+1:n]))/a[k,k]
    return b
To use LU decomposition instead, which is better suited when b contains more than one column, the LU code is shown below:
import numpy as np

def lu_decomp(a, tol=1.0e-9):
    a = np.copy(a)
    n = len(a)
    assert np.all(np.shape(a) == (n,n))  # check that a is a square matrix
    seq = np.arange(n, dtype=int)  # records the row permutation
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(abs(a[i,:]))
    for k in range(0, n-1):
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) + k
        if abs(a[p,k]) < tol:
            raise Exception("Matrix is singular")
        if p != k:
            a[[k,p],:] = a[[p,k],:]
            s[k], s[p] = s[p], s[k]
            seq[k], seq[p] = seq[p], seq[k]
        # Elimination; the multipliers are stored below the diagonal
        for i in range(k+1, n):
            if a[i,k] != 0.0:
                lam = a[i,k]/a[k,k]
                a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
                a[i,k] = lam
    return a, seq

def lu_solve(a, b, seq):
    n = len(a)
    x = b.copy()
    for i in range(n):
        x[i] = b[seq[i]]  # apply the row permutation to b
    # Forward substitution
    for k in range(1, n):
        x[k] = x[k] - np.dot(a[k,0:k], x[0:k])
    # Back substitution
    x[n-1] = x[n-1]/a[n-1,n-1]
    for k in range(n-2, -1, -1):
        x[k] = (x[k] - np.dot(a[k,k+1:n], x[k+1:n]))/a[k,k]
    return x
a2 = np.random.randn(500,500)*100
b2 = np.random.randn(500,20)*100
a_decomposed, seq = lu_decomp(a2)
x2 = np.zeros_like(b2)
for col in range(b2.shape[1]):
    x2[:,col] = lu_solve(a_decomposed, b2[:,col], seq)
if np.allclose(np.dot(a2,x2), b2):
    print("x2 is the correct solution")
Both methods give the output:

Gauss elimination:
x is the correct solution

LU method:
x2 is the correct solution
I recommend you use the scipy.linalg package (from scipy.linalg import solve, lu_factor, lu_solve); these routines perform far faster for large matrices. As a minimal sketch, reusing the a2 and b2 arrays from above (lu_solve here is scipy's, not the function defined earlier):
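from scipy.linalg import lu_factor, lu_solve

lu, piv = lu_factor(a2)         # factor once
x2 = lu_solve((lu, piv), b2)    # solve all 20 right-hand sides at once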
Alternatively, you can keep the pure-Python code above but annotate it with numba's jit decorator, so performance for large matrices is much better:
from numba import jit

@jit
def gauss_pivot(a, b):
    ...
    ...
Acknowledgement: the code is inspired by the book Numerical Methods in Engineering with Python 3 by Prof. Jaan Kiusalaas:
https://www.amazon.co.uk/Numerical-Methods-Engineering-Python-3/dp/1107033853/ref=sr_1_1?ie=UTF8&qid=1517845946&sr=8-1&keywords=numerical+method+in+science+and+engineering+with+python
Hi, scipy.stats has an implementation of Fisher's exact test, but it is only for 2x2 contingency tables. I want to do the test on bigger tables (5x2, 5x3).
I know there is fisher.test in R which can do the job, but I want to do it in my Python code.
Does anybody know a Python implementation of Fisher's exact test that can work on bigger tables?
Also, I am not sure whether it is OK to do Fisher's exact test on tables bigger than 2x2.
Thanks
Yes, it is OK to do a Fisher's exact test on tables bigger than 2x2.
There currently aren't any clean, widely tested solutions in Python. One solution would be to use rpy2 and call the R function from Python:
import numpy as np
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr

rpy2.robjects.numpy2ri.activate()

stats = importr('stats')
m = np.array([[4,4],[4,5],[10,6]])
res = stats.fisher_test(m)
print('p-value: {}'.format(res[0][0]))

>> p-value: 0.668165917041
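For bigger tables the exact computation can run out of workspace; R's fisher.test accepts a workspace argument and can also fall back to a Monte Carlo estimate. A sketch, assuming rpy2's usual dot-to-underscore argument translation:

res = stats.fisher_test(m, workspace=2e8)                    # larger workspace for the exact test
res = stats.fisher_test(m, simulate_p_value=True, B=100000)  # Monte Carlo approximation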
Another solution would be to dig into the C code that the R implementation uses and call that code directly. Here is a link to someone's GitHub project where they went back to the original Fortran implementation and call it from Python.
With some simple calculation, we can extend the hypergeometric probability formula for a 2x2 contingency table to any r x c size. For example, the probability of a particular 2x3 contingency table is

(N1! N2! M1! M2! M3!) / (N! a! b! c! d! e! f!)

where N1, N2 are the row sums, M1, M2, M3 are the column sums, N is the total sum, and a, b, c, d, e, f are the six cell counts.
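For instance, here is a minimal sketch that evaluates this formula for one fixed 2x3 table (the table is made up for illustration):

import math

table = [[1, 2, 3], [4, 5, 6]]
row_sums = [sum(row) for row in table]        # N1, N2
col_sums = [sum(col) for col in zip(*table)]  # M1, M2, M3
N = sum(row_sums)

p = 1.0
for x in row_sums + col_sums:
    p *= math.factorial(x)
p /= math.factorial(N)
for row in table:
    for cell in row:
        p /= math.factorial(cell)
print(p)  # hypergeometric probability of this exact table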
You can implement Fisher's exact test with a depth-first search (DFS). The code:
import math

def _dfs(mat, pos, r_sum, c_sum, p_0, p):
    (xx, yy) = pos
    (r, c) = (len(r_sum), len(c_sum))

    # deep-copy the partially filled table
    mat_new = []
    for i in range(len(mat)):
        temp = []
        for j in range(len(mat[0])):
            temp.append(mat[i][j])
        mat_new.append(temp)

    if xx == -1 and yy == -1:
        # all free cells are filled in; derive the last row/column from the margins
        for i in range(r-1):
            temp = r_sum[i]
            for j in range(c-1):
                temp -= mat_new[i][j]
            mat_new[i][c-1] = temp
        for j in range(c-1):
            temp = c_sum[j]
            for i in range(r-1):
                temp -= mat_new[i][j]
            mat_new[r-1][j] = temp
        temp = r_sum[r-1]
        for j in range(c-1):
            temp -= mat_new[r-1][j]
        if temp < 0:
            return
        mat_new[r-1][c-1] = temp

        # hypergeometric probability of this completed table
        p_1 = 1
        for x in r_sum:
            p_1 *= math.factorial(x)
        for y in c_sum:
            p_1 *= math.factorial(y)
        n = 0
        for x in r_sum:
            n += x
        p_1 /= math.factorial(n)
        for i in range(len(mat_new)):
            for j in range(len(mat_new[0])):
                p_1 /= math.factorial(mat_new[i][j])

        # accumulate tables no more probable than the observed one
        if p_1 <= p_0 + 0.00000001:
            #print(mat_new)
            #print(p_1)
            p[0] += p_1
    else:
        # try every feasible count for the current cell and recurse
        max_1 = r_sum[xx]
        max_2 = c_sum[yy]
        for j in range(c):
            max_1 -= mat_new[xx][j]
        for i in range(r):
            max_2 -= mat_new[i][yy]
        for k in range(min(max_1, max_2)+1):
            mat_new[xx][yy] = k
            if xx == r-2 and yy == c-2:
                pos_new = (-1, -1)
            elif xx == r-2:
                pos_new = (0, yy+1)
            else:
                pos_new = (xx+1, yy)
            _dfs(mat_new, pos_new, r_sum, c_sum, p_0, p)
def fisher_exact(table):
    row_sum = []
    col_sum = []
    for i in range(len(table)):
        temp = 0
        for j in range(len(table[0])):
            temp += table[i][j]
        row_sum.append(temp)
    for j in range(len(table[0])):
        temp = 0
        for i in range(len(table)):
            temp += table[i][j]
        col_sum.append(temp)

    # empty table to be filled by the search (independent rows, not aliased)
    mat = [[0] * len(col_sum) for _ in range(len(row_sum))]
    pos = (0, 0)

    # hypergeometric probability of the observed table
    p_0 = 1
    for x in row_sum:
        p_0 *= math.factorial(x)
    for y in col_sum:
        p_0 *= math.factorial(y)
    n = 0
    for x in row_sum:
        n += x
    p_0 /= math.factorial(n)
    for i in range(len(table)):
        for j in range(len(table[0])):
            p_0 /= math.factorial(table[i][j])

    p = [0]
    _dfs(mat, pos, row_sum, col_sum, p_0, p)
    return p[0]
You can test the code, for example:

print(fisher_exact([[1,24],[5,20],[14,11],[11,14]]))

gives the result

0.0001228337404686859

which is the same as that given by R. There may be a more elegant approach, but this code does give the right result.
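As a quick sanity check, on a 2x2 table the result should agree with scipy's built-in two-sided test (assuming scipy is installed; the example table is arbitrary):

from scipy import stats

table = [[8, 2], [1, 5]]
print(fisher_exact(table))           # this implementation
print(stats.fisher_exact(table)[1])  # scipy's two-sided p-value

Both prints should show the same p-value up to floating-point tolerance.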
I have tried to implement gradient descent here in Python, but the cost J just seems to be increasing irrespective of the lambda and alpha values, and I am unable to figure out what the issue is. It would be great if someone could help me out with this. The inputs are matrices Y and R with the same dimensions: Y is a movies x users matrix, and R just indicates whether a user has rated a movie.
#Recommender system ML
import numpy
import scipy.io

def gradientDescent(y,r):
    (nm,nu) = numpy.shape(y)
    x = numpy.mat(numpy.random.randn(nm,10))
    theta = numpy.mat(numpy.random.randn(nu,10))
    for i in range(1,10):
        (x,theta) = costFunc(x,theta,y,r)

def costFunc(x,theta,y,r):
    X_tmp = numpy.power(x, 2)
    Theta_tmp = numpy.power(theta, 2)
    lmbda = 0.1
    reg = ((lmbda/2) * numpy.sum(Theta_tmp)) + ((lmbda/2)*numpy.sum(X_tmp))
    ans = numpy.multiply(numpy.power(((theta * x.T).T - y), 2), r)
    res = (0.5 * numpy.sum(ans)) + reg
    print "J:", res
    print "reg:", reg
    (nm,nu) = numpy.shape(y)
    X_grad = numpy.mat(numpy.zeros((nm,10)))
    Theta_grad = numpy.mat(numpy.zeros((nu,10)))
    alpha = 0.1
    # [m f] = size(X);
    (m,f) = numpy.shape(x)
    for i in range(0,m):
        for k in range(0,f):
            tmp = 0
            # X_grad(i,k) += (((theta * x'(:,i)) - y(i,:)').*r(i,:)')' * theta(:,k);
            tmp += ((numpy.multiply(((theta * x.T[:,i]) - y[i,:].T), r[i,:].T)).T) * theta[:,k]
            # X_grad(i,k) += (lambda*X(i,k));
            tmp += (lmbda*x[i,k])
            X_grad[i,k] -= (alpha*tmp)
    # [m f] = size(Theta);
    (m,f) = numpy.shape(theta)
    for i in range(0,m):
        for k in range(0,f):
            tmp = 0
            # Theta_grad(i,k) += (((theta(i,:) * x') - y(:,i)').*r(:,i)') * x(:,k);
            tmp += (numpy.multiply(((theta[i,:] * x.T) - y[:,i].T), r[:,i].T)) * x[:,k]
            # Theta_grad(i,k) += (lambda*Theta(i,k));
            tmp += (lmbda*theta[i,k])
            Theta_grad[i,k] -= (alpha*tmp)
    return (X_grad, Theta_grad)

def main():
    mat1 = scipy.io.loadmat("C:\Users\ROHIT\Machine Learning\Coursera\mlclass-ex8\ex8_movies.mat")
    Y = mat1['Y']
    R = mat1['R']
    r = numpy.mat(R)
    y = numpy.mat(Y)
    gradientDescent(y,r)

#if __init__ == '__main__':
main()
I did not check the whole code logic, but assuming it is correct, your costFunc is supposed to return the gradient of the cost function, and in these lines:

for i in range(1,10):
    (x,theta) = costFunc(x,theta,y,r)

you are overwriting the current values of x and theta with their gradients. The gradient is a measure of change, so you should move in the opposite direction and subtract it instead of overwriting the values:

for i in range(1,10):
    (dx, dtheta) = costFunc(x,theta,y,r)
    x = x - dx
    theta = theta - dtheta

But it seems that you already fold the minus sign (and the learning rate alpha) into the gradient inside costFunc, so you should add the returned values instead:

for i in range(1,10):
    (dx, dtheta) = costFunc(x,theta,y,r)
    x = x + dx
    theta = theta + dtheta