I am trying to implement GMM clustering for both a 24-dimension and a 32-dimension feature vector, where the initial parameters are assigned by the K-means algorithm (K-means clustering provides the cluster centers, MU, only).
I am following this link, where it is implemented only for a 2D feature vector with predefined Mu and Sigma.
If anyone has code for GMM clustering, kindly post it.
A predefined GMM is also available in sklearn, but it does not give me the likelihood for each iteration. sklearn GMM
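As a side note, the closest workaround I know of in sklearn is to run GaussianMixture one EM step at a time with warm_start=True and record lower_bound_ after each call; a rough sketch (assuming lower_bound_ is an acceptable stand-in for the per-iteration log-likelihood, and using placeholder data):

import numpy as np
from sklearn.mixture import GaussianMixture

X = np.random.randn(500, 24)              # placeholder for the real 24-D feature matrix

gmm = GaussianMixture(n_components=8,
                      covariance_type='full',
                      init_params='kmeans',   # means initialised by k-means
                      warm_start=True,        # continue from the previous parameters on each fit()
                      max_iter=1)             # one EM step per fit() call

log_likelihoods = []
for _ in range(150):
    gmm.fit(X)
    log_likelihoods.append(gmm.lower_bound_)  # lower bound on the average log-likelihood after this step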
import numpy as np
from sklearn.metrics import pairwise_distances_argmin
import matplotlib.pyplot as plt

def kmeans(dataSet, k, c):
    # 1. Randomly choose initial cluster centers (c seeds the RNG)
    rng = np.random.RandomState(c)
    p = rng.permutation(dataSet.shape[0])[:k]
    centers = dataSet[p]
    while True:
        # 2. Assign each point to its nearest center
        labels = pairwise_distances_argmin(dataSet, centers)
        # 3. Recompute each center as the mean of its assigned points
        new_centers = np.array([dataSet[labels == i].mean(0) for i in range(k)])
        if np.all(centers == new_centers):
            break
        centers = new_centers
    # Per-cluster data, mixing weights and covariance matrices for the GMM initialisation
    cluster_data = [dataSet[labels == i] for i in range(k)]
    weights = []
    covs = []
    for i in range(k):
        weights.append(len(cluster_data[i]) * 1.0 / len(dataSet))
        covs.append(np.cov(np.array(cluster_data[i]).T))
    return centers, weights, covs, cluster_data
class gaussian_Mix_Model:

    def __init__(self, k = 8, eps = 0.0000001):
        self.k = k        ## number of clusters
        self.eps = eps    ## threshold to stop `epsilon`

    def calculate_Exp_Maxim(self, X, max_iters = 1000):
        # n = number of data-points, d = dimension of data points
        n, d = X.shape
        # Initialize the means and covariances from k-means (seed 0 chosen for reproducibility)
        mu, _, Cov, cluster_data = kmeans(X, self.k, 0)
        # initialize the weights uniformly
        w = [1. / self.k] * self.k
        # responsibilities: R[i, k] = membership of point i in cluster k
        R = np.zeros((n, self.k))
        ### Log-likelihoods, one entry per iteration
        LLhoods = []
        # Multivariate normal density evaluated at every row of X
        P = lambda mu, s: np.linalg.det(s) ** -.5 * (2 * np.pi) ** (-X.shape[1] / 2.) \
            * np.exp(-.5 * np.einsum('ij, ij -> i',
                                     X - mu, np.dot(np.linalg.inv(s), (X - mu).T).T))
        # Iterate for at most max_iters iterations
        while len(LLhoods) < max_iters:
            # Expectation step
            ## unnormalised membership for each of the k clusters
            for k in range(self.k):
                R[:, k] = w[k] * P(mu[k], Cov[k])
            # Log-likelihood of the data under the current parameters
            LLhood = np.sum(np.log(np.sum(R, axis = 1)))
            # Store the log likelihood in the list.
            LLhoods.append(LLhood)
            # Normalise the responsibilities; N_ks = effective number of points per cluster
            R = (R.T / np.sum(R, axis = 1)).T
            N_ks = np.sum(R, axis = 0)
            # Maximization step: calculate the new parameters.
            for k in range(self.k):
                # Calculate the new means
                mu[k] = 1. / N_ks[k] * np.sum(R[:, k] * X.T, axis = 1).T
                x_mu = np.matrix(X - mu[k])
                # Calculate the new covariance
                Cov[k] = np.array(1. / N_ks[k] * np.dot(np.multiply(x_mu.T, R[:, k]), x_mu))
                # Calculate the new mixing weight Pi_k
                w[k] = 1. / n * N_ks[k]
            # Check for convergence of the log-likelihood
            if len(LLhoods) > 1 and np.abs(LLhood - LLhoods[-2]) < self.eps:
                break
        # Package the fitted parameters
        from collections import namedtuple
        Params = namedtuple('Params', ['mu', 'Cov', 'w', 'LLhoods', 'num_iters'])
        self.params = Params(mu = mu, Cov = Cov, w = w,
                             LLhoods = LLhoods, num_iters = len(LLhoods))
        return self.params
# Call the GMM to fit the model (the Class_1/2/3 figures would each come from fitting
# a separate model on that class's data; only the first is shown here)
gmm = gaussian_Mix_Model(3, 0.000001)
params = gmm.calculate_Exp_Maxim(X, max_iters = 150)
# Plotting of Log-Likelihood VS Iterations (params.LLhoods holds one value per EM iteration).
plt.plot(params.LLhoods)
plt.savefig('Dataset_2A_GMM_Class_1_K_16.png')
plt.clf()
The following code takes in a single value, x, and a list of points, X, and determines the value of the Lagrange polynomial through the list of points at the given x value.
import numpy as np

def chunkIt(seq, num):
    # Split seq into num roughly equal chunks
    avg = len(seq) / float(num)
    out = []
    last = 0.0
    while last < len(seq):
        out.append(seq[int(last):int(last + avg)])
        last += avg
    return out

def product(factors):
    # Product of all elements in factors
    p = 1
    for i in factors:
        p *= i
    return p

def Lagrange(x, X):
    T = np.zeros((2, len(X)))
    terms = []
    # Factors (x - x_j) / (x_i - x_j) of each Lagrange basis polynomial
    for i in range(len(X)):
        for j in range(len(X)):
            if i != j:
                terms.append((x - X[j][0]) / (X[i][0] - X[j][0]))
    # Multiply the factors belonging to the same basis polynomial together
    p = []
    for i in chunkIt(terms, len(X)):
        p.append(product(i))
    # Weight each basis polynomial by its y-value and sum
    for i in range(len(X)):
        T[0][i] = p[i]
        T[1][i] = X[i][1]
    list2 = []
    for i in range(len(X)):
        list2.append(T[0][i] * T[1][i])
    return sum(list2)
For example:
x, X = 3, [[0,0],[1,1],[2,0.5]]
gives a value of -1.5.
How do I modify this code to determine the equation of the polynomial through the list of points? i.e. if I put x = 'x' as the input, I want it to return -0.75x**2 + 1.75x [for the given example]
import numpy as np
from pypoly import Polynomial
x, X = 3, [[0, 0], [1, 1], [2, 0.5]]
order = len(X)
Here, order is the number of coefficients of the resulting Lagrange polynomial (its degree plus one). For your example, order is 3.
equations = np.array([[point[0] ** i for i in range(order)] for point in X])
values = np.array([point[1] for point in X])
coefficients = np.linalg.solve(equations, values)
This sets up the simultaneous equations by substituting each point into a general polynomial. For order 3, the general polynomial is:
a * x ** 2 + b * x ** 1 + c * x ** 0 = y
It solves the system of simultaneous equations to find the coefficients; for order 3 we get the values of a, b and c, returned in ascending powers as [c, b, a] because the columns above were built as x ** 0, x ** 1, x ** 2.
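To make that concrete, here is the same system written out explicitly for the example points (purely illustrative; the array built by the list comprehension above is identical):

import numpy as np

equations = np.array([[1, 0, 0],    # point (0, 0):   c + 0*b + 0*a = 0
                      [1, 1, 1],    # point (1, 1):   c + 1*b + 1*a = 1
                      [1, 2, 4]])   # point (2, 0.5): c + 2*b + 4*a = 0.5
values = np.array([0, 1, 0.5])
print(np.linalg.solve(equations, values))   # [ 0.    1.75 -0.75]  ->  y = 1.75*x - 0.75*x**2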
print('coefficients', list(coefficients))
coefficients [0.0, 1.75, -0.75]
p = Polynomial(*coefficients)
Here, the * operator splits the elements of the array-like into individual values to be passed as arguments to Polynomial().
print(p)
1.75 * X - 0.75 * X**2
print(p(x))
-1.5
To install PyPolynomial with pip, use:
for Python 2:
pip install PyPolynomial
for Python 3:
pip3 install PyPolynomial
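If you would rather avoid the extra dependency, numpy's own polynomial class can stand in; a minimal sketch using numpy.polynomial.Polynomial, which also takes the coefficients in ascending order:

from numpy.polynomial import Polynomial

p = Polynomial(coefficients)   # coefficients are already [c, b, a] in ascending order
print(p.coef)                  # [ 0.    1.75 -0.75]
print(p(3))                    # -1.5, same value as the PyPolynomial version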
I'm needing to solve a whole range of 8x8 and 9x9 matrices, so I thought I could build a Python program to make the whole thing easier.
So far I have managed to create:
from __future__ import division
import numpy as np
def solveEqns(A,v):
def lu( A ):
#Factor A into LU by Gaussian elimination with scaled partial pivoting
n, m = np.shape( A )
if n != m:
print "Error: input matrix is not square"
return None
# Generate initial index vector
p = range( n )
# Determine the largest (in magnitude) element in each row. These
# factors are used to scale the pivot elements for comparison purposes
# when deciding which row to use as a pivot row.
s = [0] * n
for i in xrange( n ):
smax = 0.0
for j in xrange( n ):
smax = max( smax, abs( A[i][j] ) )
s[i] = smax
# Begin Gaussian elimination.
for k in xrange( n - 1 ):
# Find the remaining row with the largest scaled pivot.
rmax = 0.0
for i in xrange( k, n ):
r = abs( A[p[i][k]] / s[p[i]] )
if r > rmax:
rmax = r
j = i
# Row j has the largest scaled pivot, so "swap" that row with the
# current row (row k). The swap is not actually done by copying rows,
# but by swaping two entries in an index vector.
p[j], p[k] = ( p[k], p[j] )
# Now carry out the next elimination step as usual, except for the
# added complication of the index vector.
for i in xrange( k + 1, n ):
xmult = A[p[i],k] / A[p[k],k]
A[p[i],k] = xmult
for j in xrange( k + 1, n ):
A[p[i],j] = A[p[i],j] - xmult * A[p[k],j]
# All done, return factored matrix A and permutation vector p
return ( A, p )
def solve( A, p, b ):
#Solves Ax = b given an LU factored matrix A and permuation vector p
n, m = np.shape( A )
if n != m:
print "Error: input matrix is not square"
return None
# Forward solve
x = np.zeros( n )
for k in xrange( n - 1 ):
for i in xrange( k + 1, n ):
b[p[i]] = b[p[i]] - A[p[i],k] * b[p[k]]
# Backward solve
for i in xrange( n - 1, -1, -1 ):
sum = b[p[i]]
for j in xrange( i + 1, n ):
sum = sum - A[p[i],j] * x[j]
x[i] = sum / A[p[i],i]
# All done, return solution vector
return x
lu(A)
return solve(A,p,v)
def circuit():
A = np.array([[1,0,0,0,0,8,0,0,0],[0,1,0,0,5,0,0,0,0],[0,1,0,0,5,0,0,0,0],[0,0,0,1,-1,1,0,0,0],[0,0,1,0,0,0,1,-1,0],[0,0,1,0,0,0,1,0,-1],[0,1,0,0,-1,0,0,0,1],[1,0,0,0,0,-1,0,1,0],[1,-1,0,1,0,0,0,0,0]])
v = np.array([9,-12,-0.5,0,0,0,0,0,0])
I = solveEqns(A,v)
return I
to solve the 9x9 matrix A at the end. This is one of the easier ones I need to solve, so I can solve it outside of Python to check whether the results coming through are accurate.
I'm getting a traceback error on line 26:
Traceback (most recent call last):
File "<ipython-input-110-6daf773db1e3>", line 1, in <module>
solveEqns(A,b)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 65, in solveEqns
lu(A)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 26, in lu
r = abs( A[p[i][k]] / s[p[i]] )
TypeError: 'int' object has no attribute '__getitem__'
and I can't figure out why it isn't pulling through a number from the matrix.
Any help would be greatly appreciated.
Thanks
Sam
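The TypeError itself comes from A[p[i][k]]: p is a plain list of row indices, so p[i] is an int and p[i][k] then tries to index into that integer. Line 26 presumably meant to index the matrix first, along these lines:

# likely intent of line 26: pick the permuted row of A first, then column k
r = abs(A[p[i]][k] / s[p[i]])   # or, since A is a NumPy array: abs(A[p[i], k] / s[p[i]])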
Alternatively, you might use Gauss elimination via scaled pivoting. The code is shown below.
import numpy as np
def gauss_pivot(a,b,tol=1.0e-12):
"""
x = gaussPivot(a,b,tol=1.0e-12).
Solves [a]{x} = {b} by Gauss elimination with
scaled row pivoting
"""
a = np.copy(a)
b = np.copy(b)
n = len(b)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
# Set up scale factors
s = np.zeros(n)
for i in range(n):
s[i] = max(np.abs(a[i,:])) # find the max of each row
for k in range(0, n-1): #pivot row
# Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if row k does not already hold the largest scaled pivot
a[[k,p+k],:] = a[[p+k, k],:]
b[k],b[p+k] = b[p+k],b[k]
s[k],s[p+k] = s[p+k],s[k]
# Elimination phase of matrix a
for i in range(k+1,n):
if a[i,k] != 0.0: # skip if a(i,k) is already zero
                lam = a[i,k]/a[k,k]
a[i,k:n] = a[i,k:n] - lam*a[k,k:n]
b[i] = b[i] - lam*b[k]
if abs(a[n-1,n-1]) < tol:
raise Exception("Matrix is singular")
# Back substitution phase, solution is substituted by b
x = np.zeros_like(b)
x[n-1] = b[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
x[k] = (b[k] - np.dot(a[k,k+1:n],x[k+1:n]))/a[k,k]
return x
a = np.random.randn(100,100)*10
b = np.random.randn(100)*10
x = gauss_pivot(a,b)
if np.allclose(np.dot(a,x), b):
print("x is the correct solution")
If you want the code to run a bit faster, you might replace x by b, so that upon function return b contains the solution.
You might also slightly modify the elimination phase so that the elements of matrix a below the diagonal are not zeroed, since they are irrelevant during the back substitution phase. The code then becomes as shown below:
import numpy as np
def gauss_pivot(a,b,tol=1.0e-12):
"""
x = gaussPivot(a,b,tol=1.0e-12).
Solves [a]{x} = {b} by Gauss elimination with
scaled row pivoting
"""
a = np.copy(a)
b = np.copy(b)
n = len(b)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
# Set up scale factors
s = np.zeros(n)
for i in range(n):
s[i] = max(np.abs(a[i,:])) # find the max of each row
for k in range(0, n-1): #pivot row
# Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if row k does not already hold the largest scaled pivot
a[[k,p+k],:] = a[[p+k, k],:]
b[k],b[p+k] = b[p+k],b[k]
s[k],s[p+k] = s[p+k],s[k]
# Elimination phase of matrix a
for i in range(k+1,n):
if a[i,k] != 0.0: # skip if a(i,k) is already zero
                lam = a[i,k]/a[k,k]
a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
b[i] = b[i] - lam*b[k]
if abs(a[n-1,n-1]) < tol:
raise Exception("Matrix is singular")
# Back substitution phase, solution is substituted by b
b[n-1] = b[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
b[k] = (b[k] - np.dot(a[k,k+1:n],b[k+1:n]))/a[k,k]
return b
To use LU decomposition instead, which is better suited when b contains more than one column, the LU code is shown below:
import numpy as np
def lu_decomp(a,tol=1.0e-9):
a = np.copy(a)
n = len(a)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
seq = np.arange(n, dtype=int)
s = np.zeros((n))
for i in range(n):
s[i] = max(abs(a[i,:]))
for k in range(0,n-1):
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if needed
a[[k,p+k],:] = a[[p+k, k],:]
s[k],s[p+k] = s[p+k],s[k]
seq[k], seq[p+k] = seq[p+k],seq[k]
# Elimination
for i in range(k+1,n):
if a[i,k] != 0.0:
lam = a[i,k]/a[k,k]
a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
a[i,k] = lam
return a,seq
def lu_solve(a,b,seq):
n = len(a)
x = b.copy()
for i in range(n):
x[i] = b[seq[i]]
# Solution
for k in range(1,n):
x[k] = x[k] - np.dot(a[k,0:k],x[0:k])
x[n-1] = x[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
x[k] = (x[k] - np.dot(a[k,k+1:n],x[k+1:n]))/a[k,k]
return x
a2 = np.random.randn(500,500)*100
b2 = np.random.randn(500,20)*100
a_decomposed, seq = lu_decomp(a2)
x2 = np.zeros_like(b2)
for col in range(b2.shape[1]):
x2[:,col] = lu_solve(a_decomposed, b2[:, col], seq)
if np.allclose(np.dot(a2,x2), b2):
print("x2 is the correct solution")
Both methods give the output:
Gauss Elimination
x is the correct solution
LU method
x2 is the correct solution
I recommend you use the scipy.linalg package: from scipy.linalg import solve, lu_factor, lu_solve. Its routines perform far faster for large matrices (a minimal sketch is shown after the numba snippet below).
You can also keep the same code as above but decorate the functions with numba's jit, so that performance for large matrices is much better:
from numba import jit
@jit
def gauss_pivot(a, b):
...
...
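For reference, a minimal sketch of the scipy route mentioned above; lu_factor/lu_solve handle the pivoting bookkeeping, and the factorisation can be reused for every right-hand side (the sizes here are just illustrative):

import numpy as np
from scipy.linalg import lu_factor, lu_solve

a = np.random.randn(500, 500) * 100
b = np.random.randn(500, 20) * 100

lu, piv = lu_factor(a)        # LU factorisation with partial pivoting
x = lu_solve((lu, piv), b)    # solves a @ x = b for all 20 right-hand sides at once

print(np.allclose(a @ x, b))  # True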
Acknowledgement: the code is inspired by the book Numerical Methods in Engineering with Python by Prof. Jaan Kiusalaas:
https://www.amazon.co.uk/Numerical-Methods-Engineering-Python-3/dp/1107033853/ref=sr_1_1?ie=UTF8&qid=1517845946&sr=8-1&keywords=numerical+method+in+science+and+engineering+with+python