I am trying to implement GMM clustering for both a 24-dimension and a 32-dimension feature vector, where the initial parameters are assigned by the K-means algorithm (K-means clustering provides the cluster centers, MU, only).
I am following this link, where it is implemented only for a 2D feature vector with predefined Mu and Sigma.
If anyone has code for GMM clustering, kindly post it.
A predefined GMM is also available in sklearn, but it does not give me the likelihood for each iteration. sklearn GMM
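As a side note, the closest workaround I know of in sklearn is to run GaussianMixture one EM step at a time with warm_start=True and record lower_bound_ after each call; a rough sketch (assuming lower_bound_ is an acceptable stand-in for the per-iteration log-likelihood, and using placeholder data):

import numpy as np
from sklearn.mixture import GaussianMixture

X = np.random.randn(500, 24)              # placeholder for the real 24-D feature matrix

gmm = GaussianMixture(n_components=8,
                      covariance_type='full',
                      init_params='kmeans',   # means initialised by k-means
                      warm_start=True,        # continue from the previous parameters on each fit()
                      max_iter=1)             # one EM step per fit() call

log_likelihoods = []
for _ in range(150):
    gmm.fit(X)
    log_likelihoods.append(gmm.lower_bound_)  # lower bound on the average log-likelihood after this step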
import numpy as np
from sklearn.metrics import pairwise_distances_argmin
import matplotlib.pyplot as plt

def kmeans(dataSet, k, c):
    # 1. Randomly choose initial cluster centers (c seeds the RNG)
    rng = np.random.RandomState(c)
    p = rng.permutation(dataSet.shape[0])[:k]
    centers = dataSet[p]
    while True:
        # 2. Assign each point to its nearest center
        labels = pairwise_distances_argmin(dataSet, centers)
        # 3. Recompute each center as the mean of its assigned points
        new_centers = np.array([dataSet[labels == i].mean(0) for i in range(k)])
        if np.all(centers == new_centers):
            break
        centers = new_centers
    # Per-cluster data, mixing weights and covariance matrices for the GMM initialisation
    cluster_data = [dataSet[labels == i] for i in range(k)]
    weights = []
    covs = []
    for i in range(k):
        weights.append(len(cluster_data[i]) * 1.0 / len(dataSet))
        covs.append(np.cov(np.array(cluster_data[i]).T))
    return centers, weights, covs, cluster_data
class gaussian_Mix_Model:

    def __init__(self, k = 8, eps = 0.0000001):
        self.k = k        ## number of clusters
        self.eps = eps    ## threshold to stop `epsilon`

    def calculate_Exp_Maxim(self, X, max_iters = 1000):
        # n = number of data-points, d = dimension of data points
        n, d = X.shape
        # Initialize the means and covariances from k-means (seed 0 chosen for reproducibility)
        mu, _, Cov, cluster_data = kmeans(X, self.k, 0)
        # initialize the weights uniformly
        w = [1. / self.k] * self.k
        # responsibilities: R[i, k] = membership of point i in cluster k
        R = np.zeros((n, self.k))
        ### Log-likelihoods, one entry per iteration
        LLhoods = []
        # Multivariate normal density evaluated at every row of X
        P = lambda mu, s: np.linalg.det(s) ** -.5 * (2 * np.pi) ** (-X.shape[1] / 2.) \
            * np.exp(-.5 * np.einsum('ij, ij -> i',
                                     X - mu, np.dot(np.linalg.inv(s), (X - mu).T).T))
        # Iterate for at most max_iters iterations
        while len(LLhoods) < max_iters:
            # Expectation step
            ## unnormalised membership for each of the k clusters
            for k in range(self.k):
                R[:, k] = w[k] * P(mu[k], Cov[k])
            # Log-likelihood of the data under the current parameters
            LLhood = np.sum(np.log(np.sum(R, axis = 1)))
            # Store the log likelihood in the list.
            LLhoods.append(LLhood)
            # Normalise the responsibilities; N_ks = effective number of points per cluster
            R = (R.T / np.sum(R, axis = 1)).T
            N_ks = np.sum(R, axis = 0)
            # Maximization step: calculate the new parameters.
            for k in range(self.k):
                # Calculate the new means
                mu[k] = 1. / N_ks[k] * np.sum(R[:, k] * X.T, axis = 1).T
                x_mu = np.matrix(X - mu[k])
                # Calculate the new covariance
                Cov[k] = np.array(1. / N_ks[k] * np.dot(np.multiply(x_mu.T, R[:, k]), x_mu))
                # Calculate the new mixing weight Pi_k
                w[k] = 1. / n * N_ks[k]
            # Check for convergence of the log-likelihood
            if len(LLhoods) > 1 and np.abs(LLhood - LLhoods[-2]) < self.eps:
                break
        # Package the fitted parameters
        from collections import namedtuple
        Params = namedtuple('Params', ['mu', 'Cov', 'w', 'LLhoods', 'num_iters'])
        self.params = Params(mu = mu, Cov = Cov, w = w,
                             LLhoods = LLhoods, num_iters = len(LLhoods))
        return self.params
# Call the GMM to fit the model (the Class_1/2/3 figures would each come from fitting
# a separate model on that class's data; only the first is shown here)
gmm = gaussian_Mix_Model(3, 0.000001)
params = gmm.calculate_Exp_Maxim(X, max_iters = 150)
# Plotting of Log-Likelihood VS Iterations (params.LLhoods holds one value per EM iteration).
plt.plot(params.LLhoods)
plt.savefig('Dataset_2A_GMM_Class_1_K_16.png')
plt.clf()
The following code takes in a single value, x, and a list of points, X, and determines the value of the Lagrange polynomial through the list of points at the given x value.
import numpy as np

def chunkIt(seq, num):
    # Split seq into num roughly equal chunks
    avg = len(seq) / float(num)
    out = []
    last = 0.0
    while last < len(seq):
        out.append(seq[int(last):int(last + avg)])
        last += avg
    return out

def product(factors):
    # Product of all elements in factors
    p = 1
    for i in factors:
        p *= i
    return p

def Lagrange(x, X):
    T = np.zeros((2, len(X)))
    terms = []
    # Factors (x - x_j) / (x_i - x_j) of each Lagrange basis polynomial
    for i in range(len(X)):
        for j in range(len(X)):
            if i != j:
                terms.append((x - X[j][0]) / (X[i][0] - X[j][0]))
    # Multiply the factors belonging to the same basis polynomial together
    p = []
    for i in chunkIt(terms, len(X)):
        p.append(product(i))
    # Weight each basis polynomial by its y-value and sum
    for i in range(len(X)):
        T[0][i] = p[i]
        T[1][i] = X[i][1]
    list2 = []
    for i in range(len(X)):
        list2.append(T[0][i] * T[1][i])
    return sum(list2)
For example:
x, X = 3, [[0,0],[1,1],[2,0.5]]
gives a value of -1.5.
How do I modify this code to determine the equation of the polynomial through the list of points? i.e. if I put x = 'x' as the input, I want it to return -0.75x**2 + 1.75x [for the given example]
import numpy as np
from pypoly import Polynomial
x, X = 3, [[0, 0], [1, 1], [2, 0.5]]
order = len(X)
Here, order is the number of coefficients of the resulting Lagrange polynomial (its degree plus one). For your example, order is 3.
equations = np.array([[point[0] ** i for i in range(order)] for point in X])
values = np.array([point[1] for point in X])
coefficients = np.linalg.solve(equations, values)
This sets up the simultaneous equations by substituting each point into a general polynomial. For order 3, the general polynomial is:
a * x ** 2 + b * x ** 1 + c * x ** 0 = y
It solves the system of simultaneous equations to find the coefficients; for order 3 we get the values of a, b and c, returned in ascending powers as [c, b, a] because the columns above were built as x ** 0, x ** 1, x ** 2.
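To make that concrete, here is the same system written out explicitly for the example points (purely illustrative; the array built by the list comprehension above is identical):

import numpy as np

equations = np.array([[1, 0, 0],    # point (0, 0):   c + 0*b + 0*a = 0
                      [1, 1, 1],    # point (1, 1):   c + 1*b + 1*a = 1
                      [1, 2, 4]])   # point (2, 0.5): c + 2*b + 4*a = 0.5
values = np.array([0, 1, 0.5])
print(np.linalg.solve(equations, values))   # [ 0.    1.75 -0.75]  ->  y = 1.75*x - 0.75*x**2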
print('coefficients', list(coefficients))
coefficients [0.0, 1.75, -0.75]
p = Polynomial(*coefficients)
Here, the * operator splits the elements of the array-like into individual values to be passed as arguments to Polynomial().
print(p)
1.75 * X - 0.75 * X**2
print(p(x))
-1.5
To install PyPolynomial with pip, use:
for Python 2:
pip install PyPolynomial
for Python 3:
pip3 install PyPolynomial
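If you would rather avoid the extra dependency, numpy's own polynomial class can stand in; a minimal sketch using numpy.polynomial.Polynomial, which also takes the coefficients in ascending order:

from numpy.polynomial import Polynomial

p = Polynomial(coefficients)   # coefficients are already [c, b, a] in ascending order
print(p.coef)                  # [ 0.    1.75 -0.75]
print(p(3))                    # -1.5, same value as the PyPolynomial version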
I'm needing to solve a whole range of 8x8 and 9x9 matrices, so I thought I could build a Python program to make the whole thing easier.
So far I have managed to create:
from __future__ import division
import numpy as np
def solveEqns(A,v):
def lu( A ):
#Factor A into LU by Gaussian elimination with scaled partial pivoting
n, m = np.shape( A )
if n != m:
print "Error: input matrix is not square"
return None
# Generate initial index vector
p = range( n )
# Determine the largest (in magnitude) element in each row. These
# factors are used to scale the pivot elements for comparison purposes
# when deciding which row to use as a pivot row.
s = [0] * n
for i in xrange( n ):
smax = 0.0
for j in xrange( n ):
smax = max( smax, abs( A[i][j] ) )
s[i] = smax
# Begin Gaussian elimination.
for k in xrange( n - 1 ):
# Find the remaining row with the largest scaled pivot.
rmax = 0.0
for i in xrange( k, n ):
r = abs( A[p[i][k]] / s[p[i]] )
if r > rmax:
rmax = r
j = i
# Row j has the largest scaled pivot, so "swap" that row with the
# current row (row k). The swap is not actually done by copying rows,
# but by swaping two entries in an index vector.
p[j], p[k] = ( p[k], p[j] )
# Now carry out the next elimination step as usual, except for the
# added complication of the index vector.
for i in xrange( k + 1, n ):
xmult = A[p[i],k] / A[p[k],k]
A[p[i],k] = xmult
for j in xrange( k + 1, n ):
A[p[i],j] = A[p[i],j] - xmult * A[p[k],j]
# All done, return factored matrix A and permutation vector p
return ( A, p )
def solve( A, p, b ):
#Solves Ax = b given an LU factored matrix A and permuation vector p
n, m = np.shape( A )
if n != m:
print "Error: input matrix is not square"
return None
# Forward solve
x = np.zeros( n )
for k in xrange( n - 1 ):
for i in xrange( k + 1, n ):
b[p[i]] = b[p[i]] - A[p[i],k] * b[p[k]]
# Backward solve
for i in xrange( n - 1, -1, -1 ):
sum = b[p[i]]
for j in xrange( i + 1, n ):
sum = sum - A[p[i],j] * x[j]
x[i] = sum / A[p[i],i]
# All done, return solution vector
return x
lu(A)
return solve(A,p,v)
def circuit():
A = np.array([[1,0,0,0,0,8,0,0,0],[0,1,0,0,5,0,0,0,0],[0,1,0,0,5,0,0,0,0],[0,0,0,1,-1,1,0,0,0],[0,0,1,0,0,0,1,-1,0],[0,0,1,0,0,0,1,0,-1],[0,1,0,0,-1,0,0,0,1],[1,0,0,0,0,-1,0,1,0],[1,-1,0,1,0,0,0,0,0]])
v = np.array([9,-12,-0.5,0,0,0,0,0,0])
I = solveEqns(A,v)
return I
to solve the 9x9 matrix A at the end. This is one of the easier ones I need to solve, so I can solve it outside of Python to check whether the results coming through are accurate.
I'm getting a traceback error on line 26:
Traceback (most recent call last):
File "<ipython-input-110-6daf773db1e3>", line 1, in <module>
solveEqns(A,b)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 65, in solveEqns
lu(A)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 26, in lu
r = abs( A[p[i][k]] / s[p[i]] )
TypeError: 'int' object has no attribute '__getitem__'
and I can't figure out why it isn't pulling through a number from the matrix.
Any help would be greatly appreciated.
Thanks
Sam
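The TypeError itself comes from A[p[i][k]]: p is a plain list of row indices, so p[i] is an int and p[i][k] then tries to index into that integer. Line 26 presumably meant to index the matrix first, along these lines:

# likely intent of line 26: pick the permuted row of A first, then column k
r = abs(A[p[i]][k] / s[p[i]])   # or, since A is a NumPy array: abs(A[p[i], k] / s[p[i]])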
Alternatively, you might use Gauss elimination via scaled pivoting. The code is shown below.
import numpy as np
def gauss_pivot(a,b,tol=1.0e-12):
"""
x = gaussPivot(a,b,tol=1.0e-12).
Solves [a]{x} = {b} by Gauss elimination with
scaled row pivoting
"""
a = np.copy(a)
b = np.copy(b)
n = len(b)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
# Set up scale factors
s = np.zeros(n)
for i in range(n):
s[i] = max(np.abs(a[i,:])) # find the max of each row
for k in range(0, n-1): #pivot row
# Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if row k does not already hold the largest scaled pivot
a[[k,p+k],:] = a[[p+k, k],:]
b[k],b[p+k] = b[p+k],b[k]
s[k],s[p+k] = s[p+k],s[k]
# Elimination phase of matrix a
for i in range(k+1,n):
if a[i,k] != 0.0: # skip if a(i,k) is already zero
                lam = a[i,k]/a[k,k]
a[i,k:n] = a[i,k:n] - lam*a[k,k:n]
b[i] = b[i] - lam*b[k]
if abs(a[n-1,n-1]) < tol:
raise Exception("Matrix is singular")
# Back substitution phase, solution is substituted by b
x = np.zeros_like(b)
x[n-1] = b[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
x[k] = (b[k] - np.dot(a[k,k+1:n],x[k+1:n]))/a[k,k]
return x
a = np.random.randn(100,100)*10
b = np.random.randn(100)*10
x = gauss_pivot(a,b)
if np.allclose(np.dot(a,x), b):
print("x is the correct solution")
If you want the code to run a bit faster, you might replace x by b, so that upon function return b contains the solution.
You might also slightly modify the elimination phase so that the elements of matrix a below the diagonal are not zeroed, since they are irrelevant during the back substitution phase. The code then becomes as shown below:
import numpy as np
def gauss_pivot(a,b,tol=1.0e-12):
"""
x = gaussPivot(a,b,tol=1.0e-12).
Solves [a]{x} = {b} by Gauss elimination with
scaled row pivoting
"""
a = np.copy(a)
b = np.copy(b)
n = len(b)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
# Set up scale factors
s = np.zeros(n)
for i in range(n):
s[i] = max(np.abs(a[i,:])) # find the max of each row
for k in range(0, n-1): #pivot row
# Row interchange, if needed
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if row k does not already hold the largest scaled pivot
a[[k,p+k],:] = a[[p+k, k],:]
b[k],b[p+k] = b[p+k],b[k]
s[k],s[p+k] = s[p+k],s[k]
# Elimination phase of matrix a
for i in range(k+1,n):
if a[i,k] != 0.0: # skip if a(i,k) is already zero
                lam = a[i,k]/a[k,k]
a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
b[i] = b[i] - lam*b[k]
if abs(a[n-1,n-1]) < tol:
raise Exception("Matrix is singular")
# Back substitution phase, solution is substituted by b
b[n-1] = b[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
b[k] = (b[k] - np.dot(a[k,k+1:n],b[k+1:n]))/a[k,k]
return b
To use LU decomposition instead, which is better suited when b contains more than one column, the LU code is shown below:
import numpy as np
def lu_decomp(a,tol=1.0e-9):
a = np.copy(a)
n = len(a)
assert (np.all(np.shape(a) ==(n,n))) # check if a is a square matrix
seq = np.arange(n, dtype=int)
s = np.zeros((n))
for i in range(n):
s[i] = max(abs(a[i,:]))
for k in range(0,n-1):
        p = np.argmax(np.abs(a[k:n,k])/s[k:n]) # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p+k,k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0: # swap rows k and p+k if needed
a[[k,p+k],:] = a[[p+k, k],:]
s[k],s[p+k] = s[p+k],s[k]
seq[k], seq[p+k] = seq[p+k],seq[k]
# Elimination
for i in range(k+1,n):
if a[i,k] != 0.0:
lam = a[i,k]/a[k,k]
a[i,k+1:n] = a[i,k+1:n] - lam*a[k,k+1:n]
a[i,k] = lam
return a,seq
def lu_solve(a,b,seq):
n = len(a)
x = b.copy()
for i in range(n):
x[i] = b[seq[i]]
# Solution
for k in range(1,n):
x[k] = x[k] - np.dot(a[k,0:k],x[0:k])
x[n-1] = x[n-1]/a[n-1,n-1]
for k in range(n-2,-1,-1):
x[k] = (x[k] - np.dot(a[k,k+1:n],x[k+1:n]))/a[k,k]
return x
a2 = np.random.randn(500,500)*100
b2 = np.random.randn(500,20)*100
a_decomposed, seq = lu_decomp(a2)
x2 = np.zeros_like(b2)
for col in range(b2.shape[1]):
x2[:,col] = lu_solve(a_decomposed, b2[:, col], seq)
if np.allclose(np.dot(a2,x2), b2):
print("x2 is the correct solution")
Both methods give the output:
Gauss Elimination
x is the correct solution
LU method
x2 is the correct solution
I recommend you use the scipy.linalg package: from scipy.linalg import solve, lu_factor, lu_solve. Its routines perform far faster for large matrices (a minimal sketch is shown after the numba snippet below).
You can also keep the same code as above but decorate the functions with numba's jit, so that performance for large matrices is much better:
from numba import jit
@jit
def gauss_pivot(a, b):
...
...
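For reference, a minimal sketch of the scipy route mentioned above; lu_factor/lu_solve handle the pivoting bookkeeping, and the factorisation can be reused for every right-hand side (the sizes here are just illustrative):

import numpy as np
from scipy.linalg import lu_factor, lu_solve

a = np.random.randn(500, 500) * 100
b = np.random.randn(500, 20) * 100

lu, piv = lu_factor(a)        # LU factorisation with partial pivoting
x = lu_solve((lu, piv), b)    # solves a @ x = b for all 20 right-hand sides at once

print(np.allclose(a @ x, b))  # True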
Acknowledgement: the code is inspired by the book Numerical Methods in Engineering with Python by Prof. Jaan Kiusalaas:
https://www.amazon.co.uk/Numerical-Methods-Engineering-Python-3/dp/1107033853/ref=sr_1_1?ie=UTF8&qid=1517845946&sr=8-1&keywords=numerical+method+in+science+and+engineering+with+python