I was trying to code up an EM algorithm in python for clustering images of different types. My understanding of the EM algorithm is as follows:
Accordingly, I coded the same in python. Here's my code:
import numpy as np
import sys
from scipy import stats
# μ: mean vector ∈ R^(self.m x self.n)
# Σ: covariance matrix ∈ R^(self.m x self.n x self.n)
# π: probabilities of each component
class gmm:
    """Gaussian mixture model fitted with the EM algorithm.

    Attributes:
        m: number of mixture components.
        π: (m,) component weights (normalized to sum to 1).
        μ: (m, d) component means.
        Σ: (m, d, d) component covariance matrices.
        r: (n, m) responsibilities from the E-step.
        x: (n, d) training data (1-D input is reshaped to (n, 1)).
        n: number of training samples.
        d: data dimensionality.
    """

    # Small diagonal added to every covariance in each M-step so Σ stays
    # positive definite (same idea as sklearn's reg_covar, default 1e-6).
    # This prevents the singular-covariance collapse described above.
    REG_COVAR = 1e-6

    def __init__(self, n_components):
        self.m = n_components
        # BUG FIX: the random weights must be normalized to a distribution.
        self.π = np.random.random((self.m,))
        self.π /= self.π.sum()
        self.x = None
        self.Σ = None
        self.μ = None
        self.r = None
        self.n = None  # number of data points
        self.d = None  # data dimensionality

    @classmethod  # BUG FIX: was the plain comment '#classmethod', so the
    # first positional argument (x) was being consumed as `cls`.
    def N(cls, x, μ, Σ):
        """Density of N(μ, Σ) evaluated at x."""
        return stats.multivariate_normal(μ, Σ).pdf(x)

    def E_step(self):
        """Update responsibilities r[i, c] = P(component c | x_i)."""
        for i, x_i in enumerate(self.x):
            weighted = np.array(
                [self.π[c] * gmm.N(x_i, self.μ[c], self.Σ[c])
                 for c in range(self.m)]
            )
            self.r[i] = weighted / weighted.sum()

    def M_step(self):
        """Update π, μ, Σ from the current responsibilities."""
        m_c = np.sum(self.r, axis=0)  # effective points per component
        # BUG FIX: weights are m_c / n (fraction of the data), not m_c / m.
        self.π = m_c / self.n
        for c in range(self.m):
            # BUG FIX: update the mean first — the covariance must be
            # computed around the *new* mean, not the previous one.
            self.μ[c] = self.r[:, c].dot(self.x) / m_c[c]
            s2 = np.zeros((self.d, self.d))
            for i in range(self.n):
                δ = (self.x[i] - self.μ[c]).reshape(-1, 1)
                # BUG FIX: the covariance needs the outer product (d×d);
                # .dot gave the scalar inner product.
                s2 += self.r[i, c] * (δ @ δ.T)
            self.Σ[c] = s2 / m_c[c] + self.REG_COVAR * np.eye(self.d)

    def fit(self, x, iterations=10):
        """Run EM for `iterations` rounds on data of shape (n,) or (n, d)."""
        x = np.asarray(x, dtype=float)
        self.n = x.shape[0]
        # BUG FIX: 1-D input is n scalar samples (d = 1), not one n-dim
        # point; the original sized μ and Σ by the sample count.
        self.x = x.reshape(self.n, -1)
        self.d = self.x.shape[1]
        self.r = np.empty((self.n, self.m))
        self.μ = np.random.random((self.m, self.d))
        # A symmetric diagonally dominant matrix is positive definite.
        Sigma = [np.random.random((self.d, self.d)) for _ in range(self.m)]
        Sigma = [0.5 * (s + s.T) + 5 * np.eye(self.d) for s in Sigma]
        self.Σ = np.asarray(Sigma)
        for _ in range(iterations):
            self.E_step()
            self.M_step()

    def params(self):  # BUG FIX: was `def params():` — missing self.
        """Return the fitted (π, μ, Σ)."""
        return self.π, self.μ, self.Σ
if __name__ == '__main__':
    # BUG FIX: the original looped `range(data_dim)` (5) when sampling, so
    # only five points were drawn — far too few to fit a 6-component GMM.
    n_samples = 1000
    n_components = 6
    # Pools of samples from six different 1-D Gaussians.
    pools = [np.random.normal(0.5, 2, size=(300,)),
             np.random.normal(3, 2, size=(300,)),
             np.random.normal(-1, 0.1, size=(300,)),
             np.random.normal(2, 3.14159, size=(300,)),
             np.random.normal(0, 1, size=(300,)),
             np.random.normal(3.5, 5, size=(300,))]
    p = [0.1, 0.15, 0.22, 0.3, 0.2, 0.03]  # true mixing proportions
    vals = list(range(n_components))
    combined = []
    for _ in range(n_samples):
        choice = np.random.choice(vals, p=p)           # pick a component...
        combined.append(np.random.choice(pools[choice]))  # ...then a sample
    combined = np.array(combined)
    G = gmm(n_components=n_components)
    G.fit(combined)
    pi, mu, sigma = G.params()
    print(pi)
    print(mu)
    print(sigma)
Now comes the problem. While running the code, the covariance matrix Σ becomes singular after some iterations. Specifically, all the entries of Sigma become the same all of a sudden in a particular iteration.
I have tried adding some random noise to Σ while this happens, but this seems to only delay the inevitable.
Any help/comments will be greatly appreciated. Thanks in Advance :)
To prevent the covariance matrix from becoming singular, you could add a small regularization value along the diagonal of the matrix, i.e.
val * np.identity(size)
as this ensures that the covariance matrix will remain positive definite, and have an inverse. For instance, sklearn uses the default value 1e-6 for their regularization.
Related
I have been using DeepXDE (which is a framework for solving differential equations). I am particularly interested in implementing interface conditions, for example, to represent perfect thermal contact and heat flux continuity at an interface between two different solids.
Problem:
So far, I've considered a simple heat transfer problem, as if it were a rod composed of two different materials, with Dirichlet conditions at x=0 and x=L:
from x=0 to x=L/2, we have conductivity coefficient a_1 and temperature T_1(x,t);
from x=L/2 to x=L, we have coefficient a_2 and temperature T_2(x,t);
at the interface, we have to meet both T_1 - T_2 = 0 and a_1dT_1/dx + a_2dT_2/dx = 0 for x=L/2 and t>0.
Although I did not find a concise solution, I tried to implement this problem. But, I have some questions:
I found a way to enforce the heat flux continuity using geom.boundary_normal( ). But, the respective loss is not decreasing (in fact, it is constant). Is it correct to use geom.boundary_normal( )? Is there an alternative way?
I am struggling to come up with a way to enforce T_1 - T_2 = 0. How could I get the values of T_1 and T_2 at x=L/2 during model training?
My code is as follows:
# Libraries to import
import deepxde as dde
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Parameters and geometries for the two-material rod problem.
a1 = 1.14  # conductivity coefficient of solid 1 (0 <= x <= L/2)
a2 = 0.01  # conductivity coefficient 2 # fairly different from a1
L = 1.0  # total length of the rod
T0 = 0.0  # temperature specified at x=0 (Dirichlet)
TL = 1.0  # temperature specified at x=L (Dirichlet)
tend = 1.0  # final time for the simulation
geom1 = dde.geometry.Interval(0, L/2)  # first solid
geom2 = dde.geometry.Interval(L/2, L)  # second solid
timedomain = dde.geometry.TimeDomain(0, tend)
# NOTE(review): geom1|geom2 is the union of the two intervals, spanning
# [0, L] — confirm this is the intended single training geometry.
geomtime = dde.geometry.GeometryXTime(geom1|geom2, timedomain)
# Models and the respective domains
def pde_T1(x, y, _):
    """Residual of the heat equation dT/dt - a1 * d2T/dx2 in solid 1."""
    time_deriv = dde.grad.jacobian(y, x, i=0, j=1)
    space_deriv2 = dde.grad.hessian(y, x, i=0, j=0)
    return time_deriv - a1 * space_deriv2
def pde_T2(x, y, _):
    """Residual of the heat equation dT/dt - a2 * d2T/dx2 in solid 2."""
    time_deriv = dde.grad.jacobian(y, x, i=0, j=1)
    space_deriv2 = dde.grad.hessian(y, x, i=0, j=0)
    return time_deriv - a2 * space_deriv2
def on_domain1(x, on_domain):
    # True when x lies inside the first interval [0, L/2].
    # NOTE(review): geom1.inside returns an array; [0] extracts the scalar.
    return geom1.inside(x)[0]
def on_domain2(x, on_domain):
    # True when x lies inside the second interval [L/2, L].
    return geom2.inside(x)[0]
# Boundary and initial conditions
def on_boundary1(x, on_boundary):
    """Select boundary points at the left end x = 0."""
    at_left_end = np.isclose(x[0], 0)
    return on_boundary and at_left_end

def on_boundary2(x, on_boundary):
    """Select boundary points at the right end x = L."""
    at_right_end = np.isclose(x[0], L)
    return on_boundary and at_right_end
def boundary_initial(x, on_initial):
    """Select points on the initial-time slice t = 0 (time is x[1])."""
    at_t0 = np.isclose(x[1], 0)
    return on_initial and at_t0
# interface conditions
def on_interf(x, on_boundary):
    # Select points on the material interface x = L/2.
    return on_boundary and np.isclose(x[0], L/2)
def flux_int(x,y,X):
    # I need help here.
    # Intended residual of flux continuity a1*dT1/dx + a2*dT2/dx = 0.
    # NOTE(review): boundary_normal returns the outward unit normal of the
    # geometry, not a temperature gradient — this residual is independent
    # of the network output, which would explain the constant loss.
    return (a1*geom1.boundary_normal(X) + a2*geom2.boundary_normal(X)).reshape(-1,1)
def Temp_int(x,y,X):
    # I need help here.
    # Intended residual of temperature continuity T1 - T2 = 0 at x = L/2.
    # T1_int: how to get from geom1 at x=L/2?
    # T2_int = how to get from geom2 at x=L/2?
    pass
# Setting the IC
def init_func(X):
    """Piecewise-linear initial temperature.

    T0 on [0, L/2], then a linear ramp from T0 at x = L/2 up to TL at
    x = L, matching the Dirichlet data at both ends.
    """
    x = X[:, 0:1]
    t = np.zeros((len(X), 1))
    for count, x_ in enumerate(x):
        if x_ < L/2:
            t[count] = T0
        else:
            # BUG FIX: `Ts` was undefined (NameError). The prescribed value
            # at x = L is TL, so the ramp must use TL.
            t[count] = T0 + 2*(TL - T0) * (x_ - L/2)
    return t
ic = dde.IC(geomtime, init_func, boundary_initial)
# Setting the BCs
# NOTE(review): `geomtime1` / `geomtime2` are never defined in this script
# (only `geomtime` exists), so the next four lines raise NameError as
# written — presumably GeometryXTime objects for geom1/geom2 were intended.
pde1 = dde.OperatorBC(geomtime1, pde_T1, on_boundary = on_domain1)
pde2 = dde.OperatorBC(geomtime2, pde_T2, on_boundary = on_domain2)
bc1 = dde.icbc.DirichletBC(geomtime1, lambda x: T0*np.ones((len(x),1)), on_boundary1)
bc2 = dde.icbc.DirichletBC(geomtime2, lambda x: TL*np.ones((len(x),1)), on_boundary2) # not used in loss
# Setting the BC at the interface with 500 points: a (500, 2) array of
# (x = L/2, random t) anchor points.
X = np.hstack( (np.full((500), L/2).reshape(-1,1), timedomain.random_points(500))).reshape(-1, 2)
FluxInterf = dde.icbc.PointSetOperatorBC(X,
                                         np.zeros((X.shape[0],1)), # fluxes must add up to zero at x=L/2.
                                         lambda x, y, X : flux_int(x, y, X[:,0]))
# Setting the problem: PDE residuals enter as extra loss terms (bc2 is
# enforced exactly by the output transform below instead).
loss = [pde1, pde2, bc1, ic, FluxInterf]
data = dde.data.TimePDE(
    geomtime,
    None,
    loss,
    num_domain=1000,
    num_boundary=500,
    num_initial=500,
    num_test=500)
loss_weights = [10, 10, 0.1, 0.1, 100]  # one weight per entry of `loss`
net = dde.nn.FNN([2] + 4 * [50] + [1], "tanh", "Glorot normal")
# Enforcing BC at x=L as a hard constraint on the network output
def output_transform(x, y):
    """Transform the raw network output so that T(L, t) = TL exactly.

    At xx = L the (L - xx) factor vanishes and the output equals TL.
    """
    xx = x[:, 0:1]
    # BUG FIX: `Ts` was undefined (NameError); the prescribed temperature
    # at x = L is TL. (The unused unpacking of t = x[:,1:2] was removed.)
    return (L - xx)*y + TL
net.apply_output_transform(output_transform)
model = dde.Model(data, net)
model.compile("adam", lr=1.0e-3, loss_weights = loss_weights)
losshistory, train_state = model.train(iterations=25000)
# Fine-tune with L-BFGS after the Adam pass (standard DeepXDE workflow).
model.compile("L-BFGS")
losshistory, train_state = model.train()
dde.saveplot(losshistory, train_state, issave=True, isplot=True)
Thank you for your time and consideration.
Best regards.
I've been trying to implement an incremental weighted mean and covariance calculator. So given a data matrix X of size (N,D) and weights of size (N,), I need to be able to get its weighted statistics (mean vector of size (D,)) and covariance matrix of size (D,D)) by running update in the class below
I've gotten the weighted mean part but I'm unsure how to proceed with calculating the covariance matrix in this manner.
import numpy as np
class OnlineWeightStats:
    """Incrementally accumulates the weighted mean of the rows of X."""

    def __init__(self, X):
        _, n_features = X.shape
        self.data_mat = X
        self.mean = np.zeros(n_features)
        self.wsum = 1e-7  # tiny initial mass avoids division by zero

    def update(self, weights=None):
        """Fold every row of the stored data into the running weighted mean."""
        if weights is None:
            weights = np.ones(self.data_mat.shape[0])
        for row, weight in zip(self.data_mat, weights):
            self.wsum += weight
            frac = weight / self.wsum
            self.mean += (row - self.mean) * frac
if __name__ == "__main__":
    N = 10000  # number of samples
    D = 2      # number of features
    X = np.random.rand(N, D)
    # Define OnlineStats
    ows = OnlineWeightStats(X)
    # Update 1
    ows.update()  # No weights, so just calculates standard mean
    # Change weights and update again
    # NOTE(review): update() folds the *whole* data matrix in again — the
    # statistics accumulate over every call rather than being recomputed.
    new_weights = np.random.uniform(0, 1, N)
    ows.update(new_weights)
    # Change weights and update
......
Got the solution. I'm thinking this is the best that Python (with numpy) can do unless we count writing it as a C/C++ extension.
import numpy as np
class OnlineWeightStats:
    """Incremental weighted mean and covariance of the rows of X
    (West's online update)."""

    def __init__(self, X):
        _, n_features = X.shape
        self.data_mat = X
        self.mean = np.zeros(n_features)
        self.wsum = 1e-7  # tiny initial mass avoids division by zero
        self.xpsum = np.zeros((n_features, n_features))  # Sum of cross-products

    def update(self, weights=None):
        """Fold every row into the running weighted mean and covariance."""
        if weights is None:
            weights = np.ones(self.data_mat.shape[0])
        for row, weight in zip(self.data_mat, weights):
            self.wsum += weight
            frac = weight / self.wsum
            delta = row - self.mean
            # Weighted mean computation
            self.mean += delta * frac
            # Weighted covariance computation: delta is w.r.t. the *old*
            # mean; the (1 - frac) factor makes this delta_old * delta_new.
            self.xpsum += np.outer(delta, delta) * weight * (1 - frac)
        self.cov = self.xpsum / self.wsum
I am trying to evaluate the integral using the Monte Carlo method where the integrand underwent a transformation from cylindrical to cartesian coordinates. The integrand itself is quite simple and could be calculated using scipy.integrate.quad, but I need it to be in cartesian coordinates for a specific purpose later on.
So here is the integrand: rho*k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2) d rho
Here the kn(i,rho) is modified Bessel function of 2nd kind.
Solving it with quad gives the following result:
from scipy.special import kn
from scipy.integrate import quad
import random
# Physical constants shared by every integration routine below.
k_r = 6.2e-2  # radial wavenumber
k_n = k_r/1.05  # second wavenumber, slightly smaller than k_r
# NOTE(review): `np` is used here but `import numpy as np` is not shown in
# this snippet — it must be imported for C_factor to evaluate.
C_factor = 2*np.pi*1e5  # overall scale; includes the analytic 2π from dφ
lmax,lmin = 80,50  # integration limits for ρ
def integration_polar(k_r=6.2e-2, k_n=6.2e-2/1.05, C_factor=2*np.pi*1e5,
                      lmin=50, lmax=80):
    """Reference value of the integral via adaptive quadrature.

    Gamma = C_factor * ∫ ρ k_r² (k_r² K0(k_r ρ)² + k_n² K1(k_r ρ)²) dρ
    over [lmin, lmax]. Parameters default to the module's constants, so a
    no-argument call reproduces the original behaviour; they are exposed so
    the routine can be reused (and tested) with other settings.
    """
    def K_int(rho):
        return rho*k_r**2*(k_r**2*kn(0, k_r*rho)**2 + k_n**2*kn(1, k_r*rho)**2)

    # (The original also built an unused np.linspace grid here — removed.)
    I, _ = quad(K_int, lmin, lmax)
    Gamma = I*C_factor
    print("expected=", Gamma)
    return Gamma  # BUG FIX: the original computed Gamma but never returned it
Output: Expected = 7.641648442007296
Now the same integral using Monte Carlo method (hit-or-miss method looked up from here) gives almost the same result:
def integration_polar_MC(k_r=6.2e-2, k_n=6.2e-2/1.05, C_factor=2*np.pi*1e5,
                         lmin=50, lmax=80, n=100000, seed=1):
    """Hit-or-miss Monte Carlo estimate of the polar-coordinate integral.

    Defaults reproduce the original script exactly (seeded, hence
    deterministic). Returns the estimate Gamma.
    """
    random.seed(seed)

    def K_int(rho):
        return rho*k_r**2*(k_r**2*kn(0, k_r*rho)**2 + k_n**2*kn(1, k_r*rho)**2)

    def sampler():
        x = random.uniform(lmin, lmax)
        y = random.uniform(0, c_lim)
        return x, y

    c_lim = 2*K_int(lmin)  # upper bound of the integrand on [lmin, lmax]
    hits = 0
    for _ in range(n):
        x, y = sampler()
        # Count a hit when the sample falls on or under the curve.
        if y <= K_int(x):
            hits += 1
    Gamma = C_factor*(lmax-lmin)*c_lim*hits/n
    print("MC_integral_polar:", Gamma)
    return Gamma  # BUG FIX: the original printed but never returned the value
Output: MC_integral_polar = 7.637391399699502
Since Monte Carlo worked with this example, I thought the cartesian case would go smoothly as well but I couldn't get the right answer.
For the cartesian case, similarly as in previous case I've employed the hit-or-miss method, with rho = np.sqrt(x**2+y**2) and integrand becoming k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2) dx dy where domain over x and y:
-80 <= x <= 80
-80 <= y <= 80
50 <= np.sqrt(x**2+y**2) <= 80
Here is my attempt:
def integration_cartesian_MCtry(k_r=6.2e-2, k_n=6.2e-2/1.05,
                                C_factor=2*np.pi*1e5, n=100000, seed=1):
    """Hit-or-miss Monte Carlo of the same integral in cartesian coordinates.

    BUG FIX: in cartesian coordinates the angular integration is performed
    by the 2-D sampling itself, so the analytic 2π that was folded into
    C_factor must not be applied again — the original overestimated the
    result by exactly a factor of 2π. Returns the estimate Gamma.
    """
    random.seed(seed)
    lmin, lmax = -100, 100  # sampling box; the annulus lies inside it

    def K_int(x, y):
        rho = np.sqrt(x**2 + y**2)
        # The integrand is supported only on the annulus 50 <= rho <= 80.
        if 50 <= rho <= 80:
            return k_r**2*(k_r**2*kn(0, k_r*rho)**2 + k_n**2*kn(1, k_r*rho)**2)
        return 0

    def sampler():
        x = random.uniform(lmin, lmax)
        y = random.uniform(lmin, lmax)
        z = random.uniform(0, c_lim)
        return x, y, z

    c_lim = K_int(50, 0)  # maximum of the integrand (at the inner radius)
    hits = 0
    for _ in range(n):
        x, y, z = sampler()
        if z <= K_int(x, y):
            hits += 1
    # Divide by 2π to compensate for the dφ factor built into C_factor.
    Gamma = C_factor*(lmax-lmin)**2*c_lim*hits/n / (2*np.pi)
    print("MC_integral_cartesian:", Gamma)
    return Gamma
Output: MC_integral_cartesian = 48.83166430996952
As you can see Monte Carlo in cartesian overestimates the integral. I am not sure why it is happening but think that it may be related to the incorrect limits or domain over which I should integrate the function.
Any help appreciated as I am stuck without any progress for a few days.
Problem, as I said, is with jacobian. In case of polar, you have integration over
f(ρ)*ρ*dρ*dφ
You integrate over dφ analytically (your f(ρ) doesn't depend on φ), and get 2π
In the cartesian case there is no analytical integration, so it is over dx*dy, with no factor
of 2π. Here is code to illustrate it (Python 3.9.1, Windows 10 x64); it produced pretty much the same answer:
import numpy as np
from scipy.special import kn
k_r = 6.2e-2  # radial wavenumber
k_n = k_r/1.05  # second wavenumber
C_factor = 2*np.pi*1e5  # overall scale (contains the analytic 2π from dφ)
lmin = 50  # inner radius of the annulus
lmax = 80  # outer radius of the annulus
def integration_polar_MC(rng, n):
    """Hit-or-miss Monte Carlo estimate of the polar-form integral.

    rng: numpy random Generator supplying the samples.
    n:   number of samples to draw.
    """
    def K_int(rho):
        # Integrand ρ k_r² (k_r² K0² + k_n² K1²), supported on [50, 80].
        if 50 <= rho <= 80:
            return rho*k_r**2*(k_r**2*kn(0, k_r*rho)**2 + k_n**2*kn(1, k_r*rho)**2)
        return 0.0

    def sampler():
        return rng.uniform(lmin, lmax), rng.uniform(0.0, c_lim)

    c_lim = 2*K_int(50)  # upper bound of the integrand on the interval
    hits = 0
    for _ in range(n):
        rho_s, height = sampler()
        if height <= K_int(rho_s):
            hits += 1
    return C_factor*(lmax-lmin)*c_lim*hits/n
def integration_cartesian_MC(rng, n):
    """Hit-or-miss Monte Carlo of the integral in cartesian coordinates.

    The result is divided by 2π because C_factor already contains the
    analytic dφ factor, which does not apply in cartesian form.
    """
    def K_int(x, y):
        radius = np.hypot(x, y)
        if 50 <= radius <= 80:
            return k_r**2*(k_r**2*kn(0, k_r*radius)**2 + k_n**2*kn(1, k_r*radius)**2)
        return 0.0

    def sampler():
        return rng.uniform(lo, hi), rng.uniform(lo, hi), rng.uniform(0, c_lim)

    lo, hi = -100, 100    # sampling box (shadows the module-level lmin/lmax)
    c_lim = K_int(50, 0)  # maximum of the integrand
    hits = 0
    for _ in range(n):
        x, y, z = sampler()
        if z <= K_int(x, y):
            hits += 1
    Gamma = C_factor*(hi-lo)**2*c_lim*hits/n
    return Gamma/(2.0*np.pi)  # to compensate for 2π in the constant
# Drive both estimators with a shared Generator so results are comparable.
rng = np.random.default_rng()
q = integration_polar_MC(rng, 100000)
print("MC_integral_polar:", q)
q = integration_cartesian_MC(rng, 100000)
print("MC_integral_cart:", q)
I am trying to make my own CFD solver and one of the most computationally expensive parts is solving for the pressure term. One way to solve Poisson differential equations faster is by using a multigrid method. The basic recursive algorithm for this is:
function phi = V_Cycle(phi,f,h)
% Recursive V-Cycle Multigrid for solving the Poisson equation (\nabla^2 phi = f) on a uniform grid of spacing h
% Pre-Smoothing
phi = smoothing(phi,f,h);
% Compute Residual Errors
r = residual(phi,f,h);
% Restriction
rhs = restriction(r);
eps = zeros(size(rhs));
% stop recursion at smallest grid size, otherwise continue recursion
if smallest_grid_size_is_achieved
eps = smoothing(eps,rhs,2*h);
else
eps = V_Cycle(eps,rhs,2*h);
end
% Prolongation and Correction
phi = phi + prolongation(eps);
% Post-Smoothing
phi = smoothing(phi,f,h);
end
I've attempted to implement this algorithm myself (also at the end of this question) however it is very slow and doesn't give good results so evidently it is doing something wrong. I've been trying to find why for too long and I think it's just worthwhile seeing if anyone can help me.
If I use a grid size of 2^5 by 2^5 points, then it can solve it and give reasonable results. However, as soon as I go above this it takes exponentially longer to solve and basically get stuck at some level of inaccuracy, no matter how many V-Loops are performed. at 2^7 by 2^7 points, the code takes way too long to be useful.
I think my main issue is that my implementation of a jacobian iteration is using linear algebra to calculate the update at each step. This should, in general, be fast however, the update matrix A is an n*m sized matrix, and calculating the dot product of a 2^7 * 2^7 sized matrix is expensive. As most of the cells are just zeros, should I calculate the result using a different method?
if anyone has any experience in multigrid methods, I would appreciate any advice!
Thanks
my code:
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 29 16:24:16 2020
#author: mclea
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
from matplotlib import cm
def restrict(A):
    """
    Coarsen A to a grid roughly half the size in each dimension.

    Each interior coarse point is the average of the corresponding 2x2
    block of fine-grid points; the boundary ring is refilled via set_BC.
    """
    rows, cols = A.shape
    coarse_rows = int((rows-2)/2 + 2)
    coarse_cols = int((cols-2)/2 + 2)
    coarse = np.zeros((coarse_rows, coarse_cols))
    for r in range(1, coarse_rows-1):
        for c in range(1, coarse_cols-1):
            fr = (r-1)*2 + 1  # top-left corner of the source 2x2 block
            fc = (c-1)*2 + 1
            coarse[r, c] = np.average(A[fr:fr+2, fc:fc+2])
    return set_BC(coarse)
def interpolate_array(A):
    """
    Refine A to a grid roughly double the size in each dimension, using
    linear interpolation on normalized [0, 1] x [0, 1] coordinates.
    """
    rows, cols = A.shape
    fine_rows = int((rows-2)*2 + 2)
    fine_cols = int((cols-2)*2 + 2)
    # Normalized coordinates of the original samples.
    src_i = (np.indices(A.shape)[0]/(A.shape[0]-1)).flatten()
    src_j = (np.indices(A.shape)[1]/(A.shape[1]-1)).flatten()
    values = A.flatten()
    # Normalized coordinates of the refined grid.
    tgt_i = np.linspace(0, 1, fine_rows)
    tgt_j = np.linspace(0, 1, fine_cols)
    mesh_i, mesh_j = np.meshgrid(tgt_i, tgt_j)
    return griddata((src_i, src_j), values, (mesh_j, mesh_i), method="linear")
def adjacency_matrix(rows, cols):
    """
    Dense adjacency matrix of the rows-by-cols grid graph
    (4-neighbour connectivity, row-major node numbering).
    """
    size = rows*cols
    adj = np.zeros((size, size))
    for r in range(rows):
        for c in range(cols):
            node = r*cols + c
            if c > 0:  # horizontal neighbour (two inner diagonals)
                adj[node-1, node] = adj[node, node-1] = 1
            if r > 0:  # vertical neighbour (two outer diagonals)
                adj[node-cols, node] = adj[node, node-cols] = 1
    return adj
def create_differences_matrix(rows, cols):
    """
    Dense 5-point central-differences matrix for the rows-by-cols grid:
    4 on the diagonal, -1 at every grid-neighbour pair.
    """
    size = rows*cols
    diff = np.zeros((size, size))
    for r in range(rows):
        for c in range(cols):
            node = r*cols + c
            if c > 0:  # horizontal neighbour (two inner diagonals)
                diff[node-1, node] = diff[node, node-1] = -1
            if r > 0:  # vertical neighbour (two outer diagonals)
                diff[node-cols, node] = diff[node, node-cols] = -1
    np.fill_diagonal(diff, 4)
    return diff
def set_BC(A):
    """
    Copy each border line of A from its adjacent interior line, in place.

    Columns are set before rows, so the four corner values end up taken
    from the interior rows. Returns A for convenience.
    """
    left, right = A[:, 1], A[:, -2]
    A[:, 0] = left
    A[:, -1] = right
    top, bottom = A[1, :], A[-2, :]
    A[0, :] = top
    A[-1, :] = bottom
    return A
def create_A(n, m):
    """
    Build the pieces of the Jacobi splitting for an n-by-m grid: the
    central-differences matrix, the neighbour (L+U) matrix, and the
    inverse of the diagonal (I / 4).
    """
    neighbor = adjacency_matrix(n, m)
    diff = create_differences_matrix(n, m)
    inv_diag = np.zeros((n*m, n*m))
    np.fill_diagonal(inv_diag, 1/4)
    return diff, neighbor, inv_diag
def calc_RJ(rows, cols):
    """
    Jacobi iteration matrix R_J for the rows-by-cols grid: 0.25 at every
    grid-neighbour pair, zero elsewhere.
    """
    size = int(rows*cols)
    R = np.zeros((size, size))
    for r in range(rows):
        for c in range(cols):
            node = r*cols + c
            if c > 0:  # horizontal neighbour (two inner diagonals)
                R[node-1, node] = R[node, node-1] = 0.25
            if r > 0:  # vertical neighbour (two outer diagonals)
                R[node-cols, node] = R[node, node-cols] = 0.25
    return R
def jacobi_update(v, f, nsteps=1, max_err=1e-3):
    """
    Jacobi-iterate v toward the solution of nabla(v) = f, updating only the
    interior cells, until either nsteps iterations have run or the largest
    change in v drops below max_err.

    Returns (v, (steps_taken, final_max_change)).
    """
    rows, cols = v.shape
    rhs_inner = f[1:-1, 1:-1].flatten()
    A, LaddU, invD = create_A(rows-2, cols-2)
    Rj = calc_RJ(rows-2, cols-2)
    step = 0
    while True:
        step += 1
        prev = v.copy()
        interior = prev[1:-1, 1:-1].flatten()
        interior = Rj.dot(interior) + invD.dot(rhs_inner)
        v[1:-1, 1:-1] = interior.reshape(rows-2, cols-2)
        change = np.abs(v - prev).max()
        if step == nsteps or change < max_err:
            return v, (step, change)
def MGV(f, v):
    """
    One multigrid V-cycle for nabla(v) = f: smooth, restrict the residual,
    recurse on the coarser grid, prolongate the correction back, re-smooth.
    """
    # global A, r
    n = v.shape[0]
    m = v.shape[1]
    # If on the smallest grid size, compute the exact solution
    if n <= 6 or m <=6:
        v, info = jacobi_update(v, f, nsteps=1000)
        return v
    else:
        # pre-smoothing
        v, info = jacobi_update(v, f, nsteps=10, max_err=1e-1)
        # NOTE(review): create_A(n, m) builds a dense (n*m)x(n*m) matrix on
        # every call at every level — a likely source of the slowdown the
        # question describes at larger grid sizes.
        A = create_A(n, m)[0]
        # calculate residual
        # NOTE(review): the usual residual is f - A v; here it is A v - f,
        # and no grid-spacing (h^2) scaling appears anywhere — worth
        # confirming against the discretization used in jacobi_update.
        r = np.dot(A, v.flatten()) - f.flatten()
        r = r.reshape(n,m)
        # downsample residual error onto the coarser grid
        r = restrict(r)
        zero_array = np.zeros(r.shape)
        # interpolate the correction computed on a coarser grid
        d = interpolate_array(MGV(r, zero_array))
        # Add prolongated coarser grid solution onto the finer grid
        # (subtracted here, consistent with the A v - f residual sign above)
        v = v - d
        # post-smoothing
        v, info = jacobi_update(v, f, nsteps=10, max_err=1e-6)
        return v
sigma = 0  # NOTE(review): unused in this script
# Setting up the grid: 2^k interior points per side plus a boundary ring
k = 6
n = 2**k+2
m = 2**(k)+2
hx = 1/n  # grid spacings (not used by the solver below)
hy = 1/m
L = 1  # domain width
H = 1  # domain height
x = np.linspace(0, L, n)
y = np.linspace(0, H, m)
XX, YY = np.meshgrid(x, y)
# Setting up the initial conditions
f = np.ones((n,m))   # right-hand side of nabla(v) = f
v = np.zeros((n,m))  # initial guess
# How many V cycles to perform
# NOTE(review): a convergence threshold of 1 is very loose — the loop will
# typically stop as soon as one cycle changes v by less than 1.
err = 1
n_cycles = 10
loop = True
cycle = 0
# Perform V cycles until converged or reached the maximum
# number of cycles
while loop:
    cycle += 1
    v_new = MGV(f, v)
    if np.abs(v - v_new).max() < err:
        loop = False
    if cycle == n_cycles:
        loop = False
    v = v_new
print("Number of cycles " + str(cycle))
plt.contourf(v)
I realize that I'm not answering your question directly, but I do note that you have quite a few loops that will contribute some overhead cost. When optimizing code, I have found the following thread useful - particularly the line profiler thread. This way you can focus in on "high time cost" lines and then start to ask more specific questions regarding opportunities to optimize.
How do I get time of a Python program's execution?
BACKGROUND: I am trying to build a real-time drum simulation model, for which I need really fast matrix-vector products. My matrices are of the size ~5000-10000 rows/cols, out of which only 6 entries per row are non-zero, hence I am inclined to use sparse matrices. I am using scipy.sparse module. The iterations are as below.
Vjk_plus_sparse = Vjk_minus_sparse.transpose()
Vj = Vjk_plus_sparse.dot(constant)
np.put(Vj, Nr, 0.0)
Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat-Vjk_plus_sparse.multiply(end_gain)
Here, Vjk_plus_sparse, Vjk_minus_sparse and Vj_mat are sparse CSR matrices, Vj is a numpy array, and Uj is a numpy matrix where each row represents Uj(t). end_gain is an array which is a static numpy array for dampening of vibrations.
THE ISSUE: A single iteration takes about 3 ms for size = 4250. With the most significant
steps being the last 2 lines. They together take about 2.5 ms. I would ideally need it to run in 0.1 ms, which would be more than a 10x speedup. This is the maximum extent of vectorization possible for the problem, and I cannot parallelize as I am marching in time, at least physically it won't be accurate.
ATTEMPTS: I tried fiddling with the sparse data structures, and found best performance with all of them being CSR (Compressed Sparse Row), with the values as quoted above. I also tried to replace the multiply() method with a matrix multiplication, by repeating Vj, but that worsened the time, as the resultant operation would be a sparse*dense operation.
How can I speed this up within python itself? I am open to trying c++ as well, though migrating now would be a major pain. Also, since scipy is essentially based in c, would it even give that much of a speedup?
Added a complete runnable example
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches
import math
from mpl_toolkits import mplot3d
import numpy as np
import scipy.sparse as sp
import scipy.fftpack as spf
import matplotlib.animation as animation
import time
sqrt_3 = 1.73205080757  # precomputed sqrt(3); NOTE(review): unused below
class Pt:
    """A mesh node: position, assigned id, neighbour ids, and radial
    distance from the origin."""

    def __init__(self, x_0, y_0):
        self.x_0 = x_0
        self.y_0 = y_0
        self.id = -1          # assigned when the node joins the mesh
        self.neighbours = []  # ids of adjacent nodes
        self.distance = (x_0**2 + y_0**2)**0.5  # distance from the origin
class Circle:
    """A triangular-lattice mesh of Pt nodes covering a disc, with an
    optional outer rim of extra nodes."""

    def __init__(self,radius,center):
        self.radius = radius
        self.center = center  # Pt at the disc centre
        self.nodes = []       # all Pt nodes, indexed by their id

    def construct_mesh(self, unit):
        """Breadth-first fill of the disc with a triangular lattice of
        spacing 2*unit, starting from the centre node."""
        queue = [self.center]
        self.center.distance = 0
        curr_id = 0
        # The six lattice directions of a triangular (hexagonal) grid.
        delta = [(1.,0.), (1./2, (3**0.5)/2),(-1./2, (3**0.5)/2),(-1.,0.), (-1./2,-(3**0.5)/2), (1./2,- (3**0.5)/2)]
        node_dict = {}
        node_dict[(self.center.x_0,self.center.y_0)] = curr_id
        self.nodes.append(self.center)
        curr_id+=1
        while len(queue)!=0:
            curr_pt = queue[0]
            queue.pop(0)
            # self.nodes.append(curr_pt)
            # curr_id+=1
            for i in delta:
                temp_pt = Pt(curr_pt.x_0 + 2*unit*i[0], curr_pt.y_0 + 2*unit*i[1])
                temp_pt.id = curr_id
                temp_pt.distance = (temp_pt.x_0 ** 2 + temp_pt.y_0 ** 2)**0.5
                # curr_id+=1
                # Coordinates rounded to 5 decimals serve as dict keys to
                # deduplicate nodes reached along different lattice paths.
                if (round(temp_pt.x_0,5), round(temp_pt.y_0,5)) not in node_dict and temp_pt.distance <= self.radius:
                    # print(temp_pt.x_0, temp_pt.y_0)
                    self.nodes.append(temp_pt)
                    node_dict[(round(temp_pt.x_0,5), round(temp_pt.y_0,5))] = curr_id
                    curr_id+=1
                    queue.append(temp_pt)
                    curr_pt.neighbours.append(temp_pt.id)
                elif temp_pt.distance <= self.radius:
                    # Node already exists: just record the adjacency.
                    curr_pt.neighbours.append(node_dict[round(temp_pt.x_0,5), round(temp_pt.y_0,5)])
        # print(node_dict)

    def plot_neighbours(self, pt):
        """Scatter-plot pt together with its neighbours, axes to scale."""
        x = []
        y = []
        x.append(pt.x_0)
        y.append(pt.y_0)
        for i in (pt.neighbours):
            x.append(self.nodes[i].x_0)
            y.append(self.nodes[i].y_0)
        plt.scatter(x,y)
        plt.axis('scaled')

    def boundary_node_ids(self):
        """Ids of nodes with fewer than 6 neighbours (the disc edge)."""
        boundary_nodes = []
        for j in range(len(self.nodes)):
            if(len(self.nodes[j].neighbours) < 6):
                boundary_nodes.append(j)
        return boundary_nodes

    def add_rim(self, boundary_node_ids, unit):
        """Append one rim node radially outward (by `unit`) of every
        boundary node, link the pairs, and return the rim node ids."""
        c = self.center
        rim_ids = []
        N = len(self.nodes)
        for i in range(len(boundary_node_ids)):
            d = self.nodes[boundary_node_ids[i]].distance
            xp = self.nodes[boundary_node_ids[i]].x_0
            yp = self.nodes[boundary_node_ids[i]].y_0
            # Push the new node outward along the radial direction.
            xnew = xp + xp*unit/d
            ynew = yp + yp*unit/d
            new_point = Pt(xnew, ynew)
            new_point.id = N + i
            rim_ids.append(N+i)
            self.nodes.append(new_point)
            self.nodes[boundary_node_ids[i]].neighbours.append(new_point.id)
            self.nodes[N+i].neighbours.append(boundary_node_ids[i])
        return rim_ids
def find_nearest_point(mesh, pt):
    """Return the id of the mesh node whose radial distance from the origin
    is closest to pt's radial distance.

    NOTE(review): this compares distances from the origin only, not actual
    point-to-point distance — any node on the same radius matches equally.
    """
    distances_from_center = np.zeros(len(mesh.nodes))
    # BUG FIX: `xrange` is Python 2 only and raises NameError here — the
    # rest of this script is Python 3 (parenthesised print calls).
    for i in range(len(mesh.nodes)):
        distances_from_center[i] = mesh.nodes[i].distance
    target_distance = pt.distance
    closest_point_id = np.argmin(np.abs(distances_from_center - target_distance))
    return closest_point_id
def init_impulse(mesh, impulse, Vj, poi, roi):
    """Initialise the node velocities around the impact node `poi`.

    Each node gets max(0, impulse*(1 - r/roi)), where r is its distance to
    node poi; rim nodes (global Nr) are clamped to zero. Also returns the
    per-edge values Vj[i]/2 used to seed the junction matrix.
    NOTE(review): the caller passes a negative `impulse`, so max(0, ...)
    zeroes the region *inside* roi and keeps points beyond it — confirm the
    intended sign convention.
    """
    data = []
    for i in range(len(Vj)):
        r = ((mesh.nodes[i].x_0 - mesh.nodes[poi].x_0)**2 + (mesh.nodes[i].y_0 - mesh.nodes[poi].y_0)**2)**0.5
        Vj[i] = max(0, impulse*(1. - (r/roi)))
        if i in Nr:
            Vj[i] = 0.
        for k in mesh.nodes[i].neighbours:
            # BUG FIX: np.asscalar was deprecated and removed in NumPy 1.23;
            # float() is the supported equivalent.
            data.append(float(Vj[i])/2.)
    return Vj, data
r = 0.1016 #Radius of drum head
# rho = 2500 #Density of drum head
thickness = 0.001 #Thickness of membrane
# tension = 1500 #Tension in membrane in N
param = 0.9
c = (param/thickness)**(0.5) #Speed of wave in string
duration = 0.25  # simulated time in seconds
fs = 4000        # sample rate in Hz
delta = c/fs     # distance covered per sample; used as the mesh spacing
center = Pt(0,0)
point_of_impact = Pt(r/2., 0)
center.id = 0
mesh = Circle(r,center)
mesh.construct_mesh(delta)
N = len(mesh.nodes)
# Collect boundary nodes (fewer than 6 neighbours) ...
Nb = []
for j in range(N):
    if len(mesh.nodes[j].neighbours) < 6:
        Nb.append(j)
# ... and extend the mesh with a rim; Nr holds the rim node ids.
Nr = mesh.add_rim(Nb, delta)
N = len(mesh.nodes)
print(N)
# Build the sparse adjacency matrix from the neighbour lists.
row_ind = []
col_ind = []
for j in range(N):
    for k in mesh.nodes[j].neighbours:
        row_ind.append(j)
        col_ind.append(k)
data = np.ones(len(col_ind))
adj_mat_sparse = sp.csr_matrix((data, (row_ind, col_ind)), shape = (N,N))
# NOTE(review): sp.csr_matrix([N, N]) builds a 1x2 matrix from the *data*
# [N, N], not an N-by-N zero matrix; both placeholders are overwritten
# below, so this only works by accident.
Vjk_plus = sp.csr_matrix([N, N])
Vj = np.zeros([N,1])
Uj = np.zeros([int(duration*fs), N])  # Uj[t]: displacement of every node at step t
Vj_mat = sp.csc_matrix([N,N])
closest_point_id = find_nearest_point(mesh, point_of_impact)
Vj, Vjk_data = init_impulse(mesh, -10.0, Vj, closest_point_id, r/10.)
Vjk_minus_sparse = sp.csr_matrix((Vjk_data, (row_ind, col_ind)), shape = (N,N))
constant = (1./3)*np.ones([N,1])
# First explicit time step (t = 1), done once outside the main loop.
Vjk_plus = Vjk_minus_sparse.transpose()
np.put(Vj, Nr, 0.0)  # rim nodes are clamped
Uj[1] = Uj[0] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat - Vjk_plus
# NOTE(review): end_gain is all ones and the Nr entries are set to 1.0
# again, so the multiply by end_gain in the loop is currently a no-op.
end_gain = np.ones([N,1])
end_gain[Nr] = 1.0
# Main time-marching loop (t = 2 .. end). This is the hot loop the question
# asks about; the two sparse operations at the bottom dominate the cost.
for t in range(2,int(duration*fs)):
    Vjk_plus = Vjk_minus_sparse.transpose()
    Vj = Vjk_plus.dot(constant)  # combine incoming components (scaled by 1/3)
    np.put(Vj, Nr, 0.0)          # clamp the rim nodes
    Uj[t] = Uj[t-1] + np.transpose(Vj)/fs  # integrate velocity into displacement
    Vj_mat = adj_mat_sparse.multiply(Vj)   # scatter Vj onto every outgoing edge
    Vjk_minus_sparse = Vj_mat-Vjk_plus.multiply(end_gain)