BACKGROUND: I am trying to build a real-time drum simulation model, for which I need very fast matrix-vector products. My matrices are of size ~5000-10000 rows/cols, with only 6 non-zero entries per row, hence I am inclined to use sparse matrices. I am using the scipy.sparse module. The iterations are as below.
Vjk_plus_sparse = Vjk_minus_sparse.transpose()
Vj = Vjk_plus_sparse.dot(constant)
np.put(Vj, Nr, 0.0)
Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat-Vjk_plus_sparse.multiply(end_gain)
Here, Vjk_plus_sparse, Vjk_minus_sparse and Vj_mat are sparse CSR matrices, Vj is a numpy array, and Uj is a numpy matrix where each row represents Uj(t). end_gain is a static numpy array used to damp the vibrations.
THE ISSUE: A single iteration takes about 3 ms for size = 4250, with the last two lines being the most significant steps: together they take about 2.5 ms. I would ideally need it to run in 0.1 ms, which would be more than a 10x speedup. This is the maximum extent of vectorization possible for the problem, and I cannot parallelize as I am marching in time; at least physically it won't be accurate.
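For reference, the per-iteration timings above come from a plain time.perf_counter loop of roughly this shape (a sketch, not the exact harness; n_iters is a placeholder):
import time
n_iters = 1000
t0 = time.perf_counter()
for t in range(2, n_iters):
    # the five update lines above go here
    pass
print(1e3*(time.perf_counter() - t0)/(n_iters - 2), "ms per iteration")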
ATTEMPTS: I tried fiddling with the sparse data structures and found the best performance with all of them being CSR (Compressed Sparse Row), which gives the timings quoted above. I also tried to replace the multiply() method with a matrix multiplication by repeating Vj, but that worsened the time, as the resulting operation would be a sparse*dense multiplication.
How can I speed this up within Python itself? I am open to trying C++ as well, though migrating now would be a major pain. Also, since scipy is essentially implemented in C, would it even give that much of a speedup?
Added a complete runnable example:
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches
import math
from mpl_toolkits import mplot3d
import numpy as np
import scipy.sparse as sp
import scipy.fftpack as spf
import matplotlib.animation as animation
import time
sqrt_3 = 1.73205080757
class Pt:
def __init__(self,x_0,y_0):
self.x_0 = x_0
self.y_0 = y_0
self.id = -1
self.neighbours = []
self.distance = (x_0**2 + y_0**2)**0.5
class Circle:
def __init__(self,radius,center):
self.radius = radius
self.center = center
self.nodes = []
def construct_mesh(self, unit):
queue = [self.center]
self.center.distance = 0
curr_id = 0
delta = [(1.,0.), (1./2, (3**0.5)/2),(-1./2, (3**0.5)/2),(-1.,0.), (-1./2,-(3**0.5)/2), (1./2,- (3**0.5)/2)]
node_dict = {}
node_dict[(self.center.x_0,self.center.y_0)] = curr_id
self.nodes.append(self.center)
curr_id+=1
while len(queue)!=0:
curr_pt = queue[0]
queue.pop(0)
# self.nodes.append(curr_pt)
# curr_id+=1
for i in delta:
temp_pt = Pt(curr_pt.x_0 + 2*unit*i[0], curr_pt.y_0 + 2*unit*i[1])
temp_pt.id = curr_id
temp_pt.distance = (temp_pt.x_0 ** 2 + temp_pt.y_0 ** 2)**0.5
# curr_id+=1
if (round(temp_pt.x_0,5), round(temp_pt.y_0,5)) not in node_dict and temp_pt.distance <= self.radius:
# print(temp_pt.x_0, temp_pt.y_0)
self.nodes.append(temp_pt)
node_dict[(round(temp_pt.x_0,5), round(temp_pt.y_0,5))] = curr_id
curr_id+=1
queue.append(temp_pt)
curr_pt.neighbours.append(temp_pt.id)
elif temp_pt.distance <= self.radius:
curr_pt.neighbours.append(node_dict[round(temp_pt.x_0,5), round(temp_pt.y_0,5)])
# print(node_dict)
def plot_neighbours(self, pt):
x = []
y = []
x.append(pt.x_0)
y.append(pt.y_0)
for i in (pt.neighbours):
x.append(self.nodes[i].x_0)
y.append(self.nodes[i].y_0)
plt.scatter(x,y)
plt.axis('scaled')
def boundary_node_ids(self):
boundary_nodes = []
for j in range(len(self.nodes)):
if(len(self.nodes[j].neighbours) < 6):
boundary_nodes.append(j)
return boundary_nodes
def add_rim(self, boundary_node_ids, unit):
c = self.center
rim_ids = []
N = len(self.nodes)
for i in range(len(boundary_node_ids)):
d = self.nodes[boundary_node_ids[i]].distance
xp = self.nodes[boundary_node_ids[i]].x_0
yp = self.nodes[boundary_node_ids[i]].y_0
xnew = xp + xp*unit/d
ynew = yp + yp*unit/d
new_point = Pt(xnew, ynew)
new_point.id = N + i
rim_ids.append(N+i)
self.nodes.append(new_point)
self.nodes[boundary_node_ids[i]].neighbours.append(new_point.id)
self.nodes[N+i].neighbours.append(boundary_node_ids[i])
return rim_ids
def find_nearest_point(mesh, pt):
distances_from_center = np.zeros(len(mesh.nodes))
    for i in range(len(mesh.nodes)):
distances_from_center[i] = mesh.nodes[i].distance
target_distance = pt.distance
closest_point_id = np.argmin(np.abs(distances_from_center-target_distance))
return closest_point_id
def init_impulse(mesh, impulse, Vj, poi, roi):
data = []
for i in range(len(Vj)):
r = ((mesh.nodes[i].x_0 - mesh.nodes[poi].x_0)**2 + (mesh.nodes[i].y_0 - mesh.nodes[poi].y_0)**2)**0.5
Vj[i] = max(0, impulse*(1. - (r/roi)))
if i in Nr:
Vj[i] = 0.
for k in mesh.nodes[i].neighbours:
            data.append(float(Vj[i])/2.)
return Vj, data
r = 0.1016 #Radius of drum head
# rho = 2500 #Density of drum head
thickness = 0.001 #Thickness of membrane
# tension = 1500 #Tension in membrane in N
param = 0.9
c = (param/thickness)**(0.5) #Speed of wave in string
duration = 0.25
fs = 4000
delta = c/fs
center = Pt(0,0)
point_of_impact = Pt(r/2., 0)
center.id = 0
mesh = Circle(r,center)
mesh.construct_mesh(delta)
N = len(mesh.nodes)
Nb = []
for j in range(N):
if len(mesh.nodes[j].neighbours) < 6:
Nb.append(j)
Nr = mesh.add_rim(Nb, delta)
N = len(mesh.nodes)
print(N)
row_ind = []
col_ind = []
for j in range(N):
for k in mesh.nodes[j].neighbours:
row_ind.append(j)
col_ind.append(k)
data = np.ones(len(col_ind))
adj_mat_sparse = sp.csr_matrix((data, (row_ind, col_ind)), shape = (N,N))
Vjk_plus = sp.csr_matrix((N, N))
Vj = np.zeros([N,1])
Uj = np.zeros([int(duration*fs), N])
Vj_mat = sp.csc_matrix((N, N))
closest_point_id = find_nearest_point(mesh, point_of_impact)
Vj, Vjk_data = init_impulse(mesh, -10.0, Vj, closest_point_id, r/10.)
Vjk_minus_sparse = sp.csr_matrix((Vjk_data, (row_ind, col_ind)), shape = (N,N))
constant = (1./3)*np.ones([N,1])
Vjk_plus = Vjk_minus_sparse.transpose()
np.put(Vj, Nr, 0.0)
Uj[1] = Uj[0] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat - Vjk_plus
end_gain = np.ones([N,1])
end_gain[Nr] = 1.0
for t in range(2,int(duration*fs)):
Vjk_plus = Vjk_minus_sparse.transpose()
Vj = Vjk_plus.dot(constant)
np.put(Vj, Nr, 0.0)
Uj[t] = Uj[t-1] + np.transpose(Vj)/fs
Vj_mat = adj_mat_sparse.multiply(Vj)
Vjk_minus_sparse = Vj_mat-Vjk_plus.multiply(end_gain)
I was trying to code up an EM algorithm in Python for clustering images of different types. My understanding of the EM algorithm is as follows:
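For a mixture of m Gaussians: in the E-step, compute the responsibilities
r_ic = π_c * N(x_i | μ_c, Σ_c) / Σ_k π_k * N(x_i | μ_k, Σ_k)
and in the M-step, with m_c = Σ_i r_ic, re-estimate
π_c = m_c / n
μ_c = (1/m_c) * Σ_i r_ic * x_i
Σ_c = (1/m_c) * Σ_i r_ic * (x_i - μ_c)(x_i - μ_c)^T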
Accordingly, I coded the same in Python. Here's my code:
import numpy as np
import sys
from scipy import stats
# μ: mean vector ∈ R^(self.m x self.n)
# Σ: covariance matrix ∈ R^(self.m x self.n x self.n)
# π: probabilities of each component
class gmm:
def __init__(self,n_components):
self.m = n_components
self.π = np.random.random((self.m,))
self.x = None
self.Σ = None
self.μ = None
self.r = None
self.n = None # length of data
    @classmethod
def N(cls, x, μ, Σ):
#print(Σ)
#print('\n')
return stats.multivariate_normal(μ, Σ).pdf(x)
def E_step(self):
for i,x_i in enumerate(self.x):
den = 0
for c in range(self.m):
#print(self.Σ[c].shape)
#print(self.μ[c].shape)
#sys.exit()
den+= self.π[c]*gmm.N(x_i,self.μ[c],self.Σ[c])
#print(f'{den=}')
for c in range(self.m):
num = self.π[c]*gmm.N(x_i,self.μ[c],self.Σ[c])
self.r[i,c] = num/den
print(f'{self.r}\n')
def M_step(self):
m_c = np.sum(self.r, axis = 0)
self.π = m_c/self.m
for c in range(self.m):
s1 = 0
s2 = 0
for i in range(self.n):
s1+= self.r[i,c]*self.x[i]
s2+= self.r[i,c]*(self.x[i]-self.μ[c]).dot(self.x[i]-self.μ[c])
self.μ[c] = s1/m_c[c]
self.Σ[c] = s2/m_c[c]
def fit(self,x, iterations = 10):
self.x = x
self.n = x.shape[0]
self.r = np.empty((self.n, self.m))
self.μ = np.random.random((self.m, self.n))
Sigma = [np.random.random((self.n, self.n)) for i in range(self.m)]
Sigma = [0.5*(s + s.T)+5*np.eye(s.shape[0]) for s in Sigma] # A symmetric diagonally dominant matrix is PD
#print([np.linalg.det(s) for s in Sigma])
self.Σ = np.asarray(Sigma)
for i in range(iterations):
self.E_step()
self.M_step()
    def params(self):
return self.π, self.μ, self.Σ
if __name__ == '__main__':
data_dim = 5 # No. of data dimensions
data = [[]]*6
data[0] = np.random.normal(0.5,2, size = (300,))
data[1] = np.random.normal(3,2, size = (300,))
data[2] = np.random.normal(-1, 0.1, size = (300,))
data[3] = np.random.normal(2,3.14159, size = (300,))
data[4] = np.random.normal(0,1, size = (300,))
data[5] = np.random.normal(3.5,5, size = (300,))
p = [0.1, 0.15, 0.22, 0.3, 0.2, 0.03]
vals = [0,1,2,3,4,5]
combined = []
for i in range(data_dim):
choice = np.random.choice(vals, p = p)
combined.append(np.random.choice(data[choice]))
combined = np.array(combined)
G = gmm(n_components = 6)
G.fit(combined)
pi, mu, sigma = G.params()
print(pi)
print(mu)
print(sigma)
Now comes the problem. While running the code, the covariance matrix Σ becomes singular after some iterations. Specifically, all the entries of Σ suddenly become the same in a particular iteration.
I have tried adding some random noise to Σ while this happens, but this seems to only delay the inevitable.
Any help/comments will be greatly appreciated. Thanks in Advance :)
To prevent the covariance matrix from becoming singular, you could add a small value along the diagonal of the matrix, i.e.
val * np.identity(size)
as this ensures that the covariance matrix remains positive definite and has an inverse. For instance, sklearn uses a default value of 1e-6 for its regularization.
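In your M_step this would look something like the following (the 1e-6 mirrors sklearn's reg_covar default; any small value works):
# after computing self.Σ[c] in M_step:
self.Σ[c] += 1e-6 * np.eye(self.Σ[c].shape[0])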
I am trying to evaluate the integral using the Monte Carlo method where the integrand underwent a transformation from cylindrical to cartesian coordinates. The integrand itself is quite simple and could be calculated using scipy.integrate.quad, but I need it to be in cartesian coordinates for a specific purpose later on.
So here is the integrand: rho*k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2) dρ
Here kn(i, x) is the modified Bessel function of the second kind.
Solving it with quad gives the following result:
import numpy as np
from scipy.special import kn
from scipy.integrate import quad
import random
k_r = 6.2e-2
k_n = k_r/1.05
C_factor = 2*np.pi*1e5
lmax,lmin = 80,50
def integration_polar():
def K_int(rho):
return rho*k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2)
rho = np.linspace(lmin,lmax,200)
I,_ = quad(K_int,lmin,lmax)
Gamma = I*C_factor
print("expected=",Gamma)
Output: Expected = 7.641648442007296
Now the same integral using Monte Carlo method (hit-or-miss method looked up from here) gives almost the same result:
def integration_polar_MC():
random.seed(1)
n = 100000
def K_int(rho):
return rho*k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2)
def sampler():
x = random.uniform(lmin,lmax)
y = random.uniform(0,c_lim)
return x,y
c_lim = 2*K_int(50) #Upper limit of integrand
sum_I = 0
for i in range(n):
x,y = sampler()
func_Int = K_int(x)
if y>func_Int:
I = 0
elif y<=func_Int:
I = 1
sum_I += I
Gamma = C_factor*(lmax-lmin)*c_lim*sum_I/n
print("MC_integral_polar:",Gamma)
Output: MC_integral_polar = 7.637391399699502
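(The estimator works because the accepted fraction sum_I/n approximates the area under K_int relative to the (lmax-lmin) by c_lim sampling box, so (lmax-lmin)*c_lim*sum_I/n approximates the integral, which C_factor then scales.)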
Since Monte Carlo worked with this example, I thought the cartesian case would go smoothly as well but I couldn't get the right answer.
For the cartesian case, similarly to the previous case, I've employed the hit-or-miss method, with rho = np.sqrt(x**2+y**2) and the integrand becoming k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2) dx dy, where the domain over x and y is:
-80 <= x <= 80
-80 <= y <= 80
50 <= np.sqrt(x**2+y**2) <= 80
Here is my attempt:
def integration_cartesian_MCtry():
random.seed(1)
lmin,lmax = -100,100
n = 100000
def K_int(x,y):
rho = np.sqrt(x**2+y**2)
if rho>=50 and rho<=80:
return k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2)
else:
return 0
def sampler():
x = random.uniform(lmin,lmax)
y = random.uniform(lmin,lmax)
z = random.uniform(0,c_lim)
return x,y,z
c_lim = K_int(50,0)
sum_I = 0
for i in range(n):
x,y,z = sampler()
func_Int = K_int(x,y)
if z>func_Int:
I = 0
elif z<=func_Int:
I = 1
sum_I += I
Gamma = C_factor*(lmax-lmin)**2*c_lim*sum_I/n
print("MC_integral_cartesian:",Gamma)
Output: MC_integral_cartesian = 48.83166430996952
As you can see, Monte Carlo in cartesian coordinates overestimates the integral. I am not sure why this is happening, but I think it may be related to incorrect limits or the domain over which I should integrate the function.
Any help appreciated as I am stuck without any progress for a few days.
The problem, as I said, is with the Jacobian. In the polar case you have integration over
f(ρ)*ρ*dρ*dφ
You integrate over dφ analytically (your f(ρ) doesn't depend on φ) and get 2π.
In the cartesian case there is no analytical integration, so it is over dx*dy, with no factor of 2π. Code to illustrate it (Python 3.9.1, Windows 10 x64); it produces pretty much the same answer:
import numpy as np
from scipy.special import kn
k_r = 6.2e-2
k_n = k_r/1.05
C_factor = 2*np.pi*1e5
lmin = 50
lmax = 80
def integration_polar_MC(rng, n):
def K_int(rho):
if rho>=50 and rho<=80:
return rho*k_r**2*(k_r**2*kn(0, k_r*rho)**2 + k_n**2*kn(1, k_r*rho)**2)
return 0.0
def sampler():
x = rng.uniform(lmin, lmax)
y = rng.uniform(0.0, c_lim)
return x,y
c_lim = 2*K_int(50) # Upper limit of integrand
sum_I = 0
for i in range(n):
x,y = sampler()
func_Int = K_int(x)
I = 1
if y>func_Int:
I = 0
sum_I += I
Gamma = C_factor*(lmax-lmin)*c_lim*sum_I/n
return Gamma
def integration_cartesian_MC(rng, n):
def K_int(x,y):
rho = np.hypot(x, y)
if rho>=50 and rho<=80:
return k_r**2*(k_r**2*kn(0,k_r*rho)**2+k_n**2*kn(1,k_r*rho)**2)
return 0.0
def sampler():
x = rng.uniform(lmin,lmax)
y = rng.uniform(lmin,lmax)
z = rng.uniform(0,c_lim)
return x,y,z
lmin,lmax = -100,100
c_lim = K_int(50, 0)
sum_I = 0
for i in range(n):
x,y,z = sampler()
func_Int = K_int(x,y)
I = 1
if z>func_Int:
I = 0
sum_I += I
Gamma = C_factor*(lmax-lmin)**2*c_lim*sum_I/n
return Gamma/(2.0*np.pi) # to compensate for 2π in the constant
rng = np.random.default_rng()
q = integration_polar_MC(rng, 100000)
print("MC_integral_polar:", q)
q = integration_cartesian_MC(rng, 100000)
print("MC_integral_cart:", q)
I am trying to make my own CFD solver and one of the most computationally expensive parts is solving for the pressure term. One way to solve Poisson differential equations faster is by using a multigrid method. The basic recursive algorithm for this is:
function phi = V_Cycle(phi,f,h)
% Recursive V-Cycle Multigrid for solving the Poisson equation (\nabla^2 phi = f) on a uniform grid of spacing h
% Pre-Smoothing
phi = smoothing(phi,f,h);
% Compute Residual Errors
r = residual(phi,f,h);
% Restriction
rhs = restriction(r);
eps = zeros(size(rhs));
% stop recursion at smallest grid size, otherwise continue recursion
if smallest_grid_size_is_achieved
eps = smoothing(eps,rhs,2*h);
else
eps = V_Cycle(eps,rhs,2*h);
end
% Prolongation and Correction
phi = phi + prolongation(eps);
% Post-Smoothing
phi = smoothing(phi,f,h);
end
I've attempted to implement this algorithm myself (also at the end of this question); however, it is very slow and doesn't give good results, so evidently it is doing something wrong. I've been trying to find out why for too long, and I think it's just worthwhile seeing if anyone can help me.
If I use a grid size of 2^5 by 2^5 points, then it can solve it and give reasonable results. However, as soon as I go above this it takes exponentially longer to solve and basically gets stuck at some level of inaccuracy, no matter how many V-cycles are performed. At 2^7 by 2^7 points, the code takes way too long to be useful.
I think my main issue is that my implementation of the Jacobi iteration uses linear algebra to calculate the update at each step. This should, in general, be fast; however, the update matrix A is an (n*m) by (n*m) matrix, and calculating its dot product with a vector is expensive for a 2^7 by 2^7 grid. As most of the entries are just zeros, should I calculate the result using a different method?
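For example, building the operator with scipy.sparse instead of a dense array is the kind of thing I have in mind - a rough sketch mirroring create_differences_matrix below, untested against my code:
import numpy as np
import scipy.sparse as sp

def create_A_sparse(rows, cols):
    # Same 5-point stencil as create_differences_matrix: 4 on the diagonal,
    # -1 for left/right neighbours (suppressed across grid-row ends), -1 up/down
    n = rows*cols
    main = 4.0*np.ones(n)
    side = -np.ones(n - 1)
    side[np.arange(1, n) % cols == 0] = 0.0  # no coupling across row ends
    updown = -np.ones(n - cols)
    return sp.diags([main, side, side, updown, updown],
                    [0, 1, -1, cols, -cols], format="csr")
A_sp @ v.flatten() would then replace np.dot(A, v.flatten()).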
If anyone has any experience with multigrid methods, I would appreciate any advice!
Thanks
my code:
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 29 16:24:16 2020
#author: mclea
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
from matplotlib import cm
def restrict(A):
"""
Creates a new grid of points which is half the size of the original
grid in each dimension.
"""
n = A.shape[0]
m = A.shape[1]
new_n = int((n-2)/2+2)
new_m = int((m-2)/2+2)
new_array = np.zeros((new_n, new_m))
for i in range(1, new_n-1):
for j in range(1, new_m-1):
ii = int((i-1)*2)+1
jj = int((j-1)*2)+1
# print(i, j, ii, jj)
new_array[i,j] = np.average(A[ii:ii+2, jj:jj+2])
new_array = set_BC(new_array)
return new_array
def interpolate_array(A):
"""
Creates a grid of points which is double the size of the original
grid in each dimension. Uses linear interpolation between grid points.
"""
n = A.shape[0]
m = A.shape[1]
new_n = int((n-2)*2 + 2)
new_m = int((m-2)*2 + 2)
new_array = np.zeros((new_n, new_m))
i = (np.indices(A.shape)[0]/(A.shape[0]-1)).flatten()
j = (np.indices(A.shape)[1]/(A.shape[1]-1)).flatten()
A = A.flatten()
new_i = np.linspace(0, 1, new_n)
new_j = np.linspace(0, 1, new_m)
new_ii, new_jj = np.meshgrid(new_i, new_j)
new_array = griddata((i, j), A, (new_jj, new_ii), method="linear")
return new_array
def adjacency_matrix(rows, cols):
"""
Creates the adjacency matrix for an n by m shaped grid
"""
n = rows*cols
M = np.zeros((n,n))
for r in range(rows):
for c in range(cols):
i = r*cols + c
# Two inner diagonals
if c > 0: M[i-1,i] = M[i,i-1] = 1
# Two outer diagonals
if r > 0: M[i-cols,i] = M[i,i-cols] = 1
return M
def create_differences_matrix(rows, cols):
"""
Creates the central differences matrix A for an n by m shaped grid
"""
n = rows*cols
M = np.zeros((n,n))
for r in range(rows):
for c in range(cols):
i = r*cols + c
# Two inner diagonals
if c > 0: M[i-1,i] = M[i,i-1] = -1
# Two outer diagonals
if r > 0: M[i-cols,i] = M[i,i-cols] = -1
np.fill_diagonal(M, 4)
return M
def set_BC(A):
"""
Sets the boundary conditions of the field
"""
A[:, 0] = A[:, 1]
A[:, -1] = A[:, -2]
A[0, :] = A[1, :]
A[-1, :] = A[-2, :]
return A
def create_A(n,m):
"""
    Creates all the components required for the Jacobi update function
for an n by m shaped grid
"""
LaddU = adjacency_matrix(n,m)
A = create_differences_matrix(n,m)
invD = np.zeros((n*m, n*m))
np.fill_diagonal(invD, 1/4)
return A, LaddU, invD
def calc_RJ(rows, cols):
"""
    Calculates the Jacobi update matrix Rj for an n by m shaped grid
"""
n = int(rows*cols)
M = np.zeros((n,n))
for r in range(rows):
for c in range(cols):
i = r*cols + c
# Two inner diagonals
if c > 0: M[i-1,i] = M[i,i-1] = 0.25
# Two outer diagonals
if r > 0: M[i-cols,i] = M[i,i-cols] = 0.25
return M
def jacobi_update(v, f, nsteps=1, max_err=1e-3):
"""
    Uses a Jacobi update matrix to solve nabla(v) = f
"""
f_inner = f[1:-1, 1:-1].flatten()
n = v.shape[0]
m = v.shape[1]
A, LaddU, invD = create_A(n-2, m-2)
Rj = calc_RJ(n-2,m-2)
update=True
step = 0
while update:
v_old = v.copy()
step += 1
vt = v_old[1:-1, 1:-1].flatten()
vt = np.dot(Rj, vt) + np.dot(invD, f_inner)
v[1:-1, 1:-1] = vt.reshape((n-2),(m-2))
err = v - v_old
if step == nsteps or np.abs(err).max()<max_err:
update=False
return v, (step, np.abs(err).max())
def MGV(f, v):
"""
Solves for nabla(v) = f using a multigrid method
"""
# global A, r
n = v.shape[0]
m = v.shape[1]
# If on the smallest grid size, compute the exact solution
if n <= 6 or m <=6:
v, info = jacobi_update(v, f, nsteps=1000)
return v
else:
# smoothing
v, info = jacobi_update(v, f, nsteps=10, max_err=1e-1)
A = create_A(n, m)[0]
# calculate residual
r = np.dot(A, v.flatten()) - f.flatten()
r = r.reshape(n,m)
        # downsample the residual error
r = restrict(r)
zero_array = np.zeros(r.shape)
        # interpolate the correction computed on a coarser grid
d = interpolate_array(MGV(r, zero_array))
        # Add the prolongated coarser-grid solution onto the finer grid
v = v - d
v, info = jacobi_update(v, f, nsteps=10, max_err=1e-6)
return v
sigma = 0
# Setting up the grid
k = 6
n = 2**k+2
m = 2**(k)+2
hx = 1/n
hy = 1/m
L = 1
H = 1
x = np.linspace(0, L, n)
y = np.linspace(0, H, m)
XX, YY = np.meshgrid(x, y)
# Setting up the initial conditions
f = np.ones((n,m))
v = np.zeros((n,m))
# How many V cyles to perform
err = 1
n_cycles = 10
loop = True
cycle = 0
# Perform V cycles until converged or reached the maximum
# number of cycles
while loop:
cycle += 1
v_new = MGV(f, v)
if np.abs(v - v_new).max() < err:
loop = False
if cycle == n_cycles:
loop = False
v = v_new
print("Number of cycles " + str(cycle))
plt.contourf(v)
I realize that I'm not answering your question directly, but I do note that you have quite a few loops that will contribute some overhead cost. When optimizing code, I have found the following thread useful - particularly the line-profiler answer. That way you can focus on "high time cost" lines and then start to ask more specific questions about opportunities to optimize.
How do I get time of a Python program's execution?
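For example, with the line_profiler package you can get per-line timings of a hot function (a sketch; the script and function names are placeholders):
# run with:  kernprof -l -v your_script.py
import builtins
profile = getattr(builtins, "profile", lambda f: f)  # no-op when not run under kernprof

@profile
def hot_function(v):
    # the loop body you want per-line timings for
    return v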
I like to prototype algorithms in Matlab, but I have the requirement of putting them on a server that also runs quite a bit of Python code. Hence I quickly converted the code to Python and compared the two. The Matlab implementation runs ~1000 times faster (from timing function calls - no profiling). Does anyone know offhand why the performance of Python is so slow?
Matlab
% init random data
w = 800;
h = 1200;
hmap = zeros(w,h);
npts = 250;
for i=1:npts
hmap(randi(w),randi(h)) = hmap(randi(w),randi(h))+1;
end
% Params
disksize = 251;
nBreaks = 25;
saturation = .9;
floorthresh =.05;
fh = fspecial('gaussian', disksize, disksize/7);
hmap = conv2(hmap, fh, 'same');
% Scaling, paritioning etc
hmap = hmap/(max(max(hmap)));
hmap(hmap<floorthresh) = 0;
hmap = round(nBreaks * hmap)/nBreaks;
hmap = hmap * (1/saturation);
% Show the image
imshow(hmap, [0,1])
colormap('jet')
Python
import numpy as np
from scipy.signal import convolve2d as conv2
import matplotlib.pyplot as plt
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
container = np.zeros((w,h))
for idx in range(0,xvals.size):
x = xvals[idx]
y = yvals[idx]
container[x,y] += 1
return container
def makeGaussian(size, fwhm):
x = np.arange(0, size, 1, float)
y = x[:,np.newaxis]
x0 = y0 = size // 2
return np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
h = makeGaussian(gaussianSize, fwhm=gaussianSize/2)
# Convolve
dmat2 = conv2(dmat, h, mode='same')
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
Ok, problem solved for me thanks to the suggestion from @Yves Daust's comments:
The filter scipy.ndimage.gaussian_filter utilises the separability of the kernel and reduces the running time to within a single order of magnitude of the Matlab implementation.
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter as gaussian
# Test data parameters
w = 800
h = 1200
npts = 250
# generate data
xvals = np.random.randint(w, size=npts)
yvals = np.random.randint(h, size=npts)
# Heatmap parameters
gaussianSize = 250
nbreaks = 25
# Preliminary function definitions
def populateMat(w, h, xvals, yvals):
container = np.zeros((w,h))
for idx in range(0,xvals.size):
x = xvals[idx]
y = yvals[idx]
container[x,y] += 1
return container
# Create the data matrix
dmat = populateMat(w,h,xvals,yvals)
# Convolve
dmat2 = gaussian(dmat, gaussianSize/7)
# Scaling etc
dmat2 = dmat2 / dmat2.max()
dmat2 = np.round(nbreaks*dmat2)/nbreaks
# Show
plt.imshow(dmat2)
plt.show()
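For intuition, the separability being exploited here means one 2-D Gaussian pass equals two 1-D passes; a quick check (sketch):
import numpy as np
from scipy.ndimage import gaussian_filter, gaussian_filter1d
a = np.random.rand(64, 64)
full = gaussian_filter(a, sigma=3)
two_pass = gaussian_filter1d(gaussian_filter1d(a, 3, axis=0), 3, axis=1)
print(np.allclose(full, two_pass))  # True: the 2-D kernel factorises into two 1-D passes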
I must solve the Euler-Bernoulli differential beam equation, which is:
u''''(x) = f(x)
(x is the coordinate of the beam axis points)
with boundary conditions:
u(0) = 0, u'(0) = 0, u''(1) = 0, u'''(1) = a
I have studied the theory of finite differences, which expresses the derivatives numerically as:
U'_k = (U_{k+1} - U_{k-1}) / (2h)
U''_k = (U_{k+1} - 2U_k + U_{k-1}) / h^2
U'''_k = (U_{k+2} - 2U_{k+1} + 2U_{k-1} - U_{k-2}) / (2h^3)
U''''_k = (U_{k+2} - 4U_{k+1} + 6U_k - 4U_{k-1} + U_{k-2}) / h^4
and I found a script which expresses it as follows:
import numpy as np
from scipy.linalg import solveh_banded
import matplotlib.pyplot as plt
def beam4(n,ffun,a):
x = np.linspace(0,1,n+1)
h = 1.0/n
stencil = np.array((1,-4,6))
B = np.outer(stencil,np.ones(n))
B[1,-1] = -2; B[2,0] = 7; B[2,-1] = 1; B[2,-2] = 5
f = ffun(x)
    f *= h**4; f[-1] *= 0.5; f[-1] -= a*h**3
u = np.zeros(n+1)
u[1:] = solveh_banded(B,f[1:],lower=False)
return x,u
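It is called like, e.g. (example values of my own):
x, u = beam4(50, lambda x: x**2, 0.2)
plt.plot(x, u)
plt.show()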
But I can't understand why the coefficient matrix is built this way:
stencil = np.array((1,-4,6))
B = np.outer(stencil,np.ones(n))
B[1,-1] = -2; B[2,0] = 7; B[2,-1] = 1; B[2,-2] = 5
f = ffun(x)
f *= h**4; f[-1] *= 0.5; f[-1] -= a*h**3
Thanks in advance!!!!
Hope this is helpful (this is my first time posting an answer).
The coefficients of this Hermitian positive-definite banded matrix come from applying the ghost-node method. It is one of the most efficient and popular methods for treating the boundary conditions of the FDM without losing accuracy (here these coefficients give a second-order convergence rate in general). If you have trouble visualising the matrix, please check the K matrix in my code below.
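A sketch of the elimination for the left end: at the first interior node k=1 the fourth-derivative stencil reads U_{-1} - 4U_0 + 6U_1 - 4U_2 + U_3. The condition u(0)=0 gives U_0 = 0, and u'(0)=0 with the central difference gives U_{-1} = U_1, so the row collapses to 7U_1 - 4U_2 + U_3, which is where the 7 on the diagonal comes from. Eliminating the ghost nodes at x=1 with u''(1)=0 and u'''(1)=a in the same way produces the (1,-4,5,-2) and (2,-4,2) rows and the a*h^3 correction to the right-hand side.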
from numpy import linspace,zeros,array,eye,dot
from numpy.linalg import solve
from pylab import plot,xlabel,ylabel,legend,show
a = 0.2 ;b = 0.0
LX,dx = 1.0,0.05 ;nx = int(LX/dx)+1
X = linspace(0.0,LX,nx)
Fs = X**2.0
""""""""""""""""""""""""""""""""""""""""""""""""""""""
def calcB(l,b):
if b==0: return [0.0,0.0]
elif b==1: return 1.0/l/l/l/l*array([-4.0,7.0,-4.0,1.0])
elif b==nx-2: return 1.0/l/l/l/l*array([1.0,-4.0,5.0,-2.0])
elif b==nx-1: return 1.0/l/l/l/l*array([2.0,-4.0,2.0])
else: return 1.0/l/l/l/l*array([1.0,-4.0,6.0,-4.0,1.0])
U = zeros(nx) ;V = zeros(nx)
M = eye(nx) ;K = zeros((nx,nx))
F = zeros(nx) ;F[nx-2] = -b/dx/dx
F[nx-1] = 2.0*b/dx/dx-2.0*a/dx
for i in range(nx):
if i == 0: I = [i,i+1]
elif i == 1: I = [i-1,i,i+1,i+2]
elif i == nx-2: I = [i-2,i-1,i,i+1]
elif i == nx-1: I = [i-2,i-1,i]
else: I = [i-2,i-1,i,i+1,i+2]
for k,j in enumerate(I):
K[i,j] += calcB(dx,i)[k]
""""""""""""""""""""""""""""""""""""""""""""""""""""""
pn = [0] ;eq2 = pn
eq1 = [i for i in range(nx) if i not in pn]
MM1_ = K[eq1,:]; MM11,MM12 = MM1_[:,eq1],MM1_[:,eq2]
RR = F+Fs
U[eq2] = [0.0]
U[eq1] = solve(MM11,(RR[eq1]-dot(MM12,U[eq2])))
######################Plotting#########################
Us = lambda x: x**6.0/360.0+x**3.0/6.0*(a-1.0/3.0)+x**2.0/2.0*(1.0/4.0-a)
plot(X,U,'bo',label='FDM') ;plot(X,Us(X),'g-',label='solution')
xlabel('X'); ylabel('U'); legend(loc='best')
show()
cheers