For an MCMC implementation, I want to calculate the covariance tensor C in numpy.
Working Single-Threaded Code
The distance between two elements is based on the distance between their indices. For reference, here is the working single-threaded code (with an example distance):
import numpy as np

# set size, dimensions, etc.
size = 20
ndim = 2
shape = (size,)*ndim*2

# initialize tensor
C = np.zeros(shape)

# example distance
dist = lambda x, y: np.sqrt(np.sum((x-y)**2))

# this runs as a class method, so please forgive my sloppy coding here
def update_tensor():
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()

update_tensor()
Solution Attempt
Now the issue is that, while applying C to a matrix x is a multithreaded operation:
x = np.random.standard_normal((size,)*ndim)
result = np.tensordot(C, x, axes=ndim)
calculating the entries of C is not. My idea was to split C after initialization along its first axis and iterate over the chunks separately:
import multiprocessing

def _calc_distances(C):
    'Calculate distances of submatrices'
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()
    return C

def update_tensor(C):
    'Updates Covariance Operator'
    # Multicore processing
    n_processes = multiprocessing.cpu_count()
    Chunks = [
        C[i*C.shape[0]//n_processes:(i+1)*C.shape[0]//n_processes]
        for i in range(0, n_processes-1)
    ]
    Chunks.append(C[C.shape[0]//n_processes*(n_processes-1):])
    with multiprocessing.Pool(n_processes+1) as p:
        # map and stitch together
        C = np.concatenate(
            p.map(_calc_distances, Chunks)
        )
But this fails because the indices of the submatrices change.
Question
Is there a nicer solution to this? How do I fix the index issue? Probably the nicest way would be to just iterate over parts of the array with threads sharing the data of C. Is that possible?
Q/A
Q: Do you have to use a numpy iterator?
A: No, it’s nice, but I can give up on that.
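For comparison: once the iterator requirement is dropped, the whole tensor can also be built in one vectorized shot, which sidesteps the chunk-offset problem entirely. A minimal sketch, assuming the example Euclidean index distance from the question:

import numpy as np

size, ndim = 20, 2
# All index tuples, one per column: shape (ndim, size**ndim).
idx = np.indices((size,)*ndim).reshape(ndim, -1)
# Pairwise index differences via broadcasting: shape (ndim, N, N).
diff = idx[:, :, None] - idx[:, None, :]
# Euclidean distance between every pair of index tuples, folded back
# into the (size,)*ndim*2 layout, so C[i,j,k,l] = dist((i,j),(k,l)).
C = np.sqrt((diff**2).sum(axis=0)).reshape((size,)*ndim*2)

The intermediate diff array costs O(ndim * size**(2*ndim)) memory, so this trades RAM for the Python-level loop.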
The multiprocessing approach is what worked for me. I'm just going to post the whole class here.
Benchmarks
CPU: Intel Core i5-6300U @ 2.5GHz, boosting to ~2.9GHz
Windows 10 64-bit, Python 3.7.4, Numpy 1.17
Pro: Less compute time
Con: Uses a little more RAM; somewhat complicated code.
Working Multi-Threaded Code
import multiprocessing
import numpy as np

class CovOp(object):
    'F[0,1]^ndim -> C[0,1]^ndim'

    def f(self, r):
        return np.exp(-r/self.ro)  # (1 + np.sqrt(3)*r/self.ro) * np.exp(-np.sqrt(3)*r/self.ro)

    def dist(self, x, y):
        return np.sum((x-y)**2)

    def __init__(self, ndim, size, sigma=1, ro=1):
        self.tensor_cached = False
        self.inverse_cached = False
        self.ndim = ndim
        self.size = size
        self.shape = (size,)*ndim*2
        self.C = np.zeros(self.shape)
        self.Inv = np.zeros(self.shape)
        self.ro = ro * size
        self.sigma = sigma

    def __call__(self, x):
        if not self.tensor_cached:
            self.update_tensor()
        if self.ndim == 0:
            return self.sigma * self.C * x
        elif self.ndim == 1:
            return self.sigma * np.dot(self.C, x)
        return self.sigma * np.tensordot(self.C, x, axes=self.ndim)

    def _calc_distances(self, Chunk: tuple):
        'Calculate distances of submatrices'
        C, offset = Chunk
        it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = np.array(it.multi_index)
            idx[0] += offset  # shift the first index back into global coordinates
            d = self.dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
            it[0] = self.f(d)
            it.iternext()
        return C

    def update_tensor(self):
        'Updates Covariance Operator'
        # Multicore processing: each chunk carries its global offset along
        n_processes = multiprocessing.cpu_count()
        Chunks = [
            (
                self.C[i*self.C.shape[0]//n_processes:(i+1)*self.C.shape[0]//n_processes],
                i*self.C.shape[0]//n_processes
            ) for i in range(0, n_processes-1)
        ]
        Chunks.append((
            self.C[self.C.shape[0]//n_processes*(n_processes-1):],
            self.C.shape[0]//n_processes*(n_processes-1)
        ))
        with multiprocessing.Pool(n_processes+1) as p:
            self.C = np.concatenate(
                p.map(self._calc_distances, Chunks)
            )
        self.tensor_cached = True
        # missing: Cholesky decomposition

    def update_inverse(self):
        if self.ndim == 1:
            self.Inv = np.linalg.inv(self.C)
        elif self.ndim > 1:
            self.Inv = np.linalg.tensorinv(self.C)
        else:
            self.Inv = 1/self.C
        self.inverse_cached = True

    def inv(self, x):
        if self.ndim == 0:
            return self.Inv * x / self.sigma
        elif self.ndim == 1:
            return np.dot(self.Inv, x) / self.sigma
        return np.tensordot(self.Inv, x) / self.sigma

if __name__ == '__main__':
    size = 30
    ndim = 2
    depth = 1
    Cov = CovOp(ndim, size, 1, .2)

    import time
    n_tests = 5
    t_start = time.perf_counter()
    for i in range(n_tests):
        Cov.update_tensor()
    t_stop = time.perf_counter()
    dt_new = t_stop - t_start
    print(
        '''Benchmark; NDim: %s, Size: %s, NTests: %s
        Mean time per test:
        Multithreaded %ss''' % (ndim, size, n_tests, dt_new/n_tests)
    )
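For completeness, applying the cached operator afterwards is just a call (usage sketch, reusing Cov, size and ndim from the __main__ block above):

x = np.random.standard_normal((size,)*ndim)
y = Cov(x)        # caches C on first use, then contracts over ndim axes, scaled by sigma
print(y.shape)    # (size,)*ndim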
Related
What is the most efficient implementation of a scalable autonomous tridiagonal system using JAX?
import functools as ft
import jax as jx
import jax.numpy as jnp
import jax.random as jrn
import jax.lax as jlx

def make_T(m):
    # Create a pseudo-random tridiagonal Jacobian and store the band
    T = jnp.zeros((3,m), dtype='f8')
    T = T.at[0, 1: ].set(jrn.normal(jrn.PRNGKey(0), shape=(m-1,)))
    T = T.at[1, :  ].set(jrn.normal(jrn.PRNGKey(1), shape=(m  ,)))
    T = T.at[2, :-1].set(jrn.normal(jrn.PRNGKey(2), shape=(m-1,)))
    return T

def make_y(m):
    # Create a pseudo-random state array
    y = jrn.normal(jrn.PRNGKey(3), shape=(m,))
    return y

def calc_f_base(y, T):
    # Calculate the rate given the current state
    f = T[1,:]*y
    f = f.at[1: ].set(f[1: ] + T[0,1: ]*y[:-1])
    f = f.at[:-1].set(f[:-1] + T[2,:-1]*y[1:])
    return f

m = 2**22  # potentially exhausts resources
T = make_T(m)
y = make_y(m)
calc_f = ft.partial(calc_f_base, T=T)
Using jax.jacrev or jax.jacfwd will generate the full Jacobian, which limits the size of the system (for m = 2**22 a dense float64 Jacobian would have 2**44 entries, on the order of 140 TB). One attempt to overcome this limitation is as follows:
@ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid(calc_f, y):
    # Determine the Jacobian (forward-mode) tridiagonal band
    def scan_body(carry, i):
        t, T = carry
        # clear the previous seed and seed column i before the JVP
        t = t.at[i-1].set(0.0)
        t = t.at[i  ].set(1.0)
        f, dfy = jx.jvp(calc_f, (y,), (t,))
        T = T.at[2,i-1].set(dfy[i-1])
        T = T.at[1,i  ].set(dfy[i  ])
        T = T.at[0,i+1].set(dfy[i+1])
        return (t, T), None
    # Initialise
    m = y.size
    t = jnp.zeros_like(y)
    T = jnp.zeros((3,m), dtype=y.dtype)
    # Differentiate wrt y[0]
    t = t.at[0].set(1.0)
    f, dfy = jx.jvp(calc_f, (y,), (t,))
    idxs = jnp.array([1,0]), jnp.array([0,1])
    T = T.at[idxs].set(dfy[0:2])
    # Differentiate wrt y[1:-1]
    (t, T), empty = jlx.scan(scan_body, (t,T), jnp.arange(1,m-1))
    # Differentiate wrt y[-1]
    t = t.at[m-2:].set(jnp.array([0.0,1.0]))
    f, dfy = jx.jvp(calc_f, (y,), (t,))
    idxs = jnp.array([2,1]), jnp.array([m-2,m-1])
    T = T.at[idxs].set(dfy[-2:])
    return T
which permits

T = calc_jacfwd_trid(calc_f, y)
df = jrn.normal(jrn.PRNGKey(4), shape=y.shape)
dx = jlx.linalg.tridiagonal_solve(*T, df[:,None]).flatten()
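For a quick sanity check of the band layout, the result can be compared against the dense Jacobian on a small system (a sketch assuming the definitions above):

import functools as ft
import jax as jx
import jax.numpy as jnp

m_small = 8
f_small = ft.partial(calc_f_base, T=make_T(m_small))
y_small = make_y(m_small)
J_dense = jx.jacfwd(f_small)(y_small)         # full (m, m) Jacobian
T_band = calc_jacfwd_trid(f_small, y_small)   # (3, m) band
assert jnp.allclose(T_band[0, 1:],  jnp.diag(J_dense, k=-1))  # sub-diagonal
assert jnp.allclose(T_band[1, :],   jnp.diag(J_dense, k=0))   # main diagonal
assert jnp.allclose(T_band[2, :-1], jnp.diag(J_dense, k=+1))  # super-diagonal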
Is there a better approach and/or can the time complexity of calc_jacfwd_trid be reduced further?
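One avenue for the time-complexity question: because the Jacobian is tridiagonal, columns i, i+3, i+6, ... touch disjoint rows, so three JVPs with strided tangent vectors recover the entire band regardless of m. This is the standard compressed (colored) Jacobian trick; a sketch under the same band convention as above, not code from the original implementations:

import numpy as np
import jax as jx
import jax.numpy as jnp

def calc_jacfwd_trid_colored(calc_f, y):
    # Three JVPs, one per color c in {0, 1, 2}: the tangent seeds every
    # third column, and each row of dfy then holds exactly one band entry.
    m = y.size
    T = jnp.zeros((3, m), dtype=y.dtype)
    for c in range(3):
        t = jnp.zeros_like(y).at[c::3].set(1.0)
        _, dfy = jx.jvp(calc_f, (y,), (t,))
        idx = np.arange(c, m, 3)                # seeded columns (static)
        T = T.at[1, idx].set(dfy[idx])          # main diagonal J[i, i]
        lo = idx[idx + 1 <= m - 1]
        T = T.at[0, lo + 1].set(dfy[lo + 1])    # sub band J[i+1, i]
        hi = idx[idx - 1 >= 0]
        T = T.at[2, hi - 1].set(dfy[hi - 1])    # super band J[i-1, i]
    return T

This replaces the O(m)-step scan with a constant number of function-sized JVPs.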
EDIT
The following implementation is more compact, but run times are slightly slower:
@ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid_map(calc_f, y):
    # Determine the Jacobian (forward-mode) tridiagonal band with lax map
    def map_body(i, t):
        # seed the tangent for column i before taking the JVP
        t = t.at[i-1].set(0.0)
        t = t.at[i  ].set(1.0)
        f, dfy = jx.jvp(calc_f, (y,), (t,))
        im1 = jnp.where(i > 0, i-1, 0)
        Ti = jlx.dynamic_slice(dfy, (im1,), (3,))
        Ti = jnp.where(i > 0, Ti, jnp.roll(Ti, shift=+1))
        Ti = jnp.where(i < m-1, Ti, jnp.roll(Ti, shift=-1))
        return Ti
    # Initialise
    m = y.size
    t = jnp.zeros_like(y)
    # Differentiate wrt y[:]
    T = jlx.map(lambda i: map_body(i, t=t), jnp.arange(m))
    # Correct the orientation of T
    T = T.transpose()
    T = jnp.flip(T, axis=0)
    T = T.at[0,:].set(jnp.roll(T[0,:], shift=+1))
    T = T.at[2,:].set(jnp.roll(T[2,:], shift=-1))
    return T
I am trying to run the minimization process found in this publication. The equation is on page 6 of the document (page 16 of the PDF).
I have a dataframe that looks like the one below:
import pandas as pd

df = pd.DataFrame({'h_t': [7.06398, 6.29948, 5.04570, 6.20774, 4.80106],
                   'p_atm': [101057.772801, 101324.416001, 101857.702401, 101724.380801, 101991.024001],
                   'q_p': [5.768132, 3.825600, 2.772215, 5.830429, 2.619304],
                   'q_s': [2.684433, 3.403679, 2.384275, 1.008078, 2.387106],
                   'tdg_f': [117.678100, 110.131579, 108.376963, 103.669725, 113.594771],
                   'tdg_tw': [121.052635, 119.710907, 114.921463, 112.156868, 115.444900],
                   'temp_water': [11.92, 19.43, 16.87, 7.45, 11.83]})
I have a constraint that says the function below must be positive, where b1 and b3 are the coefficients I am optimizing:
def q_ge(q_p, q_s, b1, b3):
    return min(q_p, (b1*q_s + b3))
I wrote my constraint below, but I am not sure if it is right.
def constraint_q_ge(x):
    b1, b2, b3 = x
    power_flow = df.apply(lambda x: q_ge(x['q_p'], x['q_s'], b1, b3), axis=1)
    const = power_flow < 0
    return -const.sum()
Is this correct? I run the function on all rows and check if any are less than 0 and sum this. The negative of that sum should be greater than or equal to 0. If there is even a single value less than 0 this constraint is not met.
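One hedged observation: the violation count is a step function of (b1, b3), so SLSQP gets no gradient information from it. An alternative with the same feasible set is to return the worst-case margin, which is non-negative exactly when every row satisfies the condition (a sketch; constraint_q_ge_margin is a name I introduce here):

def constraint_q_ge_margin(x):
    # >= 0 iff q_ge is non-negative on every row; varies smoothly with b1, b3
    b1, b2, b3 = x
    power_flow = df.apply(lambda r: q_ge(r['q_p'], r['q_s'], b1, b3), axis=1)
    return power_flow.min()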
EDIT:
Below is the full problem.
from scipy.constants import g as gravity
from sklearn.metrics import mean_squared_error
from math import sqrt
from scipy.optimize import minimize
import warnings

try:
    from numpy import any as _any
except ImportError:
    def _any(arg):
        if arg is True:
            return True
        if arg is False:
            return False
        return any(arg)

def water_density(T=None, T0=None, units=None, a=None,
                  just_return_a=False, warn=True):
    if units is None:
        K = 1
        m = 1
        kg = 1
    else:
        K = units.Kelvin
        m = units.meter
        kg = units.kilogram
    if T is None:
        T = 298.15*K
    m3 = m**3
    if a is None:
        a = (-3.983035*K,      # C
             301.797*K,        # C
             522528.9*K*K,     # C**2
             69.34881*K,       # C
             999.974950*kg/m3)
    if just_return_a:
        return a
    if T0 is None:
        T0 = 273.15*K
    t = T - T0
    if warn and (_any(t < 0*K) or _any(t > 40*K)):
        warnings.warn("Temperature is outside range (0-40 degC)")
    return a[4]*(1 - ((t + a[0])**2*(t + a[1]))/(a[2]*(t + a[3])))

def celsius_to_kelvin(t_celsius):
    return t_celsius + 273.15

def tailwater(h_t, temp_water, p_atm):
    t_water_kelvin = celsius_to_kelvin(temp_water)
    rho = water_density(t_water_kelvin)
    g = gravity
    return (1 + (rho*g*h_t)/(2*p_atm))

def tailwater_tdg(q_s, q_p, x, h_t, temp_water, p_atm, tdg_f):
    b1, b2, b3 = x
    A = ((q_s + b1*q_s + b3)/(q_s + q_p))
    B = tailwater(h_t, temp_water, p_atm)
    C = ((q_p - b1*q_s - b3)/(q_s + q_p))
    return 100*A*B*b2 + tdg_f*C

def q_ge(q_p, q_s, b1, b3):
    return min(q_p, (b1*q_s + b3))

def rmse(y, y_hat):
    return sqrt(mean_squared_error(y, y_hat))

def objective(x):
    y_hat = df.apply(lambda r: tailwater_tdg(q_s=r['q_s'], q_p=r['q_p'], x=x,
                                             h_t=r['h_t'], temp_water=r['temp_water'],
                                             p_atm=r['p_atm'], tdg_f=r['tdg_f']), axis=1)
    y = df['tdg_tw']
    return rmse(y, y_hat)

# constraints and bounds for the optimization model; see the reference for more information
def constraint_q_ge(x):
    b1, b2, b3 = x
    power_flow = df.apply(lambda x: q_ge(x['q_p'], x['q_s'], b1, b3), axis=1)
    const = power_flow < 0
    return -const.sum()

constraints = [{'type': 'ineq', 'fun': constraint_q_ge}]
bounds = [(-500, 10000), (.00001, 10000), (-500, 10000)]
x0 = [1, 1, 1]
sol = minimize(objective, x0, method='SLSQP', constraints=constraints,
               bounds=bounds, options={'disp': True, 'maxiter': 100})
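After the solve, the fitted coefficients and objective value live on the returned OptimizeResult (usage sketch):

print(sol.status, sol.message)               # convergence diagnostics
print(dict(zip(['b1', 'b2', 'b3'], sol.x)))  # fitted coefficients
print(sol.fun)                               # RMSE at the optimum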
I want to use the FCBF technique from GitHub: https://github.com/shiralkarprashant/FCBF
The problem I face is that I am working in Python 3 and the module is implemented for Python 2. I get an error saying that name 'xrange' is not defined, because xrange does not exist in Python 3.
I thought I could solve the issue just by replacing xrange with range. Here is the code I am running:
from FCBF_module import FCBF, FCBFK, FCBFiP, get_i
from sklearn.datasets import load_digits
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import time
from sklearn.grid_search import GridSearchCV  # note: moved to sklearn.model_selection in newer scikit-learn

classifiers = [('DecisionTree', DecisionTreeClassifier(), {'max_depth': [5, 10, 15]}),
               ('LogisticRegression', LogisticRegression(), {'C': [0.1, 1, 10]})]

n_features = dataCAD.shape[1]  # dataCAD: the asker's dataset, not defined in the post
npieces = get_i(n_features)
The module code contains just one xrange occurrence. I tried to change it to range, but that did not solve the problem:
# -*- coding: utf-8 -*-
import numpy as np

def count_vals(x):
    vals = np.unique(x)
    occ = np.zeros(shape=vals.shape)
    for i in range(vals.size):
        occ[i] = np.sum(x == vals[i])
    return occ

def entropy(x):
    n = float(x.shape[0])
    ocurrence = count_vals(x)
    px = ocurrence / n
    return -1 * np.sum(px * np.log2(px))

def symmetricalUncertain(x, y):
    n = float(y.shape[0])
    vals = np.unique(y)
    # Computing Entropy for the feature x.
    Hx = entropy(x)
    # Computing Entropy for the feature y.
    Hy = entropy(y)
    # Computing Joint entropy between x and y.
    partial = np.zeros(shape=(vals.shape[0]))
    for i in range(vals.shape[0]):
        partial[i] = entropy(x[y == vals[i]])
    partial[np.isnan(partial) == 1] = 0
    py = count_vals(y).astype(dtype='float64') / n
    Hxy = np.sum(py[py > 0] * partial)
    IG = Hx - Hxy
    return 2 * IG / (Hx + Hy)

def suGroup(x, n):
    m = x.shape[0]
    x = np.reshape(x, (n, m/n)).T  # m/n is float division on Python 3: another Python-2-ism
    m = x.shape[1]
    SU_matrix = np.zeros(shape=(m, m))
    for j in range(m-1):
        x2 = x[:, j+1::]
        y = x[:, j]
        temp = np.apply_along_axis(symmetricalUncertain, 0, x2, y)
        for k in range(temp.shape[0]):
            SU_matrix[j, j+1::] = temp
            SU_matrix[j+1::, j] = temp
    return 1/float(m-1) * np.sum(SU_matrix, axis=1)

def isprime(a):
    return all(a % i for i in xrange(2, a))  # 'xrange' only exists on Python 2

"""
get
"""
def get_i(a):
    if isprime(a):
        a -= 1
    return filter(lambda x: a % x == 0, range(2, a))  # filter() returns a lazy iterator on Python 3

"""
FCBF - Fast Correlation Based Filter
L. Yu and H. Liu. Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution.
In Proceedings of The Twentieth International Conference on Machine Learning (ICML-03), 856-863.
Washington, D.C., August 21-24, 2003.
"""
class FCBF:
    idx_sel = []

    def __init__(self, th=0.01):
        '''
        Parameters
        ---------------
        th = The initial threshold
        '''
        self.th = th

    def fit(self, x, y):
        '''
        This function executes FCBF algorithm and saves indexes
        of selected features in self.idx_sel
        Parameters
        ---------------
        x = dataset [NxM]
        y = label [Nx1]
        '''
        self.idx_sel = []
        """
        First Stage: Computing the SU for each feature with the response.
        """
        SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
        SU_list = SU_vec[SU_vec > self.th]
        SU_list[::-1].sort()
        m = x[:, SU_vec > self.th].shape
        x_sorted = np.zeros(shape=m)
        for i in range(m[1]):
            ind = np.argmax(SU_vec)
            SU_vec[ind] = 0
            x_sorted[:, i] = x[:, ind].copy()
            self.idx_sel.append(ind)
        """
        Second Stage: Identify relationships between features to remove redundancy.
        """
        j = 0
        while True:
            """
            Stopping Criteria: The search finishes
            """
            if j >= x_sorted.shape[1]:
                break
            y = x_sorted[:, j].copy()
            x_list = x_sorted[:, j+1:].copy()
            if x_list.shape[1] == 0:
                break
            SU_list_2 = SU_list[j+1:]
            SU_x = np.apply_along_axis(symmetricalUncertain, 0,
                                       x_list, y)
            comp_SU = SU_x >= SU_list_2
            to_remove = np.where(comp_SU)[0] + j + 1
            if to_remove.size > 0:
                x_sorted = np.delete(x_sorted, to_remove, axis=1)
                SU_list = np.delete(SU_list, to_remove, axis=0)
                to_remove.sort()
                for r in reversed(to_remove):
                    self.idx_sel.remove(self.idx_sel[r])
            j = j + 1

    def fit_transform(self, x, y):
        '''
        This function fits the feature selection
        algorithm and returns the resulting subset.
        Parameters
        ---------------
        x = dataset [NxM]
        y = label [Nx1]
        '''
        self.fit(x, y)
        return x[:, self.idx_sel]

    def transform(self, x):
        '''
        This function applies the selection
        to the vector x.
        Parameters
        ---------------
        x = dataset [NxM]
        '''
        return x[:, self.idx_sel]
"""
FCBF# - Fast Correlation Based Filter
B. Senliol, G. Gulgezen, et al. Fast Correlation Based Filter (FCBF) with a Different Search Strategy.
In Computer and Information Sciences (ISCIS ‘08) 23rd International Symposium on, pages 1‐4.
Istanbul, October 27‐29, 2008.
"""
class FCBFK(FCBF):
idx_sel = []
def __init__(self, k = 10):
'''
Parameters
---------------
k = Number of features to include in the
subset.
'''
self.k = k
def fit(self, x, y):
'''
This function executes FCBFK algorithm and saves indexes
of selected features in self.idx_sel
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
self.idx_sel = []
"""
First Stage: Computing the SU for each feature with the response.
"""
SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
SU_list = SU_vec[SU_vec > 0]
SU_list[::-1].sort()
m = x[:,SU_vec > 0].shape
x_sorted = np.zeros(shape = m)
for i in range(m[1]):
ind = np.argmax(SU_vec)
SU_vec[ind] = 0
x_sorted[:,i] = x[:,ind].copy()
self.idx_sel.append(ind)
"""
Second Stage: Identify relationships between features to remove redundancy with stopping
criteria (features in x_best == k).
"""
j = 0
while True:
y = x_sorted[:,j].copy()
SU_list_2 = SU_list[j+1:]
x_list = x_sorted[:,j+1:].copy()
"""
Stopping Criteria:The search finishes
"""
if x_list.shape[1] == 0: break
SU_x = np.apply_along_axis(symmetricalUncertain, 0,
x_list, y)
comp_SU = SU_x >= SU_list_2
to_remove = np.where(comp_SU)[0] + j + 1
if to_remove.size > 0 and x.shape[1] > self.k:
for i in reversed(to_remove):
x_sorted = np.delete(x_sorted, i, axis = 1)
SU_list = np.delete(SU_list, i, axis = 0)
self.idx_sel.remove(self.idx_sel[i])
if x_sorted.shape[1] == self.k: break
if x_list.shape[1] == 1 or x_sorted.shape[1] == self.k:
break
j = j + 1
if len(self.idx_sel) > self.k:
self.idx_sel = self.idx_sel[:self.k]
"""
FCBFiP - Fast Correlation Based Filter in Pieces
"""
class FCBFiP(FCBF):
idx_sel = []
def __init__(self, k = 10, npieces = 2):
'''
Parameters
---------------
k = Number of features to include in the
subset.
npieces = Number of pieces to divide the
feature space.
'''
self.k = k
self.npieces = npieces
def fit(self, x, y):
'''
This function executes FCBF algorithm and saves indexes
of selected features in self.idx_sel
Parameters
---------------
x = dataset [NxM]
y = label [Nx1]
'''
"""
First Stage: Computing the SU for each feature with the response. We sort the
features. When we have a prime number of features we remove the last one from the
sorted features list.
"""
m = x.shape
nfeaturesPieces = int(m[1] / float(self.npieces))
SU_vec = np.apply_along_axis(symmetricalUncertain, 0, x, y)
x_sorted = np.zeros(shape = m, dtype = 'float64')
idx_sorted = np.zeros(shape = m[1], dtype = 'int64')
for i in range(m[1]):
ind = np.argmax(SU_vec)
SU_vec[ind] = -1
idx_sorted[i]= ind
x_sorted[:,i] = x[:,ind].copy()
if isprime(m[1]):
x_sorted = np.delete(x_sorted, m[1]-1, axis = 1 )
ind_prime = idx_sorted[m[1]-1]
idx_sorted = np.delete(idx_sorted, m[1]-1)
#m = x_sorted.shape
"""
Second Stage: Identify relationships between features into its vecinity
to remove redundancy with stopping criteria (features in x_best == k).
"""
x_2d = np.reshape(x_sorted.T, (self.npieces, nfeaturesPieces*m[0])).T
SU_x = np.apply_along_axis(suGroup, 0, x_2d, nfeaturesPieces)
SU_x = np.reshape(SU_x.T, (self.npieces*nfeaturesPieces,))
idx_sorted2 = np.zeros(shape = idx_sorted.shape, dtype = 'int64')
SU_x[np.isnan(SU_x)] = 1
for i in range(idx_sorted.shape[0]):
ind = np.argmin(SU_x)
idx_sorted2[i] = idx_sorted[ind]
SU_x[ind] = 10
"""
Scoring step
"""
self.scores = np.zeros(shape = m[1], dtype = 'int64')
for i in range(m[1]):
if i in idx_sorted:
self.scores[i] = np.argwhere(i == idx_sorted) + np.argwhere(i == idx_sorted2)
if isprime(m[1]):
self.scores[ind_prime] = 2*m[1]
self.set_k(self.k)
def set_k(self, k):
self.k = k
scores_temp = -1*self.scores
self.idx_sel = np.zeros(shape = self.k, dtype = 'int64')
for i in range(self.k):
ind = np.argmax(scores_temp)
scores_temp[ind] = -100000000
self.idx_sel[i] = ind
Try using the 2to3 tool to convert the files to Python 3 automatically. It worked for me!
https://docs.python.org/2/library/2to3.html
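For reference, a typical invocation (2to3 ships with CPython; -w rewrites the file in place and leaves a .bak backup):

2to3 -w FCBF_module.py

After conversion, a minimal usage sketch on a toy dataset (my assumption about intended usage, not from the original posts):

from FCBF_module import FCBFK
from sklearn.datasets import load_digits

X, y = load_digits(return_X_y=True)
selector = FCBFK(k=10)
X_sel = selector.fit_transform(X, y)  # keeps the 10 selected feature columns
print(X_sel.shape)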
I am trying to parallelize a loop that is very costly.
Here is the code:
import numpy as np

class em:
    def __init__(self, k, x, iterations):
        self.k = k
        self.x = x
        self.iterations = iterations
        self.n = self.x.shape[0]
        self.pi = np.array([1 / self.k for _ in range(self.k)])
        self.z = np.ndarray(shape=(self.k, self.n))

    def fit(self):
        for i in range(self.iterations):
            print('iteration', i)
            self.expectation_step()
            self.maximization_step()

    def expectation_step(self):
        # update z
        pass

    def maximization_step(self):
        # update pi and parameters
        pass

class bmm_em(em):
    def __init__(self, k, x, iterations=1000, d=784):
        super().__init__(k, x, iterations)
        self.d = d
        self.mu = np.random.rand(self.k, self.d)
        for m in range(self.k):
            normalization_factor = 0.0
            for i in range(self.d):
                self.mu[m, i] = np.random.random() * 0.5 + 0.25
                normalization_factor += self.mu[m, i]
            for i in range(self.d):
                self.mu[m, i] /= normalization_factor

    def expectation_step(self):
        prod = np.zeros(self.k)
        for n in range(self.n):
            for m in range(self.k):
                t = self.pi[m]
                t *= np.prod(np.power(self.mu[m], self.x[n]))
                t *= np.prod(np.power((1.0 - self.mu[m]), (1.0 - self.x[n])))
                prod[m] = t
            s = sum(prod)
            for m in range(self.k):
                if s > 0.0:
                    self.z[m, n] = prod[m] / s
                else:
                    self.z[m, n] = prod[m] / float(self.k)

    def maximization_step(self):
        for m in range(self.k):
            n_m = np.sum(self.z[m])
            self.pi[m] = n_m / self.n  # update pi
            self.mu[m] = 0
            for i in range(self.n):
                self.mu[m] += self.z[m, i] * self.x[i].T
            self.mu[m] /= n_m
The very costly part is the loop over data points in bmm_em.expectation_step.
I looked at the joblib module but couldn't figure out how I can rewrite my code to make it work.
Can anyone give me a hint? :)
As @Sergei noted, using numpy vectorization is preferred here.
Here is what my code became; it's way faster:
def _log_support(self):
    pi = self.pi; mu = self.mu
    log_support = np.ndarray(shape=(self.k, self.n))
    for k in range(self.k):
        # self.xc is (presumably) the cached complement 1 - self.x;
        # clipping keeps the logs finite when mu hits 0 or 1
        log_support[k, :] = np.log(pi[k]) \
            + np.sum(self.x * np.log(mu[k, :].clip(min=1e-20)), 1) \
            + np.sum(self.xc * np.log((1 - mu[k, :]).clip(min=1e-20)), 1)
    return log_support

def expectation_step(self, log_support):
    # normalise in the log domain, then exponentiate the responsibilities
    log_normalisation = np.logaddexp.reduce(log_support, axis=0)
    log_responsibilities = log_support - log_normalisation
    self.z = np.exp(log_responsibilities)
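To see why the log domain matters here (an illustrative aside, not from the original exchange): with hundreds of Bernoulli factors per point, direct products underflow float64 quickly, while log-sums plus logaddexp stay finite.

import numpy as np

log_p = np.full(2048, np.log(0.5))   # 2048 Bernoulli factors of 0.5
print(np.prod(np.exp(log_p)))        # 0.0 -- the direct product underflows
print(log_p.sum())                   # about -1419.57 -- finite in the log domain
w = np.array([log_p.sum(), log_p.sum() + 1.0])  # two cluster log weights
print(np.exp(w - np.logaddexp.reduce(w)))       # responsibilities: [0.2689, 0.7311]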
With the code below, I'm attempting to implement the Levy-Khintchine formula (https://en.wikipedia.org/wiki/L%C3%A9vy_process#L.C3.A9vy.E2.80.93Khintchine_representation). In the limit of no jumps, the Levy-Khintchine formula reduces to the multivariate normal distribution. My code uses the (multi-dimensional) trapezoidal integration rule (http://mathfaculty.fullerton.edu/mathews/n2003/SimpsonsRule2DMod.html) to approximate the Fourier transform of the characteristic function as a discrete Fourier transform. For the 1-dimensional case, the code works perfectly. For the 2-D case, I can't find what I'm doing wrong.
Does anyone have example numpy.fftn code that correctly implements multivariate_normal pdf?
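For reference, the 1-d mechanics that do work reduce to this minimal sketch (my own distillation: balanced spacings dk*dx = 2*pi/J turn the trapezoid sum into a plain DFT; standard normal, so CF(k) = exp(-k**2/2)):

import numpy as np

J = 64
dk = dx = np.sqrt(2*np.pi/J)   # balanced grid spacings, dk*dx = 2*pi/J
k = np.arange(J)*dk            # positive wave numbers only (enough in 1-d)
x0 = -0.5*J*dx
x = x0 + np.arange(J)*dx
f = np.exp(-0.5*k**2)*np.exp(-1j*k*x0)  # CF times the shift to the left grid edge
f[0] *= 0.5                              # trapezoid end weights
f[-1] *= 0.5
pdf = np.fft.fft(f).real*dk/np.pi
print(np.allclose(pdf, np.exp(-0.5*x**2)/np.sqrt(2*np.pi)))  # True

Here is the multi-dimensional attempt: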
import numpy as np
from functools import reduce

class LevyKhintchine:
    def __init__(self, mean, cov, jump_measure):
        self.mean = mean
        self.cov = cov
        self.jump_measure = jump_measure
        self.factors = mean.shape[0]

    def logCF(self, k):
        rolled = Roll(k)
        out = np.empty(Shape(k))
        return (self.jump_measure(k) -
                Dot(rolled, self.cov, rolled, out)*0.5 +
                np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)

    def pdf_grid(self, J):
        diag = np.diagonal(self.cov)
        tmp = np.pi*2/J
        dk = np.sqrt(tmp/diag)
        dx = np.sqrt(tmp*diag)
        k = Grid(np.zeros(self.factors), dk, J)
        x0 = self.mean - dx*J*0.5
        f = np.exp(self.logCF(k) - Coef(dk, x0, J)*1j)
        for n in range(self.factors):
            # trapezoid end weights on each axis, cycling axes with rollaxis
            f[ 0] *= 0.5
            f[-1] *= 0.5
            f = np.rollaxis(f, 0, self.factors)
        pdf = np.fft.fftn(f)
        return Grid(x0, dx, J), pdf.real*(np.prod(dk)/np.pi)

def Grid(left, width, J):
    def Slice(slices, j):
        slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
        return slices
    slices = reduce(Slice, range(len(left)), [])
    return np.mgrid[slices]

def Shape(grid):
    return np.asarray(grid).shape[1:]

def Roll(grid):
    grid = np.asarray(grid)
    try:
        rolled = np.rollaxis(grid, 0, len(grid)+1)
    except ValueError:
        rolled = grid
    return rolled

def Dot(x, cov, y, out):  # x & y are "rolled"
    for j in np.ndindex(out.shape):
        out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
    return out

def Coef(dks, x0s, J):
    factors = len(dks)
    coef = np.zeros((J,)*factors)
    for n, (dk, x0) in enumerate(zip(dks, x0s)):
        shape = np.ones(factors, dtype=int)
        shape[n] = J
        coef += np.arange(J).reshape(shape)*(dk*x0)
    return coef
Here's the tests:
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

J = 64

factors = 1
mean = np.full((factors,), -1)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
rv0 = multivariate_normal(mean, cov)
x, pdf = rv.pdf_grid(J)
plt.plot(x[0], pdf, x[0], rv0.pdf(Roll(x)))

factors = 2
mean = np.full((factors,), 5)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
x, pdf = rv.pdf_grid(J)
rv0 = multivariate_normal(mean, cov)
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.contourf(x[0], x[1], pdf)
fig3 = plt.figure()
ax3 = fig3.add_subplot(111)
ax3.contourf(x[0], x[1], rv0.pdf(Roll(x)))
I figured it out: in 1-d I can get away with integrating over only positive wave numbers k; in higher dimensions I cannot. (The characteristic function of a real density is Hermitian, CF(-k) = conj(CF(k)), so in 1-d the negative-k half of the integral is recovered by taking the real part; in n dimensions the symmetry couples all axes jointly through k -> -k, so the grid has to cover negative wave numbers too, starting at k0 = -dk*J/2.)
Here's the corrected code:
import numpy as np
from functools import reduce

class LevyKhintchine:
    def __init__(self, mean, cov, jump_measure):
        self.mean = mean
        self.cov = cov
        self.jump_measure = jump_measure
        self.factors = mean.shape[0]

    def logCF(self, k):
        rolled = Roll(k)
        out = np.empty(Shape(k))
        return (self.jump_measure(k) -
                Dot(rolled, self.cov, rolled, out)*0.5 +
                np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)

    def pdf_grid(self, J):
        diag = np.diagonal(self.cov)
        tmp = np.pi*2/J
        dk = np.sqrt(tmp/diag)
        dx = np.sqrt(tmp*diag)
        k0 = -0.5*dk*J  # the wave-number grid now covers negative k as well
        x0 = -0.5*dx*J + self.mean
        k = Grid(k0, dk, J)
        x = Grid(x0, dx, J)
        f = np.exp(-1j*Coef(dk, x0, J) + self.logCF(k))
        for n in range(self.factors):
            f[ 0] *= 0.5
            f[-1] *= 0.5
            f = np.rollaxis(f, 0, self.factors)
        c = ((0.5/np.pi)**self.factors*np.prod(dk)*np.exp(-1j*np.dot(k0, x0)))
        pdf = np.fft.fftn(f)*np.exp(-1j*Coef(k0, dx, J))*c
        return x, pdf.real

def Grid(left, width, J):
    def Slice(slices, j):
        slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
        return slices
    slices = reduce(Slice, range(len(left)), [])
    return np.mgrid[slices]

def Shape(grid):
    return np.asarray(grid).shape[1:]

def Roll(grid):
    grid = np.asarray(grid)
    try:
        rolled = np.rollaxis(grid, 0, len(grid)+1)
    except ValueError:
        rolled = grid
    return rolled

def Dot(x, cov, y, out):  # x & y are "rolled"
    for j in np.ndindex(out.shape):
        out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
    return out

def Coef(dks, x0s, J):
    factors = len(dks)
    coef = np.zeros((J,)*factors)
    for n, (dk, x0) in enumerate(zip(dks, x0s)):
        shape = np.ones(factors, dtype=int)
        shape[n] = J
        coef += np.arange(J).reshape(shape)*(dk*x0)
    return coef
Here are the tests:

from scipy.stats import multivariate_normal

J = 32
for factors in range(1, 4):
    mean = np.full((factors,), -1)
    cov = np.identity(factors)
    rv = LevyKhintchine(mean, cov, lambda k: 0)
    rv0 = multivariate_normal(mean, cov)
    x, pdf = rv.pdf_grid(J)
    pdf0 = rv0.pdf(Roll(x))
    print(np.allclose(pdf, pdf0))

which prints:

True
True
True