Python - Parallelisation of a loop - python

I am trying to parallelize a loop that is very costly.
Here is the code:
import numpy as np
class em:
def __init__(self, k, x, iterations):
self.k = k
self.x = x
self.iterations = iterations
self.n = self.x.shape[0]
self.pi = np.array([1 / self.k for _ in range(self.k)])
self.z = np.ndarray(shape=(self.k, self.n))
def fit(self):
for i in range(self.iterations):
print('iteration', i)
self.expectation_step()
self.maximization_step()
def expectation_step(self):
# update z
pass
def maximization_step(self):
# update pi and parameters
pass
class bmm_em(em):
def __init__(self, k, x, iterations=1000, d=784):
super().__init__(k, x, iterations)
self.d = d
self.mu = np.random.rand(self.k, self.d)
for m in range(self.k):
normalization_factor = 0.0
for i in range(self.d):
self.mu[m,i] = np.random.random() * 0.5 + 0.25
normalization_factor += self.mu[m, i]
for i in range(self.d):
self.mu[m,i] /= normalization_factor
def expectation_step(self):
prod = np.zeros(self.k)
for n in range(self.n):
for m in range(self.k):
t = self.pi[m]
t *= np.prod(np.power(self.mu[m], self.x[n]))
t *= np.prod(np.power((1.0 - self.mu[m]), (1.0 - self.x[n])))
prod[m] = t
s = sum(prod)
for n in range(self.n):
for m in range(self.k):
if s > 0.0:
self.z[m,n] = prod[m] / s
else:
self.z[m,n] = prod[m] / float(self.k)
def maximization_step(self):
for m in range(self.k):
n_m = np.sum(self.z[m])
self.pi[m] = n_m / self.n # update pi
self.mu[m] = 0
for i in range(self.n):
self.mu[m] += self.z[m,i] * self.x[i].T
self.mu[m] /= n_m
The very costly part is the first loop in bmm_em.expectation_step.
I looked at the joblib module but couldn't figure out how I can rewrite my code to make it work.
Can anyone give me a hint? :)

As #Sergei noted, the use of numpy is preferred here.
Here is what my code became, it's way way faster
def _log_support(self):
pi = self.pi; mu = self.mu
log_support = np.ndarray(shape=(self.k, self.n))
for k in range(self.k):
log_support[k, :] = np.log(pi[k]) \
+ np.sum(self.x * np.log(mu[k, :].clip(min=1e-20)), 1) \
+ np.sum(self.xc * np.log((1 - mu[k, :]).clip(min=1e-20)), 1)
return log_support
def expectation_step(self, log_support):
log_normalisation = np.logaddexp.reduce(log_support, axis=0)
log_responsibilities = log_support - log_normalisation
self.z = np.exp(log_responsibilities)

Related

SciPy Optimise minimise error - challenge to solve

How do I solve this error?
TypeError: NumPy boolean subtract, the `-` operator, is not supported, use the bitwise_xor, the `^` operator, or the logical_xor function instead.
I have programmed an optimizing program that must minimize the cost of a wall design. The wall is based on 3 parameters, x, k and m. There are constraints to the sizes of x, k and m as shown. Another constraint is that z (or deflection) must be kept under 100mm. The equation for deflection changes based on a certain t (or time) at which the blast wall is experiencing the blast. If t is below a certain time value which is calculated dependent on, x, k and m the equation is as shown. If t is above the same certain time value, the equation for z changes.
Here is the programming... Please help many thanks :)
import numpy as np
from numpy import linspace
from math import cos
from math import sin
from scipy.optimize import minimize
#Function for minimising
def calcCost(c):
k = c[0]
m = c[1]
x = c[2]
Cost = (900 + 825*k**2 - 1725) + (10*m - 200) + ((2400*x**2)/4)
return Cost
#Objective function
def objective(c):
return calcCost(c)
#Defining Variables
def calck(c):
k = c[0]
k=k
k.resize(12,)
return k
def calcm(c):
m = c[1]
m=m
m.resize(12,)
return m
def calcx(c):
x = c[2]
x=x
x.resize(12,)
return x
def calcz(c):
k = c[0]
x = c[1]
m = c[2]
l = linspace(0,140,141)
for t in l:
if t <= ((20 - 0.12*x**2 + 4.2*x)/1000):
deflection = ((((1000+9*x**2-183*x)*1000)/k)*(1-cos(t*((k/m)**0.5))) + (((1000+9*x**2-183*x)*1000)/k*((20 - 0.12*x**2 + 4.2*x)/1000))*((sin(t*((k/m)**0.5))/((k/m)**0.5))-t))*1000
else:
deflection = ((((1000+9*x**2-183*x)*1000)/(k*((k/m)**0.5)*((20 - 0.12*x**2 + 4.2*x)/1000)))*(sin(((k/m)**0.5)*t))-(sin(((k/m)**0.5)*(t-((20 - 0.12*x**2 + 4.2*x)/1000))))-(((1000+9*x**2-183*x)*1000)/k)*cos(((k/m)**0.5)*t))*1000
deflection.resize(12,)
return deflection
#Constraint functions
def kconstraint1(c):
k = c[0]
return k-(1*10**6) >= 0
def kconstraint2(c):
k = c[0]
return k-(7*10**6) <= 0
def mconstraint1(c):
m = c[0]
return m-200 >= 0
def mconstraint2(c):
m = c[0]
return m-1200 <= 0
def xconstraint1(c):
x = c[0]
return x >= 0
def xconstraint2(c):
x = c[0]
return x <= 10
def zconstraint1(c):
k = c[0]
x = c[1]
m = c[2]
l = linspace(0,140,141)
for t in l:
if t <= ((20 - 0.12*x**2 + 4.2*x)/1000):
deflection = ((((1000+9*x**2-183*x)*1000)/k)*(1-cos(t*((k/m)**0.5))) + (((1000+9*x**2-183*x)*1000)/k*((20 - 0.12*x**2 + 4.2*x)/1000))*((sin(t*((k/m)**0.5))/((k/m)**0.5))-t))*1000
else:
deflection = ((((1000+9*x**2-183*x)*1000)/(k*((k/m)**0.5)*((20 - 0.12*x**2 + 4.2*x)/1000)))*(sin(((k/m)**0.5)*t))-(sin(((k/m)**0.5)*(t-((20 - 0.12*x**2 + 4.2*x)/1000))))-(((1000+9*x**2-183*x)*1000)/k)*cos(((k/m)**0.5)*t))*1000
return deflection <= 99.99999999
b = (0.5,1)
be = (0.5,10)
bb = (0.1,2.0)
bnds = (b,be,bb,bb)
con1 = ({'type':'ineq','fun':kconstraint1})
con2 = ({'type':'ineq','fun':kconstraint2})
con3 = ({'type':'ineq','fun':mconstraint1})
con4 = ({'type':'ineq','fun':mconstraint2})
con5 = ({'type':'ineq','fun':xconstraint1})
con6 = ({'type':'ineq','fun':xconstraint2})
con7 = ({'type':'ineq','fun':zconstraint1})
cons = [con1,con2,con3,con4,con5,con6,con7]
xGUESS = 5
kGUESS = 3*10**6
mGUESS = 700
zGUESS = 90
x0 = np.array([xGUESS,kGUESS,mGUESS,zGUESS])
sol = minimize(objective,x0,method='SLSQP',bounds=bnds,constraints=cons,options={'disp':True})
xOpt = sol.x
CostOPT = sol.fun
kOPT = calck(xOpt)
xOPT = calcx(xOpt)
mOPT = calcm(xOpt)
zOPT = calcz(xOpt)
print(str(CostOPT))
print(str(calcx))
print(str(calcm))
print(str(calck))
print(str(calcz))

How to call a specific return of a function in Python?

I am trying to write a numerical integration code. I have a function GetAcc, and it returns two acceleration values which are x and y compononets. How can I call first and second return values at the different parts?
This is my function.
def GetAcc(xPositions, yPositions):
global xAcc
global yAcc
xAcc = np.zeros(xPositions.size, xPositions.size)
yAcc = np.zeros(yPositions.size, yPositions.size)
for i in range(0, xPositions.size-1):
for j in range(i+1, xPositions.size):
r_x = xPositions[j] - xPositions[i]
r_y = yPositions[j] - yPositions[i]
rmag = np.sqrt(r_x*r_x + r_y*r_y)
force_scalar = GetLJForce(rmag, 0.84, 2.56)
force_x = force_scalar * r_x / rmag
force_y = force_scalar * r_y / rmag
xAcc[i,j] = force_x / m
xAcc[j,i] = - force_x / m
yAcc[i,j] = force_y / m
yAcc[j,i] = - force_y / m
return np.sum(xAcc, axis=0), np.sum(yAcc, axis=0)
Since I couldn't find a solution, I tried this:
def GetxAcc(xPositions):
a, b = GetAcc(xPositions, yPositions)
return a
def GetyAcc(xPositions):
a, b = GetAcc(xPositions, yPositions)
return b
And I need to call the x-component which is np.sum(xAcc, axis=0) here.
def RunMDx(dt, number_of_steps, x):
global xPositions
xPositions = np.zeros((number_of_steps, 3))
v_x = 0
a_x = GetxAcc(x)
for i in range(number_of_steps):
x = UpdatexPos(x, v_x, a_x, dt)
a1_x = GetxAcc(x)
v_x = UpdatexVel(v_x, a_x, a1_x, dt)
a_x = np.array(a1_x)
xPositions[i, :] = x
return xPositions
Thanks!
You could just do this:
a_x = GetAcc(xPositions, yPositions)[0]

Object values not being reset in python function

Here is my code. In the calculateOptimalLambda() function, I am attempting to declare a copy of n and
store it as m, remove one point from m, and make some calculations and a graph. Then, the loop should
restart, make a fresh copy of m, remove the next point, and so on.
However, when in the next iteration
of the loop, a point has been removed. Eventually, I run out of points to remove, and I get an error.
How do I declare a fresh copy of m so I can remove the next point?
import numpy as np
from matplotlib import pyplot as plt
class Data:
def __init__(self, points, sigma, lamda):
self.points = points
self.sigma = sigma
self.sample = np.random.uniform(-1,1, (points, 2))
self.transformedData = np.ones((points, 5))
self.weight = np.zeros((5,1))
self.lamda = lamda
def changeLamda(self,x):
self.lamda = x
def removePoint(self, x):
self.points = self.points - 1
self.sample = np.delete(self.sample, x, 0)
self.transformedData = np.delete(self.transformedData, x, 0)
def transformedFunction(self, x):
transformedData = np.ones((1, 5))
transformedData[0,1] = x
transformedData[0,2] = 0.5 * (3*x**2 -1)
transformedData[0,3]= 0.5 * (5*x**3 - 3*x)
transformedData[0,4] = 0.125 * (35*x**4 -30*x**2 + 3)
return np.dot(transformedData, self.weight)
def setY(self):
for i in range(len(self.sample[0:,0])):
self.sample[i,1] = np.random.normal(0, self.sigma) + self.sample[i,0]**2
def transform(self):
for i in range(len(self.sample[0:,0])):
self.transformedData[i,1] = self.sample[i,0]
self.transformedData[i,2] = 0.5 * (3*self.sample[i,0]**2 -1)
self.transformedData[i,3]= 0.5 * (5*self.sample[i,0]**3 - 3*self.sample[i,0])
self.transformedData[i,4] = 0.125 * (35*self.sample[i,0]**4 -30*self.sample[i,0]**2 + 3)
def calculateWeight(self):
z = n.transformedData
zProd = np.linalg.inv(np.matmul(np.transpose(z), z) + np.identity(5)*self.lamda)
next1 = np.matmul(zProd,np.transpose(z))
a = self.sample[0:,1]
a = a.reshape((-1, 1))
print(a)
self.weight = np.matmul(next1,a)
def calculateError(self):
error= (np.matmul(self.transformedData, self.weight) - self.sample[1,0:])
return error/self.points
def calculateOptimalLambda(n, L):
a = 0
for i in range(len(L)):
n.changeLamda(L[i])
for x in range(n.getPoints()):
a+=1
plt.subplot(4,5,a)
m = n
m.removePoint(x)
m.calculateWeight()
weight = m.getWeight()
error = m.calculateError()
twoD_plot(m)
print(error)
def twoD_plot(n):
t = np.linspace(-1, 1, 400)
x = np.square(t)
plt.plot(t,x,'b')
error = 0
y = x
for i in range(len(t)):
y[i] = n.transformedFunction(t[i])
error += (y[i] - t[i]**2)**2
"""print(error/len(t))"""
plt.plot(t,y,'r')
plt.scatter(n.getSample()[0:,0],n.getSample()[0:,1], c = 'g', marker = 'o')
n = Data(5,0.1,0)
n.setY()
n.transform()
n.calculateWeight()
L = [1, 0.01, 0.00001, 0]
calculateOptimalLambda(n, L)
plt.show()

TypeError when using a method from a class - python

I have an improved kmeans algorithm (KPlusPlus) that builds on the class kmeans. Detk is another class inherited from KPlusPlus.
The objective of the KPlusPlus class is to find out the optimal seeding for finding the kmeans centroids (Source)
Detk calculates the gap statistic to find the optimal number of clusters. I have found this code from here
# kmeans class
class KMeans():
def __init__(self, K, X=None, N=0):
self.K = K
if X == None:
if N == 0:
raise Exception("If no data is provided, \
a parameter N (number of points) is needed")
else:
self.N = N
self.X = self._init_board_gauss(N, K)
else:
self.X = X
self.N = len(X)
self.mu = None
self.clusters = None
self.method = None
def _init_board_gauss(self, N, k):
n = float(N)/k
X = []
for i in range(k):
c = (random.uniform(-1,1), random.uniform(-1,1))
s = random.uniform(0.05,0.15)
x = []
while len(x) < n:
a,b = np.array([np.random.normal(c[0],s),np.random.normal(c[1],s)])
# Continue drawing points from the distribution in the range [-1,1]
if abs(a) and abs(b)<1:
x.append([a,b])
X.extend(x)
X = np.array(X)[:N]
return X
def plot_board(self):
X = self.X
fig = plt.figure(figsize=(5,5))
plt.xlim(-1,1)
plt.ylim(-1,1)
if self.mu and self.clusters:
mu = self.mu
clus = self.clusters
K = self.K
for m, clu in clus.items():
cs = cm.spectral(1.*m/self.K)
plt.plot(mu[m][0], mu[m][1], 'o', marker='*', \
markersize=12, color=cs)
plt.plot(zip(*clus[m])[0], zip(*clus[m])[1], '.', \
markersize=8, color=cs, alpha=0.5)
else:
plt.plot(zip(*X)[0], zip(*X)[1], '.', alpha=0.5)
if self.method == '++':
tit = 'K-means++'
else:
tit = 'K-means with random initialization'
pars = 'N=%s, K=%s' % (str(self.N), str(self.K))
plt.title('\n'.join([pars, tit]), fontsize=16)
plt.savefig('kpp_N%s_K%s.png' % (str(self.N), str(self.K)), \
bbox_inches='tight', dpi=200)
def _cluster_points(self):
mu = self.mu
clusters = {}
for x in self.X:
bestmukey = min([(i[0], np.linalg.norm(x-mu[i[0]])) \
for i in enumerate(mu)], key=lambda t:t[1])[0]
try:
clusters[bestmukey].append(x)
except KeyError:
clusters[bestmukey] = [x]
self.clusters = clusters
def _reevaluate_centers(self):
clusters = self.clusters
newmu = []
keys = sorted(self.clusters.keys())
for k in keys:
newmu.append(np.mean(clusters[k], axis = 0))
self.mu = newmu
def _has_converged(self):
K = len(self.oldmu)
return(set([tuple(a) for a in self.mu]) == \
set([tuple(a) for a in self.oldmu])\
and len(set([tuple(a) for a in self.mu])) == K)
def find_centers(self,K, method='random'):
self.method = method
X = self.X
K = self.K
self.oldmu = random.sample(X, K)
if method != '++':
# Initialize to K random centers
self.mu = random.sample(X, K)
while not self._has_converged():
self.oldmu = self.mu
# Assign all points in X to clusters
self._cluster_points()
# Reevaluate centers
self._reevaluate_centers()
The KPlusPlus class inherits from kmeans to find the optimal seeding
class KPlusPlus(KMeans):
def _dist_from_centers(self):
cent = self.mu
X = self.X
D2 = np.array([min([np.linalg.norm(x-c)**2 for c in cent]) for x in X])
self.D2 = D2
def _choose_next_center(self):
self.probs = self.D2/self.D2.sum()
self.cumprobs = self.probs.cumsum()
r = random.random()
ind = np.where(self.cumprobs >= r)[0][0]
return(self.X[ind])
def init_centers(self,K):
self.K = K
self.mu = random.sample(self.X, 1)
while len(self.mu) < self.K:
self._dist_from_centers()
self.mu.append(self._choose_next_center())
def plot_init_centers(self):
X = self.X
fig = plt.figure(figsize=(5,5))
plt.xlim(-1,1)
plt.ylim(-1,1)
plt.plot(zip(*X)[0], zip(*X)[1], '.', alpha=0.5)
plt.plot(zip(*self.mu)[0], zip(*self.mu)[1], 'ro')
plt.savefig('kpp_init_N%s_K%s.png' % (str(self.N),str(self.K)), \
bbox_inches='tight', dpi=200)
The class Detk inherits from KPlusPlus to find the optmal number of clusters based on gap statistic
class DetK(KPlusPlus):
def fK(self, thisk, Skm1=0):
X = self.X
Nd = len(X[0])
a = lambda k, Nd: 1 - 3/(4*Nd) if k == 2 else a(k-1, Nd) + (1-a(k-1, Nd))/6
self.find_centers(thisk, method='++')
mu, clusters = self.mu, self.clusters
Sk = sum([np.linalg.norm(mu[i]-c)**2 \
for i in range(thisk) for c in clusters[i]])
if thisk == 1:
fs = 1
elif Skm1 == 0:
fs = 1
else:
fs = Sk/(a(thisk,Nd)*Skm1)
return fs, Sk
def _bounding_box(self):
X = self.X
xmin, xmax = min(X,key=lambda a:a[0])[0], max(X,key=lambda a:a[0])[0]
ymin, ymax = min(X,key=lambda a:a[1])[1], max(X,key=lambda a:a[1])[1]
return (xmin,xmax), (ymin,ymax)
def gap(self, thisk):
X = self.X
(xmin,xmax), (ymin,ymax) = self._bounding_box()
self.init_centers(thisk)
self.find_centers(thisk, method='++')
mu, clusters = self.mu, self.clusters
Wk = np.log(sum([np.linalg.norm(mu[i]-c)**2/(2*len(c)) \
for i in range(thisk) for c in clusters[i]]))
# Create B reference datasets
B = 10
BWkbs = zeros(B)
for i in range(B):
Xb = []
for n in range(len(X)):
Xb.append([random.uniform(xmin,xmax), \
random.uniform(ymin,ymax)])
Xb = np.array(Xb)
kb = DetK(thisk, X=Xb)
kb.init_centers(thisk)
kb.find_centers(thisk, method='++')
ms, cs = kb.mu, kb.clusters
BWkbs[i] = np.log(sum([np.linalg.norm(ms[j]-c)**2/(2*len(c)) \
for j in range(thisk) for c in cs[j]]))
Wkb = sum(BWkbs)/B
sk = np.sqrt(sum((BWkbs-Wkb)**2)/float(B))*np.sqrt(1+1/B)
return Wk, Wkb, sk
def run(self, maxk, which='both'):
ks = range(1,maxk)
fs = zeros(len(ks))
Wks,Wkbs,sks = zeros(len(ks)+1),zeros(len(ks)+1),zeros(len(ks)+1)
# Special case K=1
self.init_centers(1)
if which == 'f':
fs[0], Sk = self.fK(1)
elif which == 'gap':
Wks[0], Wkbs[0], sks[0] = self.gap(1)
else:
fs[0], Sk = self.fK(1)
Wks[0], Wkbs[0], sks[0] = self.gap(1)
# Rest of Ks
for k in ks[1:]:
self.init_centers(k)
if which == 'f':
fs[k-1], Sk = self.fK(k, Skm1=Sk)
elif which == 'gap':
Wks[k-1], Wkbs[k-1], sks[k-1] = self.gap(k)
else:
fs[k-1], Sk = self.fK(k, Skm1=Sk)
Wks[k-1], Wkbs[k-1], sks[k-1] = self.gap(k)
if which == 'f':
self.fs = fs
elif which == 'gap':
G = []
for i in range(len(ks)):
G.append((Wkbs-Wks)[i] - ((Wkbs-Wks)[i+1]-sks[i+1]))
self.G = np.array(G)
else:
self.fs = fs
G = []
for i in range(len(ks)):
G.append((Wkbs-Wks)[i] - ((Wkbs-Wks)[i+1]-sks[i+1]))
self.G = np.array(G)
When I try to run the following program on a given number of points (locArray)
locArray = np.array(locArrayMaster[counter])
kmeanscluster = DetK(2, X = locArray)
kmeanscluster.run(5)
noClusters[counter] = np.where(kmeanscluster.fs == min(kmeanscluster.fs))[0][0]+ 1
it returns me the following error
File "C:\Users\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Users\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 355, in <module>
kmeanscluster.run(5)
File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 217, in run
Wks[0], Wkbs[0], sks[0] = self.gap(1)
File "C:/Users/Documents/SUMOTraffic/kplusplus.py", line 200, in gap
for j in range(thisk) for c in cs[j]]))
TypeError: 'NoneType' object has no attribute '__getitem__'
Thanks for any help.
The error is due to the failure of the kmeans algorithm to find the cluster centres when the number of clusters is just 1. Hence the cluster dictionary is not created for this case. So, added an extra line of code in the class DetK which checks if the type of cluster dictionary is 'NoneType' and if it returns TRUE, recalculates the cluster centres again.
class DetK(KPlusPlus):
def fK(self, thisk, Skm1=0):
X = self.X
Nd = len(X[0])
a = lambda k, Nd: 1 - 3/(4*Nd) if k == 2 else a(k-1, Nd) + (1-a(k-1, Nd))/6
self.find_centers(thisk, method='++')
while type(self.clusters) is not dict:
self.find_centers(thisk, method = '++')
mu, clusters = self.mu, self.clusters
Sk = sum([np.linalg.norm(mu[i]-c)**2 \
for i in range(thisk) for c in clusters[i]])
if thisk == 1:
fs = 1
elif Skm1 == 0:
fs = 1
else:
fs = Sk/(a(thisk,Nd)*Skm1)
return fs, Sk

Does anyone have example numpy.fftn code that correctly implements multivariate_normal pdf?

With the code below, I'm attempting to implement the Levy-Khintchine formula (https://en.wikipedia.org/wiki/L%C3%A9vy_process#L.C3.A9vy.E2.80.93Khintchine_representation). In the limit of no jumps, the Levy-Khitchine formula reduces to the multivariate normal distribution. My code uses the (multi-dimensional) trapeziodal integration rule (http://mathfaculty.fullerton.edu/mathews/n2003/SimpsonsRule2DMod.html) to approximate the Fourier transform of the characteristic function as a discrete Fourier transform. For the 1-dimensional case, the code works perfectly. For the 2-D case, I can't find what I'm doing wrong.
Does anyone have example numpy.fftn code that correctly implements multivariate_normal pdf?
class LevyKhintchine:
def __init__(self, mean, cov, jump_measure):
self.mean = mean
self.cov = cov
self.jump_measure = jump_measure
self.factors = mean.shape[0]
def logCF(self, k):
rolled = Roll(k)
out = np.empty(Shape(k))
return (self.jump_measure(k) -
Dot(rolled, self.cov, rolled, out)*0.5 +
np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)
def pdf_grid(self, J):
diag = np.diagonal(self.cov)
tmp = np.pi*2/J
dk = np.sqrt(tmp/diag)
dx = np.sqrt(tmp*diag)
k = Grid(np.zeros(self.factors), dk, J)
x0 = self.mean - dx*J*0.5
f = np.exp(self.logCF(k) - Coef(dk, x0, J)*1j)
for n in range(self.factors):
f[ 0] *= 0.5
f[-1] *= 0.5
f = np.rollaxis(f, 0, factors)
pdf = np.fft.fftn(f)
return Grid(x0, dx, J), pdf.real*(np.product(dk)/np.pi)
def Grid(left, width, J):
def Slice(slices, j):
slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
return slices
slices = reduce(Slice, range(len(left)), [])
return np.mgrid[slices]
def Shape(grid):
return np.asarray(grid).shape[1:]
def Roll(grid):
grid = np.asarray(grid)
try:
rolled = np.rollaxis(grid, 0, len(grid)+1)
except ValueError:
rolled = grid
return rolled
def Dot(x, cov, y, out): #x & y are "rolled"
for j in np.ndindex(out.shape):
out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
return out
def Coef(dks, x0s, J):
factors = len(dks)
coef = np.zeros((J,)*factors)
for n, (dk, x0) in enumerate(zip(dks, x0s)):
shape = np.ones(factors, dtype=int)
shape[n] = J
coef += np.arange(J).reshape(shape)*(dk*x0)
return coef
Here's the tests:
from scipy.stats import multivariate_normal
J = 64
factors = 1
mean = np.full((factors,), -1)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
rv0 = multivariate_normal(mean, cov)
x, pdf = rv.pdf_grid(J)
plt.plot(x[0], pdf, x[0], rv0.pdf(Roll(x)))
factors = 2
mean = np.full((factors,), 5)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
x, pdf = rv.pdf_grid(J)
rv0 = multivariate_normal(mean, cov)
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax2.contourf(x[0], x[1], pdf)
fig3 = plt.figure()
ax3 = fig3.add_subplot(111)
ax3.contourf(x[0], x[1], rv0.pdf(Roll(x)))
I figured it out: in 1-d I can get away with integrating over only positive wave numbers k, in higher dimensions I cannot.
Here's the corrected code:
class LevyKhintchine:
def __init__(self, mean, cov, jump_measure):
self.mean = mean
self.cov = cov
self.jump_measure = jump_measure
self.factors = mean.shape[0]
def logCF(self, k):
rolled = Roll(k)
out = np.empty(Shape(k))
return (self.jump_measure(k) -
Dot(rolled, self.cov, rolled, out)*0.5 +
np.sum(np.multiply(Roll(k), self.mean), axis=-1)*1j)
def pdf_grid(self, J):
diag = np.diagonal(self.cov)
tmp = np.pi*2/J
dk = np.sqrt(tmp/diag)
dx = np.sqrt(tmp*diag)
k0 = -0.5*dk*J
x0 = -0.5*dx*J + self.mean
k = Grid(k0, dk, J)
x = Grid(x0, dx, J)
f = np.exp(-1j*Coef(dk, x0, J) + self.logCF(k))
for n in range(self.factors):
f[ 0] *= 0.5
f[-1] *= 0.5
f = np.rollaxis(f, 0, factors)
c = ((0.5/np.pi)**self.factors*np.product(dk)*np.exp(-1j*np.dot(k0, x0)))
pdf = np.fft.fftn(f)*np.exp(-1j*Coef(k0, dx, J))*c
return x, pdf.real
def Grid(left, width, J):
def Slice(slices, j):
slices.append(slice(left[j], left[j] + width[j]*(J-1), 1j*J))
return slices
slices = reduce(Slice, range(len(left)), [])
return np.mgrid[slices]
def Shape(grid):
return np.asarray(grid).shape[1:]
def Roll(grid):
grid = np.asarray(grid)
try:
rolled = np.rollaxis(grid, 0, len(grid)+1)
except ValueError:
rolled = grid
return rolled
def Dot(x, cov, y, out): #x & y are "rolled"
for j in np.ndindex(out.shape):
out[j] = np.dot(x[j].T, np.dot(cov, y[j]))
return out
def Coef(dks, x0s, J):
factors = len(dks)
coef = np.zeros((J,)*factors)
for n, (dk, x0) in enumerate(zip(dks, x0s)):
shape = np.ones(factors, dtype=int)
shape[n] = J
coef += np.arange(J).reshape(shape)*(dk*x0)
return coef
Here's the tests:
from scipy.stats import multivariate_normal
J = 32
for factors in range(1, 4):
mean = np.full((factors,), -1)
cov = np.identity(factors)
rv = LevyKhintchine(mean, cov, lambda k: 0)
rv0 = multivariate_normal(mean, cov)
x, pdf = rv.pdf_grid(J)
pdf0 = rv0.pdf(Roll(x))
print np.allclose(pdf, pdf0)
True
True
True

Categories

Resources