Indexing multidimensional numpy array inside numba's jitclass - python

I'm trying to insert a small multidimensional array into a larger one inside a numba jitclass. The small array is written to specific positions of the larger array defined by an index list.
The following MWE shows the problem without numba - everything works as expected
import numpy as np

class NumbaClass(object):
    def __init__(self, n, m):
        self.A = np.zeros((n, m))

    # solution 1 using pure python
    def nonNumbaFunction1(self, idx, values):
        self.A[idx[:, None], idx] = values

    # solution 2 using pure python
    def nonNumbaFunction2(self, idx, values):
        self.A[np.ix_(idx, idx)] = values

if __name__ == "__main__":
    n = 6
    m = 8
    obj = NumbaClass(n, m)
    print(f'A =\n{obj.A}')
    idx = np.array([0, 2, 5])
    values = np.arange(len(idx)**2).reshape(len(idx), len(idx))
    print(f'values =\n{values}')
    obj.nonNumbaFunction1(idx, values)
    print(f'A =\n{obj.A}')
    obj.nonNumbaFunction2(idx, values)
    print(f'A =\n{obj.A}')
Neither nonNumbaFunction1 nor nonNumbaFunction2 works inside a numba jitclass, so my current workaround is the explicit loop below, which is not really nice in my opinion:
import numpy as np
from numba import jitclass
from numba import int64, float64
from collections import OrderedDict

specs = OrderedDict()
specs['A'] = float64[:, :]

@jitclass(specs)
class NumbaClass(object):
    def __init__(self, n, m):
        self.A = np.zeros((n, m))

    # solution for numba jitclass
    def numbaFunction(self, idx, values):
        for i in range(len(values)):
            idxi = idx[i]
            for j in range(len(values)):
                idxj = idx[j]
                self.A[idxi, idxj] = values[i, j]

if __name__ == "__main__":
    n = 6
    m = 8
    obj = NumbaClass(n, m)
    print(f'A =\n{obj.A}')
    idx = np.array([0, 2, 5])
    values = np.arange(len(idx)**2).reshape(len(idx), len(idx))
    print(f'values =\n{values}')
    obj.numbaFunction(idx, values)
    print(f'A =\n{obj.A}')
So my questions are:
Does anyone know a solution to this indexing in numba or is there another vectorized solution?
Is there a faster solution for nonNumbaFunction1?
It might be useful to know that the inserted array is small (4x4 to 10x10), but this indexing appears inside nested loops, so it has to be quite fast as well! Later I will need similar indexing for three-dimensional objects too.

Because of limitations on numba's indexing support, I don't think you can do any better than writing out the for loops yourself. To make it generic across dimensions, you could use the generated_jit decorator to specialize. Something like this:
import numba

def set_2d(target, values, idx):
    for i in range(values.shape[0]):
        for j in range(values.shape[1]):
            target[idx[i], idx[j]] = values[i, j]

def set_3d(target, values, idx):
    for i in range(values.shape[0]):
        for j in range(values.shape[1]):
            for k in range(values.shape[2]):
                target[idx[i], idx[j], idx[k]] = values[i, j, k]

@numba.generated_jit
def set_nd(target, values, idx):
    if target.ndim == 2:
        return set_2d
    elif target.ndim == 3:
        return set_3d
Then, this can be used in your jitclass:

specs = OrderedDict()
specs['A'] = float64[:, :]

@jitclass(specs)
class NumbaClass(object):
    def __init__(self, n, m):
        self.A = np.zeros((n, m))

    def numbaFunction(self, idx, values):
        set_nd(self.A, values, idx)
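For completeness, a minimal usage sketch of the above (reusing the n, m, idx, and values from the question; untested, so treat it as an illustration):

obj = NumbaClass(6, 8)
idx = np.array([0, 2, 5])
values = np.arange(len(idx)**2).reshape(len(idx), len(idx)).astype(np.float64)
obj.numbaFunction(idx, values)  # set_nd dispatches to set_2d for the 2D case
print(obj.A)                    # rows/columns 0, 2 and 5 now hold the values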

Related


What is the fastest way to check if a value exists in a very large list?
7 in a
Clearest and fastest way to do it.
You can also consider using a set, but constructing that set from your list may take more time than faster membership testing will save. The only way to be certain is to benchmark well. (this also depends on what operations you require)
As stated by others, in can be very slow for large lists. Here are some comparisons of the performance of in, set and bisect. Note that the time (in seconds) is on a log scale.
Code for testing:
import random
import bisect
import matplotlib.pyplot as plt
import math
import time

def method_in(a, b, c):
    start_time = time.time()
    for i, x in enumerate(a):
        if x in b:
            c[i] = 1
    return time.time() - start_time

def method_set_in(a, b, c):
    start_time = time.time()
    s = set(b)
    for i, x in enumerate(a):
        if x in s:
            c[i] = 1
    return time.time() - start_time

def method_bisect(a, b, c):
    start_time = time.time()
    b.sort()
    for i, x in enumerate(a):
        index = bisect.bisect_left(b, x)
        if index < len(b):
            if x == b[index]:
                c[i] = 1
    return time.time() - start_time

def profile():
    time_method_in = []
    time_method_set_in = []
    time_method_bisect = []
    # adjust range down if runtime is too long or up if there are
    # too many zero entries in any of the time_method lists
    Nls = [x for x in range(10000, 30000, 1000)]
    for N in Nls:
        a = [x for x in range(0, N)]
        random.shuffle(a)
        b = [x for x in range(0, N)]
        random.shuffle(b)
        c = [0 for x in range(0, N)]
        time_method_in.append(method_in(a, b, c))
        time_method_set_in.append(method_set_in(a, b, c))
        time_method_bisect.append(method_bisect(a, b, c))
    plt.plot(Nls, time_method_in, marker='o', color='r', linestyle='-', label='in')
    plt.plot(Nls, time_method_set_in, marker='o', color='b', linestyle='-', label='set')
    plt.plot(Nls, time_method_bisect, marker='o', color='g', linestyle='-', label='bisect')
    plt.xlabel('list size', fontsize=18)
    plt.ylabel('log(time)', fontsize=18)
    plt.legend(loc='upper left')
    plt.yscale('log')
    plt.show()

profile()
You could put your items into a set. Set lookups are very efficient.
Try:
s = set(a)
if 7 in s:
    # do stuff
Edit: In a comment you say that you'd like to get the index of the element. Unfortunately, sets have no notion of element position. An alternative is to pre-sort your list and then use binary search every time you need to find an element.
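A minimal sketch of that bisect-based alternative (note that the index you get back refers to the sorted copy, not the original list):

import bisect

a_sorted = sorted(a)  # pay the sorting cost once
i = bisect.bisect_left(a_sorted, 7)
found = i < len(a_sorted) and a_sorted[i] == 7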
The original question was:
What is the fastest way to know if a value exists in a list (a list
with millions of values in it) and what its index is?
Thus there are two things to find:
is an item in the list, and
what is the index (if in the list).
Towards this, I modified @xslittlegrass's code to compute indexes in all cases, and added an additional method.
Results
Methods are:

1. in -- basically if x in b: return b.index(x)
2. try -- try/except around b.index(x) (skips having to check if x in b)
3. set -- basically if x in set(b): return b.index(x)
4. bisect -- sort b with its index, binary search for x in sorted(b). (Note: modified from @xslittlegrass, who returns the index in the sorted b rather than the original b.)
5. reverse -- form a reverse lookup dictionary d for b; then d[x] provides the index of x.

Results show that method 5 (reverse) is the fastest.
Interestingly, the try and the set methods are equivalent in time.
Test Code
import random
import bisect
import matplotlib.pyplot as plt
import math
import timeit
import itertools

def wrapper(func, *args, **kwargs):
    " Produce a zero-argument function that calls func with the given arguments "
    # Reference https://www.pythoncentral.io/time-a-python-function/
    def wrapped():
        return func(*args, **kwargs)
    return wrapped

def method_in(a, b, c):
    for i, x in enumerate(a):
        if x in b:
            c[i] = b.index(x)
        else:
            c[i] = -1
    return c

def method_try(a, b, c):
    for i, x in enumerate(a):
        try:
            c[i] = b.index(x)
        except ValueError:
            c[i] = -1

def method_set_in(a, b, c):
    s = set(b)
    for i, x in enumerate(a):
        if x in s:
            c[i] = b.index(x)
        else:
            c[i] = -1
    return c

def method_bisect(a, b, c):
    " Finds indexes using bisection "
    # Create a sorted b with its index
    bsorted = sorted([(x, i) for i, x in enumerate(b)], key=lambda t: t[0])
    for i, x in enumerate(a):
        index = bisect.bisect_left(bsorted, (x, ))
        c[i] = -1
        if index < len(b):
            if x == bsorted[index][0]:
                c[i] = bsorted[index][1]  # index in the b array
    return c

def method_reverse_lookup(a, b, c):
    reverse_lookup = {x: i for i, x in enumerate(b)}
    for i, x in enumerate(a):
        c[i] = reverse_lookup.get(x, -1)
    return c

def profile():
    Nls = [x for x in range(1000, 20000, 1000)]
    number_iterations = 10
    methods = [method_in, method_try, method_set_in, method_bisect, method_reverse_lookup]
    time_methods = [[] for _ in range(len(methods))]
    for N in Nls:
        a = [x for x in range(0, N)]
        random.shuffle(a)
        b = [x for x in range(0, N)]
        random.shuffle(b)
        c = [0 for x in range(0, N)]
        for i, func in enumerate(methods):
            wrapped = wrapper(func, a, b, c)
            time_methods[i].append(math.log(timeit.timeit(wrapped, number=number_iterations)))
    markers = itertools.cycle(('o', '+', '.', '>', '2'))
    colors = itertools.cycle(('r', 'b', 'g', 'y', 'c'))
    labels = itertools.cycle(('in', 'try', 'set', 'bisect', 'reverse'))
    for i in range(len(time_methods)):
        plt.plot(Nls, time_methods[i], marker=next(markers), color=next(colors), linestyle='-', label=next(labels))
    plt.xlabel('list size', fontsize=18)
    plt.ylabel('log(time)', fontsize=18)
    plt.legend(loc='upper left')
    plt.show()

profile()
def check_availability(element, collection: iter):
    return element in collection
Usage
check_availability('a', [1,2,3,4,'a','b','c'])
I believe this is the fastest way to know if a chosen value is in an array.
a = [4, 2, 3, 1, 5, 6]
index = dict((y, x) for x, y in enumerate(a))

try:
    a_index = index[7]
except KeyError:
    print("Not found")
else:
    print("found")
This will only be a good idea if a doesn't change and thus we can do the dict() part once and then use it repeatedly. If a does change, please provide more detail on what you are doing.
Be aware that the in operator tests not only equality (==) but also identity (is). The in logic for lists is roughly equivalent to the following (though it's actually written in C and not Python, at least in CPython):
for element in s:
    if element is target:
        # fast check for identity implies equality
        return True
    if element == target:
        # slower check for actual equality
        return True
return False
In most circumstances this detail is irrelevant, but in some circumstances it might leave a Python novice surprised: for example, numpy.NAN has the unusual property of not being equal to itself:
>>> import numpy
>>> numpy.NAN == numpy.NAN
False
>>> numpy.NAN is numpy.NAN
True
>>> numpy.NAN in [numpy.NAN]
True
To distinguish between these unusual cases you could use any() like:
>>> lst = [numpy.NAN, 1 , 2]
>>> any(element == numpy.NAN for element in lst)
False
>>> any(element is numpy.NAN for element in lst)
True
Note the in logic for lists with any() would be:
any(element is target or element == target for element in lst)
However, I should emphasize that this is an edge case, and for the vast majority of cases the in operator is highly optimised and exactly what you want of course (either with a list or with a set).
If you only want to check the existence of one element in a list,
7 in list_data
is the fastest solution. Note though that
7 in set_data
is a near-free operation, independently of the size of the set! Creating a set from a large list is 300 to 400 times slower than in, so if you need to check for many elements, creating a set first is faster.
Plot created with perfplot:
import perfplot
import numpy as np

def setup(n):
    data = np.arange(n)
    np.random.shuffle(data)
    return data, set(data)

def list_in(data):
    return 7 in data[0]

def create_set_from_list(data):
    return set(data[0])

def set_in(data):
    return 7 in data[1]

b = perfplot.bench(
    setup=setup,
    kernels=[list_in, set_in, create_set_from_list],
    n_range=[2 ** k for k in range(24)],
    xlabel="len(data)",
    equality_check=None,
)
b.save("out.png")
b.show()
It sounds like your application might gain advantage from the use of a Bloom Filter data structure.
In short, a Bloom filter look-up can tell you very quickly if a value is DEFINITELY NOT present in a set. Otherwise, you can do a slower look-up to get the index of a value that POSSIBLY MIGHT BE in the list. So if your application tends to get the "not found" result much more often than the "found" result, you might see a speed-up by adding a Bloom filter.
For details, Wikipedia provides a good overview of how Bloom Filters work, and a web search for "python bloom filter library" will provide at least a couple useful implementations.
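For illustration only, here is a toy Bloom filter built on nothing but hashlib and a bit array; real libraries size the bit array and choose the number of hash functions from the expected element count and target false-positive rate, so treat this as a sketch:

import hashlib

class BloomFilter:
    def __init__(self, n_bits=8_000_000, k=7):
        self.n_bits, self.k = n_bits, k
        self.bits = bytearray(n_bits // 8 + 1)

    def _positions(self, item):
        # derive k bit positions from salted SHA-1 digests
        for salt in range(self.k):
            h = hashlib.sha1(f"{salt}:{item}".encode()).digest()
            yield int.from_bytes(h[:8], "big") % self.n_bits

    def add(self, item):
        for p in self._positions(item):
            self.bits[p // 8] |= 1 << (p % 8)

    def __contains__(self, item):
        # False means DEFINITELY NOT present; True means POSSIBLY present
        return all(self.bits[p // 8] & (1 << (p % 8)) for p in self._positions(item))

bf = BloomFilter()
bf.add(42)
print(42 in bf)  # True
print(43 in bf)  # almost certainly False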
This is not the code, but the algorithm for very fast searching.
If your list and the value you are looking for are all numbers, this is pretty straightforward. If strings: look at the bottom.

Let "n" be the length of your list.
Optional step: if you need the index of the element, add a second column to the list with the current index of the elements (0 to n-1) - see later.
Sort your list or a copy of it (.sort()).
Loop through:
Compare your number to the n/2th element of the list.
If larger, loop again between indexes n/2 and n.
If smaller, loop again between indexes 0 and n/2.
If the same: you found it.
Keep narrowing the list until you have found it or only have 2 numbers (below and above the one you are looking for).
This will find any element in at most 20 steps for a list of 1,000,000 (log2(n), to be precise).
If you also need the original position of your number, look for it in the second, index column.
If your list is not made of numbers, the method still works and will be fastest, but you may need to define a function which can compare/order strings.
Of course, this needs the investment of the sorting step, but if you keep reusing the same list for checking, it may be worth it.
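A compact sketch of this algorithm using the standard bisect module, keeping the optional index column as (value, original_index) pairs (my_list and find are illustrative names, not from the question):

import bisect

pairs = sorted((v, i) for i, v in enumerate(my_list))  # one-time sort, with index column

def find(x):
    j = bisect.bisect_left(pairs, (x,))  # binary search on the value
    if j < len(pairs) and pairs[j][0] == x:
        return pairs[j][1]  # original position of x in my_list
    return -1  # not found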
Edge case for spatial data
There are probably faster algorithms for handling spatial data (e.g. refactoring to use a k-d tree), but the special case of checking if a vector is in an array is useful:
If you have spatial data (i.e. cartesian coordinates)
If you have integer masks (i.e. array filtering)
In this case, I was interested in knowing if an (undirected) edge defined by two points was in a collection of (undirected) edges, such that
(pair in unique_pairs) | (pair[::-1] in unique_pairs) for pair in pairs
where pair constitutes two vectors of arbitrary length (i.e. shape (2,N)).
If the distance between these vectors is meaningful, then the test can be expressed by a floating point inequality like
test_result = Norm(v1 - v2) < Tol
and "Value exists in List" is simply any(test_result).
Example code and dummy test set generators for integer pairs and R3 vector pairs are below.
# 3rd party
import numpy as np
import numpy.linalg as LA
import matplotlib.pyplot as plt

# optional
try:
    from tqdm import tqdm
except ModuleNotFoundError:
    def tqdm(X, *args, **kwargs):
        return X
    print('tqdm not found. tqdm is a handy progress bar module.')

def get_float_r3_pairs(size):
    """ generate dummy vector pairs in R3 (i.e. case of spatial data) """
    coordinates = np.random.random(size=(size, 3))
    pairs = []
    for b in coordinates:
        for a in coordinates:
            pairs.append((a, b))
    pairs = np.asarray(pairs)
    return pairs

def get_int_pairs(size):
    """ generate dummy integer pairs (i.e. case of array masking) """
    coordinates = np.random.randint(0, size, size)
    pairs = []
    for b in coordinates:
        for a in coordinates:
            pairs.append((a, b))
    pairs = np.asarray(pairs)
    return pairs

def float_tol_pair_in_pairs(pair: np.ndarray, pairs: np.ndarray) -> np.ndarray:
    """
    True if abs(a0 - b0) <= tol & abs(a1 - b1) <= tol for (ai1, aj2), (bi1, bj2)
    in [(a01, a02), ... (aik, ajl)]

    NB this is expected to be called in iteration so no sanitization is performed.

    Parameters
    ----------
    pair : np.ndarray
        pair of vectors with shape (2, M)
    pairs : np.ndarray
        collection of vector pairs with shape (N, 2, M)

    Returns
    -------
    np.ndarray
        (pair in pairs) | (pair[::-1] in pairs)
    """
    m1 = np.sum(abs(LA.norm(pairs - pair, axis=2)) <= (1e-03, 1e-03), axis=1) == 2
    m2 = np.sum(abs(LA.norm(pairs - pair[::-1], axis=2)) <= (1e-03, 1e-03), axis=1) == 2
    return m1 | m2

def get_unique_pairs(pairs: np.ndarray) -> np.ndarray:
    """
    apply float_tol_pair_in_pairs for pair in pairs

    Parameters
    ----------
    pairs : np.ndarray
        collection of vector pairs with shape (N, 2, M)

    Returns
    -------
    np.ndarray
        pair if not ((pair in rv) | (pair[::-1] in rv)) for pair in pairs
    """
    pairs = np.asarray(pairs).reshape((len(pairs), 2, -1))
    rv = [pairs[0]]
    for pair in tqdm(pairs[1:], desc='finding unique pairs...'):
        if not any(float_tol_pair_in_pairs(pair, rv)):
            rv.append(pair)
    return np.array(rv)

How can I create a numba callable that has adjustable parameters?

I would like to create a numba-compiled python callable (a function that I can use in another Numba-compiled function) that has an internal array that I can adjust to influence the result of the function call. In pure python, this would correspond to a class with a __call__ method:
class Test:
    def __init__(self, arr):
        self.arr = arr

    def __call__(self, idx):
        res = 0
        for i in idx:
            res += self.arr[i]
        return res

t = Test([0, 1, 2])
print(t([1, 2]))
t.arr = [1, 2, 3]
print(t([1, 2]))
which prints 3 and 5, respectively, so the result was different after I modified the internal array arr.
A literal translation to Numba using jitclass and numpy arrays looks like this
import numpy as np
import numba as nb

@nb.jitclass([('arr', nb.double[:])])
class Test:
    def __init__(self, arr):
        self.arr = arr.astype(np.double)

    def __call__(self, idx):
        res = 0
        for i in idx:
            res += self.arr[i]
        return res

t = Test(np.arange(3))
print(t(np.array([1, 2])))
t.arr = np.arange(3) + 1
print(t(np.array([1, 2])))
Unfortunately, this fails with TypeError: 'Test' object is not callable, since Numba does not seem to support __call__, yet.
I then tried to solve the problem using closures
import numpy as np
import numba as nb

arr = np.arange(5)

@nb.jit
def call(idx):
    res = 0
    for i in idx:
        res += arr[i]
    return res

print(call(np.array([1, 2])))
arr += 1
print(call(np.array([1, 2])))
but this prints 3 twice, since closures copy the data in arr into an internal representation, which I then cannot (easily?) change from the outside.
I even tried to trick Numba by using ctypes pointers to Numpy arrays in combination with numba.carray, but Numba still seems to copy the data, so I cannot manipulate it.
I understand that Numba wants to control the memory and avoid access to memory regions that might not be used anymore. However, I have a specific use case where I would like to avoid passing around the extra array arr and rather adjust the internal copy somehow. Is there any way to achieve this?
EDIT:
I tried the suggestion by Daniel in the comments to use a method different than __call__, but this also does not work. Here is what I thought might work:
@nb.jitclass([('arr', nb.double[:])])
class Test:
    def __init__(self, arr):
        self.arr = arr

    def call(self, idx):
        return self.arr[idx]

a = Test(np.arange(5).astype(np.double))
print(a.call(3))
a.arr += 1
print(a.call(3))

@nb.njit
def rhs(idx):
    return a.call(idx)

rhs(3)
This prints 3 and 4, so the array arr can indeed be manipulated. However, using the instance a in a compiled method fails with a NotImplementedError, so I suspect this use case is not (yet) supported by Numba.
Divide the problem into two parts: a numba function and a pure Python class:
import numpy as np
import numba

@numba.jit
def calc(arr, idx):
    res = 0
    for i in idx:
        res += arr[i]
    return res

class Test:
    def __init__(self, arr):
        self.arr = arr.astype(np.double)

    def __call__(self, idx):
        return calc(self.arr, idx)

t = Test(np.arange(3))
print(t(np.array([1, 2])))
t.arr = np.arange(3) + 1
print(t(np.array([1, 2])))
I believe you need @property before the methods of the class, but this may not be the only issue:
@nb.jitclass([('arr', nb.double[:])])
class Test:
    def __init__(self, arr):
        self.arr = arr

    @property
    def call(self, idx):
        return self.arr[idx]

a = Test(np.arange(5).astype(np.double))
print(a.call(3))
a.arr += 1
print(a.call(3))

@nb.njit
def rhs(idx):
    return a.call(idx)

rhs(3)
This effect is the result of nopython compilation. If your goal is to create such a callable at any cost, even possibly without taking benefits from jit-compilation, object-mode compilation is a simple solution for your problem. This can be achieved in your closure example code simply by providing the forceobj=True parameter to the @nb.jit decorator.
This code prints 3 and 5 respectively:
import numpy as np
import numba as nb

arr = np.arange(5)

@nb.jit(forceobj=True)
def call(idx):
    res = 0
    for i in idx:
        res += arr[i]
    return res

print(call(np.array([1, 2])))
arr += 1
print(call(np.array([1, 2])))

How to vectorize a class instantiation to allow NumPy arrays as input?

I programmed a class which looks something like this:
import numpy as np

class blank():
    def __init__(self, a, b, c):
        self.a = a
        self.b = b
        self.c = c
        n = 5
        c = a / b * 8
        if (a > b):
            y = c + a * b
        else:
            y = c - a * b
        p = np.empty([1, 1])
        k = np.empty([1, 1])
        l = np.empty([1, 1])
        p[0] = b
        k[0] = b * (c - 1)
        l[0] = p + k
        for i in range(1, n, 1):
            p = np.append(p, l[i - 1])
            k = np.append(k, (p[i] * (c + 1)))
            l = np.append(l, p[i] + k[i])
        komp = np.zeros(shape=(n, 1))
        for i in range(0, n):
            pl_avg = (p[i] + l[i]) / 2
            h = pl_avg * 3
            komp[i] = pl_avg * h / 4
        self.tot = komp + l
And when I call it like this:
from ex1 import blank
import numpy as np
res=blank(1,2,3)
print(res.tot)
everything works well.
BUT I want to call it like this:
res = blank(np.array([1,2,3]), np.array([3,4,5]), 3)
Is there an easy way to call it for each element of these two arrays without editing the class code?
You won't be able to instantiate a class with NumPy arrays as inputs without changing the class code. @PabloAlvarez and @NagaKiran already provided an alternative: iterate with zip over the arrays and instantiate the class for each pair of elements. While this is a pretty simple solution, it defeats the purpose of using NumPy with its efficient vectorized operations.
Here is how I suggest you rewrite the code:
from typing import Union
import numpy as np

def total(a: Union[float, np.ndarray],
          b: Union[float, np.ndarray],
          n: int = 5) -> np.array:
    """Calculates what your self.tot was"""
    bc = 8 * a
    c = bc / b
    vectorized_geometric_progression = np.vectorize(geometric_progression,
                                                    otypes=[np.ndarray])
    l = np.stack(vectorized_geometric_progression(bc, c, n))
    l = np.atleast_2d(l)
    p = np.insert(l[:, :-1], 0, b, axis=1)
    l = np.squeeze(l)
    p = np.squeeze(p)
    pl_avg = (p + l) / 2
    komp = np.array([0.75 * pl_avg ** 2]).T
    return komp + l

def geometric_progression(bc, c, n):
    """Calculates array l"""
    return bc * np.logspace(start=0,
                            stop=n - 1,
                            num=n,
                            base=c + 2)
And you can call it both for single numbers and for NumPy arrays, like this:
>>> print(total(1, 2))
[[2.6750000e+01 6.6750000e+01 3.0675000e+02 1.7467500e+03 1.0386750e+04]
[5.9600000e+02 6.3600000e+02 8.7600000e+02 2.3160000e+03 1.0956000e+04]
[2.1176000e+04 2.1216000e+04 2.1456000e+04 2.2896000e+04 3.1536000e+04]
[7.6205600e+05 7.6209600e+05 7.6233600e+05 7.6377600e+05 7.7241600e+05]
[2.7433736e+07 2.7433776e+07 2.7434016e+07 2.7435456e+07 2.7444096e+07]]
>>> print(total(3, 4))
[[1.71000000e+02 3.39000000e+02 1.68300000e+03 1.24350000e+04 9.84510000e+04]
[8.77200000e+03 8.94000000e+03 1.02840000e+04 2.10360000e+04 1.07052000e+05]
[5.59896000e+05 5.60064000e+05 5.61408000e+05 5.72160000e+05 6.58176000e+05]
[3.58318320e+07 3.58320000e+07 3.58333440e+07 3.58440960e+07 3.59301120e+07]
[2.29323574e+09 2.29323590e+09 2.29323725e+09 2.29324800e+09 2.29333402e+09]]
>>> print(total(np.array([1, 3]), np.array([2, 4])))
[[[2.67500000e+01 6.67500000e+01 3.06750000e+02 1.74675000e+03 1.03867500e+04]
[1.71000000e+02 3.39000000e+02 1.68300000e+03 1.24350000e+04 9.84510000e+04]]
[[5.96000000e+02 6.36000000e+02 8.76000000e+02 2.31600000e+03 1.09560000e+04]
[8.77200000e+03 8.94000000e+03 1.02840000e+04 2.10360000e+04 1.07052000e+05]]
[[2.11760000e+04 2.12160000e+04 2.14560000e+04 2.28960000e+04 3.15360000e+04]
[5.59896000e+05 5.60064000e+05 5.61408000e+05 5.72160000e+05 6.58176000e+05]]
[[7.62056000e+05 7.62096000e+05 7.62336000e+05 7.63776000e+05 7.72416000e+05]
[3.58318320e+07 3.58320000e+07 3.58333440e+07 3.58440960e+07 3.59301120e+07]]
[[2.74337360e+07 2.74337760e+07 2.74340160e+07 2.74354560e+07 2.74440960e+07]
[2.29323574e+09 2.29323590e+09 2.29323725e+09 2.29324800e+09 2.29333402e+09]]]
You can see that the results agree.
Explanation:
First of all, I'd like to note that your calculation of p, k, and l doesn't have to be in the loop. Moreover, calculating k is unnecessary. If you look carefully at how the elements of p and l are calculated, you'll see they are just geometric progressions (except the 1st element of p):
p = [b, b*c, b*c*(c+2), b*c*(c+2)**2, b*c*(c+2)**3, b*c*(c+2)**4, ...]
l = [b*c, b*c*(c+2), b*c*(c+2)**2, b*c*(c+2)**3, b*c*(c+2)**4, b*c*(c+2)**5, ...]
So, instead of that loop, you can use np.logspace. Unfortunately, np.logspace doesn't support base parameter as an array, so we have no other choice but to use np.vectorize which is just a loop under the hood...
Calculating komp, though, is easily vectorized. You can see it in my example. No need for loops there.
Also, as I already noted in a comment, your class doesn't have to be a class, so I took the liberty of changing it to a function.
Next, note that the input parameter c is overwritten, so I got rid of it. The variable y is never used. (Also, you could calculate it simply as y = c + a * b * np.sign(a - b).)
And finally, I'd like to remark that creating NumPy arrays with np.append is very inefficient (as was pointed out by @kabanus), so you should always try to create them at once - no loops, no appending.
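To make the point concrete, a small illustration of the difference (a sketch; exact timings will vary):

import numpy as np

n = 100_000
slow = np.empty(0)
for i in range(n):
    slow = np.append(slow, i)  # quadratic: reallocates and copies the whole array each time

fast = np.empty(n)
for i in range(n):
    fast[i] = i  # linear: allocate once, fill in place (here np.arange(n) would do)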
P.S.: I used np.atleast_2d and np.squeeze in my code and it could be unclear why I did it. They are necessary to avoid if-else clauses where we would check dimensions of array l. You can print intermediate results to see what is really going on there. Nothing difficult.
If it is just a matter of calling the class with elements from two different arrays, a loop does the job well:
res = [blank(i,j,3) for i,j in zip(np.array([1,2,3]),np.array([3,4,5]))]
You can inspect the list of values in the res variable.
The only way I can think of to iterate over lists of arrays is by using a function in the main program for the iteration, and then doing the operations you need inside the loop.
This solution works on each element of both arrays (note the use of the zip function to iterate over both lists together, as listed in this answer here):
for n, x in zip(np.array([1, 2, 3]), np.array([3, 4, 5])):
    res = blank(n, x, 3)
    print(res.tot)
Hope it is what you need!

Fastest way to sort in Python (no cython)

I have a problem where I have to sort a very big array (shape 7900000x4x4) with a custom function. I used sorted, but it took more than 1 hour to sort. My code was something like this:
def compare(x, y):
    print('DD ' + str(x[0]))
    if (np.array_equal(x[1], y[1]) == True):
        return -1
    a = x[1].flatten()
    b = y[1].flatten()
    idx = np.where((a > b) != (a < b))[0][0]
    if a[idx] < 0 and b[idx] >= 0:
        return 0
    elif b[idx] < 0 and a[idx] >= 0:
        return 1
    elif a[idx] < 0 and b[idx] < 0:
        if a[idx] > b[idx]:
            return 0
        elif a[idx] < b[idx]:
            return 1
    elif a[idx] < b[idx]:
        return 1
    else:
        return 0

def cmp_to_key(mycmp):
    class K:
        def __init__(self, obj, *args):
            self.obj = obj
        def __lt__(self, other):
            return mycmp(self.obj, other.obj)
    return K

tblocks = sorted(tblocks.items(), key=cmp_to_key(compare))
This worked, but I want it to complete in seconds. I don't think any direct implementation in Python can give me the performance I need, so I tried Cython. My Cython code is this, which is pretty simple:
cdef int[:,:] arrr
cdef int size

cdef bool compare(int a, int b):
    global arrr, size
    cdef int[:] x = arrr[a]
    cdef int[:] y = arrr[b]
    cdef int i, j
    i = 0
    j = 0
    while i < size:
        if (j == size - 1) or (y[j] < x[i]):
            return 0
        elif x[i] < y[j]:
            return 1
        i += 1
        j += 1
    return (j != size - 1)

def sorted(np.ndarray boxes, int total_blocks, int s):
    global arrr, size
    cdef int i
    cdef vector[int] index = xrange(total_blocks)
    arrr = boxes
    size = s
    sort(index.begin(), index.end(), compare)
    return index
This code in Cython took 33 seconds! Cython is a solution, but I am looking for alternative solutions which can run directly in Python, for example numba. I tried Numba, but I didn't get satisfying results. Kindly help!
It is hard to give an answer without a working example. I assume that arrr in your Cython code was a 2D array, and I assume that size was size = arrr.shape[0].
Numba Implementation
import numpy as np
import numba as nb
from numba.targets import quicksort

def custom_sorting(compare_fkt):
    index_arange = np.arange(size)
    quicksort_func = quicksort.make_jit_quicksort(lt=compare_fkt, is_argsort=False)
    jit_sort_func = nb.njit(quicksort_func.run_quicksort)
    index = jit_sort_func(index_arange)
    return index

def compare(a, b):
    x = arrr[a]
    y = arrr[b]
    i = 0
    j = 0
    while i < size:
        if (j == size - 1) or (y[j] < x[i]):
            return False
        elif x[i] < y[j]:
            return True
        i += 1
        j += 1
    return (j != size - 1)

arrr = np.random.randint(-9, 10, (7900000, 8))
size = arrr.shape[0]
index = custom_sorting(compare)
This gives 3.85 s for the generated test data, but the speed of a sorting algorithm heavily depends on the data....
Simple Example
import numpy as np
import numba as nb
from numba.targets import quicksort

# simple reverse sort
def compare(a, b):
    return a > b

# create some test data
arrr = np.array(np.random.rand(7900000) * 10000, dtype=np.int32)

# we can pass the comparison function
quicksort_func = quicksort.make_jit_quicksort(lt=compare, is_argsort=True)

# compile the sorting function
jit_sort_func = nb.njit(quicksort_func.run_quicksort)

# get the result
ind_sorted = jit_sort_func(arrr)
This implementation is about 35% slower than np.argsort, but this is also common in using np.argsort in compiled code.
If I understand your code correctly, then the order you have in mind is the standard order, only that it starts at 0, wraps around at +/-infinity and maxes out at -0. On top of that we have simple left-to-right lexicographic order.
Now, if your array dtype is integer, observe the following: because of the two's-complement representation of negatives, view-casting to unsigned int makes your order the standard order. On top of that, if we use big-endian encoding, efficient lexicographic ordering can be achieved by view-casting to void dtype.
The code below shows, using a 10000x4x4 example, that this method gives the same result as your Python code.
It also benchmarks it on a 7,900,000x4x4 example (using an array, not a dict). On my modest laptop this method takes 8 seconds.
import numpy as np

def compare(x, y):
    # print('DD ' + str(x[0]))
    if (np.array_equal(x[1], y[1]) == True):
        return -1
    a = x[1].flatten()
    b = y[1].flatten()
    idx = np.where((a > b) != (a < b))[0][0]
    if a[idx] < 0 and b[idx] >= 0:
        return 0
    elif b[idx] < 0 and a[idx] >= 0:
        return 1
    elif a[idx] < 0 and b[idx] < 0:
        if a[idx] > b[idx]:
            return 0
        elif a[idx] < b[idx]:
            return 1
    elif a[idx] < b[idx]:
        return 1
    else:
        return 0

def cmp_to_key(mycmp):
    class K:
        def __init__(self, obj, *args):
            self.obj = obj
        def __lt__(self, other):
            return mycmp(self.obj, other.obj)
    return K

def custom_sort(a):
    assert a.dtype == np.int64
    b = a.astype('>i8', copy=False)
    return b.view(f'V{a.dtype.itemsize * a.shape[1]}').ravel().argsort()

tblocks = np.random.randint(-9, 10, (10000, 4, 4))
tblocks = dict(enumerate(tblocks))
tblocks_s = sorted(tblocks.items(), key=cmp_to_key(compare))
tblocksa = np.array(list(tblocks.values()))
tblocksa = tblocksa.reshape(tblocksa.shape[0], -1)
order = custom_sort(tblocksa)
tblocks_s2 = list(tblocks.items())
tblocks_s2 = [tblocks_s2[o] for o in order]
print(tblocks_s == tblocks_s2)

from timeit import timeit
data = np.random.randint(-9_999, 10_000, (7_900_000, 4, 4))
print(timeit(lambda: data[custom_sort(data.reshape(data.shape[0], -1))],
             number=5) / 5)
Sample output:
True
7.8328493310138585

Parallelize these nested for loops in python

I have a multidimensional array (result) that should be filled by some nested loops. Function fun() is a complex and time-consuming function. I want to fill my array elements in a parallel manner, so I can use all my system's processing power.
Here's the code:
import numpy as np

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

dim1 = 10
dim2 = 20
dim3 = 30
result = np.zeros([dim1, dim2, dim3])

for i in xrange(dim1):
    for j in xrange(dim2):
        for k in xrange(dim3):
            result[i, j, k] = fun(i, j, k)
My question is: "Can I parallelize this code or not? If yes, how?"
I'm using Windows 10 64-bit and python 2.7.
Please provide your solution by changing my code if you can.
Thanks!
If you want a more general solution, taking advantage of fully parallel execution, then why not use something like this:
>>> import multiprocess as mp
>>> p = mp.Pool()
>>>
>>> # a time consuming function taking x,y,z,...
>>> def fun(*args):
...     import time
...     time.sleep(.1)
...     return sum(*args)
...
>>> dim1, dim2, dim3 = 10, 20, 30
>>> import itertools
>>> input = ((i,j,k) for i,j,k in itertools.combinations_with_replacement(xrange(dim3), 3) if i < dim1 and j < dim2)
>>> results = p.map(fun, input)
>>> p.close()
>>> p.join()
>>>
>>> results[:2]
[0, 1]
>>> results[-2:]
[56, 57]
Note I'm using multiprocess instead of multiprocessing, but that's only to get the ability to work in the interpreter.
I didn't use a numpy.array, but if you had to... you could just dump the output from p.map directly into a numpy.array and then modify the shape attribute to be shape = (dim1, dim2, dim3), or you could do something like this:
>>> input = ((i,j,k) for i,j,k in itertools.combinations_with_replacement(xrange(dim3), 3) if i < dim1 and j < dim2)
>>> import numpy as np
>>> results = np.empty(dim1*dim2*dim3)
>>> res = p.imap(fun, input)
>>> for i,r in enumerate(res):
...     results[i] = r
...
>>> results.shape = (dim1,dim2,dim3)
Here is a version of the code that runs fun(i, j, k) in parallel for different k indices. This is done by running fun in different processes using https://docs.python.org/2/library/multiprocessing.html:
import numpy as np
from multiprocessing import Pool

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

def fun_wrapper(indices):
    return fun(*indices)

if __name__ == '__main__':
    dim1 = 10
    dim2 = 20
    dim3 = 30
    result = np.zeros([dim1, dim2, dim3])
    pool = Pool(processes=8)
    for i in xrange(dim1):
        for j in xrange(dim2):
            result[i, j] = pool.map(fun_wrapper, [(i, j, k) for k in xrange(dim3)])
This is not the most elegant solution, but you may start with it. And you will get a speedup only if fun contains time-consuming computation.
A simple approach could be to divide the array into sections and create some threads to operate on these sections. For example, one section from (0,0,0) to (5,10,15) and another one from (5,10,16) to (10,20,30).
You can use threading module and do something like this
import numpy as np
import threading as t

def fun(x, y, z):
    # time-consuming computation...
    # ...
    return output

dim1 = 10
dim2 = 20
dim3 = 30
result = np.zeros([dim1, dim2, dim3])

# b - beginning index, e - end index
def work(ib, jb, kb, ie, je, ke):
    for i in xrange(ib, ie):
        for j in xrange(jb, je):
            for k in xrange(kb, ke):
                result[i, j, k] = fun(i, j, k)

threads = list()
threads.append(t.Thread(target=work, args=(0, 0, 0, dim1/2, dim2/2, dim3/2)))
threads.append(t.Thread(target=work, args=(dim1/2, dim2/2, dim3/2 + 1, dim1, dim2, dim3)))

for thread in threads:
    thread.start()
You can define these sections through some algorithm and determine the number of threads dynamically, as in the sketch below. Hope it helps you or at least gives you some ideas.
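A minimal sketch of that idea, splitting only the first axis so the boundary arithmetic stays simple (num_threads and bounds are illustrative names; also note that CPython threads only give a real speedup here if fun releases the GIL, e.g. in NumPy or other C code):

import multiprocessing

num_threads = multiprocessing.cpu_count()
# bounds[q] .. bounds[q+1] covers thread q's slice of the first axis
bounds = [(q * dim1) // num_threads for q in range(num_threads + 1)]
threads = [t.Thread(target=work, args=(bounds[q], 0, 0, bounds[q + 1], dim2, dim3))
           for q in range(num_threads)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()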
