How to handle both float and array input in Python? - python

I'm trying to write a function that accepts either a float OR an array of floats, and handles both of them using the same lines of code. For example, I want to return the float itself if it's a float, and the sum of the array of floats if it's an array. Something like this
def func(a):
return np.sum(a)
and have both func(1.2) return 1.2, and func(np.array([1.2,1.3,1.4]) return 3.9.

The usual way to make sure the input is a NumPy array is to use np.asarray():
import numpy as np
def func(a):
a = np.asarray(a)
return np.sum(a)
func(1.2)
# 1.2
func([1.2, 3.4])
# 4.6
func(np.array([1.2, 3.4]))
# 4.6
or, if you want to get len() of your array, make sure it is at least 1-dimensional, use np.atleast_1d():
def func(a):
a = np.atleast_1d(a)
return a.shape[0]
func(1.2)
# 1
func([1.2, 3.4])
# 2
func(np.array([1.2, 3.4]))
# 2

This already works, where's the problem?
import numpy as np
def func(a):
return np.sum(a)
print(func(np.array([1.2,2.3,3.2])))
print(func(1.2))
Output:
6.7
1.2

You can use argument flattening:
def func(*args):
# code to handle args
return sum(args)
Now the following have the same behaviour:
>>> func(3)
3
>>> func(3, 4, 5)
12
>>> func(*[3, 4, 5])
12

You can check if the input is a float, and then put it in a list before processing the sum:
def func(a):
if isinstance(a, float):
a = [a]
return np.sum(a)

Related

How can I create a numba callable that has adjustable parameters?

I would like to create a numba-compiled python callable (a function that I can use in another Numba-compiled function) that has an internal array that I can adjust to influence the result of the function call. In pure python, this would correspond to a class with a __call__ method:
class Test:
def __init__(self, arr):
self.arr = arr
def __call__(self, idx):
res = 0
for i in idx:
res += self.arr[i]
return res
t = Test([0, 1, 2])
print(t([1, 2]))
t.arr = [1, 2, 3]
print(t([1, 2]))
which prints 3 and 5, respectively, so the result was different after I modified the internal array arr.
A literal translation to Numba using jitclass and numpy arrays looks like this
import numpy as np
import numba as nb
#nb.jitclass([('arr', nb.double[:])])
class Test:
def __init__(self, arr):
self.arr = arr.astype(np.double)
def __call__(self, idx):
res = 0
for i in idx:
res += self.arr[i]
return res
t = Test(np.arange(3))
print(t(np.array([1, 2])))
t.arr = np.arange(3) + 1
print(t(np.array([1, 2])))
Unfortunately, this fails with TypeError: 'Test' object is not callable, since Numba does not seem to support __call__, yet.
I then tried to solve the problem using closures
import numpy as np
import numba as nb
arr = np.arange(5)
#nb.jit
def call(idx):
res = 0
for i in idx:
res += arr[i]
return res
print(call(np.array([1, 2])))
arr += 1
print(call(np.array([1, 2])))
but this prints 3 twice, since closures copy the data in arr into an internal representation, which I then cannot (easily?) change from the outside.
I even tried to trick Numba, by using ctypes pointers on Numpy arrays I combination with numba.carray, but Numba still seems to copy the data, so I cannot manipulate it.
I understand that Numba wants to control the memory and avoid access to memory regions that might not be used anymore. However, I have a specific use case where I would like to avoid passing around the extra array arr and rather adjust the internal copy somehow. Is there any way to achieve this?
EDIT:
I tried the suggestion by Daniel in the comments to use a method different than __call__, but this also does not work. Here is what I thought might work:
#nb.jitclass([('arr', nb.double[:])])
class Test:
def __init__(self, arr):
self.arr = arr
def call(self, idx):
return self.arr[idx]
a = Test(np.arange(5).astype(np.double))
print(a.call(3))
a.arr += 1
print(a.call(3))
#nb.njit
def rhs(idx):
return a.call(idx)
rhs(3)
This prints 3 and 4, so the array arr can indeed be manipulated. However, using the instance a in a compiled method fails with a NotImplementedError, so I suspect this use case is not (yet) supported by Numba.
Divide the problem in two parts, a numba function and a pure python class:
import numpy as np
import numba
#numba.jit
def calc(arr, idx):
res = 0
for i in idx:
res += arr[i]
return res
class Test:
def __init__(self, arr):
self.arr = arr.astype(np.double)
def __call__(self, idx):
return calc(self.arr, idx)
t = Test(np.arange(3))
print(t(np.array([1, 2])))
t.arr = np.arange(3) + 1
print(t(np.array([1, 2])))
I believe you need #property before the methods of the class but this may not be the only issue
#nb.jitclass([('arr', nb.double[:])])
class Test:
def __init__(self, arr):
self.arr = arr
#property
def call(self, idx):
return self.arr[idx]
a = Test(np.arange(5).astype(np.double))
print(a.call(3))
a.arr += 1
print(a.call(3))
#nb.njit
def rhs(idx):
return a.call(idx)
rhs(3)
This effect is the result of nopython compilation. If your goal is to create such callable at any costs, even possibly without taking benefits from jit-compilation - object compilation mode is a simple solution for your problem. This may be acheived in your closure example code simply by providing forceobj=True parameter to #nb.jit decorator.
This code prints 3 and 5 respectively:
import numpy as np
import numba as nb
arr = np.arange(5)
#nb.jit(forceobj=True)
def call(idx):
res = 0
for i in idx:
res += arr[i]
return res
print(call(np.array([1, 2])))
arr += 1
print(call(np.array([1, 2])))

Is there a method in numpy that verifies if an array is contained in another array?

I want to verify if a numpy array is a continuous sequence in another array.
E.g.
a = np.array([1,2,3,4,5,6,7])
b = np.array([3,4,5])
c = np.array([2,3,4,6])
The expected result would be:
is_sequence_of(b, a) # should return True
is_sequence_of(c, a) # should return False
I want to know if there is a numpy method that does this.
Approach #1
We can use one with np.searchsorted -
def isin_seq(a,b):
# Look for the presence of b in a, while keeping the sequence
sidx = a.argsort()
idx = np.searchsorted(a,b,sorter=sidx)
idx[idx==len(a)] = 0
ssidx = sidx[idx]
return (np.diff(ssidx)==1).all() & (a[ssidx]==b).all()
Note that this assumes that the input arrays have no duplicates.
Sample runs -
In [42]: isin_seq(a,b) # search for the sequence b in a
Out[42]: True
In [43]: isin_seq(c,b) # search for the sequence b in c
Out[43]: False
Approach #2
Another with skimage.util.view_as_windows -
from skimage.util import view_as_windows
def isin_seq_v2(a,b):
return (view_as_windows(a,len(b))==b).all(1).any()
Approach #3
This could also be considered as a template-matching problem and hence, for int numbers, we can use OpenCV's built-in function for template-matching : cv2.matchTemplate (inspired by this post), like so -
import cv2
from cv2 import matchTemplate as cv2m
def isin_seq_v3(arr,seq):
S = cv2m(arr.astype('uint8'),seq.astype('uint8'),cv2.TM_SQDIFF)
return np.isclose(S,0).any()
Approach #4
Our methods could benefit with a short-circuiting based one. So, we will use one with numba for performance-efficiency, like so -
from numba import njit
#njit
def isin_seq_numba(a,b):
m = len(a)
n = len(b)
for i in range(m-n+1):
for j in range(n):
if a[i+j]!=b[j]:
break
if j==n-1:
return True
return False

Numba dictionary: signature in JIT() decorator

My function takes a list of numpy arrays and a dictionary (or a list of dictionaries) as input arguments and returns a list of values. The list of numpy arrays is long, and arrays may be of different shape. Though I can pass numpy arrays separately, for housekeeping purposes I really would like to form a tuple of numpy arrays and pass them as such into my function.
Without dictionary (which is specially formed according to numba >=0.43) the whole setup works fine - see the script below. Because the structure of input and output is of Tuple form, JIT requires signature - it cannot figure out the type of data structure without it. However no matter how I try to declare my dictionary 'd' into the JIT decorator, I cannot manage to get the script working.
Please help with ideas or a solution if one exists.
Many thanks
'''python:
import numpy as np
from numba import njit
from numba import types
from numba.typed import Dict
#njit( 'Tuple( (f8,f8) )(Tuple( (f8[:],f8[:]) ))' )
def somefunction(lst_arr):
arr1, arr2 = lst_arr
summ = 0
prod = 1
for i in arr2:
summ += i
for j in arr1:
prod *= j
result = (summ,prod)
return result
a = np.arange(5)+1.0
b = np.arange(5)+11.0
arg = (a,b)
print(a,b)
print(somefunction(arg))
# ~~ The Dict.empty() constructs a typed dictionary.
d = Dict.empty(
key_type=types.unicode_type,
value_type=types.float64,)
d['k1'] = 1.5
d['k2'] = 0.5
'''
I expect to pass 'd'-dictionary into 'somefunction' and use it inside with dict keys...Form example as follows: result = (summ * d['k1'], prod * d['k2'])
import numpy as np
from numba import njit
from numba import types
from numba.typed import Dict
#njit( 'Tuple( (f8,f8) )(Tuple( (f8[:],f8[:]) ), Dict)' )
def somefunction(lst_arr, mydict):
arr1, arr2 = lst_arr
summ = 0
prod = 1
for i in arr2:
summ += i
for j in arr1:
prod *= j
result = (summ*mydict['k1'],prod*mydict['k2'])
return result
# ~~ Input numpy arrays
a = np.arange(5)+1.0
b = np.arange(5)+11.0
arg = (a,b)
# ~~ Input dictionary for the function
d = Dict.empty(
key_type=types.unicode_type,
value_type=types.float64)
d['k1'] = 1.5
d['k2'] = 0.5
# ~~ Run function and print results
print(somefunction(arg, d))
I am using the version 0.45.1. You can simply pass the dictionary without having to declare the type in the dictionary:
d = Dict.empty(
key_type=types.unicode_type,
value_type=types.float64[:],
)
d['k1'] = np.arange(5) + 1.0
d['k2'] = np.arange(5) + 11.0
# Numba will infer the type on it's own.
#njit
def somefunction2(d):
prod = 1
# I am assuming you want sum of second array and product of second
result = (d['k2'].sum(), d['k1'].prod())
return result
print(somefunction(d))
# Output : (65.0, 120.0)
For reference, you check this example from the official documentation.
Update:
In your case you can simply let jit infer the types on it's own and it should work, the following code works for me:
import numpy as np
from numba import njit
from numba import types
from numba.typed import Dict
from numba.types import DictType
# Let jit infer the types on it's own
#njit
def somefunction(lst_arr, mydict):
arr1, arr2 = lst_arr
summ = 0
prod = 1
for i in arr2:
summ += i
for j in arr1:
prod *= j
result = (summ*mydict['k1'],prod*mydict['k2'])
return result
# ~~ Input numpy arrays
a = np.arange(5)+1.0
b = np.arange(10)+11.0 #<--------------- This is of different shape
arg = (a,b)
# ~~ Input dictionary for the function
d = Dict.empty(
key_type=types.unicode_type,
value_type=types.float64)
d['k1'] = 1.5
d['k2'] = 0.5
# This works now
print(somefunction(arg, d))
You can see the official documentation here:
Unless necessary, it is recommended to let Numba infer argument types by using the signature-less variant of #jit.
I tried various methods, but this is the only one that worked for the problem you specified.

How to vectorize a class instantiation to allow NumPy arrays as input?

I programmed class which looks something like this:
import numpy as np
class blank():
def __init__(self,a,b,c):
self.a=a
self.b=b
self.c=c
n=5
c=a/b*8
if (a>b):
y=c+a*b
else:
y=c-a*b
p = np.empty([1,1])
k = np.empty([1,1])
l = np.empty([1,1])
p[0]=b
k[0]=b*(c-1)
l[0]=p+k
for i in range(1, n, 1):
p=np.append(p,l[i-1])
k=np.append(k,(p[i]*(c+1)))
l=np.append(l,p[i]+k[i])
komp = np.zeros(shape=(n, 1))
for i in range(0, n):
pl_avg = (p[i] + l[i]) / 2
h=pl_avg*3
komp[i]=pl_avg*h/4
self.tot=komp+l
And when I call it like this:
from ex1 import blank
import numpy as np
res=blank(1,2,3)
print(res.tot)
everything works well.
BUT I want to call it like this:
res = blank(np.array([1,2,3]), np.array([3,4,5]), 3)
Is there an easy way to call it for each i element of this two arrays without editing class code?
You won't be able to instantiate a class with NumPy arrays as inputs without changing the class code. #PabloAlvarez and #NagaKiran already provided alternative: iterate with zip over arrays and instantiate class for each pair of elements. While this is pretty simple solution, it defeats the purpose of using NumPy with its efficient vectorized operations.
Here is how I suggest you to rewrite the code:
from typing import Union
import numpy as np
def total(a: Union[float, np.ndarray],
b: Union[float, np.ndarray],
n: int = 5) -> np.array:
"""Calculates what your self.tot was"""
bc = 8 * a
c = bc / b
vectorized_geometric_progression = np.vectorize(geometric_progression,
otypes=[np.ndarray])
l = np.stack(vectorized_geometric_progression(bc, c, n))
l = np.atleast_2d(l)
p = np.insert(l[:, :-1], 0, b, axis=1)
l = np.squeeze(l)
p = np.squeeze(p)
pl_avg = (p + l) / 2
komp = np.array([0.75 * pl_avg ** 2]).T
return komp + l
def geometric_progression(bc, c, n):
"""Calculates array l"""
return bc * np.logspace(start=0,
stop=n - 1,
num=n,
base=c + 2)
And you can call it both for sole numbers and NumPy arrays like that:
>>> print(total(1, 2))
[[2.6750000e+01 6.6750000e+01 3.0675000e+02 1.7467500e+03 1.0386750e+04]
[5.9600000e+02 6.3600000e+02 8.7600000e+02 2.3160000e+03 1.0956000e+04]
[2.1176000e+04 2.1216000e+04 2.1456000e+04 2.2896000e+04 3.1536000e+04]
[7.6205600e+05 7.6209600e+05 7.6233600e+05 7.6377600e+05 7.7241600e+05]
[2.7433736e+07 2.7433776e+07 2.7434016e+07 2.7435456e+07 2.7444096e+07]]
>>> print(total(3, 4))
[[1.71000000e+02 3.39000000e+02 1.68300000e+03 1.24350000e+04 9.84510000e+04]
[8.77200000e+03 8.94000000e+03 1.02840000e+04 2.10360000e+04 1.07052000e+05]
[5.59896000e+05 5.60064000e+05 5.61408000e+05 5.72160000e+05 6.58176000e+05]
[3.58318320e+07 3.58320000e+07 3.58333440e+07 3.58440960e+07 3.59301120e+07]
[2.29323574e+09 2.29323590e+09 2.29323725e+09 2.29324800e+09 2.29333402e+09]]
>>> print(total(np.array([1, 3]), np.array([2, 4])))
[[[2.67500000e+01 6.67500000e+01 3.06750000e+02 1.74675000e+03 1.03867500e+04]
[1.71000000e+02 3.39000000e+02 1.68300000e+03 1.24350000e+04 9.84510000e+04]]
[[5.96000000e+02 6.36000000e+02 8.76000000e+02 2.31600000e+03 1.09560000e+04]
[8.77200000e+03 8.94000000e+03 1.02840000e+04 2.10360000e+04 1.07052000e+05]]
[[2.11760000e+04 2.12160000e+04 2.14560000e+04 2.28960000e+04 3.15360000e+04]
[5.59896000e+05 5.60064000e+05 5.61408000e+05 5.72160000e+05 6.58176000e+05]]
[[7.62056000e+05 7.62096000e+05 7.62336000e+05 7.63776000e+05 7.72416000e+05]
[3.58318320e+07 3.58320000e+07 3.58333440e+07 3.58440960e+07 3.59301120e+07]]
[[2.74337360e+07 2.74337760e+07 2.74340160e+07 2.74354560e+07 2.74440960e+07]
[2.29323574e+09 2.29323590e+09 2.29323725e+09 2.29324800e+09 2.29333402e+09]]]
You can see that results are in compliance.
Explanation:
First of all I'd like to note that your calculation of p, k, and l doesn't have to be in the loop. Moreover, calculating k is unnecessary. If you see carefully, how elements of p and l are calculated, they are just geometric progressions (except the 1st element of p):
p = [b, b*c, b*c*(c+2), b*c*(c+2)**2, b*c*(c+2)**3, b*c*(c+2)**4, ...]
l = [b*c, b*c*(c+2), b*c*(c+2)**2, b*c*(c+2)**3, b*c*(c+2)**4, b*c*(c+2)**5, ...]
So, instead of that loop, you can use np.logspace. Unfortunately, np.logspace doesn't support base parameter as an array, so we have no other choice but to use np.vectorize which is just a loop under the hood...
Calculating of komp though is easily vectorized. You can see it in my example. No need for loops there.
Also, as I already noted in a comment, your class doesn't have to be a class, so I took a liberty of changing it to a function.
Next, note that input parameter c is overwritten, so I got rid of it. Variable y is never used. (Also, you could calculate it just as y = c + a * b * np.sign(a - b))
And finally, I'd like to remark that creating NumPy arrays with np.append is very inefficient (as it was pointed out by #kabanus), so you should always try to create them at once - no loops, no appending.
P.S.: I used np.atleast_2d and np.squeeze in my code and it could be unclear why I did it. They are necessary to avoid if-else clauses where we would check dimensions of array l. You can print intermediate results to see what is really going on there. Nothing difficult.
if it is just calling class with two different list elements, loop can satisfies well
res = [blank(i,j,3) for i,j in zip(np.array([1,2,3]),np.array([3,4,5]))]
You can see list of values for res variable
The only way I can think of iterating lists of arrays is by using a function on the main program for iteration and then do the operations you need to do inside the loop.
This solution works for each element of both arrays (note to use zip function for making the iteration in both lists if they have a small size as listed in this answer here):
for n,x in zip(np.array([1,2,3]),np.array([3,4,5])):
res=blank(n,x,3)
print(res.tot)
Hope it is what you need!

python: getting around division by zero

I have a big data set of floating point numbers. I iterate through them and evaluate np.log(x) for each of them.
I get
RuntimeWarning: divide by zero encountered in log
I would like to get around this and return 0 if this error occurs.
I am thinking of defining a new function:
def safe_ln(x):
#returns: ln(x) but replaces -inf with 0
l = np.log(x)
#if l = -inf:
l = 0
return l
Basically,I need a way of testing that the output is -inf but I don't know how to proceed.
Thank you for your help!
You are using a np function, so I can safely guess that you are working on a numpy array?
Then the most efficient way to do this is to use the where function instead of a for loop
myarray= np.random.randint(10,size=10)
result = np.where(myarray>0, np.log(myarray), 0)
otherwise you can simply use the log function and then patch the hole:
myarray= np.random.randint(10,size=10)
result = np.log(myarray)
result[result==-np.inf]=0
The np.log function return correctly -inf when used on a value of 0, so are you sure that you want to return a 0? if somewhere you have to revert to the original value, you are going to experience some problem, changing zeros into ones...
Since the log for x=0 is minus infinite, I'd simply check if the input value is zero and return whatever you want there:
def safe_ln(x):
if x <= 0:
return 0
return math.log(x)
EDIT: small edit: you should check for all values smaller than or equal to 0.
EDIT 2: np.log is of course a function to calculate on a numpy array, for single values you should use math.log. This is how the above function looks with numpy:
def safe_ln(x, minval=0.0000000001):
return np.log(x.clip(min=minval))
You can do this.
def safe_ln(x):
try:
l = np.log(x)
except ZeroDivisionError:
l = 0
return l
I like to use sys.float_info.min as follows:
>>> import numpy as np
>>> import sys
>>> arr = np.linspace(0.0, 1.0, 3)
>>> print(arr)
[0. 0.5 1. ]
>>> arr[arr < sys.float_info.min] = sys.float_info.min
>>> print(arr)
[2.22507386e-308 5.00000000e-001 1.00000000e+000]
>>> np.log10(arr)
array([-3.07652656e+02, -3.01029996e-01, 0.00000000e+00])
Other answers have also introduced small positive values, but I prefer to use the smallest possible value to make the approximation more accurate.
The answer given by Enrico is nice, but both solutions result in a warning:
RuntimeWarning: divide by zero encountered in log
As an alternative, we can still use the where function but only execute the main computation where it is appropriate:
# alternative implementation -- a bit more typing but avoids warnings.
loc = np.where(myarray>0)
result2 = np.zeros_like(myarray, dtype=float)
result2[loc] =np.log(myarray[loc])
# answer from Enrico...
myarray= np.random.randint(10,size=10)
result = np.where(myarray>0, np.log(myarray), 0)
# check it is giving right solution:
print(np.allclose(result, result2))
My use case was for division, but the principle is clearly the same:
x = np.random.randint(10, size=10)
divisor = np.ones(10,)
divisor[3] = 0 # make one divisor invalid
y = np.zeros_like(divisor, dtype=float)
loc = np.where(divisor>0) # (or !=0 if your data could have -ve values)
y[loc] = x[loc] / divisor[loc]
use exception handling:
In [27]: def safe_ln(x):
try:
return math.log(x)
except ValueError: # np.log(x) might raise some other error though
return float("-inf")
....:
In [28]: safe_ln(0)
Out[28]: -inf
In [29]: safe_ln(1)
Out[29]: 0.0
In [30]: safe_ln(-100)
Out[30]: -inf
you could do:
def safe_ln(x):
#returns: ln(x) but replaces -inf with 0
try:
l = np.log(x)
except RunTimeWarning:
l = 0
return l
For those looking for a np.log solution that intakes a np.ndarray and nudges up only zero values:
import sys
import numpy as np
def smarter_nextafter(x: np.ndarray) -> np.ndarray:
safe_x = np.where(x != 0, x, np.nextafter(x, 1))
return np.log(safe_x)
def clip_usage(x: np.ndarray, safe_min: float | None = None) -> np.ndarray:
# Inspiration: https://stackoverflow.com/a/13497931/
clipped_x = x.clip(min=safe_min or np.finfo(x.dtype).min)
return np.log(clipped_x)
def inplace_usage(x: np.ndarray, safe_min: float | None = None) -> np.ndarray:
# Inspiration: https://stackoverflow.com/a/62292638/
x[x == 0] = safe_min or np.finfo(x.dtype).min
return np.log(x)
Or if you don't mind nudging all values and like bad big-O runtimes:
def brute_nextafter(x: np.ndarray) -> np.ndarray:
# Just for reference, don't use this
while not x.all():
x = np.nextafter(x, 1)
return np.log(x)

Categories

Resources