Determine blocks in sorted numpy integer array - python

I have a sorted integer array, e.g., [0, 0, 1, 1, 1, 2, 4, 4], and I would like to determine where the integer blocks start and how long the blocks are. The block sizes are small but the array itself can be very large, so efficiency is important. The total number of blocks is also known.
numpy.unique does the trick:
import numpy
a = numpy.array([0, 0, 1, 1, 1, 2, 4, 4])
num_blocks = 4
print(a)
_, idx_start, count = numpy.unique(a, return_index=True, return_counts=True)
print(idx_start)
print(count)
[0 0 1 1 1 2 4 4]
[0 2 5 6]
[2 3 1 2]
but it is slow. I would assume that, given the specific structure of the input array, there's a more efficient solution.
For example, something as simple as
import numpy
a = numpy.array([0, 0, 1, 1, 1, 2, 3, 3])
num_blocks = 4
k = 0
z = a[k]
block_idx = 0
counts = numpy.empty(num_blocks, dtype=int)
count = 0
while k < len(a):
    if z == a[k]:
        count += 1
    else:
        z = a[k]
        counts[block_idx] = count
        count = 1
        block_idx += 1
    k += 1
counts[block_idx] = count
print(counts)
gives the block sizes, and a simple numpy.cumsum would give the start indices (idx_start). Using a Python loop is of course slow.
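For completeness, recovering the start indices from the counts is just a shifted cumulative sum; a minimal sketch (assuming counts as computed above):
idx_start = numpy.concatenate(([0], numpy.cumsum(counts)[:-1]))
# for counts = [2, 3, 1, 2] this gives [0, 2, 5, 6]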
Any hints?

Here's one with some masking and slicing -
import numpy as np

def grp_start_len(a):
    m = np.r_[True, a[:-1] != a[1:], True]  # np.concatenate for a bit more boost
    idx = np.flatnonzero(m)
    return idx[:-1], np.diff(idx)
Sample run -
In [18]: a
Out[18]: array([0, 0, 1, 1, 1, 2, 4, 4])
In [19]: grp_start_len(a)
Out[19]: (array([0, 2, 5, 6]), array([2, 3, 1, 2]))
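The comment in the code hints at np.concatenate for a bit more speed; that variant might look like this (an untimed sketch along the same lines):
def grp_start_len_cat(a):
    # same boundary mask, built with np.concatenate instead of np.r_
    m = np.concatenate(([True], a[:-1] != a[1:], [True]))
    idx = np.flatnonzero(m)
    return idx[:-1], np.diff(idx)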
Timings (setup from @AGN Gazer's solution) -
In [24]: np.random.seed(0)
In [25]: a = np.sort(np.random.randint(1, 10000, 10000))
In [26]: %timeit _, idx_start, count = np.unique(a, return_index=True, return_counts=True)
1000 loops, best of 3: 411 µs per loop
# @AGN Gazer's solution
In [27]: %timeit st = np.where(np.ediff1d(a, a[-1] + 1, a[0] + 1))[0]; idx = st[:-1]; cnt = np.ediff1d(st)
10000 loops, best of 3: 81.2 µs per loop
In [28]: %timeit grp_start_len(a)
10000 loops, best of 3: 60.1 µs per loop
Bumping up the sizes 10x more -
In [40]: np.random.seed(0)
In [41]: a = np.sort(np.random.randint(1, 100000, 100000))
In [42]: %timeit _, idx_start, count = np.unique(a, return_index=True, return_counts=True)
...: %timeit st = np.where(np.ediff1d(a, a[-1] + 1, a[0] + 1))[0]; idx = st[:-1]; cnt = np.ediff1d(st)
...: %timeit grp_start_len(a)
100 loops, best of 3: 5.34 ms per loop
1000 loops, best of 3: 792 µs per loop
1000 loops, best of 3: 463 µs per loop

np.where(np.ediff1d(a, None, a[0]))[0]
If you want to have the first "0" as in your answer, add a non-zero number to a[0]:
np.where(np.ediff1d(a, None, a[0] + 1))[0]
EDIT (Block length):
Ah, just noticed that you also want to get block length. Then, modify the above code:
st = np.where(np.ediff1d(a, a[-1] + 1, a[0] + 1))[0]
idx = st[:-1]
cnt = np.ediff1d(st)
Then,
>>> print(idx)
[0 2 5 6]
>>> print(cnt)
[2 3 1 2]
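To see what the positional to_end/to_begin arguments do here, a quick trace on the sample array (illustration only):
a = np.array([0, 0, 1, 1, 1, 2, 4, 4])
d = np.ediff1d(a, a[-1] + 1, a[0] + 1)  # to_end=a[-1]+1, to_begin=a[0]+1 -> [1 0 1 0 0 1 2 0 5]
st = np.where(d)[0]                     # nonzero positions -> [0 2 5 6 8]
idx, cnt = st[:-1], np.ediff1d(st)      # [0 2 5 6], [2 3 1 2]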
EDIT 2 (Timing tests)
In [69]: a = np.sort(np.random.randint(1, 10000, 10000))
In [70]: %timeit _, idx_start, count = np.unique(a, return_index=True, return_counts=True)
240 µs ± 7.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [71]: %timeit st = np.where(np.ediff1d(a, a[-1] + 1, a[0] + 1))[0]; idx = st[:-1]; cnt = np.ediff1d(st)
74.3 µs ± 816 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


Max value per diagonal in 2d array

I have an array and need the max of the rolling difference with a dynamic window.
a = np.array([8, 18, 5,15,12])
print (a)
[ 8 18 5 15 12]
So first I create the difference of the array with itself:
b = a - a[:, None]
print (b)
[[ 0 10 -3 7 4]
[-10 0 -13 -3 -6]
[ 3 13 0 10 7]
[ -7 3 -10 0 -3]
[ -4 6 -7 3 0]]
Then I replace the upper triangle with 0:
c = np.tril(b)
print (c)
[[ 0 0 0 0 0]
[-10 0 0 0 0]
[ 3 13 0 0 0]
[ -7 3 -10 0 0]
[ -4 6 -7 3 0]]
Last, I need the max values per diagonal, which means:
max([0,0,0,0,0]) = 0
max([-10,13,-10,3]) = 13
max([3,3,-7]) = 3
max([-7,6]) = 6
max([-4]) = -4
So expected output is:
[0, 13, 3, 6, -4]
Is there a nice vectorized solution, or some other way to get the expected output?
Use ndarray.diagonal
v = [max(c.diagonal(-i)) for i in range(b.shape[0])]
print(v) # [0, 13, 3, 6, -4]
Not sure exactly how efficient this is considering the advanced indexing involved, but this is one way to do that:
import numpy as np
a = np.array([8, 18, 5, 15, 12])
b = a[:, None] - a
# Fill lower triangle with largest negative
b[np.tril_indices(len(a))] = np.iinfo(b.dtype).min # np.finfo for float
# Put diagonals as rows
s = b.strides[1]
diags = np.ndarray((len(a) - 1, len(a) - 1), b.dtype, b, offset=s, strides=(s, (len(a) + 1) * s))
# Get maximum from each row and add initial zero
c = np.r_[0, diags.max(1)]
print(c)
# [ 0 13 3 6 -4]
EDIT:
Another alternative, which may not be what you were looking for though, is just using Numba, for example like this:
import numpy as np
import numba as nb
def max_window_diffs_jdehesa(a):
    a = np.asarray(a)
    dtinf = np.iinfo(a.dtype) if np.issubdtype(a.dtype, np.integer) else np.finfo(a.dtype)
    out = np.full_like(a, dtinf.min)
    _pwise_diffs(a, out)
    return out

@nb.njit(parallel=True)
def _pwise_diffs(a, out):
    out[0] = 0
    for w in nb.prange(1, len(a)):
        for i in range(len(a) - w):
            out[w] = max(a[i] - a[i + w], out[w])
a = np.array([8, 18, 5, 15, 12])
print(max_window_diffs_jdehesa(a))
# [ 0 13 3 6 -4]
Comparing these methods to the original:
import numpy as np
import numba as nb
def max_window_diffs_orig(a):
    a = np.asarray(a)
    b = a - a[:, None]
    out = np.zeros(len(a), b.dtype)
    out[-1] = b[-1, 0]
    for i in range(1, len(a) - 1):
        out[i] = np.diag(b, -i).max()
    return out

def max_window_diffs_jdehesa_np(a):
    a = np.asarray(a)
    b = a[:, None] - a
    dtinf = np.iinfo(b.dtype) if np.issubdtype(b.dtype, np.integer) else np.finfo(b.dtype)
    b[np.tril_indices(len(a))] = dtinf.min
    s = b.strides[1]
    diags = np.ndarray((len(a) - 1, len(a) - 1), b.dtype, b, offset=s, strides=(s, (len(a) + 1) * s))
    return np.concatenate([[0], diags.max(1)])

def max_window_diffs_jdehesa_nb(a):
    a = np.asarray(a)
    dtinf = np.iinfo(a.dtype) if np.issubdtype(a.dtype, np.integer) else np.finfo(a.dtype)
    out = np.full_like(a, dtinf.min)
    _pwise_diffs(a, out)
    return out

@nb.njit(parallel=True)
def _pwise_diffs(a, out):
    out[0] = 0
    for w in nb.prange(1, len(a)):
        for i in range(len(a) - w):
            out[w] = max(a[i] - a[i + w], out[w])
np.random.seed(0)
a = np.random.randint(0, 100, size=100)
r = max_window_diffs_orig(a)
print((max_window_diffs_jdehesa_np(a) == r).all())
# True
print((max_window_diffs_jdehesa_nb(a) == r).all())
# True
%timeit max_window_diffs_orig(a)
# 348 µs ± 986 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit max_window_diffs_jdehesa_np(a)
# 91.7 µs ± 1.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit max_window_diffs_jdehesa_nb(a)
# 19.7 µs ± 88.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
np.random.seed(0)
a = np.random.randint(0, 100, size=10000)
%timeit max_window_diffs_orig(a)
# 651 ms ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit max_window_diffs_jdehesa_np(a)
# 1.61 s ± 6.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit max_window_diffs_jdehesa_nb(a)
# 22 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
The first one may be a bit better for smaller arrays, but doesn't work well for bigger ones. Numba on the other hand is pretty good in all cases.
You can use numpy.diagonal:
a = np.array([8, 18, 5,15,12])
b = a - a[:, None]
c = np.tril(b)
for i in range(b.shape[0]):
    print(max(c.diagonal(-i)))
Output:
0
13
3
6
-4
Here's a vectorized solution with strides -
from skimage.util import view_as_windows
n = len(a)
z = np.zeros(n-1,dtype=a.dtype)
p = np.concatenate((a,z))
s = view_as_windows(p,n)
mask = np.tri(n,k=-1,dtype=bool)[:,::-1]
v = s[0]-s
out = np.where(mask,v.min()-1,v).max(1)
With one-loop for memory-efficiency -
n = len(a)
out = [max(a[:-i+n]-a[i:]) for i in range(n)]
Use np.max in place of max for better use of array-memory.
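Following that note, the np.max variant might read (a small sketch, assuming a and n as above):
out = [np.max(a[:n-i] - a[i:]) for i in range(n)]
# for a = [8, 18, 5, 15, 12] this gives [0, 13, 3, 6, -4]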
You can abuse the fact that reshaping non-square arrays of shape (N+1, N) to (N, N+1) will make diagonals appear as columns
from scipy.linalg import toeplitz
a = toeplitz([1,2,3,4], [1,4,3])
# array([[1, 4, 3],
#        [2, 1, 4],
#        [3, 2, 1],
#        [4, 3, 2]])
a.reshape(3, 4)
# array([[1, 4, 3, 2],
#        [1, 4, 3, 2],
#        [1, 4, 3, 2]])
Which you can then use like this (note that I've swapped the sign and filled the lower triangle with a very small value):
smallv = -10000 # replace this with -np.inf if you have floats
a = np.array([8, 18, 5,15,12])
b = a[:, None] - a
b[np.tril_indices(len(b), -1)] = smallv
d = np.vstack((b, np.full(len(b), smallv)))
d.reshape(len(d) - 1, -1).max(0)[:-1]
# array([ 0, 13, 3, 6, -4])

Can motelling be vectorized in pandas?

"Motelling" is a way to smooth response to a signal.
For example: given a time-varying signal S[t] that takes integer values 1-5, and a response function F[t]({S[0..t]}) that assigns -1, 0, or +1 to each signal, a standard motelling response function would return:
-1 if S[t] = 1, or if S[t] = 2 and F[t-1] = -1
+1 if S[t] = 5, or if S[t] = 4 and F[t-1] = +1
0 otherwise
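Stated as a plain (non-vectorized) loop, the rules might read like the following reference sketch (motel_reference is just an illustrative name, not part of any library):
def motel_reference(signal):
    # walk the signal once, carrying the previous response
    out, prev = [], 0
    for s in signal:
        if s == 1 or (s == 2 and prev == -1):
            f = -1
        elif s == 5 or (s == 4 and prev == 1):
            f = 1
        else:
            f = 0
        out.append(f)
        prev = f
    return out

motel_reference([1, 2, 2, 2, 3, 5, 3, 4, 1])
# [-1, -1, -1, -1, 0, 1, 0, 0, -1]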
If I have a DataFrame by time of the signal {S}, is there a vectorized way to apply this motelling function?
E.g., if DataFrame df['S'].values = [1, 2, 2, 2, 3, 5, 3, 4, 1]
then is there a vectorized approach that would produce:
df['F'].values = [-1, -1, -1, -1, 0, 1, 0, 0, -1]
Or, absent a vectorized solution, is there something obviously faster than the following DataFrame.itertuples() approach I am using now?
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.random_integers(1, 5, 100000), columns=['S'])
# First set response for time t
df['F'] = np.where(df['S'] == 5, 1, np.where(df['S'] == 1, -1, 0))
# Now loop to apply motelling
previousF = 0
for row in df.itertuples():
    df.at[row.Index, 'F'] = np.where((row.S >= 4) & (previousF == 1), 1,
                                     np.where((row.S <= 2) & (previousF == -1), -1, row.F))
    previousF = row.F
With a complex DataFrame the loop portion takes O(minute per million rows)!
You can try regex.
The patterns we are looking for are:
(1) a 1 followed by 1s or 2s. (We pick this rule because any 2 that comes after a 1 can be treated as a 1 and keeps influencing the next row's result.)
(2) a 5 followed by 4s or 5s. (Similarly, any 4 that comes after a 5 can be treated as a 5.)
(1) will result in consecutive -1s and (2) will result in consecutive 1s. Anything that does not match will be 0.
Using these rules, the remaining work is just replacement. In particular, lambda m: "x"*len(m.group(0)) turns each match into a run of placeholders of the same length (see the reference below).
import re
s = [1, 2, 2, 2, 3, 5, 3, 4, 1]
str_s = "".join(str(i) for i in s)
s1 = re.sub("5[45]*", lambda m: "x"*len(m.group(0)),str_s)
s2 = re.sub("1[12]*", lambda m: "y"*len(m.group(0)),s1)
l = list(s2)
l2 = [v if v in ["x", "y"] else 0 for v in l]
l3 = [1 if v == 'x' else v for v in l2]
l4 = [-1 if v == 'y' else v for v in l3]
[-1, -1, -1, -1, 0, 1, 0, 0, -1]
Bigger dataset
def tai(s):
    str_s = "".join(str(i) for i in s)
    s1 = re.sub("5[45]*", lambda m: "x"*len(m.group(0)), str_s)
    s2 = re.sub("1[12]*", lambda m: "y"*len(m.group(0)), s1)
    l = list(s2)
    l2 = [v if v in ["x", "y"] else 0 for v in l]
    l3 = [1 if v == 'x' else v for v in l2]
    l4 = [-1 if v == 'y' else v for v in l3]
    return l4
s = np.random.randint(1,6,100000)
%timeit tai(s)
104 ms ± 6.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
df = pd.DataFrame(np.random.randint(1,6,100000), columns=['S'])
# First set response for time t
df['F'] = np.where(df['S'] == 5, 1, np.where(df['S'] == 1, -1, 0))
# Now loop to apply motelling
%%timeit # (OP's answer)
previousF = 0
for row in df.itertuples():
    df.at[row.Index, 'F'] = np.where((row.S >= 4) & (previousF == 1), 1,
                                     np.where((row.S <= 2) & (previousF == -1), -1, row.F))
    previousF = row.F
1.11 s ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Reference
Replace substrings in python with the length of each substring
You may notice that since consecutive elements of F[t] depend on one another, this doesn't vectorize well. I'm partial to using numba in these cases. Your function is simple, it works on a numpy array (a Series is just an array under the hood), and it's not easy to vectorize, so numba is ideal here.
Imports and function:
import numpy as np
import pandas as pd
def motel(S):
    F = np.zeros_like(S)
    for t in range(S.shape[0]):
        if (S[t] == 1) or (S[t] == 2 and F[t-1] == -1):
            F[t] = -1
        elif (S[t] == 5) or (S[t] == 4 and F[t-1] == 1):
            F[t] = 1
        # no else required since F is already set to zero
    return F
Here we can just jit-compile the function
import numba
jit_motel = numba.jit(nopython=True)(motel)
And ensure that the normal and jit versions return expected values
S = pd.Series([1, 2, 2, 2, 3, 5, 3, 4, 1])
print("motel(S) = ", motel(S))
print("jit_motel(S)", jit_motel(S.values))
result:
motel(S) = [-1 -1 -1 -1 0 1 0 0 -1]
jit_motel(S) [-1 -1 -1 -1 0 1 0 0 -1]
For timing, let's scale:
N = 10**4
S = pd.Series( np.random.randint(1, 5, N) )
%timeit jit_motel(S.values)
%timeit motel(S.values)
result:
82.7 µs ± 1.03 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
7.75 ms ± 77.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
For your million data points (didn't time normal function because I didn't wanna wait =) )
N = 10**6
S = pd.Series( np.random.randint(1, 5, N) )
%timeit motel(S.values)
result:
768 ms ± 7.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Boom! Less than a second for a million entries. This approach is simple, readable, and fast. The only downside is the numba dependency, but it's included in Anaconda and easily available via conda (maybe pip, I'm not sure).
To aggregate the other answers, first I should note that apparently DataFrame.itertuples() does not iterate deterministically, or as expected, so the sample in the OP doesn't always produce the correct result on large samples.
Thanks to the other answers, I realized that a mechanical application of the motelling logic not only produces correct results, but does so surprisingly quickly when we use DataFrame.fill functions:
def dfmotel(df):
    # We'll copy results into column F as we build them
    df['F'] = np.nan
    # This algo is destructive, so we operate on a copy of the signal
    df['temp'] = df['S']
    # Fill forward the negative signal
    df.loc[df['temp'] == 2, 'temp'] = np.nan
    df['temp'].ffill(inplace=True)
    df.loc[df['temp'] == 1, 'F'] = -1
    # Fill forward the positive signal
    df.loc[df['temp'] == 4, 'temp'] = np.nan
    df['temp'].ffill(inplace=True)
    df.loc[df['temp'] == 5, 'F'] = 1
    # All other signals are zero
    df['F'].fillna(0, inplace=True)
For all timing tests we will operate on the same input:
df = pd.DataFrame(np.random.randint(1,5,1000000), columns=['S'])
For the DataFrame-based function above we get:
%timeit dfmotel(df.copy())
123 ms ± 2.07 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
This is quite acceptable performance.
tai was first to present this very clever solution using RegEx (which is what inspired my function above), but it can't match the speed of staying in number space:
import re
def tai(s):
    str_s = "".join(str(i) for i in s)
    s1 = re.sub("5[45]*", lambda m: "x"*len(m.group(0)), str_s)
    s2 = re.sub("1[12]*", lambda m: "y"*len(m.group(0)), s1)
    l = list(s2)
    l2 = [v if v in ["x", "y"] else 0 for v in l]
    l3 = [1 if v == 'x' else v for v in l2]
    l4 = [-1 if v == 'y' else v for v in l3]
    return l4
%timeit tai(df['S'].values)
899 ms ± 9.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
But nothing beats compiled code. Thanks to evamicur for this solution using the convenient numba in-line compiler:
import numba
def motel(S):
    F = np.zeros_like(S)
    for t in range(S.shape[0]):
        if (S[t] == 1) or (S[t] == 2 and F[t-1] == -1):
            F[t] = -1
        elif (S[t] == 5) or (S[t] == 4 and F[t-1] == 1):
            F[t] = 1
    return F
jit_motel = numba.jit(nopython=True)(motel)
%timeit jit_motel(df['S'].values)
9.06 ms ± 502 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

Transform all elements positionally below 0 into 0 in a matrix (Python)

This is a matrix:
matrix = [[1, 1, 1, 0],
          [0, 5, 0, 1],
          [2, 1, 3, 10]]
I want to change every element positioned below a 0 (in the same column) into 0.
The resulting matrix will be:
matrix = [[1, 1, 1, 0],
          [0, 5, 0, 0],
          [0, 1, 0, 0]]
This is what I tried so far; the return is empty:
import numpy as np
def transform(matrix):
    newmatrix = np.asarray(matrix)
    i = 0
    j = 0
    for j in range(0, len(matrix[0]) - 1):
        while i < int(len(matrix)) - 1 and j < int(len(matrix[0])) - 1:
            if newmatrix[i][j] == 0:
                np.put(newmatrix, newmatrix[i+1][j], 0)
            i += 1
    return print(newmatrix)
Method 1 (Original)
import numpy as np
def transform(matrix):
    mat = np.asarray(matrix)
    mat[np.logical_not(np.not_equal(mat, 0).cumprod(axis=0))] = 0
    # Alternatively:
    # mat[~(mat != 0).cumprod(axis=0, dtype=np.bool)] = 0
    # or,
    # mat[~((mat != 0).cumprod(axis=0, dtype=np.bool))] = 0
    return mat
Then with your sample data, I get the following mat:
In [195]: matrix = [[1, 1, 1, 0],
...: [0, 5, 0, 1],
...: [2, 1, 3, 10]]
In [196]: transform(matrix)
Out[196]:
array([[1, 1, 1, 0],
[0, 5, 0, 0],
[0, 1, 0, 0]])
Method 2 (further optimized)
def transform2(matrix):
    mat = np.asarray(matrix)
    mat *= (mat != 0).cumprod(axis=0, dtype=np.bool)
    return mat
Method 3 (even more optimized)
def transform3(matrix):
    mat = np.asarray(matrix)
    mat *= mat.cumprod(axis=0, dtype=np.bool)
    return mat
Explanation
Let's look at the main statement (in Method 1):
mat[np.logical_not(np.not_equal(mat, 0).cumprod(axis=0))] = 0
We can split it into several "elementary" operations:
1. Create a boolean mask containing False (numerically 0) where elements of mat are 0 and True (numerically 1) where they are non-zero:
mask1 = np.not_equal(mat, 0)
2. Using the fact that numerically False is 0, apply the cumprod() function (a good explanation can be found here: https://www.mathworks.com/help/matlab/ref/cumprod.html):
mask2 = mask1.cumprod(axis=0)
Since 1*1 == 1 and 0*0 or 0*1 is 0, all elements of this "mask" will be either 0 or 1. They will be 0 only in the locations where mask1 is zero and below (!), because of the "cumulative nature" of the product along the columns (hence axis=0).
3. Now we want to set those elements of mat that correspond to 0 in mask2 to 0. To do this we create a boolean mask that is True where mask2 is 0 and False elsewhere. This can easily be achieved by applying logical (or binary) NOT to mask2:
mask3 = np.logical_not(mask2)
Using "logical" NOT here creates a boolean array, so we avoid an explicit type conversion.
4. Finally we use boolean indexing to select those elements of mat that need to be set to 0, and set them to 0:
mat[mask3] = 0
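Walking these steps through the sample matrix makes the intermediate masks concrete (an illustration, not part of the original answer; expected values shown as comments):
mat = np.array([[1, 1, 1, 0],
                [0, 5, 0, 1],
                [2, 1, 3, 10]])
mask1 = np.not_equal(mat, 0)   # [[ True  True  True False]
                               #  [False  True False  True]
                               #  [ True  True  True  True]]
mask2 = mask1.cumprod(axis=0)  # [[1 1 1 0]
                               #  [0 1 0 0]
                               #  [0 1 0 0]]
mask3 = np.logical_not(mask2)  # True exactly where elements must be zeroed
mat[mask3] = 0                 # [[1 1 1 0]
                               #  [0 5 0 0]
                               #  [0 1 0 0]]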
OPTIONAL OPTIMIZATION
If you think about it, we can get rid of steps 3 and 4 by doing the following:
mask2 = mask1.cumprod(axis=0, dtype=np.bool) #convert result to boolean type
mat *= mask2 # combined step 3&4
See "Method 2" section above for a complete implementation.
PERFORMANCE
There have been several additional answers that use numpy.ufunc.accumulate(). Fundamentally, all these methods revolve around the idea that 0 is a "special" value in the sense that 0*anything == 0 or, in the case of @DSM's answer, that False == 0 < True == 1, and letting numpy perform a "cumulative" operation on the arrays.
There are some variations in performance, but most are minimal, except for my Method 1, which is slower than the other methods.
Here are some timing tests for more functions. NOTE: in order to perform the tests correctly, we need to use large arrays. Small-array tests will be measuring overhead, caching, etc.
In [1]: import sys
...: import numpy as np
...:
In [2]: print(sys.version)
...:
3.6.2 |Continuum Analytics, Inc.| (default, Jul 20 2017, 13:14:59)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)]
In [3]: print(np.__version__)
...:
1.12.1
In [4]: # Method 1 (Original)
   ...: def transform1(matrix):
   ...:     mat = np.asarray(matrix)
   ...:     mat[np.logical_not(np.not_equal(mat, 0).cumprod(axis=0))] = 0
   ...:     return mat
   ...:
In [5]: # Method 2:
   ...: def transform2(matrix):
   ...:     mat = np.asarray(matrix)
   ...:     mat *= (mat != 0).cumprod(axis=0, dtype=np.bool)
   ...:     return mat
   ...:
In [6]: # @DSM method:
   ...: def transform_DSM(matrix):
   ...:     mat = np.asarray(matrix)
   ...:     mat *= np.minimum.accumulate(mat != 0)
   ...:     return mat
   ...:
In [7]: # @DanielF method:
   ...: def transform_DanielF(matrix):
   ...:     mat = np.asarray(matrix)
   ...:     mat[~np.logical_and.accumulate(mat, axis=0)] = 0
   ...:     return mat
   ...:
In [8]: # Optimized @DanielF method:
   ...: def transform_DanielF_optimized(matrix):
   ...:     mat = np.asarray(matrix)
   ...:     mat *= np.logical_and.accumulate(mat, dtype=np.bool)
   ...:     return mat
   ...:
In [9]: matrix = np.random.randint(0, 20000, (20000, 20000))
In [10]: %timeit -n1 transform1(matrix)
22.1 s ± 241 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [11]: %timeit -n1 transform2(matrix)
9.29 s ± 185 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [12]: %timeit -n1 transform3(matrix)
9.23 s ± 180 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [13]: %timeit -n1 transform_DSM(matrix)
9.24 s ± 195 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [14]: %timeit -n1 transform_DanielF(matrix)
10.3 s ± 219 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [15]: %timeit -n1 transform_DanielF_optimized(matrix)
9.27 s ± 187 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
My initial solution (Method 1) is the slowest, while the other methods are much faster. @DanielF's original method is somewhat slower due to the use of boolean indexing (but the optimized variant is as fast as the other optimized methods).
This is a simple (though not optimised) algorithm:
import numpy as np
from numba import jit
m = np.array([[1, 1, 1, 0],
              [0, 5, 0, 1],
              [2, 1, 3, 10]])

@jit(nopython=True)
def zeroer(m):
    a, b = m.shape
    for j in range(b):
        for i in range(a):
            if m[i, j] == 0:
                m[i:, j] = 0
                break
    return m
zeroer(m)
# [[1 1 1 0]
# [0 5 0 0]
# [0 1 0 0]]
One variant of the cumprod approach is to use a cumulative minimum (or maximum). I prefer this slightly because you can use it to avoid any arithmetic operations beyond comparison, if you wanted, although it's hard to get worked up about it:
In [37]: m
Out[37]:
array([[ 1, 1, 1, 0],
[ 0, 5, 0, 1],
[ 2, 1, 3, 10]])
In [38]: m * np.minimum.accumulate(m != 0)
Out[38]:
array([[1, 1, 1, 0],
[0, 5, 0, 0],
[0, 1, 0, 0]])
In [39]: np.where(np.minimum.accumulate(m != 0), m, 0)
Out[39]:
array([[1, 1, 1, 0],
[0, 5, 0, 0],
[0, 1, 0, 0]])
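The "or maximum" variant mentioned above would accumulate the zero mask instead and invert it; a small sketch (not part of the original answer):
np.where(~np.maximum.accumulate(m == 0), m, 0)
# array([[1, 1, 1, 0],
#        [0, 5, 0, 0],
#        [0, 1, 0, 0]])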
A more optimized version of @AGN Gazer's solution, using np.logical_and.accumulate and implicit boolean casting of integers (which doesn't require a lot of multiplication):
def transform(matrix):
    mat = np.asarray(matrix)
    mat[~np.logical_and.accumulate(mat, axis=0)] = 0
    return mat
transform(m)
Out:
array([[1, 1, 1, 0],
[0, 5, 0, 0],
[0, 1, 0, 0]])
Timings:
%timeit transform2(m) # AGN's solution
The slowest run took 44.73 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.93 µs per loop
%timeit transform(m)
The slowest run took 9.00 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 7.99 µs per loop
m = np.random.randint(0,5,(100,100))
%timeit transform(m)
The slowest run took 6.03 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 43.9 µs per loop
%timeit transform2(m)
The slowest run took 4.09 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 50.4 µs per loop
Looks to be about a 15% speedup.

put numpy array items into "bins" [duplicate]

I have a numpy array with some integers, e.g.,
a = numpy.array([1, 6, 6, 4, 1, 1, 4])
I would now like to put all items into "bins" of equal values such that the bin with label 1 contains all indices of a that have the value 1. For the above example:
bins = {
    1: [0, 4, 5],
    6: [1, 2],
    4: [3, 6],
}
A combination of unique and where does the trick,
uniques = numpy.unique(a)
bins = {u: numpy.where(a == u)[0] for u in uniques}
but this doesn't seem ideal since the number of unique entries may be large.
Defaultdict with append would do the trick:
from collections import defaultdict
d = defaultdict(list)
for ix, val in enumerate(a):
    d[val].append(ix)
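For the sample array from the question this yields the same grouping (a quick check; the dict keys come out as numpy integers):
dict(d)
# {1: [0, 4, 5], 6: [1, 2], 4: [3, 6]}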
Here is one way, utilizing broadcasting, np.where(), and np.split():
In [66]: unique = np.unique(a)
In [67]: rows, cols = np.where(unique[:, None] == a)
In [68]: indices = np.split(cols, np.where(np.diff(rows) != 0)[0] + 1)
In [69]: dict(zip(unique, indices))
Out[69]: {1: array([0, 4, 5]), 4: array([3, 6]), 6: array([1, 2])}
Here's one approach -
def groupby_uniqueness_dict(a):
    sidx = a.argsort()
    b = a[sidx]
    cut_idx = np.flatnonzero(b[1:] != b[:-1]) + 1
    parts = np.split(sidx, cut_idx)
    out = dict(zip(b[np.r_[0, cut_idx]], parts))
    return out
More efficient one by avoiding the use of np.split -
def groupby_uniqueness_dict_v2(a):
    sidx = a.argsort()  # use .tolist() for output dict values as lists
    b = a[sidx]
    cut_idx = np.flatnonzero(b[1:] != b[:-1]) + 1
    idxs = np.r_[0, cut_idx, len(b) + 1]
    out = {b[i]: sidx[i:j] for i, j in zip(idxs[:-1], idxs[1:])}
    return out
Sample run -
In [161]: a
Out[161]: array([1, 6, 6, 4, 1, 1, 4])
In [162]: groupby_uniqueness_dict(a)
Out[162]: {1: array([0, 4, 5]), 4: array([3, 6]), 6: array([1, 2])}
Runtime test
Other approach(es) -
from collections import defaultdict
def defaultdict_app(a):  # @Grisha's soln
    d = defaultdict(list)
    for ix, val in enumerate(a):
        d[val].append(ix)
    return d
Timings -
Case #1 : Dict values as arrays
In [226]: a = np.random.randint(0,1000, 10000)
In [227]: %timeit defaultdict_app(a)
...: %timeit groupby_uniqueness_dict(a)
...: %timeit groupby_uniqueness_dict_v2(a)
100 loops, best of 3: 4.06 ms per loop
100 loops, best of 3: 3.06 ms per loop
100 loops, best of 3: 2.02 ms per loop
In [228]: a = np.random.randint(0,10000, 100000)
In [229]: %timeit defaultdict_app(a)
...: %timeit groupby_uniqueness_dict(a)
...: %timeit groupby_uniqueness_dict_v2(a)
10 loops, best of 3: 43.5 ms per loop
10 loops, best of 3: 29.1 ms per loop
100 loops, best of 3: 19.9 ms per loop
Case #2 : Dict values as lists
In [238]: a = np.random.randint(0,1000, 10000)
In [239]: %timeit defaultdict_app(a)
...: %timeit groupby_uniqueness_dict(a)
...: %timeit groupby_uniqueness_dict_v2(a)
100 loops, best of 3: 4.15 ms per loop
100 loops, best of 3: 4.5 ms per loop
100 loops, best of 3: 2.44 ms per loop
In [240]: a = np.random.randint(0,10000, 100000)
In [241]: %timeit defaultdict_app(a)
...: %timeit groupby_uniqueness_dict(a)
...: %timeit groupby_uniqueness_dict_v2(a)
10 loops, best of 3: 57.5 ms per loop
10 loops, best of 3: 54.6 ms per loop
10 loops, best of 3: 34 ms per loop

Vectorized assignment for numpy array with repeated indices (d[i,j,i,j] = s[i,j])

How can I set
d[i,j,i,j] = s[i,j]
using "NumPy" and without for loop?
I've tried the following:
l1=range(M)
l2=range(N)
d[l1,l2,l1,l2] = s[l1,l2]
If you think about it, that would be the same as creating a 2D array of shape (m*n, m*n) and assigning the values from s into the diagonal places. To have the final output as 4D, we just need a reshape at the end. That's basically what's implemented below -
m,n = s.shape
d = np.zeros((m*n,m*n),dtype=s.dtype)
d.ravel()[::m*n+1] = s.ravel()
d.shape = (m,n,m,n)
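A quick sanity check of that idea on a small example (a sketch added for illustration, not part of the original timing code):
m, n = 3, 4
s = np.arange(m * n).reshape(m, n)
d = np.zeros((m * n, m * n), dtype=s.dtype)
d.ravel()[::m * n + 1] = s.ravel()   # fill the main diagonal of the 2D view
d.shape = (m, n, m, n)
assert d[2, 1, 2, 1] == s[2, 1]      # d[i, j, i, j] == s[i, j] holds for every i, j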
Runtime test
Approaches -
# @MSeifert's solution
def assign_vals_ix(s):
    d = np.zeros((m, n, m, n), dtype=s.dtype)
    l1 = range(m)
    l2 = range(n)
    d[np.ix_(l1, l2)*2] = s[np.ix_(l1, l2)]
    return d

# Proposed in this post
def assign_vals(s):
    m, n = s.shape
    d = np.zeros((m*n, m*n), dtype=s.dtype)
    d.ravel()[::m*n+1] = s.ravel()
    return d.reshape(m, n, m, n)

# Using a strides based approach
def assign_vals_strides(a):
    m, n = a.shape
    p, q = a.strides
    d = np.zeros((m, n, m, n), dtype=a.dtype)
    out_strides = (q*(n*m*n+n), (m*n+1)*q)
    d_view = np.lib.stride_tricks.as_strided(d, (m, n), out_strides)
    d_view[:] = a
    return d
Timings -
In [285]: m,n = 10,10
...: s = np.random.rand(m,n)
...: d = np.zeros((m,n,m,n))
...:
In [286]: %timeit assign_vals_ix(s)
10000 loops, best of 3: 21.3 µs per loop
In [287]: %timeit assign_vals_strides(s)
100000 loops, best of 3: 9.37 µs per loop
In [288]: %timeit assign_vals(s)
100000 loops, best of 3: 4.13 µs per loop
In [289]: m,n = 20,20
...: s = np.random.rand(m,n)
...: d = np.zeros((m,n,m,n))
In [290]: %timeit assign_vals_ix(s)
10000 loops, best of 3: 60.2 µs per loop
In [291]: %timeit assign_vals_strides(s)
10000 loops, best of 3: 41.8 µs per loop
In [292]: %timeit assign_vals(s)
10000 loops, best of 3: 35.5 µs per loop
You can use integer array indexing (creating the broadcasted indices with np.ix_):
d[np.ix_(l1,l2)*2] = s[np.ix_(l1,l2)]
Note that the indices have to be duplicated (you want [i, j, i, j] instead of just [i, j]); that's why I multiplied the tuple returned by np.ix_ by 2.
For example:
>>> d = np.zeros((10, 10, 10, 10), dtype=int)
>>> s = np.arange(100).reshape(10, 10)
>>> l1 = range(3)
>>> l2 = range(5)
>>> d[np.ix_(l1,l2)*2] = s[np.ix_(l1,l2)]
And to make sure that the correct values were assigned:
>>> # Assert equality for the given condition
>>> for i in l1:
...     for j in l2:
...         assert d[i, j, i, j] == s[i, j]
>>> # Interactive tests
>>> d[0, 0, 0, 0], s[0, 0]
(0, 0)
>>> d[1, 2, 1, 2], s[1, 2]
(12, 12)
>>> d[2, 0, 2, 0], s[2, 0]
(20, 20)
>>> d[2, 4, 2, 4], s[2, 4]
(24, 24)
