Zero padding slice past end of array in numpy - python

In numpy, is there a way to zero pad entries if I'm slicing past the end of the array, such that I get something that is the size of the desired slice?
For example,
>>> x = np.ones((3,3,))
>>> x
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])
>>> x[1:4, 1:4]  # behaves as x[1:3, 1:3] by default; desired output:
array([[ 1.,  1.,  0.],
       [ 1.,  1.,  0.],
       [ 0.,  0.,  0.]])
>>> x[-1:2, -1:2]  # desired output:
array([[ 0.,  0.,  0.],
       [ 0.,  1.,  1.],
       [ 0.,  1.,  1.]])
Visually, I'd like the out-of-bounds areas to be zero padded, as in the desired outputs above.
I'm dealing with images and would like to zero pad to signify moving off the image for my application.
My current plan is to use np.pad to make the entire array larger prior to slicing, but indexing seems to be a bit tricky. Is there a potentially easier way?
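For reference, here is a minimal sketch of that pad-then-offset plan (the helper name padded_view and the fixed margin k are mine; k must be at least as large as the largest overhang, and the slices need explicit start/stop):

import numpy as np

def padded_view(x, rows, cols, k):
    # Slice x[rows, cols] as if x were surrounded by k zeros on every side.
    # Negative starts are read as off-image, not as "from the end".
    big = np.pad(x, k, mode='constant', constant_values=0)
    return big[rows.start + k:rows.stop + k, cols.start + k:cols.stop + k]

x = np.ones((3, 3))
padded_view(x, slice(1, 4), slice(1, 4), k=1)
# array([[1., 1., 0.],
#        [1., 1., 0.],
#        [0., 0., 0.]])
padded_view(x, slice(-1, 2), slice(-1, 2), k=1)
# array([[0., 0., 0.],
#        [0., 1., 1.],
#        [0., 1., 1.]])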

As far as I know there is no numpy solution (nor in any package I know of) for this problem. You could roll it yourself, but it will be really complicated even if you only want basic slicing. I would suggest you manually np.pad your array and simply offset your start/stop/step before you actually slice it.
However, if all you need to support are integers and slices without a step, I have some "working code" for this:
import numpy as np

class FunArray(np.ndarray):
    def __getitem__(self, item):
        all_in_slices = []
        pad = []
        for dim in range(self.ndim):
            # If the item has no length then it's a single argument.
            # If it's just an integer then we just return; this is
            # needed for the representation to work properly.
            # If it's not, create a list containing None-slices
            # for dim >= 1 and continue down the loop.
            try:
                len(item)
            except TypeError:
                if isinstance(item, int):
                    return super().__getitem__(item)
                newitem = [slice(None)] * self.ndim
                newitem[0] = item
                item = newitem
            # We're out of items, just append no-op slices
            if dim >= len(item):
                all_in_slices.append(slice(0, self.shape[dim]))
                pad.append((0, 0))
            # We're dealing with an integer (no padding even if it's
            # out of bounds)
            elif isinstance(item[dim], int):
                all_in_slices.append(slice(item[dim], item[dim] + 1))
                pad.append((0, 0))
            # Dealing with a slice; here it gets complicated, we need
            # to correctly deal with None start/stop as well as with
            # out-of-bound values and correct padding
            elif isinstance(item[dim], slice):
                # Placeholders for values
                start, stop = 0, self.shape[dim]
                this_pad = [0, 0]
                if item[dim].start is None:
                    start = 0
                elif item[dim].start < 0:
                    this_pad[0] = -item[dim].start
                    start = 0
                else:
                    start = item[dim].start
                if item[dim].stop is None:
                    stop = self.shape[dim]
                elif item[dim].stop > self.shape[dim]:
                    this_pad[1] = item[dim].stop - self.shape[dim]
                    stop = self.shape[dim]
                else:
                    stop = item[dim].stop
                all_in_slices.append(slice(start, stop))
                pad.append(tuple(this_pad))
        # Let numpy deal with slicing
        ret = super().__getitem__(tuple(all_in_slices))
        # and padding
        ret = np.pad(ret, tuple(pad), mode='constant', constant_values=0)
        return ret
This can be used as follows:
>>> x = np.arange(9).reshape(3, 3)
>>> x = x.view(FunArray)
>>> x[0:2]
array([[0, 1, 2],
       [3, 4, 5]])
>>> x[-3:2]
array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 1, 2],
       [3, 4, 5]])
>>> x[-3:2, 2]
array([[0],
       [0],
       [0],
       [2],
       [5]])
>>> x[-1:4, -1:4]
array([[0, 0, 0, 0, 0],
       [0, 0, 1, 2, 0],
       [0, 3, 4, 5, 0],
       [0, 6, 7, 8, 0],
       [0, 0, 0, 0, 0]])
Note that this may contain bugs and "not cleanly coded" parts; I've never used it except in trivial cases.

This class can handle your first test (x[1:4, 1:4]) and can be modified to handle your other test (i.e. prepending zeros) if you so desire; a sketch of that modification follows the usage example below.
import numpy as np

class CustomArray():
    def __init__(self, numpy_array):
        self._array = numpy_array

    def __getitem__(self, val):
        # Get the shape you wish to return
        required_shape = []
        for i in range(2):
            start = val[i].start
            if not start:
                start = 0
            required_shape.append(val[i].stop - start)
        get = self._array[val]
        # Check first dimension
        while get.shape[0] < required_shape[0]:
            get = np.concatenate((get, np.zeros((1, get.shape[1]))))
        # Check second dimension
        get = get.T
        while get.shape[0] < required_shape[1]:
            get = np.concatenate((get, np.zeros((1, get.shape[1]))))
        get = get.T
        return get
Here is an example of its usage:
a = CustomArray(np.ones((3, 3)))
print(a[:2, :2])
[[ 1.  1.]
 [ 1.  1.]]
print(a[:4, 1:6])
[[ 1.  1.  0.  0.  0.]
 [ 1.  1.  0.  0.  0.]
 [ 1.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]
# The actual numpy array is stored in the _array attribute
actual_numpy_array = a._array
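As promised above, here is a sketch (my code, not part of the original class) of the modification that also pads at the start, reading negative starts as off-image; the helper name padded_get is hypothetical:

import numpy as np

def padded_get(arr, rows, cols):
    # rows/cols are slice objects with explicit stops;
    # negative starts mean "off the image", not "from the end"
    r0, r1 = rows.start or 0, rows.stop
    c0, c1 = cols.start or 0, cols.stop
    out = np.zeros((r1 - r0, c1 - c0))
    get = arr[max(r0, 0):r1, max(c0, 0):c1]
    out[max(-r0, 0):max(-r0, 0) + get.shape[0],
        max(-c0, 0):max(-c0, 0) + get.shape[1]] = get
    return out

print(padded_get(np.ones((3, 3)), slice(-1, 2), slice(-1, 2)))
# [[ 0.  0.  0.]
#  [ 0.  1.  1.]
#  [ 0.  1.  1.]]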

Is there a way? Yes. Is it complicated? Not especially.
import numpy as np
def fill_crop(img, pos, crop):
    '''
    Fills `crop` with values from `img` at `pos`,
    while accounting for the crop being off the edge of `img`.
    *Note:* negative values in `pos` are interpreted as-is, not as "from the end".
    '''
    img_shape, pos, crop_shape = np.array(img.shape), np.array(pos), np.array(crop.shape)
    end = pos + crop_shape
    # Calculate crop slice positions
    crop_low = np.clip(0 - pos, a_min=0, a_max=crop_shape)
    crop_high = crop_shape - np.clip(end - img_shape, a_min=0, a_max=crop_shape)
    crop_slices = (slice(low, high) for low, high in zip(crop_low, crop_high))
    # Calculate img slice positions
    pos = np.clip(pos, a_min=0, a_max=img_shape)
    end = np.clip(end, a_min=0, a_max=img_shape)
    img_slices = (slice(low, high) for low, high in zip(pos, end))
    crop[tuple(crop_slices)] = img[tuple(img_slices)]
Why use this?
If memory is a concern, then copying the image into a padded version might not be good. This also works well for higher dimensional inputs, and it's clear how to return indices/slices if you needed those.
Why is crop a parameter?
To indicate the padded value, we can instead create the memory for the crop ahead of time with np.zeros/np.full, then fill in the part that we need. The difficulty then isn't working out where to copy from, but instead, where to paste inside the crop.
Theory
Let's look at the 1D case. If you think about it a little bit, you can see that:
crop_low is as far above 0 as pos is below 0, but if pos >= 0, then crop_low == 0
crop_high is as far below crop.shape as end is above img.shape, but if end <= img.shape, then crop_high == crop.shape
If we put this into normal python code, it would look like this:
crop_low = max(-pos, 0)
crop_high = crop.shape - max(end-img.shape, 0)
The rest of the code above is just for indexing.
Testing
# Examples in 1 dimension
img = np.arange(10, 20)

# Normal
pos = np.array([1,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [11, 12, 13, 14, 15]

# Off end
pos = np.array([8,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [18, 19, 0, 0, 0]

# Off start
pos = np.array([-2,])
crop = np.full([5,], 0)
fill_crop(img, pos, crop)
assert crop.tolist() == [0, 0, 10, 11, 12]

# Example in 2 dimensions (y, x)
img = np.arange(10, 10 + 10*10).reshape([10, 10])

# Off top right
pos = np.array([-2, 8])
crop = np.full([5, 5], 0)
fill_crop(img, pos, crop)
assert np.all(crop[:2] == 0)     # that is, the top two rows are 0s
assert np.all(crop[:, 3:] == 0)  # that is, the right 3 columns are 0s
assert np.all(crop[2:, :2] == img[:3, 8:])
# That is, rows 2-4 and columns 0-1 of the crop are the same as the
# top 3 rows and columns 8 and 9 (the last two columns) of the image
And there we have it. The over-engineered answer to the original question.

For the simplest case of rank 2 or 3 images, here is an example of how to implement zero-padded "slicing" with out-of-bounds indices:
import numpy as np

def padded_slice(img, sl):
    output_shape = np.asarray(img.shape)
    output_shape[0] = sl[1] - sl[0]
    output_shape[1] = sl[3] - sl[2]
    src = [max(sl[0], 0),
           min(sl[1], img.shape[0]),
           max(sl[2], 0),
           min(sl[3], img.shape[1])]
    dst = [src[0] - sl[0], src[1] - sl[0],
           src[2] - sl[2], src[3] - sl[2]]
    output = np.zeros(output_shape, dtype=img.dtype)
    output[dst[0]:dst[1], dst[2]:dst[3]] = img[src[0]:src[1], src[2]:src[3]]
    return output
For example, call this function with padded_slice(img, [-10, 150, -10, 150]) on a 100x100 image and it will return a 160x160 zero-padded image.
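A quick check of that claim (the test image below is a stand-in):

img = np.arange(100 * 100).reshape(100, 100)
out = padded_slice(img, [-10, 150, -10, 150])
assert out.shape == (160, 160)
assert np.array_equal(out[10:110, 10:110], img)              # original image, shifted by the padding
assert np.all(out[:10] == 0) and np.all(out[:, :10] == 0)    # zero rows/columns at top/left
assert np.all(out[110:] == 0) and np.all(out[:, 110:] == 0)  # zero rows/columns at bottom/right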

For the 1D case I did this; it can be useful if someone lands here...
import math
import numpy as np

def getPaddedSlice(npArray, pos, lenSegment, center=False):
    lenNpArray = len(npArray)
    if center:
        if lenSegment % 2 == 0:
            startIndex = int(pos - math.floor(lenSegment / 2.0)) + 1
            lastIndex = int(pos + math.ceil(lenSegment / 2.0)) + 1
        else:
            startIndex = int(pos - math.floor(lenSegment / 2.0))
            lastIndex = int(pos + math.ceil(lenSegment / 2.0)) + 1
    else:
        startIndex = pos
        lastIndex = startIndex + lenSegment
    if startIndex < 0:
        padded_slice = npArray[0:lastIndex]
        padded_slice = np.concatenate((np.zeros(abs(startIndex)), padded_slice))
    else:
        if center:
            padded_slice = npArray[startIndex:lastIndex]
        else:
            padded_slice = npArray[pos:lastIndex]
    if lastIndex > lenNpArray:
        if center:
            padded_slice = npArray[startIndex:pos + lenSegment]
            padded_slice = np.concatenate((padded_slice, np.zeros(lastIndex - lenNpArray)))
        else:
            padded_slice = npArray[pos:pos + lenSegment]
            padded_slice = np.concatenate((padded_slice, np.zeros(lastIndex - lenNpArray)))
    return padded_slice
Usage
a = np.asarray([2, 2, 3, 1, 7, 6, 5, 4])
lenSegment = 4  # segment length matching the display below
for i in range(len(a)):
    b = getPaddedSlice(a, i, lenSegment, True)
    print(b)
Display
[0 2 2 3]
[2 2 3 1]
[2 3 1 7]
[3 1 7 6]
[1 7 6 5]
[7 6 5 4]
[6 5 4 0]
[5 4 0 0]

This problem has a ton of edge-cases. My solution for a fairly restricted problem-space: Regular slices (i.e. slice objects, no masks etc.), slice.start <= slice.stop and slice.step == 1 per dimension, ideally n-dimensional.
Prerequisites:
from typing import Optional, NamedTuple

import numpy as np
from numpy.typing import ArrayLike

class _Intv(NamedTuple):
    length: int
    buff: slice
    data: slice
I want to slice from data into a buffer. A buff array is first created and filled with the padding value before data is copied into it from the data array. For this, the correct indices/slices must be computed for each array in every dimension. The following helper does this, with data.shape[n] == shape_n. Note that it only computes intervals and does not yet interact with the data:
def _intervals_1d(intv: slice, shape_n: int) -> _Intv:
    assert intv.step in (None, 1)  # works only for steps of length 1
    if intv.start is None:
        intv = slice(0, intv.stop)
    if intv.stop is None:
        intv = slice(intv.start, shape_n)
    assert intv.start <= intv.stop  # works only if slice's start <= stop
    length = intv.stop - intv.start
    if intv.start >= 0:  # no padding at start
        buff_start = 0
        data_start = intv.start
    else:  # padding at start
        buff_start = -intv.start
        data_start = 0
    if intv.stop <= shape_n:  # no padding at stop
        buff_stop = length
        data_stop = intv.stop
    else:  # padding at stop
        buff_stop = length - intv.stop + shape_n
        data_stop = shape_n
    return _Intv(length, slice(buff_start, buff_stop), slice(data_start, data_stop))
It can be applied to the 1D case:
def paddedslice_1d(data: ArrayLike, intv: slice, fill_value: Optional[float] = np.nan) -> ArrayLike:
    assert data.ndim == 1
    intv = _intervals_1d(intv, data.shape[0])
    buff = np.full((intv.length,), fill_value=fill_value, dtype=data.dtype)
    buff[intv.buff] = data[intv.data]
    return buff
The following tests are passing:
data_1d = np.array([10, 11, 12, 13])
test = paddedslice_1d(data_1d, intv = slice(None), fill_value = -99)
assert np.all(test == data_1d)
test = paddedslice_1d(data_1d, intv = slice(1, 3), fill_value = -99)
assert np.all(test == np.array([11, 12]))
test = paddedslice_1d(data_1d, intv = slice(-2, 2), fill_value = -99)
assert np.all(test == np.array([-99, -99, 10, 11]))
test = paddedslice_1d(data_1d, intv = slice(2, 6), fill_value = -99)
assert np.all(test == np.array([12, 13, -99, -99]))
test = paddedslice_1d(data_1d, intv = slice(-2, 6), fill_value = -99)
assert np.all(test == np.array([-99, -99, 10, 11, 12, 13, -99, -99]))
Based on the initial helper function, the solution can be generalized to n dimensions:
def paddedslice_nd(data: ArrayLike, *intvs: slice, fill_value: Optional[float] = np.nan) -> ArrayLike:
    assert data.ndim == len(intvs)
    intvs = [_intervals_1d(intv, shape_n) for intv, shape_n in zip(intvs, data.shape)]
    buff = np.full([intv.length for intv in intvs], fill_value=fill_value, dtype=data.dtype)
    buff[tuple(intv.buff for intv in intvs)] = data[tuple(intv.data for intv in intvs)]
    return buff
The following tests are passing:
data_2d = np.arange(11, 20).reshape(3, 3)
test = paddedslice_nd(data_2d, slice(None), slice(None), fill_value = -99)
assert np.all(test == data_2d)
test = paddedslice_nd(data_2d, slice(-1, None), slice(-1, None), fill_value = -99)
assert np.all(test == np.array([[-99, -99, -99, -99], [-99, 11, 12, 13], [-99, 14, 15, 16], [-99, 17, 18, 19]]))
test = paddedslice_nd(data_2d, slice(-1, 2), slice(-1, 2), fill_value = -99)
assert np.all(test == np.array([[-99, -99, -99], [-99, 11, 12], [-99, 14, 15]]))
More complicated edge-cases can be addressed relatively trivially by altering the helper function _intervals_1d, which is left as an exercise to the reader ;)

Related

Speed up multiplication of two dense tensors

I want to perform element wise multiplication between two tensors, where most of the elements are zero.
For two example tensors:
import numpy as np

test1 = np.zeros((2, 3, 5, 6))
test1[0, 0, :, 2] = 4
test1[0, 1, [2, 4], 1] = 7
test1[0, 2, 2, :] = 2
test1[1, 0, 4, 1:3] = 5
test1[1, :, 0, 1] = 3
and,
test2 = np.zeros((5, 6, 4, 7))
test2[2, 2, 2, 4] = 4
test2[0, 1, :, 1] = 3
test2[4, 3, 2, :] = 6
test2[1, 0, 3, 1:3] = 1
test2[3, :, 0, 1] = 2
the calculation I need is:
result = test1[..., None, None] * test2[None, None, ...]
In the actual use case I am coding for, the tensors can have more dimensions and much longer lengths in some of the dimensions, so while the multiplication is reasonably quick, I would like to utilise the fact that most of the elements are zero.
My first thought was to make a sparse representation of each tensor.
coords1 = np.nonzero(test1)
shape1 = test1.shape
test1_squished = test1[coords1]
coords1 = np.array(coords1)
coords2 = np.nonzero(test2)
shape2 = test2.shape
test2_squished = test2[coords2]
coords2 = np.array(coords2)
Here there is enough information to perform the multiplication, by comparing the coordinates along the equal axes and multiplying if they are the same.
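For the specific product above, that comparison can be written as follows (a rough sketch; the pairing of axes 2,3 of test1 with axes 0,1 of test2 is particular to this example):

# match nonzeros on the shared (broadcast-aligned) axes
match = (coords1[2:4][:, :, None] == coords2[0:2][:, None, :]).all(axis=0)
i1, i2 = np.nonzero(match)                       # overlapping nonzero pairs
values = test1_squished[i1] * test2_squished[i2]
coords = np.vstack([coords1[:2, i1],             # axes 0,1 come from test1
                    coords1[2:4, i1],            # the shared axes
                    coords2[2:, i2]])            # trailing axes come from test2
result = test1[..., None, None] * test2[None, None, ...]
assert np.allclose(result[tuple(coords)], values)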
I have a function for adding a new axis,
def new_axis(coords, shape, axis):
    new_coords = np.zeros((len(coords) + 1, len(coords[0])))
    new_index = np.delete(np.arange(0, len(coords) + 1), axis)
    new_coords[new_index] = coords
    coords = new_coords
    new_shape = np.zeros(len(new_coords), dtype=int)
    new_shape[new_index] = shape
    new_shape[axis] = 1
    new_shape = np.array(new_shape)
    return coords, new_shape
and for performing the multiplication,
def multiply(coords1, shape1, array1, coords2, shape2, array2):  # all inputs should be numpy arrays
    if np.array_equal(shape1, shape2):
        index1 = np.nonzero((coords1.T[:, None, :] == coords2.T).all(-1).any(-1))[0]
        index2 = np.nonzero((coords2.T[:, None, :] == coords1.T).all(-1).any(-1))[0]
        array = array1[index1] * array2[index2]
        coords = (coords1.T[index1]).T
        shape = shape1
    else:
        if len(shape1) == len(shape2):
            equal_index = np.nonzero((shape1 == shape2))[0]
            not_equal_index = np.nonzero(~(shape1 == shape2))[0]
            if np.logical_or((shape1[not_equal_index] == 1), (shape2[not_equal_index] == 1)).all():
                # if where not equal, one of them = 1 -> can broadcast
                # compare dimensions with same length, if equal then multiply corresponding elements
                multiply_index1 = np.nonzero(
                    (coords1[equal_index].T[:, None, :] == coords2[equal_index].T).all(-1).any(-1)
                )[0]
                # would like a vectorised version of the loop below
                array = []
                coords = []
                for index in multiply_index1:
                    multiply_index2 = np.nonzero(((coords2[equal_index]).T == (coords1[equal_index]).T[index]).all(-1))[0]
                    array.append(array1[index] * array2[multiply_index2])
                    temp = np.zeros((6, len(multiply_index2)))
                    temp[not_equal_index] = ((coords1[not_equal_index].T[index]).T + (coords2[not_equal_index].T[multiply_index2])).T
                    if len(multiply_index2) == 1:
                        temp[equal_index] = coords1[equal_index].T[index].T[:, None]
                    else:
                        temp[equal_index] = np.repeat(coords1[equal_index].T[index].T[:, None], len(multiply_index2), axis=-1)
                    coords.append(temp)
                array = np.concatenate(array)
                coords = np.concatenate(coords, axis=-1)
                shape = shape1
                shape[np.where(shape == 1)] = shape2[np.where(shape == 1)]
            else:
                print("error")
        else:
            print("error")
    return array, coords, shape
However the multiply function is very inefficient and so I lose any gain of going to the sparse representation.
Is there an elegant vectorised approach to the multiply function? Or is there a better solution than this sparse tensor idea?
Thanks in advance.

find all elements > 0 in a np.array with np.where

I have an array with numbers ranging from -infinity to +infinity.
The code looks like this:
delta_up = np.where(delta > 0, delta, 0)
delta_down = np.where(delta < 0, delta, 0)
Problem: I also have NaNs in the array and they need to stay as NaNs, but they are being converted to 0.
How do I solve this?
my_array = np.array([1, 2, 3, 5, -1, -2, -3, None], dtype="float")
negative_idx = np.where(my_array < 0)  # np.nan values will be ignored
positive_idx = np.where(my_array > 0)  # np.nan values will be ignored
# getting subarrays with the values: array[indexes]
negative_values = my_array[negative_idx]
positive_values = my_array[positive_idx]
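If the goal is instead to keep the full shape and let NaNs pass through, one option (a sketch, not the only way) is to nest np.where: NaN comparisons evaluate to False, so NaN entries fall into the "else" value, where they can be re-inserted:

import numpy as np

delta = np.array([1.0, -2.0, np.nan, 3.0])
delta_up = np.where(delta > 0, delta, np.where(np.isnan(delta), np.nan, 0))
delta_down = np.where(delta < 0, delta, np.where(np.isnan(delta), np.nan, 0))
# delta_up   -> [ 1.,  0., nan,  3.]
# delta_down -> [ 0., -2., nan,  0.]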

How to efficiently resize a numpy array to a given shape, padding with zeros if necessary?

I want to create an array of a given shape based on another numpy array. The number of dimensions will be matching, but the sizes will differ from axis to axis. If the original size is too small, I want to pad it with zeros to fulfill the requirements. Example of expected behaviour to clarify:
embedding = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8]
])
resize_with_outer_zeros(embedding, (4, 3)) = np.array([
    [1, 2, 3],
    [5, 6, 7],
    [0, 0, 0],
    [0, 0, 0]
])
I think I achieved the desired behaviour with the function below.
from typing import Tuple

import numpy as np

def resize_with_outer_zeros(embedding: np.ndarray, target_shape: Tuple[int, ...]) -> np.ndarray:
    padding = tuple((0, max(0, target_size - size)) for target_size, size in zip(target_shape, embedding.shape))
    target_slice = tuple(slice(0, target_size) for target_size in target_shape)
    return np.pad(embedding, padding)[target_slice]
However, I have strong doubts about its efficiency and elegance, as it involves a lot of pure python tuple operations. Is there a better and more concise way to do it?
If you know that your array won't be bigger than some size (r, c), why not just:
def pad_with_zeros(A, r, c):
    out = np.zeros((r, c))
    r_, c_ = np.shape(A)
    out[0:r_, 0:c_] = A
    return out
If you want to support arbitrary dimensions (tensors) it gets a little uglier, but the principle remains the same:
def pad(A, shape):
    out = np.zeros(shape)
    out[tuple(slice(0, d) for d in np.shape(A))] = A
    return out
And to support larger arrays (larger than what you would pad):
def pad(A, shape):
    shape = np.max([np.shape(A), shape], axis=0)
    out = np.zeros(shape)
    out[tuple(slice(0, d) for d in np.shape(A))] = A
    return out
I don't think you can do much better, but instead of using pad and then slicing, just do zeros at the right size and then an assignment - this cuts it to one list comprehension instead of two.
embedding = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8]
])
z = np.zeros((4, 3))
s = tuple(slice(None, min(za, ea)) for za, ea in zip(z.shape, embedding.shape))
z[s] = embedding[s]
z
# array([[1., 2., 3.],
#        [5., 6., 7.],
#        [0., 0., 0.],
#        [0., 0., 0.]])
I'd just use a zero-matrix and run a nested for-loop to set the values from the older array - the remaining places will automatically be padded with zeros.
import numpy as np

def resize_array(array, new_size):
    Z = np.zeros(new_size)
    for i in range(len(Z)):
        for j in range(len(Z[i])):
            try:
                Z[i][j] = array[i][j]
            except IndexError:  # in case array[i][j] doesn't exist in the new size and should be truncated
                pass
    return Z

embedding = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(resize_array(embedding, (4, 3)))

Finding similar sub-sequences in a time series?

I have thousands of time series (24-dimensional data -- one dimension for each hour of the day). Out of these time series, I'm interested in a particular sub-sequence or pattern.
I'm interested in sub-sequences with a sharp negative slope, followed by a period of several hours where the slope is relatively flat, before finally ending with a sharp positive slope. I know the sub-sequences I'm interested in won't match each other exactly and most likely will be shifted in time, scaled differently, have longer/shorter periods where the slope is relatively flat, etc., but I would like to find a way to detect them all.
To do this, I have developed a simple heuristic (based on my definition of the pattern) to quickly find some of the sub-sequences of interest. However, I was wondering if there was a more elegant way (in Python) to search thousands of time series for the sub-sequence I'm interested in (while taking into account the things mentioned above -- differences in time, scale, etc.)?
Edit: a year later, I cannot believe how much I overcomplicated flatline and slope detection; stumbling on the same question, I realized it's as simple as
idxs = np.where(x[1:] - x[:-1] == 0)[0]
idxs = [i for idx in idxs for i in (idx, idx + 1)]
The first line is implemented efficiently via np.diff(x); further, to e.g. detect slope > 5, use np.diff(x) > 5. The second line is needed since differencing tosses out right endpoints (e.g. diff([5,6,6,6,7]) = [1,0,0,1] -> idxs = [1, 2], which excludes 3).
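A tiny demonstration on the example from the text (deduplicating via a set is my addition, since interior indices of a run are produced twice):

import numpy as np

x = np.array([5, 6, 6, 6, 7])
idxs = np.where(np.diff(x) == 0)[0]                         # left endpoints of flat runs -> [1, 2]
idxs = sorted({i for idx in idxs for i in (idx, idx + 1)})  # add right endpoints -> [1, 2, 3]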
The functions below should do it; the code is written with intuitive variable and method names, and should be self-explanatory after a few read-overs. The code is efficient and scalable.
Functionalities:
Specify min & max flatline length
Specify min & max slopes for left & right tails
Specify min & max average slopes for left & right tails, over multiple intervals
Example:
import numpy as np
import matplotlib.pyplot as plt

# Toy data
t = np.array([[ 5,  3,  3,  5,  3,  3,  3,  3,  3,  5,  5,  3,  3,  0,  4,
                1,  1, -1, -1,  1,  1,  1,  1, -1,  1,  1, -1,  0,  3,  3,
                5,  5,  3,  3,  3,  3,  3,  5,  7,  3,  3,  5]]).T
plt.plot(t)
plt.show()

# Get flatline indices
indices = get_flatline_indices(t, min_len=4, max_len=5)
plt.plot(t)
for idx in indices:
    plt.plot(idx, t[idx], marker='o', color='r')
plt.show()

# Filter by edge slopes
lims_left = (-10, -2)
lims_right = (2, 10)
averaging_intervals = [1, 2, 3]
indices_filtered = filter_by_tail_slopes(indices, t, lims_left, lims_right,
                                         averaging_intervals)
plt.plot(t)
for idx in indices_filtered:
    plt.plot(idx, t[idx], marker='o', color='r')
plt.show()
def get_flatline_indices(sequence, min_len=2, max_len=6):
    indices = []
    elem_idx = 0
    max_elem_idx = len(sequence) - min_len
    while elem_idx < max_elem_idx:
        current_elem = sequence[elem_idx]
        next_elem = sequence[elem_idx + 1]
        flatline_len = 0
        if current_elem == next_elem:
            while current_elem == next_elem:
                flatline_len += 1
                next_elem = sequence[elem_idx + flatline_len]
            if flatline_len >= min_len:
                if flatline_len > max_len:
                    flatline_len = max_len
                trim_start = elem_idx
                trim_end = trim_start + flatline_len
                indices_to_append = [index for index in range(trim_start, trim_end)]
                indices += indices_to_append
            elem_idx += flatline_len
            flatline_len = 0
        else:
            elem_idx += 1
    return indices if not all([(entry == []) for entry in indices]) else []
def filter_by_tail_slopes(indices, data, lims_left, lims_right, averaging_intervals=1):
    indices_filtered = []
    indices_temp, tails_temp = [], []
    got_left, got_right = False, False
    for idx in indices:
        slopes_left, slopes_right = _get_slopes(data, idx, averaging_intervals)
        for tail_left, slope_left in enumerate(slopes_left):
            if _valid_slope(slope_left, lims_left):
                if got_left:
                    indices_temp = []  # discard prev if twice in a row
                    tails_temp = []
                indices_temp.append(idx)
                tails_temp.append(tail_left + 1)
                got_left = True
        if got_left:
            for edge_right, slope_right in enumerate(slopes_right):
                if _valid_slope(slope_right, lims_right):
                    if got_right:
                        indices_temp.pop(-1)
                        tails_temp.pop(-1)
                    indices_temp.append(idx)
                    tails_temp.append(edge_right + 1)
                    got_right = True
        if got_left and got_right:
            left_append = indices_temp[0] - tails_temp[0]
            right_append = indices_temp[1] + tails_temp[1]
            indices_filtered.append(_fill_range(left_append, right_append))
            indices_temp = []
            tails_temp = []
            got_left, got_right = False, False
    return indices_filtered
def _get_slopes(data, idx, averaging_intervals):
    if type(averaging_intervals) == int:
        averaging_intervals = [averaging_intervals]
    slopes_left, slopes_right = [], []
    for interval in averaging_intervals:
        slopes_left += [(data[idx] - data[idx - interval]) / interval]
        slopes_right += [(data[idx + interval] - data[idx]) / interval]
    return slopes_left, slopes_right

def _valid_slope(slope, lims):
    min_slope, max_slope = lims
    return (slope >= min_slope) and (slope <= max_slope)

def _fill_range(_min, _max):
    return [i for i in range(_min, _max + 1)]

How to crop zero edges of a numpy array?

I have this ugly, un-pythonic beast:
def crop(dat, clp=True):
    '''Crops zero-edges of an array and (optionally) clips it to [0,1].

    Example:
    >>> crop( np.array(
    ...     [[0,0,0,0,0,0],
    ...      [0,0,0,0,0,0],
    ...      [0,1,0,2,9,0],
    ...      [0,0,0,0,0,0],
    ...      [0,7,4,1,0,0],
    ...      [0,0,0,0,0,0]]
    ... ))
    array([[1, 0, 1, 1],
           [0, 0, 0, 0],
           [1, 1, 1, 0]])
    '''
    if clp: np.clip(dat, 0, 1, out=dat)
    while np.all(dat[0, :] == 0):
        dat = dat[1:, :]
    while np.all(dat[:, 0] == 0):
        dat = dat[:, 1:]
    while np.all(dat[-1, :] == 0):
        dat = dat[:-1, :]
    while np.all(dat[:, -1] == 0):
        dat = dat[:, :-1]
    return dat
    # The lines below get rid of zero-lines/columns in the middle,
    # so they are not usable here:
    # dat = dat[~np.all(dat == 0, axis=1)]
    # dat = dat[:, ~np.all(dat == 0, axis=0)]
How do I tame it, and make it beautiful?
Try incorporating something like this:
# argwhere will give you the coordinates of every non-zero point
true_points = np.argwhere(dat)
# take the smallest points and use them as the top left of your crop
top_left = true_points.min(axis=0)
# take the largest points and use them as the bottom right of your crop
bottom_right = true_points.max(axis=0)
out = dat[top_left[0]:bottom_right[0]+1, # plus 1 because slice isn't
top_left[1]:bottom_right[1]+1] # inclusive
This could be expanded without unreasonable difficulty to the general n-d case.
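For instance, a sketch of that n-d generalization (crop_nd is my name for it):

import numpy as np

def crop_nd(dat):
    # bounding box of all non-zero points, in every dimension
    true_points = np.argwhere(dat)
    top_left = true_points.min(axis=0)
    bottom_right = true_points.max(axis=0)
    return dat[tuple(slice(lo, hi + 1) for lo, hi in zip(top_left, bottom_right))]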
This should work in any number of dimensions. I believe it is also quite efficient because swapping axes and slicing create only views on the array, not copies (which rules out functions such as take() or compress() which one might be tempted to use) or any temporaries. However it is not significantly 'nicer' than your own solution.
def crop2(dat, clp=True):
    if clp: np.clip(dat, 0, 1, out=dat)
    for i in range(dat.ndim):
        dat = np.swapaxes(dat, 0, i)  # send i-th axis to front
        while np.all(dat[0] == 0):
            dat = dat[1:]
        while np.all(dat[-1] == 0):
            dat = dat[:-1]
        dat = np.swapaxes(dat, 0, i)  # send i-th axis back to its original position
    return dat
Definitely not the prettiest approach but wanted to try something else.
def _fill_gap(a):
    """
    a = 1D array of `True`s and `False`s.
    Fill the gap between first and last `True` with `True`s.
    Doesn't make a copy of `a`, but in this case that isn't really needed.
    """
    a[slice(*a.nonzero()[0].take([0, -1]))] = True
    return a

def crop3(d, clip=True):
    dat = np.array(d)
    if clip: np.clip(dat, 0, 1, out=dat)
    dat = np.compress(_fill_gap(dat.any(axis=0)), dat, axis=1)
    dat = np.compress(_fill_gap(dat.any(axis=1)), dat, axis=0)
    return dat
But it works.
In [639]: crop3(np.array(
     ...:     [[0,0,0,0,0,0],
     ...:      [0,0,0,0,0,0],
     ...:      [0,1,0,2,9,0],
     ...:      [0,0,0,0,0,0],
     ...:      [0,7,4,1,0,0],
     ...:      [0,0,0,0,0,0]]))
Out[639]:
array([[1, 0, 1, 1],
       [0, 0, 0, 0],
       [1, 1, 1, 0]])
Another way of implementing this, which is faster for dense arrays, makes use of the argmax property:
def get_last_nz(vec):
    """Get the last nonzero element position of a vector

    :param vec: the vector
    :type vec: iterable
    """
    if not isinstance(vec, np.ndarray) or vec.dtype != 'bool':
        vec = np.array(vec) > 0
    return vec.size - 1 - np.argmax(vec[::-1])

def get_first_nz(vec):
    """Get the first nonzero element position of a vector

    :param vec: the vector
    :type vec: iterable
    """
    if not isinstance(vec, np.ndarray) or vec.dtype != 'bool':
        vec = np.array(vec) > 0
    return np.argmax(vec)

def crop(array):
    y_sum = array.sum(axis=1) > 0
    x_sum = array.sum(axis=0) > 0
    x_min = get_first_nz(x_sum)
    x_max = get_last_nz(x_sum)
    y_min = get_first_nz(y_sum)
    y_max = get_last_nz(y_sum)
    return array[y_min:y_max + 1, x_min:x_max + 1]
