Converting a population on a grid to coordinates, and vice versa - python

For an ecology project, I need to switch back and forth between two representations of a population on a square grid world:
Representation 1: Simply the grid (a 2d Numpy array), where the value in each cell corresponds to the number of individuals in this cell. For instance, with a 3x3 grid:
grid = np.array(
[[0, 1, 0],
[0, 3, 1],
[0, 0, 0]]
)
Representation 2: A 2d Numpy array with the x,y coordinates of each individual on the grid:
coords = np.array(
[[0, 1],
[1, 1],
[1, 1],
[1, 1],
[1, 2]]
)
As you can see, when a cell has more than 1 individual on it, its coordinates repeat. Therefore, coords has shape (population_size, 2).
The current implementations for grid_to_coords() and coords_to_grid() both involve for loops, as you can see below, which slow down the execution considerably:
def grid_to_coords(grid):
non_zero_pos = np.nonzero(grid)
pop_size = grid.sum(keepdims=False)
coords = np.zeros((int(pop_size), 2))
offset = 0
for i in range(len(non_zero_pos[0])):
n_in_pos = int(grid[non_zero_pos[0][i], non_zero_pos[1][i]])
for j in range(n_in_pos):
coords[i + j + offset] = [non_zero_pos[0][i], non_zero_pos[1][i]]
offset += j
return pos
def coords_to_grid(coords, grid_dim):
grid = np.zeros((grid_dim, grid_dim), dtype=np.int32)
for x, y in coords:
# Add a particle to the grid, making sure it is actually on the grid!
x = max(0, min(x, grid_dim - 1))
y = max(0, min(y, grid_dim - 1))
grid[x, y] += 1
return grid
I would need a way to vectorise these two functions. Could you please help?
Many thanks.

import numpy as np
grid = np.array(
[[0, 1, 0],
[0, 3, 1],
[0, 0, 0]]
)
coords = np.array(
[[0, 1],
[1, 1],
[1, 1],
[1, 1],
[1, 2]]
)
def grid_to_coords(grid):
"""
>>> grid_to_coords(grid)
array([[0, 1],
[1, 1],
[1, 1],
[1, 1],
[1, 2]])
"""
x, y = np.nonzero(grid) # x = [0 1 1]; y = [1 1 2]
# np.c_[x, y] = [[0 1]
# [1 1]
# [1 2]]
# grid[x, y] = [1 3 1]
return np.c_[x, y].repeat(grid[x, y], axis=0)
def coords_to_grid(coords, grid_dim):
"""
>>> coords_to_grid(coords, 3)
array([[0, 1, 0],
[0, 3, 1],
[0, 0, 0]])
"""
unique, counts = np.unique(coords, axis=0, return_counts=True)
# unique = [[0 1]
# [1 1]
# [1 2]]
# counts = [1 3 1]
ret = np.zeros((grid_dim, grid_dim), dtype=int)
ret[unique[:, 0], unique[:, 1]] = counts
return ret

Related

How to obtain bottom diagonals of a matrix according to an index?

I would like to know how it is possible to obtain the diagonals that point downwards (left and right) with respect to a specific index of the matrix.
To be more graphic I will give the following example of an expected output:
matrix = np.array([[2, 0, 0, 2],
[3, 9, 8, 3],
[3, 0, 0, 2],
[0, 0, 0, 0]])
Expected output results for position: matrix[1][2]
matrix[1][2]
8
diag_right = [2]
diag_left = [0, 0]
Same example but using the matrix in matrix[1][2]
matrix = np.array([[x, x, x, x],
[x, x, 8, x],
[x, 0, x, 2],
[0, x, x, x]])
An easy way using numpy would be:
(i've changed the matrix to make more clear some results in test_ij)
import numpy as np
def get_right_left_diags(matrix,i,j):
n = len(matrix)
left_diag = np.diag(matrix[i+1:n,j+1:n])
right_diag = np.diag( matrix[i+1:n, j-1::-1])
return right_diag,left_diag
%lets check some results
matrix = np.array([[2, 0, 0, 2],
[3, 9, 8, 3],
[3, 0, 0, 2],
[1, 2, 3, 4]])
n = len(matrix)
cases = [[0,0],[0,3],[1,1],[1,2],[2,1]]
for i,j in cases:
right_diag,left_diag = get_right_left_diags(matrix,i,j)
print(f"i={i}, j={j}, left_diag: {left_diag} \t right_diag: {right_diag}")
this will output:
#i=0, j=0, left_diag: [9 0 4] right_diag: [3 0 2]
#i=0, j=3, left_diag: [] right_diag: [8 0 1]
#i=1, j=1, left_diag: [0 4] right_diag: [3]
#i=1, j=2, left_diag: [2] right_diag: [0 1]
#i=2, j=1, left_diag: [3] right_diag: [1]
for me it has total sense.
matrix = np.array([[2, 0, 0, 2],
[3, 9, 8, 3],
[3, 0, 0, 2],
[0, 0, 0, 0]])
i = 1; j = 2
diag_right = []; diag_left = []
for k in range(1, len(matrix) - i):
if(j+k < len(matrix[0])):
diag_right.append(matrix[i+k][j+k])
if(j-k >= 0):
diag_left.append(matrix[i+k][j-k])
Is this what you're looking for?

Advanced boolean indexing

I wanna select values by mask and changes values by use mask-array.
Code:
import numpy as np
a = np.zeros((2, 2), dtype=(np.uint8, 3))
x = np.arange(4, dtype=int).reshape((2, 2))
mask = np.logical_and(a1 < 3, a1 > 0)
a[mask] = (1, x[mask], 2)
I want result:
a[mask]
>> [[1, 1, 2], [1, 2, 2]]
But i get error:
ValueError: setting an array element with a sequence.
If try do things like a[mask] = (1, 2, 2)
array will be
[[[0, 0, 0],
[1, 2, 2]],
[[1, 2, 2],
[0, 0, 0]]]
But i need use values from x.
To make it look like
[[[0, 0, 0],
[1, 1, 3]],
[[1, 2, 3],
[0, 0, 0]]]
How i can do it?
It can be done in two steps.
import numpy as np
a = np.zeros((2, 2), dtype=(np.uint8, 3))
x = np.arange(4, dtype=int).reshape((2, 2))
a1 = x # Create an a1 for the mask
mask = np.logical_and(a1 < 3, a1 > 0)
a[mask] = (1, 0, 2) # Set the outer columns
a[mask, 1] = x[mask] # Set the column 1
print( a )
# [[[0 0 0]
# [1 1 2]]
# [[1 2 2]
# [0 0 0]]]

how to convert n-hot vectors to multi-labels in tensorflow?

I have a multi-classification task, and I have gotten the n-hot type predictions like
n_hot_prediction = [[0, 1, 1],
[0, 1, 0],
[1, 0, 1]]
and another top_k array like
top_k_prediction = [[1, 2],
[0, 1],
[0, 1]]
Firstly, I wish to get a function which works like:
tf.function1(n_hot_prediction) #output: [[1, 2], [1], [0, 2]]
Secondly, I with to find another function which works like:
tf.function2(top_k_prediction) #output: [[0, 1, 1], [1, 1, 0], [1, 1, 0]]
Are there any functions or methods that works like tf.function1 and tf.function2?
Your second function is pretty simple to implement:
import tensorflow as tf
#tf.function
def multi_hot(x, depth=None):
x = tf.convert_to_tensor(x)
if depth is None:
depth = tf.math.reduce_max(x) + 1
r = tf.range(tf.dtypes.cast(depth, x.dtype))
eq = tf.equal(tf.expand_dims(x, axis=-1), r)
return tf.cast(tf.reduce_any(eq, axis=-2), x.dtype)
x = [[1, 2], [0, 1], [0, 1]]
tf.print(multi_hot(x))
# [[0 1 1]
# [1 1 0]
# [1 1 0]]
For the first one, the result is not a proper tensor, so you can make a ragged tensor instead, masking a tensor of sequential values:
import tensorflow as tf
#tf.function
def as_labels(x):
mask = tf.dtypes.cast(x, tf.bool)
s = tf.shape(mask)
r = tf.reshape(tf.range(s[-1]), tf.concat([tf.ones(tf.rank(x) - 1, tf.int32), [-1]], axis=0))
r = tf.tile(r, tf.concat([s[:-1], [1]], axis=0))
return tf.ragged.boolean_mask(r, mask)
x = [[0, 1, 1], [0, 1, 0], [1, 0, 1]]
print(as_labels(x).to_list())
# [[1, 2], [1], [0, 2]]

How do I extract a 2D NumPy sub-array from a 2D NumPy array based on patterns?

I have a 2D NumPy array which looks like this:
Array=
[
[0,0,0,0,0,0,0,2,2,2],
[0,0,0,0,0,0,0,2,2,2].
[0,0,1,1,1,0,0,2,2,2],
[0,0,1,1,1,0,0,2,2,2],
[0,0,1,1,1,0,0,1,1,1],
[0,0,0,0,0,0,0,1,1,1]
]
I need to display the arrays of non-zero elements as:
Array1:
[
[1,1,1],
[1,1,1],
[1,1,1]
]
Array2:
[
[2,2,2],
[2,2,2],
[2,2,2],
[2,2,2]
]
Array3:
[
[1,1,1],
[1,1,1]
]
Could someone please help me out with what logic I could use to achieve the following output? I can't use fixed indexes (like array[a:b, c:d]) since the logic i create should be able to work for any NumPy array with a similar pattern.
This uses scipy.ndimage.label to recursively identify disconnected sub-arrays.
import numpy as np
from scipy.ndimage import label
array = np.array(
[[0,0,0,0,0,0,0,2,2,2,3,3,3],
[0,0,0,0,0,0,0,2,2,2,0,0,1],
[0,0,1,1,1,0,0,2,2,2,0,2,1],
[0,0,1,1,1,0,0,2,2,2,0,2,0],
[0,0,1,1,1,0,0,1,1,1,0,0,0],
[0,0,0,0,0,0,0,1,1,1,0,0,0]])
# initialize list to collect sub-arrays
arr_list = []
def append_subarrays(arr, val, val_0):
'''
arr : 2D array
val : the value used for filtering
val_0 : the original value, which we want to preserve
'''
# remove everything that's not the current val
arr[arr != val] = 0
if 0 in arr: # <-- not a single rectangle yet
# get relevant indices as well as their minima and maxima
x_ind, y_ind = np.where(arr != 0)
min_x, max_x, min_y, max_y = min(x_ind), max(x_ind) + 1, min(y_ind), max(y_ind) + 1
# cut subarray (everything corresponding to val)
arr = arr[min_x:max_x, min_y:max_y]
# use the label function to assign different values to disconnected regions
labeled_arr = label(arr)[0]
# recursively apply append_subarrays to each disconnected region
for sub_val in np.unique(labeled_arr[labeled_arr != 0]):
append_subarrays(labeled_arr.copy(), sub_val, val_0)
else: # <-- we only have a single rectangle left ==> append
arr_list.append(arr * val_0)
for i in np.unique(array[array > 0]):
append_subarrays(array.copy(), i, i)
for arr in arr_list:
print(arr, end='\n'*2)
Output (note: modified example array):
[[1]
[1]]
[[1 1 1]
[1 1 1]
[1 1 1]]
[[1 1 1]
[1 1 1]]
[[2 2 2]
[2 2 2]
[2 2 2]
[2 2 2]]
[[2]
[2]]
[[3 3 3]]
This sounds like a floodfill problem, so skimage.measure.label is a good approach:
Array=np.array([[0,0,0,0,0,0,0,2,2,2],
[0,0,0,0,0,0,0,2,2,2],
[0,0,1,1,1,0,0,2,2,2],
[0,0,1,1,1,0,0,2,2,2],
[0,0,1,1,1,0,0,1,1,1],
[0,0,0,0,0,0,0,1,1,1]
])
from skimage.measure import label
labels = label(Array, connectivity=1)
for label in range(1, labels.max()+1):
xs, ys = np.where(labels==label)
shape = (len(np.unique(xs)), len(np.unique(ys)))
print(Array[xs, ys].reshape(shape))
Output:
[[2 2 2]
[2 2 2]
[2 2 2]
[2 2 2]]
[[1 1 1]
[1 1 1]
[1 1 1]]
[[1 1 1]
[1 1 1]]
startRowIndex = 0 #indexes of sub-arrays
endRowIndex = 0
startColumnIndex = 0
endColumnIndex = 0
tmpI = 0 #for iterating inside the i,j loops
tmpJ = 0
value = 0 #which number we are looking for in array
for i in range(array.shape[0]): #array.shape[0] says how many rows, shape[1] says how many columns
for j in range(array[i].size): #for all elements in a row
if(array[i,j] != 0): #if the element is different than 0
startRowIndex = i
startColumnIndex = j
tmpI = i
tmpJ = j #you cannot change the looping indexes so create tmp indexes
value = array[i,j] #save what number will be sub-array (for example 2)
while(array[tmpI,tmpJ] != 0 and array[tmpI,tmpJ] == value ): #iterate over column numbers
tmpJ+=1
if tmpJ == array.shape[1]: #if you reached end of the array (that is end of the column)
break
#if you left the array then it means you are on index which is not zero,
#so the previous where zero, but displaying array like this a[start:stop]
#will take the values from <start; stop) (stop is excluded)
endColumnIndex = tmpJ
tmpI = i
tmpJ = j
while(array[tmpI,tmpJ] != 0 and array[tmpI,tmpJ] == value): #iterate over row numbers
tmpI += 1
if tmpI == array.shape[0]: #if you reached end of the array
break
#if you left the array then it means you are on index which is not zero,
#so the previous where zero
endRowIndex = tmpI
print(array[startRowIndex:endRowIndex, startColumnIndex:endColumnIndex])
#change array to zero with already used elements
array[startRowIndex:endRowIndex, startColumnIndex:endColumnIndex] = 0
This one is kinda brute-force
but works the way you want it.
This approach doesn't use any external library other than numpy
Here's my pure Python (no NumPy) solution. I took advantage of the fact that the contiguous regions are always rectangular.
The algorithm scans from top-left to bottom-right; when it finds the corner of a region, it scans to find the top-right and bottom-left corners. The dictionary skip is populated so that later scans can skip horizontally past any rectangle which has already been found.
The time complexity is O(nm) for a grid with n rows and m columns, which is optimal for this problem.
def find_rectangles(grid):
width, height = len(grid[0]), len(grid)
skip = dict()
for y in range(height):
x = 0
while x < width:
if (x, y) in skip:
x = skip[x, y]
elif not grid[y][x]:
x += 1
else:
v = grid[y][x]
x2 = x + 1
while x2 < width and grid[y][x2] == v:
x2 += 1
y2 = y + 1
while y2 < height and grid[y2][x] == v:
skip[x, y2] = x2
y2 += 1
yield [ row[x:x2] for row in grid[y:y2] ]
x = x2
Example:
>>> for r in find_rectangles(grid1): # example from the question
... print(r)
...
[[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]
[[1, 1, 1], [1, 1, 1], [1, 1, 1]]
[[1, 1, 1], [1, 1, 1]]
>>> for r in find_rectangles(grid2): # example from mcsoini's answer
... print(r)
...
[[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]
[[3, 3, 3]]
[[1], [1]]
[[1, 1, 1], [1, 1, 1], [1, 1, 1]]
[[2], [2]]
[[1, 1, 1], [1, 1, 1]]
We can do this using scipy.ndimage.label and scipy.ndimage.find_objects:
from scipy.ndimage import label,find_objects
Array = np.array(Array)
[Array[j][i] for j in find_objects(*label(Array)) for i in find_objects(Array[j])]
# [array([[1, 1, 1],
# [1, 1, 1]]), array([[2, 2, 2],
# [2, 2, 2],
# [2, 2, 2],
# [2, 2, 2]]), array([[1, 1, 1],
# [1, 1, 1],
# [1, 1, 1]])]

How to "scale" a numpy array?

I would like to scale an array of shape (h, w) by a factor of n, resulting in an array of shape (h*n, w*n), with the.
Say that I have a 2x2 array:
array([[1, 1],
[0, 1]])
I would like to scale the array to become 4x4:
array([[1, 1, 1, 1],
[1, 1, 1, 1],
[0, 0, 1, 1],
[0, 0, 1, 1]])
That is, the value of each cell in the original array is copied into 4 corresponding cells in the resulting array. Assuming arbitrary array size and scaling factor, what's the most efficient way to do this?
You should use the Kronecker product, numpy.kron:
Computes the Kronecker product, a composite array made of blocks of the second array scaled by the first
import numpy as np
a = np.array([[1, 1],
[0, 1]])
n = 2
np.kron(a, np.ones((n,n)))
which gives what you want:
array([[1, 1, 1, 1],
[1, 1, 1, 1],
[0, 0, 1, 1],
[0, 0, 1, 1]])
You could use repeat:
In [6]: a.repeat(2,axis=0).repeat(2,axis=1)
Out[6]:
array([[1, 1, 1, 1],
[1, 1, 1, 1],
[0, 0, 1, 1],
[0, 0, 1, 1]])
I am not sure if there's a neat way to combine the two operations into one.
scipy.misc.imresize can scale images. It can be used to scale numpy arrays, too:
#!/usr/bin/env python
import numpy as np
import scipy.misc
def scale_array(x, new_size):
min_el = np.min(x)
max_el = np.max(x)
y = scipy.misc.imresize(x, new_size, mode='L', interp='nearest')
y = y / 255 * (max_el - min_el) + min_el
return y
x = np.array([[1, 1],
[0, 1]])
n = 2
new_size = n * np.array(x.shape)
y = scale_array(x, new_size)
print(y)
To scale effectively I use following approach. Works 5 times faster than repeat and 10 times faster that kron. First, initialise target array, to fill scaled array in-place. And predefine slices to win few cycles:
K = 2 # scale factor
a_x = numpy.zeros((h * K, w *K), dtype = a.dtype) # upscaled array
Y = a_x.shape[0]
X = a_x.shape[1]
myslices = []
for y in range(0, K) :
for x in range(0, K) :
s = slice(y,Y,K), slice(x,X,K)
myslices.append(s)
Now this function will do the scale:
def scale(A, B, slices): # fill A with B through slices
for s in slices: A[s] = B
Or the same thing simply in one function:
def scale(A, B, k): # fill A with B scaled by k
Y = A.shape[0]
X = A.shape[1]
for y in range(0, k):
for x in range(0, k):
A[y:Y:k, x:X:k] = B

Categories

Resources