Compare value in a 2d array to nearby values - python

I'm looking for a way to compare each value in a 2D array to values surrounding it and returning which values are close to the value of interest (within a threshold).
The ways I've explored involve iterating through each element of a 2D array, but I feel this is not the fastest or optimal way to do it.
The input would be a 2D array (size: i x j), and the output would be two 3D arrays (k x i x j) where the "extra" dimension is there to store the i and j indices of the nearby elements that are within a threshold.
Some code to illustrate what I am doing at the moment:
import numpy as np
from tqdm import tqdm
np.random.seed(seed=10)
arr = np.random.random((100, 100)) # Some 2D input array
threshold = 0.5
# Arrays for the row and col indices
i_all, j_all = np.mgrid[0:arr.shape[0],
0:arr.shape[1]]
# Footprint around the current element (ie looking at the 8 elements around the central value). Must be odd.
footprint = (3, 3)
footprint_size = np.product(footprint)
# Prepare output for i and j indices
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)
for p, element in enumerate(tqdm(arr.flatten())): # Iterate through each element
i, j = np.unravel_index(p, arr.shape)
# Create mask of elements to compare to
mask = ((i_all >= (i - (footprint[0] - 1) / 2)) &
(i_all <= (i + (footprint[0] - 1) / 2)) &
(j_all >= (j - (footprint[1] - 1) / 2)) &
(j_all <= (j + (footprint[1] - 1) / 2)))
# Create mask of those within the threshold
close_mask = abs(arr[mask] - element) <= threshold
if np.nansum(close_mask) < np.product(footprint): # If at edges need to pad to be able to index into output arrays
output_i[:, i, j] = np.pad(i_all[mask][close_mask].flatten().astype(float),
(int(footprint_size - np.nansum(close_mask)), 0),
mode='constant', constant_values=np.nan)
output_j[:, i, j] = np.pad(j_all[mask][close_mask].flatten().astype(float),
(int(footprint_size - np.nansum(close_mask)), 0),
mode='constant', constant_values=np.nan)
else: # Don't need to pad here
output_i[:, i, j] = i_all[mask][close_mask]
output_j[:, i, j] = j_all[mask][close_mask]
# Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
Which works fine for small arrays but is very slow when arrays have ~10^6 elements. The other idea I had was sliding the array over itself to compare values. This might be faster but I'm curious if there are any other ideas or built-in functions that can do a similar thing.

I do not know where, but I am prette sure your method has some bug. When you look at results, last (100x100) subarrays have all indices present.
What I wrote gives results that look better, is ~1000x faster, but still requires some testing from you. I might have made some error.
def faster_method(arr, threshold, footprint):
temp_arr = np.full((arr.shape[0] + footprint[0] - 1, arr.shape[1] + footprint[1] - 1), np.nan)
temp_arr[footprint[0] // 2: footprint[0] // 2 + arr.shape[0],
footprint[1] // 2: footprint[1] // 2 + arr.shape[1]] = arr
temp_i_all, temp_j_all = np.mgrid[-(footprint[0] // 2): arr.shape[0] + footprint[0] // 2,
-(footprint[1] // 2): arr.shape[1] + footprint[1] // 2]
footprint_size = np.product(footprint)
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)
output_idx = 0
for neighbour_vertical_position in range(footprint[0]):
for neighbour_horizontal_position in range(footprint[0]):
if neighbour_vertical_position == footprint[0] // 2 and neighbour_horizontal_position == footprint[1] // 2:
# center point, not a neighbour, so we can keep np.nan for it everywhere
continue
current_neighbour = temp_arr[neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[0],
neighbour_vertical_position: neighbour_vertical_position + arr.shape[1]]
current_i_all = temp_i_all[neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[0],
neighbour_vertical_position: neighbour_vertical_position + arr.shape[1]]
current_j_all = temp_j_all[neighbour_horizontal_position: neighbour_horizontal_position + arr.shape[0],
neighbour_vertical_position: neighbour_vertical_position + arr.shape[1]]
is_close_array = np.abs(arr - current_neighbour) > threshold
output_i[output_idx] = current_i_all + 0 / is_close_array
output_j[output_idx] = current_j_all + 0 / is_close_array
return output_i, output_j

Using dankal444's answer I managed to get this working:
def slidingCompare(arr, footprint=(3, 3), threshold=0.5):
"""
arr: 2D array | input
footprint: tuple | search window dimensions (must be odd)
threshold: float | Threshold for neighbours to be close
"""
import numpy as np
assert footprint[0] % 2 == 1, "Footprint dimensions should be odd. "
assert footprint[0] % 2 == 1, "Footprint dimensions should be odd. "
temp_arr = np.full((arr.shape[0] + footprint[0] - 1,
arr.shape[1] + footprint[1] - 1), np.nan)
temp_arr[footprint[0] // 2:footprint[0] // 2 + arr.shape[0],
footprint[1] // 2:footprint[1] // 2 + arr.shape[1]] = arr
# Arrays for the row and col indices
i_all, j_all = np.mgrid[-(footprint[0] // 2):arr.shape[0]+(footprint[0] // 2),
-(footprint[1] // 2):arr.shape[1]+(footprint[1] // 2)]
# Footprint around the current element (ie looking at the 8 elements around the central value). Must be odd.
footprint_size = np.product(footprint)
# Prepare output for i and j indices
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)
output_ix = np.arange(footprint_size).reshape(footprint)
for vert_pos in np.arange(footprint[0]):
for horiz_pos in np.arange(footprint[1]):
neighbour = temp_arr[vert_pos: vert_pos + arr.shape[0],
horiz_pos: horiz_pos + arr.shape[1]]
close_mask = abs(arr - neighbour) <= threshold
output_i[output_ix[vert_pos, horiz_pos], close_mask] = i_all[vert_pos: vert_pos + arr.shape[0],
horiz_pos: horiz_pos + arr.shape[1]][close_mask]
output_j[output_ix[vert_pos, horiz_pos], close_mask] = j_all[vert_pos: vert_pos + arr.shape[0],
horiz_pos: horiz_pos + arr.shape[1]][close_mask]
# Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
return output_i, output_j

Related

Matrix determinant

I have this function to get determinant of matrix
def determinant(self) -> int:
"""
Calculates the Determinant of matrix objects.
Parameters
----------
self
Returns
-------
int
Example
-------
>>> _matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
>>> _matrix = Matrix(_matrix)
>>> _matrix.determinant()
0
"""
if self.row != self.column:
raise ValueError('Cannot get determinant of this matrix! Must be a square Matrix')
else:
def det(matrix):
row = len(matrix)
col = len(matrix[0])
if (row, col) == (1, 1):
return matrix[0][0]
# hard coding for 2x2
elif (row, col) == (2, 2):
return matrix[0][0] * matrix[1][1] - matrix[0][1] * matrix[1][0]
# using sarrus method to solve for 3x3, it's a little faster.
elif (row, col) == (3, 3):
matrix1 = matrix[:]
# Extending matrix to use Sarrus Rule.
for i in range(row - 1):
_col = []
for j in range(col):
_col.append(matrix1[i][j])
matrix1.append(_col)
# Calculating Determinant
# Adding part
add_pointers = [(i, i) for i in range(row)]
result = 0
for pointer in range(row):
temp = 1
for tup in add_pointers:
i, j = tup
temp *= matrix1[i + pointer][j]
result += temp
# Subtracting part
sub_pointers = [((row - 1) - i, 0 + i) for i in range(row)]
for pointers in range(row):
temp = 1
for tup in sub_pointers:
i, j = tup
temp *= matrix1[i + pointers][j]
result -= temp
return result
else:
sign = -1
result = 0
row1 = [matrix[0][i] * (sign ** i) for i in range(col)]
for x, y in enumerate(row1):
mat = matrix[:][1:]
sub_matrix = [[mat[i][j] for j in range(col) if j != x] for i in range(row - 1)]
result += y * det(sub_matrix)
return result
return det(self.matrix)
i have hard-coded determinant of 2x2 and 3x3 matrix, then im recusing through the rest
as u can see its using recursion of nxn matrix(s)... i'm sure there is a faster way, this is extremely slow
A python implementation of the method would be recommended, thank you
The most common best ways would be either list comprehension or the numpy module.
Reason: The for loops will almost certainly be slower than a numpy array simply because of the contiguous and homogeneous nature of a numpy array. In simple terms numpy is basically one memory block all of the same type, where as a list points to different memory blocks and can contain any type.
Here is the numpy example (for 2d):
import numpy as np
a = np.array([[1, 2], [3, 4]])
result = np.linalg.det(a)
print(result)
One of the comments already (correctly) points to this:
https://numpy.org/doc/stable/reference/generated/numpy.linalg.det.html
For more general larger m*n matricies, the advantages would be significant.
Find determinant for 3x3 matrix using the first row:
"""
M:
M11 M12 M13
M21 M22 M23
M31 M32 M33
detM:
M11 * det2D([ [M22, M23], [M32, M33] ]) -
M12 * det2D([ [M21, M23], [M31, M33] ]) +
M13 * det2D([ [M21, M22], [M31, M32] ])
"""
import numpy as np
def det3D(M):
a = M[0][0] * det2D(np.array([ [ M[1][1],M[1][2] ], [ M[2][1],M[2][2] ] ]))
b = M[0][1] * det2D(np.array([ [ M[1][0],M[1][2] ], [ M[2][0],M[2][2] ] ]))
c = M[0][2] * det2D(np.array([ [ M[1][0],M[1][1] ], [ M[2][0],M[2][1] ] ]))
return a - b + c
def det2D(M):
return M[0][0]*M[1,1] - M[0][1] * M[1][0]
M = [ [1,0,0], [0,2,2], [0,2,4] ]
A = det3D(M)
B = round(np.linalg.det(M))
print(A)
print(B)
print(A == B)
Output:
4
4
True
Find determinant of NxN Matrix using recursion:
Note: there are two methods for finding determinants, smartDetNxN run >35X faster than detNxN in the best case on a large matrix.
import numpy as np
# compute partial determinant terms
def terms(M, col = 1, row = 1):
return [x[:col-1] + x[col:] for x in M[0:row-1] + M[row:]]
# compute determinant using first row
def detNxN(M):
N = len(M[0])
# Recursion Base: 2x2 determenant
if (N == 2):
M = np.array(M)
return M[0][0] * M[1,1] - M[0][1] * M[1][0]
# Recursion Loop
else:
rowValues = M[:1][0]
colsSigns = [1 if (col % 2 == 0) else -1 for col in range(N)]
colsDets = [detNxN(terms(M, col + 1)) for col in range(N)]
return sum([rowValues[col] * colsSigns[col] * colsDets[col] for col in range(N)])
# compute determinant using optimum row while skipping zero value columns
def smartDetNxN(M):
N = len(M[0])
# Recursion Base: 2x2 determenant
if (N == 2):
M = np.array(M)
return M[0][0] * M[1,1] - M[0][1] * M[1][0]
# Recursion Loop
else:
# find optimun row
flatM = [len(np.flatnonzero(x)) for x in M]
row = flatM.index(min(flatM))
rowSign = 1 if (row % 2 == 0) else -1
rowValues = M[row]
# compute partial determinants
colsSigns = [1 if (col % 2 == 0) else -1 for col in range(N)]
colsDets = [smartDetNxN(terms(M, col + 1, row + 1)) if (rowValues[col] != 0) else 0 for col in range(N)]
return sum([rowValues[col] * rowSign * colsSigns[col] * colsDets[col] for col in range(N)])
# test case for matrix
def testCase(M):
print()
N1 = len(M[0])
N2 = len(M[0])
A = smartDetNxN(M)
B = round(np.linalg.det(M))
print("Matrix %ix%i:" % (N1, N2))
print("Actual detM = %d, Expected detM = %d " % (A, B))
print("Test Pass:", A == B)
# main
def main():
# Matrix 2 x 2
M1 = [[1,2,],[0,1]]
testCase(M1)
# Matrix 3 x 3
M2 = [[1,2,3],[2,1,2],[3,2,1]]
testCase(M2)
# Matrix 4 x 4
M3 = [[1,2,3,4], [2,1,0,3], [3,0,1,2], [4,0,0,1]]
testCase(M3)
# Matrix 10 x 10
M4 = [
[0,1,2,3,4,5,6,7,8,9],
[1,1,0,0,0,0,0,0,0,8],
[2,0,1,0,0,0,0,0,0,7],
[3,0,0,1,0,0,0,0,0,6],
[4,0,0,0,1,0,0,0,0,5],
[5,0,0,0,0,1,0,0,0,4],
[6,0,0,0,0,0,1,0,0,3],
[7,0,0,0,0,0,0,1,0,2],
[8,0,0,0,0,0,0,0,1,1],
[9,0,0,0,0,0,0,0,0,0],
]
testCase(M4)
main()
Output:
Matrix 2x2:
Actual detM = 1, Expected detM = 1
Test Pass: True
Matrix 3x3:
Actual detM = 8, Expected detM = 8
Test Pass: True
Matrix 4x4:
Actual detM = 20, Expected detM = 20
Test Pass: True
Matrix 10x10:
Actual detM = 999, Expected detM = 999
Test Pass: True

Projection of matrix onto a simplex

I have problem with understanding this piece of code which based on the output, I guess it computes the eigenvector of the matrix.
def simplexProj(y):
"""
Given y, computes its projection x* onto the simplex
Delta = { x | x >= 0 and sum(x) <= 1 },
that is, x* = argmin_x ||x-y||_2 such that x in Delta.
x = SimplexProj(y)
****** Input ******
y : input vector.
****** Output ******
x : projection of y onto Delta.
"""
if len(y.shape) == 1: # Reshape to (1,-1) if y is a vector.
y = y.reshape(1, -1) # row vector
x = y.copy()
x[x < 0] = 0 #element within the matrix that is negative will be replaced with 0, python2 feature
K = np.flatnonzero(np.sum(x, 0) > 1) #return indices that are non-zero in the flattened version of a ; sum of each column
# K gives the column index for column that has colum sum>1, True = 1, False = 0
x[:, K] = blockSimplexProj(y[:, K])
return x
def blockSimplexProj(y):
""" Same as function SimplexProj except that sum(max(Y,0)) > 1. """
r, c = y.shape
ys = -np.sort(-y, axis=0) #sort each column of the matrix with biggest entry on the first row
mu = np.zeros(c, dtype=float)
S = np.zeros((r, c), dtype=float)
for i in range(1, r): #1st to r-1th row
S[i, :] = np.sum(ys[:i, :] - ys[i, :], 0)
print(S)
colInd_ge1 = np.flatnonzero(S[i, :] >= 1)
colInd_lt1 = np.flatnonzero(S[i, :] < 1)
if len(colInd_ge1) > 0:
mu[colInd_ge1] = (1 - S[i - 1, colInd_ge1]) / i - ys[i - 1, colInd_ge1]
if i == r:
mu[colInd_lt1] = (1 - S[r, colInd_lt1]) / (r + 1) - ys[r, colInd_lt1]
x = y + mu
x[x < 0] = 0
return x
I'm a bit puzzle by the step computing the matrix S because according to the code, the row of first row of S should be all 0. Take for example the matrix A = np.array([[25,70,39,10,80],[12,45,32,89,43],[67,24,84,39,21],[0.1,0.2,0.3,0.035,0.06]]) The 3 iterations (i=1,2,3) are computed as expected but then there is an extra step which seemingly gives back S as basis of eigenvectors. It would be great if somebody can help me with understanding this problem. Also I#m not sure what's the name of this algorithm (how S is computed)

Refactor matrix permutations in numpy's style

I wrote the following code to do multiplication of matrix permutations and I was wondering if it can be written in a numpy style, such that I can get rid of the two for loops:
Z = np.empty([new_d, X.shape[1]])
Z = np.ndarray(shape=(new_d, X.shape[1]))
Z = np.concatenate((X, X**2))
res = []
for i in range(0, d):
for j in range(i+1, d):
res.append(np.array(X.T[:,i]* X.T[:,j]))
Z = np.concatenate((Z, res))
while: X shape is (7, 1000), d = 7, new_d=35
any suggestion ?
Approach #1
We could use np.triu_indices to get those pair-wise permutation-indices and then simply perform elementwise multiplicatons of row-indexed arrays -
r,c = np.triu_indices(d,1)
res = X[r]*X[c]
Approach #2
For memory efficiency and hence performance especially on large arrays, we are better off slicing the input array and run a single loop with each iteration working on chunks of data, like so -
n = d-1
idx = np.concatenate(( [0], np.arange(n,0,-1).cumsum() ))
start, stop = idx[:-1], idx[1:]
L = n*(n+1)//2
res_out = np.empty((L,X.shape[1]), dtype=X.dtype)
for i,(s0,s1) in enumerate(zip(start,stop)):
res_out[s0:s1] = X[i] * X[i+1:]
To get Z directly and thus avoid all those concatenations, we could modify the earlier posted approach, like so -
n = d-1
N = len(X)
idx = 2*N + np.concatenate(( [0], np.arange(n,0,-1).cumsum() ))
start, stop = idx[:-1], idx[1:]
L = n*(n+1)//2
Z_out = np.empty((2*N + L,X.shape[1]), dtype=X.dtype)
Z_out[:N] = X
Z_out[N:2*N] = X**2
for i,(s0,s1) in enumerate(zip(start,stop)):
Z_out[s0:s1] = X[i] * X[i+1:]

Python .append seems to run forever and the 'uniform' value seems not too random (making Poisson sphere distribution in Python)

I am now trying to calculate the poisson sphere distribution(a 3D version of the poisson disk) using python and then plug in the result to POV-RAY so that I can generate some random distributed packing rocks.
I am following these two links:
[https://github.com/CodingTrain/Rainbow-Code/blob/master/CodingChallenges/CC_33_poisson_disc/sketch.js#L13]
[https://www.cs.ubc.ca/~rbridson/docs/bridson-siggraph07-poissondisk.pdf]tl;dr
0.Create an n-dimensional grid array and cell size = r/sqrt(n) where r is the minimum distance between each sphere. All arrays are set to be default -1 which stands for 'without point'
1.Create an initial sample. (it should be placed randomly but I choose to put it in the middle). Put it in the grid array. Also, intialize an active array. Put the initial sample in the active array.
2.While the active list is not empty, pick a random index. Generate points near it and make sure the points are not overlapping with nearby points(only test with the nearby arrays). If no sample can be created near the 'random index', kick the 'random index' out. Loop the process.
And here is my code:
import math
import numpy
from random import uniform
import random
from math import floor
r = 1
k = 30
grid = []
w = r / math.sqrt(2)
active = []
width = 100
height = 100
depth = 100
cols = floor(width / w)
rows = floor(height / w)
deps = floor(depth / w)
default = numpy.array((-1,-1,-1))
for i in range(cols * rows * deps):
grid.append(default)
x = width / 2
y = height / 2
z = depth / 2
i = floor(x / w)
j = floor(y / w)
k = floor(z / w)
pos = numpy.array((x,y,z))
grid[i + cols * (j + rows * k)] = pos
active.append(pos)
while (len(active) > 0) and (len(grid[grid == -1]) > 0):
randIndex = floor(uniform(0, len(active)))
pos = active[randIndex]
found = False
for n in range(k):
m1 = uniform(-2 * r, 2 * r)
m2 = uniform(-2 * r, 2 * r)
m3 = uniform(-2 * r, 2 * r)
m = numpy.array((m1,m2,m3))
sample = numpy.add(pos, m)
col = floor(sample[0] / w)
row = floor(sample[1] / w)
dep = floor(sample[2] / w)
if (col > -1 and row > -1 and dep > -1 and col < cols and row < rows and dep < deps and numpy.all([grid[col + cols * (row + rows * dep)],default])==True):
ok = True
for i in range(-1,2):
for j in range(-1, 2):
for k in range(-1, 2):
index = (col + i) + cols * ((row + j) + rows * (dep + k))
if col + i > -1 and row + j > -1 and dep + k > -1 and col + i < cols and row + j < rows and dep + k < deps:
neighbor = grid[index]
if numpy.all([neighbor, default]) == False:
d = numpy.linalg.norm(sample - neighbor)
if (d < r):
ok = False
if ok == True:
found = True
grid[col + cols * (row + rows * dep)] = sample
active.append(sample)
if found == False:
del active[randIndex]
print(len(active))
for printout in range(len(grid)):
print("<" + str(active[printout][0]) + "," + str(active[printout][1]) + "," + str(active[printout][2]) + ">")
print(len(grid))
My code seems to run forever and do not obey my condition(distance of two spheres must be larger than 2 * radius) as shown in the visualization by POV-RAY.(picture in comment)
Therefore I tried to add a print(len(active)) in the last of the while loop.
Surprisingly, I think I discovered the bug as the length of the active list just keep increasing! (It is supposed to be the same length as the grid) I think the problem is caused by the active.append(), but I can't figure out where is the problem as the code is literally the 90% the same as the one made by Mr.Shiffman.
I don't want to free ride this but I have already checked again and again while correcting again and again for this code :(. Still, I don't know where the bug is. (why do the active[] keep appending!?)
Thank you for the precious time.

Computing an map of minimum distance to a number of points

Given a list of points obstacles (given as a list of row, column matrix coordinates, an ndarray of shape (n, 2)), return a map of size size (where size is the shape of the 2D NumPy array) in which the value of r, c is the Euclidean distance to the closest "obstacle."
def gen_distgrid(size, obstacles):
n_obstacles = obstacles.shape[0]
distgrids = np.zeros((n_obstacles + 4, size[0], size[1]))
for layer in range(n_obstacles):
for i in range(size[0]):
for j in range(size[1]):
distgrids[layer, i, j] = np.linalg.norm(obstacles[layer,:] - [i,j])
for i in range(size[0]):
for j in range(size[1]):
distgrids[n_obstacles + 0, i, j] = i
distgrids[n_obstacles + 1, i, j] = (size[0] - i)
distgrids[n_obstacles + 2, i, j] = j
distgrids[n_obstacles + 3, i, j] = (size[1] - j)
distgrid = np.min(distgrids, axis=0)
return distgrid
My method is really slow, and I feel like there should be a better one.
Here is a solution to a similar problem using a KD-tree with Numpy and SciPy. Just insert your obstacles into the KD-tree and query the tree for each grid point to get its nearest neighbor.
I ended up using a KD-tree from SciPy. It has a very easy distance function.
from scipy.spatial import cKDTree as KDTree
def gen_distgrid(obstacles):
n_obstacles = obstacles.shape[0]
obstacles = np.vstack((obstacles, [0,0], [0, size[1] - 1], [size[0] - 1, 0], [size[0] - 1, size[1] - 1]))
distgrid = np.zeros((size[0], size[1]))
obs_tree = KDTree(data=obstacles)
i_v = np.arange(size[0])
j_v = np.arange(size[1])
coordmat = np.dstack(np.meshgrid(i_v, j_v, indexing='ij'))
obs_dists, obs_locs = obs_tree.query(coordmat)
top_dists = np.repeat(i_v, size[1]).reshape(size)
bottom_dists = np.repeat(size[0] - i_v, size[1]).reshape(size)
left_dists = np.repeat(j_v, size[0]).reshape(np.transpose(size)).T
right_dists = np.repeat(size[1] - j_v, size[0]).reshape(np.transpose(size)).T
dists = np.min([obs_dists, top_dists, bottom_dists, left_dists, right_dists], axis=0)
return dists

Categories

Resources