Get all component stats of multiple arrays labeled by one of them - python

I already asked a similar question which got answered but now this is more in detail:
I need a really fast way to get all important component stats of two arrays, where one array is labeled by OpenCV (cv2) and provides the component areas for both arrays. The stats for all components masked on the two arrays should then be saved to a dictionary. My approach works, but it is much too slow. Is there a way to avoid the loop, or a better approach than ndimage.labeled_comprehension?
from scipy import ndimage
import numpy as np
import cv2
def calculateMeanMaxMin(val):
    """Summarise ``val`` as a 3-element array.

    The element order of the result is ``[mean, max, min]``.
    """
    average = np.mean(val)
    largest = np.max(val)
    smallest = np.min(val)
    return np.array([average, largest, smallest])
def getTheStatsForComponents(array1,array2):
    """Return one statistics dictionary per connected component of ``array2``.

    ``array2`` is thresholded at 120 and labelled with 8-connectivity; the
    resulting label image is then used as a mask on both ``array1`` and
    ``array2``.

    Args:
        array1: array with the same shape as ``array2``.
        array2: image whose thresholded version defines the components.

    Returns:
        A list with one dict per foreground component (label order), with
        the keys 'position', 'area', 'height', 'width', 'meanArray1',
        'maxArray1', 'minArray1', 'meanArray2', 'maxArray2', 'minArray2'
        and 'elongation'. The list is empty when no foreground is found.
    """
    _, thresholded = cv2.threshold(array2, 120, 255, cv2.THRESH_BINARY)
    thresholded = thresholded.astype(np.uint8)
    numLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresholded, 8, cv2.CV_8UC1)
    if numLabels <= 1:
        # Only the background label exists: nothing to describe.
        return []

    # numLabels includes the background label 0, so the foreground labels are
    # 1 .. numLabels-1. The previous upper bound numLabels+1 asked for a
    # label that does not exist.
    foregroundLabels = np.arange(1, numLabels)
    meanmaxminArray2 = ndimage.labeled_comprehension(array2, labels, foregroundLabels, calculateMeanMaxMin, np.ndarray, 0)
    meanmaxminArray1 = ndimage.labeled_comprehension(array1, labels, foregroundLabels, calculateMeanMaxMin, np.ndarray, 0)

    allComponentStats = []
    for position, label in enumerate(foregroundLabels):
        currentLabel = np.uint8(labels == label)
        # findContours returns 2 or 3 values depending on the OpenCV major
        # version; the contour list is always the second-to-last one.
        contours = cv2.findContours(currentLabel, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]
        side1, side2 = cv2.minAreaRect(contours[0])[1]
        componentStat = stats[label]
        meanArray1, maxArray1, minArray1 = meanmaxminArray1[position]
        meanArray2, maxArray2, minArray2 = meanmaxminArray2[position]

        # Ratio of the shorter to the longer side of the min-area rectangle;
        # 0 for a degenerate rectangle.
        longSide = max(side1, side2)
        shortSide = min(side1, side2)
        elongation = np.float32(shortSide / longSide) if longSide > 0 else np.float32(0)

        allComponentStats.append({
            'position': centroids[label, :],
            'area': componentStat[cv2.CC_STAT_AREA],
            'height': componentStat[cv2.CC_STAT_HEIGHT],
            'width': componentStat[cv2.CC_STAT_WIDTH],
            'meanArray1': meanArray1,
            'maxArray1': maxArray1,
            'minArray1': minArray1,
            'meanArray2': meanArray2,
            'maxArray2': maxArray2,
            'minArray2': minArray2,
            'elongation': elongation,
        })
    return allComponentStats
EDIT
The two arrays are 2d arrays:
# Sample inputs: two independent 512x512 uint8 images.
# np.random.choice(255, ...) draws from 0..254, so the value 255 never occurs.
array1= np.random.choice(255,(512,512)).astype(np.uint8)
array2= np.random.choice(255,(512,512)).astype(np.uint8)
EDIT2
Small example of two arrays and the labelArray with two components (1 and 2, plus background 0). Calculate the min, max and mean with ndimage.labeled_comprehension.
from scipy import ndimage
import numpy as np
# Label image: 0 is background, 1 and 2 are the two components.
labelArray = np.array([[0,1,1,1],[2,2,1,1],[2,2,0,1]])
# Two value arrays that are both masked by labelArray.
data = np.array([[0.1,0.2,0.99,0.2],[0.34,0.43,0.87,0.33],[0.22,0.53,0.1,0.456]])
data2 = np.array([[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2]])
# Here numLabels counts only the foreground components, hence the +1 below.
numLabels = 2
# Minimum of each component (labels 1 and 2); the background is not requested.
minimumDataForAllLabels = ndimage.labeled_comprehension(data, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
minimumData2ForallLabels = ndimage.labeled_comprehension(data2, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
print(minimumDataForAllLabels)
print(minimumData2ForallLabels)
# NOTE(review): bin_and_do_simple_stats is not defined in this snippet; it
# must be available before this line runs. It bins on the raw label values,
# so the background label 0 gets a bin of its own.
print(bin_and_do_simple_stats(labelArray.flatten(),data.flatten()))
Output:
[0.2 0.22] ##minimum of component 1 and 2 from data
[0.2 0.1] ##minimum of component 1 and 2 from data2
[0.1 0.2 0.22] ##minimum output of bin_and_do_simple_stats from data

labeled_comprehension is definitely slow.
At least the simple stats can be done much faster based on the linked post. For simplicity I'm only doing one data array, but as the procedure returns sort indices it can be easily extended to multiple arrays:
import numpy as np
from scipy import sparse
# Optional accelerated backend: use the compiled helper when it can be
# imported, otherwise fall back to the scipy.sparse implementation.
try:
    from stb_pthr import sort_to_bins as _stb_pthr
except ImportError:
    # Only a missing/unloadable module means "not available"; any other
    # exception is a real error and must not be swallowed.
    HAVE_PYTHRAN = False
else:
    HAVE_PYTHRAN = True
# fallback if pythran not available
def sort_to_bins_sparse(idx, data, mx=-1):
    """Group ``data`` by the bin numbers in ``idx`` via a CSR -> CSC conversion.

    Args:
        idx: 1-D integer array, the bin number of every element.
        data: 1-D array of the same length as ``idx``.
        mx: number of bins; ``-1`` means ``idx.max() + 1``.

    Returns:
        ``(sorted_data, order, indptr)``: the data grouped by bin, the
        original position of every grouped element, and the bin boundaries
        (bin ``b`` occupies ``sorted_data[indptr[b]:indptr[b + 1]]``).
    """
    n_items = len(idx)
    n_bins = idx.max() + 1 if mx == -1 else mx
    # One stored value per row (row r holds data[r] in column idx[r]);
    # converting to CSC regroups those values column by column, i.e. by bin.
    row_ptr = np.arange(n_items + 1)
    by_row = sparse.csr_matrix((data, idx, row_ptr), (n_items, n_bins))
    by_bin = by_row.tocsc()
    return by_bin.data, by_bin.indices, by_bin.indptr
def sort_to_bins_pythran(idx, data, mx=-1):
    """Group ``data`` by bin using the compiled ``_stb_pthr`` helper.

    Returns the same ``(sorted_data, order, indptr)`` triple as
    ``sort_to_bins_sparse``.
    """
    order, bin_ptr = _stb_pthr(idx, mx)
    grouped = data[order]
    return grouped, order, bin_ptr
# pick best available
if HAVE_PYTHRAN:
    sort_to_bins = sort_to_bins_pythran
else:
    sort_to_bins = sort_to_bins_sparse

# example data
idx = np.random.randint(0, 10, (100000))
data = np.random.random(100000)

# if possible compare the two methods: both backends must agree on the
# grouped data, the sort order and the bin boundaries
if HAVE_PYTHRAN:
    dsp, isp, psp = sort_to_bins_sparse(idx, data)
    dph, iph, pph = sort_to_bins_pythran(idx, data)
    for from_sparse, from_pythran in ((dsp, dph), (isp, iph), (psp, pph)):
        assert (from_sparse == from_pythran).all()
# example how to do simple vectorized calculations
def simple_stats(data,iptr):
    """Return the per-bin minimum and mean of bin-sorted ``data``.

    Args:
        data: 1-D array already grouped by bin.
        iptr: bin boundaries; bin ``b`` is ``data[iptr[b]:iptr[b + 1]]``.

    Returns:
        ``(minima, means)``, two float arrays with one entry per bin.
        Empty bins yield ``nan`` in both arrays.
    """
    data = np.asarray(data)
    iptr = np.asarray(iptr)
    counts = np.diff(iptr)
    filled = counts > 0
    minima = np.full(counts.shape, np.nan)
    means = np.full(counts.shape, np.nan)
    # ufunc.reduceat does not treat an empty slice as empty: it returns the
    # single element at that start index (or raises when the index equals
    # len(data)). Reducing only at the starts of the non-empty bins avoids
    # both problems. Skipping the empty bins does not change any segment,
    # because they contribute no elements.
    starts = iptr[:-1][filled]
    if starts.size:
        minima[filled] = np.minimum.reduceat(data, starts)
        means[filled] = np.add.reduceat(data, starts) / counts[filled]
    return minima, means
def bin_and_do_simple_stats(idx, data, mx=-1):
    """Group ``data`` by the bin numbers in ``idx`` and return ``simple_stats`` of the bins.

    ``mx`` is forwarded to ``sort_to_bins`` unchanged.
    """
    binned, _order, bin_ptr = sort_to_bins(idx, data, mx)
    return simple_stats(binned, bin_ptr)
# bin_and_do_simple_stats returns (minima, means); unpack both into the message.
print("minima: {}\n mean values: {}".format(*bin_and_do_simple_stats(idx,data)))
If you have pythran (not required but a bit faster), compile this as <stb_pthr.py>:
import numpy as np
#pythran export sort_to_bins(int[:], int)
def sort_to_bins(idx, mx):
    # Counting sort of the positions 0 .. idx.size-1 by their bin number.
    # Returns (order, indptr): `order` lists the positions grouped by bin,
    # and bin b occupies order[indptr[b]:indptr[b + 1]].
    if mx == -1:
        mx = idx.max() + 1
    # Pass 1: cursor[b + 2] holds the population of bin b.
    cursor = np.zeros(mx + 2, int)
    n = idx.size
    for pos in range(n):
        cursor[idx[pos] + 2] += 1
    # Running total: cursor[b + 1] is now the first output slot of bin b.
    for b in range(2, cursor.size):
        cursor[b] += cursor[b - 1]
    # Pass 2: drop every position into the next free slot of its bin. The
    # slot counters advance as they are used, so they finish one bin "ahead"
    # and cursor[:-1] ends up being the usual indptr array.
    order = np.empty_like(idx)
    for pos in range(n):
        slot = idx[pos] + 1
        order[cursor[slot]] = pos
        cursor[slot] += 1
    return order, cursor[:-1]

Related

python 2d convolution optimization

I'm interested in image convolution. Here is my code to perform convolutions with a 3x3 kernel. I'm looking for any ideas on how to make it run faster.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np
# Source image and the buffer the filtered result is written into.
img = mpimg.imread('benfrank.png')
imgCopy = img.copy()
# Bounds used by convulute3x3 to skip the border.
# NOTE(review): hard-coded; presumably the size of benfrank.png - confirm,
# or derive them from img.shape.
Width = 1200
Height = 1464
# Column / row counters used by the driver loop.
x1 = 0
y1 = 0
# NOTE(review): cWidth and cHeight are not referenced by the visible code.
cWidth = 3
cHeight = 3
# The 3x3 kernel.
convul = np.array([[0,0,-5],
[0,1,0],
[-5,0,0]])
# Sum of all nine kernel entries (-9 for this kernel), used as the normaliser.
summ = convul[2,2]+convul[2,1]+convul[2,0]+convul[1,2]+convul[1,1]+convul[1,0]+convul[0,2]+convul[0,1]+convul[0,0]
def convulute3x3(x,y):
    """Filter pixel (column ``x``, row ``y``) of ``img`` with the 3x3 kernel ``convul``.

    Reads the module-level ``img``, ``convul``, ``summ``, ``Width`` and
    ``Height`` and writes the result into ``imgCopy`` for the first three
    channels. Pixels on the border (no complete 3x3 neighbourhood inside
    ``Width`` x ``Height``) are left untouched.

    The kernel is applied as written (no flipping), and kernel row/column
    now line up with image row/column. The previous code indexed the kernel
    as ``convul[column, row]``, i.e. transposed, which only went unnoticed
    because this particular kernel is symmetric.
    """
    if x < 1 or x > Width - 2:
        return
    if y < 1 or y > Height - 2:
        return
    for c in range(3):
        # 3x3 neighbourhood centred on (y, x) in channel c
        patch = img[y - 1:y + 2, x - 1:x + 2, c]
        imgCopy[y, x, c] = np.sum(patch * convul) / summ
# Visit every pixel: y1 is the row index, x1 the column index.
# convulute3x3 itself skips the border pixels.
for y1, pixel_row in enumerate(img):
    for x1, _pixel in enumerate(pixel_row):
        convulute3x3(x1, y1)
plt.imshow(imgCopy)
plt.show()
As @Reti43 has mentioned in the comments, libraries that do this already exist, but I suspect you just want to play around with some home-made implementations.
I too have been interested in how to implement convolutions manually in Python. Python loops are terribly slow, and if you care about speed you should stay away from pure python loops and instead stick to more vectorized methods.
The best I have so far is to use numpy.lib.stride_tricks.as_strided, which allows you to get very customized views of numpy arrays. I use as_strided to get a sliding window view of the image, then I use np.tensordot to do a "more general matrix multiplication" (docs) with the kernel. Furthermore, numpy 1.20 (iirc) has numpy.lib.stride_tricks.sliding_window_view, which is a little less general version of my code below (as of this date), as it cannot do custom strides.
import numpy as np
from numpy.lib.stride_tricks import as_strided
def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Return a strided view holding every kernel-sized window of ``x``.

    Args:
        x: image of shape (channels, rows, cols).
        k: kernel of shape (channels, kernel_rows, kernel_cols); only its
            shape is used here.
        rowstride: vertical step between neighbouring windows.
        colstride: horizontal step between neighbouring windows.

    Returns:
        A view on ``x`` (no data is copied) of shape
        (out_rows, out_cols, channels, kernel_rows, kernel_cols).
    """
    imgChannels, imgRows, imgCols = x.shape
    _, kernelRows, kernelCols = k.shape
    # as_strided expects strides in BYTES. Taking them from x.strides
    # (instead of rebuilding them from itemsize and the shape) keeps the
    # view correct for non-contiguous inputs such as a transposed image.
    chanStep, rowStep, colStep = x.strides
    outRows = (imgRows - kernelRows) // rowstride + 1
    outCols = (imgCols - kernelCols) // colstride + 1
    return as_strided(
        x,
        shape=(outRows, outCols, imgChannels, kernelRows, kernelCols),
        strides=(rowStep * rowstride, colStep * colstride, chanStep, rowStep, colStep),
    )
def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """
    Slide a multi-channel kernel over a multi-channel image with
    configurable step sizes. The kernel is applied as given (it is not
    flipped).

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, convolution kernel of shape (C x P x Q), where C is number of channels
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size

    Returns one value per window position, as a 2-d array.
    """
    windows = get_sliding_window(x, k, rowstride, colstride)
    # axes=3 contracts the three trailing (channel, row, col) axes of every
    # window against the whole kernel.
    result = np.tensordot(windows, k, axes=3)
    return result
# Example image: 2 channels of 4x4 pixels.
x = np.array([
[[1,1,1,1],
[1,1,1,1],
[2,2,2,2],
[2,2,2,2]],
[[1,1,2,2],
[1,1,2,2],
[4,4,8,8],
[4,4,8,8]]
])
# Example kernel: 2 channels of 2x2, all entries 1/8, so every output value
# is the mean of the 8 numbers under the window.
k = np.array([
[[1,1],
[1,1]],
[[1,1],
[1,1]]
]) / 8
# Step size 1 in both directions -> 3x3 output.
print(conv2d(x,k,1,1))
#[[1. 1.25 1.5 ]
# [2. 2.625 3.25 ]
# [3. 4. 5. ]]
# Step size 2 in both directions -> 2x2 output.
print(conv2d(x,k,2,2))
#[[1. 1.5]
# [3. 5. ]]
Bonus
I implemented an ascii visualization thing to sanity check that sliding windows is correct:
import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Animate the sliding window in the terminal and return the filtered result.

    Takes the same arguments as ``conv2d``. For every window position the
    entries currently under the window are shown highlighted, together with
    the result computed so far.
    """
    # Work on an object-dtype copy so individual entries can temporarily be
    # replaced by coloured strings without touching the caller's array.
    x = x.copy().astype(object)
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    # Wraps a value in the ANSI codes for yellow text (33m) and reset (0m).
    highlighter = np.vectorize(lambda x: f"\x1b[33m{x}\x1b[0m")
    # Result grid; nan marks positions that have not been computed yet and is
    # printed as an empty string (nanstr="").
    r = np.full(sliding_window_view.shape[:2], np.nan)
    with np.printoptions(nanstr="", formatter={"all":lambda x: str(x)}):
        for i, row in enumerate(sliding_window_view):
            for j, window in enumerate(row):
                # window is a view into x: remember the real values, compute
                # the result, then overwrite the view with highlighted text.
                temp = window.copy()
                r[i,j] = np.tensordot(window, k, axes=3)
                window[...] = highlighter(window)
                # \x1b[J clears from the cursor down; the trailing \x1b[<n>A
                # moves the cursor back up so the next frame overwrites this one.
                print(f"\x1b[JChannels:\n{x}\n\nResult:\n{str(r)}\x1b[{x.shape[0]*x.shape[1]+len(r)+4}A")
                # Restore the real values before moving to the next window.
                window[...] = temp
                time.sleep(0.69)
    # Move the cursor back down below the last frame.
    print(f"\x1b[{x.shape[0]*x.shape[1]+len(r)+4}B")
    return r
# NOTE(review): this calls conv2d, not conv2d_asciiviz - confirm which was intended.
print("Output:\n",conv2d(x,k,1,1))

python kmedoids - calculating new medoid centers more efficiently

I'm following an excellent medium article: https://towardsdatascience.com/k-medoids-clustering-on-iris-data-set-1931bf781e05 to implement kmedoids from scratch. There is a place in the code where each pixel's distance to the medoid centers is calculated and it is VERY slow. It has numpy.linalg.norm inside a loop. Is there a way to optimize this with numpy.linalg.norm or with numpy broadcasting or scipy.spatial.distance.cdist and np.argmin to do the same thing?
###helper function here###
def compute_d_p(X, medoids, p):
    """Return the p-th power of the L-p distance from every row of X to every medoid.

    ``medoids`` may be a 2-D array (one medoid per row) or a single 1-D
    vector. The result has shape (len(X), number of medoids).
    """
    # A single medoid given as a 1-D vector is treated as one row.
    if medoids.ndim == 1:
        medoids = medoids.reshape((1, len(medoids)))
    n_points, n_medoids = len(X), len(medoids)
    S = np.empty((n_points, n_medoids))
    for row, point in enumerate(X):
        # distances from this point to all medoids at once
        S[row, :] = np.linalg.norm(point - medoids, ord=p, axis=1) ** p
    return S
This is where the slowdown occurs:
for datap in cluster_points:
new_medoid = datap
new_dissimilarity= np.sum(compute_d_p(X, datap, p))
if new_dissimilarity < avg_dissimilarity :
avg_dissimilarity = new_dissimilarity
out_medoids[i] = datap
Full code below. All credits to the article author.
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
# Dataset
# Load the iris measurements into a DataFrame; keep class names and labels.
iris = datasets.load_iris()
data = pd.DataFrame(iris.data,columns = iris.feature_names)
target = iris.target_names
labels = iris.target
# Scaling: rescale every feature with MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
# PCA transformation: keep the first three principal components
# (PCA is imported a second time here; harmless but redundant)
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(data)
PCAdf = pd.DataFrame(data = principalComponents , columns = ['principal component 1', 'principal component 2','principal component 3'])
# Plain array of the projected points: m rows (samples), f columns (components).
datapoints = PCAdf.values
m, f = datapoints.shape
# Number of clusters.
k = 3
def init_medoids(X, k):
    """Pick ``k`` distinct rows of ``X`` as the starting medoids.

    The global NumPy random generator is re-seeded with 1 on every call, so
    the same rows are chosen each time for a given ``len(X)`` and ``k``.
    """
    np.random.seed(1)
    chosen_rows = np.random.choice(len(X), size=k, replace=False)
    return X[chosen_rows, :]
# Three starting medoids drawn from the projected data points.
medoids_initial = init_medoids(datapoints, 3)
def compute_d_p(X, medoids, p):
    """Build the (points x medoids) table of L-p distances raised to the power p.

    A 1-D ``medoids`` vector is interpreted as a single medoid.
    """
    if medoids.ndim == 1:
        # promote the lone medoid to a one-row matrix
        medoids = medoids.reshape((1, len(medoids)))
    S = np.empty((len(X), len(medoids)))
    for i, x_i in enumerate(X):
        to_each_medoid = np.linalg.norm(x_i - medoids, ord=p, axis=1)
        S[i, :] = to_each_medoid ** p
    return S
# Squared Euclidean distance (p=2) from every data point to every initial medoid.
S = compute_d_p(datapoints, medoids_initial, 2)
def assign_labels(S):
    """Return, for every row of ``S``, the column index of its smallest entry."""
    nearest = np.argmin(S, axis=1)
    return nearest
# Index of the nearest initial medoid for every data point
# (this rebinds `labels`, which held iris.target before).
labels = assign_labels(S)
def update_medoids(X, medoids, p):
    """Return improved medoids for the clustering induced by ``medoids``.

    Every point is assigned to its nearest medoid. Within each cluster, the
    member with the smallest total dissimilarity replaces the current medoid
    if it is strictly better. Total dissimilarity is the sum of
    ``compute_d_p`` over all rows of ``X``, not only the cluster members.

    Args:
        X: data points, one per row.
        medoids: current medoids, one per row (not modified).
        p: order of the norm, forwarded to ``compute_d_p``.

    Returns:
        A new array with the same shape as ``medoids``.
    """
    labels = assign_labels(compute_d_p(X, medoids, p))
    # Work on a copy so the caller's array is not changed behind its back.
    out_medoids = medoids.copy()
    for i in set(labels):
        current_cost = np.sum(compute_d_p(X, medoids[i], p))
        cluster_points = X[labels == i]
        # One distance matrix per cluster instead of one per candidate:
        # column j is the total cost of using cluster_points[j] as the medoid.
        candidate_costs = compute_d_p(X, cluster_points, p).sum(axis=0)
        # argmin returns the first minimum, matching a sequential scan that
        # only accepts strictly better candidates.
        best = np.argmin(candidate_costs)
        if candidate_costs[best] < current_cost:
            out_medoids[i] = cluster_points[best]
    return out_medoids
def has_converged(old_medoids, medoids):
    """Return True when both medoid collections contain the same points.

    Rows are compared as tuples in a set, so row order and duplicates do not
    matter.
    """
    before = {tuple(row) for row in old_medoids}
    after = {tuple(row) for row in medoids}
    return before == after
#Full algorithm
def kmedoids(X, k, p, starting_medoids=None, max_steps=np.inf):
    """Cluster the rows of ``X`` around ``k`` medoids.

    Args:
        X: data points, one per row.
        k: number of medoids; only used when ``starting_medoids`` is None.
        p: order of the norm, forwarded to ``compute_d_p``.
        starting_medoids: optional initial medoids, one per row (not
            modified); defaults to ``init_medoids(X, k)``.
        max_steps: upper bound on the number of update rounds.

    Returns:
        ``(medoids, labels)``, where ``labels[i]`` is the index of the medoid
        nearest to ``X[i]``. The labels are always computed against the
        returned medoids, including when the loop stops on ``max_steps``.
    """
    if starting_medoids is None:
        medoids = init_medoids(X, k)
    else:
        # Copy so the caller's array is never shared with the working state.
        medoids = np.array(starting_medoids, copy=True)
    converged = False
    step = 1
    while (not converged) and (step <= max_steps):
        old_medoids = medoids.copy()
        medoids = update_medoids(X, medoids, p)
        converged = has_converged(old_medoids, medoids)
        step += 1
    # Assign once, at the end: update_medoids does its own assignment
    # internally, so per-iteration labels were never used.
    labels = assign_labels(compute_d_p(X, medoids, p))
    return (medoids, labels)
# Run k-medoids with 3 clusters and p=2 on the projected points.
results = kmedoids(datapoints, 3, 2)
# kmedoids returns (medoids, labels).
final_medoids = results[0]
# Store the cluster index of every sample next to the scaled features.
data['clusters'] = results[1]
There's a good chance numpy's broadcasting capabilities will help. Getting broadcasting to work in 3+ dimensions is a bit tricky, and I usually have to resort to a bit of trial and error to get the details right.
The use of linalg.norm here compounds things further, because my version of the code won't give identical results to linalg.norm for all inputs. But I believe it will give identical results for all relevant inputs in this case.
I've added some comments to the code to explain the thinking behind certain details.
def compute_d_p_broadcasted(X, medoids, p):
    """Loop-free computation of sum(|x - medoid| ** p) for every point/medoid pair.

    ``medoids`` may be 2-D (one medoid per row) or a single 1-D vector.
    The result has shape (len(X), number of medoids).
    """
    # A lone 1-D medoid becomes a one-row matrix.
    if medoids.ndim == 1:
        medoids = medoids.reshape((1, len(medoids)))
    # Line the operands up for broadcasting:
    #   X         -> (points, features, 1)
    #   medoids.T -> (1, features, medoids)
    # Their difference broadcasts to (points, features, medoids).
    points = X[:, :, np.newaxis]
    centres = medoids.T[np.newaxis, :, :]
    gap = np.abs(points - centres)
    # The middle axis is the dimensionality of the space; summing the p-th
    # powers over it leaves one value per (point, medoid) pair.
    return np.power(gap, p).sum(axis=1)
def compute_d_p(X, medoids, p):
    """Reference (row-by-row) implementation used to check the broadcasted version.

    Returns an array of shape (len(X), number of medoids) holding the L-p
    distance of every point to every medoid, raised to the power p.
    """
    if medoids.ndim == 1:
        # a single medoid passed as a vector -> one-row matrix
        medoids = medoids.reshape((1, len(medoids)))
    row_count = len(X)
    S = np.empty((row_count, len(medoids)))
    for i in range(row_count):
        offsets = X[i, :] - medoids
        S[i, :] = np.linalg.norm(offsets, ord=p, axis=1) ** p
    return S
# A couple of simple tests: the loop version and the broadcasted version must
# agree for p=2 and p=3.
# (Run as a script, the bare np.allclose(...) expressions discard their
# result; the "Returns True" notes refer to an interactive session.)
X = np.array([[ 1.0, 2, 3],
[ 4, 5, 6],
[ 7, 8, 9],
[10, 11, 12]])
# Use rows 0 and 2 of X as the two medoids.
medoids = X[[0, 2], :]
np.allclose(compute_d_p(X, medoids, 2),
compute_d_p_broadcasted(X, medoids, 2))
# Returns True
np.allclose(compute_d_p(X, medoids, 3),
compute_d_p_broadcasted(X, medoids, 3))
# Returns True
Of course, these tests don't tell whether this actually gives a significant speedup. You'll have to check that yourself for the relevant use-case. But I suspect it will at least help.

border/edge operations on numpy arrays

Suppose I have a 3D numpy array of nonzero values and "background" = 0. As an example I will take a sphere of random values:
# 100^3 volume of random values 1..4 (randint's upper bound is exclusive).
array = np.random.randint(1, 5, size = (100,100,100))
# Open coordinate grids running from -50 to 49 along each axis.
z,y,x = np.ogrid[-50:50, -50:50, -50:50]
# True inside a sphere of radius 20 around the coordinate origin.
mask = x**2 + y**2 + z**2<= 20**2
# Everything outside the sphere becomes background (0).
array[np.invert(mask)] = 0
First, I would like to find the "border voxels" (all nonzero values that have a zero within their 3x3x3 neighbourhood). Second, I would like to replace all border voxels with the mean of their nonzero neighbours. So far, I have tried to use scipy's generic filter in the following way:
Function to apply at each element:
def borderCheck(values):
    """Filter callback for a 27-element (3x3x3) footprint; index 13 is the centre.

    Background (0) stays 0, an interior voxel keeps its value, and a voxel
    with at least one zero in its footprint is replaced by the mean of the
    nonzero footprint values (the centre itself included).
    """
    centre = values[13]
    # the footprint centre sits on background
    if centre == 0:
        return 0
    # no zero anywhere in the footprint: not a border voxel
    if 0 not in values:
        return centre
    # zeros add nothing to the sum, so this is the mean of the nonzero entries
    return np.sum(values) / np.count_nonzero(values)
Generic filter:
from scipy import ndimage
# Call borderCheck once per voxel with the flattened 3x3x3 neighbourhood.
result = ndimage.generic_filter(array, borderCheck, footprint = np.ones((3,3,3)))
Is this a proper way to handle this problem? I feel that I am trying to reinvent the wheel here and that there must be a shorter, nicer way to achieve the result. Are there any other suitable (numpy, scipy ) functions that I can use?
EDIT
I messed one thing up: I would like to replace all border voxels with the mean of their nonzero AND non-border neighbours. For this, I tried to clean up the neighbours from ali_m's code (2D case):
# For each edge pixel, keep only those neighbours that are not edge pixels
# themselves.
# `row not in edge_idx` does not test whole rows on a NumPy array: it is
# True only if no single element of edge_idx equals the matching element of
# row, which practically never holds - hence the empty lists. Comparing
# coordinate tuples against a set gives the intended row-wise membership.
edge_coords = {tuple(coord) for coord in edge_idx}
non_border_neighbours = [
    [i for i in each if tuple(nonzero_idx[i]) not in edge_coords]
    for each in neighbours
]
Now I can't figure out why non_border_neighbours comes back empty?
Furthermore, correct me if I am wrong, but doesn't tree.query_ball_point with radius 1 address only the 6 nearest neighbours (Euclidean distance 1)? Should I set sqrt(3) (3D case) as the radius to get the 26-neighbourhood?
I think it's best to start out with the 2D case first, since it can be visualized much more easily:
import numpy as np
from matplotlib import pyplot as plt
# 100x100 image of random values 1..4, stored as doubles.
A = np.random.randint(1, 5, size=(100, 100)).astype(np.double)
y, x = np.ogrid[-50:50, -50:50]
# True inside a disc of radius 30 around the coordinate origin.
mask = x**2 + y**2 <= 30**2
# Everything outside the disc becomes background (0).
A[~mask] = 0
To find the edge pixels you could perform binary erosion on your mask, then XOR the result with your mask
# rank 2 structure with full connectivity
# NOTE(review): needs `from scipy import ndimage`, which this snippet does not import.
struct = ndimage.generate_binary_structure(2, 2)
# Erosion removes the mask pixels that touch the background ...
erode = ndimage.binary_erosion(mask, struct)
# ... so XOR with the original mask leaves exactly those removed pixels.
edges = mask ^ erode
One approach to find the nearest non-zero neighbours of each edge pixel would be to use a scipy.spatial.cKDTree:
from scipy.spatial import cKDTree
# the indices of the non-zero locations and their corresponding values
# (np.where and boolean indexing both walk the array in the same order, so
# row i of nonzero_idx belongs to nonzero_vals[i])
nonzero_idx = np.vstack(np.where(mask)).T
nonzero_vals = A[mask]
# build a k-D tree
tree = cKDTree(nonzero_idx)
# use it to find the indices of all non-zero values that are at most 1 pixel
# away from each edge pixel (p=np.inf: Chebyshev distance, i.e. the full
# 3x3 neighbourhood, the edge pixel itself included)
edge_idx = np.vstack(np.where(edges)).T
neighbours = tree.query_ball_point(edge_idx, r=1, p=np.inf)
# take the average value for each set of neighbours
# (built from a list: np.hstack does not accept a bare generator in current
# NumPy, and np.array also copes with zero edge pixels)
new_vals = np.array([nonzero_vals[n].mean() for n in neighbours])
# use these to replace the values of the edge pixels
A_new = A.astype(np.double, copy=True)
A_new[edges] = new_vals
Some visualisation:
# Three panels side by side: input, detected edges, and the averaged result.
fig, ax = plt.subplots(1, 3, figsize=(10, 4), sharex=True, sharey=True)
# Shared colour scale for the two value images.
norm = plt.Normalize(0, A.max())
panels = (
    (A, 'Original', {'norm': norm}),
    (edges, 'Edges', {}),
    (A_new, 'Averaged', {'norm': norm}),
)
for aa, (image, title, imshow_kwargs) in zip(ax, panels):
    aa.imshow(image, **imshow_kwargs)
    aa.set_title(title, fontsize='x-large')
    aa.set_axis_off()
# Zoom in on part of the disc boundary (the axes are shared).
ax[0].set_xlim(20, 50)
ax[0].set_ylim(50, 80)
fig.tight_layout()
plt.show()
This approach will also generalize to the 3D case:
# 3-D version of the same procedure: a sphere of random values 1..4.
B = np.random.randint(1, 5, size=(100, 100, 100)).astype(np.double)
z, y, x = np.ogrid[-50:50, -50:50, -50:50]
mask = x**2 + y**2 + z**2 <= 20**2
B[~mask] = 0
# rank 3 structure with full connectivity (26-neighbourhood)
struct = ndimage.generate_binary_structure(3, 3)
erode = ndimage.binary_erosion(mask, struct)
# voxels of the mask that the erosion removed, i.e. the border voxels
edges = mask ^ erode
# coordinates and values of all non-zero voxels, in matching order
nonzero_idx = np.vstack(np.where(mask)).T
nonzero_vals = B[mask]
tree = cKDTree(nonzero_idx)
edge_idx = np.vstack(np.where(edges)).T
# p=np.inf with r=1 selects the full 3x3x3 neighbourhood of every border voxel
neighbours = tree.query_ball_point(edge_idx, r=1, p=np.inf)
# built from a list: np.hstack does not accept a bare generator in current NumPy
new_vals = np.array([nonzero_vals[n].mean() for n in neighbours])
B_new = B.astype(np.double, copy=True)
B_new[edges] = new_vals
Test against your version:
def borderCheck(values):
    """Per-voxel callback over a flattened 3x3x3 footprint (centre at index 13).

    Returns 0 for background, the unchanged centre value for interior voxels,
    and the mean of the nonzero footprint values (centre included) for
    voxels that have a zero in their footprint.
    """
    centre = values[13]
    if centre == 0:
        # background stays background
        return 0
    if 0 not in values:
        # interior voxel: nothing to smooth
        return centre
    # border voxel: zeros contribute nothing to the sum
    return np.sum(values) / np.count_nonzero(values)
# Reference result from the generic_filter approach, compared with the
# KD-tree result.
result = ndimage.generic_filter(B, borderCheck, footprint=np.ones((3, 3, 3)))
print(np.allclose(B_new, result))
# True
I'm sure this isn't the most efficient way to do it, but it will still be significantly faster than using generic_filter.
Update
The performance could be further improved by reducing the number of points that are considered as candidate neighbours of the edge pixels/voxels:
# ...
# the edge pixels/voxels plus their immediate non-zero neighbours
# (a second erosion peels off one more layer; XOR with the mask keeps the two
# outermost layers, the only points that can lie within distance 1 of an edge)
erode2 = ndimage.binary_erosion(erode, struct)
candidate_neighbours = mask ^ erode2
# build the tree inputs from this thin shell instead of the whole mask
nonzero_idx = np.vstack(np.where(candidate_neighbours)).T
nonzero_vals = B[candidate_neighbours]
# ...

Vectorize compressed sparse matrix from array in Python

I am trying to apply graph theory methods to an image processing problem. I want to generate an adjacency matrix from an array containing the points I want to graph. I want to generate a complete graph of the points in the array. If I have N points in the array that I need to graph, I will need an NxN matrix. The weights should be the distances between the points, so this is the code that I have:
''' vertexarray is an array where the points that are to be
included in the complete graph are True and all others False.'''
import numpy as np
def array_to_complete_graph(vertexarray):
    """Return the dense distance matrix of the complete graph on the True cells.

    Args:
        vertexarray: array in which the cells that become graph vertices are
            True and all others False (any number of dimensions).

    Returns:
        An (N, N) float array; entry [i, j] is the Euclidean distance between
        the i-th and j-th True cell in row-major order, with zeros on the
        diagonal. N is the number of True cells.
    """
    # `== True` is an element-wise comparison here and is kept on purpose:
    # it selects exactly the cells equal to True.
    vertcoords = np.transpose(np.where(vertexarray == True))  # noqa: E712
    # Broadcast (N, 1, D) against (1, N, D) to get every pairwise offset.
    offsets = vertcoords[:, np.newaxis, :] - vertcoords[np.newaxis, :, :]
    return np.sqrt((offsets ** 2).sum(axis=-1))
This works, of course, but my question is: can this same array be generated without the nested for loops?
Use the function scipy.spatial.distance.cdist():
import numpy as np
def array_to_complete_graph(vertexarray):
    """Loop-based reference: pairwise Euclidean distances between the True cells.

    Returns an (N, N) float array for the N True cells of the 2-D
    ``vertexarray``.
    """
    vertcoords = np.transpose(np.where(vertexarray == True))
    n_vertices = len(vertcoords)
    cg_array = np.eye(n_vertices)
    for idx in range(n_vertices):
        # unpacking also enforces exactly two coordinates per vertex
        first_a, second_a = vertcoords[idx]
        for jdx in range(n_vertices):
            other = vertcoords[jdx]
            d_first = other[0] - first_a
            d_second = other[1] - second_a
            cg_array[idx, jdx] = np.sqrt(d_first ** 2 + d_second ** 2)
    return cg_array
# Random boolean test image: roughly a quarter of the cells are True.
arr = np.random.rand(10, 20) > 0.75
from scipy.spatial.distance import cdist
# Coordinates of the True cells, in the same row-major order that
# array_to_complete_graph uses.
y, x = np.where(arr)
# One (x, y) point per row; swapping the two columns does not change
# Euclidean distances.
p = np.c_[x, y]
# All pairwise distances in a single call.
dist = cdist(p, p)
# Compare with the loop version (the result is only shown in an interactive session).
np.allclose(array_to_complete_graph(arr), dist)

Manipulating a large binary image array with numpy and cv2

My code is the following:
import cv2; import numpy as np
class MyClass:
    """Load an image and split the pixels of its binary version into black and white points."""

    def __init__(self,imagefile):
        """Read ``imagefile`` and collect the black/white pixel coordinates.

        After construction ``bPoints`` holds the coordinates where
        ``self.th2`` is 0 and ``wPoints`` all remaining coordinates, each as
        an (N, 2) integer array in row-major order.
        """
        self.image = cv2.imread(imagefile)
        #image details
        self.h,self.w = self.image.shape[:2]
        # NOTE(review): self.th2 is not assigned in the visible code;
        # presumably it is the thresholded (binary) image - it must exist
        # before this point.
        #CAUTION! Points are of the form (y,x)
        # Point filtering, vectorised: np.argwhere yields the (row, col)
        # pairs in the same order as the former nested loops (which relied
        # on the Python 2-only xrange).
        th2 = np.asarray(self.th2)[:self.h, :self.w]
        self.bPoints = np.argwhere(th2 == 0)
        self.wPoints = np.argwhere(th2 != 0)
I want to find and separate the white from the black points. I have commented the lines that show a possible (but very-very slow) solution via numpy. Can you recommend me a better and faster solution? I will appreciate it if you do so!
Thanks
I'm assuming self.th2 is a numpy array. This might take some adjustment if that is not the case. Basically, this uses the np.where function to determine all the indices which are 0 or 255.
import cv2; import numpy as np
class MyClass:
    """Load an image and split the pixels of its binary version into black and white points."""

    def __init__(self,imagefile):
        """Read ``imagefile`` and collect the black/white pixel coordinates.

        After construction ``bPoints`` holds the coordinates where
        ``self.th2`` equals 0 and ``wPoints`` those where it equals 255, each
        as an (N, 2) integer array in row-major order.
        """
        self.image = cv2.imread(imagefile)
        #image details
        self.h,self.w = self.image.shape[:2]
        #CAUTION! Points are of the form (y,x)
        # NOTE(review): self.th2 is not assigned in the visible code; it must
        # be a 2-D numpy array by the time this runs.
        # np.argwhere returns the matching (row, col) pairs directly, so no
        # Python-level loop or list append is needed.
        self.bPoints = np.argwhere(self.th2 == 0)
        self.wPoints = np.argwhere(self.th2 == 255)

Categories

Resources