Related
I'm following an excellent medium article: https://towardsdatascience.com/k-medoids-clustering-on-iris-data-set-1931bf781e05 to implement kmedoids from scratch. There is a place in the code where each pixel's distance to the medoid centers is calculated and it is VERY slow. It has numpy.linalg.norm inside a loop. Is there a way to optimize this with numpy.linalg.norm or with numpy broadcasting or scipy.spatial.distance.cdist and np.argmin to do the same thing?
###helper function here###
def compute_d_p(X, medoids, p):
m = len(X)
medoids_shape = medoids.shape
# If a 1-D array is provided,
# it will be reshaped to a single row 2-D array
if len(medoids_shape) == 1:
medoids = medoids.reshape((1,len(medoids)))
k = len(medoids)
S = np.empty((m, k))
for i in range(m):
d_i = np.linalg.norm(X[i, :] - medoids, ord=p, axis=1)
S[i, :] = d_i**p
return S
this is where the slowdown occurs
for datap in cluster_points:
new_medoid = datap
new_dissimilarity= np.sum(compute_d_p(X, datap, p))
if new_dissimilarity < avg_dissimilarity :
avg_dissimilarity = new_dissimilarity
out_medoids[i] = datap
Full code below. All credits to the article author.
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
# Dataset
iris = datasets.load_iris()
data = pd.DataFrame(iris.data,columns = iris.feature_names)
target = iris.target_names
labels = iris.target
#Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
#PCA Transformation
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(data)
PCAdf = pd.DataFrame(data = principalComponents , columns = ['principal component 1', 'principal component 2','principal component 3'])
datapoints = PCAdf.values
m, f = datapoints.shape
k = 3
def init_medoids(X, k):
from numpy.random import choice
from numpy.random import seed
seed(1)
samples = choice(len(X), size=k, replace=False)
return X[samples, :]
medoids_initial = init_medoids(datapoints, 3)
def compute_d_p(X, medoids, p):
m = len(X)
medoids_shape = medoids.shape
# If a 1-D array is provided,
# it will be reshaped to a single row 2-D array
if len(medoids_shape) == 1:
medoids = medoids.reshape((1,len(medoids)))
k = len(medoids)
S = np.empty((m, k))
for i in range(m):
d_i = np.linalg.norm(X[i, :] - medoids, ord=p, axis=1)
S[i, :] = d_i**p
return S
S = compute_d_p(datapoints, medoids_initial, 2)
def assign_labels(S):
return np.argmin(S, axis=1)
labels = assign_labels(S)
def update_medoids(X, medoids, p):
S = compute_d_p(points, medoids, p)
labels = assign_labels(S)
out_medoids = medoids
for i in set(labels):
avg_dissimilarity = np.sum(compute_d_p(points, medoids[i], p))
cluster_points = points[labels == i]
for datap in cluster_points:
new_medoid = datap
new_dissimilarity= np.sum(compute_d_p(points, datap, p))
if new_dissimilarity < avg_dissimilarity :
avg_dissimilarity = new_dissimilarity
out_medoids[i] = datap
return out_medoids
def has_converged(old_medoids, medoids):
return set([tuple(x) for x in old_medoids]) == set([tuple(x) for x in medoids])
#Full algorithm
def kmedoids(X, k, p, starting_medoids=None, max_steps=np.inf):
if starting_medoids is None:
medoids = init_medoids(X, k)
else:
medoids = starting_medoids
converged = False
labels = np.zeros(len(X))
i = 1
while (not converged) and (i <= max_steps):
old_medoids = medoids.copy()
S = compute_d_p(X, medoids, p)
labels = assign_labels(S)
medoids = update_medoids(X, medoids, p)
converged = has_converged(old_medoids, medoids)
i += 1
return (medoids,labels)
results = kmedoids(datapoints, 3, 2)
final_medoids = results[0]
data['clusters'] = results[1]
There's a good chance numpy's broadcasting capabilities will help. Getting broadcasting to work in 3+ dimensions is a bit tricky, and I usually have to resort to a bit of trial and error to get the details right.
The use of linalg.norm here compounds things further, because my version of the code won't give identical results to linalg.norm for all inputs. But I believe it will give identical results for all relevant inputs in this case.
I've added some comments to the code to explain the thinking behind certain details.
def compute_d_p_broadcasted(X, medoids, p):
# If a 1-D array is provided,
# it will be reshaped to a single row 2-D array
if len(medoids.shape) == 1:
medoids = medoids.reshape((1,len(medoids)))
# In general, broadcasting n-dim arrays requires that the last
# dim of the first array be a singleton dimension, and that the
# first dim of the second array be a singleton dimension. We can
# quickly accomplish that by slicing with `None` in the appropriate
# places. (`np.newaxis` is a slightly more self-documenting way
# of spelling `None`, but I rarely bother.)
# In this case, the shapes of the other two dimensions also
# have to align in the same way you'd expect for a dot product.
# So we pass `medoids.T`.
diff = np.abs(X[:, :, None] - medoids.T[None, :, :])
# The last tricky bit is to figure out which axis to sum. Right
# now, the array is a 3-dimensional array, with the first
# dimension corresponding to the rows of `X` and the last
# dimension corresponding to the columns of `medoids.T`.
# The middle dimension corresponds to the underlying dimensionality
# of the space; that's what we want to sum for a sum of squares.
# (Or sum of cubes for L3 norm, etc.)
return (diff ** p).sum(axis=1)
def compute_d_p(X, medoids, p):
m = len(X)
medoids_shape = medoids.shape
# If a 1-D array is provided,
# it will be reshaped to a single row 2-D array
if len(medoids_shape) == 1:
medoids = medoids.reshape((1,len(medoids)))
k = len(medoids)
S = np.empty((m, k))
for i in range(m):
d_i = np.linalg.norm(X[i, :] - medoids, ord=p, axis=1)
S[i, :] = d_i**p
return S
# A couple of simple tests:
X = np.array([[ 1.0, 2, 3],
[ 4, 5, 6],
[ 7, 8, 9],
[10, 11, 12]])
medoids = X[[0, 2], :]
np.allclose(compute_d_p(X, medoids, 2),
compute_d_p_broadcasted(X, medoids, 2))
# Returns True
np.allclose(compute_d_p(X, medoids, 3),
compute_d_p_broadcasted(X, medoids, 3))
# Returns True
Of course, these tests don't tell whether this actually gives a significant speedup. You'll have to check that yourself for the relevant use-case. But I suspect it will at least help.
I already asked a similar question which got answered but now this is more in detail:
I need a really fast way to get all important component stats of two arrays, where one array is labeled by opencv2 and gives the component areas for both arrays. The stats for all components masked on the two arrays should then saved to a dictionary. My approach works but it is much too slow. Is there something to avoid the loop or a better approach then the ndimage.öabeled_comprehension?
from scipy import ndimage
import numpy as np
import cv2
def calculateMeanMaxMin(val):
return np.array([np.mean(val),np.max(val),np.min(val)])
def getTheStatsForComponents(array1,array2):
ret, thresholded= cv2.threshold(array2, 120, 255, cv2.THRESH_BINARY)
thresholded= thresholded.astype(np.uint8)
numLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresholded, 8, cv2.CV_8UC1)
allComponentStats=[]
meanmaxminArray2 = ndimage.labeled_comprehension(array2, labels, np.arange(1, numLabels+1), calculateMeanMaxMin, np.ndarray, 0)
meanmaxminArray1 = ndimage.labeled_comprehension(array1, labels, np.arange(1, numLabels+1), calculateMeanMaxMin, np.ndarray, 0)
for position, label in enumerate(range(1, numLabels)):
currentLabel = np.uint8(labels== label)
contour, _ = cv2.findContours(currentLabel, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
(side1,side2)=cv2.minAreaRect(contour[0])[1]
componentStat = stats[label]
allstats = {'position':centroids[label,:],'area':componentStat[4],'height':componentStat[3],
'width':componentStat[2],'meanArray1':meanmaxminArray1[position][0],'maxArray1':meanmaxminArray1[position][1],
'minArray1':meanmaxminArray1[position][2],'meanArray2':meanmaxminArray2[position][0],'maxArray2':meanmaxminArray2[position][1],
'minArray2':meanmaxminArray2[position][2]}
if side1 >= side2 and side1 > 0:
allstats['elongation'] = np.float32(side2 / side1)
elif side2 > side1 and side2 > 0:
allstats['elongation'] = np.float32(side1 / side2)
else:
allstats['elongation'] = np.float32(0)
allComponentStats.append(allstats)
return allComponentStats
EDIT
The two arrays are 2d arrays:
array1= np.random.choice(255,(512,512)).astype(np.uint8)
array2= np.random.choice(255,(512,512)).astype(np.uint8)
EDIT2
small example of two arrays and the labelArray with two components(1 and 2, and background 0). Calculate the min,max mean with ndimage.labeled_comprhension.
from scipy import ndimage
import numpy as np
labelArray = np.array([[0,1,1,1],[2,2,1,1],[2,2,0,1]])
data = np.array([[0.1,0.2,0.99,0.2],[0.34,0.43,0.87,0.33],[0.22,0.53,0.1,0.456]])
data2 = np.array([[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2],[0.1,0.2,0.99,0.2]])
numLabels = 2
minimumDataForAllLabels = ndimage.labeled_comprehension(data, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
minimumData2ForallLabels = ndimage.labeled_comprehension(data2, labelArray, np.arange(1, numLabels+1), np.min, np.ndarray, 0)
print(minimumDataForAllLabels)
print(minimumData2ForallLabels)
print(bin_and_do_simple_stats(labelArray.flatten(),data.flatten()))
Output:
[0.2 0.22] ##minimum of component 1 and 2 from data
[0.2 0.1] ##minimum of component 1 and 2 from data2
[0.1 0.2 0.22] ##minimum output of bin_and_do_simple_stats from data
labeled_comprehension is definitely slow.
At least the simple stats can be done much faster based on the linked post. For simplicity I'm only doing one data array, but as the procedure returns sort indices it can be easily extended to multiple arrays:
import numpy as np
from scipy import sparse
try:
from stb_pthr import sort_to_bins as _stb_pthr
HAVE_PYTHRAN = True
except:
HAVE_PYTHRAN = False
# fallback if pythran not available
def sort_to_bins_sparse(idx, data, mx=-1):
if mx==-1:
mx = idx.max() + 1
aux = sparse.csr_matrix((data, idx, np.arange(len(idx)+1)), (len(idx), mx)).tocsc()
return aux.data, aux.indices, aux.indptr
def sort_to_bins_pythran(idx, data, mx=-1):
indices, indptr = _stb_pthr(idx, mx)
return data[indices], indices, indptr
# pick best available
sort_to_bins = sort_to_bins_pythran if HAVE_PYTHRAN else sort_to_bins_sparse
# example data
idx = np.random.randint(0,10,(100000))
data = np.random.random(100000)
# if possible compare the two methods
if HAVE_PYTHRAN:
dsp,isp,psp = sort_to_bins_sparse(idx,data)
dph,iph,pph = sort_to_bins_pythran(idx,data)
assert (dsp==dph).all()
assert (isp==iph).all()
assert (psp==pph).all()
# example how to do simple vectorized calculations
def simple_stats(data,iptr):
min = np.minimum.reduceat(data,iptr[:-1])
mean = np.add.reduceat(data,iptr[:-1]) / np.diff(iptr)
return min, mean
def bin_and_do_simple_stats(idx,data,mx=-1):
data,indices,indptr = sort_to_bins(idx,data,mx)
return simple_stats(data,indptr)
print("minima: {}\n mean values: {}".format(*bin_and_do_simple_stats(idx,data)))
If you have pythran (not required but a bit faster), compile this as <stb_pthr.py>:
import numpy as np
#pythran export sort_to_bins(int[:], int)
def sort_to_bins(idx, mx):
if mx==-1:
mx = idx.max() + 1
cnts = np.zeros(mx + 2, int)
for i in range(idx.size):
cnts[idx[i]+2] += 1
for i in range(2, cnts.size):
cnts[i] += cnts[i-1]
res = np.empty_like(idx)
for i in range(idx.size):
res[cnts[idx[i]+1]] = i
cnts[idx[i]+1] += 1
return res, cnts[:-1]
I have a large numpy array of unordered lidar point cloud data, of shape [num_points, 3], which are the XYZ coordinates of each point. I want to downsample this into a 2D grid of mean height values - to do this I want to split the data into 5x5 X-Y bins and calculate the mean height value (Z coordinate) in each bin.
Does anyone know any quick/efficient way to do this?
Current code:
import numpy as np
from open3d import read_point_cloud
resolution = 5
# Code to load point cloud and get points as numpy array
pcloud = read_point_cloud(params.POINT_CLOUD_DIR + "Part001.pcd")
pcloud_np = np.asarray(pcloud.points)
# Code to generate example dataset
pcloud_np = np.random.uniform(0.0, 1000.0, size=(1000,3))
# Current (inefficient) code to quantize into 5x5 XY 'bins' and take mean Z values in each bin
pcloud_np[:, 0:2] = np.round(pcloud_np[:, 0:2]/float(resolution))*float(resolution) # Round XY values to nearest 5
num_x = int(np.max(pcloud_np[:, 0])/resolution)
num_y = int(np.max(pcloud_np[:, 1])/resolution)
mean_height = np.zeros((num_x, num_y))
# Loop over each x-y bin and calculate mean z value
x_val = 0
for x in range(num_x):
y_val = 0
for y in range(num_y):
height_vals = pcloud_np[(pcloud_np[:,0] == float(x_val)) & (pcloud_np[:,1] == float(y_val))]
if height_vals.size != 0:
mean_height[x, y] = np.mean(height_vals)
y_val += resolution
x_val += resolution
Here is a suggestion using an np.bincount idiom on the flattened 2d grid. I also took the liberty to add some small fixes to the original code:
import numpy as np
#from open3d import read_point_cloud
resolution = 5
# Code to load point cloud and get points as numpy array
#pcloud = read_point_cloud(params.POINT_CLOUD_DIR + "Part001.pcd")
#pcloud_np = np.asarray(pcloud.points)
# Code to generate example dataset
pcloud_np = np.random.uniform(0.0, 1000.0, size=(1000,3))
def f_op(pcloud_np, resolution):
# Current (inefficient) code to quantize into 5x5 XY 'bins' and take mean Z values in each bin
pcloud_np[:, 0:2] = np.round(pcloud_np[:, 0:2]/float(resolution))*float(resolution) # Round XY values to nearest 5
num_x = int(np.max(pcloud_np[:, 0])/resolution) + 1
num_y = int(np.max(pcloud_np[:, 1])/resolution) + 1
mean_height = np.zeros((num_x, num_y))
# Loop over each x-y bin and calculate mean z value
x_val = 0
for x in range(num_x):
y_val = 0
for y in range(num_y):
height_vals = pcloud_np[(pcloud_np[:,0] == float(x_val)) & (pcloud_np[:,1] == float(y_val)), 2]
if height_vals.size != 0:
mean_height[x, y] = np.mean(height_vals)
y_val += resolution
x_val += resolution
return mean_height
def f_pp(pcloud_np, resolution):
xy = pcloud_np.T[:2]
xy = ((xy + resolution / 2) // resolution).astype(int)
mn, mx = xy.min(axis=1), xy.max(axis=1)
sz = mx + 1 - mn
flatidx = np.ravel_multi_index(xy-mn[:, None], sz)
histo = np.bincount(flatidx, pcloud_np[:, 2], sz.prod()) / np.maximum(1, np.bincount(flatidx, None, sz.prod()))
return (histo.reshape(sz), *(xy * resolution))
res_op = f_op(pcloud_np, resolution)
res_pp, x, y = f_pp(pcloud_np, resolution)
from timeit import timeit
t_op = timeit(lambda:f_op(pcloud_np, resolution), number=10)*100
t_pp = timeit(lambda:f_pp(pcloud_np, resolution), number=10)*100
print("results equal:", np.allclose(res_op, res_pp))
print(f"timings (ms) op: {t_op:.3f} pp: {t_pp:.3f}")
Sample output:
results equal: True
timings (ms) op: 359.162 pp: 0.427
Speedup almost 1000x.
In order to compute images stored in ndarrays provided by opencv that are (4000,6000,3) shape i want to copy values from a source ndarray to a target ndarray at different coordinates (x,y) in target. Offset to be added to the source coordinates in order to compute target ones are stored in an ndarray.
See below the simple principle implemented with two nested loops:
import numpy as np
source = np.array([
[1,2,3,33],
[4,5,6,66],
[7,8,9,99]])
target = np.array([
[0,0,0,0],
[0,0,0,0],
[0,0,0,0]])
move_instruction = np.array([
[[0,0],[0,0],[0,0],[0,0]],
[[-1,0],[0,0],[1,1],[0,0]],
[[0,0],[0,0],[0,0],[0,0]]])
rows, cols = source.shape
for y in range(rows):
for x in range(cols):
y_target = y + move_instruction[y][x][0]
x_target = x + move_instruction[y][x][1]
target[y_target][x_target] = source[y][x]
Problem is that it is very slow.
I'm beginner with numpy and wondering if there's a smart way to perform this operation with ndarray operations in a more efficient way ?
You can get all the indices of the source array, add the shift to those indices, then assign the values from the source at the positions of the shifted indices on the target.
import numpy as np
source = np.array([
[1,2,3,33],
[4,5,6,66],
[7,8,9,99]])
target = np.zeros_like(source)
move_instruction = np.array([
[[0,0],[0,0],[0,0],[0,0]],
[[-1,0],[0,0],[1,1],[0,0]],
[[-100,100],[-100,0],[0,100],[0,0]]])
all_inds = np.where(np.ones_like(source))
moves = move_instruction[all_inds]
new_r = all_inds[0] + moves[...,0]
new_c = all_inds[1] + moves[...,1]
arr_shape = source.shape
# Filter for invalid shifts
filter = (new_r < 0) + (new_r >= arr_shape[0]) + (new_c < 0) + (new_c >= arr_shape[1])
new_r[filter] = all_inds[0][filter] # This just recovers the original non-moved index;
new_c[filter] = all_inds[1][filter] # if you want to do something else you'll have to
# modify these indices some other way.
new_inds = (new_r, new_c)
target[new_inds] = source[all_inds]
I need to shift a 2D array field, i.e. I have a "previous_data" array which I access through shifted indices to create my "new_data" array.
I can do this in a nonpythonic (and slow) loop, but would very much appreciate some help in finding a pythonic (and faster) solution!
Any help and hints are very much appreciated!
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic():
#this works, but is slow (for large arrays)
new_data = np.zeros((ny,nx))
for j in xrange(ny):
for i in xrange(nx):
#go through each item, check if it is within the bounds
#and assign the data to the new_data array
i_new = ix[j,i]
j_new = iy[j,i]
if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
new_data[j,i]=previous_data[j_new,i_new]
ef, axar = plt.subplots(1,2)
im = axar[0].pcolor(previous_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[0], shrink=0.9)
im = axar[1].pcolor(new_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[1], shrink=0.9)
plt.show()
def pythonic():
#tried a few things here, but none are working
#-tried assigning NaNs to indices (ix,iy) which are out of bounds, but NaN's don't work for indices
#-tried masked arrays, but they also don't work as indices
#-tried boolean arrays, but ended in shape mismatches
#just as in the nonworking code below
ind_y_good = np.where(iy>=0) and np.where(iy<ny)
ind_x_good = np.where(ix>=0) and np.where(ix<nx)
new_data = np.zeros((ny,nx))
new_data[ind_y_good,ind_x_good] = previous_data[iy[ind_y_good],ix[ind_x_good]]
#some 2D array:
nx = 20
ny = 30
#array indices:
iy, ix = np.indices((ny,nx))
#modify indices (shift):
iy = iy + 1
ix = ix - 4
#create some out of range indices (which might happen in my real scenario)
iy[0,2:7] = -9999
ix[0:3,-1] = 6666
#some previous data which is the basis for the new_data:
previous_data = np.ones((ny,nx))
previous_data[2:8,10:20] = 2
nonpythonic()
pythonic()
This is the result of the working (nonpythonic) code above:
I implemented a version of pythonic that replicates nonpythonic with some masking and index fiddling - see below. By the way I think the "new" indices should be the ones corresponding to the new array, rather than the old ones, but I've left it as in your existing function.
The main thing to realise is that in your attempt in the question, your conditions
ind_y_good = np.where(iy>=0) and np.where(iy<ny)
ind_x_good = np.where(ix>=0) and np.where(ix<nx)
must be combined, since we must always have pairs of x and y indices. i.e. if the x index is invalid, then so is the y.
Finally, if the indices are really all shifted by a constant factor, you can make this even simpler by using NumPy's roll function and taking a slice of the indices corresponding to the valid area.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic(previous_data, ix, iy, nx, ny):
#this works, but is slow (for large arrays)
new_data = np.zeros((ny,nx))
for j in xrange(ny):
for i in xrange(nx):
#go through each item, check if it is within the bounds
#and assign the data to the new_data array
i_new = ix[j,i]
j_new = iy[j,i]
if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
new_data[j,i]=previous_data[j_new,i_new]
return new_data
def pythonic(previous_data, ix, iy):
ny, nx = previous_data.shape
iy_old, ix_old = np.indices(previous_data.shape)
# note you must apply the same condition to both
# index arrays
valid = (iy >= 0) & (iy < ny) & (ix >= 0) & (ix < nx)
new_data = np.zeros((ny,nx))
new_data[iy_old[valid], ix_old[valid]] = previous_data[iy[valid], ix[valid]]
return new_data
def main():
#some 2D array:
nx = 20
ny = 30
#array indices:
iy, ix = np.indices((ny,nx))
#modify indices (shift):
iy = iy + 1
ix = ix - 4
#create some out of range indices (which might happen in my real scenario)
iy[0,2:7] = -9999
ix[0:3,-1] = 6666
#some previous data which is the basis for the new_data:
previous_data = np.ones((ny,nx))
previous_data[2:8,10:20] = 2
data_nonpythonic = nonpythonic(previous_data, ix, iy, nx, ny)
data_pythonic = pythonic(previous_data, ix, iy)
new_data = data_nonpythonic
ef, axar = plt.subplots(1,2)
im = axar[0].pcolor(previous_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[0], shrink=0.9)
im = axar[1].pcolor(new_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[1], shrink=0.9)
plt.show()
print(np.allclose(data_nonpythonic, data_pythonic))
if __name__ == "__main__":
main()