I have a data set of discs in 3D space. Each disc is defined by its center, radius, and Strike and Dip (Strike and Dip are how geoscientists define the orientation of a plane in 3D). I convert Strike and Dip to a normal vector. As a result, each disc can be represented by its center, radius, and normal vector.
I want to find out how many intersections each disc has with other discs. The way I'm approaching this is as follows:
for each 2 discs:
r1 and r2 are radius of the first and second discs and c1, c2 are the center points
(1) check |c1-c2| < r1+r2
if (1) holds true, check if the normal vectors are parallel.
if parallel -> see if discs are in the same plane -> if yes: they intersect. if no: they don't intersect
if not parallel -> find the intersection line
find the minimum distance of c1 from the line (d1), find minimum distance of c2 from the line (d2). if d1<r1 and d2<r2 then the discs intersect. if not, they do not intersect.
Following I have attached all the functions that I'm using:
In the code: 'Easting' stands for X, 'Northing' stands for Y, 'Depth' stands for Z. I also find the minimum value in each of easting, northing and depth and subtract these values from their respective columns. I assume this will not affect the end result as it's only a translation.
import numpy as np
import pandas as pd
from numpy import radians, cos, sin
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import art3d
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import Circle
from itertools import product
#################################functions##############################3
def plane_intersect(a, b):
    """
    Find the line along which two non-parallel planes meet.

    a, b: 4-element sequences (A, B, C, D) describing A*x + B*y + C*z + D = 0
    output: two points on the line of intersection, np.arrays of shape (3,);
    the second point is the first one shifted by the cross product of the
    two normals.
    Parallel planes make the linear system singular, so np.linalg.solve raises.
    """
    normal_a = np.array(a[:3])
    normal_b = np.array(b[:3])
    direction = np.cross(normal_a, normal_b)
    # Rows 1-2 are the plane equations; row 3 (direction . p = 0) pins down
    # one specific point on the line.
    system = np.array([normal_a, normal_b, direction])
    rhs = np.array([-a[3], -b[3], 0.]).reshape(3, 1)
    point = np.linalg.solve(system, rhs).T
    shifted = point + direction
    return point[0], shifted[0]
def dist_point_from_line(p, q, rs):
    """
    Perpendicular distance from a point to a line in 3-D.

    p, q: two points on the line
    rs: the point of interest
    output: np.array of shape (1,) holding the minimum distance of rs from
    the line through p and q
    """
    line_a = np.array(p)
    line_b = np.array(q)
    target = np.array(rs)
    direction = line_a - line_b
    # Scalar position of the foot of the perpendicular, measured from q
    # in units of `direction`.
    scale = np.dot(target - line_b, direction) / np.dot(direction, direction)
    foot_to_target = np.outer(scale, direction) + line_b - target
    return np.linalg.norm(foot_to_target, axis=1)
def plane_parameters(S,D,x,y,z):
    """
    Coefficients of the plane A*x + B*y + C*z + D = 0 containing a disc.

    S, D: Strike and Dip of the plane in degrees
    x, y, z: Easting, Northing, Depth of the event (center of the disc)
    output: tuple (A, B, C, D); (A, B, C) is the plane normal and the last
    entry is chosen so that the plane passes through (x, y, z)
    """
    strike = radians(S)
    dip = radians(D)
    sin_dip = sin(dip)
    A = -cos(strike) * sin_dip
    B = sin(strike) * sin_dip
    C = -cos(dip)
    # Kept in its own name so the Dip argument D is not overwritten.
    offset = -((A * x) + (B * y) + (C * z))
    return A, B, C, offset
def disc_intersection(df):
    """
    Count, for every disc, how many other discs it intersects.

    df: DataFrame with one disc per row and the columns 'Northing', 'Easting',
        'Depth' (centre), 'Strike', 'Dip' (degrees), 'SourceRo' (radius) plus
        a counter column: 'num_intersections' when present, otherwise the
        column at position 6.
    output: the same DataFrame; the counter of both discs is incremented in
        place for every intersecting pair. Diagnostics are printed per pair.

    Centres are always handled in (Easting, Northing, Depth) = (x, y, z)
    order, the order plane_parameters uses for the plane normal. Combining a
    (Northing, Easting, Depth) point with an (x, y, z) plane makes the
    point-to-line distances depend on where the origin is.

    Two non-parallel discs intersect exactly when the chords that the common
    line of their planes cuts out of each disc overlap. Each centre lying
    within one radius of that line is necessary but not sufficient.
    """
    parallel_tol = 1e-9  # |n1 x n2| at or below this counts as parallel (unit normals)
    coplanar_tol = 1e-6  # point-to-plane offset (coordinate units) treated as zero

    if 'num_intersections' in df.columns:
        counter = df.columns.get_loc('num_intersections')
    else:
        counter = 6

    centers = df[['Easting', 'Northing', 'Depth']].to_numpy(dtype=float)
    radii = df['SourceRo'].to_numpy(dtype=float)
    strikes = df['Strike'].to_numpy(dtype=float)
    dips = df['Dip'].to_numpy(dtype=float)
    planes = [
        plane_parameters(strike, dip, x, y, z)
        for strike, dip, (x, y, z) in zip(strikes, dips, centers)
    ]

    for i in range(len(df)):
        c1, r1 = centers[i], radii[i]
        A1, B1, C1, D1 = planes[i]
        n1 = np.array([A1, B1, C1])
        for j in range(i + 1, len(df)):
            print()
            print("i,j:", i, ',', j)
            c2, r2 = centers[j], radii[j]
            print('r1:', r1)
            print('r2:', r2)
            centers_distance = np.linalg.norm(c1 - c2, ord=2)
            print('centers_distance:', centers_distance)
            if centers_distance > (r1 + r2):
                # Two discs placed farther apart than r1+r2 can never touch.
                print("|c1-c2| <= r1+r2 is not true!")
                continue
            print("|c1-c2| <= r1+r2")
            A2, B2, C2, D2 = planes[j]
            n2 = np.array([A2, B2, C2])
            print("A1,B1,C1,D1:", A1, ",", B1, ",", C1, ",", D1)
            print("A2,B2,C2,D2:", A2, ",", B2, ",", C2, ",", D2)

            line_dir = np.cross(n1, n2)
            if np.linalg.norm(line_dir) <= parallel_tol:
                print("normals are parallel!")
                # Coplanar discs already passed the |c1-c2| <= r1+r2 test,
                # which is the full intersection criterion inside one plane.
                hit = abs(np.dot(n2, c1) + D2) <= coplanar_tol
                if hit:
                    print("discs are in the same plane and they do intersect!")
                else:
                    print("discs are not in the same plane and hence they don't intersect!")
            else:
                print("planes not parallel!")
                p, q = plane_intersect([A1, B1, C1, D1], [A2, B2, C2, D2])
                d1 = dist_point_from_line(p, q, c1)
                d2 = dist_point_from_line(p, q, c2)
                print("c1 from intersection line:", d1)
                print("c2 from intersection line:", d2)
                dist1, dist2 = float(d1[0]), float(d2[0])
                hit = False
                if dist1 <= r1 and dist2 <= r2:
                    # Positions of the chord midpoints along the line and the
                    # half-lengths of the two chords.
                    unit = line_dir / np.linalg.norm(line_dir)
                    t1 = np.dot(c1 - p, unit)
                    t2 = np.dot(c2 - p, unit)
                    half1 = np.sqrt(max(r1 * r1 - dist1 * dist1, 0.0))
                    half2 = np.sqrt(max(r2 * r2 - dist2 * dist2, 0.0))
                    hit = abs(t1 - t2) <= half1 + half2
                if hit:
                    print("intersection found!!!")
                else:
                    # The planes cross each other, but the discs do not.
                    print("intersection NOT found!!!")

            if hit:
                df.iat[i, counter] += 1
                df.iat[j, counter] += 1
    return df
##########################################################
# Four sample discs: centre (Northing, Easting, Depth), orientation
# (Strike, Dip in degrees) and radius (SourceRo).
df = pd.DataFrame(
    [
        [281017, 1941326, 8923, 282.18, 64.27, 32.874017],
        [281019, 1941351, 8902, 47.51, 60.60, 35.826773],
        [281107, 1941313, 8818, 285.14, 70.81, 52.854332],
        [281078, 1941385, 8865, 42.60, 40.11, 35.170605],
    ],
    columns=['Northing', 'Easting', 'Depth', 'Strike', 'Dip', 'SourceRo'],
)
df = df.loc[:, ['Northing', 'Easting', 'Depth', 'Strike', 'Dip', 'SourceRo']]
df['num_intersections'] = np.zeros(len(df))

# Translate the centres so the smallest coordinate on each axis becomes 0.
north_min, east_min, depth_min = (
    df['Northing'].min(),
    df['Easting'].min(),
    df['Depth'].min(),
)
df['Northing'] = df['Northing'] - north_min
df['Easting'] = df['Easting'] - east_min
df['Depth'] = df['Depth'] - depth_min

# Extent of the translated centres, padded by the largest radius.
r_max = df['SourceRo'].max()
north_min = df['Northing'].min() - r_max
north_max = df['Northing'].max() + r_max
east_min = df['Easting'].min() - r_max
east_max = df['Easting'].max() + r_max
depth_min = df['Depth'].min() - r_max
depth_max = df['Depth'].max() + r_max

new_df = disc_intersection(df)
The first two discs actually intersect. I have included an image of the 4 discs in the space for you reference.
I have noticed that d1 and d2 change when I add or remove a disc in the DataFrame, which is not mathematically correct, but I don't know which part of the code is problematic.
Related
I have n lines and m circles.
I have a [n,2] numpy array of line start points:
[x1_1,y1_1],
[x1_2,y1_2],
...
[x1_n,y1_n]
And a [n,2] numpy array of line end points:
[x2_1,y2_1],
[x2_2,y2_2],
...
[x2_n,y2_n]
And an [m,2] numpy array of circle centers:
[cx_1,cy_1],
...
[cx_m,cy_m]
And an [m,1] numpy array of circle radii:
[cr_1...cr_m]
I would like to efficiently get an [n,m] numpy array where array[i,j] is True if line i intersects circle j.
In general I would take the normalised perpendicular vector to each line, take the dot product of that with each (x_i, y_i) - (cx_j, cy_j), and ask if it's less than cr_j; but I also have to check whether the implied closest point is on the line segment, and check each end individually if not. I'm wondering if there is a more elegant solution.
Ok, assume to have these shape
# Sample data: 3 lines (start/end points) and 4 circles (center/radius).
start = 5 * (np.random.random((3, 2)) - 0.5)
end = 5 * (np.random.random((3, 2)) - 0.5)
center = 5 * (np.random.random((4, 2)) - 0.5)
radius = 3 * np.random.random((4, 1))
for each center we can compute the distance from the three lines by:
# Perpendicular distance from every circle centre to every (infinite) line,
# computed for all pairs at once. The z-component of the 2-D cross product is
# written out explicitly because np.cross on 2-element vectors is deprecated.
direction = end - start                              # (n, 2)
offset = start[:, None, :] - center[None, :, :]      # (n, m, 2)
cross_z = direction[:, None, 0] * offset[..., 1] - direction[:, None, 1] * offset[..., 0]
D = np.abs(cross_z) / np.linalg.norm(direction, axis=1)[:, None]
D[i,j] will be the distance between line i (in rows) and center j (in columns).
Now we can simply compare these distances to the radii with:
# radius is (m, 1); its transpose (1, m) broadcasts against D (n, m), so
# I[i, j] is True when line i passes closer to center j than radius j.
I = D < radius.T
I is a matrix of the same shape of D; I[i,j] is True if the distance between the line i and the center j is lower than the radius j (and so if the line i intersect the circle j) and False otherwise.
I know it is not very elegant, but I hope it can be useful.
I can't think of a substantially simpler algorithm than the one outlined in the question. As far as the implementation is concerned, it is easy to make these computations using shapely.
First, lets generate and plot some sample data:
from itertools import product
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import matplotlib.colors as mcolors
from shapely.geometry import LineString, Point, MultiLineString
import numpy as np
import pandas as pd
# generate data: 3 line segments and 4 circles from a seeded generator
rng = np.random.default_rng(123)
start = (rng.random((3, 2)) - .5) * 5
end = (rng.random((3, 2)) - .5) * 5
center = (rng.random((4, 2)) - .5) * 5
radius = rng.random((4, 1)) * 3

# plot lines and circles on one equal-aspect axes
fig, ax = plt.subplots()
fig.set_size_inches(8, 8)
ax.set_aspect('equal')
colors = list(mcolors.TABLEAU_COLORS)
for idx, (p0, p1) in enumerate(zip(start, end)):
    xs, ys = zip(p0, p1)
    ax.plot(xs, ys, label=f"line {idx}")
for idx, (mid, rad) in enumerate(zip(center, radius)):
    ring = Circle(mid, rad, fill=False, ec=colors[idx], label=f"circle {idx}")
    ax.add_patch(ring)
plt.legend()
plt.show()
This gives:
Next, compute the array of intersections, with rows corresponding to lines and columns corresponding to circles:
# One LineString per segment and one circle outline per (center, radius).
lines = [LineString([p0, p1]) for p0, p1 in zip(start, end)]
circles = [Point(mid).buffer(rad).boundary for mid, rad in zip(center, radius)]

# out[row, col] is True when line `row` crosses the outline of circle `col`.
out = np.empty((len(lines), len(circles)), dtype=bool)
for row, segment in enumerate(lines):
    for col, outline in enumerate(circles):
        out[row, col] = segment.intersects(outline)

# convert to a dataframe for better display
df = pd.DataFrame(out)
df.index.name = 'lines'
df.columns.name = 'circles'
print(df)
The result:
circles 0 1 2 3
lines
0 True False True True
1 False False False True
2 False False False False
Consider the following two sets of points. I would like to find the optimal 2D translation and rotation that aligns the largest number of points between dataset blue and dataset orange, where a point is considered aligned if the distance to its nearest neighbor in the other dataset is smaller than a threshold.
I understand that this is related to "Iterative Closest Point" algorithms, but in this case the situation is a bit harder because not all points from one dataset are in the other, and also because some points may turn out to be "false positives" (noise).
Is there an efficient way of doing this?
I came across the same problem and found a solution in the way CCD star-observation images are compared: the basic idea is to find the best match between the triangles formed by the two sets of points.
I then use the astroalign package to calculate the transformation matrix and apply it to all the points. Thankfully, it works pretty well.
import itertools
import numpy as np
import matplotlib.pyplot as plt
import astroalign as aa
def getTriangles(set_X, X_combs):
    """
    Side lengths of every triangle listed in X_combs.

    set_X: indexable collection of 2-D points
    X_combs: iterable of index triples (p0, p1, p2) into set_X
    output: list with one entry per triple: the three side lengths divided
    by the shortest one (so the minimum is 1), sorted ascending.
    """
    def side(i, j):
        # Euclidean distance between points i and j of set_X.
        return np.sqrt((set_X[i][0] - set_X[j][0]) ** 2 +
                       (set_X[i][1] - set_X[j][1]) ** 2)

    triang = []
    for p0, p1, p2 in X_combs:
        sides = [side(p0, p1), side(p0, p2), side(p1, p2)]
        shortest = min(sides)
        triang.append(sorted(length / shortest for length in sides))
    return triang
def sumTriangles(ref_triang, in_triang):
    """
    Find the pair of normalized triangles that differ the least.

    Every triangle in ref_triang is compared with every triangle in
    in_triang: the absolute differences of their sides are summed, and the
    indices (ref_idx, in_idx) of the first pair with the smallest sum are
    returned. The smallest sum is also printed.
    """
    scored = [
        (sum(abs(np.array(ref_tr) - np.array(in_tr))), i, j)
        for i, ref_tr in enumerate(ref_triang)
        for j, in_tr in enumerate(in_triang)
    ]
    totals = [total for total, _, _ in scored]
    smallest = min(totals)
    # list.index returns the first occurrence, so ties go to the earliest pair.
    _, ref_idx, in_idx = scored[totals.index(smallest)]
    print("Smallest difference: {}".format(smallest))
    return ref_idx, in_idx
# Reference point set (5 points, one [x, y] pair per row).
set_ref = np.array([[2511.268821,44.864124],
[2374.085032,201.922566],
[1619.282942,216.089335],
[1655.866502,221.127787],
[ 804.171659,2133.549517], ])
# Input point set (12 points) that is to be aligned onto the reference set.
set_in = np.array([[1992.438563,63.727282],
[2285.793346,255.402548],
[1568.915358, 279.144544],
[1509.720134, 289.434629],
[1914.255205, 349.477788],
[2370.786382, 496.026836],
[ 482.702882, 508.685952],
[2089.691026, 523.18825 ],
[ 216.827439, 561.807396],
[ 614.874621, 2007.304727],
[1286.639124, 2155.264827],
[ 729.566116, 2190.982364]])
# All possible triangles, as index triples into each point set.
ref_combs = list(itertools.combinations(range(len(set_ref)), 3))
in_combs = list(itertools.combinations(range(len(set_in)), 3))
# Obtain normalized triangles.
ref_triang, in_triang = getTriangles(set_ref, ref_combs), getTriangles(set_in, in_combs)
# Index of the ref and in triangles with the smallest difference.
ref_idx, in_idx = sumTriangles(ref_triang, in_triang)
# Indexes of points in ref and in of the best match triangles.
ref_idx_pts, in_idx_pts = ref_combs[ref_idx], in_combs[in_idx]
print ('triangle ref %s matches triangle in %s' % (ref_idx_pts, in_idx_pts))
print ("ref:", [set_ref[_] for _ in ref_idx_pts])
print ("input:", [set_in[_] for _ in in_idx_pts])
# Coordinates of the three matched points in each set.
ref_pts = np.array([set_ref[_] for _ in ref_idx_pts])
in_pts = np.array([set_in[_] for _ in in_idx_pts])
# Estimate the transform from the matched input triangle to the reference
# triangle, then apply it to every input point.
# NOTE(review): the return shape of aa.find_transform is taken from the
# unpacking below — confirm against the installed astroalign version.
transf, (in_list,ref_list) = aa.find_transform(in_pts, ref_pts)
transf_in = transf(set_in)
print(f'transformation matrix: {transf}')
# Plot reference points, raw input points and the aligned input points,
# plus the two matched triangles (drawn as open polylines).
plt.scatter(set_ref[:,0],set_ref[:,1], s=100,marker='.', c='r',label='Reference')
plt.scatter(set_in[:,0],set_in[:,1], s=100,marker='.', c='b',label='Input')
plt.scatter(transf_in[:,0],transf_in[:,1], s=100,marker='+', c='b',label='Input Aligned')
plt.plot(ref_pts[:,0],ref_pts[:,1], c='r')
plt.plot(in_pts[:,0],in_pts[:,1], c='b')
plt.legend()
plt.tight_layout()
# Save before show() so the figure is written with its contents.
plt.savefig( 'align_coordinates.png', format = 'png')
plt.show()
I have a list of unsorted points:
List = [(-50.6261, 74.3683), (-63.2489, 75.0038), (-76.0384, 75.6219), (-79.8451, 75.7855), (-30.9626, 168.085), (-27.381, 170.967), (-22.9191, 172.928), (-16.5869, 173.087), (-4.813, 172.505), (-109.056, 92.0063), (-96.0705, 91.4232), (-83.255, 90.8563), (-80.7807, 90.7498), (-54.1694, 89.5087), (-41.6419, 88.9191), (-32.527, 88.7737), (-27.6403, 91.0134), (-22.3035, 95.141), (-18.0168, 100.473), (-15.3918, 105.542), (-13.6401, 112.373), (-13.3475, 118.988), (-14.4509, 125.238), (-17.1246, 131.895), (-21.6766, 139.821), (-28.5735, 149.98), (-33.395, 156.344), (-114.702, 83.9644), (-114.964, 87.4599), (-114.328, 89.8325), (-112.314, 91.6144), (-109.546, 92.0209), (-67.9644, 90.179), (-55.2013, 89.5624), (-34.4271, 158.876), (-34.6987, 161.896), (-33.6055, 164.993), (-87.0365, 75.9683), (-99.8007, 76.0889), (-105.291, 76.5448), (-109.558, 77.3525), (-112.516, 79.2509), (-113.972, 81.3335), (2.30014, 171.635), (4.40918, 169.691), (5.07165, 166.974), (5.34843, 163.817), (5.30879, 161.798), (-29.6746, 73.5082), (-42.5876, 74.0206)]
I want to sort those points to have a continuous curve passing by every point just once, starting from start = (-29.6746, 73.5082)
and end = (5.30879, 161.798)
This is what I tried so far:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import NearestNeighbors
import networkx as nx
# Split the (x, y) tuples into coordinate lists; X and Y must exist before
# they can be filled.
X = [el[0] for el in List]
Y = [el[1] for el in List]
x = np.array(X)
y = np.array(Y)
points = np.c_[x, y]
# find 2 nearest neighbors (n_neighbors is keyword-only in current
# scikit-learn releases)
clf = NearestNeighbors(n_neighbors=2).fit(points)
G = clf.kneighbors_graph()
# from_scipy_sparse_matrix was removed in NetworkX 3.0; prefer its
# replacement and fall back to the old name on older installations.
_from_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
T = _from_sparse(G)
# indexes of the new order: depth-first traversal starting at node 0
order = list(nx.dfs_preorder_nodes(T, 0))
# sorted arrays
new_x = x[order]
new_y = y[order]
plt.plot(new_x, new_y)
plt.show()
But I still get an unsorted list, and I couldn't find a way to determine the start point and end point.
We can see the problem as a Traveling salesman problem, that we can optimize by looking for the nearest point
def distance(P1, P2):
    """
    Euclidean distance between two 2-D points
    P1 = (x1, y1) and P2 = (x2, y2).
    """
    dx = P1[0] - P2[0]
    dy = P1[1] - P2[1]
    return (dx**2 + dy**2) ** 0.5
def optimized_path(coords, start=None):
    """
    Order points greedily: always step to the nearest unvisited point.

    coords: sequence of 2-D points, e.g. [[x1, y1], [x2, y2], ...] or a list
        of (x, y) tuples. It is not modified.
    start: point to begin at; defaults to the first point of coords. It is
        matched against coords by coordinate values, so a list [x, y] finds
        a tuple (x, y).
    output: list starting with `start`, followed by the remaining points of
        coords in visiting order. An empty coords with no start gives [].
    raises: ValueError when start is given but is not one of coords.
    """
    remaining = list(coords)  # copy, so the caller's list survives the run
    if start is None:
        if not remaining:
            return []
        start = remaining[0]

    start_key = tuple(start)
    for idx, point in enumerate(remaining):
        if tuple(point) == start_key:
            del remaining[idx]
            break
    else:
        raise ValueError("start point {!r} is not in coords".format(start))

    path = [start]
    while remaining:
        here = path[-1]
        # Squared distances order the candidates the same way as distances,
        # so the square root is not needed to pick the nearest point.
        nearest = min(
            remaining,
            key=lambda pt: (pt[0] - here[0]) ** 2 + (pt[1] - here[1]) ** 2,
        )
        path.append(nearest)
        remaining.remove(nearest)
    return path
# define a start point
# x0 and y0 are placeholders: replace them with the coordinates of the
# point the path should begin at before running this.
start = [x0, y0]
# Order the points of List, beginning at `start`.
path = optimized_path(List,start)
Not an answer, but too much for a comment
I plotted the data points as scatter and line
I see a visually smooth curve (a low-order spline with small local derivatives) with ~10% of the points 'out of order'
Is this typical of the problem?, is the data mostly in order?
How general or specific does the code have to be
I don't know the "big hammer" libs, but I cleaned up the surrounding code and made the same plot
List = [(-50.6261, 74.3683), (-63.2489, 75.0038), (-76.0384, 75.6219), (-79.8451, 75.7855), (-30.9626, 168.085), (-27.381, 170.967), (-22.9191, 172.928), (-16.5869, 173.087), (-4.813, 172.505), (-109.056, 92.0063), (-96.0705, 91.4232), (-83.255, 90.8563), (-80.7807, 90.7498), (-54.1694, 89.5087), (-41.6419, 88.9191), (-32.527, 88.7737), (-27.6403, 91.0134), (-22.3035, 95.141), (-18.0168, 100.473), (-15.3918, 105.542), (-13.6401, 112.373), (-13.3475, 118.988), (-14.4509, 125.238), (-17.1246, 131.895), (-21.6766, 139.821), (-28.5735, 149.98), (-33.395, 156.344), (-114.702, 83.9644), (-114.964, 87.4599), (-114.328, 89.8325), (-112.314, 91.6144), (-109.546, 92.0209), (-67.9644, 90.179), (-55.2013, 89.5624), (-34.4271, 158.876), (-34.6987, 161.896), (-33.6055, 164.993), (-87.0365, 75.9683), (-99.8007, 76.0889), (-105.291, 76.5448), (-109.558, 77.3525), (-112.516, 79.2509), (-113.972, 81.3335), (2.30014, 171.635), (4.40918, 169.691), (5.07165, 166.974), (5.34843, 163.817), (5.30879, 161.798), (-29.6746, 73.5082), (-42.5876, 74.0206)]
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import NearestNeighbors
import networkx as nx
points = np.asarray(List)
# find 2 nearest neighbors (n_neighbors is keyword-only in current
# scikit-learn releases)
clf = NearestNeighbors(n_neighbors=2).fit(points)
G = clf.kneighbors_graph()
# from_scipy_sparse_matrix was removed in NetworkX 3.0; prefer its
# replacement and fall back to the old name on older installations.
_from_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
T = _from_sparse(G)
# indexes of the new order: depth-first traversal starting at node 0
order = list(nx.dfs_preorder_nodes(T, 0))
# sorted arrays
new_points = points[order]
# raw points as a scatter, re-ordered points as a red line
plt.scatter(*zip(*points))
plt.plot(*zip(*new_points), 'r')
plt.show()
So I'm running a KNN in order to create clusters. From each cluster, I would like to obtain the medoid of the cluster.
I'm employing a fractional distance metric in order to calculate distances:
$$\mathrm{dist}_d^f(x, y) = \left[\sum_{i=1}^{d} \left|x^i - y^i\right|^f\right]^{1/f}$$
where d is the number of dimensions, the first data point's coordinates are x^i, the second data point's coordinates are y^i, and f is an arbitrary number between 0 and 1
I would then calculate the medoid as:
$$x_{\mathrm{medoid}} = \arg\min_{y \in S} \sum_{x \in S} \delta(x, y)$$
where S is the set of datapoints, and δ is the absolute value of the distance metric used above.
I've looked online to no avail trying to find implementations of the medoid (even with other distance metrics), but most things were specifically k-means or k-medoids, which (I think) is relatively different from what I want.
Essentially this boils down to me being unable to translate the math into effective programming. Any help would or pointers in the right direction would be much appreciated! Here's a short list of what I have so far:
I have figured out how to calculate the fractional distance metric (the first equation) so I think I'm good there.
I know numpy has an argmin() function (documented here).
Extra points for increased efficiency without lack of accuracy (I'm trying not to brute force by calculating every single fractional distance metric (because the number of point pairs might lead to a factorial complexity...).
compute pairwise distance matrix
compute column or row sum
argmin to find medoid index
i.e. numpy.argmin(distMatrix.sum(axis=0)) or similar.
So I've accepted the answer here, but I thought I'd provide my implementation if anyone else was trying to do something similar:
(1) This is the distance function:
def fractional(p_coord_array, q_coord_array):
    """
    Distance-like score between two coordinate arrays.

    With a = sum(p), b = sum(q), a2 = sum(p**2), b2 = sum(q**2) it returns
    |(a - b) * (a2 + a*b + b2)| ** (1/3), evaluated as a 1/10 power followed
    by a 10/3 power. The result is symmetric in its two arguments.

    NOTE(review): this works on coordinate sums, not on per-coordinate
    differences, so it is not the Minkowski-style fractional distance
    (sum_i |p_i - q_i|**f) ** (1/f) — confirm this is what is intended.
    """
    # f is an arbitrary value, but must be greater than zero and
    # less than one. In this case, 3/10 was used, together with the
    # difference-of-cubes factorisation, to avoid an overflow error.
    p = np.asarray(p_coord_array, dtype=np.float64)
    q = np.asarray(q_coord_array, dtype=np.float64)
    a = np.sum(p)
    b = np.sum(q)
    a2 = np.sum(np.power(p, 2))
    ab = a * b
    b2 = np.sum(np.power(q, 2))  # built from q; using p here breaks symmetry
    diffab = a - b
    suma2abb2 = a2 + ab + b2
    temp_dist = abs(diffab * suma2abb2)
    temp_dist = np.power(temp_dist, 1./10)
    dist = np.power(temp_dist, 10./3)
    return dist
(2) The medoid function (if the length of the dataset was less than 6000 [if greater than that, I ran into overflow errors... I'm still working on that bit to be perfectly honest...]):
def medoid(dataset):
    """
    Return the element of dataset with the smallest total distance to all
    other elements, using fractional() as the distance.

    dataset: sequence of points (anything with len() and integer indexing)
    output: the medoid element, or None when the dataset is empty or holds
    6000 or more points (larger inputs are deliberately not processed).
    """
    n = len(dataset)
    if n == 0 or n >= 6000:
        return None

    # Row sums of the symmetric distance matrix, accumulated pair by pair so
    # the full n x n matrix never has to be stored.
    totals = [0.0] * n
    for i in range(n):
        for j in range(i + 1, n):
            pair_dist = abs(fractional(dataset[i], dataset[j]))
            totals[i] += pair_dist
            totals[j] += pair_dist

    best = min(range(n), key=totals.__getitem__)
    return dataset[best]
Any questions, feel free to comment!
If you don't mind using brute force this might help:
def calc_medoid(X, Y, f=2):
    """
    Brute-force medoid: the row of X with the smallest summed distance to
    the rows of Y.

    X: 2-D array of candidate points, one per row
    Y: 2-D array of points the distances are measured to, one per row
    f: order of the vector norm passed to np.linalg.norm (default 2)
    output: (medoid_id, X[medoid_id, :])

    Every (candidate, point) pair is measured, so the result is also correct
    when X and Y are different point sets.
    """
    n = len(X)
    m = len(Y)
    dist_mat = np.zeros((m, n))
    # compute distance matrix, one candidate (column) at a time
    for j in range(n):
        dist_mat[:, j] = np.linalg.norm(Y - X[j, :], ord=f, axis=1)
    medoid_id = np.argmin(dist_mat.sum(axis=0))  # sum over y
    return medoid_id, X[medoid_id, :]
Here is an example of computing a medoid for a single cluster with Euclidean distance.
import numpy as np, pandas as pd, matplotlib.pyplot as plt
# Four sample points in the plane and their centroid (column-wise mean).
a = np.array([0, 1])
b = np.array([1, 3])
c = np.array([4, 2])
d = np.array([3, 1.5])
vCenroid = np.mean([a, b, c, d], axis=0)
def GetMedoid(vX):
    """Return the element of vX that lies closest to the centroid of vX."""
    vMean = np.mean(vX, axis=0)  # compute centroid
    # squared Euclidean distance of every point to the centroid
    sq_dists = [sum((x - vMean)**2) for x in vX]
    closest = np.argmin(sq_dists)
    return vX[closest]
# Point picked by GetMedoid for the four sample points.
vMedoid = GetMedoid([a, b, c, d])
print(f'centroid = {vCenroid}')
print(f'medoid = {vMedoid}')
# Scatter plot of the four points, with centroid and medoid drawn on top.
df = pd.DataFrame([a, b, c, d], columns=['x', 'y'])
ax = df.plot.scatter('x', 'y', grid=True, title='Centroid in 2D plane', s=100);
plt.plot(vCenroid[0], vCenroid[1], 'ro', ms=10); # plot centroid as red circle
plt.plot(vMedoid[0], vMedoid[1], 'rx', ms=20); # plot medoid as red x marker
You can also use the following package to compute medoid for one or more clusters
# Notebook cell: the leading "!" runs pip as a shell command (IPython
# syntax, not plain Python); its output is redirected to the file "log".
!pip -q install scikit-learn-extra > log
from sklearn_extra.cluster import KMedoids
# Rebinds GetMedoid: fit a one-cluster KMedoids model to vX and return its
# cluster_centers_ attribute.
GetMedoid = lambda vX: KMedoids(n_clusters=1).fit(vX).cluster_centers_
# First (and, with n_clusters=1, presumably only) row of cluster_centers_.
GetMedoid([a, b, c, d])[0]
I would say that you just need to compute the median.
# Median taken along axis 0, i.e. separately for each coordinate column.
# NOTE(review): the result need not coincide with any input point — confirm
# it is an acceptable stand-in for the medoid in your use case.
np.median(np.asarray(points), axis=0)
Your median is the point with the biggest centrality.
Note: if you are using distances different than Euclidean this doesn't hold.
I have about 50,000 data points in 3D on which I have run scipy.spatial.Delaunay from the new scipy (I'm using 0.10) which gives me a very useful triangulation.
Based on: http://en.wikipedia.org/wiki/Delaunay_triangulation (section "Relationship with the Voronoi diagram")
...I was wondering if there is an easy way to get to the "dual graph" of this triangulation, which is the Voronoi Tesselation.
Any clues? My searching around on this seems to show no pre-built in scipy functions, which I find almost strange!
Thanks,
Edward
The adjacency information can be found in the neighbors attribute of the Delaunay object. Unfortunately, the code does not expose the circumcenters to the user at the moment, so you'll have to recompute those yourself.
Also, the Voronoi edges that extend to infinity are not directly obtained in this way. It's still probably possible, but needs some more thinking.
import numpy as np
from scipy.spatial import Delaunay
points = np.random.rand(30, 2)
tri = Delaunay(points)
# `simplices` holds the vertex indices of each triangle; the older alias
# `vertices` is deprecated and missing from recent SciPy releases.
p = tri.points[tri.simplices]
# Triangle vertices, transposed so that row 0 is x and row 1 is y
A = p[:,0,:].T
B = p[:,1,:].T
C = p[:,2,:].T
# See http://en.wikipedia.org/wiki/Circumscribed_circle#Circumscribed_circles_of_triangles
# The following is just a direct transcription of the formula there
a = A - C
b = B - C
def dot2(u, v):
    """Dot product of two 2-D vectors given as (x, y) components."""
    x_term = u[0]*v[0]
    y_term = u[1]*v[1]
    return x_term + y_term


def cross2(u, v, w):
    """u x (v x w)"""
    uw = dot2(u, w)
    uv = dot2(u, v)
    return uw*v - uv*w


def ncross2(u, v):
    """|| u x v ||^2"""
    uv = dot2(u, v)
    return sq2(u)*sq2(v) - uv**2


def sq2(u):
    """Squared length of a 2-D vector."""
    return dot2(u, u)
# Circumcentres of all triangles (row 0 = x, row 1 = y)
cc = cross2(sq2(a) * b - sq2(b) * a, a, b) / (2*ncross2(a, b)) + C
# Grab the Voronoi edges: for each triangle, the circumcentres of its
# (up to three) neighbouring triangles
vc = cc[:,tri.neighbors]
vc[:,tri.neighbors == -1] = np.nan # edges at infinity, plotting those would need more work...
lines = []
lines.extend(zip(cc.T, vc[:,:,0].T))
lines.extend(zip(cc.T, vc[:,:,1].T))
lines.extend(zip(cc.T, vc[:,:,2].T))
# Plot it
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
lines = LineCollection(lines, edgecolor='k')
# No plt.hold() call: it was removed from Matplotlib, and successive plot
# calls draw onto the same axes anyway.
plt.plot(points[:,0], points[:,1], '.')
plt.plot(cc[0], cc[1], '*')
plt.gca().add_collection(lines)
plt.axis('equal')
plt.xlim(-0.1, 1.1)
plt.ylim(-0.1, 1.1)
plt.show()
As I spent a considerable amount of time on this, I'd like to share my solution on how to get the Voronoi polygons instead of just the edges.
The code is at https://gist.github.com/letmaik/8803860 and extends on the solution of tauran.
First, I changed the code to give me vertices and (pairs of) indices (=edges) separately, as many calculations can be simplified when working on indices instead of point coordinates.
Then, in the voronoi_cell_lines method I determine which edges belong to which cells. For that I use the proposed solution of Alink from a related question. That is, for each edge find the two nearest input points (=cells) and create a mapping from that.
The last step is to create the actual polygons (see voronoi_polygons method). First, the outer cells which have dangling edges need to be closed. This is as simple as looking through all edges and checking which ones have only one neighboring edge. There can be either zero or two such edges. In case of two, I then connect these by introducing an additional edge.
Finally, the unordered edges in each cell need to be put into the right order to derive a polygon from them.
The usage is:
P = np.random.random((100,2))
fig = plt.figure(figsize=(4.5,4.5))
axes = plt.subplot(1,1,1)
plt.axis([-0.05,1.05,-0.05,1.05])
# Vertices and edges first, then the edges grouped per cell, then one
# ordered polygon (list of vertex indices) per cell.
vertices, lineIndices = voronoi(P)
cells = voronoi_cell_lines(P, vertices, lineIndices)
polys = voronoi_polygons(cells)
for pIdx, polyIndices in polys.items():
    poly = vertices[np.asarray(polyIndices)]
    # A colour must be a flat sequence of 3 values; a (3, 1) array is
    # rejected by current Matplotlib.
    p = matplotlib.patches.Polygon(poly, facecolor=np.random.rand(3))
    axes.add_patch(p)
X,Y = P[:,0],P[:,1]
plt.scatter(X, Y, marker='.', zorder=2)
plt.axis([-0.05,1.05,-0.05,1.05])
plt.show()
which outputs:
The code is probably not suitable for large numbers of input points and can be improved in some areas. Nevertheless, it may be helpful to others who have similar problems.
I came across the same problem and built a solution out of pv.'s answer and other code snippets I found across the web. The solution returns a complete Voronoi diagram, including the outer lines where no triangle neighbours are present.
#!/usr/bin/env python
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.spatial import Delaunay
def voronoi(P):
    """
    Build the Voronoi edges of a 2-D point set from its Delaunay triangulation.

    P: array of 2-D points, one per row
    output: list of (start, end) coordinate pairs. Neighbouring triangles
    contribute the segment between their circumcentres; a triangle side with
    no neighbour contributes a segment of length 1000 running from the
    circumcentre perpendicular to that side, away from the opposite vertex.
    Side effect: the Delaunay triangle edges are added in red to the current
    matplotlib axes.
    """
    delauny = Delaunay(P)
    # `simplices` replaces the deprecated `vertices` attribute, which is
    # missing from recent SciPy releases.
    triangles = delauny.points[delauny.simplices]

    # Triangle vertices, drawn as the three sides of every triangle
    A = triangles[:, 0]
    B = triangles[:, 1]
    C = triangles[:, 2]
    lines = []
    lines.extend(zip(A, B))
    lines.extend(zip(B, C))
    lines.extend(zip(C, A))
    lines = matplotlib.collections.LineCollection(lines, color='r')
    plt.gca().add_collection(lines)

    circum_centers = np.array([triangle_csc(tri) for tri in triangles])

    segments = []
    for i, triangle in enumerate(triangles):
        circum_center = circum_centers[i]
        for j, neighbor in enumerate(delauny.neighbors[i]):
            if neighbor != -1:
                segments.append((circum_center, circum_centers[neighbor]))
                continue
            # No neighbour across the side opposite vertex j: build the
            # outward perpendicular of that side.
            side_start = triangle[(j-1)%3]
            side_end = triangle[(j+1)%3]
            ps = side_end - side_start
            ps = np.array((ps[1], -ps[0]))
            middle = (side_end + side_start) * 0.5
            di = middle - triangle[j]
            ps /= np.linalg.norm(ps)
            di /= np.linalg.norm(di)
            # Flip the perpendicular when it points back towards vertex j.
            if np.dot(di, ps) < 0.0:
                ps *= -1000.0
            else:
                ps *= 1000.0
            segments.append((circum_center, circum_center + ps))
    return segments
def triangle_csc(pts):
    """
    Circumcentre of a simplex given by its vertices.

    pts: 2-D array with one vertex per row (three rows for a triangle)
    output: 1-D array with the coordinates of the circumcentre, obtained by
    solving a linear system for the barycentric coordinates of the centre.
    """
    rows = pts.shape[0]
    # np.block assembles a plain ndarray (np.bmat would give an np.matrix).
    A = np.block([[2 * np.dot(pts, pts.T), np.ones((rows, 1))],
                  [np.ones((1, rows)), np.zeros((1, 1))]])
    b = np.hstack((np.sum(pts * pts, axis=1), np.ones((1))))
    x = np.linalg.solve(A, b)
    # The last unknown is the multiplier of the sum-to-one row; the rest
    # are the barycentric weights of the vertices.
    bary_coords = x[:-1]
    return np.sum(pts * bary_coords.reshape((rows, 1)), axis=0)
if __name__ == '__main__':
    # Demo: 300 random points in the unit square, their Delaunay triangles
    # (drawn by voronoi itself) and the Voronoi edges in black.
    view = [-0.05, 1.05, -0.05, 1.05]
    P = np.random.random((300, 2))
    fig = plt.figure(figsize=(4.5, 4.5))
    axes = plt.subplot(1, 1, 1)
    X, Y = P[:, 0], P[:, 1]
    plt.scatter(X, Y, marker='.')
    plt.axis(view)
    segments = voronoi(P)
    lines = matplotlib.collections.LineCollection(segments, color='k')
    axes.add_collection(lines)
    plt.axis(view)
    plt.show()
Black lines = Voronoi diagram, Red lines = Delauny triangles
I do not know of a function to do this, but it does not seem like an overly complicated task.
The Voronoi graph is the junction of the circumcircles, as described in the wikipedia article.
So you could start with a function that finds the center of the circumcircles of a triangle, which is basic mathematics (http://en.wikipedia.org/wiki/Circumscribed_circle).
Then, just join centers of adjacent triangles.