Move points to nearest unoccupied grid position

Move points to nearest unoccupied grid position - python

I have a random distribution of points in 2D space like so:
from sklearn import datasets
import pandas as pd
import numpy as np
arr, labels = datasets.make_moons()
arr, labels = datasets.make_blobs(n_samples=1000, centers=3)
pd.DataFrame(arr, columns=['x', 'y']).plot.scatter('x', 'y', s=1)
I'm trying to assign each of these points to the nearest unoccupied slot on an imaginary hex grid to ensure the points don't overlap. The code I'm using to accomplish this goal produces the plot below. The general idea is to create the hex bins, then iterate over each point and find the minimal radius that allows the algorithm to assign that point to an unoccupied hex bin:
from scipy.spatial.distance import euclidean
def get_bounds(arr):
'''Given a 2D array return the y_min, y_max, x_min, x_max'''
return [
np.min(arr[:,1]),
np.max(arr[:,1]),
np.min(arr[:,0]),
np.max(arr[:,0]),
]
def create_mesh(arr, h=100, w=100):
'''arr is a 2d array; h=number of vertical divisions; w=number of horizontal divisions'''
print(' * creating mesh with size', h, w)
bounds = get_bounds(arr)
# create array of valid positions
y_vals = np.arange(bounds[0], bounds[1], (bounds[1]-bounds[0])/h)
x_vals = np.arange(bounds[2], bounds[3], (bounds[3]-bounds[2])/w)
# create the dense mesh
data = np.tile(
[[0, 1], [1, 0]],
np.array([
int(np.ceil(len(y_vals) / 2)),
int(np.ceil(len(x_vals) / 2)),
]))
# ensure each axis has an even number of slots
if len(y_vals) % 2 != 0 or len(x_vals) % 2 != 0:
data = data[0:len(y_vals), 0:len(x_vals)]
return pd.DataFrame(data, index=y_vals, columns=x_vals)
def align_points_to_grid(arr, h=100, w=100, verbose=False):
'''arr is a 2d array; h=number of vertical divisions; w=number of horizontal divisions'''
h = w = len(arr)/10
grid = create_mesh(arr, h=h, w=w)
# fill the mesh
print(' * filling mesh')
df = pd.DataFrame(arr, columns=['x', 'y'])
bounds = get_bounds(arr)
# store the number of points slotted
c = 0
for site, point in df[['x', 'y']].iterrows():
# skip points not in original points domain
if point.y < bounds[0] or point.y > bounds[1] or \
point.x < bounds[2] or point.x > bounds[3]:
raise Exception('Input point is out of bounds', point.x, point.y, bounds)
# assign this point to the nearest open slot
r_y = (bounds[1]-bounds[0])/h
r_x = (bounds[3]-bounds[2])/w
slotted = False
while not slotted:
bottom = grid.index.searchsorted(point.y - r_y)
top = grid.index.searchsorted(point.y + r_y, side='right')
left = grid.columns.searchsorted(point.x - r_x)
right = grid.columns.searchsorted(point.x + r_x, side='right')
close_grid_points = grid.iloc[bottom:top, left:right]
# store the position in this point's radius that minimizes distortion
best_dist = np.inf
grid_loc = [np.nan, np.nan]
for x, col in close_grid_points.iterrows():
for y, val in col.items():
if val != 1: continue
dist = euclidean(point, (x,y))
if dist < best_dist:
best_dist = dist
grid_loc = [x,y]
# no open slots were found so increase the search radius
if np.isnan(grid_loc[0]):
r_y *= 2
r_x *= 2
# success! report the slotted position to the user
else:
# assign a value other than 1 to mark this slot as filled
grid.loc[grid_loc[0], grid_loc[1]] = 2
df.loc[site, ['x', 'y']] = grid_loc
slotted = True
c += 1
if verbose:
print(' * completed', c, 'of', len(arr), 'assignments')
return df
# plot sample data
df = align_points_to_grid(arr, verbose=False)
df.plot.scatter('x', 'y', s=1)
I'm satisfied with the result of this algorithm, but not with the performance.
Is there a faster solution to this kind of hexbin assignment problem in Python? I feel others with more exposure to the Linear Assignment Problem or the Hungarian Algorithm might have valuable insight into this problem. Any suggestions would be hugely helpful!

It turned out assigning each point to the first available grid spot within its current radius was sufficiently performant.
For others who end up here, my lab wrapped this functionality into a little Python package for convenience. You can pip install pointgrid and then:
from pointgrid import align_points_to_grid
from sklearn import datasets
# create fake data
arr, labels = datasets.make_blobs(n_samples=1000, centers=5)
# get updated point positions
updated = align_points_to_grid(arr)
updated will be a numpy array with the same shape as the input array arr.

Related

Interpolate between two images

I'm trying to interpolate between two images in Python.
Images are of shapes (188, 188)
I wish to interpolate the image 'in-between' these two images. Say Image_1 is at location z=0 and Image_2 is at location z=2. I want the interpolated image at location z=1.
I believe this answer (MATLAB) contains a similar problem and solution.
Creating intermediate slices in a 3D MRI volume with MATLAB
I've tried to convert this code to Python as follows:
from scipy.interpolate import interpn
from scipy.interpolate import griddata
# Construct 3D volume from images
# arr.shape = (2, 182, 182)
arr = np.r_['0,3', image_1, image_2]
slices,rows,cols = arr.shape
# Construct meshgrids
[X,Y,Z] = np.meshgrid(np.arange(cols), np.arange(rows), np.arange(slices));
[X2,Y2,Z2] = np.meshgrid(np.arange(cols), np.arange(rows), np.arange(slices*2));
# Run n-dim interpolation
Vi = interpn([X,Y,Z], arr, np.array([X1,Y1,Z1]).T)
However, this produces an error:
ValueError: The points in dimension 0 must be strictly ascending
I suspect I am not constructing my meshgrid(s) properly but am kind of lost on whether or not this approach is correct.
Any ideas?
---------- Edit -----------
Found some MATLAB code that appears to solve this problem:
Interpolating Between Two Planes in 3d space
I attempted to convert this to Python:
from scipy.ndimage.morphology import distance_transform_edt
from scipy.interpolate import interpn
def ndgrid(*args,**kwargs):
"""
Same as calling ``meshgrid`` with *indexing* = ``'ij'`` (see
``meshgrid`` for documentation).
"""
kwargs['indexing'] = 'ij'
return np.meshgrid(*args,**kwargs)
def bwperim(bw, n=4):
"""
perim = bwperim(bw, n=4)
Find the perimeter of objects in binary images.
A pixel is part of an object perimeter if its value is one and there
is at least one zero-valued pixel in its neighborhood.
By default the neighborhood of a pixel is 4 nearest pixels, but
if `n` is set to 8 the 8 nearest pixels will be considered.
Parameters
----------
bw : A black-and-white image
n : Connectivity. Must be 4 or 8 (default: 8)
Returns
-------
perim : A boolean image
From Mahotas: http://nullege.com/codes/search/mahotas.bwperim
"""
if n not in (4,8):
raise ValueError('mahotas.bwperim: n must be 4 or 8')
rows,cols = bw.shape
# Translate image by one pixel in all directions
north = np.zeros((rows,cols))
south = np.zeros((rows,cols))
west = np.zeros((rows,cols))
east = np.zeros((rows,cols))
north[:-1,:] = bw[1:,:]
south[1:,:] = bw[:-1,:]
west[:,:-1] = bw[:,1:]
east[:,1:] = bw[:,:-1]
idx = (north == bw) & \
(south == bw) & \
(west == bw) & \
(east == bw)
if n == 8:
north_east = np.zeros((rows, cols))
north_west = np.zeros((rows, cols))
south_east = np.zeros((rows, cols))
south_west = np.zeros((rows, cols))
north_east[:-1, 1:] = bw[1:, :-1]
north_west[:-1, :-1] = bw[1:, 1:]
south_east[1:, 1:] = bw[:-1, :-1]
south_west[1:, :-1] = bw[:-1, 1:]
idx &= (north_east == bw) & \
(south_east == bw) & \
(south_west == bw) & \
(north_west == bw)
return ~idx * bw
def signed_bwdist(im):
'''
Find perim and return masked image (signed/reversed)
'''
im = -bwdist(bwperim(im))*np.logical_not(im) + bwdist(bwperim(im))*im
return im
def bwdist(im):
'''
Find distance map of image
'''
dist_im = distance_transform_edt(1-im)
return dist_im
def interp_shape(top, bottom, num):
if num<0 and round(num) == num:
print("Error: number of slices to be interpolated must be integer>0")
top = signed_bwdist(top)
bottom = signed_bwdist(bottom)
r, c = top.shape
t = num+2
print("Rows - Cols - Slices")
print(r, c, t)
print("")
# rejoin top, bottom into a single array of shape (2, r, c)
# MATLAB: cat(3,bottom,top)
top_and_bottom = np.r_['0,3', top, bottom]
#top_and_bottom = np.rollaxis(top_and_bottom, 0, 3)
# create ndgrids
x,y,z = np.mgrid[0:r, 0:c, 0:t-1] # existing data
x1,y1,z1 = np.mgrid[0:r, 0:c, 0:t] # including new slice
print("Shape x y z:", x.shape, y.shape, z.shape)
print("Shape x1 y1 z1:", x1.shape, y1.shape, z1.shape)
print(top_and_bottom.shape, len(x), len(y), len(z))
# Do interpolation
out = interpn((x,y,z), top_and_bottom, (x1,y1,z1))
# MATLAB: out = out(:,:,2:end-1)>=0;
array_lim = out[-1]-1
out[out[:,:,2:out] >= 0] = 1
return out
I call this as follows:
new_image = interp_shape(image_1,image_2, 1)
Im pretty sure this is 80% of the way there but I still get this error when running:
ValueError: The points in dimension 0 must be strictly ascending
Again, I am probably not constructing my meshes correctly. I believe np.mgrid should produce the same result as MATLABs ndgrid though.
Is there a better way to construct the ndgrid equivalents?

I figured this out. Or at least a method that produces desirable results.
Based on: Interpolating Between Two Planes in 3d space
def signed_bwdist(im):
'''
Find perim and return masked image (signed/reversed)
'''
im = -bwdist(bwperim(im))*np.logical_not(im) + bwdist(bwperim(im))*im
return im
def bwdist(im):
'''
Find distance map of image
'''
dist_im = distance_transform_edt(1-im)
return dist_im
def interp_shape(top, bottom, precision):
'''
Interpolate between two contours
Input: top
[X,Y] - Image of top contour (mask)
bottom
[X,Y] - Image of bottom contour (mask)
precision
float - % between the images to interpolate
Ex: num=0.5 - Interpolate the middle image between top and bottom image
Output: out
[X,Y] - Interpolated image at num (%) between top and bottom
'''
if precision>2:
print("Error: Precision must be between 0 and 1 (float)")
top = signed_bwdist(top)
bottom = signed_bwdist(bottom)
# row,cols definition
r, c = top.shape
# Reverse % indexing
precision = 1+precision
# rejoin top, bottom into a single array of shape (2, r, c)
top_and_bottom = np.stack((top, bottom))
# create ndgrids
points = (np.r_[0, 2], np.arange(r), np.arange(c))
xi = np.rollaxis(np.mgrid[:r, :c], 0, 3).reshape((r**2, 2))
xi = np.c_[np.full((r**2),precision), xi]
# Interpolate for new plane
out = interpn(points, top_and_bottom, xi)
out = out.reshape((r, c))
# Threshold distmap to values above 0
out = out > 0
return out
# Run interpolation
out = interp_shape(image_1,image_2, 0.5)
Example output:

I came across a similar problem where I needed to interpolate the shift between frames where the change did not merely constitute a translation but also changes to the shape itself . I solved this problem by :
Using center_of_mass from scipy.ndimage.measurements to calculate the center of the object we want to move in each frame
Defining a continuous parameter t where t=0 first and t=1 last frame
Interpolate the motion between two nearest frames (with regard to a specific t value) by shifting the image back/forward via shift from scipy.ndimage.interpolation and overlaying them.
Here is the code:
def inter(images,t):
#input:
# images: list of arrays/frames ordered according to motion
# t: parameter ranging from 0 to 1 corresponding to first and last frame
#returns: interpolated image
#direction of movement, assumed to be approx. linear
a=np.array(center_of_mass(images[0]))
b=np.array(center_of_mass(images[-1]))
#find index of two nearest frames
arr=np.array([center_of_mass(images[i]) for i in range(len(images))])
v=a+t*(b-a) #convert t into vector
idx1 = (np.linalg.norm((arr - v),axis=1)).argmin()
arr[idx1]=np.array([0,0]) #this is sloppy, should be changed if relevant values are near [0,0]
idx2 = (np.linalg.norm((arr - v),axis=1)).argmin()
if idx1>idx2:
b=np.array(center_of_mass(images[idx1])) #center of mass of nearest contour
a=np.array(center_of_mass(images[idx2])) #center of mass of second nearest contour
tstar=np.linalg.norm(v-a)/np.linalg.norm(b-a) #define parameter ranging from 0 to 1 for interpolation between two nearest frames
im1_shift=shift(images[idx2],(b-a)*tstar) #shift frame 1
im2_shift=shift(images[idx1],-(b-a)*(1-tstar)) #shift frame 2
return im1_shift+im2_shift #return average
if idx1<idx2:
b=np.array(center_of_mass(images[idx2]))
a=np.array(center_of_mass(images[idx1]))
tstar=np.linalg.norm(v-a)/np.linalg.norm(b-a)
im1_shift=shift(images[idx2],-(b-a)*(1-tstar))
im2_shift=shift(images[idx1],(b-a)*(tstar))
return im1_shift+im2_shift
Result example

I don't know the solution to your problem, but I don't think it's possible to do this with interpn.
I corrected the code that you tried, and used the following input images:
But the result is:
Here's the corrected code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy import interpolate
n = 8
img1 = np.zeros((n, n))
img2 = np.zeros((n, n))
img1[2:4, 2:4] = 1
img2[4:6, 4:6] = 1
plt.figure()
plt.imshow(img1, cmap=cm.Greys)
plt.figure()
plt.imshow(img2, cmap=cm.Greys)
points = (np.r_[0, 2], np.arange(n), np.arange(n))
values = np.stack((img1, img2))
xi = np.rollaxis(np.mgrid[:n, :n], 0, 3).reshape((n**2, 2))
xi = np.c_[np.ones(n**2), xi]
values_x = interpolate.interpn(points, values, xi, method='linear')
values_x = values_x.reshape((n, n))
print(values_x)
plt.figure()
plt.imshow(values_x, cmap=cm.Greys)
plt.clim((0, 1))
plt.show()
I think the main difference between your code and mine is in the specification of xi. interpn tends to be somewhat confusing to use, and I've explained it in greater detail in an older answer. If you're curious about the mechanics of how I've specified xi, see this answer of mine explaining what I've done.
This result is not entirely surprising, because interpn just linearly interpolated between the two images: so the parts which had 1 in one image and 0 in the other simply became 0.5.
Over here, since one image is the translation of the other, it's clear that we want an image that's translated "in-between". But how would interpn interpolate two general images? If you had one small circle and one big circle, is it in any way clear that there should be a circle of intermediate size "between" them? What about interpolating between a dog and a cat? Or a dog and a building?
I think you are essentially trying to "draw lines" connecting the edges of the two images and then trying to figure out the image in between. This is similar to sampling a moving video at a half-frame. You might want to check out something like optical flow, which connects adjacent frames using vectors. I'm not aware if and what python packages/implementations are available though.

Moving window averages on unequal dimensions

TL;DR: Is there anyway I can get rid of my second for-loop?
I have a time series of points on a 2D-grid. To get rid of fast fluctuations of their position, I average the coordinates over a window of frames. Now in my case, it's possible for the points to cover a larger distance than usual. I don't want to include frames for a specific point, if it travels farther than the cut_off value.
In the first for-loop, I go over all frames and define the moving window. I then calculate the distances between the current frame and each frame in the moving window. After I grab only those positions from all frames, where both the x and y component did not travel farther than cut_off. Now I want to calculate the mean positions for every point from all these selected frames of the moving window (note: the number of selected frames can be smaller than n_window). This leads me to the second for-loop. Here I iterate over all points and actually grab the positions from the frames, in which the current point did not travel farther than cut_off. From these selected frames I calculate the mean value of the coordinates and use it as the new value for the current frame.
This very last for-loop slows down the whole processing. I can't come up with a better way to accomplish this calculation. Any suggestions?
MWE
Put in comments for clarification.
import numpy as np
# Generate a timeseries with 1000 frames, each
# containing 50 individual points defined by their
# x and y coordinates
n_frames = 1000
n_points = 50
n_coordinates = 2
timeseries = np.random.randint(-100, 100, [n_frames, n_points, n_coordinates])
# Set window size to 20 frames
n_window = 20
# Distance cut off
cut_off = 60
# Set up empty array to hold results
avg_data_store = np.zeros([n_frames, timeseries.shape[1], 2])
# Iterate over all frames
for frame in np.arange(0, n_frames):
# Set the frame according to the window size that we're looking at
t_before = int(frame - (n_window / 2))
t_after = int(frame + (n_window / 2))
# If we're trying to access frames below 0, set the lowest one to 0
if t_before < 0:
t_before = 0
# Trying to access frames that are not in the trajectory, set to last frame
if t_after > n_frames - 1:
t_after = n_frames - 1
# Grab x and y coordinates for all points in the corresponding window
pos_before = timeseries[t_before:frame]
pos_after = timeseries[frame + 1:t_after + 1]
pos_now = timeseries[frame]
# Calculate the distance between the current frame and the windows before/after
d_before = np.abs(pos_before - pos_now)
d_after = np.abs(pos_after - pos_now)
# Grab indices of frames+points, that are below the cut off
arg_before = np.argwhere(np.all(d_before < cut_off, axis=2))
arg_after = np.argwhere(np.all(d_after < cut_off, axis=2))
# Iterate over all points
for i in range(0, timeseries.shape[1]):
# Create temp array
temp_stack = pos_now[i]
# Grab all frames in which the current point did _not_
# travel farther than `cut_off`
all_before = arg_before[arg_before[:, 1] == i][:, 0]
all_after = arg_after[arg_after[:, 1] == i][:, 0]
# Grab the corresponding positions for this points in these frames
all_pos_before = pos_before[all_before, i]
all_pos_after = pos_after[all_after, i]
# If we have any frames for that point before / after
# stack them into the temp array
if all_pos_before.size > 0:
temp_stack = np.vstack([all_pos_before, temp_stack])
if all_pos_after.size > 0:
temp_stack = np.vstack([temp_stack, all_pos_after])
# Calculate the moving window average for the selection of frames
avg_data_store[frame, i] = temp_stack.mean(axis=0)

If you are fine with calculating the cutoff distance in x and y separately, you can use scipy.ndimage.generic_filter.
import numpy as np
from scipy.ndimage import generic_filter
def _mean(x, cutoff):
is_too_different = np.abs(x - x[len(x) / 2]) > cutoff
return np.mean(x[~is_too_different])
def _smooth(x, window_length=5, cutoff=1.):
return generic_filter(x, _mean, size=window_length, mode='nearest', extra_keywords=dict(cutoff=cutoff))
def smooth(arr, window_length=5, cutoff=1., axis=-1):
return np.apply_along_axis(_smooth, axis, arr, window_length=window_length, cutoff=cutoff)
# --------------------------------------------------------------------------------
def _simulate_movement_2d(T, fraction_is_jump=0.01):
# generate random velocities with a few "jumps"
velocity = np.random.randn(T, 2)
is_jump = np.random.rand(T) < fraction_is_jump
jump = 10 * np.random.randn(T, 2)
jump[~is_jump] = 0.
# pre-allocate position and momentum arrays
position = np.zeros((T,2))
momentum = np.zeros((T,2))
# initialise the first position
position[0] = np.random.randn(2)
# update position using velocity vector:
# smooth movement by not applying the velocity directly
# but rather by keeping track of the momentum
for ii in range(2,T):
momentum[ii] = 0.9 * momentum[ii-1] + 0.1 * velocity[ii-1]
position[ii] = position[ii-1] + momentum[ii] + jump[ii]
# add some measurement noise
noise = np.random.randn(T,2)
position += noise
return position
def demo(nframes=1000, npoints=3):
# create data
positions = np.array([_simulate_movement_2d(nframes) for ii in range(npoints)])
# format to (nframes, npoints, 2)
position = positions.transpose([1, 0, 2])
# smooth
smoothed = smooth(positions, window_length=11, cutoff=5., axis=1)
# plot
x, y = positions.T
xs, ys = smoothed.T
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1,1)
ax.plot(x, y, 'o')
ax.plot(xs, ys, 'k-', alpha=0.3, lw=2)
plt.show()
demo()

Find a easier way to cluster 2-d scatter data into grid array data

I have figured out a method to cluster disperse point data into structured 2-d array(like rasterize function). And I hope there are some better ways to achieve that target.
My work
1. Intro
1000 point data has there dimensions of properties (lon, lat, emission) whicn represent one factory located at (x,y) emit certain amount of CO2 into atmosphere
grid network: predefine the 2-d array in the shape of 20x20
http://i4.tietuku.com/02fbaf32d2f09fff.png
The code reproduced here:
#### define the map area
xc1,xc2,yc1,yc2 = 113.49805889531724,115.5030664238035,37.39995194888143,38.789235929357105
map = Basemap(llcrnrlon=xc1,llcrnrlat=yc1,urcrnrlon=xc2,urcrnrlat=yc2)
#### reading the point data and scatter plot by their position
df = pd.read_csv("xxxxx.csv")
px,py = map(df.lon, df.lat)
map.scatter(px, py, color = "red", s= 5,zorder =3)
#### predefine the grid networks
lon_grid,lat_grid = np.linspace(xc1,xc2,21), np.linspace(yc1,yc2,21)
lon_x,lat_y = np.meshgrid(lon_grid,lat_grid)
grids = np.zeros(20*20).reshape(20,20)
plt.pcolormesh(lon_x,lat_y,grids,cmap = 'gray', facecolor = 'none',edgecolor = 'k',zorder=3)
2. My target
Finding the nearest grid point for each factory
Add the emission data into this grid number
3. Algorithm realization
3.1 Raster grid
note: 20x20 grid points are distributed in this area represented by blue dot.
http://i4.tietuku.com/8548554587b0cb3a.png
3.2 KD-tree
Find the nearest blue dot of each red point
sh = (20*20,2)
grids = np.zeros(20*20*2).reshape(*sh)
sh_emission = (20*20)
grids_em = np.zeros(20*20).reshape(sh_emission)
k = 0
for j in range(0,yy.shape[0],1):
for i in range(0,xx.shape[0],1):
grids[k] = np.array([lon_grid[i],lat_grid[j]])
k+=1
T = KDTree(grids)
x_delta = (lon_grid[2] - lon_grid[1])
y_delta = (lat_grid[2] - lat_grid[1])
R = np.sqrt(x_delta**2 + y_delta**2)
for i in range(0,len(df.lon),1):
idx = T.query_ball_point([df.lon.iloc[i],df.lat.iloc[i]], r=R)
# there are more than one blue dot which are founded sometimes,
# So I'll calculate the distances between the factory(red point)
# and all blue dots which are listed
if (idx > 1):
distance = []
for k in range(0,len(idx),1):
distance.append(np.sqrt((df.lon.iloc[i] - grids[k][0])**2 + (df.lat.iloc[i] - grids[k][1])**2))
pos_index = distance.index(min(distance))
pos = idx[pos_index]
# Only find 1 point
else:
pos = idx
grids_em[pos] += df.so2[i]
4. Result
co2 = grids_em.reshape(20,20)
plt.pcolormesh(lon_x,lat_y,co2,cmap =plt.cm.Spectral_r,zorder=3)
http://i4.tietuku.com/6ded65c4ac301294.png
5. My question
Can someone point out some drawbacks or error of this method?
Is there some algorithms more aligned with my target?
Thanks a lot!

There are many for-loop in your code, it's not the numpy way.
Make some sample data first:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
import pylab as pl
xc1, xc2, yc1, yc2 = 113.49805889531724, 115.5030664238035, 37.39995194888143, 38.789235929357105
N = 1000
GSIZE = 20
x, y = np.random.multivariate_normal([(xc1 + xc2)*0.5, (yc1 + yc2)*0.5], [[0.1, 0.02], [0.02, 0.1]], size=N).T
value = np.ones(N)
df_points = pd.DataFrame({"x":x, "y":y, "v":value})
For equal space grids you can use hist2d():
pl.hist2d(df_points.x, df_points.y, weights=df_points.v, bins=20, cmap="viridis");
Here is the output:
Here is the code to use KdTree:
X, Y = np.mgrid[x.min():x.max():GSIZE*1j, y.min():y.max():GSIZE*1j]
grid = np.c_[X.ravel(), Y.ravel()]
points = np.c_[df_points.x, df_points.y]
tree = KDTree(grid)
dist, indices = tree.query(points)
grid_values = df_points.groupby(indices).v.sum()
df_grid = pd.DataFrame(grid, columns=["x", "y"])
df_grid["v"] = grid_values
fig, ax = pl.subplots(figsize=(10, 8))
ax.plot(df_points.x, df_points.y, "kx", alpha=0.2)
mapper = ax.scatter(df_grid.x, df_grid.y, c=df_grid.v,
cmap="viridis",
linewidths=0,
s=100, marker="o")
pl.colorbar(mapper, ax=ax);
the output is:

Generate coordinates inside Polygon

I want to bin the values of polygons to a fine regular grid.
For instance, I have the following coordinates:
data = 2.353
data_lats = np.array([57.81000137, 58.15999985, 58.13000107, 57.77999878])
data_lons = np.array([148.67999268, 148.69999695, 148.47999573, 148.92999268])
My regular grid looks like this:
delta = 0.25
grid_lons = np.arange(-180, 180, delta)
grid_lats = np.arange(90, -90, -delta)
llx, lly = np.meshgrid( grid_lons, grid_lats )
rows = lly.shape[0]
cols = llx.shape[1]
grid = np.zeros((rows,cols))
Now I can find the grid pixel that corresponds to the center of my polygon very easily:
centerx, centery = np.mean(data_lons), np.mean(data_lats)
row = int(np.floor( centery/delta ) + (grid.shape[0]/2))
col = int(np.floor( centerx/delta ) + (grid.shape[1]/2))
grid[row,col] = data
However, there are probably a couple of grid pixels that still intersect with the polygon. Hence, I would like to generate a bunch of coordinates inside my polygon (data_lons, data_lats) and find their corresponding grid pixel as before. Do you a suggestion to generate the coordinates randomly or systematically? I failed, but am still trying.
Note: One data set contains around ~80000 polygons so it has to be really fast (a couple of seconds). That is also why I chose this approach, because it does not account the area of overlap... (like my earlier question Data binning: irregular polygons to regular mesh which is VERY slow)

I worked on a quick and dirty solution by simply calculating the coordinates between corner pixels. Take a look:
dlats = np.zeros((data_lats.shape[0],4))+np.nan
dlons = np.zeros((data_lons.shape[0],4))+np.nan
idx = [0,1,3,2,0] #rearrange the corner pixels
for cc in range(4):
dlats[:,cc] = np.mean((data_lats[:,idx[cc]],data_lats[:,idx[cc+1]]), axis=0)
dlons[:,cc] = np.mean((data_lons[:,idx[cc]],data_lons[:,idx[cc+1]]), axis=0)
data_lats = np.column_stack(( data_lats, dlats ))
data_lons = np.column_stack(( data_lons, dlons ))
Thus, the red dots represent the original corners - the blue ones the intermediate pixels between them.
I can do this one more time and include the center pixel (geo[:,[4,9]])
dlats = np.zeros((data.shape[0],8))
dlons = np.zeros((data.shape[0],8))
for cc in range(8):
dlats[:,cc] = np.mean((data_lats[:,cc], geo[:,4]), axis=0)
dlons[:,cc] = np.mean((data_lons[:,cc], geo[:,9]), axis=0)
data_lats = np.column_stack(( data_lats, dlats, geo[:,4] ))
data_lons = np.column_stack(( data_lons, dlons, geo[:,9] ))
This works really nice, and I can assign each point directly to its corresponding grid pixel like this:
row = np.floor( data_lats/delta ) + (llx.shape[0]/2)
col = np.floor( data_lons/delta ) + (llx.shape[1]/2)
However the final binning now takes ~7sec!!! How can I speed this code up:
for ii in np.arange(len(data)):
for cc in np.arange(data_lats.shape[1]):
final_grid[row[ii,cc],col[ii,cc]] += data[ii]
final_grid_counts[row[ii,cc],col[ii,cc]] += 1

You'll need to test the following approach to see if it is fast enough. First, you should modify all your lats and lons into, to make them (possibly fractional) indices into your grid:
idx_lats = (data_lats - lat_grid_start) / lat_grid step
idx_lons = (data_lons - lon_grid_start) / lon_grid step
Next, we want to split your polygons into triangles. For any convex polygon, you could take the center of the polygon as one vertex of all triangles, and then the vertices of the polygon in consecutive pairs. But if your polygon are all quadrilaterals, it is going to be faster to divide them into only 2 triangles, using vertices 0, 1, 2 for the first, and 0, 2, 3 for the second.
To know if a certain point is inside a triangle, I am going to use the barycentric coordinates approach described here. This first function checks whether a bunch of points are inside a triangle:
def check_in_triangle(x, y, x_tri, y_tri) :
A = np.vstack((x_tri[0], y_tri[0]))
lhs = np.vstack((x_tri[1:], y_tri[1:])) - A
rhs = np.vstack((x, y)) - A
uv = np.linalg.solve(lhs, rhs)
# Equivalent to (uv[0] >= 0) & (uv[1] >= 0) & (uv[0] + uv[1] <= 1)
return np.logical_and(uv >= 0, axis=0) & (np.sum(uv, axis=0) <= 1)
Given a triangle by its vertices, you can get the lattice points inside it, by running the above function on the lattice points in the bounding box of the triangle:
def lattice_points_in_triangle(x_tri, y_tri) :
x_grid = np.arange(np.ceil(np.min(x_tri)), np.floor(np.max(x_tri)) + 1)
y_grid = np.arange(np.ceil(np.min(y_tri)), np.floor(np.max(y_tri)) + 1)
x, y = np.meshgrid(x_grid, y_grid)
x, y = x.reshape(-1), y.reshape(-1)
idx = check_in_triangle(x, y, x_tri, y_tri)
return x[idx], y[idx]
And for a quadrilateral, you simply call this last function twice :
def lattice_points_in_quadrilateral(x_quad, y_quad) :
return map(np.concatenate,
zip(lattice_points_in_triangle(x_quad[:3], y_quad[:3]),
lattice_points_in_triangle(x_quad[[0, 2, 3]],
y_quad[[0, 2, 3]])))
If you run this code on your example data, you will get two empty arrays returned: that's because the order of the quadrilateral points is a surprising one: indices 0 and 1 define one diagonal, 2 and 3 the other. My function above was expecting the vertices to be ordered around the polygon. If you really are doing things this other way, you need to change the second call to lattice_points_in_triangle inside lattice_points_in_quadrilateral so that the indices used are [0, 1, 3] instead of [0, 2, 3].
And now, with that change :
>>> idx_lats = (data_lats - (-180) ) / 0.25
>>> idx_lons = (data_lons - (-90) ) / 0.25
>>> lattice_points_in_quadrilateral(idx_lats, idx_lons)
[array([952]), array([955])]
If you change the resolution of your grid to 0.1:
>>> idx_lats = (data_lats - (-180) ) / 0.1
>>> idx_lons = (data_lons - (-90) ) / 0.1
>>> lattice_points_in_quadrilateral(idx_lats, idx_lons)
[array([2381, 2380, 2381, 2379, 2380, 2381, 2378, 2379, 2378]),
array([2385, 2386, 2386, 2387, 2387, 2387, 2388, 2388, 2389])]
Timing wise this approach is going to be, in my system, about 10x too slow for your needs:
In [8]: %timeit lattice_points_in_quadrilateral(idx_lats, idx_lons)
1000 loops, best of 3: 269 us per loop
So you are looking at over 20 sec. to process your 80,000 polygons.

How can an almost arbitrary plane in a 3D dataset be plotted by matplotlib?

There is an array containing 3D data of shape e.g. (64,64,64), how do you plot a plane given by a point and a normal (similar to hkl planes in crystallography), through this dataset?
Similar to what can be done in MayaVi by rotating a plane through the data.
The resulting plot will contain non-square planes in most cases.
Can those be done with matplotlib (some sort of non-rectangular patch)?
Edit: I almost solved this myself (see below) but still wonder how non-rectangular patches can be plotted in matplotlib...?
Edit: Due to discussions below I restated the question.

This is funny, a similar question I replied to just today. The way to go is: interpolation. You can use griddata from scipy.interpolate:
Griddata
This page features a very nice example, and the signature of the function is really close to your data.
You still have to somehow define the points on you plane for which you want to interpolate the data. I will have a look at this, my linear algebra lessons where a couple of years ago

I have the penultimate solution for this problem. Partially solved by using the second answer to Plot a plane based on a normal vector and a point in Matlab or matplotlib :
# coding: utf-8
import numpy as np
from matplotlib.pyplot import imshow,show
A=np.empty((64,64,64)) #This is the data array
def f(x,y):
return np.sin(x/(2*np.pi))+np.cos(y/(2*np.pi))
xx,yy= np.meshgrid(range(64), range(64))
for x in range(64):
A[:,:,x]=f(xx,yy)*np.cos(x/np.pi)
N=np.zeros((64,64))
"""This is the plane we cut from A.
It should be larger than 64, due to diagonal planes being larger.
Will be fixed."""
normal=np.array([-1,-1,1]) #Define cut plane here. Normal vector components restricted to integers
point=np.array([0,0,0])
d = -np.sum(point*normal)
def plane(x,y): # Get plane's z values
return (-normal[0]*x-normal[1]*y-d)/normal[2]
def getZZ(x,y): #Get z for all values x,y. If z>64 it's out of range
for i in x:
for j in y:
if plane(i,j)<64:
N[i,j]=A[i,j,plane(i,j)]
getZZ(range(64),range(64))
imshow(N, interpolation="Nearest")
show()
It's not the ultimate solution since the plot is not restricted to points having a z value, planes larger than 64 * 64 are not accounted for and the planes have to be defined at (0,0,0).

For the reduced requirements, I prepared a simple example
import numpy as np
import pylab as plt
data = np.arange((64**3))
data.resize((64,64,64))
def get_slice(volume, orientation, index):
orientation2slicefunc = {
"x" : lambda ar:ar[index,:,:],
"y" : lambda ar:ar[:,index,:],
"z" : lambda ar:ar[:,:,index]
}
return orientation2slicefunc[orientation](volume)
plt.subplot(221)
plt.imshow(get_slice(data, "x", 10), vmin=0, vmax=64**3)
plt.subplot(222)
plt.imshow(get_slice(data, "x", 39), vmin=0, vmax=64**3)
plt.subplot(223)
plt.imshow(get_slice(data, "y", 15), vmin=0, vmax=64**3)
plt.subplot(224)
plt.imshow(get_slice(data, "z", 25), vmin=0, vmax=64**3)
plt.show()
This leads to the following plot:
The main trick is dictionary mapping orienations to lambda-methods, which saves us from writing annoying if-then-else-blocks. Of course you can decide to give different names,
e.g., numbers, for the orientations.
Maybe this helps you.
Thorsten
P.S.: I didn't care about "IndexOutOfRange", for me it's o.k. to let this exception pop out since it is perfectly understandable in this context.

I had to do something similar for a MRI data enhancement:
Probably the code can be optimized but it works as it is.
My data is 3 dimension numpy array representing an MRI scanner. It has size [128,128,128] but the code can be modified to accept any dimensions. Also when the plane is outside the cube boundary you have to give the default values to the variable fill in the main function, in my case I choose: data_cube[0:5,0:5,0:5].mean()
def create_normal_vector(x, y,z):
normal = np.asarray([x,y,z])
normal = normal/np.sqrt(sum(normal**2))
return normal
def get_plane_equation_parameters(normal,point):
a,b,c = normal
d = np.dot(normal,point)
return a,b,c,d #ax+by+cz=d
def get_point_plane_proximity(plane,point):
#just aproximation
return np.dot(plane[0:-1],point) - plane[-1]
def get_corner_interesections(plane, cube_dim = 128): #to reduce the search space
#dimension is 128,128,128
corners_list = []
only_x = np.zeros(4)
min_prox_x = 9999
min_prox_y = 9999
min_prox_z = 9999
min_prox_yz = 9999
for i in range(cube_dim):
temp_min_prox_x=abs(get_point_plane_proximity(plane,np.asarray([i,0,0])))
# print("pseudo distance x: {0}, point: [{1},0,0]".format(temp_min_prox_x,i))
if temp_min_prox_x < min_prox_x:
min_prox_x = temp_min_prox_x
corner_intersection_x = np.asarray([i,0,0])
only_x[0]= i
temp_min_prox_y=abs(get_point_plane_proximity(plane,np.asarray([i,cube_dim,0])))
# print("pseudo distance y: {0}, point: [{1},{2},0]".format(temp_min_prox_y,i,cube_dim))
if temp_min_prox_y < min_prox_y:
min_prox_y = temp_min_prox_y
corner_intersection_y = np.asarray([i,cube_dim,0])
only_x[1]= i
temp_min_prox_z=abs(get_point_plane_proximity(plane,np.asarray([i,0,cube_dim])))
#print("pseudo distance z: {0}, point: [{1},0,{2}]".format(temp_min_prox_z,i,cube_dim))
if temp_min_prox_z < min_prox_z:
min_prox_z = temp_min_prox_z
corner_intersection_z = np.asarray([i,0,cube_dim])
only_x[2]= i
temp_min_prox_yz=abs(get_point_plane_proximity(plane,np.asarray([i,cube_dim,cube_dim])))
#print("pseudo distance z: {0}, point: [{1},{2},{2}]".format(temp_min_prox_yz,i,cube_dim))
if temp_min_prox_yz < min_prox_yz:
min_prox_yz = temp_min_prox_yz
corner_intersection_yz = np.asarray([i,cube_dim,cube_dim])
only_x[3]= i
corners_list.append(corner_intersection_x)
corners_list.append(corner_intersection_y)
corners_list.append(corner_intersection_z)
corners_list.append(corner_intersection_yz)
corners_list.append(only_x.min())
corners_list.append(only_x.max())
return corners_list
def get_points_intersection(plane,min_x,max_x,data_cube,shape=128):
fill = data_cube[0:5,0:5,0:5].mean() #this can be a parameter
extended_data_cube = np.ones([shape+2,shape,shape])*fill
extended_data_cube[1:shape+1,:,:] = data_cube
diag_image = np.zeros([shape,shape])
min_x_value = 999999
for i in range(shape):
for j in range(shape):
for k in range(int(min_x),int(max_x)+1):
current_value = abs(get_point_plane_proximity(plane,np.asarray([k,i,j])))
#print("current_value:{0}, val: [{1},{2},{3}]".format(current_value,k,i,j))
if current_value < min_x_value:
diag_image[i,j] = extended_data_cube[k,i,j]
min_x_value = current_value
min_x_value = 999999
return diag_image
The way it works is the following:
you create a normal vector:
for example [5,0,3]
normal1=create_normal_vector(5, 0,3) #this is only to normalize
then you create a point:
(my cube data shape is [128,128,128])
point = [64,64,64]
You calculate the plane equation parameters, [a,b,c,d] where ax+by+cz=d
plane1=get_plane_equation_parameters(normal1,point)
then to reduce the search space you can calculate the intersection of the plane with the cube:
corners1 = get_corner_interesections(plane1,128)
where corners1 = [intersection [x,0,0],intersection [x,128,0],intersection [x,0,128],intersection [x,128,128], min intersection [x,y,z], max intersection [x,y,z]]
With all these you can calculate the intersection between the cube and the plane:
image1 = get_points_intersection(plane1,corners1[-2],corners1[-1],data_cube)
Some examples:
normal is [1,0,0] point is [64,64,64]
normal is [5,1,0],[5,1,1],[5,0,1] point is [64,64,64]:
normal is [5,3,0],[5,3,3],[5,0,3] point is [64,64,64]:
normal is [5,-5,0],[5,-5,-5],[5,0,-5] point is [64,64,64]:
Thank you.

The other answers here do not appear to be very efficient with explicit loops over pixels or using scipy.interpolate.griddata, which is designed for unstructured input data. Here is an efficient (vectorized) and generic solution.
There is a pure numpy implementation (for nearest-neighbor "interpolation") and one for linear interpolation, which delegates the interpolation to scipy.ndimage.map_coordinates. (The latter function probably didn't exist in 2013, when this question was asked.)
import numpy as np
from scipy.ndimage import map_coordinates
def slice_datacube(cube, center, eXY, mXY, fill=np.nan, interp=True):
"""Get a 2D slice from a 3-D array.
Copyright: Han-Kwang Nienhuys, 2020.
License: any of CC-BY-SA, CC-BY, BSD, GPL, LGPL
Reference: https://stackoverflow.com/a/62733930/6228891
Parameters:
- cube: 3D array, assumed shape (nx, ny, nz).
- center: shape (3,) with coordinates of center.
can be float.
- eXY: unit vectors, shape (2, 3) - for X and Y axes of the slice.
(unit vectors must be orthogonal; normalization is optional).
- mXY: size tuple of output array (mX, mY) - int.
- fill: value to use for out-of-range points.
- interp: whether to interpolate (rather than using 'nearest')
Return:
- slice: array, shape (mX, mY).
"""
center = np.array(center, dtype=float)
assert center.shape == (3,)
eXY = np.array(eXY)/np.linalg.norm(eXY, axis=1)[:, np.newaxis]
if not np.isclose(eXY[0] # eXY[1], 0, atol=1e-6):
raise ValueError(f'eX and eY not orthogonal.')
# R: rotation matrix: data_coords = center + R # slice_coords
eZ = np.cross(eXY[0], eXY[1])
R = np.array([eXY[0], eXY[1], eZ], dtype=np.float32).T
# setup slice points P with coordinates (X, Y, 0)
mX, mY = int(mXY[0]), int(mXY[1])
Xs = np.arange(0.5-mX/2, 0.5+mX/2)
Ys = np.arange(0.5-mY/2, 0.5+mY/2)
PP = np.zeros((3, mX, mY), dtype=np.float32)
PP[0, :, :] = Xs.reshape(mX, 1)
PP[1, :, :] = Ys.reshape(1, mY)
# Transform to data coordinates (x, y, z) - idx.shape == (3, mX, mY)
if interp:
idx = np.einsum('il,ljk->ijk', R, PP) + center.reshape(3, 1, 1)
slice = map_coordinates(cube, idx, order=1, mode='constant', cval=fill)
else:
idx = np.einsum('il,ljk->ijk', R, PP) + (0.5 + center.reshape(3, 1, 1))
idx = idx.astype(np.int16)
# Find out which coordinates are out of range - shape (mX, mY)
badpoints = np.any([
idx[0, :, :] < 0,
idx[0, :, :] >= cube.shape[0],
idx[1, :, :] < 0,
idx[1, :, :] >= cube.shape[1],
idx[2, :, :] < 0,
idx[2, :, :] >= cube.shape[2],
], axis=0)
idx[:, badpoints] = 0
slice = cube[idx[0], idx[1], idx[2]]
slice[badpoints] = fill
return slice
# Demonstration
nx, ny, nz = 50, 70, 100
cube = np.full((nx, ny, nz), np.float32(1))
cube[nx//4:nx*3//4, :, :] += 1
cube[:, ny//2:ny*3//4, :] += 3
cube[:, :, nz//4:nz//2] += 7
cube[nx//3-2:nx//3+2, ny//2-2:ny//2+2, :] = 0 # black dot
Rz, Rx = np.pi/6, np.pi/4 # rotation angles around z and x
cz, sz = np.cos(Rz), np.sin(Rz)
cx, sx = np.cos(Rx), np.sin(Rx)
Rmz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
Rmx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
eXY = (Rmx # Rmz).T[:2]
slice = slice_datacube(
cube,
center=[nx/3, ny/2, nz*0.7],
eXY=eXY,
mXY=[80, 90],
fill=np.nan,
interp=False
)
import matplotlib.pyplot as plt
plt.close('all')
plt.imshow(slice.T) # imshow expects shape (mY, mX)
plt.colorbar()
Output (for interp=False):
For this test case (50x70x100 datacube, 80x90 slice size) the run time is 376 µs (interp=False) and 550 µs (interp=True) on my laptop.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Move points to nearest unoccupied grid position - python

Related

Interpolate between two images

Moving window averages on unequal dimensions

Find a easier way to cluster 2-d scatter data into grid array data

Generate coordinates inside Polygon

How can an almost arbitrary plane in a 3D dataset be plotted by matplotlib?

Categories

Resources