How to plot gradient descent using plotly - python

I have been trying to replicate some work similar to the code below, but when I use the data from this link https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv it throws an error. I think it's because of the shape, but I don't know exactly how to modify it.
It would be great if you could help me resolve the issue.
Here is my code:
from IPython.core.display import HTML
import plotly
import plotly.graph_objects as go
import noise
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import pandas as pd
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')
z = data
import numpy as np
from numpy.lib.stride_tricks import as_strided
def sliding_window(arr, window_size):
""" Construct a sliding window view of the array"""
arr = np.asarray(arr)
window_size = int(window_size)
if arr.ndim != 2:
raise ValueError("need 2-D input")
if not (window_size > 0):
raise ValueError("need a positive window size")
shape = (arr.shape[0] - window_size + 1,
arr.shape[1] - window_size + 1,
window_size, window_size)
if shape[0] <= 0:
shape = (1, shape[1], arr.shape[0], shape[3])
if shape[1] <= 0:
shape = (shape[0], 1, shape[2], arr.shape[1])
strides = (arr.shape[1]*arr.itemsize, arr.itemsize,
arr.shape[1]*arr.itemsize, arr.itemsize)
return as_strided(arr, shape=shape, strides=strides)
def cell_neighbours(arr, i, j, d):
"""Return d-th neighbors of cell (i, j)"""
w = sliding_window(arr, 2*d+1)
ix = np.clip(i - d, 0, w.shape[0]-1)
jx = np.clip(j - d, 0, w.shape[1]-1)
i0 = max(0, i - d - ix)
j0 = max(0, j - d - jx)
i1 = w.shape[2] - max(0, d - i + ix)
j1 = w.shape[3] - max(0, d - j + jx)
return w[ix, jx][i0:i1,j0:j1].ravel()
from dataclasses import dataclass
@dataclass
class descent_step:
"""Class for storing each step taken in gradient descent"""
value: float
x_index: float
y_index: float
def gradient_descent_3d(array,x_start,y_start,steps=50,step_size=1,plot=False):
# Initial point to start gradient descent at
step = descent_step(array[y_start][x_start],x_start,y_start)
# Store each step taken in gradient descent in a list
step_history = []
step_history.append(step)
# Plot 2D representation of array with starting point as a red marker
if plot:
matplotlib.pyplot.imshow(array,origin='lower',cmap='terrain')
matplotlib.pyplot.plot(x_start,y_start,'ro')
current_x = x_start
current_y = y_start
# Loop through specified number of steps of gradient descent to take
for i in range(steps):
prev_x = current_x
prev_y = current_y
# Extract array of neighbouring cells around current step location with size nominated
neighbours=cell_neighbours(array,current_y,current_x,step_size)
# Locate minimum in array (steepest slope from current point)
next_step = neighbours.min()
indices = np.where(array == next_step)
# Update current point to now be the next point after stepping
current_x, current_y = (indices[1][0],indices[0][0])
step = descent_step(array[current_y][current_x],current_x,current_y)
step_history.append(step)
# Plot each step taken as a black line to the current point nominated by a red marker
if plot:
matplotlib.pyplot.plot([prev_x,current_x],[prev_y,current_y],'k-')
matplotlib.pyplot.plot(current_x,current_y,'ro')
# If the step is to the same location as previously, this implies convergence, so end the loop
if prev_y == current_y and prev_x == current_x:
print(f"Converged in {i} steps")
break
return next_step,step_history
np.random.seed(42)
global_minimum = z.min()
indices = np.where(z == global_minimum)
print(f"Target: {global_minimum} # {indices}")
step_size = 0
found_minimum = 99999
# Random starting point
start_x = np.random.randint(0,50)
start_y = np.random.randint(0,50)
# Increase step size until convergence on global minimum
while found_minimum != global_minimum:
step_size += 1
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=False)
print(f"Optimal step size {step_size}")
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
print(f"Steps: {steps}")
def multiDimenDist(point1,point2):
#find the difference between the two points; it's really the same as below
deltaVals = [point2[dimension]-point1[dimension] for dimension in range(len(point1))]
runningSquared = 0
#because the Pythagorean theorem works for any dimension we can just use that
for coOrd in deltaVals:
runningSquared += coOrd**2
return runningSquared**(1/2)
def findVec(point1,point2,unitSphere = False):
#setting unitSphere to True will scale the vector down to a sphere with radius one, instead of its original length
finalVector = [0 for coOrd in point1]
for dimension, coOrd in enumerate(point1):
#finding total difference for that co-ordinate (x,y,z...)
deltaCoOrd = point2[dimension]-coOrd
#adding total difference
finalVector[dimension] = deltaCoOrd
if unitSphere:
totalDist = multiDimenDist(point1,point2)
unitVector =[]
for dimen in finalVector:
unitVector.append( dimen/totalDist)
return unitVector
else:
return finalVector
def generate_3d_plot(step_history):
# Initialise empty lists for markers
step_markers_x = []
step_markers_y = []
step_markers_z = []
step_markers_u = []
step_markers_v = []
step_markers_w = []
for index, step in enumerate(step_history):
step_markers_x.append(step.x_index)
step_markers_y.append(step.y_index)
step_markers_z.append(step.value)
# If we haven't reached the final step, calculate the vector between the current step and the next step
if index < len(steps)-1:
vec1 = [step.x_index,step.y_index,step.value]
vec2 = [steps[index+1].x_index,steps[index+1].y_index,steps[index+1].value]
result_vector = findVec(vec1,vec2)
step_markers_u.append(result_vector[0])
step_markers_v.append(result_vector[1])
step_markers_w.append(result_vector[2])
else:
step_markers_u.append(0.1)
step_markers_v.append(0.1)
step_markers_w.append(0.1)
# Include cones at each marker to show direction of step, scatter3d is to show the red line between points and surface for the terrain
fig = go.Figure(data=[
go.Cone(
x=step_markers_x,
y=step_markers_y,
z=step_markers_z,
u=step_markers_u,
v=step_markers_v,
w=step_markers_w,
sizemode="absolute",
sizeref=2,
anchor='tail'),
go.Scatter3d(
x=step_markers_x,
y=step_markers_y,
z=step_markers_z,
mode='lines',
line=dict(
color='red',
width=2
)),
go.Surface(colorscale=terrain,z=world,opacity=0.5)])
# Z axis is limited to the extent of the terrain array
fig.update_layout(
title='Gradient Descent Steps',
scene = dict(zaxis = dict(range=[world.min(),world.max()],),),)
return fig
# Generate 3D plot from previous random starting location
fig = generate_3d_plot(steps)
HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))

The error occurs because found_minimum is an int but global_minimum is a pandas Series: calling .min() on a DataFrame reduces column-wise and returns a Series, not a scalar. I think the tutorial you're referencing assumes the data is loaded as a numpy array, but that is never explicitly stated.
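A minimal standalone sketch of that type mismatch (a made-up 3x3 frame, not the elevation data):
import numpy as np
import pandas as pd
df = pd.DataFrame(np.arange(9.0).reshape(3, 3))
print(type(df.min()))             # pandas Series -- column-wise minima
print(type(df.to_numpy().min()))  # numpy scalar, which the comparison in the loop expects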
So z = data.to_numpy() solves one problem and reveals another: the tutorial dataset is 50x50, while your data is 25x25. It's tempting to just change the limits of the random starting point, but that doesn't end up working well; the dataset is simply too small for this implementation of gradient descent to converge properly.
To get around this issue, I altered your dataset to manufacture a 50x50 set:
data_arr = data.to_numpy()
double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
Passing this quad_arr as needed throughout the code, as well as updating the plotly colorscale to go.Surface(colorscale='Earth'), gives a working plot.
Full, copy-pastable code:
from IPython.core.display import HTML
import plotly
import plotly.graph_objects as go
import noise
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import pandas as pd
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')
data_arr = data.to_numpy()
double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
z = quad_arr
matplotlib.pyplot.imshow(z,origin='lower',cmap='terrain')
# Find maximum value index in numpy array
indices = np.where(z == z.max())
max_z_x_location, max_z_y_location = (indices[1][0],indices[0][0])
matplotlib.pyplot.plot(max_z_x_location,max_z_y_location,'ro',markersize=15)
# Find minimum value index in numpy array
indices = np.where(z == z.min())
min_z_x_location, min_z_y_location = (indices[1][0],indices[0][0])
matplotlib.pyplot.plot(min_z_x_location,min_z_y_location,'yo',markersize=15)
import numpy as np
from numpy.lib.stride_tricks import as_strided
def sliding_window(arr, window_size):
""" Construct a sliding window view of the array"""
arr = np.asarray(arr)
window_size = int(window_size)
if arr.ndim != 2:
raise ValueError("need 2-D input")
if not (window_size > 0):
raise ValueError("need a positive window size")
shape = (arr.shape[0] - window_size + 1,
arr.shape[1] - window_size + 1,
window_size, window_size)
if shape[0] <= 0:
shape = (1, shape[1], arr.shape[0], shape[3])
if shape[1] <= 0:
shape = (shape[0], 1, shape[2], arr.shape[1])
strides = (arr.shape[1]*arr.itemsize, arr.itemsize,
arr.shape[1]*arr.itemsize, arr.itemsize)
return as_strided(arr, shape=shape, strides=strides)
def cell_neighbours(arr, i, j, d):
"""Return d-th neighbors of cell (i, j)"""
w = sliding_window(arr, 2*d+1)
ix = np.clip(i - d, 0, w.shape[0]-1)
jx = np.clip(j - d, 0, w.shape[1]-1)
i0 = max(0, i - d - ix)
j0 = max(0, j - d - jx)
i1 = w.shape[2] - max(0, d - i + ix)
j1 = w.shape[3] - max(0, d - j + jx)
return w[ix, jx][i0:i1,j0:j1].ravel()
from dataclasses import dataclass
@dataclass
class descent_step:
"""Class for storing each step taken in gradient descent"""
value: float
x_index: float
y_index: float
def gradient_descent_3d(array,x_start,y_start,steps=50,step_size=1,plot=False):
# Initial point to start gradient descent at
step = descent_step(array[y_start][x_start],x_start,y_start)
# Store each step taken in gradient descent in a list
step_history = []
step_history.append(step)
# Plot 2D representation of array with starting point as a red marker
if plot:
matplotlib.pyplot.imshow(array,origin='lower',cmap='terrain')
matplotlib.pyplot.plot(x_start,y_start,'ro')
current_x = x_start
current_y = y_start
# Loop through specified number of steps of gradient descent to take
for i in range(steps):
prev_x = current_x
prev_y = current_y
# Extract array of neighbouring cells around current step location with size nominated
neighbours=cell_neighbours(array,current_y,current_x,step_size)
# Locate minimum in array (steepest slope from current point)
next_step = neighbours.min()
indices = np.where(array == next_step)
# Update current point to now be the next point after stepping
current_x, current_y = (indices[1][0],indices[0][0])
step = descent_step(array[current_y][current_x],current_x,current_y)
step_history.append(step)
# Plot each step taken as a black line to the current point nominated by a red marker
if plot:
matplotlib.pyplot.plot([prev_x,current_x],[prev_y,current_y],'k-')
matplotlib.pyplot.plot(current_x,current_y,'ro')
# If the step is to the same location as previously, this implies convergence, so end the loop
if prev_y == current_y and prev_x == current_x:
print(f"Converged in {i} steps")
break
return next_step,step_history
np.random.seed(42)
global_minimum = z.min()
indices = np.where(z == global_minimum)
print(f"Target: {global_minimum} # {indices}")
step_size = 0
found_minimum = 99999
# Random starting point
start_x = np.random.randint(0,50)
start_y = np.random.randint(0,50)
# Increase step size until convergence on global minimum
print('==========================')
print(found_minimum)
print(global_minimum)
print('==========================')
while found_minimum != global_minimum:
step_size += 1
try:
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
except ValueError:
pass
print(f"Optimal step size {step_size}")
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
print(f"Steps: {steps}")
def multiDimenDist(point1,point2):
#find the difference between the two points; it's really the same as below
deltaVals = [point2[dimension]-point1[dimension] for dimension in range(len(point1))]
runningSquared = 0
#because the Pythagorean theorem works for any dimension we can just use that
for coOrd in deltaVals:
runningSquared += coOrd**2
return runningSquared**(1/2)
def findVec(point1,point2,unitSphere = False):
#setting unitSphere to True will scale the vector down to a sphere with radius one, instead of its original length
finalVector = [0 for coOrd in point1]
for dimension, coOrd in enumerate(point1):
#finding total difference for that co-ordinate (x,y,z...)
deltaCoOrd = point2[dimension]-coOrd
#adding total difference
finalVector[dimension] = deltaCoOrd
if unitSphere:
totalDist = multiDimenDist(point1,point2)
unitVector =[]
for dimen in finalVector:
unitVector.append( dimen/totalDist)
return unitVector
else:
return finalVector
def generate_3d_plot(step_history):
# Initialise empty lists for markers
step_markers_x = []
step_markers_y = []
step_markers_z = []
step_markers_u = []
step_markers_v = []
step_markers_w = []
for index, step in enumerate(step_history):
step_markers_x.append(step.x_index)
step_markers_y.append(step.y_index)
step_markers_z.append(step.value)
# If we haven't reached the final step, calculate the vector between the current step and the next step
if index < len(steps)-1:
vec1 = [step.x_index,step.y_index,step.value]
vec2 = [steps[index+1].x_index,steps[index+1].y_index,steps[index+1].value]
result_vector = findVec(vec1,vec2)
step_markers_u.append(result_vector[0])
step_markers_v.append(result_vector[1])
step_markers_w.append(result_vector[2])
else:
step_markers_u.append(0.1)
step_markers_v.append(0.1)
step_markers_w.append(0.1)
# Include cones at each marker to show direction of step, scatter3d is to show the red line between points and surface for the terrain
fig = go.Figure(data=[
go.Cone(
x=step_markers_x,
y=step_markers_y,
z=step_markers_z,
u=step_markers_u,
v=step_markers_v,
w=step_markers_w,
sizemode="absolute",
sizeref=2,
anchor='tail'),
go.Scatter3d(
x=step_markers_x,
y=step_markers_y,
z=step_markers_z,
mode='lines',
line=dict(
color='red',
width=2
)),
go.Surface(colorscale='Earth', z=quad_arr,opacity=0.5)])
# Z axis is limited to the extent of the terrain array
fig.update_layout(
title='Gradient Descent Steps',
scene = dict(zaxis = dict(range=[quad_arr.min(),quad_arr.max()],),),)
return fig
# Generate 3D plot from previous random starting location
fig = generate_3d_plot(steps)
HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))

Related

iterating, calculating, and combining two lists with multiple tuples

I am working with a projected coordinate dataset that contains x, y, z data (a 432-line csv with X, Y, Z headers, not attached). I want to import this dataset, calculate a new grid based on user input, and then compute statistics on the points that fall within each new grid cell. I've gotten to the point where I have two lists (raw_lst with 431 (x,y,z) tuples and grid_lst with 16 (x,y) tuples, which I call n,e), but when I try to iterate through them to calculate the average and density for the new grid, it all falls apart. I am trying to output a final list that contains the grid_lst x and y values along with the calculated average z and density values.
I searched the numpy and scipy libraries thinking that they might already have something to do what I want, but was unable to find anything. Let me know if any of you have any thoughts.
[Image: sample_xyz_reddot_is_newgrid_pictoral_representation]
import pandas as pd
import math
df=pd.read_csv("Sample_xyz.csv")
N=df["X"]
E=df["Y"]
Z=df["Z"]
#grid = int(input("Specify grid value "))
grid = float(0.5) #for quick testing the grid value is set to 0.5
#max and total calculate the input area extents
max_N = math.ceil(max(N))
max_E = math.ceil(max(E))
min_E = math.floor(min(E))
min_N = math.floor(min(N))
total_N = max_N - min_N
total_E = max_E - min_E
total_N = int(total_N/grid)
total_E = int(total_E/grid)
#N_lst and E_lst calculate the mid points based on the input file extents and the specified grid file
N_lst = []
n=float(max_N)-(0.5*grid)
for x in range(total_N):
N_lst.append(n)
n=n-grid
E_lst = []
e=float(max_E)-(0.5*grid)
for x in range(total_E):
E_lst.append(e)
e=e-grid
grid_lst = []
for n in N_lst:
for e in E_lst:
grid_lst.append((n,e))
#converts the imported dataframe to list
raw_lst = df.to_records(index=False)
raw_lst = list(raw_lst)
#print(grid_lst) # grid_lst is a list of 16 (n,e) tuples for the new grid coordinates.
#print(raw_lst) # raw_lst is a list of 441 (n,e,z) tuples from the imported file - calling these x,y,z.
#The calculation where it all falls apart.
t=[]
average_lst = []
for n, e in grid_lst:
for x, y, z in raw_lst:
if n >= x-(grid/2) and n <= x+(grid/2) and e >= y-(grid/2) and e <= y+(grid/2):
t.append(z)
average = sum(t)/len(t)
density = len(t)/grid
average_lst = (n,e,average,density)
print(average_lst)
# print("The length of this list is " + str(len(average_lst)))
# print("The length of t is " + str(len(t)))
SAMPLE CODE FOR RUNNING
import random
grid=5
raw_lst = [(random.randrange(0,10), random.randrange(0,10), random.randrange(0,2))for i in range(100)]
grid_lst = [(2.5,2.5),(2.5,7.5),(7.5,2.5),(7.5,7.5)]
t=[]
average_lst = []
for n, e in grid_lst:
for x, y, z in raw_lst:
if n >= x-(grid/2) and n <= x+(grid/2) and e >= y-(grid/2) and e <= y+(grid/2):
t.append(z)
average = sum(t)/len(t)
density = len(t)/grid
average_lst = (n,e,average,density)
print(average_lst)
Some advice:
when working with arrays, use numpy; it has more functionality
when working with grids it's often handier to use the x-coords and y-coords as separate arrays
Comments on the solution:
obviously you have a grid, or rather a box, grid_lst. We generate it as a numpy meshgrid (gx,gy)
you have a number of points, raw_lst. We generate each element of it as a 1-dimensional numpy array
you want to select the r_points that are in the g_box. We use the percentage formula for that: tx = (rx-gxMin)/(gxMax-gxMin)
if tx, ty are within [0..1] we store the index
as an intermediate result we get all indices of raw_lst that are within the g_box
with that index you can extract the elements of raw_lst that are within the g_box and do some statistics
note that I have omitted the z-coord; you will have to extend this solution. A standalone sketch of the inside-the-box test follows this list.
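A minimal sketch of that percentage test in isolation (made-up numbers for a single cell spanning 2.5..7.5 in each direction, separate from the solution code below):
rx, ry = 4.0, 6.0                                  # one raw point
gx_min, gx_max = 2.5, 7.5                          # cell extent in x
gy_min, gy_max = 2.5, 7.5                          # cell extent in y
tx = (rx - gx_min) / (gx_max - gx_min)             # 0.3
ty = (ry - gy_min) / (gy_max - gy_min)             # 0.7
print((0.0 <= tx <= 1.0) and (0.0 <= ty <= 1.0))   # True -> store this point's index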
--
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.colors as mclr
from matplotlib import cm
f10 = 'C://gcg//picStack_10.jpg' # output file name
f20 = 'C://gcg//picStack_20.jpg' # output file name
def plot_grid(gx,gy,rx,ry,Rx,Ry,fOut):
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(111)
myCmap = mclr.ListedColormap(['blue','lightgreen'])
ax.pcolormesh(gx, gy, gx, edgecolors='b', cmap=myCmap, lw=1, alpha=0.3)
ax.scatter(rx,ry,s=150,c='r', alpha=0.7)
ax.scatter(Rx,Ry,marker='s', s=150,c='gold', alpha=0.5)
ax.set_aspect('equal')
plt.savefig(fOut)
plt.show()
def get_g_grid(nx,ny):
ix = 2.5 + 5*np.linspace(0,1,nx)
iy = 2.5 + 5*np.linspace(0,1,ny)
gx, gy = np.meshgrid(ix, iy, indexing='ij')
return gx,gy
def get_raw_points(N):
rx,ry,rz,rv = np.random.randint(0,10,N), np.random.randint(0,10,N), np.random.randint(0,2,N), np.random.uniform(low=0.0, high=1.0, size=N)
return rx,ry,rz,rv
N = 100
nx, ny = 2, 2
gx,gy = get_g_grid(nx,ny)
rx,ry,rz,rv = get_raw_points(N)
plot_grid(gx,gy,rx,ry,0,0,f10)
def get_the_points_inside(gx,gy,rx,ry):
#----- run through the g-grid -------------------------------
nx,ny = gx.shape
N = len(rx)
index = []
for jx in range(0,nx-1):
for jy in range(0,ny-1):
#--- run through the r_points
for jr in range(N):
test_x = (rx[jr]-gx[jx,jy]) / (gx[jx+1,jy] - gx[jx,jy])
test_y = (ry[jr]-gy[jx,jy]) / (gy[jx,jy+1] - gy[jx,jy])
if (0.0 <= test_x <= 1.0) and (0.0 <= test_y <= 1.0):
index.append(jr)
return index
index = get_the_points_inside(gx,gy,rx,ry)
Rx, Ry, Rz, Rv = rx[index], ry[index], rz[index], rv[index]
plot_grid(gx,gy,rx,ry,Rx,Ry,f20)

Problem with 2D mapping graphs using matplotlib

I am plotting 2D images of energy and density distributions. There is always a slight misalignment in the mapping, where the very first "columns" seem to go to the last columns in the plot.
I have attached a link to a test data file.
Data files
Here is the plot:
Is there anything to prevent this?
The partial plotting code is as follows:
import numpy as np
import matplotlib.pyplot as plt
import pylab as pyl
import scipy.stats as ss
import matplotlib.ticker as ticker
import matplotlib.transforms as tr
#%matplotlib inline
pi = 3.1415
n = 5e24 # density plasma
m = 9.109e-31
eps = 8.85e-12
e = 1.6021725e-19
c = 3e8
wp=np.sqrt(n*e*e/(m*eps))
kp = np.sqrt(n*e*e/(m*eps))/c #plasma wavenumber
case=400
## decide on the target range of analysis for multiples
start= 20500
end = 21500
gap = 1000
## Multiples plots
def target_range (start, end, gap):
while start<= end:
yield start
start += gap
for step in target_range(start, end, gap):
fdata =np.genfromtxt('./beam_{}'.format(step)).reshape(-1,6)
## dimension, dt, and superpaticle
xBoxsize = 50e-6 #window size
yBoxsize = 80e-6 #window size
xbind = 10
ybind = 1
dx = 4e-8 #cell size
dy = 4e-7 #cell size
dz = 1e-6 #assume to be same as dy
dt = 1.3209965456e-16
sptcl = 1.6e10
xsub = 0e-6
xmax = dt*step*c
xmin = xmax - xBoxsize
ysub = 1e-7
ymin = ysub #to make our view window
ymax = yBoxsize - ysub
xbins = int((xmax - xmin)/(dx*xbind))
ybins = int((ymax - ymin)/(dy*ybind))
#zbins = int((zmax - zmin)/dz) #option for 3D
# Define "data_arr" as two 2D arrays of size xbins x ybins (index 0: particle counts, index 1: energy)
data_arr = np.zeros((2,xbins,ybins), dtype=float)
for line in fdata:
x = int((line[0]-xmin)/(dx*xbind))
y = int((line[1]-ymin)/(dy*ybind))
#z = int((line[2]-zmin)/dz)
if x >= xbins: x = xbins - 1
if y >= ybins: y = ybins - 1
#if z >= zbins: z = zbins - 1
data_arr[0, x, y] = data_arr[0,x, y] + 1 #cummulative adding up the number of particles
energy_total = np.sqrt(1+ line[2]*line[2]/(c*c)+line[3]*line[3]/(c*c))/0.511
data_arr[1, x, y] += energy_total
#array 1 tells us the energy while array 0 tells us the particles
## make average energy , total energy/particle number
with np.errstate(divide='ignore',invalid='ignore'):
en_arr = np.true_divide(data_arr[1],data_arr[0]) # total energy/number of particles
en_arr[en_arr == np.inf] = 0
en_arr = np.nan_to_num(en_arr)
en_arr = en_arr.T
## This part is real density of the distribution
data_arr[0]= data_arr[0] * sptcl/dx/dy #in m-3
d = data_arr[0].T
## Plot and save density and energy distribution figures
den_dist=plt.figure(1)
plt.imshow(d,origin='lower', aspect = 'auto',cmap =plt.get_cmap('gnuplot'),extent =(xmin/1e-3,xmax/1e-3,ymin/1e-6,ymax/1e-6))
plt.title('Density_dist [m-3]_{}'.format(step))
plt.xlabel('distance[mm]')
plt.ylabel('y [um]')
plt.colorbar()
plt.show()
den_dist.savefig("./Qen_distribution_{}.png".format(step),format ='png')
#note:cmap: rainbow, hot,jet,gnuplot,plasma
energy_dist=plt.figure(2)
plt.imshow(en_arr, origin ='lower',aspect = 'auto', cmap =plt.get_cmap('jet'),extent =(xmin/1e-3,xmax/1e-3,ymin/1e-6,ymax/1e-6))
plt.title ('Energy_dist [MeV]_{} '.format(step))
plt.xlabel('distance[mm]')
plt.ylabel('y [um]')
plt.colorbar()
plt.show()
energy_dist.savefig("./Qenergy_distribution_{}.png".format(step),format ='png')

Recall function in python from another function (make an array for the variable of the function)

I have this code that has three functions (Bdisk, Bhalo, BX); these functions only accept arrays (e.g. shape (1000)):
import numpy as np
import logging
import warnings
import gmf
signum = lambda x: (x < 0.) * -1. + (x >= 0) * 1.
pi = np.pi
#Class with analytical functions that describe the GMF according to the model of JF12
class GMF(object):
def __init__(self): # self:is automatically set to reference the newly created object that needs to be initialized
self.Rsun = -8.5 # position of the sun along the x axis in kpc
############################################################################
# Disk Parameters
############################################################################
self.bring, self.bring_unc = 0.1,0.1 # floats, field strength in ring at 3 kpc < r < 5 kpc
self.hdisk, self.hdisk_unc = 0.4, 0.03 # float, disk/halo transition height
self.wdisk, self.wdisk_unc = 0.27,0.08 # floats, transition width
self.b = np.array([0.1,3.,-0.9,-0.8,-2.0,-4.2,0.,2.7]) # (8,1)-dim np.arrays, field strength of spiral arms at 5 kpc
self.b_unc = np.array([1.8,0.6,0.8,0.3,0.1,0.5,1.8,1.8]) # uncertainty
self.rx = np.array([5.1,6.3,7.1,8.3,9.8,11.4,12.7,15.5])# (8,1)-dim np.array,dividing lines of spiral lines coordinates of neg. x-axes that intersect with arm
self.idisk = 11.5 * pi/180. # float, spiral arms pitch angle
#############################################################################
# Halo Parameters
#############################################################################
self.Bn, self.Bn_unc = 1.4,0.1 # floats, field strength northern halo
self.Bs, self.Bs_unc = -1.1,0.1 # floats, field strength southern halo
self.rn, self.rn_unc = 9.22,0.08 # floats, transition radius south, lower limit
self.rs, self.rs_unc = 16.7,0. # transition radius south, lower limit
self.whalo, self.whalo_unc = 0.2,0.12 # floats, transition width
self.z0, self.z0_unc = 5.3, 1.6 # floats, vertical scale height
##############################################################################
# Out of plane or "X" component Parameters
##############################################################################
self.BX0, self.BX_unc = 4.6,0.3 # floats, field strength at origin
self.ThetaX0, self.ThetaX0_unc = 49. * pi/180., pi/180. # elev. angle at z = 0, r > rXc
self.rXc, self.rXc_unc = 4.8, 0.2 # floats, radius where thetaX = thetaX0
self.rX, self.rX_unc = 2.9, 0.1 # floats, exponential scale length
# striated field
self.gamma, self.gamma_unc = 2.92,0.14 # striation and/or rel. elec. number dens. rescaling
return
##################################################################################
##################################################################################
# Transition function given by logistic function eq.5
##################################################################################
def L(self,z,h,w):
if np.isscalar(z):
z = np.array([z]) # scalar or numpy array with positions (height above disk, z; distance from center, r)
ones = np.ones(z.shape[0])
return 1./(ones + np.exp(-2. *(np.abs(z)- h)/w))
####################################################################################
# return distance from center for angle phi of logarithmic spiral
# r(phi) = rx * exp(b * phi) as np.array
####################################################################################
def r_log_spiral(self,phi):
if np.isscalar(phi): #Returns True if the type of num is a scalar type.
phi = np.array([phi])
ones = np.ones(phi.shape[0])
# self.rx.shape = 8
# phi.shape = p
# then result is given as (8,p)-dim array, each row stands for one rx
# vstack : Take a sequence of arrays and stack them vertically to make a single array
# tensordot(a, b, axes=2):Compute tensor dot product along specified axes for arrays >=1D.
result = np.tensordot(self.rx , np.exp((phi - 3.*pi*ones) / np.tan(pi/2. - self.idisk)),axes = 0)
result = np.vstack((result, np.tensordot(self.rx , np.exp((phi - pi*ones) / np.tan(pi/2. - self.idisk)),axes = 0) ))
result = np.vstack((result, np.tensordot(self.rx , np.exp((phi + pi*ones) / np.tan(pi/2. - self.idisk)),axes = 0) ))
return np.vstack((result, np.tensordot(self.rx , np.exp((phi + 3.*pi*ones) / np.tan(pi/2. - self.idisk)),axes = 0) ))
#############################################################################################
# Disk component in galactocentric cylindrical coordinates (r,phi,z)
#############################################################################################
def Bdisk(self,r,phi,z):
# Bdisk is purely azimuthal (toroidal) with the field strength b_ring
"""
r: N-dim np.array, distance from origin in GC cylindrical coordinates, is in kpc
z: N-dim np.array, height in kpc in GC cylindrical coordinates
phi:N-dim np.array, polar angle in GC cylindircal coordinates, in radian
Bdisk: (3,N)-dim np.array with (r,phi,z) components of disk field for each coordinate tuple
Bdisk|: N-dim np.array, absolute value of Bdisk for each coordinate tuple
"""
if (not r.shape[0] == phi.shape[0]) and (not z.shape[0] == phi.shape[0]):
warnings.warn("List do not have equal shape! returning -1", RuntimeWarning)
return -1
# Return a new array of given shape and type, filled with zeros.
Bdisk = np.zeros((3,r.shape[0])) # Bdisk vector in r, phi, z
ones = np.ones(r.shape[0])
r_center = (r >= 3.) & (r < 5.1)
r_disk = (r >= 5.1) & (r <= 20.)
Bdisk[1,r_center] = self.bring
# Determine in which arm we are
# this is done for each coordinate individually
if np.sum(r_disk):
rls = self.r_log_spiral(phi[r_disk])
rls = np.abs(rls - r[r_disk])
arms = np.argmin(rls, axis = 0) % 8
# The magnetic spiral is defined at r=5 kpc and falls off as 1/r; the field direction is given by:
Bdisk[0,r_disk] = np.sin(self.idisk)* self.b[arms] * (5. / r[r_disk])
Bdisk[1,r_disk] = np.cos(self.idisk)* self.b[arms] * (5. / r[r_disk])
Bdisk *= (ones - self.L(z,self.hdisk,self.wdisk)) # multiplied by (1-L)
return Bdisk, np.sqrt(np.sum(Bdisk**2.,axis = 0)) # the Bdisk, the normalization
# axis=0 : sum over index 0(row)
# axis=1 : sum over index 1(columns)
##############################################################################################
# Halo component
###############################################################################################
def Bhalo(self,r,z):
# Bhalo is purely azimuthal (toroidal), i.e. has only a phi component
if (not r.shape[0] == z.shape[0]):
warnings.warn("List do not have equal shape! returning -1", RuntimeWarning)
return -1
Bhalo = np.zeros((3,r.shape[0])) # Bhalo vector in r, phi, z rows: r, phi and z component
ones = np.ones(r.shape[0])
m = ( z != 0. )
# SEE equation 6.
Bhalo[1,m] = np.exp(-np.abs(z[m])/self.z0) * self.L(z[m], self.hdisk, self.wdisk) * \
( self.Bn * (ones[m] - self.L(r[m], self.rn, self.whalo)) * (z[m] > 0.) \
+ self.Bs * (ones[m] - self.L(r[m], self.rs, self.whalo)) * (z[m] < 0.) )
return Bhalo , np.sqrt(np.sum(Bhalo**2.,axis = 0))
##############################################################################################
# BX component (OUT OF THE PLANE)
###############################################################################################
def BX(self,r,z):
#BX is purely ASS and poloidal, i.e. phi component = 0
if (not r.shape[0] == z.shape[0]):
warnings.warn("List do not have equal shape! returning -1", RuntimeWarning)
return -1
BX= np.zeros((3,r.shape[0])) # BX vector in r, phi, z rows: r, phi and z component
m = np.sqrt(r**2. + z**2.) >= 1.
bx = lambda r_p: self.BX0 * np.exp(-r_p / self.rX) # eq.7
thetaX = lambda r,z,r_p: np.arctan(np.abs(z)/(r - r_p)) # eq.10
r_p = r[m] *self.rXc/(self.rXc + np.abs(z[m] ) / np.tan(self.ThetaX0)) # eq. 9
m_r_b = r_p > self.rXc # region with constant elevation angle
m_r_l = r_p <= self.rXc # region with varying elevation angle
theta = np.zeros(z[m].shape[0])
b = np.zeros(z[m].shape[0])
r_p0 = (r[m])[m_r_b] - np.abs( (z[m])[m_r_b] ) / np.tan(self.ThetaX0) # eq.8
b[m_r_b] = bx(r_p0) * r_p0/ (r[m])[m_r_b] # the field strength in the constant elevation angle (b_x(r_p)r_p/r)
theta[m_r_b] = self.ThetaX0 * np.ones(theta.shape[0])[m_r_b]
b[m_r_l] = bx(r_p[m_r_l]) * (r_p[m_r_l]/(r[m])[m_r_l] )**2. # the field strength with varying elevation angle (b_x(r_p)(r_p/r)**2)
theta[m_r_l] = thetaX((r[m])[m_r_l] ,(z[m])[m_r_l] ,r_p[m_r_l])
mz = (z[m] == 0.)
theta[mz] = np.pi/2.
BX[0,m] = b * (np.cos(theta) * (z[m] >= 0) + np.cos(pi*np.ones(theta.shape[0]) - theta) * (z[m] < 0))
BX[2,m] = b * (np.sin(theta) * (z[m] >= 0) + np.sin(pi*np.ones(theta.shape[0]) - theta) * (z[m] < 0))
return BX, np.sqrt(np.sum(BX**2.,axis=0))
Now I want to add these three functions together: Btotal = BX + Bhalo + Bdisk. These functions give a vector field in cylindrical coordinates (r,theta,z). I convert from cylindrical to Cartesian (x,y,z), and I defined a function called vectors to calculate two new vectors n1, n2 (two vectors perpendicular to Btotal), in order to calculate the diffusion of particles using stochastic differential equations for many particles (~10000). In the code below I am trying to call the three functions (from the code above) with defined r, theta, z, use them in the diffusion equation, and plot the result.
i.e.:
in the for loop I have to get one position of shape (3,1) [I put the range as (0,1)] in (r,theta,z), then convert the value to Cartesian (x,y,z) to use it to find n1, n2, deltaX, deltaY, deltaZ, respectively:
import scipy as sp
import numpy as np
import numpy.random as npr
from numpy.lib.scimath import logn # to logartnim scale
import math as math
from random import seed,random, choice
from pylab import *
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import gmf
###########################################################
gmfm = gmf.GMF()
#############################################################
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.set_xlabel(r'$\ X$',size=16)
ax.set_ylabel(r'$\ Y$',size=16)
ax.set_zlabel(r'$\ Z$',size=16)
ax.set_title(' Diffusion Particles in GMF in 3D ')
#############################################################
def vectors(b):
b = b/np.sqrt(np.sum(b**2.,axis=0))
b = b/np.linalg.norm(b)
z = np.array([0.,0.,1.])
n1 = np.cross(z,b,axis=0)
n1 = n1/np.linalg.norm(n1)
n2 = np.cross(b,n1,axis=0)
n2 = n2/np.linalg.norm(n2)
return n1,n2
#############################################################
def CylindricalToCartesian(r,theta,z):
x= r*np.cos(theta)
y= r*np.sin(theta)
z= z
return np.array([x, y, z])
############################################################
T=1 # 100
N=10000
dt=float(T)/N
D= 1 # 2
DII=10
n= 1000
seed(3)
finalpositions=[]
###############################################################
for number in range(0,1):
finalpositions.append([])
r=[]
theta=[]
z=[]
r.append(0)
theta.append(0)
z.append(0)
x=[]
y=[]
x.append(8.5)
y.append(0)
for i in range(n):
Bdisk, Babs_d = gmfm.Bdisk(r,theta,z)
Bhalo, Babs_h = gmfm.Bhalo(r,z)
BX, Babs_x = gmfm.BX(r,z)
Btotal = Bdisk + Bhalo + BX
FieldInXYZ = CylindricalToCartesian(r[-1],theta[-1],z[-1])
FieldInXYZ =Btotal(x[-1],y[-1],z[-1])
localB = Btotal(x[-1],y[-1],z[-1])
print 'FieldInXYZ:', FieldInXYZ
#print 'localB:',localB
n1, n2 = vectors(localB)
s = np.random.normal(0, 1, 3)
finalpositions[-1].append(x)
finalpositions[-1].append(y)
finalpositions[-1].append(z)
allxes = []
allyes = []
allzes = []
for p in finalpositions:
allxes.append(p[0][-1])
allyes.append(p[1][-1])
allzes.append(p[1][-1])
plt.plot(allxes, allyes,allzes, 'o')
plt.show()
but I am getting an error:
AttributeError Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/IPython/utils/py3compat.pyc in execfile(fname, *where)
202 else:
203 filename = fname
--> 204 __builtin__.execfile(filename, *where)
/home/January.py in <module>()
71 for i in range(n):
72
---> 73 Bdisk, Babs_d = gmfm.Bdisk(r,theta,z)
74 Bhalo, Babs_h = gmfm.Bhalo(r,z)
75 BX, Babs_x = gmfm.BX(r,z)
/home/gmf.py in Bdisk(self, r, phi, z)
80 Bdisk|: N-dim np.array, absolute value of Bdisk for each coordinate tuple
81 """
---> 82 if (not r.shape[0] == phi.shape[0]) and (not z.shape[0] == phi.shape[0]):
83 warnings.warn("List do not have equal shape! returning -1", RuntimeWarning)
84 return -1
AttributeError: 'list' object has no attribute 'shape'
I don't know what I did wrong! Any help would be appreciated.
The traceback tells you exactly where the problem lies: you are using r.shape where type(r) is list, while it should be numpy.ndarray instead. Tracing the problem back reveals that you declare r = [] and then pass r to gmfm.Bdisk.
Instead you could do:
Bdisk, Babs_d = gmfm.Bdisk(np.array(r),theta,z)
(line 73). (If you have other lists that you treat as numpy arrays, you need to convert them accordingly, of course.)
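Alternatively, convert the lists to arrays once, right before the calls, so everything downstream sees arrays (a sketch using the question's own names; np is numpy as already imported in the script):
r = np.array(r)          # plain lists up to this point; .shape works from here on
theta = np.array(theta)
z = np.array(z)
Bdisk, Babs_d = gmfm.Bdisk(r, theta, z)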

Inverse Wavelet Transform [/xpost signalprocessing]

Main Problem: How can the scipy.signal.cwt() function be inverted?
I have seen that Matlab has an inverse continuous wavelet transform function which will return the original form of the data by inputting the wavelet transform, although you can filter out the slices you don't want.
MATLAB inverse cwt function
Since scipy doesn't appear to have the same function, I have been trying to figure out how to get the data back in the same form, while removing the noise and background.
How do I do this?
I tried squaring it to remove negative values, but this gives me values that are way too large and not quite right.
Here is what I have been trying:
# Compute the wavelet transform
widths = range(1,11)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data? and square?
WT_to_original_data = (xy['y'] * cwtmatr)**2
And here is a fully runnable short script to show you the type of data I am trying to get, what I have, etc.:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
# Make some random data with peaks and noise
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 60 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 40 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
y_noise = np.random.normal(loc=30, scale=10, size=len(x))
y = bkg_peaks + desired_peaks + y_noise
xy = np.array( zip(x,y), dtype=[('x',float), ('y',float)])
# Compute the wavelet transform
# I can't figure out what the width is or does?
widths = range(1,11)
# Ricker is 2nd derivative of Gaussian
# (*close* to what *most* of the features are in my data)
# (They're actually Lorentzians and Breit-Wigner-Fano lines)
cwtmatr = signal.cwt(xy['y'], signal.ricker, widths)
# Maybe we multiply by the original data? and square?
WT = (xy['y'] * cwtmatr)**2
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(4,3,1)
ax = {}
for i in range(0, 11):
ax[i] = fig.add_subplot(4,3, i+2)
ax_desired_transformed_data = fig.add_subplot(4,3,12)
ax_raw_data.plot(xy['x'], xy['y'], 'g-')
for i in range(0,10):
ax[i].plot(xy['x'], WT[i])
ax_desired_transformed_data.plot(xy['x'], desired_peaks, 'k-')
fig.tight_layout()
plt.show()
This script will output this image:
The first plot is the raw data, the middle plots are the wavelet transforms, and the last plot is what I want to get out as the processed (background and noise removed) data.
Does anyone have any suggestions? Thank you so much for the help.
I ended up finding a package, mlpy, which provides an inverse wavelet transform function, mlpy.wavelet.iuwt (the forward transform is mlpy.wavelet.uwt). This is the runnable script I ended up with, which may interest people trying to do noise or background removal:
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
import mlpy.wavelet as wave
# Make some random data with peaks and noise
############################################################
def gen_data():
def make_peaks(x):
bkg_peaks = np.array(np.zeros(len(x)))
desired_peaks = np.array(np.zeros(len(x)))
# Make peaks which contain the data desired
# (Mid range/frequency peaks)
for i in range(0,10):
center = x[-1] * np.random.random() - x[0]
amp = 100 * np.random.random() + 10
width = 10 * np.random.random() + 5
desired_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
# Also make background peaks (not desired)
for i in range(0,3):
center = x[-1] * np.random.random() - x[0]
amp = 80 * np.random.random() + 10
width = 100 * np.random.random() + 100
bkg_peaks += amp * np.e**(-(x-center)**2/(2*width**2))
return bkg_peaks, desired_peaks
# make x axis
x = np.array(range(0, 1000))
bkg_peaks, desired_peaks = make_peaks(x)
avg_noise_level = 30
std_dev_noise = 10
size = len(x)
scattering_noise_amp = 100
scat_center = 100
scat_width = 15
scat_std_dev_noise = 100
y_scattering_noise = np.random.normal(scattering_noise_amp, scat_std_dev_noise, size) * np.e**(-(x-scat_center)**2/(2*scat_width**2))
y_noise = np.random.normal(avg_noise_level, std_dev_noise, size) + y_scattering_noise
y = bkg_peaks + desired_peaks + y_noise
xy = np.array( zip(x,y), dtype=[('x',float), ('y',float)])
return xy
# Random data Generated
#############################################################
xy = gen_data()
# Make 2**n amount of data
new_y, bool_y = wave.pad(xy['y'])
orig_mask = np.where(bool_y==True)
# wavelet transform parameters
levels = 8
wf = 'h'
k = 2
# Remove Noise first
# Wave transform
wt = wave.uwt(new_y, wf, k, levels)
# Matrix of the difference between each wavelet level and the original data
diff_array = np.array([(wave.iuwt(wt[i:i+1], wf, k)-new_y) for i in range(len(wt))])
# Index of the level which is most similar to original data (to obtain smoothed data)
indx = np.argmin(np.sum(diff_array**2, axis=1))
# Use the wavelet levels around this region
noise_wt = wt[indx:indx+1]
# smoothed data in 2^n length
new_y = wave.iuwt(noise_wt, wf, k)
# Background Removal
error = 10000
errdiff = 100
i = -1
iter_y_dict = {0:np.copy(new_y)}
bkg_approx_dict = {0:np.array([])}
while abs(errdiff)>=1*10**-24:
i += 1
# Wave transform
wt = wave.uwt(iter_y_dict[i], wf, k, levels)
# Assume last slice is lowest frequency (background approximation)
bkg_wt = wt[-3:-1]
bkg_approx_dict[i] = wave.iuwt(bkg_wt, wf, k)
# Get the error
errdiff = error - sum(iter_y_dict[i] - bkg_approx_dict[i])**2
error = sum(iter_y_dict[i] - bkg_approx_dict[i])**2
# Make every peak higher than bkg_wt
diff = (new_y - bkg_approx_dict[i])
peak_idxs_to_remove = np.where(diff>0.)[0]
iter_y_dict[i+1] = np.copy(new_y)
iter_y_dict[i+1][peak_idxs_to_remove] = np.copy(bkg_approx_dict[i])[peak_idxs_to_remove]
# new data without noise and background
new_y = new_y[orig_mask]
bkg_approx = bkg_approx_dict[len(bkg_approx_dict.keys())-1][orig_mask]
new_data = diff[orig_mask]
##############################################################
# plot the data and results
fig = plt.figure()
ax_raw_data = fig.add_subplot(121)
ax_WT = fig.add_subplot(122)
ax_raw_data.plot(xy['x'], xy['y'], 'g')
for bkg in bkg_approx_dict.values():
ax_raw_data.plot(xy['x'], bkg[orig_mask], 'k')
ax_WT.plot(xy['x'], new_data, 'y')
fig.tight_layout()
plt.show()
And here is the output I am getting now:
As you can see, there is still a problem with the background removal (it shifts to the right after each iteration), but it is a different question which I will address here.

draw a smooth polygon around data points in a scatter plot, in matplotlib

I have a bunch of cross plots with two sets of data and have been looking for a matplotlib way of highlighting their plotted regions with smoothed polygon outlines.
At the moment I just use Adobe Illustrator and amend the saved plot, but this is not ideal. Example:
I'd be grateful for any pointers/links to examples.
Cheers
Here you have an example. I have written down the main ideas, but obviously you could do it better.
A short explanation:
1) You need to compute the convex hull (http://en.wikipedia.org/wiki/Convex_hull)
2) With the hull, you can scale it to keep all your data inside.
3) You must interpolate the resulting curve.
The first part is done in http://wiki.scipy.org/Cookbook/Finding_Convex_Hull. The second one is trivial. The third one is very general, and you could use any method; there are a lot of different ways to do the same thing. I took @Jaime's approach (Smooth spline representation of an arbitrary contour, f(length) --> x,y), which I think is a very good method.
I hope it helps you...
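One note on step 2: the code below scales the hull about the origin (hull_pts = scale*convex_hull(points)), which is fine here because the sample points are centred on the origin. For data that is not centred, a sketch of scaling about the hull's centroid instead (scale_hull is a hypothetical helper, not part of the cookbook code):
import numpy as np
def scale_hull(hull_pts, scale=1.03):
    # push each (n,2) hull vertex away from the centroid by the scale factor
    centre = hull_pts.mean(axis=0)
    return centre + scale * (hull_pts - centre)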
#Taken from http://wiki.scipy.org/Cookbook/Finding_Convex_Hull
import numpy as n, pylab as p, time
def _angle_to_point(point, centre):
'''calculate angle in 2-D between points and x axis'''
delta = point - centre
res = n.arctan(delta[1] / delta[0])
if delta[0] < 0:
res += n.pi
return res
def _draw_triangle(p1, p2, p3, **kwargs):
tmp = n.vstack((p1,p2,p3))
x,y = [x[0] for x in zip(tmp.transpose())]
p.fill(x,y, **kwargs)
def area_of_triangle(p1, p2, p3):
'''calculate area of any triangle given co-ordinates of the corners'''
return n.linalg.norm(n.cross((p2 - p1), (p3 - p1)))/2.
def convex_hull(points, graphic=False, smidgen=0.0075):
'''
Calculate subset of points that make a convex hull around points
Recursively eliminates points that lie inside two neighbouring points until only convex hull is remaining.
:Parameters:
points : ndarray (2 x m)
array of points for which to find hull
graphic : bool
use pylab to show progress?
smidgen : float
offset for graphic number labels - useful values depend on your data range
:Returns:
hull_points : ndarray (2 x n)
convex hull surrounding points
'''
if graphic:
p.clf()
p.plot(points[0], points[1], 'ro')
n_pts = points.shape[1]
assert(n_pts > 5)
centre = points.mean(1)
if graphic: p.plot((centre[0],),(centre[1],),'bo')
angles = n.apply_along_axis(_angle_to_point, 0, points, centre)
pts_ord = points[:,angles.argsort()]
if graphic:
for i in xrange(n_pts):
p.text(pts_ord[0,i] + smidgen, pts_ord[1,i] + smidgen, \
'%d' % i)
pts = [x[0] for x in zip(pts_ord.transpose())]
prev_pts = len(pts) + 1
k = 0
while prev_pts > n_pts:
prev_pts = n_pts
n_pts = len(pts)
if graphic: p.gca().patches = []
i = -2
while i < (n_pts - 2):
Aij = area_of_triangle(centre, pts[i], pts[(i + 1) % n_pts])
Ajk = area_of_triangle(centre, pts[(i + 1) % n_pts], \
pts[(i + 2) % n_pts])
Aik = area_of_triangle(centre, pts[i], pts[(i + 2) % n_pts])
if graphic:
_draw_triangle(centre, pts[i], pts[(i + 1) % n_pts], \
facecolor='blue', alpha = 0.2)
_draw_triangle(centre, pts[(i + 1) % n_pts], \
pts[(i + 2) % n_pts], \
facecolor='green', alpha = 0.2)
_draw_triangle(centre, pts[i], pts[(i + 2) % n_pts], \
facecolor='red', alpha = 0.2)
if Aij + Ajk < Aik:
if graphic: p.plot((pts[i + 1][0],),(pts[i + 1][1],),'go')
del pts[i+1]
i += 1
n_pts = len(pts)
k += 1
return n.asarray(pts)
if __name__ == "__main__":
import scipy.interpolate as interpolate
# fig = p.figure(figsize=(10,10))
theta = 2*n.pi*n.random.rand(1000)
r = n.random.rand(1000)**0.5
x,y = r*p.cos(theta),r*p.sin(theta)
points = n.ndarray((2,len(x)))
points[0,:],points[1,:] = x,y
scale = 1.03
hull_pts = scale*convex_hull(points)
p.plot(x,y,'ko')
x,y = [],[]
convex = hull_pts # hull_pts was already scaled above; scaling again would double the padding
for point in convex:
x.append(point[0])
y.append(point[1])
x.append(convex[0][0])
y.append(convex[0][1])
x,y = n.array(x),n.array(y)
#Taken from https://stackoverflow.com/questions/14344099/numpy-scipy-smooth-spline-representation-of-an-arbitrary-contour-flength
nt = n.linspace(0, 1, 100)
t = n.zeros(x.shape)
t[1:] = n.sqrt((x[1:] - x[:-1])**2 + (y[1:] - y[:-1])**2)
t = n.cumsum(t)
t /= t[-1]
x2 = interpolate.spline(t, x, nt)
y2 = interpolate.spline(t, y, nt)
p.plot(x2, y2,'r--',linewidth=2)
p.show()
There are some useful papers, e.g.:
http://repositorium.sdum.uminho.pt/bitstream/1822/6429/1/ConcaveHull_ACM_MYS.pdf
Also, you could try: http://resources.arcgis.com/en/help/main/10.1/index.html#//007000000013000000
I don't know anything about arcgis, but it looks fine.
I came across this and implemented easy-to-use functions as well as a couple of alternatives/improvements.
Improvements:
use a periodic interpolation, which ensures smooth transitions
use quadratic interpolation
now also works for clouds of only positive points
uses an alternative to the deprecated scipy.interpolate.spline function
Alternatives:
many different and configurable interpolation schemes
a rounded-corner convex hull version
Hope this helps someone along the way.
import sklearn.preprocessing
import sklearn.pipeline
import scipy.spatial
import numpy as np
def calculate_hull(
X,
scale=1.1,
padding="scale",
n_interpolate=100,
interpolation="quadratic_periodic",
return_hull_points=False):
"""
Calculates a "smooth" hull around given points in `X`.
The different settings have different drawbacks but the given defaults work reasonably well.
Parameters
----------
X : np.ndarray
2d-array with 2 columns and `n` rows
scale : float, optional
padding strength, by default 1.1
padding : str, optional
padding mode, by default "scale"
n_interpolate : int, optional
number of interpolation points, by default 100
interpolation : str or callable(ix,iy,x), optional
interpolation mode, by default "quadratic_periodic"
Inspired by: https://stackoverflow.com/a/17557853/991496
"""
if padding == "scale":
# scaling based padding
scaler = sklearn.pipeline.make_pipeline(
sklearn.preprocessing.StandardScaler(with_std=False),
sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1)))
points_scaled = scaler.fit_transform(X) * scale
hull_scaled = scipy.spatial.ConvexHull(points_scaled, incremental=True)
hull_points_scaled = points_scaled[hull_scaled.vertices]
hull_points = scaler.inverse_transform(hull_points_scaled)
hull_points = np.concatenate([hull_points, hull_points[:1]])
elif padding == "extend" or isinstance(padding, (float, int)):
# extension based padding
# TODO: remove?
if padding == "extend":
add = (scale - 1) * np.max([
X[:,0].max() - X[:,0].min(),
X[:,1].max() - X[:,1].min()])
else:
add = padding
points_added = np.concatenate([
X + [0,add],
X - [0,add],
X + [add, 0],
X - [add, 0]])
hull = scipy.spatial.ConvexHull(points_added)
hull_points = points_added[hull.vertices]
hull_points = np.concatenate([hull_points, hull_points[:1]])
else:
raise ValueError(f"Unknown padding mode: {padding}")
# number of interpolated points
nt = np.linspace(0, 1, n_interpolate)
x, y = hull_points[:,0], hull_points[:,1]
# ensures the same spacing of points between all hull points
t = np.zeros(x.shape)
t[1:] = np.sqrt((x[1:] - x[:-1])**2 + (y[1:] - y[:-1])**2)
t = np.cumsum(t)
t /= t[-1]
# interpolation types
if interpolation is None or interpolation == "linear":
x2 = scipy.interpolate.interp1d(t, x, kind="linear")(nt)
y2 = scipy.interpolate.interp1d(t, y, kind="linear")(nt)
elif interpolation == "quadratic":
x2 = scipy.interpolate.interp1d(t, x, kind="quadratic")(nt)
y2 = scipy.interpolate.interp1d(t, y, kind="quadratic")(nt)
elif interpolation == "quadratic_periodic":
x2 = scipy.interpolate.splev(nt, scipy.interpolate.splrep(t, x, per=True, k=4))
y2 = scipy.interpolate.splev(nt, scipy.interpolate.splrep(t, y, per=True, k=4))
elif interpolation == "cubic":
x2 = scipy.interpolate.CubicSpline(t, x, bc_type="periodic")(nt)
y2 = scipy.interpolate.CubicSpline(t, y, bc_type="periodic")(nt)
else:
x2 = interpolation(t, x, nt)
y2 = interpolation(t, y, nt)
X_hull = np.concatenate([x2.reshape(-1,1), y2.reshape(-1,1)], axis=1)
if return_hull_points:
return X_hull, hull_points
else:
return X_hull
def draw_hull(
X,
scale=1.1,
padding="scale",
n_interpolate=100,
interpolation="quadratic_periodic",
plot_kwargs=None,
ax=None):
"""Uses `calculate_hull` to draw a hull around given points.
Parameters
----------
X : np.ndarray
2d-array with 2 columns and `n` rows
scale : float, optional
padding strength, by default 1.1
padding : str, optional
padding mode, by default "scale"
n_interpolate : int, optional
number of interpolation points, by default 100
interpolation : str or callable(ix,iy,x), optional
interpolation mode, by default "quadratic_periodic"
plot_kwargs : dict, optional
`matplotlib.pyplot.plot` kwargs, by default None
ax : `matplotlib.axes.Axes`, optional
[description], by default None
"""
if plot_kwargs is None:
plot_kwargs = {}
X_hull = calculate_hull(
X, scale=scale, padding=padding, n_interpolate=n_interpolate, interpolation=interpolation)
if ax is None:
ax= plt.gca()
plt.plot(X_hull[:,0], X_hull[:,1], **plot_kwargs)
def draw_rounded_hull(X, padding=0.1, line_kwargs=None, ax=None):
"""Plots a convex hull around points with rounded corners and a given padding.
Parameters
----------
X : np.array
2d array with two columns and n rows
padding : float, optional
padding between hull and points, by default 0.1
line_kwargs : dict, optional
line kwargs (used for `matplotlib.pyplot.plot` and `matplotlib.patches.Arc`), by default None
ax : matplotlib.axes.Axes, optional
axes to plat on, by default None
"""
default_line_kwargs = dict(
color="black",
linewidth=1
)
if line_kwargs is None:
line_kwargs = default_line_kwargs
else:
line_kwargs = {**default_line_kwargs, **line_kwargs}
if ax is None:
ax = plt.gca()
hull = scipy.spatial.ConvexHull(X)
hull_points = X[hull.vertices]
hull_points = np.concatenate([hull_points[[-1]], hull_points, hull_points[[0]]])
diameter = padding * 2
for i in range(1, hull_points.shape[0] - 1):
# line
# source: https://stackoverflow.com/a/1243676/991496
norm_next = np.flip(hull_points[i] - hull_points[i + 1]) * [-1, 1]
norm_next /= np.linalg.norm(norm_next)
norm_prev = np.flip(hull_points[i - 1] - hull_points[i]) * [-1, 1]
norm_prev /= np.linalg.norm(norm_prev)
# plot line
line = hull_points[i:i+2] + norm_next * diameter / 2
ax.plot(line[:,0], line[:,1], **line_kwargs)
# arc
angle_next = np.rad2deg(np.arccos(np.dot(norm_next, [1,0])))
if norm_next[1] < 0:
angle_next = 360 - angle_next
angle_prev = np.rad2deg(np.arccos(np.dot(norm_prev, [1,0])))
if norm_prev[1] < 0:
angle_prev = 360 - angle_prev
arc = patches.Arc(
hull_points[i],
diameter, diameter,
angle=0, fill=False, theta1=angle_prev, theta2=angle_next,
**line_kwargs)
ax.add_patch(arc)
if __name__ == '__main__':
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import patches
# np.random.seed(42)
X = np.random.random((20,2))
fig, ax = plt.subplots(1,1, figsize=(10,10))
ax.scatter(X[:,0], X[:,1])
draw_rounded_hull(X, padding=0.1)
draw_hull(X)
ax.set(xlim=[-1,2], ylim= [-1,2])
fig.savefig("_out/test.png")
