How to count a peak that drops to 0? (Python find_peaks)

I am using Scipy's find_peaks to count the number of peaks in a time series.
I need to count only those peaks that start at 0 and fall back to 0. The second peak from the right (indicated by a vertical line) is currently counted, but it shouldn't be, since the signal doesn't fall to 0 before the last peak. Is there a way to specify this in find_peaks?
peaks1 = find_peaks(array, height=(1,1.5),prominence=1)
peaks1_5 = find_peaks(array, height=(1.5,2),prominence=1.5)
peaks2 = find_peaks(array, height=2,prominence=2)
fig, ax = plt.subplots(figsize=(30, 10), dpi=80)
plt.plot(spi_neg['date'],spi["SPI-12"])
[plt.axvline(spi_neg.date.iloc[p],c='red',linewidth=0.3) for p in peaks1[0]]
[plt.axvline(spi_neg.date.iloc[p],c='green',linewidth=0.3) for p in peaks1_5[0]]
[plt.axvline(spi_neg.date.iloc[p],c='purple',linewidth=0.3) for p in peaks2[0]]
plt.axhline(2,linestyle='dashed',linewidth=1)
plt.axhline(1.5,linestyle='dashed',linewidth=1)
plt.axhline(1,linestyle='dashed',linewidth=1)
Peaks chart

Runnable code with the question would be helpful, and so would a more precise definition of which peaks should count ;-)
First we generate some data:
import numpy as np
import matplotlib.pyplot as plt

#---- generate data
mp = 200
freq = 20
t = np.linspace(0, freq*np.pi, mp)
signal = np.sin(t)
noise = np.random.rand(mp)
X = 0.5*signal + noise

#---- scale X
def scale01(a):
    return (a - a.min()) / (a.max() - a.min())

X = scale01(X) - 0.5
X = np.maximum(X, 0.0)

#---- graphics
with plt.style.context('ggplot'):
    fig = plt.figure(figsize=(15, 3))
    plt.plot(t, X)
    plt.plot(t, X, 'o')
Now we identify the zero "lakes" and the non-zero "islands":
a = np.where(X <= 0)[0]   # extract the indices with X <= 0
b = np.where(X > 0)[0]    # extract the indices with X > 0

with plt.style.context('ggplot'):
    fig = plt.figure(figsize=(15, 3))
    plt.plot(t[b], X[b], 'or', label=">0")
    plt.vlines(t[b], 0, X[b], colors='k')
    plt.plot(t[a], -X[a], 'og', label="<=0")
    plt.legend(); plt.show()
Next we collect the non-zero islands in a list of lists; each inner list holds the values of one non-zero island.
X_ = X[b]
m = len(X_)
list_y = list()
list_Y = list()

for j in range(1, m):
    if b[j] - b[j-1] > 1:            # a gap in the indices starts a new island
        list_Y.append(list_y)
        list_y = list()
        #print("--------------------------------------------- new list")
        #print(j, b[j], X_[j])
    list_y.append(X_[j])
list_Y.append(list_y)

print("list_Y")
n = len(list_Y)
for j in range(n):
    print(list_Y[j])
With each island in the list you can then evaluate the peaks according to your definition (which I could not fully capture).
list_Y
[0.22062475371241386, 0.29207471279008657, 0.35072832015294586, 0.1251594602284437, 0.24379282278250836, 0.06896727821692716]
[0.06271739133976328]
[0.2689504650818615, 0.011887999386713255, 0.055442917743508624, 0.2876317343278316, 0.24084993011027578, 0.12097014134978235]
[0.1907699022464584]
[0.08249052680941726]
[0.10205561805376617]
[0.18903867830269638, 0.26990334850384257, 0.5, 0.3288200602696131, 0.05036869827824486, 0.040381419904307436]
[0.08618838642790339]
[0.0053279353208096625, 0.3468189863146819, 0.05644254569326557, 0.3985674171686334, 0.14897985190026097, 0.0025548308606182513, 0.32765453143333545, 0.3328107320769136, 0.1838328219774621, 0.21123652127176762]
[0.18870251894797663]
[0.13453490446867422, 0.25258744200608363, 0.4981866504733391, 0.35180043079867795, 0.08425183513691303, 0.3376976620831299, 0.22348609066402825]
[0.0716155758184146]
[0.052227024152749935, 0.08639499278421903]
[0.1581304564482665, 0.2273016493144655, 0.26721741895716056, 0.33665669827299305, 0.19255497112246478, 0.16227876457894175]
[0.10236622631923908, 0.06039140456773806, 0.053391261130168344]
[0.21170561257978093, 0.11669466945342633, 0.2479665749659119, 0.25792206298341824, 0.19579440295962314, 0.15210847528158666, 0.23531008247873408]
[0.05340116678342899]
[0.2088166123161308, 0.26031072203571415, 0.2786317264092839, 0.289871721166855, 0.25460661866030165, 0.3214937091565473, 0.36293451974436275]
[0.04525610202919361, 0.1740374143631349, 0.17258947174880612]
[0.14217066607610684, 0.03435965315335088, 0.09996473411377804, 0.48290831305140514, 0.09407783896892297]
[0.03224632110920911, 0.08787466747977346, 0.20032938280871493, 0.23646809723694695, 0.13380244841935984, 0.05305696510866664, 0.2657761536751757, 0.34514204517200975]
[0.17123014194168007, 0.2397521290598289]
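As a possible final counting step, here is a minimal sketch, assuming a peak should be counted only when its island rises from zero and falls back to zero (and using illustrative height/prominence thresholds): since every island in list_Y is bounded by zeros on both sides, you can either count one peak per island (its maximum) or run find_peaks on each island separately.
import numpy as np
from scipy.signal import find_peaks

# one peak per island: simply take each island's maximum
island_maxima = [max(y) for y in list_Y]
print("peaks (one per island):", len(island_maxima))

# or allow several peaks within one island: pad each island with zeros
# so maxima at the island edges are detected, then run find_peaks per island
total = 0
for y in list_Y:
    idx, _ = find_peaks(np.r_[0.0, y, 0.0], height=0.1, prominence=0.1)  # thresholds are illustrative
    total += len(idx)
print("peaks (find_peaks per island):", total)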

Related

How to get the value corresponding to a coordinate in a 2D mesh (2D bin)?

I have a temperature value associated with each coordinate. Over a fixed area I want to lay a square grid (all cells the same size). To that end I use numpy.meshgrid to generate the cells over the entire area. My question is: how do I sum the temperatures of all rows whose coordinates fall in the k-th cell? I am a bit confused about whether I should use numpy.histogram2d; it gives me the frequency of X and Y, so does that mean I have to use a multidimensional histogram?
Many thanks in advance!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### generating an input data frame
df = pd.DataFrame(data=np.random.randint(2000, 6000, (1000000, 3)))
df.columns = ['X', 'Y', 'Temp']
x2 = np.linspace(df['X'].min(), df['X'].max(), 20)
y2 = np.linspace(df['Y'].min(), df['Y'].max(), 20)
xx, yy = np.meshgrid(x2, y2, indexing='ij')
plt.scatter(xx, yy, color="red", marker="x")

#### Or should I use
Hist, xedges, yedges = np.histogram2d(df['X'], df['Y'], bins=(x2, y2))
H = Hist.T
This takes your data set and produces a 20x20 array containing the average temperature of all the points within each grid cell. If a cell has no points, it produces a NaN:
import numpy as np

data = np.random.randint(2000, 6000, (100000, 3))

# We divide the coordinate space up into 20 bins per axis.
binsize = (6000 - 2000) // 20
bins = np.zeros((20, 20))
counts = np.zeros((20, 20))

for row in data:
    binx = (row[0] - 2000) // binsize
    biny = (row[1] - 2000) // binsize
    bins[biny, binx] += row[2]
    counts[biny, binx] += 1

print(bins)
print(counts)
print("Averages:")
print(bins / counts)
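If you would rather stick with numpy.histogram2d (as the question suggests), the same per-cell averages can be obtained without an explicit loop by dividing a temperature-weighted histogram by an unweighted one. A minimal sketch, assuming the same synthetic data as above:
import numpy as np

data = np.random.randint(2000, 6000, (100000, 3))
x, y, temps = data[:, 0], data[:, 1], data[:, 2]

edges = np.linspace(2000, 6000, 21)  # 20 equal-width bins per axis
sums, _, _ = np.histogram2d(x, y, bins=(edges, edges), weights=temps)
counts, _, _ = np.histogram2d(x, y, bins=(edges, edges))

with np.errstate(invalid='ignore'):  # empty cells become NaN, as in the loop version
    averages = sums / counts
print(averages.shape)  # (20, 20); note the first axis is x here, not y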

How to get the value of the area under multiple peaks

I have some data from a bioanalyzer which gives me time (x-axis) and absorbance values (y-axis). The time step is 0.05 s and the range is from 32 s to 138 s, so you can imagine how many data points I have. I've created a graph using plotly and matplotlib, just so that I have more libraries to work with to find a solution, so a solution in either library is fine! What I'm trying to do is make my script find the area under each peak and return that value.
def create_plot(sheet_name):
    sample = book.sheet_by_name(sheet_name)
    data = [[sample.cell_value(r, c) for r in range(sample.nrows)] for c in range(sample.ncols)]
    y = data[2][18:len(data[2]) - 2]
    x = np.arange(32, 138.05, 0.05)
    indices = peakutils.indexes(y, thres=0.35, min_dist=0.1)
    peaks = [y[i] for i in indices]
This snippet gets my Y values, X values and indices of the peaks. Now is there a way to get the area under each curve? Let's say that there are 15 indices.
Here's what the graph looks like:
An automated answer
Given a set of x and y values as well as a set of peaks (the x-coordinates of the peaks), here's how you can automatically find the area under each of the peaks. I'm assuming that x, y, and peaks are all Numpy arrays:
import numpy as np
# find the minima between each peak
ixpeak = x.searchsorted(peaks)
ixmin = np.array([np.argmin(i) for i in np.split(y, ixpeak)])
ixmin[1:] += ixpeak
mins = x[ixmin]
# split up the x and y values based on those minima
xsplit = np.split(x, ixmin[1:-1])
ysplit = np.split(y, ixmin[1:-1])
# find the areas under each peak
areas = [np.trapz(ys, xs) for xs,ys in zip(xsplit, ysplit)]
Output:
The example data has been set up so that the area under each peak is (more or less) guaranteed to be 1.0, so the results in the bottom plot are correct. The green X marks are the locations of the minima between peaks. The part of the curve "belonging" to each peak is taken to be the part of the curve between the minima adjacent to that peak.
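To connect this to the question's peakutils output: peaks here means the x-coordinates of the detected peaks, so (assuming x and y from the question have been converted to NumPy arrays) you would build it roughly like this:
import numpy as np

x = np.asarray(x)        # the question's x values (already an arange)
y = np.asarray(y)        # the question's y values (a plain Python list in the question)
peaks = x[indices]       # x-positions of the peaks found by peakutils.indexes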
Complete code
Here's the complete code I used to generate the example data:
import numpy as np
import scipy as sp
import scipy.stats

prec = 1e5
n = 10
N = 150
r = np.arange(0, N + 1, N//n)

# generate some reasonable fake data: n narrow normal pdfs at random
# positions, so the area under each peak is roughly 1.0
peaks = np.array([np.random.uniform(s, e) for s, e in zip(r[:-1], r[1:])])
x = np.linspace(0, N + n, num=int(prec))
y = np.max([sp.stats.norm.pdf(x, loc=p, scale=.4) for p in peaks], axis=0)
and the code I used to make the plots:
import matplotlib.pyplot as plt
# plotting stuff
plt.figure(figsize=(5,7))
plt.subplots_adjust(hspace=.33)
plt.subplot(211)
plt.plot(x, y, label='trace 0')
plt.plot(peaks, y[ixpeak], '+', c='red', ms=10, label='peaks')
plt.plot(mins, y[ixmin], 'x', c='green', ms=10, label='mins')
plt.xlabel('dep')
plt.ylabel('indep')
plt.title('Example data')
plt.ylim(-.1, 1.6)
plt.legend()
plt.subplot(212)
plt.bar(np.arange(len(areas)), areas)
plt.xlabel('Peak number')
plt.ylabel('Area under peak')
plt.title('Area under the peaks of trace 0')
plt.show()

Matplotlib plot with multiple colors based on values on x-axis

I want to get a plot similar to the following one, which has different colors based on the x-axis values. Ignore the u and f letters and also the blue curve and gray lines; I only need the green and red lines. If you run my code, you will get a plot that is all one color. What I want is one color when x is between 0 and the turning point (in this case x = 50%) and a different color for the rest.
Code:
import matplotlib.pyplot as plt

def GRLC(values):
    n = len(values)
    assert n > 0, 'Empty list of values'
    sortedValues = sorted(values)  # sort smallest to largest

    # Find cumulative totals
    cumm = [0]
    for i in range(n):
        cumm.append(sum(sortedValues[0:(i + 1)]))

    # Calculate Lorenz points
    LorenzPoints = [[], []]
    sumYs = 0            # sum of all y values
    robinHoodIdx = -1    # Robin Hood index: max(x_i - y_i)
    for i in range(1, n + 2):
        x = 100.0 * (i - 1) / n
        y = 100.0 * (cumm[i - 1] / float(cumm[n]))
        LorenzPoints[0].append(x)
        LorenzPoints[1].append(y)
        sumYs += y
        maxX_Y = x - y
        if maxX_Y > robinHoodIdx:
            robinHoodIdx = maxX_Y
    giniIdx = 100 + (100 - 2 * sumYs) / n  # Gini index
    return [giniIdx, giniIdx / 100, robinHoodIdx, LorenzPoints]

reg = [400, 200]
result_reg = GRLC(reg)
print('Gini Index Reg', result_reg[0])
print('Gini Coefficient Reg', result_reg[1])
print('Robin Hood Index Reg', result_reg[2])

# Plot
plt.plot(result_reg[3][0], result_reg[3][1], [0, 100], [0, 100], '--')
plt.legend(['Reg-ALSRank#10', 'Equity-Line'], loc='upper left', prop={'size': 16})
plt.xlabel('% of items')
plt.ylabel('% of times being recommended')
plt.show()
This is how you would plot two lines of different colors, knowing the index in the array at which the color should change.
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0,49, num=50)
y = x**2
x0 = 23
plt.plot(x[:x0+1], y[:x0+1])
plt.plot(x[x0:], y[x0:])
plt.show()
This works because, by default, subsequent line plots get different colors, but you can of course set the color yourself:
plt.plot(x[:x0+1], y[:x0+1], color="cornflowerblue")
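Applied to the Lorenz curve from the question, a minimal sketch (assuming result_reg from the code above and the turning point at x = 50%) would first locate the index where the curve reaches that point and then split the plot there:
import numpy as np
import matplotlib.pyplot as plt

xs = np.asarray(result_reg[3][0])      # Lorenz x values (% of items)
ys = np.asarray(result_reg[3][1])      # Lorenz y values (% of times recommended)
x0 = int(np.searchsorted(xs, 50.0))    # first index at or beyond the 50% turning point

plt.plot(xs[:x0+1], ys[:x0+1], color="green")
plt.plot(xs[x0:], ys[x0:], color="red")
plt.plot([0, 100], [0, 100], '--', color="gray")  # equity line
plt.show()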

In matplotlib, how can I plot a multi-colored line, like a rainbow

I would like to plot parallel lines with different colors. E.g. rather than a single red line of thickness 6, I would like to have two parallel lines of thickness 3, with one red and one blue.
Any thoughts would be appreciated.
Thanks!
Even with the smart offsetting (see below), there is still an issue in views that have sharp angles between consecutive points.
Zoomed view of smart offsetting:
Overlaying lines of varying thickness:
Plotting parallel lines is not an easy task. Using a simple uniform offset will of course not show the desired result. This is shown in the left picture below.
Such a simple offset can be produced in matplotlib as shown in the transformation tutorial.
Method1
A better solution may be to use the idea sketched on the right side. To calculate the offset of the nth point, we can use the normal vector to the line between the (n-1)-st and the (n+1)-st points and go the same distance along this normal vector to obtain the offset point.
The advantage of this method is that the offset line has the same number of points as the original line. The disadvantage is that it is not completely accurate, as can be seen in the picture.
This method is implemented in the function offset in the code below.
In order to make this useful for a matplotlib plot, we need to consider that the linewidth should be independent of the data units. Linewidth is usually given in units of points, and the offset would best be given in the same unit, such that e.g. the requirement from the question ("two parallel lines of width 3") can be met.
The idea is therefore to transform the coordinates from data to display coordinates, using ax.transData.transform. The offset in points o can also be transformed to the same units: using the figure dpi and the standard ppi = 72, the offset in display coordinates is o*dpi/ppi. After the offset has been applied in display coordinates, the inverse transform (ax.transData.inverted().transform) allows transforming back to data coordinates.
Now there is another dimension of the problem: How to assure that the offset remains the same independent of the zoom and size of the figure?
This last point can be addressed by recalculating the offset each time a zooming or resizing event has taken place.
Here is how a rainbow curve produced by this method would look.
And here is the code to produce the image.
import numpy as np
import matplotlib.pyplot as plt
dpi = 100
def offset(x, y, o):
    """Offset the coordinates given by arrays x, y by o."""
    X = np.c_[x, y].T
    m = np.array([[0, -1], [1, 0]])
    R = np.zeros_like(X)
    S = X[:, 2:] - X[:, :-2]
    R[:, 1:-1] = np.dot(m, S)
    R[:, 0] = np.dot(m, X[:, 1] - X[:, 0])
    R[:, -1] = np.dot(m, X[:, -1] - X[:, -2])
    On = R/np.sqrt(R[0, :]**2 + R[1, :]**2)*o
    Out = On + X
    return Out[0, :], Out[1, :]

def offset_curve(ax, x, y, o):
    """Offset arrays x, y in data coordinates by o in points."""
    trans = ax.transData.transform
    inv = ax.transData.inverted().transform
    X = np.c_[x, y]
    Xt = trans(X)
    xto, yto = offset(Xt[:, 0], Xt[:, 1], o*dpi/72.)
    Xto = np.c_[xto, yto]
    Xo = inv(Xto)
    return Xo[:, 0], Xo[:, 1]

# some single points
y = np.array([1, 2, 2, 3, 3, 0])
x = np.arange(len(y))
# or try a sine
x = np.linspace(0, 9)
y = np.sin(x)*x/3.

fig, ax = plt.subplots(figsize=(4, 2.5), dpi=dpi)
cols = ["#fff40b", "#00e103", "#ff9921", "#3a00ef", "#ff2121", "#af00e7"]
lw = 2.
lines = []
for i in range(len(cols)):
    l, = plt.plot(x, y, lw=lw, color=cols[i])
    lines.append(l)

def plot_rainbow(event=None):
    xr = [None]*6; yr = [None]*6   # holders for the six offset curves
    xr[0], yr[0] = offset_curve(ax, x, y, lw/2.)
    xr[1], yr[1] = offset_curve(ax, x, y, -lw/2.)
    xr[2], yr[2] = offset_curve(ax, xr[0], yr[0], lw)
    xr[3], yr[3] = offset_curve(ax, xr[1], yr[1], -lw)
    xr[4], yr[4] = offset_curve(ax, xr[2], yr[2], lw)
    xr[5], yr[5] = offset_curve(ax, xr[3], yr[3], -lw)
    for i in range(6):
        lines[i].set_data(xr[i], yr[i])

plot_rainbow()
fig.canvas.mpl_connect("resize_event", plot_rainbow)
fig.canvas.mpl_connect("button_release_event", plot_rainbow)
plt.savefig(__file__ + ".png", dpi=dpi)
plt.show()
Method2
To avoid overlapping lines, one has to use a more complicated solution.
One could first offset every point normal to the two line segments it is part of (green points in the picture below). Then calculate the line through those offset points and find their intersection.
A special case arises when the slopes of two subsequent line segments are equal. This has to be taken care of (eps in the code below).
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt

dpi = 100

def intersect(p1, p2, q1, q2, eps=1.e-10):
    """Given two lines, the first through p1 and p2, the second
    through q1 and q2, find their intersection."""
    x1 = p1[0]; y1 = p1[1]; x2 = p2[0]; y2 = p2[1]
    x3 = q1[0]; y3 = q1[1]; x4 = q2[0]; y4 = q2[1]
    nomX = (x1*y2 - y1*x2)*(x3 - x4) - (x1 - x2)*(x3*y4 - y3*x4)
    denom = float((x1 - x2)*(y3 - y4) - (y1 - y2)*(x3 - x4))
    nomY = (x1*y2 - y1*x2)*(y3 - y4) - (y1 - y2)*(x3*y4 - y3*x4)
    if np.abs(denom) < eps:
        # intersection undefined (parallel segments)
        return np.array(p1)
    else:
        return np.array([nomX/denom, nomY/denom])

def offset(x, y, o, eps=1.e-10):
    """Offset the coordinates given by arrays x, y by o."""
    X = np.c_[x, y].T
    m = np.array([[0, -1], [1, 0]])
    S = X[:, 1:] - X[:, :-1]
    R = np.dot(m, S)
    norm = np.sqrt(R[0, :]**2 + R[1, :]**2) / o
    On = R/norm
    Outa = On + X[:, 1:]
    Outb = On + X[:, :-1]
    G = np.zeros_like(X)
    for i in range(0, len(X[0, :]) - 2):
        p = intersect(Outa[:, i], Outb[:, i], Outa[:, i+1], Outb[:, i+1], eps=eps)
        G[:, i+1] = p
    G[:, 0] = Outb[:, 0]
    G[:, -1] = Outa[:, -1]
    return G[0, :], G[1, :]

def offset_curve(ax, x, y, o, eps=1.e-10):
    """Offset arrays x, y in data coordinates by o in points."""
    trans = ax.transData.transform
    inv = ax.transData.inverted().transform
    X = np.c_[x, y]
    Xt = trans(X)
    xto, yto = offset(Xt[:, 0], Xt[:, 1], o*dpi/72., eps=eps)
    Xto = np.c_[xto, yto]
    Xo = inv(Xto)
    return Xo[:, 0], Xo[:, 1]

# some single points
y = np.array([1, 1, 2, 0, 3, 2, 1., 4, 3]) * 1.e9
x = np.arange(len(y))
x[3] = x[4]
# or try a sine
#x = np.linspace(0,9)
#y = np.sin(x)*x/3.

fig, ax = plt.subplots(figsize=(4, 2.5), dpi=dpi)
cols = ["r", "b"]
lw = 11.
lines = []
for i in range(len(cols)):
    l, = plt.plot(x, y, lw=lw, color=cols[i], solid_joinstyle="miter")
    lines.append(l)

def plot_rainbow(event=None):
    xr = [None]*2; yr = [None]*2   # holders for the two offset curves
    xr[0], yr[0] = offset_curve(ax, x, y, lw/2.)
    xr[1], yr[1] = offset_curve(ax, x, y, -lw/2.)
    for i in range(2):
        lines[i].set_data(xr[i], yr[i])

plot_rainbow()
fig.canvas.mpl_connect("resize_event", plot_rainbow)
fig.canvas.mpl_connect("button_release_event", plot_rainbow)
plt.show()
Note that this method should work well as long as the offset between the lines is smaller than the distance between subsequent points on the line. Otherwise, method 1 may be better suited.
The best that I can think of is to take your data, generate a series of small offsets, and use fill_between to make bands of whatever color you like.
I wrote a function to do this. I don't know what shape you're trying to plot, so this may or may not work for you. I tested it on a parabola and got decent results. You can also play around with the list of colors.
import numpy as np
import matplotlib.pyplot as plt

def rainbow_plot(x, y, spacing=0.1):
    fig, ax = plt.subplots()
    colors = ['red', 'yellow', 'green', 'cyan', 'blue']
    top = max(y)
    lines = []
    # build a stack of downward-shifted copies of the curve
    for i in range(len(colors) + 1):
        newline_data = y - top*spacing*i
        lines.append(newline_data)
    # fill the band between each pair of adjacent copies with one color
    for i, c in enumerate(colors):
        ax.fill_between(x, lines[i], lines[i+1], facecolor=c)
    return fig, ax

x = np.linspace(0, 1, 51)
y = 1 - (x - 0.5)**2
rainbow_plot(x, y)
plt.show()

Find a easier way to cluster 2-d scatter data into grid array data

I have figured out a method to cluster dispersed point data into a structured 2-d array (like a rasterize function), and I hope there are better ways to achieve that goal.
My work
1. Intro
1000 data points, each with three properties (lon, lat, emission), representing a factory located at (x, y) that emits a certain amount of CO2 into the atmosphere
grid network: a predefined 2-d array with shape 20x20
http://i4.tietuku.com/02fbaf32d2f09fff.png
The code is reproduced here:
from mpl_toolkits.basemap import Basemap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### define the map area
xc1, xc2, yc1, yc2 = 113.49805889531724, 115.5030664238035, 37.39995194888143, 38.789235929357105
map = Basemap(llcrnrlon=xc1, llcrnrlat=yc1, urcrnrlon=xc2, urcrnrlat=yc2)

#### read the point data and scatter-plot the points by position
df = pd.read_csv("xxxxx.csv")
px, py = map(df.lon, df.lat)
map.scatter(px, py, color="red", s=5, zorder=3)

#### predefine the grid network
lon_grid, lat_grid = np.linspace(xc1, xc2, 21), np.linspace(yc1, yc2, 21)
lon_x, lat_y = np.meshgrid(lon_grid, lat_grid)
grids = np.zeros(20*20).reshape(20, 20)
plt.pcolormesh(lon_x, lat_y, grids, cmap='gray', facecolor='none', edgecolor='k', zorder=3)
2. My target
Find the nearest grid point for each factory
Add that factory's emission to the corresponding grid cell
3. Algorithm realization
3.1 Raster grid
note: 20x20 grid points are distributed over this area, represented by blue dots.
http://i4.tietuku.com/8548554587b0cb3a.png
3.2 KD-tree
Find the nearest blue dot for each red point
sh = (20*20, 2)
grids = np.zeros(20*20*2).reshape(*sh)
sh_emission = (20*20)
grids_em = np.zeros(20*20).reshape(sh_emission)

k = 0
for j in range(0, yy.shape[0], 1):
    for i in range(0, xx.shape[0], 1):
        grids[k] = np.array([lon_grid[i], lat_grid[j]])
        k += 1

T = KDTree(grids)
x_delta = (lon_grid[2] - lon_grid[1])
y_delta = (lat_grid[2] - lat_grid[1])
R = np.sqrt(x_delta**2 + y_delta**2)

for i in range(0, len(df.lon), 1):
    idx = T.query_ball_point([df.lon.iloc[i], df.lat.iloc[i]], r=R)
    # Sometimes more than one blue dot is found, so I calculate the
    # distances between the factory (red point) and all listed blue dots
    if len(idx) > 1:
        distance = []
        for k in range(0, len(idx), 1):
            distance.append(np.sqrt((df.lon.iloc[i] - grids[k][0])**2 + (df.lat.iloc[i] - grids[k][1])**2))
        pos_index = distance.index(min(distance))
        pos = idx[pos_index]
    # Only one point found
    else:
        pos = idx
    grids_em[pos] += df.so2[i]
4. Result
co2 = grids_em.reshape(20,20)
plt.pcolormesh(lon_x,lat_y,co2,cmap =plt.cm.Spectral_r,zorder=3)
http://i4.tietuku.com/6ded65c4ac301294.png
5. My question
Can someone point out drawbacks or errors in this method?
Is there an algorithm better aligned with my target?
Thanks a lot!
There are many for-loops in your code; that's not the numpy way.
Make some sample data first:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
import pylab as pl
xc1, xc2, yc1, yc2 = 113.49805889531724, 115.5030664238035, 37.39995194888143, 38.789235929357105
N = 1000
GSIZE = 20
x, y = np.random.multivariate_normal([(xc1 + xc2)*0.5, (yc1 + yc2)*0.5], [[0.1, 0.02], [0.02, 0.1]], size=N).T
value = np.ones(N)
df_points = pd.DataFrame({"x":x, "y":y, "v":value})
For equal space grids you can use hist2d():
pl.hist2d(df_points.x, df_points.y, weights=df_points.v, bins=20, cmap="viridis");
Here is the output:
Here is the code to use KdTree:
X, Y = np.mgrid[x.min():x.max():GSIZE*1j, y.min():y.max():GSIZE*1j]
grid = np.c_[X.ravel(), Y.ravel()]
points = np.c_[df_points.x, df_points.y]
tree = KDTree(grid)
dist, indices = tree.query(points)
grid_values = df_points.groupby(indices).v.sum()
df_grid = pd.DataFrame(grid, columns=["x", "y"])
df_grid["v"] = grid_values
fig, ax = pl.subplots(figsize=(10, 8))
ax.plot(df_points.x, df_points.y, "kx", alpha=0.2)
mapper = ax.scatter(df_grid.x, df_grid.y, c=df_grid.v,
                    cmap="viridis",
                    linewidths=0,
                    s=100, marker="o")
pl.colorbar(mapper, ax=ax);
the output is:
