Related
I want to create an animation from roughly 250 individual frames, showing data plotted as 2D images in a figure with 4 x 11 subpanels. The data represent power spectra of velocity as a function of temporal frequency and latitude. However, each frame takes about 4 seconds to create and save, including run-time computation of the data. In the non-interactive plotting mode, I use 'agg' as the backend to avoid time spent for interactivity plotting features.
The speed bottleneck here is not the computation of the data to plot, but saving the plots to disk. Example run-times for random data (see code below) and only 5 frames without saving the plots are sth. like 5 seconds, with saving the plots 17-19 seconds. For the actual data I use, there are some more plot artists to be drawn (text on panels, an additional line plot etc.), but the script execution time is quite similar. For the about 250 frames in total, this indicates roughly 900 seconds, thus 15 minutes to compute the data and then save the plots. However, since I likely want to generate similar frames several times or with slightly different data, it would be good to decrease this script execution time.
A (hopefully) reproducible code, using random data, but with data sizes equal to the actual data I use, is given below. An example frame (the first one generated by the code) can also be found below. In the code, the function create_fig() generates a figure with subpanels containing dummy data and in the for-loop over the different frames, only the data in the subpanels is replaced.
Is there a way to speed-up saving the plots into the png files? Any help is much appreciated!
# import packages
import numpy as np
import time
import matplotlib as mpl
import matplotlib.pyplot as plt
path_plots_out = '/home/proxauf'
# set up grids
nt, nlat, nlon = 3328, 24, 48
dlat = 7.5
lats = np.linspace(-90,90-dlat,nlat)
dt = 98191.08
nu = (-1) * np.fft.fftfreq(nt, dt) * 10 ** 9
nnu = len(nu)
nu_fftshift = np.fft.fftshift(nu)
dnu_fftshift = nu_fftshift[1] - nu_fftshift[0]
nu_lims = [-500, 500]
ind_nu_xlims = np.where(np.logical_and(nu_fftshift >= nu_lims[0], nu_fftshift <= nu_lims[1]))[0]
ext_box_nu_lat = [nu_fftshift[ind_nu_xlims][0] - dnu_fftshift / 2, nu_fftshift[ind_nu_xlims][-1] + dnu_fftshift / 2, lats[0] - dlat / 2.0, lats[-1] + dlat / 2.0]
nnu_cut = len(ind_nu_xlims)
plt.ioff()
if plt.rcParams['interactive']:
mpl.use('Qt5Agg')
else:
mpl.use('agg')
# plotting function
def create_fig():
data_xlabels = np.zeros((nrows, ncols), dtype='U30')
data_xlabels[-1, :] = r'Frequency [nHz]'
data_xticks = np.array([[np.linspace(-300, 300, 3)] * ncols] * nrows)
data_xticks_minor = np.array([[np.linspace(-500, 500, 21)] * ncols] * nrows)
data_xlims = np.array([[(-500, 500)] * ncols] * nrows)
data_ylabels = np.zeros((nrows, ncols), dtype='U30')
data_ylabels[:, 0] = r'Latitude [deg]'
data_yticks = np.array([[np.linspace(-90, 90, 7)] * ncols] * nrows)
data_yticks_minor = np.array([[np.linspace(-90, 90, 25)] * ncols] * nrows)
data_ylims = np.array([[(-90, 90)] * ncols] * nrows)
plot_xticks = np.zeros((nrows, ncols), dtype=bool)
plot_xticks[-1, :] = True
plot_yticks = np.zeros((nrows, ncols), dtype=bool)
plot_yticks[:, 0] = True
fig_left, fig_right, fig_bottom, fig_top, fig_hspace, fig_wspace = (0.04, 0.95, 0.06, 0.90, 0.1, 0.1)
fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
data_list = []
for i in range(nrows):
data_list_temp = []
for j in range(ncols):
ax = axes[i, j]
im = ax.imshow(np.zeros((nnu_cut, nlat)).T, interpolation='nearest', origin='lower', aspect='auto', cmap='binary', extent=ext_box_nu_lat)
im.set_clim(0,1e4)
ax.set_xlabel(data_xlabels[i, j])
ax.set_ylabel(data_ylabels[i, j])
ax.set_xlim(data_xlims[i, j])
ax.set_ylim(data_ylims[i, j])
ax.set_xticks(data_xticks[i, j])
ax.set_xticks(data_xticks_minor[i, j], minor=True)
ax.set_yticks(data_yticks[i, j])
ax.set_yticks(data_yticks_minor[i, j], minor=True)
if not plot_xticks[i, j]:
ax.tick_params(labelbottom=False)
if not plot_yticks[i, j]:
ax.tick_params(labelleft=False)
data_list_temp.append(im)
data_list.append(data_list_temp)
fig.subplots_adjust(left=fig_left, right=fig_right, bottom=fig_bottom, top=fig_top, hspace=fig_hspace, wspace=fig_wspace)
fig.canvas.draw()
ax1 = axes[0, -1]
ax2 = axes[-1, -1]
top = ax1.get_position().y1
bottom = ax2.get_position().y0
right = ax2.get_position().x1
cbar_pad = 0.01
cbar_width = 0.01
cbar_height = top - bottom
cax = fig.add_axes([right + cbar_pad, bottom, cbar_width, cbar_height])
cbar = plt.colorbar(data_list[-1][-1], ax=axes[-1, -1], cax=cax)
return fig, axes, data_list
nrows = 4
ncols = 11
figsize = (16.5, 8)
# create figure with empty subpanels
fig, axes, data_list = create_fig()
# generate some data
np.random.seed(100)
data1 = np.random.rand(nt,nlat,nlon)
data2 = np.random.rand(nt,nlat,nlon)
data3 = np.random.rand(nt,nlat,nlon)
data4 = np.random.rand(nt,nlat,nlon)
wsize = nt // 4
data1_temp = np.zeros((nt, nlat, nlon))
data2_temp = np.zeros((nt, nlat, nlon))
data3_temp = np.zeros((nt, nlat, nlon))
data4_temp = np.zeros((nt, nlat, nlon))
data1_temp[:wsize,:,:] = data1[:wsize,:,:]
data2_temp[:wsize,:,:] = data2[:wsize,:,:]
data3_temp[:wsize,:,:] = data3[:wsize,:,:]
data4_temp[:wsize,:,:] = data4[:wsize,:,:]
frame_cad = 10
# do not activate, else program will take about 15-20 minutes to finish
# frame_inds = range(0, nt - wsize + 1, frame_cad)
frame_inds = range(0, 50, frame_cad)
t0 = time.time()
for c, i in enumerate(frame_inds):
print(c)
if i >= 1:
# fill in data for the next frame
data1_temp[i-frame_cad:i] = 0.0
data1_temp[i+wsize- 1:i+wsize-1+frame_cad] = data1[i+wsize-1:i+wsize-1+frame_cad,:,:]
data2_temp[i-frame_cad:i] = 0.0
data2_temp[i+wsize- 1:i+wsize-1+frame_cad] = data2[i+wsize-1:i+wsize-1+frame_cad,:,:]
data3_temp[i-frame_cad:i] = 0.0
data3_temp[i+wsize- 1:i+wsize-1+frame_cad] = data3[i+wsize-1:i+wsize-1+frame_cad,:,:]
data4_temp[i-frame_cad:i] = 0.0
data4_temp[i+wsize- 1:i+wsize-1+frame_cad] = data4[i+wsize-1:i+wsize-1+frame_cad,:,:]
# compute power spectrum
pu1_temp = np.abs(np.fft.fftn(data1_temp, axes=(0, 2))) ** 2
pu2_temp = np.abs(np.fft.fftn(data2_temp, axes=(0, 2))) ** 2
pu3_temp = np.abs(np.fft.fftn(data3_temp, axes=(0, 2))) ** 2
pu4_temp = np.abs(np.fft.fftn(data4_temp, axes=(0, 2))) ** 2
pu_temp_list = [pu1_temp, pu2_temp, pu3_temp, pu4_temp]
# update data in subpanels
for s in range(nrows):
for j in range(ncols):
data_list[s][j].set_data(np.fft.fftshift(pu_temp_list[s][:,:,j], axes=(0,))[ind_nu_xlims].T)
# save figure
fig.savefig('%s/stackoverflow_test/frame_%04d.png' % (path_plots_out, c))
plt.close()
print(time.time() - t0)
Update: Modified code blocks given below (no minor ticks, pyfftw instead of numpy, faster absolute-square computation; note: data_list return argument from create_fig() renamed to plot_data_list) yield running times of about 6s for 5 frames. The biggest speed boost comes from deactivating minor ticks (as mentioned in Jody Klymak's answer).
# use np.take_along_axis() with sorting indices instead of np.fft.fftshift() later, gives a slight (not too much!) speed boost
ind_nu_xlims = np.where(np.logical_and(nu >= nu_lims[0], nu <= nu_lims[1]))[0]
ind_nu_sort = np.argsort(nu[ind_nu_xlims])
nu_sort = np.take_along_axis(nu[ind_nu_xlims],ind_nu_sort,axis=0)
ext_box_nu_lat = [nu_sort[0] + dnu_fftshift / 2, nu_sort[-1] - dnu_fftshift / 2, lats[0] - dlat / 2.0, lats[-1] + dlat / 2.0]
# plotting function
def create_fig():
# deactivating ticks massively (!) boosts plotting performance
# ax.set_xticks(data_xticks_minor[i, j], minor=True)
# ax.set_yticks(data_yticks_minor[i, j], minor=True)
data_list = [data1, data2, data3, data4]
# wisdom makes FFTs much faster using pyfftw than using numpy
# enable cache and set cache memory-keeping time sufficiently large
# this depends on the computation time between FFT calls
pyfftw.interfaces.cache.enable()
pyfftw.interfaces.cache.set_keepalive_time(5)
for c, i in enumerate(frame_inds):
print(c)
data_temp_list = [data1_temp, data2_temp, data3_temp, data4_temp]
pu_temp_list = []
for j, data_temp in enumerate(data_temp_list):
if i >= 1:
# fill in data for the next frame
data_temp[i-frame_cad:i] = 0.0
data_temp[i+wsize-1:i+wsize-1+frame_cad] = data_list[j][i+wsize-1:i+wsize-1+frame_cad,:,:]
# compute Fourier transform via pyfftw; wisdom makes FFTs much faster using pyfftw than using numpy
pu_temp = pyfftw.interfaces.numpy_fft.fftn(data_temp, axes=(0, 2), threads=-1)
# compute absolute-square using np.real(x * np.conj(x));
# about same speed as np.real(x) * np.imag(x);
# faster than np.einsum('ijk,ijk->ijk',x,np.conj(x));
# also faster than np.abs(x)**2 since np.abs(x)**2 first takes square-root, then squares again
pu_temp = np.real(pu_temp*np.conj(pu_temp))
pu_temp_list.append(pu_temp)
# update data in subpanels
for s in range(nrows):
for j in range(ncols):
# use np.take_along_axis() with sorting indices instead of np.fft.fftshift(), gives a slight (not too much!) speed boost
plot_data_list[s][j].set_data(np.take_along_axis(pu_temp_list[s][ind_nu_xlims,:,j], ind_nu_sort[:,None], axis=0).T)
# save figure
fig.savefig('%s/stackoverflow_test/frame_%04d.png' % (path_plots_out, c))
plt.close()
print(time.time() - t0)
So if that is exactly what you want the plot to look like, then I think you are doing the fastest that you can do. I get 15 s for 5 figures, and get 5 s for not saving.
Believe it or not, the easy way to make it faster is to drop your minor ticks. If I comment those lines out I get 8 s, for a 70% speed up. Ticks are really expensive in matplotlib. Given your minor ticks are tiny, I'd suggest that as an easy optimization.
I will give you some tips, but can be not a solution:
You are doing the rigth thing to run over the matrix, but check if can maximize the cache transposing your matrix (when you have a very tall and narrow case)
Have your heard about of sparse-matrix or matrix compressing techniques?
do the stuff that you need to do when i<1 outside of the for loop - you will save 1 comparison if you take out that
can you use parallel computation? like Omp for python?
I want to fill a bunch of polygons with line hatch. The lines must have a specific angle with respect to x-axis. I found that matplotlib already suppots some hatch classes and one can define a custom class (like How to fill a polygon with a custom hatch in matplotlib?). I tried to generate a custom hatch but when I append it to the list of hatches the init function doesn't know the angle. I tried with the following class:
class AngularHatch(HatchPatternBase):
def __init__(self, hatch, density, angle):
self.num_lines = int((hatch.count('{'))*density*3)
self.num_vertices = self.num_lines * 2
self.R = np.array([[np.cos(angle), -np.sin(angle)],
[np.sin(angle), np.cos(angle)]])
def set_vertices_and_codes(self, vertices, codes):
steps, stepsize = np.linspace(0.0, 1.0, self.num_lines, False,
retstep=True)
steps += stepsize / 2.
vertices[0::2, 0] = 0
vertices[0::2, 1] = steps
vertices[1::2, 0] = 1
vertices[1::2, 1] = steps
for i, v in enumerate(vertices):
vertices[i] = self.R.dot(v)
codes[0::2] = Path.MOVETO
codes[1::2] = Path.LINETO
Then I add this class to the list of available classes for hatching. However this will not generate the correct lines since the code is modified from the HorizontalHatch source code here and I think this generates lines in the unit square. Moreover I need to generate this patch for a specific angle for each polygon I want to render. ¿Any ideas on how to give the correct angle to this class per polygon?
The following does not solve this issue. It just solves part of the problem and shows at which point the approach fails. I am currently convinced that hatching with arbitrary angles is not possible with matplotlib, because the size of the unit cell is fixed.
To overcome the problem of setting the angle, one may define a custom format from which to take the angle information. E.g. "{angle}{factor}", such that "{45}{2}" would produce a hatching with an angle of 45° and a density factor of 2.
I then do not completely understand the attempt of calculating the vertices. To replicate the behaviour of the hatches which are built-in, one may rotate them directly.
The problem is that this way the line hatches work only for angles of 45°. This is because the lines at the edges of the unit cell do not align well. See the following:
import numpy as np
import matplotlib.hatch
import matplotlib.path
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Rectangle
class AngularHatch(matplotlib.hatch.HatchPatternBase):
def __init__(self, hatch, density):
self.num_lines=0
self.num_vertices=0
if hatch[0] == "{":
h = hatch.strip("{}").split("}{")
angle = np.deg2rad(float(h[0])-45)
d = float(h[1])
self.num_lines = int(density*d)
self.num_vertices = (self.num_lines + 1) * 2
self.R = np.array([[np.cos(angle), -np.sin(angle)],
[np.sin(angle), np.cos(angle)]])
def set_vertices_and_codes(self, vertices, codes):
steps = np.linspace(-0.5, 0.5, self.num_lines + 1, True)
vertices[0::2, 0] = 0.0 + steps
vertices[0::2, 1] = 0.0 - steps
vertices[1::2, 0] = 1.0 + steps
vertices[1::2, 1] = 1.0 - steps
codes[0::2] = matplotlib.path.Path.MOVETO
codes[1::2] = matplotlib.path.Path.LINETO
vertices[:,:] = np.dot((vertices-0.5),self.R)+0.5
matplotlib.hatch._hatch_types.append(AngularHatch)
fig = plt.figure()
ax = fig.add_subplot(111)
ellipse = ax.add_patch(Rectangle((0.1, 0.1), 0.4, 0.8, fill=False))
ellipse.set_hatch('{45}{1}')
ellipse.set_color('red')
ellipse = ax.add_patch(Rectangle((0.55, 0.1), 0.4, 0.8, fill=False))
ellipse.set_hatch('{22}{1}')
ellipse.set_color('blue')
plt.show()
I would like to plot parallel lines with different colors. E.g. rather than a single red line of thickness 6, I would like to have two parallel lines of thickness 3, with one red and one blue.
Any thoughts would be appreciated.
Merci
Even with the smart offsetting (s. below), there is still an issue in a view that has sharp angles between consecutive points.
Zoomed view of smart offsetting:
Overlaying lines of varying thickness:
Plotting parallel lines is not an easy task. Using a simple uniform offset will of course not show the desired result. This is shown in the left picture below.
Such a simple offset can be produced in matplotlib as shown in the transformation tutorial.
Method1
A better solution may be to use the idea sketched on the right side. To calculate the offset of the nth point we can use the normal vector to the line between the n-1st and the n+1st point and use the same distance along this normal vector to calculate the offset point.
The advantage of this method is that we have the same number of points in the original line as in the offset line. The disadvantage is that it is not completely accurate, as can be see in the picture.
This method is implemented in the function offset in the code below.
In order to make this useful for a matplotlib plot, we need to consider that the linewidth should be independent of the data units. Linewidth is usually given in units of points, and the offset would best be given in the same unit, such that e.g. the requirement from the question ("two parallel lines of width 3") can be met.
The idea is therefore to transform the coordinates from data to display coordinates, using ax.transData.transform. Also the offset in points o can be transformed to the same units: Using the dpi and the standard of ppi=72, the offset in display coordinates is o*dpi/ppi. After the offset in display coordinates has been applied, the inverse transform (ax.transData.inverted().transform) allows a backtransformation.
Now there is another dimension of the problem: How to assure that the offset remains the same independent of the zoom and size of the figure?
This last point can be addressed by recalculating the offset each time a zooming of resizing event has taken place.
Here is how a rainbow curve would look like produced by this method.
And here is the code to produce the image.
import numpy as np
import matplotlib.pyplot as plt
dpi = 100
def offset(x,y, o):
""" Offset coordinates given by array x,y by o """
X = np.c_[x,y].T
m = np.array([[0,-1],[1,0]])
R = np.zeros_like(X)
S = X[:,2:]-X[:,:-2]
R[:,1:-1] = np.dot(m, S)
R[:,0] = np.dot(m, X[:,1]-X[:,0])
R[:,-1] = np.dot(m, X[:,-1]-X[:,-2])
On = R/np.sqrt(R[0,:]**2+R[1,:]**2)*o
Out = On+X
return Out[0,:], Out[1,:]
def offset_curve(ax, x,y, o):
""" Offset array x,y in data coordinates
by o in points """
trans = ax.transData.transform
inv = ax.transData.inverted().transform
X = np.c_[x,y]
Xt = trans(X)
xto, yto = offset(Xt[:,0],Xt[:,1],o*dpi/72. )
Xto = np.c_[xto, yto]
Xo = inv(Xto)
return Xo[:,0], Xo[:,1]
# some single points
y = np.array([1,2,2,3,3,0])
x = np.arange(len(y))
#or try a sinus
x = np.linspace(0,9)
y=np.sin(x)*x/3.
fig, ax=plt.subplots(figsize=(4,2.5), dpi=dpi)
cols = ["#fff40b", "#00e103", "#ff9921", "#3a00ef", "#ff2121", "#af00e7"]
lw = 2.
lines = []
for i in range(len(cols)):
l, = plt.plot(x,y, lw=lw, color=cols[i])
lines.append(l)
def plot_rainbow(event=None):
xr = range(6); yr = range(6);
xr[0],yr[0] = offset_curve(ax, x,y, lw/2.)
xr[1],yr[1] = offset_curve(ax, x,y, -lw/2.)
xr[2],yr[2] = offset_curve(ax, xr[0],yr[0], lw)
xr[3],yr[3] = offset_curve(ax, xr[1],yr[1], -lw)
xr[4],yr[4] = offset_curve(ax, xr[2],yr[2], lw)
xr[5],yr[5] = offset_curve(ax, xr[3],yr[3], -lw)
for i in range(6):
lines[i].set_data(xr[i], yr[i])
plot_rainbow()
fig.canvas.mpl_connect("resize_event", plot_rainbow)
fig.canvas.mpl_connect("button_release_event", plot_rainbow)
plt.savefig(__file__+".png", dpi=dpi)
plt.show()
Method2
To avoid overlapping lines, one has to use a more complicated solution.
One could first offset every point normal to the two line segments it is part of (green points in the picture below). Then calculate the line through those offset points and find their intersection.
A particular case would be when the slopes of two subsequent line segments equal. This has to be taken care of (eps in the code below).
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
dpi = 100
def intersect(p1, p2, q1, q2, eps=1.e-10):
""" given two lines, first through points pn, second through qn,
find the intersection """
x1 = p1[0]; y1 = p1[1]; x2 = p2[0]; y2 = p2[1]
x3 = q1[0]; y3 = q1[1]; x4 = q2[0]; y4 = q2[1]
nomX = ((x1*y2-y1*x2)*(x3-x4)- (x1-x2)*(x3*y4-y3*x4))
denom = float( (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4) )
nomY = (x1*y2-y1*x2)*(y3-y4) - (y1-y2)*(x3*y4-y3*x4)
if np.abs(denom) < eps:
#print "intersection undefined", p1
return np.array( p1 )
else:
return np.array( [ nomX/denom , nomY/denom ])
def offset(x,y, o, eps=1.e-10):
""" Offset coordinates given by array x,y by o """
X = np.c_[x,y].T
m = np.array([[0,-1],[1,0]])
S = X[:,1:]-X[:,:-1]
R = np.dot(m, S)
norm = np.sqrt(R[0,:]**2+R[1,:]**2) / o
On = R/norm
Outa = On+X[:,1:]
Outb = On+X[:,:-1]
G = np.zeros_like(X)
for i in xrange(0, len(X[0,:])-2):
p = intersect(Outa[:,i], Outb[:,i], Outa[:,i+1], Outb[:,i+1], eps=eps)
G[:,i+1] = p
G[:,0] = Outb[:,0]
G[:,-1] = Outa[:,-1]
return G[0,:], G[1,:]
def offset_curve(ax, x,y, o, eps=1.e-10):
""" Offset array x,y in data coordinates
by o in points """
trans = ax.transData.transform
inv = ax.transData.inverted().transform
X = np.c_[x,y]
Xt = trans(X)
xto, yto = offset(Xt[:,0],Xt[:,1],o*dpi/72., eps=eps )
Xto = np.c_[xto, yto]
Xo = inv(Xto)
return Xo[:,0], Xo[:,1]
# some single points
y = np.array([1,1,2,0,3,2,1.,4,3]) *1.e9
x = np.arange(len(y))
x[3]=x[4]
#or try a sinus
#x = np.linspace(0,9)
#y=np.sin(x)*x/3.
fig, ax=plt.subplots(figsize=(4,2.5), dpi=dpi)
cols = ["r", "b"]
lw = 11.
lines = []
for i in range(len(cols)):
l, = plt.plot(x,y, lw=lw, color=cols[i], solid_joinstyle="miter")
lines.append(l)
def plot_rainbow(event=None):
xr = range(2); yr = range(2);
xr[0],yr[0] = offset_curve(ax, x,y, lw/2.)
xr[1],yr[1] = offset_curve(ax, x,y, -lw/2.)
for i in range(2):
lines[i].set_data(xr[i], yr[i])
plot_rainbow()
fig.canvas.mpl_connect("resize_event", plot_rainbow)
fig.canvas.mpl_connect("button_release_event", plot_rainbow)
plt.show()
Note that this method should work well as long as the offset between the lines is smaller then the distance between subsequent points on the line. Otherwise method 1 may be better suited.
The best that I can think of is to take your data, generate a series of small offsets, and use fill_between to make bands of whatever color you like.
I wrote a function to do this. I don't know what shape you're trying to plot, so this may or may not work for you. I tested it on a parabola and got decent results. You can also play around with the list of colors.
def rainbow_plot(x, y, spacing=0.1):
fig, ax = plt.subplots()
colors = ['red', 'yellow', 'green', 'cyan','blue']
top = max(y)
lines = []
for i in range(len(colors)+1):
newline_data = y - top*spacing*i
lines.append(newline_data)
for i, c in enumerate(colors):
ax.fill_between(x, lines[i], lines[i+1], facecolor=c)
return fig, ax
x = np.linspace(0,1,51)
y = 1-(x-0.5)**2
rainbow_plot(x,y)
I have figured out a method to cluster disperse point data into structured 2-d array(like rasterize function). And I hope there are some better ways to achieve that target.
My work
1. Intro
1000 point data has there dimensions of properties (lon, lat, emission) whicn represent one factory located at (x,y) emit certain amount of CO2 into atmosphere
grid network: predefine the 2-d array in the shape of 20x20
http://i4.tietuku.com/02fbaf32d2f09fff.png
The code reproduced here:
#### define the map area
xc1,xc2,yc1,yc2 = 113.49805889531724,115.5030664238035,37.39995194888143,38.789235929357105
map = Basemap(llcrnrlon=xc1,llcrnrlat=yc1,urcrnrlon=xc2,urcrnrlat=yc2)
#### reading the point data and scatter plot by their position
df = pd.read_csv("xxxxx.csv")
px,py = map(df.lon, df.lat)
map.scatter(px, py, color = "red", s= 5,zorder =3)
#### predefine the grid networks
lon_grid,lat_grid = np.linspace(xc1,xc2,21), np.linspace(yc1,yc2,21)
lon_x,lat_y = np.meshgrid(lon_grid,lat_grid)
grids = np.zeros(20*20).reshape(20,20)
plt.pcolormesh(lon_x,lat_y,grids,cmap = 'gray', facecolor = 'none',edgecolor = 'k',zorder=3)
2. My target
Finding the nearest grid point for each factory
Add the emission data into this grid number
3. Algorithm realization
3.1 Raster grid
note: 20x20 grid points are distributed in this area represented by blue dot.
http://i4.tietuku.com/8548554587b0cb3a.png
3.2 KD-tree
Find the nearest blue dot of each red point
sh = (20*20,2)
grids = np.zeros(20*20*2).reshape(*sh)
sh_emission = (20*20)
grids_em = np.zeros(20*20).reshape(sh_emission)
k = 0
for j in range(0,yy.shape[0],1):
for i in range(0,xx.shape[0],1):
grids[k] = np.array([lon_grid[i],lat_grid[j]])
k+=1
T = KDTree(grids)
x_delta = (lon_grid[2] - lon_grid[1])
y_delta = (lat_grid[2] - lat_grid[1])
R = np.sqrt(x_delta**2 + y_delta**2)
for i in range(0,len(df.lon),1):
idx = T.query_ball_point([df.lon.iloc[i],df.lat.iloc[i]], r=R)
# there are more than one blue dot which are founded sometimes,
# So I'll calculate the distances between the factory(red point)
# and all blue dots which are listed
if (idx > 1):
distance = []
for k in range(0,len(idx),1):
distance.append(np.sqrt((df.lon.iloc[i] - grids[k][0])**2 + (df.lat.iloc[i] - grids[k][1])**2))
pos_index = distance.index(min(distance))
pos = idx[pos_index]
# Only find 1 point
else:
pos = idx
grids_em[pos] += df.so2[i]
4. Result
co2 = grids_em.reshape(20,20)
plt.pcolormesh(lon_x,lat_y,co2,cmap =plt.cm.Spectral_r,zorder=3)
http://i4.tietuku.com/6ded65c4ac301294.png
5. My question
Can someone point out some drawbacks or error of this method?
Is there some algorithms more aligned with my target?
Thanks a lot!
There are many for-loop in your code, it's not the numpy way.
Make some sample data first:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
import pylab as pl
xc1, xc2, yc1, yc2 = 113.49805889531724, 115.5030664238035, 37.39995194888143, 38.789235929357105
N = 1000
GSIZE = 20
x, y = np.random.multivariate_normal([(xc1 + xc2)*0.5, (yc1 + yc2)*0.5], [[0.1, 0.02], [0.02, 0.1]], size=N).T
value = np.ones(N)
df_points = pd.DataFrame({"x":x, "y":y, "v":value})
For equal space grids you can use hist2d():
pl.hist2d(df_points.x, df_points.y, weights=df_points.v, bins=20, cmap="viridis");
Here is the output:
Here is the code to use KdTree:
X, Y = np.mgrid[x.min():x.max():GSIZE*1j, y.min():y.max():GSIZE*1j]
grid = np.c_[X.ravel(), Y.ravel()]
points = np.c_[df_points.x, df_points.y]
tree = KDTree(grid)
dist, indices = tree.query(points)
grid_values = df_points.groupby(indices).v.sum()
df_grid = pd.DataFrame(grid, columns=["x", "y"])
df_grid["v"] = grid_values
fig, ax = pl.subplots(figsize=(10, 8))
ax.plot(df_points.x, df_points.y, "kx", alpha=0.2)
mapper = ax.scatter(df_grid.x, df_grid.y, c=df_grid.v,
cmap="viridis",
linewidths=0,
s=100, marker="o")
pl.colorbar(mapper, ax=ax);
the output is:
I’m trying to plot data an in order to check my code, I’m making a comparison of the resulting plots with what has already been generated with Matlab. I am encountering several issues however with this:
Generally, the parsing of RINEX files works, and the general pattern of the presentation of the data looks similar to that the Matlab scripts plotted. However there are small deviations in data that should become apparent when zooming in on the data i.e. when using a smaller time series, for example plotting over a special 2 hour period, not 24 hours. In Matlab, this small discrepancy can be seen, and a polynomial fitting applied. However for the Python plots (the first plot shown below), the curved line of this two hour period appears “smooth” and does not deviate at all, like that seen in the Matlab script (the second plot shows the blue line as the data, against the red line of the polyfit, hence, the blue line shows a slight discrepancy at x=9.4). The Matlab script is assumed correct, as this deviation is because of an Seismic activity that disrupts the ionosphere temporarily. Please refer to the plots below:
The third plot is in Matlab, where this is simply the polyfit minus the live data.
Therefore, it is not clear just how this data is being plotted on the axes for the Python script, because the data appears to smooth? Nor if my code is wrong (see below) and somehow “smooths” out the data somehow:
#Calculating by looping through
for sv in range(32):
sat = self.obs_data_chunks_dataframe[sv, :]
#print "sat.index_{0}: {1}".format(sv+1, sat.index)
phi1 = sat['L1'] * LAMBDA_1 #Change units of L1 to meters
phi2 = sat['L2'] * LAMBDA_2 #Change units of L2 to meters
pr1 = sat['P1']
pr2 = sat['P2']
#CALCULATION: teqc Calculation
iono_teqc = COEFF * (pr2 - pr1) / 1000000 #divide to make values smaller (tbc)
print "iono_teqc_{0}: {1}".format(sv+1, iono_teqc)
#PLOTTING
#Plotting of the data
plt.plot(sat.index, iono_teqc, label=‘teqc’)
plt.xlabel('Time (UTC)')
plt.ylabel('Ionosphere Delay (meters)')
plt.title("Ionosphere Delay on {0} for Satellite {1}.".format(self.date, sv+1))
plt.legend()
ax = plt.gca()
ax.ticklabel_format(useOffset=False)
plt.grid()
if sys.platform.startswith('win'):
plt.savefig(winpath + '\Figure_SV{0}'.format(sv+1))
elif sys.platform.startswith('darwin'):
plt.savefig(macpath + 'Figure_SV{0}'.format(sv+1))
plt.close()
Following on from point 1, the polynomial fitting code below does not run the way I’d like, so I’m overlooking something here. I assume this has to do with the data used upon the x,y-axes but can’t pinpoint exactly what. Would anyone know where I am going wrong here?
#Zoomed in plots
if sv == 19:
#Plotting of the data
plt.plot(sat.index, iono_teqc, label=‘teqc’) #sat.index to plot for time in UTC
plt.xlim(8, 10)
plt.xlabel('Time (UTC)')
plt.ylabel('Ionosphere Delay (meters)')
plt.title("Ionosphere Delay on {0} for Satellite {1}.".format(self.date, sv+1))
plt.legend()
ax = plt.gca()
ax.ticklabel_format(useOffset=False)
plt.grid()
#Polynomial fitting
coefficients = np.polyfit(sat.index, iono_teqc, 2)
plt.plot(coefficients)
if sys.platform.startswith('win'):
#os.path.join(winpath, 'Figure_SV{0}'.format(sv+1))
plt.savefig(winpath + '\Zoom_SV{0}'.format(sv+1))
elif sys.platform.startswith('darwin'):
plt.savefig(macpath + 'Zoom_SV{0}'.format(sv+1))
plt.close()
My RINEX file comprises 32 satellites. However when trying to generate the plots for all 32, I receive:
IndexError: index 31 is out of bounds for axis 0 with size 31
Changing the code below to 31 solves this partly, only excluding the 32nd satellite. I’d like to also plot for satellite 32. The functions for the parsing, and formatting of the data are given below:
def read_obs(self, RINEXfile, n_sat, sat_map):
obs = np.empty((TOTAL_SATS, len(self.obs_types)), dtype=np.float64) * np.NaN
lli = np.zeros((TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
signal_strength = np.zeros((TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
for i in range(n_sat):
# Join together observations for a single satellite if split across lines.
obs_line = ''.join(padline(RINEXfile.readline()[:-1], 16) for _ in range((len(self.obs_types) + 4) / 5))
#obs_line = ''.join(padline(RINEXfile.readline()[:-1], 16) for _ in range(2))
#while obs_line
for j in range(len(self.obs_types)):
obs_record = obs_line[16*j:16*(j+1)]
obs[sat_map[i], j] = floatornan(obs_record[0:14])
lli[sat_map[i], j] = digitorzero(obs_record[14:15])
signal_strength[sat_map[i], j] = digitorzero(obs_record[15:16])
return obs, lli, signal_strength
def read_data_chunk(self, RINEXfile, CHUNK_SIZE = 10000):
obss = np.empty((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.float64) * np.NaN
llis = np.zeros((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
signal_strengths = np.zeros((CHUNK_SIZE, TOTAL_SATS, len(self.obs_types)), dtype=np.uint8)
epochs = np.zeros(CHUNK_SIZE, dtype='datetime64[us]')
flags = np.zeros(CHUNK_SIZE, dtype=np.uint8)
i = 0 #ggfrfg
while True:
hdr = self.read_epoch_header(RINEXfile)
if hdr is None:
break
epoch_time, flags[i], sats = hdr
#epochs[i] = np.datetime64(epoch_time)
epochs[i] = epoch_time
sat_map = np.ones(len(sats)) * -1
for n, sat in enumerate(sats):
if sat[0] == 'G':
sat_map[n] = int(sat[1:]) - 1
obss[i], llis[i], signal_strengths[i] = self.read_obs(RINEXfile, len(sats), sat_map)
i += 1
if i >= CHUNK_SIZE:
break
return obss[:i], llis[:i], signal_strengths[:i], epochs[:i], flags[:i]
def read_data(self, RINEXfile):
obs_data_chunks = []
while True:
obss, _, _, epochs, _ = self.read_data_chunk(RINEXfile)
epochs = epochs.astype(np.int64)
epochs = np.divide(epochs, float(3600.000))
if obss.shape[0] == 0:
break
obs_data_chunks.append(pd.Panel(
np.rollaxis(obss, 1, 0),
items=['G%02d' % d for d in range(1, 33)],
major_axis=epochs,
minor_axis=self.obs_types
).dropna(axis=0, how='all').dropna(axis=2, how='all'))
self.obs_data_chunks_dataframe = obs_data_chunks[0]
Any suggestions?
Cheers, pymat.
I managed to solve Qu1 as it was a conversion issue with my calculation that was overlooked, the other two points are however open...