Python: Interpolation from an irregular 2d grid to a regular one - python

I'd like to map a distribution of values of an irregular grid on a regular one.
I'm trying with the different interpolators but it looks I'm not able to do it.
Here there is the code I've written:
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
N = 100
M = 10
lat = ((np.random.rand(M,N))*2)+0.2
lon = ((np.random.rand(M,N))*3)+0.2
theta = ((np.random.rand(M,N))*180)
lat_min = np.min(lat)
lat_max = np.max(lat)
lon_min = np.min(lon)
lon_max = np.max(lon)
dlat = 0.1 # regular step for the lat[rad]
dlon = 0.1 # regular step for the lon[rad]
# Grid dimensions
Nlat = np.int(np.abs(lat_max-lat_min)/dlat)+1
Nlon = np.int(np.abs(lon_max-lon_min)/dlon)+1
# Lat-Lon vector
reg_lat = np.linspace(lat_min, lat_max, Nlat) # regularly spaced latitude vector
reg_lon = np.linspace(lon_min, lon_max, Nlon) # regularly spaced longitude vector
# Lat-Lon regular Grid
reg_lon_mesh, reg_lat_mesh = np.meshgrid(reg_lon, reg_lat)
I've used:
theta2 = scipy.interpolate.griddata((lon.ravel(), lat.ravel()), theta.ravel(),(reg_lon_mesh, reg_lat_mesh), method='cubic')
but the interpolation seems wrong
and
f = interpolate.interp2d(lon.ravel(), lat.ravel(), theta,kind='cubic')
and it rises the warning: A theoretically impossible results when finding a smoothin spline
with fp = s. Probably causes: s too small or badly chosen eps.
(abs(fp-s)/s>0.001)
kx,ky=3,3 nx,ny=36,34 m=1000 fp=14832451.907306 s=0.000000

Related

Change Colorbar Scaling in Matplotlib

Using NASA's SRTM data, I've generated a global elevation heatmap.
The problem is, however, the continents tend to blend in with the ocean because of the range of elevation values. Is it possible to change the colorbar's scale so that the edges of the continents are more distinct from the ocean? I've tried different cmaps, but they all seem to suffer from the problem.
Here is my code. I'm initializing a giant array (with 0s) to hold global elevation data, and then populating it file by file from the SRTM dataset. Each file is 1 degree latitude by 1 degree longitude.
Another question I had was regarding the map itself. For some reason, the Appalachian Mountains seem to have disappeared entirely.
import os
import numpy as np
from .srtm_map import MapGenerator
from ..utils.hgt_parser import HGTParser
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import richdem as rd
class GlobalMapGenerator():
def __init__(self):
self.gen = MapGenerator()
self.base_dir = "data/elevation/"
self.hgt_files = os.listdir(self.base_dir)
self.global_elevation_data = None
def shrink(data, rows, cols):
return data.reshape(rows, data.shape[0]/rows, cols, data.shape[1]/cols).sum(axis=1).sum(axis=2)
def GenerateGlobalElevationMap(self, stride):
res = 1201//stride
max_N = 59
max_W = 180
max_S = 56
max_E = 179
# N59 --> N00
# S01 --> S56
# E000 --> E179
# W180 --> W001
# Initialize array global elevation
self.global_elevation_data = np.zeros(( res*(max_S+max_N+1), res*(max_E+max_W+1) ))
print("Output Image Shape:", self.global_elevation_data.shape)
for hgt_file in tqdm(self.hgt_files):
lat_letter = hgt_file[0]
lon_letter = hgt_file[3]
lat = int(hgt_file[1:3])
lon = int(hgt_file[4:7])
if lat_letter == "S":
# Shift south down by max_N, but south starts at S01 so we translate up by 1 too
lat_trans = max_N + lat - 1
else:
# Bigger N lat means further up. E.g. N59 is at index 0 and is higher than N00
lat_trans = max_N - lat
if lon_letter == "E":
# Shift east right by max_W
lon_trans = max_W + lon
else:
# Bigger W lon means further left. E.g. W180 is at index 0 and is more left than W001
lon_trans = max_W - lon
# load in data from file as resized
data = cv2.resize(HGTParser(os.path.join(self.base_dir, hgt_file)), (res, res))
# generate bounds (x/y --> lon.lat for data from this file for the giant array)
lat_bounds = [res*lat_trans, res*(lat_trans+1)]
lon_bounds = [res*lon_trans, res*(lon_trans+1)]
try:
self.global_elevation_data[ lat_bounds[0]:lat_bounds[1], lon_bounds[0]:lon_bounds[1] ] = data
except:
print("REFERENCE ERROR: " + hgt_file)
print("lat: ", lat_bounds)
print("lon: ", lon_bounds)
# generate figure
plt.figure(figsize=(20,20))
plt.imshow(self.global_elevation_data, cmap="rainbow")
plt.title("Global Elevation Heatmap")
plt.colorbar()
plt.show()
np.save("figures/GlobalElevationMap.npy", self.global_elevation_data)
plt.savefig("figures/GlobalElevationMap.png")
def GenerateGlobalSlopeMap(self, stride):
pass
Use a TwoSlopeNorm (docs) for your norm, like the example here.
From the example:
Sometimes we want to have a different colormap on either side of a conceptual center point, and we want those two colormaps to have different linear scales. An example is a topographic map where the land and ocean have a center at zero, but land typically has a greater elevation range than the water has depth range, and they are often represented by a different colormap.
If you set the midpoint at sea level (0), then you can have two very different scalings based on ocean elevation vs land elevation.
Example code (taken from the example linked above):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
from matplotlib import cm
dem = cbook.get_sample_data('topobathy.npz', np_load=True)
topo = dem['topo']
longitude = dem['longitude']
latitude = dem['latitude']
fig, ax = plt.subplots()
# make a colormap that has land and ocean clearly delineated and of the
# same length (256 + 256)
colors_undersea = plt.cm.terrain(np.linspace(0, 0.17, 256))
colors_land = plt.cm.terrain(np.linspace(0.25, 1, 256))
all_colors = np.vstack((colors_undersea, colors_land))
terrain_map = colors.LinearSegmentedColormap.from_list(
'terrain_map', all_colors)
# make the norm: Note the center is offset so that the land has more
# dynamic range:
divnorm = colors.TwoSlopeNorm(vmin=-500., vcenter=0, vmax=4000)
pcm = ax.pcolormesh(longitude, latitude, topo, rasterized=True, norm=divnorm,
cmap=terrain_map, shading='auto')
# Simple geographic plot, set aspect ratio beecause distance between lines of
# longitude depends on latitude.
ax.set_aspect(1 / np.cos(np.deg2rad(49)))
ax.set_title('TwoSlopeNorm(x)')
cb = fig.colorbar(pcm, shrink=0.6)
cb.set_ticks([-500, 0, 1000, 2000, 3000, 4000])
plt.show()
See how it scales numbers with this simple usage (from docs):
>>> import matplotlib. Colors as mcolors
>>> offset = mcolors.TwoSlopeNorm(vmin=-4000., vcenter=0., vmax=10000)
>>> data = [-4000., -2000., 0., 2500., 5000., 7500., 10000.]
>>> offset(data)
array([0., 0.25, 0.5, 0.625, 0.75, 0.875, 1.0])

Shape of earth seems wrong in Skyfield - is my python correct?

Mapping the distance from the center of the earth to various (lat, lon) positions using Skyfield shows variation with latitude but independent of longitude (sub-millimeter). This may be a documented approximation in the package, a bug in my script, or something else altogether. Am I doing something wrong here? (besides, of course, using jet)
import numpy as np
import matplotlib.pyplot as plt
from skyfield.api import load, now
data = load('de421.bsp')
earth = data['earth']
jd = now()
epos = earth.at(jd).position.km
lats = np.linspace( -90, 90, 19)
lons = np.linspace(-180, 180, 37)
LATS, LONS = np.meshgrid(lats, lons)
s = LATS.shape
points = zip(LATS.flatten(), LONS.flatten())
rr = []
for point in points:
la, lo = point
pos = earth.topos(la, lo).at(jd).position.km
r = np.sqrt( ((pos-epos)**2).sum() )
rr.append(r)
surf = np.array(rr).reshape(s)
extent = [lons.min(), lons.max(), lats.min(), lats.max()]
plt.figure()
plt.imshow(surf.T, origin='lower', extent=extent)
plt.colorbar()
plt.title('uhoh topo')
plt.savefig('uhoh topo')
plt.show()
As a cross-check, I tried some random pairs of locations with the same latitude:
pe = earth.at(jd).position.km
for i in range(10):
lon1, lon2 = 360.*np.random.random(2)-180
lat = float(180.*np.random.random(1)-90.)
p1 = earth.topos(lat, lon1).at(jd).position.km
p2 = earth.topos(lat, lon2).at(jd).position.km
r1 = np.sqrt( ((p1-pe)**2).sum() )
r2 = np.sqrt( ((p2-pe)**2).sum() )
print lat, lon1, lon2, r2-r1
and got this (the fourth column shows differences of microns):
45.8481950437 55.9538249618 115.148786114 1.59288902069e-08
-72.0821405192 4.81264755835 172.783338907 2.17096385313e-09
51.6126938075 -54.5670258363 -134.888403816 2.42653186433e-09
2.92691713179 -178.553103457 134.648099589 1.5916157281e-10
-78.7376163827 -55.0684703115 125.714124504 -6.13908923697e-10
48.5852207923 -169.061708765 35.5374862329 7.60337570682e-10
42.3767785876 130.850223447 -111.520896867 -1.62599462783e-08
11.2951212126 -60.0296460731 32.8775784623 6.91579771228e-09
18.9588262131 71.3414406837 127.516370219 -4.84760676045e-09
-31.5768658495 173.741960359 90.3715297869 -6.78483047523e-10
The topos() method includes a elevation_m=0.0 that you are not specifying. So in each of your calls, it takes its default value of 0.0 meters above sea level. And the definition of “sea level” does not vary with longitude — at a given latitude, sea level is at a fixed distance from the geocenter the entire way around the world.
I am not sure why you call a micron-level error a “pretty big approximation”, but you are indeed running into the limited precision of your machine’s 64-bit floating point math when you subtract two distances that are each about 1 au in length — you are seeing a rounding error down in the last bit or two.

Find a easier way to cluster 2-d scatter data into grid array data

I have figured out a method to cluster disperse point data into structured 2-d array(like rasterize function). And I hope there are some better ways to achieve that target.
My work
1. Intro
1000 point data has there dimensions of properties (lon, lat, emission) whicn represent one factory located at (x,y) emit certain amount of CO2 into atmosphere
grid network: predefine the 2-d array in the shape of 20x20
http://i4.tietuku.com/02fbaf32d2f09fff.png
The code reproduced here:
#### define the map area
xc1,xc2,yc1,yc2 = 113.49805889531724,115.5030664238035,37.39995194888143,38.789235929357105
map = Basemap(llcrnrlon=xc1,llcrnrlat=yc1,urcrnrlon=xc2,urcrnrlat=yc2)
#### reading the point data and scatter plot by their position
df = pd.read_csv("xxxxx.csv")
px,py = map(df.lon, df.lat)
map.scatter(px, py, color = "red", s= 5,zorder =3)
#### predefine the grid networks
lon_grid,lat_grid = np.linspace(xc1,xc2,21), np.linspace(yc1,yc2,21)
lon_x,lat_y = np.meshgrid(lon_grid,lat_grid)
grids = np.zeros(20*20).reshape(20,20)
plt.pcolormesh(lon_x,lat_y,grids,cmap = 'gray', facecolor = 'none',edgecolor = 'k',zorder=3)
2. My target
Finding the nearest grid point for each factory
Add the emission data into this grid number
3. Algorithm realization
3.1 Raster grid
note: 20x20 grid points are distributed in this area represented by blue dot.
http://i4.tietuku.com/8548554587b0cb3a.png
3.2 KD-tree
Find the nearest blue dot of each red point
sh = (20*20,2)
grids = np.zeros(20*20*2).reshape(*sh)
sh_emission = (20*20)
grids_em = np.zeros(20*20).reshape(sh_emission)
k = 0
for j in range(0,yy.shape[0],1):
for i in range(0,xx.shape[0],1):
grids[k] = np.array([lon_grid[i],lat_grid[j]])
k+=1
T = KDTree(grids)
x_delta = (lon_grid[2] - lon_grid[1])
y_delta = (lat_grid[2] - lat_grid[1])
R = np.sqrt(x_delta**2 + y_delta**2)
for i in range(0,len(df.lon),1):
idx = T.query_ball_point([df.lon.iloc[i],df.lat.iloc[i]], r=R)
# there are more than one blue dot which are founded sometimes,
# So I'll calculate the distances between the factory(red point)
# and all blue dots which are listed
if (idx > 1):
distance = []
for k in range(0,len(idx),1):
distance.append(np.sqrt((df.lon.iloc[i] - grids[k][0])**2 + (df.lat.iloc[i] - grids[k][1])**2))
pos_index = distance.index(min(distance))
pos = idx[pos_index]
# Only find 1 point
else:
pos = idx
grids_em[pos] += df.so2[i]
4. Result
co2 = grids_em.reshape(20,20)
plt.pcolormesh(lon_x,lat_y,co2,cmap =plt.cm.Spectral_r,zorder=3)
http://i4.tietuku.com/6ded65c4ac301294.png
5. My question
Can someone point out some drawbacks or error of this method?
Is there some algorithms more aligned with my target?
Thanks a lot!
There are many for-loop in your code, it's not the numpy way.
Make some sample data first:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
import pylab as pl
xc1, xc2, yc1, yc2 = 113.49805889531724, 115.5030664238035, 37.39995194888143, 38.789235929357105
N = 1000
GSIZE = 20
x, y = np.random.multivariate_normal([(xc1 + xc2)*0.5, (yc1 + yc2)*0.5], [[0.1, 0.02], [0.02, 0.1]], size=N).T
value = np.ones(N)
df_points = pd.DataFrame({"x":x, "y":y, "v":value})
For equal space grids you can use hist2d():
pl.hist2d(df_points.x, df_points.y, weights=df_points.v, bins=20, cmap="viridis");
Here is the output:
Here is the code to use KdTree:
X, Y = np.mgrid[x.min():x.max():GSIZE*1j, y.min():y.max():GSIZE*1j]
grid = np.c_[X.ravel(), Y.ravel()]
points = np.c_[df_points.x, df_points.y]
tree = KDTree(grid)
dist, indices = tree.query(points)
grid_values = df_points.groupby(indices).v.sum()
df_grid = pd.DataFrame(grid, columns=["x", "y"])
df_grid["v"] = grid_values
fig, ax = pl.subplots(figsize=(10, 8))
ax.plot(df_points.x, df_points.y, "kx", alpha=0.2)
mapper = ax.scatter(df_grid.x, df_grid.y, c=df_grid.v,
cmap="viridis",
linewidths=0,
s=100, marker="o")
pl.colorbar(mapper, ax=ax);
the output is:

Covariance/heat flux in Python

I'm looking to compute poleward heat fluxes at a level in the atmosphere, i.e the mean of (u't') . I'm aware of the covariance function in NumPy, but cannot seem to implement it. Here is my code below.
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
myfile = '/home/ubuntu/Fluxes_Test/out.nc'
Import = Dataset(myfile, mode='r')
lon = Import.variables['lon'][:] # Longitude
lat = Import.variables['lat'][:] # Latitude
time = Import.variables['time'][:] # Time
lev = Import.variables['lev'][:] # Level
wind = Import.variables['ua'][:]
temp = Import.variables['ta'][:]
lon = lon-180 # to shift co-ordinates to -180 to 180.
variable1 = np.squeeze(wind,temp, axis=0)
variable2 = np.cov(variable1)
m = Basemap(resolution='l')
lons, lats = np.meshgrid(lon,lat)
X, Y = m(lons, lats)
cs = m.pcolor(X,Y, variable2)
plt.show()
The shape of the variables wind and temp which I am trying to compute the flux of (the covariance) are both (3960,64,128), so 3960 pieces of data on a 64x128 grid (with co-ordinates).
I tried squeezing both variables to produce a array of (3960, 3960, 64,128) so cov could work on these first two series of data (the two 3960's) of wind and temp, but this didn't work.

SciPy RectSphereBivariateSpline interpolation over sphere returning ValueError

I have 3D measurement data on a sphere that is very coarse and I want to interpolate.
I found that RectSphereBivariateSpline from scipy.interpolate should be most suitable.
I used the example in the RectSphereBivariateSpline documentation as a starting point and now have the following code:
""" read csv input file, post process and plot 3D data """
import csv
import numpy as np
from mayavi import mlab
from scipy.interpolate import RectSphereBivariateSpline
# user input
nElevationPoints = 17 # needs to correspond with csv file
nAzimuthPoints = 40 # needs to correspond with csv file
threshold = - 40 # needs to correspond with how measurement data was captured
turnTableStepSize = 72 # needs to correspond with measurement settings
resolution = 0.125 # needs to correspond with measurement settings
# read data from file
patternData = np.empty([nElevationPoints, nAzimuthPoints]) # empty buffer
ifile = open('ttest.csv') # need the 'b' suffix to prevent blank rows being inserted
reader = csv.reader(ifile,delimiter=',')
reader.next() # skip first line in csv file as this is only text
for nElevation in range (0,nElevationPoints):
# azimuth
for nAzimuth in range(0,nAzimuthPoints):
patternData[nElevation,nAzimuth] = reader.next()[2]
ifile.close()
# post process
def r(thetaIndex,phiIndex):
"""r(thetaIndex,phiIndex): function in 3D plotting to return positive vector length from patternData[theta,phi]"""
radius = -threshold + patternData[thetaIndex,phiIndex]
return radius
#phi,theta = np.mgrid[0:nAzimuthPoints,0:nElevationPoints]
theta = np.arange(0,nElevationPoints)
phi = np.arange(0,nAzimuthPoints)
thetaMesh, phiMesh = np.meshgrid(theta,phi)
stepSizeRad = turnTableStepSize * resolution * np.pi / 180
theta = theta * stepSizeRad
phi = phi * stepSizeRad
# create new grid to interpolate on
phiIndex = np.linspace(1,360,360)
phiNew = phiIndex*np.pi/180
thetaIndex = np.linspace(1,180,180)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
# create interpolator object and interpolate
data = r(thetaMesh,phiMesh)
lut = RectSphereBivariateSpline(theta,phi,data.T)
data_interp = lut.ev(thetaNew.ravel(),phiNew.ravel()).reshape((360,180)).T
x = (data_interp(thetaIndex,phiIndex)*np.cos(phiNew)*np.sin(thetaNew))
y = (-data_interp(thetaIndex,phiIndex)*np.sin(phiNew)*np.sin(thetaNew))
z = (data_interp(thetaIndex,phiIndex)*np.cos(thetaNew))
# plot 3D data
obj = mlab.mesh(x, y, z, colormap='jet')
obj.enable_contours = True
obj.contour.filled_contours = True
obj.contour.number_of_contours = 20
mlab.show()
The example from the documentation works, but when I try to run the above code with the following test data: testdata I get a ValueError at the code position where the RectSphereBivariateSpline interpolator object is declared:
ValueError:
ERROR: on entry, the input data are controlled on validity
the following restrictions must be satisfied.
-1<=iopt(1)<=1, 0<=iopt(2)<=1, 0<=iopt(3)<=1,
-1<=ider(1)<=1, 0<=ider(2)<=1, ider(2)=0 if iopt(2)=0.
-1<=ider(3)<=1, 0<=ider(4)<=1, ider(4)=0 if iopt(3)=0.
mu >= mumin (see above), mv >= 4, nuest >=8, nvest >= 8,
kwrk>=5+mu+mv+nuest+nvest,
lwrk >= 12+nuest*(mv+nvest+3)+nvest*24+4*mu+8*mv+max(nuest,mv+nvest)
0< u(i-1)=0: s>=0
if s=0: nuest>=mu+6+iopt(2)+iopt(3), nvest>=mv+7
if one of these conditions is found to be violated,control is
immediately repassed to the calling program. in that case there is no
approximation returned.
I have tried and tried, but I am absolutely clueless what I should change in order to satisfy the RectSphereBivariateSpline object.
Does anyone have any hint as to what I may be doing wrong?
-- EDIT --
With the suggestions from #HYRY, I now have the following code that runs without runtime errors:
""" read csv input file, post process and plot 3D data """
import csv
import numpy as np
from mayavi import mlab
from scipy.interpolate import RectSphereBivariateSpline
# user input
nElevationPoints = 17 # needs to correspond with csv file
nAzimuthPoints = 40 # needs to correspond with csv file
threshold = - 40 # needs to correspond with how measurement data was captured
turnTableStepSize = 72 # needs to correspond with measurement settings
resolution = 0.125 # needs to correspond with measurement settings
# read data from file
patternData = np.empty([nElevationPoints, nAzimuthPoints]) # empty buffer
ifile = open('ttest.csv') # need the 'b' suffix to prevent blank rows being inserted
reader = csv.reader(ifile,delimiter=',')
reader.next() # skip first line in csv file as this is only text
for nElevation in range (0,nElevationPoints):
# azimuth
for nAzimuth in range(0,nAzimuthPoints):
patternData[nElevation,nAzimuth] = reader.next()[2]
ifile.close()
# post process
def r(thetaIndex,phiIndex):
"""r(thetaIndex,phiIndex): function in 3D plotting to return positive vector length from patternData[theta,phi]"""
radius = -threshold + patternData[thetaIndex,phiIndex]
return radius
#phi,theta = np.mgrid[0:nAzimuthPoints,0:nElevationPoints]
theta = np.arange(0,nElevationPoints)
phi = np.arange(0,nAzimuthPoints)
thetaMesh, phiMesh = np.meshgrid(theta,phi)
stepSizeRad = turnTableStepSize * resolution * np.pi / 180
theta = theta * stepSizeRad
phi = phi * stepSizeRad
# create new grid to interpolate on
phiIndex = np.arange(1,361)
phiNew = phiIndex*np.pi/180
thetaIndex = np.arange(1,181)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
# create interpolator object and interpolate
data = r(thetaMesh,phiMesh)
theta[0] += 1e-6 # zero values for theta cause program to halt; phi makes no sense at theta=0
lut = RectSphereBivariateSpline(theta,phi,data.T)
data_interp = lut.ev(thetaNew.ravel(),phiNew.ravel()).reshape((360,180)).T
def rInterp(theta,phi):
"""rInterp(theta,phi): function in 3D plotting to return positive vector length from interpolated patternData[theta,phi]"""
thetaIndex = theta/(np.pi/180)
thetaIndex = thetaIndex.astype(int)
phiIndex = phi/(np.pi/180)
phiIndex = phiIndex.astype(int)
radius = data_interp[thetaIndex,phiIndex]
return radius
# recreate mesh minus one, needed otherwise the below gives index error, but why??
phiIndex = np.arange(0,360)
phiNew = phiIndex*np.pi/180
thetaIndex = np.arange(0,180)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
x = (rInterp(thetaNew,phiNew)*np.cos(phiNew)*np.sin(thetaNew))
y = (-rInterp(thetaNew,phiNew)*np.sin(phiNew)*np.sin(thetaNew))
z = (rInterp(thetaNew,phiNew)*np.cos(thetaNew))
# plot 3D data
obj = mlab.mesh(x, y, z, colormap='jet')
obj.enable_contours = True
obj.contour.filled_contours = True
obj.contour.number_of_contours = 20
mlab.show()
However, the plot is much different than the non-interpolated data, see picture here as reference.
Also, when running the interactive session, data_interp is much larger in value (>3e5) than the original data (this is around 20 max).
Any further tips?
It looks like that theta[0] can't be 0, if you change it a litte before call RectSphereBivariateSpline:
theta[0] += 1e-6

Categories

Resources