I am using some code from a workshop to extract data from netCDF files by the coordinates closest to my specified coordinates. When using just one set of coordinates I am able to extract the values I need without trouble as below:
import numpy as np
import netCDF4
from math import pi
from numpy import cos, sin
def tunnel_fast(latvar,lonvar,lat0,lon0):
    '''
    Find closest point in a set of (lat,lon) points to specified point
    latvar - 2D latitude variable from an open netCDF dataset
    lonvar - 2D longitude variable from an open netCDF dataset
    lat0,lon0 - query point
    Returns iy,ix such that the square of the tunnel distance
    between (latval[iy,ix],lonval[iy,ix]) and (lat0,lon0)
    is minimum.
    '''
    rad_factor = pi/180.0 # for trigonometry, need angles in radians
    # Read latitude and longitude from file into numpy arrays
    latvals = latvar[:] * rad_factor
    lonvals = lonvar[:] * rad_factor
    ny,nx = latvals.shape
    lat0_rad = lat0 * rad_factor
    lon0_rad = lon0 * rad_factor
    # Compute numpy arrays for all values, no loops
    clat,clon = cos(latvals),cos(lonvals)
    slat,slon = sin(latvals),sin(lonvals)
    delX = cos(lat0_rad)*cos(lon0_rad) - clat*clon
    delY = cos(lat0_rad)*sin(lon0_rad) - clat*slon
    delZ = sin(lat0_rad) - slat
    dist_sq = delX**2 + delY**2 + delZ**2
    minindex_1d = dist_sq.argmin() # 1D index of minimum element
    iy_min,ix_min = np.unravel_index(minindex_1d, latvals.shape)
    return iy_min,ix_min
ncfile = netCDF4.Dataset('E:\wind_level2_1.nc', 'r')
latvar = ncfile.variables['latitude']
lonvar = ncfile.variables['longitude']
#_________GG turbine_________GAD10 Latitude 51.735516, GAD10 Longitude 1.942656
iy,ix = tunnel_fast(latvar, lonvar, 51.735516, 1.942656)
print('Closest lat lon:', latvar[iy,ix], lonvar[iy,ix])
refLAT=latvar[iy,ix]
refLON = lonvar[iy,ix]
#try to find the data for this location
SARwind = ncfile.variables['sar_wind'][:,:]
ModelWind = ncfile.variables['model_speed'][:,:]
print 'iy,ix' #appears to be the index of the value of Lat,lon
print SARwind[iy,ix]
ncfile.close()
Now I am trying to loop through a text file of coordinates, coord_list, to extract each set of coordinates, find the data for that location, then move on to the next set of coordinates in the list. This code works on its own, as below:
import csv
from decimal import Decimal
with open('Turbine_locs_no_header.csv','rb') as f:
    reader = csv.reader(f)
    #coord_list = [reader]
    coord_list = list(reader)
end_row = len(coord_list)
lon_ind=1
lat_ind=2
for row in range(0, end_row-1):#end_row - 1 due to the 0 index
    turbine_lat = coord_list[row][lat_ind]
    turbine_lon = coord_list[row][lon_ind]
    turbine_lat = [Decimal(turbine_lat)]
    print 'lat',turbine_lat, 'lon',turbine_lon, row
However, I want to pass coordinates from the text file to this part of the original code, iy,ix = tunnel_fast(latvar, lonvar, 51.94341, 1.922094888), replacing the numbers with variables: iy, ix = tunnel_fast(latvar, lonvar, turbine_lat, turbine_lon). When I try to combine the two codes by creating a function get_coordinates, I get the following errors:
File "C:/Users/mm/test_nc_bycoords_GG_turbines_AGW.py", line 65, in <module>
get_coordinates(coord_list, latvar, lonvar)
File "C:/Users/mm/test_nc_bycoords_GG_turbines_AGW.py", line 51, in get_coordinates
iy, ix = tunnel_fast(latvar, lonvar, turbine_lat, turbine_lon)
File "C:/Users/mm/test_nc_bycoords_GG_turbines_AGW.py", line 27, in tunnel_fast
lat0_rad = lat0 * rad_factor
TypeError: can't multiply sequence by non-int of type 'float'
I thought this was because turbine_lat and turbine_lon are list items and so cannot be used, but that doesn't seem to be connected to the errors. I know this code needs more work anyway, but if anyone could help me spot where I am going wrong, that would be very helpful. My attempt to combine the two codes is below.
import numpy as np
import netCDF4
from math import pi
from numpy import cos, sin
import csv
# edited from https://github.com/Unidata/unidata-python-workshop/blob/a56daa50d7b343c7debe93968683613642d6b9f7/notebooks/netcdf-by-coordinates.ipynb
def tunnel_fast(latvar,lonvar,lat0,lon0):
    '''
    Find closest point in a set of (lat,lon) points to specified point
    latvar - 2D latitude variable from an open netCDF dataset
    lonvar - 2D longitude variable from an open netCDF dataset
    lat0,lon0 - query point
    Returns iy,ix such that the square of the tunnel distance
    between (latval[iy,ix],lonval[iy,ix]) and (lat0,lon0)
    is minimum.
    '''
    rad_factor = pi/180.0 # for trigonometry, need angles in radians
    # Read latitude and longitude from file into numpy arrays
    latvals = latvar[:] * rad_factor
    lonvals = lonvar[:] * rad_factor
    ny,nx = latvals.shape
    lat0_rad = lat0 * rad_factor
    lon0_rad = lon0 * rad_factor
    # Compute numpy arrays for all values, no loops
    clat,clon = cos(latvals),cos(lonvals)
    slat,slon = sin(latvals),sin(lonvals)
    delX = cos(lat0_rad)*cos(lon0_rad) - clat*clon
    delY = cos(lat0_rad)*sin(lon0_rad) - clat*slon
    delZ = sin(lat0_rad) - slat
    dist_sq = delX**2 + delY**2 + delZ**2
    minindex_1d = dist_sq.argmin() # 1D index of minimum element
    iy_min,ix_min = np.unravel_index(minindex_1d, latvals.shape)
    return iy_min,ix_min
#________________my edits___________________________________________________
def get_coordinates(coord_list, latvar, lonvar):
    "this takes coordinates from a .csv and assigns them to variables"
    end_row = len(coord_list)
    lon_ind=1
    lat_ind=2
    for row in range(0, end_row-1):#end_row - 1 due to the 0 index
        turbine_lat = coord_list[row][lat_ind]
        turbine_lon = coord_list[row][lon_ind]
        iy, ix = tunnel_fast(latvar, lonvar, turbine_lat, turbine_lon)
        print('Closest lat lon:', latvar[iy, ix], lonvar[iy, ix])
#________________________________________________________________________________________________________________________
ncfile = netCDF4.Dataset('NOGAPS_wind_level2_1.nc', 'r')
latvar = ncfile.variables['latitude']
lonvar = ncfile.variables['longitude']
#____added in to pass to get coordinates function
with open('Turbine_locs_no_header.csv','rb') as f:
    reader = csv.reader(f)
    coord_list = list(reader)
#_________take latitude from coordinateas function
get_coordinates(coord_list, latvar, lonvar)
#iy,ix = tunnel_fast(latvar, lonvar, turbine_lat, turbine_lon)#get these from the 'assign_coordinates_fromlist.py
#print('Closest lat lon:', latvar[iy,ix], lonvar[iy,ix])
SARwind = ncfile.variables['sar_wind'][:,:]
ModelWind = ncfile.variables['model_speed'][:,:]
print 'iy,ix' #appears to be the index of the value of Lat,lon
print SARwind[iy,ix]
ncfile.close()
You can unpack an argument list using *args (see the docs). In your case you could do tunnel_fast(latvar, lonvar, *coord_list[row]). You need to make sure that the order of arguments in coord_list[row] is correct and if coord_list[row] contains more than the two values then you need to slice it appropriately.
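For example (just a sketch, and it assumes each row of coord_list is laid out as [id, lon, lat] like in your loop, with the values still stored as strings):
for row in coord_list:
    # assumed layout: row[0]=id, row[1]=lon, row[2]=lat; csv gives strings, so convert
    lat0, lon0 = float(row[2]), float(row[1])
    iy, ix = tunnel_fast(latvar, lonvar, lat0, lon0)
    print('Closest lat lon:', latvar[iy, ix], lonvar[iy, ix])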
Thanks to help from a_guest
It was a simple problem of lat0 and lon0 being passed as
<type 'str'> to tunnel_fast when it requires <type 'float'>. This appears to come from loading the coord_list as a list.
with open('Turbine_locs_no_header.csv','rb') as f:
    reader = csv.reader(f)
    coord_list = list(reader)
The workaround I used was to convert lat0 and lon0 to floats at the beginning of tunnel_fast
lat0 = float(lat0)
lon0 = float(lon0)
I am sure there is a more elegant way to do this, but it works.
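A slightly tidier option (just a sketch, again assuming the columns are id, lon, lat) would be to do the conversion once, when the CSV is read, so tunnel_fast itself stays unchanged:
with open('Turbine_locs_no_header.csv','rb') as f:
    reader = csv.reader(f)
    # convert the numeric columns to float as the rows are read in
    coord_list = [[row[0], float(row[1]), float(row[2])] for row in reader]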
I'm currently trying to get Tropomi data in GeoTIFF format. I downloaded some data in netCDF4 format. This way I obtain three numpy arrays: one with latitude coordinates, one with longitude coordinates and one with carbon monoxide values.
So I have a matrix with values for my raster and of each value I know the longitude and latitude of that respective value.
With this information how can I construct a georeferenced raster?
I read in the data as follows
import netCDF4
from netCDF4 import Dataset
import numpy as np
file = '/home/daniel/Downloads/S5P_NRTI_L2__CO_____20190430T171319_20190430T171819_08006_01_010301_20190430T175151.nc'
rootgrp = Dataset(file, "r",format="NETCDF4")
lat = rootgrp.groups['PRODUCT']['latitude'][:]
lon = rootgrp.groups['PRODUCT']['longitude'][:]
carbon = rootgrp.groups['PRODUCT']['carbonmonoxide_total_column'][:]
obtaining 3 matrices with shape (1,290,215)
Now I would like to convert this to a Mercator projected geoTIFF, but I do not know how to go about it.
The gdal_translate option seems to work, but here is an alternative, explicit way I did it.
#importing packages
import numpy as np
from scipy import interpolate
from netCDF4 import Dataset
from shapely.geometry import Point
import geopandas as gpd
from geopy.distance import geodesic
import rasterio
import matplotlib.pyplot as plt
#load data
file = '/home/daniel/Ellipsis/db/downloaded/rawtropomi/S5P_NRTI_L2__CO_____20190430T171319_20190430T171819_08006_01_010301_20190430T175151.nc'
rootgrp = Dataset(file, "r",format="NETCDF4")
lat = rootgrp.groups['PRODUCT']['latitude'][:]
lon = rootgrp.groups['PRODUCT']['longitude'][:]
carbon = rootgrp.groups['PRODUCT']['carbonmonoxide_total_column'][:]
carbon = carbon.filled(0)
lat = lat.filled(-1000)
lon = lon.filled(-1000)
carbon = carbon.flatten()
lat = lat.flatten()
lon = lon.flatten()
#calculate the real distance between corners and get the width and height in pixels assuming you want a pixel resolution of at least 7 by 7 kilometers
w = max(geodesic((min(lat),max(lon)), (min(lat),min(lon))).meters/7000 , geodesic((max(lat),max(lon)), (max(lat),min(lon))).meters/14000)
h = geodesic((min(lat),max(lon)), (max(lat),max(lon))).meters/14000
# create a GeoDataFrame with the latitude, longitude and the measurement values as its rows, then transform it to the web mercator projection (or a projection of your choosing)
points = [Point(xy) for xy in zip(lon, lat)]
crs = {'init': 'epsg:4326'}
data = gpd.GeoDataFrame({'value':carbon}, crs=crs, geometry=points)
data = data.to_crs({'init': 'epsg:3395'})
data['lon'] = data.bounds['maxx'].values
data['lat'] = data.bounds['maxy'].values
#make a grid of coordinates; you need to calculate the coordinate of each pixel in the desired raster
minlon = min(data['lon'])
maxlon = max(data['lon'])
minlat = min(data['lat'])
maxlat = max(data['lat'])
lon_list = np.arange(minlon, maxlon, (maxlon-minlon)/w )
lat_list = np.arange(minlat, maxlat, (maxlat-minlat)/h)
lon_2d, lat_2d = np.meshgrid(lon_list, lat_list)
#use the values in the geopandas dataframe to interpolate values onto the coordinate grid
r = interpolate.griddata(points = (data['lon'].values,data['lat'].values), values = data['value'].values, xi = (lon_2d, lat_2d))
r = np.flip(r, axis = 0)
#check result
plt.imshow(r)
#save raster
# r is a 2D array of shape (height, width), so use shape[0]/shape[1] and a single band
transform = rasterio.transform.from_bounds(west=minlon, south=minlat, east=maxlon, north=maxlat, width=r.shape[1], height=r.shape[0])
file_out = 'test.tiff'
new_dataset = rasterio.open(file_out, 'w', driver='GTiff', compress='lzw',
                            height=r.shape[0], width=r.shape[1],
                            count=1, dtype=str(r.dtype),
                            crs=data.crs,
                            transform=transform)
new_dataset.write(r, 1)
new_dataset.close()
I would suggest looking at this answer here using gdal_translate:
Convert NetCDF (.nc) to GEOTIFF
gdal_translate -of GTiff file.nc test.tiff
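If you prefer to drive the same conversion from Python, the GDAL bindings expose gdal.Translate. For a grouped product like this S5P file you normally have to point it at a specific subdataset rather than the whole file; the subdataset name below is only an assumption, so check the names that gdalinfo reports for your file:
from osgeo import gdal

# hypothetical subdataset name - run gdalinfo on the .nc file to see the real ones
src = 'NETCDF:"S5P_NRTI_L2__CO_____20190430T171319_20190430T171819_08006_01_010301_20190430T175151.nc":/PRODUCT/carbonmonoxide_total_column'
gdal.Translate('test.tiff', src, format='GTiff')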
I want to write the Latitude, Longitude and Air_flux values to a csv file in three different columns.
Here is the code in Python3 that I have done so far:
The file "path" has all the values of "Air_Flux" across specified Lat and Lon.
CODE:
import numpy as np
import csv
LAT_MIN = 34.675
LAT_MAX = 38.275
LON_MIN = 124.625
LON_MAX = 130.795
path = 'BESS_PAR_Daily.A2015004.nc_output.csv' # "File That contains the Values Of Air_Flux"
flux = np.genfromtxt(path, delimiter=',') # Reading Data from File
latData = np.arange(LAT_MIN, LAT_MAX, 0.05)
lonData = np.arange(LON_MIN, LON_MAX, 0.05)
with open('data.csv', 'w') as file:
    writer = csv.writer(file, delimiter=',')
    for x in np.nditer(latData.T, order='C'):
        for y in np.nditer(lonData.T, order='C'):
            file.write(str(x))
            file.write("\n")
            file.write(str(y))
            file.write("\n")
    for fl in np.nditer(flux):
        file.write(str(fl))
        file.write("\n")
file.close()
I only know how to store values in one column.
BUT:
I want to write the Latitude, Longitude and Air_flux values to a csv file in such a way that the first column holds the Latitude values, the second column the Longitude values and the third column the Air_flux values.
My understanding is that your data is required in the format:
LAT1 LON1 FLUX1
LAT2 LON2 FLUX2
In that case you don't need multiple for loops; you can pass all three arrays to np.nditer and then use the csv writer to write all the values in one pass.
Here is an example based on your scenario
import numpy as np
import csv
LAT_MIN = 34.675
LAT_MAX = 38.275
LON_MIN = 124.625
LON_MAX = 130.795
# path = 'BESS_PAR_Daily.A2015004.nc_output.csv' # "File That contains the Values Of Air_Flux"
# flux = np.genfromtxt(path, delimiter=',') # Reading Data from File
# latData = np.arange(LAT_MIN, LAT_MAX, 0.05)
# lonData = np.arange(LON_MIN, LON_MAX, 0.05)
flux = np.array([1,2,3,4,5])
latData = np.array([1,2,3,4,5])
lonData = np.array([1,2,3,4,5])
with open('data.csv', 'w') as file:
    writer = csv.writer(file, delimiter=',')
    for x,y,z in np.nditer([latData.T, lonData.T, flux], order='C'):
        writer.writerow([x,y,z])
Also you don't need file.close() since the with block takes care of it
As the values of flux are stored across those Lat and Lons, after iterating the Lat values across the Lon values I fetched the indices of lat and lon into flux:
writer.writerow([x, y, flux[lat.index, lon.index]])
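If flux is actually a 2D grid indexed by (lat, lon), a sketch of one way to keep the three-column layout without tracking indices by hand (assuming flux.shape == (len(latData), len(lonData))) is to expand the coordinate vectors with meshgrid and write everything with writerows:
import numpy as np
import csv

lat2d, lon2d = np.meshgrid(latData, lonData, indexing='ij')  # both have the same shape as flux
rows = np.column_stack((lat2d.ravel(), lon2d.ravel(), flux.ravel()))
with open('data.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(rows)  # lat, lon, flux in three columns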
I have an input of 36,742 points, which means if I wanted to calculate the lower triangle of a distance matrix (using the vincenty approximation) I would need to generate 36,742*36,741*0.5 = 674,968,911 distances.
I want to keep the pair combinations which are within 50km of each other. My current set-up is as follows
shops= [[id,lat,lon]...]

def lower_triangle_mat(points):
    for i in range(len(shops)-1):
        for j in range(i+1,len(shops)):
            yield [shops[i],shops[j]]

def return_stores_cutoff(points,cutoff_km=0):
    below_cut = []
    counter = 0
    for x in lower_triangle_mat(points):
        dist_km = vincenty(x[0][1:3],x[1][1:3]).km
        counter += 1
        if counter % 1000000 == 0:
            print("%d out of %d" % (counter,(len(shops)*(len(shops)-1)*0.5)))
        if dist_km <= cutoff_km:
            below_cut.append([x[0][0],x[1][0],dist_km])
    return below_cut

start = time.clock()
stores = return_stores_cutoff(points=shops,cutoff_km=50)
print(time.clock() - start)
This will obviously take hours and hours. Some possibilities I was thinking of:
Use numpy to vectorise these calculations rather than looping through
Use some kind of hashing to get a quick rough-cut off (all stores within 100km) and then only calculate accurate distances between those stores
Instead of storing the points in a list use something like a quad-tree but I think that only helps with the ranking of close points rather than actual distance -> so I guess some kind of geodatabase
I can obviously try the haversine formula, or project the points and use Euclidean distances; however, I am interested in using the most accurate measure possible
Make use of parallel processing (however I was having a bit of difficulty coming up with how to cut the list and still get all the relevant pairs).
Edit: I think geohashing is definitely needed here - an example from:
import random
from geoindex import GeoGridIndex, GeoPoint

geo_index = GeoGridIndex()
for _ in range(10000):
    lat = random.random()*180 - 90
    lng = random.random()*360 - 180
    geo_index.add_point(GeoPoint(lat, lng))
center_point = GeoPoint(37.7772448, -122.3955118)
for distance, point in geo_index.get_nearest_points(center_point, 10, 'km'):
    print("We found {0} in {1} km".format(point, distance))
However, I would also like to vectorise (instead of loop) the distance calculations for the stores returned by the geo-hash.
Edit2: Pouria Hadjibagheri - I tried using lambda and map:
# [B]: Mapping approach
lwr_tr_mat = ((shops[i],shops[j]) for i in range(len(shops)-1) for j in range(i+1,len(shops)))
func = lambda x: (x[0][0],x[1][0],vincenty(x[0],x[1]).km)
# Trying to see if conditional statements slow this down
func_cond = lambda x: (x[0][0],x[1][0],vincenty(x[0],x[1]).km) if vincenty(x[0],x[1]).km <= 50 else None
start = time.clock()
out_dist = list(map(func,lwr_tr_mat))
print(time.clock() - start)
start = time.clock()
out_dist = list(map(func_cond,lwr_tr_mat))
print(time.clock() - start)
And they were all around 61 seconds (I restricted number of stores to 2000 from 32,000). Perhaps I used map incorrectly?
This sounds like a classic use case for k-D trees.
If you first transform your points into Euclidean space then you can use the query_pairs method of scipy.spatial.cKDTree:
from scipy.spatial import cKDTree
tree = cKDTree(data)
# where data is (nshops, ndim) containing the Euclidean coordinates of each shop
# in units of km
pairs = tree.query_pairs(50, p=2) # 50km radius, L2 (Euclidean) norm
pairs will be a set of (i, j) tuples corresponding to the row indices of pairs of shops that are ≤50km from each other.
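As a rough sketch of that "transform into Euclidean space" step (an assumption on my part, not the only choice): put each shop on a sphere of the Earth's mean radius, so that the 3D chord length closely approximates the great-circle distance at a 50 km scale:
import numpy as np
from scipy.spatial import cKDTree

R = 6371.0  # mean Earth radius in km
lat = np.radians(np.array([s[1] for s in shops], dtype=float))
lon = np.radians(np.array([s[2] for s in shops], dtype=float))
# Cartesian coordinates on the sphere, in km
data = np.column_stack((R*np.cos(lat)*np.cos(lon),
                        R*np.cos(lat)*np.sin(lon),
                        R*np.sin(lat)))
tree = cKDTree(data)
pairs = tree.query_pairs(50, p=2)  # chord length <= 50 km, a slight underestimate of the arc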
The output of tree.sparse_distance_matrix is a scipy.sparse.dok_matrix. Since the matrix will be symmetric and you're only interested in unique row/column pairs, you could use scipy.sparse.tril to zero out the upper triangle, giving you a scipy.sparse.coo_matrix. From there you can access the nonzero row and column indices and their corresponding distance values via the .row, .col and .data attributes:
from scipy import sparse
tree_dist = tree.sparse_distance_matrix(tree, max_distance=10000, p=2)
udist = sparse.tril(tree_dist, k=-1) # zero the main diagonal
ridx = udist.row # row indices
cidx = udist.col # column indices
dist = udist.data # distance values
Have you tried mapping entire arrays and functions instead of iterating through them? An example would be as follows:
from numpy.random import rand
my_array = rand(int(5e7), 1) # An array of 50,000,000 random numbers in double.
Now what is normally done is:
squared_list_iter = [value**2 for value in my_array]
Which of course works, but is far from optimal.
The alternative would be to map the array with a function. This is done as follows:
func = lambda x: x**2 # Here is what I want to do on my array.
squared_list_map = map(func, my_array) # Here I am doing it!
Now, one might ask, how is this any different, or even better for that matter? Since now we have added a call to a function, too! Here is your answer:
For the former solution (via iteration):
1 loop: 1.11 minutes.
Compared to the latter solution (mapping):
500 loops, 560 ns on average.
Converting the map() result to a list with list(map(func, my_array)) would increase the time by a factor of 10, to approximately 500 ms.
You choose!
Thanks for everyone's help. I think I have solved this by incorporating all the suggestions.
I use numpy to import the geographic co-ordinates and then project them using "France Lambert - 93". This lets me fill scipy.spatial.cKDTree with the points and then calculate a sparse_distance_matrix by specifying a cut-off of 50km (my projected points are in metres). I then extract the lower triangle to a CSV.
import numpy as np
import csv
import time
from pyproj import Proj, transform
#http://epsg.io/2154 (accuracy: 1.0m)
fr = '+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 \
+x_0=700000 +y_0=6600000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 \
+units=m +no_defs'
#http://epsg.io/27700-5339 (accuracy: 1.0m)
uk = '+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 \
+x_0=400000 +y_0=-100000 +ellps=airy \
+towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs'
path_to_csv = '.../raw_in.csv'
out_csv = '.../out.csv'
def proj_arr(points):
    inproj = Proj(init='epsg:4326')
    outproj = Proj(uk)
    # origin|destination|lon|lat
    func = lambda x: transform(inproj,outproj,x[2],x[1])
    return np.array(list(map(func, points)))
tstart = time.time()
# Import points as geographic coordinates
# ID|lat|lon
#Sample to try and replicate
#points = np.array([
# [39007,46.585012,5.5857829],
# [88086,48.192370,6.7296289],
# [62627,50.309155,3.0218611],
# [14020,49.133972,-0.15851507],
# [1091, 42.981765,2.0104902]])
#
points = np.genfromtxt(path_to_csv,
delimiter=',',
skip_header=1)
print("Total points: %d" % len(points))
print("Triangular matrix contains: %d" % (len(points)*((len(points))-1)*0.5))
# Get projected co-ordinates
proj_pnts = proj_arr(points)
# Fill quad-tree
from scipy.spatial import cKDTree
tree = cKDTree(proj_pnts)
cut_off_metres = 1600
tree_dist = tree.sparse_distance_matrix(tree,
max_distance=cut_off_metres,
p=2)
# Extract triangle
from scipy import sparse
udist = sparse.tril(tree_dist, k=-1) # zero the main diagonal
print("Distances after quad-tree cut-off: %d " % len(udist.data))
# Export CSV
import csv
f = open(out_csv, 'w', newline='')
w = csv.writer(f, delimiter=",", )
w.writerow(['id_a','lat_a','lon_a','id_b','lat_b','lon_b','metres'])
w.writerows(np.column_stack((points[udist.row ],
points[udist.col],
udist.data)))
f.close()
"""
Get ID labels
"""
id_to_csv = '...id.csv'
id_labels = np.genfromtxt(id_to_csv,
delimiter=',',
skip_header=1,
dtype='U')
"""
Try vincenty on the un-projected co-ordinates
"""
from geopy.distance import vincenty
vout_csv = '.../out_vin.csv'
test_vin = np.column_stack((points[udist.row].T[1:3].T,
points[udist.col].T[1:3].T))
func = lambda x: vincenty(x[0:2],x[2:4]).m
output = list(map(func,test_vin))
# Export CSV
f = open(vout_csv, 'w', newline='')
w = csv.writer(f, delimiter=",", )
w.writerow(['id_a','id_a2', 'lat_a','lon_a',
'id_b','id_b2', 'lat_b','lon_b',
'proj_metres','vincenty_metres'])
w.writerows(np.column_stack((list(id_labels[udist.row]),
points[udist.row ],
list(id_labels[udist.col]),
points[udist.col],
udist.data,
output,
)))
f.close()
print("Finished in %.0f seconds" % (time.time()-tstart)
This approach took 164 seconds to generate (for 5,306,434 distances) - compared to 9 - and also around 90 seconds to save to disk.
I then compared the difference in the vincenty distance and the hypotenuse distance (on the projected co-ordinates).
The mean difference in metres was 2.7 and the mean difference/metres was 0.0073% - which looks great.
"Use some kind of hashing to get a quick rough-cut off (all stores within 100km) and then only calculate accurate distances between those stores"
I think this might be better called gridding. So first make a dict, with a pair of grid coordinates as the key, and put each shop in a 50 km bucket near that point. Then, when you are calculating distances, you only look in nearby buckets rather than iterating through every shop in the whole universe - a rough sketch is below.
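A minimal sketch of that idea (assuming shops is the same [[id, lat, lon], ...] list as in the question, and using a crude half-degree grid, which is only roughly 50 km and gets less accurate away from the equator):
from collections import defaultdict
from geopy.distance import vincenty  # geopy < 2.0; newer versions use geodesic instead

CELL = 0.5  # grid cell size in degrees, roughly 50 km in latitude

def cell_of(lat, lon):
    return (int(lat // CELL), int(lon // CELL))

buckets = defaultdict(list)
for shop in shops:
    buckets[cell_of(shop[1], shop[2])].append(shop)

def near_pairs(cutoff_km=50):
    for (ci, cj), members in buckets.items():
        # only compare against shops in this cell and its 8 neighbours
        candidates = [s for di in (-1, 0, 1) for dj in (-1, 0, 1)
                      for s in buckets.get((ci + di, cj + dj), [])]
        for a in members:
            for b in candidates:
                if a[0] < b[0]:  # skip self-pairs and duplicates (assumes comparable ids)
                    d = vincenty(a[1:3], b[1:3]).km
                    if d <= cutoff_km:
                        yield a[0], b[0], d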
You can use vectorization with the haversine formula discussed in this thread Haversine Formula in Python (Bearing and Distance between two GPS points)
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6371 * c
Here is the %%timeit result for 7,451,653 distances:
642 ms ± 20.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
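Wrapped into a function and applied to the candidate pairs that come back from the k-d tree or geohash step, it might look like this sketch (the shops layout and the pairs array are assumptions based on the earlier answers):
import numpy as np

def haversine_km(lat1, lon1, lat2, lon2):
    # vectorised great-circle distance; all arguments can be numpy arrays in degrees
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    return 6371 * 2 * np.arcsin(np.sqrt(a))

# pairs: an (n, 2) integer array of candidate index pairs, e.g. np.array(list(tree.query_pairs(50)))
coords = np.array([[s[1], s[2]] for s in shops], dtype=float)
i, j = pairs[:, 0], pairs[:, 1]
dists = haversine_km(coords[i, 0], coords[i, 1], coords[j, 0], coords[j, 1])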
I'm looking to compute poleward heat fluxes at a level in the atmosphere, i.e. the mean of (u't'). I'm aware of the covariance function in NumPy, but cannot seem to implement it. Here is my code below.
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
myfile = '/home/ubuntu/Fluxes_Test/out.nc'
Import = Dataset(myfile, mode='r')
lon = Import.variables['lon'][:] # Longitude
lat = Import.variables['lat'][:] # Latitude
time = Import.variables['time'][:] # Time
lev = Import.variables['lev'][:] # Level
wind = Import.variables['ua'][:]
temp = Import.variables['ta'][:]
lon = lon-180 # to shift co-ordinates to -180 to 180.
variable1 = np.squeeze(wind,temp, axis=0)
variable2 = np.cov(variable1)
m = Basemap(resolution='l')
lons, lats = np.meshgrid(lon,lat)
X, Y = m(lons, lats)
cs = m.pcolor(X,Y, variable2)
plt.show()
The shape of the variables wind and temp, whose flux (the covariance) I am trying to compute, is (3960, 64, 128) for both, so 3960 pieces of data on a 64x128 grid (with co-ordinates).
I tried squeezing both variables to produce an array of shape (3960, 3960, 64, 128) so cov could work on the first two series of data (the two 3960s) of wind and temp, but this didn't work.
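For reference, a minimal sketch of what I am essentially after, assuming the covariance is meant point-wise over the time axis (axis 0), i.e. mean(u't') at each grid cell:
# anomalies with respect to the time mean at each grid point
wind_anom = wind - wind.mean(axis=0)
temp_anom = temp - temp.mean(axis=0)
# eddy flux u't': one value per (lat, lon) cell -> shape (64, 128)
flux_2d = (wind_anom * temp_anom).mean(axis=0)
Something like flux_2d could then be plotted with m.pcolor in place of variable2.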
I have 3D measurement data on a sphere that is very coarse and I want to interpolate.
I found that RectSphereBivariateSpline from scipy.interpolate should be most suitable.
I used the example in the RectSphereBivariateSpline documentation as a starting point and now have the following code:
""" read csv input file, post process and plot 3D data """
import csv
import numpy as np
from mayavi import mlab
from scipy.interpolate import RectSphereBivariateSpline
# user input
nElevationPoints = 17 # needs to correspond with csv file
nAzimuthPoints = 40 # needs to correspond with csv file
threshold = - 40 # needs to correspond with how measurement data was captured
turnTableStepSize = 72 # needs to correspond with measurement settings
resolution = 0.125 # needs to correspond with measurement settings
# read data from file
patternData = np.empty([nElevationPoints, nAzimuthPoints]) # empty buffer
ifile = open('ttest.csv') # need the 'b' suffix to prevent blank rows being inserted
reader = csv.reader(ifile,delimiter=',')
reader.next() # skip first line in csv file as this is only text
for nElevation in range (0,nElevationPoints):
    # azimuth
    for nAzimuth in range(0,nAzimuthPoints):
        patternData[nElevation,nAzimuth] = reader.next()[2]
ifile.close()
# post process
def r(thetaIndex,phiIndex):
    """r(thetaIndex,phiIndex): function in 3D plotting to return positive vector length from patternData[theta,phi]"""
    radius = -threshold + patternData[thetaIndex,phiIndex]
    return radius
#phi,theta = np.mgrid[0:nAzimuthPoints,0:nElevationPoints]
theta = np.arange(0,nElevationPoints)
phi = np.arange(0,nAzimuthPoints)
thetaMesh, phiMesh = np.meshgrid(theta,phi)
stepSizeRad = turnTableStepSize * resolution * np.pi / 180
theta = theta * stepSizeRad
phi = phi * stepSizeRad
# create new grid to interpolate on
phiIndex = np.linspace(1,360,360)
phiNew = phiIndex*np.pi/180
thetaIndex = np.linspace(1,180,180)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
# create interpolator object and interpolate
data = r(thetaMesh,phiMesh)
lut = RectSphereBivariateSpline(theta,phi,data.T)
data_interp = lut.ev(thetaNew.ravel(),phiNew.ravel()).reshape((360,180)).T
x = (data_interp(thetaIndex,phiIndex)*np.cos(phiNew)*np.sin(thetaNew))
y = (-data_interp(thetaIndex,phiIndex)*np.sin(phiNew)*np.sin(thetaNew))
z = (data_interp(thetaIndex,phiIndex)*np.cos(thetaNew))
# plot 3D data
obj = mlab.mesh(x, y, z, colormap='jet')
obj.enable_contours = True
obj.contour.filled_contours = True
obj.contour.number_of_contours = 20
mlab.show()
The example from the documentation works, but when I try to run the above code with the following test data: testdata, I get a ValueError at the code position where the RectSphereBivariateSpline interpolator object is declared:
ValueError:
ERROR: on entry, the input data are controlled on validity
the following restrictions must be satisfied.
-1<=iopt(1)<=1, 0<=iopt(2)<=1, 0<=iopt(3)<=1,
-1<=ider(1)<=1, 0<=ider(2)<=1, ider(2)=0 if iopt(2)=0.
-1<=ider(3)<=1, 0<=ider(4)<=1, ider(4)=0 if iopt(3)=0.
mu >= mumin (see above), mv >= 4, nuest >=8, nvest >= 8,
kwrk>=5+mu+mv+nuest+nvest,
lwrk >= 12+nuest*(mv+nvest+3)+nvest*24+4*mu+8*mv+max(nuest,mv+nvest)
0< u(i-1)=0: s>=0
if s=0: nuest>=mu+6+iopt(2)+iopt(3), nvest>=mv+7
if one of these conditions is found to be violated,control is
immediately repassed to the calling program. in that case there is no
approximation returned.
I have tried and tried, but I am absolutely clueless as to what I should change in order to satisfy these restrictions.
Does anyone have any hint as to what I may be doing wrong?
-- EDIT --
With the suggestions from @HYRY, I now have the following code that runs without runtime errors:
""" read csv input file, post process and plot 3D data """
import csv
import numpy as np
from mayavi import mlab
from scipy.interpolate import RectSphereBivariateSpline
# user input
nElevationPoints = 17 # needs to correspond with csv file
nAzimuthPoints = 40 # needs to correspond with csv file
threshold = - 40 # needs to correspond with how measurement data was captured
turnTableStepSize = 72 # needs to correspond with measurement settings
resolution = 0.125 # needs to correspond with measurement settings
# read data from file
patternData = np.empty([nElevationPoints, nAzimuthPoints]) # empty buffer
ifile = open('ttest.csv') # need the 'b' suffix to prevent blank rows being inserted
reader = csv.reader(ifile,delimiter=',')
reader.next() # skip first line in csv file as this is only text
for nElevation in range (0,nElevationPoints):
    # azimuth
    for nAzimuth in range(0,nAzimuthPoints):
        patternData[nElevation,nAzimuth] = reader.next()[2]
ifile.close()
# post process
def r(thetaIndex,phiIndex):
    """r(thetaIndex,phiIndex): function in 3D plotting to return positive vector length from patternData[theta,phi]"""
    radius = -threshold + patternData[thetaIndex,phiIndex]
    return radius
#phi,theta = np.mgrid[0:nAzimuthPoints,0:nElevationPoints]
theta = np.arange(0,nElevationPoints)
phi = np.arange(0,nAzimuthPoints)
thetaMesh, phiMesh = np.meshgrid(theta,phi)
stepSizeRad = turnTableStepSize * resolution * np.pi / 180
theta = theta * stepSizeRad
phi = phi * stepSizeRad
# create new grid to interpolate on
phiIndex = np.arange(1,361)
phiNew = phiIndex*np.pi/180
thetaIndex = np.arange(1,181)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
# create interpolator object and interpolate
data = r(thetaMesh,phiMesh)
theta[0] += 1e-6 # zero values for theta cause program to halt; phi makes no sense at theta=0
lut = RectSphereBivariateSpline(theta,phi,data.T)
data_interp = lut.ev(thetaNew.ravel(),phiNew.ravel()).reshape((360,180)).T
def rInterp(theta,phi):
    """rInterp(theta,phi): function in 3D plotting to return positive vector length from interpolated patternData[theta,phi]"""
    thetaIndex = theta/(np.pi/180)
    thetaIndex = thetaIndex.astype(int)
    phiIndex = phi/(np.pi/180)
    phiIndex = phiIndex.astype(int)
    radius = data_interp[thetaIndex,phiIndex]
    return radius
# recreate mesh minus one, needed otherwise the below gives index error, but why??
phiIndex = np.arange(0,360)
phiNew = phiIndex*np.pi/180
thetaIndex = np.arange(0,180)
thetaNew = thetaIndex*np.pi/180
thetaNew,phiNew = np.meshgrid(thetaNew,phiNew)
x = (rInterp(thetaNew,phiNew)*np.cos(phiNew)*np.sin(thetaNew))
y = (-rInterp(thetaNew,phiNew)*np.sin(phiNew)*np.sin(thetaNew))
z = (rInterp(thetaNew,phiNew)*np.cos(thetaNew))
# plot 3D data
obj = mlab.mesh(x, y, z, colormap='jet')
obj.enable_contours = True
obj.contour.filled_contours = True
obj.contour.number_of_contours = 20
mlab.show()
However, the plot is much different than the non-interpolated data, see picture here as reference.
Also, when running the interactive session, data_interp is much larger in value (>3e5) than the original data (this is around 20 max).
Any further tips?
It looks like theta[0] can't be 0. If you change it a little before calling RectSphereBivariateSpline:
theta[0] += 1e-6