Python Iris Set X Axis limit and ticks - python

I am creating line graphs with either the year or month along the x axis.
Here is the simplified code for the monthly line graph:
import matplotlib.pyplot as plt
import iris
import iris.coord_categorisation as iriscc
import iris.plot as iplt
import iris.quickplot as qplt
import iris.analysis.cartography
import cf_units
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
# Plot the 1989-2008 mean near-surface temperature by calendar month over
# Malawi for the CanRCM4 (ERA-Interim driven) CORDEX run and the CRU
# observations, as two lines on one graph.
# NOTE(review): the indentation of this body (and of the for/if below) is
# missing in this copy; restore the block structure before running.
#PART 1: CORDEX MODELS
#path to the model file (the name CCCma is reused for the loaded cube below)
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#build 1-D latitude/longitude dimension coords from the first column/row of
#the cube's 2-D latitude/longitude points, to make the cube consistent with
#the observed data; longitudes above 100 are shifted by -360 so that the
#longitudes are monotonic
lats = iris.coords.DimCoord(CCCma.coord('latitude').points[:,0], \
standard_name='latitude', units='degrees')
lons = CCCma.coord('longitude').points[0]
for i in range(len(lons)):
if lons[i]>100.:
lons[i] = lons[i]-360.
lons = iris.coords.DimCoord(lons, \
standard_name='longitude', units='degrees')
#drop the original horizontal coords and attach the 1-D ones to
#dimensions 1 (latitude) and 2 (longitude)
CCCma.remove_coord('latitude')
CCCma.remove_coord('longitude')
CCCma.remove_coord('grid_latitude')
CCCma.remove_coord('grid_longitude')
CCCma.add_dim_coord(lats, 1)
CCCma.add_dim_coord(lons, 2)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CCCma = CCCma.extract(Malawi)
#time constraint to make all series the same (years 1989 to 2008 inclusive)
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#PART 2: OBSERVED DATA
#path to the observed data file (the name CRU is reused for the loaded cube below)
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#define the latitude and longitude
#NOTE: lats and lons assigned here are not used again in this function
lats = iris.coords.DimCoord(CRU.coord('latitude').points, \
standard_name='latitude', units='degrees')
lons = CRU.coord('longitude').points
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CRU = CRU.extract(Malawi)
#time constraint to make all series the same (years 1989 to 2008 inclusive)
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CRU = CRU.extract(t_constraint)
#PART 3: FORMAT DATA
#model data is in Kelvin, but we would like to show it in Celsius
#(only the model cube is converted here; the CRU cube is left as loaded)
CCCma.convert_units('Celsius')
#bring time data into alignment (only the model cube's time coord is converted)
new_unit = cf_units.Unit('days since 1900-01-01', calendar = '365_day')
CCCma.coord('time').convert_units(new_unit)
#add years and months to data
iriscc.add_year(CCCma, 'time')
iriscc.add_year(CRU, 'time')
iriscc.add_month(CCCma, 'time')
iriscc.add_month(CRU, 'time')
#We are interested in plotting the data by month, so we need to take a mean of all the data by month
CCCmaYR = CCCma.aggregated_by('month', iris.analysis.MEAN)
CRUYR = CRU.aggregated_by('month', iris.analysis.MEAN)
#the area weights computed below need cell areas, therefore the longitude and latitude coordinates must be bounded. If the longitude and latitude bounds are not defined in the cube we can guess the bounds based on the coordinates
CCCmaYR.coord('latitude').guess_bounds()
CCCmaYR.coord('longitude').guess_bounds()
CRUYR.coord('latitude').guess_bounds()
CRUYR.coord('longitude').guess_bounds()
#Returns an array of area weights, with the same dimensions as the cube
CCCmaYR_grid_areas = iris.analysis.cartography.area_weights(CCCmaYR)
CRUYR_grid_areas = iris.analysis.cartography.area_weights(CRUYR)
#We want to plot the mean for the whole region so we need an area-weighted mean over all the lats and lons
CCCmaYR_mean = CCCmaYR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CCCmaYR_grid_areas)
CRUYR_mean = CRUYR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CRUYR_grid_areas)
#PART 4: PLOT LINE GRAPH
#assign the line colours and set x axis to months
qplt.plot(CCCmaYR_mean.coord('month'),CCCmaYR_mean, label='CanRCM4_ERAINT', lw=1.5, color='blue')
qplt.plot(CRUYR_mean.coord('month'), CRUYR_mean, label='Observed', lw=2, color='black')
#create a legend and set its location to under the graph
plt.legend(loc="upper center", bbox_to_anchor=(0.5,-0.05), fancybox=True, shadow=True, ncol=2)
#create a title
plt.title('Mean Near Surface Temperature for Malawi by month 1989-2008', fontsize=11)
#add grid lines
plt.grid()
#save the image of the graph and include full legend
#plt.savefig('ERAINT_Temperature_LineGraph_Annual', bbox_inches='tight')
#show the graph in the console
iplt.show()
if __name__ == '__main__':
main()
This produces a plot which looks like this:
How can I change the tick marks to show me all month names? I would also like the graph to finish at December (no white space after).
Similarly, for the yearly line graph, here is the simplified code:
import matplotlib.pyplot as plt
import iris
import iris.coord_categorisation as iriscc
import iris.plot as iplt
import iris.quickplot as qplt
import iris.analysis.cartography
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
# Plot the 1989-2008 annual mean near-surface temperature over Malawi for
# the CanRCM4 (ERA-Interim driven) CORDEX run and the CRU observations, as
# two lines on one graph.
# NOTE(review): the indentation of this body (and of the for/if below) is
# missing in this copy; restore the block structure before running.
#PART 1: CORDEX MODELS
#path to the model file (the name CCCma is reused for the loaded cube below)
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#build 1-D latitude/longitude dimension coords from the first column/row of
#the cube's 2-D latitude/longitude points, to make the cube consistent with
#the observed data; longitudes above 100 are shifted by -360 so that the
#longitudes are monotonic
lats = iris.coords.DimCoord(CCCma.coord('latitude').points[:,0], \
standard_name='latitude', units='degrees')
lons = CCCma.coord('longitude').points[0]
for i in range(len(lons)):
if lons[i]>100.:
lons[i] = lons[i]-360.
lons = iris.coords.DimCoord(lons, \
standard_name='longitude', units='degrees')
#drop the original horizontal coords and attach the 1-D ones to
#dimensions 1 (latitude) and 2 (longitude)
CCCma.remove_coord('latitude')
CCCma.remove_coord('longitude')
CCCma.remove_coord('grid_latitude')
CCCma.remove_coord('grid_longitude')
CCCma.add_dim_coord(lats, 1)
CCCma.add_dim_coord(lons, 2)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CCCma = CCCma.extract(Malawi)
#time constraint to make all series the same (years 1989 to 2008 inclusive)
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#PART 2: OBSERVED DATA
#path to the observed data file (the name CRU is reused for the loaded cube below)
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#define the latitude and longitude
#NOTE: lats and lons assigned here are not used again in this function
lats = iris.coords.DimCoord(CRU.coord('latitude').points, \
standard_name='latitude', units='degrees')
lons = CRU.coord('longitude').points
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CRU = CRU.extract(Malawi)
#time constraint to make all series the same (years 1989 to 2008 inclusive)
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CRU = CRU.extract(t_constraint)
#PART 3: FORMAT DATA
#model data is in Kelvin, but we would like to show it in Celsius
#(only the model cube is converted here; the CRU cube is left as loaded)
CCCma.convert_units('Celsius')
#add years to data
iriscc.add_year(CCCma, 'time')
iriscc.add_year(CRU, 'time')
#We are interested in plotting the data by year, so we need to take a mean of all the data by year
CCCma = CCCma.aggregated_by('year', iris.analysis.MEAN)
CRU = CRU.aggregated_by('year', iris.analysis.MEAN)
#the area weights computed below need cell areas, therefore the longitude and latitude coordinates must be bounded. If the longitude and latitude bounds are not defined in the cube we can guess the bounds based on the coordinates
CCCma.coord('latitude').guess_bounds()
CCCma.coord('longitude').guess_bounds()
CRU.coord('latitude').guess_bounds()
CRU.coord('longitude').guess_bounds()
#Returns an array of area weights, with the same dimensions as the cube
CCCma_grid_areas = iris.analysis.cartography.area_weights(CCCma)
CRU_grid_areas = iris.analysis.cartography.area_weights(CRU)
#We want to plot the mean for the whole region so we need an area-weighted mean over all the lats and lons
CCCma_mean = CCCma.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CCCma_grid_areas)
CRU_mean = CRU.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CRU_grid_areas)
#PART 4: PLOT LINE GRAPH
#assign the line colours and set x axis to years
qplt.plot(CCCma_mean.coord('year'), CCCma_mean, label='CanRCM4_ERAINT', lw=1.5, color='blue')
qplt.plot(CRU_mean.coord('year'), CRU_mean, label='Observed', lw=2, color='black')
#create a legend and set its location to under the graph
plt.legend(loc="upper center", bbox_to_anchor=(0.5,-0.05), fancybox=True, shadow=True, ncol=2)
#create a title
plt.title('Mean Near Surface Temperature for Malawi 1989-2008', fontsize=11)
#add grid lines
plt.grid()
#save the image of the graph and include full legend
#plt.savefig('ERAINT_Temperature_LineGraph_Annual', bbox_inches='tight')
#show the graph in the console
iplt.show()
if __name__ == '__main__':
main()
and this produces this graph:
As you can see I have limited my data from 1989 to 2008, but the axis goes from 1985 to 2010, how can I make this tighter?
Thank you!

For your monthly graph you may be able to change it by setting the xticks - this has to be numeric but you can also set labels to use instead of the numbers. Something like
plt.xticks(range(12), calendar.month_abbr[1:13])
may work (depends on the format of your data, you may need to plot month number rather than month name). You will need to import calendar to get the above working.
For your yearly graph you should just be able to set the x-axis limits using
plt.xlim((xmin, xmax))
where xmin is probably 1989 and xmax is 2008.

Related

How can I simplify and create conditional colours on this Waterfall Chart?

This is a code for a waterfall chart. I'd kindly like to ask:
if there is a way to simplify this code. The code is far too long and I'm sure there is a lot of extra lines of code that could be reduced.
How can I make the first and last bars black? Since I am creating a waterfall chart, I want the first and last bars to be black at all times, and the bars in between to be green or red depending on whether the value is positive or negative.
Bars greater than zero green.
Bars less than zero red.
Any help would be greatly appreciated.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
#Use python 2.7+ syntax to format currency
def money(x, pos):
    """Format a tick value as a whole-dollar currency string.

    Has the ``(value, position)`` signature expected of a matplotlib
    ``FuncFormatter`` callback.

    Args:
        x: Numeric tick value to format.
        pos: Tick position passed by the formatter; not used.

    Returns:
        ``x`` formatted with no decimals, thousands separators and a
        leading ``$`` (for example ``"$350,000"``).
    """
    return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(10, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar.
#Take the scalar maximum of the amount column so the offsets are plain
#numbers rather than one-element Series, and so the builtin max() is not
#shadowed.
max_amount = trans.amount.max()
neg_offset = max_amount / 25
pos_offset = max_amount / 50
plot_offset = int(max_amount / 15)
#Start label loop
last = len(trans) - 1
for loop, (index, row) in enumerate(trans.iterrows()):
    # For the last item in the list, we don't want to double count.
    # The last bar is identified by position, so an intermediate amount
    # that happens to equal the total is still treated as a normal step.
    if loop == last:
        y = y_height.iloc[loop]
    else:
        # y_height is indexed by label, so look rows up by position
        y = y_height.iloc[loop] + row['amount']
    # Determine if we want a neg or pos offset
    if row['amount'] > 0:
        y += pos_offset
    else:
        y -= neg_offset
    my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')
Answer to questions 2, 3 and 4: set the colors of the bar patches after plotting them:
for p, c in zip(my_plot.containers[0].patches, np.r_[0, np.sign(trans.amount[1:-1]), 0]):
p.set_color({0: 'k', 1: 'g', -1: 'r'}[c])

Change Colorbar Scaling in Matplotlib

Using NASA's SRTM data, I've generated a global elevation heatmap.
The problem is, however, the continents tend to blend in with the ocean because of the range of elevation values. Is it possible to change the colorbar's scale so that the edges of the continents are more distinct from the ocean? I've tried different cmaps, but they all seem to suffer from the problem.
Here is my code. I'm initializing a giant array (with 0s) to hold global elevation data, and then populating it file by file from the SRTM dataset. Each file is 1 degree latitude by 1 degree longitude.
Another question I had was regarding the map itself. For some reason, the Appalachian Mountains seem to have disappeared entirely.
import os
import numpy as np
from .srtm_map import MapGenerator
from ..utils.hgt_parser import HGTParser
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import richdem as rd
class GlobalMapGenerator():
    """Stitch per-tile elevation files into one global elevation array."""

    def __init__(self):
        """Create the tile map generator and list the files in ``base_dir``."""
        self.gen = MapGenerator()
        self.base_dir = "data/elevation/"
        self.hgt_files = os.listdir(self.base_dir)
        # Filled in by GenerateGlobalElevationMap().
        self.global_elevation_data = None

    @staticmethod
    def shrink(data, rows, cols):
        """Downsample a 2-D array to ``rows`` x ``cols`` by summing blocks.

        Args:
            data: 2-D NumPy array whose shape is an exact multiple of
                ``(rows, cols)``.
            rows: Number of rows in the result.
            cols: Number of columns in the result.

        Returns:
            A ``rows`` x ``cols`` array in which each element is the sum of
            the corresponding block of ``data``.

        Raises:
            ValueError: If ``data`` cannot be split into equal blocks.
        """
        block_rows, extra_rows = divmod(data.shape[0], rows)
        block_cols, extra_cols = divmod(data.shape[1], cols)
        if extra_rows or extra_cols:
            raise ValueError(
                "data of shape {} cannot be split evenly into {}x{} blocks".format(
                    data.shape, rows, cols))
        # reshape() needs integer sizes, hence divmod rather than "/".
        blocks = data.reshape(rows, block_rows, cols, block_cols)
        return blocks.sum(axis=1).sum(axis=2)

    def GenerateGlobalElevationMap(self, stride):
        """Build, save and display the global elevation heatmap.

        Every file listed in ``self.hgt_files`` is resized to ``res`` x
        ``res`` (``res = 1201 // stride``) and copied into the slot of
        ``self.global_elevation_data`` selected by the latitude/longitude
        encoded in its file name (characters 0-2 and 3-6).

        Args:
            stride: Integer divisor applied to the 1201-sample tile edge.
        """
        res = 1201//stride

        max_N = 59
        max_W = 180
        max_S = 56
        max_E = 179

        # N59 --> N00
        # S01 --> S56
        # E000 --> E179
        # W180 --> W001

        # Initialize array global elevation
        self.global_elevation_data = np.zeros(( res*(max_S+max_N+1), res*(max_E+max_W+1) ))

        print("Output Image Shape:", self.global_elevation_data.shape)

        for hgt_file in tqdm(self.hgt_files):
            lat_letter = hgt_file[0]
            lon_letter = hgt_file[3]
            lat = int(hgt_file[1:3])
            lon = int(hgt_file[4:7])

            if lat_letter == "S":
                # Shift south down by max_N, but south starts at S01 so we translate up by 1 too
                lat_trans = max_N + lat - 1
            else:
                # Bigger N lat means further up. E.g. N59 is at index 0 and is higher than N00
                lat_trans = max_N - lat

            if lon_letter == "E":
                # Shift east right by max_W
                lon_trans = max_W + lon
            else:
                # Bigger W lon means further left. E.g. W180 is at index 0 and is more left than W001
                lon_trans = max_W - lon

            # load in data from file as resized
            data = cv2.resize(HGTParser(os.path.join(self.base_dir, hgt_file)), (res, res))

            # generate bounds (x/y --> lon.lat for data from this file for the giant array)
            lat_bounds = [res*lat_trans, res*(lat_trans+1)]
            lon_bounds = [res*lon_trans, res*(lon_trans+1)]

            try:
                self.global_elevation_data[ lat_bounds[0]:lat_bounds[1], lon_bounds[0]:lon_bounds[1] ] = data
            except (ValueError, TypeError):
                # A tile that does not fit its slot is reported and skipped;
                # anything else (e.g. KeyboardInterrupt) is no longer swallowed.
                print("REFERENCE ERROR: " + hgt_file)
                print("lat: ", lat_bounds)
                print("lon: ", lon_bounds)

        # generate figure
        plt.figure(figsize=(20,20))
        plt.imshow(self.global_elevation_data, cmap="rainbow")
        plt.title("Global Elevation Heatmap")
        plt.colorbar()

        # Save before show(): once the window shown by show() is closed the
        # current figure is gone and savefig() would write an empty image.
        os.makedirs("figures", exist_ok=True)
        np.save("figures/GlobalElevationMap.npy", self.global_elevation_data)
        plt.savefig("figures/GlobalElevationMap.png")
        plt.show()

    def GenerateGlobalSlopeMap(self, stride):
        """Placeholder for a slope map; not implemented."""
        pass
Use a TwoSlopeNorm (docs) for your norm, like the example here.
From the example:
Sometimes we want to have a different colormap on either side of a conceptual center point, and we want those two colormaps to have different linear scales. An example is a topographic map where the land and ocean have a center at zero, but land typically has a greater elevation range than the water has depth range, and they are often represented by a different colormap.
If you set the midpoint at sea level (0), then you can have two very different scalings based on ocean elevation vs land elevation.
Example code (taken from the example linked above):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
from matplotlib import cm
dem = cbook.get_sample_data('topobathy.npz', np_load=True)
topo = dem['topo']
longitude = dem['longitude']
latitude = dem['latitude']
fig, ax = plt.subplots()
# make a colormap that has land and ocean clearly delineated and of the
# same length (256 + 256)
colors_undersea = plt.cm.terrain(np.linspace(0, 0.17, 256))
colors_land = plt.cm.terrain(np.linspace(0.25, 1, 256))
all_colors = np.vstack((colors_undersea, colors_land))
terrain_map = colors.LinearSegmentedColormap.from_list(
'terrain_map', all_colors)
# make the norm: Note the center is offset so that the land has more
# dynamic range:
divnorm = colors.TwoSlopeNorm(vmin=-500., vcenter=0, vmax=4000)
pcm = ax.pcolormesh(longitude, latitude, topo, rasterized=True, norm=divnorm,
cmap=terrain_map, shading='auto')
# Simple geographic plot, set aspect ratio because distance between lines of
# longitude depends on latitude.
ax.set_aspect(1 / np.cos(np.deg2rad(49)))
ax.set_title('TwoSlopeNorm(x)')
cb = fig.colorbar(pcm, shrink=0.6)
cb.set_ticks([-500, 0, 1000, 2000, 3000, 4000])
plt.show()
See how it scales numbers with this simple usage (from docs):
>>> import matplotlib.colors as mcolors
>>> offset = mcolors.TwoSlopeNorm(vmin=-4000., vcenter=0., vmax=10000)
>>> data = [-4000., -2000., 0., 2500., 5000., 7500., 10000.]
>>> offset(data)
array([0., 0.25, 0.5, 0.625, 0.75, 0.875, 1.0])

Waterfall chart python matplotlib

I am having a problem with waterfall. I took this chart from matplotlib site and added my own data frame with 2 simple columns with some integer numbers. My waterfall was produced but without numbers, just empty bars. I am a bit lost and I would appreciate any suggestions.
What I am trying to build is the custom waterfall that takes one dataframe with column names, values, and some values for filters like countries. I haven't found anything like that anywhere so I am trying to build my own.
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from matplotlib.ticker import FuncFormatter;
dataset = pd.read_csv('waterfall_test_data.csv')
#Use python 2.7+ syntax to format currency
def money(x, pos):
    """Format a tick value as a whole-dollar currency string.

    Has the ``(value, position)`` signature expected of a matplotlib
    ``FuncFormatter`` callback.

    Args:
        x: Numeric tick value to format.
        pos: Tick position passed by the formatter; not used.

    Returns:
        ``x`` formatted with no decimals, thousands separators and a
        leading ``$`` (for example ``"$350,000"``).
    """
    return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = dataset['columns']
data = dataset['amount']
#Store data and create a blank series to use for the waterfall.
#Pass the raw values, not the Series themselves: DataFrame(data=Series,
#index=...) re-aligns the Series' own row labels against the new index, so
#no label matches, every amount becomes NaN and the bars are drawn empty.
trans = pd.DataFrame({'amount': data.values}, index=index.values)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(15, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar.
#Take the scalar maximum of the amount column so the offsets are plain
#numbers rather than one-element Series, and so the builtin max() is not
#shadowed.
max_amount = trans.amount.max()
neg_offset = max_amount / 25
pos_offset = max_amount / 50
plot_offset = int(max_amount / 15)
#Start label loop
last = len(trans) - 1
for loop, (index, row) in enumerate(trans.iterrows()):
    # For the last item in the list, we don't want to double count.
    # The last bar is identified by position, so an intermediate amount
    # that happens to equal the total is still treated as a normal step.
    if loop == last:
        y = y_height.iloc[loop]
    else:
        # y_height is indexed by label, so look rows up by position
        y = y_height.iloc[loop] + row['amount']
    # Determine if we want a neg or pos offset
    if row['amount'] > 0:
        y += pos_offset
    else:
        y -= neg_offset
    my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')

Python Cartopy mapping

I'm trying to map CO2 levels from public NASA data onto a global map, depicting the values as (long, lat, value) in the style of a topographic map. Based on the data, and using the Panoply software, this is what my plot should look like:
The data is in .nc4 format and is read correctly; however, I can't get the data to plot. I'm using the Cartopy API and following this example: (https://scitools.org.uk/cartopy/docs/latest/gallery/waves.html#sphx-glr-gallery-waves-py).
Also I do not want to use Basemap.
Attempt # 1:
See Python code below:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
"""
function that download each OCO - 2 data that is in .nc4 format from file "subset_OCO2_L2_ABand_V8_20180929_010345.txt"
which is list of links
for all data with date range 2015 - 09 - 01 to 2016 - 01 - 01# make sure that you have a valid user name & password by registering in https: //earthdata.nasa.gov/
#implementation based on http: //unidata.github.io/netcdf4-python/#section1"""
def download_oco2_nc4(username, password, filespath):
    """Open the OCO-2 file and read its XCO2, longitude and latitude values.

    Prints the file format, the dimension names and a description of each
    variable as it is read.

    Args:
        username: Not used.
        password: Not used.
        filespath: Replaced by the fixed path below before it is read.

    Returns:
        Tuple ``(XCO2, LONGITUDE, LATITUDE, dataset)``; ``dataset`` is
        returned still open, so the caller is responsible for closing it.
    """
    # NOTE: whatever was passed as ``filespath`` is overwritten here.
    filespath = "C:\\Users\\Desktop\\oco2\\oco2_LtCO2_150831_B8100r_171009083146s.nc4"
    dataset = Dataset(filespath)
    variables = dataset.variables

    print(dataset.file_format)
    print(dataset.dimensions.keys())
    print(variables['xco2'])

    # XCO2
    xco2_values = variables['xco2'][:]
    print("->", type(xco2_values))

    # LATITUDE
    print(variables['latitude'])
    latitude_values = variables['latitude'][:]

    # LONGITUDE
    print(variables['longitude'])
    longitude_values = variables['longitude'][:]

    return xco2_values, longitude_values, latitude_values, dataset
def mapXoco2():
    """Plot the first ten XCO2 soundings on a Mollweide world map.

    The soundings are individual (longitude, latitude, value) points rather
    than a regular grid, so they are drawn as coloured markers with
    ``scatter``; ``contourf`` needs a 2-D array of values and raised
    "Input z must be a 2D array" for this data.
    """
    fig = plt.figure(figsize = (10, 5))
    ax = fig.add_subplot(1, 1, 1, projection = ccrs.Mollweide())
    XCO2, LONGITUDE, LATITUDE, dataset = download_oco2_nc4(1, 2, 3)
    dataset.close()

    # Keep only the first ten soundings of each variable.
    XCO2_subset = np.array(XCO2[:10])
    print("XCO2_subset="+str(len(XCO2_subset)))
    LONGITUDE_subset = np.array(LONGITUDE[:10])
    print("LONGITUDE_subset="+str(len(LONGITUDE_subset)))
    LATITUDE_subset = np.array(LATITUDE[:10])
    print("LATITUDE_subset="+str(len(LATITUDE_subset)))

    # ``transform`` names the coordinate system of the data being passed in
    # (longitude/latitude here, hence PlateCarree), not the projection of the
    # map, which was already chosen when the axes was created.
    ax.scatter(LONGITUDE_subset, LATITUDE_subset, c=XCO2_subset,
               transform = ccrs.PlateCarree(),
               cmap = 'nipy_spectral')
    ax.coastlines()
    ax.set_global()
    plt.show()
    print(XCO2_subset)
mapXoco2()
When I comment out these lines:
#LONGITUDE_subset, LATITUDE_subset = np.meshgrid(LONGITUDE_subset, LATITUDE_subset)
#XCO2_subset,XCO2_subset = np.meshgrid(XCO2_subset,XCO2_subset)
I get an error:
raise TypeError("Input z must be a 2D array.")
TypeError: Input z must be a 2D array.
However when I DO NOT comment these lines:
LONGITUDE_subset, LATITUDE_subset = np.meshgrid(LONGITUDE_subset, LATITUDE_subset)
XCO2_subset,XCO2_subset = np.meshgrid(XCO2_subset,XCO2_subset)
I get an empty map: I see the continents but no plotted CO2 values.
I believe I am performing the 1D-to-2D transformation of my inputs incorrectly.
Attempt # 2(updated):
Instead of working out what these 2D transformations in the API are doing, I'm plotting each point one by one using a loop. The issue is that although I can see more data (I'm only plotting about 10% of the data), I can't see the map/continents; the values are plotted on a white background. See the code:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
from random import sample
"""
function that download each OCO - 2 data that is in .nc4 format from file "subset_OCO2_L2_ABand_V8_20180929_010345.txt"
which is list of links
for all data with date range 2015 - 09 - 01 to 2016 - 01 - 01# make sure that you have a valid user name & password by registering in https: //earthdata.nasa.gov/
#implementation based on http: //unidata.github.io/netcdf4-python/#section1"""
filespath = "C:\\Users\\Downloads\\oco2_LtCO2_150830_B7305Br_160712072205s.nc4"
def download_oco2_nc4(filespath):
    """Open the OCO-2 file at ``filespath`` and read XCO2, longitude, latitude.

    Prints the file format, the dimension names and a description of each
    variable as it is read.

    Args:
        filespath: Location of the ``.nc4`` file to open.

    Returns:
        Tuple ``(XCO2, LONGITUDE, LATITUDE, dataset)``; ``dataset`` is
        returned still open, so the caller is responsible for closing it.
    """
    dataset = Dataset(filespath)
    variables = dataset.variables

    print("file format:" + str(dataset.file_format))
    print("dimensions.keys():" + str(dataset.dimensions.keys()))
    print("variables['xco2']:" + str(variables['xco2']))

    # XCO2
    xco2_values = variables['xco2'][:]
    print("->", type(xco2_values))

    # LATITUDE
    print(variables['latitude'])
    latitude_values = variables['latitude'][:]

    # LONGITUDE
    print(variables['longitude'])
    longitude_values = variables['longitude'][:]

    return xco2_values, longitude_values, latitude_values, dataset
def mapXoco2():
    """Plot a sample of the XCO2 soundings on a Mollweide world map.

    Reads the file named by the module-level ``filespath``, keeps a window
    of 1000 consecutive soundings out of every 10000, and draws each kept
    sounding as a marker coloured by its XCO2 band.
    """
    fig = plt.figure(figsize = (10, 5))
    ax = fig.add_subplot(1, 1, 1, projection = ccrs.Mollweide())
    XCO2, LONGITUDE, LATITUDE, dataset = download_oco2_nc4(filespath)
    dataset.close()

    XCO2_subset = np.array(XCO2)
    LONGITUDE_subset = np.array(LONGITUDE)
    LATITUDE_subset = np.array(LATITUDE)

    # Each array is too long to plot in full, and because the data follows
    # the satellite's trajectory the first N rows alone are not
    # representative, so sample windows spread across the whole file.
    # One shared index keeps longitude, latitude and value aligned; adding
    # the NumPy slices with "+" summed them element-wise instead of joining
    # them, and failed outright for files with fewer than 80000 soundings.
    window = 1000
    spacing = 10000
    picked = np.flatnonzero(np.arange(len(XCO2_subset)) % spacing < window)
    sampled_xco2 = XCO2_subset[picked]
    sampled_LONGITUDE = LONGITUDE_subset[picked]
    sampled_LATITUDE = LATITUDE_subset[picked]

    # Colour bands: [0, 370) blue, [370, 390) cyan, [390, 402) yellow,
    # [402, 410) orange, [410, 415) red, anything else brown.
    band_edges = np.array([0, 370, 390, 402, 410, 415])
    band_colours = np.array(
        ['brown', 'blue', 'cyan', 'yellow', 'orange', 'red', 'brown'])
    point_colours = band_colours[np.digitize(sampled_xco2, band_edges)]

    # A single scatter call replaces one plot call per point. The data is
    # longitude/latitude, hence transform=PlateCarree. The axes created
    # above is reused rather than replaced by a second plt.axes() call.
    ax.scatter(sampled_LONGITUDE, sampled_LATITUDE, s=1,
               c=point_colours.tolist(), transform=ccrs.PlateCarree())
    ax.coastlines()
    # Show the whole globe regardless of where the sampled points fall.
    ax.set_global()
    plt.show()
mapXoco2()
Output:
file format:NETCDF4
dimensions.keys():odict_keys(['sounding_id', 'levels', 'bands', 'vertices', 'epoch_dimension', 'source_files'])
variables['xco2']:
float32 xco2(sounding_id)
units: ppm
long_name: XCO2
missing_value: -999999.0
comment: Column-averaged dry-air mole fraction of CO2 (includes bias correction)
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
->
float32 latitude(sounding_id)
units: degrees_north
long_name: latitude
missing_value: -999999.0
comment: center latitude of the measurement
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
float32 longitude(sounding_id)
units: degrees_east
long_name: longitude
missing_value: -999999.0
comment: center longitude of the measurement
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
1) What happened to the map & continents?
Thanks & any useful help appreciated.
It looks like your transform argument is incorrect. If you have latitude/longitude data the value of the transform argument should be ccrs.PlateCarree(). See this guide in the cartopy documentation for details: https://scitools.org.uk/cartopy/docs/latest/tutorials/understanding_transform.html.
I cannot run your example to verify this is the solution. To get the most out of Stack Overflow you should provide a minimal working example that others can run themselves. See https://stackoverflow.com/help/mcve and https://stackoverflow.com/help/how-to-ask for tips.

Find an easier way to cluster 2-d scatter data into grid array data

I have figured out a method to cluster dispersed point data into a structured 2-d array (like a rasterize function), and I hope there is a better way to achieve that goal.
My work
1. Intro
The 1000 data points each have three properties (lon, lat, emission); each point represents one factory located at (x, y) that emits a certain amount of CO2 into the atmosphere
grid network: predefine the 2-d array in the shape of 20x20
http://i4.tietuku.com/02fbaf32d2f09fff.png
The code reproduced here:
#### define the map area
# Corners of the map window: the longitude range first, then the latitude range.
xc1, xc2 = 113.49805889531724, 115.5030664238035
yc1, yc2 = 37.39995194888143, 38.789235929357105
# NOTE: the name `map` hides Python's builtin `map` for the rest of the script.
map = Basemap(llcrnrlon=xc1, llcrnrlat=yc1, urcrnrlon=xc2, urcrnrlat=yc2)

#### reading the point data and scatter plot by their position
df = pd.read_csv("xxxxx.csv")
px, py = map(df.lon, df.lat)
map.scatter(px, py, color="red", s=5, zorder=3)

#### predefine the grid networks
# 21 evenly spaced edges per axis enclose 20 cells in each direction.
lon_grid = np.linspace(xc1, xc2, 21)
lat_grid = np.linspace(yc1, yc2, 21)
lon_x, lat_y = np.meshgrid(lon_grid, lat_grid)
# All-zero 20x20 array, drawn only to show the cell outlines.
grids = np.zeros((20, 20))
plt.pcolormesh(lon_x, lat_y, grids, cmap='gray', facecolor='none', edgecolor='k', zorder=3)
2. My target
Finding the nearest grid point for each factory
Add the emission data into this grid number
3. Algorithm realization
3.1 Raster grid
Note: the 20x20 grid points distributed over this area are represented by blue dots.
http://i4.tietuku.com/8548554587b0cb3a.png
3.2 KD-tree
Find the nearest blue dot to each red point
# Assign every factory to its nearest grid point and accumulate its emission
# there.  `grids` holds the (lon, lat) of each of the 20*20 grid points and
# `grids_em` the running emission total per grid point (same flat ordering).
sh = (20*20, 2)
grids = np.zeros(sh)
sh_emission = (20*20)
grids_em = np.zeros(sh_emission)

# Fill the flat coordinate table row by row.
# NOTE(review): `xx` and `yy` are defined outside this snippet; presumably
# they are the 20x20 mesh of grid points -- confirm against the full script.
k = 0
for j in range(0, yy.shape[0], 1):
    for i in range(0, xx.shape[0], 1):
        grids[k] = np.array([lon_grid[i], lat_grid[j]])
        k += 1

T = KDTree(grids)

# Search radius: the diagonal of one grid cell.
x_delta = (lon_grid[2] - lon_grid[1])
y_delta = (lat_grid[2] - lat_grid[1])
R = np.sqrt(x_delta**2 + y_delta**2)

for i in range(0, len(df.lon), 1):
    lon_i = df.lon.iloc[i]
    lat_i = df.lat.iloc[i]
    idx = T.query_ball_point([lon_i, lat_i], r=R)

    # No grid point lies within R of this factory: nothing to accumulate.
    if not idx:
        continue

    if len(idx) > 1:
        # Several grid points fall inside the search radius, so measure the
        # distance from the factory to each *candidate* grid point (indexing
        # `grids` by the candidate's own index, not by its position in `idx`)
        # and keep the closest one.
        distance = [
            np.sqrt((lon_i - grids[g][0])**2 + (lat_i - grids[g][1])**2)
            for g in idx
        ]
        pos = idx[distance.index(min(distance))]
    else:
        # Exactly one candidate was found.
        pos = idx[0]

    # Positional lookup, consistent with the lon/lat access above.
    grids_em[pos] += df.so2.iloc[i]
4. Result
# Fold the flat per-grid-point totals back into a 20x20 array and draw it
# as a colour mesh over the lon/lat grid.
co2 = grids_em.reshape(20,20)
plt.pcolormesh(lon_x,lat_y,co2,cmap =plt.cm.Spectral_r,zorder=3)
http://i4.tietuku.com/6ded65c4ac301294.png
5. My question
Can someone point out any drawbacks or errors in this method?
Are there algorithms better suited to my target?
Thanks a lot!
There are many for-loops in your code, which is not the NumPy way.
Make some sample data first:
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
import pylab as pl
# Bounding box of the study area: the longitude range, then the latitude range.
xc1, xc2 = 113.49805889531724, 115.5030664238035
yc1, yc2 = 37.39995194888143, 38.789235929357105
N = 1000     # number of synthetic points to draw
GSIZE = 20   # number of grid nodes along each axis

# Draw N correlated Gaussian points centred on the middle of the box.
centre = [(xc1 + xc2)*0.5, (yc1 + yc2)*0.5]
covariance = [[0.1, 0.02], [0.02, 0.1]]
samples = np.random.multivariate_normal(centre, covariance, size=N)
x, y = samples.T

# Every point carries a value of one, so per-cell sums equal point counts.
value = np.ones(N)
df_points = pd.DataFrame({"x": x, "y": y, "v": value})
For equally spaced grids you can use hist2d():
# 2-D histogram with 20 bins per axis; each point contributes its `v` value as its weight.
pl.hist2d(df_points.x, df_points.y, weights=df_points.v, bins=20, cmap="viridis");
Here is the output:
Here is the code using KDTree:
# Lay GSIZE x GSIZE grid nodes over the bounding box of the sample points.
x_nodes = slice(x.min(), x.max(), GSIZE*1j)
y_nodes = slice(y.min(), y.max(), GSIZE*1j)
X, Y = np.mgrid[x_nodes, y_nodes]
grid = np.column_stack((X.ravel(), Y.ravel()))
points = np.column_stack((df_points.x, df_points.y))

# For every sample point, look up the index of its closest grid node.
tree = KDTree(grid)
dist, indices = tree.query(points)

# Total the point values per node.  Nodes that attracted no point are absent
# from the grouped result, so they become NaN when aligned onto df_grid.
grid_values = df_points["v"].groupby(indices).sum()
df_grid = pd.DataFrame(grid, columns=["x", "y"])
df_grid["v"] = grid_values

# Raw points as faint crosses, grid nodes as dots coloured by their total.
fig, ax = pl.subplots(figsize=(10, 8))
ax.plot(df_points.x, df_points.y, "kx", alpha=0.2)
node_style = dict(cmap="viridis", linewidths=0, s=100, marker="o")
mapper = ax.scatter(df_grid.x, df_grid.y, c=df_grid.v, **node_style)
pl.colorbar(mapper, ax=ax)
the output is:

Categories

Resources