Python Cartopy mapping - python

Im trying to map CO2 levels based on public data from NASA on global map and depict those values as (long,lat,value) as topographic map, based on the data and by using Panoply software, this is what my plot should look like:
The data is in .nc4 format and read correctly, however I cant get the data plot Im using Cartopy API & following this example:(https://scitools.org.uk/cartopy/docs/latest/gallery/waves.html#sphx-glr-gallery-waves-py).
Also I do not want to use Basemap.
Attempt # 1:
See Python code below:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
"""
function that download each OCO - 2 data that is in .nc4 format from file "subset_OCO2_L2_ABand_V8_20180929_010345.txt"
which is list of links
for all data with date range 2015 - 09 - 01 to 2016 - 01 - 01# make sure that you have a valid user name & password by registering in https: //earthdata.nasa.gov/
#implementation based on http: //unidata.github.io/netcdf4-python/#section1"""
def download_oco2_nc4(username, password, filespath):
filespath = "C:\\Users\\Desktop\\oco2\\oco2_LtCO2_150831_B8100r_171009083146s.nc4"
dataset = Dataset(filespath)
print(dataset.file_format)
print(dataset.dimensions.keys())
print(dataset.variables['xco2'])
XCO2 = []
LONGITUDE = []
LATITUDE = []
# XCO2
XCO2 = dataset.variables['xco2'][:]
print("->", type(XCO2))
print(dataset.variables['latitude'])
# LATITUDE
LATITUDE = dataset.variables['latitude'][:]
print(dataset.variables['longitude'])
# LONGITUDE
LONGITUDE = dataset.variables['longitude'][:]
return XCO2, LONGITUDE, LATITUDE, dataset
def mapXoco2():
fig = plt.figure(figsize = (10, 5))
ax = fig.add_subplot(1, 1, 1, projection = ccrs.Mollweide())
XCO2, LONGITUDE, LATITUDE, dataset = download_oco2_nc4(1, 2, 3)
dataset.close()
XCO2_subset = list()
counter = 0
for xco2 in XCO2:
if counter < 10:
XCO2_subset.append(xco2)
counter = counter + 1
else:
break
print("XCO2_subset="+str(len(XCO2_subset)))
counter = 0
LONGITUDE_subset = list()
for longitude in LONGITUDE:
if counter < 10:
LONGITUDE_subset.append(longitude)
counter = counter + 1
else:
break
print("LONGITUDE_subset="+str(len(LONGITUDE_subset)))
counter = 0
LATITUDE_subset = list()
for latitude in LATITUDE:
if counter < 10:
LATITUDE_subset.append(latitude)
counter = counter + 1
else:
break
print("LATITUDE_subset="+str(len(LATITUDE_subset)))
XCO2_subset = np.array(XCO2_subset)
LONGITUDE_subset = np.array(LONGITUDE_subset)
LATITUDE_subset = np.array(LATITUDE_subset)
#LONGITUDE_subset, LATITUDE_subset = np.meshgrid(LONGITUDE_subset, LATITUDE_subset)
#XCO2_subset,XCO2_subset = np.meshgrid(XCO2_subset,XCO2_subset)
ax.contourf(LONGITUDE_subset,LATITUDE_subset,XCO2_subset,
transform = ccrs.Mollweide(central_longitude=0, globe=None),
cmap = 'nipy_spectral')
ax.coastlines()
ax.set_global()
plt.show()
print(XCO2_subset)
mapXoco2()
When I comment out these lines:
#LONGITUDE_subset, LATITUDE_subset = np.meshgrid(LONGITUDE_subset, LATITUDE_subset)
#XCO2_subset,XCO2_subset = np.meshgrid(XCO2_subset,XCO2_subset)
I get an error:
raise TypeError("Input z must be a 2D array.")
TypeError: Input z must be a 2D array.
However when I DO NOT comment these lines:
LONGITUDE_subset, LATITUDE_subset = np.meshgrid(LONGITUDE_subset, LATITUDE_subset)
XCO2_subset,XCO2_subset = np.meshgrid(XCO2_subset,XCO2_subset
)
I get an empty map, I see the continents but no plotted values C02 values.
I believe interpreting the 1D to 2D transformation of my inputs incorrectly.
Attempt # 2(updated):
Instead of dealing with why/what these 2d transformations in the API are doing, Im ploting each point 1 by 1 using a loop. The issue is although I can see more data(Im only ploting about 10% of the data) I CANT SEE THE MAP/CONTINENTS I SEE THE VALUES PLOT ON WHITE BACKGROUND???, SEE CODE:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
from random import sample
"""
function that download each OCO - 2 data that is in .nc4 format from file "subset_OCO2_L2_ABand_V8_20180929_010345.txt"
which is list of links
for all data with date range 2015 - 09 - 01 to 2016 - 01 - 01# make sure that you have a valid user name & password by registering in https: //earthdata.nasa.gov/
#implementation based on http: //unidata.github.io/netcdf4-python/#section1"""
filespath = "C:\\Users\\Downloads\\oco2_LtCO2_150830_B7305Br_160712072205s.nc4"
def download_oco2_nc4(filespath):
dataset = Dataset(filespath)
print("file format:"+str(dataset.file_format))
print("dimensions.keys():"+str(dataset.dimensions.keys()))
print("variables['xco2']:"+str(dataset.variables['xco2']))
XCO2 = []
LONGITUDE = []
LATITUDE = []
# XCO2
XCO2 = dataset.variables['xco2'][:]
print("->", type(XCO2))
print(dataset.variables['latitude'])
# LATITUDE
LATITUDE = dataset.variables['latitude'][:]
print(dataset.variables['longitude'])
# LONGITUDE
LONGITUDE = dataset.variables['longitude'][:]
return XCO2, LONGITUDE, LATITUDE, dataset
def mapXoco2():
fig = plt.figure(figsize = (10, 5))
ax = fig.add_subplot(1, 1, 1, projection = ccrs.Mollweide())
XCO2, LONGITUDE, LATITUDE, dataset = download_oco2_nc4(filespath)
dataset.close()
XCO2_subset = np.array(XCO2)
LONGITUDE_subset = np.array(LONGITUDE)
LATITUDE_subset = np.array(LATITUDE)
"""each of the arrays has over 80,000 of data therefore its taking to long to map, after 10,000 rows its to slow, and 10,000 isnt sufficient.
Because oco-2 gathers data from trajectory the 1st 10% or whatever precent of the data will not be a good representation of the overal data.
We must sample from X number of slices across the data.
"""
#XCO2 attempt to get ten ranges, we need to check 10 ranges therefore we need if statements not if/else
if (len(XCO2_subset)>=10000):
first_XCO2_subset=XCO2_subset[0:1000]
if (len(XCO2_subset)>=20000):
second_XCO2_subset=XCO2_subset[20000:21000]
if (len(XCO2_subset)>=30000):
third_XCO2_subset=XCO2_subset[30000:31000]
if (len(XCO2_subset)>=40000):
fourth_XCO2_subset=XCO2_subset[40000:41000]
if (len(XCO2_subset)>=50000):
fifth_XCO2_subset=XCO2_subset[50000:51000]
if (len(XCO2_subset)>=60000):
sixth_XCO2_subset=XCO2_subset[60000:61000]
if (len(XCO2_subset)>=70000):
seventh_XCO2_subset=XCO2_subset[70000:71000]
if (len(XCO2_subset)>=80000):
eight_XCO2_subset=XCO2_subset[80000:81000]
sampled_xco2 = first_XCO2_subset + second_XCO2_subset + third_XCO2_subset + fourth_XCO2_subset + fifth_XCO2_subset + sixth_XCO2_subset + seventh_XCO2_subset + eight_XCO2_subset
#LONGITUDE attempt to get ten ranges, we need to check 10 ranges therefore we need if statements not if/else
if (len(LONGITUDE_subset)>=10000):
first_LONGITUDE_subset=LONGITUDE_subset[0:1000]
if (len(LONGITUDE_subset)>=20000):
second_LONGITUDE_subset=LONGITUDE_subset[20000:21000]
if (len(LONGITUDE_subset)>=30000):
third_LONGITUDE_subset=LONGITUDE_subset[30000:31000]
if (len(LONGITUDE_subset)>=40000):
fourth_LONGITUDE_subset=LONGITUDE_subset[40000:41000]
if (len(LONGITUDE_subset)>=50000):
fifth_LONGITUDE_subset=LONGITUDE_subset[50000:51000]
if (len(LONGITUDE_subset)>=60000):
sixth_LONGITUDE_subset=LONGITUDE_subset[60000:61000]
if (len(LONGITUDE_subset)>=70000):
seventh_LONGITUDE_subset=LONGITUDE_subset[70000:71000]
if (len(LONGITUDE_subset)>=80000):
eight_LONGITUDE_subset=LONGITUDE_subset[80000:81000]
sampled_LONGITUDE = first_LONGITUDE_subset + second_LONGITUDE_subset + third_LONGITUDE_subset + fourth_LONGITUDE_subset + fifth_LONGITUDE_subset + sixth_LONGITUDE_subset + seventh_LONGITUDE_subset + eight_LONGITUDE_subset
#LATITUDE attempt to get ten ranges, we need to check 10 ranges therefore we need if statements not if/else
if (len(LATITUDE_subset)>=10000):
first_LATITUDE_subset=LATITUDE_subset[0:1000]
if (len(LATITUDE_subset)>=20000):
second_LATITUDE_subset=LATITUDE_subset[20000:21000]
if (len(LATITUDE_subset)>=30000):
third_LATITUDE_subset=LATITUDE_subset[30000:31000]
if (len(LATITUDE_subset)>=40000):
fourth_LATITUDE_subset=LATITUDE_subset[40000:41000]
if (len(LATITUDE_subset)>=50000):
fifth_LATITUDE_subset=LATITUDE_subset[50000:51000]
if (len(LATITUDE_subset)>=60000):
sixth_LATITUDE_subset=LATITUDE_subset[60000:61000]
if (len(LATITUDE_subset)>=70000):
seventh_LATITUDE_subset=LATITUDE_subset[70000:71000]
if (len(LATITUDE_subset)>=80000):
eight_LATITUDE_subset=LATITUDE_subset[80000:81000]
sampled_LATITUDE = first_LATITUDE_subset + second_LATITUDE_subset + third_LATITUDE_subset + fourth_LATITUDE_subset + fifth_LATITUDE_subset + sixth_LATITUDE_subset + seventh_LATITUDE_subset + eight_LATITUDE_subset
ax = plt.axes(projection=ccrs.Mollweide())
#plt.contourf(LONGITUDE_subset, LATITUDE_subset, XCO2_subset, 60,transform=ccrs.PlateCarree())
for long, lat, value in zip(sampled_LONGITUDE, sampled_LATITUDE,sampled_xco2):
#print(long, lat, value)
if value >= 0 and value < 370:
ax.plot(long,lat,marker='o',color='blue', markersize=1, transform=ccrs.PlateCarree())
elif value >= 370 and value < 390:
ax.plot(long,lat,marker='o',color='cyan', markersize=1, transform=ccrs.PlateCarree())
elif value >= 390 and value < 402:
ax.plot(long,lat,marker='o',color='yellow', markersize=1, transform=ccrs.PlateCarree())
elif value >= 402 and value < 410:
ax.plot(long,lat,marker='o',color='orange', markersize=1, transform=ccrs.PlateCarree())
elif value >= 410 and value < 415:
ax.plot(long,lat,marker='o',color='red', markersize=1, transform=ccrs.PlateCarree())
else:
ax.plot(long,lat,marker='o',color='brown', markersize=1, transform=ccrs.PlateCarree())
ax.coastlines()
plt.show()
mapXoco2()
Output:
file format:NETCDF4
dimensions.keys():odict_keys(['sounding_id', 'levels', 'bands', 'vertices', 'epoch_dimension', 'source_files'])
variables['xco2']:
float32 xco2(sounding_id)
units: ppm
long_name: XCO2
missing_value: -999999.0
comment: Column-averaged dry-air mole fraction of CO2 (includes bias correction)
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
->
float32 latitude(sounding_id)
units: degrees_north
long_name: latitude
missing_value: -999999.0
comment: center latitude of the measurement
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
float32 longitude(sounding_id)
units: degrees_east
long_name: longitude
missing_value: -999999.0
comment: center longitude of the measurement
unlimited dimensions:
current shape = (82776,)
filling on, default _FillValue of 9.969209968386869e+36 used
1) What happened to the map & continents?
Thanks & any useful help appreciated.

It looks like your transform argument is incorrect. If you have latitude/longitude data the value of the transform argument should be ccrs.PlateCarree(). See this guide in the cartopy documentation for details: https://scitools.org.uk/cartopy/docs/latest/tutorials/understanding_transform.html.
I cannot run your example to verify this is the solution. To get the most out of Stack Overflow you should provide a minimal working example that others can run themselves. See https://stackoverflow.com/help/mcve and https://stackoverflow.com/help/how-to-ask for tips.

Related

Change Colorbar Scaling in Matplotlib

Using NASA's SRTM data, I've generated a global elevation heatmap.
The problem is, however, the continents tend to blend in with the ocean because of the range of elevation values. Is it possible to change the colorbar's scale so that the edges of the continents are more distinct from the ocean? I've tried different cmaps, but they all seem to suffer from the problem.
Here is my code. I'm initializing a giant array (with 0s) to hold global elevation data, and then populating it file by file from the SRTM dataset. Each file is 1 degree latitude by 1 degree longitude.
Another question I had was regarding the map itself. For some reason, the Appalachian Mountains seem to have disappeared entirely.
import os
import numpy as np
from .srtm_map import MapGenerator
from ..utils.hgt_parser import HGTParser
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import richdem as rd
class GlobalMapGenerator():
def __init__(self):
self.gen = MapGenerator()
self.base_dir = "data/elevation/"
self.hgt_files = os.listdir(self.base_dir)
self.global_elevation_data = None
def shrink(data, rows, cols):
return data.reshape(rows, data.shape[0]/rows, cols, data.shape[1]/cols).sum(axis=1).sum(axis=2)
def GenerateGlobalElevationMap(self, stride):
res = 1201//stride
max_N = 59
max_W = 180
max_S = 56
max_E = 179
# N59 --> N00
# S01 --> S56
# E000 --> E179
# W180 --> W001
# Initialize array global elevation
self.global_elevation_data = np.zeros(( res*(max_S+max_N+1), res*(max_E+max_W+1) ))
print("Output Image Shape:", self.global_elevation_data.shape)
for hgt_file in tqdm(self.hgt_files):
lat_letter = hgt_file[0]
lon_letter = hgt_file[3]
lat = int(hgt_file[1:3])
lon = int(hgt_file[4:7])
if lat_letter == "S":
# Shift south down by max_N, but south starts at S01 so we translate up by 1 too
lat_trans = max_N + lat - 1
else:
# Bigger N lat means further up. E.g. N59 is at index 0 and is higher than N00
lat_trans = max_N - lat
if lon_letter == "E":
# Shift east right by max_W
lon_trans = max_W + lon
else:
# Bigger W lon means further left. E.g. W180 is at index 0 and is more left than W001
lon_trans = max_W - lon
# load in data from file as resized
data = cv2.resize(HGTParser(os.path.join(self.base_dir, hgt_file)), (res, res))
# generate bounds (x/y --> lon.lat for data from this file for the giant array)
lat_bounds = [res*lat_trans, res*(lat_trans+1)]
lon_bounds = [res*lon_trans, res*(lon_trans+1)]
try:
self.global_elevation_data[ lat_bounds[0]:lat_bounds[1], lon_bounds[0]:lon_bounds[1] ] = data
except:
print("REFERENCE ERROR: " + hgt_file)
print("lat: ", lat_bounds)
print("lon: ", lon_bounds)
# generate figure
plt.figure(figsize=(20,20))
plt.imshow(self.global_elevation_data, cmap="rainbow")
plt.title("Global Elevation Heatmap")
plt.colorbar()
plt.show()
np.save("figures/GlobalElevationMap.npy", self.global_elevation_data)
plt.savefig("figures/GlobalElevationMap.png")
def GenerateGlobalSlopeMap(self, stride):
pass
Use a TwoSlopeNorm (docs) for your norm, like the example here.
From the example:
Sometimes we want to have a different colormap on either side of a conceptual center point, and we want those two colormaps to have different linear scales. An example is a topographic map where the land and ocean have a center at zero, but land typically has a greater elevation range than the water has depth range, and they are often represented by a different colormap.
If you set the midpoint at sea level (0), then you can have two very different scalings based on ocean elevation vs land elevation.
Example code (taken from the example linked above):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
from matplotlib import cm
dem = cbook.get_sample_data('topobathy.npz', np_load=True)
topo = dem['topo']
longitude = dem['longitude']
latitude = dem['latitude']
fig, ax = plt.subplots()
# make a colormap that has land and ocean clearly delineated and of the
# same length (256 + 256)
colors_undersea = plt.cm.terrain(np.linspace(0, 0.17, 256))
colors_land = plt.cm.terrain(np.linspace(0.25, 1, 256))
all_colors = np.vstack((colors_undersea, colors_land))
terrain_map = colors.LinearSegmentedColormap.from_list(
'terrain_map', all_colors)
# make the norm: Note the center is offset so that the land has more
# dynamic range:
divnorm = colors.TwoSlopeNorm(vmin=-500., vcenter=0, vmax=4000)
pcm = ax.pcolormesh(longitude, latitude, topo, rasterized=True, norm=divnorm,
cmap=terrain_map, shading='auto')
# Simple geographic plot, set aspect ratio beecause distance between lines of
# longitude depends on latitude.
ax.set_aspect(1 / np.cos(np.deg2rad(49)))
ax.set_title('TwoSlopeNorm(x)')
cb = fig.colorbar(pcm, shrink=0.6)
cb.set_ticks([-500, 0, 1000, 2000, 3000, 4000])
plt.show()
See how it scales numbers with this simple usage (from docs):
>>> import matplotlib. Colors as mcolors
>>> offset = mcolors.TwoSlopeNorm(vmin=-4000., vcenter=0., vmax=10000)
>>> data = [-4000., -2000., 0., 2500., 5000., 7500., 10000.]
>>> offset(data)
array([0., 0.25, 0.5, 0.625, 0.75, 0.875, 1.0])

Waterfall chart python matplotlib

I am having a problem with waterfall. I took this chart from matplotlib site and added my own data frame with 2 simple columns with some integer numbers. My waterfall was produced but without numbers, just empty bars. I am a bit lost and I would appreciate any suggestions.
What I am trying to build is the custom waterfall that takes one dataframe with column names, values, and some values for filters like countries. I haven't found anything like that anywhere so I am trying to build my own.
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from matplotlib.ticker import FuncFormatter;
dataset = pd.read_csv('waterfall_test_data.csv')
#Use python 2.7+ syntax to format currency
def money(x, pos):
'The two args are the value and tick position'
return "${:,.0f}".format(x)
formatter = FuncFormatter(money)
#Data to plot. Do not include a total, it will be calculated
index = dataset['columns']
data = dataset['amount']
#Store data and create a blank series to use for the waterfall
trans = pd.DataFrame(data=data,index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)
#Get the net total number for the final element in the waterfall
total = trans.sum().amount
trans.loc["net"]= total
blank.loc["net"] = total
#The steps graphically show the levels as well as used for label placement
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step[1::3] = np.nan
#When plotting the last element, we want to show the full bar,
#Set the blank to 0
blank.loc["net"] = 0
#Plot and label
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank,legend=None, figsize=(15, 5), title="2014 Sales Waterfall")
my_plot.plot(step.index, step.values,'k')
my_plot.set_xlabel("Transaction Types")
#Format the axis for dollars
my_plot.yaxis.set_major_formatter(formatter)
#Get the y-axis position for the labels
y_height = trans.amount.cumsum().shift(1).fillna(0)
#Get an offset so labels don't sit right on top of the bar
max = trans.max()
neg_offset = max / 25
pos_offset = max / 50
plot_offset = int(max / 15)
#Start label loop
loop = 0
for index, row in trans.iterrows():
# For the last item in the list, we don't want to double count
if row['amount'] == total:
y = y_height[loop]
else:
y = y_height[loop] + row['amount']
# Determine if we want a neg or pos offset
if row['amount'] > 0:
y += pos_offset
else:
y -= neg_offset
my_plot.annotate("{:,.0f}".format(row['amount']),(loop,y),ha="center")
loop+=1
#Scale up the y axis so there is room for the labels
my_plot.set_ylim(0,blank.max()+int(plot_offset))
#Rotate the labels
my_plot.set_xticklabels(trans.index,rotation=0)
my_plot.get_figure().savefig("waterfall.png",dpi=200,bbox_inches='tight')

Python Iris Set X Axis limit and ticks

I am creating line graphs with either the year or month along the x axis.
Here is the simplified code for the monthly line graph:
import matplotlib.pyplot as plt
import iris
import iris.coord_categorisation as iriscc
import iris.plot as iplt
import iris.quickplot as qplt
import iris.analysis.cartography
import cf_units
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
#PART 1: CORDEX MODELS
#bring in all the models we need and give them a name
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#remove flat latitude and longitude and only use grid latitude and grid longitude to make consistent with the observed data, also make sure all of the longitudes are monotonic
lats = iris.coords.DimCoord(CCCma.coord('latitude').points[:,0], \
standard_name='latitude', units='degrees')
lons = CCCma.coord('longitude').points[0]
for i in range(len(lons)):
if lons[i]>100.:
lons[i] = lons[i]-360.
lons = iris.coords.DimCoord(lons, \
standard_name='longitude', units='degrees')
CCCma.remove_coord('latitude')
CCCma.remove_coord('longitude')
CCCma.remove_coord('grid_latitude')
CCCma.remove_coord('grid_longitude')
CCCma.add_dim_coord(lats, 1)
CCCma.add_dim_coord(lons, 2)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CCCma = CCCma.extract(Malawi)
#time constraignt to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#PART 2: OBSERVED DATA
#bring in all the files we need and give them a name
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#define the latitude and longitude
lats = iris.coords.DimCoord(CRU.coord('latitude').points, \
standard_name='latitude', units='degrees')
lons = CRU.coord('longitude').points
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CRU = CRU.extract(Malawi)
#time constraignt to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CRU = CRU.extract(t_constraint)
#PART 3: FORMAT DATA
#data is in Kelvin, but we would like to show it in Celcius
CCCma.convert_units('Celsius')
#bring time data into allignment
new_unit = cf_units.Unit('days since 1900-01-01', calendar = '365_day')
CCCma.coord('time').convert_units(new_unit)
#add years and months to data
iriscc.add_year(CCCma, 'time')
iriscc.add_year(CRU, 'time')
iriscc.add_month(CCCma, 'time')
iriscc.add_month(CRU, 'time')
#We are interested in plotting the data by month, so we need to take a mean of all the data by month
CCCmaYR = CCCma.aggregated_by('month', iris.analysis.MEAN)
CRUYR = CRU.aggregated_by('month', iris.analysis.MEAN)
#regridding scheme requires spatial areas, therefore the longitude and latitude coordinates must be bounded. If the longitude and latitude bounds are not defined in the cube we can guess the bounds based on the coordinates
CCCmaYR.coord('latitude').guess_bounds()
CCCmaYR.coord('longitude').guess_bounds()
CRUYR.coord('latitude').guess_bounds()
CRUYR.coord('longitude').guess_bounds()
#Returns an array of area weights, with the same dimensions as the cube
CCCmaYR_grid_areas = iris.analysis.cartography.area_weights(CCCmaYR)
CRUYR_grid_areas = iris.analysis.cartography.area_weights(CRUYR)
#We want to plot the mean for the whole region so we need a mean of all the lats and lons
CCCmaYR_mean = CCCmaYR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CCCmaYR_grid_areas)
CRUYR_mean = CRUYR.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CRUYR_grid_areas)
#PART 4: PLOT LINE GRAPH
#assign the line colours and set x axis to months
qplt.plot(CCCmaYR_mean.coord('month'),CCCmaYR_mean, label='CanRCM4_ERAINT', lw=1.5, color='blue')
qplt.plot(CRUYR_mean.coord('month'), CRUYR_mean, label='Observed', lw=2, color='black')
#create a legend and set its location to under the graph
plt.legend(loc="upper center", bbox_to_anchor=(0.5,-0.05), fancybox=True, shadow=True, ncol=2)
#create a title
plt.title('Mean Near Surface Temperature for Malawi by month 1989-2008', fontsize=11)
#add grid lines
plt.grid()
#save the image of the graph and include full legend
#plt.savefig('ERAINT_Temperature_LineGraph_Annual', bbox_inches='tight')
#show the graph in the console
iplt.show()
if __name__ == '__main__':
main()
This produces a plot which looks like this:
How can I change the tick marks to show me all month names? I would also like the graph to finish at December (no white space after).
Similarly, for the yearly line graph, here is the simplified code:
import matplotlib.pyplot as plt
import iris
import iris.coord_categorisation as iriscc
import iris.plot as iplt
import iris.quickplot as qplt
import iris.analysis.cartography
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
#PART 1: CORDEX MODELS
#bring in all the models we need and give them a name
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#remove flat latitude and longitude and only use grid latitude and grid longitude to make consistent with the observed data, also make sure all of the longitudes are monotonic
lats = iris.coords.DimCoord(CCCma.coord('latitude').points[:,0], \
standard_name='latitude', units='degrees')
lons = CCCma.coord('longitude').points[0]
for i in range(len(lons)):
if lons[i]>100.:
lons[i] = lons[i]-360.
lons = iris.coords.DimCoord(lons, \
standard_name='longitude', units='degrees')
CCCma.remove_coord('latitude')
CCCma.remove_coord('longitude')
CCCma.remove_coord('grid_latitude')
CCCma.remove_coord('grid_longitude')
CCCma.add_dim_coord(lats, 1)
CCCma.add_dim_coord(lons, 2)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CCCma = CCCma.extract(Malawi)
#time constraignt to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#PART 2: OBSERVED DATA
#bring in all the files we need and give them a name
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#define the latitude and longitude
lats = iris.coords.DimCoord(CRU.coord('latitude').points, \
standard_name='latitude', units='degrees')
lons = CRU.coord('longitude').points
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CRU = CRU.extract(Malawi)
#time constraignt to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CRU = CRU.extract(t_constraint)
#PART 3: FORMAT DATA
#data is in Kelvin, but we would like to show it in Celcius
CCCma.convert_units('Celsius')
#add years to data
iriscc.add_year(CCCma, 'time')
iriscc.add_year(CRU, 'time')
#We are interested in plotting the data by month, so we need to take a mean of all the data by month
CCCma = CCCma.aggregated_by('year', iris.analysis.MEAN)
CRU = CRU.aggregated_by('year', iris.analysis.MEAN)
#regridding scheme requires spatial areas, therefore the longitude and latitude coordinates must be bounded. If the longitude and latitude bounds are not defined in the cube we can guess the bounds based on the coordinates
CCCma.coord('latitude').guess_bounds()
CCCma.coord('longitude').guess_bounds()
CRU.coord('latitude').guess_bounds()
CRU.coord('longitude').guess_bounds()
#Returns an array of area weights, with the same dimensions as the cube
CCCma_grid_areas = iris.analysis.cartography.area_weights(CCCma)
CRU_grid_areas = iris.analysis.cartography.area_weights(CRU)
#We want to plot the mean for the whole region so we need a mean of all the lats and lons
CCCma_mean = CCCma.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CCCma_grid_areas)
CRU_mean = CRU.collapsed(['latitude', 'longitude'], iris.analysis.MEAN, weights=CRU_grid_areas)
#PART 4: PLOT LINE GRAPH
#assign the line colours
qplt.plot(CCCma_mean.coord('year'), CCCma_mean, label='CanRCM4_ERAINT', lw=1.5, color='blue')
qplt.plot(CRU_mean.coord('year'), CRU_mean, label='Observed', lw=2, color='black')
#create a legend and set its location to under the graph
plt.legend(loc="upper center", bbox_to_anchor=(0.5,-0.05), fancybox=True, shadow=True, ncol=2)
#create a title
plt.title('Mean Near Surface Temperature for Malawi 1989-2008', fontsize=11)
#add grid lines
plt.grid()
#save the image of the graph and include full legend
#plt.savefig('ERAINT_Temperature_LineGraph_Annual', bbox_inches='tight')
#show the graph in the console
iplt.show()
if __name__ == '__main__':
main()
and this produces this graph:
As you can see I have limited my data from 1989 to 2008, but the axis goes from 1985 to 2010, how can I make this tighter?
Thank you!
For your monthly graph you may be able to change it by setting the xticks - this has to be numeric but you can also set labels to use instead of the numbers. Something like
plt.xticks(range(12), calendar.month_abbr[1:13])
may work (depends on the format of your data, you may need to plot month number rather than month name). You will need to import calendar to get the above working.
For your yearly graph you should just be able to set the x-axis limits using
plt.xlim((xmin, xmax))
where xmin is probably 1989 and xmax is 2008.

Get density of messages on distinct ID's

Imagine that there is 10 houses, where there can be one to an infinite number of persons. Each of those persons sends a number of messages, containing their userid and the house number. This can be from 1 to infinite number of messages. I want to know the average number of messages that is sent by each person, for each house, to later plot which house got the largest number of average messages.
Now, that I've explained conceptually, the houses aren't houses, but latitudes, from f.ex -90 to -89 etc. And that a person can send messages from different houses.
So I've got a database with latitude and senderID. I want to plot the density of latitudes pr unique senderID:
Number of rows/Number of unique userids at each latitude over an interval
This is an sample input:
lat = [-83.76, -44.88, -38.36, -35.50, -33.99, -31.91, -27.56, -22.95,
40.72, 47.59, 54.42, 63.84, 76.77, 77.43, 78.54]
userid= [5, 7, 6, 6, 6, 6, 5, 2,
2, 2, 1, 5, 10, 9 ,8]
Here are the corresponding densities:
-80 to -90: 1
-40 to -50: 1
-30 to -40: 4
-20 to -30: 1
40 to 50: 2
50 to 60: 1
60 to 70: 1
70 to 80: 1
An other input:
lat = [70,70,70,70,70,80,80,80]
userid = [1,2,3,4,5,1,1,2]
The density for latitude 70 is 1, while the density for latitude 80 is 1.5.
If I would do this through a database query/pseudocode I would do something like:
SELECT count(latitude) FROM messages WHERE latitude < 79 AND latitude > 69
SELECT count(distinct userid) FROM messages WHERE latitude < 79 AND latitude > 69
The density would then be count(latitude)/count(distinct userid) - also to be interpreted as totalmessagesFromCertainLatitude/distinctUserIds. This would be repeated for intervals from -90 to 90, i.e -90<latitude<-89 up to 89<latitude<90
To get any help with this is probably a far stretch, but I just cant organize my thoughts to do this while I'm confident there are no errors. I would be happy for anything. I'm sorry if I was unclear.
Because this packs so neatly into pandas' built-ins, it's probably fast in pandas for big datasets.
lat = [-83.76, -44.88, -38.36, -35.50, -33.99, -31.91, -27.56, -22.95,
40.72, 47.59, 54.42, 63.84, 76.77, 77.43, 78.54]
userid= [5, 7, 6, 6, 6, 6, 5, 2,
2, 2, 1, 5, 10, 9 ,8]
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from math import floor
df = pd.DataFrame(zip(userid,lat), columns = ['userid','lat']
)
df['zone'] = map(lambda x: floor(x) * 10,df.lat/10) # for ten-degree zones
zonewidth=10
#df['zone'] = map(floor, df.lat) # for one-degree zones
#zonewidth=1 # ditto
dfz = df.groupby('zone') #returns a dict of dataframes
#for k, v in dfz: # useful for exploring the GroupBy object
# print(k, v.userid.values, float(len(v.userid.values))/len(set(v.userid.values)))
p = [(k, float(len(v.userid.values))/len(set(v.userid.values))) for k, v in dfz]
# plotting could be tightened up -- PatchCollection?
R = [Rectangle((x, 0), zonewidth, y, facecolor='red', edgecolor='black',fill=True) for x, y in p]
fig, ax = plt.subplots()
for r in R:
ax.add_patch(r)
plt.xlim((-90, 90))
tall = max([r.get_height() for r in R])
plt.ylim((0, tall + 0.5))
plt.show()
For the first set of test data:
I'm not 100% sure I've understood the output you want, but this will produce a stepped, cumulative histogram-like plot with the x-axis being latitudes (binned) and the y axis being the density you define above.
From your sample code, you already have numpy installed and are happy to use it. The approach I would take is to get two data sets rather like what would be returned by your SQL sample and then use them to get the densities and then plot. Using your existing latitude / userid data format - it might look something like this
EDIT: Removed first version of code from here and some comments which were redundant following clarification and question edits from the OP
Following comments and OP clarification - I think this is what is desired:
import numpy as np
import matplotlib.pyplot as plt
from itertools import groupby
import numpy as np
import matplotlib.pyplot as plt
from itertools import groupby
def draw_hist(latitudes,userids):
min_lat = -90
max_lat = 90
binwidth = 1
bin_range = np.arange(min_lat,max_lat,binwidth)
all_rows = zip(latitudes,userids)
binned_latitudes = np.digitize(latitudes,bin_range)
all_in_bins = zip(binned_latitudes,userids)
unique_in_bins = list(set(all_in_bins))
all_in_bins.sort()
unique_in_bins.sort()
bin_count_all = []
for bin, group in groupby(all_in_bins, lambda x: x[0]):
bin_count_all += [(bin, len([k for k in group]))]
bin_count_unique = []
for bin, group in groupby(unique_in_bins, lambda x: x[0]):
bin_count_unique += [(bin, len([ k for k in group]))]
# bin_count_all and bin_count_unique now contain the data
# corresponding to the SQL / pseudocode in your question
# for each latitude bin
bin_density = [(bin_range[b-1],a*1.0/u) for ((b,a),(_,u)) in zip(bin_count_all, bin_count_unique)]
bin_density = np.array(bin_density).transpose()
# plot as standard bar - note you can put uneven widths in
# as an array-like here if necessary
# the * simply unpacks the x and y values from the density
plt.bar(*bin_density, width=binwidth)
plt.show()
# can save away plot here if desired
latitudes = [-70.5, 5.3, 70.32, 70.43, 5, 32, 80, 80, 87.3]
userids = [1,1,2,2,4,5,1,1,2]
draw_hist(latitudes,userids)
Sample output with different bin widths on OP dataset
I think this solves the case, allthough it isn't efficient at all:
con = lite.connect(databasepath)
binwidth = 1
latitudes = []
userids = []
info = []
densities = []
with con:
cur = con.cursor()
cur.execute('SELECT latitude, userid FROM dynamicMessage')
con.commit()
print "executed"
while True:
tmp = cur.fetchone()
if tmp != None:
info.append([float(tmp[0]),float(tmp[1])])
else:
break
info = sorted(info, key=itemgetter(0))
for x in info:
latitudes.append(x[0])
userids.append(x[1])
x = 0
latitudecount = 0
for b in range(int(min(latitudes)),int(max(latitudes))+1):
numlatitudes = sum(i<b for i in latitudes)
if numlatitudes > 1:
tempdensities = latitudes[0:numlatitudes]
latitudes = latitudes[numlatitudes:]
tempuserids = userids[0:numlatitudes]
userids = userids[numlatitudes:]
density = numlatitudes/len(list(set(tempuserids)))
if density>1:
tempdensities = [b]*int(density)
densities.extend(tempdensities)
plt.hist(densities, bins=len(list(set(densities))))
plt.savefig('latlongstats'+'t'+str(time.strftime("%H:%M:%S")), format='png')
What follows is not a complete solution in terms of plotting the required histogram, but I think it's nevertheless worthy of being reported
The bulk of the solution, we scan the array of tuples to select the ones in the required range and we count
the number of selected tuples
the unique ids, using a trick consisting in creating a set (this discards automatically the duplicates) and computing its numerosity
eventually we return the required ratio or zero if the count of distinct ids is zero
def ratio(d, mn, mx):
tmp = [(lat, uid) for lat, uid in d if mn <= lat < mx]
nlats, nduids = len(tmp), len({t[1] for t in tmp})
return 1.0*nlats/nduids if nduids>0 else 0
The data is input and assigned, via zip, to a list of tuples
lat = [-83.76, -44.88, -38.36, -35.50, -33.99, -31.91, -27.56, -22.95,
-19.00, -12.32, -6.14, -1.11, 4.40, 10.23, 19.40, 31.18,
40.72, 47.59, 54.42, 63.84, 76.77]
userid= [52500.0, 70100.0, 35310.0, 47776.0, 70100.0, 30991.0, 37328.0, 25575.0,
37232.0, 6360.0, 52908.0, 52908.0, 52908.0, 77500.0, 345.0, 6360.0,
3670.0, 36690.0, 3720.0, 2510.0, 2730.0]
data = zip(lat,userid)
preparation of the bins
extremes = range(-90,91,10)
intervals = zip(extremes[:-1],extremes[1:])
actual computation, the result is a list of floats that can be passed to the relevant pyplot functions
ratios = [ratio(data,*i) for i in intervals]
print ratios
# [1.0, 0, 0, 0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0, 1.0, 1.0, 1.0, 1.0, 1.0, 0]

Covariance/heat flux in Python

I'm looking to compute poleward heat fluxes at a level in the atmosphere, i.e the mean of (u't') . I'm aware of the covariance function in NumPy, but cannot seem to implement it. Here is my code below.
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
myfile = '/home/ubuntu/Fluxes_Test/out.nc'
Import = Dataset(myfile, mode='r')
lon = Import.variables['lon'][:] # Longitude
lat = Import.variables['lat'][:] # Latitude
time = Import.variables['time'][:] # Time
lev = Import.variables['lev'][:] # Level
wind = Import.variables['ua'][:]
temp = Import.variables['ta'][:]
lon = lon-180 # to shift co-ordinates to -180 to 180.
variable1 = np.squeeze(wind,temp, axis=0)
variable2 = np.cov(variable1)
m = Basemap(resolution='l')
lons, lats = np.meshgrid(lon,lat)
X, Y = m(lons, lats)
cs = m.pcolor(X,Y, variable2)
plt.show()
The shape of the variables wind and temp which I am trying to compute the flux of (the covariance) are both (3960,64,128), so 3960 pieces of data on a 64x128 grid (with co-ordinates).
I tried squeezing both variables to produce a array of (3960, 3960, 64,128) so cov could work on these first two series of data (the two 3960's) of wind and temp, but this didn't work.

Categories

Resources