Plotting a large database in Basemap: Memory error - python

Hi, I extract the data from an interpolation (the data are on the Basemap grid) with the function below and save them as CSV:
def inter_todf(interpolation, grid):
    """Flatten an interpolated field and its grid into a long-format DataFrame.

    The projected ``grid['x']`` / ``grid['y']`` coordinates are converted back
    to longitude / latitude with the module-level ``basemap`` instance.  The
    converted coordinates are kept in local variables, so ``grid`` itself is
    left untouched and the function can be called repeatedly with the same
    grid without inverse-projecting already converted values.

    Parameters
    ----------
    interpolation : array with a ``reshape`` method
        Interpolated values, one per grid node.
    grid : mapping
        Provides the projected node coordinates under ``'x'`` and ``'y'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Latitude``, ``Longitude`` and ``Value``, one row per node.
    """
    # Convert the grid back to longitude / latitude.
    lon, lat = basemap(grid['x'], grid['y'], inverse=True)
    return pd.DataFrame({
        'Latitude': lat.reshape(-1),
        'Longitude': lon.reshape(-1),
        'Value': interpolation.reshape(-1),
    })
# Flatten the interpolation result into one row per grid node.
dfl= inter_todf(interpolation, grid)
# The output file name is the input name with ".csv" replaced by "grid.csv".
dfl.to_csv(plot_folder+'Dezember/'+file.replace(".csv", "grid.csv"))
Afterwards I want to plot them again with Basemap in another file (I still plan to modify the data in between, but that is a separate matter). I do the plotting with this code:
from traceback import print_tb
import numpy as np
from pykrige.ok import OrdinaryKriging
from pykrige.kriging_tools import write_asc_grid
import pykrige.kriging_tools as kt
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Path, PathPatch
import pandas as pd
def load_data():
    """Read ``GridFile.csv`` (relative to the working directory) into a DataFrame."""
    return pd.read_csv(r"GridFile.csv")
def get_data(df):
    """Extract the coordinate and value columns as flat NumPy arrays.

    The arrays are flattened with ``reshape(-1)`` so the function works for
    any number of rows instead of only for a file with exactly 52920 rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Longitude``, ``Latitude`` and ``Value``.

    Returns
    -------
    dict
        Keys ``"lons"``, ``"lats"`` and ``"values"``, each a 1-D array with
        one element per row of ``df``.
    """
    return {
        "lons": df['Longitude'].values.reshape(-1),
        "lats": df['Latitude'].values.reshape(-1),
        "values": df['Value'].values.reshape(-1),
    }
def extend_data(data):
    """Triplicate the points with longitudes shifted by -360, 0 and +360.

    The shift is done with vectorized array arithmetic instead of a Python
    loop over every element.  The input dict and its arrays are not modified.

    NOTE(review): the result holds three times as many points as the input.
    The shape in the reported MemoryError, (12602289420,), equals
    n * (n - 1) / 2 for n = 3 * 52920, i.e. one entry per pair of points --
    reducing the number of points passed on is what lowers that allocation.

    Parameters
    ----------
    data : dict
        Keys ``"lons"``, ``"lats"`` and ``"values"`` holding 1-D sequences of
        equal length.

    Returns
    -------
    dict
        Same keys, each array three times as long as the input.
    """
    lons = np.asarray(data["lons"])
    return {
        "lons": np.concatenate([lons - 360, lons, lons + 360]),
        "lats": np.concatenate([data["lats"]] * 3),
        "values": np.concatenate([data["values"]] * 3),
    }
def generate_grid(data, basemap, delta=1):
    """Build a regular lon/lat grid and its projected node coordinates.

    Parameters
    ----------
    data
        Not used by this function.
    basemap : callable
        Called as ``basemap(lon_mesh, lat_mesh)``; must return the projected
        ``(x, y)`` arrays.
    delta : number
        Step passed to ``np.arange`` for both axes.

    Returns
    -------
    dict
        ``'lon'`` and ``'lat'`` are the 1-D axes, ``'x'`` and ``'y'`` the
        projected 2-D node coordinates.
    """
    lon_axis = np.arange(-180, 180, delta)
    lat_axis = np.arange(-89.9, 89.9, delta)
    lon_mesh, lat_mesh = np.meshgrid(lon_axis, lat_axis)
    projected_x, projected_y = basemap(lon_mesh, lat_mesh)
    return {
        'lon': lon_axis,
        'lat': lat_axis,
        'x': projected_x,
        'y': projected_y,
    }
def interpolate(data, grid):
    """Krige the scattered values onto the grid axes.

    An ``OrdinaryKriging`` model with the ``'exponential'`` variogram is
    fitted to ``data["lons"]``, ``data["lats"]`` and ``data["values"]`` and
    then executed in ``"grid"`` style on ``grid["lon"]`` / ``grid["lat"]``.

    Returns whatever ``OrdinaryKriging.execute`` returns.
    """
    kriging_model = OrdinaryKriging(
        data["lons"],
        data["lats"],
        data["values"],
        variogram_model='exponential',
    )
    result = kriging_model.execute("grid", grid["lon"], grid["lat"])
    return result
def prepare_map_plot():
    """Create a 10x10 figure with a Robinson-projection Basemap drawn on its axes.

    Returns the tuple ``(figure, axes, basemap)``.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    map_options = dict(
        projection='robin',
        lon_0=0,
        lat_0=0,
        resolution='l',
        area_thresh=1000000,
    )
    world_map = Basemap(ax=ax, **map_options)
    return fig, ax, world_map
def plot_mesh_data(interpolation, grid, basemap):
    """Draw the interpolated field as filled contours with a colour bar below.

    ``grid["x"]`` / ``grid["y"]`` give the node positions; 32 contour levels
    and the ``'RdBu_r'`` colour map are used.  Nothing is returned.
    """
    # Plot the data on the map.
    filled_contours = basemap.contourf(
        grid["x"], grid["y"], interpolation, 32, cmap='RdBu_r'
    )
    basemap.colorbar(filled_contours, location='bottom', pad="10%")
# Load the saved grid values and pull out flat lon/lat/value arrays.
df = load_data()
base_data = get_data(df)
# print(df['Latitude'].shape)
figure, axes, basemap = prepare_map_plot()
# Build the target grid with a step of 40 (passed as `delta` to np.arange).
grid = generate_grid(base_data, basemap, 40)
# Triplicate the points with longitudes shifted by -360 / +360 before kriging.
extended_data = extend_data(base_data)
interpolation, interpolation_error = interpolate(extended_data, grid)
plot_mesh_data(interpolation, grid,basemap)
plt.show()
Unfortunately I get an error message:
numpy.core._exceptions.MemoryError: Unable to allocate 93.9 GiB for an array with shape (12602289420,) and data type float64
How do I have to change my data so that it no longer uses so much memory?

Related

I used a choropleth library but the output is not colored

The code here is in Python and draws a map of the districts of Haryana, for which I used a GeoJSON file of Haryana.
import pandas as pd
import json
import plotly.express as px
import numpy as np
import webbrowser
import plotly.io as pio
pio.renderers.default = 'browser'
# Load the district boundaries; the context manager closes the file handle
# once the GeoJSON has been parsed.
with open("haryanas.geojsons", 'r') as geojson_file:
    haryana_districts = json.load(geojson_file)
haryana_districts['features'][2]
# Copy each feature's ID_2 property to a top-level 'id' and remember which
# id belongs to which district name.
district_id_map = {}
for feature in haryana_districts['features']:
    feature['id'] = feature['properties']['ID_2']
    district_id_map[feature['properties']['NAME_2']] = feature['id']
# NOTE(review): `df` is not defined in this snippet; it is presumably the
# author's population table -- confirm against the full script.
df['populationScale'] = np.log10(df['population'])
df.head()
haryana_districts['features'][1]['properties']
fig = px.choropleth_mapbox(df,
                           locations='id',
                           geojson=haryana_districts,
                           color='populationScale',
                           hover_name='District',
                           hover_data=['population'],
                           mapbox_style="carto-positron",
                           color_continuous_scale=px.colors.diverging.BrBG,
                           color_continuous_midpoint=0)
fig.update_geos(fitbounds='locations', visible=False)
fig.show()
the output :
The output figure is just this blank screen.
Clearly I don't have access to your dataframe of population data, so I have sourced it from https://www.indiacensus.net/states/haryana
Additionally, I have found a GeoJSON of the districts in Haryana.
solution
There is no need to loop through and amend the GeoJSON; use locations and featureidkey instead: https://plotly.com/python/mapbox-county-choropleth/#indexing-by-geojson-properties
import pandas as pd
import plotly.express as px
import requests, io
# get population data
# Parse the HTML tables on the page, keep the one at index 4 and rename its
# columns to the names used further down.
df = pd.read_html(
io.StringIO(requests.get("https://www.indiacensus.net/states/haryana").text)
)[4].rename(
columns={
"S.No.": "id",
"Estimated Population in 2022": "population",
"District Name": "District",
}
)
df["id"] = df["id"].astype(int)
# The colour column is the population itself here (no log scaling).
df["populationScale"] = df["population"]
# pio.renderers.default = 'browser'
# Download the district boundaries and parse the response as JSON.
haryana_districts = requests.get(
"https://raw.githubusercontent.com/shuklaneerajdev/IndiaStateTopojsonFiles/master/Haryana.geojson"
).json()
# Rows are matched to features by name: `locations` names the DataFrame
# column and `featureidkey` the GeoJSON property path, so the GeoJSON does
# not have to be amended.
fig = px.choropleth_mapbox(
df,
locations="District",
featureidkey="properties.Dist_Name",
geojson=haryana_districts,
color="populationScale",
hover_name="District",
hover_data=["population"],
mapbox_style="carto-positron",
color_continuous_scale=px.colors.diverging.BrBG,
# color_continuous_midpoint=0,
).update_layout(
mapbox={"center": {"lon": 76.93018408903318, "lat": 30.3276671165594}, "zoom": 5}
)
# fig.update_geos(fitbounds='locations', visible=False)
fig.show()

How can I return an interactive map?

I have this code for a map using a netcdf file of European Centre for Medium-Range Weather Forecasts.
def interactive_map():
    """Draw the ``pm1`` field of ``7.nc`` on a Mercator map and save it as ``map.png``.

    The slice at index 0 of the first axis of ``pm1`` (presumably the first
    time step -- confirm against the file) is plotted with ``pcolor`` using
    the ``'turbo'`` colour map, together with coastlines, state and country
    borders and a colour bar on the right.  The image is written to the
    current working directory; nothing is returned.
    """
    import netCDF4 as nc
    import numpy as np
    import matplotlib.pyplot as plt
    from mpl_toolkits.basemap import Basemap

    # original name -> _grib2netcdf-webmars-public-svc-green-007-6fe5cac1a363ec1525f54343b6cc9fd8-b5oXS9.nc
    fn = '7.nc'
    # Read the arrays while the file is open; the context manager closes the
    # dataset afterwards instead of leaving the handle open.
    with nc.Dataset(fn) as ds:
        # variables in the file: longitude, latitude, time, pm1
        lons = ds.variables['longitude'][:]
        lats = ds.variables['latitude'][:]
        pm = ds.variables['pm1'][:]
    mp = Basemap(projection='merc',
                 llcrnrlon=97.085310,
                 llcrnrlat=2.044212,
                 urcrnrlon=106.896219,
                 urcrnrlat=7.403567,
                 resolution='i')
    lon, lat = np.meshgrid(lons, lats)
    x, y = mp(lon, lat)
    cscheme = mp.pcolor(x, y, np.squeeze(pm[0, :, :]), cmap='turbo')
    mp.drawcoastlines()
    mp.drawstates()
    mp.drawcountries()
    mp.colorbar(cscheme, location='right')
    plt.savefig('map.png')
https://i.imgur.com/4lndRuY.png
What I want is to add the map to a web2py view. I know it saves an image and I can render that image, but is it possible to show it as an interactive map?
This is what i have to show the map in a view:
<!--index.html-->
{{extend 'layout.html'}}
<img src="{{=URL('static','images/map.png')}}" width="500" height="600">
My package ncplot (https://ncplot.readthedocs.io/en/latest/) will automatically create interactive plots for NetCDF files. You just need to do the following:
from ncplot import ncplot
ncplot(fn)

Getting "keyerror: 0" when running my code

I am getting the error below when I try to plot a short-time energy function. Please help me solve this problem.
Code:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc
from python_speech_features import logfbank
import librosa
def plot_ste(ste):
    """Plot the first entry of ``ste`` on a 1x1 grid of axes.

    Parameters
    ----------
    ste : dict
        Maps a title to a pair indexable as ``data[0]`` / ``data[1]``;
        ``data[1]`` is plotted on the horizontal axis and ``data[0]`` on the
        vertical axis.

    ``squeeze=False`` keeps ``axes`` a 2-D array even for a single subplot,
    so ``axes[row, col]`` is valid.  The grid indices are named ``row`` /
    ``col`` so they are not overwritten by the unpacked signal ``x``.
    """
    fig, axes = plt.subplots(nrows=1, ncols=1, sharex=False,
                             sharey=False, figsize=(400, 50),
                             squeeze=False)
    fig.suptitle('Short Time Energy', size=100, y=1.02)
    titles = list(ste.keys())
    entries = list(ste.values())
    i = 0
    for row in range(1):
        for col in range(1):
            data = entries[i]
            x, win = data[0], data[1]
            ax = axes[row, col]
            ax.set_title(titles[i])
            ax.plot(win, x)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            i += 1
def ste(x, win):
    """Compute short-time energy.

    Parameters
    ----------
    x : numpy.ndarray
        Input signal.
    win : str or numpy.ndarray
        Window samples, or a window name understood by
        ``scipy.signal.get_window``; a named window gets a length of
        ``max(1, len(x) // 8)``.

    Returns
    -------
    numpy.ndarray
        ``x**2`` convolved with the squared, length-normalised window, with
        the same length as ``x`` (``mode="same"``).
    """
    # Imported here because the surrounding script only imports `scipy.io`,
    # which leaves the name `scipy` itself undefined.
    import scipy.signal

    if isinstance(win, str):
        win = scipy.signal.get_window(win, max(1, len(x) // 8))
    win = win / len(win)
    return scipy.signal.convolve(x**2, win**2, mode="same")
# Collect the distinct class labels from the ID column.
df = pd.read_csv('/dir/to/a.csv')
df.set_index('fname', inplace=True)
classes = list(np.unique(df.ID))
df.reset_index(inplace=True)
# NOTE(review): this rebinds the name `ste`, shadowing the ste() function
# defined above, so the function can no longer be called from here on.
ste = {}
for c in classes:
wav_file = df[df.ID==c].iloc[0, 0]
# NOTE(review): the path is a literal string; `wav_file` is not used.
signal, rate = librosa.load('/dir/to/wav_file')
# NOTE(review): this stores the dict in itself rather than a (signal, window)
# pair. plot_ste then evaluates `data[0]` on a dict, which matches the
# reported `KeyError: 0`.
ste[c] = ste
plot_ste(ste)
plt.show()
Error:
File "/home/Desktop/Program/stft_plot_full_Dir.py", line 35, in plot_ste
x, win = data[0], data[1]
KeyError: 0

Why can't I get points to show on a Bokeh map in Python?

I'm trying to plot some datapoint on a map in Bokeh but somehow nothing shows up, only the map background.
import pandas as pd
from IPython.core.display import HTML, display
%matplotlib inline
sample = pd.DataFrame({'Lat': [40.7260,40.7209], 'Lon': [-73.991,-74.0507], 'Count': 1})
from bokeh.plotting import figure, output_notebook, show
output_notebook()
from bokeh.tile_providers import STAMEN_TERRAIN
# Axis ranges handed to figure() by base_plot below.
x_range, y_range = ((-8242000,-8210000), (4965000,4990000))
plot_width = int(750)
# Height is the width floor-divided by 1.2.
plot_height = int(plot_width//1.2)
def base_plot(tools='pan,wheel_zoom,reset',plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Create a borderless figure with hidden axes and no grid lines.

    The figure uses the module-level ``x_range`` / ``y_range``; any extra
    keyword arguments are forwarded to ``figure``.  Returns the figure.
    """
    border_names = (
        'min_border',
        'min_border_left',
        'min_border_right',
        'min_border_top',
        'min_border_bottom',
    )
    zero_borders = {name: 0 for name in border_names}
    plot = figure(tools=tools, plot_width=plot_width, plot_height=plot_height,
                  x_range=x_range, y_range=y_range, outline_line_color=None,
                  **zero_borders, **plot_args)
    plot.axis.visible = False
    for grid in (plot.xgrid, plot.ygrid):
        grid.grid_line_color = None
    return plot
p = base_plot()
p.add_tile(STAMEN_TERRAIN)
# NOTE(review): `samples` and `options` are not defined in this snippet (the
# DataFrame above is named `sample`). The Lat/Lon columns hold degrees, while
# x_range / y_range above are given in much larger units.
p.circle(x=samples['Lat'], y=samples['Lon'], **options)
show(p)
Thanks for advice.
The plot ranges are in Web Mercator units:
((-8242000,-8210000), (4965000,4990000))
But the data points in your sample DataFrame are in lat/lon units. You can either:
add an "extra range" in lat/lon units (that match up!) and have p.circle reference the extra range instead of the default range.
Convert your circle coordinates to Web Mercator
The latter is probably easier. This page has a function that can do the conversion. Using it, you'd get
sample = pd.DataFrame({
'easting': [-8236640.443285105, -8243286.216885463],
'northing': [4972010.345629457, 4971261.231184175]
})
Updating your code to use this:
import pandas as pd
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.tile_providers import STAMEN_TERRAIN
# The two sample points, already converted to Web Mercator coordinates.
samples = pd.DataFrame({
'easting': [-8236640.443285105, -8243286.216885463],
'northing': [4972010.345629457, 4971261.231184175]
})
# Axis ranges handed to figure() by base_plot below.
x_range, y_range = ((-8242000,-8210000), (4965000,4990000))
plot_width = int(750)
# Height is the width floor-divided by 1.2.
plot_height = int(plot_width//1.2)
def base_plot(tools='pan,wheel_zoom,reset',plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Create a borderless figure with hidden axes and no grid lines.

    The figure uses the module-level ``x_range`` / ``y_range``; any extra
    keyword arguments are forwarded to ``figure``.  Returns the figure.
    """
    border_names = (
        'min_border',
        'min_border_left',
        'min_border_right',
        'min_border_top',
        'min_border_bottom',
    )
    zero_borders = {name: 0 for name in border_names}
    plot = figure(tools=tools, plot_width=plot_width, plot_height=plot_height,
                  x_range=x_range, y_range=y_range, outline_line_color=None,
                  **zero_borders, **plot_args)
    plot.axis.visible = False
    for grid in (plot.xgrid, plot.ygrid):
        grid.grid_line_color = None
    return plot
p = base_plot()
p.add_tile(STAMEN_TERRAIN)
# The point coordinates are in the same Web Mercator units as x_range / y_range.
p.circle(x=samples['easting'], y=samples['northing'], size=20, color="red")
# Direct the output to map.html, then display the plot.
output_file("map.html")
show(p)
yields this plot:

How do I animate a scatterplot over a basemap in matplotlib?

The code below generates a animated basemap, but not exactly the one I want: I want the scatterplot from the previous frame to disappear, but it persists through the remainder of the animation.
I suspect it has something to do with my not understanding what the basemap really is. I understand calling it on lat/lons to project them to x/y, but I don't entirely get what's going on when I call event_map.scatter().
import random
import os
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib import animation
import pandas as pd
from IPython.display import HTML
# Enables animation display directly in IPython
#(http://jakevdp.github.io/blog/2013/05/12/embedding-matplotlib-animations/)
from tempfile import NamedTemporaryFile
VIDEO_TAG = """<video controls>
<source src="data:video/x-m4v;base64,{0}" type="video/mp4">
Your browser does not support the video tag.
</video>"""
def anim_to_html(anim):
    """Return an HTML ``<video>`` snippet with the animation embedded as base64.

    On the first call the animation is saved to a temporary ``.mp4`` file
    (20 fps, ``libx264`` codec), read back and base64-encoded; the encoded
    text is cached on ``anim._encoded_video`` and reused by later calls.

    Parameters
    ----------
    anim
        Animation object providing ``save(filename, fps=..., extra_args=...)``.

    Returns
    -------
    str
        ``VIDEO_TAG`` with the encoded video inserted.
    """
    import base64

    if not hasattr(anim, '_encoded_video'):
        with NamedTemporaryFile(suffix='.mp4') as f:
            anim.save(f.name, fps=20, extra_args=['-vcodec', 'libx264'])
            # Close the read handle explicitly instead of leaving it to the
            # garbage collector.
            with open(f.name, "rb") as video_file:
                video = video_file.read()
        # `bytes` has no "base64" codec on Python 3; b64encode works on both
        # major versions. Decode to text so it can be formatted into HTML.
        anim._encoded_video = base64.b64encode(video).decode('ascii')
    return VIDEO_TAG.format(anim._encoded_video)
def display_animation(anim):
    """Close the animation's figure and return the animation wrapped in ``HTML``."""
    plt.close(anim._fig)
    markup = anim_to_html(anim)
    return HTML(markup)
animation.Animation._repr_html_ = anim_to_html
FRAMES = 20
POINTS_PER_FRAME = 30
LAT_MIN = 40.5
LAT_MAX = 40.95
LON_MIN = -74.15
LON_MAX = -73.85
FIGSIZE = (10,10)
MAP_BACKGROUND = '.95'
MARKERSIZE = 20
#Make Sample Data
# One DataFrame of random points per frame, keyed by frame number.
data_frames = {}
for i in range(FRAMES):
    # Latitudes are drawn before longitudes, POINTS_PER_FRAME values each.
    lats = [random.uniform(LAT_MIN, LAT_MAX) for _ in range(POINTS_PER_FRAME)]
    lons = [random.uniform(LON_MIN, LON_MAX) for _ in range(POINTS_PER_FRAME)]
    data_frames[i] = pd.DataFrame({'lat': lats, 'lon': lons})
class AnimatedMap(object):
    """An animated scatter plot over a basemap."""

    def __init__(self, data_frames):
        """Create the figure, the Mercator basemap and the animation.

        ``data_frames`` maps a frame number to a DataFrame with ``lat`` and
        ``lon`` columns; it is stored as ``self.dfs``.
        """
        self.dfs = data_frames
        self.fig = plt.figure(figsize=FIGSIZE)
        self.event_map = Basemap(
            projection='merc',
            resolution='i', area_thresh=1.0,  # Medium resolution
            lat_0=(LAT_MIN + LAT_MAX) / 2, lon_0=(LON_MIN + LON_MAX) / 2,  # Map center
            llcrnrlon=LON_MIN, llcrnrlat=LAT_MIN,  # Lower left corner
            urcrnrlon=LON_MAX, urcrnrlat=LAT_MAX)  # Upper right corner
        self.ani = animation.FuncAnimation(self.fig, self.update, frames=FRAMES, interval=1000,
                                           init_func=self.setup_plot, blit=True,
                                           repeat=False)

    def setup_plot(self):
        """Draw the static map layers and create an empty scatter collection.

        Returns a one-element tuple: with ``blit=True`` the init function
        has to return a sequence of artists, not a bare artist.
        """
        self.event_map.drawcoastlines()
        self.event_map.drawcounties()
        self.event_map.fillcontinents(color=MAP_BACKGROUND)  # Light gray
        self.event_map.drawmapboundary()
        self.scat = self.event_map.scatter(x=[], y=[], s=MARKERSIZE, marker='o', zorder=10)
        return self.scat,

    def project_lat_lons(self, i):
        """Return frame ``i`` with projected ``x`` / ``y`` columns joined on.

        The frame is read from ``self.dfs`` (the data passed to the
        constructor) rather than from a module-level variable.
        """
        df = self.dfs[i]
        x, y = self.event_map(df.lon.values, df.lat.values)
        x_y = pd.DataFrame({'x': x, 'y': y}, index=df.index)
        df = df.join(x_y)
        return df

    def update(self, i):
        """Update the scatter plot.

        NOTE(review): this creates a new scatter collection on every call and
        rebinds ``self.scat`` to it; the collections drawn for earlier frames
        are not removed, so their points stay on the map.
        """
        df = self.project_lat_lons(i)
        self.scat = self.event_map.scatter(x=df.x.values, y=df.y.values, marker='o', zorder=10)
        return self.scat,
s = AnimatedMap(data_frames)
s.ani
It looks like you're simply adding a new scatter plot at each update. What you should do instead is change the data in the existing path collection at each update. Try something along the lines of
def update(self, i):
    """Move the existing scatter collection to frame ``i``'s projected points.

    Returns a one-element tuple holding the collection.
    """
    frame = self.project_lat_lons(i)
    # One (x, y) row per point.
    xy_pairs = np.column_stack((frame.x.values, frame.y.values))
    self.scat.set_offsets(xy_pairs)
    return self.scat,
Note that I haven't tested this.

Categories

Resources