Plotly scatter large volume geographic data - python

I tried to write code that creates a visualization of all forest fires that happened during the year 2021. The CSV file containing the data is around 1.5 GB. The program looks correct to me, but when I try to run it, it gets stuck without displaying any visualization or error message. The last time I tried, it ran for almost half a day until Python crashed.
I don't know if I have an infinite loop, if the file is too big, or if there is something else I am missing.
Can anyone provide feedback, please?
Here is my code:
import csv
from datetime import datetime
from plotly.graph_objs import Scattergeo , Layout
from plotly import offline
# CSV file of fire detections; the columns used below are read by position.
filename = 'fire_nrt_J1V-C2_252284.csv'

# newline='' is how the csv module expects files to be opened, so that quoted
# fields containing line breaks are parsed correctly.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the column-name row
    lats, lons, brights, dates = [], [], [], []
    for row in reader:
        date = datetime.strptime(row[5], '%Y-%m-%d')
        # csv.reader yields strings. Convert to numbers here: with strings,
        # `5*bright` below would repeat the text five times instead of
        # scaling the value, and the colour scale would receive text.
        lat = float(row[0])
        lon = float(row[1])
        bright = float(row[2])
        lats.append(lat)
        lons.append(lon)
        brights.append(bright)
        dates.append(date)

# A single scattergeo trace with one marker per CSV row.
# NOTE: every row becomes its own marker, so a very large file produces a
# very large figure; sample or aggregate the rows first if that is a problem.
data = [{
    'type': 'scattergeo',
    'lon': lons,
    'lat': lats,
    'text': dates,
    'marker': {
        'size': [5*bright for bright in brights],
        'color': brights,
        'colorscale': 'Reds',
        'colorbar': {'title': 'Fire brightness'},
    }
}]
my_layout = Layout(title="Forestfires during the year 2021")
fig = {'data': data, 'layout': my_layout}
# Render the figure to an HTML file.
offline.plot(fig, filename='global_fires_2021.html')

I have found the data you describe here: https://wifire-data.sdsc.edu/dataset/viirs-i-band-375-m-active-fire-data/resource/3ce73b20-f584-44f7-996b-2f319c480294
plotly uses resources for every point plotted on a scatter plot, so there is a limit to how many points you can plot before you run out of resources
there are other approaches to plotting a larger number of points
https://plotly.com/python/mapbox-density-heatmaps/ fewer limits, but still limited on very large data sets
https://plotly.com/python/datashader/ can work with very large data sets as it generates an image. It is more challenging to work with (install and navigate API)
data sourcing
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Remote CSV of fire detections; pandas downloads and parses it in one call.
FIRES_CSV_URL = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_7d.csv"
df = pd.read_csv(FIRES_CSV_URL)
# Bare expression: shows the frame when run as the last line of a notebook cell.
df
scatter_geo
limited to random sample of 1000 rows
# Draw a random 1000-row sample so the number of markers stays small.
sample_1k = df.sample(1000)
px.scatter_geo(
    sample_1k,
    lat="latitude",
    lon="longitude",
    color="bright_ti4",  # colour each marker by its brightness value
    # size="size",
    hover_data=["acq_date"],
    color_continuous_scale="reds",
)
density mapbox
# Density heatmap over a random 5000-row sample, weighted by brightness.
sample_5k = df.sample(5000)
px.density_mapbox(
    sample_5k,
    lat="latitude",
    lon="longitude",
    z="bright_ti4",
    radius=3,
    color_continuous_scale="reds",
    zoom=1,
    mapbox_style="carto-positron",
)
datashader Mapbox
all data
some libraries are more difficult to install and use
need to deal with this issue https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
import datashader as ds, colorcet
from pyproj import Transformer
# Converts EPSG:3857 coordinates back to EPSG:4326; always_xy=True keeps the
# axis order as (x, y) = (longitude, latitude) on both sides.
t3857_to_4326 = Transformer.from_crs(3857, 4326, always_xy=True)

# project CRS to ensure image overlays appropriately back over mapbox
# https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
df.loc[:, "longitude_3857"], df.loc[:, "latitude_3857"] = ds.utils.lnglat_to_meters(
    df.longitude, df.latitude
)

# Rasterise every row onto a RESOLUTION x RESOLUTION canvas and shade it
# into a single PIL image.
RESOLUTION = 1000
cvs = ds.Canvas(plot_width=RESOLUTION, plot_height=RESOLUTION)
agg = cvs.points(df, x="longitude_3857", y="latitude_3857")
img = ds.tf.shade(agg, cmap=colorcet.fire).to_pil()

# Empty mapbox trace; the data is shown only through the image layer below.
fig = go.Figure(go.Scattermapbox())
fig.update_layout(
    mapbox={
        "style": "carto-positron",
        "layers": [
            {
                "sourcetype": "image",
                "source": img,
                # Sets the coordinates array contains [longitude, latitude] pairs for the image corners listed in
                # clockwise order: top left, top right, bottom right, bottom left.
                "coordinates": [
                    t3857_to_4326.transform(
                        agg.coords["longitude_3857"].values[a],
                        agg.coords["latitude_3857"].values[b],
                    )
                    for a, b in [(0, -1), (-1, -1), (-1, 0), (0, 0)]
                ],
            }
        ],
    },
    # The original listed "r" twice, so the bottom margin was never set.
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)

Related

Loop over geoJson features and add plotly graph in popup html string to each

I've got a geoJson file with a bunch of features which I'm trying to display on an interactive folium map and I'm trying to add a plotly graph that pops up when you click on a polygon. At this moment I already have a folder with all plotly graphs for each city, 'currentWorkingDirectory/graphs/CityName.HTML'. I also have the interactive folium map with the different polygons which I can hover over or click for a popup.
Now I'm having trouble with adding the plotly graphs as a html string to the geojson popups. Could someone help me with this? I'll add a code snippet of the folium map and what I've tried:
import folium
import geopandas as gpd
import codecs
# Base map. Named `folium_map` rather than `map` so the builtin is not shadowed.
folium_map = folium.Map(location=['51.096246199999996', '4.178629103169916'], tiles="cartodbpositron", zoom_start=9)
geojson_file_df = gpd.read_file('Refgem_geojson.json')

# Centred title injected into the page HTML.
loc = 'Project GEO ICT'
title_html = '''
<h3 align="center" style="font-size:20px"><b>{}</b></h3>
'''.format(loc)
folium_map.get_root().html.add_child(folium.Element(title_html))

# Polygon layer. The original passed `geojson_file`, which is never defined;
# the data loaded above is `geojson_file_df`.
g_map = folium.GeoJson(
    geojson_file_df,
    name="GeoJson",
    style_function=lambda x: {'fillColor': 'orange'}
).add_to(folium_map)

# Hover tooltip and click popup built from the feature properties.
folium.GeoJsonTooltip(
    fields=['NISCODE', 'NAAM', 'OPPERVL'],
    aliases=['NISCODE', 'Naam', 'Oppervlakte'],
    sticky=False
).add_to(g_map)
folium.GeoJsonPopup(
    fields=["NAAM", "Average Prices: ", "Woonhuis", "Villa", "Studio"],
    aliases=["Naam", "Average Prices: ", "Woonhuis", "Villa", "Studio"]
).add_to(g_map)

# Read the pre-rendered plotly page through a context manager so the file
# handle is closed again (the original left it open).
with codecs.open("graphs/AARTSELAAR.html", 'r') as graph_file:
    graph_html = graph_file.read()
html = """
<iframe src=\"""" + graph_html + """\" width="850" height="400" frameborder="0">
"""

# Marker whose popup carries the HTML assembled above.
popup1 = folium.Popup(folium.Html(html, script=True))
folium.Marker(['51.096246199999996', '4.178629103169916'], popup=popup1, icon=folium.Icon(icon='home', prefix='fa')).add_to(folium_map)
folium_map
Here ^ I tried to add the popup to a marker, but that didn't work for me (it's also not really what I want, I want to add the popup to a polygon).
I believe I should make some sort of loop that iterates over all features in the geoJson and adds a popup for every iteration.
You have not provided sample data / geometry so used standard geopandas sample data
this will create popups / tooltips for each geometry. The popup is a plotly figure converted to an embedded, URI-encoded image: a pie chart of the country's population as a percentage of the population of all geometries.
I investigated customising GeoJsonPopup() but found no solution
hence I create a layer for each feature with its own popup
import geopandas as gpd
import folium
from statistics import mean
import plotly.express as px
import base64, io
# some geometry: European countries, dropping any whose bounding box has a
# coordinate below -30
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
in_europe = world["continent"].eq("Europe")
beyond_west = world.bounds.lt(-30).any(axis=1)
gdf = world.loc[in_europe & ~beyond_west]

# create the map, nicely centered and zoomed
bounds = gdf.total_bounds
center_x = mean([bounds[0], bounds[2]])
center_y = mean([bounds[1], bounds[3]])
# folium takes the location as (y, x)
m = folium.Map(location=(center_y, center_x))
m.fit_bounds([[bounds[1], bounds[0]], [bounds[3], bounds[2]]])

# given need to create a geojson layer for each figure, create
# feature group to contain them
fg = folium.FeatureGroup(name="Europe", show=False)
# create an encoded image of graph...
# change to generate graph you want
def b64image(vals=None):
    """Render a small pie chart of *vals* and return it as a PNG data URI.

    Args:
        vals: values for the pie slices; ``None`` means ``[1, 2]``.

    Returns:
        A ``data:image/png;base64,...`` string, usable as an ``<img>`` src.
    """
    # None sentinel instead of a mutable list default shared between calls.
    if vals is None:
        vals = [1, 2]
    fig = (
        px.pie(values=vals)
        .update_layout(margin={"l": 0, "r": 0, "t": 0, "b": 0})
        .update_traces(texttemplate="%{percent:.0%}")
    )
    # The result of to_image is base64-encoded as-is (the original wrapped it
    # in a BytesIO and read it straight back out).
    png = fig.to_image(format="png", width=80, height=80)
    return "data:image/png;base64," + base64.b64encode(png).decode("utf-8")
# Population summed over all geometries; used for the second pie slice.
tot_pop = gdf["pop_est"].sum()

# create a geojson layer for each feature
for _, r in gdf.iterrows():
    # geodataframe of row
    gdf_ = gpd.GeoDataFrame(r.to_frame().T, crs=gdf.crs)
    # URI encoded image of plotly figure
    img_ = f'<img src="{b64image([r["pop_est"], tot_pop-r["pop_est"]])}"/>'
    choro_ = folium.GeoJson(
        gdf_.__geo_interface__,
        name=r["name"],
        style_function=lambda x: {"fillColor": "orange"},
        # tooltip lists every non-geometry column of the row
        tooltip=folium.GeoJsonTooltip(gdf_.drop(columns="geometry").columns.tolist()),
    )
    # this is the real work around, add to layer which is a choro
    folium.Popup(img_).add_to(choro_)
    choro_.add_to(fg)

# Attach the group holding all per-feature layers, then display the map.
fg.add_to(m)
m

Calculating spatial averages for each country after spatial join

Hello I am using the following code at the bottom to extract countries from coordinates. Please see the following url which provides a more detailed explanation of the code: Extracting countries from NetCDF data using geopandas.
My main variable/value is the monthly mean pdsi value from: https://psl.noaa.gov/data/gridded/data.pdsi.html. The image below represents a portion of the visualization created by the code below. The shaded squares represent the spatial regions of pdsi values, which is overlapping a shapefile of the world.
From the image of Belgium, you can see that the 4 squares that touch the land area of Belgium also touch other countries. If I attribute the full values of these squares to Belgium, I believe this overestimates the mean pdsi value. In particular, the bottom two squares barely touch Belgium, so their weight when calculating the mean should be significantly lower. Is there a way to compute a weighted average in which the area of each square that falls within a country is used as the weight for its pdsi value? Additionally, I would like to standardize this process for all countries, not only Belgium.
Any help would be greatly appreciated!
import geopandas as gpd
import numpy as np
import plotly.express as px
import requests
from pathlib import Path
from zipfile import ZipFile
import urllib
import shapely.geometry
import xarray as xr
# download NetCDF data...
# fmt: off
url = "https://psl.noaa.gov/repository/entry/get/pdsi.mon.mean.selfcalibrated.nc?entryid=synth%3Ae570c8f9-ec09-4e89-93b4-babd5651e7a9%3AL2RhaV9wZHNpL3Bkc2kubW9uLm1lYW4uc2VsZmNhbGlicmF0ZWQubmM%3D"
# Local target: the last path component of the URL, in the working directory.
f = Path.cwd().joinpath(Path(urllib.parse.urlparse(url).path).name)
# fmt: on
if not f.exists():
    # Stream to disk. The context manager closes the connection afterwards.
    with requests.get(url, stream=True, headers={"User-Agent": "XY"}) as r:
        # Fail before creating the file, so an HTTP error page is never
        # saved as if it were the dataset.
        r.raise_for_status()
        with open(f, "wb") as fd:
            # 1 MiB chunks instead of the original 128 bytes per write.
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                fd.write(chunk)

# Load the dataset and flatten it into a dataframe.
ds = xr.open_dataset(f)
pdsi = ds.to_dataframe()
pdsi = pdsi.reset_index().dropna()  # don't care about places in oceans...
# use subset for testing... last 5 times...
last_times = pdsi.groupby("time").size().index[-5:]
pdsim = pdsi.loc[pdsi["time"].isin(last_times)]

# create geopandas dataframe: one point per (lon, lat) row
points = pdsim.loc[:, ["lon", "lat"]].apply(shapely.geometry.Point, axis=1)
gdf = gpd.GeoDataFrame(pdsim, geometry=points)

# make sure that data supports using a buffer...
# (the most common non-zero step must be the same for lat and lon)
lat_step = gdf["lat"].diff().loc[lambda s: s.ne(0)].mode()
lon_step = gdf["lon"].diff().loc[lambda s: s.ne(0)].mode()
assert (lat_step == lon_step).all()

# how big should the square buffer be around the point??
# half of the step, so each point grows into a square one step wide
buffer = lat_step.values[0] / 2
gdf["geometry"] = gdf["geometry"].buffer(buffer, cap_style=3)

# Import shapefile from geopandas
path_to_data = gpd.datasets.get_path("naturalearth_lowres")
world_shp = gpd.read_file(path_to_data)

# the solution... spatial join buffered polygons to countries
# comma separate associated countries
country_names = (
    world_shp.sjoin(gdf.set_crs("EPSG:4326"))
    .groupby("index_right")["name"]
    .agg(",".join)
)
gdf = gdf.join(country_names)
gdf["time_a"] = gdf["time"].dt.strftime("%Y-%b-%d")

# simplest way to test is visualise...
px.choropleth_mapbox(
    gdf,
    geojson=gdf.geometry,
    locations=gdf.index,
    color="pdsi",
    hover_data=["name"],
    animation_frame="time_a",
    opacity=.3
).update_layout(
    mapbox={"style": "carto-positron", "zoom": 1},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
using https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoSeries.intersection.html you can get part of grid that intersects with country polygon
using area, you can calculate proportion of overlap
from this I have generated two visualisations
show countries a grid overlaps and how much it overlaps
aggregate to countries using a weighted average plus calculate other measures that can be used for transparency
I do not know if this is mathematically / scientifically sound to aggregate PDSI in this way (either means or weighted averages). This does demonstrate how to get results your question requests.
# the solution... spatial join buffered polygons to countries
# plus work out overlap between PDSI grid and country. Area of each grid is constant...
def _overlap_fraction(d):
    """Return, per row, the share of the PDSI square covered by the country."""
    squares = gpd.GeoSeries(d["geometry_pdsi"], crs="EPSG:4326")
    square_area = (buffer * 2) ** 2
    return (d["geometry"].intersection(squares).area / square_area).round(3)


# countries joined to the squares they intersect...
countries_x_cells = world_shp.sjoin(gdf.set_crs("EPSG:4326"))
# ...with each square's own geometry attached as "geometry_pdsi"
with_square_geom = countries_x_cells.merge(
    gdf.loc[:, "geometry"],
    left_on="index_right",
    right_index=True,
    suffixes=("", "_pdsi"),
)
gdf_c = with_square_geom.assign(overlap=_overlap_fraction)

# comma separate associated countries and a list of overlaps
per_square = gdf_c.groupby("index_right").agg({"name": ",".join, "overlap": list})
gdf_pdsi = gdf.loc[:, ["geometry", "time", "pdsi"]].join(per_square)
gdf_pdsi["time_a"] = gdf_pdsi["time"].dt.strftime("%Y-%b-%d")

# simplest way to test is visualise...
fig_buf = px.choropleth_mapbox(
    gdf_pdsi,
    geojson=gdf_pdsi.geometry,
    locations=gdf_pdsi.index,
    color="pdsi",
    hover_data=["name", "overlap"],
    animation_frame="time_a",
    opacity=0.3,
).update_layout(
    mapbox={"style": "carto-positron", "zoom": 1},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
fig_buf
import pandas as pd
# prepare data to plot by country
def _country_stats(d):
    """Summarise the PDSI squares belonging to one (country, time) group."""
    pdsi_vals = d["pdsi"]
    overlaps = d["overlap"]
    return pd.Series(
        {
            # mean of pdsi weighted by each square's overlap with the country
            "weighted_pdsi": (pdsi_vals * overlaps).sum() / overlaps.sum(),
            "unweighted_pdsi": pdsi_vals.mean(),
            "min_pdsi": pdsi_vals.min(),
            "max_pdsi": pdsi_vals.max(),
            "min_overlap": overlaps.min(),
            "max_overlap": overlaps.max(),
            "size_pdsi": len(pdsi_vals),
            # "pdsi_list":[round(v,2) for v in d["pdsi"]]
        }
    )


df_pdsi = gdf_c.groupby(["name", "time"]).apply(_country_stats).reset_index()
df_pdsi["time_a"] = df_pdsi["time"].dt.strftime("%Y-%b-%d")

# one polygon per country, looked up through the "name" column
country_shapes = world_shp.set_index("name").loc[:, "geometry"]
fig = px.choropleth_mapbox(
    df_pdsi,
    geojson=country_shapes,
    locations="name",
    color="weighted_pdsi",
    hover_data=df_pdsi.columns,
    animation_frame="time_a",
    opacity=0.3,
).update_layout(
    mapbox={"style": "carto-positron", "zoom": 1},
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
fig

distance from a point to the nearest edge of a polygon

In the code below I want to calculate the distance from a point to the nearest edge of a polygon. The coordinates are provided in the results section below, and the code shows how I compute the distance from the point to the nearest edge of the polygon.
At run time, as shown in the results section below, for the given point and geometry the distance from PostGIS is 4.32797817574802, while the one calculated with geopandas is 3.8954865274727614e-05.
Please let me know how to find the distance from a point to the nearest edge of a polygon.
code
# Parse the WKT strings into geometries (`wkt` is presumably shapely.wkt;
# its import is not shown in the question).
poly = wkt.loads(fieldCoordinatesAsTextInWKTInEPSG25832)
pt = wkt.loads(centerPointointAsTextInWKTInEPSG25832)
# Distance between the two geometries, in the units of their coordinates.
# (The original line had an extra closing parenthesis, a syntax error.)
print(poly.distance(pt))
results:
queryPostgreSQLForDistancesFromPointsToPolygon:4.32797817574802#result from postgis using st_distance operator
centerPointointAsTextInWKTInEPSG4326:POINT(6.7419520458647835 51.08427961641239)
centerPointointAsTextInWKTInEPSG25832:POINT(341849.5 5661622.5)
centerPointointAsTextInWKTInEPSG4326:POINT(6.7419520458647835 51.08427961641239)
fieldCoordinatesAsTextInWKTInEPSG25832:POLYGON ((5622486.93624152 1003060.89945681,5622079.52632924 1003170.95198635,5622126.00418918 1003781.73122161,5622444.73987453 1003694.55868486,5622486.93624152 1003060.89945681))
fieldCoordinatesAsTextInWKTInEPSG4326:POLYGON((6.741879696309871 51.08423775429969,6.742907378503366 51.08158745820981,6.746964018740842 51.08233499299334,6.746152690693346 51.08440763989611,6.741879696309871 51.08423775429969))
poly.distance(pt):3.8954865274727614e-05#result from geopandas
your code works. It's approx 7000km from Belgium to Ethiopia
are you sure your data is correct? Have built a plotly graph to show where buffered polygon, polygon centroid and point are located in EPSG:4326 CRS
from shapely import wkt
import geopandas as gpd
import plotly.express as px
import json
# queryPostgreSQLForDistancesFromPointsToPolygon:4.32797817574802#result from postgis using st_distance operator
centerPointointAsTextInWKTInEPSG4326 = "POINT(6.7419520458647835 51.08427961641239)"
centerPointointAsTextInWKTInEPSG25832 = "POINT(341849.5 5661622.5)"
fieldCoordinatesAsTextInWKTInEPSG25832 = "POLYGON ((5622486.93624152 1003060.89945681,5622079.52632924 1003170.95198635,5622126.00418918 1003781.73122161,5622444.73987453 1003694.55868486,5622486.93624152 1003060.89945681))"
fieldCoordinatesAsTextInWKTInEPSG4326 = "POLYGON((6.741879696309871 51.08423775429969,6.742907378503366 51.08158745820981,6.746964018740842 51.08233499299334,6.746152690693346 51.08440763989611,6.741879696309871 51.08423775429969))"
# poly.distance(pt):3.8954865274727614e-05#result from geopandas

poly = wkt.loads(fieldCoordinatesAsTextInWKTInEPSG25832)
pt = wkt.loads(centerPointointAsTextInWKTInEPSG25832)
# distance divided by 1000 (metres to km, assuming the CRS unit is metres)
dist_k = poly.distance(pt) / 10**3
print(dist_k)

# let's visualize it....
# buffer the polygon by 10**6 CRS units, then reproject for the map
poly_frame = gpd.GeoDataFrame(geometry=[poly], crs="EPSG:25832")
gpoly = poly_frame.buffer(10 ** 6).to_crs("EPSG:4326")
gpoly.plot()

# the point and the polygon centroid, reprojected the same way
pts_frame = gpd.GeoDataFrame(geometry=[pt, poly.centroid], crs="EPSG:25832")
gpt = pts_frame.to_crs("EPSG:4326")

# the buffered polygon is drawn as a red fill layer underneath the markers
polygon_layer = {
    "source": json.loads(gpoly.to_json()),
    "below": "traces",
    "type": "fill",
    "color": "red",
}
px.scatter_mapbox(
    gpt.assign(dist=dist_k),
    lat=gpt.geometry.y,
    lon=gpt.geometry.x,
    hover_data={"dist": ":.0f"},
).update_layout(
    mapbox={
        "style": "carto-positron",
        "zoom": 4,
        "layers": [polygon_layer],
    }
)

How to set cell alpha values for a heatmap in Python?

I would like to show a heatmap in any Python library, where color is determined by the mean value of the data points in the cell. Additionally, I'd like to visualize the uncertainty of these means by using the number of data points in each cell as the alpha value. Hence, cells with few data points will have a color, but faded.
How can I do this in Python the easiest way, when the full color is determined by two values?
import pandas as pd
import numpy as np
import seaborn as sns
from operator import attrgetter
# Synthetic data: N uniform (x, y) positions, each with a binary value c.
N = 10000
D = pd.DataFrame(
    {
        "x": np.random.uniform(size=N),
        "y": np.random.uniform(size=N),
        "c": np.random.choice([0, 1], size=N),
    }
)

# Cut each axis into 10 bins and label every row by its bin's left edge.
bin_left = attrgetter("left")
x_group = pd.cut(D["x"], 10).apply(bin_left)
y_group = pd.cut(D["y"], 10).apply(bin_left)

# Per-cell mean of c and per-cell number of rows, on the same 10x10 grid.
means, sizes = (
    D.pivot_table("c", x_group, y_group, aggfunc=how) for how in ("mean", "size")
)
sns.heatmap(means)  # here sizes should be used for the alpha channel - maybe in a postprocessing?

How to update histogram based on selection of points (Altair)?

My goal is to update the histogram shown on the right side of 1 based on the selection of points on the left side.
Initially the plot seems to be alright, however once a selection is made the histogram won't be redrawn (altair 3.2.0)
Does anybody know how to do this?
below is the code to recreate the example:
import altair as alt
import pandas as pd
import numpy as np
from random import choice
# Example data: one seeded normal Y value and one random feature label per day.
dates = pd.date_range("1.1.2019", "2.28.2019")
np.random.seed(999)
Y = np.random.normal(0.5, 0.1, len(dates))
features = [choice(["a", "b", "c"]) for _ in range(len(dates))]
df = pd.DataFrame({"dates": dates, "Y": Y, "features": features})

base = alt.Chart(df)
area_args = {"opacity": 0.3, "interpolate": "step"}
# Interval selection along x; it is added to the scatter plot below.
pts = alt.selection(type="interval", encodings=["x"])

# Left chart: scatter plot carrying the selection.
scatter = base.mark_circle().encode(
    alt.X("dates:T"), alt.Y("Y:Q"), color="features:N"
)
points = scatter.add_selection(pts)

# Right chart: binned-Y area histogram per feature, filtered by the selection.
yscale = alt.Scale(domain=(0, 1))
binned_y = alt.Y(
    "Y:Q", bin=alt.Bin(maxbins=20, extent=yscale.domain), stack=None, title=""
)
count_x = alt.X("count()", stack=None, title="")
right_hist = (
    base.mark_area(**area_args)
    .encode(binned_y, count_x, alt.Color("features:N"))
    .transform_filter(pts)
)
(points | right_hist)
edit1: another image to clarify my point #jvp
Solved in the comments as an issue with the OPs setup and how the plots were rendered on their end.

Categories

Resources