python rename field json with specific structure - python

I have a JSON document with the following structure:
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVE": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
and I need to rename the field "ADMIN_LEVE" to "ADMIN_LEVEL".
I have written some code using pandas to rename it, but it doesn't work:
# Build a DataFrame straight from the parsed JSON object.
df = pd.DataFrame(data)
# NOTE(review): rename(columns=...) only relabels DataFrame columns. In the JSON
# shown above, "ADMIN_LEVE" is nested under features -> properties, so it is
# presumably not a column of df and this call changes nothing — confirm.
df.rename(columns={'ADMIN_LEVE':'ADMIN_LEVEL'}, inplace=True)
How can I do it?
I have also tried with replace, but it doesn't work either:
# Read the whole file as text and parse it into Python objects.
json_data=open(path + ".json").read()
data = json.loads(json_data)
# NOTE(review): if data is the dict shown above, this loop iterates over its
# top-level keys. str.replace returns a new string, which is discarded here,
# so neither data nor the file on disk is modified.
for d in data:
d.replace('"ADMIN_LEVE"', '"ADMIN_LEVEL"')
Thanks

Source JSON (as string):
In [325]: print(s)
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVE": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
Replaced:
In [327]: s = s.replace('"ADMIN_LEVE":', '"ADMIN_LEVEL":')
In [328]: print(s)
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVEL": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
UPDATE: helper function:
def replace_in_json_file(filename, from_str, to_str):
    """Replace every occurrence of ``from_str`` with ``to_str`` in a file, in place.

    The file is treated as plain text: this is a literal substring
    substitution, not a JSON-aware rename. ``from_str`` should therefore carry
    enough context (for example the quotes and the colon, ``'"ADMIN_LEVE":'``)
    to avoid matching unrelated text.

    Args:
        filename: Path of the file to rewrite.
        from_str: Substring to search for.
        to_str: Replacement text.
    """
    # JSON text is UTF-8; be explicit instead of relying on the platform
    # default encoding. The file is fully read before it is reopened for
    # writing, so truncation cannot lose the content.
    with open(filename, encoding="utf-8") as f:
        data = f.read()
    with open(filename, "w", encoding="utf-8") as f:
        f.write(data.replace(from_str, to_str))

Related

Exporting a file in .geojson format without T inside the date

I am trying to export a geodataframe in .geojson format. Before exporting, I print the geodataframe in the terminal and the dates appear as I wish:
After writing the line to export the file in .geojson format
predicted_events.to_file("predicted_events.geojson", driver='GeoJSON'),
I print from the terminal and a T appears in the date column:
When I export in .csv format, the T does not appear either.
The export is correctly encoding the datetime type of the column (in ISO 8601 a T separates the date from the time). Convert the column from date to string before exporting and the GeoJSON will contain what you require.
import shapely.wkt
import pandas as pd
import geopandas as gpd
from pathlib import Path
# Four sample points with a datetime "date" column, one day apart.
gdf = gpd.GeoDataFrame(
    geometry=[
        shapely.wkt.loads(p)
        for p in [
            "POINT (6.493069295913722 55.7355541882027)",
            "POINT (15.43828764139886 46.30211698572747)",
            "POINT (12.60117419209274 43.69100105361232)",
            "POINT (6.876712332538435 40.26684145530385)",
        ]
    ],
    data={"date": pd.date_range("1-jan-2021", periods=4)},
)
f = Path.cwd().joinpath("predicted_events.geojson")

# First export: the column is still a datetime, so the file contains full
# timestamps such as "2021-01-01T00:00:00".
gdf.to_file(f, driver="GeoJSON")
with open(f) as fh:
    text = fh.read()
print(text)

# Second export: cast the column to str first, so only "2021-01-01" is written.
gdf["date"] = gdf["date"].astype(str)
gdf.to_file(f, driver="GeoJSON")
with open(f) as fh:
    text = fh.read()
print(text)
output
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "properties": { "date": "2021-01-01T00:00:00" }, "geometry": { "type": "Point", "coordinates": [ 6.493069295913722, 55.735554188202698 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-02T00:00:00" }, "geometry": { "type": "Point", "coordinates": [ 15.43828764139886, 46.302116985727473 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-03T00:00:00" }, "geometry": { "type": "Point", "coordinates": [ 12.60117419209274, 43.691001053612318 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-04T00:00:00" }, "geometry": { "type": "Point", "coordinates": [ 6.876712332538435, 40.266841455303847 ] } }
]
}
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "properties": { "date": "2021-01-01" }, "geometry": { "type": "Point", "coordinates": [ 6.493069295913722, 55.735554188202698 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-02" }, "geometry": { "type": "Point", "coordinates": [ 15.43828764139886, 46.302116985727473 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-03" }, "geometry": { "type": "Point", "coordinates": [ 12.60117419209274, 43.691001053612318 ] } },
{ "type": "Feature", "properties": { "date": "2021-01-04" }, "geometry": { "type": "Point", "coordinates": [ 6.876712332538435, 40.266841455303847 ] } }
]
}

How to display a heatmap on a specific parameter with geopandas?

In my very simple case I would like to display a heatmap of the points in the points GeoJSON file, based not on their geographic density (lat, long) but on one of their properties. In the points file each point has a confidence property (a value from 0 to 1); how can I display the heatmap on this parameter? weight=points.confidence doesn't seem to work.
For example:
#points.geojson
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": {"confidence": 0.67}, "geometry": { "type": "Point", "coordinates": [ 37.703471404215918, 26.541625492300192 ] } },
{ "type": "Feature", "properties": {"confidence": 0.76}, "geometry": { "type": "Point", "coordinates": [ 37.009744331225093, 26.710090585532761 ] } },
{ "type": "Feature", "properties": {"confidence": 0.94}, "geometry": { "type": "Point", "coordinates": [ 37.541708538306224, 26.160111944646022 ] } },
{ "type": "Feature", "properties": {"confidence": 0.52}, "geometry": { "type": "Point", "coordinates": [ 37.628566642215354, 25.917300595223857 ] } },
{ "type": "Feature", "properties": {"confidence": 0.46}, "geometry": { "type": "Point", "coordinates": [ 37.676499267124271, 26.653959791866598 ] } },
{ "type": "Feature", "properties": {"confidence": 0.55}, "geometry": { "type": "Point", "coordinates": [ 37.677033863264533, 26.654033815175087 ] } },
{ "type": "Feature", "properties": {"confidence": 0.12}, "geometry": { "type": "Point", "coordinates": [ 37.37522057234797, 26.353271000367258 ] } },
{ "type": "Feature", "properties": {"confidence": 0.62}, "geometry": { "type": "Point", "coordinates": [ 37.396556958266373, 26.459196264023291 ] } },
{ "type": "Feature", "properties": {"confidence": 0.21}, "geometry": { "type": "Point", "coordinates": [ 36.879775221618168, 26.901743663072878 ] } }
]
}
The image below shows my result but it is on the geographic density not confidence score density.
import geoplot as gplt
import geopandas as gpd
import geoplot.crs as gcrs
import matplotlib.pyplot as plt
# Load the sample points and the polygons used for the outline and as clip mask.
points = gpd.read_file('points.geojson')
polygons = gpd.read_file('polygons.geojson')
# Draw the polygons using an Albers equal-area projection.
ax = gplt.polyplot(polygons, projection=gcrs.AlbersEqualArea(), zorder=1)
# Kernel-density plot of the point locations, clipped to the polygons and
# drawn on the same axes.
gplt.kdeplot(points, cmap='Reds', shade=True, clip=polygons, ax=ax)
# weight=points.confidence doesn't work inside kdeplot()
plt.show()
Using your sample data for points:
these points are in Saudi Arabia, so I assumed that the polygons are regional boundaries in Saudi Arabia and downloaded them from http://www.naturalearthdata.com/downloads/10m-cultural-vectors/
the polygon data is a shapefile
it is loaded into geopandas to get access to the GeoJSON __geo_interface__
it is dynamically filtered to Saudi Arabia using pandas .loc
the confidence data is plotted as a straight density mapbox heatmap: https://plotly.com/python/mapbox-density-heatmaps/
the boundaries are added as mapbox layers: https://plotly.com/python/mapbox-layers/
# fmt: off
points = {
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": {"confidence": 0.67}, "geometry": { "type": "Point", "coordinates": [ 37.703471404215918, 26.541625492300192 ] } },
{ "type": "Feature", "properties": {"confidence": 0.76}, "geometry": { "type": "Point", "coordinates": [ 37.009744331225093, 26.710090585532761 ] } },
{ "type": "Feature", "properties": {"confidence": 0.94}, "geometry": { "type": "Point", "coordinates": [ 37.541708538306224, 26.160111944646022 ] } },
{ "type": "Feature", "properties": {"confidence": 0.52}, "geometry": { "type": "Point", "coordinates": [ 37.628566642215354, 25.917300595223857 ] } },
{ "type": "Feature", "properties": {"confidence": 0.46}, "geometry": { "type": "Point", "coordinates": [ 37.676499267124271, 26.653959791866598 ] } },
{ "type": "Feature", "properties": {"confidence": 0.55}, "geometry": { "type": "Point", "coordinates": [ 37.677033863264533, 26.654033815175087 ] } },
{ "type": "Feature", "properties": {"confidence": 0.12}, "geometry": { "type": "Point", "coordinates": [ 37.37522057234797, 26.353271000367258 ] } },
{ "type": "Feature", "properties": {"confidence": 0.62}, "geometry": { "type": "Point", "coordinates": [ 37.396556958266373, 26.459196264023291 ] } },
{ "type": "Feature", "properties": {"confidence": 0.21}, "geometry": { "type": "Point", "coordinates": [ 36.879775221618168, 26.901743663072878 ] } }
]
}
# fmt: on
import geopandas as gpd
import plotly.express as px
import requests
from pathlib import Path
from zipfile import ZipFile
import urllib.parse  # the submodule must be imported explicitly to use urllib.parse

# fmt: off
# download boundaries
url = "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip"
f = Path.cwd().joinpath(urllib.parse.urlparse(url).path.split("/")[-1])
# fmt: on

# Download the archive only once; later runs reuse the local copy.
if not f.exists():
    r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
    # Fail here rather than saving an HTTP error page under the zip's name.
    r.raise_for_status()
    with open(f, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)

# Unpack next to the archive, into a directory named after it. Extraction is
# repeated on every run so the shapefile is present even if only the zip
# survived from an earlier run.
with ZipFile(f) as zfile:
    zfile.extractall(f.stem)

# load downloaded boundaries
gdf2 = gpd.read_file(str(f.parent.joinpath(f.stem).joinpath(f"{f.stem}.shp")))
# confidence data (`points` is the FeatureCollection dict defined above)
gdf = gpd.GeoDataFrame.from_features(points)

# now the simple bit: density plot of the data, with the Saudi Arabia regional
# boundaries drawn as a line layer
fig = px.density_mapbox(
    gdf, lat=gdf.geometry.y, lon=gdf.geometry.x, z="confidence"
).update_layout(
    mapbox={
        "style": "carto-positron",
        "zoom": 6,
        "layers": [
            {
                "source": gdf2.loc[gdf2["iso_a2"].eq("SA")].geometry.__geo_interface__,
                "type": "line",
            }
        ],
    },
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
fig

geojson to Elasticsearch : Failed to parse field [geometry.coordinates] of type [geo_shape]

I am trying to index a GeoJSON file into Elasticsearch (version 7.6.2) using Python.
Here is the mapping I defined in Elasticsearch:
'mappings': {
"properties": {
"geometry": {
"properties": {
"coordinates": {
"type": "geo_shape"
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
}
}
The geojson file looks like this:
{
"type": "FeatureCollection",
"name": "testting",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "LEGEND": "x_1", "THRESHOLD": -109, "COLOR": "0 0 255", "Prediction": "Coverage" }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 151.20061069847705, -33.886918725260998 ], [ 151.200620164862698, -33.886467994010133 ].....
However, when I write the file to Elasticsearch, inspired from this link:
How to index geojson file in elasticsearch?
def geojson2es(gj):
    """Yield each feature of a parsed GeoJSON FeatureCollection.

    Args:
        gj: Mapping with a ``"features"`` key holding an iterable of features.

    Yields:
        The features, one at a time, in their original order.

    Raises:
        KeyError: If ``gj`` has no ``"features"`` key (raised on first
            iteration, since this is a generator).
    """
    yield from gj['features']
# Parse the GeoJSON file; the handle is closed as soon as parsing is done.
with open(input_path+'/'+ data) as f:
    gj = json.load(f)

es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

# One bulk action per feature, all targeting the "test" index.
k = [{
    "_index": "test",
    "_source": feature,
} for feature in geojson2es(gj)]
helpers.bulk(es, k)
I have got this error:
{'type': 'mapper_parsing_exception',
'reason': 'failed to parse field [geometry.coordinates] of type [geo_shape]', '
caused_by':
{'type': 'parse_exception', 'reason': 'shape must be an object consisting of type and coordinates'}}
Did anyone encounter a similar issue? How can I fix it?
Your mapping is not correct. The geo_shape type already implies type and coordinates, so you don't need to declare them again.
So your mapping should look like this instead, i.e. each feature has a type (e.g. Feature), a hash of properties and a geometry of type geo_shape:
{
"mappings": {
"properties": {
"type": {
"type": "keyword"
},
"properties": {
"type": "object"
},
"geometry": {
"type": "geo_shape"
}
}
}
}

Not understanding how work with "urn:ogc:def:crs:OGC:1.3:CRS84" in geopandas

I have this GeoJSON file:
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "visit_date": "2013-03-27Z", "name": "Mayi-Tatu", "n_workers": 150.0, "mineral": "Gold" }, "geometry": { "type": "Point", "coordinates": [ 29.66033, 1.01089 ] } },
{ "type": "Feature", "properties": { "visit_date": "2013-03-27Z", "name": "Mabanga", "n_workers": 115.0, "mineral": "Gold" }, "geometry": { "type": "Point", "coordinates": [ 29.65862, 1.00308 ] } },
{ "type": "Feature", "properties": { "visit_date": "2013-03-27Z", "name": "Molende", "n_workers": 130.0, "mineral": "Gold" }, "geometry": { "type": "Point", "coordinates": [ 29.65629, 0.98563 ] } },
...
{ "type": "Feature", "properties": { "visit_date": "2017-08-31Z", "name": "Kambasha", "n_workers": 37.0, "mineral": "Cassiterite" }, "geometry": { "type": "Point", "coordinates": [ 29.05973167, -2.25938167 ] } }
]
}
I read this file, with the next code:
filename = "ipis_cod_mines.geojson"
df_congomines_crs84_geo = gpd.read_file(filename)
But when I check the crs property of df_congomines_crs84_geo,
df_congomines_crs84_geo.crs
I got "{'init': 'epsg:4326'}". I don't understand why I don't get the CRS declared in the file. (first question)
Afterwards, I read another dataset for the same area (both datasets belong to Congo):
df_countries_4326_geo = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
This dataset has crs equal to {'init': 'epsg:4326'}.
When i plot both datasets (without change the crs),
ax = congo_df.plot(alpha=0.5, color='brown', figsize=(11,4))
df_congomines_crs84_geo.plot(ax=ax, column='mineral')
plt.show()
I got the next image:
Image result
Why don't the two images overlap if they belong to the same area? How can I fix it? Is this problem related to the UTM zone? (second question)
CRS84 is equivalent to WGS84 for which the standard EPSG code is EPSG:4326. CRS84 was defined in an old geojson spec (2008). Reading a geojson file gives EPSG:4326 as the CRS.

Flatten nested JSON arrays with inherits properties in Python

I have a big JSON document/dictionary with different levels of nested JSON arrays. I would like to flatten it while also capturing the relationships within the structure.
Part of my JSON looks like:
{
"name": "root",
"type": "all",
"children": [
{
"name": "properties",
"type": "feature",
"children": [
{
"name": "print",
"type": "feature",
"children": [
{
"name": "graphic print",
"type": "feature",
"inherits": true
},
{
"name": "striped print",
"type": "feature",
"inherits": true,
"children": [
{
"name": "pinstriped",
"type": "feature",
"inherits": true
},
{
"name": "light stripe",
"type": "feature",
"inherits": true
},
{
"name": "wide stripe",
"type": "feature",
"inherits": true
}
]
}
]
}
]
},
{
"name": "colours",
"type": "colour",
"children": [
{
"name": "main colours",
"type": "colour",
"children": [
{
"name": "black",
"type": "colour",
"children": [
{
"name": "light black",
"type": "colour",
"inherits": true
},
{
"name": "blue black",
"type": "colour",
"inherits": true
}
]
},
{
"name": "red",
"type": "colour",
"children": [
{
"name": "bright red",
"type": "colour",
"inherits": true
},
{
"name": "light red",
"type": "colour"
}
]
}
]
}
]
},
{
"name": "genders",
"type": "gender",
"children": [
{
"name": "female",
"type": "gender"
},
{
"name": "male",
"type": "gender"
}
]
}
]
}
The nesting depth is not the same everywhere. I
- want all the nodes (values of "name")
- also want all of a node's parents if the node has the "inherits" key set to true.
Something like:
But if there are better ideas on how to store this data, I will be happy to accept them as well!
Many Thanks!
I think this should do what you need:
def parse_dict_of_dict(_dict, _parent='', ret_dict=None):
    """Flatten a ``name``/``children`` tree into ``{name: inherited parents}``.

    Every node's ``"name"`` becomes a key. The value is the comma-separated
    chain of ancestors (nearest first) handed down through consecutive
    ``"inherits": true`` nodes, or ``None`` when the node does not inherit.

    Args:
        _dict: Node dict with a ``"name"`` key and optional ``"children"``
            (list of node dicts) and ``"inherits"`` (bool) keys.
        _parent: Ancestor chain handed down by the caller; leave empty for
            the root.
        ret_dict: Optional dict to accumulate into. A fresh dict is created
            when omitted, so separate calls never share state.

    Returns:
        The accumulator dict, mapping node name to ancestor chain or ``None``.

    Raises:
        KeyError: If a node has no ``"name"`` key.
    """
    # A literal ``{}`` default would be created once and shared by every call.
    if ret_dict is None:
        ret_dict = {}

    name = _dict["name"]
    children = _dict.get('children')
    inherits = _dict.get('inherits', False)

    if isinstance(children, list):
        # An inheriting node extends the chain it received; any other node
        # starts a new chain consisting of just itself.
        chain = name + ', ' + _parent if inherits else name
        for child in children:
            parse_dict_of_dict(child, chain, ret_dict)

    # An inheriting node called with an empty parent produces a chain ending
    # in ", "; trim that dangling separator.
    ret_dict[name] = _parent.strip(' ').strip(',') if inherits else None
    return ret_dict
Can you elaborate more on your expected output?
Alternatively, you can use this function to flatten a nested JSON object into a flat one:
def parse_dict_of_dict(_dict, _str=''):
    """Flatten nested dicts/lists into a single-level dict with path-like keys.

    Nested keys are joined with ``_`` and list positions are inserted as
    numeric segments, e.g. ``{"a": {"b": [{"c": 1}]}}`` becomes
    ``{"a_b_0_c": "1"}``.

    Args:
        _dict: Dict to flatten (for example the result of ``json.load``).
        _str: Key prefix accumulated during recursion; leave empty at the
            top level.

    Returns:
        A new flat dict. Values found directly under a key are converted with
        ``str``; non-dict items of a list are stored unchanged.
    """
    ret_dict = {}
    for k, v in _dict.items():
        if isinstance(v, dict):
            ret_dict.update(parse_dict_of_dict(v, _str=_str + k + '_'))
        elif isinstance(v, list):
            for index, item in enumerate(v):
                if isinstance(item, dict):
                    ret_dict.update(
                        parse_dict_of_dict(item, _str=_str + k + '_%d_' % index)
                    )
                else:
                    # Keep the accumulated prefix so list items coming from
                    # different branches do not overwrite each other.
                    ret_dict[_str + k + '_%d' % index] = item
        else:
            ret_dict[_str + k] = str(v)
    return ret_dict

Categories

Resources