I want to get the elevation of a segment (linestring) from a GeoJSON file.
I'm using this API:
Documentation: https://api3.geo.admin.ch/services/sdiservices.html#profile
request URL: https://api3.geo.admin.ch/rest/services/profile.csv
My problem is that I cannot get rid of an error saying the passed parameter 'geom' is not of GeoJSON type.
My geojson file:
{
"type": "FeatureCollection",
"name": "test",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "Name": null, "description": "", "altitudeMode": "clampToGround", "tessellate": 1, "type": "linepolygon" }, "geometry": { "type": "LineString", "coordinates": [ [ 7.349510580151255, 45.998132989830559 ], [ 7.346422156898689, 46.039529058312063 ], [ 7.287112064012824, 46.093617348068292 ], [ 7.236173542687846, 46.127135334945002 ] ] } }
]
}
My code:
import requests
import json
api_url = "https://api3.geo.admin.ch/rest/services/profile.csv"
file_path = "geojson.GEOJSON"
# Parse the whole GeoJSON file into a Python dict.
with open(file_path) as f:
geojson = json.load(f)
# The entire parsed document is sent as the 'geom' query parameter; this is the
# request that produces the 400 "Invalid geom parameter" response quoted below.
r = requests.get(api_url, params=dict(geom=geojson))
print(r.json())
Output:
{'error': {'code': 400, 'message': 'Invalid geom parameter, must be a GEOJSON'}, 'success': False}
Edit: Solution
import requests
import json

api_url = "https://api3.geo.admin.ch/rest/services/profile.csv"
file_path = "geojson.GEOJSON"

with open(file_path) as f:
    features = json.load(f)['features']

# The profile service expects ONE bare geometry object, serialized as a JSON
# string -- not a Feature and not a FeatureCollection. Only the first feature
# of the file is sent.
geom = json.dumps(features[0]['geometry'])

r = requests.get(api_url, params=dict(geom=geom))
# profile.csv answers with CSV text, so print the raw body instead of r.json().
print(r.content)
Look at the example in the docs. The API wants a single geometry, something like {"type": "LineString", "coordinates": [[1,2], [3,4]]}; you are giving it an entire FeatureCollection.
Related
I am using a Databricks notebook to copy a jsonl.gz file from S3 to ABS (my ABS container is already mounted), and need the file to be unzipped at the end of the process. The filenames will be fed into the notebook using the 'directory' and 'fileun' variables. An example filename is 'folder-date/0000-00-0000.jsonl.gz'.
I am having difficulty figuring out the exact syntax for this. Currently I am getting stuck on trying to read the jsonl.gz file into a dataframe. The error I get is "Invalid file path or buffer object type: <class 'dict'>". Here is what I have so far, any help is appreciated:
# Notebook widget inputs: the object's file name and the folder it lives in.
fileun = dbutils.widgets.get("fileun")
directory = dbutils.widgets.get("directory")
# Drop the last three characters of the name (".gz" in the example filename).
file = fileun[:-3]
file_path=directory+fileun
import pandas as pd
import numpy as np
import boto3
import pyodbc
import gzip
import shutil
client = boto3.client(
"s3",
region_name='region',
aws_access_key_id='key',
aws_secret_access_key='key'
)
response=client.get_object(
Bucket='bucket_name',
Key=file_path
)
# NOTE(review): `response` is the value returned by get_object, not a path or
# file object; the reported "Invalid file path or buffer object type:
# <class 'dict'>" error points at this call. The surrounding ** markers look
# like leftover bold markup rather than Python -- confirm.
**df = pd.read_json(response, compression='infer')**
# NOTE(review): gzip.open is likewise handed `response` rather than a filename
# or file object.
with gzip.open(response, 'rb') as f_in:
with open(file, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
# NOTE(review): 'container_name' contains no % placeholder, and write() is given
# the DataFrame itself -- presumably a path template and serialized text were
# intended; confirm.
writefile = open('container_name' % (fileun), 'w')
writefile.write(df)
writefile.close()
Here is a snippet from one of the files:
{
"uid": "9a926d799f437b0d279c144dec2bcef7cd16db341bca6e7653246d960331d00a",
"doc": {
"snippet": {
"authorProfileImageUrl": null,
"textDisplay": "#Keanu Corps Same \"reasoning\" as the democrats. Even though inflation is getting worse.",
"publishedAt": "2021-10-28T09:34:15.334995+0000",
"authorChannelUrl": "/channel/UCsxSW7_bBsbFAjkh7ujctRA",
"authorChannelId": {
"value": "UCsxSW7_bBsbFAjkh7ujctRA"
},
"likeCount": 0,
"videoId": "U2R_srS4TR4",
"authorDisplayName": "The Video Game Hunger 01"
},
"crawlid": "-",
"kind": "youtube#comment",
"correlation_id": "195c9442-74d8-5003-baa3-7f1d05ef5aa6",
"id": "UgznKXgYDDWUEjX0YaZ4AaABAg.9TvZiuGoiMM9U1jbFAWCTn",
"parentId": "UgznKXgYDDWUEjX0YaZ4AaABAg",
"is_reply": true,
"timestamp": "2021-10-28T18:34:15.437543"
},
"system_timestamp": "2021-10-28T18:34:15.944581+00:00",
"norm_attribs": {
"website": "github.com/-",
"type": "youtube",
"version": "1.0"
},
"type": "youtube_comment",
"norm": {
"author": "The Video Game Hunger 01",
"domain": "youtube.com",
"id": "UgznKXgYDDWUEjX0YaZ4AaABAg.9TvZiuGoiMM9U1jbFAWCTn",
"body": "#Keanu Corps Same \"reasoning\" as the democrats. Even though inflation is getting worse.",
"author_id": "UCsxSW7_bBsbFAjkh7ujctRA",
"url": "https://www.youtube.com/watch?v=U2R_srS4TR4&lc=UgznKXgYDDWUEjX0YaZ4AaABAg.9TvZiuGoiMM9U1jbFAWCTn",
"timestamp": "2021-10-28T09:34:15.334995+00:00"
},
"organization_id": "-",
"sub_organization_id": "default",
"campaign_id": "-",
"project_id": "default",
"project_version_id": "default",
"meta": {
"relates_to_timestamp": [
{
"results": [
"2021-10-28T09:34:15.334995+00:00"
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "Timestamp Extractor",
"version": "1.0"
}
}
],
"post_type": [
{
"results": [
"post"
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "Post Type Extractor",
"version": "1.0"
}
}
],
"relates_to": [
{
"results": [
"U2R_srS4TR4"
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "String Extractor",
"version": "1.0"
}
}
],
"author_name": [
{
"results": [
"The Video Game Hunger 01"
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "String Extractor",
"version": "1.0"
}
}
],
"author_id": [
{
"results": [
"UCsxSW7_bBsbFAjkh7ujctRA"
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "String Extractor",
"version": "1.0"
}
}
],
"rule_matcher": [
{
"results": [
{
"metadata": {
"campaign_title": "-",
"project_title": "-",
"maxdepth": 0
},
"sub_organization_id": null,
"description": null,
"project_version_id": "-",
"rule_id": "2569463",
"rule_tag": "-",
"rule_type": "youtube_keyword",
"project_id": "-",
"appid": "nats-main",
"organization_id": "-",
"value": "طالبان شلیک",
"campaign_id": "-",
"node_id": null
}
],
"attribs": {
"website": "github.com/-",
"source": "Explicit",
"type": "youtube",
"version": "1.0"
}
}
]
}
}
Without knowing the structure of your json inside the .gz file, it's tough to say exactly how to help.
This is what I use to download from .gz from s3 directly to dataframe.
import gzip
import json

import boto3
import pandas as pd

# `bucket` and `key` are expected to be defined by the caller.
s3sr = boto3.resource('s3')
obj = s3sr.Object(bucket, key)

# Download the object and gunzip it fully in memory.
text = gzip.decompress(obj.get()['Body'].read()).decode('utf-8')

try:
    # Plain .json.gz: the whole payload is a single JSON document.
    data = json.loads(text)
except json.JSONDecodeError:
    # JSON Lines (.jsonl.gz): one JSON document per non-empty line.
    data = [json.loads(line) for line in text.splitlines() if line.strip()]

df = pd.DataFrame(data)
And if opening .gz from local, I use this
# Text mode lets gzip handle the UTF-8 decoding while json reads the stream.
with gzip.open(fullpath, 'rt', encoding='utf-8') as f:
    data = json.load(f)
df = pd.DataFrame(data)
This is what finally ended up working to copy the file, but I am losing the individual objects within the file and they are showing up on just one line:
fileun = dbutils.widgets.get("fileun")
directory = dbutils.widgets.get("directory")
key=directory+fileun
import pandas as pd
import numpy as np
import boto3
import pyodbc
import os
import gzip
import shutil
import json
# NOTE(review): as shown, the boto3.resource( call below is never closed with
# ')'; [key_id] / [access_key] look like redacted placeholders -- confirm.
s3_resource = boto3.resource('s3',
aws_access_key_id=[key_id],
aws_secret_access_key= [access_key]
my_bucket = s3_resource.Bucket(bucket_name)
# Every object whose key starts with directory+fileun is downloaded to a local
# file named after the last path component of its key.
objects = my_bucket.objects.filter(Prefix= directory+fileun)
for obj in objects:
path, filename = os.path.split(obj.key)
my_bucket.download_file(obj.key, filename)
# NOTE(review): `file` is not assigned anywhere in this snippet.
with gzip.open(filename, 'rb') as f_in:
with open(file, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
# readlines() yields raw text lines, so the DataFrame holds one string per
# line rather than parsed JSON objects.
with open(file) as f:
data = f.readlines()
df = pd.DataFrame(data)
# orient="records" serializes all rows into one JSON array, which matches the
# reported "everything on one line" output.
jinsert = df.to_json(orient="records")
# NOTE(review): '[container_name]' contains no % placeholder -- presumably a
# path template was redacted here; confirm.
writefile = open('[container_name]' % (file), 'w')
writefile.write(jinsert)
writefile.close()
This is the first time I'm working with JSON, and I'm trying to pull url out of the JSON below.
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer,
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"
}
I have been able to access description and _id via
data = json.loads(line)
if 'xpath' in data:
xpath = data["_id"]
description = data["sections"][0]["payload"][0]["description"]
However, I can't seem to figure out a way to access url. One other issue I have is there could be other items in sections, which makes indexing into Contact Info a non starter.
Hope this helps:
import json

with open("test.json", "r") as f:
    json_out = json.load(f)

# Walk every payload entry of every section and print each key/value pair
# whose key contains "url", so the section's position does not matter.
for section in json_out["sections"]:
    for item in section["payload"]:
        for key, value in item.items():
            if "url" in key:
                print(key, '->', value)
I think your JSON is malformed (the "name" value in the Products payload is missing its closing quote). It should look like this:
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer",
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"
}
You can check it on http://json.parser.online.fr/.
And if you want to get the value of the url.
import json

# Use a context manager so the file handle is closed deterministically.
with open('yourJSONfile.json') as f:
    j = json.load(f)

# The url lives in the first payload entry of the second section.
print(j['sections'][1]['payload'][0]['url'])
I think it's worth writing a short function that collects the url(s); you can then decide whether to use the first url in the returned list, or to skip processing when no url is available in your data.
The function could look like this:
def extract_urls(data):
    """Return every ``url`` value found in the payload entries of *data*.

    *data* is the parsed JSON document: a dict whose ``sections`` list holds
    dicts that may carry a ``payload`` list of dicts. Sections without a
    payload (missing key or ``null``) are skipped, and so is a missing or
    ``null`` ``sections`` key. The result is a list of urls in document
    order; it is empty when no payload entry has a ``url`` key.
    """
    payloads = []
    for section in data.get('sections') or []:
        # `or []` covers both an absent key and an explicit null payload.
        payloads.extend(section.get('payload') or [])
    return [entry['url'] for entry in payloads if 'url' in entry]
This should print out the URL
import json

# open json file to read
with open('test.json', 'r') as f:
    # parse the JSON document straight from the open file object
    data = json.load(f)

# after observing format of JSON data, the location of the URL key
# is determined and the data variable is manipulated to extract the value
print(data['sections'][1]['payload'][0]['url'])
The exact location of the 'url' key:
1st (position) of the array which is the value of the key 'sections'
Inside the array value, there is a dict, and the key 'payload' contains an array
In the 0th (position) of the array is a dict with a key 'url'
While testing my solution, I noticed that the json provided is flawed, after fixing the json flaws(3), I ended up with this.
{
"name": "The_New11d112a_Company_Name",
"sections": [
{
"name": "Products",
"payload": [
{
"id": 1,
"name": "TERi Geriatric Patient Skills Trainer",
"type": "string"
}
]
},
{
"name": "Contact Info",
"payload": [
{
"id": 1,
"name": "contacts",
"url": "https://www.a3bs.com/catheterization-kits-8000892-3011958-3b-scientific,p_1057_31043.html",
"contacts": [
{
"name": "User",
"email": "Company Email",
"phone": "Company PhoneNumber"
}
],
"type": "contact"
}
]
}
],
"tags": [
"Male",
"Airway"
],
"_id": "0e4cd5c6-4d2f-48b9-acf2-5aa75ade36e1"}
After utilizing the JSON that was provided by Vincent55.
I made a working code with exception handling and with certain assumptions.
Working Code:
## Assuming that the target data is always under sections[i].payload
from json import loads
line = open("data.json").read()
data = loads(line)["sections"]
for x in data:
try:
# With assumption that there is only one payload
if x["payload"][0]["url"]:
print(x["payload"][0]["url"])
except KeyError:
pass
I'm new to Python. I'm running Python on Azure Databricks. I have a .json file; the important fields of the file are shown here:
{
"school": [
{
"schoolid": "mr1",
"board": "cbse",
"principal": "akseal",
"schoolName": "dps",
"schoolCategory": "UNKNOWN",
"schoolType": "UNKNOWN",
"city": "mumbai",
"sixhour": true,
"weighting": 3,
"paymentMethods": [
"cash",
"cheque"
],
"contactDetails": [
{
"name": "picsa",
"type": "studentactivities",
"information": [
{
"type": "PHONE",
"detail": "+917597980"
}
]
}
],
"addressLocations": [
{
"locationType": "School",
"address": {
"countryCode": "IN",
"city": "Mumbai",
"zipCode": "400061",
"street": "Madh",
"buildingNumber": "80"
},
"Location": {
"latitude": 49.313885,
"longitude": 72.877426
},
I need to create a data frame with schoolName as one column and latitude and longitude as the other two columns. Can you please suggest how to do that?
you can use the method json.load(), here's an example:
import json

# Parse the file into Python objects; the handle is closed on leaving `with`.
with open('path_to_file/file.json') as f:
    data = json.load(f)

print(data)
use this
import json  # built-in

with open("filename.json", 'r') as jsonFile:
    # load() is a function of the json module that takes the open file;
    # file objects themselves have no .load() method.
    Data = json.load(jsonFile)
Data is now a dictionary of the file's contents, e.g.:
for i in Data:
    # loops through keys
    print(Data[i])  # prints the value
For more on JSON:
https://docs.python.org/3/library/json.html
and python dictionaries:
https://www.programiz.com/python-programming/dictionary#:~:text=Python%20dictionary%20is%20an%20unordered,when%20the%20key%20is%20known.
I am having trouble testing my function using mock. The function takes a URL as a parameter and returns a GeoDataFrame. First I have to simulate the response of the GET request (JSON format).
Function to test
def download_stations_from_url(url):
    """Download GeoJSON features from *url* and return them in EPSG:4326.

    The response body must be a JSON object with a ``features`` list. The
    source coordinates are declared as EPSG:32188 and the resulting
    GeoDataFrame is reprojected to EPSG:4326 before being returned.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, instead of failing later on a missing ``features`` key.
    """
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    # Pass the CRS as an authority string at construction time; the
    # {'init': 'epsg:...'} dict form is deprecated.
    gdf = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:32188')
    return gdf.to_crs(epsg=4326)
Test using Mock
from py_process.app import download_stations_from_url
# NOTE(review): the next line starts with '#', so as written it is a comment
# and no patch is applied -- presumably it was '@patch(...)'. Its target is
# the function under test itself rather than the HTTP call it makes.
#patch('py_process.app.download_stations_from_url')
def test_download_stations_from_url(self, mock_requests_json):
# Configure the mock's return value to look like a 200 response whose
# .json() yields a single-feature payload.
mock_requests_json.return_value.status_code = 200
mock_requests_json.return_value.json.return_value = {
"features": [{
"geometry": {
"coordinates": [
299266.0160258789,
5039428.849663065
],
"type": "Point"
},
"type": "Feature",
"properties": {
"valide_a": 99999999,
"MUNIC": "Montreal",
"X": 299266.016026,
"xlong": -73.5708055439,
"Parking": 0,
"Y": 5039428.84966,
"NOM": "Gare Lucien-L'Allier",
"ylat": 45.4947606844
}
}]
}
# NOTE(review): download_stations_from_url returns the result of
# gdf.to_crs(...), not the HTTP response, so a status_code assertion on it
# does not check the function's output.
response = download_stations_from_url('http://www.123.com')
assert response.status_code == 200
You need to mock requests.get, not the function you are actually testing.
from unittest.mock import patch

from py_process.app import download_stations_from_url


# Patch requests.get as looked up by py_process.app, i.e. the HTTP call made
# inside the function under test -- not the function itself.
@patch('py_process.app.requests.get')
def test_download_stations_from_url(self, mock_requests_json):
    """Check that a mocked GeoJSON response becomes a reprojected GeoDataFrame."""
    mock_requests_json.return_value.status_code = 200
    mock_requests_json.return_value.json.return_value = {
        "features": [{
            "geometry": {
                "coordinates": [
                    299266.0160258789,
                    5039428.849663065
                ],
                "type": "Point"
            },
            "type": "Feature",
            "properties": {
                "valide_a": 99999999,
                "MUNIC": "Montreal",
                "X": 299266.016026,
                "xlong": -73.5708055439,
                "Parking": 0,
                "Y": 5039428.84966,
                "NOM": "Gare Lucien-L'Allier",
                "ylat": 45.4947606844
            }
        }]
    }

    df = download_stations_from_url('http://www.123.com')

    # The function returns a GeoDataFrame, not the HTTP response, so the
    # assertions target the frame (and the mocked call), not a status_code.
    mock_requests_json.assert_called_once_with('http://www.123.com')
    assert len(df) == 1
    assert df.iloc[0]['NOM'] == "Gare Lucien-L'Allier"
    assert df.crs.to_epsg() == 4326
i have a json with structure:
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVE": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
and I need to rename the field "ADMIN_LEVE".
I have written some code using pandas to rename it, but it doesn't work:
df = pd.DataFrame(data)
df.rename(columns={'ADMIN_LEVE':'ADMIN_LEVEL'}, inplace=True)
How can I do it?
I have also tried with replace, but that doesn't work either:
json_data=open(path + ".json").read()
data = json.loads(json_data)
for d in data:
d.replace('"ADMIN_LEVE"', '"ADMIN_LEVEL"')
Thanks
Source JSON (as string):
In [325]: print(s)
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVE": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
Replaced:
In [327]: s = s.replace('"ADMIN_LEVE":', '"ADMIN_LEVEL":')
In [328]: print(s)
{
" features": [
{
"geometry": {
"type": "Polygon",
"coordinates": []
},
"type": "Feature",
"properties": {
"ADMIN_LEVEL": "REGION",
"POPULTION": 4363916,
"GEO_CENTER": "7.923209152686669, 45.06052300898206",
"ID": "01",
"NAME": "PIEMONTE"
}
}
]
}
UPDATE: helper function:
def replace_in_json_file(filename, from_str, to_str):
    """Replace every occurrence of *from_str* with *to_str* in *filename*.

    This is a plain text substitution on the file's contents, not a
    structural JSON edit, so include the surrounding quotes (and colon) in
    *from_str* when renaming a key. The file is read and written as UTF-8
    and is only rewritten when the substitution changes something.
    """
    with open(filename, encoding='utf-8') as f:
        data = f.read()
    updated = data.replace(from_str, to_str)
    if updated == data:
        # Nothing matched: leave the file untouched.
        return
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(updated)