KeyError when parsing JSON from API - python

I need to parse data from a JSON response I've requested from an API. The API documentation provides example code showing how to do this; however, their method leads to the following KeyError:
Traceback (most recent call last):
File "testing2.py", line 11, in <module>
for flight in api_response['results']:
KeyError: 'results'
I've spent a lot of time looking for possible reasons but I can't find a solution that works for me. The Python file looks like this:
import requests
params = {
'access_key': my_access_key'
}
results = requests.get('http://api.aviationstack.com/v1/flights?access_key=my_access_key&airline_name=Air%20India&flight_number=560')
api_response = results.json()
for flight in api_response['results']:
if (flight['live']['is_ground'] is False):
print(u'%s flight %s from %s (%s) to %s (%s) is in the air.' % (
flight['airline']['name'],
flight['flight']['iata'],
flight['departure']['airport'],
flight['departure']['iata'],
flight['arrival']['airport'],
flight['arrival']['iata']))
I've been able to read through the data with print(api_response), however I can't parse through any of it since the key "results" simply doesn't seem to exist.
Edit:
This is the result of simply going to the URL or of print(api_response), a large chunk of text:
{"pagination":{"limit":100,"offset":0,"count":4,"total":4},"data":[{"flight_date":"2020-09-02","flight_status":"landed","departure":{"airport":"Indira Gandhi International","timezone":"Asia/Kolkata","iata":"DEL","icao":"VIDP","terminal":"3","gate":"33","delay":4,"scheduled":"2020-09-02T07:15:00+00:00","estimated":"2020-09-02T07:15:00+00:00","actual":"2020-09-02T07:18:00+00:00","estimated_runway":"2020-09-02T07:18:00+00:00","actual_runway":"2020-09-02T07:18:00+00:00"},"arrival":{"airport":"Hyderabad Airport","timezone":"Asia/Kolkata","iata":"HYD","icao":"VOHS","terminal":"1","gate":null,"baggage":null,"delay":2,"scheduled":"2020-09-02T09:10:00+00:00","estimated":"2020-09-02T09:10:00+00:00","actual":"2020-09-02T09:09:00+00:00","estimated_runway":"2020-09-02T09:09:00+00:00","actual_runway":"2020-09-02T09:09:00+00:00"},"airline":{"name":"Air India","iata":"AI","icao":"AIC"},"flight":{"number":"560","iata":"AI560","icao":"AIC560","codeshared":null},"aircraft":{"registration":"VT-EXM","iata":"A20N","icao":"A20N","icao24":"800C4B"},"live":null},{"flight_date":"2020-09-01","flight_status":"landed","departure":{"airport":"Tirupati","timezone":"Asia/Kolkata","iata":"TIR","icao":"VOTP","terminal":null,"gate":null,"delay":null,"scheduled":"2020-09-01T10:20:00+00:00","estimated":"2020-09-01T10:20:00+00:00","actual":"2020-09-01T10:06:00+00:00","estimated_runway":"2020-09-01T10:06:00+00:00","actual_runway":"2020-09-01T10:06:00+00:00"},"arrival":{"airport":"Hyderabad Airport","timezone":"Asia/Kolkata","iata":"HYD","icao":"VOHS","terminal":"3","gate":null,"baggage":null,"delay":null,"scheduled":"2020-09-01T11:40:00+00:00","estimated":"2020-09-01T11:40:00+00:00","actual":"2020-09-01T10:51:00+00:00","estimated_runway":"2020-09-01T10:51:00+00:00","actual_runway":"2020-09-01T10:51:00+00:00"},"airline":{"name":"Air 
India","iata":"AI","icao":"AIC"},"flight":{"number":"560","iata":"AI560","icao":"AIC560","codeshared":null},"aircraft":null,"live":null},{"flight_date":"2020-09-01","flight_status":"landed","departure":{"airport":"Indira Gandhi International","timezone":"Asia/Kolkata","iata":"DEL","icao":"VIDP","terminal":"3","gate":"29B","delay":6,"scheduled":"2020-09-01T06:50:00+00:00","estimated":"2020-09-01T06:50:00+00:00","actual":"2020-09-01T06:56:00+00:00","estimated_runway":"2020-09-01T06:56:00+00:00","actual_runway":"2020-09-01T06:56:00+00:00"},"arrival":{"airport":"Hyderabad Airport","timezone":"Asia/Kolkata","iata":"HYD","icao":"VOHS","terminal":"2","gate":null,"baggage":null,"delay":null,"scheduled":"2020-09-01T09:20:00+00:00","estimated":"2020-09-01T09:20:00+00:00","actual":null,"estimated_runway":null,"actual_runway":null},"airline":{"name":"Air India","iata":"AI","icao":"AIC"},"flight":{"number":"560","iata":"AI560","icao":"AIC560","codeshared":null},"aircraft":{"registration":"VT-EXM","iata":"A20N","icao":"A20N","icao24":"800C4B"},"live":null},{"flight_date":"2020-09-01","flight_status":"landed","departure":{"airport":"Hyderabad Airport","timezone":"Asia/Kolkata","iata":"HYD","icao":"VOHS","terminal":null,"gate":null,"delay":null,"scheduled":"2020-09-01T12:45:00+00:00","estimated":"2020-09-01T12:45:00+00:00","actual":"2020-09-01T12:41:00+00:00","estimated_runway":"2020-09-01T12:41:00+00:00","actual_runway":"2020-09-01T12:41:00+00:00"},"arrival":{"airport":"Indira Gandhi International","timezone":"Asia/Kolkata","iata":"DEL","icao":"VIDP","terminal":"3","gate":null,"baggage":null,"delay":null,"scheduled":"2020-09-01T14:55:00+00:00","estimated":"2020-09-01T14:55:00+00:00","actual":"2020-09-01T14:42:00+00:00","estimated_runway":"2020-09-01T14:42:00+00:00","actual_runway":"2020-09-01T14:42:00+00:00"},"airline":{"name":"Air India","iata":"AI","icao":"AIC"},"flight":{"number":"560","iata":"AI560","icao":"AIC560","codeshared":null},"aircraft":null,"live":null}]}

Try this. It reads the key data instead of the key results (which does not exist in the response you posted), and it avoids a TypeError when a flight's live value is null (as it is for every landed flight in your sample) by checking that value before indexing into it.
import requests

# Let requests build and URL-encode the query string instead of hand-writing it.
params = {
    'access_key': access_key,
    'airline_name': 'Air India',
    'flight_number': 560
}
results = requests.get('http://api.aviationstack.com/v1/flights', params=params)
api_response = results.json()

# The flight records live under "data"; the response has no "results" key.
# "live" can be null, so test it before indexing into it.
airborne = [
    flight for flight in api_response['data']
    if flight.get('live') and not flight['live']['is_ground']
]

for flight in airborne:
    print('{} flight {} from {} ({}) to {} ({}) is in the air.'.format(
        flight['airline']['name'],
        flight['flight']['iata'],
        flight['departure']['airport'],
        flight['departure']['iata'],
        flight['arrival']['airport'],
        flight['arrival']['iata']))

# Report once, after every flight has been checked, and only when none is airborne.
if not airborne:
    print("All flights landed.")

Related

How to handle multiple missing keys in a dict?

I'm using an API to get basic information about shops in my area, name of shop, address, postcode, phone number etc… The API returns back a long list about each shop, but I only want some of the data from each shop.
I created a for loop that just takes the information that I want for every shop that the API has returned. This all works fine.
Problem is not all shops have a phone number or a website, so I get a KeyError because the key website does not exist in every return of a shop. I tried to use try and except which works but only if I only handle one thing, but a shop might not have a phone number and a website, which leads to a second KeyError.
What can I do to check for every key in my for loop and if a key is found missing to just add the value "none"?
My code:
import requests
import geocoder
import pprint
g = geocoder.ip('me')
print(g.latlng)
latitude, longitude = g.latlng
URL = "https://discover.search.hereapi.com/v1/discover"
latitude = xxxx
longitude = xxxx
api_key = 'xxxxx' # Acquire from developer.here.com
query = 'food'
limit = 12
PARAMS = {
'apikey':api_key,
'q':query,
'limit': limit,
'at':'{},{}'.format(latitude,longitude)
}
# sending get request and saving the response as response object
r = requests.get(url = URL, params = PARAMS)
data = r.json()
#print(data)
for x in data['items']:
title = x['title']
address = x['address']['label']
street = x['address']['street']
postalCode = x['address']['postalCode']
position = x['position']
access = x['access']
typeOfBusiness = x['categories'][0]['name']
contacts = x['contacts'][0]['phone'][0]['value']
try:
website = x['contacts'][0]['www'][0]['value']
except KeyError:
website = "none"
resultList = {
'BUSINESS NAME:':title,
'ADDRESS:':address,
'STREET NAME:':street,
'POSTCODE:':postalCode,
'POSITION:':position,
'POSITSION2:':access,
'TYPE:':typeOfBusiness,
'PHONE:':contacts,
'WEBSITE:':website
}
print("--"*80)
pprint.pprint( resultList)
I think a good way to handle it would be to use operator.itemgetter() to create a callable that will attempt to retrieve all the keys at once; if any of them aren't found, it will raise a KeyError.
A short demonstration of what I mean:
from operator import itemgetter
test_dict = dict(name="The Shop", phone='123-45-6789', zipcode=90210)
keys = itemgetter('name', 'phone', 'zipcode')(test_dict)
print(keys) # -> ('The Shop', '123-45-6789', 90210)
keys = itemgetter('name', 'address', 'phone', 'zipcode')(test_dict)
# -> KeyError: 'address'

Big Query how to change mode of columns?

I have a Dataflow pipeline that fetches data from Pub/Sub, prepares it for insertion into BigQuery, and then writes it to the database.
It works fine, it can generate the schema automatically and it is able to recognise what datatype to use and everything.
However the data we are using with it can vary vastly in format. Ex: we can get both A and B for a single column
A {"name":"John"}
B {"name":["Albert", "Einstein"]}
If the first message (A) is inserted first, then inserting the second one (B) fails.
If I do it the other way around, however, it works.
I always get the following error:
INFO:root:Error: 400 POST https://bigquery.googleapis.com/upload/bigquery/v2/project/projectname/jobs?uploadType=resumable: Provided Schema does not match Table project:test_dataset.test_table. Field cars has changed mode from NULLABLE to REPEATED with loading dataframe
ERROR:apache_beam.runners.direct.executor:Exception at bundle <apache_beam.runners.direct.bundle_factory._Bundle object at 0x7fcb9003f2c0>, due to an exception.
Traceback (most recent call last):
........
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
.....
Provided Schema does not match Table project.test_table. Field cars has changed mode from NULLABLE to REPEATED
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "apache_beam/runners/common.py", line 1233, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 582, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "newmain.py", line 211, in process
if load_job and load_job.errors:
UnboundLocalError: local variable 'load_job' referenced before assignment
Below is the code
class WriteDataframeToBQ(beam.DoFn):
def __init__(self, bq_dataset, bq_table, project_id):
self.bq_dataset = bq_dataset
self.bq_table = bq_table
self.project_id = project_id
def start_bundle(self):
self.client = bigquery.Client()
def process(self, df):
# table where we're going to store the data
table_id = f"{self.bq_dataset}.{self.bq_table}"
# function to help with the json -> bq schema transformations
generator = SchemaGenerator(input_format='dict', quoted_values_are_strings=True, keep_nulls=True)
# Get original schema to assist the deduce_schema function. If the table doesn't exist
# proceed with empty original_schema_map
try:
table = self.client.get_table(table_id)
original_schema = table.schema
self.client.schema_to_json(original_schema, "original_schema.json")
with open("original_schema.json") as f:
original_schema = json.load(f)
original_schema_map, original_schema_error_logs = generator.deduce_schema(input_data=original_schema)
except Exception:
logging.info(f"{table_id} table not exists. Proceed without getting schema")
original_schema_map = {}
# convert dataframe to dict
json_text = df.to_dict('records')
# generate the new schema, we need to write it to a file because schema_from_json only accepts json file as input
schema_map, error_logs = generator.deduce_schema(input_data=json_text, schema_map=original_schema_map)
schema = generator.flatten_schema(schema_map)
schema_file_name = "schema_map.json"
with open(schema_file_name, "w") as output_file:
json.dump(schema, output_file)
# convert the generated schema to a version that BQ understands
bq_schema = self.client.schema_from_json(schema_file_name)
job_config = bigquery.LoadJobConfig(
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
schema_update_options=[
bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION,
bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION
],
write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
schema=bq_schema
)
job_config.schema = bq_schema
try:
load_job = self.client.load_table_from_json(
json_text,
table_id,
job_config=job_config,
) # Make an API request.
load_job.result() # Waits for the job to complete.
if load_job.errors:
logging.info(f"error_result = {load_job.error_result}")
logging.info(f"errors = {load_job.errors}")
else:
logging.info(f'Loaded {len(df)} rows.')
except Exception as error:
logging.info(f'Error: {error} with loading dataframe')
if load_job and load_job.errors:
logging.info(f"error_result = {load_job.error_result}")
logging.info(f"errors = {load_job.errors}")
def run(argv):
parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(argv)
pipeline_options = PipelineOptions(pipeline_args, save_main_session=True, streaming=True)
options = pipeline_options.view_as(JobOptions)
with beam.Pipeline(options=pipeline_options) as pipeline:
(
pipeline
| "Read PubSub Messages" >> beam.io.ReadFromPubSub(subscription=options.input_subscription)
| "Write Raw Data to Big Query" >> beam.ParDo(WriteDataframeToBQ(project_id=options.project_id, bq_dataset=options.bigquery_dataset, bq_table=options.bigquery_table))
)
if __name__ == "__main__":
logging.getLogger().setLevel(logging.INFO)
run(sys.argv)
Is there a way to change the restrictions of the table to make this work?
BigQuery isn't a document database; it is a column-oriented database. In addition, you can't update the schema of existing columns (you can only add or remove columns).
For your use case, because you can't know or predict the most generic schema for each of your fields, the safest approach is to store the raw JSON as a string and then use BigQuery's JSON functions to post-process your data in SQL.

parse github api.. getting string indices must be integers error

I need to loop through commits and get name, date, and messages info from
GitHub API.
https://api.github.com/repos/droptable461/Project-Project-Management/commits
I have tried many different things, but I keep getting stuck at a "string indices must be integers" error:
def git():
#name , date , message
#https://api.github.com/repos/droptable461/Project-Project-Management/commits
#commit { author { name and date
#commit { message
#with urlopen('https://api.github.com/repos/droptable461/Project Project-Management/commits') as response:
#source = response.read()
#data = json.loads(source)
#state = []
#for state in data['committer']:
#state.append(state['name'])
#print(state)
link = 'https://api.github.com/repos/droptable461/Project-Project-Management/events'
r = requests.get('https://api.github.com/repos/droptable461/Project-Project-Management/commits')
#print(r)
#one = r['commit']
#print(one)
for item in r.json():
for c in item['commit']['committer']:
print(c['name'],c['date'])
return 'suc'
Need to get person who did the commit, date and their message.
item['commit']['committer'] is a dictionary object, and therefore the line
for c in item['commit']['committer']: iterates over the dictionary's keys.
Since you are then calling [] on a string (the dictionary key), you get the error.
Instead, the code should look more like this:
def git():
    """Print the committer name, commit date and message of every commit.

    Requests the commit list of droptable461/Project-Project-Management from
    the GitHub API and prints three lines per commit.

    Returns:
        The string 'suc' after all commits have been printed.
    """
    r = requests.get('https://api.github.com/repos/droptable461/Project-Project-Management/commits')
    for item in r.json():
        # "committer" is a single dict, so index it directly. Looping over it
        # yields its keys (plain strings) and would repeat the output per key.
        committer = item['commit']['committer']
        print(committer['name'])
        print(committer['date'])
        print(item['commit']['message'])
    return 'suc'

Hacker News API, KeyError: 'title'

Just starting to learn python, and I am starting to learn the web-based side of it.
Following the instructions I have, i keep getting a KeyError: 'title' on line 18. Now I see it as the request not returning a title so it gives an error, how would I write it up to give a generic description if there is no 'title'???
import requests
from operator import itemgetter as ig
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print("Status Code:", r.status_code)
submission_ids= r.json()
submission_dicts = []
for submission_id in submission_ids[:30]:
url= ("https://hacker-news.firebaseio.com/v0/item" + str(submission_id) + '.json')
submission_r = requests.get(url)
print(submission_r.status_code)
response_dict = submission_r.json()
submission_dict = {
'title': response_dict['title'],
'link': "https://news.ycombinator.com/item?id=" +str(submission_id),
'comments': response_dict.get('descendants', 0)
}
submission_dicts = sorted(submission_dicts, key= ig('comments'), reverse= True)
for submission_dict in submission_dicts:
print("\nTitle:", submission_dict['title'])
print("Discussion link:", submission_dict['link'])
print("Comments:", submission_dict['comments'])
Status Code: 200
401
Traceback (most recent call last):
File "C:\Users\Shit Head\Documents\Programming\Tutorial Files\hn_submissions.py", line 18, in <module>
'title': response_dict['title'],
KeyError: 'title'
[Finished in 1.2s]
Following the instructions I have, i keep getting a KeyError: 'title' on line 18. Now I see it as the request not returning a title so it gives an error, how would I write it up to give a generic description if there is no 'title'???
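It sounds like you're just looking for the get method. (Note, though, that the 401 status printed above suggests the item request itself is failing: the URL is built as ".../v0/item" + id + ".json" with no "/" between "item" and the ID, so the response is most likely an error object with no 'title' at all.)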
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
So, instead of this:
'title': response_dict['title'],
… you do this:
'title': response_dict.get('title', 'Generic Hacker News Submission'),
Under the covers, this is just a more convenient way to write something you could have done anyway. The following are all pretty much equivalent:
# 1. dict.get with a default value
title = response_dict.get('title', 'Generic')

# 2. Conditional expression; the membership test must use the key 'title',
#    not the variable title (which holds a value, not the key).
title = response_dict['title'] if 'title' in response_dict else 'Generic'

# 3. Explicit membership test
if 'title' in response_dict:
    title = response_dict['title']
else:
    title = 'Generic'

# 4. Try the lookup and handle the failure
try:
    title = response_dict['title']
except KeyError:
    title = 'Generic'
This is worth knowing because Python only usually provides shortcuts like get for really common cases like looking things up in a dictionary. If you wanted to do the same thing with, say, a list that may be empty or have one item, or a file that might or might not exist, or a regular expression that might return a match with a group string or might return None, you'd need to do things the long way.

how to fetch data from google plus using api key in python?

When I send a request like this:
f = urllib.urlopen(https://www.googleapis.com/plus/v1/people/103777531434977807649/activities/public?key=*************** )
json=f.read()
print json
it returns something like this, not the JSON I need:
{
"kind": "plus#activityFeed",
"etag": "\"seVFOlIgH91k2i-GrbizYfaw_AM/chWYjTdvKRLG9yxkeAfrCrofGHk\"",
"nextPageToken": "CAIQ__________9_IAAoAA",
"title": "Google+ List of Activities for Collection PUBLIC",
"items": []
}
What do I have to do to get the right response?
This is the code:
import json
f = urllib.urlopen('https://www.googleapis.com/plus/v1/people/'+id+'/activities /public?key=*****************&maxResults=100')
s = f.read()
f.close()
ss=json.loads(s)
print ss
try:
nextpagetoken=str(ss['nextPageToken'])
i=0
str_current_datetime=str(datetime.now())
gp_crawldate=str_current_datetime.split(" ")[0]
gp_groupid=id
db = MySQLdb.connect("localhost","root","****","googleplus" )
cursor=db.cursor()
while i<len(ss['items']):
gp_kind=str(ss['items'][i]['kind'])
gp_title=str(ss['items'][i]['title'].encode('utf8'))
gp_published=str(ss['items'][i]['published'][0:10])
check=int(cool(str(ss['items'][i]['published'][0:19])))#this method is defined in the code
gp_activityid=str(ss['items'][i]['id'])
gp_actorid=str(ss['items'][i]['actor']['id'])
gp_verb=str(ss['items'][i]['verb'])
gp_objecttype=str(ss['items'][i]['object']['objectType'])
gp_originalcontent=str(ss['items'][i]['object']['content'].encode('utf8'))
gp_totalreplies=str(ss['items'][i]['object']['replies']['totalItems'])
gp_totalplusone=str(ss['items'][i]['object']['plusoners']['totalItems'])
gp_totalreshare=str(ss['items'][i]['object']['resharers']['totalItems'])
#gp_geocode=str(ss['items'][i]['geocode'])
#gp_placename=str(ss['items'][i]['placeName'])
i=i+1
Has there been any change in the Google+ API?
The response you posted is a correct response. If the items field is an empty list, then the user that you are fetching the posts for has probably never posted anything publicly. In this case, I confirmed that the user has no public posts simply by visiting their profile.

Categories

Resources