I need to make a request to an API that responds with a maximum of 200 results. If the total amount of data is more than 200, the API also responds with a parameter lastKey that I need to pass to a new request. When all the data has been returned, the lastKey parameter is no longer returned.
My question is how to do it in a simple, clean way? This is how I make the first request and I can see if there is the lastKey -param or not:
# First request of the paginated fetch. The listing lost its indentation, so
# the nesting described in these notes is the most plausible reading.
url = 'https://example.com'
# Flag that is switched on further down when the response carries 'lastKey'.
moreData = False
with requests.Session() as api:
# NOTE(review): the request goes through the module-level requests.get, not
# through the `api` session opened on the line above.
data = requests.get(url)
try:
data.raise_for_status()
except HTTPError as e:
# An HTTP error status is answered with a 500 response.
return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
result = data.json()
# When the decoded body contains 'lastKey', its value is appended to the URL
# for the follow-up request and the flag is set.
if 'lastKey' in result:
url = 'https://example.com&lastKey=' + result['lastKey']
moreData = True
How could I do this whole thing for example inside a while -loop?
Fetch the first result before the while loop, then keep calling the API for as long as "lastKey" is in the result:
url = 'https://example.com'
with requests.Session() as api:
data = requests.get(url)
try:
data.raise_for_status()
except HTTPError as e:
return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
result = data.json()
while 'lastKey' in result:
url = 'https://example.com&lastKey=' + result['lastKey']
with requests.Session() as api:
data = requests.get(url)
try:
data.raise_for_status()
except HTTPError as e:
return Response(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
result = data.json()
Related
I'm requesting Microsoft's Graph API, where I'm using the following function to request multiple pages. I'm trying to request all pages, merge the json files and finally write them to a pandas dataframe.
# API version and resource path that query() substitutes into the `uri`
# template.
v = "v1.0"
r = "/users?$filter=userType eq 'Member'&$select=displayName,givenName,jobTitle,mail,department&$top=200"
# Follows the next-page links starting from the first URL and concatenates the
# "value" list of every page into one list, which is returned.
# NOTE(review): `uri` and `headers` are not defined in this listing —
# presumably module-level names; confirm.
def query(v, r):
all_records = []
url = uri.format(v=v, r=r)
while True:
# Stop once there is no next-page URL left to follow.
if not url:
break
result = requests.get(url, headers=headers)
if result.status_code == 200:
json_data = json.loads(result.text)
all_records = all_records + json_data["value"]
# Subscripting raises KeyError when the page carries no such entry.
# NOTE(review): Microsoft Graph documents this property as
# "@odata.nextLink" — confirm the spelling of the key.
url = json_data["#odata.nextLink"]
return all_records
The while loop goes through all the pages, but when I run the function I'm getting an error:
KeyError: '#odata.nextLink'
I assume this is because the loop reaches the final page, and thus the '#odata.nextLink' cannot be found. But how can I handle this?
You are doing
url = json_data["#odata.nextLink"]
which suggests that json_data is a dict, so you should be able to use the .get method, which returns a default value when the key is not found (None by default). Please try the following and report whether it works as expected:
# dict.get returns None instead of raising when the key is absent.
url = json_data.get("#odata.nextLink")
if url is not None:
    print("nextLink found")
else:
    print("nextLink not found")
I generated the following code where I pass multiple URLS via API and ask that there be an output written to different pandas dataframes. It (sort of) works, but the outcome is incorrect
1) It seems to enter the function and print "Success" way too many times. Why?
2) The output for all the dataframes is the same; not sure where the error is.
See the function:
# Requests the given URL(s) and returns a DataFrame built from the 'records'
# entry of a JSON response. The listing lost its indentation, so notes about
# nesting are the most plausible reading rather than a certainty.
def data_extract(url):
# Query parameters sent with the second, data-fetching request.
payload = {'limit':'200000'}
# Persists parameters across requests
s = requests.Session()
# To determine success of request, and error code
# NOTE: the loop variable reuses the name of the `url` parameter it iterates.
for url in url:
try:
response = s.get(url)
# If the response was successful, no Exception will be raised
response.raise_for_status()
except HTTPError as http_err:
print(f'HTTP error occurred: {http_err}')
except Exception as err:
print(f'Other error occurred: {err}')
else:
# No exception was raised: request the same URL again, this time with the
# limit parameter, and decode the JSON body.
jsonData = s.get(url, params=payload).json()
print('Success!')
# NOTE(review): a single frame is returned; presumably these two lines run
# after the loop, so the frame comes from the last decoded response — confirm.
df_tmr = pd.DataFrame(jsonData['records'])
return df_tmr
See the call to the function:
urls = {
# Rainfall data
'tot_rain_mth': 'https://data.gov.sg/dataset/5942f8bd-4240-4f68-acd2-a5a276958237/resource/778814b8-1b96-404b-9ac9-68d6c00e637b/data',
'no_days_rain_mth': 'https://data.gov.sg/dataset/rainfall-monthly-number-of-rain-days/resource/8b94f596-91fd-4545-bf9e-7a426493b674/data',
'max_rain_mth': 'https://data.gov.sg/dataset/rainfall-monthly-maximum-daily-total/resource/df4d391e-6950-4fc6-80cd-c9b9ef6354fe/data',
# Temperature Data
'mean_sun_dur_mth': 'https://data.gov.sg/dataset/sunshine-duration-monthly-mean-daily-duration/resource/0230819f-1c83-4980-b738-56136d6dc300/data',
'wet_bulb_hr': 'https://data.gov.sg/dataset/wet-bulb-temperature-hourly/resource/0195dc7a-2f49-4107-ac7c-3112ca4a09a8/data',
'min_air_temp_day': 'https://data.gov.sg/dataset/surface-air-temperature-mean-daily-minimum/resource/ad0d8a97-9321-42e9-ac6f-46bf12845d44/data',
'min_air_temp_mth': 'https://data.gov.sg/dataset/surface-air-temperature-monthly-absolute-extreme-minimum/resource/0c5b9752-2488-46cc-ae1c-42318d0f8865/data',
'mean_air_temp_mth': 'https://data.gov.sg/dataset/surface-air-temperature-monthly-mean/resource/07654ce7-f97f-49c9-81c6-bd41beba4e96/data',
'max_air_temp_day': 'https://data.gov.sg/dataset/surface-air-temperature-mean-daily-maximum/resource/c7a7d2fd-9d32-4508-92ef-d1019e030a2f/data',
'max_air_temp_mth': 'https://data.gov.sg/dataset/air-temperature-absolute-extremes-maximum/resource/96e66346-68bb-4ca9-b001-58bbf39e36a7/data',
# Humidity Data
'min_hum_mth': 'https://data.gov.sg/dataset/relative-humidity-monthly-absolute-extreme-minimum/resource/585c24a5-76cd-4c48-9341-9223de5adc1d/data',
'mean_hum_mth': 'https://data.gov.sg/dataset/relative-humidity-monthly-mean/resource/4631174f-9858-463d-8a88-f3cb21588c67/data',
'mean_hum_yr': 'https://data.gov.sg/dataset/relative-humidity-annual-mean/resource/77b9059f-cc9a-4f4f-a495-9c268945191b/data'
}
# Maps the position of each URL (as a string) to the frame built for it.
df={}
for i in range(len(urls.keys())):
# Placeholder frame; it is overwritten two lines below.
df[str(i)] = pd.DataFrame()
#print('Name of Dataframe:', df)
# NOTE: every iteration passes the complete urls.values() view to
# data_extract, not the single URL at position i.
df[str(i)] = data_extract(urls.values())
print (df['0'])
print (df['1'])
--> Sorry about the bad formatting; cant quite get it right in SO
import requests
import pandas as pd
def data_extract(url):
    """Download one dataset and return its records as a DataFrame.

    Parameters
    ----------
    url : str
        Address to request; it is sent with ``limit=200000`` as a query
        parameter.

    Returns
    -------
    pandas.DataFrame
        Built from the ``records`` entry of the JSON response. An empty
        frame is returned when the request fails, the body is not valid
        JSON, or the ``records`` entry is missing.
    """
    print(url)
    payload = {'limit': '200000'}
    # The session is closed on exit. One request both carries the limit
    # parameter and has its status checked, so nothing is fetched twice.
    with requests.Session() as s:
        try:
            response = s.get(url, params=payload)
            response.raise_for_status()
            records = response.json()['records']
        except (requests.RequestException, ValueError, KeyError) as err:
            print(f'Other error occurred: {err}')
            # Hand back an empty frame instead of failing later on a name
            # that was never assigned.
            return pd.DataFrame()
    print('Success!')
    return pd.DataFrame(records)
urls = {
# Rainfall data
'tot_rain_mth': 'https://data.gov.sg/dataset/5942f8bd-4240-4f68-acd2-a5a276958237/resource/778814b8-1b96-404b-9ac9-68d6c00e637b/data',
'no_days_rain_mth': 'https://data.gov.sg/dataset/rainfall-monthly-number-of-rain-days/resource/8b94f596-91fd-4545-bf9e-7a426493b674/data',
'max_rain_mth': 'https://data.gov.sg/dataset/rainfall-monthly-maximum-daily-total/resource/df4d391e-6950-4fc6-80cd-c9b9ef6354fe/data',
# Temperature Data
'mean_sun_dur_mth': 'https://data.gov.sg/dataset/sunshine-duration-monthly-mean-daily-duration/resource/0230819f-1c83-4980-b738-56136d6dc300/data',
'wet_bulb_hr': 'https://data.gov.sg/dataset/wet-bulb-temperature-hourly/resource/0195dc7a-2f49-4107-ac7c-3112ca4a09a8/data',
'min_air_temp_day': 'https://data.gov.sg/dataset/surface-air-temperature-mean-daily-minimum/resource/ad0d8a97-9321-42e9-ac6f-46bf12845d44/data',
'min_air_temp_mth': 'https://data.gov.sg/dataset/surface-air-temperature-monthly-absolute-extreme-minimum/resource/0c5b9752-2488-46cc-ae1c-42318d0f8865/data',
'mean_air_temp_mth': 'https://data.gov.sg/dataset/surface-air-temperature-monthly-mean/resource/07654ce7-f97f-49c9-81c6-bd41beba4e96/data',
'max_air_temp_day': 'https://data.gov.sg/dataset/surface-air-temperature-mean-daily-maximum/resource/c7a7d2fd-9d32-4508-92ef-d1019e030a2f/data',
'max_air_temp_mth': 'https://data.gov.sg/dataset/air-temperature-absolute-extremes-maximum/resource/96e66346-68bb-4ca9-b001-58bbf39e36a7/data',
# Humidity Data
'min_hum_mth': 'https://data.gov.sg/dataset/relative-humidity-monthly-absolute-extreme-minimum/resource/585c24a5-76cd-4c48-9341-9223de5adc1d/data',
'mean_hum_mth': 'https://data.gov.sg/dataset/relative-humidity-monthly-mean/resource/4631174f-9858-463d-8a88-f3cb21588c67/data',
'mean_hum_yr': 'https://data.gov.sg/dataset/relative-humidity-annual-mean/resource/77b9059f-cc9a-4f4f-a495-9c268945191b/data'
}
# Fetch every URL once; each frame is stored under its position as a string.
df = {}
temp = list(urls.values())
for i, link in enumerate(temp):
    df[str(i)] = data_extract(link)
print(df['0'])
print(df['1'])
# Report success only when there is one entry per URL.
if len(temp) == len(df):
    print('success')
I think this will help you. You were iterating over all the items but returning only the last one. You just need to remove the for loop from the data_extract function.
I have a list with over 1000 IDs and I want to call an API with different endpoints for every element of the list.
Example:
customerlist = [803818, 803808, 803803,803738,803730]
I tried the following:
import json
import requests
import pandas as pd
API_BASEURL = "https://exampleurl.com/"
API_TOKEN = "abc"
HEADERS = {'content-type' : 'application/json',
'Authorization': API_TOKEN }
# Requests `endpoint` for customer IDs taken from the module-level
# `customerlist` and returns the decoded result wrapped in a DataFrame.
def get_data(endpoint):
for i in customerlist:
api_endpoint = endpoint
# The customer ID is sent as the `customerid` query parameter.
params = {'customerid' : i}
response = requests.get(f"{API_BASEURL}/{api_endpoint}",
params = params,
headers = HEADERS)
# Only status 200 is decoded; any other status aborts with an exception.
if response.status_code == 200:
res = json.loads(response.text)
else:
raise Exception(f'API error with status code {response.status_code}')
res= pd.DataFrame([res])
# NOTE(review): the indentation was lost in this listing; the reported
# symptom (only the first ID comes back) suggests this return sits inside
# the loop — confirm.
return res
get_data(endpointexample)
This works, but it only returns the values for the first element of the list (803818). I want the function to return the values for every ID from customerlist for the endpoint I defined in the function argument.
I found this - possibly related - question, but I couldn't figure my problem out.
There is probably an easy solution for this which I am not seeing, as I am just starting with Python. Thanks.
The moment a function hits a return statement, it immediately finishes. Since your return statement is in the loop, the other iterations never actually get called.
To fix, you can create a list outside the loop, append to it every loop iteration, and then return the DataFrame created with that list:
def get_data(endpoint):
    """Query `endpoint` once per customer ID and collect all results.

    Parameters
    ----------
    endpoint : str
        Path appended to ``API_BASEURL``.

    Returns
    -------
    pandas.DataFrame
        Built from the decoded JSON results, one per entry of the
        module-level ``customerlist``.

    Raises
    ------
    Exception
        As soon as any request is answered with a status other than 200.
    """
    # Strip a trailing "/" from the base so the joined URL has no "//".
    url = f"{API_BASEURL.rstrip('/')}/{endpoint}"
    responses = []
    # One session for the whole batch lets the connection be reused instead
    # of opening a new one for each ID.
    with requests.Session() as session:
        for i in customerlist:
            response = session.get(url,
                                   params={'customerid': i},
                                   headers=HEADERS)
            if response.status_code != 200:
                raise Exception(f'API error with status code {response.status_code}')
            responses.append(json.loads(response.text))
    return pd.DataFrame(responses)
A much cleaner solution would be to use list comprehension:
def get_data(endpoint, i):
    """Return the decoded JSON that `endpoint` delivers for customer ID `i`.

    Raises Exception when the API answers with any status other than 200.
    """
    response = requests.get(
        f"{API_BASEURL}/{endpoint}",
        params={'customerid': i},
        headers=HEADERS,
    )
    # Guard clause: reject failures first so the success path stays flat.
    if response.status_code != 200:
        raise Exception(f'API error with status code {response.status_code}')
    return json.loads(response.text)


# One call per customer ID; the decoded results are collected in a DataFrame.
responses = pd.DataFrame([get_data(endpoint, i) for i in customerlist])
I am not quite happy with the way I coded this. Is there a simpler and more convenient way to code this in one function and return the output of multiple pages?
def login():
    """Request pricelogin.php with the given credentials and return its text."""
    return requests.get(
        "http://192.168.2.45/pricelogin.php",
        auth=('pstats', 'pStats'),
    ).text
def loginhighpricingerror():
    """Request airline_error.pl with the given credentials and return its text."""
    response = requests.get(
        "http://192.168.2.45/airline_error.pl",
        auth=('pstats', 'pstats'),
    )
    return response.text
def loginsuccessfullbookings():
    """Request airlinessucbookings.php with the given credentials.

    Returns the body of the response as text.
    """
    sucurl = "http://192.168.2.45/airlinessucbookings.php"
    # Request the URL through the name it was assigned to on the line above.
    suc = requests.get(sucurl, auth=('pstats', 'pstats'))
    sucpage = suc.text
    return sucpage
Use session instead of sessionless module functions:
# The credentials are stored on the session once and then apply to every
# request made through it.
s = requests.Session()
s.auth = ('pstats', 'pStats')


def login():
    """Return the text of pricelogin.php, fetched through the shared session."""
    url = "http://192.168.2.45/pricelogin.php"
    r = s.get(url)
    page = r.text
    return page


def loginhighpricingerror():
    """Return the text of airline_error.pl, fetched through the shared session."""
    pricingerrorurl = "http://192.168.2.45/airline_error.pl"
    peu = s.get(pricingerrorurl)
    peupage = peu.text
    return peupage


def loginsuccessfullbookings():
    """Return the text of airlinessucbookings.php, fetched through the shared session."""
    sucurl = "http://192.168.2.45/airlinessucbookings.php"
    # Request the URL through the name it was assigned to on the line above.
    suc = s.get(sucurl)
    sucpage = suc.text
    return sucpage
Of course this should be refactored, but hopefully you can see what I mean.
I would generalize the login function, passing the url as parameter:
def login(url):
    """Request `url` with the fixed credentials and return the body as text.

    Returns an empty string when the request raises a
    ``requests.exceptions.RequestException``.
    """
    try:
        r = requests.get(url, auth=('pstats', 'pStats'))
    except requests.exceptions.RequestException as e:
        # Calling print as a function works on Python 3 as well as Python 2.
        print(e)
        return ''  # but maybe you want to do something else
    page = r.text
    return page
And then you can run it for each url accumulating the pages in an array for example:
urls = [
    "http://192.168.2.45/pricelogin.php",
    "http://192.168.2.45/airline_error.pl",
    "http://192.168.2.45/airlinessucbookings.php",
]
# Resulting array: one page text per URL, in the same order as `urls`.
pages = [login(url) for url in urls]
Note: I added a check on an exception for requests.get since this might fail when there is a connection problem.
part of code containing error:
# Fragment of a larger method (it uses `self`). The listing lost its
# indentation, so notes about nesting are the most plausible reading.
# Fetch the stored PhishTank rows whose url equals updated_url.
select_link = db.GqlQuery("select * from PhishTank where url= :1",str(updated_url))
in_database_phishtank = False
for link in select_link:
if str(updated_url) == str(link.url):
in_database_phishtank = True
# Re-check the link once the stored entry is older than TIME_UPDATE days
# (7 days according to the original note).
if (datetime.now()-link.timestamp) > timedelta(days = TIME_UPDATE):
# Query the PhishTank check-URL service and update the datastore.
url = "http://checkurl.phishtank.com/checkurl/"
parameters = {"url": "%s" % updated_url,
"app_key": "74283d86612c6b89de0b186882446e069dd071f65e9711aa374e9cdbd2ba7ffe",
"format":"json"}
# NOTE(review): urllib.urlencode, urllib.Request, urllib2.urlopen and
# urllib.error appear to come from different Python 2 / Python 3 module
# layouts — confirm which interpreter this runs on.
data = urllib.urlencode(parameters)
req = urllib.Request(url, data)
try:
response = urllib2.urlopen(req)
except urllib.error.URLError as e:
self.redirect('/error')
# NOTE(review): if these lines also run after the redirect above, `response`
# was never assigned — confirm the original nesting.
json_post = response.read()
data = json.loads(json_post)
Try this:
urllib.request.Request(url, data)
Be aware that in Python 3.x urllib was split into several modules: urllib.request, urllib.parse, and urllib.error. It's possible that you're importing it incorrectly.