Looping through API requests to log a value - python

I am working with an API that paginates the results of a request. Handily, the API also generates some 'hypermedia' values that contain information on the pagination; one of these values is 'next_page'.
I am trying to create a loop that takes a request, logs this 'next_page' value, then runs another request using the value of 'next_page', and logs the next page, so on and so on, until the value of 'next_page' = 'None'.
Here's my function so far, but this is only populating one item in the list, when there should be 3.
pages = []
def build_requests(request):
request = 'https://api.performancehorizon.com'+request
job = requests.get(request, headers=headers, params=params)
req = job.json()
hypermedia_np = req['hypermedia']['pagination']['next_page']
if hypermedia_np != 'None':
next_page = req['hypermedia']['pagination']['next_page']
pages.append(next_page)
job = requests.get('https://api.performancehorizon.com'+next_page, headers=headers, params=params)
req = job.json()
else: print("Done!")
How can I change my code to loop over this job until the value of 'next_page' is 'None'?

You can use the below modified code
pages = []


def build_requests(request):
    """Follow the API's 'next_page' links, logging each one in ``pages``.

    ``request`` is a path that is appended to
    https://api.performancehorizon.com.  Each 'next_page' value found under
    ``['hypermedia']['pagination']`` in the JSON response is appended to the
    module-level ``pages`` list and then requested in turn, until the value
    is None (or the string 'None').

    Relies on ``requests``, ``headers`` and ``params`` being defined by the
    surrounding script.
    """
    request = 'https://api.performancehorizon.com' + request
    job = requests.get(request, headers=headers, params=params)
    req = job.json()
    hypermedia_np = req['hypermedia']['pagination']['next_page']
    # Both tests must hold, hence `and`.  With `or` the condition is true for
    # every possible value (None != 'None' is True), so the loop could never
    # finish and would end up concatenating None to the base URL.
    while hypermedia_np is not None and hypermedia_np != 'None':
        next_page = req['hypermedia']['pagination']['next_page']
        if next_page is not None and next_page != 'None':
            pages.append(next_page)
            job = requests.get('https://api.performancehorizon.com' + next_page, headers=headers, params=params)
            req = job.json()
            hypermedia_np = req['hypermedia']['pagination']['next_page']
        else:
            # No further page: clear the loop variable so the while exits.
            hypermedia_np = None
    print("Done!")
Or you can use this simplified version, this should also work for your case
pages = []


def build_requests(request):
    """Collect every 'next_page' link reachable from ``request`` in ``pages``.

    ``request`` is a path that is appended to
    https://api.performancehorizon.com.  The 'next_page' value of each JSON
    response (under ``['hypermedia']['pagination']``) is appended to the
    module-level ``pages`` list and fetched next; the loop stops once the
    value is None (or the string 'None').

    Relies on ``requests``, ``headers`` and ``params`` being defined by the
    surrounding script.
    """
    request = 'https://api.performancehorizon.com' + request
    job = requests.get(request, headers=headers, params=params)
    req = job.json()
    hypermedia_np = req['hypermedia']['pagination']['next_page']
    # `and`, not `or`: the `or` form is true for every value (None is not
    # equal to 'None'), which made the loop run until it crashed on
    # `str + None`.
    while hypermedia_np is not None and hypermedia_np != 'None':
        pages.append(hypermedia_np)
        job = requests.get('https://api.performancehorizon.com' + hypermedia_np, headers=headers, params=params)
        req = job.json()
        hypermedia_np = req['hypermedia']['pagination']['next_page']
    print("Done!")

Related

InvalidSchema: No connection adapters were found for "{'url':

I am trying to do pagination using pyspark and am getting the error below. My pagination link is in the header as key [Link] and value [rel="next"]. The error is raised at this line: r1 = requests.get(response.links['next']). The issue is that the baseURL is missing from the "next" URL being passed.
getURL = 'https://api.xxx.com/v3/direct-access/abc'
baseURL = 'https://api.xxx.com/v3/direct-access'
headers = {
"accept" : "application/json",
"Content-Type": "application/json",
"Authorization": "Bearer " + str(token)
}
results = []
response = requests.get(getURL, headers=headers)
r = response.json()
for i in r:
results.append(i)
while response.links['next']: ## != response.links['last']:
r1 = requests.get(response.links['next'])
r = r1.json()
for i in r:
results.append(i)
Error: InvalidSchema: No connection adapters were found for "{'url': '/abc? action=next&next_page=%28id%2Ccompletionid%29+%3C+%28840430000754002%2C840430413029241%29&pagesize=10000', 'rel': 'next'}"
InvalidSchema Traceback (most recent call last) <ipython-input-45-
f27cc7bf373e> in <module> 17 18
while response.links['next']: ## != response.links['last']: ---> 19
r1 = requests.get(response.links['next'])
20 r = r1.json()
21 for i in r:
InvalidSchema: No connection adapters were found for "{'url':
'/liners?
action=next&next_page=linerid+%3C+1010031264&pagesize=10000', 'rel':
'next'}"
How can I merge both baseURL and url into one link and pass it in the while loop? Something like below:
https://api.xxx.com/v3/direct-access/abc?action=next&next_page=%28id%2Ccompletionid%29+%3C+%28840430000754002%2C840430413029241%29&pagesize=10000
response = requests.get(getURL, headers=headers)
r = response.json()
for i in r:
results.append(i)
while response.links.get('next'):
response = requests.get(baseURL + response.links['next']
['url'],headers=headers)
r1 = response.json()
for i in response:
results.append(i)
#######below not returning results, running for ever ######
return results
rdd = spark.sparkContext.parallelize((results))
print(rdd)
df = spark.read.option('multiline','true').json(rdd)
df.repartition(1).write.json(stagingpath,mode="overwrite")
There are several problems with your code.
response.links['next'] is a dict {'url': ...}. requests.get(...) expects a URL.
# requests.get(response.links['next'])
requests.get(response.links['next']['url'])
# requests.get(baseURL + response.links['next']['url']) # With baseURL
headers are not passed in the subsequent calls.
# requests.get(response.links['next']['url'])
requests.get(response.links['next']['url'], headers=headers)
response is not modified, resulting in infinite loop.
while response.links['next']:
# r1 = requests.get(response.links['next']['url'], headers=headers)
response = requests.get(response.links['next']['url'], headers=headers)
For the last link, 'next' will not exist.
# while response.links['next']:
while response.links.get('next'):
Minimal, reproducible example:
import requests
getURL = 'https://api.github.com/users/acjh/repos'
baseURL = ''
headers = {}
results = []
response = requests.get(getURL, headers=headers)
r = response.json()
for i in r:
results.append(i)
while response.links.get('next'):
nextURL = baseURL + response.links['next']['url']
response = requests.get(nextURL, headers=headers)
r = response.json()
for i in r:
results.append(i)
assert len(results) == requests.get(getURL[:-6]).json()['public_repos']
like this:
# Keep following the rel="next" link until the response no longer has one.
# .get() is used because on the last page the 'next' key does not exist and
# response.links['next'] would raise KeyError.
while response.links.get('next'):
    next_link = response.links['next']['url']
    # Reassign `response` itself so the loop condition is evaluated against
    # the page just fetched; otherwise the same 'next' link is requested
    # forever.
    response = requests.get(baseURL + next_link, headers=headers)
    r = response.json()
    results.extend(r)

Paginating API error (Nested 'next' link)

I am trying to build a class to automate requests to our supplier, but I am having trouble with pagination.
This is the working snippet so far, but the api limits the request to 1000 records, and if the endpoint has more, then I would have to paginate:
response = requests.get(url, data=params, headers=headers).json()
return response
This is what I tried and failed, getting a KeyError: 'next':
response = requests.get(url, data=params, headers=headers).json()
results = response['data']
while response['links']['next']:
response = requests.get(response['links']['next'], data=params, headers=headers).json()
results.extend(response['data'])
return results
You can check the basic structure of the response here in the API doc.
Please enlighten me, thank you very much!
You could simply check
while "next" in response['links']:
# ... code ...
But if it can run into other problems, then putting everything in `try/except` can also be useful.
results = [] # empty list
try:
response = requests.get(url, data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
while "next" in json_data['links']:
response = requests.get(json_data['links']['next'], data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
except Exception as ex:
print("Exception:", ex)
return results
But I would reduce it to while True
results = [] # empty list
try:
while True:
response = requests.get(url, data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
if "next" not in json_data['links']:
break
url = json_data['links']['next']
except Exception as ex:
print("Exception:", ex)
return results

API Shows image only if ModHeader is on

I am requesting a user image from an api but the image only shows up if I'm using the chrome extension ModHeader which takes the authorization header and url pattern. In my code I am passing the header info so I'm not sure why it doesn't display.
# NOTE(review): the decorator appeared as "#app.route" in this text, which
# looks like a garbled "@"; restored so the view is actually registered.
@app.route('/cardholder/<name>', methods=['GET', 'POST'])
def cardholder(name):
    """Render 'cardholder.html' for the cardholder whose first name is ``name``.

    Fetches the cardholder list, maps each entry's "firstName" to its "id",
    then fetches that cardholder's detail record and passes it to the
    template together with an image URL.  Falls back to the static
    placeholder image when the detail record has no usable image link.

    Raises KeyError if no cardholder in the list has first name ``name``.
    """
    # NOTE(review): verify=False disables TLS certificate verification for
    # both requests - confirm this is intended outside of local testing.
    response = requests.get("https://commandcentre-api-us.security.gallagher.cloud/api/cardholders", verify=False, headers=Headers)
    r = json.loads(response.text)
    # Later entries overwrite earlier ones that share the same first name.
    data = {x["firstName"]: x["id"] for x in r['results']}
    chid = data[name]
    url = ("https://commandcentre-api-us.security.gallagher.cloud/api/cardholders/" + chid)
    ch = requests.get(url, verify=False, headers=Headers)
    chl = json.loads(ch.text)
    try:
        # NOTE(review): the "#DL" key may originally have been "@DL" (same
        # garbling as the decorator) - confirm against the API response.
        chimage = chl["#DL"]["href"]
    except (KeyError, TypeError):
        # Missing key, or a value that cannot be indexed by "href": use the
        # placeholder.  Other errors are no longer silently swallowed.
        chimage = "../static/img/noimg.png"
    return render_template('cardholder.html', name=name, chid=chid, chl=chl, chimage=chimage)

handle url pagination with python generator

Currently I'm fetching only first page from the server, part of the json is
{"status":"success","count":100,"total":22188,"next":"https://pimber.ly/api/v2/products/?sinceId=5981e16fcde47c0854dc540b","previous":"https://pimber.ly/api/v2/products/?maxId=5981e01dcde47c0854dc4afd","sinceId":"5981e01dcde47c0854dc4afd","maxId":"5981e16fcde47c0854dc540b","data":[.....]}
and the function is:
_fetch_data = response.json()
while _fetch_data['next'] is not None:
response = requests.get(
url=API_DOMAIN',
headers=headers
)
_page_data = response.json()['data']
for _data in _page_data:
yield _data
The current state of the function only processes the first page, and it will just do that forever, so how can I fix the function to check next so it can fetch all of the data?
I guess it should be
_fetch_data = response.json()
while _fetch_data['next'] is not None:
response = requests.get(_fetch_data['next'], headers=headers)
_fetch_data = response.json()
for _data in fetch_data['data']:
yield _data

How to read the next page on API using python?

I need help on how to do a loop so each time I make a GET request, it will always be the new page from the API.
I start with getting the first response. It includes a parameter to the next page next_key
{
"result": [
{
...,
...
}
],
"next_key": 123
}
Below is my current attempt
import requests
import json
url = "https://flespi.io/gw/channels/all/messages"
headers = {"Authorization": "FlespiToken 23ggh45"}
def getFirst():
data = {"limit_count":100, "limit_size":10000}
params = {"data":json.dumps(data, separators=(",", ":"))}
reqFirst = requests.get(url, params=params, headers=headers).json()
return reqFirst["next_key"] ## this returns "123"
def getDataNext():
data = {"limit_count":100, "limit_size":10000, "curr_key":getFirst()}
params = {"data":json.dumps(data, separators=(",", ":"))}
reqNext = requests.get(url, params=params, headers=headers)
jsonData = reqNext.json()
while True:
if "next_key" in jsonData:
data = {"limit_count":100, "limit_size":10000,"curr_key":jsonData["next_key"]}
params = {"data":json.dumps(data, separators=(",", ":"))}
req = requests.get(url, params=params, headers=headers).json() ## this should do GET request for the third page and so on...
print req["next_key"] # this returns "3321" which is the value for "next_key" in second page
else:
pass
getDataNext()
The full URL, including limit count, limit size and curr key, is as follows: https://flespi.io/gw/channels/all/messages?data=%7B%22curr_key%22%3A123%2C%22limit_count%22%3A100%2C%22limit_size%22%3A10000%7D
As you can see this only returns the second page that is jsonData["next_key"]. What I want to do is that for each GET request, the program will read the next_key and put it on the next GET request.
I am thinking of incrementing the curr_key, but the key is random and I also do not know how many pages there are.
I believe there must be just a simple solution for this but apparently I could not think about it. Thank you for your help and suggestion.
try this
has_next_key = False
nextKey = ""
if "next_key" in jsonData:
has_next_key = True
nextKey = jsonData["next_key"]
while has_next_key:
data = {"limit_count":100, "limit_size":10000,"curr_key":nextKey}
params = {"data":json.dumps(data, separators=(",", ":"))}
req = requests.get(url, params=params, headers=headers).json() ## this should do GET request for the third page and so on...
if "next_key" in req:
nextKey = req["next_key"]
print nextKey # this returns "3321" which is the value for "next_key" in second page
else:
has_next_key = False
# no next_key, stop the loop

Categories

Resources