How to read the next page on API using python? - python

I need help on how to do a loop so each time I make a GET request, it will always be the new page from the API.
I start with getting the first response. It includes a parameter to the next page next_key
{
"result": [
{
...,
...
}
],
"next_key": 123
}
Below is my current attempt
import requests
import json
url = "https://flespi.io/gw/channels/all/messages"
headers = {"Authorization": "FlespiToken 23ggh45"}
def getFirst():
data = {"limit_count":100, "limit_size":10000}
params = {"data":json.dumps(data, separators=(",", ":"))}
reqFirst = requests.get(url, params=params, headers=headers).json()
return reqFirst["next_key"] ## this returns "123"
def getDataNext():
data = {"limit_count":100, "limit_size":10000, "curr_key":getFirst()}
params = {"data":json.dumps(data, separators=(",", ":"))}
reqNext = requests.get(url, params=params, headers=headers)
jsonData = reqNext.json()
while True:
if "next_key" in jsonData:
data = {"limit_count":100, "limit_size":10000,"curr_key":jsonData["next_key"]}
params = {"data":json.dumps(data, separators=(",", ":"))}
req = requests.get(url, params=params, headers=headers).json() ## this should do GET request for the third page and so on...
print req["next_key"] # this returns "3321" which is the value for "next_key" in second page
else:
pass
getDataNext()
The full url including limit count, limit size and curr key is as follows https://flespi.io/gw/channels/all/messages?data=%7B%22curr_key%22%123%2C%22limit_count%22%3A100%2C%22limit_size%22%3A10000%7D
As you can see this only returns the second page that is jsonData["next_key"]. What I want to do is that for each GET request, the program will read the next_key and put it on the next GET request.
I am thinking to use increment on the curr_key but the key is random and also I do not know how many page there is.
I believe there must be just a simple solution for this but apparently I could not think about it. Thank you for your help and suggestion.

try this
has_next_key = False
nextKey = ""
if "next_key" in jsonData:
has_next_key = True
nextKey = jsonData["next_key"]
while has_next_key:
data = {"limit_count":100, "limit_size":10000,"curr_key":nextKey}
params = {"data":json.dumps(data, separators=(",", ":"))}
req = requests.get(url, params=params, headers=headers).json() ## this should do GET request for the third page and so on...
if "next_key" in req:
nextKey = req["next_key"]
print nextKey # this returns "3321" which is the value for "next_key" in second page
else:
has_next_key = False
# no next_key, stop the loop

Related

get request payload in python

my code is about send get request using query parameters which depends on a page number
After that o have to do for loop to get some ids from the response and also getting the next page number of the same response
and send a new get request with the new next page number that I got from the first response, and I need to get the ids also from the new response
My code works fine , but I’m using two loop which it’s not the right way I think? I couldn’t do it with one loop any ideas?
def get():
response = requests.get(url, headers=header)
data = response.text
data = json.loads(data)
check_if_theres_next_page = data['pagination']['hasMorePages']
check_for_next_page_number = data['pagination']['nextPage']
last_page_number = data['pagination']['lastPage']
orders = data['orders']
list_of_ids = []
for manufacturingOrderId in orders:
ids = manufacturingOrderId['manufacturingOrderId']
list_of_ids.append(ids)
if check_for_next_page_number == 4:
check_for_next_page_number = last_page_number
if check_if_theres_next_page:
url_ = url + '&page_number=' + str(check_for_next_page_number)
response = requests.get(url_, headers=header)
data = response.text
data = json.loads(data)
orders = data['orders']
for manufacturingOrderId_ in orders:
ids = manufacturingOrderId_['manufacturingOrderId']
list_of_ids.append(ids)
if "nextPage" in data['pagination']:
check_for_next_page_number = data['pagination']['nextPage']
else:
check_if_theres_next_page = False
return list_of_ids

Paginating API error (Nested 'next' link)

I am trying to build a class to automate requests to our suplier, but i am having trouble with pagination.
This is the working snippet so far, but the api limits the request to 1000 records, and if the endpoint has more, then I would have to paginate:
response = requests.get(url, data=params, headers=headers).json()
return response
This is what I tried and failed, getting a KeyError: 'next':
response = requests.get(url, data=params, headers=headers).json()
results = response['data']
while response['links']['next']:
response = requests.get(response['links']['next'], data=params, headers=headers).json()
results.extend(response['data'])
return results
you can check the basic sctructure for the response here in the API doc.
Please enlighten me, thank you very much!
You could simply check
while "next" in response['links']:
# ... code ...
But if it can get other problems then putting all in `try/except can be also usefull.
results = [] # empty list
try:
response = requests.get(url, data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
while "next" in json_data['links']:
response = requests.get(json_data['links']['next'], data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
except Exception as ex:
print("Exception:", ex)
return results
But I would reduce it to while True
results = [] # empty list
try:
while True:
response = requests.get(url, data=params, headers=headers)
#print(response.status_code) # for debug
#print(response.text) # for debug
json_data = response.json()
results += json_data['data']
if "next" not in json_data['links']:
break
url = json_data['links']['next']
except Exception as ex:
print("Exception:", ex)
return results

Python not able to put a variable in header "session-token"

i'm working on winrest api and since my session token will change from time to time i would like to ad it in a variable, when run my code i get a return request('get', url, params=params, **kwargs)
here the code :
sessionToken = 'session_token': 'gggg6gsl68l2vdim5fgggggg'}
headers = {
'App-Token': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'Session-Token': sessionToken
}
response = requests.get(urlUnencryptedWorkstation, headers=headers)
I believe this is what you're looking for:
import requests as req
urlUnencryptedWorkstation = 'https://www.google.com' # Whatever the url is
sessionToken = 'gggg6gsl68l2vdim5fgggggg'
headers = {
'App-Token': 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'Session-Token': sessionToken
}
res = req.get(urlUnencryptedWorkstation, headers=headers)
print(res.status_code, res.text)
Hope this helps :) Cheers!

Python combine multiple similar functions

I am working with an API to pull back data using python. My functions work fine but I feel like I am repeating myself over and over again and there is probably something I should be doing to make this more efficient.
What each one does is gets the number of results then hits the api back up to bring back the exact number of records.
First function:
def get_categories():
headers = {"Authorization": "Bearer " + access_token} # auth plus token
response = requests.get("https://api.destination.com/categories", headers=headers) # response
data = json.loads(response.text) # load the json data
records = str(data['totalResults']) # get number of results for next call
response = requests.get("https://api.destination.com/categories?$skip=0&$top="+records, headers=headers)
all_data = json.loads(response.text) # load the json data
list_of_dict = all_data['resources'] # get rid of all but lists of dictionaries
df = pd.DataFrame.from_records(list_of_dict) # create dataframe
df['links'] = df['links'].str[0].str['href'] # just grab the links(key) items
return df # return the final dataframe
Second function:
def get_groups():
headers = {"Authorization": "Bearer " + access_token} # auth plus token
response = requests.get("https://api.destination.com/groups", headers=headers) # response
data = json.loads(response.text) # load the json data
records = str(data['totalResults']) # get number of results
response = requests.get("https://api.destination.com/groups?$skip=0&$top="+records, headers=headers)
all_data = json.loads(response.text) # load the json data
list_of_dict = all_data['resources'] # get rid of all but lists of dictionaries
df = pd.DataFrame.from_records(list_of_dict) # create dataframe
df['links'] = df['links'].str[0].str['href'] # just grab the links(key) items
return df # return the final dataframe
And 3 more functions like users that do the same thing. The only difference between them as you can see is the getlike https://api.destination.com/categories vs https://api.destination.com/groups and the number of records returned for each will be different. Is there a way to combine these and call it a certain way?
Looks like you already know how to make functions, just extend it one step further to abstract away everything that is common amongst the functions.
BASE_URL = "https://api.destination.com/{}"
def make_headers():
headers = {"Authorization": "Bearer " + access_token}
return headers
def make_params(recs):
params = {'$skip': 0, '$top': recs}
return params
def make_df(data):
list_of_dict = data['resources']
df = pd.DataFrame.from_records(list_of_dict)
df['links'] = df['links'].str[0].str['href']
return df
def process(process):
headers = make_headers()
url = BASE_URL.format(process)
resp = requests.get(url, headers=headers)
data = resp.json()
records = data['totalResults']
params = make_params(records)
resp = requests.get(url, headers=headers, params=params)
all_data = resp.json()
return make_df(all_data)
Then you can call it like the following:
process('groups')
process('categories')
You can break it up further, but you get the idea.
You can just add a parameter to this function.
As an example:
def get_categories():
headers = {"Authorization": "Bearer " + access_token} # auth plus token
response = requests.get("https://api.destination.com/categories", headers=headers) # response
data = json.loads(response.text) # load the json data
records = str(data['totalResults']) # get number of results for next call
response = requests.get("https://api.destination.com/categories?$skip=0&$top="+records, headers=headers)
all_data = json.loads(response.text) # load the json data
list_of_dict = all_data['resources'] # get rid of all but lists of dictionaries
df = pd.DataFrame.from_records(list_of_dict) # create dataframe
df['links'] = df['links'].str[0].str['href'] # just grab the links(key) items
return df # return the final dataframe
You can just refactor to:
def get_elements(element):
if element is None:
return 'not found' #defaults to 404 error.
headers = {"Authorization": "Bearer " + access_token} # auth plus token
response = requests.get("https://api.destination.com/{}".format(element), headers=headers) # response
data = json.loads(response.text) # load the json data
records = str(data['totalResults']) # get number of results for next call
response = requests.get("https://api.destination.com/{}?$skip=0&$top={}".format(element,records), headers=headers)
all_data = json.loads(response.text) # load the json data
list_of_dict = all_data['resources'] # get rid of all but lists of dictionaries
df = pd.DataFrame.from_records(list_of_dict) # create dataframe
df['links'] = df['links'].str[0].str['href'] # just grab the links(key) items
return df # return the final dataframe

handle url pagination with python generator

Currently I'm fetching only first page from the server, part of the json is
{"status":"success","count":100,"total":22188,"next":"https://pimber.ly/api/v2/products/?sinceId=5981e16fcde47c0854dc540b","previous":"https://pimber.ly/api/v2/products/?maxId=5981e01dcde47c0854dc4afd","sinceId":"5981e01dcde47c0854dc4afd","maxId":"5981e16fcde47c0854dc540b","data":[.....]}
and the function is:
_fetch_data = response.json()
while _fetch_data['next'] is not None:
response = requests.get(
url=API_DOMAIN',
headers=headers
)
_page_data = response.json()['data']
for _data in _page_data:
yield _data
Current state of the function is only processing the first page, and it will just do that forever, so how can i fix the function to check next so can fetch total data?
I guess it should be
_fetch_data = response.json()
while _fetch_data['next'] is not None:
response = requests.get(_fetch_data['next'], headers=headers)
_fetch_data = response.json()
for _data in fetch_data['data']:
yield _data

Categories

Resources