How to merge JSON in while loop - python

I have a while loop for getting JSON response from API. Here is my code.
import json
import requests
import pandas as pd
# Page through the tickets endpoint, following the "next" link until it is empty.
url = "https://org.zendesk.com/api/v2/tickets?sort_by=created_at"
payload = ""
headers = {
    'Authorization': "Basic blablabla"
}
params = "page[size]=3"
while url:
    response = requests.request("GET", url, data=payload, headers=headers, params=params)
    data = response.json()
    # Get only the tickets array and drop everything else.
    # NOTE: dataTickets is rebound on every pass, so it only ever holds the
    # current page -- this is the behaviour the question asks about.
    dataTickets = json.dumps(data['tickets'], indent=2)
    # print(dataTickets)
    # Get the next page of data.
    url = data['links']['next']
Each loop gives me the following.
[
{'url': 'https://org.zendesk.com/api/v2/tickets/4025.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4026.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4027.json'}
]
How to merge each loop run (inside the loop) to get merged array as follows:
[
{'url': 'https://org.zendesk.com/api/v2/tickets/4025.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4026.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4027.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4028.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4029.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4030.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4031.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4032.json'},
{'url': 'https://org.zendesk.com/api/v2/tickets/4033.json'}
]

I think I got it. I went about it slightly differently.
I am processing the full request where I merge the "tickets" array.
After I merge it, I take just the "tickets" out and print it. This helps me to manipulate just the tickets later.
The data in one request loop looks as follows:
{
"tickets": [
{
"url": "https://org.zendesk.com/api/v2/tickets/3058.json"
},
{
"url": "https://org.zendesk.com/api/v2/tickets/3059.json"
}
],
"next_page": "https://org.zendesk.com/api/v2/tickets.json?page=2",
"previous_page": null,
"count": 3234
}
import json
import requests
import pandas as pd
# Collect the "tickets" array of every page into one list and write the
# merged result to tickets1.json.
url = "https://org.zendesk.com/api/v2/tickets?sort_by=created_at"
payload = ""
headers = {
    'Authorization': "Basic blablabla"
}
params = "page[size]=3"

# Accumulate in a separate list: `data` is rebound on every request, so
# extending data["tickets"] with itself would only duplicate the current
# page and lose all earlier pages.
all_tickets = []
while url:
    response = requests.request("GET", url, data=payload, headers=headers, params=params)
    # Fail loudly on an HTTP error instead of a confusing KeyError below.
    response.raise_for_status()
    data = response.json()
    all_tickets.extend(data["tickets"])

    # Presumably the "next" URL already carries the paging parameters, so
    # they are only sent with the first request -- confirm against the API.
    params = None

    # Get the next page. The sample response shown with this snippet uses
    # "next_page" while the loop reads links.next, so accept either; also
    # stop when the API explicitly reports that no more pages exist.
    meta = data.get("meta") or {}
    links = data.get("links") or {}
    if meta.get("has_more") is False:
        url = None
    else:
        url = links.get("next") or data.get("next_page")

# Format the merged tickets array as JSON.
dataout = json.dumps(all_tickets, indent=2)
# Write the formatted JSON into a file.
with open('tickets1.json', 'w') as f:
    f.write(dataout)

Related

String indices must be integers in Python

I'm trying to get url value from the api, but have an issue saying TypeError: string indices must be integers
Here is the array that I get from api:
[
{
"created_utc": 1643524062,
"title": "title",
"url": "https://i.redd.it/tmd5shz9rre81.gif",
},
{
"created_utc": 1643530657,
"title": "title",
"url": "https://i.redd.it/qqjykysxase81.gif",
}
]
And here is the code I use to get the url:
url = "https://reddit-meme.p.rapidapi.com/memes/trending"
headers = {
    # Never publish a real credential; substitute your own key here.
    "X-RapidAPI-Key": "<YOUR_RAPIDAPI_KEY>",
    "X-RapidAPI-Host": "reddit-meme.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers)
# This is the line that raises the TypeError: response.text is a str, so
# response.text[0] is a single character and cannot be indexed with "url".
print(response.text[0]["url"])
What am I doing wrong?
response.text is a string; you have to parse it first with the json library, like this:
import requests
import json
url = "https://reddit-meme.p.rapidapi.com/memes/trending"
headers = {
    # Never publish a real credential; substitute your own key here.
    "X-RapidAPI-Key": "<YOUR_RAPIDAPI_KEY>",
    "X-RapidAPI-Host": "reddit-meme.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers)
# Surface HTTP errors before trying to parse the body.
response.raise_for_status()
# response.text is a str; parse it into Python objects before indexing.
data = json.loads(response.text)
print(data[0]["url"])

Python Requests Post within a nested Json - retrieve data with a specific value

I have already searched Stack Overflow and could not find an answer to my problem.
I'm accessing an API from the German government that has an output limit of 10.000 entries. I want all the data for a specific city, and since there are more than 10.000 entries in the original database, I need to apply the filter ("do the query") as part of the requests.post call.
Here is one entry of Json result, when I simply do request.post to this API:
{
"results":[
{
"_id":"CXPTYYFY807",
"CREATED_AT":"2019-12-17T14:48:17.130Z",
"UPDATED_AT":"2019-12-17T14:48:17.130Z",
"result":{
"id":"CXPTYYFY807",
"title":"Bundesstadt Bonn, SGB-315114, Ortsteilzentrum Brüser Berg, Fliesenarbeiten",
"description":["SGB-315114","Ortsteilzentrum Brüser Berg, Fliesenarbeiten"],
"procedure_type":"Ex ante Veröffentlichung (§ 19 Abs. 5)",
"order_type":"VOB",
"publication_date":"",
"cpv_codes":["45431000-7","45431100-8"],
"buyer":{
"name":"Bundesstadt Bonn, Referat Vergabedienste",
"address":"Berliner Platz 2",
"town":"Bonn",
"postal_code":"53111"},
"seller":{
"name":"",
"town":"",
"country":""
},
"geo":{
"lon":7.0944,
"lat":50.73657
},
"value":"",
"CREATED_AT":"2019-12-17T14:48:17.130Z",
"UPDATED_AT":"2019-12-17T14:48:17.130Z"}
}
],
"aggregations":{},
"pagination":{
"total":47389,
"start":0,
"end":0 }}
What I want is all the data which was bought in "town" : "Bonn"
What I have already tried:
import requests
# POST a body that mirrors the shape of the response, with the wanted town
# filled in, to the aggregation search endpoint.
url = 'https://daten.vergabe.nrw.de/rest/evergabe/aggregation_search'
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
}
data = {
    "results": [
        {"result": {"buyer": {"town": "Bonn"}}},
    ],
}
# A size limit has to be set, otherwise fewer entries are delivered.
params = {'size': 10000}
req = requests.post(url, params=params, headers=headers, json=data)
This returns me the post, but not "filtered" by city.
I also tried req = requests.post(url, params=params, headers=headers, data=data), which returns ERROR 400.
Another way would be to fetch all the data in a loop using the pagination parameters at the end of the JSON, but again I'm not able to write down the JSON path to the pagination, for example: start: 0, end: 500
Can anyone help me solving it?
Try:
# Send a "match" query on the buyer.town field in the request body.
url = 'https://daten.vergabe.nrw.de/rest/evergabe/aggregation_search'
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
}
town_filter = {"buyer.town": "Bonn"}
query1 = {"query": {"match": town_filter}}
req = requests.post(url, headers=headers, json=query1)
# Check the output
req.text
Edit:
This won't work if the filter matches with more than 10.000 results, but it may be a quick workaround to the problem you are facing.
import json
import requests
import math
# Page through the whole index with "search_after", sorted by _id, and
# collect every result object in tenders_array.
url = "https://daten.vergabe.nrw.de/rest/vmp_rheinland"
size = 5000
headers = {
    'accept': "application/json",
    'content-type': "application/json",  # the missing comma here was a SyntaxError
    'cache-control': "no-cache"
}

tenders_array = []
total_hits = None   # unknown until the first response arrives
last_id = None      # _id of the last result seen; cursor for search_after

while total_hits is None or len(tenders_array) < total_hits:
    # Build the body as a dict and serialise it, rather than concatenating
    # JSON by hand (which breaks as soon as an _id needs escaping).
    body = {"sort": [{"_id": "asc"}], "query": {"match_all": {}}, "size": size}
    if last_id is not None:
        body["search_after"] = [last_id]
    payload = json.dumps(body)

    response = requests.request("POST", url, data=payload, headers=headers)
    # Raise on HTTP errors (the accompanying text mentions occasional 502s)
    # instead of failing later on a body that is not the expected JSON.
    response.raise_for_status()
    query_data = json.loads(response.text)

    results = query_data['results']
    if not results:
        # Empty page: nothing more to fetch (also covers an empty index,
        # where indexing results[-1] would raise IndexError).
        break
    tenders_array.extend(results)
    total_hits = query_data['pagination']['total']
    last_id = results[-1]["_id"]
https://gist.github.com/thiagoalencar/34401e204358499ea3b9aa043a18395f
code in the gist.
Some code to paginate through the Elasticsearch API. This is an API layered over the Elasticsearch API, and the docs were not so clear. I tried scroll, with no success. This solution uses the search_after parameter without a point in time, because that endpoint is not available. Sometimes the server refuses the request, and it is necessary to check for response.status_code==502.
The code is messy and need refactoring. But it works. The final tenders_array contains all objects.

Hubspot API Not Populating Deal

When I'm trying to create a deal through the hubspot api, all that is being created is a completely blank deal even though I am passing through populated data
Api Url: https://developers.hubspot.com/docs/api/crm/deals
Here is the following code that I am trying:
import json
import requests
# Create a deal by POSTing the deal fields as a JSON body.
hubspot_api_key = "MY_API_KEY"
url = f'https://api.hubapi.com/crm/v3/objects/deals?hapikey={hubspot_api_key}'
headers = {"Content-Type": "application/json"}
deals_post = dict(
    amount="4034.75",
    closedate='2021-05-10T12:04:00.000Z',
    dealname='Custom data integrations',
    dealstage='closedwon',
    hubspot_owner_id="5448459615",
    pipeline='default',
)
body = json.dumps(deals_post)
response = requests.post(url, headers=headers, data=body)
print(response.text)
And here is the result of it:
The solution to this issue would be adding properties to the data dictionary
import json
import requests
# Create a deal; the deal fields are nested under a top-level "properties" key.
hubspot_api_key = "MY_API_KEY"
url = f'https://api.hubapi.com/crm/v3/objects/deals?hapikey={hubspot_api_key}'
headers = {"Content-Type": "application/json"}
deal_properties = {
    'amount': "4034.75",
    'closedate': '2021-05-10T12:04:00.000Z',
    'dealname': 'Custom data integrations',
    'dealstage': 'closedwon',
    'hubspot_owner_id': 83849850,
    'pipeline': 'default',
}
deals_post = {'properties': deal_properties}
body = json.dumps(deals_post)
response = requests.post(url, headers=headers, data=body)
print(response.text)
This results in a filled out deal according to the data that was passed in

How to loop through list during API call?

I am looking to loop through about 5 stock tickers using an API. Currently I have "MSFT" as the only stock being called; however, I would like to make a stock list to return multiple responses.
For example:
stock_list = ["MSFT", "AAPL", "LMD", "TSLA", "FLGT"]
How can I request all 5 of these stocks to the querystring to print each response? Here is what I have currently which prints only "MSFT" into a json format...
import requests
# Use a RapidAPI request to call info on a stock.
url = "https://alpha-vantage.p.rapidapi.com/query"
querystring = {"function": "GLOBAL_QUOTE", "symbol": "MSFT"}
headers = {
    # The closing quote was missing here, which made the snippet a SyntaxError.
    'x-rapidapi-key': "KEY INSERTED HERE",
    'x-rapidapi-host': "alpha-vantage.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers, params=querystring)
Try using a for loop.
import requests
url = 'https://alpha-vantage.p.rapidapi.com/query'
headers = {
    'x-rapidapi-key': '<API KEY>',
    'x-rapidapi-host': 'alpha-vantage.p.rapidapi.com',
}
tickers = ['MSFT', 'AAPL', 'LMD', 'TSLA', 'FLGT']
# One request per ticker; only the "symbol" query parameter changes.
for ticker in tickers:
    querystring = {'function': 'GLOBAL_QUOTE', 'symbol': ticker}
    r = requests.get(url, headers=headers, params=querystring)
    print(r.json())
You can also try pretty printing the json output using the json module.
import json
# ... your code ...
for ticker in tickers:
    # ... your code ...
    # Pretty-print the parsed response with two-space indentation.
    print(json.dumps(r.json(), indent=2))
Also, you should delete your API key before it's abused by anyone! Keys like this have to be kept safe somewhere.

Pagination SendinBlue Api Call

I've been trying to get data from the SendinBlue API. The problem is that the API has a limit of 100 records per call and my Python loop is not working properly. This is what I have so far; the call itself works fine.
import requests
import pandas as pd
from pandas import json_normalize
import json
# Fetch the first page of events and load it into a DataFrame.
results = []
pagination = 0
url = "https://api.sendinblue.com/v3/smtp/statistics/events"
querystring = dict(limit="100", offset=pagination, days="15")
headers = {
    "Accept": "application/json",
    "api-key": "XXXXXXX",
}
# API response
response = requests.request("GET", url, headers=headers, params=querystring)
# Convert the JSON text into a dictionary
raw_body = response.text
data = json.loads(raw_body)
# Convert the dictionary into a DataFrame
base = pd.json_normalize(data, record_path='events')
The data structure is like this:
{'events': [
{'email': 'chusperu#gmail.com',
'date': '2020-10-18T17:18:58.000-05:00',
'subject': 'Diego, ¡Gracias por registrarte! 😉',
'messageId': '<202010181429.12179607081#smtp-relay.mailin.fr>',
'event': 'opened',
'tag': '',
'from': 'ventas01#grupodymperu.com'},
{'email': 'cynthiaapurimac#gmail.com',
'date': '2020-10-18T17:52:56.000-05:00',
'subject': 'Alvarado, ¡Gracias por registrarte! 😉',
'messageId': '<202010182252.53640747487#smtp-relay.mailin.fr>',
'event': 'requests',
'tag': '',
'from': 'ventas01#grupodymperu.com'},
....
The loop I have tried is this, but it only paginated the first 200 records. What am I doing wrong?
# Indentation reconstructed from the flattened original; the for/else pairing
# below is the reading consistent with the reported "only 200 records" result.
for i in data['events']:
    results.append(i)
while response.status_code == 200:
    pagination += 100
    querystring ['offset'] = pagination
    response = requests.request("GET", url, headers=headers, params=querystring)
    data = json.loads(response.text)
    for i in data['events']:
        results.append(i)
    else:
        # A for-loop's else clause runs whenever the loop finishes without
        # hitting break, so this break always fires after the first extra
        # page -- this is the behaviour the question asks about.
        break
print(results)
I finally got it.
import requests
import pandas as pd
from pandas import json_normalize
import json
# Excel = "C:/Users/User/PycharmProjects/Primero/DataSendin.xlsx"
# Page through the events endpoint 100 records at a time, collecting each
# page's response dict, then flatten all "events" into one DataFrame.
pagination = 0
url = "https://api.sendinblue.com/v3/smtp/statistics/events"
querystring = {"limit":"100","offset":f"{pagination}","days":"3"}
headers = {
    "Accept": "application/json",
    "api-key": "Your API key"
}

results = []
while True:
    querystring['offset'] = pagination
    # API response
    response = requests.request("GET", url, headers=headers, params=querystring)
    if response.status_code != 200:
        # Do not append an error body: it has no "events" key and would make
        # json_normalize fail further down.
        print(f"request failed with HTTP status {response.status_code}")
        break
    try:
        # Convert the JSON text into a dictionary
        data = json.loads(response.text)
    except ValueError:
        # Previously a bare string expression (a silent no-op); report it.
        print("no data")
        break
    # An empty body or an empty/missing "events" list is taken to mean there
    # are no more pages -- TODO confirm against the API's actual last page.
    if not data or not data.get('events'):
        break
    results.append(data)
    pagination += 100

if not results:
    print("no hay data")

# Convert the collected dictionaries into a DataFrame
final = list(filter(None, results))
base = pd.json_normalize(final, record_path='events')
base

Categories

Resources