Managing Json Response with Python - python

Here is the backstory:
I am trying to pull data via an api:
r = requests.get(url, headers=headers)
df = pd.read_json(json.dumps(r.json()), orient='list') // preferably i would use pd.io.json.json_normalize but it throws an error
Sample Json response(slightly simplified):
{'childCollectors': [12345678,],
'Param1': False,
'creationTime': '2017-01-19T18:53:28Z',
'physicalSerialNumber': ‘someserialnumberhere’,
'accountName': ‘account name’,
'Param2’: False,
'Param3’: None,
‘Param4’: None,
'type': ‘somesupersecrettype :)’,
'timeout': 5,
'points': [],
'archived': False,
‘Class’: ‘name’,
'firstReportedTime': '2017-10-26T00:24:12Z',
'deviceIdentifier': '255CD895348K',
'usenetmetering': False,
'vendor': ‘vendor name’,
'reportingIntervalLengthInMillis': 300000,
'id': 12145278,
'manualReadings': [],
'readingsMultiplier': 1,
'outageTimeInMillis': 43200000,
'multiplier': 1,
'parentPoint': None,
'buildingName': ‘buildingname',
'queryable': True,
'replications': [],
'downwardCommunications': [‘something here’],
'buildings': [associatedbuildingid],
'collectorClassDetails': {'identifierName': 'SERIAL_NUMBER',
'multiplierLabel': ‘supersecretlabel’,
'displayName': ‘fancyversionofClass’,
'downwardCommunications': [’something here’],
'vendor': ‘vendor’,
'isPhysicalMeter': False,
'name': 'fancyversionofClass',
'upwardCommunications': [‘somesuacyformat’],
'gateway': True},
'name': 'Gateway 1',
'primaryBuilding': associatedbuildingid,
'parentCollector': None,
'metrics': [‘well duh metrics’],
'upwardCommunications': [‘internet’]}
My Question is:
1) When I get a weird json format response like the above, what is the best practice of converting it to something more standard so that the data can be extracted and utilized. IE I personally want to create a source that contains all of the childCollector ID's to use to query data.
Sorry if this question is too specific but I am still learning python and would love to learn more about parsing data :)

Related

how to retrieve a link from a discord message?

i'm trying to create a program, which needs to read messages from a discord bot and retrieve links from these messages.
here's the code:
import requests
import json
from bs4 import builder
import bs4
def retrieve_messages(channelid):
headers = {
'authorization': 'NTQ5OTM4ODEzOTUxMTQ4MDQ3.YMi7CQ.fOm6F-dmPJPEW0dehLwCkB_ilBU'
}
r = requests.get(f'https://discord.com/api/v9/channels/{channelid}/messages', headers=headers)
jsonn = json.loads(r.text)
for value in jsonn:
print(value, '\n')
retrieve_messages('563699841377763348')
here's the output:
{'id': '908857015412084796', 'type': 0, 'content': '<#&624528614330859520>', 'channel_id': '5636998413777633, 2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on48', 'author': {'id': '749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [], 'mentions': []'pinned': False, 'mention_everyone': False, 'tts': Fa, 'mention_roles': ['624528614330859520'], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timest}amp': '2021-11-12T23:13:18.221000+00:00', 'edited_timestamp': None, 'flags': 0, 'components': []}
{'id': '908857014430629898', 'type': 0, 'content': '', 'channel_id': '563699841377763348', 'author': {'id':
'749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [{'type': 'rich', 'title': '<:GoldenKey:273763771929853962> Borderlands 1: 5 gold keys', 'description': 'Platform: Universal\nExpires: 30 November,
2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on the [website](https://shift.gearboxsoftware.com/rewards) or in game.\n\n[Source](https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation)', 'color': 16040976}], 'mentions': [], 'mention_roles': [], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timestamp': '2021-11-12T23:13:17.987000+00:00', 'edited_timestamp': None, 'flags': 1, 'components': []}
in the output there are 2 links, but I need to save the second link to a variable, and I'm wondering how I can do that
This is easiest done with the response body as a text object that can be scanned with regex to find the URLs
Solution
The variable test_case_data is the response body in TEXT form as a string.
import re
regex = r"(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])"
def find_embedded_urls(data):
return re.finditer(regex,data)
test_case_data = """'id': '908857014430629898', 'type': 0, 'content': '', 'channel_id': '563699841377763348', 'author': {'id':
'749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [{'type': 'rich', 'title': '<:GoldenKey:273763771929853962> Borderlands 1: 5 gold keys', 'description': 'Platform: Universal\nExpires: 30 November,
2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on the [website](https://shift.gearboxsoftware.com/rewards) or in game.\n\n[Source](https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation)', 'color': 16040976}], 'mentions': [], 'mention_roles': [], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timestamp': '2021-11-12T23:13:17.987000+00:00', 'edited_timestamp': None, 'flags': 1, 'components': []}"""
# test_case_data = response.text
matches = find_embedded_urls(test_case_data)
matches = [match[0] for match in matches] #convert all urls to strings
print(matches) # List of all the urls! Index for whatever one you need
Output
['https://shift.gearboxsoftware.com/rewards', 'https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation']
With the URLs as a list index, you can set variables by indexing the list at whatever point you need.

How to get the authors name from Google books api?

When I search for the book using this link https://www.googleapis.com/books/v1/volumes?q=9780310709626 I get the author name in the details.
However when I run my code and print items I don't see the author name. I've been trying to figure out why it doesn't show from the data but I don't see any problem with my code.
print(searchBooks("9780310709626"))
def getBooks(id):
url = "https://www.googleapis.com/books/v1/volumes?q=isbn:"
resp = url(api + id)
data = json.load(resp)
print(data["items"])
My code output:
[{'kind': 'books#volume', 'id': 'JEP3sgEACAAJ', 'etag': '92vdEneJ83g', 'selfLink': 'https://www.googleapis.com/books/v1/volumes/JEP3sgEACAAJ', 'volumeInfo': {'title': "The Beginner's Bible", 'subtitle': 'Timeless Bible Stories', 'publisher': 'Zondervan', 'publishedDate': '2005', 'description': 'Retells familiar Bible stories from the Old and New Testaments for children to enjoy.', 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0310709628'}, {'type': 'ISBN_13', 'identifier': '9780310709626'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 511, 'printType': 'BOOK', 'categories': ['Juvenile Nonfiction'], 'averageRating': 4.5, 'ratingsCount': 2, 'maturityRating': 'NOT_MATURE', 'allowAnonLogging': False, 'contentVersion': 'preview-1.0.0', 'panelizationSummary': {'containsEpubBubbles': False, 'containsImageBubbles': False}, 'imageLinks': {'smallThumbnail': 'http://books.google.com/books/content?id=JEP3sgEACAAJ&printsec=frontcover&img=1&zoom=5&source=gbs_api', 'thumbnail': 'http://books.google.com/books/content?id=JEP3sgEACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api'}, 'language': 'en', 'previewLink': 'http://books.google.com.tw/books?id=JEP3sgEACAAJ&dq=isbn:9780310709626&hl=&cd=1&source=gbs_api', 'infoLink': 'http://books.google.com.tw/books?id=JEP3sgEACAAJ&dq=isbn:9780310709626&hl=&source=gbs_api', 'canonicalVolumeLink': 'https://books.google.com/books/about/The_Beginner_s_Bible.html?hl=&id=JEP3sgEACAAJ'}, 'saleInfo': {'country': 'TW', 'saleability': 'NOT_FOR_SALE', 'isEbook': False}, 'accessInfo': {'country': 'TW', 'viewability': 'NO_PAGES', 'embeddable': False, 'publicDomain': False, 'textToSpeechPermission': 'ALLOWED', 'epub': {'isAvailable': False}, 'pdf': {'isAvailable': False}, 'webReaderLink': 'http://play.google.com/books/reader?id=JEP3sgEACAAJ&hl=&printsec=frontcover&source=gbs_api', 'accessViewStatus': 'NONE', 'quoteSharingAllowed': False}, 'searchInfo': {'textSnippet': 'Retells familiar Bible stories from the Old and New Testaments for children to enjoy.'}}, {'kind': 'books#volume', 'id': 'ZRgnzQEACAAJ', 'etag': 'RXYM4Rbwx+g', 'selfLink': 'https://www.googleapis.com/books/v1/volumes/ZRgnzQEACAAJ', 'volumeInfo': {'title': "The Beginner's Bible", 'authors': ['Catherine DeVries'], 'publishedDate': '2005', 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0310709628'}, {'type': 'ISBN_13', 'identifier': '9780310709626'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 511, 'printType': 'BOOK', 'averageRating': 4, 'ratingsCount': 1, 'maturityRating': 'NOT_MATURE', 'allowAnonLogging': False, 'contentVersion': 'preview-1.0.0', 'panelizationSummary': {'containsEpubBubbles': False, 'containsImageBubbles': False}, 'language': 'en', 'previewLink': 'http://books.google.com.tw/books?id=ZRgnzQEACAAJ&dq=isbn:9780310709626&hl=&cd=2&source=gbs_api', 'infoLink': 'http://books.google.com.tw/books?id=ZRgnzQEACAAJ&dq=isbn:9780310709626&hl=&source=gbs_api', 'canonicalVolumeLink': 'https://books.google.com/books/about/The_Beginner_s_Bible.html?hl=&id=ZRgnzQEACAAJ'}, 'saleInfo': {'country': 'TW', 'saleability': 'NOT_FOR_SALE', 'isEbook': False}, 'accessInfo': {'country': 'TW', 'viewability': 'NO_PAGES', 'embeddable': False, 'publicDomain': False, 'textToSpeechPermission': 'ALLOWED', 'epub': {'isAvailable': False}, 'pdf': {'isAvailable': False}, 'webReaderLink': 'http://play.google.com/books/reader?id=ZRgnzQEACAAJ&hl=&printsec=frontcover&source=gbs_api', 'accessViewStatus': 'NONE', 'quoteSharingAllowed': False}}]
With the requests library:
import requests
url = 'https://www.googleapis.com/books/v1/volumes?q=9780310709626'
resp = requests.get(url)
json = resp.json()
print(json['items'][0]['volumeInfo']['authors'])
From the response you can see that authors is an array. To reach that array you will need to do json['items'][0]['volumeInfo']['authors'].
As items is also an array, meaning that there could be multiple items in this response. You might want to write extra code to deal with that other than hard-code index=0.
Note that in this case, you probably won't know the schema of the response. You should handle unexpected behaviors. For some certain books maybe some keys are missing, json['items'] could be an empty array, or even items is not in the response at all.

Python PrettyPrinter shows object address but not the contents

I am trying to pretty-print a python object by calling:
from pprint import pprint
...
pprint(update)
But the output looks like this:
<telegram.update.Update object at 0xffff967e62b0>
However, using Python's internal print() I get the correct output:
{'update_id': 14191809, 'message': {'message_id': 22222, 'date': 11111, 'chat': {'id': 00000, 'type': 'private', 'username': 'xxxx', 'first_name': 'X', 'last_name': 'Y'}, 'text': '/start', 'entities': [{'type': 'bot_command', 'offset': 0, 'length': 6}], 'caption_entities': [], 'photo': [], 'new_chat_members': [], 'new_chat_photo': [], 'delete_chat_photo': False, 'group_chat_created': False, 'supergroup_chat_created': False, 'channel_chat_created': False, 'from': {'id': 01010101, 'first_name': 'X', 'is_bot': False, 'last_name': 'Y', 'username': 'xxxx', 'language_code': 'en'}}}
Is there a way to make pprint(), show the object-data correctly and formatted?
pprint uses the representation (__repr__() method) of the object while print uses __str__(). What you see in print output is not a dictionary but a string representation of the inner structure of the telegram.update.Update instance.
There is no generic solution to this, but since your question is about a specific library, consulting the relevant docs shows that there is a .to_json() method, so you can do this:
import json
from pprint import pprint
...
pprint(json.loads(update.to_json()))

Checking if value in json file is equal to python variable

I am trying to check if a specific item in a json file is equal to one of my python variables.
{'data': {'redemption': {'channel_id': 'secret',
'id': 'secret',
'redeemed_at': '2021-02-08T09:46:22.637059711Z',
'reward': {'background_color': '#FA1ED2',
'channel_id': '145998001',
'cooldown_expires_at': None,
'cost': 500,
'default_image': {'url_1x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-1.png',
'url_2x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-2.png',
'url_4x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-4.png'},
'global_cooldown': {'global_cooldown_seconds': 1,
'is_enabled': False},
'id': '123',
'image': None,
'is_enabled': True,
'is_in_stock': True,
'is_paused': False,
'is_sub_only': False,
'is_user_input_required': False,
'max_per_stream': {'is_enabled': False,
'max_per_stream': 1},
'max_per_user_per_stream': {'is_enabled': False,
'max_per_user_per_stream': 1},
'prompt': '*Dabs*',
'redemptions_redeemed_current_stream': None,
'should_redemptions_skip_request_queue': False,
'template_id': 'template:4425c37e-6881-442a-aa3d-fdc6998a29de',
'title': 'Dab!',
'updated_for_indicator_at': '2020-09-10T18:55:40.064177881Z'},
'status': 'UNFULFILLED',
'user': {'display_name': 'Androteex',
'id': 'secret',
'login': 'androteex'}},
'timestamp': '2021-02-08T09:46:22.637059711Z'},
'type': 'reward-redeemed'}
I want to find the second id: 'id': '123' and check if id is equal to 123. And if so I want to print that string. How could I do that?
You can use the JSON module.
import json
data = json.loads(my_json)
my_id = data['data']['redemption']['reward']['id']
if my_id == '123':
print(data)
Granted it was added to data (can be done with json.loads):
id = data['data']['redemption']['reward']['id']
idcheck = 123
if (int(id) == idcheck):
print ("YES")

How can I make json data from requests into excel file?

This is my first time dealing with json data. So I'm not that familiar with the structure of json.
I got some data through "we the people" e-petition sites with following code:
url = "https://api.whitehouse.gov/v1/petitions.json?limit=3&offset=0&createdBefore=1573862400"
jdata_2 = requests.get(url).json()
Yet, I realize this is something different from... the ordinary json structure since I got some error while I tried to convert it into excel file with pandas
df = pandas.read_json(jdata_2)
Obviously, I must miss something which I must have done before using pandas.read_json() code.
I have searched for the answer but most of questions are "How can I convert json data into excel data", which needs json data. For my case, I scraped it from the url, so I thought I could make that strings into json data, and then try to convert it into excel data as well. So I tried to use json.dump() as well, but it didn't work as well.
I know it must be the naive question. But I'm not sure where I can start with this naive question. If anyone can instruct me how to deal with it, I would really appreciate it. Or link me some references that I can study as well.
Thank you for your help in advance.
This is the json data with the requests, and I pprint it with indent=4.
Input:
url = "https://api.whitehouse.gov/v1/petitions.json?limit=3&offset=0&createdBefore=1573862400"
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(jdata_2)
Output :
{ 'metadata': { 'requestInfo': { 'apiVersion': 1,
'query': { 'body': None,
'createdAfter': None,
'createdAt': None,
'createdBefore': '1573862400',
'isPublic': 1,
'isSignable': None,
'limit': '3',
'mock': 0,
'offset': '0',
'petitionsDefaultLimit': '1000',
'publicThreshold': 149,
'responseId': None,
'signatureCount': None,
'signatureCountCeiling': None,
'signatureCountFloor': 0,
'signatureThreshold': None,
'signatureThresholdCeiling': None,
'signatureThresholdFloor': None,
'sortBy': 'DATE_REACHED_PUBLIC',
'sortOrder': 'ASC',
'status': None,
'title': None,
'url': None,
'websiteUrl': 'https://petitions.whitehouse.gov'},
'resource': 'petitions'},
'responseInfo': { 'developerMessage': 'OK',
'errorCode': '',
'moreInfo': '',
'status': 200,
'userMessage': ''},
'resultset': {'count': 1852, 'limit': 3, 'offset': 0}},
'results': [ { 'body': 'Please save kurdish people in syria \r\n'
'pleaee save north syria',
'created': 1570630389,
'deadline': 1573225989,
'id': '2798897',
'isPublic': True,
'isSignable': False,
'issues': [ { 'id': 326,
'name': 'Homeland Security & '
'Defense'}],
'petition_type': [ { 'id': 291,
'name': 'Call on Congress to '
'act on an issue'}],
'reachedPublic': 0,
'response': [],
'signatureCount': 149,
'signatureThreshold': 100000,
'signaturesNeeded': 99851,
'status': 'closed',
'title': 'Please save rojava north syria\r\n'
'please save kurdish people\r\n'
'please stop erdogan\r\n'
'plaease please',
'type': 'petition',
'url': 'https://petitions.whitehouse.gov/petition/please-save-rojava-north-syria-please-save-kurdish-people-please-stop-erdogan-plaease-please'},
{ 'body': 'Kane Friess was a 2 year old boy who was '
"murdered by his mom's boyfriend, Gyasi "
'Campbell. Even with expert statements from '
'forensic anthropologists, stating his injuries '
'wete the result of homicide. Mr. Campbell was '
'found guilty of involuntary manslaughter. This '
"is an outrage to Kane's Family and our "
'community.',
'created': 1566053365,
'deadline': 1568645365,
'id': '2782248',
'isPublic': True,
'isSignable': False,
'issues': [ { 'id': 321,
'name': 'Criminal Justice Reform'}],
'petition_type': [ { 'id': 281,
'name': 'Change an existing '
'Administration '
'policy'}],
'reachedPublic': 0,
'response': [],
'signatureCount': 149,
'signatureThreshold': 100000,
'signaturesNeeded': 99851,
'status': 'closed',
'title': "Kane's Law. Upon which the murder of a child, "
'regardless of circumstances, be seen as 1st '
'degree murder. A Federal Law.',
'type': 'petition',
'url': 'https://petitions.whitehouse.gov/petition/kanes-law-upon-which-murder-child-regardless-circumstances-be-seen-1st-degree-murder-federal-law'},
{ 'body': "Schumer and Pelosi's hatred and refusing to "
'work with President Donald J. Trump is holding '
'America hostage. We the people know securing '
'our southern border is a priority which will '
'not happen with these two in office. Lets '
'build the wall NOW!',
'created': 1547050064,
'deadline': 1549642064,
'id': '2722358',
'isPublic': True,
'isSignable': False,
'issues': [ {'id': 306, 'name': 'Budget & Taxes'},
{ 'id': 326,
'name': 'Homeland Security & '
'Defense'},
{'id': 29, 'name': 'Immigration'}],
'petition_type': [ { 'id': 291,
'name': 'Call on Congress to '
'act on an issue'}],
'reachedPublic': 0,
'response': [],
'signatureCount': 149,
'signatureThreshold': 100000,
'signaturesNeeded': 99851,
'status': 'closed',
'title': 'Remove Chuck Schumer and Nancy Pelosi from '
'office',
'type': 'petition',
'url': 'https://petitions.whitehouse.gov/petition/remove-chuck-schumer-and-nancy-pelosi-office'}]}
And this is the Error message I got
Input :
df = pandas.read_json(jdata_2)
Output :
ValueError: Invalid file path or buffer object type: <class 'dict'>
You can try the below code as well, it is working fine
URL = "https://api.whitehouse.gov/v1/petitions.json?limit=3&offset=0&createdBefore=1573862400"
// fetching the json response from the URL
req = requests.get(URL)
text_data= req.text
json_dict= json.loads(text_data)
//converting json dictionary to python dataframe for results object
df = pd.DataFrame.from_dict(json_dict["results"])
Finally, saving the dataframe to excel format i.e xlsx
df.to_excel("output.xlsx")

Categories

Resources