I am writing a parser for ads, I ran into a problem that the dict is not displayed in full, but only the first part.
import requests
import json
from datetime import datetime
url = 'https://api.yo.com/api/v1/products?app_id=web2'
response = requests.get(url).json()
items = response['data']
iter1 = []
for item in items:
iter1.append({
'name': item.get('name', 'NA'),
'owner': item.get('owner', 'NA'),
'date_published': item.get('date_published', 'NA'),
'short_url': item.get('short_url', 'NA')
})
result = {}
for keyvalue in iter1:
result["name"] = iter1[0]["name"]
result["user"] = iter1[1]["owner"]["name"]
result["date_published"] = datetime.utcfromtimestamp(iter1[0]["date_published"]).strftime('%Y-%m-%d %H:%M:%S')
result["short_url"] = iter1[0]["short_url"]
print(keyvalue)
Output example print(keyvalue):
{'name': 'text ads', 'user': 'John. Doe', 'date_published': '2021-08-02 20:37:13', 'short_url': 'https://yo.com/p610009e'}
What is actually contained in iter1:
[{'name': 'IPhone 7 32 gb', 'owner': {'id': '5a552f202d23c1214'}, 'date_published': 1627937371, 'short_url': 'https://yo/p60ff'},
{'name': 'Матрас, подушка', 'owner': {'id': '5dc2590dabc73388f2', 'name': 'Olga', 'type': 'b2b_professional', 'linked_id': '5dc2590dad53388f2', 'is_shop': False, 'is_verified': False, 'image': {'id': '5dc2756ba162b6342', 'num': 1, 'url': 'https://cach6342.jpg'}, 'date_registered': 1573017904, 'settings': {'display_phone_to_anon': True, 'display_phone': True, 'display_chat': True, 'location': {'description': 'Москва'}}, 'display_phone_num': None, 'isOnline': False, 'onlineText': 'Не в сети', 'online_text_detailed': 'Сегодня в 23:30', 'answerTimeText': '', 'is_blocked': False, 'store': None, 'rating_mark': 4.4}, 'date_published': 1627542263, 'short_url': 'https://yo/p60dac06897'},
{'name': 'Букеты', 'owner': {'id': '59d60973a3f3386f3f', 'name': 'Екатерина', 'type': 'person', 'linked_id': '59d609739e9486f3f', 'is_shop': False, 'is_verified': False, 'image': {'id': '59db3e1457556c13', 'num': 1, 'url': 'https://cac/i/oi/d6/59d609b.jpg'}, 'date_registered': 1507199347, 'settings': {'display_phone_to_anon': False, 'display_phone': False, 'display_chat': True}, 'display_phone_num': None, 'isOnline': False, 'onlineText': 'Не в сети', 'online_text_detailed': 'Сегодня в 21:44', 'answerTimeText': '', 'is_blocked': False, 'store': None, 'rating_mark': 0}, 'date_published': 1627472412, 'short_url': 'https://you/p60a0db5de263'}]
How can I display all the lines 'print(keyvalue)' that are processed in result = {}?
I think you've got yourself confused about how iteration works in python. But its interesting how your first loop is fine: for item in items:
You probably meant this for your second loop:
output = []
for result in iter1:
item = {}
item["name"] = result["name"]
item["user"] = result["owner"].get("name")
date = datetime.utcfromtimestamp(result["date_published"]).strftime('%Y-%m-%d %H:%M:%S')
item["date_published"] = date
item["short_url"] = result["short_url"]
output.append(item)
print(output)
You could probably collapse the two loops into one, but best to start with two.
Related
need some help..
I have data like:
data dict:
{'folder_id': 13, 'type': 'local', 'read': False, 'last_modification_date': 1653588426,
'creation_date': 1653408009, 'status': 'completed', 'uuid': 'ebaacb26-654b-04de-b5b3-
337b5bb66eea9e74cdc5db8168c5', 'shared': False, 'user_permissions': 128, 'owner':
'email#com.com', 'timezone': 'Europe/Brusel', 'rrules': 'FREQ=WEEKLY;INTERVAL=1;BYDAY=TU',
'starttime': '20220211T190000', 'enabled': True, 'control': True, 'live_results': 0, 'name':
'6666-Test-VM01', 'id': 286}
{'folder_id': 2, 'type': 'local', 'read': True, 'last_modification_date': 1653587386,
'creation_date': 1653399006, 'status': 'completed', 'uuid': 'fbbfbca1-ba55-31cf-7de9-
984ab3aa2ce13223a2ed8c1b45a7', 'shared': False, 'user_permissions': 128, 'owner':
'info#test.com', 'timezone': 'Europe/Brusel', 'rrules': 'FREQ=WEEKLY;INTERVAL=1;BYDAY=TU',
'starttime': '20220215T163000', 'enabled': True, 'control': True, 'live_results': 0, 'name':
'testvm-3', 'id': 83}
{'folder_id': 2, 'type': 'local', 'read': False, 'last_modification_date': 1653587306,
'creation_date': 1653348635, 'status': 'completed', 'uuid': '762ba2a8-9e73-957c-4934-
ae919c7148453dbdd9ea1e07b423', 'shared': False, 'user_permissions': 128, 'owner':
'kitty#xelo.lt', 'timezone': 'Europe/Brusel', 'rrules': 'FREQ=WEEKLY;INTERVAL=1;BYDAY=TU',
'starttime': '20220209T023000', 'enabled': True, 'control': True, 'live_results': 0, 'name':
'111-Vm3000test', 'id': 264}
I'm trying to return dictionary with all data inside, but it's returning only first or last value, how to get all data from this dictionary, my code:
class Testclass():
def __init__(self):
self.resources = {}
def get_data(self):
for data in data_dict:
self.resources['folder_id'] = data['folder_id']
self.resources['name'] = data['name']
return self.resources
output of this code:
{'folders_id': 13, 'name': '6666-Test-VM01'}
why i didint get all anothers?
Expected output:
{'folders_id': 13, 'name': '6666-Test-VM01'}
{'folders_id': 2, 'name': 'testvm-3'}
{'folders_id': 2, 'name': '111-Vm3000test'}
Assuming data_dict is a list of dictionaries, let's do the same for resources, you can do something like:
def __init__(self):
self.resources = []
def get_data(self):
for data in data_dict:
new_dict = {}
new_dict['folder_id'] = data['folder_id']
new_dict['name'] = data['name']
self.resources.append(new_dict)
return self.resources
i'm trying to create a program, which needs to read messages from a discord bot and retrieve links from these messages.
here's the code:
import requests
import json
from bs4 import builder
import bs4
def retrieve_messages(channelid):
headers = {
'authorization': 'NTQ5OTM4ODEzOTUxMTQ4MDQ3.YMi7CQ.fOm6F-dmPJPEW0dehLwCkB_ilBU'
}
r = requests.get(f'https://discord.com/api/v9/channels/{channelid}/messages', headers=headers)
jsonn = json.loads(r.text)
for value in jsonn:
print(value, '\n')
retrieve_messages('563699841377763348')
here's the output:
{'id': '908857015412084796', 'type': 0, 'content': '<#&624528614330859520>', 'channel_id': '5636998413777633, 2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on48', 'author': {'id': '749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [], 'mentions': []'pinned': False, 'mention_everyone': False, 'tts': Fa, 'mention_roles': ['624528614330859520'], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timest}amp': '2021-11-12T23:13:18.221000+00:00', 'edited_timestamp': None, 'flags': 0, 'components': []}
{'id': '908857014430629898', 'type': 0, 'content': '', 'channel_id': '563699841377763348', 'author': {'id':
'749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [{'type': 'rich', 'title': '<:GoldenKey:273763771929853962> Borderlands 1: 5 gold keys', 'description': 'Platform: Universal\nExpires: 30 November,
2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on the [website](https://shift.gearboxsoftware.com/rewards) or in game.\n\n[Source](https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation)', 'color': 16040976}], 'mentions': [], 'mention_roles': [], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timestamp': '2021-11-12T23:13:17.987000+00:00', 'edited_timestamp': None, 'flags': 1, 'components': []}
in the output there are 2 links, but I need to save the second link to a variable, and I'm wondering how I can do that
This is easiest done with the response body as a text object that can be scanned with regex to find the URLs
Solution
The variable test_case_data is the response body in TEXT form as a string.
import re
regex = r"(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])"
def find_embedded_urls(data):
return re.finditer(regex,data)
test_case_data = """'id': '908857014430629898', 'type': 0, 'content': '', 'channel_id': '563699841377763348', 'author': {'id':
'749499357761503284', 'username': 'shift', 'avatar': 'de9cd6f3224e660a4b6906a89fc2bc15', 'discriminator': '6125', 'public_flags': 0, 'bot': True}, 'attachments': [], 'embeds': [{'type': 'rich', 'title': '<:GoldenKey:273763771929853962> Borderlands 1: 5 gold keys', 'description': 'Platform: Universal\nExpires: 30 November,
2021.```\n5J53T-BKJK5-CTXBZ-JJJTJ-WW6F3```Redeem on the [website](https://shift.gearboxsoftware.com/rewards) or in game.\n\n[Source](https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation)', 'color': 16040976}], 'mentions': [], 'mention_roles': [], 'pinned': False, 'mention_everyone': False, 'tts': False, 'timestamp': '2021-11-12T23:13:17.987000+00:00', 'edited_timestamp': None, 'flags': 1, 'components': []}"""
# test_case_data = response.text
matches = find_embedded_urls(test_case_data)
matches = [match[0] for match in matches] #convert all urls to strings
print(matches) # List of all the urls! Index for whatever one you need
Output
['https://shift.gearboxsoftware.com/rewards', 'https://shift.orcicorn.com/shift-code/5j53t-bkjk5-ctxbz-jjjtj-ww6f3/?utm_source=json&utm_medium=shift&utm_campaign=automation']
With the URLs as a list index, you can set variables by indexing the list at whatever point you need.
When I search for the book using this link https://www.googleapis.com/books/v1/volumes?q=9780310709626 I get the author name in the details.
However when I run my code and print items I don't see the author name. I've been trying to figure out why it doesn't show from the data but I don't see any problem with my code.
print(searchBooks("9780310709626"))
def getBooks(id):
url = "https://www.googleapis.com/books/v1/volumes?q=isbn:"
resp = url(api + id)
data = json.load(resp)
print(data["items"])
My code output:
[{'kind': 'books#volume', 'id': 'JEP3sgEACAAJ', 'etag': '92vdEneJ83g', 'selfLink': 'https://www.googleapis.com/books/v1/volumes/JEP3sgEACAAJ', 'volumeInfo': {'title': "The Beginner's Bible", 'subtitle': 'Timeless Bible Stories', 'publisher': 'Zondervan', 'publishedDate': '2005', 'description': 'Retells familiar Bible stories from the Old and New Testaments for children to enjoy.', 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0310709628'}, {'type': 'ISBN_13', 'identifier': '9780310709626'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 511, 'printType': 'BOOK', 'categories': ['Juvenile Nonfiction'], 'averageRating': 4.5, 'ratingsCount': 2, 'maturityRating': 'NOT_MATURE', 'allowAnonLogging': False, 'contentVersion': 'preview-1.0.0', 'panelizationSummary': {'containsEpubBubbles': False, 'containsImageBubbles': False}, 'imageLinks': {'smallThumbnail': 'http://books.google.com/books/content?id=JEP3sgEACAAJ&printsec=frontcover&img=1&zoom=5&source=gbs_api', 'thumbnail': 'http://books.google.com/books/content?id=JEP3sgEACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api'}, 'language': 'en', 'previewLink': 'http://books.google.com.tw/books?id=JEP3sgEACAAJ&dq=isbn:9780310709626&hl=&cd=1&source=gbs_api', 'infoLink': 'http://books.google.com.tw/books?id=JEP3sgEACAAJ&dq=isbn:9780310709626&hl=&source=gbs_api', 'canonicalVolumeLink': 'https://books.google.com/books/about/The_Beginner_s_Bible.html?hl=&id=JEP3sgEACAAJ'}, 'saleInfo': {'country': 'TW', 'saleability': 'NOT_FOR_SALE', 'isEbook': False}, 'accessInfo': {'country': 'TW', 'viewability': 'NO_PAGES', 'embeddable': False, 'publicDomain': False, 'textToSpeechPermission': 'ALLOWED', 'epub': {'isAvailable': False}, 'pdf': {'isAvailable': False}, 'webReaderLink': 'http://play.google.com/books/reader?id=JEP3sgEACAAJ&hl=&printsec=frontcover&source=gbs_api', 'accessViewStatus': 'NONE', 'quoteSharingAllowed': False}, 'searchInfo': {'textSnippet': 'Retells familiar Bible stories from the Old and New Testaments for children to enjoy.'}}, {'kind': 'books#volume', 'id': 'ZRgnzQEACAAJ', 'etag': 'RXYM4Rbwx+g', 'selfLink': 'https://www.googleapis.com/books/v1/volumes/ZRgnzQEACAAJ', 'volumeInfo': {'title': "The Beginner's Bible", 'authors': ['Catherine DeVries'], 'publishedDate': '2005', 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0310709628'}, {'type': 'ISBN_13', 'identifier': '9780310709626'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 511, 'printType': 'BOOK', 'averageRating': 4, 'ratingsCount': 1, 'maturityRating': 'NOT_MATURE', 'allowAnonLogging': False, 'contentVersion': 'preview-1.0.0', 'panelizationSummary': {'containsEpubBubbles': False, 'containsImageBubbles': False}, 'language': 'en', 'previewLink': 'http://books.google.com.tw/books?id=ZRgnzQEACAAJ&dq=isbn:9780310709626&hl=&cd=2&source=gbs_api', 'infoLink': 'http://books.google.com.tw/books?id=ZRgnzQEACAAJ&dq=isbn:9780310709626&hl=&source=gbs_api', 'canonicalVolumeLink': 'https://books.google.com/books/about/The_Beginner_s_Bible.html?hl=&id=ZRgnzQEACAAJ'}, 'saleInfo': {'country': 'TW', 'saleability': 'NOT_FOR_SALE', 'isEbook': False}, 'accessInfo': {'country': 'TW', 'viewability': 'NO_PAGES', 'embeddable': False, 'publicDomain': False, 'textToSpeechPermission': 'ALLOWED', 'epub': {'isAvailable': False}, 'pdf': {'isAvailable': False}, 'webReaderLink': 'http://play.google.com/books/reader?id=ZRgnzQEACAAJ&hl=&printsec=frontcover&source=gbs_api', 'accessViewStatus': 'NONE', 'quoteSharingAllowed': False}}]
With the requests library:
import requests
url = 'https://www.googleapis.com/books/v1/volumes?q=9780310709626'
resp = requests.get(url)
json = resp.json()
print(json['items'][0]['volumeInfo']['authors'])
From the response you can see that authors is an array. To reach that array you will need to do json['items'][0]['volumeInfo']['authors'].
As items is also an array, meaning that there could be multiple items in this response. You might want to write extra code to deal with that other than hard-code index=0.
Note that in this case, you probably won't know the schema of the response. You should handle unexpected behaviors. For some certain books maybe some keys are missing, json['items'] could be an empty array, or even items is not in the response at all.
I am trying to check if a specific item in a json file is equal to one of my python variables.
{'data': {'redemption': {'channel_id': 'secret',
'id': 'secret',
'redeemed_at': '2021-02-08T09:46:22.637059711Z',
'reward': {'background_color': '#FA1ED2',
'channel_id': '145998001',
'cooldown_expires_at': None,
'cost': 500,
'default_image': {'url_1x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-1.png',
'url_2x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-2.png',
'url_4x': 'https://static-cdn.jtvnw.net/custom-reward-images/ghost-4.png'},
'global_cooldown': {'global_cooldown_seconds': 1,
'is_enabled': False},
'id': '123',
'image': None,
'is_enabled': True,
'is_in_stock': True,
'is_paused': False,
'is_sub_only': False,
'is_user_input_required': False,
'max_per_stream': {'is_enabled': False,
'max_per_stream': 1},
'max_per_user_per_stream': {'is_enabled': False,
'max_per_user_per_stream': 1},
'prompt': '*Dabs*',
'redemptions_redeemed_current_stream': None,
'should_redemptions_skip_request_queue': False,
'template_id': 'template:4425c37e-6881-442a-aa3d-fdc6998a29de',
'title': 'Dab!',
'updated_for_indicator_at': '2020-09-10T18:55:40.064177881Z'},
'status': 'UNFULFILLED',
'user': {'display_name': 'Androteex',
'id': 'secret',
'login': 'androteex'}},
'timestamp': '2021-02-08T09:46:22.637059711Z'},
'type': 'reward-redeemed'}
I want to find the second id: 'id': '123' and check if id is equal to 123. And if so I want to print that string. How could I do that?
You can use the JSON module.
import json
data = json.loads(my_json)
my_id = data['data']['redemption']['reward']['id']
if my_id == '123':
print(data)
Granted it was added to data (can be done with json.loads):
id = data['data']['redemption']['reward']['id']
idcheck = 123
if (int(id) == idcheck):
print ("YES")
{'contributors': None,
'coordinates': None,
'created_at': 'Tue Aug 02 19:51:58 +0000 2016',
'entities': {'hashtags': [],
'symbols': [],
'urls': [],
'user_mentions': [{'id': 873491544,
'id_str': '873491544',
'indices': [0, 13],
'name': 'Kenel M',
'screen_name': 'KxSweaters13'}]},
'favorite_count': 1,
'favorited': False,
'geo': None,
'id': 760563814450491392,
'id_str': '760563814450491392',
'in_reply_to_screen_name': 'KxSweaters13',
'in_reply_to_status_id': None,
'in_reply_to_status_id_str': None,
'in_reply_to_user_id': 873491544,
'in_reply_to_user_id_str': '873491544',
'is_quote_status': False,
'lang': 'en',
'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
'place': {'attributes': {},
'bounding_box': {'coordinates': [[[-71.813501, 42.4762],
[-71.702186, 42.4762],
[-71.702186, 42.573956],
[-71.813501, 42.573956]]],
'type': 'Polygon'},
'contained_within': [],
'country': 'Australia',
'country_code': 'AUS',
'full_name': 'Melbourne, V',
'id': 'c4f1830ea4b8caaf',
'name': 'Melbourne',
'place_type': 'city',
'url': 'https://api.twitter.com/1.1/geo/id/c4f1830ea4b8caaf.json'},
'retweet_count': 0,
'retweeted': False,
'source': 'Twitter for Android',
'text': '#KxSweaters13 are you the kenelx13 I see owning leominster for team valor?',
'truncated': False,
'user': {'contributors_enabled': False,
'created_at': 'Thu Apr 21 17:09:52 +0000 2011',
'default_profile': False,
'default_profile_image': False,
'description': "Arbys when it's cold. Kimballs when it's warm. #Ally__09 all year. Comp sci classes sometimes.",
'entities': {'description': {'urls': []}},
'favourites_count': 1106,
'follow_request_sent': None,
'followers_count': 167,
'following': None,
'friends_count': 171,
'geo_enabled': True,
'has_extended_profile': False,
'id': 285715182,
'id_str': '285715182',
'is_translation_enabled': False,
'is_translator': False,
'lang': 'en',
'listed_count': 2,
'location': 'MA',
'name': 'Steve',
'notifications': None,
'profile_background_color': '131516',
'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme14/bg.gif',
'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme14/bg.gif',
'profile_background_tile': True,
'profile_banner_url': 'https://pbs.twimg.com/profile_banners/285715182/1462218226',
'profile_image_url': 'http://pbs.twimg.com/profile_images/727223698332200961/bGPjGjHK_normal.jpg',
'profile_image_url_https': 'https://pbs.twimg.com/profile_images/727223698332200961/bGPjGjHK_normal.jpg',
'profile_link_color': '4A913C',
'profile_sidebar_border_color': 'FFFFFF',
'profile_sidebar_fill_color': 'EFEFEF',
'profile_text_color': '333333',
'profile_use_background_image': True,
'protected': False,
'screen_name': 'StephenBurke_',
'statuses_count': 5913,
'time_zone': 'Eastern Time (US & Canada)',
'url': None,
'utc_offset': -14400,
'verified': False}}
I have a json file which contains a list of json objects (each has the structure like above)
So I read it into a dataframe:
df = pd.read_json('data.json')
and then I try to get all the rows which are the 'city' type by:
df = df[df['place']['place_type'] == 'city']
but then I got the 'TypeError: an integer is required' During handling of the above exception, another exception occurred: KeyError: 'place_type'
Then I tried:
df['place'].head(3)
=>
0 {'id': '01864a8a64df9dc4', 'url': 'https://api...
1 {'id': '01864a8a64df9dc4', 'url': 'https://api...
2 {'id': '0118c71c0ed41109', 'url': 'https://api...
Name: place, dtype: object
So df['place'] return a series where keys are the indexes and that's why I got the TypeError
I've also tried to select the place_type of the first row and it works just fine:
df.iloc[0]['place']['place_type']
=>
city
The question is how can I filter out the rows in this case?
Solution:
Okay, so the problem lies in the fact that the pd.read_json cannot deal with nested JSON structure, so what I have done is to normalize the json object:
with open('data.json') as jsonfile:
data = json.load(jsonfile)
df = pd.io.json.json_normalize(data)
df = df[df['place.place_type'] == 'city']
You can use the a list comprehension to do the filtering you need.
df = [loc for loc in df if d['place']['place_type'] == 'city']
This will give you an array where the elements place_type is 'city'.
I don't know if you have to use the place_type that is the index, to show all the rows that contains city.
"and then I try to get all the rows which are the city type by:"
This way you can get all the rows that contains city in the column place:
df = df[(df['place'] == 'city')]