Dataframe not showing twitter sources from Android - python

I am trying to do some analysis on a Twitter account, but I am having trouble showing sources from Android. I merged two JSON files, and I think I merged them correctly, but in case I got that wrong, here is the code I used.
# Load the older archive and merge it into the already-loaded `tweets` list.
old_tweets = load_tweets("real_tweets/real_old_tweets.json")
print(len(old_tweets))
# Append only tweets that are not already present, so tweets that appear in
# both files are not added twice. (Tweets are dicts, so membership is tested
# by full equality of the dict contents.)
for aLis1 in old_tweets:
    if aLis1 not in tweets:
        tweets.append(aLis1)
load_tweets is a custom function that simply opens and loads a JSON file given a specific path:
def load_tweets(path):
    """Open the JSON file at *path* and return its parsed content.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (for the tweet
        archives used here, a list of tweet dictionaries).
    """
    import json

    # Binary mode lets json.load detect the file's UTF encoding itself.
    with open(path, "rb") as f:
        return json.load(f)
After merging the two JSON files of tweets, I ran the following code to create the data frame and clean it up so that it only contains the information I want.
# Build a frame from the merged tweet dicts and keep only the columns of interest.
df_tweets1 = pd.DataFrame(tweets)
df_tweets2 = df_tweets1[['id','created_at','source','full_text','retweet_count']]
# NOTE: keep=False discards EVERY row whose 'id' occurs more than once (no
# copy is retained); keep='first' or keep='last' would retain one of them.
df_tweets = df_tweets2.drop_duplicates('id', keep=False)
df_tweets.set_index('id', inplace=True)
df_tweets = df_tweets.rename(columns={"created_at": "time", "full_text": "text"})
# Parse the textual timestamps into pandas datetimes.
df_tweets["time"] = pd.to_datetime(df_tweets["time"])
The problem is that when I call df_tweets["source"].unique(), I don't see any tweets coming from Android:
array(['Twitter for iPhone',
'Twitter for iPad',
'Twitter Media Studio',
'Media Studio',
'Twitter Web Client'],
dtype=object)
Did I do something wrong when merging the two sets of Twitter data? Or did I do something wrong when trying to create the data frame?
EDIT: Here is a sample output from real_old_tweets.json to give a sense of the format. I am only going to post one tweet because each one contains a lot of information.
[{'created_at': 'Tue Oct 16 16:22:11 +0000 2018',
'id': 1052233253040640001,
'id_str': '1052233253040640001',
'full_text': 'REGISTER TO https://url/0pWiwCHGbh! #MAGA🇺🇸 https://url/ACTMe53TZU',
'truncated': False,
'display_text_range': [0, 44],
'entities': {'hashtags': [{'text': 'MAGA', 'indices': [37, 42]}],
'symbols': [],
'user_mentions': [],
'urls': [{'url': 'url/0pWiwCHGbh',
'expanded_url': 'linkVote.GOP',
'display_url': 'Vote.GOP',
'indices': [12, 35]},
{'url': 'url/ACTMe53TZU',
'expanded_url': 'linktwitter.com/erictrump/status/1052174007708147714',
'display_url': 'twitter.com/erictrump/stat…',
'indices': [45, 68]}]},
'source': 'Twitter for iPhone',
'in_reply_to_status_id': None,
'in_reply_to_status_id_str': None,
'in_reply_to_user_id': None,
'in_reply_to_user_id_str': None,
'in_reply_to_screen_name': None,
'user': {'id': 25073877,
'id_str': '25073877',
'name': 'Donald J. Trump',
'screen_name': 'realDonaldTrump',
'location': 'Washington, DC',
'description': '45th President of the United States of America🇺🇸',
'url': 'url/OMxB0x7xC5',
'entities': {'url': {'urls': [{'url': 'url/OMxB0x7xC5',
'expanded_url': 'linkwww.Instagram.com/realDonaldTrump',
'display_url': 'Instagram.com/realDonaldTrump',
'indices': [0, 23]}]},
'description': {'urls': []}},
'protected': False,
'followers_count': 55165024,
'friends_count': 47,
'listed_count': 94709,
'created_at': 'Wed Mar 18 13:46:38 +0000 2009',
'favourites_count': 25,
'utc_offset': None,
'time_zone': None,
'geo_enabled': True,
'verified': True,
'statuses_count': 39296,
'lang': 'en',
'contributors_enabled': False,
'is_translator': False,
'is_translation_enabled': True,
'profile_background_color': '6D5C18',
'profile_background_image_url': 'linkabs.twimg.com/images/themes/theme1/bg.png',
'profile_background_image_url_https': 'linkabs.twimg.com/images/themes/theme1/bg.png',
'profile_background_tile': True,
'profile_image_url': 'linkpbs.twimg.com/profile_images/874276197357596672/kUuht00m_normal.jpg',
'profile_image_url_https': 'linkpbs.twimg.com/profile_images/874276197357596672/kUuht00m_normal.jpg',
'profile_banner_url': 'linkpbs.twimg.com/profile_banners/25073877/1539493274',
'profile_link_color': '1B95E0',
'profile_sidebar_border_color': 'BDDCAD',
'profile_sidebar_fill_color': 'C5CEC0',
'profile_text_color': '333333',
'profile_use_background_image': True,
'has_extended_profile': False,
'default_profile': False,
'default_profile_image': False,
'following': False,
'follow_request_sent': False,
'notifications': False,
'translator_type': 'regular'},
'geo': None,
'coordinates': None,
'place': None,
'contributors': None,
'is_quote_status': True,
'quoted_status_id': 1052174007708147714,
'quoted_status_id_str': '1052174007708147714',
'quoted_status_permalink': {'url': 'url/ACTMe53TZU',
'expanded': 'linktwitter.com/erictrump/status/1052174007708147714',
'display': 'twitter.com/erictrump/stat…'},
'quoted_status': {'created_at': 'Tue Oct 16 12:26:46 +0000 2018',
'id': 1052174007708147714,
'id_str': '1052174007708147714',
'full_text': 'Friends: Quick reminder that today is that last day to register to vote in Oregon, Kansas, Louisiana, West Virginia, New Jersey and Maryland. It is very quick and easy - simply go to url/GE5BO5ONN1! Let’s #MakeAmericaGreatAgain 🇺🇸🇺🇸🇺🇸',
'truncated': False,
'display_text_range': [0, 243],
'entities': {'hashtags': [{'text': 'MakeAmericaGreatAgain',
'indices': [214, 236]}],
'symbols': [],
'user_mentions': [],
'urls': [{'url': 'url/GE5BO5ONN1',
'expanded_url': 'linkwww.Vote.GOP',
'display_url': 'Vote.GOP',
'indices': [183, 206]}]},
'source': 'Twitter for iPhone',
'in_reply_to_status_id': None,
'in_reply_to_status_id_str': None,
'in_reply_to_user_id': None,
'in_reply_to_user_id_str': None,
'in_reply_to_screen_name': None,
'user': {'id': 39349894,
'id_str': '39349894',
'name': 'Eric Trump',
'screen_name': 'EricTrump',
'location': '',
'description': "Executive Vice President of The #Trump Organization. Husband to #LaraLeaTrump. Large advocate of #StJude Children's Research Hospital. #MakeAmericaGreatAgain",
'url': 'url/uwwNiWyamR',
'entities': {'url': {'urls': [{'url': 'url/uwwNiWyamR',
'expanded_url': 'linkwww.Trump.com',
'display_url': 'Trump.com',
'indices': [0, 23]}]},
'description': {'urls': []}},
'protected': False,
'followers_count': 2191617,
'friends_count': 715,
'listed_count': 5736,
'created_at': 'Mon May 11 21:42:30 +0000 2009',
'favourites_count': 8638,
'utc_offset': None,
'time_zone': None,
'geo_enabled': True,
'verified': True,
'statuses_count': 5601,
'lang': 'en',
'contributors_enabled': False,
'is_translator': False,
'is_translation_enabled': False,
'profile_background_color': '000000',
'profile_background_image_url': 'linkabs.twimg.com/images/themes/theme1/bg.png',
'profile_background_image_url_link': 'linkabs.twimg.com/images/themes/theme1/bg.png',
'profile_background_tile': True,
'profile_image_url': 'linkpbs.twimg.com/profile_images/974045997268529152/R0CuVYHM_normal.jpg',
'profile_image_url_link': 'linkpbs.twimg.com/profile_images/974045997268529152/R0CuVYHM_normal.jpg',
'profile_banner_url': 'linkpbs.twimg.com/profile_banners/39349894/1516709628',
'profile_link_color': '116AB8',
'profile_sidebar_border_color': '000000',
'profile_sidebar_fill_color': '616161',
'profile_text_color': '000000',
'profile_use_background_image': True,
'has_extended_profile': False,
'default_profile': False,
'default_profile_image': False,
'following': False,
'follow_request_sent': False,
'notifications': False,
'translator_type': 'none'},
'geo': None,
'coordinates': None,
'place': None,
'contributors': None,
'is_quote_status': False,
'retweet_count': 1945,
'favorite_count': 3828,
'favorited': False,
'retweeted': False,
'possibly_sensitive': False,
'lang': 'en'},
'retweet_count': 5415,
'favorite_count': 16565,
'favorited': False,
'retweeted': False,
'possibly_sensitive': False,
'lang': 'en'},

I am assuming that your data does contain "Android" sources; I don't have a clear idea of what your data looks like or what the relation between "id" and "source" is. Having said that, there is a bug in how you prepare your data: you are dropping all the duplicates.
For example:
>>> import pandas as pd
>>> df = pd.DataFrame(data={'col1':[1,2,2],'col2':[3,4,3],'col3':[1,4,1]})
>>> df
col1 col2 col3
0 1 3 1
1 2 4 4
2 2 3 1
>>> df.drop_duplicates('col1',keep=False)
col1 col2 col3
0 1 3 1
As you can see above, keep=False drops every row whose value is duplicated, including the first occurrence.
>>> df.drop_duplicates('col1',keep='first')
col1 col2 col3
0 1 3 1
1 2 4 4
Instead, use keep='first' or keep='last' and see if there is any improvement. Also, it would help if you could share a sample of the data so that I can figure out where it is going wrong.
EDIT
After some time, I took your JSON object and saved to a "me.json" file in the format of:
[{},{}]
Where the first object's source is an iPhone and second object's source is android. I used your code for loading in the data:
Python 2.7.15rc1 (default, Nov 12 2018, 14:31:15)
[GCC 7.3.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd
>>> import json
>>> with open('me.json','rb') as file:
... json_list = json.load(file)
...
>>> len(json_list)
2
>>> df = pd.DataFrame(json_list)
>>> df1 = df[['id','source']]
>>> df1['source'].value_counts()
Twitter for Android 1
Twitter for iPhone 1
Name: source, dtype: int64
In the above output, you can see that "Twitter for Android" shows up. My conclusion is that your data may not contain any "Android" entries at all in the df['source'] column.
Please check carefully, as there can be two "source" keys inside each JSON object: one at the top level and one inside "quoted_status". It is possible that you saw "Android" only in the nested key.

Related

reading Tweepy data value from tweepy.models.Status object in python is not working

I am trying to get information on retweeters for a specific tweet using Tweepy and fetch the in_reply_to_status_id from the returned Tweepy response.
Here is the code
# Fetch the retweets of the given tweet and print each one as a plain dict.
retweets_list = api.get_retweets(id=tweetid)
for retweet in retweets_list:
    # Round-trip the raw payload through JSON text to obtain a plain dict.
    retweet_json = json.dumps(retweet._json, indent=2)
    retweet_json = json.loads(retweet_json)
    print(retweet_json)
The code above produces the response below:
{'created_at': 'Sat Jun 18 06:38:49 +0000 2022', 'id': 1538048568782688256, 'id_str': '1538048568782688256', 'text': 'RT #gyfboxAI: #isle_mcelroy Some mentioned items in thread \n\n#AllisonPDavis The Governesses => httpsurl The Ob
', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'gyfboxAI', 'name': 'Gyfbox', 'id': 1521109812032978946, 'id_str': '1521109812032978946', 'indices': [3, 12]}, {'screen_name': 'isle_mcelroy', 'name': 'Isle McElroy', 'id': 868462820, 'id_str': '868462820', 'indices': [14, 27]}, {'screen_name': 'AllisonPDavis', 'name': 'Allison P Davis', 'id': 15088579, 'id_str': '15088579', 'indices': [61, 75]}, {'screen_name': 'kvargs93', 'name': 'Katherine Varga', 'id': 885284552897429504, 'id_str': '885284552897429504', 'indices': [125, 134]}], 'urls': [{'url': 'httpsurl', 'expanded_url': 'httpsurlamzn.to/3MUM0mI', 'display_url': 'amzn.to/3MUM0mI', 'indices': [100, 123]}]}, 'source': 'Twitter for iPhone', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1003173584, 'id_str': '1003173584', 'name': 'Elaine Showalter', 'screen_name': 'ecshowalter', 'location': 'Washington, D.C./London', 'description': 'Professor Emerita Princeton U; Anglophile, feminist, theatre fanatic, “The Civil Wars of Julia Ward Howe.” watercolor by Vanessa Bell, “The Queen’s Tea Party”', 'url': None, 'entities': {'description': {'urls': []}}, 'protected': False, 'followers_count': 8142, 'friends_count': 1049, 'listed_count': 104, 'created_at': 'Tue Dec 11 03:08:17 +0000 2012', 'favourites_count': 24912, 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 'verified': False, 'statuses_count': 26489, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'httpsurlabs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': True, 'profile_image_url': 
'http://pbs.twimg.com/profile_images/968862619699425281/CKzdSRf6_normal.jpg', 'profile_image_url_https': 'httpsurlpbs.twimg.com/profile_images/968862619699425281/CKzdSRf6_normal.jpg', 'profile_banner_url': 'httpsurlpbs.twimg.com/profile_banners/1003173584/1569562029', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none', 'withheld_in_countries': []}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Fri Jun 17 17:55:18 +0000 2022', 'id': 1537856423740198913, 'id_str': '1537856423740198913', 'text': '#isle_mcelroy Some mentioned items in thread \n\n#AllisonPDavis The Governesses => httpsurl
 httpsurl', 'truncated': True, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'isle_mcelroy', 'name': 'Isle McElroy', 'id': 868462820, 'id_str': '868462820', 'indices': [0, 13]}, {'screen_name': 'AllisonPDavis', 'name': 'Allison P Davis', 'id': 15088579, 'id_str': '15088579', 'indices': [47, 61]}], 'urls': [{'url': 'httpsurl', 'expanded_url': 'httpsurlamzn.to/3MUM0mI', 'display_url': 'amzn.to/3MUM0mI', 'indices': [86, 109]}, {'url': 'httpsurl’, 'expanded_url': 'httpsurltwitter.com/i/web/status/1537856423740198913', 'display_url': 'twitter.com/i/web/status/1
', 'indices': [111, 134]}]}, 'source': 'gyfbox', 'in_reply_to_status_id': 1537835837542604801, 'in_reply_to_status_id_str': '1537835837542604801', 'in_reply_to_user_id': 868462820, 'in_reply_to_user_id_str': '868462820', 'in_reply_to_screen_name': 'isle_mcelroy', 'user': {'id': 1521109812032978946, 'id_str': '1521109812032978946', 'name': 'Gyfbox', 'screen_name': 'gyfboxAI', 'location': '', 'description': 'Tag "#GyfboxAI find item" \n\n#GyfboxAI will reply with link for items mentioned in the thread\n\nCOMING SOON !', 'url': 'httpsurlt.co/u7fGrxh24Y', 'entities': {'url': {'urls': [{'url': 'httpsurlt.co/u7fGrxh24Y', 'expanded_url': 'httpsurlwww.gyfbox.com', 'display_url': 'gyfbox.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1, 'friends_count': 6, 'listed_count': 0, 'created_at': 'Mon May 02 12:50:32 +0000 2022', 'favourites_count': 1, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 49, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1521109885827661824/iTrlR67U_normal.png', 'profile_image_url_https': 'httpsurlpbs.twimg.com/profile_images/1521109885827661824/iTrlR67U_normal.png', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none', 'withheld_in_countries': []}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 
'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'lang': 'en'}
Every attempt to extract in_reply_to_status_id returns None.
Sample attempts that returned None:
retweet_json['in_reply_to_status_id']
retweet.in_reply_to_status_id
The data returned above shows
'in_reply_to_status_id': 1537835837542604801,
so I should be getting 1537835837542604801 for in_reply_to_status_id.
What am I doing wrong, and how can I obtain the in_reply_to_status_id?
According to your JSON structure,
the top-level in_reply_to_status_id is None; the value you want is nested inside retweeted_status. So, based on the JSON structure,
retweet_json['retweeted_status']['in_reply_to_status_id']
should give
1537835837542604801

Python: nested dictionary with string list and subdictionaries

I am interested in the comments (=text) made on certain YouTube Channels. I have scraped data with the Google YouTube Data API. The data comes in a complex structure and format (see picture below) that I am trying to disentangle for a research project.
The comments are stored in the fields Text Display and Text Original that belong to the dictionary Snippet, which in turn is part of the dictionary Top Level Comments. Top Level Comments is part of a string list that in turn is part of the dictionary items.
I think I need to subset the dictionary Top Level Comment as all the comments and related information (see picture below) I need are stored in nested dictionaries there. I don't think I can access the dictionary Top Level Comment as it is part of the list Snippet. So I first tried to subset the list Snippet. This is where I am stuck.
Here my code so far:
from googleapiclient.discovery import build
# Placeholder for the YouTube Data API key (redacted).
api_key = '_______________________________'
# Build a client for version 3 of the YouTube API.
youtube = build('youtube', 'v3', developerKey = api_key)
# Channel IDs can be looked up at https://commentpicker.com/youtube-channel-id.php
# Request the comment threads related to the given channel, asking only for
# the 'snippet' part of each thread.
request = youtube.commentThreads().list(
part = 'snippet',
allThreadsRelatedToChannelId = 'UC_zxivooFdvF4uuBosUnJxQ'
)
response3 = request.execute()
# NOTE: the code used to explore the data structure and format is omitted here.
# Keep only the top-level response keys listed in includedKeys (here just 'items').
includedKeys = ['items']
dataDic = {k:v for k, v in response3.items() if k in includedKeys}
In the code below, I unsuccessfully tried to subset the list Snippet in different ways or to convert it.
dataDic2 = {x['snippet'] for x in dataDic} #Link no 1
#TypeError: string indices must be integers
dataDic2 = {x['snippet'] for x in dataDic} #Link no 1
#TypeError: string indices must be integers
dataDic2 = [{'snippet': d['snippet']} for d in dataDic] #Link no 2
#TypeError: string indices must be integers
dataDic2 = [topLevelComment['snippet'] for topLevelComment in dataDic['topLevelComment']['snippet']] #Link no 3
#KeyError: 'topLevelComment'
import ast
result = ast.literal_eval('[snippet]')
assert type(result) is list #Link no 4 and 5
#ValueError: malformed node or string: <_ast.Name object at 0x0000010F6D7B9A08>
Link no 1
Link no 2
Link no 3
Link no 4
Link no 5
This link says that ast.literal_eval does not work with lists and dictionaries?
So finally - how to retrieve the data?
I need all fields circled in red in the picture showing the data structure.
EDIT: sample data
see below
data = {'kind': 'youtube#commentThreadListResponse', 'etag': '_yOZ67ear9btS5RarXfH3Xir6A8',
'nextPageToken': 'QURTSl9pME5DS2FQZm5yRzZ5b0ZGZUJGeENkMGh2UWxzVjNueEdUVmtmbVVqYksxSmN4QnpBdDFFWkpCREl6REZVQmlHZS1makpfZXFkQzFNbEpwbDFpb0dNWm95Z2E1TE03NE5GWEg0ajE5UWt0bnlpYS1PczlFVWZ1a1hqbTJLREVRempJaVpaRTYtcnpFeUM2ZU5Va1hUSHR5cVJFTEJ2akdtOHFkTWhGdmdmWUZsMUMwUHg0eTZNVzFBZVdsd1A0YXBqaWhnNGVNMXc=',
'pageInfo': {'totalResults': 14, 'resultsPerPage': 20}, 'items': [
{'kind': 'youtube#commentThread', 'etag': 'knxvgtYnhlPIpkevoCXSTZamb40', 'id': 'Ugwmdd9KdDm4Hm7MxlJ4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '4m76jMeR8qFmfrk42kfKeA5Iv_Y',
'id': 'Ugwmdd9KdDm4Hm7MxlJ4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI',
'textDisplay': 'Tipp 1: Zusatzszüge – machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)',
'textOriginal': 'Tipp 1: Zusatzszüge – machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)',
'authorDisplayName': 'Zuschauerquaeler',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQCWIoN-3MmDfxflS5ipDVvatDw8TpbD43mn2kb=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCECxysNsTQLhrelU2KikMjQ',
'authorChannelId': {'value': 'UCECxysNsTQLhrelU2KikMjQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-09-15T07:29:00Z',
'updatedAt': '2021-09-15T07:29:00Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'tq7mSQltdzKz0sthUiAIPYrQgJg', 'id': 'Ugy2jzL0838zj9HyHu94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '8BDnS6DXuaN8VdFzHsj7dc1YPZc',
'id': 'Ugy2jzL0838zj9HyHu94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'textDisplay': 'Ich sehe das Kulturland schon schmelzen und verschwinden...',
'textOriginal': 'Ich sehe das Kulturland schon schmelzen und verschwinden...',
'authorDisplayName': 'Janik Von Niederhäusern',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLSk69KdiWMSYw0sYQSBdjEHagXJTD9tWlHdsw=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCt87CYDxeIbDRRJLVT0VrdQ',
'authorChannelId': {'value': 'UCt87CYDxeIbDRRJLVT0VrdQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-14T18:08:55Z',
'updatedAt': '2021-09-14T18:08:55Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'h_gpfnmUju60NWNxlFEwxjkIPQU', 'id': 'Ugx5GfaJTwt5cnuQ3Bh4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'fMmN1zDH7PVIWbw3L0n5Mt0dtqk',
'id': 'Ugx5GfaJTwt5cnuQ3Bh4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'textDisplay': 'Guete initiativ! Mega fan vo dere projekt!',
'textOriginal': 'Guete initiativ! Mega fan vo dere projekt!',
'authorDisplayName': 'Nionity',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLTM-Tj3pWLuyhuH7ivlUwxs4YtQn6gez-BMCLdLzQ=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCbUj9ZwI0YOkElVEfpAnBVQ',
'authorChannelId': {'value': 'UCbUj9ZwI0YOkElVEfpAnBVQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-14T07:18:31Z',
'updatedAt': '2021-09-14T07:18:31Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'LOajqt43iY4A2N4V0yiLBRZwaig', 'id': 'Ugxez_tcF7ts7VaAL7t4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'DvNHOkNftBCLBqV1Ajam8mzMFYg',
'id': 'Ugxez_tcF7ts7VaAL7t4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o',
'textDisplay': 'Très mauvaise voix off, à un moment il y se reprend même dans le texte 😐',
'textOriginal': 'Très mauvaise voix off, à un moment il y se reprend même dans le texte 😐',
'authorDisplayName': 'Patrick__EPfan',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLTOmUsxVCimwNSQBVPxNUXfFbUNuYnN7VzVEeBUJA=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UC8DxMAk8T9Gv8RW0f2n0Q2w',
'authorChannelId': {'value': 'UC8DxMAk8T9Gv8RW0f2n0Q2w'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-12T12:12:58Z',
'updatedAt': '2021-09-12T12:12:58Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'MGsQS-TUcYHnuyjyN932wpVIM_A', 'id': 'UgxYTxqSwAsyGyOHzU94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'iRkZfQGVCGFZ13s8D3xrVZQw83A',
'id': 'UgxYTxqSwAsyGyOHzU94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c',
'textDisplay': 'Shiey be like', 'textOriginal': 'Shiey be like',
'authorDisplayName': 'Canopener Guy',
'authorProfileImageUrl': 'https://yt3.ggpht.com/2XG9uyYmOfkeubUNFQR0cgj7xCimKLsg6_r-3E1PTPVLixXjcxeFosF1HoytvHibGJrxQXal=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCk8pieRaYyzsnU32Gp85DvA',
'authorChannelId': {'value': 'UCk8pieRaYyzsnU32Gp85DvA'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T23:23:35Z',
'updatedAt': '2021-09-02T23:23:35Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'bcPCCsMbvquhAKLiEqIR4a20HnA', 'id': 'Ugw8FWvl7Hbf1RvJWhV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'rTl4oSjvH14OF4xQ1mnM_amfZag',
'id': 'Ugw8FWvl7Hbf1RvJWhV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM',
'textDisplay': 'Vivement un Lyria en Belgique !!!!',
'textOriginal': 'Vivement un Lyria en Belgique !!!!',
'authorDisplayName': 'Kayuchi Fujimoto',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQ9YSDYj2tQFvjKjt9F_CH9AR2dcWrr84jA70am=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCe5ctUAG-Z7cU_hpc-CbauQ',
'authorChannelId': {'value': 'UCe5ctUAG-Z7cU_hpc-CbauQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T21:39:26Z',
'updatedAt': '2021-09-02T21:39:26Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'qbrUI9Z2YkM3LtYOqFogVRwcZWE', 'id': 'UgwomjMWUx5CHjlU_ox4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'gYjvyBgNsZUB_FYUDK20LCVU-Qk',
'id': 'UgwomjMWUx5CHjlU_ox4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s',
'textDisplay': 'Build a high speed railway line into the moon I dare you with 20 million francs',
'textOriginal': 'Build a high speed railway line into the moon I dare you with 20 million francs',
'authorDisplayName': 'Simulated Trainspotter',
'authorProfileImageUrl': 'https://yt3.ggpht.com/3P-cR_3ORURRZH5RYImCeFv0yeC64SHtpS3otsCiGn4AuBXG-tQVrqnG32vJm4bfwxRt3MwCDzw=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCF4ganYY8qP9q8YwXpDn2tQ',
'authorChannelId': {'value': 'UCF4ganYY8qP9q8YwXpDn2tQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T08:36:45Z',
'updatedAt': '2021-09-02T08:36:45Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': '5KVenAu6Nn6RdnpKTpPj49KuYRY', 'id': 'UgyXleqDMoHFnid0OpV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'C3AxUnPxhDZuIYAKsjqeIZxmyQI',
'id': 'UgyXleqDMoHFnid0OpV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA',
'textDisplay': 'Sehr schön', 'textOriginal': 'Sehr schön',
'authorDisplayName': 'Pranave4 Roblox',
'authorProfileImageUrl': 'https://yt3.ggpht.com/V_qXZAr4xsbi2GEFJ2t8NhwDYWGEeiBhFCgVYcgs1TwmaS1e6gCwktKZpdNPJszs3Zwu71ZZ2w=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCKoDZxOJY6e90jeujtkC_4A',
'authorChannelId': {'value': 'UCKoDZxOJY6e90jeujtkC_4A'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 2,
'publishedAt': '2021-08-27T16:06:59Z',
'updatedAt': '2021-08-27T16:06:59Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'mH33Uu3Bm3zkVGLZDiOaOg2idSM', 'id': 'UgxQRQaVxnzeFQRTPTp4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'Sht8Gm_LShDQ9cKfIl1nH53FgsI',
'id': 'UgxQRQaVxnzeFQRTPTp4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'textDisplay': 'wie kann mann feuerwehr mann bei SBB werden',
'textOriginal': 'wie kann mann feuerwehr mann bei SBB werden',
'authorDisplayName': 'Florian Ruhland',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQNfiz21ybCpfDmaXKefJtuy1UDHwFenhsL0R14Kg=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCS7LfiWU_ebI-E3ny8Yb6PA',
'authorChannelId': {'value': 'UCS7LfiWU_ebI-E3ny8Yb6PA'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-08-21T11:00:05Z',
'updatedAt': '2021-08-21T11:00:05Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'oM57z1ZCosWjFXPDl1VMIQIFpJ8', 'id': 'UgzzHV3cayZFI7MpziB4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '-ecKB_iUT-BOVOeNfX7qoAr0poI',
'id': 'UgzzHV3cayZFI7MpziB4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0',
'textDisplay': 'I am only 15, but i have a very very big passion for these trains, i can’t wait to drive around Switzerland and help people arrive at their destinations<br>I also learned about signals in Switzerland as short documentaries on how these trains work.<br>I hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver',
'textOriginal': 'I am only 15, but i have a very very big passion for these trains, i can’t wait to drive around Switzerland and help people arrive at their destinations\nI also learned about signals in Switzerland as short documentaries on how these trains work.\nI hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver',
'authorDisplayName': 'Fred Dev',
'authorProfileImageUrl': 'https://yt3.ggpht.com/JEaQIjszQdpIDgsrIKEtIX6KaeryO48U4IcbSl45oFIKrDNoCxwhmWh3fC6exW5X1pL15Hiw4w=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCJKarhI8HsHHix0-HckXwVg',
'authorChannelId': {'value': 'UCJKarhI8HsHHix0-HckXwVg'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-08-19T22:32:58Z',
'updatedAt': '2021-08-19T22:32:58Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'Xu5rUasdLD7ZFsRPWPrL2JUJCWg', 'id': 'UgwBkkcOhrjuzFjE6Y54AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '1ps-PTcq7S2TzbY7s4OuafI4-Fg',
'id': 'UgwBkkcOhrjuzFjE6Y54AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ',
'textDisplay': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!<br>die musik ist auch toll, wie heisst das stück?',
'textOriginal': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!\ndie musik ist auch toll, wie heisst das stück?',
'authorDisplayName': 'cloudwalker',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQxGBcardOjutARwZxXcfbUSH3f66gqTzq3EA=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UC3VmTS8W5GKZf0PeIb8l2Jw',
'authorChannelId': {'value': 'UC3VmTS8W5GKZf0PeIb8l2Jw'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-08-18T00:50:32Z',
'updatedAt': '2021-08-18T00:50:32Z'}}, 'canReply': True,
'totalReplyCount': 2, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': '_hlBnClge81P8_RqsXR7q4_BIes', 'id': 'Ugzvldq2VB0lBIzoGVR4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'QZFjHr5bIQC72OicksbfJ3Py-Hk',
'id': 'Ugzvldq2VB0lBIzoGVR4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'textDisplay': 'Ihr seid spitze! Danke, dass es euch gibt 👏',
'textOriginal': 'Ihr seid spitze! Danke, dass es euch gibt 👏',
'authorDisplayName': 'Cris Tiano',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLT_ZmzCfLD22VLmHv-zIOnNiBGZHoYBhgcsgQ=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCU3xXx609PrAf6AwLjs5oSw',
'authorChannelId': {'value': 'UCU3xXx609PrAf6AwLjs5oSw'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-08-16T15:53:30Z',
'updatedAt': '2021-08-16T15:53:30Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
]}
# Flatten every comment thread in data['items'] into one flat dict per
# top-level comment, then print the results as a numbered list.

# Keys copied from the top-level comment's own snippet (in output order).
comment_fields = ['channelId', 'videoId', 'textOriginal', 'textDisplay', 'canRate',
                  'likeCount', 'updatedAt', 'viewerRating', 'publishedAt']
# Keys copied from the enclosing thread's snippet (in output order).
thread_fields = ['canReply', 'isPublic', 'totalReplyCount']

comments = []
for item in data['items']:
    thread = item['snippet']
    snippet = thread['topLevelComment']['snippet']
    # Build the entry in a fixed key order: comment fields first, thread fields last.
    entry = {field: snippet[field] for field in comment_fields}
    entry.update({field: thread[field] for field in thread_fields})
    comments.append(entry)

# Number the printed comments starting at 1.
for idx, comment in enumerate(comments, 1):
    print(f'{idx}) {comment}')
output
1) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI', 'textOriginal': 'Tipp 1: ZusatzszĂŒge – machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)', 'textDisplay': 'Tipp 1: ZusatzszĂŒge – machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-09-15T07:29:00Z', 'viewerRating': 'none', 'publishedAt': '2021-09-15T07:29:00Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
2) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg', 'textOriginal': 'Ich sehe das Kulturland schon schmelzen und verschwinden...', 'textDisplay': 'Ich sehe das Kulturland schon schmelzen und verschwinden...', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-14T18:08:55Z', 'viewerRating': 'none', 'publishedAt': '2021-09-14T18:08:55Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
3) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg', 'textOriginal': 'Guete initiativ! Mega fan vo dere projekt!', 'textDisplay': 'Guete initiativ! Mega fan vo dere projekt!', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-14T07:18:31Z', 'viewerRating': 'none', 'publishedAt': '2021-09-14T07:18:31Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
4) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o', 'textOriginal': 'TrĂšs mauvaise voix off, Ă  un moment il y se reprend mĂȘme dans le texte 😐', 'textDisplay': 'TrĂšs mauvaise voix off, Ă  un moment il y se reprend mĂȘme dans le texte 😐', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-12T12:12:58Z', 'viewerRating': 'none', 'publishedAt': '2021-09-12T12:12:58Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
5) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c', 'textOriginal': 'Shiey be like', 'textDisplay': 'Shiey be like', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T23:23:35Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T23:23:35Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
6) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM', 'textOriginal': 'Vivement un Lyria en Belgique !!!!', 'textDisplay': 'Vivement un Lyria en Belgique !!!!', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T21:39:26Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T21:39:26Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
7) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s', 'textOriginal': 'Build a high speed railway line into the moon I dare you with 20 million francs', 'textDisplay': 'Build a high speed railway line into the moon I dare you with 20 million francs', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T08:36:45Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T08:36:45Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
8) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA', 'textOriginal': 'Sehr schön', 'textDisplay': 'Sehr schön', 'canRate': True, 'likeCount': 2, 'updatedAt': '2021-08-27T16:06:59Z', 'viewerRating': 'none', 'publishedAt': '2021-08-27T16:06:59Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
9) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY', 'textOriginal': 'wie kann mann feuerwehr mann bei SBB werden', 'textDisplay': 'wie kann mann feuerwehr mann bei SBB werden', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-08-21T11:00:05Z', 'viewerRating': 'none', 'publishedAt': '2021-08-21T11:00:05Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
10) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0', 'textOriginal': 'I am only 15, but i have a very very big passion for these trains, i can’t wait to drive around Switzerland and help people arrive at their destinations\nI also learned about signals in Switzerland as short documentaries on how these trains work.\nI hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver', 'textDisplay': 'I am only 15, but i have a very very big passion for these trains, i can’t wait to drive around Switzerland and help people arrive at their destinations<br>I also learned about signals in Switzerland as short documentaries on how these trains work.<br>I hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-08-19T22:32:58Z', 'viewerRating': 'none', 'publishedAt': '2021-08-19T22:32:58Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
11) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ', 'textOriginal': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!\ndie musik ist auch toll, wie heisst das stĂŒck?', 'textDisplay': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!<br>die musik ist auch toll, wie heisst das stĂŒck?', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-08-18T00:50:32Z', 'viewerRating': 'none', 'publishedAt': '2021-08-18T00:50:32Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 2}
12) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY', 'textOriginal': 'Ihr seid spitze! Danke, dass es euch gibt 👏', 'textDisplay': 'Ihr seid spitze! Danke, dass es euch gibt 👏', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-08-16T15:53:30Z', 'viewerRating': 'none', 'publishedAt': '2021-08-16T15:53:30Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}

Parse/print out value from JSON content in Python

I am trying to print out the price of an item from this page/JSON:
https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215
but I am encountering an error as below:
print([d.get('price_max_before_discount') for d in site_json['item'] if d.get('price_max_before_discount')])
AttributeError: 'str' object has no attribute 'get'
import json
from urllib import request
from bs4 import BeautifulSoup
# Question code as asked: fetch the Shopee item endpoint and try to print one price field.
url = 'https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215'
html = request.urlopen(url).read()
# The response body is passed through BeautifulSoup and its text is then parsed as JSON.
soup = BeautifulSoup(html, 'html.parser')
site_json = json.loads(soup.text)
# This line raises AttributeError: 'str' object has no attribute 'get'.
# Iterating over the site_json['item'] dict yields its keys (strings), so `d` is a str here.
print([d.get('price_max_before_discount') for d in site_json['item'] if d.get('price_max_before_discount')])
not sure what I am doing wrong but I will appreciate any advice/solutions!
Thank you.
The API url is returning the json object and you can simply use:
import json
from urllib import request
# Fetch the Shopee item endpoint and print the decoded JSON document.
url = 'https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215'
# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read.
with request.urlopen(url) as response:
    data = response.read()
# The body is parsed directly as JSON; no HTML parsing step is involved.
data_json = json.loads(data)
print(data_json)
Output:
{'item': {'itemid': 2590867516, 'price_max_before_discount': 54900000, 'item_status': 'normal', 'can_use_wholesale': False, 'show_free_shipping': True, 'estimated_days': 2, 'is_hot_sales': None, 'is_slash_price_item': False, 'upcoming_flash_sale': None, 'slash_lowest_price': None, 'is_partial_fulfilled': False, 'condition': 1, 'show_original_guarantee': True, 'add_on_deal_info': None, 'is_non_cc_installment_payment_eligible': False, 'categories': [{'display_name': 'Mobile & Gadgets', 'catid': 8, 'image': None, 'no_sub': False, 'is_default_subcat': False, 'block_buyer_platform': None}, {'display_name': 'Mobile Phones & Tablets', 'catid': 10941, 'image': None, 'no_sub': False, 'is_default_subcat': False, 'block_buyer_platform': None}, {'display_name': 'Samsung', 'catid': 10944, 'image': None, 'no_sub': True, 'is_default_subcat': False, 'block_buyer_platform': None}], 'ctime': 1564663070, 'name': "(New Launch) Samsung Galaxy Tab S6 Lite 10.4' LTE Version with S Pen 64GB", 'show_shopee_verified_label': False, 'size_chart': None, 'is_pre_order': False, 'service_by_shopee_flag': None, 'historical_sold': 186, 'reference_item_id': '', 'recommendation_info': None, 'bundle_deal_info': None, 'price_max': 46500000, 'has_lowest_price_guarantee': False, 'shipping_icon_type': 0, 'images': ['68ce24874257ec3c593d0d118e9cf785'], 'price_before_discount': 54900000, 'cod_flag': 0, 'catid': 8, 'is_official_shop': False, 'coin_earn_label': None, 'hashtag_list': None, 'sold': 62, 'makeup': None, 'item_rating': {'rating_star': 4.976744, 'rating_count': [88, 0, 2, 0, 2, 84], 'rcount_with_image': 40, 'rcount_with_context': 58}, 'show_official_shop_label_in_title': False, 'discount': '15%', 'reason': None, 'label_ids': [1000012, 1000035, 1000088, 1001261], 'has_group_buy_stock': False, 'other_stock': 0, 'deep_discount': None, 'attributes': [{'is_pending_qc': False, 'idx': 0, 'value': 'Samsung', 'id': 10054, 'is_timestamp': False, 'name': 'Brand'}, {'is_pending_qc': False, 'idx': 1, 'value': 
'Galaxy Tab S6', 'id': 10093, 'is_timestamp': False, 'name': 'Model'}, {'is_pending_qc': True, 'idx': 2, 'value': '64GB', 'id': 10091, 'is_timestamp': False, 'name': 'Built-in Storage'}, {'is_pending_qc': True, 'idx': 3, 'value': '', 'id': 10094, 'is_timestamp': False, 'name': 'RAM'}, {'is_pending_qc': True, 'idx': 4, 'value': '1 Month', 'id': 10095, 'is_timestamp': False, 'name': 'Warranty Period'}], 'badge_icon_type': 0, 'liked': False, 'cmt_count': 88, 'image': '68ce24874257ec3c593d0d118e9cf785', 'recommendation_algorithm': None, 'is_cc_installment_payment_eligible': True, 'shopid': 165420215, 'normal_stock': 8, 'video_info_list': [], 'installment_plans': [{'banks': [{'bank_name': 'UOB', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'UOB', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 1641, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_uob #6x]', 'option_info': '1000501-19005014'}, {'disabled_reason': None, 'data': {'bank_name': 'UOB', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 1641, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_uob #12x]', 'option_info': '1000501-19005015'}], 'bank_id': 1641, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_uob.png'}, {'bank_name': 'DBS/POSB', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'DBS/POSB', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 4, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_dbs_posb #6x]', 'option_info': '1000501-19005011'}, {'disabled_reason': None, 'data': 
{'bank_name': 'DBS/POSB', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 4, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_dbs_posb #12x]', 'option_info': '1000501-19005012'}], 'bank_id': 4, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_dbs.png'}, {'bank_name': 'AMERICAN EXPRESS', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'AMERICAN EXPRESS', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 437, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [AMERICAN EXPRESS #6x]', 'option_info': '1000501-19005020'}, {'disabled_reason': None, 'data': {'bank_name': 'AMERICAN EXPRESS', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 437, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [AMERICAN EXPRESS #12x]', 'option_info': '1000501-19005021'}], 'bank_id': 437, 'bank_logo': ''}, {'bank_name': 'OCBC', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'OCBC', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 3430, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_ocbc #6x]', 'option_info': '1000501-19005017'}, {'disabled_reason': None, 'data': {'bank_name': 'OCBC', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 3430, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC 
Installment [label_bank_ocbc #12x]', 'option_info': '1000501-19005018'}], 'bank_id': 3430, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_ocbc.png'}], 'channel_name': 'label_cc_installment', 'is_cc': True, 'plans': None, 'channel_ic': 'ic_paymentoption_ccinstallment'}], 'view_count': 9765, 'voucher_info': None, 'current_promotion_has_reserve_stock': False, 'liked_count': 491, 'show_official_shop_label': False, 'price_min_before_discount': 54900000, 'show_discount': 15, 'preview_info': None, 'flag': 524290, 'exclusive_price_info': None, 'current_promotion_reserved_stock': 0, 'wholesale_tier_list': [], 'group_buy_info': None, 'shopee_verified': False, 'item_has_post': False, 'hidden_price_display': None, 'transparent_background_image': '68ce24874257ec3c593d0d118e9cf785', 'welcome_package_info': None, 'discount_stock': 8, 'coin_info': {'spend_cash_unit': 1000, 'coin_earn_items': []}, 'is_adult': False, 'currency': 'SGD', 'raw_discount': 15, 'is_preferred_plus_seller': False, 'is_category_failed': False, 'price_min': 46500000, 'can_use_bundle_deal': False, 'cb_option': 0, 'brand': '', 'stock': 8, 'status': 1, 'bundle_deal_id': 0, 'is_group_buy_item': None, 'description': "Brand New Set\r\n\r\nGalaxy Tab S6 Lite is your super portable note-taking, go-getting companion. It comes with a large 10.4 inch display on a slim and light build, One UI 2 on Android, and S Pen in-box and ready to go. Whether you're drawing, learning or gaming, this is the tablet made to be in the moment.\r\n\r\nGalaxy Tab S6 Lite is slim and lightweight thanks to its seamless, metal unibody. It slips easily into a small tote for true portability and minimalist style. \r\n\r\nKeeping up with lectures is a breeze with S Pen. When taking notes, you can jot first and change colors later. Quickly sort through memos when you save with searchable tags. Even write directly onto PDFs to cut the study clutter. 
When it's finally crunch time, you'll be organised and ready.\r\n\r\nS Pen is a bundle of writing instruments in one. Its natural grip, low latency and impressive pressure sensitivity will make it your go-to for everything from drawing to editing documents. And don't worry about misplacing the S Pen, thanks to the tablet's magnetic holder.\r\n\r\nDisplay : 10.4 inches\r\nExternal Memory Support Slot : Yes , MicroSD (Up to 1TB)\r\n64GB ROM + 4GB RAM\r\n8MP Rear Camera\r\n5MP Selfie Camera\r\n7040 mAh Big battery\r\n\r\nExport set with local seller warranty", 'flash_sale': None, 'models': [{'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Pink', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 5839808211, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 5839808211, 'stockout_time': 1612672967, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 0}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 0, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [0], 'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 5839808211, 'sold': 12, 'stock': 0}, {'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Blue', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 51655477750, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 
2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 51655477750, 'stockout_time': 1612146775, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 0}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 0, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [2], 'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 51655477750, 'sold': 65, 'stock': 0}, {'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Gray', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 70652051151, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 70652051151, 'stockout_time': 0, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 8}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 8, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [1], 
'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 70652051151, 'sold': 109, 'stock': 8}], 'has_low_fulfillment_rate': False, 'price': 46500000, 'shop_location': None, 'tier_variations': [{'images': ['dd6712ad6486c287be4f4c4ae82d5bb4', '228c0a8b72a457c99e420f980fbe6647', '028138dce1f06a573864ed58033267aa'], 'properties': [], 'type': 0, 'name': 'Colour', 'options': ['Pink', 'Gray', 'Blue']}], 'makeups': None, 'welcome_package_type': 0, 'show_official_shop_label_in_normal_position': None, 'item_type': 0}, 'version': '84fbe13733fdcb30d4c53c991d682692', 'data': None, 'error_msg': None, 'error': None}
You are iterating over the dictionary `item`. Each iteration calls the dictionary method `get` on an element of `item`, and that is where the error comes from.
To get what you want, you don't have to iterate:
site_json['item'].get('price_max_before_discount')
The problem here is that at some point you are trying to access the value "price_max_before_discount" from a string, not from a dictionary that includes that key. Thus, this error occurs: AttributeError: 'str' object has no attribute 'get'.
This happens because you are iterating through the "item" dictionary. In each iteration you get a key. For example you get "itemid" (the key, not the value) and you try to get "price_max_before_discount" from it. But it's just a string!
The solution:
site_json['item']['price_max_before_discount']
Here you are getting the "item" json and from there getting the value of "price_max_before_discount"

Python Beautifulsoup retrieving json

I'm trying to retrieve the 'inStockQty' json key/value pair using beautifulsoup but am having trouble.
Here's my code so far:
import requests
from bs4 import BeautifulSoup
# Question code as asked: download the product page and locate the <script> that holds the data.
url = "https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13"
# Send a browser-like User-Agent and Accept header with the request.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
headers = {'User-Agent': user_agent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, "html5lib")
# Select the <script> element whose text contains "window.priceAvailabilityJSON".
script = soup.select_one('script:contains("window.priceAvailabilityJSON")')
How do I then find 'inStockQty'? I thought about trying to parse all the JSON, but i don't know how to strip out all the HTML crap.
Many Thanks
Try this:
import json
import requests
from bs4 import BeautifulSoup
# Download the product page, cut the priceAvailabilityJSON literal out of its
# inline <script>, and print the in-stock quantity of every product variant.
url = "https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13"
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
headers = {'User-Agent': user_agent,
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, "html5lib")
# Take the text of the first <script> found under the element with id "main-content".
script = soup.find(id='main-content').find('script').string
# Keep only what sits between the "window.priceAvailabilityJSON = " assignment
# and the ";\nlet product" statement that follows it.
data = script.split('window.priceAvailabilityJSON = ')[1].split(';\nlet product')[0]
json_data = json.loads(data)
# Output
for product in json_data['productAvailability'].values():
    print(product['availability']['inStockQty'])
Try Selenium for that job
from selenium import webdriver
# Start Chrome through the chromedriver executable at the given path and open the product page.
driver = webdriver.Chrome(executable_path=r'C:\Program Files\ChromeDriver\chromedriver.exe')
URL = 'https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13'
driver.get(URL)
# NOTE(review): implicitly_wait presumably only sets a timeout for later element
# lookups rather than pausing here — confirm the page script has run before reading.
driver.implicitly_wait(5) # wait until content is loaded
Call the variable and you can access its content:
# Run JavaScript in the loaded page that returns the priceAvailabilityJSON variable.
jsonData = driver.execute_script('return priceAvailabilityJSON')
# Print only the 'productAvailability' entry of the returned object.
print(jsonData.get('productAvailability'))
# Close the browser window once the data has been printed.
driver.close()
Output
{'G006386138': {'availability': {'backorderable': False, 'inStockQty': 6, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386139': {'availability': {'backorderable': False, 'inStockQty': 2, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386140': {'availability': {'backorderable': False, 'inStockQty': 9, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386141': {'availability': {'backorderable': False, 'inStockQty': 5, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386142': {'availability': {'backorderable': False, 'inStockQty': 2, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 
'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386143': {'availability': {'backorderable': False, 'inStockQty': 28, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}, 'G006386144': {'availability': {'backorderable': False, 'inStockQty': 7, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '€ 27.00', 'value': 27}, 'vat': 16}}}

Access Nested Python Object returns None

I'm struggling to access the nested 'is_bot' attribute inside 'from' of the update object below:
{'message': {'caption_entities': [],
'channel_chat_created': False,
'chat': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'type': 'private',
'username': 'rodrigoformi'},
'date': 1516040454,
'delete_chat_photo': False,
'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
'from': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'},
'group_chat_created': False,
'message_id': 145,
'new_chat_member': None,
'new_chat_members': [],
'new_chat_photo': [],
'photo': [],
'supergroup_chat_created': False,
'text': '/invite'},
'update_id': 241263441}
What I have Tried:
update.get('message',{}).get('from',{}).is_bot
The above returns AttributeError: 'Message' object has no attribute 'get'
update["message"]["from"]["is_bot"]
This one returns TypeError: 'NoneType' object is not subscriptable
Don't know what to do anymore. Any help?
Try this:
# The update payload from the question, held in a plain nested dict.
d = {
    'message': {
        'caption_entities': [],
        'channel_chat_created': False,
        'chat': {
            'first_name': 'Rodrigo Formighieri',
            'id': 446924384,
            'type': 'private',
            'username': 'rodrigoformi',
        },
        'date': 1516040454,
        'delete_chat_photo': False,
        'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
        'from': {
            'first_name': 'Rodrigo Formighieri',
            'id': 446924384,
            'is_bot': False,
            'language_code': 'pt-BR',
            'username': 'rodrigoformi',
        },
        'group_chat_created': False,
        'message_id': 145,
        'new_chat_member': None,
        'new_chat_members': [],
        'new_chat_photo': [],
        'photo': [],
        'supergroup_chat_created': False,
        'text': '/invite',
    },
    'update_id': 241263441,
}

# Descend one level per dict.get() call: message -> from -> is_bot.
message = d.get('message')
sender = message.get('from')
is_bot = sender.get('is_bot')
print(is_bot)  # False
When I or my students struggle with getting to a deeply nested value, I often step through it to ensure that I am getting and seeing what I expect. As several commenters noted, your second version works fine as is, so there must be some nuance in the original code that we don't see. Here is a step-by-step descent to the key you care about:
In [45]: update = {'message': {'caption_entities': [],
...: 'channel_chat_created': False,
...: 'chat': {'first_name': 'Rodrigo Formighieri',
...: 'id': 446924384,
...: 'type': 'private',
...: 'username': 'rodrigoformi'},
...: 'date': 1516040454,
...: 'delete_chat_photo': False,
...: 'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
...: 'from': {'first_name': 'Rodrigo Formighieri',
...: 'id': 446924384,
...: 'is_bot': False,
...: 'language_code': 'pt-BR',
...: 'username': 'rodrigoformi'},
...: 'group_chat_created': False,
...: 'message_id': 145,
...: 'new_chat_member': None,
...: 'new_chat_members': [],
...: 'new_chat_photo': [],
...: 'photo': [],
...: 'supergroup_chat_created': False,
...: 'text': '/invite'},
...: 'update_id': 241263441}
...:
In [46]: update['message']
Out[46]:
{'caption_entities': [],
'channel_chat_created': False,
'chat': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'type': 'private',
'username': 'rodrigoformi'},
'date': 1516040454,
'delete_chat_photo': False,
'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
'from': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'},
'group_chat_created': False,
'message_id': 145,
'new_chat_member': None,
'new_chat_members': [],
'new_chat_photo': [],
'photo': [],
'supergroup_chat_created': False,
'text': '/invite'}
In [47]: update['message']['from']
Out[47]:
{'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'}
In [48]: update['message']['from']['is_bot']
Out[48]: False
SOLVED:
update = ast.literal_eval(str(update))
update["message"]["from"]["is_bot"] #false
Had to cast the update object to a string and then run ast.literal_eval on the result to get a plain dict.

Categories

Resources