Related
I am trying to get information on retweeters for a specific tweet using Tweepy and fetch the in_reply_to_status_id from the returned Tweepy response.
Here is the code
# Fetch the retweets of the given tweet and print each one's raw payload.
retweets_list = api.get_retweets(id=tweetid)
for retweet in retweets_list:
    # `_json` already holds the payload as a plain dict, so it is used
    # directly; serialising it with json.dumps() and parsing it straight
    # back with json.loads() only produced an equal copy.
    retweet_json = retweet._json
    print(retweet_json)
The code above produces the response data below:
{'created_at': 'Sat Jun 18 06:38:49 +0000 2022', 'id': 1538048568782688256, 'id_str': '1538048568782688256', 'text': 'RT #gyfboxAI: #isle_mcelroy Some mentioned items in thread \n\n#AllisonPDavis The Governesses => httpsurl The ObâŠ', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'gyfboxAI', 'name': 'Gyfbox', 'id': 1521109812032978946, 'id_str': '1521109812032978946', 'indices': [3, 12]}, {'screen_name': 'isle_mcelroy', 'name': 'Isle McElroy', 'id': 868462820, 'id_str': '868462820', 'indices': [14, 27]}, {'screen_name': 'AllisonPDavis', 'name': 'Allison P Davis', 'id': 15088579, 'id_str': '15088579', 'indices': [61, 75]}, {'screen_name': 'kvargs93', 'name': 'Katherine Varga', 'id': 885284552897429504, 'id_str': '885284552897429504', 'indices': [125, 134]}], 'urls': [{'url': 'httpsurl', 'expanded_url': 'httpsurlamzn.to/3MUM0mI', 'display_url': 'amzn.to/3MUM0mI', 'indices': [100, 123]}]}, 'source': 'Twitter for iPhone', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1003173584, 'id_str': '1003173584', 'name': 'Elaine Showalter', 'screen_name': 'ecshowalter', 'location': 'Washington, D.C./London', 'description': 'Professor Emerita Princeton U; Anglophile, feminist, theatre fanatic, âThe Civil Wars of Julia Ward Howe.â watercolor by Vanessa Bell, âThe Queenâs Tea Partyâ', 'url': None, 'entities': {'description': {'urls': []}}, 'protected': False, 'followers_count': 8142, 'friends_count': 1049, 'listed_count': 104, 'created_at': 'Tue Dec 11 03:08:17 +0000 2012', 'favourites_count': 24912, 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 'verified': False, 'statuses_count': 26489, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 
'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'httpsurlabs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': True, 'profile_image_url': 'http://pbs.twimg.com/profile_images/968862619699425281/CKzdSRf6_normal.jpg', 'profile_image_url_https': 'httpsurlpbs.twimg.com/profile_images/968862619699425281/CKzdSRf6_normal.jpg', 'profile_banner_url': 'httpsurlpbs.twimg.com/profile_banners/1003173584/1569562029', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none', 'withheld_in_countries': []}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Fri Jun 17 17:55:18 +0000 2022', 'id': 1537856423740198913, 'id_str': '1537856423740198913', 'text': '#isle_mcelroy Some mentioned items in thread \n\n#AllisonPDavis The Governesses => httpsurl⊠httpsurl', 'truncated': True, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'isle_mcelroy', 'name': 'Isle McElroy', 'id': 868462820, 'id_str': '868462820', 'indices': [0, 13]}, {'screen_name': 'AllisonPDavis', 'name': 'Allison P Davis', 'id': 15088579, 'id_str': '15088579', 'indices': [47, 61]}], 'urls': [{'url': 'httpsurl', 'expanded_url': 'httpsurlamzn.to/3MUM0mI', 'display_url': 'amzn.to/3MUM0mI', 'indices': [86, 109]}, {'url': 'httpsurlâ, 'expanded_url': 'httpsurltwitter.com/i/web/status/1537856423740198913', 'display_url': 'twitter.com/i/web/status/1âŠ', 'indices': [111, 134]}]}, 'source': 'gyfbox', 'in_reply_to_status_id': 1537835837542604801, 'in_reply_to_status_id_str': '1537835837542604801', 'in_reply_to_user_id': 868462820, 'in_reply_to_user_id_str': '868462820', 
'in_reply_to_screen_name': 'isle_mcelroy', 'user': {'id': 1521109812032978946, 'id_str': '1521109812032978946', 'name': 'Gyfbox', 'screen_name': 'gyfboxAI', 'location': '', 'description': 'Tag "#GyfboxAI find item" \n\n#GyfboxAI will reply with link for items mentioned in the thread\n\nCOMING SOON !', 'url': 'httpsurlt.co/u7fGrxh24Y', 'entities': {'url': {'urls': [{'url': 'httpsurlt.co/u7fGrxh24Y', 'expanded_url': 'httpsurlwww.gyfbox.com', 'display_url': 'gyfbox.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1, 'friends_count': 6, 'listed_count': 0, 'created_at': 'Mon May 02 12:50:32 +0000 2022', 'favourites_count': 1, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 49, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1521109885827661824/iTrlR67U_normal.png', 'profile_image_url_https': 'httpsurlpbs.twimg.com/profile_images/1521109885827661824/iTrlR67U_normal.png', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none', 'withheld_in_countries': []}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': 
False, 'lang': 'en'}
Multiple attempts to extract the in_reply_to_status_id always return "None"
Sample attempts that returned None:
retweet_json['in_reply_to_status_id']
retweet.in_reply_to_status_id
The data returned above shows
'in_reply_to_status_id': 1537835837542604801,
so I should be getting 1537835837542604801 for in_reply_to_status_id
What am I doing wrong, and how can I obtain the in_reply_to_status_id?
According to your JSON structure,
the top-level in_reply_to_status_id is None; the ID you are looking for is nested inside retweeted_status, so based on the JSON structure
retweet_json['retweeted_status']['in_reply_to_status_id']
should give
1537835837542604801
I am interested in the comments (=text) made on certain YouTube Channels. I have scraped data with the Google YouTube Data API. The data comes in a complex structure and format (see picture below) that I am trying to disentangle for a research project.
The comments are stored in the fields Text Display and Text Original that belong to the dictionary Snippet, which in turn is part of the dictionary Top Level Comments. Top Level Comments is part of a string list that in turn is part of the dictionary items.
I think I need to subset the dictionary Top Level Comment as all the comments and related information (see picture below) I need are stored in nested dictionaries there. I don't think I can access the dictionary Top Level Comment as it is part of the list Snippet. So I first tried to subset the list Snippet. This is where I am stuck.
Here my code so far:
from googleapiclient.discovery import build
api_key = '_______________________________'

# Create the YouTube Data API v3 client, authenticated with the developer key.
youtube = build('youtube', 'v3', developerKey=api_key)

# The channel ID can be looked up at
# https://commentpicker.com/youtube-channel-id.php
request = youtube.commentThreads().list(
    part='snippet',
    allThreadsRelatedToChannelId='UC_zxivooFdvF4uuBosUnJxQ',
)
response3 = request.execute()

# (The code used to explore the data structure and format is left out.)

# Keep only the top-level keys of the response that are of interest.
includedKeys = ['items']
dataDic = {}
for key, value in response3.items():
    if key in includedKeys:
        dataDic[key] = value
In the code below I unsuccessfully tried to subset the list Snippet in different ways, or to convert it.
dataDic2 = {x['snippet'] for x in dataDic} #Link no 1
#TypeError: string indices must be integers
dataDic2 = {x['snippet'] for x in dataDic} #Link no 1
#TypeError: string indices must be integers
dataDic2 = [{'snippet': d['snippet']} for d in dataDic] #Link no 2
#TypeError: string indices must be integers
dataDic2 = [topLevelComment['snippet'] for topLevelComment in dataDic['topLevelComment']['snippet']] #Link no 3
#KeyError: 'topLevelComment'
import ast
result = ast.literal_eval('[snippet]')
assert type(result) is list #Link no 4 and 5
#ValueError: malformed node or string: <_ast.Name object at 0x0000010F6D7B9A08>
Link no 1
Link no 2
Link no 3
Link no 4
Link no 5
This link says that ast.literal_eval does not work with lists and dictionaries?
So finally - how to retrieve the data?
I need all fields circled in red in the picture showing the data structure.
EDIT: sample data
see below
data = {'kind': 'youtube#commentThreadListResponse', 'etag': '_yOZ67ear9btS5RarXfH3Xir6A8',
'nextPageToken': 'QURTSl9pME5DS2FQZm5yRzZ5b0ZGZUJGeENkMGh2UWxzVjNueEdUVmtmbVVqYksxSmN4QnpBdDFFWkpCREl6REZVQmlHZS1makpfZXFkQzFNbEpwbDFpb0dNWm95Z2E1TE03NE5GWEg0ajE5UWt0bnlpYS1PczlFVWZ1a1hqbTJLREVRempJaVpaRTYtcnpFeUM2ZU5Va1hUSHR5cVJFTEJ2akdtOHFkTWhGdmdmWUZsMUMwUHg0eTZNVzFBZVdsd1A0YXBqaWhnNGVNMXc=',
'pageInfo': {'totalResults': 14, 'resultsPerPage': 20}, 'items': [
{'kind': 'youtube#commentThread', 'etag': 'knxvgtYnhlPIpkevoCXSTZamb40', 'id': 'Ugwmdd9KdDm4Hm7MxlJ4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '4m76jMeR8qFmfrk42kfKeA5Iv_Y',
'id': 'Ugwmdd9KdDm4Hm7MxlJ4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI',
'textDisplay': 'Tipp 1: ZusatzszĂŒge â machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)',
'textOriginal': 'Tipp 1: ZusatzszĂŒge â machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)',
'authorDisplayName': 'Zuschauerquaeler',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQCWIoN-3MmDfxflS5ipDVvatDw8TpbD43mn2kb=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCECxysNsTQLhrelU2KikMjQ',
'authorChannelId': {'value': 'UCECxysNsTQLhrelU2KikMjQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-09-15T07:29:00Z',
'updatedAt': '2021-09-15T07:29:00Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'tq7mSQltdzKz0sthUiAIPYrQgJg', 'id': 'Ugy2jzL0838zj9HyHu94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '8BDnS6DXuaN8VdFzHsj7dc1YPZc',
'id': 'Ugy2jzL0838zj9HyHu94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'textDisplay': 'Ich sehe das Kulturland schon schmelzen und verschwinden...',
'textOriginal': 'Ich sehe das Kulturland schon schmelzen und verschwinden...',
'authorDisplayName': 'Janik Von NiederhÀusern',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLSk69KdiWMSYw0sYQSBdjEHagXJTD9tWlHdsw=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCt87CYDxeIbDRRJLVT0VrdQ',
'authorChannelId': {'value': 'UCt87CYDxeIbDRRJLVT0VrdQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-14T18:08:55Z',
'updatedAt': '2021-09-14T18:08:55Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'h_gpfnmUju60NWNxlFEwxjkIPQU', 'id': 'Ugx5GfaJTwt5cnuQ3Bh4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'fMmN1zDH7PVIWbw3L0n5Mt0dtqk',
'id': 'Ugx5GfaJTwt5cnuQ3Bh4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg',
'textDisplay': 'Guete initiativ! Mega fan vo dere projekt!',
'textOriginal': 'Guete initiativ! Mega fan vo dere projekt!',
'authorDisplayName': 'Nionity',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLTM-Tj3pWLuyhuH7ivlUwxs4YtQn6gez-BMCLdLzQ=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCbUj9ZwI0YOkElVEfpAnBVQ',
'authorChannelId': {'value': 'UCbUj9ZwI0YOkElVEfpAnBVQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-14T07:18:31Z',
'updatedAt': '2021-09-14T07:18:31Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'LOajqt43iY4A2N4V0yiLBRZwaig', 'id': 'Ugxez_tcF7ts7VaAL7t4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'DvNHOkNftBCLBqV1Ajam8mzMFYg',
'id': 'Ugxez_tcF7ts7VaAL7t4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o',
'textDisplay': 'TrĂšs mauvaise voix off, Ă un moment il y se reprend mĂȘme dans le texte đ',
'textOriginal': 'TrĂšs mauvaise voix off, Ă un moment il y se reprend mĂȘme dans le texte đ',
'authorDisplayName': 'Patrick__EPfan',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLTOmUsxVCimwNSQBVPxNUXfFbUNuYnN7VzVEeBUJA=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UC8DxMAk8T9Gv8RW0f2n0Q2w',
'authorChannelId': {'value': 'UC8DxMAk8T9Gv8RW0f2n0Q2w'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-12T12:12:58Z',
'updatedAt': '2021-09-12T12:12:58Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'MGsQS-TUcYHnuyjyN932wpVIM_A', 'id': 'UgxYTxqSwAsyGyOHzU94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'iRkZfQGVCGFZ13s8D3xrVZQw83A',
'id': 'UgxYTxqSwAsyGyOHzU94AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c',
'textDisplay': 'Shiey be like', 'textOriginal': 'Shiey be like',
'authorDisplayName': 'Canopener Guy',
'authorProfileImageUrl': 'https://yt3.ggpht.com/2XG9uyYmOfkeubUNFQR0cgj7xCimKLsg6_r-3E1PTPVLixXjcxeFosF1HoytvHibGJrxQXal=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCk8pieRaYyzsnU32Gp85DvA',
'authorChannelId': {'value': 'UCk8pieRaYyzsnU32Gp85DvA'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T23:23:35Z',
'updatedAt': '2021-09-02T23:23:35Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'bcPCCsMbvquhAKLiEqIR4a20HnA', 'id': 'Ugw8FWvl7Hbf1RvJWhV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'rTl4oSjvH14OF4xQ1mnM_amfZag',
'id': 'Ugw8FWvl7Hbf1RvJWhV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM',
'textDisplay': 'Vivement un Lyria en Belgique !!!!',
'textOriginal': 'Vivement un Lyria en Belgique !!!!',
'authorDisplayName': 'Kayuchi Fujimoto',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQ9YSDYj2tQFvjKjt9F_CH9AR2dcWrr84jA70am=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCe5ctUAG-Z7cU_hpc-CbauQ',
'authorChannelId': {'value': 'UCe5ctUAG-Z7cU_hpc-CbauQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T21:39:26Z',
'updatedAt': '2021-09-02T21:39:26Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'qbrUI9Z2YkM3LtYOqFogVRwcZWE', 'id': 'UgwomjMWUx5CHjlU_ox4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'gYjvyBgNsZUB_FYUDK20LCVU-Qk',
'id': 'UgwomjMWUx5CHjlU_ox4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s',
'textDisplay': 'Build a high speed railway line into the moon I dare you with 20 million francs',
'textOriginal': 'Build a high speed railway line into the moon I dare you with 20 million francs',
'authorDisplayName': 'Simulated Trainspotter',
'authorProfileImageUrl': 'https://yt3.ggpht.com/3P-cR_3ORURRZH5RYImCeFv0yeC64SHtpS3otsCiGn4AuBXG-tQVrqnG32vJm4bfwxRt3MwCDzw=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCF4ganYY8qP9q8YwXpDn2tQ',
'authorChannelId': {'value': 'UCF4ganYY8qP9q8YwXpDn2tQ'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-09-02T08:36:45Z',
'updatedAt': '2021-09-02T08:36:45Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': '5KVenAu6Nn6RdnpKTpPj49KuYRY', 'id': 'UgyXleqDMoHFnid0OpV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'C3AxUnPxhDZuIYAKsjqeIZxmyQI',
'id': 'UgyXleqDMoHFnid0OpV4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA',
'textDisplay': 'Sehr schön', 'textOriginal': 'Sehr schön',
'authorDisplayName': 'Pranave4 Roblox',
'authorProfileImageUrl': 'https://yt3.ggpht.com/V_qXZAr4xsbi2GEFJ2t8NhwDYWGEeiBhFCgVYcgs1TwmaS1e6gCwktKZpdNPJszs3Zwu71ZZ2w=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCKoDZxOJY6e90jeujtkC_4A',
'authorChannelId': {'value': 'UCKoDZxOJY6e90jeujtkC_4A'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 2,
'publishedAt': '2021-08-27T16:06:59Z',
'updatedAt': '2021-08-27T16:06:59Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'mH33Uu3Bm3zkVGLZDiOaOg2idSM', 'id': 'UgxQRQaVxnzeFQRTPTp4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'Sht8Gm_LShDQ9cKfIl1nH53FgsI',
'id': 'UgxQRQaVxnzeFQRTPTp4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'textDisplay': 'wie kann mann feuerwehr mann bei SBB werden',
'textOriginal': 'wie kann mann feuerwehr mann bei SBB werden',
'authorDisplayName': 'Florian Ruhland',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQNfiz21ybCpfDmaXKefJtuy1UDHwFenhsL0R14Kg=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCS7LfiWU_ebI-E3ny8Yb6PA',
'authorChannelId': {'value': 'UCS7LfiWU_ebI-E3ny8Yb6PA'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-08-21T11:00:05Z',
'updatedAt': '2021-08-21T11:00:05Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'oM57z1ZCosWjFXPDl1VMIQIFpJ8', 'id': 'UgzzHV3cayZFI7MpziB4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '-ecKB_iUT-BOVOeNfX7qoAr0poI',
'id': 'UgzzHV3cayZFI7MpziB4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0',
'textDisplay': 'I am only 15, but i have a very very big passion for these trains, i canât wait to drive around Switzerland and help people arrive at their destinations<br>I also learned about signals in Switzerland as short documentaries on how these trains work.<br>I hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver',
'textOriginal': 'I am only 15, but i have a very very big passion for these trains, i canât wait to drive around Switzerland and help people arrive at their destinations\nI also learned about signals in Switzerland as short documentaries on how these trains work.\nI hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver',
'authorDisplayName': 'Fred Dev',
'authorProfileImageUrl': 'https://yt3.ggpht.com/JEaQIjszQdpIDgsrIKEtIX6KaeryO48U4IcbSl45oFIKrDNoCxwhmWh3fC6exW5X1pL15Hiw4w=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCJKarhI8HsHHix0-HckXwVg',
'authorChannelId': {'value': 'UCJKarhI8HsHHix0-HckXwVg'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-08-19T22:32:58Z',
'updatedAt': '2021-08-19T22:32:58Z'}}, 'canReply': True,
'totalReplyCount': 1, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': 'Xu5rUasdLD7ZFsRPWPrL2JUJCWg', 'id': 'UgwBkkcOhrjuzFjE6Y54AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ',
'topLevelComment': {'kind': 'youtube#comment', 'etag': '1ps-PTcq7S2TzbY7s4OuafI4-Fg',
'id': 'UgwBkkcOhrjuzFjE6Y54AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ',
'textDisplay': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!<br>die musik ist auch toll, wie heisst das stĂŒck?',
'textOriginal': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!\ndie musik ist auch toll, wie heisst das stĂŒck?',
'authorDisplayName': 'cloudwalker',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLQxGBcardOjutARwZxXcfbUSH3f66gqTzq3EA=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UC3VmTS8W5GKZf0PeIb8l2Jw',
'authorChannelId': {'value': 'UC3VmTS8W5GKZf0PeIb8l2Jw'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 1,
'publishedAt': '2021-08-18T00:50:32Z',
'updatedAt': '2021-08-18T00:50:32Z'}}, 'canReply': True,
'totalReplyCount': 2, 'isPublic': True}},
{'kind': 'youtube#commentThread', 'etag': '_hlBnClge81P8_RqsXR7q4_BIes', 'id': 'Ugzvldq2VB0lBIzoGVR4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'topLevelComment': {'kind': 'youtube#comment', 'etag': 'QZFjHr5bIQC72OicksbfJ3Py-Hk',
'id': 'Ugzvldq2VB0lBIzoGVR4AaABAg',
'snippet': {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY',
'textDisplay': 'Ihr seid spitze! Danke, dass es euch gibt đ',
'textOriginal': 'Ihr seid spitze! Danke, dass es euch gibt đ',
'authorDisplayName': 'Cris Tiano',
'authorProfileImageUrl': 'https://yt3.ggpht.com/ytc/AKedOLT_ZmzCfLD22VLmHv-zIOnNiBGZHoYBhgcsgQ=s48-c-k-c0x00ffffff-no-rj',
'authorChannelUrl': 'http://www.youtube.com/channel/UCU3xXx609PrAf6AwLjs5oSw',
'authorChannelId': {'value': 'UCU3xXx609PrAf6AwLjs5oSw'},
'canRate': True, 'viewerRating': 'none', 'likeCount': 0,
'publishedAt': '2021-08-16T15:53:30Z',
'updatedAt': '2021-08-16T15:53:30Z'}}, 'canReply': True,
'totalReplyCount': 0, 'isPublic': True}},
]}
# Flatten each comment thread in `data['items']` into one dict per comment.

# Keys copied from the top-level comment's own snippet, in output order.
comment_fields = (
    'channelId', 'videoId',
    'textOriginal', 'textDisplay', 'canRate', 'likeCount',
    'updatedAt', 'viewerRating', 'publishedAt',
)
# Keys copied from the enclosing thread's snippet.
thread_fields = ('canReply', 'isPublic', 'totalReplyCount')

comments = []
for item in data['items']:
    thread = item['snippet']
    # The comment text and metadata sit one level deeper, under
    # snippet -> topLevelComment -> snippet.
    top_level = thread['topLevelComment']['snippet']
    entry = {field: top_level[field] for field in comment_fields}
    entry.update((field, thread[field]) for field in thread_fields)
    comments.append(entry)

# Print the flattened comments, numbered from 1.
for idx, comment in enumerate(comments, 1):
    print(f'{idx}) {comment}')
output
1) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'tUXWw6WvgkI', 'textOriginal': 'Tipp 1: ZusatzszĂŒge â machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)', 'textDisplay': 'Tipp 1: ZusatzszĂŒge â machs wie Fredy. (Hinweis: dieses Video wurde vor der Corona-Pandemie erstellt)', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-09-15T07:29:00Z', 'viewerRating': 'none', 'publishedAt': '2021-09-15T07:29:00Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
2) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg', 'textOriginal': 'Ich sehe das Kulturland schon schmelzen und verschwinden...', 'textDisplay': 'Ich sehe das Kulturland schon schmelzen und verschwinden...', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-14T18:08:55Z', 'viewerRating': 'none', 'publishedAt': '2021-09-14T18:08:55Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
3) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'M98TRem03Lg', 'textOriginal': 'Guete initiativ! Mega fan vo dere projekt!', 'textDisplay': 'Guete initiativ! Mega fan vo dere projekt!', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-14T07:18:31Z', 'viewerRating': 'none', 'publishedAt': '2021-09-14T07:18:31Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
4) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'zYnbgDyWM9o', 'textOriginal': 'TrĂšs mauvaise voix off, Ă un moment il y se reprend mĂȘme dans le texte đ', 'textDisplay': 'TrĂšs mauvaise voix off, Ă un moment il y se reprend mĂȘme dans le texte đ', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-12T12:12:58Z', 'viewerRating': 'none', 'publishedAt': '2021-09-12T12:12:58Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
5) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '4nU0MgKft6c', 'textOriginal': 'Shiey be like', 'textDisplay': 'Shiey be like', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T23:23:35Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T23:23:35Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
6) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'oxSLp_1WtcM', 'textOriginal': 'Vivement un Lyria en Belgique !!!!', 'textDisplay': 'Vivement un Lyria en Belgique !!!!', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T21:39:26Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T21:39:26Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
7) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '8vCvSmAIv1s', 'textOriginal': 'Build a high speed railway line into the moon I dare you with 20 million francs', 'textDisplay': 'Build a high speed railway line into the moon I dare you with 20 million francs', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-09-02T08:36:45Z', 'viewerRating': 'none', 'publishedAt': '2021-09-02T08:36:45Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
8) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': '7earPWDJbhA', 'textOriginal': 'Sehr schön', 'textDisplay': 'Sehr schön', 'canRate': True, 'likeCount': 2, 'updatedAt': '2021-08-27T16:06:59Z', 'viewerRating': 'none', 'publishedAt': '2021-08-27T16:06:59Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
9) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY', 'textOriginal': 'wie kann mann feuerwehr mann bei SBB werden', 'textDisplay': 'wie kann mann feuerwehr mann bei SBB werden', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-08-21T11:00:05Z', 'viewerRating': 'none', 'publishedAt': '2021-08-21T11:00:05Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
10) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'DmBo0MMxDb0', 'textOriginal': 'I am only 15, but i have a very very big passion for these trains, i canât wait to drive around Switzerland and help people arrive at their destinations\nI also learned about signals in Switzerland as short documentaries on how these trains work.\nI hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver', 'textDisplay': 'I am only 15, but i have a very very big passion for these trains, i canât wait to drive around Switzerland and help people arrive at their destinations<br>I also learned about signals in Switzerland as short documentaries on how these trains work.<br>I hope nothing major will change in 5 years:) i really dreaming of becoming an engine driver', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-08-19T22:32:58Z', 'viewerRating': 'none', 'publishedAt': '2021-08-19T22:32:58Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 1}
11) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'ES0AnIBNJfQ', 'textOriginal': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!\ndie musik ist auch toll, wie heisst das stĂŒck?', 'textDisplay': 'wie heisst der sprecher dieser werbung? so eine wunderbare stimme!<br>die musik ist auch toll, wie heisst das stĂŒck?', 'canRate': True, 'likeCount': 1, 'updatedAt': '2021-08-18T00:50:32Z', 'viewerRating': 'none', 'publishedAt': '2021-08-18T00:50:32Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 2}
12) {'channelId': 'UC_zxivooFdvF4uuBosUnJxQ', 'videoId': 'AXMw3vtsswY', 'textOriginal': 'Ihr seid spitze! Danke, dass es euch gibt đ', 'textDisplay': 'Ihr seid spitze! Danke, dass es euch gibt đ', 'canRate': True, 'likeCount': 0, 'updatedAt': '2021-08-16T15:53:30Z', 'viewerRating': 'none', 'publishedAt': '2021-08-16T15:53:30Z', 'canReply': True, 'isPublic': True, 'totalReplyCount': 0}
I am trying to print out the price of an item from this page/JSON:
https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215
but I am encountering an error as below:
print([d.get('price_max_before_discount') for d in site_json['item'] if d.get('price_max_before_discount')])
AttributeError: 'str' object has no attribute 'get'
import json
from urllib import request
from bs4 import BeautifulSoup
# Download the raw response body from the item endpoint.
url = 'https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215'
html = request.urlopen(url).read()
# Run the body through BeautifulSoup's HTML parser, then parse the extracted
# text as JSON.
soup = BeautifulSoup(html, 'html.parser')
site_json = json.loads(soup.text)
# NOTE(review): this is the line that raises the AttributeError quoted above.
# The posted API output shows 'item' as a dict, so iterating over it yields
# its string keys, and a str has no .get() method.
print([d.get('price_max_before_discount') for d in site_json['item'] if d.get('price_max_before_discount')])
I am not sure what I am doing wrong, but I would appreciate any advice/solutions!
Thank you.
The API URL already returns a JSON object, so you can simply use:
import json
from urllib import request
url = 'https://shopee.sg/api/v2/item/get?itemid=2590867516&shopid=165420215'
data = request.urlopen(url).read()
data_json = json.loads(data)
print(data_json)
Output:
{'item': {'itemid': 2590867516, 'price_max_before_discount': 54900000, 'item_status': 'normal', 'can_use_wholesale': False, 'show_free_shipping': True, 'estimated_days': 2, 'is_hot_sales': None, 'is_slash_price_item': False, 'upcoming_flash_sale': None, 'slash_lowest_price': None, 'is_partial_fulfilled': False, 'condition': 1, 'show_original_guarantee': True, 'add_on_deal_info': None, 'is_non_cc_installment_payment_eligible': False, 'categories': [{'display_name': 'Mobile & Gadgets', 'catid': 8, 'image': None, 'no_sub': False, 'is_default_subcat': False, 'block_buyer_platform': None}, {'display_name': 'Mobile Phones & Tablets', 'catid': 10941, 'image': None, 'no_sub': False, 'is_default_subcat': False, 'block_buyer_platform': None}, {'display_name': 'Samsung', 'catid': 10944, 'image': None, 'no_sub': True, 'is_default_subcat': False, 'block_buyer_platform': None}], 'ctime': 1564663070, 'name': "(New Launch) Samsung Galaxy Tab S6 Lite 10.4' LTE Version with S Pen 64GB", 'show_shopee_verified_label': False, 'size_chart': None, 'is_pre_order': False, 'service_by_shopee_flag': None, 'historical_sold': 186, 'reference_item_id': '', 'recommendation_info': None, 'bundle_deal_info': None, 'price_max': 46500000, 'has_lowest_price_guarantee': False, 'shipping_icon_type': 0, 'images': ['68ce24874257ec3c593d0d118e9cf785'], 'price_before_discount': 54900000, 'cod_flag': 0, 'catid': 8, 'is_official_shop': False, 'coin_earn_label': None, 'hashtag_list': None, 'sold': 62, 'makeup': None, 'item_rating': {'rating_star': 4.976744, 'rating_count': [88, 0, 2, 0, 2, 84], 'rcount_with_image': 40, 'rcount_with_context': 58}, 'show_official_shop_label_in_title': False, 'discount': '15%', 'reason': None, 'label_ids': [1000012, 1000035, 1000088, 1001261], 'has_group_buy_stock': False, 'other_stock': 0, 'deep_discount': None, 'attributes': [{'is_pending_qc': False, 'idx': 0, 'value': 'Samsung', 'id': 10054, 'is_timestamp': False, 'name': 'Brand'}, {'is_pending_qc': False, 'idx': 1, 'value': 
'Galaxy Tab S6', 'id': 10093, 'is_timestamp': False, 'name': 'Model'}, {'is_pending_qc': True, 'idx': 2, 'value': '64GB', 'id': 10091, 'is_timestamp': False, 'name': 'Built-in Storage'}, {'is_pending_qc': True, 'idx': 3, 'value': '', 'id': 10094, 'is_timestamp': False, 'name': 'RAM'}, {'is_pending_qc': True, 'idx': 4, 'value': '1 Month', 'id': 10095, 'is_timestamp': False, 'name': 'Warranty Period'}], 'badge_icon_type': 0, 'liked': False, 'cmt_count': 88, 'image': '68ce24874257ec3c593d0d118e9cf785', 'recommendation_algorithm': None, 'is_cc_installment_payment_eligible': True, 'shopid': 165420215, 'normal_stock': 8, 'video_info_list': [], 'installment_plans': [{'banks': [{'bank_name': 'UOB', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'UOB', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 1641, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_uob #6x]', 'option_info': '1000501-19005014'}, {'disabled_reason': None, 'data': {'bank_name': 'UOB', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 1641, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_uob #12x]', 'option_info': '1000501-19005015'}], 'bank_id': 1641, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_uob.png'}, {'bank_name': 'DBS/POSB', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'DBS/POSB', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 4, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_dbs_posb #6x]', 'option_info': '1000501-19005011'}, {'disabled_reason': None, 'data': 
{'bank_name': 'DBS/POSB', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 4, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_dbs_posb #12x]', 'option_info': '1000501-19005012'}], 'bank_id': 4, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_dbs.png'}, {'bank_name': 'AMERICAN EXPRESS', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'AMERICAN EXPRESS', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 437, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [AMERICAN EXPRESS #6x]', 'option_info': '1000501-19005020'}, {'disabled_reason': None, 'data': {'bank_name': 'AMERICAN EXPRESS', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 437, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [AMERICAN EXPRESS #12x]', 'option_info': '1000501-19005021'}], 'bank_id': 437, 'bank_logo': ''}, {'bank_name': 'OCBC', 'sub_options': [{'disabled_reason': None, 'data': {'bank_name': 'OCBC', 'down_payment': 0, 'name': '6x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 3430, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 7750000, 'tenure': 6, 'total_amount': 46500000}, 'name': 'Airpay CC Installment [label_bank_ocbc #6x]', 'option_info': '1000501-19005017'}, {'disabled_reason': None, 'data': {'bank_name': 'OCBC', 'down_payment': 0, 'name': '12x # 0%', 'interest_rate': 0, 'option_id': None, 'bank_id': 3430, 'installment_amount': 46500000, 'channel_id': 1000501, 'monthly_installment': 3875000, 'tenure': 12, 'total_amount': 46500000}, 'name': 'Airpay CC 
Installment [label_bank_ocbc #12x]', 'option_info': '1000501-19005018'}], 'bank_id': 3430, 'bank_logo': 'https://mall.shopee.sg/static/images/bank_logo/ic_bank_ocbc.png'}], 'channel_name': 'label_cc_installment', 'is_cc': True, 'plans': None, 'channel_ic': 'ic_paymentoption_ccinstallment'}], 'view_count': 9765, 'voucher_info': None, 'current_promotion_has_reserve_stock': False, 'liked_count': 491, 'show_official_shop_label': False, 'price_min_before_discount': 54900000, 'show_discount': 15, 'preview_info': None, 'flag': 524290, 'exclusive_price_info': None, 'current_promotion_reserved_stock': 0, 'wholesale_tier_list': [], 'group_buy_info': None, 'shopee_verified': False, 'item_has_post': False, 'hidden_price_display': None, 'transparent_background_image': '68ce24874257ec3c593d0d118e9cf785', 'welcome_package_info': None, 'discount_stock': 8, 'coin_info': {'spend_cash_unit': 1000, 'coin_earn_items': []}, 'is_adult': False, 'currency': 'SGD', 'raw_discount': 15, 'is_preferred_plus_seller': False, 'is_category_failed': False, 'price_min': 46500000, 'can_use_bundle_deal': False, 'cb_option': 0, 'brand': '', 'stock': 8, 'status': 1, 'bundle_deal_id': 0, 'is_group_buy_item': None, 'description': "Brand New Set\r\n\r\nGalaxy Tab S6 Lite is your super portable note-taking, go-getting companion. It comes with a large 10.4 inch display on a slim and light build, One UI 2 on Android, and S Pen in-box and ready to go. Whether you're drawing, learning or gaming, this is the tablet made to be in the moment.\r\n\r\nGalaxy Tab S6 Lite is slim and lightweight thanks to its seamless, metal unibody. It slips easily into a small tote for true portability and minimalist style. \r\n\r\nKeeping up with lectures is a breeze with S Pen. When taking notes, you can jot first and change colors later. Quickly sort through memos when you save with searchable tags. Even write directly onto PDFs to cut the study clutter. 
When it's finally crunch time, you'll be organised and ready.\r\n\r\nS Pen is a bundle of writing instruments in one. Its natural grip, low latency and impressive pressure sensitivity will make it your go-to for everything from drawing to editing documents. And don't worry about misplacing the S Pen, thanks to the tablet's magnetic holder.\r\n\r\nDisplay : 10.4 inches\r\nExternal Memory Support Slot : Yes , MicroSD (Up to 1TB)\r\n64GB ROM + 4GB RAM\r\n8MP Rear Camera\r\n5MP Selfie Camera\r\n7040 mAh Big battery\r\n\r\nExport set with local seller warranty", 'flash_sale': None, 'models': [{'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Pink', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 5839808211, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 5839808211, 'stockout_time': 1612672967, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 0}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 0, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [0], 'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 5839808211, 'sold': 12, 'stock': 0}, {'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Blue', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 51655477750, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 
2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 51655477750, 'stockout_time': 1612146775, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 0}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 0, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [2], 'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 51655477750, 'sold': 65, 'stock': 0}, {'itemid': 2590867516, 'status': 1, 'current_promotion_reserved_stock': 0, 'name': 'Gray', 'promotionid': 58388, 'price': 46500000, 'price_stocks': [{'model_id': 70652051151, 'stockout_time': 1613452764, 'region': 'SG', 'rebate': 2000000, 'price': 46500000, 'promotion_type': 1, 'allocated_stock': 0, 'shop_id': 165420215, 'end_time': 1613663940, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 58388, 'purchase_limit': 1, 'start_time': 1613452860, 'stock': 0}, {'model_id': 70652051151, 'stockout_time': 0, 'region': 'SG', 'rebate': None, 'price': 54900000, 'promotion_type': 0, 'allocated_stock': None, 'shop_id': 165420215, 'end_time': None, 'stock_breakdown_by_location': [], 'item_id': 2590867516, 'promotion_id': 0, 'purchase_limit': None, 'start_time': None, 'stock': 8}], 'current_promotion_has_reserve_stock': False, 'currency': 'SGD', 'normal_stock': 8, 'extinfo': {'seller_promotion_limit': 1, 'has_shopee_promo': True, 'group_buy_info': None, 'holiday_mode_old_stock': None, 'tier_index': [1], 
'seller_promotion_refresh_time': 1613663940}, 'price_before_discount': 54900000, 'modelid': 70652051151, 'sold': 109, 'stock': 8}], 'has_low_fulfillment_rate': False, 'price': 46500000, 'shop_location': None, 'tier_variations': [{'images': ['dd6712ad6486c287be4f4c4ae82d5bb4', '228c0a8b72a457c99e420f980fbe6647', '028138dce1f06a573864ed58033267aa'], 'properties': [], 'type': 0, 'name': 'Colour', 'options': ['Pink', 'Gray', 'Blue']}], 'makeups': None, 'welcome_package_type': 0, 'show_official_shop_label_in_normal_position': None, 'item_type': 0}, 'version': '84fbe13733fdcb30d4c53c991d682692', 'data': None, 'error_msg': None, 'error': None}
You are iterating over the dictionary item. Iterating over a dictionary yields its keys (strings), so your loop is making a dictionary get call on every key of item; that is where the error is coming from.
To get what you want, you don't have to iterate:
site_json['item'].get('price_max_before_discount')
The problem here is that at some point you are trying to access the value "price_max_before_discount" from a string, not a dictionary that includes that key. Thus, this error occurs: AttributeError: 'str' object has no attribute 'get'.
This happens because you are iterating through the "item" dictionary. In each iteration you get a key. For example you get "itemid" (the key, not the value) and you try to get "price_max_before_discount" from it. But it's just a string!
The solution:
site_json['item']['price_max_before_discount']
Here you are getting the "item" json and from there getting the value of "price_max_before_discount"
I'm trying to retrieve the 'inStockQty' json key/value pair using beautifulsoup but am having trouble.
Here's my code so far:
import requests
from bs4 import BeautifulSoup
url = "https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13"
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
headers = {'User-Agent': user_agent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, "html5lib")
script = soup.select_one('script:contains("window.priceAvailabilityJSON")')
How do I then find 'inStockQty'? I thought about trying to parse all the JSON, but I don't know how to strip out all the HTML crap.
Many Thanks
Try this:
import json
import requests
from bs4 import BeautifulSoup
url = "https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13"
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
headers = {'User-Agent': user_agent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, "html5lib")
script = soup.find(id='main-content').find('script').string
data = script.split('window.priceAvailabilityJSON = ')[1].split(';\nlet product')[0]
json_data = json.loads(data)
# Output
for product in json_data['productAvailability'].values():
print(product['availability']['inStockQty'])
Try Selenium for that job
from selenium import webdriver
driver = webdriver.Chrome(executable_path=r'C:\Program Files\ChromeDriver\chromedriver.exe')
URL = 'https://direct.asda.com/george/men/shoes/black-leather-lace-up-oxford-shoes/GEM830406,default,pd.html?cgid=D2M1G10C13'
driver.get(URL)
driver.implicitly_wait(5) # wait until content is loaded
Call the variable and you can access its content:
jsonData = driver.execute_script('return priceAvailabilityJSON')
print(jsonData.get('productAvailability'))
driver.close()
Output
{'G006386138': {'availability': {'backorderable': False, 'inStockQty': 6, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386139': {'availability': {'backorderable': False, 'inStockQty': 2, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386140': {'availability': {'backorderable': False, 'inStockQty': 9, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386141': {'availability': {'backorderable': False, 'inStockQty': 5, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386142': {'availability': {'backorderable': False, 'inStockQty': 2, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 
'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386143': {'availability': {'backorderable': False, 'inStockQty': 28, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}, 'G006386144': {'availability': {'backorderable': False, 'inStockQty': 7, 'instock': True, 'isBackorder': False, 'level': 'instock'}, 'badgesInformation': {'backorderInformation': {'backorderMessage': '', 'backorderableMessage': '', 'displayBackorderMessage': False}, 'displayLowStockBadge': False}, 'price': {'available': True, 'list': {'currency': 'EUR', 'decimalPrice': '27.0', 'formatted': '⏠27.00', 'value': 27}, 'vat': 16}}}
I'm struggling to access the nested 'is_bot' attribute inside 'from' of the update object below:
{'message': {'caption_entities': [],
'channel_chat_created': False,
'chat': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'type': 'private',
'username': 'rodrigoformi'},
'date': 1516040454,
'delete_chat_photo': False,
'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
'from': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'},
'group_chat_created': False,
'message_id': 145,
'new_chat_member': None,
'new_chat_members': [],
'new_chat_photo': [],
'photo': [],
'supergroup_chat_created': False,
'text': '/invite'},
'update_id': 241263441}
What I have Tried:
update.get('message',{}).get('from',{}).is_bot
This returns AttributeError: 'Message' object has no attribute 'get'
update["message"]["from"]["is_bot"]
This one returns TypeError: 'NoneType' object is not subscriptable
Don't know what to do anymore. Any help?
Try this:
d = {'message': {'caption_entities': [],
'channel_chat_created': False,
'chat': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'type': 'private',
'username': 'rodrigoformi'},
'date': 1516040454,
'delete_chat_photo': False,
'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
'from': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'},
'group_chat_created': False,
'message_id': 145,
'new_chat_member': None,
'new_chat_members': [],
'new_chat_photo': [],
'photo': [],
'supergroup_chat_created': False,
'text': '/invite'},
'update_id': 241263441}
is_bot = d.get('message').get('from').get('is_bot')
print (is_bot) # False
When I or my students struggle with getting to a deeply nested value, I often step through it one level at a time to ensure that I am getting and seeing what I expect. As several commenters noted, your second version works fine, as is, so there must be some nuance in the original code that we don't see. Here is a step down to the key you care about:
In [45]: update = {'message': {'caption_entities': [],
...: 'channel_chat_created': False,
...: 'chat': {'first_name': 'Rodrigo Formighieri',
...: 'id': 446924384,
...: 'type': 'private',
...: 'username': 'rodrigoformi'},
...: 'date': 1516040454,
...: 'delete_chat_photo': False,
...: 'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
...: 'from': {'first_name': 'Rodrigo Formighieri',
...: 'id': 446924384,
...: 'is_bot': False,
...: 'language_code': 'pt-BR',
...: 'username': 'rodrigoformi'},
...: 'group_chat_created': False,
...: 'message_id': 145,
...: 'new_chat_member': None,
...: 'new_chat_members': [],
...: 'new_chat_photo': [],
...: 'photo': [],
...: 'supergroup_chat_created': False,
...: 'text': '/invite'},
...: 'update_id': 241263441}
...:
In [46]: update['message']
Out[46]:
{'caption_entities': [],
'channel_chat_created': False,
'chat': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'type': 'private',
'username': 'rodrigoformi'},
'date': 1516040454,
'delete_chat_photo': False,
'entities': [{'length': 7, 'offset': 0, 'type': 'bot_command'}],
'from': {'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'},
'group_chat_created': False,
'message_id': 145,
'new_chat_member': None,
'new_chat_members': [],
'new_chat_photo': [],
'photo': [],
'supergroup_chat_created': False,
'text': '/invite'}
In [47]: update['message']['from']
Out[47]:
{'first_name': 'Rodrigo Formighieri',
'id': 446924384,
'is_bot': False,
'language_code': 'pt-BR',
'username': 'rodrigoformi'}
In [48]: update['message']['from']['is_bot']
Out[48]: False
SOLVED:
update = ast.literal_eval(str(update))
update["message"]["from"]["is_bot"] #false
I had to cast the update object to a string and then run ast.literal_eval on it to get a plain dict from the damn thing.