Loading JSON from a Beautifulsoup Object - python

I am currently making a scraper app, but before going all out with the app and bringing in other frameworks like Discord.py, I first had to scrape the site, which proved quite difficult. The site that I am trying to scrape is Fiverr. Long story short, I had to get some cookies to log in with Python Requests. The big issue now is that the data I need to scrape comes in the form of JSON, which I don't know much about. I managed to select the JavaScript in question, but once I load it, it gives an error: "TypeError: the JSON object must be str, bytes or bytearray, not Tag". I specifically need the "rows" part of the JSON data.
I'm not quite certain how to fix this and have read and tried some similar questions here. I will appreciate any help.
import requests
from bs4 import BeautifulSoup
import re
import json
# Irrelevant to the question
class JobClass:
    """Simple record holding the fields of one job entry.

    All fields default to ``None`` except ``link``, which defaults to the
    requests page URL.
    """

    def __init__(
        self,
        date=None,
        buyer=None,
        request=None,
        duration=None,
        budget=None,
        link="https://www.fiverr.com/users/myusername/requests",
        id=None,
    ):
        # Each argument is stored unchanged on the instance.
        # NOTE: ``id`` shadows the builtin, but it is part of the keyword
        # interface of the constructor, so the name is kept.
        self.date = date
        self.buyer = buyer
        self.request = request
        self.duration = duration
        self.budget = budget
        self.link = link
        self.id = id
# Irrelevant to the question
duplicateSet = set()
scrapedSet = set()
jobObjArr = []
headers = {
# Some private cookies. To get them you just need to use a site like https://curl.trillworks.com/ - it is really a life saver.
# They tell the site who you are so that you are logged in (which is why I deleted this part out of the code).
# NOTE: the dict entries and the closing brace of `headers` were removed from the post together with the cookies.
# Please note that I used "myusername" in the URL. This is going to be different depending on the user.
# Using the requests module, we use the "get" function
# to access the webpage whose URL is passed as an
# argument to this function:
result = requests.get(
'https://www.fiverr.com/users/myusername/requests', headers=headers)
# Now, let us store the page content of the website accessed
# with requests in a variable:
src = result.content
# Now that we have the page source stored, we will use the
# BeautifulSoup module to parse and process the source.
# To do so, we create a BeautifulSoup object based on the
# source variable we created above:
soup = BeautifulSoup(src, "lxml")
# Take the second element matched by the type='text/javascript' selector.
data = soup.select("[type='text/javascript']")[1]
# TypeError: the JSON object must be str, bytes or bytearray, not Tag
# (`data` is the element selected above, not a string, and it is passed to json.loads as-is.)
jsonObject = json.loads(data)
# Here is the output of print(data):
<script type="text/javascript">
document.viewData = {
"dds": {
"subCats": {
"current": {
"text": "All Subcategories",
"val": "-1"
"options": [{
"text": "Web \u0026 Mobile Design",
"val": 151
}, {
"text": "Web Programming",
"val": 140
"results": {
"rows": [{
"type": "none",
"identifier": "5cf132b55e08360011efe633",
"cells": [{
"text": "May 31, 2019",
"type": "date",
"withText": true
}, {
"userPict": "\u003cspan class=\"missing-image-user \"\u003ec\u003c/span\u003e",
"type": "profile-40",
"cssClass": "height95"
}, {
"hintBottom": false,
"text": "My website was hacked and deleted. Need to have it recreated ",
"type": "text-wide",
"tags": [],
"attachment": false
}, {
"text": 1,
"type": "applications",
"alignCenter": true
}, {
"text": "3 days",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "3 days",
"class": "duration"
}, {
"type": "button",
"text": "Remove Request",
"class": "remove-request js-remove-request",
"meta": {
"requestId": "5cf132b55e08360011efe633",
"isProfessional": false
}, {
"text": "---",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "---",
"class": "budget"
}, {
"type": "button",
"text": "Send Offer",
"class": "btn-standard btn-green-grad js-send-offer",
"meta": {
"username": "conto217",
"category": 3,
"subCategory": 151,
"requestId": "5cf132b55e08360011efe633",
"requestText": "My website was hacked and deleted. Need to have it recreated ",
"userPict": "\u003cspan class=\"missing-image-user \"\u003ec\u003c/span\u003e",
"isProfessional": false,
"buyerId": 32969684
}, {
"type": "none",
"identifier": "5cf12f641b6e99000edf1b60",
"cells": [{
"text": "May 31, 2019",
"type": "date",
"withText": true
}, {
"userPict": "\u003cimg src=\"https://fiverr-res.cloudinary.com/t_profile_small,q_auto,f_auto/attachments/profile/photo/648ceb417a85844b25e8bf070a70d9a0-254781561534997516.9743/MyFileName\" alt=\"muazamkhokher\" width=\"40\" height=\"40\"\u003e",
"type": "profile-40",
"cssClass": "height95"
}, {
"hintBottom": false,
"text": "Need mobile ui/ux designer from marvel wireframes",
"type": "text-wide",
"tags": [],
"attachment": false
}, {
"text": 4,
"type": "applications",
"alignCenter": true
}, {
"text": "5 days",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "5 days",
"class": "duration"
}, {
"type": "button",
"text": "Remove Request",
"class": "remove-request js-remove-request",
"meta": {
"requestId": "5cf12f641b6e99000edf1b60",
"isProfessional": false
}, {
"text": "$50",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "$50",
"class": "budget"
}, {
"type": "button",
"text": "Send Offer",
"class": "btn-standard btn-green-grad js-send-offer",
"meta": {
"username": "muazamkhokher",
"category": 3,
"subCategory": 151,
"requestId": "5cf12f641b6e99000edf1b60",
"requestText": "Need mobile ui/ux designer from marvel wireframes",
"userPict": "\u003cimg src=\"https://fiverr-res.cloudinary.com/t_profile_small,q_auto,f_auto/attachments/profile/photo/648ceb417a85844b25e8bf070a70d9a0-254781561534997516.9743/MyFileName\" alt=\"muazamkhokher\" width=\"100\" height=\"100\"\u003e",
"isProfessional": false,
"buyerId": 25478156
I expect the JSON to be loaded in jsonObject, but I get an error: "TypeError: the JSON object must be str, bytes or bytearray, not Tag"
Edit: Here is some code at the end of the print statement. It randomly cuts off for some reason with no ending script tag:
}, {
"type": "none",
"identifier": "5cf1236a959aa5000f1ce094",
"cells": [{
"text": "May 31, 2019",
"type": "date",
"withText": true
}, {
"userPict": "\u003cimg src=\"https://fiverr-res.cloudinary.com/t_profile_small,q_auto,f_auto/profile/photos/30069758/original/Universalco_2a_Cloud.png\" alt=\"clarky2000\" width=\"40\" height=\"40\"\u003e",
"type": "profile-40",
"cssClass": "height95"
}, {
"hintBottom": false,
"text": "Slider revolution slider. 3 slides for a music festival. I can supply a copy what each slide should look like (see attached) and all the individual objects. Anyone can create basic RS slides, but I want this to be dynamic as its for a music festival. We are using the free version of RS if were are required to use the paid version of SL for addons please let us know. Bottom line this must be 3 dynamic slides (using the same background) for a music festival audience. Unlimited revisions is a must.",
"type": "see-more",
"tags": [{
"text": "Graphic UI"
}, {
"text": "Landing Pages"
"attachment": {
"url": "/download/file/1559260800%2Fgig_requests%2Fattachment_f2a5f51b9fb473e8fc7f498929f39e3f",
"name": "Outwith Rotator_1920x1080_1.jpg",
"size": "2.68 MB"
}, {
"text": 2,
"type": "applications",
"alignCenter": true
}, {
"text": "24 hours",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "24 hours",
"class": "duration"
}, {
"type": "button",
"text": "Remove Request",
"class": "remove-request js-remove-request",
"meta": {
"requestId": "5cf1236a959aa5000f1ce094",
"isProfessional": false
}, {
"text": "$23",
"type": "hidden-action",
"actionVisible": false,
"alignCenter": true,
"withText": true,
"buttons": [{
"type": "span",
"text": "$23",
"class": "budget"
}, {
"type": "button",
"text": "Send Of


Nested json files - Python

Good afternoon all,
I've been reading through the various posts regarding reading .json files using pandas, but so far I've not been successful in extracting what I need.
I need to read a specific 'score' from the JSON file; I'll then iterate through all the JSON files I have, as the label is the same in each of them.
In the sample below, how would I read the 'score'? I've tried using the normalise function, but regardless of the argument I pass in, I cannot get any closer.
Part of the json file:
"template_id": "template_fe61177cb0eb4642901b1eae9488fbb4",
"audit_id": "audit_1a0e9ef4a7914286808accb3dcb0700b",
"archived": false,
"created_at": "2022-10-07T08:00:14.021Z",
"modified_at": "2022-10-07T08:05:56.594Z",
"audit_data": {
"score": 10,
"total_score": 11,
"score_percentage": 90.909,
"name": "7 Oct 2022 / Test",
"duration": 240,
"authorship": {
"device_id": "user_65c3799b0f1a48549cacbceca244e1db",
"owner": "test",
"owner_id": "user_65c3799b0f1a48549cacbceca244e1db",
"author": "test",
"author_id": "user_65c3799b0f1a48549cacbceca244e1db"
"date_completed": "2022-10-07T08:05:55.860Z",
"date_modified": "2022-10-07T08:05:56.594Z",
"date_started": "2022-10-07T08:00:13.000Z",
"site": {
"name": "Blue Warehouse"
"template_data": {
"authorship": {
"device_id": "user_4bb896b5308341f7a7543a32f6c1f3ec",
"owner": "test",
"owner_id": "user_4bb896b5308341f7a7543a32f6c1f3ec",
"author": "test",
"author_id": "user_4bb896b5308341f7a7543a32f6c1f3ec"
"metadata": {
"description": "",
"name": "RCS",
"image": {
"date_created": "2022-04-12T13:27:18.852Z",
"file_ext": "png",
"label": "Go \u0026 See icon.PNG",
"media_id": "cf944a4b-7589-47e6-b42a-8d17f06b7031",
"href": "https://1"
"response_sets": {
"5b69aee5-0532-46a4-b2f5-d020d4d5381d": {
"id": "5b69aee5-0532-46a4-b2f5-d020d4d5381d",
"type": "question",
"responses": [
"id": "ef4abf51-3361-46f5-ba04-70c23c85ca20",
"label": "Good",
"colour": "19,133,95",
***"score": 1,***
"enable_score": true
Thanks for your help.
This is done without pandas
import json
# Parse the whole JSON document into plain Python dicts and lists.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open("my_file.json", 'r', encoding="utf-8") as f:
    my_dict = json.load(f)

# Walk down to the first entry of the response set and read its score.
# NOTE(review): in the sample from the question, "response_sets" is listed
# after "template_data"; if it is nested inside that object, the lookup has
# to start with my_dict["template_data"] - verify against the real file.
score = my_dict["response_sets"]["5b69aee5-0532-46a4-b2f5-d020d4d5381d"]["responses"][0]["score"]

Getting specific lines from a print in Python with Spotipy

I am writing some code with Python and Spotipy and I'm relatively new to coding. I have some code that gets all the info about a Spotify playlist and prints it out for me:
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import json
# Playlist to inspect, given as a Spotify URI.
playlist_id = 'spotify:playlist:76CVeJDw2b90up5PgkZXyU'

# Build the API client; the credentials manager is created with no explicit
# arguments.
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Fetch the playlist and print the complete response as indented JSON text.
results = sp.playlist(playlist_id)
print(json.dumps(results, indent=4))
It works well and gives me all the info. My problem is that I only need specifics from the print:
"collaborative": false,
"description": "",
"external_urls": {
"spotify": "https://open.spotify.com/playlist/76CVeJDw2b90up5PgkZXyU"
"followers": {
"href": null,
"total": 0
"href": "https://api.spotify.com/v1/playlists/76CVeJDw2b90up5PgkZXyU?additional_types=track",
"id": "76CVeJDw2b90up5PgkZXyU",
"images": [
"height": 640,
"url": "https://i.scdn.co/image/ab67616d0000b2734a052b99c042dc15f933145b",
"width": 640
"name": "TEST",
"owner": {
"display_name": "Name",
"external_urls": {
"spotify": "https://open.spotify.com/user/myname"
"href": "https://api.spotify.com/v1/users/myname",
"id": "Myname",
"type": "user",
"uri": "spotify:user:kovizsombor"
"primary_color": null,
"public": true,
"snapshot_id": "MixmMGE0MDgxNDQ1ZGVlNmE4MThiMmQwODMwYWU0OTI3YzkyOGJhOWIz",
"tracks": {
"href": "https://api.spotify.com/v1/playlists/76CVeJDw2b90up5PgkZXyU/tracks?offset=0&limit=100&additional_types=track",
"items": [
"added_at": "2020-05-17T10:00:11Z",
"added_by": {
"external_urls": {
"spotify": "https://open.spotify.com/user/kovizsombor"
"href": "https://api.spotify.com/v1/users/kovizsombor",
"id": "kovizsombor",
"type": "user",
"uri": "spotify:user:kovizsombor"
"is_local": false,
"primary_color": null,
"track": {
"album": {
"album_type": "album",
"artists": [
"external_urls": {
"spotify": "https://open.spotify.com/artist/0PFtn5NtBbbUNbU9EAmIWF"
"href": "https://api.spotify.com/v1/artists/0PFtn5NtBbbUNbU9EAmIWF",
"id": "0PFtn5NtBbbUNbU9EAmIWF",
"name": "TOTO",
"type": "artist",
"uri": "spotify:artist:0PFtn5NtBbbUNbU9EAmIWF"
"external_urls": {
"spotify": "https://open.spotify.com/album/62U7xIHcID94o20Of5ea4D"
"href": "https://api.spotify.com/v1/albums/62U7xIHcID94o20Of5ea4D",
"id": "62U7xIHcID94o20Of5ea4D",
"images": [
"height": 640,
"url": "https://i.scdn.co/image/ab67616d0000b2734a052b99c042dc15f933145b",
"width": 640
"height": 300,
"url": "https://i.scdn.co/image/ab67616d00001e024a052b99c042dc15f933145b",
"width": 300
"height": 64,
"url": "https://i.scdn.co/image/ab67616d000048514a052b99c042dc15f933145b",
"width": 64
"name": "Toto IV",
"release_date": "1982-04-08",
"release_date_precision": "day",
"total_tracks": 10,
"type": "album",
"uri": "spotify:album:62U7xIHcID94o20Of5ea4D"
"artists": [
"external_urls": {
"spotify": "https://open.spotify.com/artist/0PFtn5NtBbbUNbU9EAmIWF"
"href": "https://api.spotify.com/v1/artists/0PFtn5NtBbbUNbU9EAmIWF",
"id": "0PFtn5NtBbbUNbU9EAmIWF",
"name": "TOTO",
"type": "artist",
"uri": "spotify:artist:0PFtn5NtBbbUNbU9EAmIWF"
"available_markets": [
"disc_number": 1,
"duration_ms": 295893,
"episode": false,
"explicit": false,
"external_ids": {
"isrc": "USSM19801941"
"external_urls": {
"spotify": "https://open.spotify.com/track/2374M0fQpWi3dLnB54qaLX"
"href": "https://api.spotify.com/v1/tracks/2374M0fQpWi3dLnB54qaLX",
"id": "2374M0fQpWi3dLnB54qaLX",
"is_local": false,
"name": "Africa",
"popularity": 83,
"preview_url": "https://p.scdn.co/mp3-preview/dd78dafe31bb98f230372c038a126b8808f9349b?cid=d568e7073a38465bba48268ea9f10153",
"track": true,
"track_number": 10,
"type": "track",
"uri": "spotify:track:2374M0fQpWi3dLnB54qaLX"
"video_thumbnail": {
"url": null
"limit": 100,
"next": null,
"offset": 0,
"previous": null,
"total": 1
"type": "playlist",
"uri": "spotify:playlist:76CVeJDw2b90up5PgkZXyU"
From this long print I somehow need to extract the Artist and the song title and preferably make it into a variable. Also not sure how this would work if there are multiple songs in a playlist.
It's also a solution if I can print out only the Artist and the title of the song without printing out all the information.
Based on your posted example it appears that you have only 1 song named "Africa" by artist "TOTO". Copying the track to have two songs, I added another track with two artists for testing the arrays.
If that JSON is loaded into a variable named results, then (as @xcmkz said) you have a Python dictionary and can process it accordingly.
Try working with the following to traverse your dict, collecting the artists of each song into lists:
# Map each song title to the list of names of its artists.
song_dict = {}
for track in results['tracks']['items']:
    song_name = track["track"]["name"]
    # A track can have several artists, so gather the name of every one of
    # them; without this step each song would map to an empty list.
    a2 = [t1["name"] for t1 in track['track']['artists']]
    song_dict[song_name] = a2

print('Dictionary of Songs and Artists:')
for k, v in song_dict.items():
    print(f'Song --> {k}, by --> {", ".join(v)}')
Dictionary of Songs and Artists:
Song --> Africa, by --> TOTO
Song --> Just Another Silly Song, by --> Artist 2, Artist 3
sp.playlist returns a dictionary, so you can simply access its values by their keys. For example:
>>> results['name']
JSON is a data serialization format, ie a standardized way of representing objects as pure text and parsing them back from the text. json.dumps therefore converts the dictionary object to a string of text. This is useful if you want to for example save the results to a file and load it back later. You don't need it to access contents from results.
(This is a playlist though—you will need to get information on each song/track to get its artist and name.)

django - iterate between json response objects

I have a response object that I am receiving from an API call. The response contains several objects that are returned in a single call. What I want to do is grab information from each of the returned objects and store it in variables to use within the application. I know how to grab info from a JSON response when it returns a single object, but I am getting confused with multiple objects... I know how to automate the iteration with something like a for loop, but it won't iterate.
here is a sample response that I am getting:
I want to grab the _id from both items.
'user':"<class 'synapse_pay_rest.models.users.user.User'>(id=..622d)",
'name':'Charlie Brown LLC'
'address':'PO BOX 85139, RICHMOND, VA, US',
'bank_long_name':'CAPITAL ONE N.A.',
'bank_name':'CAPITAL ONE N.A.',
'name_on_account':' ',
'nickname':'SynapsePay Test Savings Account - 8902',
<class 'synapse_pay_rest.models.nodes.ach_us_node.AchUsNode'>({
'user':"<class 'synapse_pay_rest.models.users.user.User'>(id=..622d)",
'name':'Charlie Brown LLC'
'address':'PO BOX 85139, RICHMOND, VA, US',
'bank_long_name':'CAPITAL ONE N.A.',
'bank_name':'CAPITAL ONE N.A.',
'name_on_account':' ',
'nickname':'SynapsePay Test Checking Account - 8901',
Here is the code that I have:
It won't grab any values...
The iteration needs to be done over the nodes variable, which is the JSON response object.
# Looks up the Synapse user that belongs to the logged-in user's profile,
# requests that user's nodes filtered by type 'ACH-US', and returns them.
def listedLinkAccounts(request):
currentUser = loggedInUser(request)
currentProfile = Profile.objects.get(user = currentUser)
# The profile's synapse_id is passed (as a string) to SynapseUser.by_id.
user_id = currentProfile.synapse_id
synapseUser = SynapseUser.by_id(client, str(user_id))
# NOTE(review): the closing brace of `options` is missing in the posted code.
options = {
'type': 'ACH-US',
nodes = Node.all(synapseUser, **options)
response = nodes
# NOTE(review): this indexes the whole result with "_id"; the value is not
# used afterwards, and the function returns `nodes` unchanged.
_id = response["_id"]
return nodes
Here is a sample API response from the API documentation:
"error_code": "0",
"http_code": "200",
"limit": 20,
"node_count": 5,
"nodes": [
"_id": "594e5c694d1d62002f17e3dc",
"_links": {
"self": {
"href": "https://uat-api.synapsefi.com/v3.1/users/594e0fa2838454002ea317a0/nodes/594e5c694d1d62002f17e3dc"
"allowed": "CREDIT-AND-DEBIT",
"client": {
"id": "589acd9ecb3cd400fa75ac06",
"name": "SynapseFI"
"extra": {
"other": {},
"supp_id": "ABC124"
"info": {
"account_num": "7443",
"address": "PLACE DE LA REPUBLIQUE 4 CROIX 59170 FR",
"balance": {
"amount": "0.00",
"currency": "USD"
"bank_long_name": "3 SUISSES INTERNATIONAL",
"name_on_account": " ",
"nickname": "Some Account"
"is_active": true,
"timeline": [
"date": 1498307689471,
"note": "Node created."
"date": 1498307690130,
"note": "Unable to send micro deposits as node type is not ACH-US."
"type": "WIRE-INT",
"user_id": "594e0fa2838454002ea317a0"
"page": 1,
"page_count": 1,
"success": true

How to get the ID of the parent comment (facebook Graph API)?

When sending a request
I get an answer
"data": [
"created_time": "2015-06-17T10:32:04+0000",
"from": {
"name": "First Name",
"id": "12345678987654"
"message": "Message",
"can_remove": true,
"like_count": 0,
"user_likes": false,
"id": "123456898765432_123456789765433"
"paging": {
"cursors": {
"before": "...",
"after": "..."
"summary": {
"order": "chronological",
"total_count": 2532
But if the comment is a second-level comment (a reply), I do not know the ID of its parent comment, so I cannot reply to it programmatically.
Maybe there are some arguments that can be specified to get additional data about the comment?
I found that there is also a metadata=1 argument,
but it only shows additional counter information about the object, and there is still no parent ID.
I just had this problem and it seems that you can get the parent comment.
(the ACCESS_TOKEN was omitted)
"message": "Sim, está acontecendo com várias pessoas. A Valve vai arrumar logo, provavelmente",
"id": "803009099803144_803075496463171",
"from": {
"name": "Jonathan Gouvea",
"id": "1218897258138073"
"parent": {
"created_time": "2016-01-28T19:58:39+0000",
"from": {
"name": "César Rodryguês",
"id": "552640601571460"
"message": "Dota ta fechando o de vcs quando vai entra na partida ?",
"id": "803009099803144_803068649797189"

How to Convert Json Value of Http Post Parameter to Python Dict in Django?

I am using Django to receive and process push notifications from the foursquare real-time api. Each checkin is pushed as a POST request to my server containing a single parameter named checkin. I am trying to grab the value of the checkin parameter and convert it to a python dict. However, calling json.loads always results in the following error:
NameError: name 'true' is not defined
I know the json is valid, so I must be doing something wrong.
The code is:
import json
# Handler for the pushed check-in POST request.
def push(request):
# The body only runs when request.is_secure() is true.
if request.is_secure():
# Raw value of the "checkin" POST parameter.
checkin_json = request.POST['checkin']
# NOTE(review): json.load is given request.POST itself here; checkin_json
# from the line above is not used.
checkin = json.load(request.POST)
The body of the post request is:
"checkin =
"id": "4e6fe1404b90c00032eeac34",
"createdAt": 1315955008,
"type": "checkin",
"timeZone": "America/New_York",
"user": {
"id": "1",
"firstName": "Jimmy",
"lastName": "Foursquare",
"photo": "https://foursquare.com/img/blank_boy.png",
"gender": "male",
"homeCity": "New York, NY",
"relationship": "self"
"venue": {
"id": "4ab7e57cf964a5205f7b20e3",
"name": "foursquare HQ",
"contact": {
"twitter": "foursquare"
"location": {
"address": "East Village",
"lat": 40.72809214560253,
"lng": -73.99112284183502,
"city": "New York",
"state": "NY",
"postalCode": "10003",
"country": "USA"
"categories": [
"id": "4bf58dd8d48988d125941735",
"name": "Tech Startup",
"pluralName": "Tech Startups",
"shortName": "Tech Startup",
"icon": "https://foursquare.com/img/categories/building/default.png",
"parents": [
"Professional & Other Places",
"primary": true
"verified": true,
"stats": {
"checkinsCount": 7313,
"usersCount": 565,
"tipCount": 128
"url": "http://foursquare.com"
Try json.loads(checkin_json) instead of json.load(request.POST). Notice the extra 's'.
change checkin = json.load(request.POST) to checkin = json.loads(checkin_json)
On python, boolean values are Capitalized (first letter is uppercase): True/False.
Check this.
Pay attention to these lines:
"primary": true
"verified": true,
Both "true" values are lowercase and need to be capitalized

