Python YouTube API: Retrieve List of Videos

I would like to fetch all videos from the YouTube API for a given channelId using Python. Below is the code I have tried, but I cannot figure out how to append the data in the while loop.
import requests
import json

mykey = 'myGoogleKey'
channelid = 'someRandomChannelId'

# First request
r = requests.get("https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=50&channelId="+channelid+"&order=date&key="+mykey)
json_data = r.json()
nextPageToken = json_data.get("nextPageToken")

# Retrieve all the rest of the pages
while nextPageToken:
    r = requests.get("https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=50&channelId="+channelid+"&order=date&key="+mykey+"&pageToken="+nextPageToken)
    json_data.append(r.json())  # this part needs to be modified/adjusted
    nextPageToken = json_data.get("nextPageToken")

with open('myJsonFile.json', 'w') as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4)

print("All Done!")

json_data.update(r.json())
should do the trick: it merges each new page into the existing dict and refreshes nextPageToken so the loop advances. Note, however, that update() also overwrites the items key on every pass, so to accumulate all videos you should extend json_data["items"] with each page's items rather than replacing it.
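For reference, a minimal end-to-end sketch of the corrected loop (same endpoint, with the placeholder key and channel id from the question; requests' params argument handles the query-string building):

import requests
import json

mykey = 'myGoogleKey'              # placeholder from the question
channelid = 'someRandomChannelId'  # placeholder from the question

base_url = "https://www.googleapis.com/youtube/v3/search"
params = {"part": "snippet", "maxResults": 50,
          "channelId": channelid, "order": "date", "key": mykey}

# First request (assumes a successful response containing "items")
json_data = requests.get(base_url, params=params).json()
nextPageToken = json_data.get("nextPageToken")

# Retrieve all the rest of the pages, accumulating their items
while nextPageToken:
    params["pageToken"] = nextPageToken
    page = requests.get(base_url, params=params).json()
    json_data["items"].extend(page.get("items", []))
    nextPageToken = page.get("nextPageToken")

with open('myJsonFile.json', 'w') as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4)
print("All Done!")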

Related

Python loop through a table of URLs and get data from those websites

I'm trying to loop through a table that holds all the websites I want to get JSON data from.
import urllib.request
import json

def getResponse(url):
    operUrl = urllib.request.urlopen(url)
    if operUrl.getcode() == 200:
        data = operUrl.read()
        jsonData = json.loads(data)
    else:
        print("Error receiving data", operUrl.getcode())
    return jsonData

def main():
    urlData = ("site1.com")
    # Needs to loop all the URLs inside
    # urlData = ["site1.com", "site2.com"] and so on
    jsonData = getResponse(urlData)
    for i in jsonData["descriptions"]:
        description = f'{i["groups"][0]["variables"][0]["content"]}'
        data = {'mushrooms': [{'description': description}]}

    with open('data.json', 'w') as f:
        json.dump(data, f, ensure_ascii=False)

    print(json.dumps(data, indent=4, ensure_ascii=False))
After running, it saves the data into a data.json file, which looks like this:
{
    "mushrooms": [
        {
            "description": "example how it looks"
        }
    ]
}
It does get the data from the one site, but I want it to loop through multiple URLs held in a table.
EDIT:
I got it working by looping like this:
for url in urlData:
with all my website links held in a table called urlData, appending the data found on those sites into another table, and dumping that data to a JSON file once the loop is done.
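A minimal sketch of that approach, assuming the same JSON structure and placeholder URLs as the question:

import json
import urllib.request

def getResponse(url):
    operUrl = urllib.request.urlopen(url)
    if operUrl.getcode() == 200:
        return json.loads(operUrl.read())
    print("Error receiving data", operUrl.getcode())
    return None

def main():
    urlData = ["site1.com", "site2.com"]  # placeholder URLs from the question
    mushrooms = []                        # collects results from every site
    for url in urlData:
        jsonData = getResponse(url)
        if jsonData is None:
            continue
        for i in jsonData["descriptions"]:
            description = i["groups"][0]["variables"][0]["content"]
            mushrooms.append({'description': description})
    # dump everything once, after the loop
    with open('data.json', 'w') as f:
        json.dump({'mushrooms': mushrooms}, f, ensure_ascii=False, indent=4)

main()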

JSON output includes literal \n rather than line breaks

How can the JSON output be formatted in a way that doesn't include literal \n text, and instead shows these as new lines, as intended? The saved output file contains the whole document as one quoted string full of \n escape sequences, whereas printing the same data shows properly indented JSON with real line breaks, which is what it should look like.
import requests
import json

def get_all_time_entries():
    url_address = "***"
    headers = {
        "Authorization": "***",
        "api-version": "2020-01-31"
    }
    # find out total number of pages
    r = requests.get(url=url_address, headers=headers).json()
    total_pages = 605
    # results will be appended to this list
    all_time_entries = []
    # loop through all pages and return JSON object
    for page in range(1, total_pages):
        url = "***" + str(page)
        response = requests.get(url=url, headers=headers).json()
        all_time_entries.append(response)
        page += 1
    # prettify JSON
    data = json.dumps(all_time_entries, sort_keys=True, indent=4)
    return data

#print(get_all_time_entries())

with open('appointmentsHistory.json', 'w', encoding='utf-8') as f:
    # note that I use dump method, not dumps
    json.dump(get_all_time_entries(), f, sort_keys=True, indent=4)
json.dumps() transforms the data into a string, and json.dump() then writes the JSON representation of that string to the file, escaping every newline along the way.
To resolve this, remove the json.dumps() call from the get_all_time_entries() function and return the list itself. json.dump() takes the list directly and serializes it into a JSON string for you.
import requests
import json

def get_all_time_entries():
    url_address = "***"
    headers = {
        "Authorization": "***",
        "api-version": "2020-01-31"
    }
    # find out total number of pages
    r = requests.get(url=url_address, headers=headers).json()
    total_pages = 605
    # results will be appended to this list
    all_time_entries = []
    # loop through all pages, collecting the raw JSON objects
    for page in range(1, total_pages):
        url = "***" + str(page)
        response = requests.get(url=url, headers=headers).json()
        all_time_entries.append(response)
    # return the list itself, not a pre-serialized string
    return all_time_entries

with open('appointmentsHistory.json', 'w', encoding='utf-8') as f:
    # note that I use the dump method, not dumps
    json.dump(get_all_time_entries(), f, sort_keys=True, indent=4)
json.dump() takes an object; the original code was passing it an already-encoded JSON string.
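The double encoding is easy to reproduce on a toy list, and it is exactly where the literal \n in the saved file comes from (demo.json is a throwaway file name):

import json

data = [{"page": 1}, {"page": 2}]
pretty = json.dumps(data, indent=4)   # a str containing real newlines
print(pretty)                         # prints indented JSON, as expected

with open('demo.json', 'w') as f:
    json.dump(pretty, f)              # serializes the *string*, escaping each newline

with open('demo.json') as f:
    print(f.read())                   # "[\n    {\n        \"page\": 1\n    },\n ..."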

JSON file not exporting correctly from a Python string array in a for loop

I am making a website scraper that looks for specific keywords on a site; if it finds a keyword, it marks the website as productive or unproductive, then exports that info into a JSON file so I can read it from C# later. The problem is that the JSON export method is not exporting correctly, and I am new to both Python and JSON.
I have tried every syntax I could find, but nothing works the way I want it to.
This is my Python code:
from bs4 import BeautifulSoup
import requests
import json
import os
import pandas as pd
import numpy as np

# this scrapes the websites that I give it
def scrap_website():
    pages = ['https://www.youtube.com/watch?v=tHI2NIaNrGk',
             'https://aljazeera.com', 'https://www.svt.se']
    for site in pages:
        page = requests.get(site)
        soup = BeautifulSoup(page.content, 'html.parser')
        if 'Game' in soup.getText():
            is_productive = False
        else:
            is_productive = True
        json_map = {}
        json_map["websiteLink"] = site
        json_map["isProductive"] = is_productive
        json_text = json.dumps(json_map)
        data = []
        data.append(json_text)
        with open('data\\data.json', 'a') as json_file:
            json.dump(data, json_file, indent=2,
                      separators=(", ", " "), sort_keys=True)

scrap_website()
This is the JSON output that I am getting:
[
  "{\"websiteLink\": \"https://www.youtube.com/watch?v=tHI2NIaNrGk\", \"isProductive\": false}"
][
  "{\"websiteLink\": \"https://aljazeera.com\", \"isProductive\": true}"
][
  "{\"websiteLink\": \"https://www.svt.se\", \"isProductive\": true}"
]
You can gather all the nodes into the same JSON document: declare a list before the loop and append each website as a node of that list.
Declare the list before the loop:
json_map = []
For each website, build a node and append it:
site_node = {}
site_node["websiteLink"] = site
site_node["isProductive"] = is_productive
json_map.append(site_node)
Finally, save the JSON once, outside the loop:
with open('data.json', 'w') as outFile:
    json.dump(json_map, outFile)
Afterwards you can load the JSON array and iterate over it with a simple for loop.
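Putting those pieces into the original scraper, a corrected sketch (same sites and 'Game' keyword as the question) might look like this:

from bs4 import BeautifulSoup
import requests
import json

def scrap_website():
    pages = ['https://www.youtube.com/watch?v=tHI2NIaNrGk',
             'https://aljazeera.com', 'https://www.svt.se']
    json_map = []                            # one array for all sites
    for site in pages:
        page = requests.get(site)
        soup = BeautifulSoup(page.content, 'html.parser')
        site_node = {
            'websiteLink': site,
            'isProductive': 'Game' not in soup.getText(),
        }
        json_map.append(site_node)
    with open('data.json', 'w') as outFile:  # write once, after the loop
        json.dump(json_map, outFile, indent=2, sort_keys=True)

scrap_website()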

Multiple requests to the same URL

I would like to download stock float data using the following code, which then writes it to a JSON file.
import requests
import json

filename = 'float.json'
url = "https://api.iextrading.com/1.0/stock/aapl/stats"
response = requests.get(url).json()
data = (response['symbol'], response['float'])
with open(filename, 'a+') as outfile:
    json.dump(data, outfile, indent=4)
Now I would like to download the data for multiple stocks, so where it says "aapl" in the URL, I would like to have multiple tickers, like "tsla", "goog", etc.
Could someone explain to me how to achieve this?
Kind Regards
Can you try the following:
import json
import requests

stk_list = ['aapl', 'tsla', 'goog']
for stk_name in stk_list:
    try:
        url = "https://api.iextrading.com/1.0/stock/{}/stats".format(stk_name)
        response = requests.get(url).json()
        data = (response['symbol'], response['float'])
        filename = 'float_{}.json'.format(stk_name)
        with open(filename, 'a+') as outfile:
            json.dump(data, outfile, indent=4)
    except Exception:
        # skip tickers that fail (network error or missing field)
        pass
You could also try:
import json
import requests

stocks = ['aapl', 'goog']
base_url = 'https://api.iextrading.com/1.0/stock/{}/stats'
filename = 'float.json'
for stock in stocks:
    try:
        response = requests.get(base_url.format(stock))
    except requests.RequestException:
        continue
    if response.status_code == 200:
        response_json = response.json()
        data = (response_json['symbol'], response_json['float'])
        with open(filename, 'a+') as outfile:
            json.dump(data, outfile, indent=4)
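Note that both snippets open the file in 'a+' mode, so repeated runs keep appending dumps to the same file, which produces concatenated JSON documents. If you want a single valid JSON document instead, a sketch that collects the results first and writes once (same endpoint as above):

import json
import requests

stocks = ['aapl', 'tsla', 'goog']
base_url = 'https://api.iextrading.com/1.0/stock/{}/stats'

floats = {}                 # symbol -> float, gathered across all tickers
for stock in stocks:
    try:
        response = requests.get(base_url.format(stock))
    except requests.RequestException:
        continue
    if response.status_code == 200:
        response_json = response.json()
        floats[response_json['symbol']] = response_json['float']

# one write, one valid JSON document
with open('float.json', 'w') as outfile:
    json.dump(floats, outfile, indent=4)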

How to save scraped JSON data with key-value pairs into a JSON file using Python

I scraped a site for data and was able to print the desired output in JSON format, but it contains only the values. What I actually need is the data with both key and value pairs, saved into an output.json file so I can insert it into my Django database. Here is what I have done so far:
import requests
import json
URL = 'http://tfda.go.tz/portal/en/trader_module/trader_module/getRegisteredDrugs_products'
payload = "draw=1&columns%5B0%5D%5Bdata%5D=no&columns%5B0%5D%5Bname%5D=&columns%5B0%5D%5Bsearchable%5D=True&columns%5B0%5D%5Borderable%5D=True&columns%5B0%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B0%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B1%5D%5Bdata%5D=certificate_no&columns%5B1%5D%5Bname%5D=&columns%5B1%5D%5Bsearchable%5D=True&columns%5B1%5D%5Borderable%5D=True&columns%5B1%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B1%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B2%5D%5Bdata%5D=brand_name&columns%5B2%5D%5Bname%5D=&columns%5B2%5D%5Bsearchable%5D=True&columns%5B2%5D%5Borderable%5D=True&columns%5B2%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B2%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B3%5D%5Bdata%5D=classification_name&columns%5B3%5D%5Bname%5D=&columns%5B3%5D%5Bsearchable%5D=True&columns%5B3%5D%5Borderable%5D=True&columns%5B3%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B3%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B4%5D%5Bdata%5D=common_name&columns%5B4%5D%5Bname%5D=&columns%5B4%5D%5Bsearchable%5D=True&columns%5B4%5D%5Borderable%5D=True&columns%5B4%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B4%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B5%5D%5Bdata%5D=dosage_form&columns%5B5%5D%5Bname%5D=&columns%5B5%5D%5Bsearchable%5D=True&columns%5B5%5D%5Borderable%5D=True&columns%5B5%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B5%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B6%5D%5Bdata%5D=product_strength&columns%5B6%5D%5Bname%5D=&columns%5B6%5D%5Bsearchable%5D=True&columns%5B6%5D%5Borderable%5D=True&columns%5B6%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B6%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B7%5D%5Bdata%5D=registrant&columns%5B7%5D%5Bname%5D=&columns%5B7%5D%5Bsearchable%5D=True&columns%5B7%5D%5Borderable%5D=True&columns%5B7%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B7%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B8%5D%5Bdata%5D=registrant_country&columns%5B8%5D%5Bname%5D=&columns%5B8%5D%5Bsearchable%5D=True&columns%5B8%5D%5Borderable%5D=True&columns%5B8%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B8%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B9%5D%5Bdata%5D=manufacturer&columns%5B9%5D%5Bname%5D=&columns%5B9%5D%5Bsearchable%5D=True&columns%5B9%5D%5Borderable%5D=True&columns%5B9%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B9%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B10%5D%5Bdata%5D=manufacturer_country&columns%5B10%5D%5Bname%5D=&columns%5B10%5D%5Bsearchable%5D=True&columns%5B10%5D%5Borderable%5D=True&columns%5B10%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B10%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B11%5D%5Bdata%5D=expiry_date&columns%5B11%5D%5Bname%5D=&columns%5B11%5D%5Bsearchable%5D=True&columns%5B11%5D%5Borderable%5D=True&columns%5B11%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B11%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B12%5D%5Bdata%5D=id&columns%5B12%5D%5Bname%5D=&columns%5B12%5D%5Bsearchable%5D=True&columns%5B12%5D%5Borderable%5D=True&columns%5B12%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B12%5D%5Bsearch%5D%5Bregex%5D=False&order%5B0%5D%5Bcolumn%5D=0&order%5B0%5D%5Bdir%5D=asc&start=0&length=3911&search%5Bvalue%5D=&search%5Bregex%5D=False"
with requests.Session() as s:
    s.headers = {"User-Agent": "Mozilla/5.0"}
    s.headers.update({'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
    res = s.post(URL, data=payload)
    for data in res.json()['data']:
        serial = data['no']
        certno = data['certificate_no']
        brndname = data['brand_name']
        clssification = data['classification_name']
        common_name = data['common_name']
        dosage_form = data['dosage_form']
        expiry_date = data['expiry_date']
        manufacturer = data['manufacturer']
        manufacturer_country = data['manufacturer_country']
        product_strength = data['product_strength']
        registrant = data['registrant']
        registrant_country = data['registrant_country']
        output = (serial, certno, brndname, clssification, common_name,
                  dosage_form, expiry_date, manufacturer, manufacturer_country,
                  product_strength, registrant, registrant_country)
        my_list = output
        json_str = json.dumps(my_list)
        print(json_str)
The printed output is a JSON array of bare values with no keys attached.
So how do I approach this?
Use json.dump:
with open(path, 'w') as file:
    [...]
    json.dump(myPythonList, file)
    file.write('\n')
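To get key-value pairs rather than bare values, a sketch that continues the question's session (reusing its res response): build one dict per row from the same fields, collect the dicts in a list, and dump that list once:

# fields already extracted in the question's loop
fields = ['no', 'certificate_no', 'brand_name', 'classification_name',
          'common_name', 'dosage_form', 'expiry_date', 'manufacturer',
          'manufacturer_country', 'product_strength', 'registrant',
          'registrant_country']

# one dict per row keeps the keys alongside the values
records = [{field: row[field] for field in fields} for row in res.json()['data']]

with open('output.json', 'w') as file:
    json.dump(records, file, indent=4)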
