Python YouTube API: Retrieve List of Videos

I would like to fetch all videos from the YouTube API for a given channelId using Python. Below is the code I have tried, but I cannot figure out how to append the data in the while loop.
import requests
import json

mykey = 'myGoogleKey'
channelid = 'someRandomChannelId'

# First request
r = requests.get("https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=50&channelId="+channelid+"&order=date&key="+mykey)
json_data = r.json()
nextPageToken = json_data.get("nextPageToken")

# Retrieve all the rest of the pages
while nextPageToken:
    r = requests.get("https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=50&channelId="+channelid+"&order=date&key="+mykey+"&pageToken="+nextPageToken)
    json_data.append(r.json())  # this part needs to be modified/adjusted
    nextPageToken = json_data.get("nextPageToken")

with open('myJsonFile.json', 'w') as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4)

print("All Done!")

json_data.update(r.json())
should do the trick: it merges each new page into the existing dict and refreshes nextPageToken so the loop advances. Note, however, that update() also overwrites the items key on every pass, so to accumulate all videos you should extend json_data["items"] with each page's items rather than replacing it.
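For reference, a minimal end-to-end sketch of the corrected loop (same endpoint, with the placeholder key and channel id from the question; requests' params argument handles the query-string building):

import requests
import json

mykey = 'myGoogleKey'              # placeholder from the question
channelid = 'someRandomChannelId'  # placeholder from the question

base_url = "https://www.googleapis.com/youtube/v3/search"
params = {"part": "snippet", "maxResults": 50,
          "channelId": channelid, "order": "date", "key": mykey}

# First request (assumes a successful response containing "items")
json_data = requests.get(base_url, params=params).json()
nextPageToken = json_data.get("nextPageToken")

# Retrieve all the rest of the pages, accumulating their items
while nextPageToken:
    params["pageToken"] = nextPageToken
    page = requests.get(base_url, params=params).json()
    json_data["items"].extend(page.get("items", []))
    nextPageToken = page.get("nextPageToken")

with open('myJsonFile.json', 'w') as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4)
print("All Done!")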

Related

Python loop through a table of URLs and get data from those websites

I'm trying to loop through a table that holds all the websites I want to get JSON data from.
import urllib.request
import json

def getResponse(url):
    operUrl = urllib.request.urlopen(url)
    if operUrl.getcode() == 200:
        data = operUrl.read()
        jsonData = json.loads(data)
    else:
        print("Error receiving data", operUrl.getcode())
    return jsonData

def main():
    urlData = ("site1.com")
    # Needs to loop all the URLs inside
    # urlData = ["site1.com", "site2.com"] and so on
    jsonData = getResponse(urlData)
    for i in jsonData["descriptions"]:
        description = f'{i["groups"][0]["variables"][0]["content"]}'
        data = {'mushrooms': [{'description': description}]}

    with open('data.json', 'w') as f:
        json.dump(data, f, ensure_ascii=False)

    print(json.dumps(data, indent=4, ensure_ascii=False))
After running, it saves the data into a data.json file, which looks like this:
{
    "mushrooms": [
        {
            "description": "example how it looks"
        }
    ]
}
It does get the data from the one site, but I want it to loop through multiple URLs held in a table.
EDIT:
I got it working by looping like this:
for url in urlData:
with all my website links held in a table called urlData, appending the data found on those sites into another table, and dumping that data to a JSON file once the loop is done.
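A minimal sketch of that approach, assuming the same JSON structure and placeholder URLs as the question:

import json
import urllib.request

def getResponse(url):
    operUrl = urllib.request.urlopen(url)
    if operUrl.getcode() == 200:
        return json.loads(operUrl.read())
    print("Error receiving data", operUrl.getcode())
    return None

def main():
    urlData = ["site1.com", "site2.com"]  # placeholder URLs from the question
    mushrooms = []                        # collects results from every site
    for url in urlData:
        jsonData = getResponse(url)
        if jsonData is None:
            continue
        for i in jsonData["descriptions"]:
            description = i["groups"][0]["variables"][0]["content"]
            mushrooms.append({'description': description})
    # dump everything once, after the loop
    with open('data.json', 'w') as f:
        json.dump({'mushrooms': mushrooms}, f, ensure_ascii=False, indent=4)

main()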

JSON output includes literal \n rather than line breaks

How can the JSON output be formatted in a way that doesn't include literal \n text, and instead shows these as new lines, as intended? The saved output file contains the whole document as one quoted string full of \n escape sequences, whereas printing the same data shows properly indented JSON with real line breaks, which is what it should look like.
import requests
import json

def get_all_time_entries():
    url_address = "***"
    headers = {
        "Authorization": "***",
        "api-version": "2020-01-31"
    }
    # find out total number of pages
    r = requests.get(url=url_address, headers=headers).json()
    total_pages = 605
    # results will be appended to this list
    all_time_entries = []
    # loop through all pages and return JSON object
    for page in range(1, total_pages):
        url = "***" + str(page)
        response = requests.get(url=url, headers=headers).json()
        all_time_entries.append(response)
        page += 1
    # prettify JSON
    data = json.dumps(all_time_entries, sort_keys=True, indent=4)
    return data

#print(get_all_time_entries())

with open('appointmentsHistory.json', 'w', encoding='utf-8') as f:
    # note that I use dump method, not dumps
    json.dump(get_all_time_entries(), f, sort_keys=True, indent=4)
json.dumps() transforms the data into a string, and json.dump() then writes the JSON representation of that string to the file, escaping every newline along the way.
To resolve this, remove the json.dumps() call from the get_all_time_entries() function and return the list itself. json.dump() takes the list directly and serializes it into a JSON string for you.
import requests
import json

def get_all_time_entries():
    url_address = "***"
    headers = {
        "Authorization": "***",
        "api-version": "2020-01-31"
    }
    # find out total number of pages
    r = requests.get(url=url_address, headers=headers).json()
    total_pages = 605
    # results will be appended to this list
    all_time_entries = []
    # loop through all pages, collecting the raw JSON objects
    for page in range(1, total_pages):
        url = "***" + str(page)
        response = requests.get(url=url, headers=headers).json()
        all_time_entries.append(response)
    # return the list itself, not a pre-serialized string
    return all_time_entries

with open('appointmentsHistory.json', 'w', encoding='utf-8') as f:
    # note that I use the dump method, not dumps
    json.dump(get_all_time_entries(), f, sort_keys=True, indent=4)
json.dump() takes an object; the original code was passing it an already-encoded JSON string.
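The double encoding is easy to reproduce on a toy list, and it is exactly where the literal \n in the saved file comes from (demo.json is a throwaway file name):

import json

data = [{"page": 1}, {"page": 2}]
pretty = json.dumps(data, indent=4)   # a str containing real newlines
print(pretty)                         # prints indented JSON, as expected

with open('demo.json', 'w') as f:
    json.dump(pretty, f)              # serializes the *string*, escaping each newline

with open('demo.json') as f:
    print(f.read())                   # "[\n    {\n        \"page\": 1\n    },\n ..."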

JSON file not exporting correctly from a Python string array in a for loop

I am making a website scraper that looks for specific keywords on a site; if it finds a keyword, it marks the website as productive or unproductive, then exports that info into a JSON file so I can read it from C# later. The problem is that the JSON export method is not exporting correctly, and I am new to both Python and JSON.
I have tried every syntax I could find, but nothing works the way I want it to.
This is my Python code:
from bs4 import BeautifulSoup
import requests
import json
import os
import pandas as pd
import numpy as np

# this scrapes the websites that I give it
def scrap_website():
    pages = ['https://www.youtube.com/watch?v=tHI2NIaNrGk',
             'https://aljazeera.com', 'https://www.svt.se']
    for site in pages:
        page = requests.get(site)
        soup = BeautifulSoup(page.content, 'html.parser')
        if 'Game' in soup.getText():
            is_productive = False
        else:
            is_productive = True
        json_map = {}
        json_map["websiteLink"] = site
        json_map["isProductive"] = is_productive
        json_text = json.dumps(json_map)
        data = []
        data.append(json_text)
        with open('data\\data.json', 'a') as json_file:
            json.dump(data, json_file, indent=2,
                      separators=(", ", " "), sort_keys=True)

scrap_website()
This is the JSON output that I am getting:
[
  "{\"websiteLink\": \"https://www.youtube.com/watch?v=tHI2NIaNrGk\", \"isProductive\": false}"
][
  "{\"websiteLink\": \"https://aljazeera.com\", \"isProductive\": true}"
][
  "{\"websiteLink\": \"https://www.svt.se\", \"isProductive\": true}"
]
You can gather all the nodes into the same JSON document: declare a list before the loop and append each website as a node of that list.
Declare the list before the loop:
json_map = []
For each website, build a node and append it:
site_node = {}
site_node["websiteLink"] = site
site_node["isProductive"] = is_productive
json_map.append(site_node)
Finally, save the JSON once, outside the loop:
with open('data.json', 'w') as outFile:
    json.dump(json_map, outFile)
Afterwards you can load the JSON array and iterate over it with a simple for loop.
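Putting those pieces into the original scraper, a corrected sketch (same sites and 'Game' keyword as the question) might look like this:

from bs4 import BeautifulSoup
import requests
import json

def scrap_website():
    pages = ['https://www.youtube.com/watch?v=tHI2NIaNrGk',
             'https://aljazeera.com', 'https://www.svt.se']
    json_map = []                            # one array for all sites
    for site in pages:
        page = requests.get(site)
        soup = BeautifulSoup(page.content, 'html.parser')
        site_node = {
            'websiteLink': site,
            'isProductive': 'Game' not in soup.getText(),
        }
        json_map.append(site_node)
    with open('data.json', 'w') as outFile:  # write once, after the loop
        json.dump(json_map, outFile, indent=2, sort_keys=True)

scrap_website()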

Multiple requests to the same URL

I would like to download stock float data using the following code, which then writes it to a JSON file.
import requests
import json

filename = 'float.json'
url = "https://api.iextrading.com/1.0/stock/aapl/stats"
response = requests.get(url).json()
data = (response['symbol'], response['float'])
with open(filename, 'a+') as outfile:
    json.dump(data, outfile, indent=4)
Now I would like to download the data for multiple stocks, so where it says "aapl" in the URL, I would like to have multiple tickers, like "tsla", "goog", etc.
Could someone explain to me how to achieve this?
Kind Regards
Can you try the following:
import json
import requests

stk_list = ['aapl', 'tsla', 'goog']
for stk_name in stk_list:
    try:
        url = "https://api.iextrading.com/1.0/stock/{}/stats".format(stk_name)
        response = requests.get(url).json()
        data = (response['symbol'], response['float'])
        filename = 'float_{}.json'.format(stk_name)
        with open(filename, 'a+') as outfile:
            json.dump(data, outfile, indent=4)
    except Exception:
        # skip tickers that fail (network error or missing field)
        pass
You could also try:
import json
import requests

stocks = ['aapl', 'goog']
base_url = 'https://api.iextrading.com/1.0/stock/{}/stats'
filename = 'float.json'
for stock in stocks:
    try:
        response = requests.get(base_url.format(stock))
    except requests.RequestException:
        continue
    if response.status_code == 200:
        response_json = response.json()
        data = (response_json['symbol'], response_json['float'])
        with open(filename, 'a+') as outfile:
            json.dump(data, outfile, indent=4)
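Note that both snippets open the file in 'a+' mode, so repeated runs keep appending dumps to the same file, which produces concatenated JSON documents. If you want a single valid JSON document instead, a sketch that collects the results first and writes once (same endpoint as above):

import json
import requests

stocks = ['aapl', 'tsla', 'goog']
base_url = 'https://api.iextrading.com/1.0/stock/{}/stats'

floats = {}                 # symbol -> float, gathered across all tickers
for stock in stocks:
    try:
        response = requests.get(base_url.format(stock))
    except requests.RequestException:
        continue
    if response.status_code == 200:
        response_json = response.json()
        floats[response_json['symbol']] = response_json['float']

# one write, one valid JSON document
with open('float.json', 'w') as outfile:
    json.dump(floats, outfile, indent=4)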

How to save scraped JSON data with key-value pairs into a JSON file using Python

I scraped a site for data and was able to print the desired output in JSON format, but it contains only the values. What I actually need is the data with both key and value pairs, saved into an output.json file so I can insert it into my Django database. Here is what I have done so far:
import requests
import json
URL = 'http://tfda.go.tz/portal/en/trader_module/trader_module/getRegisteredDrugs_products'
payload = "draw=1&columns%5B0%5D%5Bdata%5D=no&columns%5B0%5D%5Bname%5D=&columns%5B0%5D%5Bsearchable%5D=True&columns%5B0%5D%5Borderable%5D=True&columns%5B0%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B0%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B1%5D%5Bdata%5D=certificate_no&columns%5B1%5D%5Bname%5D=&columns%5B1%5D%5Bsearchable%5D=True&columns%5B1%5D%5Borderable%5D=True&columns%5B1%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B1%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B2%5D%5Bdata%5D=brand_name&columns%5B2%5D%5Bname%5D=&columns%5B2%5D%5Bsearchable%5D=True&columns%5B2%5D%5Borderable%5D=True&columns%5B2%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B2%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B3%5D%5Bdata%5D=classification_name&columns%5B3%5D%5Bname%5D=&columns%5B3%5D%5Bsearchable%5D=True&columns%5B3%5D%5Borderable%5D=True&columns%5B3%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B3%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B4%5D%5Bdata%5D=common_name&columns%5B4%5D%5Bname%5D=&columns%5B4%5D%5Bsearchable%5D=True&columns%5B4%5D%5Borderable%5D=True&columns%5B4%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B4%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B5%5D%5Bdata%5D=dosage_form&columns%5B5%5D%5Bname%5D=&columns%5B5%5D%5Bsearchable%5D=True&columns%5B5%5D%5Borderable%5D=True&columns%5B5%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B5%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B6%5D%5Bdata%5D=product_strength&columns%5B6%5D%5Bname%5D=&columns%5B6%5D%5Bsearchable%5D=True&columns%5B6%5D%5Borderable%5D=True&columns%5B6%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B6%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B7%5D%5Bdata%5D=registrant&columns%5B7%5D%5Bname%5D=&columns%5B7%5D%5Bsearchable%5D=True&columns%5B7%5D%5Borderable%5D=True&columns%5B7%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B7%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B8%5D%5Bdata%5D=registrant_country&columns%5B8%5D%5Bname%5D=&columns%5B8%5D%5Bsearchable%5D=True&columns%5B8%5D%5Borderable%5D=True&columns%5B8%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B8%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B9%5D%5Bdata%5D=manufacturer&columns%5B9%5D%5Bname%5D=&columns%5B9%5D%5Bsearchable%5D=True&columns%5B9%5D%5Borderable%5D=True&columns%5B9%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B9%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B10%5D%5Bdata%5D=manufacturer_country&columns%5B10%5D%5Bname%5D=&columns%5B10%5D%5Bsearchable%5D=True&columns%5B10%5D%5Borderable%5D=True&columns%5B10%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B10%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B11%5D%5Bdata%5D=expiry_date&columns%5B11%5D%5Bname%5D=&columns%5B11%5D%5Bsearchable%5D=True&columns%5B11%5D%5Borderable%5D=True&columns%5B11%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B11%5D%5Bsearch%5D%5Bregex%5D=False&columns%5B12%5D%5Bdata%5D=id&columns%5B12%5D%5Bname%5D=&columns%5B12%5D%5Bsearchable%5D=True&columns%5B12%5D%5Borderable%5D=True&columns%5B12%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B12%5D%5Bsearch%5D%5Bregex%5D=False&order%5B0%5D%5Bcolumn%5D=0&order%5B0%5D%5Bdir%5D=asc&start=0&length=3911&search%5Bvalue%5D=&search%5Bregex%5D=False"
with requests.Session() as s:
    s.headers = {"User-Agent": "Mozilla/5.0"}
    s.headers.update({'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
    res = s.post(URL, data=payload)
    for data in res.json()['data']:
        serial = data['no']
        certno = data['certificate_no']
        brndname = data['brand_name']
        clssification = data['classification_name']
        common_name = data['common_name']
        dosage_form = data['dosage_form']
        expiry_date = data['expiry_date']
        manufacturer = data['manufacturer']
        manufacturer_country = data['manufacturer_country']
        product_strength = data['product_strength']
        registrant = data['registrant']
        registrant_country = data['registrant_country']
        output = (serial, certno, brndname, clssification, common_name,
                  dosage_form, expiry_date, manufacturer, manufacturer_country,
                  product_strength, registrant, registrant_country)
        my_list = output
        json_str = json.dumps(my_list)
        print(json_str)
The printed output is a JSON array of bare values with no keys attached.
So how do I approach this?
Use json.dump:
with open(path, 'w') as file:
    [...]
    json.dump(myPythonList, file)
    file.write('\n')
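To get key-value pairs rather than bare values, a sketch that continues the question's session (reusing its res response): build one dict per row from the same fields, collect the dicts in a list, and dump that list once:

# fields already extracted in the question's loop
fields = ['no', 'certificate_no', 'brand_name', 'classification_name',
          'common_name', 'dosage_form', 'expiry_date', 'manufacturer',
          'manufacturer_country', 'product_strength', 'registrant',
          'registrant_country']

# one dict per row keeps the keys alongside the values
records = [{field: row[field] for field in fields} for row in res.json()['data']]

with open('output.json', 'w') as file:
    json.dump(records, file, indent=4)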
