Beautiful soup doesn't get all elements - python

I'm trying to get all the street addresses that are on the right side of the page (https://www.zillow.com/homes/San-Francisco,-CA_rb/), but instead of getting all of them I only get 9.
from bs4 import BeautifulSoup
import requests
# Browser-like request headers sent along with the page request.
header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
}

# Download the search page and parse the HTML that the server returned.
response = requests.get(
    "https://www.zillow.com/homes/San-Francisco,-CA_rb/", headers=header
)
data = response.text
soup = BeautifulSoup(data, "html.parser")

# Print every <address> element present in the parsed document.
tag_adress = soup.find_all('address')
for address_tag in tag_adress:
    print(address_tag)

The site uses an API to load the data; I got the URL from the browser's dev tools. The script displays 500 addresses (the page states there are 500 agent listings).
import requests
import json
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5026.0 Safari/537.36 Edg/103.0.1254.0"
# obtained url from dev tools
url = "https://www.zillow.com/search/GetSearchPageState.htm?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22San%20Francisco%2C%20CA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-122.63417331103516%2C%22east%22%3A-122.23248568896484%2C%22south%22%3A37.70660374673871%2C%22north%22%3A37.84391640339095%7D%2C%22mapZoom%22%3A12%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A20330%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22isAllHomes%22%3A%7B%22value%22%3Atrue%7D%2C%22sortSelection%22%3A%7B%22value%22%3A%22days%22%7D%7D%2C%22isListVisible%22%3Atrue%7D&wants={%22cat1%22:[%22mapResults%22]}&requestId=2"


def extract_addresses(results):
    """Return the usable street addresses from a list of map results.

    Args:
        results: iterable of dicts, one per listing. Each may carry an
            "address" key.

    Returns:
        A list of address strings in input order. Entries with no "address"
        key, an empty address, or the "--" placeholder are skipped.
    """
    addresses = []
    for item in results:
        # .get() so a listing without an "address" key is skipped instead of
        # aborting the whole run with a KeyError.
        address = item.get("address")
        if address and address != "--":
            addresses.append(address)
    return addresses


if __name__ == "__main__":
    # A timeout keeps the script from hanging forever if the server stalls.
    page = requests.get(url, headers={"User-Agent": useragent}, timeout=30)
    page.raise_for_status()
    data = json.loads(page.content)
    results = data["cat1"]["searchResults"]["mapResults"]
    print(f"found {len(results)} results")
    for address in extract_addresses(results):
        print(address)
Outputs:
found 500 results
1160 Mission St, San Francisco, CA
1000 N Point St, San Francisco, CA
750 Van Ness Ave, San Francisco, CA
3131 Pierce St, San Francisco, CA
2655 Bush St, San Francisco, CA
1288 Howard St, San Francisco, CA
765 Market St, San Francisco, CA
10 Innes Ct, San Francisco, CA
51 Innes Ct, San Francisco, CA
...

Related

How to submit query to extract a table in a .aspx page with python. 2022

I want to scrape data from https://www.nasdaqtrader.com/trader.aspx?id=TradeHalts. I tried different approaches, like this, this, and this.
I can scrape static pages, but I still don't understand the .aspx format very well. Here is the code I took from the first reference link:
import urllib
from bs4 import BeautifulSoup
# Browser-like headers copied from the referenced example.
# NOTE(review): this dict is never passed to any call in this snippet.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Origin': 'http://www.indiapost.gov.in',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://www.nitt.edu/prm/nitreg/ShowRes.aspx',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}


# Opener subclass whose ``version`` attribute is set to a browser
# User-Agent string.
class MyOpener(urllib.request.FancyURLopener):
    version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'


myopener = MyOpener()
url = 'https://www.nasdaqtrader.com/Trader.aspx?id=TradeHalts'

# first HTTP request without form data
f = myopener.open(url)
soup = BeautifulSoup(f)

# parse and retrieve two vital form values
viewstate_attrs = {"type": "hidden", "name": "__VIEWSTATE"}
eventvalidation_attrs = {"type": "hidden", "name": "__EVENTVALIDATION"}
viewstate = soup.findAll("input", viewstate_attrs)
eventvalidation = soup.findAll("input", eventvalidation_attrs)

# NOTE(review): the findAll() results themselves (not the inputs' "value"
# attributes) are what gets url-encoded here; confirm this is intended.
formData = (
    ('__EVENTVALIDATION', eventvalidation),
    ('__VIEWSTATE', viewstate),
    ('__VIEWSTATEENCRYPTED', ''),
)
encodedFields = urllib.parse.urlencode(formData)

# second HTTP request with form data
f = myopener.open(url, encodedFields)

# We use BeautifulSoup
soup = BeautifulSoup(f)
print(soup.content)
I cannot find the table information in the content. What am I missing?
To get the data as a pandas DataFrame you can use the following example:
import requests
import pandas as pd
from io import StringIO
# RPC endpoint that the Trade Halts page is queried through.
url = "https://www.nasdaqtrader.com/RPCHandler.axd"

headers = {
    "Referer": "https://www.nasdaqtrader.com/trader.aspx?id=TradeHalts",
}

# JSON-RPC style request body naming the method to invoke.
payload = {
    "id": 2,
    "method": "BL_TradeHalt.GetTradeHalts",
    "params": "[]",
    "version": "1.1",
}

# A timeout keeps the script from blocking forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before the body is decoded.
response = requests.post(url, json=payload, headers=headers, timeout=30)
response.raise_for_status()
reply = response.json()

# The "result" field is handed to read_html, i.e. treated as HTML markup;
# wrapping it in StringIO passes it as a file-like object, not a path/URL.
data = StringIO(reply["result"])
df = pd.read_html(data)[0]
print(df.head(10).to_markdown(index=False))
Prints:
| Halt Date | Halt Time | Issue Symbol | Issue Name | Market | Reason Codes | Pause Threshold Price | Resumption Date | Resumption Quote Time | Resumption Trade Time |
|---|---|---|---|---|---|---|---|---|---|
| 07/06/2022 | 15:57:38 | COMSP | 9.25% Srs A Cmltv Redm Prf Stk | NASDAQ | LUDP | nan | 07/06/2022 | 15:57:38 | nan |
| 07/06/2022 | 12:51:35 | BRPMU | B. Riley Principal 150 Merg Ut | NASDAQ | LUDP | nan | 07/06/2022 | 12:51:35 | 12:56:35 |
| 07/06/2022 | 12:06:06 | VACC | Vaccitech plc ADS | NASDAQ | LUDP | nan | 07/06/2022 | 12:06:06 | 12:16:06 |
| 07/06/2022 | 11:15:10 | USEA | United Maritime Corp Cm St | NASDAQ | LUDP | nan | 07/06/2022 | 11:15:10 | 11:29:25 |
| 07/06/2022 | 10:28:53 | USEA | United Maritime Corp Cm St | NASDAQ | LUDP | nan | 07/06/2022 | 10:28:53 | 10:43:30 |
| 07/06/2022 | 10:18:19 | USEA | United Maritime Corp Cm St | NASDAQ | LUDP | nan | 07/06/2022 | 10:18:19 | 10:28:19 |
| 07/06/2022 | 09:41:43 | GAMB | Gambling.com Group Os | NASDAQ | LUDP | nan | 07/06/2022 | 09:41:43 | 09:46:43 |
| 07/06/2022 | 09:37:16 | USEA | United Maritime Corp Cm St | NASDAQ | LUDP | nan | 07/06/2022 | 09:37:16 | 10:17:41 |
| 07/06/2022 | 09:31:15 | JJN | iPathA Series B Bloomberg Nickel Subindex Total Return ETN | NYSE Arca | M | nan | 07/06/2022 | 09:36:15 | 09:36:15 |
| 07/06/2022 | 09:31:17 | AMTI | Applied Molecular Transport Cm | NASDAQ | LUDP | nan | 07/06/2022 | 09:31:17 | 09:36:17 |

How do I get data from a function or var inside a script tag using Python?

<script defer="">
window.__CURRENT_SITE__ = window.__CURRENT_SITE__ || "videoblocks";
window.__CURRENT_PATH__ = window.__CURRENT_PATH__ || "\/video\/stock\/overheated-young-african-american-lady-suffering-from-high-temperature-indoors-belnttknlk6yv56x6";
window.__CURRENT_SEARCH_PARAMS__ = window.__CURRENT_SEARCH_PARAMS__ || "?";
(function() {
var initialState8277 = {"auth":{"isLoggedIn":false,"isMobile":false,"user":null,"subscription":null,"primarySubscription":null,"videoSubscription":null,"audioSubscription":null,"imageSubscription":null,"permissions":{"makerHooksVisible":false,"hasFolderAccess":false},"featureFlags":{"licenseRevampTest":false,"shouldShowMakerActionsHook":false}},"details":{"stockItem":{"id":10852901,"contentClass":"video","contentId":338169628,"assetId":"SBV-338169628","title":"Overheated young african american lady suffering from high temperature indoors.","description":null,"detailsUrl":"\/video\/stock\/overheated-young-african-american-lady-suffering-from-high-temperature-indoors-belnttknlk6yv56x6","previewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/Sz9LmzxmBjyscquj0\/videoblocks-overheated-young-african-american-lady-suffering-from-high-temperature-indoors_bxo5ta1nu__b14441d66874772d2a7b34dde20873a9__P360.mp4","smallPreviewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/Sz9LmzxmBjyscquj0\/videoblocks-overheated-young-african-american-lady-suffering-from-high-temperature-indoors_bxo5ta1nu__b14441d66874772d2a7b34dde20873a9__P180.mp4","thumbnailUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/Sz9LmzxmBjyscquj0\/videoblocks-overheated-young-african-american-lady-suffering-from-high-temperature-indoors_bxo5ta1nu_thumbnail-180_01.jpg","isMarketPlaceItem":false,"contributorPortalId":"BelnTTkNLk6yv56x6","distributionType":"RS","expirationDate":null,"shouldStockItemHaveNewFlag":false,"shouldStockItemHaveStaffPickFlag":false,"dateAdded":"2020-02-23 05:02:16","contentType":"footage","dateDistributionTypeUpdated":"2020-02-23 
15:02:16","isActive":true,"mediumImageUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/Sz9LmzxmBjyscquj0\/videoblocks-overheated-young-african-american-lady-suffering-from-high-temperature-indoors_bxo5ta1nu_thumbnail-360_01.jpg","duration":14,"is360":false,"isTemplate":false,"collapsedSetId":"","numCollapses":null,"rating":0,"downloads":1247,"hasTalentReleased":true,"hasPropertyReleased":false,"keywords":["african","african american","alone","biracial","discomfort","emotion","expression","female","from","girl","heat","high temperature","home","hot","indoors","lady","millennial","mixed","multiracial","overheated","people","person","portrait","problem","room","sit","sofa","stressed","suffer","suffering","uncomfortable","unhappy","unpleasant","unwell","upset","woman","young"],"hasAlphaChannel":false,"maxResolution":null,"isEditorial":false,"categories":[{"id":10,"groupId":189304,"urlId":"people","name":"People","description":"Download royalty free stock footage video clips of people.","type":"category","source":"auto","searchIndexable":true,"searchType":"footage","active":true,"dateAdded":"2014-10-19T23:35:05.000Z","dateUpdated":"2021-01-21T16:11:04.441Z","contributorId":null,"stockContributorId":null,"position":null,"isHomepageCollection":null,"showInApi":false}],"contributor":{"username":"fizkes","portfolioLink":"https:\/\/www.storyblocks.com\/video\/portfolio\/fizkes"}},"stockItemFormats":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1501 MB","filename":"videoblocks-overheated-young-african-american-lady-suffering-from-high-temperature-indoors_Bxo5Ta1NU.mov","resolution":"4096 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/4KMP4","downloadUrl":"\/video\/download\/346730940\/4KMP4","exportUrl":"\/video\/cloud-export\/346730940\/4KMP4","frameRate":"25 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"159.4 
MB","filename":"videoblocks-61a620ba428c7c51d487da72_stqmh0u9tk_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/HDMOV","downloadUrl":"\/video\/download\/346730940\/HDMOV","exportUrl":"\/video\/cloud-export\/346730940\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.9 MB","filename":"videoblocks-61a620ba428c7c51d487da72_stqmh0u9tk_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/HDMP4","downloadUrl":"\/video\/download\/346730940\/HDMP4","exportUrl":"\/video\/cloud-export\/346730940\/HDMP4","frameRate":"25 fps","codec":"h264"}],"selectedAcquisitionOption":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1342.4 MB","filename":"videoblocks-61a620ba428c7c51d487da72_STQMh0U9tK.mov","resolution":"3840 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/4KMOV","downloadUrl":"\/video\/download\/346730940\/4KMOV","exportUrl":"\/video\/cloud-export\/346730940\/4KMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"17.7 MB","filename":"videoblocks-61a620ba428c7c51d487da72_stqmh0u9tk_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/4KMP4","downloadUrl":"\/video\/download\/346730940\/4KMP4","exportUrl":"\/video\/cloud-export\/346730940\/4KMP4","frameRate":"25 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"159.4 MB","filename":"videoblocks-61a620ba428c7c51d487da72_stqmh0u9tk_1080__D.mov","resolution":"1920 x 
1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/HDMOV","downloadUrl":"\/video\/download\/346730940\/HDMOV","exportUrl":"\/video\/cloud-export\/346730940\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.9 MB","filename":"videoblocks-61a620ba428c7c51d487da72_stqmh0u9tk_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346730940\/HDMP4","downloadUrl":"\/video\/download\/346730940\/HDMP4","exportUrl":"\/video\/cloud-export\/346730940\/HDMP4","frameRate":"25 fps","codec":"h264"}],"isFavorite":false,"topTags":[],"stockItemArtists":[],"moods":[],"genres":[]},{"stockItem":{"id":346653750,"contentClass":"video","contentId":346653750,"assetId":"SBV-346653750","title":"High blood pressure. Close up portrait of sad unhappy african american lady suffering from acute headache","description":null,"detailsUrl":"\/video\/stock\/high-blood-pressure-close-up-portrait-of-sad-unhappy-african-american-lady-suffering-from-acute-headache-h3wb0l6zkktivcy0i","previewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-613f65163396b06b98621838_hh3xcgtft__1911c24512c9600378e44fd3fc60226a__P360.mp4","smallPreviewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-613f65163396b06b98621838_hh3xcgtft__1911c24512c9600378e44fd3fc60226a__P180.mp4","thumbnailUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-613f65163396b06b98621838_hh3xcgtft_thumbnail-180_01.jpg","isMarketPlaceItem":false,"contributorPortalId":"H3WB0l6zKktivcy0i","distributionType":"RS","expirationDate":null,"shouldStockItemHaveNewFlag":false,"shouldStockItemHaveStaffPickFlag":false,"dateAdded":"2021-09-13 12:37:20","contentType":"footage","dateDistributionTypeUpdated":"2021-09-13 
16:37:21","isActive":true,"mediumImageUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-613f65163396b06b98621838_hh3xcgtft_thumbnail-360_01.jpg","duration":16,"is360":false,"isTemplate":false,"collapsedSetId":"","numCollapses":null},"stockItemFormats":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1338.2 MB","filename":"videoblocks-613f65163396b06b98621838_Hh3XCgTft.mov","resolution":"3840 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/4KMOV","downloadUrl":"\/video\/download\/346653750\/4KMOV","exportUrl":"\/video\/cloud-export\/346653750\/4KMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"20.3 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/4KMP4","downloadUrl":"\/video\/download\/346653750\/4KMP4","exportUrl":"\/video\/cloud-export\/346653750\/4KMP4","frameRate":"25 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"328.2 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/HDMOV","downloadUrl":"\/video\/download\/346653750\/HDMOV","exportUrl":"\/video\/cloud-export\/346653750\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.6 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_1080__D.mp4","resolution":"1920 x 
1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/HDMP4","downloadUrl":"\/video\/download\/346653750\/HDMP4","exportUrl":"\/video\/cloud-export\/346653750\/HDMP4","frameRate":"25 fps","codec":"h264"}],"selectedAcquisitionOption":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1338.2 MB","filename":"videoblocks-613f65163396b06b98621838_Hh3XCgTft.mov","resolution":"3840 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/4KMOV","downloadUrl":"\/video\/download\/346653750\/4KMOV","exportUrl":"\/video\/cloud-export\/346653750\/4KMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"20.3 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/4KMP4","downloadUrl":"\/video\/download\/346653750\/4KMP4","exportUrl":"\/video\/cloud-export\/346653750\/4KMP4","frameRate":"25 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"328.2 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/HDMOV","downloadUrl":"\/video\/download\/346653750\/HDMOV","exportUrl":"\/video\/cloud-export\/346653750\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.6 MB","filename":"videoblocks-613f65163396b06b98621838_hh3xcgtft_1080__D.mp4","resolution":"1920 x 
1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346653750\/HDMP4","downloadUrl":"\/video\/download\/346653750\/HDMP4","exportUrl":"\/video\/cloud-export\/346653750\/HDMP4","frameRate":"25 fps","codec":"h264"}],"isFavorite":false,"topTags":[],"stockItemArtists":[],"moods":[],"genres":[]},{"stockItem":{"id":10898291,"contentClass":"video","contentId":338435819,"assetId":"SBV-338435819","title":"Unhealthy young african american woman suffering from flu.","description":null,"detailsUrl":"\/video\/stock\/unhealthy-young-african-american-woman-suffering-from-flu-soi03trjikaihfomm","previewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/Sz9LmzxmBjyscquj0\/videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi__56c4021384112fbb32db144fbc3fdac6__P360.mp4","smallPreviewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/Sz9LmzxmBjyscquj0\/videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi__56c4021384112fbb32db144fbc3fdac6__P180.mp4","thumbnailUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/Sz9LmzxmBjyscquj0\/videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_thumbnail-180_01.jpg","isMarketPlaceItem":false,"contributorPortalId":"SOI03trjIkaihfomm","distributionType":"RS","expirationDate":null,"shouldStockItemHaveNewFlag":false,"shouldStockItemHaveStaffPickFlag":false,"dateAdded":"2020-05-22 13:33:01","contentType":"footage","dateDistributionTypeUpdated":"2020-05-22 21:33:01","isActive":true,"mediumImageUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/Sz9LmzxmBjyscquj0\/videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_thumbnail-360_01.jpg","duration":12,"is360":false,"isTemplate":false,"collapsedSetId":"","numCollapses":null},"stockItemFormats":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1193 
MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_BdBa3YHsI.mov","resolution":"4096 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/4KMOV","downloadUrl":"\/video\/download\/10898291\/4KMOV","exportUrl":"\/video\/cloud-export\/10898291\/4KMOV","frameRate":"29.97 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"12.8 MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/4KMP4","downloadUrl":"\/video\/download\/10898291\/4KMP4","exportUrl":"\/video\/cloud-export\/10898291\/4KMP4","frameRate":"29.97 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"189.6 MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/HDMOV","downloadUrl":"\/video\/download\/10898291\/HDMOV","exportUrl":"\/video\/cloud-export\/10898291\/HDMOV","frameRate":"29.97 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.4 MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/HDMP4","downloadUrl":"\/video\/download\/10898291\/HDMP4","exportUrl":"\/video\/cloud-export\/10898291\/HDMP4","frameRate":"29.97 fps","codec":"h264"}],"selectedAcquisitionOption":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"1193 
MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_BdBa3YHsI.mov","resolution":"4096 x 2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/4KMOV","downloadUrl":"\/video\/download\/10898291\/4KMOV","exportUrl":"\/video\/cloud-export\/10898291\/4KMOV","frameRate":"29.97 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"12.8 MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/10898291\/4KMP4","downloadUrl":"\/video\/download\/10898291\/4KMP4","exportUrl":"\/video\/cloud-export\/10898291\/4KMP4","frameRate":"29.97 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"189.6 MB","filename":"videoblocks-unhealthy-young-african-american-woman-suffering-from-flu_bdba3yhsi_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/HDMOV","downloadUrl":"\/video\/download\/346673065\/HDMOV","exportUrl":"\/video\/cloud-export\/346673065\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"2.5 MB","filename":"videoblocks-615ade16105bea1663ce70d6_h6a8bj_vk_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/HDMP4","downloadUrl":"\/video\/download\/346673065\/HDMP4","exportUrl":"\/video\/cloud-export\/346673065\/HDMP4","frameRate":"25 fps","codec":"h264"}],"selectedAcquisitionOption":[{"id":null,"label":"4KMOV","prettyLabel":"4K MOV","filesize":"860.1 MB","filename":"videoblocks-615ade16105bea1663ce70d6_H6a8bj_VK.mov","resolution":"3840 x 
2160","fileExtension":"mov","quality":"4K","formatName":"4KMOV","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/4KMOV","downloadUrl":"\/video\/download\/346673065\/4KMOV","exportUrl":"\/video\/cloud-export\/346673065\/4KMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"4KMP4","prettyLabel":"4K MP4","filesize":"9.4 MB","filename":"videoblocks-615ade16105bea1663ce70d6_h6a8bj_vk_2160__D.mp4","resolution":"3840 x 2160","fileExtension":"mp4","quality":"4K","formatName":"4KMP4","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/4KMP4","downloadUrl":"\/video\/download\/346673065\/4KMP4","exportUrl":"\/video\/cloud-export\/346673065\/4KMP4","frameRate":"25 fps","codec":"h264"},{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"133.6 MB","filename":"videoblocks-615ade16105bea1663ce70d6_h6a8bj_vk_1080__D.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/HDMOV","downloadUrl":"\/video\/download\/346673065\/HDMOV","exportUrl":"\/video\/cloud-export\/346673065\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"2.5 MB","filename":"videoblocks-615ade16105bea1663ce70d6_h6a8bj_vk_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/346673065\/HDMP4","downloadUrl":"\/video\/download\/346673065\/HDMP4","exportUrl":"\/video\/cloud-export\/346673065\/HDMP4","frameRate":"25 fps","codec":"h264"}],"isFavorite":false,"topTags":[],"stockItemArtists":[],"moods":[],"genres":[]},{"stockItem":{"id":11122603,"contentClass":"video","contentId":346560856,"assetId":"SBV-346560856","title":"Young african american lady suffering from strong abdominal ache, lying on white bed wearing pajamas and 
frowning","description":null,"detailsUrl":"\/video\/stock\/young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning-rb-eiawtokqs0wd2g","previewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d__b6866d051bcc3a7bbf67d50c5f8d939b__P360.mp4","smallPreviewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d__b6866d051bcc3a7bbf67d50c5f8d939b__P180.mp4","thumbnailUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d_thumbnail-180_01.jpg","isMarketPlaceItem":false,"contributorPortalId":"rB-EiaWTOkqs0wd2g","distributionType":"RS","expirationDate":null,"shouldStockItemHaveNewFlag":false,"shouldStockItemHaveStaffPickFlag":false,"dateAdded":"2021-07-06 08:23:05","contentType":"footage","dateDistributionTypeUpdated":"2021-07-06 12:23:05","isActive":true,"mediumImageUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d_thumbnail-360_01.jpg","duration":19,"is360":false,"isTemplate":false,"collapsedSetId":"","numCollapses":null},"stockItemFormats":[{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"396.9 MB","filename":"videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_SSjQiTW6d.mov","resolution":"1920 x 
1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/11122603\/HDMOV","downloadUrl":"\/video\/download\/11122603\/HDMOV","exportUrl":"\/video\/cloud-export\/11122603\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.2 MB","filename":"videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/11122603\/HDMP4","downloadUrl":"\/video\/download\/11122603\/HDMP4","exportUrl":"\/video\/cloud-export\/11122603\/HDMP4","frameRate":"25 fps","codec":"h264"}],"selectedAcquisitionOption":[{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"396.9 MB","filename":"videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_SSjQiTW6d.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/11122603\/HDMOV","downloadUrl":"\/video\/download\/11122603\/HDMOV","exportUrl":"\/video\/cloud-export\/11122603\/HDMOV","frameRate":"25 fps","codec":"prores"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"3.2 MB","filename":"videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_ssjqitw6d_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/11122603\/HDMP4","downloadUrl":"\/video\/download\/11122603\/HDMP4","exportUrl":"\/video\/cloud-export\/11122603\/HDMP4","frameRate":"25 
fps","codec":"h264"}],"isFavorite":false,"topTags":[],"stockItemArtists":[],"moods":[],"genres":[]},{"stockItem":{"id":11126650,"contentClass":"video","contentId":346565766,"assetId":"SBV-346565766","title":"Young african american lady suffering from strong abdominal ache, lying on white bed wearing pajamas and frowning","description":null,"detailsUrl":"\/video\/stock\/young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning-sbr7rwkpukr09hxdi","previewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_rbumswkpu__cb16a49f7af5326267be1b675680fc9f__P360.mp4","smallPreviewUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/watermarks\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_rbumswkpu__cb16a49f7af5326267be1b675680fc9f__P180.mp4","thumbnailUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_rbumswkpu_thumbnail-180_01.jpg","isMarketPlaceItem":false,"contributorPortalId":"SBR7rwKpukr09hxdi","distributionType":"RS","expirationDate":null,"shouldStockItemHaveNewFlag":false,"shouldStockItemHaveStaffPickFlag":false,"dateAdded":"2021-07-12 02:45:58","contentType":"footage","dateDistributionTypeUpdated":"2021-07-12 
06:45:58","isActive":true,"mediumImageUrl":"https:\/\/dm0qx8t0i9gc9.cloudfront.net\/thumbnails\/video\/SuFLR1_Nwkex4zo1y\/videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_rbumswkpu_thumbnail-360_01.jpg","duration":19,"is360":false,"isTemplate":false,"collapsedSetId":"","numCollapses":null},"stockItemFormats":[{"id":null,"label":"HDMOV","prettyLabel":"HD MOV","filesize":"396.9 MB","filename":"videoblocks-young-african-american-lady-suffering-from-strong-abdominal-ache-lying-on-white-bed-wearing-pajamas-and-frowning_rBumSwKpu.mov","resolution":"1920 x 1080","fileExtension":"mov","quality":"HD","formatName":"HDMOV","downloadAjaxUrl":"\/video\/download-ajax\/4267085\/HDMOV","downloadUrl":"\/video\/download\/4267085\/HDMOV","exportUrl":"\/video\/cloud-export\/4267085\/HDMOV","frameRate":"25 fps","codec":"mjpeg"},{"id":null,"label":"HDMP4","prettyLabel":"HD MP4","filesize":"6.6 MB","filename":"videoblocks-a-young-sick-man-suffering-from-influenza-measuring-temperature-and-drinking-medicine_rvme1wxmeb_1080__D.mp4","resolution":"1920 x 1080","fileExtension":"mp4","quality":"HD","formatName":"HDMP4","downloadAjaxUrl":"\/video\/download-ajax\/4267085\/HDMP4","downloadUrl":"\/video\/download\/4267085\/HDMP4","exportUrl":"\/video\/cloud-export\/4267085\/HDMP4","frameRate":"25 
fps","codec":"h264"}],"isFavorite":false,"topTags":[],"stockItemArtists":[],"moods":[],"genres":[]}],"memberDownloadDate":"","makerHooks":{"makerPreCreateProjectRoute":"https:\/\/maker.storyblocks.com\/edit?videoIds=10852901","makerSignupRoute":"https:\/\/www.storyblocks.com\/join\/become-user\/maker\/edit?videoIds=10852901"},"canDownload":false,"mvtPages":{"premiere-pro-templates":"\/video\/premiere-pro-templates","apple-motion-templates":"\/video\/apple-motion-templates","intro":"\/video\/after-effects-templates\/intros","green-screen":"\/video\/footage\/green-screen","lower-thirds":"\/video\/after-effects-templates\/lower-thirds"},"canExportToCloud":false,"cloudExportSubfolders":[]}};
</script>
I am new to Python and bs4.
Here is the JSON data I want to scrape, from that script (script defer > var initialState > {"stockItem":{"id":217104). I only want 217104, or "downloadAjaxUrl":"/video/download-ajax/10852901/4KMP4", or more elements. What should I do?
I have tried these attempts:
import requests
from bs4 import BeautifulSoup

URL = 'https://www.storyblocks.com/video/stock/overheated-young-african-american-lady-suffering-from-high-temperature-indoors-belnttknlk6yv56x6'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    'origin': URL,
}

# Download the page and parse it with the lxml backend.
page = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(page.content, features='lxml')

# First candidate: look the script up through a CSS selector.
script = soup.select('body > div.wrapper > script:nth-child(6)')
# Second candidate: take the <script> tag at index 24 of the document.
function = soup.find_all('script')[24]

print(script)
print(function)
#attempt 1
f = function[30:] # 30 for check
print(f)
#output
return self.attrs[key]
TypeError: unhashable type: 'slice'
#attempt 2
f = function["stockItem"]
print(f)
#output
return self.attrs[key]
KeyError: 'stockItem'
Can anyone please help me?
You can use a regular expression to pull the JSON object out of that script.
import json
import re

import requests
from bs4 import BeautifulSoup

URL = 'https://www.storyblocks.com/video/stock/overheated-young-african-american-lady-suffering-from-high-temperature-indoors-belnttknlk6yv56x6'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    'origin': URL,
}

page = requests.get(URL, headers=HEADERS)
# Stop on an HTTP error instead of trying to parse an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, features='lxml')

# Pick the script by what it contains. A positional lookup such as
# soup.find_all('script')[24] breaks as soon as the page gains or loses a tag.
function = next(
    (tag for tag in soup.find_all('script') if 'var initialState' in str(tag)),
    None,
)
if function is None:
    raise ValueError('no <script> containing "var initialState" was found')

# Capture the object literal assigned to initialState. "." does not match a
# newline, so the greedy group runs to the last "}" on that same line.
match = re.search(r'var initialState.*= ({.*})', str(function))
if match is None:
    raise ValueError('could not extract the initialState JSON from the script')
jsonStr = match.group(1)
jsonData = json.loads(jsonStr)
Then read the fields you need from the parsed data (the last line below is the printed output):
stockItem = jsonData['details']['stockItem']
print(stockItem['id'])
10852901

How to extract data using beautiful soup

import requests
from bs4 import BeautifulSoup
import pandas as pd

baseurl = 'https://locations.atipt.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
}

# Read the /al index page and gather the link of every listed location.
index_page = requests.get('https://locations.atipt.com/al')
index_soup = BeautifulSoup(index_page.content, 'html.parser')
productlinks = []
for listing in index_soup.find_all('ul', class_='list-unstyled'):
    productlinks.extend(
        baseurl + anchor['href'] for anchor in listing.find_all('a', href=True)
    )

# Open each location page and print the text of every <p> found inside the
# <a class="name"> element of each "listing content-card" block.
for page_url in productlinks:
    response = requests.get(page_url, headers=headers)
    page_soup = BeautifulSoup(response.content, 'html.parser')
    for card in page_soup.find_all('div', class_='listing content-card'):
        for paragraph in card.find('a', class_='name').find_all('p'):
            print(paragraph.get_text())
I am trying to extract data, but the code prints nothing. I am trying to extract the text of the p tags; this is one of the pages I am trying to extract them from: https://locations.atipt.com/al/alabaster
A working solution that uses CSS selectors to get the data from the p tags is as follows:
import requests
from bs4 import BeautifulSoup
import pandas as pd

baseurl = 'https://locations.atipt.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
}

# Step 1: gather the location links from the /al index page.
index_response = requests.get('https://locations.atipt.com/al')
index_soup = BeautifulSoup(index_response.content, 'html.parser')
productlinks = []
for menu in index_soup.find_all('ul', class_='list-unstyled'):
    productlinks.extend(
        baseurl + anchor['href'] for anchor in menu.find_all('a', href=True)
    )

# Step 2: for every location page, select the <p> tags with a CSS selector,
# strip surrounding whitespace and non-breaking spaces from each one, and
# print the pieces glued together as one line per page.
for page_url in productlinks:
    page_response = requests.get(page_url, headers=headers)
    page_soup = BeautifulSoup(page_response.content, 'html.parser')
    paragraphs = page_soup.select('div.listing.content-card div:nth-child(2)>p')
    tag = ''.join(
        paragraph.get_text(strip=True).replace('\xa0', '')
        for paragraph in paragraphs
    )
    print(tag)
Output:
634 1st Street NSte 100Alabaster, AL35007
9256 Parkway ESte ABirmingham, AL352061940 28th Ave SBirmingham, AL352095431 Patrick WaySte 101Birmingham, AL35235833 St. Vincent's DrSte 100Birmingham, AL352051401 Doug Baker BlvdSte 104Birmingham, AL35242
1877 Cherokee Ave SWCullman, AL350551301-A Bridge Creek Dr NECullman, AL35055
1821 Beltline Rd SWSte BDecatur, AL35601
4825 Montgomery HwySte 103Dothan, AL36303
550 Fieldstown RdGardendale, AL35071323 Fieldstown Rd, Ste 105Gardendale, AL35071
2804 John Hawkins PkwySte 104Hoover, AL35244
700 Pelham Rd NorthJacksonville, AL36265
1811 Hwy 78 ESte 108 & 109Jasper, AL35501-4081
76359 AL-77Ste CLincoln, AL35096
1 College DriveStation #14Livingston, AL35470
106 6th Street SouthSte AOneonta, AL35121-1823
50 Commons WaySte DOxford, AL36203
301 Huntley PkwyPelham, AL35124
41 Eminence WaySte BPell City, AL35128
124 W Grand AveSte A-4Rainbow City, AL35906
1147 US-231Ste 9 & 10Troy, AL36081
7201 Happy Hollow RdTrussville, AL35173
100 Rice Mine Road LoopSte 102Tuscaloosa, AL354061451 Dr. Edward Hillard DrSte 130Tuscaloosa, AL35401
3735 Corporate Woods DrSte 109Vestavia, AL35242-2296
636 Montgomery HwyVestavia Hills, AL352161539 Montgomery HwySte 111Vestavia Hills, AL35216

How to grab specific items from entire json response api calls

I want to grab only the Symbol and Company Name items from the JSON response, but I am getting
all of the data. How can I get just those two items and store them in a pandas DataFrame?
Base_url
My code:
import requests
import pandas as pd
params = {
'sectorID': 'All',
'_': '1630217365368'}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
def main(url):
    """Request *url* and print every entry of the JSON response's "data" list.

    Uses the module-level ``params`` and ``headers``. The two lists are only
    referenced by the disabled DataFrame attempt kept in the comments below.
    """
    with requests.Session() as session:
        session.headers.update(headers)
        sym = []
        name = []
        response = session.get(url, params=params, headers=headers)
        records = response.json()['data']
        for item in records:
            print(item)
            # sym.append(item['symbol']),
            # name.append(item['lonaName'])
        # df = pd.DataFrame(sym, name, columns=[["Symble","Company name"]])
        # print(df)
main('https://www.saudiexchange.sa/wps/portal/tadawul/market-participants/issuers/issuers-directory/!ut/p/z1/04_Sj9CPykssy0xPLMnMz0vMAfIjo8zi_Tx8nD0MLIy8DTyMXAwczVy9vV2cTY0MnEz1w8EKjIycLQwtTQx8DHzMDYEK3A08A31NjA0CjfWjSNLv7ulnbuAY6OgR5hYWYgzUQpl-AxPi9BvgAI4GhPVHgZXgCwFUBVi8iFcByA9gBXgcWZAbGhoaYZDpma6oCABqndOv/p0/IZ7_NHLCH082KOAG20A6BDUU6K3082=CZ6_NHLCH082K0H2D0A6EKKDC520B5=N/')
you need to fix the way you are creating the dataframe:
import requests
import pandas as pd
params = {
'sectorID': 'All',
'_': '1630217365368'}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
def main(url):
    """Fetch the issuer list from *url* and print symbol / company-name pairs.

    Uses the module-level ``params`` (query string) and ``headers`` (request
    headers). The JSON response is expected to carry a "data" list whose
    entries have a 'symbol' and a 'lonaName' key.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    with requests.Session() as req:
        # Headers set on the session apply to every request made through it,
        # so they do not need to be passed to get() again.
        req.headers.update(headers)
        r = req.get(url, params=params)
        r.raise_for_status()
        rows = r.json()['data']

    # NOTE(review): 'lonaName' is the key the original code reads; it looks
    # like the endpoint's own spelling - confirm against a live response.
    df = pd.DataFrame({
        'symbol': [item['symbol'] for item in rows],
        'longName': [item['lonaName'] for item in rows],
    })
    print(df)
main('https://www.saudiexchange.sa/wps/portal/tadawul/market-participants/issuers/issuers-directory/!ut/p/z1/04_Sj9CPykssy0xPLMnMz0vMAfIjo8zi_Tx8nD0MLIy8DTyMXAwczVy9vV2cTY0MnEz1w8EKjIycLQwtTQx8DHzMDYEK3A08A31NjA0CjfWjSNLv7ulnbuAY6OgR5hYWYgzUQpl-AxPi9BvgAI4GhPVHgZXgCwFUBVi8iFcByA9gBXgcWZAbGhoaYZDpma6oCABqndOv/p0/IZ7_NHLCH082KOAG20A6BDUU6K3082=CZ6_NHLCH082K0H2D0A6EKKDC520B5=N/')
symbol longName
0 1330 Abdullah A. M. Al-Khodari Sons Co.
1 4001 Abdullah Al Othaim Markets Co.
2 4191 Abdullah Saad Mohammed Abo Moati for Bookstore...
3 1820 Abdulmohsen Alhokair Group for Tourism and Dev...
4 2330 Advanced Petrochemical Co.
.. ... ...
199 3020 Yamama Cement Co.
200 3060 Yanbu Cement Co.
201 2290 Yanbu National Petrochemical Co.
202 3007 Zahrat Al Waha for Trading Co.
203 2240 Zamil Industrial Investment Co.
To get all data from the site, you can use their API:
import requests
import pandas as pd

url = "https://www.saudiexchange.sa/tadawul.eportal.theme.helper/TickerServlet"

# Download the ticker feed and decode its JSON body.
response = requests.get(url)
data = response.json()
# print(json.dumps(data, indent=4))  # debugging aid; needs `import json`

# Turn the list stored under "stockData" into a table, one row per entry.
df = pd.json_normalize(data["stockData"])
print(df)
Prints:
pk_rf_company companyShortNameEn companyShortNameAr companyLongNameEn companyLongNameAr highPrice lowPrice noOfTrades previousClosePrice todaysOpen transactionDate turnOver volumeTraded aveTradeSize change changePercent lastTradePrice transactionDateStr
0 4700 Alkhabeer Income الخبير للدخل Al Khabeer Diversified Income Traded Fund صندوق الخبير للدخل المتنوع المتداول None None 308 None None None 1.293560e+06 142791 463.61 0.01 0.11 9.07 None
1 2030 SARCO المصافي Saudi Arabia Refineries Co. شركة المصافي العربية السعودية None None 877 None None None 1.352797e+07 83391 95.09 -0.40 -0.25 162.20 None
2 2222 SAUDI ARAMCO أرامكو السعودية Saudi Arabian Oil Co. شركة الزيت العربية السعودية None None 4054 None None None 6.034732e+07 1731463 427.10 0.05 0.14 34.90 None
...and so on.
To get only symbol/company name:
print(df[["pk_rf_company", "companyLongNameEn"]])
pk_rf_company companyLongNameEn
0 4700 Al Khabeer Diversified Income Traded Fund
1 2030 Saudi Arabia Refineries Co.
2 2222 Saudi Arabian Oil Co.
...and so on.
It will be way faster if you store data in pandas DataFrame and later process it.
Example Code:
import requests
import pandas as pd
params = {
'sectorID': 'All',
'_': '1630217365368'}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
def main(url):
    """Load the JSON "data" records from *url* and print their first two columns.

    Uses the module-level ``params`` and ``headers``. The first two columns of
    the resulting table are relabelled 'Symbol' and 'Company name'; they are
    chosen by position, not by key.
    """
    with requests.Session() as session:
        session.headers.update(headers)
        response = session.get(url, params=params, headers=headers)
        records = response.json()['data']

    full_table = pd.DataFrame(records)
    first_two = full_table.iloc[:, :2]
    first_two.columns = ['Symbol', 'Company name']
    print(first_two)
main('https://www.saudiexchange.sa/wps/portal/tadawul/market-participants/issuers/issuers-directory/!ut/p/z1/04_Sj9CPykssy0xPLMnMz0vMAfIjo8zi_Tx8nD0MLIy8DTyMXAwczVy9vV2cTY0MnEz1w8EKjIycLQwtTQx8DHzMDYEK3A08A31NjA0CjfWjSNLv7ulnbuAY6OgR5hYWYgzUQpl-AxPi9BvgAI4GhPVHgZXgCwFUBVi8iFcByA9gBXgcWZAbGhoaYZDpma6oCABqndOv/p0/IZ7_NHLCH082KOAG20A6BDUU6K3082=CZ6_NHLCH082K0H2D0A6EKKDC520B5=N/')
Output:

How can one use Beautiful Soup to get information from all the sub url's under a certain url?

My use case is trying to get all the emails from sub-URLs like https://blueprint.uchicago.edu/organization/acacouncil under a parent URL: https://blueprint.uchicago.edu/organizations.
I know the general form of the email will be xyz@xyz.com, so locating the email for a single URL is easy enough. But when it comes to doing that for all the sub-URLs I'm a bit lost.
No sense in using beautifulsoup here as you can fetch the data directly from the api. First you'll need to know how many organizations there are so that you can use that in the query. Then by grabbing the 'WebsiteKey' or the organization id, you can iterate through the api to pull the emails. You can store in a dictionary, table, print out, etc. Not sure what you really want as the output.
import requests
import pandas as pd

url = 'https://blueprint.uchicago.edu/api/discovery/search/organizations'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
payload = {
    'orderBy[0]': 'UpperName asc',
    'top': '',
    'filter': '',
    'query': '',
    'skip': '0',
}

# One session for the whole run: the script issues one request per
# organization, so reusing the connection avoids a new handshake each time.
with requests.Session() as session:
    session.headers.update(headers)

    # The first request is only used to learn how many organizations exist.
    response = session.get(url, params=payload)
    response.raise_for_status()
    data = response.json()
    # The count annotation is spelled with "@"; a "#" key raises KeyError.
    totalCount = data['@odata.count']

    # Second request: same query, but asking for every organization at once.
    payload['top'] = str(totalCount)
    response = session.get(url, params=payload)
    response.raise_for_status()
    data = response.json()

    # Organization name -> the identifiers needed to query its detail record.
    organizations = {
        each['Name']: {'id': each['Id'], 'WebsiteKey': each['WebsiteKey']}
        for each in data['value']
    }

    # Fetch each organization's detail record and keep its email (may be None).
    emails = {}
    for name, each in organizations.items():
        detail_url = 'https://blueprint.uchicago.edu/api/discovery/organization/bykey/%s' % each['WebsiteKey']
        response = session.get(detail_url)
        response.raise_for_status()
        email = response.json()['email']
        emails[name] = email
        print(f'{name:<70}: {email}')

df = pd.DataFrame(list(emails.items()), columns=['Organization', 'Email'])
df.to_csv('file.csv', index=False)
Output:
{'A Cappella Council': 'uchicagoacappella#gmail.com', 'ACLU University of Chicago Law Chapter': 'dhbabrams#uchicago.edu', 'Active Minds at the University of Chicago': 'activemindsuchicago#gmail.com', 'African and Caribbean Student Association': 'cvleito#uchicago.edu', 'Aikido Kokikai': 'nahmadc#uchicago.edu', 'Alpha Kappa Psi': 'edwardchang#uchicago.edu', 'Alpha Phi Omega': 'uchi.apo.president#gmail.com', 'American Civil Liberties Union at University of Chicago': 'acluboard#lists.uchicago.edu', 'American Constitution Society': 'acs#law.uchicago.edu', 'American Medical Student Association': None, 'American Red Cross of University of Chicago': 'rkhouri#uchicago.edu', 'Amnesty International': 'eckere#uchicago.edu', 'Animal Legal Defense Fund - The University of Chicago Law School': 'ntschepik#uchicago.edu', 'Animal Welfare Society': 'petrucci#uchicago.edu', 'Anthropology Students Association': 'frevelolarotta#uchicago.edu', 'Apsara': 'uchicagoapsara#gmail.com', 'Arab Student Association': 'malakarafa#uchicago.edu', ...}

Categories

Resources