I get the same output in for loop - python

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import pandas as pd
# Scrape the first results page of saveface.co.uk with Selenium, then visit
# every collected profile link and export all columns to saveface.csv.
s = Service(r"C:\selenium driver\chromedriver.exe")  # raw string: backslashes are literal
driver = webdriver.Chrome(service=s)

# One list per output column; rows are aligned by position.
companies_names = []
persons_names = []
phones_numbers = []
locations = []
opening_hours = []
descriptions = []
websites_links = []
all_profiles = []

driver.get("https://www.saveface.co.uk/search/")
driver.implicitly_wait(10)
blocks = driver.find_elements(By.XPATH, "//div[@class='result clientresult']")
for block in range(30):
    # NOTE(review): each XPath below starts with `//`, so it is evaluated from
    # the document root and not from blocks[block]; every iteration therefore
    # returns the first match on the page.
    company_name = blocks[block].find_element(By.XPATH, "//h3[@class='resulttitle']").text.strip()
    companies_names.append(company_name)
    person_name = blocks[block].find_element(By.XPATH, "//p[@class='name_wrapper']").text.strip()
    persons_names.append(person_name)
    phone_number = blocks[block].find_element(By.XPATH, "//div[@class='searchContact phone']").text.strip()
    phones_numbers.append(phone_number)
    location = blocks[block].find_element(By.XPATH, "//li[@class='cls_loc']").text.strip()
    locations.append(location)
    opening_hour = blocks[block].find_element(By.XPATH, "//li[@class='opening-hours']").text.strip()
    opening_hours.append(opening_hour)
    profile = blocks[block].find_element(By.XPATH, "//a[@class='visitpage']").get_attribute("href")
    all_profiles.append(profile)
    print(company_name, person_name, phone_number, location, opening_hour, profile)
    if block == 29:
        # Last block of the page: click the pagination link and re-read the blocks.
        two_page = driver.find_element(By.XPATH, "//a[@class='facetwp-page']")
        two_page.click()
        driver.implicitly_wait(10)
        blocks = driver.find_elements(By.XPATH, "//div[@class='result clientresult']")

# Second pass: open each profile page for its description and website link.
for i in range(len(all_profiles)):
    driver.get(all_profiles[i])
    description = driver.find_element(By.XPATH, "//div[@class='desc-text-left']").text.strip()
    descriptions.append(description)
    website_link = driver.find_element(By.XPATH, "//a[@class='visitwebsite website']").get_attribute("href")
    websites_links.append(website_link)
    driver.implicitly_wait(10)
driver.close()

df = pd.DataFrame(
    {
        "company_name": companies_names,
        "person_name": persons_names,
        "phone_number": phones_numbers,
        "location": locations,
        "opening_hour": opening_hours,
        "description": descriptions,
        "website_link": websites_links,
        "profile_on_saveface": all_profiles
    }
)
df.to_csv('saveface.csv',index=False)
#print(df)
This is the result:
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/
The Hartley Clinic Clinic Contact: Ailing Jeavons 01256 856289 , , Fleet, RG27 8NZ Monday 8:30 — 17:00 Tuesday 8:30 — 19:00 Wednesday 8:30— 17:00 Thursday 8:30 — 17:00 Friday 8:30 — 15:00 Saturday 9:00 — 17:00 Sunday Closed https://www.saveface.co.uk/clinic/the-hartley-clinic/

To restrict the search to the subtree rooted at the context node, your expression should start with .// so you have to replace // with .// in each of the commands
... = blocks[block].find_element(...)
The meaning of // is to search the document from the document's root, ignoring the context node blocks[block] altogether.
Moreover, notice that not all the blocks have a location as you can see from this image
in this case
location = blocks[block].find_element(By.XPATH, "//li[@class='cls_loc']")
will raise a NoSuchElementException. To avoid this you have to put the command in a try...except... block
UPDATE
Scraping 400 blocks with selenium takes about 1 minute on my computer; I tried with BeautifulSoup and it takes less than 1 second! The slow part is scraping the profiles, because for each of them we have to download a new webpage; however, it is still way faster with BeautifulSoup.
So I wrote a script without using selenium, just BeautifulSoup (you can install it by running pip install beautifulsoup4 in the terminal)
import requests
from bs4 import BeautifulSoup
# Scrape every results page of saveface.co.uk with requests + BeautifulSoup,
# then fetch each profile page for its description and website link.
# The result is `data`: a dict mapping column name -> list of values.
url = 'https://www.saveface.co.uk/search/'
soup = BeautifulSoup(requests.get(url).text, "html.parser")

# Column name -> CSS selector, evaluated relative to each result block.
css_selector = {
    'company name' : ".title",
    'person name'  : ".name_wrapper",
    'phone number' : ".phone",
    'location'     : ".cls_loc",
    'opening hours': ".opening-hours",
    'profile link' : ".visitpage",
}
MISSING = '*missing value*'
data = {key:[] for key in list(css_selector)+['description','website link']}

# The page count is read from the `total_pages":<n>}` fragment in the page source.
number_of_pages = int(str(soup).split('total_pages":')[1].split('}')[0])

# `soup` holds page (page - 1) on entry; the next page is loaded at the end of
# each iteration, hence the range starting at 2.
for page in range(2,number_of_pages+2):
    blocks = soup.select('.clientresult')
    for idx,block in enumerate(blocks):
        print(f'blocks {idx+1}/{len(blocks)}',end='\r')
        for key in list(css_selector):
            try:
                if 'link' in key:
                    data[key] += [ block.select_one(css_selector[key])['href'] ]
                else:
                    data[key] += [ block.select_one(css_selector[key]).text.strip().replace('\r\n',', ') ]
            except (AttributeError, TypeError, KeyError):
                # select_one() returned None (AttributeError on .text,
                # TypeError on ['href']) or the tag has no href (KeyError).
                data[key] += [MISSING]
    if page <= number_of_pages:
        print('\nloading page', page)
        url_page = f'{url}?fwp_paged={page}'
        soup = BeautifulSoup(requests.get(url_page).text, "html.parser")

print('\nno more pages to load, moving to scrape profile links...')
for idx,profile_url in enumerate(data['profile link']):
    print(f"profile link {idx+1}/{len(data['profile link'])}    ",end='\r')
    if profile_url == MISSING:
        # Nothing to download for a block without a profile link; keep columns aligned.
        data['description'] += [MISSING]
        data['website link'] += [MISSING]
        continue
    soup_profile = BeautifulSoup(requests.get(profile_url).text, "html.parser")
    try:
        data['description'] += [soup_profile.select_one('.clinicContent > .description').text.strip()]
    except AttributeError:
        data['description'] += [MISSING]
    try:
        data['website link'] += [soup_profile.select_one('.visitwebsite')['href']]
    except (AttributeError, TypeError, KeyError):
        data['website link'] += [MISSING]
Output (it took about 8 minutes to complete the execution)
blocks 400/400
loading page 2
blocks 109/109
no more pages to load, moving to scrape profile links...
profile link 509/509
Then you can easily create the dataframe by running pd.DataFrame(data)

This is the new code,
but it returns the same output on every page. Why?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import pandas as pd
# Follow-up version: walk the paginated results with Selenium, record a
# placeholder for any field that cannot be read, then visit each profile link
# and export everything to saveface.csv.
s = Service(r"C:\selenium driver\chromedriver.exe")  # raw string: backslashes are literal
driver = webdriver.Chrome(service=s)

# One list per output column; rows are aligned by position.
companies_names = []
persons_names = []
phones_numbers = []
locations = []
opening_hours = []
descriptions = []
websites_links = []
all_profiles = []

driver.get("https://www.saveface.co.uk/search/")
driver.implicitly_wait(10)
pages = driver.find_elements(By.XPATH, ".//a[@class='facetwp-page']")
for page in range(len(pages)+1):
    blocks = driver.find_elements(By.XPATH, ".//div[@class='result clientresult']")
    for block in range(10):
        # An IndexError from blocks[block] (fewer than 10 blocks) is also
        # turned into the placeholder by these handlers.
        try:
            company_name = blocks[block].find_element(By.XPATH, ".//h3[@class='resulttitle']").text.strip()
            companies_names.append(company_name)
        except Exception:
            companies_names.append("Not found on the site")
        try:
            person_name = blocks[block].find_element(By.XPATH, ".//p[@class='name_wrapper']").text.strip()
            persons_names.append(person_name)
        except Exception:
            persons_names.append("Not found on the site")
        try:
            phone_number = blocks[block].find_element(By.XPATH, ".//div[@class='searchContact phone']").text.strip()
            phones_numbers.append(phone_number)
        except Exception:
            phones_numbers.append("Not found on the site")
        try:
            location = blocks[block].find_element(By.XPATH, ".//li[@class='cls_loc']").text.strip()
            locations.append(location)
        except Exception:
            locations.append("Not found on the site")
        try:
            opening_hour = blocks[block].find_element(By.XPATH, ".//li[@class='opening-hours']").text.strip()
            opening_hours.append(opening_hour)
        except Exception:
            opening_hours.append("Not found on the site")
        try:
            profile = blocks[block].find_element(By.XPATH, ".//a[@class='visitpage']").get_attribute("href")
            all_profiles.append(profile)
        except Exception:
            all_profiles.append("Not found on the site")
    # NOTE(review): find_element returns the FIRST 'facetwp-page' link on every
    # iteration, and nothing waits for the results to reload after the click --
    # presumably why each pass reads the same data; confirm against the site.
    two_page = driver.find_element(By.XPATH, ".//a[@class='facetwp-page']")
    two_page.click()

# Second pass: open each profile page for its description and website link.
for i in range(len(all_profiles)):
    try:
        driver.get(all_profiles[i])
        driver.implicitly_wait(10)
        try:
            description = driver.find_element(By.XPATH, ".//div[@class='desc-text-left']").text.strip()
            descriptions.append(description)
        except Exception:
            descriptions.append("Not found on the site")
        try:
            website_link = driver.find_element(By.XPATH, ".//a[@class='visitwebsite website']").get_attribute("href")
            websites_links.append(website_link)
        except Exception:
            websites_links.append("Not found on the site")
    except Exception:
        # driver.get failed (e.g. the stored link is the placeholder text).
        descriptions.append("Not found on the site")
        websites_links.append("Not found on the site")
    driver.implicitly_wait(10)
driver.close()

df = pd.DataFrame(
    {
        "company_name": companies_names,
        "person_name": persons_names,
        "phone_number": phones_numbers,
        "location": locations,
        "opening_hour": opening_hours,
        "description": descriptions,
        "website_link": websites_links,
        "profile_on_saveface": all_profiles
    }
)
df.to_csv('saveface.csv',index=False)
print(df)

Related

How do you web-scrape past a "show more" button using BeautifulSoup Python?

I am using BeautifulSoup in Python to scrape football statistics from this website: https://www.skysports.com/premier-league-results/2020-21. However, the site only shows the first 200 games of the season, and the remaining 180 games are behind a "show more" button. The button does not change the URL, so I can't just replace the URL.
This is my code:
from bs4 import BeautifulSoup
import requests
# Download the results page and collect every fixture container that is
# present in the returned HTML.
response = requests.get('https://www.skysports.com/premier-league-results/2020-21')
scores_html_text = response.text
scores_soup = BeautifulSoup(scores_html_text, features='lxml')
fixtures = scores_soup.find_all(name='div', class_='fixres__item')
This only gets the first 200 fixtures.
How would I access the html past the show more button?
The hidden results are inside a <script> tag, so to get all 380 results you need to parse it additionally:
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Collect all fixtures, including the ones behind the "show more" button,
# into a DataFrame and write them to data.csv.
url = "https://www.skysports.com/premier-league-results/2020-21"
soup = BeautifulSoup(requests.get(url).content, "html.parser")

# The hidden fixtures are embedded as markup inside a
# <script type="text/show-more"> tag; parse that text and splice the resulting
# nodes into the tree so the selector below sees them too.
script = soup.select_one('[type="text/show-more"]')
if script is not None and script.contents:
    script.replace_with(BeautifulSoup(script.contents[0], "html.parser"))

all_data = []
for item in soup.select(".fixres__item"):
    # First five "|"-separated text fragments: team 1, score 1, score 2, time, team 2.
    row = item.get_text(strip=True, separator="|").split("|")[:5]
    # The date comes from the closest preceding date header.
    row.append(item.find_previous(class_="fixres__header2").get_text(strip=True))
    all_data.append(row)

df = pd.DataFrame(
    all_data, columns=["Team 1", "Score 1", "Score 2", "Time", "Team 2", "Date"]
)
print(df)
df.to_csv("data.csv", index=False)
Prints:
Team 1 Score 1 Score 2 Time Team 2 Date
0 Arsenal 2 0 16:00 Brighton and Hove Albion Sunday 23rd May
1 Aston Villa 2 1 16:00 Chelsea Sunday 23rd May
2 Fulham 0 2 16:00 Newcastle United Sunday 23rd May
3 Leeds United 3 1 16:00 West Bromwich Albion Sunday 23rd May
...
377 Crystal Palace 1 0 15:00 Southampton Saturday 12th September
378 Liverpool 4 3 17:30 Leeds United Saturday 12th September
379 West Ham United 0 2 20:00 Newcastle United Saturday 12th September
and saves data.csv (screenshot from LibreOffice):
I am not aware of how to do this with BeautifulSoup, but this is how I would do it using Selenium (note that I am very new to Selenium, so there are probably better ways of doing this).
The imports used are:
from selenium import webdriver
import time
You will also need to download the Chrome webdriver (assuming that you are on Chrome), and place it in the same directory as your script, or in your library path.
There will be a cookies popup which you have to work around:
# prepare the driver
URL = "https://www.skysports.com/premier-league-results/2020-21"
driver = webdriver.Chrome()
driver.get(URL)
# wait so that driver has loaded before we look for the cookies popup
time.sleep(2)
# accept cookies popup, which occurs in an iframe
# begin by locating iframe
frame = driver.find_element_by_id('sp_message_iframe_533903')
# elements inside an iframe are only reachable after switching into it
driver.switch_to.frame(frame)
# find the accept button (inspect element and copy Xpath of button)
driver.find_element_by_xpath('//*[@id="notice"]/div[3]/button[1]').click()
# return to the top-level document before touching the page again
driver.switch_to.default_content()
time.sleep(2)
driver.refresh()
# find "show more text" button and click
driver.find_element_by_class_name("plus-more__text").click()
I tried to go up a few levels and this worked; you might need to process it a bit more.
from bs4 import BeautifulSoup
import requests
# Fetch the results page and print the first element carrying the layout
# container classes (a few levels above the individual fixtures).
response = requests.get('https://www.skysports.com/premier-league-results/2020-21')
scores_html_text = response.text
scores_soup = BeautifulSoup(scores_html_text, features='lxml')
fixtures = scores_soup.find(
    class_='site-layout-secondary block page-nav__offset grid'
)
print(fixtures)

website quits before I can parse anything

Background:
Cannot parse all the li's within the <ul class="cmn-list"> using selenium.
Code:
# Open the international job board in Chrome and print the text of every <li>
# under the first element with class "cmn-list".
url= "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('C:/chromedriver.exe', chrome_options=chrome_options)
print(url)
browser.get(url)
delay = 20 # seconds
# NOTE(review): constructing WebDriverWait does not pause by itself -- it only
# waits when .until(...) / .until_not(...) is called -- so this block does not
# delay the lookups below.
try:
    WebDriverWait(browser, delay)
except:
    pass
# find_element_* returns only the first match for the class name.
html_list = browser.find_element_by_class_name("cmn-list")
items = html_list.find_elements_by_tag_name("li")
for item in items:
    text = item.text
    print(text)
Question:
How can I parse the li rows in the <ul class="cmn-list"> on the linked page with selenium?
There are multiple ul tags with the same class name. Using browser.find_element_by_class_name('cmn-list') will only select the first ul tag with this class name, not the ul tag that you want. To get the ul tag that you want, I recommend using XPaths. Here is the full code to do it:
from selenium import webdriver
import time
def printDetails(items, sponsored):
    """Print a section header followed by the details of each job listing.

    Args:
        items: iterable of listing elements; each must support
            ``find_element_by_xpath`` and ``find_element_by_class_name``.
        sponsored: truthy to print the "Sponsored" header, falsy for "Others".
    """
    separator = '-' * 120
    print(separator)
    print("Sponsored" if sponsored else "Others")
    for item in items:
        # The title and the link come from the same anchor; look it up once.
        anchor = item.find_element_by_xpath('.//a')
        link = anchor.get_attribute('href')
        title = anchor.text
        company = item.find_element_by_class_name('job-title').find_element_by_xpath('.//p').text
        # The post-time element holds the date and the time on separate lines.
        date_time = item.find_element_by_xpath('.//div[@class="job-post-time ng-binding"]').text.split("\n")
        datee = date_time[0]
        # Fall back to an empty time instead of raising when there is no second line.
        timee = date_time[1] if len(date_time) > 1 else ""
        print(separator)
        print(f"Job Title = {title}")
        print(f"Link = {link}")
        print(f"Company = {company}")
        print(f"Date = {datee}")
        print(f"Time = {timee}")
# Open the international job board, locate the sponsored and regular job lists
# by absolute XPath, and print the details of every listing in each.
url= "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('chromedriver.exe', options=chrome_options)
try:
    print(url)
    browser.get(url)
    # The former `try: WebDriverWait(browser, delay) / except: pass` was dropped:
    # WebDriverWait is not imported here, so it only raised a swallowed
    # NameError, and even when imported it does not wait without .until(...).
    # The fixed sleep below is what gives the page time to render.
    time.sleep(3)
    sponsored = browser.find_element_by_xpath('//*[@id="mid-wrapper"]/div/section[2]/div/div[1]/div[3]/ul')
    sponsored_items = sponsored.find_elements_by_class_name('ng-scope')
    html_list = browser.find_element_by_xpath('//*[@id="mid-wrapper"]/div/section[2]/div/div[1]/div[4]/ul')
    items = html_list.find_elements_by_class_name('ng-scope')
    printDetails(sponsored_items, sponsored = True)
    printDetails(items, sponsored = False)
finally:
    # Close the browser window even if a lookup above fails.
    browser.close()
Output:
https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
------------------------------------------------------------------------------------------------------------------------
Sponsored
------------------------------------------------------------------------------------------------------------------------
Job Title = Native-speaking English Teacher | Taiwan (NT$620 - NT$660 per hour)
Link = https://www.eslcafe.com/postajob-detail/native-speaking-english-teacher-nst?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = HESS International Educational Group
Date = Apr. 20, 2020
Time = 07:39 pm PST
------------------------------------------------------------------------------------------------------------------------
Others
------------------------------------------------------------------------------------------------------------------------
Job Title = University Teaching in Japan! – Tokyo, Kanagawa, Chiba, Saitama, and Aichi
Link = https://www.eslcafe.com/postajob-detail/university-teaching-in-japan---tokyo-kanagawa-37?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 23, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Elementary/Secondary School Teaching in Japan! - Tokyo, Kanagawa, and Aichi
Link = https://www.eslcafe.com/postajob-detail/elementarysecondary-school-teaching-in-japan-8?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 23, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Seeking online English Tutor - Up to $26USD/h - Work from home! Choose your own hours!
Link = https://www.eslcafe.com/postajob-detail/seeking-online-english-tutor---up-to-26usdh--?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 23, 2020
Time = 09:20 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = English Language Lectutrer in Oman for SY 2020
Link = https://www.eslcafe.com/postajob-detail/english-language-lectutrer-in-oman-for-sy-202?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = TATI Oman
Date = Oct. 22, 2020
Time = 11:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐$2000/month, 3-5 hrs per day⭐, Teach English Online with GOGOKID!
Link = https://www.eslcafe.com/postajob-detail/2000month-3-5-hrs-per-day-teach-english-onlin?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 22, 2020
Time = 11:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = [Bachelor's Required]Part-Time Online ESL Teacher - Work from home - Flexible Job!
Link = https://www.eslcafe.com/postajob-detail/bachelors-requiredpart-time-online-esl-teache?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 22, 2020
Time = 11:05 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ★★★TRAVEL ABROAD & TEACH IN THAILAND with BFITS THAILAND (Term 2 November 2020)★★★
Link = https://www.eslcafe.com/postajob-detail/travel-abroad-teach-in-thailand-with-bfits-th-22?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BFITS Thailand
Date = Oct. 22, 2020
Time = 11:05 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full-time In-house Academic Editor Wanted in Taipei, Taiwan
Link = https://www.eslcafe.com/postajob-detail/full-time-in-house-academic-editor-wanted-in-6?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Wallace Academic Editing
Date = Oct. 21, 2020
Time = 01:44 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Tutor
Link = https://www.eslcafe.com/postajob-detail/online-english-tutor?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Panda ABC
Date = Oct. 21, 2020
Time = 01:41 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Native Speaker Teacher - Changhua, Taiwan
Link = https://www.eslcafe.com/postajob-detail/native-speaker-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Leader Language Schools
Date = Oct. 21, 2020
Time = 01:24 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teachers Needed in Fiji - Pacific American School
Link = https://www.eslcafe.com/postajob-detail/fiji-pacific-american-school?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Pacific American School
Date = Oct. 21, 2020
Time = 10:44 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online ESL tutor wanted! Teach Korean students online. (CNK English)
Link = https://www.eslcafe.com/postajob-detail/online-esl-tutor-wanted-teach-korean-students-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = CNK English
Date = Oct. 20, 2020
Time = 08:15 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Primary Section (Class Teacher for grades 2-3) - Dushanbe, Tajikistan
Link = https://www.eslcafe.com/postajob-detail/primary-section-class-teacher-for-grades-2-3?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Dushanbe International School
Date = Oct. 20, 2020
Time = 08:42 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Native English Teacher needed for private classes - Kuala Lumpur, Malaysia)
Link = https://www.eslcafe.com/postajob-detail/english-teacher-needed-for-private-classes-ku?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BLC
Date = Oct. 20, 2020
Time = 08:40 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = English Language Center Lecturer - Taiwan
Link = https://www.eslcafe.com/postajob-detail/english-language-center-lecturer?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Tunghai University
Date = Oct. 20, 2020
Time = 08:39 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐GOGOKID offers candidate incentive again⭐】Teach English Online
Link = https://www.eslcafe.com/postajob-detail/extra-bonus-30-for-on-boardteach-english-onli?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 19, 2020
Time = 08:46 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Looking for Online ESL Teacher!!!
Link = https://www.eslcafe.com/postajob-detail/looking-for-online-esl-teacher-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = First Future
Date = Oct. 19, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Teacher
Link = https://www.eslcafe.com/postajob-detail/online-english-teacher-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Whales English
Date = Oct. 19, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 🇪🇺 🇵🇱 Teach English in Poland with English Wizards! 🇵🇱 🇪🇺
Link = https://www.eslcafe.com/postajob-detail/teach-english-in-poland-with-english-wizards?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = English Wizards
Date = Oct. 19, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = US Certified Science Teacher - Tirane, Albania
Link = https://www.eslcafe.com/postajob-detail/us-certified-science-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Albanian International School
Date = Oct. 19, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Fantastic teaching jobs around Taiwan, hiring single and couples ASAP
Link = https://www.eslcafe.com/postajob-detail/fantastic-teaching-jobs-around-taiwan-hiring?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = ESLJOBTAIWAN
Date = Oct. 19, 2020
Time = 09:21 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 🛫 🌞 🌄 Become a Mentor for Language Learners on Ski Camps - Free Hotel Stays in Europe 🛫 🌞 🌄
Link = https://www.eslcafe.com/postajob-detail/become-a-mentor-for-language-learners-on-ski?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Angloville
Date = Oct. 19, 2020
Time = 09:19 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐Extra Bonus-First come first served】Online English Tutor-Earn up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/extra-bonus-first-come-first-servedonline-eng?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 19, 2020
Time = 09:19 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = University Teaching in Japan! – Tokyo, Kanagawa, Chiba, Saitama, and Aichi
Link = https://www.eslcafe.com/postajob-detail/university-teaching-in-japan---tokyo-kanagawa-36?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 19, 2020
Time = 09:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Elementary/Secondary School Teaching in Japan! - Tokyo, Kanagawa, and Aichi
Link = https://www.eslcafe.com/postajob-detail/elementarysecondary-school-teaching-in-japan-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 19, 2020
Time = 09:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online ESL Tutor - No minimum teaching requirements - $26/hr part-time job
Link = https://www.eslcafe.com/postajob-detail/online-esl-tutor---no-minimum-teaching-requir?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 19, 2020
Time = 09:17 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 💜💛💙 SUNNY SPAIN, MARVELOUS MADRID & an EXCITING, LOVELY LIFE with the Canterbury English TEFL & Madrid Lifestyle (for TEFL holders) Programs&Guaranteed Teaching Job for all students WITH US (that's the key), which starts during the Course! 💜💛💛
Link = https://www.eslcafe.com/postajob-detail/128156128155128153-sunny-spain-marvelous-madr-30?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Canterbury English
Date = Oct. 18, 2020
Time = 09:36 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Math Teacher - Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/math-teacher-2?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Abaarso School of Science & Technology
Date = Oct. 17, 2020
Time = 10:56 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Math/English Teacher - Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/mathenglish-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Barwaaqo Univeristy
Date = Oct. 17, 2020
Time = 10:46 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = TAIWAN! Teach English at schools throughout the beautiful island of TAIWAN - $2,200 USD per month. Taipei, Tainan, Kaohsiung, Taichung, Keelung, PingDong. Summer 2020 graduates welcome.
Link = https://www.eslcafe.com/postajob-detail/taiwan-teach-english-at-schools-throughout-th-35?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Arun Language Training & Recruitment Ltd
Date = Oct. 17, 2020
Time = 10:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online Technical Copywriter
Link = https://www.eslcafe.com/postajob-detail/technical-copywriter?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Lingvoexpert
Date = Oct. 16, 2020
Time = 10:21 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ★★★LIVE ABROAD & TEACH IN THAILAND with BFITS THAILAND (Term 2 November 2020)★★★
Link = https://www.eslcafe.com/postajob-detail/live-abroad-teach-in-thailand-with-bfits-thai-3?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BFITS Thailand
Date = Oct. 16, 2020
Time = 10:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = US $2500-5000/M + PU Letter+ Teach in China + International & Public School + Training Center + IB + AP + A-level + Social Science, Math, Physics, Chemistry + All Regular Subjects
Link = https://www.eslcafe.com/postajob-detail/apiba-levelmathsciencechemistryphysicscompute?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Can-Achieve Global Talent Inc.
Date = Jul. 21, 2020
Time = 07:13 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Head of Primary and Head of Secondary required ASAP - Iraq- Erbil
Link = https://www.eslcafe.com/postajob-detail/head-of-primary-and-head-of-secondary-require?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = British International School/Iraq-Kurdistan- Erbil
Date = Oct. 15, 2020
Time = 09:35 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full Time English Teacher in Ehime, JAPAN
Link = https://www.eslcafe.com/postajob-detail/full-time-english-teacher-in-ehime-japan?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Amic International Inc.
Date = Oct. 15, 2020
Time = 09:34 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐Dave's Recommendation】Online English Tutor-Earn up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/daves-recommendationonline-english-tutor-earn?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 15, 2020
Time = 09:32 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teaching English with Magic Ears! - Work from home - Uni students are also acceptable!
Link = https://www.eslcafe.com/postajob-detail/teaching-english-with-magic-ears---work-from?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 15, 2020
Time = 09:30 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teach in Taiwan
Link = https://www.eslcafe.com/postajob-detail/teach-in-taiwan-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Neurolink English Academy
Date = Oct. 14, 2020
Time = 09:29 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Japan: Teaching English to children!
Link = https://www.eslcafe.com/postajob-detail/japan-teaching-english-to-children?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Tamaki TEFL Recruitment (TTR)
Date = Oct. 14, 2020
Time = 09:26 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ESL Instructors Needed for Community Education Courses Baghdad, Iraq
Link = https://www.eslcafe.com/postajob-detail/esl-instructors-needed-for-community-educatio?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = English Language Academy
Date = Oct. 14, 2020
Time = 09:25 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Teacher - Up to $26/hr - With no minimum teaching requirements!
Link = https://www.eslcafe.com/postajob-detail/online-english-teacher---up-to-26hr---with-no?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 14, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Worldwide ESL/EFL Projects for the U.S. Department of State in 2021/2022
Link = https://www.eslcafe.com/postajob-detail/worldwide-eslefl-projects-for-the-us-departme-13?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = U.S. Department of State English Language Programs
Date = Oct. 13, 2020
Time = 01:38 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Biggest ESL School in Vietnam - NOW Hiring Teachers
Link = https://www.eslcafe.com/postajob-detail/biggest-esl-school-in-vietnam---now-hiring-te-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = APAX English
Date = Oct. 13, 2020
Time = 10:13 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = EXPERIENCED EFL TEACHER NEEDED AT NORTHSTAR COLLEGE, Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/experienced-efl-teacher-needed-at-northstar-c?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Northstar College
Date = Oct. 13, 2020
Time = 10:12 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full time English Teacher - Kanazawa, Japan
Link = https://www.eslcafe.com/postajob-detail/full-time-english-teacher-8?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Bartolo English
Date = Oct. 13, 2020
Time = 09:09 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = A good choice for ESL teachers! Teaching English online for Chinese kids - Earn up to $26/hr
Link = https://www.eslcafe.com/postajob-detail/a-good-choice-for-esl-teachers-teaching-engli?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 13, 2020
Time = 09:07 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐Attention⭐Online Teaching Position offers up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/online-english-tutor-earn-up-to-25hr?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 13, 2020
Time = 09:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐⭐⭐⭐ESL Teaching Positions Available in Taiwan NOW ⭐⭐⭐⭐
Link = https://www.eslcafe.com/postajob-detail/esl-teaching-positions-available-in-taiwan-no-6?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Teach Taiwan
Date = Oct. 12, 2020
Time = 11:38 pm PST
------------------------------------------------------------------------------------------------------------------------
...
Job Title = Work in Japan
Link = https://www.eslcafe.com/postajob-detail/work-in-japan?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Omni International
Date = Oct. 08, 2020
Time = 10:21 am PST
The answer above waits a fixed 3 seconds, assuming the page will finish loading within that time. If you would rather wait until a certain piece of text appears in the page, you can do:
# Imports this snippet relies on; harmless if the surrounding script already has them.
from time import sleep

from selenium import webdriver

url = "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"

# Configure and launch Chrome (Selenium 3 style API, matching the
# find_element_by_* calls used further down).
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('C:/chromedriver.exe', chrome_options=chrome_options)
print(url)
browser.get(url)

# Poll the page source once per second until the marker text is present,
# instead of sleeping for a fixed amount of time.
# NOTE: there is no upper bound on the wait; if the marker never appears
# this loop does not terminate.
displayTimer = 0
wanted_Phrase = "li"  # You should put some text that is only on the page when it has loaded.
while wanted_Phrase not in browser.page_source:
    sleep(1)
    displayTimer += 1
    print("[{}] Seconds waited for page".format(displayTimer))

# Print the text of every <li> inside the element with class "cmn-list".
html_list = browser.find_element_by_class_name("cmn-list")
items = html_list.find_elements_by_tag_name("li")
for item in items:
    text = item.text
    print(text)

How to transfer bs4.element.ResultSet to date/string?

I want to extract date and summary of an article in a website, here is my code
# Load an article in Chrome via Selenium, parse the resulting HTML with
# BeautifulSoup, and print the timestamp and sub-headline elements.
from bs4 import BeautifulSoup
from selenium import webdriver
full_url = 'https://www.wsj.com/articles/readers-favorite-summer-recipes-11599238648?mod=searchresults&page=1&pos=20'
url0 = full_url
browser0 = webdriver.Chrome('C:/Users/liuzh/Downloads/chromedriver_win32/chromedriver')
browser0.get(url0)
# Snapshot of the page's HTML as the browser currently has it.
html0 = browser0.page_source
page_soup = BeautifulSoup(html0, 'html5lib')
# find_all() returns a ResultSet (a list-like collection of Tag objects),
# not plain strings.
date = page_soup.find_all("time", class_="timestamp article__timestamp flexbox__flex--1")
sub_head = page_soup.find_all("h2", class_="sub-head")
# Printing a ResultSet shows the matched tags including their markup.
print(date)
print(sub_head)
I got the following result. How can I extract just the text (e.g. Sept. 4, 2020 12:57 pm ET; This Labor Day weekend, we’re...)?
[<time class="timestamp article__timestamp flexbox__flex--1">
Sept. 4, 2020 12:57 pm ET
</time>]
[<h2 class="sub-head" itemprop="description">This Labor Day weekend, we’re savoring the last of summer with a collection of seasonal recipes shared by Wall Street Journal readers. Each one comes with a story about what this food means to a family and why they return to it each year.</h2>]
Thanks.
Try something like:
# Each entry of the ResultSet is a Tag: .text yields its text content and
# .strip() removes the newlines/whitespace surrounding it in the markup.
for d in date:
    print(d.text.strip())
Given your sample html, output should be:
Sept. 4, 2020 12:57 pm ET

None output while parsing inside a class

I'm fairly new to Python and I'm trying to parse the website https://rustavi2.ge/ka/schedule using the following code. The content is in Georgian, but I don't think that matters.
When you open the page, you will see the text 07:15 ანიმაცია "სონიკ ბუმი" at the top. Via the browser's inspector I can see the element's tag and class, but the following code returns only None. I know I'm doing something terribly wrong, but I can't figure out what.
# Download the schedule page and try to read its first schedule row.
import requests
from bs4 import BeautifulSoup
# requests returns the HTML as served; it does not execute any JavaScript.
r = requests.get('https://rustavi2.ge/ka/schedule')
c = r.content
soup = BeautifulSoup(c,'html.parser')
# Look up the first "bade_line" div inside the "sch_cont" container.
# find() returns None when no matching element exists in the parsed HTML.
a = soup.find("div", {"class": "sch_cont"}).find("div",{"class": "bade_line"})
print((a).encode("utf-8"))
The data is loaded via Ajax from a separate URL, so request that endpoint directly:
import requests
import urllib3
from bs4 import BeautifulSoup

# Relax the default TLS cipher list used by requests' bundled urllib3.
# "@SECLEVEL=1" is OpenSSL cipher-string syntax for lowering the security
# level; presumably the server only offers parameters that the default
# level rejects -- verify before reusing this elsewhere.
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL:@SECLEVEL=1'

# Endpoint the schedule page fetches its rows from (date and language are
# passed as query parameters).
url = 'https://rustavi2.ge/includes/bade_ajax.php?dt=2020-08-17&lang=ka'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# Pair each time cell with its title cell, in document order.
for tm, title in zip(soup.select('.b_time'), soup.select('.b_title')):
    print(tm.text, title.text)
Prints:
07:15 ანიმაცია "სონიკ ბუმი"
08:00 მხ/ფილმი
10:00 კურიერი
10:15 სერიალი "ქალური ბედნიერება"
12:00 კურიერი
12:30 სერიალი "ქალური ბედნიერება"
13:55 სერიალი "მე ვიცი რა გელის"
15:00 კურიერი
15:50 დღის კურიერი
16:30 სერიალი "უცხო მშობლიურ მხარეში"
18:00 კურიერი
18:50 სერიალი "მარიამ მაგდალინელი"
20:30 ლოტო
20:40 სერიალი "მარიამ მაგდალინელი"
21:00 კურიერი
22:00 ფარული კონვერტი
23:00 გააცინე და მოიგე
00:00 სერიალი "სენდიტონი"
00:30 მხ/ფილმი
01:00 მხ/ფილმი
03:30 კურიერის დაიჯესტი
04:00 სერიალი "ქალური ბედნიერება"
05:00 სერიალი "უცხო მშობლიურ მხარეში"

BeautifulSoup text between <a></a> not being returned

I am trying to parse an ESPN webpage to get the date, time, and teams playing in each NFL game for a given week using BeautifulSoup. I am able to get most of the information, however, I am having trouble with the time information.
For some reason, the text between the a tag is not being returned.
The html for one of the a tags is:
<a data-dateformat="time1" name="&lpos=nfl:schedule:time" href="/nfl/game?gameId=400874572">12:00 PM</a>
I am looking to get the "12:00 PM" in between the a tags, but instead I get:
<a data-dateformat="time1" href="/nfl/game?gameId=400874572" name="&lpos=nfl:schedule:time"></a>
which doesn't have any text in between the tags.
Here is what I have used to parse the webpage.
import urllib2
from bs4 import BeautifulSoup


def parse_nfl_schedule_espn():
    """Print the time-cell links for each game on ESPN's NFL week-10 schedule.

    The page is fetched with urllib2 (Python 2), so only the HTML sent by the
    server is parsed; no JavaScript is run.
    """
    schedule = BeautifulSoup(urllib2.urlopen("http://www.espn.com/nfl/schedule/_/week/10").read(), "lxml")
    for date in schedule.find_all('h2'):
        # separate by game: rows of the element that follows the <h2>
        game_info = date.nextSibling.find_all('tr')
        # Pull the heading text out of the serialized tag by splitting on
        # the angle brackets.
        date = str(date).split(">")
        date = date[1].split("<")
        date = date[0]
        #print date
        for i in range(len(game_info)):
            # separate each part of game row
            value = game_info[i].find_all('td')
            # Only rows with more than one <td> are processed
            # (presumably this skips the <thead> row).
            if len(value) > 1:
                # away team abv
                away = str(value[0].find('abbr')).split(">")
                away = away[1].split("<")
                away = away[0]
                # home team abv
                home = str(value[1].find('abbr')).split(">")
                home = home[1].split("<")
                home = home[0]
                # All <a> tags in the third cell (the kickoff-time column).
                time = value[2].find_all('a')
                # Parenthesized single-argument print behaves the same on
                # Python 2 and also parses on Python 3.
                print(time)
                #print "%s at %s" % (away, home)


if __name__ == "__main__":
    parse_nfl_schedule_espn()
Any help/suggestions would be much appreciated.
You will need to use something like Selenium to get the HTML. This would then allow the browser to run any Javascript. This can be done as follows:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary


def parse_nfl_schedule_espn():
    """Print the kickoff time of every game on ESPN's NFL week-10 schedule.

    The page is loaded in a real Firefox instance so that the page's
    JavaScript runs before the HTML is handed to BeautifulSoup.
    """
    browser = webdriver.Firefox(firefox_binary=FirefoxBinary())
    try:
        browser.get("http://www.espn.com/nfl/schedule/_/week/10")
        schedule = BeautifulSoup(browser.page_source, "lxml")
    finally:
        # Close the browser even if loading or parsing fails, so no Firefox
        # process is left behind.
        browser.quit()

    # The time links are the <a> tags carrying data-dateformat="time1".
    for time_link in schedule.find_all('a', attrs={'data-dateformat': "time1"}):
        # Parenthesized single-argument print works on Python 2 and 3.
        print(time_link.text)


if __name__ == "__main__":
    parse_nfl_schedule_espn()
Which would display the following:
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
9:05 PM
9:25 PM
9:25 PM
1:30 AM
1:30 AM
You could also investigate "headless" solutions such as PhantomJS to avoid having to see a browser window being displayed.

Categories

Resources