I am using BeautifulSoup in Python to scrape football statistics from this website: https://www.skysports.com/premier-league-results/2020-21. However, the site only shows the first 200 games of the season; the remaining 180 games are behind a "show more" button. Clicking the button does not change the URL, so I can't simply request a different URL.
This is my code:
from bs4 import BeautifulSoup
import requests
# Download the results page and keep its HTML as text.
response = requests.get('https://www.skysports.com/premier-league-results/2020-21')
scores_html_text = response.text

# Parse with lxml and collect every fixture <div> present in that markup.
scores_soup = BeautifulSoup(scores_html_text, 'lxml')
fixtures = scores_soup.find_all('div', class_='fixres__item')
This only gets the first 200 fixtures.
How would I access the html past the show more button?
The hidden results are inside a <script> tag, so to get all 380 results you need to parse its contents separately:
import requests
import pandas as pd
from bs4 import BeautifulSoup
url = "https://www.skysports.com/premier-league-results/2020-21"
soup = BeautifulSoup(requests.get(url).content, "html.parser")

# The fixtures hidden behind "show more" are shipped as raw HTML inside a
# <script type="text/show-more"> tag. Parse that markup and splice it into
# the tree in place of the script, so a single selector finds every fixture.
script = soup.select_one('[type="text/show-more"]')
if script is not None and script.contents:
    # Guarded so a page without the tag still yields its visible fixtures
    # instead of failing with AttributeError / IndexError.
    script.replace_with(BeautifulSoup(script.contents[0], "html.parser"))

all_data = []
for item in soup.select(".fixres__item"):
    # First five "|"-separated text fragments of the fixture element.
    row = item.get_text(strip=True, separator="|").split("|")[:5]
    # The date is not part of the fixture element; it is the text of the
    # closest preceding header.
    row.append(
        item.find_previous(class_="fixres__header2").get_text(strip=True)
    )
    all_data.append(row)

df = pd.DataFrame(
    all_data, columns=["Team 1", "Score 1", "Score 2", "Time", "Team 2", "Date"]
)
print(df)
df.to_csv("data.csv", index=False)
Prints:
Team 1 Score 1 Score 2 Time Team 2 Date
0 Arsenal 2 0 16:00 Brighton and Hove Albion Sunday 23rd May
1 Aston Villa 2 1 16:00 Chelsea Sunday 23rd May
2 Fulham 0 2 16:00 Newcastle United Sunday 23rd May
3 Leeds United 3 1 16:00 West Bromwich Albion Sunday 23rd May
...
377 Crystal Palace 1 0 15:00 Southampton Saturday 12th September
378 Liverpool 4 3 17:30 Leeds United Saturday 12th September
379 West Ham United 0 2 20:00 Newcastle United Saturday 12th September
and saves data.csv (screenshot from LibreOffice):
I am not aware of how to do this with BeautifulSoup, but this is how I would do it using Selenium (note that I am very new to Selenium, so there are probably better ways of doing this).
The imports used are:
from selenium import webdriver
import time
You will also need to download the Chrome webdriver (assuming that you are on Chrome), and place it in the same directory as your script, or in your library path.
There will be a cookies popup, which you have to work around:
# prepare the driver
URL = "https://www.skysports.com/premier-league-results/2020-21"
driver = webdriver.Chrome()
driver.get(URL)

# wait so that driver has loaded before we look for the cookies popup
time.sleep(2)

# accept cookies popup, which occurs in an iframe
# begin by locating the iframe and switching into it: elements inside an
# iframe cannot be found from the top-level document
frame = driver.find_element_by_id('sp_message_iframe_533903')
driver.switch_to.frame(frame)

# find the accept button (inspect element and copy XPath of button);
# XPath attribute tests are written with "@", e.g. @id
driver.find_element_by_xpath('//*[@id="notice"]/div[3]/button[1]').click()

# go back to the main document before touching the page itself again
driver.switch_to.default_content()
time.sleep(2)
driver.refresh()

# find "show more text" button and click
driver.find_element_by_class_name("plus-more__text").click()
I tried going up a few levels in the HTML and this worked; you might need to process the result a bit more.
from bs4 import BeautifulSoup
import requests
# Fetch the results page and parse its HTML with lxml.
page = requests.get('https://www.skysports.com/premier-league-results/2020-21')
scores_html_text = page.text
scores_soup = BeautifulSoup(scores_html_text, 'lxml')

# Select the first element carrying this class list and dump it.
fixtures = scores_soup.find(class_='site-layout-secondary block page-nav__offset grid')
print(fixtures)
Background:
Cannot parse all the li's within the <ul class="cmn-list"> using selenium.
Code:
url = "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"

# Configure and start Chrome.
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('C:/chromedriver.exe', chrome_options=chrome_options)
print(url)
browser.get(url)

delay = 20  # seconds
try:
    # NOTE(review): this only constructs a wait object; nothing is actually
    # waited for unless .until(...) is called on it.
    WebDriverWait(browser, delay)
except Exception:
    pass

# find_element_* (singular) returns only the first matching element.
html_list = browser.find_element_by_class_name("cmn-list")
items = html_list.find_elements_by_tag_name("li")
for item in items:
    text = item.text
    print(text)
Question:
How can I parse the li rows inside <ul class="cmn-list"> with Selenium?
There are multiple ul tags with the same class name. Using browser.find_element_by_class_name('cmn-list') selects only the first ul tag with this class name, which is not the ul tag that you want. To get the ul tag that you want, I recommend using XPaths. Here is the full code to do it:
from selenium import webdriver
import time
def printDetails(items, sponsored):
    """Print title, link, company, date and time for each job listing.

    Args:
        items: Iterable of Selenium elements, one per listing. Each must
            contain an ``<a>`` (title and link), an element with class
            ``job-title`` holding a ``<p>`` (company), and a
            ``<div class="job-post-time ng-binding">`` whose text is the
            date and the time separated by a newline.
        sponsored: Truthy to head the section "Sponsored", falsy for
            "Others".

    A 120-dash separator is printed before the heading and before every
    listing.
    """
    separator = '-' * 120
    print(separator)
    print("Sponsored" if sponsored else "Others")

    for item in items:
        # One lookup serves both the link and the title.
        anchor = item.find_element_by_xpath('.//a')
        link = anchor.get_attribute('href')
        title = anchor.text
        company = item.find_element_by_class_name('job-title').find_element_by_xpath('.//p').text
        # XPath attribute tests use "@" (the "#" form is not valid XPath).
        date_time = item.find_element_by_xpath('.//div[@class="job-post-time ng-binding"]').text.split("\n")
        datee = date_time[0]
        # Tolerate a timestamp that has no separate time line.
        timee = date_time[1] if len(date_time) > 1 else ""

        print(separator)
        print(f"Job Title = {title}")
        print(f"Link = {link}")
        print(f"Company = {company}")
        print(f"Date = {datee}")
        print(f"Time = {timee}")
url = "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"

# Configure and start Chrome.
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('chromedriver.exe', options=chrome_options)

try:
    print(url)
    browser.get(url)

    # Fixed pause so the page can finish loading before it is queried.
    # (The former bare `WebDriverWait(browser, delay)` call waited for
    # nothing, and any error it raised was silently swallowed.)
    time.sleep(3)

    # Several <ul> elements share the same class, so each list is addressed
    # by its position in the document. XPath attribute tests use "@id".
    sponsored = browser.find_element_by_xpath('//*[@id="mid-wrapper"]/div/section[2]/div/div[1]/div[3]/ul')
    sponsored_items = sponsored.find_elements_by_class_name('ng-scope')

    html_list = browser.find_element_by_xpath('//*[@id="mid-wrapper"]/div/section[2]/div/div[1]/div[4]/ul')
    items = html_list.find_elements_by_class_name('ng-scope')

    printDetails(sponsored_items, sponsored=True)
    printDetails(items, sponsored=False)
finally:
    # Always end the browser session, even if a lookup above fails.
    browser.quit()
Output:
https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
------------------------------------------------------------------------------------------------------------------------
Sponsored
------------------------------------------------------------------------------------------------------------------------
Job Title = Native-speaking English Teacher | Taiwan (NT$620 - NT$660 per hour)
Link = https://www.eslcafe.com/postajob-detail/native-speaking-english-teacher-nst?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = HESS International Educational Group
Date = Apr. 20, 2020
Time = 07:39 pm PST
------------------------------------------------------------------------------------------------------------------------
Others
------------------------------------------------------------------------------------------------------------------------
Job Title = University Teaching in Japan! – Tokyo, Kanagawa, Chiba, Saitama, and Aichi
Link = https://www.eslcafe.com/postajob-detail/university-teaching-in-japan---tokyo-kanagawa-37?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 23, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Elementary/Secondary School Teaching in Japan! - Tokyo, Kanagawa, and Aichi
Link = https://www.eslcafe.com/postajob-detail/elementarysecondary-school-teaching-in-japan-8?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 23, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Seeking online English Tutor - Up to $26USD/h - Work from home! Choose your own hours!
Link = https://www.eslcafe.com/postajob-detail/seeking-online-english-tutor---up-to-26usdh--?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 23, 2020
Time = 09:20 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = English Language Lectutrer in Oman for SY 2020
Link = https://www.eslcafe.com/postajob-detail/english-language-lectutrer-in-oman-for-sy-202?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = TATI Oman
Date = Oct. 22, 2020
Time = 11:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐$2000/month, 3-5 hrs per day⭐, Teach English Online with GOGOKID!
Link = https://www.eslcafe.com/postajob-detail/2000month-3-5-hrs-per-day-teach-english-onlin?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 22, 2020
Time = 11:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = [Bachelor's Required]Part-Time Online ESL Teacher - Work from home - Flexible Job!
Link = https://www.eslcafe.com/postajob-detail/bachelors-requiredpart-time-online-esl-teache?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 22, 2020
Time = 11:05 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ★★★TRAVEL ABROAD & TEACH IN THAILAND with BFITS THAILAND (Term 2 November 2020)★★★
Link = https://www.eslcafe.com/postajob-detail/travel-abroad-teach-in-thailand-with-bfits-th-22?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BFITS Thailand
Date = Oct. 22, 2020
Time = 11:05 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full-time In-house Academic Editor Wanted in Taipei, Taiwan
Link = https://www.eslcafe.com/postajob-detail/full-time-in-house-academic-editor-wanted-in-6?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Wallace Academic Editing
Date = Oct. 21, 2020
Time = 01:44 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Tutor
Link = https://www.eslcafe.com/postajob-detail/online-english-tutor?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Panda ABC
Date = Oct. 21, 2020
Time = 01:41 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Native Speaker Teacher - Changhua, Taiwan
Link = https://www.eslcafe.com/postajob-detail/native-speaker-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Leader Language Schools
Date = Oct. 21, 2020
Time = 01:24 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teachers Needed in Fiji - Pacific American School
Link = https://www.eslcafe.com/postajob-detail/fiji-pacific-american-school?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Pacific American School
Date = Oct. 21, 2020
Time = 10:44 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online ESL tutor wanted! Teach Korean students online. (CNK English)
Link = https://www.eslcafe.com/postajob-detail/online-esl-tutor-wanted-teach-korean-students-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = CNK English
Date = Oct. 20, 2020
Time = 08:15 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Primary Section (Class Teacher for grades 2-3) - Dushanbe, Tajikistan
Link = https://www.eslcafe.com/postajob-detail/primary-section-class-teacher-for-grades-2-3?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Dushanbe International School
Date = Oct. 20, 2020
Time = 08:42 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Native English Teacher needed for private classes - Kuala Lumpur, Malaysia)
Link = https://www.eslcafe.com/postajob-detail/english-teacher-needed-for-private-classes-ku?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BLC
Date = Oct. 20, 2020
Time = 08:40 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = English Language Center Lecturer - Taiwan
Link = https://www.eslcafe.com/postajob-detail/english-language-center-lecturer?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Tunghai University
Date = Oct. 20, 2020
Time = 08:39 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐GOGOKID offers candidate incentive again⭐】Teach English Online
Link = https://www.eslcafe.com/postajob-detail/extra-bonus-30-for-on-boardteach-english-onli?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 19, 2020
Time = 08:46 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Looking for Online ESL Teacher!!!
Link = https://www.eslcafe.com/postajob-detail/looking-for-online-esl-teacher-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = First Future
Date = Oct. 19, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Teacher
Link = https://www.eslcafe.com/postajob-detail/online-english-teacher-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Whales English
Date = Oct. 19, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 🇪🇺 🇵🇱 Teach English in Poland with English Wizards! 🇵🇱 🇪🇺
Link = https://www.eslcafe.com/postajob-detail/teach-english-in-poland-with-english-wizards?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = English Wizards
Date = Oct. 19, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = US Certified Science Teacher - Tirane, Albania
Link = https://www.eslcafe.com/postajob-detail/us-certified-science-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Albanian International School
Date = Oct. 19, 2020
Time = 09:22 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Fantastic teaching jobs around Taiwan, hiring single and couples ASAP
Link = https://www.eslcafe.com/postajob-detail/fantastic-teaching-jobs-around-taiwan-hiring?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = ESLJOBTAIWAN
Date = Oct. 19, 2020
Time = 09:21 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 🛫 🌞 🌄 Become a Mentor for Language Learners on Ski Camps - Free Hotel Stays in Europe 🛫 🌞 🌄
Link = https://www.eslcafe.com/postajob-detail/become-a-mentor-for-language-learners-on-ski?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Angloville
Date = Oct. 19, 2020
Time = 09:19 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐Extra Bonus-First come first served】Online English Tutor-Earn up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/extra-bonus-first-come-first-servedonline-eng?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 19, 2020
Time = 09:19 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = University Teaching in Japan! – Tokyo, Kanagawa, Chiba, Saitama, and Aichi
Link = https://www.eslcafe.com/postajob-detail/university-teaching-in-japan---tokyo-kanagawa-36?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 19, 2020
Time = 09:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Elementary/Secondary School Teaching in Japan! - Tokyo, Kanagawa, and Aichi
Link = https://www.eslcafe.com/postajob-detail/elementarysecondary-school-teaching-in-japan-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Westgate Corporation
Date = Oct. 19, 2020
Time = 09:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online ESL Tutor - No minimum teaching requirements - $26/hr part-time job
Link = https://www.eslcafe.com/postajob-detail/online-esl-tutor---no-minimum-teaching-requir?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 19, 2020
Time = 09:17 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 💜💛💙 SUNNY SPAIN, MARVELOUS MADRID & an EXCITING, LOVELY LIFE with the Canterbury English TEFL & Madrid Lifestyle (for TEFL holders) Programs&Guaranteed Teaching Job for all students WITH US (that's the key), which starts during the Course! 💜💛💛
Link = https://www.eslcafe.com/postajob-detail/128156128155128153-sunny-spain-marvelous-madr-30?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Canterbury English
Date = Oct. 18, 2020
Time = 09:36 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Math Teacher - Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/math-teacher-2?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Abaarso School of Science & Technology
Date = Oct. 17, 2020
Time = 10:56 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Math/English Teacher - Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/mathenglish-teacher?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Barwaaqo Univeristy
Date = Oct. 17, 2020
Time = 10:46 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = TAIWAN! Teach English at schools throughout the beautiful island of TAIWAN - $2,200 USD per month. Taipei, Tainan, Kaohsiung, Taichung, Keelung, PingDong. Summer 2020 graduates welcome.
Link = https://www.eslcafe.com/postajob-detail/taiwan-teach-english-at-schools-throughout-th-35?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Arun Language Training & Recruitment Ltd
Date = Oct. 17, 2020
Time = 10:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online Technical Copywriter
Link = https://www.eslcafe.com/postajob-detail/technical-copywriter?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Lingvoexpert
Date = Oct. 16, 2020
Time = 10:21 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ★★★LIVE ABROAD & TEACH IN THAILAND with BFITS THAILAND (Term 2 November 2020)★★★
Link = https://www.eslcafe.com/postajob-detail/live-abroad-teach-in-thailand-with-bfits-thai-3?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = BFITS Thailand
Date = Oct. 16, 2020
Time = 10:18 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = US $2500-5000/M + PU Letter+ Teach in China + International & Public School + Training Center + IB + AP + A-level + Social Science, Math, Physics, Chemistry + All Regular Subjects
Link = https://www.eslcafe.com/postajob-detail/apiba-levelmathsciencechemistryphysicscompute?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Can-Achieve Global Talent Inc.
Date = Jul. 21, 2020
Time = 07:13 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Head of Primary and Head of Secondary required ASAP - Iraq- Erbil
Link = https://www.eslcafe.com/postajob-detail/head-of-primary-and-head-of-secondary-require?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = British International School/Iraq-Kurdistan- Erbil
Date = Oct. 15, 2020
Time = 09:35 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full Time English Teacher in Ehime, JAPAN
Link = https://www.eslcafe.com/postajob-detail/full-time-english-teacher-in-ehime-japan?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Amic International Inc.
Date = Oct. 15, 2020
Time = 09:34 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = 【⭐Dave's Recommendation】Online English Tutor-Earn up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/daves-recommendationonline-english-tutor-earn?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 15, 2020
Time = 09:32 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teaching English with Magic Ears! - Work from home - Uni students are also acceptable!
Link = https://www.eslcafe.com/postajob-detail/teaching-english-with-magic-ears---work-from?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 15, 2020
Time = 09:30 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Teach in Taiwan
Link = https://www.eslcafe.com/postajob-detail/teach-in-taiwan-1?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Neurolink English Academy
Date = Oct. 14, 2020
Time = 09:29 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Japan: Teaching English to children!
Link = https://www.eslcafe.com/postajob-detail/japan-teaching-english-to-children?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Tamaki TEFL Recruitment (TTR)
Date = Oct. 14, 2020
Time = 09:26 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ESL Instructors Needed for Community Education Courses Baghdad, Iraq
Link = https://www.eslcafe.com/postajob-detail/esl-instructors-needed-for-community-educatio?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = English Language Academy
Date = Oct. 14, 2020
Time = 09:25 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Online English Teacher - Up to $26/hr - With no minimum teaching requirements!
Link = https://www.eslcafe.com/postajob-detail/online-english-teacher---up-to-26hr---with-no?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 14, 2020
Time = 09:23 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Worldwide ESL/EFL Projects for the U.S. Department of State in 2021/2022
Link = https://www.eslcafe.com/postajob-detail/worldwide-eslefl-projects-for-the-us-departme-13?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = U.S. Department of State English Language Programs
Date = Oct. 13, 2020
Time = 01:38 pm PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Biggest ESL School in Vietnam - NOW Hiring Teachers
Link = https://www.eslcafe.com/postajob-detail/biggest-esl-school-in-vietnam---now-hiring-te-7?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = APAX English
Date = Oct. 13, 2020
Time = 10:13 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = EXPERIENCED EFL TEACHER NEEDED AT NORTHSTAR COLLEGE, Hargeisa, Somaliland
Link = https://www.eslcafe.com/postajob-detail/experienced-efl-teacher-needed-at-northstar-c?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Northstar College
Date = Oct. 13, 2020
Time = 10:12 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = Full time English Teacher - Kanazawa, Japan
Link = https://www.eslcafe.com/postajob-detail/full-time-english-teacher-8?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Bartolo English
Date = Oct. 13, 2020
Time = 09:09 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = A good choice for ESL teachers! Teaching English online for Chinese kids - Earn up to $26/hr
Link = https://www.eslcafe.com/postajob-detail/a-good-choice-for-esl-teachers-teaching-engli?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Magic Ears
Date = Oct. 13, 2020
Time = 09:07 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐Attention⭐Online Teaching Position offers up to $25/hr
Link = https://www.eslcafe.com/postajob-detail/online-english-tutor-earn-up-to-25hr?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = GOGOKID
Date = Oct. 13, 2020
Time = 09:06 am PST
------------------------------------------------------------------------------------------------------------------------
Job Title = ⭐⭐⭐⭐ESL Teaching Positions Available in Taiwan NOW ⭐⭐⭐⭐
Link = https://www.eslcafe.com/postajob-detail/esl-teaching-positions-available-in-taiwan-no-6?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Teach Taiwan
Date = Oct. 12, 2020
Time = 11:38 pm PST
------------------------------------------------------------------------------------------------------------------------
...
Job Title = Work in Japan
Link = https://www.eslcafe.com/postajob-detail/work-in-japan?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60
Company = Omni International
Date = Oct. 08, 2020
Time = 10:21 am PST
The answer above waits 3 seconds, assuming the page will take less than 3 seconds to load. If you would rather wait until a certain text appears in the page, you can do:
from time import sleep

url = "https://www.eslcafe.com/jobs/international?koreasearch=&koreapageno=&koreapagesize=&chinasearch=&chinapageno=&chinapagesize=&internationalsearch=&internationalpageno=1&internationalpagesize=60"

# Configure and start Chrome.
chrome_options = webdriver.ChromeOptions()
preferences = {"safebrowsing.enabled": "false"}
chrome_options.add_experimental_option("prefs", preferences)
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
browser = webdriver.Chrome('C:/chromedriver.exe', chrome_options=chrome_options)
print(url)
browser.get(url)

# Poll the page source once per second until the marker text shows up.
displayTimer = 0
max_wait = 60  # seconds; upper bound so a missing phrase cannot hang the script
wanted_Phrase = "li"  # You should put some text that is only on the page when it has loaded.
while wanted_Phrase not in browser.page_source:
    if displayTimer >= max_wait:
        raise TimeoutError(
            "{!r} did not appear in the page within {} seconds".format(wanted_Phrase, max_wait)
        )
    sleep(1)
    displayTimer += 1
    print("[{}] Seconds waited for page".format(displayTimer))

html_list = browser.find_element_by_class_name("cmn-list")
items = html_list.find_elements_by_tag_name("li")
for item in items:
    text = item.text
    print(text)
I want to extract the date and summary of an article on a website. Here is my code:
from bs4 import BeautifulSoup
from selenium import webdriver
# Article to scrape; url0 is just a second name for the same URL.
full_url = 'https://www.wsj.com/articles/readers-favorite-summer-recipes-11599238648?mod=searchresults&page=1&pos=20'
url0 = full_url
# Load the page in a Chrome instance driven by Selenium and read the HTML from the browser.
browser0 = webdriver.Chrome('C:/Users/liuzh/Downloads/chromedriver_win32/chromedriver')
browser0.get(url0)
html0 = browser0.page_source
page_soup = BeautifulSoup(html0, 'html5lib')
# find_all() returns a list of matching tags, not their text.
date = page_soup.find_all("time", class_="timestamp article__timestamp flexbox__flex--1")
sub_head = page_soup.find_all("h2", class_="sub-head")
# Printing the lists shows the raw tag markup, surrounded by [ ].
print(date)
print(sub_head)
I got the following result. How can I obtain just the plain text? (e.g. Sept. 4, 2020 12:57 pm ET; This Labor Day weekend, we’re...)
[<time class="timestamp article__timestamp flexbox__flex--1">
Sept. 4, 2020 12:57 pm ET
</time>]
[<h2 class="sub-head" itemprop="description">This Labor Day weekend, we’re savoring the last of summer with a collection of seasonal recipes shared by Wall Street Journal readers. Each one comes with a story about what this food means to a family and why they return to it each year.</h2>]
Thanks.
Try something like:
# `date` is the list returned by find_all(); print each tag's text with the
# surrounding whitespace removed.
for d in date:
    print(d.text.strip())
Given your sample html, output should be:
Sept. 4, 2020 12:57 pm ET
I'm kinda new to Python and I'm trying to parse the website https://rustavi2.ge/ka/schedule using the following code. The content might be in Georgian, but I don't think that matters.
When you open the page you will see the text 07:15 ანიმაცია "სონიკ ბუმი" at the front. Via the inspector I can see the element's tag and class, but the following code returns only None. I know I'm doing something terribly wrong, but I can't really figure it out.
import requests
from bs4 import BeautifulSoup
# Fetch the schedule page and parse the raw response bytes.
r = requests.get('https://rustavi2.ge/ka/schedule')
c = r.content
soup = BeautifulSoup(c,'html.parser')
# Look for the first "bade_line" div inside the first "sch_cont" div.
# NOTE(review): per the question this lookup comes back as None — presumably
# the schedule rows are not part of the HTML that requests receives; confirm.
a = soup.find("div", {"class": "sch_cont"}).find("div",{"class": "bade_line"})
print((a).encode("utf-8"))
The data is loaded via Ajax from a separate URL:
import requests
import urllib3
from bs4 import BeautifulSoup

# Lower the OpenSSL security level used by the urllib3 bundled with requests.
# The directive is spelled "@SECLEVEL"; "#SECLEVEL" is not valid cipher-string syntax.
# NOTE(review): presumably needed because the site's TLS setup is rejected at
# the default level, and relies on urllib3 exposing DEFAULT_CIPHERS — confirm
# for the installed version.
requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL:@SECLEVEL=1'

# Endpoint that returns the schedule fragment for one day (dt) and language (lang).
url = 'https://rustavi2.ge/includes/bade_ajax.php?dt=2020-08-17&lang=ka'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# Pair each ".b_time" element with the ".b_title" element at the same position.
for tm, title in zip(soup.select('.b_time'), soup.select('.b_title')):
    print(tm.text, title.text)
Prints:
07:15 ანიმაცია "სონიკ ბუმი"
08:00 მხ/ფილმი
10:00 კურიერი
10:15 სერიალი "ქალური ბედნიერება"
12:00 კურიერი
12:30 სერიალი "ქალური ბედნიერება"
13:55 სერიალი "მე ვიცი რა გელის"
15:00 კურიერი
15:50 დღის კურიერი
16:30 სერიალი "უცხო მშობლიურ მხარეში"
18:00 კურიერი
18:50 სერიალი "მარიამ მაგდალინელი"
20:30 ლოტო
20:40 სერიალი "მარიამ მაგდალინელი"
21:00 კურიერი
22:00 ფარული კონვერტი
23:00 გააცინე და მოიგე
00:00 სერიალი "სენდიტონი"
00:30 მხ/ფილმი
01:00 მხ/ფილმი
03:30 კურიერის დაიჯესტი
04:00 სერიალი "ქალური ბედნიერება"
05:00 სერიალი "უცხო მშობლიურ მხარეში"
I am trying to parse an ESPN webpage to get the date, time, and teams playing in each NFL game for a given week using BeautifulSoup. I am able to get most of the information, however, I am having trouble with the time information.
For some reason, the text between the a tag is not being returned.
The html for one of the a tags is:
<a data-dateformat="time1" name="&lpos=nfl:schedule:time" href="/nfl/game?gameId=400874572">12:00 PM</a>
I am looking to get the "12:00 PM" in between the a tags, but instead I get:
<a data-dateformat="time1" href="/nfl/game?gameId=400874572" name="&lpos=nfl:schedule:time"></a>
which doesn't have any text in between the tags.
Here is what I have used to parse the webpage.
import urllib2
from bs4 import BeautifulSoup
def parse_nfl_schedule_espn():
    """Print the kickoff-time links for each game on ESPN's NFL week 10 schedule.

    The page is downloaded with urllib2 and parsed with BeautifulSoup/lxml.
    Every <h2> is treated as a date heading, and the node right after it as
    the table holding that day's games. For each game row the list of <a>
    tags found in the third cell is printed.
    """
    schedule = BeautifulSoup(urllib2.urlopen("http://www.espn.com/nfl/schedule/_/week/10").read(), "lxml")
    for heading in schedule.find_all('h2'):
        # separate by game
        game_rows = heading.nextSibling.find_all('tr')
        # date, away and home are collected for the commented-out prints below
        date = heading.get_text()
        #print date
        for row in game_rows:
            # separate each part of game row
            cells = row.find_all('td')
            # skip rows with at most one <td> (presumably the <thead> row)
            if len(cells) > 1:
                # away team abv
                away = cells[0].find('abbr').get_text()
                # home team abv
                home = cells[1].find('abbr').get_text()
                time_links = cells[2].find_all('a')
                # print(x) with one argument prints the same on Python 2 and 3
                print(time_links)
                #print "%s at %s" % (away, home)


if __name__ == "__main__":
    parse_nfl_schedule_espn()
Any help/suggestions would be much appreciated.
You will need to use something like Selenium to get the HTML. This lets a real browser run any JavaScript on the page before you read the HTML. This can be done as follows:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
def parse_nfl_schedule_espn():
    """Print the kickoff time of every game on ESPN's NFL week 10 schedule.

    The page is loaded in Firefox through Selenium and the HTML is read from
    the browser, then parsed with BeautifulSoup/lxml.
    """
    browser = webdriver.Firefox(firefox_binary=FirefoxBinary())
    try:
        browser.get("http://www.espn.com/nfl/schedule/_/week/10")
        schedule = BeautifulSoup(browser.page_source, "lxml")
    finally:
        # Close the browser even if loading or parsing fails.
        browser.quit()

    # The time links are the <a> tags carrying data-dateformat="time1".
    for time_link in schedule.find_all('a', attrs={'data-dateformat' : "time1"}):
        # print(x) with one argument prints the same on Python 2 and 3
        print(time_link.text)


if __name__ == "__main__":
    parse_nfl_schedule_espn()
Which would display the following:
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
6:00 PM
9:05 PM
9:25 PM
9:25 PM
1:30 AM
1:30 AM
You could also investigate "headless" solutions such as PhantomJS to avoid having to see a browser window being displayed.