How to extract data from a dynamic table with selenium python?

How to extract data from a dynamic table with selenium python? - python

I'm trying to extract data from a website. I need to enter a value in the search box and submit it, which generates a table with the details. After the table is generated, I need to write the details to a text file or insert them into a database. This is what I have tried so far.
Website: https://commtech.byu.edu/noauth/classSchedule/index.php
Search text: "C S 142"
Sample Code
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
# "detach" keeps the Chrome window open after the script ends.
c_options = Options()
c_options.add_experimental_option("detach", True)
s = Service('C:/Users/sidat/OneDrive/Desktop/python/WebDriver/chromedriver.exe')
URL = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver = webdriver.Chrome(service=s, options=c_options)
driver.get(URL)

# Type the course code into the search box and submit it.
element = driver.find_element("id", "searchBar")
element.send_keys("C S 142", Keys.RETURN)
search_button = driver.find_element("id", "searchBtn")
search_button.click()

# Wait (up to 10 s) until the results table exists in the DOM.
table = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, "//*[@id='sectionTable']"))
)
# NOTE(review): "//tr" is evaluated from the document root, not from `table`;
# ".//tr" would restrict the search to rows inside this table.
rows = table.find_elements("xpath", "//tr")
for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")
    for cell in cells:
        print(cell.text)
I'm using PyCharm 2022.3 to write and test the code. Nothing is printed when I run it. Please help me solve this problem so that I can extract the data to a text file and to an SQL database table.

The following code prints the content of the table you asked for.
You need to wait for elements to be clickable if you are going to click them or send text to them, or wait for them to be visible if you want to read their text content.
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are not parsed as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 30)
url = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver.get(url)

# Wait for each control to be clickable before typing into it / clicking it.
wait.until(EC.element_to_be_clickable((By.ID, "searchBar"))).send_keys("C S 142", Keys.RETURN)
wait.until(EC.element_to_be_clickable((By.ID, "searchBtn"))).click()

# Wait for the table to be visible, since its text is about to be read.
table = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//*[@id='sectionTable']"))
)
# The leading "." scopes both searches to the located table.
headers = table.find_elements("xpath", ".//thead//th")
cells = table.find_elements("xpath", ".//tbody//td")

# Pad every cell to at least 10 characters and join each group into one line.
headers_text = "".join(header.text.ljust(10) for header in headers)
cells_text = "".join(cell.text.ljust(10) for cell in cells)
print(headers_text)
print(cells_text)
The output is:
Section Type Mode InstructorCredits Term Days Start End Location Available Waitlist
002 DAY Classroom 3.00 TBA 0/0 0

Try this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
c_options = Options()
c_options.add_experimental_option("detach", True)
s = Service('C:/Users/sidat/OneDrive/Desktop/python/WebDriver/chromedriver.exe')
# Pass the service and options built above; they were otherwise unused.
driver = webdriver.Chrome(service=s, options=c_options)
URL = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver.get(URL)
driver.maximize_window()

element = driver.find_element("id", "searchBar")
element.send_keys("C S 142", Keys.RETURN)
search_button = driver.find_element("id", "searchBtn")
search_button.click()

# Wait for the header cells themselves, not just the <table> element.
header = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, "//table[@id='sectionTable']/thead/tr/th"))
)
for th in header:
    # textContent is read instead of .text, which is empty for elements that are not displayed.
    print(th.get_attribute('textContent'))

rows = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, "//table[@id='sectionTable']/tbody/tr"))
)
# XPath positions are 1-based; each row's cells are located again by index.
for position in range(1, len(rows) + 1):
    cells = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, f"(//table[@id='sectionTable']/tbody/tr)[{position}]//td")
        )
    )
    for cell in cells:
        print(cell.get_attribute('textContent'))
You are waiting for the table, which is correct, but at that point the table is not fully loaded yet (the td elements are not loaded yet).
# Wait for a <td> inside the table rather than for the table element itself.
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[@id='sectionTable']//td")))
This way you wait at least until a td element is present in the table.

Related

Can't find element by name using selenium

I'm using selenium 4.7.2 and can't find the element by its name. The following code returns NoSuchElementException error:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Open the product page with the Chrome webdriver
PRODUCT_URL = 'https://www.woofshack.com/en/cloud-chaser-waterproof-softshell-dog-jacket-ruffwear-rw-5102.html'
browser = webdriver.Chrome()
browser.get(PRODUCT_URL)

# Look the price element up by its name attribute and print its text
price = browser.find_element(By.NAME, 'data-price-665')
price_text = price.text
print("Price: " + price_text)

# Pause for three seconds, then close the browser window
time.sleep(3)
browser.close()
What's wrong in using find_element method?

Looks like you are using a wrong locator here. I see no element with name attribute value 'data-price-665' on that page.
The following code is working:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are not parsed as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 20)
url = "https://www.woofshack.com/en/cloud-chaser-waterproof-softshell-dog-jacket-ruffwear-rw-5102.html"
driver.get(url)

# The price is located by element id plus class, not by a name attribute.
price = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#product-price-665 .price")))
print("Price: " + price.text)
The output is:
Price: €112.95

Selenium python driver doesn't click or press the key for the button all the times

I'm using selenium to get to YouTube and write something on the search bar and then press the button or press the enter key.
Both clicking the button and pressing the key work sometimes, but not every time.
I tried to wait with WebDriverWait, and I even changed the waiting time from 10 to 20 seconds, but it didn't make any difference.
And if I add anything (like printing the new page title), it only shows me the first page title and not the title after the search.
Here is my code and what I tried:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def get_driver():
    """Create a Firefox WebDriver with a 9-second implicit wait.

    Returns:
        The configured ``webdriver.Firefox`` instance.
    """
    # Imported locally so the snippet's posted import list stays unchanged.
    from selenium.webdriver.firefox.service import Service

    firefox_options = Options()
    # firefox_options.add_argument("--headless")
    # The ``executable_path=`` keyword of webdriver.Firefox was deprecated in
    # Selenium 4 and later removed; the geckodriver path goes through Service.
    service = Service(executable_path=r"C:\Program Files\Mozilla Firefox\geckodriver.exe")
    driver = webdriver.Firefox(service=service, options=firefox_options)
    driver.implicitly_wait(9)
    return driver
driver = get_driver()
driver.get('https://www.youtube.com/')
search = driver.find_element(By.XPATH, '//input[@id="search"]')
search.send_keys("python")
# search.send_keys(Keys.ENTER)  # using the Enter key; works only if nothing is added after this line
# searchbutton = driver.find_element(By.XPATH, '//*[@id="search-icon-legacy"]')  # this also doesn't work
# searchbutton.click()  # using the click() method; also does not work
WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="search-icon-legacy"]'))
).click()  # sometimes works
# driver.implicitly_wait(10)
# print(driver.title)  # this shows only the title of the first page, not the one after the search
Is it because I use the Firefox webdriver (should I change to Chrome)?
Or is it because of my internet connection?

To make this work, you need to click the search input field first, then add a short delay, and only then send Keys.ENTER or click the search-icon-legacy element.
So this is not your fault; it is how the YouTube page works. You could even call it a kind of bug. But since the page is built for human users, it works fine for them: a human will never click the input field and type the search value within zero time.
Anyway, the 2 following codes are working:
First.
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_argument('--disable-notifications')
# Raw string so the backslashes in the Windows path are not parsed as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.youtube.com/"
driver.get(url)

search = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@id="search"]')))
# Click the field first and pause briefly before typing.
search.click()
time.sleep(0.2)
search.send_keys("python")
# Submit by clicking the search button.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-icon-legacy"]'))).click()
Second.
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_argument('--disable-notifications')
# Raw string so the backslashes in the Windows path are not parsed as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.youtube.com/"
driver.get(url)

search = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@id="search"]')))
# Click the field first and pause briefly before typing.
search.click()
time.sleep(0.2)
# Submit by sending Enter together with the search text.
search.send_keys("python" + Keys.ENTER)

Selenium .send_keys() only sending first character of my string - Python

I'm trying to write a script that fills out the destination box on the google flights page. My code is behaving very inconsistently. Sometimes it works perfectly, other times it only types the letter 'B', rather than the full string 'Barcelona'. Sometimes, I get this error message:
"element click intercepted: Element (redacted html code) is not clickable at point (547, 472). Other element would receive the click"
Any idea why it's freaking out like this? Here's my code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import os
# Hand the chromedriver path to Chrome through a Service object so that the
# path deprecation warning is not raised.
script_dir = os.path.dirname(__file__)  # directory part of this script's path
chromedriver_path = '/path/to/chromedriver'
# NOTE: os.path.join discards script_dir when chromedriver_path is absolute.
abs_chromedriver_path = os.path.join(script_dir, chromedriver_path)
driver_service = Service(executable_path=abs_chromedriver_path)
browser = webdriver.Chrome(service=driver_service)

url = 'https://www.google.com/travel/flights'
selector = "div[aria-placeholder='Where from?'] input"
phrase = "Barcelona"
browser.get(url)

locator = (By.CSS_SELECTOR, selector)


def _clickable_input():
    # A fresh 20-second wait for the input to be clickable, as before each action.
    return WebDriverWait(browser, 20).until(EC.element_to_be_clickable(locator))


_clickable_input().click()
_clickable_input().send_keys(phrase)
_clickable_input().send_keys(Keys.ENTER)

This is one way to look for a flight from Barcelona to Vancouver, select the first suggestion from the suggestion lists for both starting/endpoint, and clicking 'Search':
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  # path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
url = 'https://www.google.com/travel/flights'
browser.get(url)

# Best effort: reject the cookie dialog if one shows up within 3 seconds.
try:
    cookie_button = WebDriverWait(browser, 3).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[aria-label="Reject all"]'))
    )
    # Reading this property scrolls the button into view.
    print(cookie_button.location_once_scrolled_into_view)
    t.sleep(1)
    cookie_button.click()
    print('rejected cookies')
except Exception:
    # Deliberately broad: any failure here just means there is nothing to dismiss.
    print('no cookie button')
t.sleep(1)

comboboxes = WebDriverWait(browser, 3).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'input[role="combobox"]'))
)
# NOTE(review): the indices below depend on the page's current layout; the
# box that is clicked and the box that receives the text are different elements.
comboboxes[0].click()
comboboxes[0].clear()
comboboxes[1].send_keys('Barcelona')
t.sleep(1)
suggestion_listbox = WebDriverWait(browser, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "ul[role='listbox']"))
)
print(suggestion_listbox[-1].get_attribute('outerHTML'))
# Pick the first suggestion of the last listbox found.
suggestion_listbox[-1].find_elements(By.TAG_NAME, 'li')[0].click()

comboboxes[2].click()
comboboxes[3].send_keys('Vancouver')
t.sleep(1)
suggestion_listbox = WebDriverWait(browser, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "ul[role='listbox']"))
)
print(suggestion_listbox[-1].get_attribute('outerHTML'))
suggestion_listbox[-1].find_elements(By.TAG_NAME, 'li')[0].click()
t.sleep(1)

WebDriverWait(browser, 3).until(
    EC.element_to_be_clickable((By.XPATH, '//span[text() = "Search"]'))
).click()
You just have to adapt it to your own selenium setup. Selenium docs: https://www.selenium.dev/documentation/

How would I loop through elements Python Selenium

I have a website for which I want to download excel files. (https://www.rivm.nl/media/smap/eenzaamheid.html)
First I want to click on the region and then perform the download. This I have working.
# Click the first region path found inside the chart's SVG.
wijk_keuze = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable(
    (By.XPATH, "//div[@class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
wijk_keuze.click()
# Open the chart's export menu.
download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable(
    (By.XPATH, "//div[@class='highcharts-container ']//*[name()='svg']//*[name()='g' and @aria-label='View export menu']//*[name()='rect']")))
download.click()
# Choose the XLS download entry from the menu.
WebDriverWait(download, 10).until(EC.visibility_of_element_located(
    (By.XPATH, "//div[@class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
time.sleep(2)
The code above selects the first region in the parent element and then downloads the Excel file.
What I want to do is loop through every element in the parent element. How would I go about doing so?
The parent element looks as follows:
<g transform="transform(0,0), scale(1,1)" animator="1">
my entire code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
# defining URL
url = 'https://www.rivm.nl/media/smap/eenzaamheid.html'
# defining driver
# NOTE(review): this PhantomJS instance is replaced by the Chrome driver a
# few lines below without ever being quit.
driver = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=options, executable_path=r'./chromedriver_win32/chromedriver')
driver.get(url)
# Municipalities
Detail_keuze = Select(driver.find_element_by_id("detail"))
options = Detail_keuze.options
Indicator_keuze = Select(driver.find_element_by_id("indicator"))
indicator_options = Indicator_keuze.options

# One fresh browser session per indicator option.
for index in range(0, len(indicator_options)):
    # defining URL
    url = 'https://www.rivm.nl/media/smap/eenzaamheid.html'
    # defining driver
    # NOTE(review): as above, this PhantomJS instance is replaced right away.
    driver2nd = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
    options = webdriver.ChromeOptions()
    options.add_argument("start-maximized")
    options.add_argument("disable-infobars")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("prefs", {
        "download.default_directory": r"MY_PATH",
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    })
    driver2nd = webdriver.Chrome(options=options, executable_path=r'./chromedriver_win32/chromedriver')
    driver2nd.get(url)
    # Municipalities
    Detail_keuze = Select(driver2nd.find_element_by_id("detail"))
    options = Detail_keuze.options
    Indicator_keuze = Select(driver2nd.find_element_by_id("indicator"))
    indicator_options = Indicator_keuze.options
    time.sleep(1)
    Indicator_keuze.select_by_index(index)
    # Click the first region path found inside the chart's SVG.
    wijk_keuze = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable(
        (By.XPATH, "//div[@class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
    wijk_keuze.click()
    # Open the chart's export menu and choose the XLS download entry.
    download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable(
        (By.XPATH, "//div[@class='highcharts-container ']//*[name()='svg']//*[name()='g' and @aria-label='View export menu']//*[name()='rect']")))
    download.click()
    WebDriverWait(download, 10).until(EC.visibility_of_element_located(
        (By.XPATH, "//div[@class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
    time.sleep(2)
    ######## HERE I WANT TO LOOP THROUGH EACH AND EVERY REGION
    driver2nd.close()
As you can see, I also want to loop through each and every indicator. This works. Now I want to add a loop through each and every region. So far I can only click on the first region.

You don't have to click on the options. You can get the details by changing the url 'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
Just add the logic for downloading it.
Try:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from itertools import product
import time  # time.sleep() is called below but was missing from the posted imports

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install())  # , chrome_options=chrome_options)
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://www.rivm.nl/media/smap/eenzaamheid.html')
time.sleep(5)  # crude fixed wait before the page source is read

# Read the available option values of both <select> elements from the page.
soup = BeautifulSoup(driver.page_source, 'html.parser')
ind = soup.find('select', attrs={'name': 'indicator'})
indicators = [option['value'] for option in ind.find_all('option')]
det = soup.find('select', attrs={'name': 'detail'})
details = [option['value'] for option in det.find_all('option')]

# Visit every (detail, indicator) combination directly by URL.
for detail, indicator in product(details, indicators):
    print(indicator, detail)
    new_url = f'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
    driver.get(new_url)
    # Write code for downloading it

How to click on the second link in google search result list using selenium web driver

I want to click on the second link in result in google search area using selenium-web-driver
( Need a method that suits for any google search result )
example page
This is my code, How can I modify the if statement
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
# Options must be complete before the driver is created; an argument added
# afterwards has no effect on the already-started browser.
chrome_options.add_argument("--start-maximized")
# Raw string so the backslashes in the Windows path are not parsed as escapes.
driver = webdriver.Chrome(r'C:\Windows\chromedriver.exe', options=chrome_options)
wait = WebDriverWait(driver, 10)
def google(text):
    """Print the current page's result links, or run a Google search.

    If *text* contains "first link", wait up to 10 seconds for an element
    matching the results locator to become visible and print the text of
    every match. Otherwise open Google, type *text* with its spaces removed
    into the search form, and submit it.

    Args:
        text: The phrase that selects the branch and, in the search branch,
            supplies the query.
    """
    if "first link" in text:
        RESULTS_LOCATOR = "//div/h3/a"
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, RESULTS_LOCATOR)))
        page1_results = driver.find_elements(By.XPATH, RESULTS_LOCATOR)
        for item in page1_results:
            print(item.text)
            # driver.find_element_by_class_name().click()
    else:
        ggwp = text.replace(" ", "")
        driver.get("https://www.google.com")
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.
        driver.find_element(
            By.XPATH, '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input'
        ).send_keys(ggwp)
        driver.find_element(
            By.XPATH, '//*[@id="tsf"]/div[2]/div[1]/div[3]/center/input[1]'
        ).send_keys(Keys.ENTER)

The second link can be placed in different divs.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
from selenium.common.exceptions import WebDriverException

# NOTE(review): executable_path= and the find_element_by_* helpers below are
# Selenium 3 APIs; confirm the installed Selenium version still provides them.
browser = webdriver.Chrome(executable_path=os.path.abspath(os.getcwd()) + "/chromedriver")
link = 'http://www.google.com'
browser.get(link)
# search keys
search = browser.find_element_by_name('q')
search.send_keys("python")
search.send_keys(Keys.RETURN)
# Try result containers in order and click the first link that can be
# found and clicked. XPath positions are 1-based, so div[0] never matches.
for i in range(1, 10):
    try:
        browser.find_element_by_xpath(
            '//*[@id="rso"]/div[' + str(i) + ']/div/div[2]/div/div/div[1]/a'
        ).click()
        break
    except WebDriverException:
        # No clickable link in this container; move on to the next one.
        pass

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to extract data from a dynamic table with selenium python? - python

Related

Can't find element by name using selenium

Selenium python driver doesn't click or press the key for the button all the times

Selenium .send_keys() only sending first character of my string - Python

How would I loop through elements Python Selenium

How to click on the second link in google search result list using selenium web driver

Categories

Resources