Python Selenium iterate table of links clicking each link - python

So this question has been asked before but I am still struggling to get it working.
The webpage has a table with links, I want to iterate through clicking each of the links.
So this is my code so far
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# `By` is used below but was missing from the imports above.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
try:
    # Wait up to 20 s for the table container to be present in the DOM.
    element = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "table-scroll")))
    # NOTE: an XPath that starts with "//" is evaluated against the whole
    # document, not only the descendants of `element`.
    table = element.find_elements_by_xpath("//table//tbody/tr")
    # Skip the first row and print the cell markup of every remaining row.
    for row in table[1:]:
        print(row.get_attribute('innerHTML'))
        # link.click()
finally:
    # quit() closes the window and also ends the driver session.
    driver.quit()
Sample of output
<td>FOUR</td>
<td>4imprint Group plc</td>
<td>Media & Publishing</td>
<td>888</td>
<td>888 Holdings</td>
<td>Hotels & Entertainment Services</td>
<td>ASL</td>
<td>Aberforth Smaller Companies Trust</td>
<td>Collective Investments</td>
How do I click the href in each row and then iterate to the next href?
Many thanks.
edit
I went with this solution (a few small tweaks on Prophet's solution)
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains
# Required by the `except` clause inside the loop below.
from selenium.common.exceptions import StaleElementReferenceException

# XPath shared by every lookup of the table links.
LINKS_XPATH = "//table//tbody/tr/td/a"

driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
actions = ActionChains(driver)
wait = WebDriverWait(driver, 20)

# Close the cookies banner.
wait.until(EC.visibility_of_element_located((By.ID, "ensCloseBanner"))).click()
# Wait for the first link in the table.
wait.until(EC.visibility_of_element_located((By.XPATH, LINKS_XPATH)))
# Extra pause so the remaining links have time to load.
time.sleep(1)

# Only the number of links is taken from this first lookup; the elements
# themselves are looked up again on every iteration.
link_count = len(driver.find_elements(By.XPATH, LINKS_XPATH))
for index in range(link_count):
    try:
        # Get the links again after returning to the initial page.
        links = driver.find_elements(By.XPATH, LINKS_XPATH)
        # Scroll to the n-th link; it may be outside the initially visible area.
        actions.move_to_element(links[index]).perform()
        links[index].click()
        # Scrape the data on the new page here, then go back.
        driver.execute_script("window.history.go(-1)")  # alternatively: driver.back()
        wait.until(EC.visibility_of_element_located((By.XPATH, LINKS_XPATH)))
        time.sleep(2)
    except StaleElementReferenceException:
        # The element was replaced between lookup and use; skip this link.
        pass

To perform what you want to do here you first need to close cookies banner on the bottom of the page.
Then you can iterate over the links in the table.
Since clicking each link opens a new page, after scraping the data there you will have to go back to the main page and get the next link. You cannot just collect all the links into a list and then iterate over that list, because once you navigate to another web page, all the elements Selenium grabbed on the initial page become stale.
Your code can be something like this:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
# `ActionChains` is used below but was missing from the imports above.
from selenium.webdriver.common.action_chains import ActionChains

# XPath shared by every lookup of the table links.
LINKS_XPATH = "//table//tbody/tr/td/a"

driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
actions = ActionChains(driver)
wait = WebDriverWait(driver, 20)

# Close the cookies banner.
wait.until(EC.visibility_of_element_located((By.ID, "ensCloseBanner"))).click()
# Wait for the first link in the table.
wait.until(EC.visibility_of_element_located((By.XPATH, LINKS_XPATH)))
# Extra pause so the remaining links have time to load.
time.sleep(1)

# Get the total number of links.
link_count = len(driver.find_elements(By.XPATH, LINKS_XPATH))
for index in range(link_count):
    # Get the links again after returning to the initial page.
    links = driver.find_elements(By.XPATH, LINKS_XPATH)
    # Scroll to the n-th link; it may be outside the initially visible area.
    actions.move_to_element(links[index]).perform()
    links[index].click()
    # Scrape the data on the new page here, then go back.
    driver.execute_script("window.history.go(-1)")  # alternatively: driver.back()
    wait.until(EC.visibility_of_element_located((By.XPATH, LINKS_XPATH)))
    time.sleep(1)

You basically have to do the following:
Click on the cookies button if available
Get all the links on the page.
Iterate over the list of links: scroll each link into view, click it, and then navigate back to the original page.
Code:
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException

# XPath shared by every lookup of the table links.
LINKS_XPATH = "//table//tbody/tr/td/a"

# `driver_path` must already be defined before this snippet runs.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.fidelity.co.uk/shares/ftse-350/")

# Dismiss the cookies banner if it shows up; carry on if it does not.
try:
    wait.until(EC.element_to_be_clickable((By.ID, "ensCloseBanner"))).click()
    print('Click on the cookies button')
except WebDriverException:
    # Covers the wait timing out as well as a failed click.
    print('Could not click on the cookies button')

driver.execute_script("window.scrollTo(0, 750)")

try:
    all_links = wait.until(EC.presence_of_all_elements_located((By.XPATH, LINKS_XPATH)))
    print("We have got to deal with", len(all_links), 'links')
    for index in range(len(all_links)):
        # Look the links up again on every pass: elements found before a
        # navigation are stale once the page has been left and re-entered.
        links = wait.until(EC.presence_of_all_elements_located((By.XPATH, LINKS_XPATH)))
        driver.execute_script("arguments[0].scrollIntoView(true);", links[index])
        time.sleep(1)
        links[index].click()
        # here write the code to scrape something once the click is performed
        time.sleep(1)
        driver.execute_script("window.history.go(-1)")
        # Number of links processed so far.
        print(index + 1)
except (WebDriverException, IndexError) as exc:
    # Report the reason for stopping instead of hiding it.
    print('Bot Could not exceute all the links properly', exc)
Import:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
PS to handle stale element reference you'd have to define the list of web elements again inside the loop.

Related

StaleElementReferenceException in python selenium

I am trying to count how many times the "Load More Reviews" option is clicked on this site. But I am getting the following error:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Here is my python code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--disable-notifications")
# `options=` is the current keyword; `chrome_options=` is the deprecated spelling.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
url = "https://www.justdial.com/Delhi/S-K-Premium-Par-Hari-Nagar/011PXX11-XX11-131128122154-B8G6_BZDET"
driver.get(url)

# Dismiss the pop-up. XPath attribute tests are written with "@".
pop_up = WebDriverWait(driver, 30).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="best_deal_detail_div"]/section/span')))
pop_up.click()  # For disable pop-up

count = 0
# NOTE: this loop has no exit condition of its own; it ends only when the
# wait or the click raises an exception.
while True:
    element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More Reviews..']")))
    element.click()
    count = count + 1
    print(count)
Try below code:
# Both exception classes are caught below, so they must be imported.
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

count = 0
while True:
    try:
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More Reviews..']"))).click()
        count = count + 1
    except StaleElementReferenceException:
        # The button was replaced between being located and being clicked;
        # go round again and locate it afresh.
        pass
    except TimeoutException:
        # No clickable button appeared within 20 s: nothing more to load.
        break
print(count)
Issue: Your code waits for the Load More Reviews button to be clickable. Once it has been clicked, the next wait can find the button and report it as clickable before the page has finished loading; by the time the click is attempted, the page is still refreshing / loading more reviews. As a result the HTML DOM has been refreshed, the located element is stale, and the stale element exception is raised.
Also, as there was no break condition in your code, I have added one: if there is no Load More Reviews button on the page, the code breaks out of the loop.

Selenium crashes when I'm trying to parse the next page (and seven after it) on a website. Any way to tackle this?

I want to parse an IMDb film rating located here on around 8 pages. In order to do that I'm using Selenium, and I'm having trouble with clicks, proceeding algorithm to next page. In the end I need 1000 titles when I'll continue using BeautifulSoup. Code below isn't working, I need to use button 'NEXT' with this HTML:
<a class="flat-button lister-page-next next-page" href="/list/ls000004717/?page=2">
Next
</a>
This is the code:
from selenium import webdriver as wb
# Start Chrome and open the IMDb list page.
browser = wb.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
# The string passed here is the NEXT anchor's full class attribute value, used
# in a lookup by name. `field` receives the result of click(), not the element.
field = browser.find_element_by_name("flat-button lister-page-next next-page").click()
Error is the following:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".flat-button lister-page-next next-page"}
(Session info: chrome=78.0.3904.108)
I suppose I lack knowledge of syntax needed, or maybe I mixed it up a little. I tried searching on SO, though every example is pretty unique and I don't possess the knowledge to extrapolate these cases fully. Any way Selenium can handle that?
You could try using an XPath to query on the Next text inside the button. You should also probably invoke WebDriverWait since you are navigating across multiple pages, then scroll into view since this is at the bottom of the page:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from time import sleep
# The `wb` alias used below has to be imported in this snippet as well.
from selenium import webdriver as wb

browser = wb.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
# keep clicking next until we reach the end
for _ in range(9):
    try:
        # wait up to 10s for the next button to become clickable
        # (XPath attribute tests are written with "@")
        next_button = WebDriverWait(browser, 10).until(EC.element_to_be_clickable(
            (By.XPATH, "//a[contains(@class, 'page') and contains(text(), 'Next')]")))
        # scroll down to button using Javascript
        browser.execute_script("arguments[0].scrollIntoView(true);", next_button)
        # click the button
        # next_button.click() this throws exception -- replace with JS click
        browser.execute_script("arguments[0].click();", next_button)
        # I never recommend using sleep like this, but WebDriverWait is not
        # waiting on next button to fully load, so it goes stale.
        sleep(5)
    # case: next button no longer exists, we have reached the end
    except TimeoutException:
        break
I also wrapped everything in a try / except TimeoutException block to handle the case where we have reached the end of pages, and Next button no longer exists, thus breaking out of the loop. This worked on multiple pages for me.
I also had to add an explicit sleep(5) because even after invoking WebDriverWait on element_to_be_clickable, next_button was still throwing StaleElementReferenceException. It seems like WebDriverWait was finishing before page was fully loaded, causing the status of next_button to change after it had been located. Normally adding sleep(5) is bad practice, but there did not seem to be another workaround here. If anyone else has a suggestion on this, feel free to comment / edit the answer.
There are a couple of ways that could work:
1. Use a selector for the next button and loop until the end:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
browser = webdriver.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
selector = 'a[class*="next-page"]'
num_pages = 10
# Loop over `num_pages`, the variable actually defined above.
for _ in range(num_pages):
    # Wait for the element to load and keep the element the wait returns
    next_link = WebDriverWait(browser, 10).until(
        ec.presence_of_element_located((By.CSS_SELECTOR, selector)))
    # ... Do rating parsing here
    next_link.click()
2. Instead of clicking on the element, the other option is to navigate to the next page using browser.get('...'):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
# Set up browser as before and navigate to the page
base_url = 'https://www.imdb.com/list/ls000004717/'
page_extension = '?page='
selector = 'a[class*="next-page"]'
# Total number of pages to visit; the loop below needs it defined.
num_pages = 10
browser = webdriver.Chrome()
browser.get(base_url)
# Already at page = 1, so only needs to loop 9 times
for page in range(2, num_pages + 1):
    # Wait for the page to load
    WebDriverWait(browser, 10).until(ec.presence_of_element_located((By.CSS_SELECTOR, selector)))
    # ... Do rating parsing here
    next_page = base_url + page_extension + str(page)
    browser.get(next_page)
# NOTE(review): the page loaded by the final iteration is not parsed inside
# the loop; parse it here if the last page is needed as well.
As a note: field = browser.find_element_by_name("...").click() will not assign field to a webelement, as the click() method has no return value.
You could try a partial css selector.
# Find the first <a> whose class attribute contains "next-page" and click it.
browser.find_element_by_css_selector("a[class*='next-page']").click()
To click on the element with text as NEXT till the 901 - 1,000 of 1,000 page you have to:
scrollIntoView() the element once the visibility_of_element_located() is achieved.
Induce WebDriverWait for the element_to_be_clickable()
You can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
wait = WebDriverWait(driver, 20)
try:
    driver.get('https://www.imdb.com/list/ls000004717/')
    # Bring the pagination range into view before the first click.
    driver.execute_script(
        "return arguments[0].scrollIntoView(true);",
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span.pagination-range"))))
    while True:
        try:
            wait.until(EC.invisibility_of_element(
                (By.CSS_SELECTOR, "div.row.text-center.lister-working.hidden")))
            driver.execute_script(
                "return arguments[0].scrollIntoView(true);",
                wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span.pagination-range"))))
            wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "a.flat-button.lister-page-next.next-page"))).click()
            print("Clicked on NEXT button")
        except TimeoutException:
            # One of the waits above timed out: treat it as the end of the list.
            print("No more NEXT button")
            break
finally:
    # Shut the browser down even when an unexpected error escapes the loop.
    driver.quit()
Console Output:
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
No more NEXT button

Unable to click on 'more' button cyclically to get all the full reviews

I've created a script in python in combination with selenium to fetch all the reviews from a certain page of google maps. There are lots of reviews in that page and they are only visible once that page is made to scroll downward. My script can do all of them successfully.
However, the only issue that I'm facing at this moment is that some of the reviews have More button which is meant to click in order to show the full review.
One of such is this:
website address
I've tried with:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Raised by find_element_* when nothing matches; caught in the loop below.
from selenium.common.exceptions import NoSuchElementException

# Google Maps place URLs carry the coordinates after an "@".
link = "https://www.google.com/maps/place/Pizzeria+Di+Matteo/@40.8512552,14.255779,17z/data=!4m7!3m6!1s0x133b0841ef6e38e5:0xece6ea09987e9baf!8m2!3d40.8512512!4d14.2579677!9m1!1b1"
driver = webdriver.Chrome()
driver.get(link)
wait = WebDriverWait(driver,10)

# Keep scrolling the loading spinner into view until it stops appearing.
while True:
    try:
        elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='section-loading-spinner']")))
        driver.execute_script("arguments[0].scrollIntoView();",elem)
    except Exception:
        break

# NOTE: this is the loop that raises the stale element error being asked about.
for see_more in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "button[class^='section-expand-review']"))):
    see_more.click()

for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".section-review-content"))):
    name = item.find_element_by_css_selector("[class='section-review-title'] > span").text
    try:
        review = item.find_element_by_css_selector("[class='section-review-text']").text
    except NoSuchElementException:
        # A missing review text raises NoSuchElementException, not AttributeError.
        review = ""
    print(name)

driver.quit()
Currently the above script throws stale element error when it hits this line for see_more in wait.until().click().
How can I click on that More button cyclically to get all the full reviews?
If you use WebDriverWait with presence_of_all_elements_located, it searches for the elements within the given time, and if an element is no longer attached to the HTML you will receive an error.
Instead, check how many matching elements are present on the page, and click the element only if there is at least one.
# Look the buttons up once; an empty list means there is nothing to expand.
expand_buttons = driver.find_elements_by_css_selector("button[class^='section-expand-review']")
if expand_buttons:
    # Click the first match, as the separate find_element call used to do.
    expand_buttons[0].click()
Here is the code.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Raised by find_element_* when nothing matches; caught in the loop below.
from selenium.common.exceptions import NoSuchElementException

# Google Maps place URLs carry the coordinates after an "@".
link = "https://www.google.com/maps/place/Ecstasy/@23.7399982,90.3732109,17z/data=!3m1!4b1!4m7!3m6!1s0x3755b8caa669d5e3:0x41f47ddcc39a556e!8m2!3d23.7399933!4d90.3753996!9m1!1b1"
driver = webdriver.Chrome()
driver.get(link)
wait = WebDriverWait(driver,10)

# Keep scrolling the loading spinner into view until it stops appearing.
while True:
    try:
        elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='section-loading-spinner']")))
        driver.execute_script("arguments[0].scrollIntoView();",elem)
    except Exception:
        break

# Click the first expand button only when at least one is present.
expand_buttons = driver.find_elements_by_css_selector("button[class^='section-expand-review']")
if expand_buttons:
    expand_buttons[0].click()
    print('pass')

for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".section-review-content"))):
    name = item.find_element_by_css_selector("[class='section-review-title'] > span").text
    try:
        review = item.find_element_by_css_selector("[class='section-review-text']").text
    except NoSuchElementException:
        # A missing review text raises NoSuchElementException, not AttributeError.
        review = ""
    print(name)

driver.quit()
EDITED
# time.sleep is used below, so `time` must be imported.
import time

# Looping over an empty list does nothing, so no length check is needed first.
for item in driver.find_elements_by_css_selector("button[class^='section-expand-review']"):
    # Accessed for its side effect of scrolling the element into view.
    item.location_once_scrolled_into_view
    item.click()
    time.sleep(2)
This worked for me:
you can put it within for loop or your method to get all reviews.
try:
driver.find_element_by_class_name("mapsConsumerUiSubviewSectionReview__section-expand-review").click()
except:
continue

Difficulty with simulating clicks in Selenium and then scraping data of new page after click

I am trying to simulate a click from this page (http://www.oddsportal.com/baseball/usa/mlb/results/) to the last page number found at the bottom. The click I use on the icon in my code seems to work, but I can't get it to scrape the actual page data I want to after simulating this click. Instead, it just scrapes the data from the first original url. Any help on this would be greatly appreciated.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
url = 'http://www.oddsportal.com/baseball/usa/mlb/results/'
timeout = 5
# Upper bound on retries, so repeated timeouts cannot loop forever.
max_attempts = 3
dates2 = []

driver = webdriver.Chrome()
try:
    for _ in range(max_attempts):
        # Reload the page on every attempt with the same live driver; a driver
        # that has been quit cannot be used for a retry.
        driver.get(url)
        try:
            last_page_link = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.LINK_TEXT, '»|')))
            last_page_link.click()
            # XPath attribute tests are written with "@".
            WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.XPATH, ".//th[@class='first2 tl']")))
        except TimeoutException:
            print('Timeout Error!')
            continue
        soup = BeautifulSoup(driver.page_source, 'lxml')
        # Text of every th.first2 header cell, minus the first one.
        dates2 = [header.text for header in soup.find_all('th', {'class': 'first2'})][1:]
        break
finally:
    # Release the browser exactly once, whatever happened above.
    driver.quit()
print(dates2)

Can't get rid of "stale element" error while running my script

I've written a script in python with selenium. The script is supposed to click on some links in a webpage. When I run my script, It does click on the first link and then throws an error stale element reference: element is not attached to the page document instead of chasing for the next link. I searched a lot for the last few hours to find any solution to get rid of this error but no luck.
I'm not interested in their data, so any solution other than the process of clicking is not what I'm looking for. How can I click on the links until the last link?
This is my attempt so far:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def click_links(driver,url):
    """Open *url* and click each element with class ``result-row__item-hover-visualizer``.

    Uses the module-level ``wait`` created in the ``__main__`` section below.
    The elements are located once, before the first click, and that same list
    is iterated for all clicks.
    """
    driver.get(url)
    for olink in wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "result-row__item-hover-visualizer"))):
        olink.click()
        time.sleep(3)


if __name__ == '__main__':
    weblink = "https://www.hitta.se/s%C3%B6k?vad=Markiser+%26+Persienner"
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    try:
        click_links(driver,weblink)
    finally:
        # Always shut the browser down, even when clicking fails.
        driver.quit()
You can try below code:
def click_links(driver,url):
    """Open *url* and click every result row in turn, going back after each click.

    Uses a module-level ``wait`` (a ``WebDriverWait``) that must exist before
    the call. Rows are located again on every iteration and picked by index,
    so no element found before a navigation is reused afterwards.
    """
    driver.get(url)
    row_locator = (By.CLASS_NAME, "result-row__item-hover-visualizer")
    # Only the number of rows is taken from this first lookup.
    links_len = len(wait.until(EC.presence_of_all_elements_located(row_locator)))
    for index in range(links_len):
        # Hide the cookie dialog by setting its `hidden` property via JavaScript.
        cookies_bar = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '[data-bind="visible: showCookieDialog"]')))
        driver.execute_script("arguments[0].hidden='true';", cookies_bar)
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[data-track="click-show-more"]'))).click()
        # Fresh lookup of the rows, then take the index-th one.
        entry = wait.until(EC.presence_of_all_elements_located(row_locator))[index]
        entry.click()
        time.sleep(3)
        driver.back()

Categories

Resources