Selenium Stale element exception - python

Hey everyone, I am trying to scrape a website, and scraping itself isn't much of a problem. The main problem I am facing is that the site has a "load more" button, so before scraping I want to click that button until it no longer exists. I wrote some code to do that, but when I run it I get two errors, and they occur randomly (sometimes they appear, other times they don't). The first error is a stale element exception, and the second is that the element is not clickable because it is being overlapped by something. To stop the overlapping I added a line so that whenever the load button is clicked the page scrolls back to the top, but now the button gets overlapped there instead. So I am not able to solve these two problems. Here is the code:
#importing webdriver from selenium
from numpy.lib.npyio import load
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException, ElementNotInteractableException, ElementNotSelectableException, NoSuchElementException, StaleElementReferenceException
import csv
import pandas as pd
from selenium.webdriver.common.keys import Keys

#selecting Firefox as the browser
#in order to select Chrome
# webdriver.Chrome() will be used
path = r"D:\\Chromedriver.exe"
driver = webdriver.Chrome(path)

#URL of the website
url = "https://www.carecredit.com/doctor-locator/results/Any-Profession/Any-Specialty//?Sort=D&Radius=75&Page=1"
driver.maximize_window()
#opening link in the browser
driver.get(url)
driver.implicitly_wait(100)
search_bar = driver.find_element_by_xpath('//*[@id="location-display"]')
search_bar.click()
search_bar.send_keys("00704")
search_button = driver.find_element_by_xpath('//*[@id="dl-locate-button-new"]')
search_button.click()
driver.implicitly_wait(50)
distance = driver.find_element_by_xpath('//*[@id="dl-radius-new"]')
distance.click()
driver.implicitly_wait(100)
cookies = driver.find_element_by_xpath('//*[@id="onetrust-accept-btn-handler"]')
cookies.click()
i = 0
loadMoreButtonExists = True
while loadMoreButtonExists:
    try:
        driver.implicitly_wait(100)
        load_more = driver.find_element_by_xpath('//*[@id="next-page"]')
        load_more.click()
        driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
        print("done" + str(i))
        i = i + 1
        driver.implicitly_wait(30)
    except ElementNotInteractableException:
        loadMoreButtonExists = False
        print("done loading button")
practise_name = driver.find_elements_by_class_name('page-results')
try:
    for element in practise_name:
        practise_name = element.find_element_by_class_name('dl-result-header dl-map-hover')
        address = element.find_element_by_class_name('dl-result-address')
        phone_no = element.find_element_by_class_name('no-mobile')
        # l=[]
        # l1 = []
        # temp_list=[]
        # l.append(element.text)
        # print(l)
        print(practise_name.text)
        print(address.text)
        print(phone_no.text)
except Exception as e:
    print(e)
    driver.close()
driver.close()
And if my code does reach the part where I actually scrape the data, it fails there with an "invalid id" error even though I checked the id and it's correct. So please check that part too.
Thank you

Try like below and confirm. You can catch the respective exceptions like this:
# Imports Required:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementClickInterceptedException, StaleElementReferenceException, TimeoutException
import time
...
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
while True:
    try:
        load_more = wait.until(EC.visibility_of_element_located((By.ID, "next-page")))
        driver.execute_script("arguments[0].scrollIntoView(true);", load_more)
        driver.execute_script("window.scrollBy(0,-200);")
        load_more.click()
        time.sleep(2)
    except ElementClickInterceptedException:  # This might not occur, since we scroll so that the element is visible
        print("in ElementClickInterceptedException")
        pass
    except StaleElementReferenceException:  # This might not occur either, since we find the Load more element every time
        print("in StaleElementReferenceException")
        pass
    except TimeoutException:  # TimeoutException means there is no Load more button
        print("in TimeoutException")
        break
...
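(The scrollIntoView followed by window.scrollBy(0,-200) is what works around the overlap: it brings the button into view, then scrolls back up 200px, presumably so that any fixed header no longer covers the button.)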

Related

Job Applying Bot in Python facing IndexError: list index out of range

Disclaimer: I'm coming back to scripting after more than a decade (and I was a novice to begin with), so apologies if the question is mundane, but help is much needed and appreciated.
I'm trying to modify a python script to log me into a job portal, search job vacancies based on attributes and then apply to said jobs.
Since the portal opens new vacancies in separate tabs, the code is supposed to go to the next tab and test the criteria against the job description.
The code snippet is below:
for i in range(1,6):
    driver.get('https://www.naukri.com/'+role+'-jobs-in-'+location+'-'+str(i)+'?ctcFilter='+str(LL)+'to'+str(UL))
    driver.switch_to.window(driver.window_handles[1])
    url = driver.current_url
    driver.get(url)
    try:
        test = driver.find_element_by_xpath('//*[@id="root"]/main/div[2]/div[2]/section[2]')
        if all(word in test.text.lower() for word in Skillset):
            driver.find_element_by_xpath('//*[@id="root"]/main/div[2]/div[2]/section[1]/div[1]/div[3]/div/button[2]').click()
            time.sleep(2)
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
        else:
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
    except:
        driver.close()
        driver.switch_to.window(driver.window_handles[0])
However, when I run the script, it just logs me into the portal and goes to the correct listings page, but then it just stays there. Plus, it pops this error:
> line 43, in <module> driver.switch_to.window(driver.window_handles[1])
> IndexError: list index out of range
I am not able to understand what list this is referring to or how to fix the code. Any help is appreciated.
The complete code for those interested:
import selenium
from selenium import webdriver as wb
import pandas as pd
import time
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

driver = wb.Chrome("/Users/shashank/Desktop/Naukri Auto Apply/chromedriver")
Skillset = ['Marketing','Communications','Sales']
Skillset = [x.lower() for x in Skillset]
LL = 15  #lower limit of expected CTC
UL = 25  #upper limit of expected CTC
location = 'Bangalore'
location = location.lower().replace(" ","-")
role = 'Marketing Manager'
role = role.lower().replace(" ","-")
driver.get("https://www.naukri.com")
driver.find_element_by_xpath('//*[@id="login_Layer"]/div').click()
time.sleep(5)
driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/div/form/div[2]/input').send_keys("email_address_here")
driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/div/form/div[3]/input').send_keys("password_here")
time.sleep(5)
driver.find_element_by_xpath('//*[@id="root"]/div[3]/div[2]/div/form/div[6]/button').click()
time.sleep(20)
driver.find_element_by_xpath('/html/body/div[3]/div/div[1]/div[1]/div').click()
for i in range(1,6):
    driver.get('https://www.naukri.com/'+role+'-jobs-in-'+location+'-'+str(i)+'?ctcFilter='+str(LL)+'to'+str(UL))
    driver.switch_to.window(driver.window_handles[1])
    url = driver.current_url
    driver.get(url)
    try:
        test = driver.find_element_by_xpath('//*[@id="root"]/main/div[2]/div[2]/section[2]')
        if all(word in test.text.lower() for word in Skillset):
            driver.find_element_by_xpath('//*[@id="root"]/main/div[2]/div[2]/section[1]/div[1]/div[3]/div/button[2]').click()
            time.sleep(2)
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
        else:
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
    except:
        driver.close()
        driver.switch_to.window(driver.window_handles[0])
Thanks in advance for answering! It will really help with the job search!
No need to switch to another window.
When you open the URL with those specific details in the for loop, each page loads one by one in the same window. Switch windows only when a new window or tab has actually been opened. Link to Refer
Choose explicit waits instead of time.sleep(). You have defined WebDriverWait but never used it.
Try to come up with good locators. Go for relative XPath instead of absolute XPath. Link to Refer
Not sure what you are trying to do in the try block; those locators do not highlight any elements on the page.
Refer below code:
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome(service=Service("path to chromedriver.exe"))
driver.maximize_window()
driver.get("https://www.naukri.com")
wait = WebDriverWait(driver, 30)
login_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[text()='Login']")))
login_btn.click()
email = wait.until(EC.element_to_be_clickable((By.XPATH, "//form[@name='login-form']//input[contains(@placeholder,'Email')]")))
email.send_keys("abc@gmail.com")
password = wait.until(EC.element_to_be_clickable((By.XPATH, "//form[@name='login-form']//input[contains(@placeholder,'password')]")))
password.send_keys("password")
password.submit()
role = "marketing-manager"
loc = "bangalore"
llimt = "15"
ulimit = "25"
for i in range(1,6):
    driver.get(f"https://www.naukri.com/{role}-jobs-in-{loc}-{i}?ctcFilter={llimt}to{ulimit}")
    time.sleep(2)
    # Update for Chat bot
    try:
        chatbot_close = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='chatbot_Nav']/div")))
        chatbot_close.click()
    except:
        print("Chat bot did not appear")

Selenium button click in loop fails after first try

Currently I am working on a web crawler that should be able to download the text of a Dutch newspaper bank. The first link works correctly, but the second link suddenly produces an error that I do not know how to fix.
It seems that Selenium is unable to click the button on the second link, while it succeeds in doing so on the first.
Do you know what causes the second link (the Telegraaf page) to fail?
UPDATE CODE:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import numpy as np
import re
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains

#Set up the path to the chrome driver
driver = webdriver.Chrome()
html = driver.find_element_by_tag_name('html')
all_details = []
for c in range(1,2):
    try:
        driver.get("https://www.delpher.nl/nl/kranten/results?query=kernenergie&facets%5Bpapertitle%5D%5B%5D=Algemeen+Dagblad&facets%5Bpapertitle%5D%5B%5D=De+Volkskrant&facets%5Bpapertitle%5D%5B%5D=De+Telegraaf&facets%5Bpapertitle%5D%5B%5D=Trouw&page={}&sortfield=date&cql%5B%5D=(date+_gte_+%2201-01-1970%22)&cql%5B%5D=(date+_lte_+%2201-01-2018%22)&coll=ddd".format(c))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        incategory = driver.find_elements_by_class_name("search-result")
        print(driver.current_url)
        links = [i.find_element_by_class_name("thumbnail search-result__thumbnail").get_attribute("href") for i in incategory]
        # Let's loop through each link to access the page of each book
        for link in links:
            # get one book url
            driver.get(link)
            # newspaper
            newspaper = driver.find_element_by_xpath("//*[@id='content']/div[2]/div/div[2]/header/h1/span[2]")
            # date of the article
            date = driver.find_element_by_xpath("//*[@id='content']/div[2]/div/div[2]/header/div/ul/li[1]")
            #click button and find title
            div_element = WebDriverWait(driver, 60).until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="object"]/div/div/div')))
            hover = ActionChains(driver).move_to_element(div_element)
            hover.perform()
            div_element.click()
            button = WebDriverWait(driver, 90).until(expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="object-viewer__ocr-button"]')))
            hover = ActionChains(driver).move_to_element(button)
            hover.perform()
            button.click()
            element = driver.find_element_by_css_selector(".object-viewer__ocr-panel-results")
            driver.execute_script("$(arguments[0]).click();", element)
            # content of article
            try:
                content = driver.find_elements_by_xpath("//*[contains(text(), 'kernenergie')]").text
            except:
                content = None
            # Define a dictionary with details we need
            r = {
                "1Newspaper": newspaper.text,
                "2Date": date.text,
                "3Content": content,
            }
            # append r to all details
            all_details.append(r)
    except Exception as e:
        print(str(e))
        pass

# save the information into a CSV file
df = pd.DataFrame(all_details)
df = df.to_string()
time.sleep(3)
driver.close()
So you have some problems.
driver.implicitly_wait(10)

should only be used once.

links = [i.find_element_by_class_name("search-result__thumbnail-link").get_attribute("href") for i in incategory]

is a more useful way to get all links, and

print(driver.current_url)

could replace

print("https://www.delpher.nl/nl/kranten/results?query=kernenergie&facets%5Bpapertitle%5D%5B%5D=Algemeen+Dagblad&facets%5Bpapertitle%5D%5B%5D=De+Volkskrant&facets%5Bpapertitle%5D%5B%5D=De+Telegraaf&facets%5Bpapertitle%5D%5B%5D=Trouw&page={}&sortfield=date&cql%5B%5D=(date+_gte_+%2201-01-1970%22)&cql%5B%5D=(date+_lte_+%2201-01-2018%22)&coll=ddd".format(c))

There is no need for url=link:

for link in links:
    driver.get(link)

Your title element actually isn't found on the second page. Use something like this for all values:

try:
    content = driver.find_element_by_xpath('//*[@id="object-viewer__ocr-panel"]/div[2]/div[5]').text
except:
    content = None

# Define a dictionary
r = {
    "1Newspaper": newspaper,
    "2Date": date,
    "3Title": title,
    "4Content": content,
}

You can replace your bare exception handler with the following to figure out which line causes the problem:

except Exception as e:
    print(str(e))
    pass
It might be that the button you are trying to reach is inside an iframe, which means you have to switch to that frame before searching for the XPath:

iframe = driver.find_element_by_tag_name('iframe')
driver.switch_to.frame(iframe)

There is also the possibility that the object you're trying to click is not visible yet, which could be solved with a timeout.
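For the visibility case, a minimal sketch using an explicit wait (the timeout and the locator are placeholders to adapt to your page):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait up to 10 seconds for the button to be both visible and enabled
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "object-viewer__ocr-button"))  # placeholder locator
)
button.click()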

Unable to click on 'more' button cyclically to get all the full reviews

I've created a script in Python, in combination with Selenium, to fetch all the reviews from a certain page of Google Maps. There are lots of reviews on that page, and they only become visible once the page is scrolled downward. My script can do all of that successfully.
However, the only issue I'm facing at the moment is that some of the reviews have a More button, which has to be clicked in order to show the full review.
One such page is this:
website address
I've tried with:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://www.google.com/maps/place/Pizzeria+Di+Matteo/@40.8512552,14.255779,17z/data=!4m7!3m6!1s0x133b0841ef6e38e5:0xece6ea09987e9baf!8m2!3d40.8512512!4d14.2579677!9m1!1b1"
driver = webdriver.Chrome()
driver.get(link)
wait = WebDriverWait(driver, 10)

while True:
    try:
        elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='section-loading-spinner']")))
        driver.execute_script("arguments[0].scrollIntoView();", elem)
    except Exception:
        break

for see_more in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "button[class^='section-expand-review']"))):
    see_more.click()

for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".section-review-content"))):
    name = item.find_element_by_css_selector("[class='section-review-title'] > span").text
    try:
        review = item.find_element_by_css_selector("[class='section-review-text']").text
    except AttributeError:
        review = ""
    print(name)

driver.quit()
Currently the above script throws a stale element error when it hits the line for see_more in wait.until(...): see_more.click().
How can I click on that More button cyclically to get all the full reviews?
If you use WebDriverWait with presence_of_all_elements_located, it waits to find the elements within the given time, and if they are no longer attached to the HTML you will receive an error.
Instead, check the length of the elements present on the page, and if there are any, click on the element:

if len(driver.find_elements_by_css_selector("button[class^='section-expand-review']")) > 0:
    driver.find_element_by_css_selector("button[class^='section-expand-review']").click()
Here is the code.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://www.google.com/maps/place/Ecstasy/@23.7399982,90.3732109,17z/data=!3m1!4b1!4m7!3m6!1s0x3755b8caa669d5e3:0x41f47ddcc39a556e!8m2!3d23.7399933!4d90.3753996!9m1!1b1"
driver = webdriver.Chrome()
driver.get(link)
wait = WebDriverWait(driver, 10)

while True:
    try:
        elem = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='section-loading-spinner']")))
        driver.execute_script("arguments[0].scrollIntoView();", elem)
    except Exception:
        break

if len(driver.find_elements_by_css_selector("button[class^='section-expand-review']")) > 0:
    driver.find_element_by_css_selector("button[class^='section-expand-review']").click()
    print('pass')

for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".section-review-content"))):
    name = item.find_element_by_css_selector("[class='section-review-title'] > span").text
    try:
        review = item.find_element_by_css_selector("[class='section-review-text']").text
    except AttributeError:
        review = ""
    print(name)

driver.quit()
EDITED
if len(driver.find_elements_by_css_selector("button[class^='section-expand-review']")) > 0:
    for item in driver.find_elements_by_css_selector("button[class^='section-expand-review']"):
        item.location_once_scrolled_into_view
        item.click()
        time.sleep(2)
This worked for me:
You can put it within a for loop, or in your method that gets all the reviews.

try:
    driver.find_element_by_class_name("mapsConsumerUiSubviewSectionReview__section-expand-review").click()
except:
    continue

Can't get rid of "stale element" error while running my script

I've written a script in Python with Selenium. The script is supposed to click on some links on a webpage. When I run my script, it does click on the first link, but then it throws the error stale element reference: element is not attached to the page document instead of chasing the next link. I've searched a lot over the last few hours for any solution to get rid of this error, but no luck.
I'm not interested in their data, so any solution other than the process of clicking is not what I'm looking for. How can I click on the links through to the last one?
This is my attempt so far:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def click_links(driver, url):
    driver.get(url)
    for olink in wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "result-row__item-hover-visualizer"))):
        olink.click()
        time.sleep(3)

if __name__ == '__main__':
    weblink = "https://www.hitta.se/s%C3%B6k?vad=Markiser+%26+Persienner"
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    try:
        click_links(driver, weblink)
    finally:
        driver.quit()
You can try the code below:

def click_links(driver, url):
    driver.get(url)
    links_len = len(wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "result-row__item-hover-visualizer"))))
    for index in range(links_len):
        cookies_bar = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '[data-bind="visible: showCookieDialog"]')))
        driver.execute_script("arguments[0].hidden='true';", cookies_bar)
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[data-track="click-show-more"]'))).click()
        entry = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "result-row__item-hover-visualizer")))[index]
        entry.click()
        time.sleep(3)
        driver.back()
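The key point in this version: clicking an entry navigates away, and driver.back() rebuilds the DOM, so any element found before that navigation goes stale. Re-locating the whole list on every iteration and picking the entry by index means no reference is ever held across a page load, which is what avoids the stale element error.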

How can I wait till the page reloads [duplicate]

I want to scrape all the data of a page implemented by an infinite scroll. The following Python code works:

for i in range(100):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)

This means every time I scroll down to the bottom, I need to wait 5 seconds, which is generally enough for the page to finish loading the newly generated contents. But this may not be time efficient: the page may finish loading the new contents within 5 seconds. How can I detect whether the page has finished loading the new contents each time I scroll down? If I could detect this, I could scroll down again to see more contents as soon as the page finished loading, which would be more time efficient.
The webdriver will wait for a page to load by default via the .get() method.
As you may be looking for some specific element, as @user227215 said, you should use WebDriverWait to wait for an element located on your page:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

browser = webdriver.Firefox()
browser.get("url")
delay = 3  # seconds
try:
    myElem = WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.ID, 'IdOfMyElement')))
    print("Page is ready!")
except TimeoutException:
    print("Loading took too much time!")
I have used it for checking alerts. You can use any other type of method to find the locator.
EDIT 1:
I should mention that the webdriver will wait for a page to load by default. It does not wait for loading inside frames or for ajax requests. It means when you use .get('url'), your browser will wait until the page is completely loaded and then go on to the next command in the code. But when you are posting an ajax request, webdriver does not wait, and it's your responsibility to wait an appropriate amount of time for the page, or a part of the page, to load; that is why there is a module named expected_conditions.
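A minimal sketch of that idea, assuming the ajax response renders some element you can name (the locator here is a made-up placeholder):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver.get('url')  # .get() itself blocks until the initial document has loaded

# trigger the ajax request (e.g. a click), then wait explicitly for the
# element the response is expected to insert:
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '#ajax-result'))  # placeholder
)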
Trying to pass find_element_by_id to the constructor for presence_of_element_located (as shown in the accepted answer) caused NoSuchElementException to be raised. I had to use the syntax in fragles' comment:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
driver.get('url')
timeout = 5
try:
    element_present = EC.presence_of_element_located((By.ID, 'element_id'))
    WebDriverWait(driver, timeout).until(element_present)
except TimeoutException:
    print("Timed out waiting for page to load")
This matches the example in the documentation. Here is a link to the documentation for By.
Find below 3 methods:
readyState
Checking page readyState (not reliable):

def page_has_loaded(self):
    self.log.info("Checking if {} page is loaded.".format(self.driver.current_url))
    page_state = self.driver.execute_script('return document.readyState;')
    return page_state == 'complete'
The wait_for helper function is good, but unfortunately click_through_to_new_page is open to the race condition where we manage to execute the script in the old page, before the browser has started processing the click, and page_has_loaded just returns true straight away.
id
Comparing new page ids with the old one:

def page_has_loaded_id(self):
    self.log.info("Checking if {} page is loaded.".format(self.driver.current_url))
    try:
        new_page = browser.find_element_by_tag_name('html')
        return new_page.id != old_page.id
    except NoSuchElementException:
        return False
It's possible that comparing ids is not as effective as waiting for stale reference exceptions.
staleness_of
Using the staleness_of method:

@contextlib.contextmanager
def wait_for_page_load(self, timeout=10):
    self.log.debug("Waiting for page to load at {}.".format(self.driver.current_url))
    old_page = self.find_element_by_tag_name('html')
    yield
    WebDriverWait(self, timeout).until(staleness_of(old_page))
For more details, check Harry's blog.
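For context, a sketch of how such a context manager is typically used, assuming it is a method on the same class that wraps the driver (as in the snippet above):

# the block body runs between capturing old_page and the staleness wait,
# so the click happens first and the wait fires once the old <html> element
# has been replaced by the new page's
with self.wait_for_page_load(timeout=10):
    self.find_element_by_link_text('Next').click()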
As mentioned in the answer from David Cullen, I've always seen recommendations to use a line like the following one:

element_present = EC.presence_of_element_located((By.ID, 'element_id'))
WebDriverWait(driver, timeout).until(element_present)
It was difficult for me to find in one place all the possible locators that can be used with By, so I thought it would be useful to provide the list here.
According to Web Scraping with Python by Ryan Mitchell:
ID: used in the example; finds elements by their HTML id attribute.
CLASS_NAME: used to find elements by their HTML class attribute. Why is this function CLASS_NAME and not simply CLASS? Using the form object.CLASS would create problems for Selenium's Java library, where .class is a reserved method. In order to keep the Selenium syntax consistent between different languages, CLASS_NAME was used instead.
CSS_SELECTOR: finds elements by their class, id, or tag name, using the #idName, .className, tagName convention.
LINK_TEXT: finds HTML tags by the text they contain. For example, a link that says "Next" can be selected using (By.LINK_TEXT, "Next").
PARTIAL_LINK_TEXT: similar to LINK_TEXT, but matches on a partial string.
NAME: finds HTML tags by their name attribute. This is handy for HTML forms.
TAG_NAME: finds HTML tags by their tag name.
XPATH: uses an XPath expression ... to select matching elements.
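To illustrate, here are a few of these locators used with presence_of_element_located; the ids, names, and selectors below are made up:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.ID, 'main')))                  # hypothetical id
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#main .row')))  # hypothetical selector
wait.until(EC.presence_of_element_located((By.LINK_TEXT, 'Next')))
wait.until(EC.presence_of_element_located((By.NAME, 'q')))                   # hypothetical form field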
From selenium/webdriver/support/wait.py:

driver = ...
from selenium.webdriver.support.wait import WebDriverWait
element = WebDriverWait(driver, 10).until(
    lambda x: x.find_element_by_id("someId"))
On a side note, instead of scrolling down 100 times, you can check if there are no more modifications to the DOM (we are in the case of the bottom of the page being AJAX lazy-loaded)
def scrollDown(driver, value):
    driver.execute_script("window.scrollBy(0," + str(value) + ")")

# Scroll down the page
def scrollDownAllTheWay(driver):
    old_page = driver.page_source
    while True:
        logging.debug("Scrolling loop")
        for i in range(2):
            scrollDown(driver, 500)
            time.sleep(2)
        new_page = driver.page_source
        if new_page != old_page:
            old_page = new_page
        else:
            break
    return True
Have you tried driver.implicitly_wait? It is like a setting for the driver, so you only call it once in the session, and it basically tells the driver to wait the given amount of time until each command can be executed.

driver = webdriver.Chrome()
driver.implicitly_wait(10)

So if you set a wait time of 10 seconds, it will execute each command as soon as possible, waiting up to 10 seconds before it gives up. I've used this in similar scroll-down scenarios, so I don't see why it wouldn't work in your case. Hope this is helpful. Be sure to use a lowercase 'w' in implicitly_wait.
Here I did it using a rather simple form:

from selenium import webdriver

browser = webdriver.Firefox()
browser.get("url")
searchTxt = ''
while not searchTxt:
    try:
        searchTxt = browser.find_element_by_name('NAME OF ELEMENT')
        searchTxt.send_keys("USERNAME")
    except:
        continue
A solution for ajax pages that continuously load data: the previous methods stated do not work there. What we can do instead is grab the page DOM, hash it, and compare old and new hash values together over a delta time.
import time
from selenium import webdriver

def page_has_loaded(driver, sleep_time=2):
    '''
    Waits for the page to completely load by comparing current page hash values.
    '''
    def get_page_hash(driver):
        '''
        Returns the html dom hash
        '''
        # can find the element by either the 'html' tag or by the html 'root' id
        dom = driver.find_element_by_tag_name('html').get_attribute('innerHTML')
        # dom = driver.find_element_by_id('root').get_attribute('innerHTML')
        dom_hash = hash(dom.encode('utf-8'))
        return dom_hash

    page_hash = 'empty'
    page_hash_new = ''
    # comparing old and new page DOM hashes together to verify the page is fully loaded
    while page_hash != page_hash_new:
        page_hash = get_page_hash(driver)
        time.sleep(sleep_time)
        page_hash_new = get_page_hash(driver)
        print('<page_has_loaded> - page not loaded')
    print('<page_has_loaded> - page loaded: {}'.format(driver.current_url))
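A quick usage sketch, assuming a driver is already open (the URL is a placeholder):

driver.get('https://example.com')
page_has_loaded(driver)  # blocks until two DOM hashes taken sleep_time apart match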
How about putting WebDriverWait in a while loop and catching the exceptions?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

browser = webdriver.Firefox()
browser.get("url")
delay = 3  # seconds
while True:
    try:
        WebDriverWait(browser, delay).until(EC.presence_of_element_located((By.ID, 'IdOfMyElement')))
        print("Page is ready!")
        break  # it will break from the loop once the specific element is present
    except TimeoutException:
        print("Loading took too much time! Trying again")
You can do that very simply with this function:

def page_is_loading(driver):
    # document.readyState is "complete" once the page has finished loading
    return driver.execute_script("return document.readyState") == "complete"

and when you want to do something after the page has finished loading, you can use:

Driver = webdriver.Firefox(options=Options, executable_path='geckodriver.exe')
Driver.get("https://www.google.com/")
while not page_is_loading(Driver):
    continue
Driver.execute_script("alert('page is loaded')")
Use this in code:

from selenium import webdriver

driver = webdriver.Firefox()  # or Chrome()
driver.implicitly_wait(10)  # seconds
driver.get("http://www.......")

or you can use this code if you are looking for a specific tag:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()  # or Chrome()
driver.get("http://www.......")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "tag_id"))
    )
finally:
    driver.quit()
Very good answers here. A quick example of waiting for an XPATH:

# wait for sizes to load - 2s timeout
try:
    WebDriverWait(driver, 2).until(expected_conditions.presence_of_element_located(
        (By.XPATH, "//div[@id='stockSizes']//a")))
except TimeoutException:
    pass
I struggled a bit to get this working, as it didn't work for me as expected. Anyone who is still struggling to get it working may check this.
I wanted to wait for an element to be present on the webpage before proceeding with my manipulations.
We can use WebDriverWait(driver, 10, 1).until(), but the catch is that until() expects a function which it can execute over the timeout provided (in our case 10 seconds), polling every 1 second. So keeping it like below worked for me:

element_found = wait_for_element.until(lambda x: x.find_element_by_class_name("MY_ELEMENT_CLASS_NAME").is_displayed())
Here is what until() does behind the scenes:

def until(self, method, message=''):
    """Calls the method provided with the driver as an argument until the \
    return value is not False."""
    screen = None
    stacktrace = None
    end_time = time.time() + self._timeout
    while True:
        try:
            value = method(self._driver)
            if value:
                return value
        except self._ignored_exceptions as exc:
            screen = getattr(exc, 'screen', None)
            stacktrace = getattr(exc, 'stacktrace', None)
        time.sleep(self._poll)
        if time.time() > end_time:
            break
    raise TimeoutException(message, screen, stacktrace)
If you are trying to scroll and find all items on a page, you can consider using the following. This is a combination of a few methods mentioned by others here, and it did the job for me:

while True:
    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.implicitly_wait(30)
        time.sleep(4)
        elem1 = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "element-name")))
        len_elem_1 = len(elem1)
        print(f"A list Length {len_elem_1}")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.implicitly_wait(30)
        time.sleep(4)
        elem2 = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "element-name")))
        len_elem_2 = len(elem2)
        print(f"B list Length {len_elem_2}")
        if len_elem_1 == len_elem_2:
            print(f"final length = {len_elem_1}")
            break
    except TimeoutException:
        print("Loading took too much time!")
Selenium can't detect on its own whether the page is fully loaded or not, but JavaScript can. I suggest you try this:

from selenium.webdriver.support.ui import WebDriverWait

WebDriverWait(driver, 100).until(lambda driver: driver.execute_script('return document.readyState') == 'complete')

This executes JavaScript code instead of using Python: because JavaScript can detect when the page is fully loaded, document.readyState will show 'complete'. This code means: for up to 100 seconds, keep checking document.readyState until it shows complete.
nono = driver.current_url
driver.find_element(By.XPATH, "//button[@value='Send']").click()
while driver.current_url == nono:
    pass
print("page loaded.")
