Selenium Python - Explicit waits not working

I am unable to get explicit waits to work while waiting for the page to render its JavaScript, so I am forced to use time.sleep() for the code to work as intended.
I read the docs and still wasn't able to get it to work.
http://selenium-python.readthedocs.io/waits.html
The commented-out section of code with the time.sleep() calls works as intended.
The WebDriverWait part runs but does not wait.
from selenium import webdriver
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
driver = webdriver.Chrome()
url = "https://www.target.com/"
# tells the driver to wait up to 10 seconds before timing out
# for data that will be loaded on the screen
DELAY = 10
driver.implicitly_wait(DELAY)
SLEEP_TIME = 1
# navigate to the page
driver.get(url)
time.sleep(SLEEP_TIME)
try:
    WebDriverWait(driver, DELAY).until(EC.visibility_of_element_located((By.XPATH, """//*[@id="js-toggleLeftNav"]/img"""))).click()
    WebDriverWait(driver, DELAY).until(EC.visibility_of_element_located((By.XPATH, """//*[@id="5"]"""))).click()
    # gets all the category elements once the list is present
    items = WebDriverWait(driver, DELAY).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#leftNavigation > ul:nth-child(2)"))
    )[0].find_elements_by_tag_name("li")
    """
    # opens up the side bar javascript
    driver.find_element_by_xpath('//*[@id="js-toggleLeftNav"]/img').click()
    time.sleep(SLEEP_TIME)
    # clicks on browse by category
    driver.find_element_by_xpath('//*[@id="5"]').click()
    time.sleep(SLEEP_TIME)
    # gets all the category elements
    items = driver.find_element_by_css_selector("#leftNavigation > ul:nth-child(2)").find_elements_by_tag_name("li")
    time.sleep(SLEEP_TIME)
    """
    # gets the hyperlink and category name but the first and the last,
    # since the first is back to main menu and the last is exit
    category_links = {}
    for i in range(1, len(items) - 1):
        hyperlink = items[i].find_element_by_tag_name('a').get_attribute('href')
        category_name = items[i].text
        category_links[category_name] = hyperlink
    print(category_links)
except TimeoutException:
    print("Timed out.")

This version successfully loads the site, waits for it to render, then opens the side menu. Notice how the wait.until method is used to wait until the page has loaded. You should be able to apply the pattern below to the rest of your code to achieve your goal.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get("https://www.target.com/")
wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#leftNavigation > ul:nth-child(2)")))
button = driver.find_element_by_xpath("""//*[@id="js-toggleLeftNavLg"]""")
button.click()
time.sleep(5)
driver.quit()

Related

Page navigation click without using current_url python selenium

How can I navigate through each page without using driver.current_url? In my full code, I get a bunch of errors once I navigate through the pages in a loop. Without the loop, it runs fine but can only go through one page. I want to navigate through as many pages as there are. Any help appreciated, thanks.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep

driver_service = Service(executable_path=r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)
url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'

for page in range(2, 5):
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            # sleep(3)
        except:
            print("No records found " + job)
            pass
        sleep(3)
    driver.get(template.format(page))
If I understand you correctly, you want to determine dynamically how many pages there are and loop over each of them.
I managed to achieve this by using a while loop and checking on each page whether the "Next" button at the bottom is visible. If it is not, the last page has been reached and you can exit the loop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep

driver_service = Service(executable_path="C:\\Users\\Stefan\\bin\\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)
url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'
page = 1

while True:
    # check if the "Next" button is visible
    # -> if not, the last page was reached
    try:
        driver.find_element(By.XPATH, "//a[@title='Next']")
    except:
        # last page reached
        break
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            # sleep(3)
        except:
            print("No records found " + job)
            pass
        sleep(3)
    page += 1
    driver.get(template.format(page))

driver.close()
It seems your problem is a StaleElementReferenceException when you get back from a job page to the jobs search results page.
The simplest approach to overcome this problem is to keep the jobs search results page URL.
Actually, I changed your code only on this point and it works.
I also changed driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']") to wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']"))) for better performance.
The code below works, but the web site itself responds badly.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)

url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'
for p in range(1, 20):
    driver.get(url.format(p=p))
    link_job = [x.get_attribute('href') for x in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']")))]
    for job in link_job:
        driver.get(job)
        try:
            wait.until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])"))).click()
            print("applied")
        except:
            print("No records found " + job)
            pass
    driver.get(url.format(p=p))

Click is not going through on website selenium python

I am trying to write a script that will automatically open to a specific bible verse with its commentaries open; however, the click does not make the commentary window open. What is wrong?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Firefox()
driver.get("https://catenabible.com/mt/1")
assert "Catena" in driver.title
elem = driver.find_element(By.ID, "mt001001")
elem.click()
You are missing a wait.
You should wait for the page to complete loading before clicking that element.
The best way to do that is to use the Expected Conditions explicit waits.
The following code should work better:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Firefox()
wait = WebDriverWait(driver, 20)
driver.get("https://catenabible.com/mt/1")
assert "Catena" in driver.title
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#mt001001 .bibleVerseText"))).click()
Other answers have stated that you have to click the A tag, or that the SPAN is not clickable, but that isn't true. You just need to wait for the element to be clickable. This opens the right-side Commentaries panel for me.
import time

driver.get("https://catenabible.com/mt/1")
assert "Catena" in driver.title
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "mt001001"))).click()
count = 0
while len(driver.find_elements(By.CSS_SELECTOR, "div.commentaryPanel.expanded")) == 0 and count < 5:
    driver.find_element(By.ID, "mt001001").click()
    count += 1
    time.sleep(0.25)  # brief pause between loops
assert count < 5, "Commentary was not visible after 5 tries"
...
EDIT:
I got it to work by using a hardcoded wait of 2 seconds to allow the element to become clickable:
import time
driver.get("https://catenabible.com/mt/1")
time.sleep(2)
driver.find_element(By.XPATH, '//*[@id="mt001001"]/span').click()
It is not ideal to use this wait because it will slow the script down, and you never really know how long is enough time to wait. But at least it works.
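To avoid the hardcoded sleep altogether, a minimal explicit-wait sketch (assuming the same page and element as above; untested against the live site) could look like this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("https://catenabible.com/mt/1")
# wait up to 10 seconds for the verse element to become clickable, then click it;
# a TimeoutException is raised if it never becomes clickable
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="mt001001"]/span'))
).click()
This way the script proceeds as soon as the element is ready instead of always paying the full 2 seconds.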

Script fails to keep clicking on load more button

I've written a script in Python in association with selenium to keep clicking on the MORE button to load more items until there are no new items left to load from a webpage. However, my script below only manages to click once on the MORE button at the bottom of the page.
Link to that site
This is my try so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(link)

while True:
    for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".results .name a.startup-link"))):
        print(elems.get_attribute("href"))
    try:
        loadmore = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='more']")))
        driver.execute_script("arguments[0].scrollIntoView();", loadmore)
        loadmore.click()
    except Exception:
        break

driver.quit()
How can I keep clicking on that MORE button until there are no such buttons left to click, and parse the links? I've already tried using a for loop.
I've managed to solve the problem by pursuing sir Andersson's logic within my existing script. This is what the modified script looks like:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(link)

while True:
    try:
        loadmore = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='more']")))
        driver.execute_script("arguments[0].click();", loadmore)
        wait.until(EC.staleness_of(loadmore))
    except Exception:
        break

for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".results .name a.startup-link"))):
    print(elems.get_attribute("href"))

driver.quit()
Why not just:
while (driver.FindElements(By.ClassName("more")).Count > 0)
{
    driver.FindElement(By.ClassName("more")).Click();
    // some delay to wait for the lazy load to complete
}
This is a C# example, but I'm pretty sure it can be done with Python as well.
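A rough Python equivalent of that loop (an untested sketch; it assumes driver and the By import from the snippets above, and approximates the lazy-load delay with a fixed sleep):
from time import sleep

# keep clicking the MORE button for as long as one is present on the page
while len(driver.find_elements(By.CLASS_NAME, "more")) > 0:
    driver.find_element(By.CLASS_NAME, "more").click()
    sleep(2)  # crude delay to let the lazy-loaded content arrive
The staleness-based approach above is still preferable, since waiting on a condition is more reliable than a fixed delay.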

Can't get rid of hardcoded delay even when Explicit Wait is already there

I've written some code in Python in combination with selenium to parse the different questions from quora.com. My scraper is doing its job at this moment. The thing is, I've used a hardcoded delay for the scraper to work, even though an Explicit Wait has already been defined. As the page is an infinitely scrolling one, I tried to limit the scrolling process to a fixed number of scrolls. Now, I have two questions:
Why is wait.until(EC.staleness_of(page)) not working within my scraper? It is commented out now.
If I use something else instead of page = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "question_link"))), the scraper throws an error: can't focus element.
Btw, I do not wish to go for the page = driver.find_element_by_tag_name('body') option.
Here is what I've written so far:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.quora.com/topic/C-programming-language")
wait = WebDriverWait(driver, 10)
page = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "question_link")))

for scroll in range(10):
    page.send_keys(Keys.PAGE_DOWN)
    time.sleep(2)
    # wait.until(EC.staleness_of(page))

for item in wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "rendered_qtext"))):
    print(item.text)

driver.quit()
You can try the code below to trigger as many XHRs as possible and then parse the page:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()
driver.get("https://www.quora.com/topic/C-programming-language")
wait = WebDriverWait(driver, 10)
page = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "question_link")))
links_counter = len(wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "question_link"))))

while True:
    page.send_keys(Keys.END)
    try:
        wait.until(lambda driver: len(driver.find_elements_by_class_name("question_link")) > links_counter)
        links_counter = len(driver.find_elements_by_class_name("question_link"))
    except TimeoutException:
        break

for item in wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "rendered_qtext"))):
    print(item.text)

driver.quit()
Here we scroll the page down and wait up to 10 seconds for more links to be loaded, or break out of the while loop if the number of links remains the same.
As for your questions:
wait.until(EC.staleness_of(page)) is not working because when you scroll the page down you don't get a new DOM - you just trigger an XHR that adds more links to the existing DOM, so the first link (page) will not become stale in this case.
(I'm not quite confident about this, but...) I guess you can only send keys to nodes that can be focused (where a user could set focus manually), e.g. links, input fields, textareas, buttons..., but not to content divisions (div), paragraphs (p), etc.
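If the focus restriction is a problem, one way around it (a small sketch, not from the original answer) is to scroll the window with JavaScript instead of sending keys to an element:
# scroll the window itself, so no focusable element is needed
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
This triggers the same lazy loading without relying on a focusable node.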

Extracting user comments from news website

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait(dr, x):
    element = WebDriverWait(dr, 50).until(
        EC.presence_of_all_elements_located((By.XPATH, x))
    )
    return element

browser = webdriver.Firefox()
browser.get("http://www.dinamalar.com/user_comments.asp? uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
for elem in wait(browser, '//*[@id="commsec"]/div[2]/div[1]'):
    print(elem.text)
This is the link from which I need to extract all the comments: http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D
But my code extracts only the first 10 comments. After clicking the button, the next 10 comments are loaded dynamically. How can I extract all of these comments using Python selenium?
The idea would be to look at how many "more ideas" elements are present on the page. Every time you click the button and more comments load, one more "more ideas" red button becomes present. Implementation:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver

browser = webdriver.Firefox()
wait = WebDriverWait(browser, 10)
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")

# initial wait for the page to load
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".morered")))

pages = 1
while True:
    browser.find_elements_by_css_selector(".morered")[-1].click()
    # wait for more "load more" buttons to be present
    try:
        wait.until(lambda browser: len(browser.find_elements_by_css_selector(".morered")) > pages)
    except TimeoutException:
        break  # no more data loaded, exit the loop
    print("Comments loaded: %d" % len(browser.find_elements_by_css_selector(".dateg")))
    pages += 1

browser.close()
Note that I've also removed that extra space inside the URL.
