I have the following code to scroll down a JavaScript-enabled website. The problem is that when newHeight reaches around 229275, I get a "list index out of range" error on the line browser.find_elements_by_class_name('alt')[0].click(). Why am I getting this error, and how can I solve it?
My code:
# Question snippet: repeatedly report the page height and click the first
# element with class 'alt'. `webdriver`, `time`, `url` and `pause` are
# defined outside this snippet.
browser = webdriver.PhantomJS("phantomjs")
browser.get(url)
while True:
    time.sleep(pause)
    newHeight = browser.execute_script("return document.body.scrollHeight")
    print(newHeight)
    # Indexing [0] raises IndexError when the returned list is empty.
    browser.find_elements_by_class_name('alt')[0].click()
Try to scroll down page and click element with below approach:
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
# Scroll to the bottom of the page and click the 'alt' element for as long as
# it becomes clickable; stop once it no longer shows up.
# `webdriver` and `url` are defined outside this snippet.
from selenium.common.exceptions import TimeoutException

browser = webdriver.PhantomJS("phantomjs")
browser.get(url)
while True:
    # Pressing END scrolls to the bottom of the page.
    browser.find_element_by_tag_name("body").send_keys(Keys.END)
    try:
        wait(browser, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "alt"))).click()
    except (NoSuchElementException, TimeoutException):
        # WebDriverWait.until() signals an unmet condition with
        # TimeoutException, so that is what ends the loop.
        break
This should allow you to click the required element if it can be found, or break out of the loop otherwise.
Simply use a try/except statement:
# Click the first 'alt' element until none is left on the page.
# `webdriver`, `time`, `url` and `pause` are defined outside this snippet.
browser = webdriver.PhantomJS("phantomjs")
browser.get(url)
while True:
    time.sleep(pause)
    newHeight = browser.execute_script("return document.body.scrollHeight")
    print(newHeight)
    try:
        browser.find_elements_by_class_name('alt')[0].click()
    except IndexError:
        # An empty result list means there is nothing left to click. Catch
        # only this error and leave the loop, rather than swallowing every
        # exception and spinning forever.
        break
I would recommend checking the list before acting upon it.
# Click the first 'alt' element, checking that the list is non-empty first.
# `webdriver`, `time`, `url` and `pause` are defined outside this snippet.
browser = webdriver.PhantomJS("phantomjs")
browser.get(url)
while True:
    time.sleep(pause)
    newHeight = browser.execute_script("return document.body.scrollHeight")
    print(newHeight)
    alt_elements = browser.find_elements_by_class_name('alt')
    if not alt_elements:
        # Nothing left to click: leave the loop so it cannot run forever.
        break
    alt_elements[0].click()
Just a side note, an infinite while loop can be a dangerous thing.
Related
I want to click on each product on aliexpress and do something with it.
However, I kept running into an ElementClickInterceptedException
Please verify that the code is correct before answering the question if you are using chat-GPT or any other AI to help with this problem.
These are the things that I tried
# Attempt 1: scroll each supplier into view, then click it via an action chain.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    actions = ActionChains(driver)
    actions.move_to_element(supplier).click().perform()

# Attempt 2: scroll into view, wait for the product links, then click directly.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    actions = ActionChains(driver)
    # NOTE(review): perform() is never called on this chain, so the queued
    # move is presumably never sent to the browser -- confirm intent.
    actions.move_to_element(supplier)
    # XPath attribute tests are written '@class'; '#class' is not valid XPath.
    wait.until(EC.visibility_of_element_located((By.XPATH, ".//*[@class='list--gallery--34TropR']//span/a")))
    try:
        supplier.click()
    except ElementClickInterceptedException:
        print('object not on screen')
However, this still gives me the highest click-through-rate
# Attempt 3: click every supplier directly, pausing one second after each
# successful click and reporting intercepted clicks.
for supplier in suppliers:
    try:
        supplier.click()
    except ElementClickInterceptedException:
        print('object not on screen')
    else:
        print('Supplier clicked')
        time.sleep(1)
This is how I initialized the driver and loaded the elements.
# Driver initialisation and collection of the product cards.
search_key = "Motor+toy+boat"
suppliers = []
# Build the search URL once; it is printed here and loaded further down.
search_url = "https://www.aliexpress.com/premium/"+search_key+".html?spm=a2g0o.best.1000002.0&initiative_id=SB_20221218233848&dida=y"
print(search_url)

# create a webdriver object and set the path to the Chrome driver
service = Service('../venv/chromedriver.exe')
driver = webdriver.Chrome(service=service)

# navigate to the Aliexpress website
driver.get("https://www.aliexpress.com/")

# Wait for the search box to be present on the landing page
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.ID, "search-key")))

# Implicit wait applied to subsequent element lookups
driver.implicitly_wait(10)
driver.get(search_url)

# Scroll in 800px steps until the document height stops growing.
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollBy(0, 800);")
    sleep(1)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        print(new_height, last_height)
        break
    last_height = new_height

# XPath attribute tests are written '@class'; '#class' is not valid XPath.
suppliers.extend(
    driver.find_elements(By.XPATH, "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]")
)
Couple of issues I have identified.
The site detects the bot, so after a couple of runs it stops identifying the elements. Use --disable-blink-features in the Chrome options.
While iterating over the list, the click lands somewhere else; just wait for a second before each click and it will work.
The code below clicks only the elements currently visible on the page. If you need to click more, you need to scroll the page and then click.
You can check the total count of visible elements on the page.
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
# Start Chrome with the automation switches disabled.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=chrome_options)
driver.get("https://www.aliexpress.com/w/wholesale-uk.html")

# XPath attribute tests are written '@class'; '#class' is not valid XPath.
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH, "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]"))).click()
suppliers=WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.XPATH,".//*[@class='list--gallery--34TropR']//span/a")))
print("Total visible element on the page: " + str(len(suppliers)))

# Pause one second before each click.
for supplier in suppliers:
    time.sleep(1)
    supplier.click()
I would like to scroll until the end of a page like : https://fr.finance.yahoo.com/quote/GM/history?period1=1290038400&period2=1612742400&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true
The fact is using this :
# Get scroll height after first time page load
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page. time.sleep() returns None, so its result must not
    # be called (the stray "()" raised TypeError).
    time.sleep(2)
    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
does not work. Yes, it should work for pages with infinite loading, but it doesn't work for Yahoo Finance, which has a finite number of loads; the condition should break the loop when it reaches the end. So I'm quite confused at the moment.
We could also use :
# Keep scrolling for as long as a <tfoot> element is returned.
while driver.find_element_by_tag_name('tfoot'):
    # Scroll down three times to load the table
    for _ in range(3):
        driver.execute_script("window.scrollBy(0, 5000)")
    time.sleep(2)
but it sometimes gets stuck at a certain load.
What would be the best way to do this ?
Requires pip install undetected-chromedriver, but will get the job done.
It's just my webdriver of choice, you can also do the exact same with normal selenium.
from time import sleep as s
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import undetected_chromedriver as uc
# Open the page in a visible (non-headless) undetected-chromedriver session.
options = uc.ChromeOptions()
options.headless = False
driver = uc.Chrome(options=options)
driver.get('https://fr.finance.yahoo.com/quote/GM/history?period1=1290038400&period2=1612742400&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true')

# Clicks the cookie warning or whatever.
consent_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '#consent-page > div > div > div > div.wizard-body > div.actions.couple > form > button')))
consent_button.click()

# Press DOWN until the vertical offset stops changing between iterations.
# The None sentinel never equals a reported offset, so the first pass
# always continues.
last_scroll_pos = None
while True:
    page_body = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'body')))
    page_body.send_keys(Keys.DOWN)
    s(.01)
    current_scroll_pos = driver.execute_script('return window.pageYOffset;')
    if current_scroll_pos == last_scroll_pos:
        print('scrolling is finished')
        break
    last_scroll_pos = current_scroll_pos
My problem is that I do not get all tweets after scrolling and clicking the next button; I only get the last few tweets. The only idea I have is to integrate the scraping into the while loop, but I would like to find a way to get all tweets after the scrolling and clicking is done. Maybe someone has an idea, or can tell me if it is not possible.
Moreover, I am relatively new to Python and especially Selenium, so if you have any other tips on my code, please feel free to share them.
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
import random
import time
# Open the tweet in an incognito Chrome window.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(r"path",chrome_options=chrome_options)
url = 'https://twitter.com/RegSprecher/status/1251100551183507456'
driver.get(url)
wait = WebDriverWait(driver, 20)

# Click on the cookie banner, since it covers the content below it
cookie_close = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'Schließen')]")))
cookie_close.click()

while True:
    try:
        # Click on the "show more replies" button if possible
        element = driver.find_element(By.XPATH, "//span[contains(text(),'Weitere Antworten anzeigen')]")
        if element.is_displayed():
            actions = ActionChains(driver)
            actions.move_to_element(element).perform()
            more_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'Weitere Antworten anzeigen')]")))
            more_button.click()
    except NoSuchElementException:
        # No button found: scroll down instead
        last_height = driver.execute_script("return document.body.scrollHeight")
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(random.randint(1,3))
        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Equal heights end the loop
            break
        last_height = new_height

# Get the tweet elements currently matched by the selector
tweet = driver.find_elements_by_css_selector("div[data-testid='tweet']")
print(tweet)
[<selenium.webdriver.remote.webelement.WebElement (session="07b337bdb89396d57c4b05c7454ec764", element="8e694557-5d4a-4af1-8b33-9c91679b31eb")>,
<selenium.webdriver.remote.webelement.WebElement (session="07b337bdb89396d57c4b05c7454ec764", element="f52a06da-09c5-47f0-bf93-1fb5409472f3")>,
<selenium.webdriver.remote.webelement.WebElement (session="07b337bdb89396d57c4b05c7454ec764", element="f773fd2b-537c-425c-9662-29439f0de9a6")>,
<selenium.webdriver.remote.webelement.WebElement (session="07b337bdb89396d57c4b05c7454ec764", element="3b278f06-46bc-4231-ad87-059f94ebbce9")>,
<selenium.webdriver.remote.webelement.WebElement (session="07b337bdb89396d57c4b05c7454ec764", element="1c1e408b-e5c8-45f7-bf31-64d778a70e0a")>]
I'm trying to use Selenium to scrape a bunch of websites, that are needed to be scrolled down and clicked on a button. Each url has same structure, but has different number of click times.
My code:
# NOTE(review): the nesting below is reconstructed from the flattened source;
# confirm it against the original post.
for url in url_list:
    while True:
        wd.get(url)
        last_height = wd.execute_script("return document.body.scrollHeight")
        # Scroll to the bottom until the document height stops changing.
        while True:
            wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # time.sleep = time for waiting
            time.sleep(3)
            new_height = wd.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
        # Look up the 'next >>' link and click it.
        wd.find_element_by_link_text('next >>').click()
However, the code finished only the first url and returned error: "NoSuchElementException". It didn't continue the loop, and sometimes if I changed url list, it stopped in the middle of the loop with error: "ElementClickInterceptedException"
My goal is to continue and finish the loop, and ignore the error.
How can I improve the code?
Thanks in advance
Use WebDriverWait() with element_to_be_clickable() inside a try..except block: if the element is found, click it; otherwise break out of the loop.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Visit each profile URL and keep clicking the "next" link until it is gone.
from selenium.common.exceptions import WebDriverException

url_list = ['https://pantip.com/profile/2892172#topics','https://pantip.com/profile/5239396#topics','https://pantip.com/profile/349866#topics']
wd=driver=webdriver.Chrome()
for url in url_list:
    print(url)
    wd.get(url)
    while True:
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            next_button=WebDriverWait(wd,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'a.next.numbers')))
            next_button.click()
        except WebDriverException:
            # Covers the wait timing out and the click being intercepted,
            # without also hiding KeyboardInterrupt or programming errors
            # the way a bare "except:" would.
            print("No more pages")
            break
driver.quit()
I've written a script in Python to scrape names from a slow-loading webpage. There are 1000 names on that page, and the full content is only loaded once the browser has scrolled all the way to the bottom. My script can successfully reach the bottom of the page and parse all the names. The issue I'm facing is that I've used a hardcoded delay (5 seconds in this case), which makes the browser wait unnecessarily even when the items are already loaded. So how can I use an explicit wait to overcome this and still parse all the items?
Here is the script I've written so far:
from selenium import webdriver
import time
# Load the list page and scroll to the bottom until the document height
# is unchanged after a fixed 5-second pause.
driver = webdriver.Chrome()
driver.get("http://fortune.com/fortune500/list/")

check_height = driver.execute_script("return document.body.scrollHeight;")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)
    height = driver.execute_script("return document.body.scrollHeight;")
    if height == check_height:
        break
    check_height = height

# Print the text of every element matching the company-title selector.
listElements = driver.find_elements_by_css_selector(".company-title")
for company in listElements:
    print(company.text)
You can add Explicit wait as below:
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
# Scroll to the bottom repeatedly, waiting (up to 10 s each time) for the
# document height to grow instead of sleeping for a fixed period.
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()
driver.get("http://fortune.com/fortune500/list/")

check_height = driver.execute_script("return document.body.scrollHeight;")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    try:
        WebDriverWait(driver, 10).until(lambda driver: driver.execute_script("return document.body.scrollHeight;") > check_height)
    except TimeoutException:
        # Height did not grow within the timeout: stop scrolling. Only the
        # timeout is caught so unrelated errors are not silently swallowed.
        break
    check_height = driver.execute_script("return document.body.scrollHeight;")

# Print the text of every element matching the company-title selector.
listElements = driver.find_elements_by_css_selector(".company-title")
for item in listElements:
    print(item.text)
This should allow you to avoid hardcoding time.sleep(): instead, you just wait for the height value to change, or break out of the loop if the height is still constant 10 seconds after scrolling.
You need to use explicit waits, like this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Explicit-wait example: wait up to 10 seconds for an element with the given
# id to be present, and always close the browser afterwards.
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    presence_condition = EC.presence_of_element_located((By.ID, "myDynamicElement"))
    element = WebDriverWait(driver, 10).until(presence_condition)
finally:
    driver.quit()
More details here http://selenium-python.readthedocs.io/waits.html