I'm trying to scrape this site: https://www.wagr.com/mens-ranking. At the bottom right of the table there is a button that goes to the next page, but Selenium keeps throwing exceptions when I try to click it. The code below is what I'm using to click the button.
# Locate the pagination "next" link and click it.
# (The local is not called `next`, which would shadow the builtin.)
next_link = driver.find_element(By.CSS_SELECTOR, '.next > a:nth-child(1)')
next_link.click()
Here's a screenshot of the traceback:
I can't understand why this isn't working, I'd be grateful for any tips.
You need to
Handle the cookie pop up
Scroll down to the bottom of the page so that the button becomes visible
Here is a working code -
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# The WebDriver context manager quits the browser on exit (also when a step
# below raises), so no explicit driver.quit() is needed.
with chrome_driver as driver:
    driver.implicitly_wait(15)
    driver.get('https://www.wagr.com/mens-ranking')
    time.sleep(3)

    # Dismiss the cookie pop-up first; it overlays the page and would
    # intercept the click on the pagination link.
    cookie_btn = driver.find_element(By.XPATH, "/html/body/div[2]/div[3]/div/div/div[2]/div[1]/button")
    cookie_btn.click()
    time.sleep(0.3)

    # Scroll to the bottom so the pagination link is inside the viewport.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    next_btn = driver.find_element(By.CSS_SELECTOR, '.next > a:nth-child(1)')  # li.next
    # Alternative locator: driver.find_element(By.XPATH, "//li[@class='next']")
    print("found and click next", next_btn.tag_name)
    next_btn.click()
    time.sleep(2)
Related
I want to click on each product on aliexpress and do something with it.
However, I kept running into an ElementClickInterceptedException
Please verify that the code is correct before answering the question if you are using chat-GPT or any other AI to help with this problem.
These are the things that I tried
# Attempt 1: scroll each card into view, then click it through ActionChains.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    actions = ActionChains(driver)
    actions.move_to_element(supplier).click().perform()
# Attempt 2: scroll into view, wait for the product links to be visible,
# then click the card directly.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    actions = ActionChains(driver)
    # NOTE: without .perform() this move is only queued and never executed.
    actions.move_to_element(supplier)
    wait.until(EC.visibility_of_element_located((By.XPATH, ".//*[@class='list--gallery--34TropR']//span/a")))
    try:
        supplier.click()
    except ElementClickInterceptedException:
        print('object not on screen')
However, this still gives me the highest click-through-rate
# Attempt 3: click each card directly and pause between clicks.
for supplier in suppliers:
    try:
        supplier.click()
        print('Supplier clicked')
        time.sleep(1)
    except ElementClickInterceptedException:
        print('object not on screen')
This is how I initialized the driver and loaded the elements.
search_key = "Motor+toy+boat"
suppliers = []
# Build the search URL once; it is both printed and loaded below.
search_url = "https://www.aliexpress.com/premium/"+search_key+".html?spm=a2g0o.best.1000002.0&initiative_id=SB_20221218233848&dida=y"
print(search_url)

# create a webdriver object and set the path to the Chrome driver
service = Service('../venv/chromedriver.exe')
driver = webdriver.Chrome(service=service)

# navigate to the Aliexpress website
driver.get("https://www.aliexpress.com/")

# Wait for the search box to be present, i.e. the landing page has loaded
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.ID, "search-key")))

# Let element lookups retry for up to 10 seconds (this call itself does not block)
driver.implicitly_wait(10)
driver.get(search_url)

# Scroll down in 800px steps until the document height stops growing,
# so that lazily loaded product cards are rendered.
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollBy(0, 800);")
    sleep(1)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        print(new_height, last_height)
        break
    last_height = new_height

# Collect every product card container found on the page.
suppliers.extend(
    driver.find_elements(By.XPATH, "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]")
)
Couple of issues I have identified.
It is detecting the bot, so after a couple of runs it will stop identifying the element. Use --disable-blink-features=AutomationControlled in the Chrome options.
Once you iterate over the list, it clicks somewhere else; just wait for a second before each click and it will work.
The added code will click only the elements visible on the page. If you need to click more, you need to scroll the page and then click.
You can check the count of total visible element on the page.
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
# Hide the most obvious automation markers so the site is less likely to
# flag the session as a bot.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
driver.get("https://www.aliexpress.com/w/wholesale-uk.html")

wait = WebDriverWait(driver, 10)
# Click the first product card once it is clickable.
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]"))).click()
# Collect only the product links that are currently visible.
suppliers = wait.until(EC.visibility_of_all_elements_located((By.XPATH, ".//*[@class='list--gallery--34TropR']//span/a")))
print("Total visible element on the page: " + str(len(suppliers)))

for supplier in suppliers:
    # Short pause before each click so the click lands on the intended element.
    time.sleep(1)
    supplier.click()
I am trying to scrape the LinkedIn website using Selenium. I can't locate the Next button. It resists as much as it can. I've spent half a day trying to address this, but all in vain.
I tried many different options, with text and so on. Only locating by the start of the ID works, but that picks up a different button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[@aria-label='Далее']"}
This is quite common for this site:
//*[starts-with(@id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
# Raw string so the backslashes of the Windows path are never read as escapes.
chrome_driver_path = Service(r"E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)

SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"
sleep(10)

# Open the sign-in form and submit the credentials.
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
login_input = driver.find_element(By.XPATH, '//*[@id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[@id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[@id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)

# Run the search and switch to the first filter ("People").
lens_button = driver.find_element(By.XPATH, '//*[@id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[@id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)

# This is the lookup that raises NoSuchElementException in the question.
page_button = driver.find_element(By.XPATH, "//button[@aria-label='Далее']")
page_button.click()
sleep(60)
Chrome inspection of button
Next Button
OK, there are several issues here:
The main reason your code did not work is that the "next" pagination button is not even created on the page until you scroll down, so I added a mechanism that scrolls the page until that button can be clicked.
It's not good to create locators based on localized text.
You should use WebDriverWait with expected_conditions explicit waits, not hardcoded pauses.
I used mixed locators types to show that sometimes it's better to use By.ID and sometimes By.XPATH etc.
the following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

options = Options()
options.add_argument("start-maximized")

# Raw string so the backslashes of the Windows path are never read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)

url = "https://www.linkedin.com/feed/"
driver.get(url)

# NOTE: my_email and my_password are not defined in this snippet; set them to
# your own credentials before running.
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()

search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(@class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()

# The "next" pagination button is only created after scrolling, so use a
# shorter timeout and keep scrolling down until the button can be clicked.
wait = WebDriverWait(driver, 4)
while True:
    try:
        next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        # Reading this property scrolls the element into view as a side effect.
        next_btn.location_once_scrolled_into_view
        time.sleep(0.2)
        next_btn.click()
        break
    except WebDriverException:
        # Only Selenium failures (timeout, intercepted click, ...) trigger a
        # retry; Ctrl+C and programming errors are no longer swallowed.
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
I'm trying to crawl a site and am running into a google ad. I think I've found the iframe of it but I can't find the element to click to remove the ad. I've spent about 7 hours now and think this is over my head. Help v much appreciated.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.add_argument("--headless")

# Raw string keeps the path value unchanged while avoiding invalid "\/" escapes.
# NOTE(review): `chrome_options=` / `executable_path=` are Selenium 3 keyword
# arguments; Selenium 4 expects `options=` and a `Service` object instead.
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=r'C:\/Users\/gblac\/OneDrive\/Desktop\/Chromedriver.exe')
url = 'https://free-mp3-download.net/'
driver.get(url)

# A WebDriverWait only waits when .until() is called; wait for the search box.
search = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.ID, 'q')))
search.send_keys('testing songs')
search.click()
button = driver.find_element(By.ID, 'snd')
button.click()

WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "results_t")))
results = driver.find_element(By.ID, 'results_t').find_elements(By.TAG_NAME, 'tr')
results[0].find_element(By.TAG_NAME, 'a').click()

# The code to remove the ad would go here
# driver.find_elements(By.CSS_SELECTOR,'[text()="Close"]').click()
Add the below code block in your code - before searching any text:
# JavaScript that deletes every element carrying the given ad class. The
# loop always removes index 0 until the collection is empty.
remove_auto_placed_ads = """
const elements = document.getElementsByClassName("google-auto-placed");
while (elements.length > 0) elements[0].remove();
"""
remove_adsbygoogle_blocks = """
const elements = document.getElementsByClassName("adsbygoogle adsbygoogle-noablate");
while (elements.length > 0) elements[0].remove();
"""

time.sleep(1)
driver.execute_script(remove_auto_placed_ads)
time.sleep(1)
driver.execute_script(remove_adsbygoogle_blocks)
time.sleep(1)

# With the ads gone, type the query and start the search.
query_box = driver.find_element(By.ID, "q")
query_box.send_keys("tamil songs")
search_btn = driver.find_element(By.ID, "snd")
search_btn.click()
It will close the 2 ad blocks in that page, but if you refresh or move forward and backward, the ads will display again, then you have to remove those ad blocks again using the above code, please add the code for that condition.
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "results_t")))

# Open the first row of the results table.
results = driver.find_element(By.ID, 'results_t').find_elements(By.TAG_NAME, 'tr')
results[0].find_element(By.TAG_NAME, 'a').click()
time.sleep(2)

driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(1)

# handling captcha: the checkbox lives inside the reCAPTCHA iframe, so switch
# into that frame, tick it, then switch back to the main document.
iframe_captcha = driver.find_element(By.XPATH, ".//iframe[@title='reCAPTCHA']")
driver.switch_to.frame(iframe_captcha)
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, ".recaptcha-checkbox-border").click()
time.sleep(2)
driver.switch_to.default_content()

driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
Here is the link to the site I am currently viewing: https://messari.io/tool/fb8d86ca-d3cf-4568-8d48-1a052c95364e. Scroll to the bottom of the page and click "View More".
I am trying to figure out how to click the x button, but what I have tried hasn't worked. I get a "no such element: Unable to locate element" error.
I have tried all three of these:
# Attempt 1: locate by class name.
driver.find_element(By.CLASS_NAME, "button").click()
# Attempt 2: locate by the data-testid attribute via XPath.
driver.find_element_by_xpath("//button[contains(@data-testid='CloseIcon')]").click()
# Attempt 3: locate the first <svg> element.
driver.find_element_by_tag_name("svg").click()
Check the XPath of the load more button: //*[@id="root"]/div[2]/div/div[2]/div[2]/div[3]/button
And then check the full Xpath of the X button that will close the pop-up window
full Xpath of the X button - '/html/body/div[2]/div[3]/div/h2/button'
working code -
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)


def messari_scraper():
    """Open the Messari tool page, click "View More" and close the pop-up.

    Uses the module-level ``chrome_driver``; the browser is quit when the
    ``with`` block exits.
    """
    URL = "https://messari.io/tool/fb8d86ca-d3cf-4568-8d48-1a052c95364e"
    with chrome_driver as driver:
        driver.implicitly_wait(15)  # wait max 15 sec for any element to find
        driver.get(URL)
        time.sleep(3)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")  # scroll to the end of the page

        # click the button
        driver.find_element(By.XPATH, '//*[@id="root"]/div[2]/div/div[2]/div[2]/div[3]/button').click()
        time.sleep(3)

        # get the full Xpath of the close `x` button of the pop-up
        driver.find_element(By.XPATH, '/html/body/div[2]/div[3]/div/h2/button').click()

        # pop up window closed
        time.sleep(5)

        # do your tasks here....


messari_scraper()
I am new to selenium and web development. I am working on a project to take screenshots from the websites from web.archive.org.
Here is a link to the sample webpage. I am trying to click on the close button (on the top right of this page) before capturing the screenshots. I am not sure what kind of element is the close button and I was unsuccessful in my attempts.
Here is the element I am attempting to click from selenium:
Here is the corresponding HTML
<a id="wm-tb-close" href="#close" style="top:-2px;" title="Close the toolbar"><span class="iconochive-remove-circle" style="color:#888888;font-size:240%;"></span></a>
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Headless Chrome configuration.
options = Options()
for flag in ('--headless', '--no-sandbox', '--start-maximized', '--disable-dev-shm-usage'):
    options.add_argument(flag)
options.binary_location = "<path-to-local-dir>/google-chrome/opt/google/chrome/google-chrome"

driver = webdriver.Chrome(options=options)
driver.get('https://web.archive.org/web/20220315011343/https://stackoverflow.com/')

# My attempts at closing the wayback toolbar:
# driver.find_element(By.LINK_TEXT, 'close').click() # Attempt 1
# driver.find_element(By.ID, 'wm-tb-close').click() # Attempt 2
# driver.find_element_by_xpath("a[@title='Close the toolbar']").click() # Attempt 3

# Capture full webpage screenshot (with scrolling): grow the window to the
# full document height, screenshot <body>, then restore the window size.
initial_size = driver.get_window_size()
# required_width = driver.execute_script('return document.body.parentNode.scrollWidth')
page_height = driver.execute_script('return document.body.parentNode.scrollHeight')
driver.set_window_size(1920, max(page_height, 1080))
page_body = driver.find_element(By.TAG_NAME, 'body')
page_body.screenshot('webpage_screenshot.png')  # avoids scrollbar
driver.set_window_size(initial_size['width'], initial_size['height'])  # reset to defaults
driver.quit()
When I make an attempt to click on the close button using the three techniques (shown in the code), I receive the following error:
selenium.common.exceptions.NoSuchElementException: Message: no such
element: Unable to locate element
The element you are trying to click is inside the SHADOW-ROOT, to access such elements you need to use some special techniques.
Also you need to use Expected Conditions explicit waits to let the elements loaded before accessing them.
This should work:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--start-maximized')
options.add_argument('--disable-dev-shm-usage')
options.binary_location = "<path-to-local-dir>/google-chrome/opt/google/chrome/google-chrome"
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)
driver.get('https://web.archive.org/web/20220315011343/https://stackoverflow.com/')

# Locate the host element whose shadow root contains the toolbar.
root_element = wait.until(EC.presence_of_element_located((By.ID, "wm-ipp-base")))
shadow_root = driver.execute_script('return arguments[0].shadowRoot', root_element)

# Access the close button inside the shadow root. A Selenium 4 ShadowRoot only
# offers find_element(by, value); the find_element_by_* helpers do not exist.
shadow_root.find_element(By.CSS_SELECTOR, "span.iconochive-remove-circle").click()