I keep getting an ElementClickInterceptedException in this script I'm writing. I'm supposed to click a link that opens a new window, scrape the new window, close it, and move on to the next link, but it just won't work: the error appears after at most 3 link clicks. I saw a similar question here and tried using wait.until(EC.element_to_be_clickable()) and maximizing my window, but that still did not work for me. I am scraping https://www.flashscore.com/, trying to scrape all the games for each day, and here is a chunk of the code I'm using:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
from time import sleep
l = "https://www.flashscore.com/"
options = FirefoxOptions()
#options.add_argument("--headless")
driver = webdriver.Firefox(executable_path="geckodriver.exe",
firefox_options=options)
driver.install_addon('C:\\Windows\\adblock_plus-3.10.1-an+fx.xpi')
driver.maximize_window()
driver.get(l)
driver.implicitly_wait(5)
cnt = 0
sleep(5)
wait = WebDriverWait(driver, 20)
a = driver.window_handles[0]
b = driver.window_handles[1]
driver.switch_to.window(a)
# Close Adblock tab
if 'Adblock' in driver.title:
driver.close()
driver.switch_to.window(a)
else:
driver.switch_to.window(b)
driver.close()
driver.switch_to.window(a)
var1 = driver.find_elements_by_xpath("//div[#class='leagues--live ']/div/div")
knt = 0
for i in range(len(var1)):
if (var1[i].get_attribute("id")):
knt += 1
#sleep(2)
#driver.switch_to.window(driver.window_handles)
var1[i].click()
sleep(2)
#var2 = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[contains(#classs, 'event__match event__match--last event__match--twoLine')]")))
print(len(driver.window_handles))
driver.switch_to.window(driver.window_handles[1])
try:
sleep(4)
driver.close()
driver.switch_to.window(a)
#sleep(3)
except(Exception):
print("Exception caught")
#WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CLASS_NAME, "event__match event__match--last event__match--twoLine")))
sleep(10)
driver.close()
Any ideas to help, please?
It looks like the element you are trying to click is covered by a banner ad or something else, like a cookie message.
To fix this you can scroll down to the last element using the following code:
driver.execute_script(
    'let items = document.querySelectorAll(\'div[title="Click for match detail!"]\');'
    ' items[items.length - 1].scrollIntoView();'
)
Add it before clicking on the desired element in the loop.
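For instance, dropped into the loop from your question (reusing your var1 and i; just a sketch, not something I ran against the live site), it would look like this:

# Scroll the last match row into view first, so the row you are about to
# click is not covered by the sticky header or an ad banner.
driver.execute_script(
    'let items = document.querySelectorAll(\'div[title="Click for match detail!"]\');'
    ' items[items.length - 1].scrollIntoView();'
)
var1[i].click()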
I tried to make a working example for you, but it works on chromedriver, not geckodriver:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
service = Service(executable_path=r'your\path\to\chromedriver.exe')
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 5)

url = 'https://www.flashscore.com/'
driver.get(url)

# accept cookies
wait.until(EC.presence_of_element_located((By.ID, 'onetrust-accept-btn-handler'))).click()

matches = driver.find_elements(By.CSS_SELECTOR, 'div[title="Click for match detail!"]')
for match in matches:
    # scroll so the next row to click is not covered by overlays
    driver.execute_script(
        'let items = document.querySelectorAll(\'div[title="Click for match detail!"]\');'
        ' items[items.length - 1].scrollIntoView();'
    )
    match.click()
    driver.switch_to.window(driver.window_handles[1])
    print('get data from open page')
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
driver.quit()
It works in both normal and headless mode.
I am trying to scrape the LinkedIn website using Selenium, but I can't click the "Next" button. It resists as much as it can; I've spent half a day trying to address this, all in vain.
I have tried all sorts of locators, including text-based ones. The only one that works matches on the start of the id, but it grabs a different button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[@aria-label='Далее']"}
(The button text 'Далее' is Russian for "Next".) Auto-generated ids are quite common for this site, so you end up with locators like:
//*[starts-with(@id,'e')]
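To illustrate what I mean (a sketch, not my actual script): a prefix locator like that matches any element whose id merely starts with 'e', so it finds lots of elements besides the Next button:

# Sketch: LinkedIn's ids look auto-generated (e.g. ember-style like 'ember123'),
# so a prefix match typically returns many unrelated elements, not one button.
candidates = driver.find_elements(By.XPATH, "//*[starts-with(@id, 'e')]")
print(len(candidates))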
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep

chrome_driver_path = Service(r"E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)

SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"

sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()

login_input = driver.find_element(By.XPATH, '//*[@id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[@id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[@id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)

lens_button = driver.find_element(By.XPATH, '//*[@id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[@id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)

people_button = driver.find_element(By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)

page_button = driver.find_element(By.XPATH, "//button[@aria-label='Далее']")
page_button.click()
sleep(60)
[Screenshot: Chrome inspection of the "Next" button]
OK, there are several issues here:
1. The main reason your code didn't work is that the "next" pagination button is not even created on the page until you scroll down, so I added a mechanism that scrolls the page until that button can be clicked.
2. It's not good to build locators on local-language texts (such as the Russian 'Далее').
3. You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses.
4. I used mixed locator types to show that sometimes it's better to use By.ID, sometimes By.XPATH, etc.
The following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)

url = "https://www.linkedin.com/feed/"
driver.get(url)

wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()

search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(@class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()

wait = WebDriverWait(driver, 4)
# scroll down until the "next" pagination button exists, then click it
while True:
    try:
        next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        next_btn.location_once_scrolled_into_view  # accessing this property scrolls the element into view
        time.sleep(0.2)
        next_btn.click()
        break
    except:
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
I just need to click the Load more button once to reveal a bunch more information, so that I can scrape more HTML than is initially loaded.
The following "should" go to github.com/topics and find the one and only button element and click it one time.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
driver = webdriver.Edge()
driver.get("https://github.com/topics")
time.sleep(5)
btn = driver.find_element(By.TAG_NAME, "button")
btn.click()
time.sleep(3)
driver.quit()
I'm told Message: element not interactable so I'm obviously doing something wrong but I'm not sure what.
Use
btn = driver.find_element(By.XPATH, "//button[contains(text(),'Load more')]")
You are not finding the right element: By.TAG_NAME "button" grabs the first button on the page, not "Load more". This is the reason why it is not "interactable".
There are several issues with your code:
1. The "Load more" button is initially out of view, so you have to scroll the page in order to click it.
2. Your locator is bad.
3. You need to wait for elements to appear on the page before accessing them; WebDriverWait expected_conditions explicit waits should be used for that, not hardcoded sleeps.
The following code works; it scrolls the page and clicks "Load more" one time.
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)

url = "https://github.com/topics"
driver.get(url)

load_more = wait.until(EC.presence_of_element_located((By.XPATH, "//button[contains(.,'Load more')]")))
load_more.location_once_scrolled_into_view  # accessing this property scrolls the element into view
time.sleep(1)
load_more.click()
UPD
You can simply modify the above code to keep clicking the Load more button for as long as it is present.
I implemented this with an infinite while loop that breaks once the Load more button is no longer found. This code works.
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)

url = "https://github.com/topics"
driver.get(url)

while True:
    try:
        load_more = wait.until(EC.presence_of_element_located((By.XPATH, "//button[contains(.,'Load more')]")))
        load_more.location_once_scrolled_into_view  # scroll the button into view
        time.sleep(1)
        load_more.click()
    except:
        # "Load more" is no longer present -- everything is loaded
        break
How can I navigate through each page without using driver.current_url? In my full code I get a bunch of errors once I navigate through the pages in a loop. Without the loop it runs fine, but it only covers one page, and I want to go through as many pages as possible. Any help appreciated, thanks.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep

driver_service = Service(executable_path=r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()  # load web driver
wait = WebDriverWait(driver, 5)

url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'

for page in range(2, 5):
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            # sleep(3)
        except:
            print("No records found " + job)
            pass
    sleep(3)
    driver.get(template.format(page))
If I understand you correctly, you want to determine dynamically how many pages there are and loop over each of them.
I managed to achieve this with a while loop that checks on each page whether the "Next" button at the bottom is present. If it is not, the last page has been reached and you can exit the loop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep
driver_service = Service(executable_path="C:\\Users\\Stefan\\bin\\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window() # load web driver
wait = WebDriverWait(driver, 5)
url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'
page = 1

while True:
    # check if the "Next" button is present
    # -> if not, the last page was reached
    try:
        driver.find_element(By.XPATH, "//a[@title='Next']")
    except:
        # last page reached
        break
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            # sleep(3)
        except:
            print("No records found " + job)
            pass
    sleep(3)
    page += 1
    driver.get(template.format(page))

driver.close()
It seems your problem is a StaleElementReferenceException when you get back from a job page to the jobs search results page.
The simplest way around this is to keep the URL of the jobs search results page and re-open it directly.
That is actually the only point where I changed your code, and it works.
I also changed driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']") to wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']"))) so the links are waited for properly.
The code below works, but the web site itself responds badly.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)

url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'

for p in range(1, 20):
    # always come back to the search results page via its URL,
    # so no stale element references are reused
    driver.get(url.format(p=p))
    link_job = [x.get_attribute('href') for x in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']")))]
    for job in link_job:
        driver.get(job)
        try:
            wait.until(EC.element_to_be_clickable((By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])"))).click()
            print("applied")
        except:
            print("No records found " + job)
            pass
    driver.get(url.format(p=p))
I need to accept cookies on a specific website but I keep getting the NoSuchElementException. This is the code for entering the website:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
chrome_options = Options()
driver = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
page_url = 'https://www.boerse.de/historische-kurse/Erdgaspreis/XD0002745517'
driver.get(page_url)
time.sleep(10)
I tried accepting the cookie button using the following:
driver.find_element_by_class_name('message-component message-button no-children focusable button global-font sp_choice_type_11 last-focusable-el').click()
driver.find_element_by_xpath('//*[@id="notice"]').click()
driver.find_element_by_xpath('/html/body/div/div[2]/div[4]/div/button').click()
I got the XPaths by copying the XPath and the full XPath of the element in Google Chrome.
I am a beginner with Selenium and just wanted to use it for a short workaround. I would appreciate some help.
The button Zustimmen ("Agree" in German) is inside an iframe, so first you have to switch to the respective iframe, and only then can you interact with that button.
Code:
driver.maximize_window()
page_url = 'https://www.boerse.de/historische-kurse/Erdgaspreis/XD0002745517'
driver.get(page_url)
wait = WebDriverWait(driver, 30)
try:
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[starts-with(@id,'sp_message_iframe')]")))
    wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Zustimmen']"))).click()
    print('Clicked successfully')
except:
    print('Could not click')
    pass
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
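One note on top of this: after the click, the driver is still switched into the consent iframe, so switch back to the main document before touching the rest of the page. A minimal sketch:

# back out of the consent iframe to the top-level document
driver.switch_to.default_content()
# ... continue scraping the main page from here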
I've got a similar problem with a page. I've tried to address the iframe and the "accept all" button with Selenium (as suggested by @cruisebandey above).
However, the pop-up on this page seems to work differently:
https://www.kreiszeitung-wochenblatt.de/hanstedt/c-panorama/mega-faslams-umzug-in-hanstedt_a270327
This is what I've tried:
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(executable_path="C:\\Users\\***\\chromedriver.exe")
driver.maximize_window()

try:
    driver.get("https://www.kreiszeitung-wochenblatt.de/hanstedt/c-panorama/mega-faslams-umzug-in-hanstedt_a270327")
except:
    print('Site not found')

wait = WebDriverWait(driver, 10)
try:
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '/html/body/iframe')))
except:
    print('paywall-layover not found')

try:
    cookie = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="consentDialog"]/div[3]/div/div[2]/div/div[2]/div/div[1]/div[2]/div')))
    cookie.click()
except:
    print('Button to accept all not found')
I am trying to query all the rows from the TradingView website. I need just the ticker and the name of each stock, but I am not able to click the 'Load more' button at the bottom of the page to load all the rows. Does anyone have a solution?
This is the code I wrote to get the ticker and stock name (it works up to the 'Load more' button):
import requests
from bs4 import BeautifulSoup

URL = 'https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

for tr in soup.find_all('tr'):
    # tds = tr.find_all('td')
    ticker = tr.find('a', class_='tv-screener__symbol')
    stock_name = tr.find('span', class_='tv-screener__description')
    if None in (ticker, stock_name):
        continue
    print(ticker.text.strip())
    print(stock_name.text.strip())
    print("\n\n")
Here is the Selenium code I tried for the 'Load More' button, which wasn't successful:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
#options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
driver.get("https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/")
loadMoreButton = driver.find_elements_by_xpath("//span[#class='tv-load-more__btn']")
loadMoreButton.click()
driver.quit()
And this is the error I got:
AttributeError Traceback (most recent call last)
<ipython-input-80-cf801ef16cdd> in <module>
9 driver.get("https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/")
10 loadMoreButton = driver.find_elements_by_xpath("//div[#class='tv-load-more tv-load-more--screener js-screener-load-more']")
---> 11 loadMoreButton.click()
12 driver.quit()
AttributeError: 'list' object has no attribute 'click'
The issue is that find_elements_by_xpath returns a list. See below, using find_element_by_xpath instead.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
#options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
driver.get("https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/")
loadMoreButton = driver.find_element_by_xpath("//span[#class='tv-load-more__btn']")
loadMoreButton.click()
driver.quit()
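As a side note (a sketch using the same locator, not required for the fix): if you stay with find_elements_by_xpath, index into the returned list; an empty list also gives you a cheap existence check:

buttons = driver.find_elements_by_xpath("//span[@class='tv-load-more__btn']")
if buttons:  # an empty list means the button is not on the page
    buttons[0].click()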
I would suggest using By, WebDriverWait, expected_conditions, and element_to_be_clickable:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
options = Options()
#options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
driver.get("https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/")
loadMoreButton = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "//span[#class='tv-load-more__btn']")))
loadMoreButton.click()
driver.quit()
I wrote this piece of code to click the Load More button until the full page is loaded. Moreover, I always prefer XPath over class names for finding elements.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import time
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 5)
action = ActionChains(driver)
driver.get("https://www.tradingview.com/markets/stocks-usa/sectorandindustry-industry/biotechnology/")

# Used to keep a click count
i = 0
while True:
    try:
        # Give the page some time so the newly loaded rows can render
        time.sleep(2)
        Load_More = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More']")))
        action.move_to_element(Load_More).click().perform()
        i += 1
        print(f"Clicked {i} times.")
    except:
        print("Reached End of the Page")
        break