Scraping web content by executing an event, but an unexpected click interception appeared - python

So I am trying to make a GET request, access the web page, execute an event (in this case, press a button to extend a table), and scrape the content.
This is my code:
def exec_func():
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
However, an ElementClickInterceptedException is raised! This seems to happen because some other element obscures the element I am trying to click.
This is the web page I am interested in: https://www.telegraph.co.uk/markets-hub/assets/shares/
How could I scrape the content of the table while avoiding this blocking element?

You have two options:
Option 1: scroll into view the element
def exec_func():
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    element.location_once_scrolled_into_view  # reading this property scrolls the element into view
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
Option 2: Accept the cookie banner first
def exec_func():
    wait = WebDriverWait(driver, 20)
    # accept the cookie consent, which lives inside an iframe
    wait.until(expected_conditions.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, "[id^='sp_message_iframe_']")))
    wait.until(expected_conditions.element_to_be_clickable(
        (By.XPATH, "//button[@title='Accept']"))).click()
    driver.switch_to.default_content()
    # =================================
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
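If the click is still intercepted after both options, a third fallback (my addition, not part of the original answer) is to click through JavaScript, which bypasses Selenium's obstruction check entirely:

element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
# A JavaScript click fires the handler even if another element overlaps the target
driver.execute_script("arguments[0].click();", element)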

This code is working fine for me.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
def exec_func():
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.implicitly_wait(10)
    driver.set_page_load_timeout(10)
    driver.maximize_window()
    driver.get("https://www.telegraph.co.uk/markets-hub/assets/shares/")
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
print(exec_func())

Related

How to scrape Next button on Linkedin with Selenium using Python?

I am trying to scrape the LinkedIn website using Selenium, but I can't locate the Next button; it resists every attempt. I've spent half a day trying to address this, all in vain.
I tried various options, matching by text and so on. The only thing that works is matching on the start of an ID, but that selects a different button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[@aria-label='Далее']"}
('Далее' is Russian for 'Next'.)
IDs like this are quite common for this site:
//*[starts-with(@id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
chrome_driver_path = Service(r"E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)
SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"
sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
login_input = driver.find_element(By.XPATH, '//*[#id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[#id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[#id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)
lens_button = driver.find_element(By.XPATH, '//*[#id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[#id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)
page_button = driver.find_element(By.XPATH, "//button[@aria-label='Далее']")
page_button.click()
sleep(60)
(Screenshots from the question: Chrome DevTools inspection of the Next button.)
OK, there are several issues here:
The main reason your code did not work is that the "next" pagination button is not even created on the page until you scroll down, so I added a mechanism to scroll the page until that button can be clicked (the loop at the end of the code below).
It's not good to build locators on local-language texts.
You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses.
I used mixed locator types to show that sometimes it's better to use By.ID and sometimes By.XPATH, etc.
The following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.linkedin.com/feed/"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(#href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(#class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()
wait = WebDriverWait(driver, 4)
while True:
    try:
        next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        next_btn.location_once_scrolled_into_view
        time.sleep(0.2)
        next_btn.click()
        break
    except:
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
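One caveat (my note, not from the original answer): the bare except means this loop never terminates if the button never appears. A bounded variant might look like the sketch below, where max_scrolls is an assumed tuning parameter:

from selenium.common.exceptions import TimeoutException

max_scrolls = 30  # assumed upper bound on scroll attempts; tune for the feed length
for _ in range(max_scrolls):
    try:
        next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        next_btn.location_once_scrolled_into_view  # reading the property scrolls the button into view
        time.sleep(0.2)
        next_btn.click()
        break
    except TimeoutException:
        # button not rendered yet: scroll further down and retry
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)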

Selenium - bypass ads Google_Vignette

I'm trying to crawl a site and am running into a Google ad. I think I've found its iframe, but I can't find the element to click to dismiss the ad. I've spent about 7 hours on this now and think it is over my head. Help very much appreciated.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path='C:/Users/gblac/OneDrive/Desktop/Chromedriver.exe')
url = 'https://free-mp3-download.net/'
driver.get(url)
WebDriverWait(driver, 4)
search = driver.find_element(By.ID,'q')
search.send_keys('testing songs')
search.click()
button = driver.find_element(By.ID,'snd')
button.click()
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "results_t")))
results = driver.find_element(By.ID,'results_t').find_elements(By.TAG_NAME,'tr')
results[0].find_element(By.TAG_NAME,'a').click()
# The code to remove the ad would go here
# driver.find_elements(By.CSS_SELECTOR,'[text()="Close"]').click()
Add the code block below to your code, before searching for any text (it needs import time):

import time
time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("google-auto-placed");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("adsbygoogle adsbygoogle-noablate");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.find_element(By.ID,"q").send_keys("tamil songs")
driver.find_element(By.ID,"snd").click()
This closes the two ad blocks on that page, but if you refresh or navigate forward and backward the ads will appear again, and you have to remove them with the same code each time; please add that code wherever it applies (a reusable helper is sketched after the code below).
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,"results_t")))
results = driver.find_element(By.ID,'results_t').find_elements(By.TAG_NAME,'tr')
results[0].find_element(By.TAG_NAME,'a').click()
time.sleep(2)
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(1)
# handling captcha
iframe_captcha = driver.find_element(By.XPATH,".//iframe[#title='reCAPTCHA']")
driver.switch_to.frame(iframe_captcha)
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, ".recaptcha-checkbox-border").click()
time.sleep(2)
driver.switch_to.default_content()
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
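To follow the suggestion above of re-running the removal after every refresh or navigation, one option (a sketch of mine, not from the original answer) is to wrap the two removal scripts in a helper and call it after each driver.get(), click, or back/forward step:

def remove_ads(driver):
    # Strip both Google ad containers from the current DOM.
    # Class names are taken from the answer above and may change over time.
    for class_name in ("google-auto-placed", "adsbygoogle adsbygoogle-noablate"):
        driver.execute_script("""
            const elements = document.getElementsByClassName(arguments[0]);
            while (elements.length > 0) elements[0].remove();
        """, class_name)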

Python Selenium issue

So I'm trying to automate the Cookie Clicker game here: https://orteil.dashnet.org/cookieclicker/
But I'm having a problem purchasing the upgrades.
Here is some code
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.implicitly_wait(5)
driver.maximize_window()
driver.get("https://orteil.dashnet.org/cookieclicker/")
cookies_count = driver.find_element_by_id("cookies")
cookie = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "bigCookie"))
)
items = [driver.find_element_by_id("productPrice" + str(i)) for i in range(1, -1, -1)]
while True:
    cookie.click()
    count = int(cookies_count.text.split(" ")[0])
    for item in items:
        value = int(item.text)
        if value <= count:
            WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, item))).click()
After I reach 17/18 cookies it stops with no error, and my explicit wait times out.
The item is not directly "clickable" because the browser thinks another element is covering it, so you have to click it directly with JavaScript:
Replace:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, item))).click()
With:
driver.execute_script("arguments[0].click();", item)
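For context, this is the question's loop with the replacement applied (a sketch; note also that the original wait passed a WebElement to (By.ID, ...), which expects an id string, so that explicit wait could never succeed; the JavaScript click sidesteps that problem as well):

while True:
    cookie.click()
    count = int(cookies_count.text.split(" ")[0])
    for item in items:
        value = int(item.text)
        if value <= count:
            # a JavaScript click ignores whatever element the browser thinks is on top
            driver.execute_script("arguments[0].click();", item)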

StaleElementReferenceException in python selenium

I am trying to count how many times the "Load More Reviews" option can be clicked on this site, but I am getting the following error:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Here is my python code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--disable-notifications")
driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)
url = "https://www.justdial.com/Delhi/S-K-Premium-Par-Hari-Nagar/011PXX11-XX11-131128122154-B8G6_BZDET"
driver.get(url)
pop_up = WebDriverWait(driver, 30).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="best_deal_detail_div"]/section/span')))
pop_up.click()  # dismiss the pop-up
count = 0
while True:
    element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More Reviews..']")))
    element.click()
    count = count + 1
    print(count)
Try the code below (note the two extra imports it needs):

from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

count = 0
while True:
    try:
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More Reviews..']"))).click()
        count = count + 1
    except StaleElementReferenceException:
        pass
    except TimeoutException:
        break
print(count)
Issue: as per your code, you wait for the Load More Reviews button to be clickable. Once it is clicked, the page starts loading more reviews; the wait then detects the button as present and clickable again before the page has finished refreshing, but by the time the click is attempted the DOM has been disrupted/refreshed, and a stale element exception is thrown.
Also, as there is no break condition in your code, I have added one: when there is no Load More Reviews button left on the page, the loop exits.
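The same try/except retry pattern generalizes to any repeated-click widget. A reusable sketch (my naming, assuming the imports already shown above):

def click_until_gone(driver, xpath, timeout=20):
    # Click the element matching xpath until it disappears; return how many clicks succeeded.
    wait = WebDriverWait(driver, timeout)
    count = 0
    while True:
        try:
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
            count = count + 1
        except StaleElementReferenceException:
            pass  # DOM refreshed mid-click; re-locate on the next pass
        except TimeoutException:
            return count  # button gone: nothing left to load

print(click_until_gone(driver, "//span[text()='Load More Reviews..']"))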

How to click on the Next button with doPostBack() call to browse to the next page while fetching data with Selenium and Python?

I have written this code, but it is not going to the next page; it keeps fetching data from the same page repeatedly.
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver import ActionChains
url="http://www.4docsearch.com/Delhi/Doctors"
driver = webdriver.Chrome(r'C:\chromedriver.exe')
driver.get(url)
next_page = True
while next_page == True:
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    div = soup.find('div', {"id": "ContentPlaceHolder1_divResult"})
    for heads in div.find_all('h2'):
        links = heads.find('a')
        print(links['href'])
    try:
        driver.find_element_by_xpath("""//*[@id="ContentPlaceHolder1_lnkNext"]""").click()
    except:
        print('No more pages')
        next_page = False
driver.close()
To browse to the Next page, since the desired element is a JavaScript-enabled element with a __doPostBack() call, you have to:
Induce WebDriverWait for staleness_of() the previous element first.
Induce WebDriverWait for element_to_be_clickable() on the element next.
You can use the following Locator Strategies:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("start-maximized")
driver = webdriver.Chrome(options=chrome_options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("http://www.4docsearch.com/Delhi/Doctors")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@id='ContentPlaceHolder1_lnkNext' and not(@class='aspNetDisabled')]"))).click()
while True:
    try:
        WebDriverWait(driver, 20).until(EC.staleness_of(driver.find_element_by_xpath("//a[@id='ContentPlaceHolder1_lnkNext' and not(@class='aspNetDisabled')]")))
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@id='ContentPlaceHolder1_lnkNext' and not(@class='aspNetDisabled')]"))).click()
        print("Next")
    except:
        print("No more pages")
        break
print("Exiting")
driver.quit()
Console Output
Next
Next
Next
.
.
.
No more pages
Exiting
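The staleness wait is the key step: clicking a __doPostBack() link replaces the DOM, so the old reference must go stale before the fresh page can be trusted. A compact helper capturing that order (a sketch with my naming, not from the original answer):

def click_and_wait_for_postback(driver, wait, xpath):
    # Click the Next link, then block until the postback has replaced the old DOM node.
    old = driver.find_element_by_xpath(xpath)
    old.click()
    wait.until(EC.staleness_of(old))  # old node detached: the new page has rendered
    # the caller can now safely re-locate elements on the fresh page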
