Selenium - bypass ads Google_Vignette - python

I'm trying to crawl a site and am running into a Google ad. I think I've found its iframe, but I can't find the element to click to dismiss the ad. I've spent about 7 hours on this and think it's over my head. Help very much appreciated.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

chrome_options = Options()
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options,
                          executable_path=r'C:\Users\gblac\OneDrive\Desktop\Chromedriver.exe')
url = 'https://free-mp3-download.net/'
driver.get(url)
WebDriverWait(driver, 4)  # note: without .until(...) this line does nothing
search = driver.find_element(By.ID, 'q')
search.send_keys('testing songs')
search.click()
button = driver.find_element(By.ID, 'snd')
button.click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "results_t")))
results = driver.find_element(By.ID, 'results_t').find_elements(By.TAG_NAME, 'tr')
results[0].find_element(By.TAG_NAME, 'a').click()
# The code to remove the ad would go here
# driver.find_elements(By.CSS_SELECTOR,'[text()="Close"]').click()  # text() is XPath syntax, not CSS, so this never matches

Add the code block below to your code, before searching for any text:
import time

time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("google-auto-placed");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("adsbygoogle adsbygoogle-noablate");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.find_element(By.ID, "q").send_keys("tamil songs")
driver.find_element(By.ID, "snd").click()
It will close the two ad blocks on that page, but if you refresh or move forward and backward, the ads will display again, so you have to remove those ad blocks again with the code above at every navigation point.
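A minimal sketch of wrapping that removal in a reusable helper (same class names as targeted above), to call after every driver.get(), refresh, or back/forward navigation:

def remove_ads(driver):
    # Strip the two Google ad containers from the current page.
    for class_name in ("google-auto-placed", "adsbygoogle adsbygoogle-noablate"):
        driver.execute_script("""
const elements = document.getElementsByClassName(arguments[0]);
while (elements.length > 0) elements[0].remove();
""", class_name)

Call remove_ads(driver) after each navigation; the rest of the flow then continues: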
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,"results_t")))
results = driver.find_element(By.ID,'results_t').find_elements(By.TAG_NAME,'tr')
results[0].find_element(By.TAG_NAME,'a').click()
time.sleep(2)
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(1)
# handling captcha
iframe_captcha = driver.find_element(By.XPATH, ".//iframe[@title='reCAPTCHA']")
driver.switch_to.frame(iframe_captcha)
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, ".recaptcha-checkbox-border").click()
time.sleep(2)
driver.switch_to.default_content()
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
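Clicking the checkbox only helps when reCAPTCHA doesn't escalate to an image challenge. As a hedged sketch (the recaptcha-anchor ID and the aria-checked attribute are assumptions about the widget's current markup, not something confirmed above), you can wait for the checkbox to report success before clicking Download:

driver.switch_to.frame(iframe_captcha)
WebDriverWait(driver, 10).until(
    # assumption: the widget's anchor element exposes aria-checked="true" once solved
    lambda d: d.find_element(By.ID, "recaptcha-anchor").get_attribute("aria-checked") == "true"
)
driver.switch_to.default_content()
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()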


How to scrape the Next button on LinkedIn with Selenium using Python?

I am trying to scrape the LinkedIn website using Selenium. I can't parse the Next button; it resists as much as it can. I've spent half a day addressing this, all in vain.
I tried various options, matching on text and so on. Only a starts-with ID locator works, but it selects a different button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[@aria-label='Далее']"}
This is quite common for this site:
//*[starts-with(@id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
chrome_driver_path = Service(r"E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)
SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"
sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
login_input = driver.find_element(By.XPATH, '//*[@id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[@id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[@id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)
lens_button = driver.find_element(By.XPATH, '//*[@id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[@id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)
page_button = driver.find_element(By.XPATH, "//button[@aria-label='Далее']")  # 'Далее' is Russian for 'Next'
page_button.click()
sleep(60)
[Screenshots: Chrome DevTools inspection of the Next button]
OK, there are several issues here:
- The main reason your code didn't work is that the "next" pagination button is not even created on the page until you scroll down, so I added a mechanism that scrolls the page until that button can be clicked.
- It's not good to create locators based on local-language texts.
- You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses (see the sketch after this list).
- I used mixed locator types to show that sometimes it's better to use By.ID and sometimes By.XPATH, etc.
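To make the explicit-waits point concrete, here is a minimal sketch (reusing the filter-bar locator from the question) of replacing a hardcoded pause with a condition-based wait:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)  # polls the DOM for up to 10 s, returns as soon as the condition holds
# replaces sleep(5) followed by a blind find_element:
people_button = wait.until(EC.element_to_be_clickable(
    (By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button')))
people_button.click()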
The following code works:
import time

from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)

url = "https://www.linkedin.com/feed/"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)      # your login email
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)   # your password
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()

search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(@class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()

wait = WebDriverWait(driver, 4)  # shorter wait while probing for the pagination button
while True:
    try:
        next_btn = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        next_btn.location_once_scrolled_into_view  # scrolls the button into view as a side effect
        time.sleep(0.2)
        next_btn.click()
        break
    except TimeoutException:
        # button not rendered yet - scroll down a bit and try again
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)

Page navigation click without using current_url python selenium

How can I navigate through each page without using driver.current_url? In my full code, I get a bunch of errors once I loop through the pages. Without the loop it runs fine but can only go through one page, and I want to navigate through as many pages as there are. Any help appreciated, thanks.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep

driver_service = Service(executable_path=r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()
wait = WebDriverWait(driver, 5)

driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'

for page in range(2, 5):
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable(
                (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            #sleep(3)
        except:
            print("No records found " + job)
    sleep(3)
    driver.get(template.format(page))
If I understand you correctly, you want to determine dynamically how many pages there are and loop over each of them.
I managed to achieve this with a while loop that checks on each page whether the "Next" button at the bottom is visible. If it is not, the last page has been reached and you can exit the loop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException
from time import sleep

driver_service = Service(executable_path="C:\\Users\\Stefan\\bin\\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window()
wait = WebDriverWait(driver, 5)

driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template + '?page={}'
page = 1

while True:
    # check if the "Next" button is visible -> if not, the last page was reached
    try:
        driver.find_element(By.XPATH, "//a[@title='Next']")
    except NoSuchElementException:
        break  # last page reached
    link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']")]
    for job in link_job:
        driver.get(job)
        try:
            quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable(
                (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])")))
            quick_apply.click()
            #sleep(3)
        except:
            print("No records found " + job)
    sleep(3)
    page += 1
    driver.get(template.format(page))
driver.close()
It seems your problem is a StaleElementReferenceException raised when you get back from a job page to the jobs search results page.
The simplest approach to overcome this problem is to keep the jobs search results page URL and re-open it.
That is actually the only change I made to your code, and it works.
I also changed driver.find_elements(By.XPATH, "//a[@data-automation='jobTitle']") to wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']"))) for better reliability.
The code below works, but the web site itself responds badly.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)

url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'
for p in range(1, 20):
    driver.get(url.format(p=p))  # fill in the page number placeholder
    link_job = [x.get_attribute('href') for x in wait.until(
        EC.visibility_of_all_elements_located((By.XPATH, "//a[@data-automation='jobTitle']")))]
    for job in link_job:
        driver.get(job)
        try:
            wait.until(EC.element_to_be_clickable(
                (By.XPATH, "(//a[@data-automation='job-detail-apply' and @target='_self'])"))).click()
            print("applied")
        except:
            print("No records found " + job)
    driver.get(url.format(p=p))  # back to the kept search results URL
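A hedged variant, if you prefer not to hardcode range(1, 20): stop when a page yields no job links. This assumes an out-of-range page simply renders without results, which is not verified above.

from selenium.common.exceptions import TimeoutException

p = 1
while True:
    driver.get(url.format(p=p))
    try:
        jobs = wait.until(EC.visibility_of_all_elements_located(
            (By.XPATH, "//a[@data-automation='jobTitle']")))
    except TimeoutException:
        break  # no job links rendered - we ran past the last page
    link_job = [x.get_attribute('href') for x in jobs]
    # ... visit each job exactly as in the loop above ...
    p += 1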

ElementClickInterceptedException Error Selenium

I keep getting an ElementClickInterceptedException in this script I'm writing. I'm supposed to click a link that opens a new window, scrape the new window, close it, and move on to the next link to scrape, but it just won't work; it gives the error after at most 3 link clicks. I saw a similar question here and tried wait.until(EC.element_to_be_clickable()) and also maximized my screen, but that still did not work for me. Here is the site I am scraping (trying to scrape all the games for each day) and here is a chunk of the code I'm using:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
from time import sleep

l = "https://www.flashscore.com/"
options = FirefoxOptions()
#options.add_argument("--headless")
driver = webdriver.Firefox(executable_path="geckodriver.exe", firefox_options=options)
driver.install_addon('C:\\Windows\\adblock_plus-3.10.1-an+fx.xpi')
driver.maximize_window()
driver.get(l)
driver.implicitly_wait(5)
cnt = 0
sleep(5)
wait = WebDriverWait(driver, 20)

a = driver.window_handles[0]
b = driver.window_handles[1]
driver.switch_to.window(a)
# Close the Adblock tab
if 'Adblock' in driver.title:
    driver.close()
    driver.switch_to.window(a)
else:
    driver.switch_to.window(b)
    driver.close()
    driver.switch_to.window(a)

var1 = driver.find_elements_by_xpath("//div[@class='leagues--live ']/div/div")
knt = 0
for i in range(len(var1)):
    if (var1[i].get_attribute("id")):
        knt += 1
        #sleep(2)
        #driver.switch_to.window(driver.window_handles)
        var1[i].click()
        sleep(2)
        #var2 = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[contains(@class, 'event__match event__match--last event__match--twoLine')]")))
        print(len(driver.window_handles))
        driver.switch_to.window(driver.window_handles[1])
        try:
            sleep(4)
            driver.close()
            driver.switch_to.window(a)
            #sleep(3)
        except Exception:
            print("Exception caught")
    #WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CLASS_NAME, "event__match event__match--last event__match--twoLine")))
    sleep(10)
driver.close()
Any ideas to help, please?
It looks like the element you are trying to click on is covered by a banner ad or something else like a cookie message.
To fix this you can scroll down to the last element using the following code:
driver.execute_script("""
    let items = document.querySelectorAll('div[title="Click for match detail!"]');
    items[items.length - 1].scrollIntoView();
""")
Add it before clicking on the desired element in the loop.
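An alternative sketch, not part of the answer above: scroll the specific element you are about to click into view via Selenium, which avoids re-querying the whole list (element_to_click is a hypothetical name for that WebElement):

# element_to_click is hypothetical - whatever WebElement you are about to click
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element_to_click)
element_to_click.click()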
I tried to make a working example for you, but it works with chromedriver, not geckodriver:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
service = Service(executable_path=r'your\path\to\chromedriver.exe')
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 5)

url = 'https://www.flashscore.com/'
driver.get(url)
# accept cookies
wait.until(EC.presence_of_element_located((By.ID, 'onetrust-accept-btn-handler'))).click()

matches = driver.find_elements(By.CSS_SELECTOR, 'div[title="Click for match detail!"]')
for match in matches:
    driver.execute_script("""
        let items = document.querySelectorAll('div[title="Click for match detail!"]');
        items[items.length - 1].scrollIntoView();
    """)
    match.click()
    driver.switch_to.window(driver.window_handles[1])
    print('get data from open page')
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
driver.quit()
It works in both normal and headless mode
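If a click is still intercepted despite the scrolling, a hedged fallback (an addition of mine, not part of the answer above) is to fall back to a JavaScript click, which ignores overlays:

from selenium.common.exceptions import ElementClickInterceptedException

try:
    match.click()
except ElementClickInterceptedException:
    # a JavaScript click bypasses whatever element is painted on top
    driver.execute_script("arguments[0].click();", match)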

Scraping a web content executing an event but unexpected click event appeared

So I am trying to make a GET request, access a web page, execute an event (in this case, press a button to extend a table), and scrape the content.
This is my code:
def exec_func():
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
However, an ElementClickInterceptedException arose! This seems to happen because some other element obscures the element I am trying to click.
This is the web page I am interested in.
How could I scrape the content of the table while avoiding this blocking issue?
You have two options
Option 1: scroll the element into view
def exec_func():
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    element.location_once_scrolled_into_view  # scroll the element into view first
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
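location_once_scrolled_into_view is a WebElement property whose mere access scrolls the element into view as a side effect. An equivalent, more explicit sketch using a plain DOM call:

# explicit equivalent of the property's side effect
driver.execute_script("arguments[0].scrollIntoView(true);", element)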
Option 2: Accept the cookie
def exec_func():
    wait = WebDriverWait(driver, 20)
    # accept the cookie
    wait.until(expected_conditions.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, "[id^='sp_message_iframe_']")))
    wait.until(expected_conditions.element_to_be_clickable((By.XPATH, "//button[@title='Accept']"))).click()
    driver.switch_to.default_content()
    # =================================
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source
This code is working fine for me.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains

def exec_func():
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.implicitly_wait(10)
    driver.set_page_load_timeout(10)
    driver.maximize_window()
    driver.get("https://www.telegraph.co.uk/markets-hub/assets/shares/")
    wait = WebDriverWait(driver, 20)
    wait.until(expected_conditions.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
    element = driver.find_element_by_xpath("//a[@ng-click='listLengthMax = 3000']")
    actions = ActionChains(driver)
    actions.move_to_element(element).perform()
    element.click()
    return driver.page_source

print(exec_func())

StaleElementReferenceException in python selenium

I am trying to count how many times the "Load More Reviews" option is clicked on this site. But I am getting the following error:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
Here is my python code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--disable-notifications")
driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options)

url = "https://www.justdial.com/Delhi/S-K-Premium-Par-Hari-Nagar/011PXX11-XX11-131128122154-B8G6_BZDET"
driver.get(url)
pop_up = WebDriverWait(driver, 30).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="best_deal_detail_div"]/section/span')))
pop_up.click()  # dismiss the pop-up

count = 0
while True:
    element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//span[text()='Load More Reviews..']")))
    element.click()
    count = count + 1
    print(count)
Try the code below (note that it needs StaleElementReferenceException and TimeoutException from selenium.common.exceptions):
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

count = 0
while True:
    try:
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH, "//span[text()='Load More Reviews..']"))).click()
        count = count + 1
    except StaleElementReferenceException:
        pass
    except TimeoutException:
        break
print(count)
Issue: as per your code, you wait for the Load More Reviews button to be clickable. Once it has been clicked, and even before the page has finished loading, the wait detects that the button is present and clickable again; but by the time you try to click it, the page is still in the process of refreshing / loading more reviews. As a result the HTML DOM is disrupted/refreshed and the stale element exception is raised.
Also, as there was no break condition in your code, I added one: if there is no Load More Reviews button on the page, it breaks out of the loop.
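An alternative sketch (my own hedged variant, which presumes the button node is replaced on each load, something not verified above): wait for the clicked button to go stale before looking it up again, so the next lookup happens only after the DOM refresh.

from selenium.common.exceptions import TimeoutException

count = 0
while True:
    try:
        button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH, "//span[text()='Load More Reviews..']")))
        button.click()
        # wait for the old node to be detached, i.e. the DOM refresh to finish
        WebDriverWait(driver, 20).until(EC.staleness_of(button))
        count += 1
        print(count)
    except TimeoutException:
        break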
