Python & Selenium : Scroll down to avoid overlaping advertising and click button

Python & Selenium : Scroll down to avoid overlaping advertising and click button - python

I try to loop through pages but I have overlaping advertising on my numbers of pages button.
I have this on my browser, the pages buttons are behind this "DocuSign" advertising:
So I tried to scroll down to be able to click on next page but it doesn't work.
I would like to have this to be able to click on next pages:
I tried this:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
driver = webdriver.Chrome(executable_path="/Users/name/Downloads/chromedriver 4")
url = 'http://www.legorafi.fr/category/france/politique'
driver.get(url)
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"div#appconsent>iframe")))
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.button--filled>span.baseText"))).click()
page_number = 1
while True:
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
time.sleep(3)
try:
link = driver.find_element_by_xpath('//*[#id="main"]/div[5]/div/a[1]')
except NoSuchElementException:
break
link.click()
print(driver.current_url)
page_number += 1

You can try finding the next page button more appropriately instead.
driver.find_elements_by_xpath("//*[contains(text(), 'Next Page')]")

Related

How do i use selenium to click "Accept All Cookies" on a cookie pop up?

I am currently trying to learn selenium in Python and I am having an issue clicking the "Accept All Cookies" button.
I am using:
Python v3.9
Chrome v87
This is the HTML page i am trying to scrape
https://www.currys.co.uk/gbuk/tv-and-home-entertainment/televisions/televisions/samsung-ue75tu7020kxxu-75-smart-4k-ultra-hd-hdr-led-tv-10213562-pdt.html
Here is my code currently
# Selenium Tutorial #1
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome(r"C:\Users\Ste1337\Desktop\chromedriver\chromedriver.exe")
driver.get("https://www.currys.co.uk/gbuk/tv-and-home-entertainment/televisions/televisions/samsung-ue75tu7020kxxu-75-smart-4k-ultra-hd-hdr-led-tv-10213562-pdt.html")
#search = driver.find_element_by_id(ContentPlaceHolder1_NotifyBtn)
driver.implicitly_wait(10)
link = driver.find_element_by_id("onetrust-accept-btn-handler")
link.click
try:
element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "onetrust-accept-btn-handler"))
)
element.click
except:
driver.quit()

The "( )" is missing after the click.
Try this:
link = driver.find_element_by_id("onetrust-accept-btn-handler")
link.click()

Simply wait and click.
wait = WebDriverWait(driver, 10)
driver.get("https://www.currys.co.uk/gbuk/tv-and-home-entertainment/televisions/televisions/samsung-ue75tu7020kxxu-75-smart-4k-ultra-hd-hdr-led-tv-10213562-pdt.html")
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()

Script fails to keep clicking on load more button

I've written a script in Python in association with selenium to keep clicking on MORE button to load more items until there are no new items left to load from a webpage. However, my below script can click once on that MORE button available on the bottom of that page.
Link to that site
This is my try so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(link)
while True:
for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,".results .name a.startup-link"))):
print(elems.get_attribute("href"))
try:
loadmore = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,"[class='more']")))
driver.execute_script("arguments[0].scrollIntoView();", loadmore)
loadmore.click()
except Exception:break
driver.quit()
How can I keep clicking on that MORE button until there are no such button left to click and parse the links as I've already tried using for loop.

I've managed to solve the problem pursuing sir Andersson's logic within my exising script. This is what the modified script look like.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(link)
while True:
try:
loadmore = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,"[class='more']")))
driver.execute_script("arguments[0].click();", loadmore)
wait.until(EC.staleness_of(loadmore))
except Exception:break
for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,".results .name a.startup-link"))):
print(elems.get_attribute("href"))
driver.quit()

why not just?
while (driver.FindElements(By.ClassName("more")).Count > 0)
{
driver.FindElement(By.ClassName("more")).Click();
//Some delay to wait lazyload to complete
}
c# example. pretty sure that it can be done with python as well

Difficulty with simulating clicks in Selenium and then scraping data of new page after click

I am trying to simulate a click from this page (http://www.oddsportal.com/baseball/usa/mlb/results/) to the last page number found at the bottom. The click I use on the icon in my code seems to work, but I can't get it to scrape the actual page data I want to after simulating this click. Instead, it just scrapes the data from the first original url. Any help on this would be greatly appreciated.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
url='http://www.oddsportal.com/baseball/usa/mlb/results/'
driver = webdriver.Chrome()
driver.get(url)
timeout=5
while True:
try:
element_present = EC.presence_of_element_located((By.LINK_TEXT, '»|'))
WebDriverWait(driver, timeout).until(element_present)
last_page_link = driver.find_element_by_link_text('»|')
last_page_link.click()
element_present2 = EC.presence_of_element_located((By.XPATH, ".//th[#class='first2 tl']"))
WebDriverWait(driver, timeout).until(element_present2)
content=driver.page_source
soup=BeautifulSoup(content,'lxml')
dates2 = soup.find_all('th',{'class':'first2'})
dates2 = [element.text for element in dates2]
dates2=dates2[1:]
driver.quit()
except TimeoutException:
print('Timeout Error!')
driver.quit()
continue
break
print(dates2)

Facing issues while clicking on some links in a webpage

I've written a script in python to click on some categories in a webpage. I could manage to click on the first two categories but got stuck when it comes to initiate the final click. I've given a link leading to the two images in I have marked where to click.
This is the first link where there is a sign (marked with pencil) to click on to enter the second portion.
This is the second link where I get stuck when I try to click on the names (I've marked those names with pencil)
This is the site link.
Script I've tried with so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get("replace_with_above_link")
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "i4ewOd-pzNkMb-ornU0b-b0t70b-Bz112c"))).click()
post = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[role='checkbox']")))[1]
post.click()
for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".HzV7m-pbTTYe-JNdkSc .suEOdc"))):
item.click()
driver.quit()
My intention is to click the names cyclically. Thanks in advance.

Try below code to click each item in list:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(URL)
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "i4ewOd-pzNkMb-ornU0b-b0t70b-Bz112c"))).click()
post = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[role='checkbox']")))[1]
post.click()
for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".HzV7m-pbTTYe-JNdkSc .suEOdc")))[1:]:
item.click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".HzV7m-tJHJj-LgbsSe-Bz112c.qqvbed-a4fUwd-LgbsSe-Bz112c"))).click()
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".qqvbed-p83tee")))
driver.quit()

Extracting user comments from news website

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def wait(dr, x):
element = WebDriverWait(dr, 50).until(
EC.presence_of_all_elements_located((By.XPATH, x))
)
return element
from selenium import webdriver
browser = webdriver.Firefox()
browser.get("http://www.dinamalar.com/user_comments.asp? uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
for elem in wait(browser, '//*[#id="commsec"]/div[2]/div[1]'):
print elem.text
This is the link i need to extract all the comments http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D
But my code extracting only the first 10 comments. After clicking the button the other 10 comments are loaded dynamically. How to extract all these comments using python selenium

The idea would be to look for how many "more ideas" elements are present on the page. Every time you click the button and load more comments, one more "more ideas" red button becomes present. Implementation:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
browser = webdriver.Firefox()
wait = WebDriverWait(browser, 10)
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
# initial wait for the page to load
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".morered")))
pages = 1
while True:
browser.find_elements_by_css_selector(".morered")[-1].click()
# wait for more "load more" buttons to be present
try:
wait.until(lambda browser: len(browser.find_elements_by_css_selector(".morered")) > pages)
except TimeoutException:
break # no more data loaded, exit the loop
print("Comments loaded: %d" % len(browser.find_elements_by_css_selector(".dateg")))
pages += 1
browser.close()
Note that I've also removed that extra space inside the URL.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python & Selenium : Scroll down to avoid overlaping advertising and click button - python

You can try finding the next page button more appropriately instead. driver.find_elements_by_xpath("//*[contains(text(), 'Next Page')]")

Related

How do i use selenium to click "Accept All Cookies" on a cookie pop up?

Script fails to keep clicking on load more button

Difficulty with simulating clicks in Selenium and then scraping data of new page after click

Facing issues while clicking on some links in a webpage

Extracting user comments from news website

Categories

Resources