I basically want to click every "load more" button on the page before running the rest of my code, because otherwise I won't be able to access each profile.
There are two problems:
First, how do I even access the button? I tried methods similar to the fancyCompLabel part of my code, but it won't work.
Second, I'm not sure how I should loop through all the buttons, since I assume the second button only starts loading after the first one has been clicked.
Here's the relevant HTML for the button ("mehr anzeigen" is German for "show more"):
<span type="button" class="md-text-button button-orange-white" onclick="loadFollowing();">mehr anzeigen</span>
Here's the code to access each profile, but as you can see it only reaches the profiles loaded before the first "load more" button.
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
time.sleep(3)
# Set some Selenium Options
options = webdriver.ChromeOptions()
# options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Webdriver
wd = webdriver.Chrome(executable_path='/usr/bin/chromedriver', options=options)
# URL
url = 'https://www.techpilot.de/zulieferer-suchen?laserschneiden%202d%20(laserstrahlschneiden)'
# Load URL
wd.get(url)
# Get HTML
soup = BeautifulSoup(wd.page_source, 'html.parser')
wait = WebDriverWait(wd, 15)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bodyJSP #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#efficientSearchIframe")))
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".hideFunctionalScrollbar #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
#wd.switch_to.default_content() # you do not need to switch to default content because iframe is closed already
wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fancyCompLabel")))
results = wd.find_elements_by_css_selector(".fancyCompLabel")
for profil in results:
    print(profil.text)  # here's the rest of my code, but it's not relevant
wd.close()
As far as I can see, the second pop-up element located by .hideFunctionalScrollbar #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll initially appears outside the visible screen.
So after switching to the iframe you need to scroll to that element before trying to click on it.
Also, presence_of_all_elements_located doesn't actually wait for the presence of all the elements. It can't know how many such elements there will be; it returns once it finds at least one element matching the passed locator.
So I'd advise adding a short sleep after that line to allow all of those elements to actually load.
from selenium.webdriver.common.action_chains import ActionChains
wait = WebDriverWait(wd, 15)
actions = ActionChains(wd)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bodyJSP #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#efficientSearchIframe")))
second_pop_up = wd.find_element_by_css_selector(".hideFunctionalScrollbar #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll")
actions.move_to_element(second_pop_up).perform()
time.sleep(0.5)
second_pop_up.click()
results = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fancyCompLabel")))
time.sleep(0.5)
for profil in results:
    print(profil.text)  # here's the rest of my code, but it's not relevant
wd.close()
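As for the "clicking every load more button" part of the question: since each click appears to load the next batch (and, presumably, the next button) only after the previous click, a loop that clicks until the button stops appearing is one way to handle it. A hedged sketch, reusing wd, By, EC, WebDriverWait and time from the code above; the selector is an assumption taken from the posted <span>, so adjust it if the page differs:
from selenium.common.exceptions import TimeoutException

# Keep clicking "mehr anzeigen" ("show more") until no clickable button is left
while True:
    try:
        more_button = WebDriverWait(wd, 5).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "span.md-text-button.button-orange-white")))
    except TimeoutException:
        break  # no "load more" button left
    wd.execute_script("arguments[0].scrollIntoView(true);", more_button)
    wd.execute_script("arguments[0].click();", more_button)
    time.sleep(1)  # give the next batch of profiles time to load
After the loop finishes, collecting the .fancyCompLabel elements should cover all loaded profiles.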
Selenium does not see a displayed element on the page on the second iteration.
I click on a link, and a box within the website appears. I need to close that box.
This action will be performed 1000+ times. On the first iteration, Selenium opens the link and closes the box. On the second iteration, Selenium opens the link but cannot close the box. At this point, it gives the error message:
Exception has occurred: ElementNotInteractableException Message: element not interactable (Session info: chrome=105.0.5195.102)
My code and the HTML of the relevant element are below.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(chrome_options=options, executable_path=r"D:\SeleniumDriver\chromedriver.exe")
driver.get('https://sprawozdaniaopp.niw.gov.pl/')
find_button = driver.find_element("id", "btnsearch")
find_button.click()
interesting_links = driver.find_elements(By.CLASS_NAME, "dialog")
for i in range(len(interesting_links)):
    interesting_links[i].click()
    time.sleep(10)  # I tried 60 seconds, no change
    #
    # HERE I WOULD DO MY THINGS
    #
    close_box = driver.find_element(By.CLASS_NAME, "ui-dialog-titlebar-close")
    print(close_box.is_displayed())
    close_box.click()  # Here is where the program crashes on the 2nd iteration
    if i == 4:  # Stop the program after 5 iterations
        break
HTML code of the relevant element:
<span class="ui-icon ui-icon-closethick">close</span>
I tried to locate the element that closes the box by CSS selector and XPath.
The CSS selector of the X/close button is the same every time, but Selenium only sees the X button as displayed the first time.
The XPath is strange. On the first opening of the link, the X/close button has this path:
/html/body/div[6]/div[1]/a
However, if you open the next link, the path looks like this:
/html/body/div[8]/div[1]/a
Let me know what you think of that :-)
This is one way to achieve your goal:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(browser, 20)
url = 'https://sprawozdaniaopp.niw.gov.pl/'
browser.get(url)
wait.until(EC.element_to_be_clickable((By.ID, "btnsearch"))).click()
links = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a[class='dialog']")))
counter = 0
for link in links[:5]:
    link.click()
    print('clicked link', link.text)
    ### do your stuff ###
    t.sleep(1)
    wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'span[class="ui-icon ui-icon-closethick"]')))[counter].click()
    print('closed the popup')
    counter = counter + 1
This will print out in the terminal:
clicked link STOWARZYSZENIE POMOCY DZIECIOM Z PORAŻENIEM MÓZGOWYM "JASNY CEL"
closed the popup
clicked link FUNDACJA NA RZECZ POMOCY DZIECIOM Z GRODZIEŃSZCZYZNY
closed the popup
clicked link FUNDACJA "ADAMA"
closed the popup
clicked link KUJAWSKO-POMORSKI ZWIĄZEK LEKKIEJ ATLETYKI
closed the popup
clicked link "RYBNICKI KLUB PIŁKARSKI - SZKÓŁKA PIŁKARSKA ROW W RYBNIKU"
closed the popup
Every time you click on a link, a new popup is created. When you close it, that popup does not disappear; it stays hidden. So when you click a new link and then want to close the new popup, you need to select the new (nth) close button. The same applies to the popup's other elements, so make sure you account for it. I stopped after the 5th link; of course you will need to remove the slicing to handle all the links present on the page.
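A hedged variant of the same idea, without the counter: since the asker's XPath observation (div[6], then div[8]) suggests each new dialog is appended to the end of <body>, clicking the last matched close button should always target the newest popup. A sketch reusing wait, By and EC from the code above:
# Assumes each new dialog is appended last in the DOM (as the asker's
# XPath observation suggests), so the last close button is the newest one
close_buttons = wait.until(EC.presence_of_all_elements_located(
    (By.CSS_SELECTOR, 'span[class="ui-icon ui-icon-closethick"]')))
close_buttons[-1].click()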
The Selenium setup above is chromedriver on Linux; you just have to observe the imports and the code after the browser (driver) is defined.
Selenium documentation can be found at https://www.selenium.dev/documentation/
I am trying to get all the data from this table. However, the last row is a "Load More" table row that I do not know how to load. So far I have tried different approaches that did not work.
I tried to click on the row itself with this:
from bs4 import BeautifulSoup
from selenium import webdriver
driver = webdriver.Chrome()
driver.get(url)
soup = BeautifulSoup(driver.page_source, 'html.parser')
table = soup.find('table', {"class": "competition-leaderboard__table"})
i = 0
for team in table.find.all('tbody'):
    rows = team.find_all('tr')
    for row in rows:
        i = i + 1
        if (i == 51):
            row.click()
# the scraping code for the first 50 elements
The code above throws the error "'NoneType' object is not callable".
Another thing that I have tried that did not work is the following:
I tried to get the "Load More" table row by its class and click on it:
from selenium import webdriver
driver = webdriver.Chrome()
driver.get(url)
load_more = driver.find_element_by_class_name('competition-leaderboard__load-more-wrapper')
load_more.click()
soup = BeautifulSoup(driver.page_source, 'html.parser')
The code above also did not work.
So my question is: how can I make Python click on the "Load More" table row, given that in the site's HTML structure "Load More" does not seem to be a clickable button?
In your code you have to accept the cookies first, and then you can click the 'Load more' button.
CSS selectors are the most suitable in this case.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
driver.implicitly_wait(10)
driver.get('https://www.kaggle.com/c/coleridgeinitiative-show-us-the-data/leaderboard')
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".sc-pAyMl.dwWbEz .sc-AxiKw.kOAUSS>.sc-AxhCb.gsXzyw")))
driver.find_element_by_css_selector(".sc-pAyMl.dwWbEz .sc-AxiKw.kOAUSS>.sc-AxhCb.gsXzyw").click()
driver.find_element_by_css_selector(".competition-leaderboard__load-more-count").click()
time.sleep(10) # Added for you to make sure that both buttons were clicked
driver.close()
driver.quit()
I tested this snippet and it clicked the desired button.
Note that I've added WebDriverWait in order to wait until the first button is clickable.
UPDATE:
I added time.sleep(10) so you could see that both buttons are clicked.
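If you need the whole table rather than just one extra batch, a hedged follow-up sketch: keep clicking until the button is no longer clickable. This reuses driver, wait imports, By, EC and time from above, and assumes the button keeps the same class until all rows are loaded:
from selenium.common.exceptions import TimeoutException

short_wait = WebDriverWait(driver, 5)
while True:
    try:
        short_wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, ".competition-leaderboard__load-more-count"))).click()
        time.sleep(1)  # let the newly appended rows render
    except TimeoutException:
        break  # button no longer clickable: assume the table is complete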
I want to find an input element and then click it, but there is an error. I have searched many answers, but nothing works for me. I don't think there is anything special about the page (http://plantpan.itps.ncku.edu.tw/promoter.php).
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <input name="submit" type="SUBMIT" value="Search"> is not clickable at point (84, 595). Other element would receive the click: <html lang="en">...</html>
The code is below:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup
import re
driver = webdriver.Chrome()
driver.get("http://plantpan.itps.ncku.edu.tw/promoter.php")
#clear input
driver.find_element_by_name('sequence').clear()
# input
driver.find_element_by_name('sequence').send_keys('>11111\nTTTGGTTGGGTTTGGGTTTGGGTGTGTTGTGT')
sleep(5)
#choose all species
driver.find_element_by_css_selector("input[type='radio'][value='allspecies']").click()
#driver.find_element_by_xpath("//*[@id='promoter']/font[2]/input[2]").click()
#submit
submit = driver.find_element_by_css_selector("input[type='SUBMIT'][value='Search']")
submit.click()
driver.implicitly_wait(2)
# get
result = driver.page_source
soup = BeautifulSoup(result, 'html.parser')
button = driver.find_element_by_link_text("<img src='./img/search/download_analysis_result.png'/>")
button.click()
driver.implicitly_wait(3)
# get the current URL
# close the browser
sleep(2)
driver.close()
You will want to use waits and expected conditions when attempting to find elements to make sure they are able to be found and in the proper state.
Replace
submit = driver.find_element_by_css_selector("input[type='SUBMIT'][value='Search']")
with
submit = wait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='SUBMIT'][value='Search']")))
You will need to add the following:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
Your code is having an issue finding the last element, so you need to make this change, as well:
Replace:
button = driver.find_element_by_link_text("<img src='./img/search/download_analysis_result.png'/>")
with
button = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "img")))
It would be a good idea to use a similar approach of waits and expected conditions when finding and interacting with elements in other areas of your script. These will make your script more robust and better able to handle timing issues without crashing.
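For example, the same pattern applied to the radio-button click from the original script (a sketch; the locator is copied from the question):
# Wait until the "all species" radio button is clickable before clicking it
radio = wait(driver, 10).until(EC.element_to_be_clickable(
    (By.CSS_SELECTOR, "input[type='radio'][value='allspecies']")))
radio.click()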
Try executing a JS click on the element:
element = driver.find_element_by_css_selector("input[type='SUBMIT'][value='Search']")
driver.execute_script("arguments[0].click();", element)
If there is some delay from the website, just add a wait and execute the click after that:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
wait = WebDriverWait(driver, 10)
element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='SUBMIT'][value='Search']")))
driver.execute_script("arguments[0].click();", element)
I want to parse IMDb film ratings located here, spread across around 8 pages. In order to do that I'm using Selenium, and I'm having trouble with the clicks that move the algorithm on to the next page. In the end I need 1000 titles, which I'll then continue processing with BeautifulSoup. The code below isn't working; I need to use the 'NEXT' button, which has this HTML:
<a class="flat-button lister-page-next next-page" href="/list/ls000004717/?page=2">
Next
</a>
This is the code:
from selenium import webdriver as wb
browser = wb.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
field = browser.find_element_by_name("flat-button lister-page-next next-page").click()
The error is the following:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".flat-button lister-page-next next-page"}
(Session info: chrome=78.0.3904.108)
I suppose I lack knowledge of the syntax needed, or maybe I mixed something up. I tried searching on SO, though every example is pretty unique and I don't have the knowledge to extrapolate from those cases fully. Is there any way Selenium can handle this?
You could try using an XPath to query on the Next text inside the button. You should also probably invoke WebDriverWait since you are navigating across multiple pages, and then scroll the button into view since it is at the bottom of the page:
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from time import sleep
browser = wb.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
# keep clicking next until we reach the end
for i in range(0, 9):
    # wait up to 10s before locating next button
    try:
        next_button = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, 'page') and contains(text(), 'Next')]")))
        # scroll down to button using Javascript
        browser.execute_script("arguments[0].scrollIntoView(true);", next_button)
        # click the button
        # next_button.click() throws an exception here -- replaced with a JS click
        browser.execute_script("arguments[0].click();", next_button)
        # I never recommend using sleep like this, but WebDriverWait is not waiting on the next button to fully load, so it goes stale.
        sleep(5)
    # case: next button no longer exists, we have reached the end
    except TimeoutException:
        break
I also wrapped everything in a try/except TimeoutException block to handle the case where we have reached the end of the pages and the Next button no longer exists, breaking out of the loop. This worked across multiple pages for me.
I also had to add an explicit sleep(5), because even after invoking WebDriverWait on element_to_be_clickable, next_button was still throwing StaleElementReferenceException. It seems WebDriverWait was finishing before the page had fully loaded, causing the status of next_button to change after it had been located. Normally adding sleep(5) is bad practice, but there did not seem to be another workaround here. If anyone else has a suggestion on this, feel free to comment / edit the answer.
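One possible alternative to the fixed sleep (a sketch, not tested against this page): after the JS click, wait for the old next_button reference to go stale, which signals that the page has started replacing its content:
from selenium.webdriver.support import expected_conditions as EC

# Instead of sleep(5): wait until the clicked button detaches from the DOM
browser.execute_script("arguments[0].click();", next_button)
WebDriverWait(browser, 10).until(EC.staleness_of(next_button))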
There are a couple of ways that could work:
1. Use a selector for the next button and loop until the end:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
browser = webdriver.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
selector = 'a[class*="next-page"]'
num_pages = 10
for page in range(num_pages):
    # Wait for the element to load
    WebDriverWait(browser, 10).until(ec.presence_of_element_located((By.CSS_SELECTOR, selector)))
    # ... Do rating parsing here
    browser.find_element_by_css_selector(selector).click()
2. Instead of clicking on the element, navigate to the next page directly using browser.get('...'):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
# Set up browser as before and navigate to the page
browser = webdriver.Chrome()
browser.get('https://www.imdb.com/list/ls000004717/')
selector = 'a[class*="next-page"]'
base_url = 'https://www.imdb.com/list/ls000004717/'
page_extension = '?page='
num_pages = 10
# Already at page = 1, so only needs to loop 9 times
for page in range(2, num_pages + 1):
    # Wait for the page to load
    WebDriverWait(browser, 10).until(ec.presence_of_element_located((By.CSS_SELECTOR, selector)))
    # ... Do rating parsing here
    next_page = base_url + page_extension + str(page)
    browser.get(next_page)
As a note: field = browser.find_element_by_name("...").click() will not assign a WebElement to field, as the click() method has no return value.
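In other words, keep the lookup and the click as two separate steps when you need the element afterwards, e.g. (a sketch; the selector is based on the posted HTML):
# click() returns None, so assign the element before clicking it
field = browser.find_element_by_css_selector("a.lister-page-next.next-page")
field.click()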
You could try a partial CSS selector:
browser.find_element_by_css_selector("a[class*='next-page']").click()
To click on the element with the text NEXT until the 901 - 1,000 of 1,000 page, you have to:
scrollIntoView() the element once the visibility_of_element_located() is achieved.
Induce WebDriverWait for the element_to_be_clickable()
You can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('https://www.imdb.com/list/ls000004717/')
driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span.pagination-range"))))
while True:
    try:
        WebDriverWait(driver, 20).until(EC.invisibility_of_element((By.CSS_SELECTOR, "div.row.text-center.lister-working.hidden")))
        driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span.pagination-range"))))
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.flat-button.lister-page-next.next-page"))).click()
        print("Clicked on NEXT button")
    except TimeoutException as e:
        print("No more NEXT button")
        break
driver.quit()
Console Output:
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
Clicked on NEXT button
No more NEXT button
I am trying to scrape links to song pages for some artists on genius.com, but I'm running into issues because the links to the individual song pages are displayed inside a popup modal window.
The modal window doesn't load all links in one go, and instead loads more content via ajax when you scroll down to the bottom of the modal.
I tried using code to scroll to the bottom of the page but unfortunately that just scrolled in the window behind the modal rather than the modal itself:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
So then I tried selecting the last element in the modal and scrolling to that (with the idea of doing this a few times until all the song pages had loaded), but it wouldn't scroll far enough to get the website to load more content:
last_element = driver.find_elements_by_xpath('//div[#class="mini_card-metadata"]')[-1]
last_element.location_once_scrolled_into_view
Here is my code so far:
import os
from bs4 import BeautifulSoup
from selenium import webdriver
chrome_driver = "/Applications/chromedriver"
os.environ["webdriver.chrome.driver"] = chrome_driver
driver = webdriver.Chrome(chrome_driver)
base_url = 'https://genius.com/artists/Stormzy'
driver.get(base_url)
xpath_str = '//div[contains(text(),"Show all songs by Stormzy")]'
driver.find_element_by_xpath(xpath_str).click()
Is there a way to extract all the song page links for the artist?
Try the code below to get the required output:
from selenium import webdriver as web
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
driver = web.Chrome()
base_url = 'https://genius.com/artists/Stormzy'
driver.get(base_url)
# Open modal
driver.find_element_by_xpath('//div[normalize-space()="Show all songs by Stormzy"]').click()
song_locator = By.CSS_SELECTOR, 'a.mini_card.mini_card--small'
# Wait for first XHR complete
wait(driver, 10).until(EC.visibility_of_element_located(song_locator))
# Get current length of songs list
current_len = len(driver.find_elements(*song_locator))
while True:
    # Load new XHR until it's possible
    driver.find_element(*song_locator).send_keys(Keys.END)
    try:
        wait(driver, 3).until(lambda x: len(driver.find_elements(*song_locator)) > current_len)
        current_len = len(driver.find_elements(*song_locator))
    # Return full list of songs
    except TimeoutException:
        songs_list = [song.get_attribute('href') for song in driver.find_elements(*song_locator)]
        break
print(songs_list)
This should allow you to request new XHR content until the length of the songs list becomes constant, and finally return the list of links.
When you scroll to the bottom of the modal dialog, it calls
$scrollable_data_ctrl.load_next();
As an option, you can try executing it repeatedly until no new results appear in the modal:
driver.execute_script("$scrollable_data_ctrl.load_next();")