Clicking button with Selenium not working - python

https://fbref.com/en/squads/0cdc4311/Augsburg-Stats provides buttons to transform a table to csv, which I would like to scrape. I click the buttons like
# Find every "Get table as CSV" button and click each in turn.
# (Indentation restored — the loop body must be indented under the for.)
elements = driver.find_elements(By.XPATH, '//button[text()="Get table as CSV (for Excel)"]')
for element in elements:
    element.click()
but I get an exception
ElementNotInteractableException: Message: element not interactable
This is the element I am trying to click.
Here's the full code (I added Adblock plus as a Chrome extension, which should be configured to test locally):
import pandas as pd
import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
import time
import os

# Activate Adblock Plus (unpacked extension from the local Chrome profile).
path_to_extension = '/home/andreas/.config/google-chrome/Default/Extensions/cfhdojbkjhnklbpkdaibdccddilifddb/3.11.4_0'
options = Options()
options.add_argument('load-extension=' + path_to_extension)
# Uses the Chrome driver in usr/bin/ from https://chromedriver.chromium.org/downloads
driver = webdriver.Chrome(options=options)

# Wait for the extension tab to open, then switch back to the main tab.
time.sleep(5)
driver.switch_to.window(driver.window_handles[0])

NO_OF_PREV_SEASONS = 5
df = pd.DataFrame()
urls = ['https://fbref.com/en/squads/247c4b67/Arminia-Stats']
for url in urls:
    driver.get(url)
    html = driver.page_source
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # Click button -> accept cookies.
    element = driver.find_element(By.XPATH, '//button[text()="AGREE"]')
    element.click()
    for i in range(NO_OF_PREV_SEASONS):
        elements = driver.find_elements(By.XPATH, '//button[text()="Get table as CSV (for Excel)"]')
        for element in elements:
            # NOTE(review): this raises ElementNotInteractableException —
            # the button sits in the hidden "Share & Export" drop-down and
            # must be hovered/opened before it can be clicked.
            element.click()
        # TODO: get data
        # Click link -> navigate to the previous season's page.
        time.sleep(5)
        element = driver.find_element(By.LINK_TEXT, "Previous Season")
        element.click()
driver.quit()

button is inside the drop-down list (i.e. <span>Share & Export</span>) so you need to hover it first.
e.g.
from selenium.webdriver.common.action_chains import ActionChains

action_chain = ActionChains(driver)
# find_element_by_xpath was removed in Selenium 4 — use find_element(By.XPATH, ...).
hover = driver.find_element(By.XPATH, "//span[contains(text(),'Share & Export')]")
action_chain.move_to_element(hover).perform()  # hover to show the drop-down list
driver.execute_script("window.scrollTo(0, 200)")  # scroll down a bit
time.sleep(1)  # wait for scrolling
button = driver.find_element(By.XPATH, "//button[contains(text(),'Get table as CSV (for Excel)')]")
action_chain.move_to_element(button).click().perform()  # move to button and click
time.sleep(3)
output:

This also happens to me sometimes. One way to overcome this problem is by getting the X and Y coordinates of this button and clicking on it.
import pyautogui

for element in elements:
    element_pos = element.location   # fixed typo: was "elemnt_pos" below
    element_size = element.size
    x_coordinate, y_coordinate = element_pos['x'], element_pos['y']
    e_width, e_height = element_size['width'], element_size['height']
    # Click the centre of the element with the OS-level cursor.
    click_x = x_coordinate + e_width / 2
    click_y = y_coordinate + e_height / 2
    pyautogui.click(click_x, click_y)  # fixed typo: was "pyauotgui"
Other solution that you may try is to click on the tag that contains this button.

There are several issues here:
You have to click and open Share and Export tab and then click Get table as CSV button
You have to scroll the page to access the non-first tables.
So, your code can be something like this:
import pandas as pd
import bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
import time
import os

# Activate Adblock Plus.
path_to_extension = '/home/andreas/.config/google-chrome/Default/Extensions/cfhdojbkjhnklbpkdaibdccddilifddb/3.11.4_0'
options = Options()
options.add_argument('load-extension=' + path_to_extension)
# A fixed window size keeps element positions predictable for ActionChains.
options.add_argument("window-size=1920,1080")
# Uses the Chrome driver in usr/bin/ from https://chromedriver.chromium.org/downloads
driver = webdriver.Chrome(options=options)
actions = ActionChains(driver)
wait = WebDriverWait(driver, 10)  # was used below but never defined

# Wait and switch back to the tab with the desired source.
time.sleep(5)
#driver.switch_to.window(driver.window_handles[0])

NO_OF_PREV_SEASONS = 5
df = pd.DataFrame()
urls = ['https://fbref.com/en/squads/247c4b67/Arminia-Stats']
for url in urls:
    driver.get(url)
    html = driver.page_source
    soup = bs4.BeautifulSoup(html, 'html.parser')
    # Click button -> accept cookies.
    element = driver.find_element(By.XPATH, '//button[text()="AGREE"]')
    element.click()
    for i in range(NO_OF_PREV_SEASONS):
        # One "Share & Export" drop-down toggle per table section.
        # (XPath sigils restored: "#class" was a scrape artifact for "@class".)
        elements = driver.find_elements(By.XPATH, "//div[@class='section_heading_text']//li[@class='hasmore']")
        for element in elements:
            # Scroll/hover to the toggle so the hidden menu becomes interactable.
            actions.move_to_element(element).perform()
            time.sleep(0.5)
            element.click()
            wait.until(EC.visibility_of_element_located((By.XPATH, "//button[@tip='Get a link directly to this table on this page']"))).click()
            # TODO: get data
#todo: get data

Related

How to scrape Next button on Linkedin with Selenium using Python?

I am trying to scrape the LinkedIn website using Selenium. I can't locate the Next button — it resists every attempt. I've spent half a day trying to address this, all in vain.
I tried absolutely various options, with text and so on. Only work with start ID but scrape other button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[@aria-label='Далее']"}
This is quite common for this site:
//*[starts-with(@id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep

# Raw string avoids the invalid "\p"/"\c" escape sequences in the Windows path.
chrome_driver_path = Service(r"E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)

SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"        # placeholder credentials
PASSWORD = "PASSWORD"

sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
# XPath attribute sigils restored: "#id" was a scrape artifact for "@id".
login_input = driver.find_element(By.XPATH, '//*[@id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[@id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[@id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)
lens_button = driver.find_element(By.XPATH, '//*[@id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[@id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)
page_button = driver.find_element(By.XPATH, "//button[@aria-label='Далее']")
page_button.click()
sleep(60)
Chrome inspection of button
Next Button
OK, there are several issues here:
The main reason your code did not work is that the "next" pagination button is not even created on the page until you scroll down, so I added a mechanism to scroll the page until that button can be clicked.
it's not good to create locators based on local language texts.
You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses.
I used mixed locators types to show that sometimes it's better to use By.ID and sometimes By.XPATH etc.
the following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
# Raw string avoids invalid escape sequences in the Windows path.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)

url = "https://www.linkedin.com/feed/"
driver.get(url)
# my_email / my_password are placeholders for real credentials.
# (XPath sigils restored: "#href" etc. were scrape artifacts for "@href".)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(@href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(@class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()

# The "next" pagination button is only created once the page has been
# scrolled far enough, so scroll in steps until it can be clicked.
wait = WebDriverWait(driver, 4)
while True:
    try:
        next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
        next_btn.location_once_scrolled_into_view
        time.sleep(0.2)
        next_btn.click()
        break
    except Exception:  # was a bare except: — that would also swallow KeyboardInterrupt
        # Button not present yet: scroll a bit further and retry.
        driver.execute_script("window.scrollBy(0, arguments[0]);", 600)

how can i open link in href with selenium python?

The link inside the href is constantly changing but the xpath of this href is always the same.
How can I click on the www.confirmationemail.com ?
<div dir="ltr">
<p>exampleTEXT.</p>
<p>www.confirmationemail.com</p>
<p>exampleTEXT.</p>
<p>exampleTEXT,</p>
<p>exampleTEXT</p>
</div>
This is the page I'm working on:https://www.minuteinbox.com/
The process is as follows: registering on a site with the e-mail received from here and receiving an e-mail, logging in to the e-mail, but I cannot click on the link in its content.
from selenium import webdriver
from time import sleep
import config2 as cf
from selenium.webdriver.support.select import Select
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.keys import Keys
import time
# NOTE: the original "from asyncio import sleep" was removed — it shadowed
# time.sleep and returned a coroutine that was never awaited, so the
# intended pause never happened.

driver = webdriver.Chrome("C:\\ChromeDriver\\chromedriver.exe")
url = "https://www.minuteinbox.com/"
url2 = "EXAMPLE.COM"
driver.get(url)

# Grab the generated throw-away e-mail address shown on the page
# ("XPATH" is a placeholder locator in the question).
element = driver.find_element_by_xpath("XPATH").text
print(element)
time.sleep(4)

# Open the registration site in a second tab.
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[1])
driver.get(url2)
sec = driver.find_element_by_xpath("XPATH")
sec.click()
devam = driver.find_element_by_xpath("XPATH")
devam.click()
ad = driver.find_element_by_xpath("XPATH")
ad.send_keys("deneme")
soyad = driver.find_element_by_xpath("XPATH")
soyad.send_keys("test")
eMail = driver.find_element_by_css_selector("#user_email")
eMail.send_keys(element)
eMail2 = driver.find_element_by_css_selector("#user_email_confirmation")
eMail2.send_keys(element)
sifre = driver.find_element_by_css_selector("#user_password")
sifre.send_keys("PASS")
sifre2 = driver.find_element_by_css_selector("#user_password_confirmation")
sifre2.send_keys("PASS")
buton = driver.find_element_by_css_selector("SELECT")
buton.click()
hesapol = driver.find_element_by_css_selector("SELECT")
hesapol.click()
sleep(2)

# Back to the inbox tab; open the confirmation mail.
driver.switch_to.window(driver.window_handles[0])
time.sleep(7)
bas = driver.find_element_by_css_selector("#schranka > tr:nth-child(1)")
bas.click()
time.sleep(1)
time.sleep(1)
# The mail body lives inside an iframe, so switch into it first.
SD = driver.switch_to.frame(driver.find_element_by_css_selector("iframe#iframeMail"))
time.sleep(5)
# Fixed: ".click" was missing its parentheses (attribute access, no click),
# and "#href" was a scrape artifact for "@href".
SD = driver.find_element_by_xpath("//a[contains(@href,'minuteinbox')]").click()
driver.switch_to.default_content()
sd = I put this just to be able to write it in the code section
SOLVED
İMPORTS
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Open the first mail in the inbox.
# (find_element_by_* was removed in Selenium 4; By is already imported.)
bas = driver.find_element(By.CSS_SELECTOR, "#schranka > tr:nth-child(1)")
bas.click()
time.sleep(3)
# Wait until the mail-body iframe is available, then switch into it.
wait = WebDriverWait(driver, 10)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[id='iframeMail']")))
print(driver.page_source)
# The XPath of the confirmation link is constant.
link = driver.find_element(By.XPATH, "/html/body/div/p[2]/a")
link.click()
As you mentioned, the XPath of that element is constant.
So you can get that element based the constant XPath locator and click it.
Something like this:
driver.find_element_by_xpath('the_constant_xpath').click()
UPD
The element you want to be clicked can be located by XPath.
However, it is inside an iframe, so in order to access it you will have to switch to that iframe first.
I have also made your locators better.
So your code could be something like this:
driver.switch_to.window(driver.window_handles[0])
time.sleep(5)
bas = driver.find_element_by_css_selector("td.from")
bas.click()
time.sleep(1)
# The confirmation link sits inside an iframe; switch into it first.
driver.switch_to.frame(driver.find_element_by_css_selector("iframe#iframeMail"))
# Fixed: ".click" needs parentheses to actually click, and "#href" was a
# scrape artifact for "@href".
driver.find_element_by_xpath("//a[contains(@href,'minuteinbox')]").click()
When you finished working inside the iframe you will have to get out to the default content with
driver.switch_to.default_content()

Python Selenium Crawler go into element and get details

I'm trying to get details of all properties from the following website which has properties listed as elements:
https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9
I'm using Selenium in Python to scrape the elements' details but as soon as I go to the element I cannot click on its link to open it to a new page and get the necessary information. Code below:
from selenium import webdriver  # was missing: webdriver is used below
from selenium.webdriver.common.keys import Keys
import webbrowser
import random
import time
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
import csv
from csv import writer
from selenium.common.exceptions import ElementNotVisibleException, WebDriverException, NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

Link = 'https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9'

# MAIN
driver = webdriver.Chrome()
driver.maximize_window()

# Go to link
driver.get(Link)

# Accept cookies
time.sleep(2)
driver.find_element_by_xpath('//*[@id="onetrust-accept-btn-handler"]').click()
time.sleep(2)

# Load everything: keep clicking "View more" until it disappears.
while True:
    try:
        driver.find_element_by_xpath("//*[contains(@value,'View more')]").click()
        time.sleep(3)
    except Exception as no_more_properties:
        print('all properties expanded: ', no_more_properties)
        break

# Get properties
properties_list = driver.find_elements_by_xpath('//*[@class="minificha "]')
print(len(properties_list))  # 25
time.sleep(2)

# Get each property link
property_url = set()
properties_details = []
main_window_handle = driver.current_window_handle
for i in range(0, len(properties_list)):
    # switch_to_window was removed in Selenium 4 — use switch_to.window.
    driver.switch_to.window(main_window_handle)
    property = properties_list[i]
    # NOTE(review): `url` is undefined here — presumably the property's href
    # should be read from the element instead; confirm against the question.
    property_link = property.find_element_by_xpath('//a[@href="' + url + '"]')
    property_link.click()
    time.sleep(2)
    # Switch to property window
    window_after = driver.window_handles[1]
    driver.switch_to.window(window_after)
    # Get number of properties ("//[#class=...]" was invalid XPath: the node
    # test "*" and the "@" sigil are restored).
    number_of_flats = driver.find_elements_by_xpath('//*[@class="lineainmu "]')
    print(len(number_of_flats))
    time.sleep(2)
    currentWindow = driver.current_window_handle
    for j in range(0, len(number_of_flats)):
        driver.switch_to.window(currentWindow)
        flat = number_of_flats[j]
        flat.click()
        time.sleep(2)
        # Switch to flat window
        window_after = driver.window_handles[1]
        driver.switch_to.window(window_after)
When we click on a link on first page, it will open a new tab. In selenium in these type of cases we should switch the focus to new windows and then we can interact with web elements on the newly open page.
Once the task is done, it's important to close the tab and then switch back to original content.
This may lead to stale element reference, if we do not defined the web elements in loop again.
Code :
# driver_path is a placeholder for the local chromedriver location.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(30)
wait = WebDriverWait(driver, 30)
driver.get("https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9")

# Accept the cookie banner if it shows up; ignore if it does not.
try:
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
except Exception:  # was a bare except:
    pass

# (XPath sigils restored: "#class" was a scrape artifact for "@class".)
size = driver.find_elements(By.XPATH, "//div[@class='slick-list draggable']")
j = 1
org_windows_handle = driver.current_window_handle
for i in range(len(size)):
    # Re-locate the element each pass to avoid stale element references.
    ele = driver.find_element(By.XPATH, f"(//div[@class='slick-list draggable'])[{j}]")
    driver.execute_script("arguments[0].scrollIntoView(true);", ele)
    ele.click()
    # The listing opens in a new tab: switch to it, scrape, close, switch back.
    all_handles = driver.window_handles
    driver.switch_to.window(all_handles[1])
    try:
        name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "p#tituloFiltroTipo"))).text
        print(name)
    except Exception:
        pass
    try:
        price = wait.until(EC.visibility_of_element_located((By.ID, "soloPrecio"))).text
        print(price)
    except Exception:
        pass
    driver.close()
    driver.switch_to.window(org_windows_handle)
    j = j + 1
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output :
Flats - Egkomi, Nicosia
310,000
Flat - Strovolos, Nicosia
115,000
Flat - Agios Dometios, Nicosia
185,000
Flats - Aglantzia, Nicosia
765,000
Flat - Kaimakli, Nicosia
170,000
Flat - Kaimakli, Nicosia
280,000
Flat - Kaimakli, Nicosia
130,000
Flat - Germasogia, Limassol
410,000
Flat - Germasogeia, Limassol
285,000
Flat - Petrou & Pavlou, Limassol
230,000
Mixing implicit with explicit is not recommended. But in few cases like this where we are using find_element and explicit wait, does not do any harm. Please comment implicit wait line, and run the code. If it fails please uncomment and then try again.

Selenium cannot find element in webpage

I am having some trouble trying to automate some web inputs, but first i need to click some buttons and i cannot do it. I've tried a lot of stuff but i cannot complete it :'(
webpage: https://vacunacovid.catsalut.gencat.cat/
I cannot go past the image the code i have:
from selenium import webdriver  # fixed typo: "rom" -> "from"
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# browser config
options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')
options.add_argument('--disable-extensions')
driver_path = "/C:/chromedriver_linux64/chromedriver"  # browser driver, see https://chromedriver.chromium.org/
driver = webdriver.Chrome(driver_path, chrome_options=options)

# starting screen in optimal position
driver.set_window_position(2000, 0)
driver.maximize_window()
time.sleep(1)

# getting website
driver.get("https://vacunacovid.catsalut.gencat.cat/")

# go to the element if it's clickable (BLABLABLA is the placeholder XPath)
WebDriverWait(driver, 5)\
    .until(EC.element_to_be_clickable((By.XPATH, BLABLABLA)))\
    .click()
it doesn't find anything through this #shadow (open) — how can I do it?
https://i.stack.imgur.com/it2nQ.png
It is under Shadow-dom #shadow-root (open) So you have not mentioned exactly which button you want to click, so I'm clicking on the first button Demana o modifica cita
#adding some wait for application to load properly
sleep(5)
You just take the JS path of the desire element as below and return the element for that
press F12->Element Tab -> right click(on the element)->copy JS path
# Resolve the target button by chaining through the nested shadow roots in
# JavaScript, then hand the element back to Python and click it.
js_path = 'return document.querySelector("body > vaccinapp-app").shadowRoot.querySelector("#pages > vaccinapp-shell").shadowRoot.querySelector("#main-shell-content > appointment-shell").shadowRoot.querySelector("#appointment-shell-content > appointment-onboarding").shadowRoot.querySelector("#dismiss-btn").shadowRoot.querySelector("#button")'
shadow_button = driver.execute_script(js_path)
shadow_button.click()
code
options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')
options.add_argument('--disable-extensions')
driver_path = "/C:/chromedriver_linux64/chromedriver"
driver = webdriver.Chrome(driver_path, chrome_options=options)

# starting screen in optimal position
driver.set_window_position(2000, 0)
driver.maximize_window()
time.sleep(1)

# getting website
driver.get("https://vacunacovid.catsalut.gencat.cat/")
# Fixed: bare sleep() was a NameError — only the time module is imported.
time.sleep(5)

# Chain through each nested shadow root (copy the JS path via DevTools:
# F12 -> Elements tab -> right click the element -> Copy JS path).
javascript = 'return document.querySelector("body > vaccinapp-app").shadowRoot.querySelector("#pages > vaccinapp-shell").shadowRoot.querySelector("#main-shell-content > appointment-shell").shadowRoot.querySelector("#appointment-shell-content > appointment-onboarding").shadowRoot.querySelector("#dismiss-btn").shadowRoot.querySelector("#button")'
element = driver.execute_script(javascript)
element.click()
For reference check here

Python Selenium click load more on table

I am trying to get the whole data of this table. However, in the last row there is "Load More" table row that I do not know how to load. So far I have tried different approaches that did not work,
I tried to click on the row itself by this:
from selenium import webdriver
from bs4 import BeautifulSoup  # was used but never imported

driver = webdriver.Chrome()
driver.get(url)  # url: the Kaggle leaderboard page (defined elsewhere)
soup = BeautifulSoup(driver.page_source, 'html.parser')
table = soup.find('table', {"class": "competition-leaderboard__table"})
i = 0
for team in table.find_all('tbody'):  # fixed typo: "find.all" -> "find_all"
    rows = team.find_all('tr')
    for row in rows:
        i = i + 1
        if i == 51:
            # NOTE(review): `row` is a BeautifulSoup tag, not a Selenium
            # element — tags have no click(), which is why this raises
            # "'NoneType' object is not callable".
            row.click()
        # the scraping code for the first 50 elements goes here
The code above throws an error saying that "'NoneType' object is not callable".
Another thing that I have tried that did not work is the following:
I tried to get the load more table row by its' class and click on it.
from selenium import webdriver
driver = webdriver.Chrome()
driver.get(url)  # url: the Kaggle leaderboard page (defined elsewhere)
# Attempt to click the "Load More" row via its wrapper element's class name.
load_more = driver.find_element_by_class_name('competition-leaderboard__load-more-wrapper')
load_more.click()
# Re-parse the page source after the extra rows have (hopefully) loaded.
soup = BeautifulSoup(driver.page_source, 'html.parser')
The code above also did not work.
So my question is how can I make python click on the "Load More" table row as in the HTML structure of the site it seems like "Load More" is not a button that is clickable.
In your code you have to accept cookies first, and then you can click 'Load more' button.
CSS selectors are the most suitable in this case.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
driver.implicitly_wait(10)
driver.get('https://www.kaggle.com/c/coleridgeinitiative-show-us-the-data/leaderboard')

# Wait until the cookie banner's accept button is clickable, then accept it.
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".sc-pAyMl.dwWbEz .sc-AxiKw.kOAUSS>.sc-AxhCb.gsXzyw")))
# find_element_by_* was removed in Selenium 4; also dropped the useless
# assignment of click()'s None return value.
driver.find_element(By.CSS_SELECTOR, ".sc-pAyMl.dwWbEz .sc-AxiKw.kOAUSS>.sc-AxhCb.gsXzyw").click()
# With the banner gone, the "Load more" control can be clicked.
driver.find_element(By.CSS_SELECTOR, ".competition-leaderboard__load-more-count").click()
time.sleep(10)  # Added for you to make sure that both buttons were clicked
driver.close()
driver.quit()
I tested this snippet and it clicked the desired button.
Note that I've added WebDriverWait in order to wait until the first button is clickable.
UPDATE:
I added time.sleep(10) so you could see that both buttons are clicked.

Categories

Resources