how can i open link in href with selenium python? - python

The link inside the href is constantly changing but the xpath of this href is always the same.
How can I click on the www.confirmationemail.com ?
<div dir="ltr">
<p>exampleTEXT.</p>
<p>www.confirmationemail.com</p>
<p>exampleTEXT.</p>
<p>exampleTEXT,</p>
<p>exampleTEXT</p>
</div>
This is the page I'm working on:https://www.minuteinbox.com/
The process is as follows: registering on a site with the e-mail received from here and receiving an e-mail, logging in to the e-mail, but I cannot click on the link in its content.
from selenium import webdriver
from time import sleep
import config2 as cf
from selenium.webdriver.support.select import Select
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.keys import Keys
from asyncio import sleep
import time
driver = webdriver.Chrome("C:\\ChromeDriver\\chromedriver.exe")
url = "https://www.minuteinbox.com/"
url2 = "EXAMPLE.COM"
driver.get(url)
element = driver.find_element_by_xpath("XPATH").text
print(element)
time.sleep(4)
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[1])
driver.get(url2)
sec = driver.find_element_by_xpath("XPATH")
sec.click()
devam = driver.find_element_by_xpath("XPATH")
devam.click()
ad = driver.find_element_by_xpath("XPATH")
ad.send_keys("deneme")
soyad = driver.find_element_by_xpath("XPATH")
soyad.send_keys("test")
eMail = driver.find_element_by_css_selector("#user_email")
eMail.send_keys(element)
eMail2 = driver.find_element_by_css_selector("#user_email_confirmation")
eMail2.send_keys(element)
sifre = driver.find_element_by_css_selector("#user_password")
sifre.send_keys("PASS")
sifre2 = driver.find_element_by_css_selector("#user_password_confirmation")
sifre2.send_keys("PASS")
buton = driver.find_element_by_css_selector("SELECT")
buton.click()
hesapol = driver.find_element_by_css_selector("SELECT")
hesapol.click()
sleep(2)
driver.switch_to.window(driver.window_handles[0])
time.sleep(7)
bas = driver.find_element_by_css_selector("#schranka > tr:nth-child(1)")
bas.click()
time.sleep(1)
time.sleep(1)
SD = driver.switch_to.frame(driver.find_element_by_css_selector("iframe#iframeMail"))
time.sleep(5)
SD = driver.find_element_by_xpath("//a[contains(#href,'minuteinbox')]").click
driver.switch_to.default_content()
sd = I put this just to be able to write it in the code section
SOLVED
İMPORTS
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
bas = driver.find_element_by_css_selector("#schranka > tr:nth-child(1)")
bas.click()
time.sleep(3)
wait = WebDriverWait(driver, 10)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[id='iframeMail']")))
print(driver.page_source)
link = driver.find_element_by_xpath("/html/body/div/p[2]/a")
link.click()

As you mentioned, the XPath of that element is constant.
So you can get that element based the constant XPath locator and click it.
Something like this:
driver.find_element_by_xpath('the_constant_xpath').click()
UPD
The element you want to be clicked can be located by XPath.
However, it is inside an iframe, so in order to access it you will have to switch to that iframe first.
I have also made your locators better.
So your code could be something like this:
driver.switch_to.window(driver.window_handles[0])
time.sleep(5)
bas = driver.find_element_by_css_selector("td.from")
bas.click()
time.sleep(1)
driver.switch_to.frame(driver.find_element_by_css_selector("iframe#iframeMail"))
driver.find_element_by_xpath("//a[contains(#href,'minuteinbox')]").click
When you finished working inside the iframe you will have to get out to the default content with
driver.switch_to.default_content()

Related

How to scrape Next button on Linkedin with Selenium using Python?

I am trying to scrape LinkedIn website using Selenium. I can't parse Next button. It resists as much as it can. I've spent a half of a day to adress this, but all in vain.
I tried absolutely various options, with text and so on. Only work with start ID but scrape other button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[#aria-label='Далее']"}
This is quite common for this site:
//*[starts-with(#id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
chrome_driver_path = Service("E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)
SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"
sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
login_input = driver.find_element(By.XPATH, '//*[#id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[#id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[#id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)
lens_button = driver.find_element(By.XPATH, '//*[#id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[#id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)
page_button = driver.find_element(By.XPATH, "//button[#aria-label='Далее']")
page_button.click()
sleep(60)
Chrome inspection of button
Next Button
OK, there are several issues here:
The main problem why your code not worked is because the "next" pagination is initially even not created on the page until you scrolling the page, so I added the mechanism, to scroll the page until that button can be clicked.
it's not good to create locators based on local language texts.
You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses.
I used mixed locators types to show that sometimes it's better to use By.ID and sometimes By.XPATH etc.
the following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.linkedin.com/feed/"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(#href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(#class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()
wait = WebDriverWait(driver, 4)
while True:
try:
next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
next_btn.location_once_scrolled_into_view
time.sleep(0.2)
next_btn.click()
break
except:
driver.execute_script("window.scrollBy(0, arguments[0]);", 600)

iframe name changes each time the page is reloaded

i'm having a problem with my script in pyhton. Each time when the web page that im using opens the iframe src changes.
By.CSS_SELECTOR,"iframe#IFRAME_muVN"
(For example de code above is the current iframe when i copy with css selector)
But when i enter to the web page again the name of that iframe changes.
#IFRAME_6Eh8
is there a way to manage that change of the iframe, because im allways getting timeout due to the webdriver wait and the iframe element is never gonna be found.
this is my code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from datetime import date
import time
s=Service(r'C:\Users\dbayona\Webdriver\chromedriver.exe')
driver = webdriver.Chrome(service = s)
driver.maximize_window()
driver.get('http://190.109.11.66:8888/BOE/BI')
iframe = driver.find_element(By.NAME, "servletBridgeIframe")
driver.switch_to.frame(iframe)
username = driver.find_element(By.XPATH, '//*[#id="_id0:logon:USERNAME"]')
username.send_keys("*****")
password = driver.find_element(By.XPATH, '//*[#id="_id0:logon:PASSWORD"]')
password.send_keys("*****")
login = driver.find_element(By.XPATH, '//*[#id="_id0:logon:logonButton"]')
login.click()
iframe =WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#iframeHome-122668")))
driver.implicitly_wait(15)
dropdown = driver.find_element(By.XPATH, '//*[#id="header3_5"]')
dropdown.click()
storesense = driver.find_element(By.XPATH, '//*[#id="li_item_0_4_1"]')
driver.implicitly_wait(5)
storesense.click()
iframe = WebDriverWait(driver, 120).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#IFRAME_muVN")))
date = driver.find_element(By.XPATH, '//*[#id="date-picker"]')
today = date.today()
d1 = today.strftime("%d/%m/%Y")
date.send_keys(d1)
driver.switch_to.default_content()
Code where im trying to find the iframe:
iframe = WebDriverWait(driver, 120).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#IFRAME_muVN")))
image of the iframe:
Iframe of the page
Since the element is dynamic try the following css selector. ^ means startswith
iframe = WebDriverWait(driver, 120).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[id^='IFRAME_']")))
try to find by tag name <iframe> but make sure if you not have multiple tag name like this inside the element that its related to

Selenium only finding some, but not all, related elements

I'm attempting to use Selenium to print out a list of users I'm subscribed to on a website, of which there are 3. The following code only prints out the first 2 of the three
from xml.dom.minidom import Element
from selenium import webdriver
from selenium.webdriver.support import wait
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService;
from webdriver_manager.chrome import ChromeDriverManager;
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.actions.action_builder import ActionBuilder
from selenium.webdriver.common.actions.mouse_button import MouseButton
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()));
#Chrome client should now start up
driver.get("website-url"); #Go to desired website
title = driver.title;
if title == "WebsiteTitle":
driver.implicitly_wait(5)
email_box = driver.find_element(By.NAME, "email"); #finds username block
pass_box = driver.find_element(By.NAME, "password"); #finds password block
email_box.send_keys('username'); #enter username
pass_box.send_keys('password'); #enter password
submit_button = driver.find_element(By.CSS_SELECTOR, "button[type='submit']"); #finds submit button
submit_button.click(); #clicks it
wait = WebDriverWait(driver, 15);
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[#id='content']/div[1]/div/div/div/div[1]/h1/span"))); #waits 15sec until the "Home" button on the home page can be clicked
def print_subs_list(): #function to print list of users you are subscribed to
driver.get("website-subs-url");
subscriptions_list = driver.find_elements(By.XPATH, "//*[#id='content']/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div[2]/div/div[1]/a/div")
for value in subscriptions_list:
print(value.text)
print_subs_list()
driver.quit()
Now, changing the XPath in subscriptions_list to //*[#id='content']/div[1]/div/div[4]/div/div/div[2]/div/div[2]/div/div[2]/div/div[1]/a/div will print out the 3rd result only.
However, my desired result would be to print all of the subscribed users, as there will definitely be more than 3.
How do I change it so that it will print out all of the subscribed users, regardless of the amount?
You identify all the three desired elements using the xpath:
//*[#id='content']/div[1]/div/div[4]/div/div//div/div/div[2]/div/div[2]/div/div[1]/a/div
Your effective line of code will be:
subscriptions_list = driver.find_elements(By.XPATH, "//*[#id='content']/div[1]/div/div[4]/div/div//div/div/div[2]/div/div[2]/div/div[1]/a/div")

Python Selenium iterate table of links clicking each link

So this question has been asked before but I am still struggling to get it working.
The webpage has a table with links, I want to iterate through clicking each of the links.
So this is my code so far
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
try:
element = WebDriverWait(driver, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, "table-scroll")))
table = element.find_elements_by_xpath("//table//tbody/tr")
for row in table[1:]:
print(row.get_attribute('innerHTML'))
# link.click()
finally:
driver.close()
Sample of output
<td>FOUR</td>
<td>4imprint Group plc</td>
<td>Media & Publishing</td>
<td>888</td>
<td>888 Holdings</td>
<td>Hotels & Entertainment Services</td>
<td>ASL</td>
<td>Aberforth Smaller Companies Trust</td>
<td>Collective Investments</td>
How do a click the href and iterate to the next href?
Many thanks.
edit
I went with this solution (a few small tweaks on Prophet's solution)
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.action_chains import ActionChains
driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
actions = ActionChains(driver)
#close the cookies banner
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "ensCloseBanner"))).click()
#wait for the first link in the table
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
#extra wait to make all the links loaded
time.sleep(1)
#get the total links amount
links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
for index, val in enumerate(links):
try:
#get the links again after getting back to the initial page in the loop
links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
#scroll to the n-th link, it may be out of the initially visible area
actions.move_to_element(links[index]).perform()
links[index].click()
#scrape the data on the new page and get back with the following command
driver.execute_script("window.history.go(-1)") #you can alternatevely use this as well: driver.back()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
time.sleep(2)
except StaleElementReferenceException:
pass
To perform what you want to do here you first need to close cookies banner on the bottom of the page.
Then you can iterate over the links in the table.
Since by clicking on each link you are opening a new page, after scaring the data there you will have to get back to the main page and get the next link. You can not just get all the links into some list and then iterate over that list since by navigating to another web page all the existing elements grabbed by Selenium on the initial page become Stale.
Your code can be something like this:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
actions = ActionChains(driver)
#close the cookies banner
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "ensCloseBanner"))).click()
#wait for the first link in the table
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
#extra wait to make all the links loaded
time.sleep(1)
#get the total links amount
links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
for index, val in enumerate(links):
#get the links again after getting back to the initial page in the loop
links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
#scroll to the n-th link, it may be out of the initially visible area
actions.move_to_element(links[index]).perform()
links[index].click()
#scrape the data on the new page and get back with the following command
driver.execute_script("window.history.go(-1)") #you can alternatevely use this as well: driver.back()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
time.sleep(1)
You basically have to do the following:
Click on the cookies button if available
Get all the links on the page.
Iterate over the list of links and then click on the first (by first scrolling to the web element and doing that for the list item) and then navigate back to the original screen.
Code:
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.fidelity.co.uk/shares/ftse-350/")
try:
wait.until(EC.element_to_be_clickable((By.ID, "ensCloseBanner"))).click()
print('Click on the cookies button')
except:
print('Could not click on the cookies button')
pass
driver.execute_script("window.scrollTo(0, 750)")
try:
all_links = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//table//tbody/tr/td/a")))
print("We have got to deal with", len(all_links), 'links')
j = 0
for link in range(len(all_links)):
links = wait.until(EC.presence_of_all_elements_located((By.XPATH, f"//table//tbody/tr/td/a")))
driver.execute_script("arguments[0].scrollIntoView(true);", links[j])
time.sleep(1)
links[j].click()
# here write the code to scrape something once the click is performed
time.sleep(1)
driver.execute_script("window.history.go(-1)")
j = j + 1
print(j)
except:
print('Bot Could not exceute all the links properly')
pass
Import:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
PS to handle stale element reference you'd have to define the list of web elements again inside the loop.

Python Selenium Crawler go into element and get details

I'm trying to get details of all properties from the following website which has properties listed as elements:
https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9
I'm using Selenium in Python to scrape the elements' details but as soon as I go to the element I cannot click on its link to open it to a new page and get the necessary information. Code below:
from selenium.webdriver.common.keys import Keys
import webbrowser
import random
import time
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
import csv
from csv import writer
from selenium.common.exceptions import ElementNotVisibleException, WebDriverException, NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
Link = 'https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9'
# MAIN
driver = webdriver.Chrome()
driver.maximize_window()
#Go to link
driver.get(Link)
#Accept cookies
time.sleep(2)
driver.find_element_by_xpath('//*[#id="onetrust-accept-btn-handler"]').click()
time.sleep(2)
#Load everything
while True:
try:
driver.find_element_by_xpath("//*[contains(#value,'View more')]").click()
time.sleep(3)
except Exception as no_more_properties:
print('all properties expanded: ', no_more_properties)
break
#Get properties
properties_list=driver.find_elements_by_xpath('//*[#class="minificha "]')
print (len(properties_list))#25
time.sleep(2)
#Get each property link
property_url=set()
properties_details=[]
main_window_handle = driver.current_window_handle
for i in range(0,len(properties_list)):
driver.switch_to_window(main_window_handle)
property = properties_list[i]
property_link = property.find_element_by_xpath('//a[#href="'+url+'"]')
property_link.click()
time.sleep(2)
#Switch to property window
window_after = driver.window_handles[1]
driver.switch_to.window(window_after)
#Get number of properties
number_of_flats=driver.find_elements_by_xpath('//[#class="lineainmu "]')
print(len(number_of_flats))
time.sleep(2)
currentWindow = driver.current_window_handle
for j in range(0,len(number_of_flats)):
driver.switch_to_window(currentWindow)
flat= number_of_flats[j]
flat.click()
time.sleep(2)
#Switch to flat window
window_after = driver.window_handles[1]
driver.switch_to.window(window_after)
When we click on a link on first page, it will open a new tab. In selenium in these type of cases we should switch the focus to new windows and then we can interact with web elements on the newly open page.
Once the task is done, it's important to close the tab and then switch back to original content.
This may lead to stale element reference, if we do not defined the web elements in loop again.
Code :
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(30)
wait = WebDriverWait(driver, 30)
driver.get("https://www.altamirarealestate.com.cy/results/for-sale/flats/cyprus/35p009679979327046l33p17435142059772z9")
try:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "button#onetrust-accept-btn-handler"))).click()
except:
pass
size = driver.find_elements(By.XPATH, "//div[#class='slick-list draggable']")
j = 1
org_windows_handle = driver.current_window_handle
for i in range(len(size)):
ele = driver.find_element(By.XPATH, f"(//div[#class='slick-list draggable'])[{j}]")
driver.execute_script("arguments[0].scrollIntoView(true);", ele)
ele.click()
all_handles = driver.window_handles
driver.switch_to.window(all_handles[1])
try:
name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "p#tituloFiltroTipo"))).text
print(name)
except:
pass
try:
price = wait.until(EC.visibility_of_element_located((By.ID, "soloPrecio"))).text
print(price)
except:
pass
driver.close()
driver.switch_to.window(org_windows_handle)
j = j + 1
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output :
Flats - Egkomi, Nicosia
310,000
Flat - Strovolos, Nicosia
115,000
Flat - Agios Dometios, Nicosia
185,000
Flats - Aglantzia, Nicosia
765,000
Flat - Kaimakli, Nicosia
170,000
Flat - Kaimakli, Nicosia
280,000
Flat - Kaimakli, Nicosia
130,000
Flat - Germasogia, Limassol
410,000
Flat - Germasogeia, Limassol
285,000
Flat - Petrou & Pavlou, Limassol
230,000
Mixing implicit with explicit is not recommended. But in few cases like this where we are using find_element and explicit wait, does not do any harm. Please comment implicit wait line, and run the code. If it fails please uncomment and then try again.

Categories

Resources