selenium clicking a href button - python

I am really new to selenium.
Currently, I am trying to use both selenium and beautifulsoup to do some webcrawling. The website that I am webcrawling on is https://bigd.big.ac.cn/dogsdv2/pages/modules/indsnp/indsnp_search.jsp.
this is the code that I have for now.
# Launch Chrome and open the DoGSD individual-SNP search page.
driver = webdriver.Chrome(executable_path=path_to_chromebrowser)
driver.get("https://bigd.big.ac.cn/dogsdv2/pages/modules/indsnp/indsnp_search.jsp")
# Type the gene name into the search form's text input (located by its name attribute).
input_area = driver.find_element_by_name("searchForm.genename")
input_area.send_keys("P2RY12")
# Submit the search by clicking the button element.
# NOTE(review): find_element_by_* helpers are Selenium 3 style — removed in Selenium 4.3+; verify the installed version.
searcher = driver.find_element_by_class_name("button")
searcher.click()
# Failed attempt: "table7 table7-border" is a compound (space-separated) class value,
# which find_element_by_class_name cannot match — it accepts a single class name.
# table = driver.find_element_by_class_name("table7 table7-border")
# table.find_element_by_tag_name("a").click()
I am trying to click the first SNP ID that comes up, upon search. What would be the good way for me to click the href of the search result?

On the webpage https://bigd.big.ac.cn/dogsdv2/pages/modules/indsnp/indsnp_search.jsp, to search for the Gene Name P2RY12 and click the first SNP ID that comes up upon search, you need to induce WebDriverWait for the element_to_be_clickable() and you can use the following Locator Strategies:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Chrome options: start maximized and reduce automation banners/extensions.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('https://bigd.big.ac.cn/dogsdv2/pages/modules/indsnp/indsnp_search.jsp')
# Wait for the gene-name input to be clickable, then type the query.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input#idgname[name='searchForm.genename']"))).send_keys("P2RY12")
# Submit the search form.
driver.find_element_by_css_selector("button.button[type='submit']").click()
# Click the first SNP ID link (3rd column of the results table) once it is clickable.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "form[action^='/dogsdv2/com/exportFile'] table>tbody>tr td:nth-child(3)>a"))).click()
Browser Snapshot:

Try this:
# XPath attribute tests use '@', not '#': select the first SNP-ID link
# (3rd column) of the results table and click it.
firstsnpID = driver.find_element_by_xpath("(.//table[@class='table7 table7-border']/tbody/tr/td[3]/a)[1]")
firstsnpID.click()
You cannot use compound class names (values containing spaces) to locate an element with find_element_by_class_name; it accepts a single class name only.

# Click the first SNP ID link (row 1, 3rd column) via an absolute XPath.
driver.find_element_by_xpath('/html/body/div/div[2]/div[2]/form/table/tbody/tr[1]/td[3]/a[1]').click()
If you need other ids:
# Visit each of the first 9 result rows in turn (loop variable renamed so it
# doesn't shadow the builtin `id`; the loop body belongs inside the for).
for row in range(1, 10):
    driver.find_element_by_xpath('/html/body/div/div[2]/div[2]/form/table/tbody/tr[{}]/td[3]/a[1]'.format(row)).click()
    sleep(5)  # NOTE(review): requires `from time import sleep`
    driver.back()

To click on the first link in the table, induce WebDriverWait() with element_to_be_clickable() and the following CSS selector.
Code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the search page and submit the gene-name query.
driver = webdriver.Chrome(executable_path=path_to_chromebrowser)
driver.get("https://bigd.big.ac.cn/dogsdv2/pages/modules/indsnp/indsnp_search.jsp")
input_area = driver.find_element_by_name("searchForm.genename")
input_area.send_keys("P2RY12")
searcher = driver.find_element_by_class_name("button")
searcher.click()
# Wait for the first SNP-detail link in the results table and click it.
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"table.table7.table7-border td>a[href^='/dogsdv2/refsnp/showRefSNPDetail']"))).click()
To get all the links, induce WebDriverWait() with visibility_of_all_elements_located(), get each href value, and then iterate over each URL:
# Collect every SNP-detail link on the results page, then visit each URL in turn.
all_elements = WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "table.table7.table7-border td>a[href^='/dogsdv2/refsnp/showRefSNPDetail']")))
all_urls = [item.get_attribute('href') for item in all_elements]
print(all_urls)
for link in all_urls:
    driver.get(link)

Related

Can't find out xpath with selenium in jobsite.co.uk website

I want to find out the "Accept All" button xpath for click accept cookies.
Code trials:
from ast import Pass  # NOTE(review): unused import
import time
from selenium import webdriver

# Optional argument; if not specified, Chrome is searched for on PATH.
driver = webdriver.Chrome(executable_path=r'C:\Users\Nahid\Desktop\Python_code\Jobsite\chromedriver.exe')
driver.get('http://jobsite.co.uk/')
driver.maximize_window()
time.sleep(1)
# Locate the <span> inside the consent <div>; XPath attribute tests use '@', not '#'.
# The trailing space inside the class value matches the page's class attribute exactly.
cookie = driver.find_element_by_xpath('//div[@class="privacy-prompt-button primary-button ccmgt_accept_button "]/span')
cookie.click()
The desired element:
<div id="ccmgt_explicit_accept" class="privacy-prompt-button primary-button ccmgt_accept_button ">
<span>Accept All</span>
</div>
is a <span> tag having an ancestor <div>.
Solution
To click on the clickable element you need to induce WebDriverWait for the element_to_be_clickable() and you can use either of the following locator strategies:
Using CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.privacy-prompt-button.primary-button.ccmgt_accept_button>span"))).click()
Using XPATH:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[text()='Accept All']"))).click()
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Your XPath looks correct, but it can be improved.
Also you should use WebDriverWait expected conditions instead of hardcoded sleeps.
As following:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("--start-maximized")
# Raw string avoids the invalid '\w' escape sequence in the Windows path.
s = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=s)
url = 'http://jobsite.co.uk/'
wait = WebDriverWait(driver, 10)
driver.get(url)
# The accept button carries a stable id, so wait for it to be clickable and click it.
wait.until(EC.element_to_be_clickable((By.ID, "ccmgt_explicit_accept"))).click()

Can't Find Element Inside iframe

I want to get the data-sitekey, but it is inside the iframe.
I can only get the element with class="container"; I can't find the elements inside of it.
How can I get the data-sitekey?
# Load the page and switch into the frame named/ID'd "main-iframe".
driver.get(url)
driver.switch_to.frame("main-iframe")
# The outer container is found fine once inside the frame.
container= driver.find_element(By.CLASS_NAME, 'container')
print(container)
time.sleep(2)
# Fails here — presumably the g-recaptcha element sits in a further nested
# iframe or is rendered later; TODO confirm against the live page.
captcha = driver.find_element(By.CLASS_NAME, 'g-recaptcha')
print(captcha)
The reCAPTCHA element is within an <iframe>
Solution
To extract the value of the data-sitekey attribute you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the visibility_of_element_located.
You can use either of the following locator strategies:
Using CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#main-iframe")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.g-recaptcha"))).get_attribute("data-sitekey"))
Using XPATH:
# Switch into the captcha iframe, then read data-sitekey off the g-recaptcha <div>.
# (XPath attribute tests use '@', not '#'; the get_attribute call belongs on the
# element lookup, not on the frame switch.)
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@id='main-iframe']")))
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='g-recaptcha']"))).get_attribute("data-sitekey"))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
This is how you get that information:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as Firefox_Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
import time as t

firefox_options = Firefox_Options()
# firefox_options.add_argument("--width=1280")
# firefox_options.add_argument("--height=720")
# firefox_options.headless = True
driverService = Service('chromedriver/geckodriver')
browser = webdriver.Firefox(service=driverService, options=firefox_options)
url = 'https://premier.hkticketing.com/'
browser.get(url)
t.sleep(5)
# XPath attribute tests use '@' (not '#'): switch into the captcha iframe first.
WebDriverWait(browser, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//*[@id='main-iframe']")))
print('switched')
t.sleep(5)
# The site key is exposed as the data-sitekey attribute of the g-recaptcha <div>.
element_x = WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='g-recaptcha']")))
print(element_x.get_attribute('data-sitekey'))
Result printed in terminal:
switched
6Ld38BkUAAAAAPATwit3FXvga1PI6iVTb6zgXw62
Setup is for linux/Firefox/geckodriver, but you can adapt it to your own system, just mind the imports, and the code after defining the browser.
Selenium docs: https://www.selenium.dev/documentation/

How to get the result value of the URL shorten page?

I am trying to get automatically the result of the URL shorten.
This is the page what I am using: url shortener site
This is the code I made (URLS list contains links):
driver.get("http://paylinx.pw/linx/")
# For each URL: focus the input, type the link, then submit via the captcha button.
# (XPath id tests use '@id', not '#id'; the statements belong inside the loop.)
for i in URLS:
    driver.find_element_by_xpath('//*[@id="url"]').click()
    time.sleep(2)
    driver.find_element_by_xpath('//*[@id="url"]').send_keys(i)
    time.sleep(2)
    driver.find_element_by_xpath('//*[@id="invisibleCaptchaShort"]').click()
    time.sleep(2)
After this I get the shortened url. I would need a little help to get it somehow.
Use WebDriverWait to wait for short url result and get the value.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # needed for Keys.ENTER below
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
with driver:  # closes the browser automatically when the block exits
    driver.get("http://paylinx.pw/linx/")
    for url in URLS:
        # Type the URL and submit with ENTER in one send_keys call.
        driver.find_element_by_id("url").send_keys(url, Keys.ENTER)
        # Wait for the shortened-URL result field and read its value.
        short_url = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".add-link-result .input-lg"))).get_attribute("value")
        print(short_url, url)
You can use driver.page_source, which returns the rendered HTML of the page as chromedriver sees it, or you can call get_attribute('innerHTML') on an element — either way you can access everything on the page.
Python WebDriver how to print whole page source (html)
To extract the result value of the URL shortener automatically you need to induce WebDriverWait for the visibility_of_element_located() and you can use the following Locator Strategies:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
URLS = ['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
# Shorten each URL in a fresh browser session, print the result, then quit.
for i in URLS:
    driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
    driver.get("http://paylinx.pw/linx/")
    element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input#url")))
    element.clear()
    element.send_keys(i)
    driver.find_element_by_css_selector("button.btn-captcha#invisibleCaptchaShort").click()
    # The shortened URL is exposed as the value of the result <input>.
    print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.input-group>input.form-control.input-lg"))).get_attribute("value"))
    driver.quit()
Console Output:
http://paylinx.pw/linx/Uksheqw8
http://paylinx.pw/linx/s0DA44C

Time out exception with WebDriverWait despite faster internet and element present

I am trying to scrape this:
https://www.lanebryant.com/chiffon-faux-wrap-fit-flare-midi-dress/prd-355958#color/0000091393
And this is my code:
wait = WebDriverWait(d, 10)
# Close the popup first ('@' is the XPath attribute axis, not '#').
close = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='closeButton']")))
close.click()
time.sleep(5)
# Then click the Size Guide link container.
chart = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(*,'Size Guide')][@class='size-chart-link']")))
chart.click()
It first closes the pop up and then clicks the size guide, However, it always gives timeout exception and works only a couple of times.
The PARTIAL_LINK_TEXT Size Guide is pretty much unique within the page, so your best bet would be to:
Induce WebDriverWait for invisibility_of_element() for the wrapper element
Induce WebDriverWait for the element_to_be_clickable() for the desired element
You can use the following Locator Strategy:
Code Block (using XPATH and PARTIAL_LINK_TEXT):
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('https://www.lanebryant.com/chiffon-faux-wrap-fit-flare-midi-dress/prd-355958#color/0000091393')
# XPath attribute tests use '@', not '#': close the popup, wait for its overlay
# mask to disappear, then click the Size Guide link.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//a[@id='closeButton']"))).click()
WebDriverWait(driver, 20).until(EC.invisibility_of_element((By.XPATH, "//div[@id='tinymask']")))
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Size Guide"))).click()
Code Block (using CSS_SELECTOR and PARTIAL_LINK_TEXT):
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('https://www.lanebryant.com/chiffon-faux-wrap-fit-flare-midi-dress/prd-355958#color/0000091393')
# Close the popup (CSS '#' correctly denotes an id here), wait for the overlay
# mask to become invisible, then click the Size Guide link by partial link text.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a#closeButton"))).click()
WebDriverWait(driver, 20).until(EC.invisibility_of_element((By.CSS_SELECTOR, "div#tinymask")))
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Size Guide"))).click()
Browser Snapshot:
Use a JavaScript executor to click on the element. Selenium WebDriver seems unable to click the element directly. Use the XPath below:
d.get("https://www.lanebryant.com/chiffon-faux-wrap-fit-flare-midi-dress/prd-355958#color/0000091393")
wait = WebDriverWait(d, 10)
# Close the popup ('@' is the XPath attribute axis, not '#').
close = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@id='closeButton']")))
close.click()
# Locate the Size Guide anchor and click it via JavaScript.
chart = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='size-chart-link']/a[contains(.,'Size Guide')]")))
d.execute_script("arguments[0].click();", chart)
Browser snapshot:

Selenium unable to click on elements while accessing a webpage using Selenium and Python

I'm trying to scrape this page. Before getting into the page listings, a Select Location window pops up, so I'm trying to tell selenium to click two buttons in order to access the product listings.
Problem is, Selenium is not able to locate the xpath I'm using to locate this two buttons!
Here's my code:
from selenium import webdriver

driver = webdriver.Chrome("webdriver/chromedriver.exe")
# Implicit wait applies to every find_element call below.
driver.implicitly_wait(30)
driver.get("https://www.indiacashandcarry.com/shop/HomestyleFood")
# XPath id tests use '@id', not '#id'.
locationButton = driver.find_element_by_xpath('//*[@id="location-list"]/li[1]/h4/a')
groceriesButton = driver.find_element_by_xpath('//*[@id="price-list-0"]/ul/li[1]')
locationButton.click()
groceriesButton.click()
Here's the site:
https://www.indiacashandcarry.com/shop/HomestyleFood
I'm thinking it is because this popup is on other type of frame, but I couldn't find any iframe index, so I'm a bit lost. Please help!
Your XPath looks fine. Use WebDriverWait to handle the dynamic element.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome("webdriver/chromedriver.exe")
driver.get("https://www.indiacashandcarry.com/shop/HomestyleFood")
# '@' is the XPath attribute axis; wait for each element to become clickable.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="location-list"]/li[1]/h4/a'))).click()
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="price-list-0"]/ul/li[1]'))).click()
On the website https://www.indiacashandcarry.com/shop/HomestyleFood first to click() on Select This Location associated with FREMONT and then click() on Groceries you need to induce WebDriverWait for the element_to_be_clickable() and you can use the following solution:
Code Block:
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')
# NOTE(review): chrome_options= is deprecated in newer Selenium releases; options= is preferred.
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("https://www.indiacashandcarry.com/shop/HomestyleFood")
# Pick the Fremont location, then open Groceries ('@class', not '#class',
# selects by the class attribute in XPath).
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//h4[contains(., 'Fremont')]/a"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//h5[@class='mtopbot5 ng-binding' and contains(., 'Groceries')]"))).click()
Browser Snapshot:

Categories

Resources