Getting the element on page source but unable to locate using xpath - python

I am trying to find an element by XPath, but Selenium is unable to locate it, even though the element is present when I print the page source from Selenium. I have also checked that the element is not inside an iframe.
Here is my code:
from requests_html import HTMLSession
import pandas as pd
from fake_useragent import UserAgent
from requests_html import AsyncHTMLSession
from selenium import webdriver
from shutil import which
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
ua = UserAgent()
s = HTMLSession()
asession = AsyncHTMLSession()
url = 'https://ordiamond-frame-categoryembed-catid23621.jewelershowcase.com/search/results?query=124405'
try:
    User_Agent = str(ua.chrome)
except Exception:
    User_Agent = 'Mozilla/5.0'  # fall back to a static UA if fake_useragent fails
headers = {'User-Agent': User_Agent}
response = s.get(url, headers=headers)
print(response)
link = response.html.xpath('//a[@class="image logClick containerFix"]/@href')
if link:
    p_url = "https://ordiamond-frame-categoryembed-catid23621.jewelershowcase.com" + link[0]
    chrome_path = which('chromedriver')
    driver = webdriver.Chrome(executable_path=chrome_path)
    driver.maximize_window()
    driver.get(p_url)
    time.sleep(20)
    with open('data.html', 'w') as file:
        file.write(str(driver.page_source))
    print(driver.page_source)
    WebDriverWait(driver, 50).until(EC.visibility_of_element_located((By.XPATH, '(//h3[@class="description"])[2]')))
    # time.sleep(16)
    na = driver.find_element_by_xpath('(//h3[@class="description"])[2]')
    print(na.text)
Hoping to get the solution. Thanks

If there are multiple matching nodes, find_element (as opposed to find_elements) always returns the first match; the same applies to WebDriverWait expected conditions.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(30)
wait = WebDriverWait(driver, 30)
driver.get("https://ordiamond-frame-categoryembed-catid23621.jewelershowcase.com/search/results?query=124405")
product = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@id='results']/descendant::a")))
product.click()
heading = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h3[@class='description']")))
print(heading.text)
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output :
14K Yellow 9x7 mm Oval Engagement Ring Mounting
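To illustrate the find_element vs find_elements point, a minimal sketch (driver setup as above; the XPath comes from the question, so it is an assumption about the page):
# find_element returns only the first match; find_elements returns all of them.
headings = driver.find_elements(By.XPATH, "//h3[@class='description']")
if len(headings) >= 2:
    print(headings[1].text)  # the second match, equivalent to (//h3[@class='description'])[2]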

If you want the XPath of the link to the ring, here it is:
link = response.html.xpath("//*[@id='results']//a[1]")

Related

Selenium WebDriver (driver.find_element(By.XPATH, '')) IS NOT WORKING in Python

https://www.espncricinfo.com/player/aamer-jamal-793441
This is the URL, and I am trying to access the full name "Aamer Jamal" with the help of Selenium WebDriver, but I don't know why it gives
NoSuchElementException
The code is written below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
driver = webdriver.Firefox()
#Reach to the Landing page
driver.get('https://www.espncricinfo.com/player/aamer-jamal-793441')
driver.maximize_window()
time.sleep(25)
not_now = driver.find_element(By.ID, 'wzrk-cancel')
not_now.click()
fullname = driver.find_element(By.XPATH, '/html/body/div[1]/section/section/div[4]/div[2]/div/div[1]/div/div/div[1]/div[1]/span/h5')
print(fullname.text)
Error :
NoSuchElementException: Message: Unable to locate element: /html/body/div[1]/section/section/div[4]/div[2]/div/div[1]/div/div/div[1]/div[1]/span
You have to use WebDriverWait with expected_conditions (explicit waits), not long hardcoded pauses. You also have to learn how to create correct locators: long absolute XPaths and CSS selectors are extremely brittle. The following code works:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 60)
actions = ActionChains(driver)
url = "https://www.espncricinfo.com/player/aamer-jamal-793441"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.ID, 'wzrk-cancel'))).click()  # dismiss the notifications pop-up
fullname = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'ds-text-title-l'))).text
print(fullname)
The output is:
Aamer Jamal
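As a general pattern, prefer a short locator anchored on a stable attribute near the target over an absolute path. A minimal sketch (ds-text-title-l is the class used in the answer above; pairing it with the h5 tag is an assumption based on the error's XPath):
# Brittle: breaks whenever an ancestor element is added or removed.
# fullname = driver.find_element(By.XPATH, '/html/body/div[1]/section/section/.../span/h5')
# Robust: anchored on a stable class close to the target element.
fullname = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'h5.ds-text-title-l'))).text  # assumed tag+class pairing
print(fullname)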

Scraping webpage with tabs that do not change url

I am trying to scrape a Nasdaq webpage and have some issues with locating elements:
My code:
from selenium import webdriver
import time
import pandas as pd
driver = webdriver.Chrome()  # driver setup was omitted in the original post
driver.get('http://www.nasdaqomxnordic.com/shares/microsite?Instrument=CSE32679&symbol=ALK%20B&name=ALK-Abell%C3%B3%20B')
time.sleep(5)
btn_overview = driver.find_element_by_xpath('//*[@id="tabarea"]/section/nav/ul/li[2]/a')
btn_overview.click()
time.sleep(5)
employees = driver.find_element_by_xpath('//*[@id="CompanyProfile"]/div[6]')
After the last call, I receive the following error:
NoSuchElementException: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="CompanyProfile"]/div[6]"}
Normally the problem would be a wrong XPath, but I tried several variants, including locating by id. I suspect it has something to do with the tabs (in my case, navigating to "Overview"). Visually the webpage changes, but if, for example, I scrape the table, it still comes from the first page:
table_test = pd.read_html(driver.page_source)[0]
What am I missing or doing wrong?
The overview page is inside an iframe:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
#chrome to stay open
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=option)
driver.get('http://www.nasdaqomxnordic.com/shares/microsite?Instrument=CSE32679&symbol=ALK%20B&name=ALK-Abell%C3%B3%20B')
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="tabarea"]/section/nav/ul/li[2]/a'))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="cookieConsentOK"]'))).click()
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe#MorningstarIFrame")))
employees = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '//*[@id="CompanyProfile"]/div[6]'))).text.split()[1]
print(employees)
Output:
2,537
The example above uses webdriver_manager to install the matching ChromeDriver automatically.
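One caveat: after reading values inside the frame, switch back to the top-level document before locating elements outside it. A minimal sketch using the standard Selenium API:
# ... work inside the iframe ...
driver.switch_to.default_content()  # return to the top-level document
# elements outside the iframe are reachable again from here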
You sure you need Selenium?
import requests
from bs4 import BeautifulSoup
url = 'http://lt.morningstar.com/gj8uge2g9k/stockreport/default.aspx'
payload = {
'SecurityToken': '0P0000A5LL]3]1]E0EXG$XCSE_3060'}
response = requests.get(url, params=payload)
soup = BeautifulSoup(response.text, 'html.parser')
employees = soup.find('h3', text='Employees').next_sibling.text
print(employees)
Output:
2,537
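This works because the URL and the SecurityToken payload reproduce the request the MorningstarIFrame makes; they were presumably lifted from the iframe's src attribute, so the snippet will break if the site rotates that token.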

python using selenium webdriver mouser

I'm trying to open the Mouser website and use the search bar to send some data. Here's an example of the code, but I can't get the right CSS selector. Thank you.
import time
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe')
driver.implicitly_wait(1)
url ='https://www.mouser.com/'
driver.get(url)
print(driver.title)
wait = WebDriverWait(driver, timeout=1)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#as-input-066 .form-control")))
elem = driver.find_element_by_css_selector("#as-input-066 .form-control")
elem.click()
elem.send_keys("myString")
Try the following css:
.form-control.headerSearchBox.search-input.js-search-autosuggest.as-input
xpath is even shorter:
//input[contains(@id,'as-input')]
Explanation: it matches any input whose id contains as-input.
One more suggestion:
Change
wait = WebDriverWait(driver, timeout=1)
to
wait = WebDriverWait(driver, timeout=15)
A 1-second timeout is too short; it should be at least 10 seconds.
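Putting the suggested locator and the longer timeout together, a minimal sketch (the XPath is the one above; the search term is illustrative):
wait = WebDriverWait(driver, timeout=15)
search_box = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(@id,'as-input')]")))
search_box.click()
search_box.send_keys("myString")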

Not able to click the radio button using selenium webdriver in python

I'm having a problem clicking the radio button for Registered Projects on this site; my Selenium WebDriver code does not click it.
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.implicitly_wait(10)
driver.get(url)
soup=BeautifulSoup(driver.page_source, 'lxml')
link =driver.find_element_by_link_text("Search Project Details")
link.click()
driver.find_element_by_id("Promoter").click()
Use WebDriverWait and the JavaScript executor to click the Registered Projects radio button.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='search-pro-details']//a[contains(.,'Search Project Details')]"))).click()
Registered_Project_radio= WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID,"Promoter")))
driver.execute_script("arguments[0].click();",Registered_Project_radio)
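A JavaScript click dispatches the event directly on the node, so it can succeed where Selenium's native click fails because the element is overlapped or not considered interactable; the trade-off is that it skips those interactability checks.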
WebDriverWait - an explicit wait is code you define to wait for a certain condition to occur before proceeding further in the code.
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)
links = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "search-pro-details")))
# Click on the Search Project Details link
links.find_element_by_link_text("Search Project Details").click()
promoter_radio_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "Promoter")))
#select radio button
promoter_radio_button.send_keys(Keys.SPACE)
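Sending SPACE selects the focused radio button through the keyboard, which likewise avoids the element-not-clickable problem without resorting to JavaScript.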
Try using this (it reuses the driver and the time import from the question):
driver.get('https://maharerait.mahaonline.gov.in')
link =driver.find_element_by_link_text("Search Project Details")
link.click()
time.sleep(2)
radio_btn = driver.find_element_by_id("Promoter")
radio_btn.click()
time.sleep(5)
driver.close()

Python, Selenium, and Beautiful Soup for URL

I am trying to write a script using Selenium to access Pastebin, do a search, and print the URL results as text. I need the visible URL results and nothing else.
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">pastebin.com/VYQTSbzY</div>
Current script is:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
browser = webdriver.Firefox()
browser.get('http://www.pastebin.com')
search = browser.find_element_by_name('q')
search.send_keys("test")
search.send_keys(Keys.RETURN)
soup = BeautifulSoup(browser.page_source, 'html.parser')
for link in soup.find_all('a'):
    print(link.get('href', None), link.get_text())
You don't actually need BeautifulSoup; Selenium itself is very powerful at locating elements:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
browser = webdriver.Firefox()
browser.get('http://www.pastebin.com')
search = browser.find_element_by_name('q')
search.send_keys("test")
search.send_keys(Keys.RETURN)
# wait for results to appear
wait = WebDriverWait(browser, 10)
results = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.gsc-resultsbox-visible")))
# grab results
for link in results.find_elements_by_css_selector("a.gs-title"):
    print(link.get_attribute("href"))
browser.close()
Prints:
http://pastebin.com/VYQTSbzY
http://pastebin.com/VYQTSbzY
http://pastebin.com/VAAQCjkj
...
http://pastebin.com/fVUejyRK
http://pastebin.com/fVUejyRK
Note the use of an explicit wait, which waits for the search results to appear.
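Since each result appears twice in the output, a small follow-up sketch to deduplicate the links while preserving order (plain Python, no extra assumptions):
seen = set()
for link in results.find_elements_by_css_selector("a.gs-title"):
    href = link.get_attribute("href")
    if href not in seen:
        seen.add(href)
        print(href)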
