Unable to print text from items with 'find_elements_by_xpath' - python

I'm starting out with web scraping and have been following tutorials. Yet with this code I get "NameError: name 'avail' is not defined". I guess it's really easy, but how can I fix it? (The error is probably in the for loop, at avail = i.text().)
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome('/Users/victorfichtner/Downloads/Chromedriver')
driver.get('https://www.myntra.com/smart-watches/boat/boat-unisex-black-storm-m-smart-watch/13471916/buy')
a = driver.find_elements_by_xpath("//*[@class='pdp-add-to-bag pdp-button pdp-flex pdp-center']")
for i in a:
    avail = i.text()
driver.quit()
print(avail)

Things to be noted:
find_elements returns a list, whereas find_element returns a single web element.
An XPath that matches the exact class string is brittle; prefer contains().
Use explicit waits for dynamic loading (see the explicit-wait sketch after the output below).
It is .text in Python, not .text().
Sample code :
driver = webdriver.Chrome('/Users/victorfichtner/Downloads/Chromedriver')
driver.maximize_window()
driver.implicitly_wait(50)
driver.get('https://www.myntra.com/smart-watches/boat/boat-unisex-black-storm-m-smart-watch/13471916/buy')
a = driver.find_elements_by_xpath("//*[contains(@class,'pdp-add-to-bag pdp-button pdp-flex')]")
avail = ""
for i in a:
    avail = i.text
driver.quit()
print(avail)
Output :
ADD TO BAG
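A minimal explicit-wait sketch, as mentioned in the notes above (it reuses the same driver path and XPath as the sample; the 20-second timeout is an arbitrary choice):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome('/Users/victorfichtner/Downloads/Chromedriver')
driver.get('https://www.myntra.com/smart-watches/boat/boat-unisex-black-storm-m-smart-watch/13471916/buy')
# Wait up to 20 seconds until at least one matching button is present in the DOM
wait = WebDriverWait(driver, 20)
buttons = wait.until(EC.presence_of_all_elements_located(
    (By.XPATH, "//*[contains(@class,'pdp-add-to-bag pdp-button pdp-flex')]")))
avail = buttons[0].text if buttons else ""
driver.quit()
print(avail)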

Related

Can't get text while web scraping a site

I'm trying to get the part where it says Avvisami on this website: https://www.nike.com/it/launch/t/womens-air-jordan-3-sp-a-ma-maniere
to appear as a string in my code. Every time I try something, it doesn't work. This is the relevant part of the code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
options = webdriver.ChromeOptions()
options.add_argument(r'--user-data-dir=C:\Users\mainuser\AppData\Local\Google\Chrome\User Data')
options.add_argument('--profile-directory=Profile 1')
driver = webdriver.Chrome(options = options)
driver.get('https://www.nike.com/it/launch/t/womens-air-jordan-3-sp-a-ma-maniere')
instock = (driver.find_elements_by_class_name('ncss-btn-primary-dark btn-lg'))
print(instock)
And within that, this is the part I think I need to change:
instock = (driver.find_elements_by_class_name('ncss-btn-primary-dark btn-lg'))
print(instock)
I've been trying to fix it for an hour or so, but I just can't wrap my head around how to do it.
instock = driver.find_element_by_css_selector(".ncss-btn-primary-dark.btn-lg").text
print(instock)
When an element has multiple class names, use a CSS selector, and to grab the text just use .text and assign it to your variable.
You are trying to get text from a list of elements. Iterate over it and use .text:
elems = driver.find_elements_by_css_selector(".ncss-btn-primary-dark.btn-lg")
for el in elems:
    print(el.text)
All of these elements are buttons.
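If the buttons are rendered after the initial page load, the same CSS selector can be combined with an explicit wait. A minimal sketch, assuming the class names on the Nike page are unchanged (the 10-second timeout is arbitrary):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://www.nike.com/it/launch/t/womens-air-jordan-3-sp-a-ma-maniere')
# Wait until buttons carrying both classes are present, then read their text
wait = WebDriverWait(driver, 10)
buttons = wait.until(EC.presence_of_all_elements_located(
    (By.CSS_SELECTOR, '.ncss-btn-primary-dark.btn-lg')))
for el in buttons:
    print(el.text)  # should include the Avvisami button text
driver.quit()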

How do I print the contents of a <cite>?

I'm trying to pull the links that Google provides with Python, and I don't know why it doesn't work.
The error:
AttributeError: 'WebElement' object has no attribute 'text'
My code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
input = input('what do you want to *scrape* ?\n>')
driver = webdriver.Opera()
driver.get('https://google.com')
print(driver.title)
search = driver.find_element_by_xpath('//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input')
search.send_keys(input)
search.send_keys(Keys.RETURN)
headers = driver.find_elements_by_tag_name('h3')
cites = driver.find_elements_by_tag_name('cite')
for cite in cites:
    print(cite.text)
time.sleep(5)
print('done')
driver.quit()
I just ran your code and it worked fine after providing the path of the Operadriver executable:
driver = webdriver.Opera(executable_path=r"C:\operadriver.exe")
Try cite.get_attribute("innerHTML")
or cite.get_attribute("outerHTML"),
or driver.execute_script("return arguments[0].innerHTML;", cite) (replace inner with outer as needed).
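For reference, a small sketch of those suggestions in Python Selenium syntax (get_attribute and execute_script are the Python counterparts of the Java-style names above), reusing the driver and cite elements from the question:
cites = driver.find_elements_by_tag_name('cite')
for cite in cites:
    # innerHTML/outerHTML are read via get_attribute in the Python bindings
    print(cite.get_attribute('innerHTML'))
    print(cite.get_attribute('outerHTML'))
    # or via JavaScript execution
    print(driver.execute_script('return arguments[0].innerHTML;', cite))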

Having trouble in getting data using css selector/xpath in selenium

I'm trying to extract data from the link below using Selenium via Python:
www.oanda.com
But I'm getting an "Unable to Locate an Element" error. In the browser console I tried using this CSS selector:
document.querySelector('div.position.short-position.style-scope.position-ratios-app')
This querySelector returns the data for the short percentage of the first row in the browser console (for this test), but when I use the same selector in the Python script below it gives me an "Unable to Locate element" error, or sometimes an empty string.
Please suggest a solution if there is one. I'll be grateful, thanks :)
# All Imports
import time
from selenium import webdriver

# will return driver
def getDriver():
    driver = webdriver.Chrome()
    time.sleep(3)
    return driver

def getshortPercentages(driver):
    shortPercentages = []
    shortList = driver.find_elements_by_css_selector('div.position.short-position.style-scope.position-ratios-app')
    for elem in shortList:
        shortPercentages.append(elem.text)
    return shortPercentages

def getData(url):
    driver = getDriver()
    driver.get(url)
    time.sleep(5)
    # pagesource = driver.page_source
    # print("Page Source: ", pagesource)
    shortList = getshortPercentages(driver)
    print("Returned source from selector: ", shortList)

if __name__ == '__main__':
    url = "https://www.oanda.com/forex-trading/analysis/open-position-ratios"
    getData(url)
The required data is located inside an iframe, so you need to switch to the iframe before handling the elements:
driver.switch_to.frame(driver.find_element_by_class_name('position-ratios-iframe'))
Also note that the data inside the iframe is dynamic, so make sure you're using an implicit/explicit wait (time.sleep(5) is, IMHO, not the best solution).
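A minimal sketch putting both suggestions together, assuming the iframe class name above and the selector from the question are still current (the 15-second timeout is arbitrary):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://www.oanda.com/forex-trading/analysis/open-position-ratios')
wait = WebDriverWait(driver, 15)
# Wait for the iframe and switch into it in one step
wait.until(EC.frame_to_be_available_and_switch_to_it(
    (By.CLASS_NAME, 'position-ratios-iframe')))
# Inside the iframe, wait until the short-position divs are present
shorts = wait.until(EC.presence_of_all_elements_located(
    (By.CSS_SELECTOR, 'div.position.short-position.style-scope.position-ratios-app')))
print([elem.text for elem in shorts])
# Return to the top-level document when finished
driver.switch_to.default_content()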

Web scraping in Selenium (Python) throws Element Not Found

I'm trying to scrape Chinese economic data from an official website, but I keep getting an Element Not Found exception on the last line here. I've scoured Stack Overflow and have tried adding implicitly_wait and switching the problem line from XPath to ID, but nothing has worked. Any thoughts?
from selenium import webdriver
FAI = []
FAIinfra = []
FAIestate = []
path_to_chromedriver = '/Users/cargillsk/Downloads/chromedriver'
browser = webdriver.Chrome(executable_path = path_to_chromedriver)
browser.implicitly_wait(30)
url = 'http://www.cqdata.gov.cn/easyquery.htm?cn=A0101'
browser.get(url)
browser.find_element_by_id('treeZhiBiao_4').click()
browser.find_element_by_xpath('//*[@id="mySelect_sj"]/div[2]/div[1]').click()
browser.find_element_by_xpath('//*[@id="mySelect_sj"]/div[2]/div[2]/div[3]/input').clear()
browser.find_element_by_xpath('//*[@id="mySelect_sj"]/div[2]/div[2]/div[3]/input').send_keys('last100')
browser.find_element_by_xpath('//*[@id="mySelect_sj"]/div[2]/div[2]/div[3]/div[1]').click()
FAIinitial = browser.find_element_by_xpath('//*[@id="main-container"]/div[2]/div[2]/div[2]/div/div[2]/table/thead/tr/th[2]/strong').text
for i in range(2, 102):
    i = str(i)
    FAI.append(browser.find_element_by_xpath('//*[@id="table_main"]/tbody/tr[1]/td[%s]' % i).text)
    FAIinfra.append(browser.find_element_by_xpath('//*[@id="table_main"]/tbody/tr[4]/td[%s]' % i).text)
    FAIestate.append(browser.find_element_by_xpath('//*[@id="table_main"]/tbody/tr[55]/td[%s]' % i).text)
browser.find_element_by_id("treeZhiBiao_3").click()
browser.find_element_by_id("treeZhiBiao_14").click()
So... the implicit wait is not your issue. Looking through the website's code, I found that there is no "treeZhiBiao_14", so I'm not sure what you're trying to click here. Maybe try something like this instead, so you know what you're clicking:
browser.find_element_by_xpath("//*[contains(text(), '工业')]").click()
or
browser.find_element_by_xpath("//*[contains(text(), 'industry')]").click()
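If those tree nodes are built dynamically, the same text-based XPath can be wrapped in an explicit wait rather than relying on the implicit wait alone; a sketch, assuming the browser object and label text from above:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(browser, 30)
# Wait until the tree node with the given label is clickable, then click it
node = wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//*[contains(text(), '工业')]")))
node.click()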

Retrieving url from google image search for first entry, using python and selenium

Ever since the API was deprecated, it's been very hard to retrieve the Google image search URL using Selenium. I've scoured Stack Overflow, but most of the answers to this question are from years ago, when scraping search engines was simpler.
I'm looking for a way to return the URL of the first image in a Google search query. I've tried everything in Selenium, from clicks, to retrieving the innerHTML of elements, to my most recent attempt, which uses ActionChains to navigate to the URL of the picture and then return the current URL.
def GoogleImager(searchterm, musedict):
    page = "http://www.google.com/"
    landing = driver.get(page)
    actions = ActionChains(driver)
    WebDriverWait(landing, '10')
    images = driver.find_element_by_link_text('Images').click()
    actions.move_to_element(images)
    searchbox = driver.find_element_by_css_selector('#lst-ib')
    WebDriverWait(searchbox, '10')
    sendsearch = searchbox.send_keys('{} "logo" {}'.format('Museum of Bad Art', 'bos') + Keys.ENTER)
    WebDriverWait(sendsearch, '10')
    logo = driver.find_element_by_xpath('//*[@id="rg_s"]/div[1]/a').click()
    WebDriverWait(logo, '10')
    logolink = driver.find_element_by_xpath('//*[@id="irc_cc"]/div[3]/div[1]/div[2]/div[2]/a')
    WebDriverWait(logolink, '10')
    actions.move_to_element(logolink).click(logolink)
    print(driver.current_url)
    return driver.current_url
I'm using this to return the first image for a museum name and city in the search.
I tried to make your code work with Google, got frustrated and switched to Yahoo instead. I couldn't make heads or tails of your musedict access loops so I substituted a simple dictionary for demonstration purposes:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait

museum_dictionary = {"louvre": "Paris", "prado": "Madrid"}

driver = webdriver.Firefox()

def YahooImager(searchterm):
    page = "https://images.search.yahoo.com"
    landing = driver.get(page)
    WebDriverWait(driver, 4)
    assert "Yahoo Image Search" in driver.title
    searchbox = driver.find_element_by_name("p")  # Find the query box
    city = museum_dictionary[searchterm]
    searchbox.send_keys("{} {}".format(searchterm, city) + Keys.RETURN)
    WebDriverWait(driver, 4)
    try:
        driver.find_element_by_xpath('//*[@id="resitem-0"]/a').click()
    except NoSuchElementException:
        assert 0, '//*[@id="resitem-0"]/a'
        driver.close()
    WebDriverWait(driver, 4)
    try:
        driver.find_element_by_link_text("View Image").click()
    except NoSuchElementException:
        assert 0, "View Image"
        driver.close()
    WebDriverWait(driver, 4)
    # driver.close()
    return driver.current_url

image_url = YahooImager("prado")
print(repr(image_url))
It works, but takes quite a while. (That's probably something someone who knows these libraries better could optimize -- I just wanted to see it work at all.) This example is fragile and occasionally just fails.
