Difficulty finding elements in Selenium with Python

I am trying to familiarize myself with Selenium and am running into an issue finding an element; I am actually noticing this a lot.
I want to go to yahoo.com, click on Sports, and assert that the page title is correct. The following throws an "Unable to locate element" error. I have tried ID, XPath, etc. I have also tried other page elements (Mail, News, etc.); all throw the same error. Am I missing something here?
import unittest
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# go to Yahoo home page, click on Sports and assert title is correct
class searchMaps(unittest.TestCase):

    # def setup as class method
    @classmethod
    def setUpClass(inst):
        inst.driver = webdriver.Chrome()
        inst.driver.maximize_window()
        inst.driver.get("http://www.yahoo.com")
        inst.driver.implicitly_wait(20)

    def test_click_sports_page(self):
        self.search_field = self.driver.find_element_by_id('yui_3_18_0_3_1527260025921_1028')
        self.search_field.click()
        actual_title = self.driver.title
        expected_title = 'Yahoo Sports | Sports News'
        self.assertEqual(actual_title, expected_title)

    @classmethod
    def tearDownClass(inst):
        inst.driver.quit()

if __name__ == '__main__':
    unittest.main()

The id of the target link is generated dynamically, so it will be different each time you open the page.
Try using the link text to locate the element instead:
self.search_field = self.driver.find_element_by_link_text('Sports')
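Put together with an explicit wait (so the test does not depend on the header having finished rendering), the test method could look roughly like the sketch below; the locator and expected title come from the question, while the 10-second timeout and the wait conditions are only suggestions:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# inside the searchMaps test case from the question
def test_click_sports_page(self):
    wait = WebDriverWait(self.driver, 10)
    # wait for the Sports link to be clickable instead of relying on a generated id
    sports_link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Sports')))
    sports_link.click()
    # let the Sports page load before checking the title
    wait.until(EC.title_contains('Yahoo Sports'))
    self.assertEqual(self.driver.title, 'Yahoo Sports | Sports News')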

Related

Getting an error: Unable to locate element: .ng-star-inserted when locating an element called ng-star-inserted in selenium python

I'm running the following code and getting the following error:
select_survey = browser.find_element_by_class_name("ng-star-inserted").click()
selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: .ng-star-inserted
This is the entire code:
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Firefox()
browser.get("https://www.legeropinion.com/app/todo")
email_element = browser.find_element_by_id("email")
email_element.clear()
email_element.send_keys("something@gmail.com")
password_element = browser.find_element_by_id("password")
password_element.send_keys("password")
sign_in = browser.find_element_by_class_name("submit-btn").click()
select_survey = browser.find_element_by_class_name("ng-star-inserted").click()
I am trying to automate signing into a website and completing a form, but upon clicking on the form element whose class is ng-star-inserted I am getting that error.
Edit:
This is the relevant HTML, I believe:
<task-block _ngcontent-buv-c2="" _nghost-buv-c9="" class="ng-star-inserted"><!----><div _ngcontent-buv-c9="" class="task ng-star-inserted"><!----><a _ngcontent-buv-c9=""
Okay, so after a lot of trial and error I finally got it to work, by fluke. I don't know how, but it worked!
I simply located the element by the absolute XPath, which I copied from the HTML, and before that line I added a time.sleep call, like so:
time.sleep(2)
select_survey = browser.find_element_by_xpath("/html/body/app-root/ion-app/ion-router-outlet/app-todo/ion-content/mat-expansion-panel[1]/div/div/div[2]/task-block[2]/div").click()
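For what it's worth, the need for time.sleep suggests the element simply was not rendered yet when the original lookup ran. Waiting explicitly for a relative locator would be a less brittle variant; a sketch, assuming the task-block markup shown above:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait until Angular has rendered the task block (ng-star-inserted is added by *ngIf/*ngFor)
wait = WebDriverWait(browser, 10)
select_survey = wait.until(EC.element_to_be_clickable(
    (By.CSS_SELECTOR, "task-block.ng-star-inserted div.task")))
select_survey.click()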

Cannot scrape AliExpress HTML element

I would like to scrape an arbitrary offer from AliExpress. I'm trying to use Scrapy and Selenium. The issue I face is that when I use Chrome and do right click > inspect on an element I see the real HTML, but when I do right click > view source I see something different: an HTML, CSS, and JS mess all around.
As far as I understand, the content is pulled in asynchronously? I guess this is the reason why I can't find the element I am looking for on the page.
I was trying to use Selenium to load the page first and then get the content I want, but failed. I'm trying to scroll down to get to the reviews section and get its content.
Is this some advanced anti-bot solution that they have, or is my approach wrong?
The code that I currently have:
import scrapy
from selenium import webdriver
import logging
import time

logging.getLogger('scrapy').setLevel(logging.WARNING)

class MySpider(scrapy.Spider):
    name = 'myspider'
    start_urls = ['https://pl.aliexpress.com/item/32998115046.html']

    def __init__(self):
        self.driver = webdriver.Chrome()

    def parse(self, response):
        self.driver.get(response.url)
        scroll_retries = 20
        data = ''
        while scroll_retries > 0:
            try:
                data = self.driver.find_element_by_class_name('feedback-list-wrap')
                scroll_retries = 0
            except:
                self.scroll_down(500)
                scroll_retries -= 1
        print("----------")
        print(data)
        print("----------")
        self.driver.close()

    def scroll_down(self, pixels):
        self.driver.execute_script("window.scrollTo(0, {});".format(pixels))
        time.sleep(2)
By watching the requests in the Network tab of the browser's inspect tool, you will find that the comments are loaded from a separate request, so you can crawl that page instead.
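For illustration only: once the request that returns the reviews has been copied from the Network tab, it can be fetched directly without Selenium. The URL and the CSS selector below are placeholders for whatever the actual request and markup turn out to be:

import scrapy

class ReviewsSpider(scrapy.Spider):
    name = 'aliexpress_reviews'
    # placeholder: paste the review request URL copied from the Network tab here
    start_urls = ['https://example.invalid/review-endpoint-from-network-tab']

    def parse(self, response):
        # that response contains the review markup directly, so plain Scrapy
        # selectors are enough (the selector below is illustrative)
        for review in response.css('div.feedback-item'):
            yield {'text': ' '.join(review.css('::text').getall()).strip()}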

python selenium: cannot click invisible element

I am trying to scrape the Google News page in the following way:
from selenium import webdriver
import time
from pprint import pprint
base_url = 'https://www.google.com/'
driver = webdriver.Chrome('/home/vincent/wintergreen/chromedriver') ## change here to your location of the chromedriver
driver.implicitly_wait(30)
driver.get(base_url)
input = driver.find_element_by_id('lst-ib')
input.send_keys("brexit key dates timetable schedule briefing")
click = driver.find_element_by_name('btnK')
click.click()
news = driver.find_element_by_link_text('News')
news.click()
tools = driver.find_element_by_link_text('Tools')
tools.click()
time.sleep(1)
recent = driver.find_element_by_css_selector('div.hdtb-mn-hd[aria-label=Recent]')
recent.click()
# custom = driver.find_element_by_link_text('Custom range...')
custom = driver.find_element_by_css_selector('li#cdr_opt span')
custom.click()
from_ = driver.find_element_by_css_selector('input#cdr_min')
from_.send_keys("9/1/2018")
to_ = driver.find_element_by_css_selector('input#cdr_max')
to_.send_keys("9/2/2018")
time.sleep(1)
go_ = driver.find_element_by_css_selector('form input[type="submit"]')
print(go_)
pprint(dir(go_))
pprint(go_.__dict__)
go_.click()
This script manages to enter the search terms, switch to the News tab, open the custom time period tab, and fill in the start and end dates, but it fails to click on the 'Go' button after that point.
From the print and pprint statements at the end of the script, I can deduce that it does find the 'Go' button successfully, but it is somehow unable to click on it. The error displays as selenium.common.exceptions.ElementNotVisibleException: Message: element not visible
Could anyone experienced with Selenium have a quick run at it and give me hints as to why it returns this error?
Thanks!
Evaluating that CSS selector with the developer tools in Chrome yields 4 elements, so the locator is ambiguous and Selenium can end up with one that is not visible.
Use the following CSS selector instead:
go_ = driver.find_element_by_css_selector('#cdr_frm > input.ksb.mini.cdr_go')
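If the more specific selector still raises ElementNotVisibleException, a common pattern is to wait until the button is actually clickable and, as a last resort, click it through JavaScript; a sketch using the same selector:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
go_ = wait.until(EC.element_to_be_clickable(
    (By.CSS_SELECTOR, '#cdr_frm > input.ksb.mini.cdr_go')))
try:
    go_.click()
except Exception:
    # fall back to a JavaScript click if Selenium still reports the element as hidden
    driver.execute_script("arguments[0].click();", go_)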

Selenium facebook comment add

I want to create a script to automate the sending of Facebook comments. Logging in and fetching the post are done, but I cannot understand why selenium doesn't find the comment class.
Here's the code:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# fb_email, fb_pass, fb_post and fb_message are defined elsewhere (credentials, post URL, comment text)

def fb_login():
    browser.get("https://www.facebook.com")
    time.sleep(5)
    email = browser.find_element_by_id("email")
    email.send_keys(fb_email)
    pwd = browser.find_element_by_id("pass")
    pwd.send_keys(fb_pass)
    login = browser.find_element_by_id("loginbutton")
    login.click()
    time.sleep(5)

def fb_page():
    browser.get(fb_post)

def fb_comment():
    textbox = browser.find_element_by_class_name("._5rpu")
    textbox.send_keys(fb_message)
    textbox.send_keys(Keys.ENTER)
    textbox.clear()

browser = webdriver.Firefox()
fb_login()
fb_page()
fb_comment()
Here's the exception:
selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: ._5rpu
It looks like you didn't click the "Comment" button, so the element with class name "_5rpu" is not yet generated.
Sorry I don't have enough posts to add a comment.
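To make that concrete, a sketch of what fb_comment could do instead: open the comment editor first and only then type into the _5rpu element. The selector for the element that opens the editor is a placeholder, since Facebook's markup changes frequently; everything else reuses the names from the question:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fb_comment():
    wait = WebDriverWait(browser, 10)
    # placeholder selector: point this at whatever opens the comment box on the post
    comment_button = wait.until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "form.commentable_item")))
    comment_button.click()
    # once the editor exists, the _5rpu element can be located and typed into
    textbox = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "_5rpu")))
    textbox.send_keys(fb_message)
    textbox.send_keys(Keys.ENTER)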

python selenium to scrape data from asos - need a better approach

Hi, I'm new to Python and crawling. I've been researching and going over Stack Overflow and came up with Python + Selenium: open the webdriver, open the URL, get the page source, and turn it into the data I need. However, I know there's a better approach (for example, scraping without Selenium, not having to parse the page source, posting data to the ASP endpoint, etc.) and I hope I can get some help here for educational purposes.
Here's what I'd like to achieve.
Start:
http://www.asos.com/Women/New-In-Clothing/Cat/pgecategory.aspx?cid=2623
Obtain: product title, price, img, and its link
Next: go to the next page if there is one; if not, output the results.
BEFORE you go into my code, here is some background information. ASOS is a site that uses pagination, so this is about scraping through multiple pages. Also, I tried without Selenium by posting to http://www.asos.com/services/srvWebCategory.asmx/GetWebCategories
with this data:
{'cid':'2623', 'strQuery':"", 'strValues':'undefined', 'currentPage':'0',
'pageSize':'204','pageSort':'-1','countryId':'10085','maxResultCount':''}
but I get nothing back in return.
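Roughly, that attempt looks like the sketch below (requests is used here just for illustration; the endpoint and payload are the ones above, and one guess is that the service only answers when the payload is sent as JSON rather than as form data):

import requests

payload = {'cid': '2623', 'strQuery': '', 'strValues': 'undefined',
           'currentPage': '0', 'pageSize': '204', 'pageSort': '-1',
           'countryId': '10085', 'maxResultCount': ''}
# json= sends a JSON body with Content-Type: application/json
resp = requests.post(
    'http://www.asos.com/services/srvWebCategory.asmx/GetWebCategories',
    json=payload)
print(resp.status_code)
print(resp.text[:500])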
I know my approach is not good; I'd much appreciate any help, recommendation, approach, or idea! Thanks!
import scrapy
import time
import logging
from random import randint
from selenium import webdriver
from asos.items import ASOSItem
from scrapy.selector import Selector
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class ASOSSpider(scrapy.Spider):
    name = "asos"
    allowed_domains = ["asos.com"]
    start_urls = [
        "http://www.asos.com/Women/New-In-Clothing/Cat/pgecategory.aspx?cid=2623#/parentID=-1&pge=0&pgeSize=204&sort="
    ]

    def __init__(self):
        self.driver = webdriver.Firefox()

    def parse(self, response):
        self.driver.get(response.url)
        view_204 = self.driver.find_element_by_xpath("//div[@class='product-count-bottom']/a[@class='view-max-paged']")
        view_204.click()  # click and show 204 pictures
        time.sleep(5)  # wait till the 204 images are loaded; I've also tried the explicit wait below, but it timed out
        # element = WebDriverWait(self.driver, 8).until(EC.presence_of_element_located((By.XPATH, "category-controls bottom")))
        logging.debug("wait time has been reached! go CRAWL!")
        next = self.driver.find_element_by_xpath("//li[@class='page-skip']/a")
        pageSource = Selector(text=self.driver.page_source)  # load the page source instead; can't seem to crawl the page by just passing the regular request
        for sel in pageSource.xpath("//ul[@id='items']/li"):
            item = ASOSItem()
            item["product_title"] = sel.xpath("a[@class='desc']/text()").extract()
            item["product_link"] = sel.xpath("a[@class='desc']/@href").extract()
            item["product_price"] = sel.xpath("div/span[@class='price']/text()").extract()
            item["product_img"] = sel.xpath("div/a[@class='productImageLink']/img/@src").extract()
            yield item
        next.click()
        self.driver.close()
