After clicking "Advanced Search" on page https://www.tmdn.org/tmview/welcome#
I want to select:
Designated territories
Trade mark offices
Trade mark status
Application date
Below is a sample script that clicks the Advanced Search button and selects Japan from the Designated territories option.
SampleScript.py:
import Locators
from selenium import webdriver

url = 'https://www.tmdn.org/tmview/welcome#'

# Initialize the Chrome driver
driver = webdriver.Chrome()
driver.get(url)
driver.implicitly_wait(10)

AdvancedSearchElement = driver.find_element(*Locators.AdvancedSearchElement)
AdvancedSearchElement.click()

TerritoryDropDownElement = driver.find_element(*Locators.TerritoryDropDownElement)
TerritoryDropDownElement.click()

TerritoryLabelElements = driver.find_elements(*Locators.TerritoryLabelsElement)
for elem in TerritoryLabelElements:
    print(elem.text)
    if elem.text == 'Japan':
        elem.click()
        print('Selected Japan')

# Close the dropdown again
TerritoryDropDownElement.click()
Locators.py:
from selenium.webdriver.common.by import By

AdvancedSearchElement = (By.CSS_SELECTOR, '#lnkAdvancedSearch')
TerritoryDropDownElement = (By.ID, 'DesignatedTerritories')
TerritoryLabelsElement = (By.CSS_SELECTOR, 'div.multiSelectOptions label')
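The TMview page renders its search panels with JavaScript, so the implicit wait above can be flaky. Below is a minimal sketch of the same flow using explicit waits instead, assuming the same Locators module as above; EC.element_to_be_clickable and EC.visibility_of_all_elements_located accept the locator tuples directly.

import Locators
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://www.tmdn.org/tmview/welcome#')
wait = WebDriverWait(driver, 10)

# Wait until each control is actually clickable before interacting with it
wait.until(EC.element_to_be_clickable(Locators.AdvancedSearchElement)).click()
wait.until(EC.element_to_be_clickable(Locators.TerritoryDropDownElement)).click()

# Wait for the option labels to render, then pick Japan
for elem in wait.until(EC.visibility_of_all_elements_located(Locators.TerritoryLabelsElement)):
    if elem.text == 'Japan':
        elem.click()
        break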
Hello, I'm practicing Selenium on a practice form; this is the link for it: https://demoqa.com/automation-practice-form
If you visit the page and inspect the dropdown menus for State and City, you will find they consist only of div elements. I tried this, but it obviously didn't work:
dropdown = Select(d.find_element("xpath", '//*[@id="state"]'))
dropdown.select_by_index(0)
This is the error message:
Select only works on <select> elements, not on <div>
Can someone show me how to loop through the values of the menu, or is there another solution?
This code is working:
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = Options()
search_url = 'https://demoqa.com/automation-practice-form'
driver = webdriver.Chrome(options=options, executable_path=os.path.join(os.environ['USERPROFILE'], "Desktop") + '\\Python\\available Tender\\chromedriver\\chromedriver.exe')
driver.get(search_url)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# Remove the ad banner and the address wrapper that overlap the dropdown
element1 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[@id='adplus-anchor']")))
driver.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
""", element1)
element2 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[@id='currentAddress-wrapper']")))
driver.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
""", element2)

# Open the State dropdown, then click the 'NCR' option inside the menu
driver.find_element(By.XPATH, '//*[@id="state"]/div/div[2]/div').click()
e1 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class,'menu')]")))
e1.find_element(By.XPATH, ".//div[contains(text(),'NCR')]").click()
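Since these React-style dropdowns are not <select> elements, another option is to type into the real <input> the widget keeps inside its container and confirm with Enter. This is only a sketch, assuming the demoqa State widget wraps such an input (as React-Select normally does):

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Scroll the dropdown into view and open it
state = driver.find_element(By.ID, "state")
driver.execute_script("arguments[0].scrollIntoView(true);", state)
state.click()
# Typing into the inner <input> filters the menu; Enter picks the highlighted option
state.find_element(By.TAG_NAME, "input").send_keys("NCR" + Keys.ENTER)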
I am trying to get some data from Google, but I get the "Before you continue" Google popup. I am trying to make Selenium locate the button, click it, and return to getting the data, but even though I have the button ID in the code, it doesn't find it.
"""
Search on Google and returns the list of PAA questions in SERP.
"""
def newSearch(browser,query):
if lang== "en":
browser.get("https://www.google.com?hl=en")
WebDriverWait(browser, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe")))
agree = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[#id="L2AGLb"]/span/span')))
agree.click()
browser.switch_to_default_content()
searchbox = browser.find_element_by_xpath("//input[#aria-label='Search']")
else:
browser.get("https://www.google.com?hl=es")
searchbox = browser.find_element_by_xpath("//input[#aria-label='Buscar']")
searchbox.send_keys(query)
sleepBar(2)
tabNTimes()
if lang== "en":
searchbtn = browser.find_elements_by_xpath("//input[#aria-label='Google Search']")
else:
searchbtn = browser.find_elements_by_xpath("//input[#aria-label='Buscar con Google']")
try:
searchbtn[-1].click()
except:
searchbtn[0].click()
sleepBar(2)
paa = browser.find_elements_by_xpath("//span/following-sibling::div[contains(#class,'match-mod-horizontal-padding')]")
hideGBar()
return paa
Try clicking the inner div of the button itself. HTML of the agree popup:
<button id="L2AGLb" class="tHlp8d" data-ved="0ahUKEwj89p7Swob1AhVBxhoKHS0gDxIQiZAHCCE">
<div class="QS5gu sy4vM" role="none">
Acepto
</div>
</button>
Your selector should look like this:
(By.CSS_SELECTOR, "#L2AGLb > div")
Here is a working full example:
def test_google_...(self):
    driver = self.driver
    if self.LANGUAGE == "en":
        driver.get("https://www.google.com?hl=en")
    else:
        driver.get("https://www.google.com?hl=es")
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#L2AGLb > div"))
    )
    driver.find_element(By.CSS_SELECTOR, "#L2AGLb > div").click()
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'input[name="q"]'))
    )
    query = "your search query"
    driver.find_element(By.CSS_SELECTOR, 'input[name="q"]').send_keys(query)
    driver.find_element(By.CSS_SELECTOR, 'input[name="q"]').send_keys(Keys.RETURN)
    ...
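Note that the question's code reached the consent button through an iframe, while this example clicks it on the top-level page; which variant appears seems to depend on region and browser profile. A hedged fallback sketch (click_consent is a hypothetical helper) that tries the top-level button first and then the iframe:

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def click_consent(driver):
    wait = WebDriverWait(driver, 5)
    try:
        # Variant 1: the button is in the top-level document
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#L2AGLb > div"))).click()
    except TimeoutException:
        # Variant 2: the dialog lives inside an iframe, so switch into it first
        wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe")))
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#L2AGLb > div"))).click()
        driver.switch_to.default_content()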
I'm trying to scrape the information on a page, but when I open the exported .CSV file it is blank except for the headings.
I'm trying to scrape the 10 results on this page: https://www.narpm.org/find/property-managers/?submitted=true&toresults=1&resultsperpage=10&a=managers&orderby=&fname=&lname=&company=&chapter=S005&city=&state=&xRadius=
I can scrape the name, company, city and state, but when it comes to clicking on the dropdown 'More' it just doesn't seem to work. (Not getting any errors, csv is just blank.)
I suspect the problem lies in this code block:
driver.find_element_by_xpath('//div[@class="col-md-4 col-lg-1 arrow"]').click()
Here is all of my code:
options = Options()
options.headless = True
driver = webdriver.Chrome(executable_path='/Users/vilje/anaconda3/envs/webscrape/chromedriver', options=options)
driver.set_window_size(1440, 900)

# Creates master dataframe
df = pd.DataFrame(columns=['Name', 'Company', 'City', 'State', 'Phone', 'About'])

# URL
driver.get('https://www.narpm.org/find/property-managers/?submitted=true&toresults=1&resultsperpage=10&a=managers&orderby=&fname=&lname=&company=&chapter=S005&city=&state=&xRadius=')

name = driver.find_elements_by_xpath('//span[@class="name"]')
company = driver.find_elements_by_xpath('//div[@class="col-md-6 col-lg-4"]')
city = driver.find_elements_by_xpath('//div[@class="col-md-4 col-lg-2"]')
state = driver.find_elements_by_xpath('//div[@class="col-md-4 col-lg-2"]')

# Expand the 'More' button
driver.find_element_by_xpath('//div[@class="col-md-4 col-lg-1 arrow"]').click()

phone = driver.find_elements_by_xpath('//div[@class="col-sm-6 col-lg-3 with-icon lighter-text"]')
about = driver.find_elements_by_xpath('//div[@class="col-sm-12"]')

name_list = []
for n in range(len(name)):
    name_list.append(name[n].text)

company_list = []
for c in range(len(company)):
    company_list.append(company[c].text)

city_list = []
for c in range(len(city)):
    city_list.append(city[c].text)

state_list = []
for s in range(len(state)):
    state_list.append(state[s].text)

phone_list = []
for p in range(len(phone)):
    phone_list.append(phone[p].text)

about_list = []
for a in range(len(about)):
    about_list.append(about[a].text)

# List of each property manager's name, company, city, state, phone and about section paired together
data_tuples = list(zip(name_list[0:], company_list[0:], city_list[0:], state_list[0:], phone_list[0:], about_list[0:]))

# Creates dataframe of each tuple in list
temp_df = pd.DataFrame(data_tuples, columns=['Name', 'Company', 'City', 'State', 'Phone', 'About'])

# Appends to master dataframe
df = df.append(temp_df)

driver.close()
Can anyone please help me click all the 'More' buttons for each individual so I can scrape the data from the dropdown?
To click all the elements with the text More, you need to induce WebDriverWait for element_to_be_clickable(), and you can use either of the following locator strategies:
Using CSS_SELECTOR:
driver.get("https://www.narpm.org/find/property-managers/?submitted=true&toresults=1&resultsperpage=10&a=managers&orderby=&fname=&lname=&company=&chapter=S005&city=&state=&xRadius=")
for more in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.row div.arrow"))):
    more.click()
Using XPATH:
driver.get("https://www.narpm.org/find/property-managers/?submitted=true&toresults=1&resultsperpage=10&a=managers&orderby=&fname=&lname=&company=&chapter=S005&city=&state=&xRadius=")
for more in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class='row']//div[contains(@class, 'arrow') and contains(., 'More')]"))):
    more.click()
Note: you have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
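If some of the More rows sit outside the viewport, a plain more.click() can raise ElementClickInterceptedException. A more defensive sketch of the same loop, assuming the div.row div.arrow locator from above:

from selenium.common.exceptions import ElementClickInterceptedException

for more in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.row div.arrow"))):
    # Center the element in the viewport before clicking it
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", more)
    try:
        more.click()
    except ElementClickInterceptedException:
        # Fall back to a JavaScript click if something overlays the element
        driver.execute_script("arguments[0].click();", more)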
Code trial:
#coding=utf-8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Below is to crawl data from a webpage with two different dropdowns
try:
    driver = webdriver.Chrome('./chromedriver')
    driver.get('https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05')
    select1 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
    for item1 in select1.options:
        item1.click()
        select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
        for item2 in select2.options:
            item2.click()
            time.sleep(2)
            # below is to get the attained data into an excel file
            table = driver.find_element_by_class_name('tbl_box')
            tbody = table.find_element_by_tag_name('tbody')
            rows = tbody.find_elements_by_tag_name('tr')
            total = []
            result = []
            for index, value in enumerate(rows):
                body = value.find_elements_by_tag_name('td')
                for i in range(len(body)):
                    data = body[i].text
                    result.append(data)
                total.append(result)
                result = []
            df = pd.DataFrame.from_records(total)
            df.to_excel('text.xlsx')
except Exception as e:
    print(e)
finally:
    driver.quit()
I have edited this code thanks to the lovely comment below, but I still get the same error:
Message: stale element reference: element is not attached to the page document
I roughly understand why this message shows up, but I still have no clear idea of how to fix it. I would deeply appreciate any comment! Many thanks in advance!
This is what I figured out, but I'm not sure if the code is right or not; I don't know Python.
# get the first select
select1 = Select(driver.find_element_by_xpath('//select[@name="sido"]'))
# get all options from the select
options1 = select1.options
for opt1 in options1:
    # select the option which has the value of opt1
    select1.select_by_value(opt1.get_attribute("value"))
    time.sleep(5)
    select2 = Select(driver.find_element_by_xpath('//select[@name="gugun"]'))
    options2 = select2.options
    for opt2 in options2:
        select2.select_by_value(opt2.get_attribute("value"))
        time.sleep(4)
To select all the <option> elements from the two different drop-down <select> elements within the website https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05, you need to induce WebDriverWait for element_to_be_clickable(), and you can use the following XPath-based locator strategies:
driver.get('https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05')
select1 = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
for item1 in select1.options:
    item1.click()
    select2 = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
    for item2 in select2.options:
        item2.click()
        time.sleep(3)  # perform your web-crawling here
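If selecting an option triggers a page reload (which is what produces the stale element reference error in the question), any option references collected before the reload become invalid. Below is a staleness-safe sketch, assuming the same sido/gugun selects; fresh_select is a hypothetical helper that re-locates the dropdown on every use:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 20)

def fresh_select(name):
    # Re-locate the <select> so we never hold a stale reference
    return Select(wait.until(EC.element_to_be_clickable((By.XPATH, f"//select[@name='{name}']"))))

for i in range(len(fresh_select('sido').options)):
    fresh_select('sido').select_by_index(i)
    for j in range(len(fresh_select('gugun').options)):
        fresh_select('gugun').select_by_index(j)
        # perform your web-crawling here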
I am trying to scrape information from Zillow lender profiles on this website: https://www.zillow.com/lender-directory/?sort=Relevance&location=Alabama%20Shores%20Muscle%20Shoals%20AL&language=English&page=1
I know how to scrape the info with Beautiful Soup. I'm just trying to create a list of clickable links for each profile so I can iterate to each one, scrape the desired info (I can do this), then go back to the starting page and move on to the next profile link. It's probably a simple solution, but I've been trying to get a list of clickable links for a couple of hours now, and I think it's time to ask.
Thanks.
I've tried a number of different approaches to get the list of clickable links, but I may have implemented them incorrectly, so I'm open to suggestions to double-check.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import time

# Driver to get website...need to get PhantomJS going..
driver = webdriver.Chrome(r'C:\Users\mfoytlin\Desktop\chromedriver.exe')
driver.get('https://www.zillow.com/lender-directory/?sort=Relevance&location=Alabama%20Shores%20Muscle%20Shoals%20AL&language=English&page=1')
time.sleep(2)

# Get page HTML data
soup = BeautifulSoup(driver.page_source, 'html.parser')

profile_links = driver.find_elements_by_xpath("//div[@class='zsg-content-item']//a")
for profile in range(len(profile_links)):
    # Re-locate the links on every pass, since driver.back() reloads the page
    profile_links = driver.find_elements_by_xpath("//div[@class='zsg-content-item']//a")
    profile_links[profile].click()
    time.sleep(2)
    driver.back()
    time.sleep(2)
The find_elements parameters are wrong here; you can try either of the following.
This is the source code that runs when you use find_elements():
def find_elements(self, by=By.ID, value=None):
    """
    Find elements given a By strategy and locator. Prefer the find_elements_by_* methods when
    possible.

    :Usage:
        elements = driver.find_elements(By.CLASS_NAME, 'foo')

    :rtype: list of WebElement
    """
    if self.w3c:
        if by == By.ID:
            by = By.CSS_SELECTOR
            value = '[id="%s"]' % value
        elif by == By.TAG_NAME:
            by = By.CSS_SELECTOR
        elif by == By.CLASS_NAME:
            by = By.CSS_SELECTOR
            value = ".%s" % value
        elif by == By.NAME:
            by = By.CSS_SELECTOR
            value = '[name="%s"]' % value
    # Return empty list if driver returns null
    # See https://github.com/SeleniumHQ/selenium/issues/4555
    return self.execute(Command.FIND_ELEMENTS, {
        'using': by,
        'value': value})['value'] or []
Try any of the following options
profile_links = driver.find_elements_by_xpath("//div[@class='zsg-content-item']//a")
OR
profile_links = driver.find_elements(By.XPATH, "//div[@class='zsg-content-item']//a")
Here is the list you get when you use the above code.
['https://www.zillow.comhttps://www.zillow.com/lender-profile/courtneyhall17/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/SouthPointBank/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/kmcdaniel77/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/jdowney75/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/fredabutler/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/justindorroh/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/aball731/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/1stfedmort/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/tstutts/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/sbeckett0/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/DebiBretherick/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/cking313/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/Gregory%20Angus/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/cbsbankmarketing/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/ajones392/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/sschulte6/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/dreamhomemortgagellc/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/DarleenBrooksHill/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/sjones966/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/BlakeRobbins4/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/zajones5746/', 'https://www.zillow.comhttps://www.zillow.com/lender-profile/adeline%20perkins/']
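Note that every entry in that list has the domain duplicated, which suggests the href attributes already come back as absolute URLs; prepending https://www.zillow.com again corrupts them. A sketch using urljoin, which leaves absolute URLs alone and only resolves relative ones:

from urllib.parse import urljoin

base = "https://www.zillow.com"
hrefs = [a.get_attribute("href") for a in driver.find_elements_by_xpath("//div[@class='zsg-content-item']//a")]
# urljoin leaves an absolute href unchanged, while a relative
# '/lender-profile/x/' is resolved against the base
profile_links = [urljoin(base, h) for h in hrefs]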
Edited
As I said, you need to re-assign the elements again.
profile_links = driver.find_elements_by_xpath("//div[#class='ld-lender-info-column']//h2//a")
for profile in range(len(profile_links)):
profile_links = driver.find_elements_by_xpath("//div[#class='ld-lender-info-column']//h2//a")
driver.execute_script("arguments[0].click();", profile_links[profile])
time.sleep(2)
driver.back()
time.sleep(2)
You can find all the clickable links using this approach. This is written in Java; you can write the equivalent in Python (a sketch follows the snippet).
List<WebElement> links = driver.findElements(By.xpath("//div[@class='zsg-content-item']//a"));
ArrayList<String> capturedLinks = new ArrayList<>();

for (WebElement link : links) {
    String myLink = "https://www.zillow.com" + link.getAttribute("href");
    if (!capturedLinks.contains(myLink)) { // to avoid duplicates
        capturedLinks.add(myLink);
    }
}
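A rough Python equivalent of the Java snippet above, as a sketch; note it reproduces the snippet's assumption that href comes back relative, which the duplicated-domain output earlier suggests is not the case on this page:

links = driver.find_elements_by_xpath("//div[@class='zsg-content-item']//a")
captured_links = []

for link in links:
    # Caution: if get_attribute("href") already returns an absolute URL
    # (as the output above suggests), skip the string concatenation
    my_link = "https://www.zillow.com" + link.get_attribute("href")
    if my_link not in captured_links:  # to avoid duplicates
        captured_links.append(my_link)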
I suppose the following script might do what you wanted. In short, the script parses the profile links from the landing page and then iterates through those links to scrape the name from their target pages.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://www.zillow.com/lender-directory/?sort=Relevance&location=Alabama%20Shores%20Muscle%20Shoals%20AL&language=English&page=1'
with webdriver.Chrome() as driver:
    wait = WebDriverWait(driver, 10)
    driver.get(url)
    items = [item.get_attribute("href") for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "h2 > a[href^='/lender-profile/']")))]
    for profilelink in items:
        driver.get(profilelink)
        name = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "h1.lender-name"))).text
        print(name)