I only want to scrape the required information contained in the black box, and delete/remove/exclude the information contained in the red box
I am doing this because class names "entry" and "partial entry" exist in both boxes. Only the first "partial entry" contains the information that I need, so I plan to delete/remove/exclude the classname "mgrRspnInLine".
My code is:
# Scrape every review on the current results page, then follow the "next"
# button; stop when that button can no longer be clicked.
while True:
    container = driver.find_elements_by_xpath('.//*[contains(@class,"review-container")]')
    for item in container:
        try:
            # Detach the manager responses nested in this review so their
            # "entry"/"partial_entry" nodes are not scraped as review text.
            # find_elements returns an empty list when a review has no
            # response, so such reviews are still scraped rather than being
            # skipped by the NoSuchElementException handler below.
            for response in item.find_elements_by_class_name('mgrRspnInline'):
                # arguments[0] is the element passed in from Python, so the
                # node removed is the one inside this review.
                driver.execute_script("arguments[0].parentNode.removeChild(arguments[0]);", response)

            # Expand truncated review text via the "More" link.
            WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"taLnk ulBlueLinks")]')))
            element = WebDriverWait(driver, 50).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"taLnk ulBlueLinks")]')))
            element.click()
            time.sleep(2)

            # The score is taken from the last two characters of the
            # bubble's class attribute.
            rating = item.find_elements_by_xpath('.//*[contains(@class,"ui_bubble_rating bubble_")]')
            for rate in rating:
                score_list.append(str(rate.get_attribute("class"))[-2:])
            time.sleep(2)

            stay = item.find_elements_by_xpath('.//*[contains(@class,"recommend-titleInline noRatings")]')
            for stayed in stay:
                parts = stayed.text.split(', ')
                # Appending the first part guarantees parts[1] exists even
                # when the text contains no ", " separator.
                parts.append(parts[0])
                travel_type.append(parts[1])

            WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"noQuotes")]')))
            summary = item.find_elements_by_xpath('.//*[contains(@class,"noQuotes")]')
            for comment in summary:
                comments.append(comment.text)

            WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"ratingDate")]')))
            rating_date = item.find_elements_by_xpath('.//*[contains(@class,"ratingDate")]')
            for date in rating_date:
                review_date.append(str(date.get_attribute("title")))

            WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"partial_entry")]')))
            review = item.find_elements_by_xpath('.//*[contains(@class,"partial_entry")]')
            for comment in review:
                comment = comment.text
                print(comment)
                reviews.append(comment)
        except (NoSuchElementException) as e:
            continue
    try:
        element = WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"nav next taLnk ui_button primary")]')))
        element.click()
        time.sleep(2)
    except (ElementClickInterceptedException,NoSuchElementException) as e:
        print(e)
        break
Basically, within each "review-container" I first searched for the class name "mgrRspnInLine", then tried to delete it using execute_script.
Unfortunately, the output still shows the contents of the "mgrRspnInLine" element.
If you want to avoid matching second element by your XPath you can just modify XPath as below:
.//*[contains(@class,"partial_entry") and not(ancestor::*[@class="mgrRspnInLine"])]
This will match element with class name "partial_entry" only if it doesn't have ancestor with class name "mgrRspnInLine"
If you only want the first occurrence, you could use a CSS class selector instead:
.partial_entry
and retrieve with find_element_by_css_selector:
find_element_by_css_selector(".partial_entry")
You can delete all the .mgrRspnInLine elements with:
# Run JavaScript in the page that removes every element matching the
# '.mgrRspnInLine' selector from its parent node.
driver.execute_script("[...document.querySelectorAll('.mgrRspnInLine')].map(el => el.parentNode.removeChild(el))")
By combining the comment from Andersson with the two answers provided by QHarr and pguardiario, I finally solved the problem.
The key is to target a container within the container: all the information is contained in the class name "ui_column is-9", which is itself contained in the class name "review-container". This addresses Andersson's comment about multiple mgrRspnInLine elements.
Within the nested loop, I used pguardiario's suggestion to delete the multiple existing mgrRspnInLine elements, then added QHarr's answer on .partial_entry.
# Scrape one record per review from the inner "ui_column is-9" container of
# each "review-container", then follow the "next" button until it can no
# longer be clicked.
while True:
    container = driver.find_elements_by_xpath('.//*[contains(@class,"review-container")]')
    for items in container:
        # Expand truncated review text via the "More" link.
        element = WebDriverWait(driver, 1000).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"taLnk ulBlueLinks")]')))
        element.click()
        time.sleep(10)
        contained = items.find_elements_by_xpath('.//*[contains(@class,"ui_column is-9")]')
        for item in contained:
            try:
                # Remove all manager responses so their text is not picked
                # up as review text.
                driver.execute_script("[...document.querySelectorAll('.mgrRspnInLine')].map(el => el.parentNode.removeChild(el))")

                # The leading "." keeps the search relative to this review;
                # a bare "//" would search the whole document and return the
                # first rating on the page for every review.
                rating = item.find_element_by_xpath('.//*[contains(@class,"ui_bubble_rating bubble_")]')
                # The score is taken from the last two characters of the
                # bubble's class attribute.
                score_list.append(str(rating.get_attribute("class"))[-2:])
                time.sleep(2)

                stay = item.find_element_by_xpath('.//*[contains(@class,"recommend-titleInline")]')
                stayed = stay.text.split(', ')
                # Appending the first part guarantees stayed[1] exists even
                # when the text contains no ", " separator.
                stayed.append(stayed[0])
                travel_type.append(stayed[1])

                WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"noQuotes")]')))
                summary = item.find_element_by_xpath('.//*[contains(@class,"noQuotes")]')
                comments.append(summary.text)

                WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"ratingDate")]')))
                rating_date = item.find_element_by_xpath('.//*[contains(@class,"ratingDate")]')
                review_date.append(str(rating_date.get_attribute("title")))

                WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.XPATH,'.//*[contains(@class,"partial_entry")]')))
                # find_element returns only the first matching node.
                review = item.find_element_by_css_selector(".partial_entry")
                comment = review.text
                print(comment)
            except (NoSuchElementException) as e:
                continue
    try:
        element = WebDriverWait(driver, 100).until(EC.element_to_be_clickable((By.XPATH,'.//*[contains(@class,"nav next taLnk ui_button primary")]')))
        element.click()
        time.sleep(2)
    except (ElementClickInterceptedException,NoSuchElementException) as e:
        print(e)
        break
Related
# Explicit wait shared by every lookup below; each until() call polls for up
# to 20 seconds before raising.
wait = WebDriverWait(driver, 20)
driver.get('https://beta.clinicaltrials.gov/')  # getting website url
driver.maximize_window()
time.sleep(1)

# Pick the country in the location search box on the home page.
country = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/ctg-home/div/div[2]/ctg-advanced-search-home/div[2]/div[1]/fieldset/div[2]/div[3]/ctg-location-search-input/form/div[2]/div/label')))
country.click()
searchBar = driver.find_element("id", 'location-input')
searchBar.send_keys("Singapore")  # input country name into searchBar
search_dropdown = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="mat-option-14"]/span')))  # wait until the suggestion is clickable
search_dropdown.click()
search_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div/ctg-home/div/div[2]/ctg-advanced-search-home/div[2]/div[2]/div/div[2]/button')))
search_button.click()

# finding filter button for recruiting status
filter_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="filter-button-statusGroup"]')))
filter_button.click()
# clicking on 'recruiting' status
recruiting = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="adv-check-status"]/div[2]/div[2]/div/label')))
recruiting.click()

# scraping each clinical trial details
clinical_trial = {}  # empty dict to store details
name_list = []
phone_list = []
email_list = []
enrollment = []
condition_list = []

# XPath prefixes shared by the lookups on the study details page.
hit_card_xpath = '//*[@id="content"]/div/ctg-search-results/div[2]/div/div[2]/div/div[2]/div[1]/ctg-search-hit-card[{}]/div/header/a'
contact_info_xpath = '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[2]/ctg-study-contacts-and-locations/div/div/div/ctg-study-contact-info'
overview_xpath = '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview'

# loop to go through all the clinical trials in the search page (10 per page)
for i in range(1, 11):
    time.sleep(2)  # wait 2s to let page load
    trials = driver.find_element("xpath", hit_card_xpath.format(i))
    trials.click()

    # getting contact person name
    name = wait.until(EC.visibility_of_element_located((By.XPATH, contact_info_xpath + '/p[1]/span')))
    name_list.append(name.text)  # adding each name to the list

    # phone number of contact person
    phone = wait.until(EC.visibility_of_element_located((By.XPATH, contact_info_xpath + '/p[2]/span')))
    phone_list.append(phone.text)  # adding each phone number to the list

    # email of contact person
    email = wait.until(EC.visibility_of_element_located((By.XPATH, contact_info_xpath + '/p[3]/ctg-study-contact-email/span/a')))
    email_list.append(email.text)  # adding each email address to the list

    # number of enrollment
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH, overview_xpath + '/div[3]/div[2]/div[3]/div[2]')))
    enrollment.append(enrollment_num.text)  # adding each enrollment number to the list

    # condition of study
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH, overview_xpath + '/div[3]/div[2]/div[1]/div[2]')))
    condition_list.append(conditions.text)  # adding conditions of the study to list

    driver.back()  # return to search page

# adding all the different list details to the clinical_trial dict
clinical_trial["name"] = name_list
clinical_trial["phone_num"] = phone_list
clinical_trial["email_address"] = email_list
clinical_trial["Enrollment"] = enrollment
clinical_trial["Conditions"] = condition_list
I am having an issue where Selenium cannot find the XPath for enrollment_num inside the loop. The loop runs through the 10 clickable links on the web page, but it raises a TimeoutException at the 9th link. Why is that? When I change the loop to iterate through 8 links instead of the usual 10, it works fine. It's just that one link which causes the error.
Page number 9 is different from all the other pages. The difference is hard to spot. Tip: to compare strings I use Notepad++ with the Compare plugin.
This page does not have these 2 elements:
enrollment_num =...ctg-study-overview/div[3]/div[2]/di...'
here it is:
enrollment_num =...ctg-study-overview/div[2]/div[2]/di...
conditions = ...ctg-study-overview/div[3]/di...
here it is:
...ctg-study-overview/div[2]/di...
This is why it runs into a timeout. You could wrap these lookups in a try/except to prevent the program from crashing. Below is a quick fix; of course, you should tidy it up. I hope this helps.
from selenium.common.exceptions import TimeoutException

# Prefix shared by both layouts of the study overview section; only the index
# of the first div after it differs (div[3] on most pages, div[2] on some).
overview_xpath = '//*[@id="studyDetailsInfo"]/ctg-study-info/div/ctg-study-info-view/div/div[1]/ctg-study-overview'

# number of enrollment
try:
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH,
        overview_xpath + '/div[3]/div[2]/div[3]/div[2]')))
except TimeoutException:
    # The wait timed out on the usual layout, so fall back to the other one.
    print("enrollement div[3] but div[2]")
    enrollment_num = wait.until(EC.visibility_of_element_located((By.XPATH,
        overview_xpath + '/div[2]/div[2]/div[3]/div[2]')))
enrollment.append(enrollment_num.text)  # adding each enrollment number to the list

# condition of study
try:
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH,
        overview_xpath + '/div[3]/div[2]/div[1]/div[2]')))
except TimeoutException:
    # Same fallback as above for the conditions field.
    print("condition_list non div[3] but div[2]")
    conditions = wait.until(EC.visibility_of_element_located((By.XPATH,
        overview_xpath + '/div[2]/div[2]/div[1]/div[2]')))
condition_list.append(conditions.text)  # adding conditions of the study to list
I have an e-commerce page and there are multiple products on a page. I need to click the link of a product then return on the main page and click the link of the next product, but when I return, the elements can't be found anymore.
# Raw string: the backslashes in a Windows path must not be read as escapes.
Path = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(Path)
driver.get("https://www.emag.ro/")

# Search for laptops from the home page.
search_bar = driver.find_element_by_id("searchboxTrigger")
search_bar.send_keys("laptopuri")
search_bar.send_keys(Keys.RETURN)

try:
    main = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "main-container"))
    )
    print("Page loaded,main retrived succesfully")
    print(" ")
except Exception:
    # Close the browser, then surface the real failure instead of carrying on
    # with no results container.
    driver.quit()
    raise

products = main.find_elements_by_css_selector("div.card-item.js-product-data")

# NOTE: the references in `products` are located once, before any navigation;
# the script visits a product page and comes back on every iteration.
for product in products:
    raw_name = product.text
    raw_price = product.find_element_by_css_selector("p.product-new-price").text
    link = product.find_element_by_tag_name("a")
    # clicking the link
    link.click()
    try:
        spec_page = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "col-md-12"))
        )
    except Exception:
        driver.quit()
        raise
    print(spec_page)
    driver.back()
After the first iteration, I get the following error :
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document on line raw_name = product.text,basically at the beginning of the loop.
I assume the page is not loading properly or something like that, I tried using time.sleep before going through the loop but nothing
When you call driver.back(), the browser goes back to the previous page, and by the time it reaches the original page all previously located elements have become stale. You need to locate them again, as shown below.
This should handle the exception.
# Number of product cards on the results page; only the count is kept because
# element references do not survive the navigation done inside the loop.
products = len(main.find_elements_by_css_selector("div.card-item.js-product-data"))
for product in range(products):
    # Re-locate the cards on every iteration so the references are fresh
    # after the previous driver.back().
    # NOTE(review): `main` itself was located before navigating — confirm it
    # stays valid after driver.back() on this site.
    elements = main.find_elements_by_css_selector("div.card-item.js-product-data")
    card = elements[product]
    raw_name = card.text
    raw_price = card.find_element_by_css_selector("p.product-new-price").text
    link = card.find_element_by_tag_name("a")
    # clicking the link
    link.click()
    try:
        spec_page = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "col-md-12"))
        )
    except Exception:
        # Close the browser, then surface the real failure instead of
        # continuing with a dead session.
        driver.quit()
        raise
    print(spec_page)
    driver.back()
I'm trying to click on a div to get to the next page of a table (the URL does not change when the page changes). The "go to next page" div has the same class as the "go to previous page" div.
I've used:
# Pager chevron; the same reference is clicked before every page scrape.
elem = driver.find_element_by_class_name('cXQSjq')
timeout = 30

# One click-and-scrape for the first page plus six more for the other pages.
for i in range(7):
    elem.click()
    try:
        WebDriverWait(driver, timeout).until(EC.visibility_of_element_located((By.CLASS_NAME, "gxzFwa")))
    except TimeoutException:
        # Close the browser and surface the timeout; the session is unusable
        # after quit().
        driver.quit()
        raise

    names = driver.find_elements_by_class_name('iBSZGH')
    # The first match is skipped.
    for company in names[1:]:
        name.append(company.text)

    mostdata = driver.find_elements_by_class_name('gvgMSe.gJYnHB')
    for most in mostdata:
        # Strip thousands separators. This is done once only: the result is a
        # str, which has no .text attribute.
        data.append(most.text.replace(',', ''))

    last7dsales = driver.find_elements_by_class_name('fGpHsy.kpsxyE')
    for last in last7dsales:
        last7day.append(last.text.replace(',', ''))
and it worked to get me to page 2, but after page 2 it gives me the error:
selenium.common.exceptions.ElementClickInterceptedException: Message: element click
intercepted: Element <div class="styles__Chevron-sc-1buchb9-1 cXQSjq">...</div> is
not clickable at point (702, 656). Other element would receive the click: <div
id="hs-eu-cookie-confirmation-inner">...</div>
(Session info: chrome=92.0.4515.107)
Is there a problem with calling elem.click() again after using Selenium to find other parts of the page? I'm scraping data from
https://nonfungible.com/market/history
I guess the issue here is that the next page button appears on the bottom of the page, so to click on it you should first scroll this element into the view.
See if this will work now:
from selenium.webdriver.common.action_chains import ActionChains

actions = ActionChains(driver)
# Pager chevron; the same reference is clicked before every page scrape.
elem = driver.find_element_by_class_name('cXQSjq')
timeout = 30

# One click-and-scrape for the first page plus six more for the other pages.
for i in range(7):
    # Move to the chevron first so it is scrolled into view before the click.
    actions.move_to_element(elem).perform()
    time.sleep(0.3)
    elem.click()
    try:
        WebDriverWait(driver, timeout).until(EC.visibility_of_element_located((By.CLASS_NAME, "gxzFwa")))
    except TimeoutException:
        # Close the browser and surface the timeout; the session is unusable
        # after quit().
        driver.quit()
        raise

    names = driver.find_elements_by_class_name('iBSZGH')
    # The first match is skipped.
    for company in names[1:]:
        name.append(company.text)

    mostdata = driver.find_elements_by_class_name('gvgMSe.gJYnHB')
    for most in mostdata:
        # Strip thousands separators. This is done once only: the result is a
        # str, which has no .text attribute.
        data.append(most.text.replace(',', ''))

    last7dsales = driver.find_elements_by_class_name('fGpHsy.kpsxyE')
    for last in last7dsales:
        last7day.append(last.text.replace(',', ''))
I didn't check correctness of the rest of your code, but clicking the next button should work now
With Python3 and selenium I want to automate the search on a public information site. In this site it is necessary to enter the name of a person, then select the spelling chosen for that name (without or with accents or name variations), access a page with the list of lawsuits found and in this list you can access the page of each case.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
import re
# Name that will be searched
name = 'JOSE ROBERTO ARRUDA'

# Create path, search start link, and empty list to store information
firefoxPath = "/home/abraji/Documentos/Code/geckodriver"
link = 'https://ww2.stj.jus.br/processo/pesquisa/?aplicacao=processos.ea'
processos = []

# Call driver and go to first search page
driver = webdriver.Firefox(executable_path=firefoxPath)
try:
    driver.get(link)

    # Position cursor, fill and click
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idParteNome'))).click()
    time.sleep(1)
    driver.find_element_by_xpath('//*[@id="idParteNome"]').send_keys(name)
    time.sleep(6)
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarFormularioExtendido'))).click()

    # Mark all spelling possibilities for searching
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoMarcarTodos'))).click()
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarMarcados'))).click()
    time.sleep(1)

    # Check how many pages of data there are - to be used in "for range"
    capta = driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]').text
    print(capta)
    # First run of digits in the pagination text, plus one so that range()
    # below includes the last page.
    paginas = int(re.search(r'\d+', capta).group(0)) + 1
    print(paginas)

    # Capture routine
    for acumula in range(1, paginas):
        # Fill the field with the page number and press enter
        page_input = driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input')
        page_input.send_keys(str(acumula))
        page_input.send_keys(Keys.RETURN)
        time.sleep(2)

        # Captures the number of processes found - qt
        # NOTE(review): this looks like the total across all pages rather
        # than the count on the current page, so the inner loop can run past
        # the last link of a page — confirm against the site.
        qt = driver.find_element_by_xpath('//*[@id="idDivBlocoMensagem"]/div/b').text
        qt = int(qt) + 2
        print(qt)

        # Iterate from found number of processes
        for item in range(2, qt):
            # Find the XPATH of each process link - start at number 2
            vez = '//*[@id="idBlocoInternoLinhasProcesso"]/div[' + str(item) + ']/span[1]/span[1]/span[1]/span[2]/a'
            print(vez)

            # Access the direct link and click
            element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, vez)))
            element.click()

            # Run tests to get data; each field falls back to a placeholder
            # when its element is missing from the details page.
            try:
                num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
            except NoSuchElementException:
                num_unico = "sem_numero_unico"

            try:
                nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
            except NoSuchElementException:
                nome_proc = "sem_nome_encontrado"

            try:
                data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
            except NoSuchElementException:
                data_autu = "sem_data_encontrada"

            # Fills dictionary and list
            dicionario = {"num_unico": num_unico,
                          "nome_proc": nome_proc,
                          "data_autu": data_autu
                          }
            processos.append(dicionario)

            # Return a page to click on next process
            driver.execute_script("window.history.go(-1)")
finally:
    # Close driver, even when a lookup above fails part-way through.
    driver.quit()
In this case I captured the number of link pages (3) and the total number of links (84). So my initial idea was to do the "for" three times and within them split the 84 links
The direct address of each link is in XPATH (//*[#id="idBlocoInternoLinhasProcesso"]/div[41]/span[1]/span[1]/span[1]/span[2]/a) which I replace with the "item" to click
For example, when it arrives at number 42 I have an error because the first page only goes up to 41
My problem is how to go to the second page and then restart only "for" secondary
I think the ideal would be to know the exact number of links on each of the three pages
Anyone have any ideas?
Code below is "Capture routine":
wait = WebDriverWait(driver, 20)
#...
# Walk every results page: open each process link found on the page, record
# its details, go back, then move on while a "next page" control exists.
while True:
    links = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//span[contains(@class,'classSpanNumeroRegistro')]")))
    print("links len", len(links))

    for i in range(1, len(links) + 1):
        # Access the direct link and click. The link is located again by
        # position on every iteration instead of reusing `links`, because the
        # page is left and revisited inside this loop.
        wait.until(EC.element_to_be_clickable((By.XPATH, f"(//span[contains(@class,'classSpanNumeroRegistro')])[{i}]//a"))).click()

        # Run tests to get data; each field falls back to a placeholder when
        # its element is missing from the details page.
        try:
            num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
        except NoSuchElementException:
            num_unico = "sem_numero_unico"

        try:
            nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
        except NoSuchElementException:
            nome_proc = "sem_nome_encontrado"

        try:
            data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
        except NoSuchElementException:
            data_autu = "sem_data_encontrada"

        # Fills dictionary and list
        dicionario = {"num_unico": num_unico,
                      "nome_proc": nome_proc,
                      "data_autu": data_autu
                      }
        processos.append(dicionario)

        # Return a page to click on next process
        driver.execute_script("window.history.go(-1)")

    # wait.until(EC.presence_of_element_located((By.CLASS_NAME, "classSpanPaginacaoImagensDireita")))
    # find_elements returns an empty list when there is no next-page control,
    # which ends the loop.
    next_page = driver.find_elements_by_css_selector(".classSpanPaginacaoProximaPagina")
    if not next_page:
        break
    next_page[0].click()
You can try running the loop for as long as the next button is present on the screen. The logic will look like this:
# Click the "next page" control when it is found and displayed; report when
# it cannot be found.
try:
    next_page = driver.find_element_by_class_name('classSpanPaginacaoProximaPagina')
    is_visible = next_page.is_displayed()
    if is_visible:
        next_page.click()
except NoSuchElementException:
    print('next page does not exists')
I am trying to switch countries programmatically in this site for some automation testing, the prices are different in each country so I am programming a little tool to help me decide where to buy from.
First, I get all the currencies into a list by doing this:
def get_all_countries():
    """Open the site selector and return the text of every link in it.

    Clicks the "selected-currency" control, waits for the
    "site-selector-list" element to become visible, and collects the text of
    each ``<a>`` tag found inside it. The dropdown is not closed again here.
    """
    currency_toggle = WebDriverWait(driver1, 10).until(
        EC.element_to_be_clickable((By.CLASS_NAME, "selected-currency"))
    )
    currency_toggle.click()
    selector_list = WebDriverWait(driver1, 10).until(
        EC.visibility_of_element_located((By.CLASS_NAME, "site-selector-list"))
    )
    return [anchor.text for anchor in selector_list.find_elements_by_tag_name('a')]
For example, it returns: ['United Kingdom', 'United States', 'France', 'Deutschland', 'España', 'Australia', 'Россия'] and then, I iterate through the list and each time calling this function:
def set_country(text):
    """Navigate to the site whose link text in the site selector equals *text*.

    Opens the selector dropdown when it is not already displayed, then looks
    for the matching link and loads its ``href``. Nothing happens when no
    link matches.
    """
    is_change_currency_displayed = driver1.find_element_by_id("siteSelectorList").is_displayed()
    if not is_change_currency_displayed:  # get_all_countries function leaves dropdown open. Check if it is open before clicking it.
        one = WebDriverWait(driver1, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "selected-currency")))
        one.click()
    div = WebDriverWait(driver1, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, "site-selector-list")))
    a_tags = div.find_elements_by_tag_name('a')
    for a in a_tags:
        try:
            if a.text == text:
                driver1.get(a.get_attribute("href"))
                # Stop here: navigating invalidates the remaining anchors, so
                # reading them would raise StaleElementReferenceException.
                return
        except StaleElementReferenceException:
            # The list changed underneath us; retry once with fresh elements
            # and do not keep iterating the stale ones afterwards.
            set_country(text)
            return
When comparing a.text to text, I got a StaleElementReferenceException, I read online that it means the object is changed from when I saved it, and a simple solution is to call the function again. However, I don't like this solution and this code a lot, I think it is not effective and takes too much time, any ideas?
EDIT:
def main(url):
    """Report the price shown for the product at *url* on every site.

    Loads *url*, opens the site selector, and visits the link of each listed
    site in turn, reading the ".current-price" element there.

    Returns one "In <site> : <price>" line per site, each ending in " \\n";
    a site whose price does not become visible within 10 seconds gets a
    "Timed out waiting for page to load" line instead. Each line is also
    printed as it is produced.
    """
    driver1.get(url)
    one = WebDriverWait(driver1, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "selected-currency")))
    one.click()
    el = WebDriverWait(driver1, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, "site-selector-list")))

    # Read text and href from every anchor before navigating anywhere: once
    # driver1.get() loads another page these element references are stale.
    sites = [(a.text, a.get_attribute('href')) for a in el.find_elements_by_tag_name('a')]

    lines = []
    for atext, ahref in sites:
        try:
            # The href is loaded directly, so the dropdown does not need to
            # be reopened between sites.
            driver1.get(ahref)
            current_price = WebDriverWait(driver1, 10).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, ".current-price")))
            price_text = current_price.text
            lines.append("In " + atext + " : " + price_text + ' \n')
            print("In", atext, ":", price_text)
        except TimeoutException:
            print("In", atext, ":", "Timed out waiting for page to load")
            lines.append("In " + atext + " : " + " Timed out waiting for page to load" + ' \n')
    return ''.join(lines)


main('http://us.asos.com/asos//prd/7011279')
If I understand the problem statement correctly, Adding break statement solves the problem:
def set_country(text):
    """Navigate to the site whose link text in the site selector equals *text*.

    Opens the selector dropdown when it is not already displayed, then loads
    the ``href`` of the first link whose text matches. Nothing happens when
    no link matches.
    """
    is_change_currency_displayed = driver1.find_element_by_id("siteSelectorList").is_displayed()
    if not is_change_currency_displayed:  # get_all_countries function leaves dropdown open. Check if it is open before clicking it.
        one = WebDriverWait(driver1, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "selected-currency")))
        one.click()
    div = WebDriverWait(driver1, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, "site-selector-list")))
    a_tags = div.find_elements_by_tag_name('a')
    for a in a_tags:
        try:
            if a.text == text:
                driver1.get(a.get_attribute("href"))
                # The requested page is loaded; the remaining anchors belong
                # to the old page and must not be touched.
                break
        except StaleElementReferenceException:
            # Retry with freshly located elements, then stop: the anchors
            # left in this loop are stale as well.
            set_country(text)
            break
DOM is updated once driver.get is called. so, the references related to old page (i.e., a_tags) won't work.
Instead, you should break the loop and come out as soon as the given country page is retrieved using driver.get once the condition is satisfied. So, you set the country you want and no need to iterate over and over again to check if condition, which obviously results in StaleElementReferenceException.
If your stale element is the a tag and not the div, you can iterate over the a tags length and get each element's text through the div:
# Walk the links by index and locate the i-th anchor again on every iteration
# so each read uses a current element instead of one captured earlier.
for i in range(len(div.find_elements_by_tag_name('a'))):
    link = div.find_elements_by_tag_name('a')[i]
    if link.text == text:
        driver1.get(link.get_attribute("href"))
        # The page has changed; `div` belongs to the old document, so stop.
        break
That way you always get the most recent element from the DOM.
If your stale element is the div then you'll need to verify that the drop down isn't disappearing after your one.click() with hovering it or some other way.
Another approach would be to change your a.text to have a wait:
# Explicit wait with a timeout of 10, polling at a frequency of 1, that
# ignores StaleElementReferenceException raised while polling.
wait = WebDriverWait(driver, 10, poll_frequency=1, ignored_exceptions=[StaleElementReferenceException])
# NOTE(review): By.YourBy is a placeholder, not a real locator strategy —
# replace it with an actual (By.<strategy>, "<value>") locator. The text to
# wait for also appears to be missing from this call — confirm the expected
# arguments against the Selenium documentation.
a = wait.until(EC.text_to_be_present_in_element((By.YourBy)))