Can't locate div element - Selenium - python

I am unable to locate a div element using the css_selector. Please find my code below.
driver = wb.Firefox()
driver.get("https://www.jumia.com.ng/")
driver.maximize_window() #//For maximizing window
driver.implicitly_wait(20) #//gives an implicit wait for 20 seconds
#WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'star _s')))
#driver.switch_to.frame(driver.find_element_by_class_name('star _s'))
#selecting phones and tablets
clickObj = driver.find_element_by_xpath("/html/body/div[1]/main/div[1]/div[1]/div[1]/div/a[4]/span").click()
#selecting mobile phones only
driver.find_element_by_xpath("/html/body/div[1]/main/div[2]/div[1]/div/article[1]/a[2]").click()
#selecting smartphones only
driver.find_element_by_xpath("/html/body/div[1]/main/div[2]/div[1]/div/article[1]/a[2]").click()
#selecting android phones only
driver.find_element_by_xpath("/html/body/div[1]/main/div[2]/div[1]/div/article[1]/a[1]").click()
product_info = driver.find_elements_by_css_selector("div.info")
product_name = list()
price = list()
rating = list()
for info in product_info:
#print(info.find_elements_by_class_name("rev")
product_name.append(info.find_element_by_css_selector("h3.name").text)
rating.append(info.find_element_by_css_selector("div.rev").text)
price.append(info.find_element_by_css_selector("div.prc").text)
#rating.append(info.find_element_by_class_name("rev").text)
data = {"product_name":product_name, "rating":rating, "price":price}
df_product = pd.DataFrame.from_dict(data)
It returns the below error despite the presence of an element such as "rev":
NoSuchElementException: Message: Unable to locate element: div.rev
This is the link to the site https://www.jumia.com.ng/android-phones/
Please assist. What am I doing wrong?

The issue is caused by the missing space between the tag name and the class name in the selector passed to find_elements_by_css_selector:
product_info = driver.find_elements_by_css_selector("div .info")
In addition, product_info is a plain Python list (<class 'list'>), so Selenium element methods cannot be called on it directly. To fill the other lists, loop over product_info with a for loop and read the .text of each element.
Refer to below code as the sample one:
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

driver = webdriver.Firefox()
try:
    driver.get("https://www.jumia.com.ng/android-phones/")
    driver.maximize_window()  # maximize the browser window
    # selecting android phones only
    product_info = driver.find_elements_by_css_selector("div .info")
    for info in product_info:
        try:
            print(info.text)
        except WebDriverException:
            # Best effort: stop printing as soon as an element can no longer
            # be read, but let non-Selenium errors surface.
            break
finally:
    # Always end the browser session, even if loading or lookup failed.
    driver.quit()

Related

How to extract all the google reviews from google map

I need to scrape all the Google reviews. There are 90,564 reviews on my page. However, the code I wrote scrapes only the top 9 reviews; the other reviews are not scraped.
The code is given below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# specify the url of the business page on Google
# (the coordinates segment of a Google Maps place URL starts with "@")
url = 'https://www.google.com/maps/place/ISKCON+temple+Bangalore/@13.0098328,77.5510964,15z/data=!4m7!3m6!1s0x0:0x7a7fb24a41a6b2b3!8m2!3d13.0098328!4d77.5510964!9m1!1b1'
# create an instance of the Chrome driver
driver = webdriver.Chrome()
# navigate to the specified url
driver.get(url)
# Wait for the reviews to load
wait = WebDriverWait(driver, 20) # increased the waiting time
review_elements = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'wiI7pd')))
# extract the text of each review
reviews = [element.text for element in review_elements]
# print the reviews
print(reviews)
# close the browser
driver.quit()
What should I change in the code to extract all the reviews?
Here is the working code for you after launching the url
import time

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

# Locators for the total-count label, the reviewer name and the review text.
totalRev = "div div.fontBodySmall"
username = ".d4r55"
reviews = "wiI7pd"

# Give up after this many consecutive passes that add no new reviewer, so the
# loop cannot spin forever when the page stops loading more reviews.
MAX_STALLED_PASSES = 5

wait = WebDriverWait(driver, 20)

# Look the label up once and reuse it for both reading and clicking.
total_label = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, totalRev)))
# Assumes the label text starts with the review count; strip thousands separators.
totalRevCount = int(total_label.get_attribute("textContent").split(' ')[0].replace(',', '').replace('.', ''))
print("totalRevCount - ", totalRevCount)
total_label.click()

mydict = {}  # reviewer name -> review text; keying by name avoids duplicates
found = 0
stalled_passes = 0
while found < totalRevCount and stalled_passes < MAX_STALLED_PASSES:
    review_elements = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, reviews)))
    reviewer_names = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, username)))

    known_before = len(mydict)
    for rev, name in zip(review_elements, reviewer_names):
        mydict[name.text] = rev.text
        if len(rev.text) == 0:
            # A rating-only review was reached: force the outer loop to end.
            found = totalRevCount + 1
            break
    else:
        found = len(mydict)
    stalled_passes = stalled_passes + 1 if len(mydict) == known_before else 0

    # Scroll the review pane so that further reviews get loaded.
    for _ in range(8):
        ActionChains(driver).key_down(Keys.ARROW_DOWN).perform()

    print("found - ", found)
    print(mydict)
    time.sleep(2)  # wait for the newly loaded reviews
Explanation -
Get the locators for user name and review since we are going to create a key-value pair which will be useful in creating a non-duplicate result
You need to first get the total number of reviews/ratings that are present for that given location.
Get the username and review for the "visible" part of the webpage and store it in the dictionary
Scroll down the page and wait a few seconds
Get the username and review again and add them to dictionary. Only new ones will be added
As soon as a review with no text (rating only) is encountered, the loop ends and you have your results.
NOTE - If you want all reviews regardless of whether they contain text, you can remove the "if" block.
I think you'll need to scroll down first, and then get all the reviews.
scroll_value = 230
# to scroll by value: the offset is handed over as a script argument
# (available in JS as arguments[0]) instead of being spliced into the source
driver.execute_script('window.scrollBy(0, arguments[0])', scroll_value)
# to get the current scroll value on the y axis
scroll_Y = driver.execute_script('return window.scrollY')
That might be because the elements don't get loaded otherwise.
Since there are over 90,000 of them, you might consider scrolling down a little, then getting the reviews, and repeating.
Resource: https://stackoverflow.com/a/74508235/20443541

How to scrape multiple pages from search results all at once

I am trying to scrape multiple pages from search results and print it all at once, but got an empty list instead.
Here is the code I used:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
element_list = []
for skip in range(0, 20, 10):
page_url = "https://jdih.esdm.go.id/index.php/web/result?tahun_terbit=2022,2021,2020,2019,2018,2017,2016,2015,2014&skip=" + str(skip)
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(page_url)
Tahun = driver.find_elements(By.CSS_SELECTOR, 'div.numb separator')
No_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'span.result-value')
Nama_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__title')
Deskripsi = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__desc')
for i in range(len(Tahun)):
element_list.append([Tahun[i].text, No_Peraturan[i].text, Nama_Peraturan[i].text, Deskripsi[i].text])
print(element_list)
driver.close()
The code returns only an empty list, as shown in this picture:
enter image description here
Note: the website does not use 'page' as generally use for search results, but uses 'skip' instead
Anyone can help me with this ?
The CSS selector to find Tahun elements is incorrect as there are 2 classes assigned to the div. This results in Tahun being an empty list and since the loop to append text to element_list is based on the length of Tahun, nothing gets appended.
Update the selector to below.
Tahun = driver.find_elements(By.CSS_SELECTOR, 'div.numb.separator')

No data collected when I extract info from a website using xpath

I'd need to extract information from a website. This website has information inside the following path:
<div class="accordion-block__question">
<div class="accordion-block__text">Server</div></div>
...
<div class="block__col"><b>Country</b></div>
Running
try:
# Country
c=driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]").get_attribute('textContent')
country.append(c)
except:
country.append("Error")
With this code I end up with a df in which every value is "Error". I'm interested in all the fields (though for fixing this issue, just one would be great), including the Trustscore (a number), but I don't know whether it is possible to get it. I'm using Selenium with the Chrome web driver.
The website is https://www.scamadviser.com/check-website.
CODE
This is the entire code:
def _inner_text_or_error(driver, xpath, scroll=False):
    """Return the innerText of the first element matching *xpath*.

    When *scroll* is true, the element's scrollTop is set to its scrollHeight
    before its text is read. Any failure yields the string "Error" so that
    one missing field does not abort the whole run.
    """
    try:
        element = driver.find_element_by_xpath(xpath)
        if scroll:
            # execute_script returns None here, so keep a handle on the
            # element and read its attribute from that handle afterwards.
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", element)
        return element.get_attribute('innerText')
    except Exception:
        return "Error"


def scam(df):
    """Query scamadviser.com for every unique URL in ``df['URL']``.

    Args:
        df: DataFrame with a 'URL' column holding the sites to check.

    Returns:
        A new DataFrame with the columns 'URL', 'Trustscore', 'Country' and
        'ISP', one row per unique URL. 'Trustscore' is always None because its
        extraction is not implemented yet; 'Country' and 'ISP' hold "Error"
        when the corresponding element could not be read.
    """
    chrome_options = webdriver.ChromeOptions()
    trust = []
    country = []
    isp_country = []
    query = df['URL'].unique().tolist()
    driver = webdriver.Chrome('mypath', chrome_options=chrome_options)
    try:
        for x in query:
            driver.get('https://www.scamadviser.com/check-website/' + x)
            # missing trustscore: keep a placeholder so every column has
            # exactly one entry per URL and the frame can be built.
            trust.append(None)
            # Country
            country.append(_inner_text_or_error(
                driver,
                "//div[contains(@class,'block__col') and contains(text(),'Country')]",
                scroll=True,
            ))
            # ISP country
            isp_country.append(_inner_text_or_error(
                driver,
                "//div[contains(@class,'block__col') and contains(text(),'ISP')]",
            ))
    finally:
        # Close the browser even if navigation fails part-way through.
        driver.quit()
    # Create dataframe
    columns = {'URL': query, 'Trustscore': trust, 'Country': country, 'ISP': isp_country}
    return pd.DataFrame(columns)
You can try for example with df['URL'] equal to
stackoverflow.com
gitHub.com
You are looking for innerText not textContent.
Code :
try:
    # Country
    c = driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]").get_attribute('innerText')
    print(c)
    country.append(c)
except Exception:
    # Record a marker instead of aborting when the element cannot be read.
    country.append("Error")
Updated 1 :
In case already used locator is correct.
driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]"))
or may be try with both the options with this xpath :-
//div[contains(@class,'block__col')]/b[text()='Country']
Updated 2 :
# Defaults used when a field cannot be read; a field that was read
# successfully keeps its value even if the next lookup fails.
country_text = "Error"
isp_text = "Error"
try:
    wait = WebDriverWait(driver, 30)
    # missing trustscore
    # Country
    time.sleep(2)
    ele = driver.find_element_by_xpath("//div[contains(@class,'block__col')]/b[text()='Country']")
    driver.execute_script("arguments[0].scrollIntoView(true);", ele)
    country_text = ele.get_attribute('innerText')
    time.sleep(2)
    # ISP country
    ic = driver.find_element_by_xpath("//div[contains(@class,'block__col')]/b[text()='ISP']")
    # Scroll the ISP element itself into view (not the Country one again).
    driver.execute_script("arguments[0].scrollIntoView(true);", ic)
    isp_text = ic.get_attribute('innerText')
except Exception:
    # Best effort: whatever was not read stays "Error".
    pass
country.append(country_text)
isp_country.append(isp_text)
Update 3 :
to get the Company data, Country name.
use this xpath :
//div[text()='Company data']/../following-sibling::div/descendant::b[text()='Country']/../following-sibling::div
also, make sure few things before using this xpath.
Launch browser in full screen mode.
Scroll using JS, and then use scrollIntoView or an ActionChains move.
Code :-
driver.maximize_window()
time.sleep(2)
driver.execute_script("window.scrollTo(0, 1000)")
time.sleep(2)
wait = WebDriverWait(driver, 20)
# Wait for the "Company data" heading, then scroll within it.
company_data_header = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[text()='Company data']")))
driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", company_data_header)
# now use the mentioned xpath.
company_data_country_name = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[text()='Company data']/../following-sibling::div/descendant::b[text()='Country']/../following-sibling::div")))
print(company_data_country_name.text)

Screenshot all child elements of element

I need to take a screenshot of all of the div's and p's within a selected element individually, here's what I have so far:
import selenium
from selenium import webdriver
url = 'www.example.com'
driver = webdriver.Firefox()
driver.get(url)
i = 0
body= driver.find_element_by_id('body-text')
for element in body:
i=i+1
image_title = "pic"+str(i)+".jpg"
print("saving"+image_title)
item.screenshot(image_title)
What is the proper way to go by each element individually?
Thank you
# Walk every <p> and <div> below `body`, at any nesting depth.
for i, element in enumerate(body.find_elements_by_xpath(".//p | .//div"), start=1):
    driver.execute_script("arguments[0].scrollIntoView();", element)
    # Element screenshots are written as PNG data, so use a .png file name.
    image_title = "pic" + str(i) + ".png"
    print("saving" + image_title)
    element.screenshot(image_title)
To get all divs and p elements regardless of nesting you can do the following.

looping through a dropdown menu using Selenium and Python

I'm trying to loop through a dropdown menu on at this url: https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006
So, for example, the first dropdown menu - under options - lists out different materials and I want to select each one in turn and then gather some other information from the webpage before moving on to the next material. Here is my current code:
driver = webdriver.Firefox()
driver.get('https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006')
time.sleep(3)
driver.find_element_by_id('x-mark-icon').click()
select = Select(driver.find_element_by_name('Wiqj7mb4rsAq9LB'))
options = select.options
optionsList = []
driver.find_elements_by_class_name('select-wrapper')[0].click()
element = driver.find_element_by_xpath("//select[@name='Wiqj7mb4rsAq9LB']")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
# driver.execute_script("arguments[0].scrollIntoView();", element)
for option in options: #iterate over the options, place attribute value in list
optionsList.append(option.get_attribute("value"))
for optionValue in optionsList:
print("starting loop on option %s" % optionValue)
# select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='Wiqj7mb4rsAq9LB']")))
# select = Select(select)
select.select_by_value(optionValue)
I started with just the loop, but got this error:
ElementNotInteractableException: Message: Element <option> could not be scrolled into view
I then added the webdriverwait and get a TimeoutException error.
I then realized I should probably click on the wrapper in which the dropdown is held, so I added the click, which does pop up the menu, but I still got the TimeoutException.
So I thought, maybe I should move to the element, which I tried with the action chain lines and I got this error
WebDriverException: Message: TypeError: rect is undefined
I tried to avoid that error by using this code instead:
# driver.execute_script("arguments[0].scrollIntoView();", element)
Which just resulted in the timeoutexception again.
I'm pretty new to Python and Selenium and have basically just been modifying code from SO answers to similar questions, but nothing has worked.
I'm using python 3.6 and the current versions of Selenium and firefox webdriver.
If anything is unclear or if you need more info just let me know.
Thanks so much!
EDIT: Based on the answer and comments by Kajal Kunda, I've updated my code to the following:
material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
driver.execute_script("arguments[0].click();", material_dropdown)
materials=driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
for material in materials:
    # material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
    # driver.execute_script("arguments[0].click();", material_dropdown)
    # materials=driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
    material_ele=material.find_element_by_tag_name('span')
    if material_ele.text!='':
        material_ele.click()
        time.sleep(5)
        price = driver.find_element_by_class_name("dataPriceDisplay")
        print(price.text)
The result is that it successfully prints the price for the first type of material, but then it returns:
StaleElementReferenceException: Message: The element reference of <li class=""> is stale;...
I've tried variations of having the hashed out lines in and outside of the loop, but always get a version of the StaleElementReferenceException error.
Any suggestions?
Thanks!
You could do the whole thing with requests. Grab the drop down list from the options listed in drop down then concatenate the value attributes into requests url that retrieves json containing all the info on the page. Same principle applies for adding in other dropdown values. The ids for each drop down selection are the value attributes of the options in the drop down and appear in the url I show separated by // for each drop down selection.
import requests
from bs4 import BeautifulSoup as bs

# The "{}" slot takes the value attribute of the selected material option.
url = 'https://www.accuform.com/product/getSku/danger-danger-authorized-personnel-only-MADM006/1/false/null//{}//WHFIw3xXmQx8zlz//6wr93DdrFo5JV//WdnO0RpwKpc4fGF'
startURL = 'https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006'
REQUEST_TIMEOUT = 30  # seconds; without it a stalled server blocks forever


def option_values(page, select_id):
    """Return the non-empty value attributes of the options of one <select>."""
    return [item['value'] for item in page.select('#' + select_id + ' option') if item.get('value')]


with requests.Session() as session:
    res = session.get(startURL, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()  # fail loudly rather than parse an error page
    page = bs(res.content, 'lxml')
    materials = option_values(page, 'Wiqj7mb4rsAq9LB')
    sizes = option_values(page, 'WvXESrTyQjM3Ciw')
    languages = option_values(page, 'WUYWGMePtpmpmhy')
    units = option_values(page, 'W91eqaJ0WPXwe9b')
    for material in materials:
        sku_response = session.get(url.format(material), timeout=REQUEST_TIMEOUT)
        sku_response.raise_for_status()
        data = sku_response.json()
        # 'dataMaterialBullets' holds an HTML fragment; list its <li> texts.
        bullets = bs(data['dataMaterialBullets'], 'lxml')
        lines = [item.text for item in bullets.select('li')]
        print(lines)
        print(data['dataPriceDisplay'])
        # etc......
Sample of JSON:
Try the code below. It should work.
def _option_labels(items):
    """Return the non-empty <span> texts of the given dropdown <li> elements."""
    labels = []
    for item in items:
        label = item.find_element_by_tag_name('span').text
        if label != '':
            labels.append(label)
    return labels


driver = webdriver.Firefox()
driver.get('https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006')
time.sleep(3)
driver.find_element_by_id('x-mark-icon').click()

# Code for material dropdown
material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
driver.execute_script("arguments[0].click();", material_dropdown)  # open
materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
material_optionsList = _option_labels(materials)
print(material_optionsList)
driver.execute_script("arguments[0].click();", material_dropdown)  # click again after reading

# Code for size dropdown
size_dropdown = driver.find_element_by_xpath("(//input[@class='select-dropdown'])[2]")
driver.execute_script("arguments[0].click();", size_dropdown)  # open
Sizes = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
size_optionsList = _option_labels(Sizes)
driver.execute_script("arguments[0].click();", size_dropdown)  # click again after reading
Output :
[u'Adhesive Vinyl', u'Plastic', u'Adhesive Dura-Vinyl', u'Aluminum', u'Dura-Plastic\u2122', u'Aluma-Lite\u2122', u'Dura-Fiberglass\u2122', u'Accu-Shield\u2122']
Hope you can do the rest from here. Let me know if it works for you.
EDIT Code for loop through and get the price value of materials.
# Walk the entries by index: `materials` is re-fetched after every selection,
# so the element handles from the previous pass must not be reused.
for material in range(len(materials)):
    material_ele = materials[material]
    if material_ele.text != '':
        #material_optionsList.append(material_ele.text)
        #material_ele.click()
        driver.execute_script("arguments[0].click();", material_ele)
        time.sleep(2)
        price = driver.find_element_by_id("priceDisplay")
        print(price.text)
        time.sleep(2)
        # Click the dropdown input again and look the entries up afresh.
        material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
        driver.execute_script("arguments[0].click();", material_dropdown)
        materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
Output :
$8.31
$9.06
$13.22
$15.91
$15.91

Categories

Resources