I need to take a screenshot of each of the divs and p elements within a selected element individually; here's what I have so far:
import selenium
from selenium import webdriver

url = 'http://www.example.com'
driver = webdriver.Firefox()
driver.get(url)

i = 0
body = driver.find_element_by_id('body-text')
for element in body:
    i = i + 1
    image_title = "pic" + str(i) + ".jpg"
    print("saving " + image_title)
    element.screenshot(image_title)
What is the proper way to go through each element individually?
Thank you
To get all div and p elements regardless of nesting, you can do the following:

for element in body.find_elements_by_xpath(".//p | .//div"):
    driver.execute_script("arguments[0].scrollIntoView();", element)
    # insert your code
    element.screenshot(image_title)
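Putting that together with the numbering from the question, a minimal sketch might look like the following (this assumes the container id 'body-text' from the question; note that element.screenshot saves PNG data, so a .png extension is used here):

from selenium import webdriver

driver = webdriver.Firefox()
driver.get('http://www.example.com')

# Container whose nested <p> and <div> elements should be captured
body = driver.find_element_by_id('body-text')

for i, element in enumerate(body.find_elements_by_xpath(".//p | .//div"), start=1):
    image_title = "pic" + str(i) + ".png"
    print("saving " + image_title)
    # Scroll the element into view so the screenshot is not blank
    driver.execute_script("arguments[0].scrollIntoView();", element)
    element.screenshot(image_title)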
I am trying to scrape multiple pages of search results and print them all at once, but I got an empty list instead.
Here is the code I used:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
element_list = []

for skip in range(0, 20, 10):
    page_url = "https://jdih.esdm.go.id/index.php/web/result?tahun_terbit=2022,2021,2020,2019,2018,2017,2016,2015,2014&skip=" + str(skip)
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.get(page_url)

    Tahun = driver.find_elements(By.CSS_SELECTOR, 'div.numb separator')
    No_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'span.result-value')
    Nama_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__title')
    Deskripsi = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__desc')

    for i in range(len(Tahun)):
        element_list.append([Tahun[i].text, No_Peraturan[i].text, Nama_Peraturan[i].text, Deskripsi[i].text])

print(element_list)

driver.close()
The code only returns an empty list.
Note: the website does not use 'page' as is generally used for search results, but uses 'skip' instead.
Can anyone help me with this?
The CSS selector to find Tahun elements is incorrect as there are 2 classes assigned to the div. This results in Tahun being an empty list and since the loop to append text to element_list is based on the length of Tahun, nothing gets appended.
Update the selector as shown below.
Tahun = driver.find_elements(By.CSS_SELECTOR, 'div.numb.separator')
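For reference, here is a sketch of how the corrected selector could slot into the original loop; creating the driver once outside the loop and using zip() are optional tidy-ups, not requirements (if the results load via JavaScript, an explicit wait may also be needed):

from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

element_list = []
driver = webdriver.Chrome(ChromeDriverManager().install())  # create the driver once

for skip in range(0, 20, 10):
    page_url = "https://jdih.esdm.go.id/index.php/web/result?tahun_terbit=2022,2021,2020,2019,2018,2017,2016,2015,2014&skip=" + str(skip)
    driver.get(page_url)

    Tahun = driver.find_elements(By.CSS_SELECTOR, 'div.numb.separator')  # both classes in the selector
    No_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'span.result-value')
    Nama_Peraturan = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__title')
    Deskripsi = driver.find_elements(By.CSS_SELECTOR, 'div.result__content__item__desc')

    # zip() stops at the shortest list, so a missing field will not raise an IndexError
    for tahun, no, nama, desk in zip(Tahun, No_Peraturan, Nama_Peraturan, Deskripsi):
        element_list.append([tahun.text, no.text, nama.text, desk.text])

print(element_list)
driver.quit()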
There is a date selector on the site I'm working with.
I need the program to click on the current day.
Fortunately, that day has a distinct class: class="ui-state-default ui-state-highlight".
The date picker is divided into 5 identical blocks, each of which contains 7 more identical blocks with the days.
I need to find the current day among all of them, but when I start going through all the days to check whether the string I need is there, I get an error message.
My full code:
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
driver.get('**site**')

element = driver.find_element(By.XPATH, "//div[@class='ait-schedlfind-datepickers']")
option = element.find_element(By.XPATH, "//input[@class='ait-schedlfind-cal1 hasDatepicker']")
option.click()

element = driver.find_element(By.XPATH, "//div[@class='ui-datepicker ui-widget ui-widget-content ui-helper-clearfix ui-corner-all ait-schedl-picker']")
table = element.find_element(By.XPATH, "//table[@class='ui-datepicker-calendar']")
tr = table.find_elements(By.TAG_NAME, "tr")

for v in tr:
    td = v.find_elements(By.TAG_NAME, "td")
    for k in td:
        current_day = k.find_elements(By.XPATH, "//a[@class='ui-state-default ui-state-highlight']")
I don't think you need a loop to select the element you need. Everything after option.click() can be replaced with:
correct_day = driver.find_element(By.XPATH, "//*[contains(@class, 'ui-state-highlight')]")
# You can check that it's the correct day with
# print(correct_day.text)
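If the datepicker takes a moment to render after option.click(), the same locator can be wrapped in an explicit wait; a minimal sketch, assuming the highlighted day is an <a> element as in the question:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

option.click()
# Wait until the highlighted (current) day is clickable, then click it
correct_day = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//a[contains(@class, 'ui-state-highlight')]"))
)
correct_day.click()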
I managed to extract the names, specs, prices, and priceUnits from the products on this page: https://www.bauhaus.info/baustoffe/c/10000819.
I do, however, only manage to get the first 36 products visible on the page. How would I extract all the products on this page that appear when pressing the button for "more items"?
Any help is very much appreciated!
This is my code:
from selenium import webdriver
import pandas as pd
import re

browser = webdriver.Chrome(r'C:\Users\KristerJens\Downloads\chromedriver_win32\chromedriver')
browser.get('https://www.bauhaus.info/baustoffe/c/10000819')

names = []
specs = []
prices = []
priceUnit = []

for li in browser.find_elements_by_xpath("//ul[@class='product-list-tiles row list-unstyled']/li"):
    names.append(li.find_element_by_class_name("product-list-tile__info__name").text)
    specs.append(li.find_element_by_class_name("product-list-tile__info__attributes").text)
    prices.append(li.find_element_by_class_name("price-tag__box").text.split('\n')[0] + "€")
    p = li.find_element_by_class_name("price-tag__sales-unit").text.split('\n')[0]
    priceUnit.append(p[p.find("(")+1:p.find(")")])

df2 = pd.DataFrame()
df2['names'] = names
df2['specs'] = specs
df2['prices'] = prices
df2['priceUnit'] = priceUnit
I was able to click on the "More" option continuously with the code below. Try to incorporate this into your code.
# Imports required for explicit waits:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

driver.get("https://www.bauhaus.info/baustoffe/c/10000819")
wait = WebDriverWait(driver, 30)

options = driver.find_elements_by_xpath("//ul[@class='product-list-tiles row list-unstyled']/li")
print(len(options))

# Using the `count` variable to keep track of the number of clicks on the "More" option.
# Remove the `count` part of the code to click on the "More" option continuously.
count = 0
try:
    while True:
        if count > 5:  # Click on the "More" option only 5 times
            break
        moreoption = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-message='adb-show-more-products-button']")))
        driver.execute_script("arguments[0].scrollIntoView(true);", moreoption)
        driver.execute_script("window.scrollBy(0,-300);")
        time.sleep(2)
        moreoption.click()
        count += 1
        time.sleep(2)
        options = driver.find_elements_by_xpath("//ul[@class='product-list-tiles row list-unstyled']/li")
        print(len(options))
except:
    pass
First, try to click on the "More Products" button until it gets disabled, i.e. all products get listed, and then use the common XPath for locating the product info (a rough sketch of this is included after the scroll example below).
For each page, scroll to the "more items" element and click it; see the example of a scroll-to-element implementation below.
from selenium.webdriver.common.action_chains import ActionChains
element = driver.find_element_by_id("more_items")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
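Combining the two ideas above, here is a rough sketch that keeps clicking "More Products" until the button is no longer clickable and only then collects the product tiles; the locators are copied from the snippets above and may need adjusting, and browser is the driver from the question:

import time
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

wait = WebDriverWait(browser, 10)

while True:
    try:
        more = wait.until(EC.element_to_be_clickable(
            (By.XPATH, "//button[@data-message='adb-show-more-products-button']")))
    except TimeoutException:
        break  # button gone or disabled: all products are listed
    browser.execute_script("arguments[0].scrollIntoView(true);", more)
    browser.execute_script("window.scrollBy(0,-300);")
    more.click()
    time.sleep(2)  # give the newly loaded tiles a moment to render

# Now the full product list is in the DOM and the original scraping loop can run
products = browser.find_elements_by_xpath("//ul[@class='product-list-tiles row list-unstyled']/li")
print(len(products))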
I'm trying to loop through a dropdown menu at this URL: https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006
So, for example, the first dropdown menu - under options - lists out different materials and I want to select each one in turn and then gather some other information from the webpage before moving on to the next material. Here is my current code:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
import time

driver = webdriver.Firefox()
driver.get('https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006')

time.sleep(3)
driver.find_element_by_id('x-mark-icon').click()

select = Select(driver.find_element_by_name('Wiqj7mb4rsAq9LB'))
options = select.options
optionsList = []

driver.find_elements_by_class_name('select-wrapper')[0].click()

element = driver.find_element_by_xpath("//select[@name='Wiqj7mb4rsAq9LB']")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
# driver.execute_script("arguments[0].scrollIntoView();", element)

for option in options:  # iterate over the options, place attribute value in list
    optionsList.append(option.get_attribute("value"))

for optionValue in optionsList:
    print("starting loop on option %s" % optionValue)
    # select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='Wiqj7mb4rsAq9LB']")))
    # select = Select(select)
    select.select_by_value(optionValue)
I started with just the loop, but got this error:
ElementNotInteractableException: Message: Element <option> could not be scrolled into view
I then added the WebDriverWait and got a TimeoutException error.
I then realized I should probably click on the wrapper in which the dropdown is held, so I added the click, which does pop up the menu, but I still got the TimeoutException.
So I thought maybe I should move to the element, which I tried with the action chain lines, and I got this error:
WebDriverException: Message: TypeError: rect is undefined
I tried to avoid that error by using this code instead:
# driver.execute_script("arguments[0].scrollIntoView();", element)
Which just resulted in the TimeoutException again.
I'm pretty new to Python and Selenium and have basically just been modifying code from SO answers to similar questions, but nothing has worked.
I'm using python 3.6 and the current versions of Selenium and firefox webdriver.
If anything is unclear or if you need more info just let me know.
Thanks so much!
EDIT: Based on the answer and comments by Kajal Kunda, I've updated my code to the following:
material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
driver.execute_script("arguments[0].click();", material_dropdown)

materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")

for material in materials:
    # material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
    # driver.execute_script("arguments[0].click();", material_dropdown)
    # materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
    material_ele = material.find_element_by_tag_name('span')
    if material_ele.text != '':
        material_ele.click()
        time.sleep(5)
        price = driver.find_element_by_class_name("dataPriceDisplay")
        print(price.text)
The result is that it successfully prints the price for the first type of material, but then it returns:
StaleElementReferenceException: Message: The element reference of <li class=""> is stale;...
I've tried variations with the commented-out lines inside and outside of the loop, but I always get a version of the StaleElementReferenceException error.
Any suggestions?
Thanks!
You could do the whole thing with requests. Grab the drop-down list from the options listed in the drop-down, then concatenate the value attributes into a requests URL that retrieves JSON containing all the info on the page. The same principle applies for adding in other drop-down values. The ids for each drop-down selection are the value attributes of the options in the drop-down, and they appear in the URL I show, separated by // for each drop-down selection.
import requests
from bs4 import BeautifulSoup as bs

url = 'https://www.accuform.com/product/getSku/danger-danger-authorized-personnel-only-MADM006/1/false/null//{}//WHFIw3xXmQx8zlz//6wr93DdrFo5JV//WdnO0RpwKpc4fGF'
startURL = 'https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006'

res = requests.get(startURL)
soup = bs(res.content, 'lxml')

materials = [item['value'] for item in soup.select('#Wiqj7mb4rsAq9LB option')]
sizes = [item['value'] for item in soup.select('#WvXESrTyQjM3Ciw option')]
languages = [item['value'] for item in soup.select('#WUYWGMePtpmpmhy option')]
units = [item['value'] for item in soup.select('#W91eqaJ0WPXwe9b option')]

for material in materials:
    data = requests.get(url.format(material)).json()
    soup = bs(data['dataMaterialBullets'], 'lxml')
    lines = [item.text for item in soup.select('li')]
    print(lines)
    print(data['dataPriceDisplay'])
    # etc......
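If you also want to vary the size (or the other drop-downs), the corresponding slot in the URL can be templated the same way; this is only a sketch and assumes the second // slot really is the size id, which should be verified against the page:

# Hypothetical two-slot template: first slot = material id, second slot = size id (assumption)
url_two = 'https://www.accuform.com/product/getSku/danger-danger-authorized-personnel-only-MADM006/1/false/null//{}//{}//6wr93DdrFo5JV//WdnO0RpwKpc4fGF'

for material in materials:
    for size in sizes:
        data = requests.get(url_two.format(material, size)).json()
        print(material, size, data['dataPriceDisplay'])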
Try the code below. It should work.
from selenium import webdriver
import time

driver = webdriver.Firefox()
driver.get('https://www.accuform.com/safety-sign/danger-danger-authorized-personnel-only-MADM006')

time.sleep(3)
driver.find_element_by_id('x-mark-icon').click()

material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
driver.execute_script("arguments[0].click();", material_dropdown)

# Code for the material dropdown
materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
material_optionsList = []
for material in materials:
    material_ele = material.find_element_by_tag_name('span')
    if material_ele.text != '':
        material_optionsList.append(material_ele.text)
print(material_optionsList)

driver.execute_script("arguments[0].click();", material_dropdown)

size_dropdown = driver.find_element_by_xpath("(//input[@class='select-dropdown'])[2]")
driver.execute_script("arguments[0].click();", size_dropdown)

# Code for the size dropdown
Sizes = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
size_optionsList = []
for size in Sizes:
    size_ele = size.find_element_by_tag_name('span')
    if size_ele.text != '':
        size_optionsList.append(size_ele.text)

driver.execute_script("arguments[0].click();", size_dropdown)
Output:
[u'Adhesive Vinyl', u'Plastic', u'Adhesive Dura-Vinyl', u'Aluminum', u'Dura-Plastic\u2122', u'Aluma-Lite\u2122', u'Dura-Fiberglass\u2122', u'Accu-Shield\u2122']
Hope you will do the remaining. Let me know if it works for you.
EDIT: Code to loop through the materials and get the price value.
for material in range(len(materials)):
    material_ele = materials[material]
    if material_ele.text != '':
        # material_optionsList.append(material_ele.text)
        # material_ele.click()
        driver.execute_script("arguments[0].click();", material_ele)
        time.sleep(2)
        price = driver.find_element_by_id("priceDisplay")
        print(price.text)
        time.sleep(2)
        material_dropdown = driver.find_element_by_xpath("//input[@class='select-dropdown']")
        driver.execute_script("arguments[0].click();", material_dropdown)
        materials = driver.find_elements_by_css_selector("div.select-wrapper ul.dropdown-content li")
        material += 2
Output:
$8.31
$9.06
$13.22
$15.91
$15.91
I have a Python script using Selenium that goes to a given Instagram profile and iterates over the user's followers. On the Instagram website, when one clicks to see the list of followers, a pop-up opens with the accounts listed.
However, both visually and in the HTML, only 12 accounts are shown. In order to see more, one has to scroll down, so I tried doing this with the Keys.PAGE_DOWN input.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
...
username = 'Username'
password = 'Password'
message = 'blahblah'
tryTime = 2
#create driver and log in
driver = webdriver.Chrome()
logIn(driver, username, password, tryTime)
#gets rid of preference pop-up
a = driver.find_elements_by_class_name("HoLwm")
a[0].click()
#go to profile
driver.get("https://www.instagram.com/{}/".format(username))
#go to followers list
followers = driver.find_element_by_xpath("//a[@href='/{}/followers/']".format(username))
followers.click()
time.sleep(tryTime)
#find all li elements in list
fBody = driver.find_element_by_xpath("//div[@role='dialog']")
fBody.send_keys(Keys.PAGE_DOWN)
fList = fBody.find_elements_by_tag("li")
print("fList len is {}".format(len(fList)))
time.sleep(tryTime)
print("ended")
driver.quit()
When I try to run this I get the following error:
Message: unknown error: cannot focus element
I know this is probably because I'm using the wrong element for fBody, but I don't know which would be the right one. Does anybody know which element I should send the PAGE_DOWN key to, or if there is another way to load the accounts?
Any help is much appreciated!
The element you're looking for is //div[@class='isgrP'], and Keys.PAGE_DOWN does not work for a scrollable div.
Your variable fList holds the old value; you need to find the elements again after scrolling.
# find all li elements in list
fBody = driver.find_element_by_xpath("//div[@class='isgrP']")

scroll = 0
while scroll < 5:  # scroll 5 times
    driver.execute_script('arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;', fBody)
    time.sleep(tryTime)
    scroll += 1

fList = driver.find_elements_by_xpath("//div[@class='isgrP']//li")
print("fList len is {}".format(len(fList)))

print("ended")
#driver.quit()
The above code works fine if you add iteration with a for loop over a range:
for i in range(1, 4):
    try:
        # find all li elements in the list
        fBody = self.driver.find_element_by_xpath("//div[@class='isgrP']")
        scroll = 0
        while scroll < 5:  # scroll 5 times
            self.driver.execute_script('arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;', fBody)
            time.sleep(2)
            scroll += 1
        fList = self.driver.find_elements_by_xpath("//div[@class='isgrP']//li")
        print("fList len is {}".format(len(fList)))
    except Exception as e:
        print(e, "cannot scroll")

    try:
        # get <a> tags
        hrefs_in_view = self.driver.find_elements_by_tag_name('a')
        # finding relevant hrefs
        hrefs_in_view = [elem.get_attribute('title') for elem in hrefs_in_view]
        [pic_hrefs.append(title) for title in hrefs_in_view if title not in pic_hrefs]
        print("Check: pic href length " + str(len(pic_hrefs)))
    except Exception as tag:
        print(tag, "cannot find tag")
So, the for loop makes it possible to scroll even if the while loop misses.
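As a variation (just a sketch), instead of a fixed number of scrolls you could keep scrolling until the li count stops growing, which also bakes in the re-query that avoids working with a stale list; tryTime is the delay variable from the question:

import time

fBody = driver.find_element_by_xpath("//div[@class='isgrP']")
prev_count = 0
while True:
    driver.execute_script('arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;', fBody)
    time.sleep(tryTime)
    # Re-query the list on every pass; stop once no new followers are loaded
    fList = driver.find_elements_by_xpath("//div[@class='isgrP']//li")
    if len(fList) == prev_count:
        break
    prev_count = len(fList)

print("fList len is {}".format(len(fList)))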