I have a website for which I want to download excel files. (https://www.rivm.nl/media/smap/eenzaamheid.html)
First I want to click on the region and then perform the download. This I have working.
wijk_keuze = WebDriverWait(driver2nd,20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
wijk_keuze.click()
download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g' and #aria-label='View export menu']//*[name()='rect']")))
download.click()
WebDriverWait(download, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[#class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
time.sleep(2)
above code selects the first region in the parent element and then downloads the excel.
What I want to do is loop through each element in the parent element. How would I go about doing so?
The parent element looks as follows:
<g transform="transform(0,0), scale(1,1)" animator="1">
my entire code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
#defining URL
url='https://www.rivm.nl/media/smap/eenzaamheid.html'
#defining driver
driver = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
options = webdriver.ChromeOptions()
options.add_argument("start-maximized");
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=options, executable_path = r'./chromedriver_win32/chromedriver')
driver.get(url)
# Gemeentes
Detail_keuze = Select(driver.find_element_by_id("detail"))
options = Detail_keuze.options
Indicator_keuze = Select(driver.find_element_by_id("indicator"))
indicator_options = Indicator_keuze.options
for index in range(0, len(indicator_options) ):
#defining URL
url='https://www.rivm.nl/media/smap/eenzaamheid.html'
#defining driver
driver2nd = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
options = webdriver.ChromeOptions()
options.add_argument("start-maximized");
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_experimental_option("prefs", {
"download.default_directory": r"MY_PATH",
"download.prompt_for_download": False,
"download.directory_upgrade": True,
"safebrowsing.enabled": True
})
driver2nd = webdriver.Chrome(options=options, executable_path = r'./chromedriver_win32/chromedriver')
driver2nd.get(url)
# Gemeentes
Detail_keuze = Select(driver2nd.find_element_by_id("detail"))
options = Detail_keuze.options
Indicator_keuze = Select(driver2nd.find_element_by_id("indicator"))
indicator_options = Indicator_keuze.options
time.sleep(1)
Indicator_keuze.select_by_index(index)
wijk_keuze = WebDriverWait(driver2nd,20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
wijk_keuze.click()
download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g' and #aria-label='View export menu']//*[name()='rect']")))
download.click()
WebDriverWait(download, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[#class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
time.sleep(2)
######## HERE I WANT TO LOOP THROUGH EACH AND EVERY REGION
driver2nd.close()
As you can see I also want to loop through eachh and every indicator. This works. Now I want to add a loop through each and every region. I have it working so that I can click on the first region.
You don't have to click on the options. You can get the details by changing the url 'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
Just add the logic for downloading it.
Try:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from itertools import product
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install())#, chrome_options=chrome_options)
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://www.rivm.nl/media/smap/eenzaamheid.html')
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'html.parser')
ind = soup.find('select', attrs = {'name': 'indicator'} )
indicators = [i['value'] for i in ind.findAll('option')]
det = soup.find('select', attrs = {'name': 'detail'})
details = [i['value'] for i in det.findAll('option')]
for detail, indicator in list(product(details, indicators)):
print(indicator, detail)
new_url = f'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
driver.get(new_url)
# Write code for downloading it
Related
I'm trying to extract data from a website. I need to enter the value in the search box and then find the details. it will generate a table. After generating the table, need to write the details to the text file or insert them into a database. I'm trying the following things.
Website: https://commtech.byu.edu/noauth/classSchedule/index.php
Search text: "C S 142"
Sample Code
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
c_options = Options()
c_options.add_experimental_option("detach", True)
s = Service('C:/Users/sidat/OneDrive/Desktop/python/WebDriver/chromedriver.exe')
URL = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver = webdriver.Chrome(service=s, options=c_options)
driver.get(URL)
element = driver.find_element("id", "searchBar")
element.send_keys("C S 142", Keys.RETURN)
search_button = driver.find_element("id", "searchBtn")
search_button.click()
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[#id='sectionTable']")))
rows = table.find_elements("xpath", "//tr")
for row in rows:
cells = row.find_elements(By.TAG_NAME, "td")
for cell in cells:
print(cell.text)
I'm using PyCharm 2022.3 to code and test the result. There is nothing printing with my code. Please help me to solve this problem with to extract data to a text file and to an SQL database table.
The following code prints the content of the table you asked for.
You need to wait for elements to be clickable in case you going to click them or send them a text or to wait for visibility in case you want to read their text content.
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 30)
url = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.ID, "searchBar"))).send_keys("C S 142", Keys.RETURN)
wait.until(EC.element_to_be_clickable((By.ID, "searchBtn"))).click()
table = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//*[#id='sectionTable']")))
headers = table.find_elements("xpath", ".//thead//th")
cells = table.find_elements("xpath", ".//tbody//td")
headers_text = ""
for header in headers:
cell_text = header.text
headers_text = headers_text + cell_text.ljust(10)
cells_text = ""
for cell in cells:
c_text = cell.text
cells_text = cells_text + c_text.ljust(10)
print(headers_text)
print(cells_text)
The output is:
Section Type Mode InstructorCredits Term Days Start End Location Available Waitlist
002 DAY Classroom 3.00 TBA 0/0 0
Try this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
c_options = Options()
c_options.add_experimental_option("detach", True)
s = Service('C:/Users/sidat/OneDrive/Desktop/python/WebDriver/chromedriver.exe')
driver = webdriver.Chrome()
URL = "http://saasta.byu.edu/noauth/classSchedule/index.php"
driver.get(URL)
driver.maximize_window()
element = driver.find_element("id", "searchBar")
element.send_keys("C S 142", Keys.RETURN)
search_button = driver.find_element("id", "searchBtn")
search_button.click()
header = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[#id='sectionTable']/thead/tr/th")))
for th in header:
print(f"{th.get_attribute('textContent')}")
rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[#id='sectionTable']/tbody/tr")))
for i in range(0, len(rows)):
cells = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, f"(//table[#id='sectionTable']/tbody/tr)[{i+1}]//td")))
for cell in cells:
print(cell.get_attribute('textContent'))
You are waiting for the table, which is correct, but the table is fully loaded (the td are not loaded yet).
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[#id='sectionTable']//td")))
Then you wait at least for having any content into td element
I'm using selenium 4.7.2 and can't find the element by its name. The following code returns NoSuchElementException error:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Get the website using the Chrome webbdriver
browser = webdriver.Chrome()
browser.get('https://www.woofshack.com/en/cloud-chaser-waterproof-softshell-dog-jacket-ruffwear-rw-5102.html')
# Print out the result
price = browser.find_element(By.NAME, 'data-price-665')
print("Price: " + price.text)
# Close the browser
time.sleep(3)
browser.close()
What's wrong in using find_element method?
Looks like you are using a wrong locator here. I see no element with name attribute value 'data-price-665' on that page.
The following code is working:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
url = "https://www.woofshack.com/en/cloud-chaser-waterproof-softshell-dog-jacket-ruffwear-rw-5102.html"
driver.get(url)
price = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#product-price-665 .price")))
print("Price: " + price.text)
The output is:
Price: €112.95
I am not getting price they give me empty output this is page link https://www.amazon.com/dp/B00M0DWQYI?th=1
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url='https://www.amazon.com/dp/B00M0DWQYI?th=1'
PATH="C:\Program Files (x86)\chromedriver.exe"
driver =webdriver.Chrome(PATH)
driver.get(url)
item=dict()
try:
item['price'] = driver.find_element(By.XPATH, "//div[#id='corePrice_feature_div'] //span[#class='a-offscreen']").text
except:
item['price']=''
print(item)
You may want to wait for that element to properly load, prior to locating it:
[...]
wait = WebDriverWait(driver, 10)
item['price'] = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[#id='corePrice_feature_div']//span[#class='a-offscreen']"))).text
Selenium documentation can be found at https://www.selenium.dev/documentation/
EDIT: Here is a complete example of how you can get that information:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1920,1080")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 5)
items = dict()
driver.get('https://www.amazon.com/dp/B00M0DWQYI?th=1')
t.sleep(1)
driver.refresh()
items['price'] = wait.until(EC.element_to_be_clickable((By.XPATH, '//div[#id="corePrice_feature_div"]//span[#class="a-price aok-align-center"]'))).text.replace('\n', '.')
print(items)
Result in terminal:
{'price': '$32.98'}
You need to wait for element visibility and then to extract it's text.
The following Selenium code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/dp/B00M0DWQYI'
driver.get(url)
wait = WebDriverWait(driver, 10)
print(wait.until(EC.visibility_of_element_located((By.XPATH, "//div[#id='corePrice_feature_div']"))).text)
The output is
$32
98
You can use bs4 and it will work fine
from bs4 import BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'lxml')
try:
item['price'] = soup.find('input', id="attach-base-product-price").get('value')
except:
item['price'] = ''
finally:
driver.close()
driver.quit()
print(item)
I want to click on the second link in result in google search area using selenium-web-driver
( Need a method that suits for any google search result )
example page
This is my code, How can I modify the if statement
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
global driver
driver = webdriver.Chrome('C:\Windows\chromedriver.exe', options=chrome_options)
chrome_options.add_argument("--start-maximized")
wait = WebDriverWait(driver, 10)
def google(text):
if "first link" in text:
RESULTS_LOCATOR = "//div/h3/a"
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.XPATH, RESULTS_LOCATOR)))
page1_results = driver.find_elements(By.XPATH, RESULTS_LOCATOR)
for item in page1_results:
print(item.text)
# driver.find_element_by_class_name().click()
else:
ggwp=text.replace(" ", "")
driver.get("https://www.google.com")
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input').send_keys(ggwp)
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[3]/center/input[1]').send_keys(Keys.ENTER)
Second link can by placed in different div-s.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
browser = webdriver.Chrome(executable_path=os.path.abspath(os.getcwd()) + "/chromedriver")
link = 'http://www.google.com'
browser.get(link)
# search keys
search = browser.find_element_by_name('q')
search.send_keys("python")
search.send_keys(Keys.RETURN)
# click second link
for i in range(10):
try:
browser.find_element_by_xpath('//*[#id="rso"]/div['+str(i)+']/div/div[2]/div/div/div[1]/a').click()
break
except:
pass
I have this code , it opens chrome, but it doesn't want to continue with the code. Don't really know how to fix the issue. I do NOT want it to open selenium webdriver, want it to open my own local chrome path, I want at the same time to make the script to read read elements and print the values.
import names, time, random
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
def AccGen():
while True:
# *************Static***************
prefs = {"profile.managed_default_content_settings.images": 1}
options = Options()
# options.add_argument('--disable-gpu')
# options.add_argument("--disable-extensions")
# options.add_argument('--disable-notifications')
options.add_experimental_option("prefs", prefs)
options.add_argument("--window-size=1600,900")
browser = webdriver.Chrome(executable_path='C:/Users/Jonathan/AppData/Local/Google/Chrome/Application/Chrome.exe',chrome_options=options)
browser.implicitly_wait(10)
# ------------------------------------
# Access to site
browser.get(
"https://accounts.google.com/SignUp?service=mail&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F<mpl=default"
)
###################################################################
firstName = names.get_first_name()
lastName = names.get_last_name()
email = '{}.{}{}'.format(firstName, lastName, random.randint(1000, 9999))
password = '2001jl00'
###################################################################
# Write in random Name
WebDriverWait(browser, 20).until(
EC.visibility_of_element_located(
(By.XPATH, '//*[#id="firstName"]'))).send_keys(firstName)
https://mystb.in/vevivuneku.coffeescript