Is there a way we can click on the top checkbox highlighted and get all files downloaded by clicking the download button at the bottom? This way we can avoid going through all the files.
existing code:
# import statements
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#step-1:to get list of files:
data_url_page = requests.get("https://mft.rrc.texas.gov/link/0e33071d-9891-4009-970c-5aa26da5e31a", verify=False)
files_list = []
soup = bs(data_url_page.content, "lxml")
for table_div in soup.find_all('div', class_='ui-datatable-tablewrapper'):
files_td = table_div.find_all('td', class_='NameColumn')
for file_name in files_td:
if file_name:
files_list.append(file_name.get_text())
#step-2 : for download all files.
def http_file_downloader(data_url_files, files_list):
options = Options()
options.add_argument("--headless")
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.manager.showWhenStarting", False)
options.set_preference("service_log_path", "/home/myuser")
options.set_preference("browser.download.dir", "a/b/c/dir/")
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/force-download")
driver = webdriver.Firefox(options=options)
driver.get(data_url_files)
try:
for file_name in files_list:
elem = driver.find_element(by=By.XPATH, value="//a[text()='"+file_name+"']")
elem.click()
finally:
driver.quit()
# calling above method
http_file_downloader("https://mft.rrc.texas.gov/link/0e33071d-9891-4009-970c-5aa26da5e31a", files_list)
enter image description here
Please click this link for image
Related
I am trying to locate a button that uploads a file and gets the ouput result by clicking the button on the page itself, I know how to upload file by send keys.
The website is https://huggingface.co/spaces/vaibhavsharda/semantic_clustering
My code is
import csv
import time
from selenium import webdriver
import chromedriver_autoinstaller
import datetime
from bs4 import BeautifulSoup
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chromedriver_autoinstaller.install() # Check if the current version of chromedriver exists
# and if it doesn't exist, download it automatically,
# then add chromedriver to path
driver = webdriver.Chrome()
count = 0
entire_data = []
driver.get("https://huggingface.co/spaces/vaibhavsharda/semantic_clustering")
driver.maximize_window()
time.sleep(10)
s = driver.find_element(By.CSS_SELECTOR,'.exg6vvm15 .edgvbvh9') # the error is here.
s.send_keys("small_test.txt")
I am trying to locate element by selenium, I don't know if it doesn't load or something else is the error but I just want to locate the "Browse Files" button. Feel free to ask me anything.
Element you trying to click is inside an iframe, so you need first to switch into the iframe in order to access that element.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://huggingface.co/spaces/vaibhavsharda/semantic_clustering"
driver.get(url)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[title]")))
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#kind='primary'][not(#disabled)]"))).click()
When finished don't forget to switch to the default content with:
driver.switch_to.default_content()
UPD
Uploading file with Selenium is done by sending the uploaded file to a special element. This is not an element you are clicking as a user via GUI to upload elements. The element actually receiving uploaded files normally matching this XPath: //input[#type='file']
This is the fully working code - I tried this on my PC uploading some text file.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 5)
url = "https://huggingface.co/spaces/vaibhavsharda/semantic_clustering"
driver.get(url)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[title]")))
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='file']"))).send_keys("C:/project_name/.gitignore")
I would like to scroll a container within a webpage - not the webpage itself - using selenium within Python. Would anyone have any solutions to this?
I have tried the standard:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
Of course, this is fine for traversing the full web page, but not a container itself.
EDIT: The webpage is https://www.mixcloud.com/
The container shows when you use the search bar at the top of the page.
This is one way to scroll the page (it will stop once it goes over 300 songs):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
import time as t
import pandas as pd
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(browser)
wait = WebDriverWait(browser, 20)
url = "https://www.mixcloud.com/"
browser.get(url)
s_box = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[name="mixcloud_query"]')))
s_box.click()
s_box.send_keys('house')
s_box.send_keys(Keys.ENTER)
music_div = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body[class="rebrand"]')))
music_div.click()
while True:
t.sleep(5)
music_div.send_keys(Keys.END)
print('scrolled to bottom')
songs = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[class^="SearchAudioCard__SearchAudioCardContainer"]')))
print('songs:', len(songs))
print('______________-')
if len(songs) > 300:
print('got over 300 songs, stopping')
break
browser.quit()
Selenium documentation can be found at https://www.selenium.dev/documentation/
I have a website for which I want to download excel files. (https://www.rivm.nl/media/smap/eenzaamheid.html)
First I want to click on the region and then perform the download. This I have working.
wijk_keuze = WebDriverWait(driver2nd,20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
wijk_keuze.click()
download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g' and #aria-label='View export menu']//*[name()='rect']")))
download.click()
WebDriverWait(download, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[#class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
time.sleep(2)
above code selects the first region in the parent element and then downloads the excel.
What I want to do is loop through each element in the parent element. How would I go about doing so?
The parent element looks as follows:
<g transform="transform(0,0), scale(1,1)" animator="1">
my entire code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
#defining URL
url='https://www.rivm.nl/media/smap/eenzaamheid.html'
#defining driver
driver = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
options = webdriver.ChromeOptions()
options.add_argument("start-maximized");
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=options, executable_path = r'./chromedriver_win32/chromedriver')
driver.get(url)
# Gemeentes
Detail_keuze = Select(driver.find_element_by_id("detail"))
options = Detail_keuze.options
Indicator_keuze = Select(driver.find_element_by_id("indicator"))
indicator_options = Indicator_keuze.options
for index in range(0, len(indicator_options) ):
#defining URL
url='https://www.rivm.nl/media/smap/eenzaamheid.html'
#defining driver
driver2nd = webdriver.PhantomJS(r'./phantomjs-2.1.1-windows/bin/phantomjs')
options = webdriver.ChromeOptions()
options.add_argument("start-maximized");
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_experimental_option("prefs", {
"download.default_directory": r"MY_PATH",
"download.prompt_for_download": False,
"download.directory_upgrade": True,
"safebrowsing.enabled": True
})
driver2nd = webdriver.Chrome(options=options, executable_path = r'./chromedriver_win32/chromedriver')
driver2nd.get(url)
# Gemeentes
Detail_keuze = Select(driver2nd.find_element_by_id("detail"))
options = Detail_keuze.options
Indicator_keuze = Select(driver2nd.find_element_by_id("indicator"))
indicator_options = Indicator_keuze.options
time.sleep(1)
Indicator_keuze.select_by_index(index)
wijk_keuze = WebDriverWait(driver2nd,20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g']//*[name()='path']")))
wijk_keuze.click()
download = WebDriverWait(driver2nd, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[#class='highcharts-container ']//*[name()='svg']//*[name()='g' and #aria-label='View export menu']//*[name()='rect']")))
download.click()
WebDriverWait(download, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[#class='highcharts-menu']//*[contains(text(),'XLS downloaden')]"))).click()
time.sleep(2)
######## HERE I WANT TO LOOP THROUGH EACH AND EVERY REGION
driver2nd.close()
As you can see I also want to loop through eachh and every indicator. This works. Now I want to add a loop through each and every region. I have it working so that I can click on the first region.
You don't have to click on the options. You can get the details by changing the url 'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
Just add the logic for downloading it.
Try:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from itertools import product
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install())#, chrome_options=chrome_options)
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://www.rivm.nl/media/smap/eenzaamheid.html')
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'html.parser')
ind = soup.find('select', attrs = {'name': 'indicator'} )
indicators = [i['value'] for i in ind.findAll('option')]
det = soup.find('select', attrs = {'name': 'detail'})
details = [i['value'] for i in det.findAll('option')]
for detail, indicator in list(product(details, indicators)):
print(indicator, detail)
new_url = f'https://www.rivm.nl/media/smap/{indicator}?detail={detail}'
driver.get(new_url)
# Write code for downloading it
I want to click on the second link in result in google search area using selenium-web-driver
( Need a method that suits for any google search result )
example page
This is my code, How can I modify the if statement
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
global driver
driver = webdriver.Chrome('C:\Windows\chromedriver.exe', options=chrome_options)
chrome_options.add_argument("--start-maximized")
wait = WebDriverWait(driver, 10)
def google(text):
if "first link" in text:
RESULTS_LOCATOR = "//div/h3/a"
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.XPATH, RESULTS_LOCATOR)))
page1_results = driver.find_elements(By.XPATH, RESULTS_LOCATOR)
for item in page1_results:
print(item.text)
# driver.find_element_by_class_name().click()
else:
ggwp=text.replace(" ", "")
driver.get("https://www.google.com")
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input').send_keys(ggwp)
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[3]/center/input[1]').send_keys(Keys.ENTER)
Second link can by placed in different div-s.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
browser = webdriver.Chrome(executable_path=os.path.abspath(os.getcwd()) + "/chromedriver")
link = 'http://www.google.com'
browser.get(link)
# search keys
search = browser.find_element_by_name('q')
search.send_keys("python")
search.send_keys(Keys.RETURN)
# click second link
for i in range(10):
try:
browser.find_element_by_xpath('//*[#id="rso"]/div['+str(i)+']/div/div[2]/div/div/div[1]/a').click()
break
except:
pass
I'm trying to scrape all of the images on this site. However, when I run my script and try to get the CSS attribute of 'background-image' to extract the url of each web element, the result is printing out "None". I have no idea why it would be returning None as I print out the web element and the attribute does exist. Any help would be greatly appreciated!
import re
import selenium
import io
import pandas as pd
import urllib.request
import urllib.parse
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
import time
from _datetime import datetime
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def parse_style_attribute(style_string):
if 'background-image' in style_string:
style_string = style_string.split(' url("')[1].replace('");', '')
return style_string
return None
#setup opening url window of website to be scraped
options = webdriver.ChromeOptions()
options.headless=False
prefs = {"profile.default_content_setting_values.notifications" : 2}
options.add_experimental_option("prefs", prefs)
#driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3") #possible issue by not including the file extension
# driver.maximize_window()
# time.sleep(5)
# driver.get("""https://www.tripadvisor.com/""") #get the information from the page
driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3")
driver.maximize_window()
driver.get("https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
time.sleep(1)
#waits for that amount of time
driver.implicitly_wait(12)
#find the searchbar and then plug in the key
#driver.find_element_by_xpath('//*[#class="typeahead_input"]').send_keys("Washington D.C.", Keys.ENTER)
#wait
time.sleep(1)
#list all of the hotels in that page
images = driver.find_elements_by_xpath('//*[#class="media-viewer-tile-gallery-v2-TileGallery__entryInner--JaADY "]')
image_url = []
for i in range(len(images)):
image_url.append(images[i].value_of_css_property("background-image"))
print("Total Number of images: ", len(images))
# print(images)
firstimage = images[0].get_attribute("innerHTML")
print(firstimage)
for i in range(len(image_url)):
print(image_url[i])
try this. it works for me.
# attach your code as set browser option
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
driver.get(
"https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
images = WebDriverWait(driver, 20).until(
EC.presence_of_all_elements_located(
(By.XPATH, '//*[#class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]')))
image_url = []
for index, image in enumerate(images):
image_url.append(images[index].value_of_css_property("background-image"))
print(image_url)