I'm trying to use Selenium to click on multiple links, one at a time, to download multiple CSV files. The problem is that Selenium downloads only a few of the CSV files; in the middle of the loop it stops working, the browser crashes claiming there is no internet connection, and the driver closes. I have already put chromedriver.exe in the same folder and set the path, but it is still not working.
import os
import re
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
new_directory = r"Z:\BI_Database_teste"


def _wait_for_downloads(directory, timeout=120, poll_interval=0.5):
    """Wait until no partial Chrome download is left in *directory*.

    Chrome keeps an in-progress download in a ``*.crdownload`` file and
    renames it once the transfer is complete, so the absence of such files
    is used here as the "all downloads finished" signal.

    Args:
        directory: Folder that Chrome downloads into.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        True when no partial download remains, False if *timeout* expired.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        if not any(name.endswith('.crdownload') for name in os.listdir(directory)):
            return True
        time.sleep(poll_interval)
    return False


# Start from an empty folder so files from a previous run are not mixed in.
for document in os.listdir(new_directory):
    os.remove(os.path.join(new_directory, document))

url = 'myPersonalURL'

chrome_options = Options()
chrome_options.add_argument('--headless')
browser = webdriver.Chrome('chromedriver.exe', options=chrome_options)

try:
    # Tell Chrome (via the DevTools protocol) to allow downloads and where
    # to store them.
    params = {'behavior': 'allow', 'downloadPath': new_directory}
    browser.execute_cdp_cmd('Page.setDownloadBehavior', params)

    browser.get(url)
    time.sleep(2)

    # Log in.
    input_email = browser.find_element(By.ID, 'email')
    input_email.send_keys('myEmail')
    input_password = browser.find_element(By.ID, 'password')
    input_password.send_keys('myPassword')
    input_password.submit()

    # Answer the security question shown after the login form.
    input_question = browser.find_element(By.XPATH, '/html/body/div[2]/div[2]/form/table/tbody/tr[3]/td/table/tbody/tr[2]/td[2]').text
    answer_field = browser.find_element(By.XPATH, '/html/body/div[2]/div[2]/form/table/tbody/tr[3]/td/table/tbody/tr[3]/td[2]/input')
    if input_question == 'question':
        answer_field.send_keys('answer')
    elif input_question == 'question':
        answer_field.send_keys('answer')
    else:
        answer_field.send_keys('answer')
    time.sleep(2)
    answer_field.submit()
    time.sleep(4)

    links = browser.find_elements(By.LINK_TEXT, 'Export (CSV)')
    links_len = len(links)
    print(str(links_len) + ' BI Databases to Download')

    list_count = 0
    for link in links:
        time.sleep(2)
        # Do not start another export while the previous one is still being
        # written; firing every click back-to-back can leave downloads
        # unfinished.
        if not _wait_for_downloads(new_directory):
            print('Timed out waiting for the previous download to finish')
        link.click()
        list_count = list_count + 1
        print(str(list_count) + ' BI Databases downloaded')

    # Give the last click time to create its partial file, then wait for it;
    # quitting the browser earlier would abort the transfer.
    time.sleep(2)
    if not _wait_for_downloads(new_directory):
        print('Timed out waiting for the last download to finish')
finally:
    # Always release the browser and the chromedriver process, even when a
    # step above raised.
    browser.quit()

# Strip the digits from the exported file names.
for file in os.listdir(new_directory):
    if file.startswith("PBI"):
        source_path = os.path.join(new_directory, file)
        target_path = os.path.join(new_directory, re.sub('[0-9]', '', file))
        try:
            os.rename(source_path, target_path)
        except OSError as error:
            # Renaming stays best-effort, but the failure is now reported
            # instead of being silently swallowed.
            print('Could not rename ' + file + ': ' + str(error))

print('BI Databases Download Successfully!')
Could someone help me to find out why the webdriver stops working in the middle of the loop?
If you think that, for some reason, the driver isn't being loaded correctly, you can download the webdriver at runtime from your code. This might help:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Create the options object passed to the constructor below; the snippet used
# `options` without ever defining it, which raises NameError.
options = Options()
# ChromeDriverManager().install() downloads a chromedriver (if needed) and
# returns the path to it.
chrome = webdriver.Chrome(ChromeDriverManager().install(), options=options)
without much further context, the question cannot be answered or reproduced.
Related
I am currently working on a scraper for aniworld.to.
My goal is to enter the anime name and get all of the episodes downloaded.
I have everything working except one thing...
The website has a Watch button. That button redirects you to https://aniworld.to/redirect/SOMETHING, and that page has a captcha, which means the link is not in the HTML...
Is there a way to bypass this/get the link in python? Or a way to display the captcha so I can solve it?
Because the captcha only appears every lightyear.
The only thing I need from that page is the redirect link. It looks like this:
https://vidoza.net/embed-something.html
My very very wip code is here if it helps: https://github.com/wolfswolke/aniworld_scraper
Mitchdu showed me how to do it.
If anyone else needs help here is my code: https://github.com/wolfswolke/aniworld_scraper/blob/main/src/logic/captcha.py
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from threading import Thread
import os
def open_captcha_window(full_url):
    """Open *full_url* in a small Chrome window and return the URL it redirects to.

    The window stays open until the browser's current URL differs from
    *full_url* (for example after the user solves the captcha) or until the
    wait times out.

    Args:
        full_url: The redirect page to open.

    Returns:
        The URL the browser ended up on after leaving *full_url*.

    Raises:
        selenium.common.exceptions.TimeoutException: If the URL has not
            changed within the 100 second wait.
    """
    working_dir = os.getcwd()
    path_to_ublock = r'{}\extensions\ublock'.format(working_dir)

    options = webdriver.ChromeOptions()
    options.add_argument("app=" + full_url)
    options.add_argument("window-size=423,705")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    # The uBlock extension is optional: load it only when it is present.
    if os.path.exists(path_to_ublock):
        options.add_argument('load-extension=' + path_to_ublock)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get(full_url)
        # Poll every 0.3 s, for at most 100 s, until the page has redirected.
        wait = WebDriverWait(driver, 100, 0.3)
        wait.until(lambda drv: drv.current_url != full_url)
        new_page = driver.current_url
    finally:
        # Close in a background thread so the caller is not blocked; doing it
        # in `finally` also closes the window when the wait above times out.
        Thread(target=threaded_driver_close, args=(driver,)).start()
    return new_page
def threaded_driver_close(driver):
    """Close the current window of *driver*.

    Intended as a thread target so that closing the browser window does not
    block the code that started the thread.

    Args:
        driver: Object exposing a ``close()`` method (a Selenium WebDriver).
    """
    driver.close()
I need to open web whatsapp but it gives me Deprecation Warning
here is the code
import selenium
from selenium import webdriver
import time
# NOTE(review): this path points at chrome.exe (the browser itself); Selenium
# expects the path of the chromedriver executable here - confirm and adjust.
driver_path = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
driver = webdriver.Chrome(driver_path)
driver.get("https://web.whatsapp.com/")
driver.maximize_window()
# Keep the page open for 20 seconds before shutting the browser down.
time.sleep(20)
# `driver.quit` without parentheses only references the method and never
# calls it, so the browser was left running.
driver.quit()
It's not giving me anything; it just opens Chrome.
DeprecationWarning: executable_path has been deprecated, please pass in a Service object
First, you need to download chromedriver: https://chromedriver.chromium.org/downloads
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
# Raw string: '\c' is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning; the resulting path is unchanged.
ser = Service(r'path_to\chromedriver.exe')
browser = webdriver.Chrome(service=ser)
browser.get('https://web.whatsapp.com/')
browser.maximize_window()
# Keep the page open for 20 seconds before shutting the browser down.
time.sleep(20)
# Call quit(); the bare attribute access `browser.quit` does nothing.
browser.quit()
You need to pass the driver_path to Service(). The code below should work for you.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# `time.sleep` is used below, but this snippet never imported `time`.
import time

driver_path = r"chromdriver.exe full file path here"
service = Service(driver_path)
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(service = service, options = option )
driver.get("https://web.whatsapp.com/")
driver.maximize_window()
# Keep the page open for 20 seconds before shutting the browser down.
time.sleep(20)
# Call quit(); the bare attribute access `driver.quit` does nothing.
driver.quit()
So basically, I have a list of droplists that I need to interact with.
I know how to interact with the first droplist.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import time
# Raw string: '\P' and '\c' are not valid escape sequences, so the non-raw
# literal triggers an invalid-escape warning; the resulting path is unchanged.
PATH = r"C:\Program Files (x86)\chromedriver.exe"

options = Options()
options.headless = False
driver = webdriver.Chrome(PATH,options = options)
driver.set_window_size(1920,1080)

# Open the site and go to the account page.
driver.get("https://www.compraensanjuan.com")
time.sleep(3)
link = driver.find_element_by_link_text("Mi cuenta")
link.click()
time.sleep(3)

# Log in; pressing RETURN in the password field submits the form.
email = driver.find_element_by_name("email")
email.send_keys("yourmail")
password = driver.find_element_by_name("clave")
password.send_keys("Yourpassword")
password.send_keys(Keys.RETURN)
time.sleep(3)

# Pick "Actualizar" in the element with id "acciones", then go back one page.
drp = Select(driver.find_element_by_id("acciones"))
drp.select_by_visible_text("Actualizar")
driver.back()
But how do I repeat the same action for all the following droplists??
You may be able to get a list of the options, and then iterate over them. Something like:
# Read the visible text of the dropdown; each option is on its own line.
dropdown = driver.find_element_by_name('DropdownElement')
options = dropdown.text.split('\n')
for opt in options:
    # XPath attribute tests are written with '@' ('#name' is not valid XPath).
    driver.find_element_by_xpath("//select[@name='DropdownElement']/option[text()='"+opt+"']").click()
absolute noob here but I recently started a python automation project which checks the day of the week, and if it's a weekday it should open a browser with my college schedule for example, and if it's a weekend day it should open up different websites.
I use the brave browser as my default browser but I was wondering if it is possible to open up my regular browser profile when I run my code instead of the new instance where it does not have all my bookmarks and passwords?
Thanks in advance!
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import datetime
# Current local date and time, captured once when the module is loaded.
day = datetime.datetime.today()
# Day of the week as an integer: Monday is 0 and Sunday is 6.
dag = day.weekday()
def Testing():
    """Run the weekend or the weekday routine depending on `dag`.

    `dag` is the module-level weekday number (Monday is 0), so values of 5
    and above are Saturday and Sunday.

    Returns:
        Whatever the selected routine returns.
    """
    if dag >= 5:
        return Weekend()
    return Weekday()
def _open_outlook_and_youtube():
    """Start Brave through chromedriver, sign in to Outlook and open YouTube.

    Shared implementation of Weekend() and Weekday(), whose bodies were
    identical.
    """
    options = Options()
    options.add_argument("--window-size=1920,1080")
    ## options.add_argument("/Users/vadim/Library/Application Support/BraveSoftware/Brave-Browser")
    # Drive Brave instead of Chrome by pointing chromedriver at its binary.
    options.binary_location = '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser'
    driver_path = '/usr/local/bin/chromedriver'
    driver = webdriver.Chrome(options=options, executable_path=driver_path)

    driver.get('https://outlook.live.com/mail/0/inbox')
    Outlook_Aanmelden = driver.find_element_by_xpath('/html/body/header/div/aside/div/nav/ul/li[2]/a')
    Outlook_Aanmelden.click()

    # XPath attribute tests are written with '@' ('#id' is not valid XPath).
    Email_Field = driver.find_element_by_xpath('//*[@id="i0116"]')
    Email_Field.send_keys('#live.com')
    Outlook_Volgende = driver.find_element_by_xpath('//*[@id="idSIButton9"]')
    Outlook_Volgende.click()
    time.sleep(0.5)

    Password_Field = driver.find_element_by_xpath('//*[@id="i0118"]')
    Password_Field.send_keys('pass')
    Password_Field.send_keys(Keys.ENTER)
    Inlog_Outlook = driver.find_element_by_xpath('//*[@id="idSIButton9"]')
    Inlog_Outlook.click()

    # Open YouTube in an additional window/tab of the same browser.
    driver.execute_script("window.open('https://youtube.com');")


def Weekend():
    """Open the websites used on weekend days."""
    _open_outlook_and_youtube()


def Weekday():
    """Open the websites used on weekdays."""
    _open_outlook_and_youtube()
# Script entry point: pick and run the routine for the current day.
Testing()
I recommend using webbrowser instead of selenium, it's part of the python standard library so you should already have it.
import webbrowser
# Ask the standard-library webbrowser module to display the given URL.
webbrowser.open('https://stackoverflow.com/')
Here's the doc if you want to play around with it.
i'm currently trying to automate the task of clicking different download links from this website:
https://www.theice.com/clear-us/risk-management#margin-rates
On this page, I first have to click the "Download ICE Risk Model array files" header, which reveals two dropdowns. I then want to click the "Final" link, which downloads a CSV file, for each month of each available year.
Both dropdowns are driven by hidden dropdown menus above them. I first made those hidden menus visible, which worked, and I was also able to change the year with a Selenium click.
The problem is that I'm not able to click the "Final" link in the CSV section; instead, the script clicks the "Intercontinental Exchange" button in the footer and navigates to a new page.
Is there any way to get this task done?
Also, is it possible to change the download location to the directory where the .py script is located?
This is the python code so far, i currently removed the headless part to see what's going on :
from bs4 import BeautifulSoup
import requests
from selenium.webdriver.chrome.options import Options
from shutil import which
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
import os
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--log-level=3")

# Locate the chromedriver executable on PATH.
chrome_path = which("chromedriver")

driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
driver.set_window_size(1366, 768)
driver.get("https://www.theice.com/clear-us/risk-management#margin-rates")

# Expand the collapsible section that contains the download table.
# XPath attribute tests are written with '@' ('#class' is not valid XPath).
main_button = driver.find_element_by_xpath('//h4[@class="collapsible-section-header"]')
main_button.click()
time.sleep(5)

# The year and month <select> elements are hidden; make them visible so that
# Selenium can interact with them.
driver.execute_script("document.getElementById('icus-ice-form-year').style.display = 'block';")
driver.execute_script("document.getElementById('icus-ice-form-month').style.display = 'block';")
time.sleep(1)

dropdown_1 = Select(driver.find_element_by_xpath('//select[@id="icus-ice-form-year"]'))
# The second dropdown is the month selector; the original looked up the year
# element a second time.
dropdown_2 = Select(driver.find_element_by_xpath('//select[@id="icus-ice-form-month"]'))
main_table_div = driver.find_element_by_xpath('//div[@id="icus-ice-riskarraytable"]')
# A leading '.' makes the XPath relative to the element it is called on; a
# bare '//' always searches the whole document.
main_table = main_table_div.find_element_by_xpath('.//table[@class="table table-data"]')

for opt in dropdown_1.options:
    opt.click()
    for opt2 in dropdown_2.options:
        opt2.click()
        time.sleep(3)
        download_links_1 = main_table.find_elements_by_xpath('.//td[@class="table-partitioned"]')
        for dow in download_links_1:
            try:
                # './/a' is the link inside this cell; '//a' matched the first
                # link of the whole page instead.
                temp_dow = dow.find_element_by_xpath('.//a')
                temp_dow.click()
                time.sleep(4)
            except Exception as exc:
                # Keep going with the next cell, but report the failure
                # instead of hiding it.
                print(exc)
This should switch the downloads to the current working directory and download all the early CSVs. Change '//*[@id="icus-ice-riskarraytable"]/table/tbody/tr[{0}]/td[2]/a' to '//*[@id="icus-ice-riskarraytable"]/table/tbody/tr[{0}]/td[3]/a' for the other (final) CSV.
options = Options()
currentDirectory = os.getcwd()
# Chrome preferences: download into the current working directory without
# showing a "save as" prompt.
prefs = {
    "download.default_directory": currentDirectory,
    "download.prompt_for_download": False
}
#print(currentDirectory)
options.add_experimental_option("prefs", prefs)
# NOTE(review): nothing in this snippet passes `options` to a driver; the
# preferences only apply if `driver` is created with options=options.

driver.get("https://www.theice.com/clear-us/risk-management#margin-rates")
driver.implicitly_wait(5)
# XPath attribute tests are written with '@' ('#class' / '#id' are not valid
# XPath).
main_button = driver.find_element_by_xpath('//h4[@class="collapsible-section-header"]')
main_button.click()
driver.implicitly_wait(5)
# Make the hidden year and month <select> elements visible.
driver.execute_script("document.getElementById('icus-ice-form-year').style.display = 'block';")
driver.execute_script("document.getElementById('icus-ice-form-month').style.display = 'block';")
driver.implicitly_wait(5)

drop1length=len(driver.find_elements_by_xpath('//select[@id="icus-ice-form-year"]/option'))
#print(drop1length)
# NOTE(review): every range below skips the first and the last index -
# confirm this is intended.
for i in range(1,drop1length-1):
    # Look the <select> up again on every pass rather than reusing an
    # earlier reference.
    drop1=Select(driver.find_element_by_xpath('//select[@id="icus-ice-form-year"]'))
    drop1.select_by_index(i)
    drop2length=len(driver.find_elements_by_xpath('//select[@id="icus-ice-form-month"]/option'))
    #print(drop2length)
    for j in range(1,drop2length-1):
        drop2=Select(driver.find_element_by_xpath('//select[@id="icus-ice-form-month"]'))
        driver.implicitly_wait(5)
        drop2.select_by_index(j)
        download_links_length = len(driver.find_elements_by_xpath('//*[@id="icus-ice-riskarraytable"]/table/tbody/tr/td[2]/a'))
        #print(download_links_length)
        for dow in range(1,download_links_length-1):
            try:
                element = driver.find_element_by_xpath('//*[@id="icus-ice-riskarraytable"]/table/tbody/tr[{0}]/td[2]/a'.format(str(dow)))
                driver.implicitly_wait(5)
                # Click through JavaScript instead of a native Selenium click.
                driver.execute_script("arguments[0].click();", element)
                driver.switch_to.window(driver.window_handles[0])
            except Exception as e:
                print(e)
Import these
import os
from selenium.webdriver.chrome.options import Options