This is in reference to the solution provided by #lifeiscomplex in "How to handle Firefox print dialog box in Selenium".
The code is:
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
# Location of the geckodriver executable handed to webdriver.Firefox below.
driver_path = 'geckodriver.exe'
# Browser arguments added to the Firefox options object.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
# Profile object that receives the preferences set below.
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# NOTE(review): the preference name passed here is the literal text
# 'profile_options = FirefoxProfile()', which looks like a paste error --
# presumably a user-agent preference name was intended; confirm.
profile_options.set_preference('profile_options = FirefoxProfile()', user_agent)
profile_options.set_preference("print_printer", "Mozilla Save to PDF")
profile_options.set_preference("print.always_print_silent", True)
profile_options.set_preference("print.show_print_progress", True)
profile_options.set_preference('print.save_as_pdf.links.enabled', True)
profile_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
# set your own file path
# NOTE(review): this key carries an extra ".print_to_file" segment before
# ".print_to_filename" compared with the print_to_file key set just above --
# verify the exact preference name against the Firefox version in use.
profile_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_file.print_to_filename',
"testprint.pdf")
driver = webdriver.Firefox(executable_path=driver_path, options=firefox_options,
firefox_profile=profile_options)
URL = 'https://finance.yahoo.com/'
driver.get(URL)
# Look up the search box by element id, type the ticker and submit with ENTER.
search_field_id = 'yfin-usr-qry'
element_search_field = driver.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
# Call window.print() in the page, then wait 20 seconds before quitting.
driver.execute_script("window.print()")
sleep(20)
driver.quit()
When I run this code, it executes without producing any error, but I get this error dialog box in Firefox. Even if you press Ctrl+P you will get the same dialog box:
However, if I manually click Print from the application menu in Firefox while the page is loading, I do get the print dialog:
Could you please advise why I am getting that dialog box?
I did change the toggle to True before running the script, which shouldn't matter since it's already set by profile_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True):
Edit:
I was able to get the print dialog box to start reappearing by commenting out this line:
profile_options.set_preference("print.always_print_silent", True)
But I am not able to find the file testprint.pdf, which is supposed to be generated.
My system information
----------------------------------------
Platform: Windows
OS: 10
Python: 3.8.8
Selenium: 3.141.0
Firefox: 90.0.2
Geckodriver: 0.29.0
----------------------------------------
In the comments of this question "How to handle Firefox print dialog box in Selenium" you mentioned that you had changed this line from my answer:
profile_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_file.print_to_filename', "testprint.pdf")
to this:
profile_options.set_preference('print.printer_Microsoft_Print_to_PDF.print_to_filename', "testprint.pdf")
If you want to change that line you would have to change at least one more in the code you posted.
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
# Script: open Yahoo Finance in Firefox, search for TSLA and print the result
# page silently through the "Microsoft Print to PDF" printer.
driver_path = 'geckodriver.exe'

firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# The user agent is overridden through "general.useragent.override"; the
# earlier version passed a pasted code fragment as the preference name, so the
# override never took effect.
profile_options.set_preference('general.useragent.override', user_agent)
# this line was changed
profile_options.set_preference("print_printer", "Microsoft Print to PDF")
profile_options.set_preference("print.always_print_silent", True)
profile_options.set_preference("print.show_print_progress", True)
profile_options.set_preference('print.save_as_pdf.links.enabled', True)
# this line was changed
profile_options.set_preference("print.printer_Microsoft_Print_to_PDF.print_to_file", True)
# this line was changed
profile_options.set_preference('print.printer_Microsoft_Print_to_PDF.print_to_filename', "testprint.pdf")

driver = webdriver.Firefox(executable_path=driver_path, options=firefox_options,
                           firefox_profile=profile_options)
try:
    URL = 'https://finance.yahoo.com/'
    driver.get(URL)

    # Type the ticker into the search box and submit it.
    search_field_id = 'yfin-usr-qry'
    element_search_field = driver.find_element_by_id(search_field_id)
    element_search_field.clear()
    element_search_field.send_keys('TSLA')
    element_search_field.send_keys(Keys.ENTER)

    driver.execute_script("window.print()")
    # Keep the browser open for 20 seconds after triggering the print.
    sleep(20)
finally:
    # Always shut the browser down, even when a step above raises.
    driver.quit()
There might be another line that needs to be changed, but I cannot test the code above to verify this, because I'm using macOS and not Windows 10.
You also mentioned that you commented this line out:
profile_options.set_preference("print.always_print_silent", True)
commenting out this line will launch the print dialog as showed in the graphic below.
Related
I have the following code:
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
# Open Yahoo Finance with Firefox, search for TSLA and call window.print().
url = 'https://finance.yahoo.com/'
driver_path = 'geckodriver.exe'
browser = Firefox(executable_path = driver_path)
browser.get(url)
# NOTE(review): `profile` is not assigned anywhere in this snippet, and the
# browser has already been created above -- confirm where it should come from.
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
profile.set_preference("browser.helperApps.neverAsk.openFile", "application/pdf")
# Look up the search box by element id, type the ticker and submit with ENTER.
search_field_id = 'yfin-usr-qry'
element_search_field = browser.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
from selenium.webdriver import ActionChains
action_chains = ActionChains(browser)
# Sends CONTROL + 'V' to the browser.
action_chains.key_down(Keys.CONTROL).send_keys('V').key_up(Keys.CONTROL).perform()
# Absolute XPath to a link's <span>; the element is moved to and clicked.
xpath_string = '/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[6]/div/div/section/div/ul/li[2]/a/span'
element = browser.find_element_by_xpath(xpath_string)
action_chains.move_to_element(element).click().perform()
browser.execute_script('window.print();')
A print dialog box pops up in Firefox. How can I accept it? Is there a way to bypass this dialog box and print directly, since it is Firefox's own dialog rather than a system dialog box?
Edit:
My full updated code as per input from #Prophet
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import time
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
# Pick a random user-agent string from fake_useragent.
ua = UserAgent()
userAgent = ua.random
url = 'https://finance.yahoo.com/'
driver_path = 'geckodriver.exe'
profile = FirefoxProfile('C:\\Users\\\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\tp3cz5dm.default-release')
profile.set_preference("general.useragent.override", userAgent)
# NOTE(review): `profile` is not passed to Firefox(...) here, and the two
# set_preference calls below run after the browser has been created --
# confirm whether these preferences are meant to reach the browser.
browser = Firefox(executable_path = driver_path)
browser.get(url)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
profile.set_preference("browser.helperApps.neverAsk.openFile", "application/pdf")
# Look up the search box by element id, type the ticker and submit with ENTER.
search_field_id = 'yfin-usr-qry'
element_search_field = browser.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
from selenium.webdriver import ActionChains
action_chains = ActionChains(browser)
action_chains.key_down(Keys.CONTROL).send_keys('V').key_up(Keys.CONTROL).perform()
# xpath_string = '/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[6]/div/div/section/div/ul/li[2]/a/span'
# element = browser.find_element_by_xpath(xpath_string)
# action_chains.move_to_element(element).click().perform()
browser.execute_script('window.print();')
# Switch to the last window handle before querying the print preview.
browser.switch_to.window(browser.window_handles[-1])
time.sleep(0.5)
# The script walks nested shadow roots starting at a <print-preview-app> element.
actionButton = browser.execute_script(
"return document.querySelector('print-preview-app').shadowRoot.querySelector('#sidebar').shadowRoot.querySelector('print-preview-button-strip').shadowRoot.querySelector('.action-button')")
# NOTE(review): the result above is stored in `actionButton`, but the click
# below uses `cancelButton`, which is not assigned in this snippet.
cancelButton.click()
# switch back to main window
# NOTE(review): `driver` is not assigned in this snippet (the driver object
# is named `browser`) -- confirm.
browser.switch_to.window(driver.window_handles[0])
When i run this i am getting error:
JavascriptException: TypeError: document.querySelector(...) is null
Both the solutions below are designed NOT to launch the print dialog. These solutions will either print the active webpage to your local printer or to a PDF file without having to deal with the dialog.
UPDATED POST 08-19-2021
I wanted to save the output to PDF vs printing to paper. I was shocked how hard it was to print to a PDF using the geckodriver and selenium. With the 'chromedriver' you can call the function 'execute_cdp_cmd' and pass Page.printToPDF. The geckodriver doesn't have 'execute_cdp_cmd'.
When I looked through Stack Overflow for inspiration, I discovered multiple open questions on printing to PDF using the geckodriver with selenium. After seeing that this was a problem, I looked through the issues in selenium and the bug reports for mozilla. Again, this was a problem that others had encountered.
Some of the bug reports mentioned that certain switches used in the print process no longer worked.
profile.set_preference("print.print_to_file", True)
profile.set_preference("print.print_to_filename", "/tmp/file.pdf")
I decided to look at the source code for mozilla gecko-dev for a potential solution. After hours of research I found that the switches above were replaced with new ones and that another printer variable had also been replaced. After some testing, I was able to get your webpage to save as PDF.
The code below will print a webpage to a PDF with all the links enabled. I would recommend adding some error handling to the code. One part of the code that I still need to improve is the filename handling. You should be able to add a function that will rename the file, which would allow you to print as many files as you want in a single session.
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
# Script: open Yahoo Finance in Firefox, search for TSLA and save the result
# page through the "Mozilla Save to PDF" printer without a print dialog.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# The user agent is overridden through "general.useragent.override"; the
# earlier version passed a pasted code fragment as the preference name, so the
# override never took effect.
profile_options.set_preference('general.useragent.override', user_agent)
profile_options.set_preference("print_printer", "Mozilla Save to PDF")
profile_options.set_preference("print.always_print_silent", True)
profile_options.set_preference("print.show_print_progress", False)
profile_options.set_preference('print.save_as_pdf.links.enabled', True)
profile_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
# set your own file path
profile_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename',
                               "tmp/testprint.pdf")

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           firefox_profile=profile_options)
try:
    URL = 'https://finance.yahoo.com/'
    driver.get(URL)
    sleep(10)

    # Type the ticker into the search box and submit it.
    search_field_id = 'yfin-usr-qry'
    element_search_field = driver.find_element_by_id(search_field_id)
    element_search_field.clear()
    element_search_field.send_keys('TSLA')
    element_search_field.send_keys(Keys.ENTER)
    sleep(10)

    driver.execute_script("window.print()")
    # Keep the browser open for 20 seconds after triggering the print.
    sleep(20)
finally:
    # Always shut the browser down, even when a step above raises.
    driver.quit()
ORIGINAL POST 08-18-2021
I decided to look at your issue, because I'm interested in selenium functionality.
I looked through the source code of the geckodriver and found printUtils.js, which provides details on the switches used in the print process, such as these:
firefox_options.set_preference("print.always_print_silent", True)
firefox_options.set_preference("print.show_print_progress", False)
After removing some of your code and adding some, I was able to print to my HP printer with the code below without dealing with a print dialog box:
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
# Script: open Yahoo Finance in Firefox, search for TSLA and send the result
# page to the default printer without showing a print dialog.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# All preferences are set on the options object, so no separate FirefoxProfile
# is created (the earlier version built one and never used it).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# "general.useragent.override" is the user-agent preference; the earlier
# version passed a pasted code fragment as the preference name.
firefox_options.set_preference('general.useragent.override', user_agent)
firefox_options.set_preference("print.always_print_silent", True)
firefox_options.set_preference("print.show_print_progress", False)
firefox_options.set_preference("pdfjs.disabled", True)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options)
URL = 'https://finance.yahoo.com/'
driver.get(URL)
sleep(10)

# Type the ticker into the search box and submit it.
search_field_id = 'yfin-usr-qry'
element_search_field = driver.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
sleep(10)

# The browser is deliberately left open after this call, as in the original.
driver.execute_script("window.print()")
----------------------------------------
My system information
----------------------------------------
Platform: Apple
OS: 10.15.7
Python: 3.9
Selenium: 3.141
Firefox: 90.0.2
Geckodriver: 0.29.0
----------------------------------------
Adding these profile preferences should avoid presenting this pop-up:
profile.set_preference("print.always_print_silent", True)
profile.set_preference("print.show_print_progress", False)
UPD
After invoking the print dialog, please try accepting it with this code:
# switch to print preview window
driver.switch_to.window(driver.window_handles[-1])
time.sleep(0.5)
# Walk the nested shadow roots of the print preview down to its action button.
actionButton = driver.execute_script(
    "return document.querySelector('print-preview-app').shadowRoot.querySelector('#sidebar').shadowRoot.querySelector('print-preview-button-strip').shadowRoot.querySelector('.action-button')")
# Click the element that was just looked up; the earlier version clicked an
# undefined `cancelButton`, which raised NameError.
actionButton.click()
# switch back to main window
driver.switch_to.window(driver.window_handles[0])
I have the following bit of code:
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.proxy import Proxy, ProxyType
# One manual proxy endpoint is used for HTTP, FTP and SSL traffic alike.
proxy_address = '192.156.1.1:33'
proxy_settings = {
    'proxyType': ProxyType.MANUAL,
    'httpProxy': proxy_address,
    'ftpProxy': proxy_address,
    'sslProxy': proxy_address,
    'noProxy': '',  # set this value as desired
}
proxy = Proxy(proxy_settings)

# Page that reports the IP address the request arrived from.
url = 'http://www.expressvpn.com/what-is-my-ip'
driver_path = 'C:\\Users\\user\\geckodriver.exe'

browser = Firefox(proxy=proxy, executable_path=driver_path)
browser.get(url)
For some reason everytime i check the ip, it is showing my true IP and not the proxy IP. Why is it doing that and could you please advise how this can be accomplished? Is there some problem with the code?
I started looking into this and noted that proxies are set using WebDriver capabilities and proxy configurations in the geckodriver.
I used proxy information from these sources for testing.
Free proxy lists:
free-proxy.cz
Geonode
Please let me point out that using free proxy IP addresses can be highly problematic. These types of proxies are notorious for having connection issues, such as timeouts related to latency. Plus, these sites can also be intermittent, which means that they can go down at any time. And sometimes these sites are abused, so they can get blocked.
The code below uses DesiredCapabilities with selenium.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Script: start Firefox behind a manually configured SSL proxy by passing the
# proxy settings through DesiredCapabilities.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# All preferences are set on the options object, so no separate FirefoxProfile
# is created (the earlier version built one and never used it).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# "general.useragent.override" is the user-agent preference; the earlier
# version passed a pasted code fragment as the preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict, and
# writing the proxy into it directly would leak into every later session.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also do it this way:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Script: start Firefox behind an SSL proxy by filling in a selenium Proxy
# object and merging it into the desired capabilities.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# All preferences are set on the options object, so no separate FirefoxProfile
# is created (the earlier version built one and never used it).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# "general.useragent.override" is the user-agent preference; the earlier
# version passed a pasted code fragment as the preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict, and
# add_to_capabilities() below writes the proxy entry into whatever it is given.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also use the Python package http_request_randomizer to obtain a proxy IP address, which can be passed to the geckodriver.
import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
# Script: pick a random HTTPS proxy from http_request_randomizer and start
# Firefox behind it.

# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# All preferences are set on the options object, so no separate FirefoxProfile
# is created (the earlier version built one and never used it).
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# "general.useragent.override" is the user-agent preference; the earlier
# version passed a pasted code fragment as the preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a class-level dict, and
# add_to_capabilities() below writes the proxy entry into whatever it is given.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()
# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)
try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())
    # output
    # 93.183.250.200:53281
    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # Close the browser when the page could not be loaded through the proxy.
    driver.quit()
As previously stated, free proxies can have multiple issues. The code below shows how to use a proxy judge to check the status of an individual proxy.
import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
def random_ssl_proxy_address():
    """Return the address of one randomly sampled HTTPS proxy.

    The candidates come from ``RequestProxy.get_proxy_list()``; the log level
    is set to ERROR to suppress the console debugging output.
    """
    proxy_source = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
    candidates = proxy_source.get_proxy_list()
    # Sample a single proxy from the list of proxy addresses.
    (chosen_proxy,) = random.sample(candidates, 1)
    return chosen_proxy.get_address()
def get_proxy_address():
    """Return a random HTTPS proxy address that the proxy judge reports as up.

    Candidates are drawn with ``random_ssl_proxy_address()`` and checked with
    ``ProxyChecker.check_proxy()`` until one has a truthy ``'status'``.

    The earlier recursive version called itself on failure without returning
    the result, so it returned ``None`` whenever the first candidate was
    rejected. A loop returns the accepted address and cannot hit the
    recursion limit on a long run of bad proxies.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)
        # .get() treats a missing 'status' key as "not valid" instead of raising.
        if proxy_judge.get('status'):
            return proxy_address
        print('Looking for a valid proxy address.')
        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))
# Fetch one validated proxy address and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# output
# Valid proxy address: 98.116.152.143:3128
Please note that the proxy_checking package that I used doesn't have any embedded error handling, so you will have to add some to catch some of the errors.
I'm trying to quit and then restart a new browser session with Selenium when encountering a captcha, and I'm not sure yet why the code below isn't working.
It quits the existing driver, but after recursion browser.get() results in this error: ConnectionRefusedError: [Errno 61] Connection refused
Thanks in advance for any advice. I've included only the most relevant parts of the code below:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
# Location of the chromedriver binary (the file name contains a space).
path_to_chromedriver = '/Users/Myname/Desktop/a/chromedriver 2'

# Chrome launch flags. 'disable-infobars' and '--disable-notifications' were
# commented out in the original setup and are not applied.
options = webdriver.ChromeOptions()
for chrome_flag in ('start-maximized', '--disable-extensions'):
    options.add_argument(chrome_flag)

browser = webdriver.Chrome(executable_path=path_to_chromedriver, chrome_options=options)

# Header dict carrying a desktop Chrome user-agent string.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
}
# Load `url` in `browser`; when the page body contains the access-denied text,
# quit the browser, pause, and retry the URL.
# NOTE(review): the default for `browser` is an expression evaluated once, when
# this `def` statement executes, so every call that omits `browser` receives
# that same driver object -- including calls made after `browser.quit()` below.
# NOTE(review): the `try:` has no `except`/`finally` in this excerpt.
def get_page_info(url, browser = webdriver.Chrome(chrome_options=options, executable_path=path_to_chromedriver)):
browser.get(url)
try:
body = browser.find_element_by_tag_name('body')
if "been denied because we believe" in body.text:
print("going to new session...")
browser.quit()
human(4,6) #time delay
# NOTE(review): `winery_info` is not defined in this excerpt; presumably the
# recursive call to this function under another name -- confirm.
return winery_info(url)
Edit: I normally wouldn't use this tactic to get around a captcha, but in my use case this makes sense.
Try to use the driver.delete_all_cookies() method instead of closing browser and reopening it
Edit: maybe the site is blocking your IP address. I suggest you use Tor to change your IP automatically; here is an example:
import os
import time
def _restart_tor():
    """Kill any running tor process, start a new one in the background, then sleep 5 seconds."""
    os.system("killall tor")
    os.system("tor &")
    time.sleep(5)


_restart_tor()

# init driver: send the browser's traffic through the local SOCKS proxy
fp = webdriver.FirefoxProfile()
for pref_name, pref_value in (
    ("network.proxy.type", 1),
    ("network.proxy.socks", "127.0.0.1"),
    ("network.proxy.socks_port", 9050),
):
    fp.set_preference(pref_name, pref_value)
fp.update_preferences()
browser = webdriver.Firefox(firefox_profile=fp)
browser.get(...)
...
...
if captcha:
    _restart_tor()
    browser.get(...)
    # this will change your IP address
    # You can also configure tor to change IP every 10 seconds by changing the torrc file
Trying to screen scrape a web site without having to launch an actual browser instance in a python script (using Selenium). I can do this with Chrome or Firefox - I've tried it and it works - but I want to use PhantomJS so it's headless.
The code looks like this:
import sys
import traceback
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Script: load a page headlessly with PhantomJS and dump its HTML to out.html.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
    "(KHTML, like Gecko) Chrome/15.0.87"
)

# Bound before the try so the handlers below can tell whether the driver was
# ever created; otherwise a failing constructor would raise NameError there.
browser = None
try:
    # Choose our browser
    browser = webdriver.PhantomJS(desired_capabilities=dcap)
    #browser = webdriver.PhantomJS()
    #browser = webdriver.Firefox()
    #browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")

    # Go to the login page
    browser.get("https://www.whatever.com")

    # For debug, see what we got back
    html_source = browser.page_source
    with open('out.html', 'w') as f:
        f.write(html_source)

    # PROCESS THE PAGE (code removed)
except Exception:
    # Top-level boundary: capture what the browser shows, then log the traceback.
    if browser is not None:
        browser.save_screenshot('screenshot.png')
    traceback.print_exc(file=sys.stdout)
finally:
    if browser is not None:
        # quit() ends the driver process as well; close() only closes the window.
        browser.quit()
The output is merely:
<html><head></head><body></body></html>
But when I use the Chrome or Firefox options, it works fine. I thought maybe the web site was returning junk based on the user agent, so I tried faking that out. No difference.
What am I missing?
UPDATED: I will try to keep the snippet below updated until it works. What's below is what I'm currently trying.
import sys
import traceback
import time
import re
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 (KHTML, like Gecko) Chrome/15.0.87")
try:
# Set up our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
print "getting web page..."
browser.get("https://www.website.com")
# Need to wait for the page to load
timeout = 10
print "waiting %s seconds..." % timeout
wait = WebDriverWait(browser, timeout)
element = wait.until(EC.element_to_be_clickable((By.ID,'the_id')))
print "done waiting. Response:"
# Rest of code snipped. Fails as "wait" above.
I was facing the same problem and no amount of code to make the driver wait was helping.
The problem is the SSL encryption on HTTPS websites; ignoring SSL errors will do the trick.
Call the PhantomJS driver as:
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This solved the problem for me.
You need to wait for the page to load. Usually, it is done by using an Explicit Wait to wait for a key element to be present or visible on a page. For instance:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# ...
browser.get("https://www.whatever.com")
# Wait on the same driver instance that loaded the page (`browser`); the
# earlier version passed an undefined `driver` here.
wait = WebDriverWait(browser, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.content")))
html_source = browser.page_source
# ...
Here, we'll wait up to 10 seconds for a div element with class="content" to become visible before getting the page source.
Additionally, you may need to ignore SSL errors:
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
Though, I'm pretty sure this is related to the redirecting issues in PhantomJS. There is an open ticket in phantomjs bugtracker:
PhantomJS does not follow some redirects
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This worked for me
I am attempting to use Selenium/BeautifulSoup to unit test a web page. I am getting an error though that I haven't been able to Google.
selenium.common.exceptions.WebDriverException: Message: ''
I am using a Portable version of Firefox and a proxy.
import urllib2
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
import time
import sys
# Start a portable Firefox with a custom profile, load `url` and parse the
# page source with BeautifulSoup.
# NOTE(review): the body appears without indentation and ends without a
# return statement in this excerpt.
def getItemDivs(url):
profile = webdriver.FirefoxProfile()
profile.set_preference("general.useragent.override","Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/21.0")
# NOTE(review): only the proxy host is set here; no proxy port preference
# appears anywhere in this function.
profile.set_preference("network.proxy.http", "proxy.example.com")
ffbin = webdriver.firefox.firefox_binary.FirefoxBinary('C:\\FirefoxPortable\\App\\Firefox\\firefox.exe')
# IT FAILS ON THE NEXT LINE
driver=webdriver.Firefox(profile, firefox_binary=ffbin)
driver.implicitly_wait(30)
# THIS LINE CONTAINS A VALID COOKIE, BUT IT HAS BEEN REMOVED FOR THIS QUESTION.
# (`<<mycookie>>` is a placeholder, not valid Python.)
driver.add_cookie(<<mycookie>>)
base_url = url
# The next two names are assigned but not used in the lines shown here.
verificationErrors = []
accept_next_alert = True
driver.get(base_url)
scrap1 = driver.page_source
soup = BeautifulSoup(scrap1)
This question is similar to this one, however, in that question they had a successful first request. I haven't had a success.
What can cause this type of exception but leave the message empty?
The problem was that I didn't set the network.proxy.port. Adding this line solved the problem:
profile.set_preference("network.proxy.port", "80")