I have the following bit of code:
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.proxy import Proxy, ProxyType
proxy = Proxy({
'proxyType': ProxyType.MANUAL,
'httpProxy': '192.156.1.1:33',
'ftpProxy': '192.156.1.1:33',
'sslProxy': '192.156.1.1:33',
'noProxy': '' # set this value as desired
})
url = 'http://www.expressvpn.com/what-is-my-ip'
driver_path = 'C:\\Users\\user\\geckodriver.exe'
browser = Firefox(executable_path = driver_path, proxy = proxy)
browser.get(url)
For some reason everytime i check the ip, it is showing my true IP and not the proxy IP. Why is it doing that and could you please advise how this can be accomplished? Is there some problem with the code?
I started looking into this and noted that proxies are set using WebDriver capabilities and proxy configurations in the geckodriver.
I used proxy information for these sources from testing.
Free proxy lists:
free-proxy.cz
Geonode
Please let me point that using free proxy IP addresses can be highly problematic. These type of proxies are notorious for having connections issues, such as timeouts related to latency. Plus these sites can also be intermittent, which means that they can go down at anytime. And sometimes these sites are being abused, so they can get blocked.
The code below uses DesiredCapabilities with selenium.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('profile_options = FirefoxProfile()', user_agent)
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_capabilities['proxy'] = {
"proxyType": "MANUAL",
"sslProxy": '34.95.40.165:3128',
}
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options, desired_capabilities=firefox_capabilities)
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also do it this way:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('profile_options = FirefoxProfile()', user_agent)
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
desired_capabilities=firefox_capabilities)
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also use the Python package http_request_randomize to obtain a proxy IP address, which can be passed to the geckodriver.
import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('profile_options = FirefoxProfile()', user_agent)
firefox_capabilities = DesiredCapabilities().FIREFOX
# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
desired_capabilities=firefox_capabilities)
try:
# print proxy IP for testing
print(random_proxy[0].get_address())
# output
93.183.250.200:53281
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
except TimeoutException as e:
print("A Page load Timeout Occurred.")
driver.quit()
As previously stated free proxy can have multiple issue. The code below shows how to use a proxy judge to check the status of an individual proxy.
import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
def random_ssl_proxy_address():
# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)
return random_proxy[0].get_address()
def get_proxy_address():
proxy_address = random_ssl_proxy_address()
checker = ProxyChecker()
proxy_judge = checker.check_proxy(proxy_address)
proxy_status = [value for key, value in proxy_judge.items() if key == 'status']
if proxy_status[0]:
return proxy_address
else:
print('Looking for a valid proxy address.')
# this sleep timer is helping with some timeout issues
# that were happening when querying
sleep(randint(5, 10))
get_proxy_address()
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# output
Valid proxy address: 98.116.152.143:3128
Please note that the proxy_checker Package that I used doesn't have any embedded error handling, so you will have to add some to catch some of the errors.
Related
Every time I opened an new browser in my for loop for set new proxy but is there any way I can set new proxy in every request without opening the new browser. I want browser will be open once then it will be set new proxy for every loop.
here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium_stealth import stealth
from selenium.webdriver.common.action_chains import ActionChains
import time, csv
from proxy import *
ip_list = []
free_proxy_list_func(ip_list) # this function scraping proxy from website
options = Options()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(options=options,desired_capabilities = proxy_func(ip_list))
stealth(driver,
languages=["en-US", "en"],
vendor="Google Inc.",
platform="Win32",
webgl_vendor="Intel Inc.",
renderer="Intel Iris OpenGL Engine",
fix_hairline=True,
)
file1 = open('url.txt', 'r')
Lines = file1.readlines()
for i in Lines:
product_link = i
driver = webdriver.Chrome(options=options,desired_capabilities = proxy_func(ip_list)) #proxy_func(ip_list) this my proxy function
driver.get(i)
if write driver = webdriver.Chrome(options=options,desired_capabilities = proxy_func(ip_list)) outside of my for loop then it will not set new proxy on every url request.
here is my proxy function:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy, ProxyType
def proxy_func(ip_list):
print('total number of ip',len(ip_list))
item = random.choice(tuple(ip_list))
print("ip: ",item)
proxy_ip_port = item
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = proxy_ip_port
proxy.ssl_proxy = proxy_ip_port
capabilities = webdriver.DesiredCapabilities.CHROME
proxy.add_to_capabilities(capabilities)
i have this .py code that parse some info from a page, and it checks in driver.requests if request.url contains a specific url and prints the body response. Script is working good without proxy, but after i enable the proxy capabilities, driver.requests can't intercept browser traffic anymore, even if site is loading over proxy.
import sys
import json
import time
from time import sleep
from seleniumwire import webdriver
from seleniumwire.utils import decode
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.by import By
imei=sys.argv[1]
while True:
chrome_options = webdriver.ChromeOptions()
chrome_options.headless = False #Debug
chrome_options.add_argument("--disable-logging")
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-web-security")
chrome_options.add_argument("--disable-popup-blocking")
prefs = {"profile.managed_default_content_settings.images": 2, "profile.default_content_setting_values.notifications" : 2, 'profile.default_content_setting_values':{'notifications': 1,'geolocation': 1},'profile.managed_default_content_settings':{'geolocation': 1}}
chrome_options.add_experimental_option("prefs", prefs)
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
caps["acceptSslCerts"] = True
caps["acceptInsecureCerts"] = True
caps['proxy'] = {
"httpProxy":"http://localhost:24002",
"ftpProxy":"http://localhost:24002",
"sslProxy":"http://localhost:24002",
"noProxy":None,
"proxyType":"MANUAL",
"autodetect":False
}
driver = webdriver.Chrome(desired_capabilities=caps, executable_path='/Users/admin/.wdm/drivers/chromedriver/mac64/103.0.5060/chromedriver',chrome_options=chrome_options)
driver.get('https://prepaid.t-mobile.com/bring-your-own-device')
driver.implicitly_wait(10)
driver.find_element(By.ID, 'tmo-radio-button-validation-option').click()
driver.find_element("xpath",'//input[#placeholder="IMEI number"]').send_keys(imei)
driver.find_element("xpath",'//*[text()="Check compatibility "]').click()
sleep(5)
run=0
while run==0:
for request in driver.requests:
if request.url =="https://facade.saas.api.t-mobile.com/reb3-product/v1/devices/compatibility-check":
try:
jsn=str(decode(request.response.body))[2:-1]
print(jsn)
run=1
except:
pass
exit()
break
I have the following code:
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
url = 'https://finance.yahoo.com/'
driver_path = 'geckodriver.exe'
browser = Firefox(executable_path = driver_path)
browser.get(url)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
profile.set_preference("browser.helperApps.neverAsk.openFile", "application/pdf")
search_field_id = 'yfin-usr-qry'
element_search_field = browser.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
from selenium.webdriver import ActionChains
action_chains = ActionChains(browser)
action_chains.key_down(Keys.CONTROL).send_keys('V').key_up(Keys.CONTROL).perform()
xpath_string = '/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[6]/div/div/section/div/ul/li[2]/a/span'
element = browser.find_element_by_xpath(xpath_string)
action_chains.move_to_element(element).click().perform()
browser.execute_script('window.print();')
A print dialog box pops up for Firefox. I was wondering how can i accept it. Is there a way to bypass this dialog box and directly print since this is not a system dialog box but Firefox's.
Edit:
My full updated code as per input from #Prophet
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import time
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
ua = UserAgent()
userAgent = ua.random
url = 'https://finance.yahoo.com/'
driver_path = 'geckodriver.exe'
profile = FirefoxProfile('C:\\Users\\\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\tp3cz5dm.default-release')
profile.set_preference("general.useragent.override", userAgent)
browser = Firefox(executable_path = driver_path)
browser.get(url)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
profile.set_preference("browser.helperApps.neverAsk.openFile", "application/pdf")
search_field_id = 'yfin-usr-qry'
element_search_field = browser.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
from selenium.webdriver import ActionChains
action_chains = ActionChains(browser)
action_chains.key_down(Keys.CONTROL).send_keys('V').key_up(Keys.CONTROL).perform()
# xpath_string = '/html/body/div[1]/div/div/div[1]/div/div[2]/div/div/div[6]/div/div/section/div/ul/li[2]/a/span'
# element = browser.find_element_by_xpath(xpath_string)
# action_chains.move_to_element(element).click().perform()
browser.execute_script('window.print();')
browser.switch_to.window(browser.window_handles[-1])
time.sleep(0.5)
actionButton = browser.execute_script(
"return document.querySelector('print-preview-app').shadowRoot.querySelector('#sidebar').shadowRoot.querySelector('print-preview-button-strip').shadowRoot.querySelector('.action-button')")
cancelButton.click()
# switch back to main window
browser.switch_to.window(driver.window_handles[0])
When i run this i am getting error:
JavascriptException: TypeError: document.querySelector(...) is null
Both the solutions below are designed NOT to launch the print dialog. These solutions will either print the active webpage to your local printer or to a PDF file without having to deal with the dialog.
UPDATED POST 08-19-2021
I wanted to save the output to PDF vs printing to paper. I was shocked how hard it was to print to a PDF using the geckodriver and selenium. With the 'chromedriver' you can call the function 'execute_cdp_cmd' and pass Page.printToPDF. The geckodriver doesn't have 'execute_cdp_cmd'.
When I looked through Stack Overflow for inspiration, I discover multiple open question on printing pdf using the geckodriver with selenium. After seeing that this was a problem, I looked through the issues in selenium and the bug reports for mozilla. Again this was a problem that others had.
Some of the bug reports mentioned that certain switches used in the print process no longer worked.
profile.set_preference("print.print_to_file", True)
profile.set_preference("print.print_to_filename", "/tmp/file.pdf")
I decided to look at the source code for mozilla gecko-dev for a potential solution. After hours of research I found that the switches above were replaced with new ones and that another printer variable had also been replaced. After some testing, I was able to get your webpage to save as PDF.
The code below will print a webpage to a PDF with all the links enabled. I would recommend adding some error handling to the code. One part of the code that I need to improve on the filename part. You should be able to add a function that will rename the file, which would allow you to print as many files as you want in a single session.
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
profile_options.set_preference('profile_options = FirefoxProfile()', user_agent)
profile_options.set_preference("print_printer", "Mozilla Save to PDF")
profile_options.set_preference("print.always_print_silent", True)
profile_options.set_preference("print.show_print_progress", False)
profile_options.set_preference('print.save_as_pdf.links.enabled', True)
profile_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
# set your own file path
profile_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename',
"tmp/testprint.pdf")
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
firefox_profile=profile_options)
URL = 'https://finance.yahoo.com/'
driver.get(URL)
sleep(10)
search_field_id = 'yfin-usr-qry'
element_search_field = driver.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
sleep(10)
driver.execute_script("window.print()")
sleep(20)
driver.quit()
ORIGINAL POST 08-18-2021
I decided to look at your issue, because I'm interested in selenium functionality.
I looked through the source code of the geckodriver and found printUtils.js, which provides details on the switches used in the print process, such as these:
firefox_options.set_preference("print.always_print_silent", True)
firefox_options.set_preference("print.show_print_progress", False)
After removing some of your code and adding some, I was able to print to my HP printer with the code below without dealing with a print dialog box:
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")
profile_options = FirefoxProfile()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('profile_options = FirefoxProfile()', user_agent)
firefox_options.set_preference("print.always_print_silent", True)
firefox_options.set_preference("print.show_print_progress", False)
firefox_options.set_preference("pdfjs.disabled", True)
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options)
URL = 'https://finance.yahoo.com/'
driver.get(URL)
sleep(10)
search_field_id = 'yfin-usr-qry'
element_search_field = driver.find_element_by_id(search_field_id)
element_search_field.clear()
element_search_field.send_keys('TSLA')
element_search_field.send_keys(Keys.ENTER)
sleep(10)
driver.execute_script("window.print()")
----------------------------------------
My system information
----------------------------------------
Platform: Apple
OS: 10.15.7
Python: 3.9
Selenium: 3.141
Firefox: 90.0.2
Geckodriver: 0.29.0
----------------------------------------
Adding these profile preferences should avoid presenting this pop-up:
profile.set_preference("print.always_print_silent", True)
profile.set_preference("print.show_print_progress", False)
UPD
After involving the printing dialog please try accepting it by this code:
# switch to print preview window
driver.switch_to.window(driver.window_handles[-1])
time.sleep(0.5)
actionButton = driver.execute_script(
"return document.querySelector('print-preview-app').shadowRoot.querySelector('#sidebar').shadowRoot.querySelector('print-preview-button-strip').shadowRoot.querySelector('.action-button')")
cancelButton.click()
# switch back to main window
driver.switch_to.window(driver.window_handles[0])
I'm trying to quit and then restart a new browser session with Selenium when encountering a captcha, and I'm not sure yet why the code below isn't working.
It quits the existing driver, but after recursion browser.get() results in this error: ConnectionRefusedError: [Errno 61] Connection refused
Thanks in advance for any advice. I've included only the most relevant parts of the code below:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
path_to_chromedriver = '/Users/Myname/Desktop/a/chromedriver 2'
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
#options.add_argument('disable-infobars')
#options.add_argument('--disable-notifications')
options.add_argument('--disable-extensions')
browser = webdriver.Chrome(chrome_options=options, executable_path=path_to_chromedriver)
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'}
def get_page_info(url, browser = webdriver.Chrome(chrome_options=options, executable_path=path_to_chromedriver)):
browser.get(url)
try:
body = browser.find_element_by_tag_name('body')
if "been denied because we believe" in body.text:
print("going to new session...")
browser.quit()
human(4,6) #time delay
return winery_info(url)
Edit: I normally wouldn't use this tactic to get around a captcha, but in my use case this makes sense.
Try to use the driver.delete_all_cookies() method instead of closing browser and reopening it
edit : maybe the site block your ip adress i suggest you to use tor to change ip automatically i will give you this
import os
import time
os.system("killall tor")
os.system("tor &")
time.sleep(5)
#init driver
fp = webdriver.FirefoxProfile()
fp.set_preference("network.proxy.type", 1)
fp.set_preference("network.proxy.socks", "127.0.0.1")
fp.set_preference("network.proxy.socks_port", int("9050"))
fp.update_preferences()
browser = webdriver.Firefox(firefox_profile=fp)
browser.get(...)
...
...
if captcha:
os.system("killall tor")
os.system("tor &")
time.sleep(5)
browser.get(...)
# this will change your ip adress
# You can also configure tor to change ip every 10 seconds by changing torrc file
I was trying to get the embedded video URL from https://www.fmovies.is . I'm using selenium.PhantomJS(). The exact same code works perfectly if I use selenium.Firefox() driver . It seems as though I'm doing something wrong during the waiting phase.
If someone could point out what I was doing wrong , I would really appreciate it.
from bs4 import BeautifulSoup
from selenium import webdriver
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import DesiredCapabilities
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)AppleWebKit 537.36 (KHTML, like Gecko) Chrome"
desired_capabilities['phantomjs.page.customHeaders.Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
url = "https://fmovies.is/film/kung-fu-panda-2.9kx/q8kkyj"
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'],desired_capabilities=desired_capabilities)
driver.get(url)
try:
element = WebDriverWait(driver, 100).until(EC.presence_of_element_located((By.ID, "jw")))
finally:
driver.find_element_by_id("player").click()
pageSource = driver.page_source
soup = BeautifulSoup(pageSource,'lxml')
url = soup.find("video",{"class":"jw-video"})
print url
videoURL = ''
if url:
videoURL = url['src']
print videoURL