How to restart Selenium browser after quit? - python

I'm trying to quit and then restart a new browser session with Selenium when encountering a captcha, and I'm not sure yet why the code below isn't working.
It quits the existing driver, but after recursion browser.get() results in this error: ConnectionRefusedError: [Errno 61] Connection refused
Thanks in advance for any advice. I've included only the most relevant parts of the code below:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
path_to_chromedriver = '/Users/Myname/Desktop/a/chromedriver 2'
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
#options.add_argument('disable-infobars')
#options.add_argument('--disable-notifications')
options.add_argument('--disable-extensions')
browser = webdriver.Chrome(chrome_options=options, executable_path=path_to_chromedriver)
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'}
def get_page_info(url, browser = webdriver.Chrome(chrome_options=options, executable_path=path_to_chromedriver)):
browser.get(url)
try:
body = browser.find_element_by_tag_name('body')
if "been denied because we believe" in body.text:
print("going to new session...")
browser.quit()
human(4,6) #time delay
return winery_info(url)
Edit: I normally wouldn't use this tactic to get around a captcha, but in my use case this makes sense.

Try to use the driver.delete_all_cookies() method instead of closing browser and reopening it
edit : maybe the site block your ip adress i suggest you to use tor to change ip automatically i will give you this
import os
import time
os.system("killall tor")
os.system("tor &")
time.sleep(5)
#init driver
fp = webdriver.FirefoxProfile()
fp.set_preference("network.proxy.type", 1)
fp.set_preference("network.proxy.socks", "127.0.0.1")
fp.set_preference("network.proxy.socks_port", int("9050"))
fp.update_preferences()
browser = webdriver.Firefox(firefox_profile=fp)
browser.get(...)
...
...
if captcha:
os.system("killall tor")
os.system("tor &")
time.sleep(5)
browser.get(...)
# this will change your ip adress
# You can also configure tor to change ip every 10 seconds by changing torrc file

Related

Selenium Scrape not reading all elements

I am trying to scrape data from the following site. I was able to click on load more yet the code doesn't catch most of the elements and I do not really know what to do.
url = 'https://www.carrefouregypt.com/mafegy/en/c/FEGY1701230'
products = []
options = Options()
driver = webdriver.Chrome(options = options)
driver.get(url)
time.sleep(8)
#click on load more
while True:
try:
btn_class = 'css-1n3fqy0'
btn = driver.find_element(By.CLASS_NAME , btn_class)
btn.click()
driver.implicitly_wait(10)
except NoSuchElementException:
break
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
time.sleep(8)
The following code will click that button until it cannot locate it, and exit gracefully:
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as Firefox_Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
import time as t
firefox_options = Firefox_Options()
firefox_options.add_argument("--width=1280")
firefox_options.add_argument("--height=720")
# firefox_options.headless = True
firefox_options.set_preference("general.useragent.override", "Mozilla/5.0 (Linux; Android 7.0; SM-A310F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36 OPR/42.7.2246.114996")
driverService = Service('chromedriver/geckodriver')
browser = webdriver.Firefox(service=driverService, options=firefox_options)
url = 'https://www.carrefouregypt.com/mafegy/en/c/FEGY1701230'
browser.get(url)
t.sleep(5)
while True:
try:
load_more_button = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH,'//button[text()="Load More"]')))
browser.execute_script('window.scrollBy(0, 100);')
load_more_button.click()
print('clicked')
t.sleep(3)
except TimeoutException:
print('all elements loaded in page')
break
It's using Firefox, on a linux setup (for some reasons Chrome was temperamental on this one). You just have to observe the imports, and the code after defining the browser/driver. Selenium documentation: https://www.selenium.dev/documentation/

upload files to google without using APIs with Selenium in headerless mode?

I currently have this code
from undetected_chromedriver import Chrome
from undetected_chromedriver import ChromeOptions
import os
from time import sleep
from selenium.webdriver import ActionChains
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
opts = ChromeOptions()
opts.add_argument(f'--user-data-dir={os.getcwd()}/driver/profile')
opts.add_argument(f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
driver = Chrome(executable_path=f'{os.getcwd()}/driver/chromedriver.exe', options=opts)
sleep(2)
driver.get('https://drive.google.com/drive/my-drive')
sleep(5)
actionChains = ActionChains(driver)
sleep(2)
actionChains.click(driver.find_element(By.XPATH, '//*[#guidedhelpid="new_menu_button"]')).perform()
sleep(1)
WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, '//*[#data-tooltip="File upload"]')))
actionChains.click(driver.find_element(By.XPATH, '//*[#data-tooltip="File upload"]')).perform()
self.driver.find_element(By.XPATH, '//input[#type="file"]').send_keys(f'{os.getcwd()}/downloads/english.srt')
If I have the window open, it works without a problem, it can upload the file, the problem is when I try to use the mode without header, it does not detect the File Upload element, so I have tried another option on drag a dop and have not had any luck, I have been trying for several days and I'm about to throw in the towel
option with drag and drop
JS_DROP_FILES = "var k=arguments,d=k[0],g=k[1],c=k[2],m=d.ownerDocument||document;for(var e=0;;){var f=d.getBoundingClientRect(),b=f.left+(g||(f.width/2)),a=f.top+(c||(f.height/2)),h=m.elementFromPoint(b,a);if(h&&d.contains(h)){break}if(++e>1){var j=new Error('Element not interactable');j.code=15;throw j}d.scrollIntoView({behavior:'instant',block:'center',inline:'center'})}var l=m.createElement('INPUT');l.setAttribute('type','file');l.setAttribute('multiple','');l.setAttribute('style','position:fixed;z-index:2147483647;left:0;top:0;');l.onchange=function(q){l.parentElement.removeChild(l);q.stopPropagation();var r={constructor:DataTransfer,effectAllowed:'all',dropEffect:'none',types:['Files'],files:l.files,setData:function u(){},getData:function o(){},clearData:function s(){},setDragImage:function i(){}};if(window.DataTransferItemList){r.items=Object.setPrototypeOf(Array.prototype.map.call(l.files,function(x){return{constructor:DataTransferItem,kind:'file',type:x.type,getAsFile:function v(){return x},getAsString:function y(A){var z=new FileReader();z.onload=function(B){A(B.target.result)};z.readAsText(x)},webkitGetAsEntry:function w(){return{constructor:FileSystemFileEntry,name:x.name,fullPath:'/'+x.name,isFile:true,isDirectory:false,file:function z(A){A(x)}}}}}),{constructor:DataTransferItemList,add:function t(){},clear:function p(){},remove:function n(){}})}['dragenter','dragover','drop'].forEach(function(v){var w=m.createEvent('DragEvent');w.initMouseEvent(v,true,true,m.defaultView,0,0,0,b,a,false,false,false,false,0,null);Object.setPrototypeOf(w,null);w.dataTransfer=r;Object.setPrototypeOf(w,DragEvent.prototype);h.dispatchEvent(w)})};m.documentElement.appendChild(l);l.getBoundingClientRect();return l"
def drop_files(element, files, offsetX=0, offsetY=0):
driver = element.parent
isLocal = not driver._is_remote or '127.0.0.1' in driver.command_executor._url
paths = []
# ensure files are present, and upload to the remote server if session is remote
for file in (files if isinstance(files, list) else [files]):
if not os.path.isfile(file):
raise FileNotFoundError(file)
paths.append(file if isLocal else element._upload(file))
value = '\n'.join(paths)
elm_input = driver.execute_script(JS_DROP_FILES, element, offsetX, offsetY)
elm_input._execute('sendKeysToElement', {'value': [value], 'text': value})
WebElement.drop_files = drop_files
dropzone = driver.find_element_by_xpath('//c-wiz[#data-region-root]')
dropzone.drop_files(f'{os.getcwd()}\\downloads\\english.srt')

There is an error in registering on the site using webdriver Selenium Python

I am trying to register a new account for this site. However, I cannot register there because of an error or block for ChromeDriver (selenium Python).
I am using this code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import random
import string
from time import sleep
from selenium.webdriver.common.keys import Keys
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
options = webdriver.ChromeOptions()
options.add_argument(f'user-agent={user_agent}')
options.add_argument('disable-infobars')
options.add_argument('--profile-directory=Default')
options.add_argument("--incognito")
options.add_argument("--disable-plugins-discovery")
options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors", "safebrowsing-disable-download-protection", "safebrowsing-disable-auto-update", "disable-client-side-phishing-detection"])
options.add_argument('--disable-extensions')
options.add_argument("start-maximized")
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'chromedriver1.exe')
driver.get('https://www.nordstrom.com/signin?cm_sp=SI_SP_A-_-SI_SP_B-_-SI_SP_C&origin=tab&ReturnURL=https%3A%2F%2Fwww.nordstrom.com%2F')
def email(stringLength=8):
letters = string.ascii_lowercase
return ''.join(random.choice(letters) for i in range(stringLength))
email = email(6) + "#gmail.com"
sleep(5)
# email
driver.find_element_by_name("email").send_keys(email)
sleep(5)
# next
driver.find_element_by_id('account-check-next-button').send_keys(Keys.ENTER)
I think the website is blocking WebDriver. When I use Chrome in my computer, I don't encounter any problems, but using ChromeDriver, this is the issue I receive.
Open form use:
driver.get('https://www.nordstrom.com/signin')
Not
driver.get('https://www.nordstrom.com/signin?cm_sp=SI_SP_A-_-SI_SP_B-_-SI_SP_C&origin=tab&ReturnURL=https%3A%2F%2Fwww.nordstrom.com%2F')
Try to use explicit wait before click:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable(
(By.CSS_SELECTOR, 'button[alt="next button"]')))
btn = driver.find_element_by_css_selector('button[alt="next button"]')
btn.click()
It must work because I reproduced your error. The problem is that:
You should use click() for clicking, not send_keys(Keys.ENTER) In your case you click Enter before email is completely input, just before # symbol.
Your time.sleep(5) is not enough. Use explicit wait. Or, in the works case increase your sleep (if you don't case about the speed)
Read here how to use Selenium's wait instead on time.sleep()
Update:
Clear all cookies, cache, application data for this site and try again. First manually. Looks like it block users after some unsuccessful attempts. Even valid emails.
UPDATE:
Unfortunately, Nordstrom is blocking automated requests...
Nordstrom is tracking all customers actions, so it's unlikely to be used for testing. I would suggest to try other sites to save your time.

Access denied to website using webdriver with Selenium

I have a problem with even an open website using "webdriver Chrome". Only trying to open the website end with "Access denied" information and don't know why.
Below is my code:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
class PriceCheckPhoenix:
def __init__(self):
self.url_login = "https://www.phoenixcontact.com/online/portal/pl?1dmy&urile=wcm%3apath%3a/plpl/web/home"
self.create_session()
def create_session(self):
# Run browser with webdriver
driver = webdriver.Chrome(executable_path="D:/chromedriver_v84.exe")
driver.get(self.url_login)
time.sleep(2)
# Find link to sub-website with login
link = driver.find_element_by_xpath('//*[#id="pxc-funcnav"]/div[3]/ul/li[1]/a').get_attribute("href")
driver.get(link)
time.sleep(100)
Description to code:
#1 I create browser chrome session
#2 Loading first website from self.url_login
#3 Is loaded
#4 I need to find a link behind the active text on the website to log in
#5 I found it and try to open this, but the response after getting a link is:
Access Denied
You don't have permission to access
"http://www.phoenixcontact.com/online/portal/pl/pxc/offcontext/login/!ut/p/z1/tZJNa4NAEIZ_Sw45yszuuro9WkO1xqY2EqN7EbXGWPzYFDGlv74Gcio0oYTMZRgY3mcYHpAQg-yysa6yoe67rJnmRBqpu4zownzixDEYx2cWmIYTeYgrHSKQIFVRv0MieJZTZEITglFNLwTXRPaw03RGC6Qm10nOTttFN6hhD4lqVDPHY5nPcd-3JSQTy0ypQ5C4Onl5XUcmvgXCttzNWo-WCNuxLo-w6frPdjot_CfZxWsEciPhSjy7a7xN7xt_63M8kJdNmlSrPw4HaU2G9N1Qfg0Q_1Zke4JeiPHIeQH_KAshVE0a-GkQ24EPqm0F41WbLh5XWuKN3-fm78KgsmazH7dw0Ts!/dz/d5/L0lJSklKQ2dwUkEhIS9JRGpBQUF4QUFFUkNwcVlxLzRObEdRb1lwTWhUalVFZyEvWjZfR0FMNjE0ODI4RzNEQzBJMklPMlA2OTFHMDMvWjdfR0FMNjE0ODI4RzNEQzBJMklPMlA2OTFHSTcvdGFyZ2V0Vmlldy9sb2dpbg!!/" on this server.
Reference #18.d58655f.1597921471.5b29112
Is anyone know what is wrong here? :( When I try to load the website from the link in normal Chrome browser it's all fine :/
Thank you all for any help.
Please try the below code and let me know if it works for you :-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
options = Options()
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36'
options.add_argument('user-agent={0}'.format(user_agent))
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)
action = ActionChains(driver)
driver.get("https://www.phoenixcontact.com/online/portal/pl?1dmy&urile=wcm%3apath%3a/plpl/web/home")
Login_Btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[#class='pxc-fn-login']/a")))
action.move_to_element(Login_Btn).click().perform()
Note - Please make the changes in your code accordingly.
Google search brought me here. After trying several options. Undetected Chromedriver with a very simple script without any options worked for me.
import undetected_chromedriver as uc
driver = uc.Chrome()
driver.get(<url here>)

PhantomJS returning empty web page (python, Selenium)

Trying to screen scrape a web site without having to launch an actual browser instance in a python script (using Selenium). I can do this with Chrome or Firefox - I've tried it and it works - but I want to use PhantomJS so it's headless.
The code looks like this:
import sys
import traceback
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
"(KHTML, like Gecko) Chrome/15.0.87"
)
try:
# Choose our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap)
#browser = webdriver.PhantomJS()
#browser = webdriver.Firefox()
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
browser.get("https://www.whatever.com")
# For debug, see what we got back
html_source = browser.page_source
with open('out.html', 'w') as f:
f.write(html_source)
# PROCESS THE PAGE (code removed)
except Exception, e:
browser.save_screenshot('screenshot.png')
traceback.print_exc(file=sys.stdout)
finally:
browser.close()
The output is merely:
<html><head></head><body></body></html>
But when I use the Chrome or Firefox options, it works fine. I thought maybe the web site was returning junk based on the user agent, so I tried faking that out. No difference.
What am I missing?
UPDATED: I will try to keep the below snippet updated with until it works. What's below is what I'm currently trying.
import sys
import traceback
import time
import re
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 (KHTML, like Gecko) Chrome/15.0.87")
try:
# Set up our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
print "getting web page..."
browser.get("https://www.website.com")
# Need to wait for the page to load
timeout = 10
print "waiting %s seconds..." % timeout
wait = WebDriverWait(browser, timeout)
element = wait.until(EC.element_to_be_clickable((By.ID,'the_id')))
print "done waiting. Response:"
# Rest of code snipped. Fails as "wait" above.
I was facing the same problem and no amount of code to make the driver wait was helping.
The problem is the SSL encryption on the https websites, ignoring them will do the trick.
Call the PhantomJS driver as:
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This solved the problem for me.
You need to wait for the page to load. Usually, it is done by using an Explicit Wait to wait for a key element to be present or visible on a page. For instance:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# ...
browser.get("https://www.whatever.com")
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.content")))
html_source = browser.page_source
# ...
Here, we'll wait up to 10 seconds for a div element with class="content" to become visible before getting the page source.
Additionally, you may need to ignore SSL errors:
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
Though, I'm pretty sure this is related to the redirecting issues in PhantomJS. There is an open ticket in phantomjs bugtracker:
PhantomJS does not follow some redirects
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This worked for me

Categories

Resources