I have a problem with even an open website using "webdriver Chrome". Only trying to open the website end with "Access denied" information and don't know why.
Below is my code:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
class PriceCheckPhoenix:
def __init__(self):
self.url_login = "https://www.phoenixcontact.com/online/portal/pl?1dmy&urile=wcm%3apath%3a/plpl/web/home"
self.create_session()
def create_session(self):
# Run browser with webdriver
driver = webdriver.Chrome(executable_path="D:/chromedriver_v84.exe")
driver.get(self.url_login)
time.sleep(2)
# Find link to sub-website with login
link = driver.find_element_by_xpath('//*[#id="pxc-funcnav"]/div[3]/ul/li[1]/a').get_attribute("href")
driver.get(link)
time.sleep(100)
Description to code:
#1 I create browser chrome session
#2 Loading first website from self.url_login
#3 Is loaded
#4 I need to find a link behind the active text on the website to log in
#5 I found it and try to open this, but the response after getting a link is:
Access Denied
You don't have permission to access
"http://www.phoenixcontact.com/online/portal/pl/pxc/offcontext/login/!ut/p/z1/tZJNa4NAEIZ_Sw45yszuuro9WkO1xqY2EqN7EbXGWPzYFDGlv74Gcio0oYTMZRgY3mcYHpAQg-yysa6yoe67rJnmRBqpu4zownzixDEYx2cWmIYTeYgrHSKQIFVRv0MieJZTZEITglFNLwTXRPaw03RGC6Qm10nOTttFN6hhD4lqVDPHY5nPcd-3JSQTy0ypQ5C4Onl5XUcmvgXCttzNWo-WCNuxLo-w6frPdjot_CfZxWsEciPhSjy7a7xN7xt_63M8kJdNmlSrPw4HaU2G9N1Qfg0Q_1Zke4JeiPHIeQH_KAshVE0a-GkQ24EPqm0F41WbLh5XWuKN3-fm78KgsmazH7dw0Ts!/dz/d5/L0lJSklKQ2dwUkEhIS9JRGpBQUF4QUFFUkNwcVlxLzRObEdRb1lwTWhUalVFZyEvWjZfR0FMNjE0ODI4RzNEQzBJMklPMlA2OTFHMDMvWjdfR0FMNjE0ODI4RzNEQzBJMklPMlA2OTFHSTcvdGFyZ2V0Vmlldy9sb2dpbg!!/" on this server.
Reference #18.d58655f.1597921471.5b29112
Is anyone know what is wrong here? :( When I try to load the website from the link in normal Chrome browser it's all fine :/
Thank you all for any help.
Please try the below code and let me know if it works for you :-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
options = Options()
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36'
options.add_argument('user-agent={0}'.format(user_agent))
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)
action = ActionChains(driver)
driver.get("https://www.phoenixcontact.com/online/portal/pl?1dmy&urile=wcm%3apath%3a/plpl/web/home")
Login_Btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//*[#class='pxc-fn-login']/a")))
action.move_to_element(Login_Btn).click().perform()
Note - Please make the changes in your code accordingly.
Google search brought me here. After trying several options. Undetected Chromedriver with a very simple script without any options worked for me.
import undetected_chromedriver as uc
driver = uc.Chrome()
driver.get(<url here>)
Related
I'm trying to change my code to use an IE headless browser. The automation I'm doing is in a website that only works in internet explorer
My code was working great until I tried to use a headless browser
When I run this code, absolutely nothing happens, no error is thrown
# selenium 4
from selenium import webdriver
from selenium.webdriver.ie.service import Service
from webdriver_manager.microsoft import IEDriverManager
from selenium.webdriver.ie.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import StaleElementReferenceException
from dotenv import load_dotenv
# Inicialização do Selenium
ie_options = Options()
ie_options.ignore_zoom_level = True
## WORKS!
# driver = webdriver.Ie(service=Service(IEDriverManager().install()), options=ie_options)
## NOT WORKING
service = Service(executable_path=constantes.PATH_HEADLESS)
driver = webdriver.Ie(service=service, options=ie_options)
# Acessa a página
driver.get(constantes.URL)
I believe the reason why it seems nothing is happening is because you have no output (not printing anything). I'm not familiar with your process, but I tried it out with mine also using chrome and it worked fine. Context:
chrome_options = Options()
chrome_options.add_argument("--headless")
driver=webdriver.Chrome(service=Service('*executable
path'),options=chrome_options)
driver.get('https://stackoverflow.com/')
print(driver.title)
I want to extract the CSV download URL from website - https://www.nseindia.com/option-chain
enter image description here
Code I used till now
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s)
driver.get("https://www.nseindia.com/option-chain")
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,
"equity_underlyingVal")))
nifty = (driver.find_element(By.XPATH, '//*
[#id="equity_underlyingVal"]').text).replace('NIFTY ',
'').replace(',','')
time_stamp = driver.find_element(By.XPATH, '//*
[#id="equity_timeStamp"]').text
I need the csv link to be load in pandas df. I dont want to use selenium or if using selenium, I need it as headless. Let me know if anyone has a better idea about extracting data directly into pandas datafream..
You can extract the downloading link contained in that element with Selenium as following:
link = driver.find_element(By.CSS_SELECTOR, '#downloadOCTable').get_attribute("href")
As the download link is not present in the href attribute, the best approach is to download the csv file.
Interacting in headless mode can cause problems if the window-size argument is not specified, and a workaround to download files in headless mode is to specify the download path using the driver.command_executor method.
Code snippet to download csv in headless mode-
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os
options = Options()
#add necessary arguments
options.add_argument("user-agent= Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36")
options.add_argument("--window-size=1920,1080")
options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(),options=options)
driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
#set download path (set to current working directory in this example)
params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow','downloadPath':os.getcwd()}}
command_result = driver.execute("send_command", params)
driver.get("https://www.nseindia.com/option-chain")
#wait for table details to appear
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '//*[#id="equity_optionChainTable"]')))
#find and click on download csv button
download_button=driver.find_element_by_xpath('//*[#id="downloadOCTable"]')
download_button.click()
I am using Python selenium chromedriver and I want to add Capmonster to my code. Basically if my Code gets the URL, a ReCaptcha appears and I want to get it solved.
My Code
from threading import Thread
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
import os
chromedriver = 'C:\\Users\\yvesb\\OneDrive\\Desktop\\chromedriver\\chromedriver.exe'
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1200x600') # optional
driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)
def test(driver, url):
driver.get(url)
try:
driver.save_screenshot('C:\\Users\\yvesb\\Downloads\\headless_chrome_test7.png')
finally:
print("screenshot done")
url = "https://www.snipes.com/login"
Thread(target=test, args=(driver, url)).start()
And when it gets the URL a ReCaptcha appears, which I want to get solved with my Capmonster Key.
I suggest you to try this library I've developed some time ago. If you have a set of labelled captchas that service would fit you. Take a look: https://github.com/punkerpunker/captcha_solver
In README there is a section "Train model on external data" that you might be interested in.
I am trying to scrape a website. Where in I have to press a link. for this purpose, I am using selenium library with chrome drive.
from selenium import webdriver
url = 'https://sjobs.brassring.com/TGnewUI/Search/Home/Home?partnerid=25222&siteid=5011&noback=1&fromSM=true#Applications'
browser = webdriver.Chrome()
browser.get(url)
time.sleep(3)
link = browser.find_element_by_link_text("Don't have an account yet?")
link.click()
But it is not working. Any ideas why it is not working? Is there a workaround?
You can get it done in several ways. Here is one of such. I've used driver.execute_script() command to force the clicking. You should not go for hardcoded delay as they are very inconsistent.
Modified script:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://sjobs.brassring.com/TGnewUI/Search/Home/Home?partnerid=25222&siteid=5011&noback=1&fromSM=true#Applications'
driver = webdriver.Chrome()
driver.get(url)
item = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[ng-click='newAccntScreen()']")))
driver.execute_script("arguments[0].click();",item)
Trying to screen scrape a web site without having to launch an actual browser instance in a python script (using Selenium). I can do this with Chrome or Firefox - I've tried it and it works - but I want to use PhantomJS so it's headless.
The code looks like this:
import sys
import traceback
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
"(KHTML, like Gecko) Chrome/15.0.87"
)
try:
# Choose our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap)
#browser = webdriver.PhantomJS()
#browser = webdriver.Firefox()
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
browser.get("https://www.whatever.com")
# For debug, see what we got back
html_source = browser.page_source
with open('out.html', 'w') as f:
f.write(html_source)
# PROCESS THE PAGE (code removed)
except Exception, e:
browser.save_screenshot('screenshot.png')
traceback.print_exc(file=sys.stdout)
finally:
browser.close()
The output is merely:
<html><head></head><body></body></html>
But when I use the Chrome or Firefox options, it works fine. I thought maybe the web site was returning junk based on the user agent, so I tried faking that out. No difference.
What am I missing?
UPDATED: I will try to keep the below snippet updated with until it works. What's below is what I'm currently trying.
import sys
import traceback
import time
import re
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 (KHTML, like Gecko) Chrome/15.0.87")
try:
# Set up our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
print "getting web page..."
browser.get("https://www.website.com")
# Need to wait for the page to load
timeout = 10
print "waiting %s seconds..." % timeout
wait = WebDriverWait(browser, timeout)
element = wait.until(EC.element_to_be_clickable((By.ID,'the_id')))
print "done waiting. Response:"
# Rest of code snipped. Fails as "wait" above.
I was facing the same problem and no amount of code to make the driver wait was helping.
The problem is the SSL encryption on the https websites, ignoring them will do the trick.
Call the PhantomJS driver as:
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This solved the problem for me.
You need to wait for the page to load. Usually, it is done by using an Explicit Wait to wait for a key element to be present or visible on a page. For instance:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# ...
browser.get("https://www.whatever.com")
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.content")))
html_source = browser.page_source
# ...
Here, we'll wait up to 10 seconds for a div element with class="content" to become visible before getting the page source.
Additionally, you may need to ignore SSL errors:
browser = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true'])
Though, I'm pretty sure this is related to the redirecting issues in PhantomJS. There is an open ticket in phantomjs bugtracker:
PhantomJS does not follow some redirects
driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
This worked for me