I need help: Selenium with Chrome is getting stuck on some pages while browsing.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
#options.add_argument('--no-sandbox')
options.add_argument("disable-infobars")
options.add_argument('--disable-browser-side-navigation')
options.add_argument("--start-maximized")
driver = webdriver.Chrome(chrome_options=options)
driver.set_page_load_timeout(10)
This is how I create the driver; however, it definitely gets stuck on some pages for more than 300 s (I have to Ctrl+C to get it to keep moving inside a for loop).
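One way to keep the loop moving, assuming the hang is simply a page load that never finishes, is to catch the TimeoutException that set_page_load_timeout(10) raises and force-stop the load. This is just a minimal sketch; the urls list is a hypothetical stand-in for whatever you iterate over:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
driver = webdriver.Chrome(options=options)
driver.set_page_load_timeout(10)
urls = ["https://example.com"]  # hypothetical list of pages to browse
for url in urls:
    try:
        driver.get(url)
    except TimeoutException:
        # The load exceeded 10 s: stop it and move on to the next page
        driver.execute_script("window.stop()")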
Related
A site I often access via Chrome WebDriver suddenly takes much longer to load. All the elements I need seem to be there, but the page keeps loading and blocks the code after driver.get(url) from executing. I then tried setting my page_load_strategy to 'eager' or 'none', but that doesn't seem to change anything, and it is also not reflected in the driver's capabilities.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.page_load_strategy = 'eager'
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe", options=options)
driver.capabilities #'pageLoadStrategy': 'normal'
Any ideas what I'm doing wrong?
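For reference, here is a minimal sketch of the Selenium 4 idiom for this (assuming Selenium 4+ and the chromedriver path from above); when the strategy is applied, driver.capabilities should report 'pageLoadStrategy': 'eager':
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
options = Options()
options.page_load_strategy = 'eager'  # 'normal' (default), 'eager' or 'none'
service = Service(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(service=service, options=options)
print(driver.capabilities.get('pageLoadStrategy'))  # expected: 'eager'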
I am trying to scrape a website. First of all, it does not work with BeautifulSoup, and when I try to open it with the Selenium Chrome driver it does not open. It does open with Firefox, but that is very slow and throws an error on element click. Here is my code:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
opt = webdriver.ChromeOptions()
opt.add_argument("--disable-xss-auditor")
opt.add_argument("--disable-web-security")
opt.add_argument("--allow-running-insecure-content")
opt.add_argument("--no-sandbox")
opt.add_argument("--disable-setuid-sandbox")
opt.add_argument("--disable-webgl")
opt.add_argument("--disable-popup-blocking")
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(f"http://app1.nmpa.gov.cn/data_nmpa/face3/base.jsp?tableId=25&tableName=TABLE25&title=%B9%FA%B2%FA%D2%A9%C6%B7&bcId=152904713761213296322795806604&CbSlDlH0=qGrYrAktn7.tn7.tnznJalIvVetjcXpaapSdKuqmmoVqqWL")
Possibly the Selenium-driven, ChromeDriver-initiated google-chrome browsing context is getting detected as a bot, and the arguments you have added can't bypass the bot-detection mechanism effectively.
Solution
You can evade the detection by adding a few arguments and experimental options as follows. In particular, --disable-blink-features=AutomationControlled stops Blink from exposing navigator.webdriver as true, and excluding the enable-automation switch removes the automation infobar:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
options = Options()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.get("http://app1.nmpa.gov.cn/data_nmpa/face3/base.jsp?tableId=25&tableName=TABLE25&title=%B9%FA%B2%FA%D2%A9%C6%B7&bcId=152904713761213296322795806604&CbSlDlH0=qGrYrAktn7.tn7.tnznJalIvVetjcXpaapSdKuqmmoVqqWL")
I am starting to learn web scraping. For practice, I am trying to get a list of all the course names that appear in this query: "https://www.udemy.com/courses/search/?src=ukw&q=api+python". The problem is that when I start the script the page does not load and eventually the window closes. I think Udemy may have some kind of protection against automation.
This is my code:
from selenium import webdriver
import time
website = "https://www.udemy.com/courses/search/?src=ukw&q=api+python"
path = "/"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-logging'])
driver = webdriver.Chrome(options=chrome_options)
driver.get(website)
time.sleep(5)
matches = driver.find_elements_by_tag_name("h3")
The reason the Udemy website does not load completely may be that the Selenium-driven, ChromeDriver-initiated Chrome browser is detected as a bot and further navigation is blocked.
Solution
An easier hack to evade the detection would be to add the following argument:
--disable-blink-features=AutomationControlled
So effectively your code block will be:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.get('https://www.udemy.com/courses/search/?src=ukw&q=api+python')
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h1[contains(., 'results for')]")))
driver.save_screenshot("udemy.png")
Saved Screenshot:
I got a grey screen when trying to open the bet365 site using ChromeDriver and Selenium.
var driver = new ChromeDriver();
driver.Navigate().GoToUrl("https://www.bet365.it/");
I executed your use case with a couple of tweaks and faced the same outcome. Here are the execution details:
Code Block [Python]:
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('https://www.bet365.it/')
Browser Snapshot:
Deep Dive
When I checked the Terms and Conditions, it is clearly stated that:
Other
6.1 bet365 actively monitors traffic to and from its Site. Bet365 reserves the right to block access to the Site, at its discretion, should it encounter any evidence of automated or robotized game activity.
Conclusion
It seems the Selenium-driven, ChromeDriver-initiated google-chrome browsing context is getting detected and the navigation is blocked.
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(options=chrome_options, executable_path=r"chromedriver.exe")
# Inject a script via CDP that runs before any page script and hides navigator.webdriver
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source":
"""Object.defineProperty(navigator,
'webdriver', {get: () => undefined})"""})
url = 'https://www.bet365.com/#/IP/B1'
driver.get(url)
sleep(1)
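A quick way to confirm the override took effect (a sketch, not part of the original answer) is to query the property through the driver:
print(driver.execute_script("return navigator.webdriver"))  # expected: None once the override is active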
I need to start Chrome with WebDriver with QUIC disabled, as follows:
--flag-switches-begin --disable-quic --flag-switches-end
I am using Python with Selenium 2.47.3.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
options = webdriver.ChromeOptions()
options.add_argument("--disable-quic")
_browser = webdriver.Chrome(chrome_options=options)
Doing that does not put --disable-quic between --flag-switches-begin and --flag-switches-end.
In case anyone is still looking for this, the correct way to do this is:
options = webdriver.ChromeOptions()
options.add_argument("disable-quic") # not "--disable-quic"
_browser = webdriver.Chrome(options=options)
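If you want to verify that the switch actually reached Chrome, one rough check (a sketch, independent of the answer above) is to open chrome://version, which lists the full command line, and search the page for the flag:
_browser.get("chrome://version")
print("disable-quic" in _browser.page_source)  # True if the switch is on Chrome's command line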