I'm trying to use Selenium to scrape some data from the mouser.com website, but after I send some text to the search bar the website responds with "Access Denied". I need help to bypass it. I tried setting a custom user agent, but the result is the same.
import time
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
# Open mouser.com, type a query into the header search box and submit it.
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe')
driver.implicitly_wait(1)
#def get_comp_type(comp_pn):
url ='https://www.mouser.com/'
driver.get(url)
print(driver.title)

# CSS selector of the header search input; kept in one place so the wait
# below is the only lookup that needs it.
SEARCH_BOX_SELECTOR = ".form-control.headerSearchBox.search-input.js-search-autosuggest.as-input"

wait = WebDriverWait(driver, timeout=10)
# wait.until() returns the element once it is clickable, so no second lookup
# through the legacy find_element_by_css_selector() helper is needed.
elem = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, SEARCH_BOX_SELECTOR)))
elem.click()
elem.send_keys("myString")
elem.send_keys(Keys.RETURN)
# Short pause after submitting the query.
time.sleep(1)
# Build Chrome options that carry a randomly chosen User-Agent string and
# start the driver with them.
# The class is named `Options`; importing `Option` raises ImportError.
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent

options = Options()
ua = UserAgent()
user_agent = ua.random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')
# The options object must be passed to the driver, otherwise the custom
# user agent is never applied.
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe',options=options)
You didn't use your user agent anywhere in the options. Also, you need to enable JavaScript and cookies as well.
Related
I try to select the value "Ukrainian Division" in the dropdown box of the following site:
https://www.cyberarena.live/schedule-efootball
with the following code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
if __name__ == '__main__':
    # Seconds to sleep after loading the page before touching the dropdown.
    WAIT = 3

    options = Options()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_argument("start-maximized")
    # NOTE(review): Chrome's window-size switch is normally written as
    # "width,height"; the "1920x1080" form may be ignored - confirm.
    options.add_argument('window-size=1920x1080')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')

    srv = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=srv, options=options)

    link = "https://www.cyberarena.live/schedule-efootball"
    driver.get(link)
    time.sleep(WAIT)

    # Wrap the second <select> element on the page and pick an option by its
    # visible label.
    select = Select(driver.find_elements(By.XPATH, "//select")[1])
    select.select_by_visible_text('Ukrainian Division')
    # select.select_by_value("1")

    # Keep the browser open until the user confirms on the console.
    input("Press!")
    driver.quit()
But unfortunately, nothing happens - the options are not selected with this code.
I also tried it with select_by_value with this line
select.select_by_value("1")
instead of
select.select_by_visible_text('Ukrainian Division')
but this doesn´t work either.
How can I select this option from the dropdown box?
I tried your code and I also could not use the Selenium Select object there. I don't know why. But we can still do that directly, with regular Selenium commands.
The following code is working:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Select the "Ukrainian" entry by clicking the dropdown and then the option,
# instead of going through Selenium's Select helper.
options = Options()
options.add_argument("start-maximized")

# Raw string: "\w" and "\c" are not valid escape sequences, so the plain
# literal triggers an invalid-escape warning; the path itself is unchanged.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)

url = 'https://www.cyberarena.live/schedule-efootball'
driver.get(url)

wait = WebDriverWait(driver, 20)
# Open the dropdown whose text contains "Division" ...
wait.until(EC.element_to_be_clickable((By.XPATH, "//select[contains(.,'Division')]"))).click()
# ... then click the entry whose text contains "Ukrainian".
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(text(),'Ukrainian')]"))).click()
The result is:
I was working on a web scraping project with Selenium and was trying to scrape news from the site https://www.businesstimes.com.sg/government-economy.
But whenever I open the site with the selenium automated chrome window, one ad comes up in a popup which I want to close.
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as LM
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
import pandas as pd
import time
# Configure Chrome and open the Business Times government-economy page.
options = webdriver.ChromeOptions()
# Flags that are currently switched off (previously parked in a bare string
# literal, which is an expression statement rather than a comment):
# options.add_argument("enable-automation")
# options.add_argument("--headless")
lists = ['disable-popup-blocking']  # not referenced anywhere in this snippet
# Copy the capabilities: DesiredCapabilities.CHROME is a dict shared at class
# level, so writing to it directly would leak into every later driver.
caps = DesiredCapabilities.CHROME.copy()
caps["pageLoadStrategy"] = "normal"
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
options.add_argument("--disable-notifications")
# NOTE(review): "--disable-Advertisement" does not look like a standard
# Chrome switch - confirm it has any effect.
options.add_argument("--disable-Advertisement")
options.add_argument("--disable-popup-blocking")
driver = webdriver.Chrome(executable_path= r"E:\chromedriver\chromedriver.exe", options=options, desired_capabilities=caps) #paste your own choromedriver path
driver.get('https://www.businesstimes.com.sg/government-economy')
I tried two methods, one was by the xpath method and one by the CSS selector method, but both failed.
# Two attempts at clicking the ad's close button. Neither switches into an
# iframe before looking the element up.
#1st method: CSS selector on the button's data-id attribute
driver.find_element(By.CSS_SELECTOR, 'button[data-id="pclose-btn"]').click()
#2nd method: XPath on the exact class attribute (XPath attributes are
# addressed with "@", e.g. @class)
driver.find_element(By.XPATH, "//div[@class='bz-el bz-pclose-btn knd-BUTTON']").click()
Please help me with this. Thank you!
The element you are trying to access is inside a nested iframe.
This should work:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Close the ad popup: the close button sits inside an iframe nested in another
# iframe, so both frames are entered before the button is clicked.
options = Options()
options.add_argument("start-maximized")

# Raw string: "\w" and "\c" are not valid escape sequences, so the plain
# literal triggers an invalid-escape warning; the path itself is unchanged.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)

url = 'https://www.businesstimes.com.sg/government-economy'
driver.get(url)

wait = WebDriverWait(driver, 20)
# Outer frame: the iframe whose id contains "prestitial".
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[id*='prestitial']")))
# Inner frame: the first iframe inside the outer one.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe")))
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-id='pclose-btn']"))).click()
I'm trying to take a screenshot of a website from a URL.
However, how can I accept the cookies before taking the screenshot?
If you have any idea or clue I will appreciate.
Here is a following picture example of cookies :
Here is my code:
import time
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
# Load the job-offer page, save a screenshot and report the elapsed time.
start = time.time()
browser = webdriver.Chrome(ChromeDriverManager().install())
try:
    browser.get("https://candidat.pole-emploi.fr/offres/recherche/detail/136LNXS/")
    browser.save_screenshot('screenshot1.png')
finally:
    # Always shut the browser down, even if loading or the screenshot fails,
    # so no Chrome/chromedriver process is left behind.
    browser.quit()
end = time.time()
print("tempo =","\t",end - start)
You have to click on the accept button to accept the cookies.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
# selenium 4
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
#from selenium.webdriver.chrome.options import Options
# Open the job-offer page and click the cookie-consent button once it is
# visible.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-extensions")
#chrome to stay open
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
driver.get('https://candidat.pole-emploi.fr/offres/recherche/detail/136LNXS')
# XPath attributes are addressed with "@": [@id="..."] (the "#id" form is not
# valid XPath).
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="footer_tc_privacy_button_2"]'))).click()
I need some help.
There is URL: https://www.inipec.gov.it/cerca-pec/-/pecs/companies.
I need to click checkbox Captcha:
My code looks like this:
import os, urllib.request, requests, datetime, time, random, ssl, json, codecs, csv, urllib
from urllib.request import Request, urlopen
from urllib.request import urlretrieve
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.chrome.options import Options
# Open the INI-PEC search page, enter the second iframe on the page and click
# the reCAPTCHA checkbox inside it.
chromedriver = "chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
chrome_options = webdriver.ChromeOptions()
driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options)
driver.get("https://www.inipec.gov.it/cerca-pec/-/pecs/companies")
# switch_to.<target>() replaces the deprecated switch_to_<target>() helpers.
driver.switch_to.default_content()
element = driver.find_elements(By.CSS_SELECTOR, 'iframe')[1]
driver.switch_to.frame(element)
# find_element (singular) returns one WebElement. The plural find_elements
# form returns a list, which has no click() - that is the AttributeError
# reported for this script. XPath attributes also use "@" ([@id=...]).
driver.find_element(By.XPATH, '//*[@id="recaptcha-anchor"]/div[1]').click()
During the execution, there is an error:
driver.find_elements_by_xpath('//*[@id="recaptcha-anchor"]/div[1]').click()
AttributeError: 'list' object has no attribute 'click'
Please, help to fix it.
Solution update (11-Feb-2020)
Using the following set of binaries:
Selenium v3.141.0
ChromeDriver v80.0
Chrome Version 80.0
You can use the following updated block of code as a solution:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Click the reCAPTCHA checkbox: wait for the reCAPTCHA anchor iframe, switch
# into it, then wait for the checkbox to become clickable.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get("https://www.inipec.gov.it/cerca-pec/-/pecs/companies")
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']")))
# XPath attributes are addressed with "@": [@id='...'].
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@id='recaptcha-anchor']"))).click()
Original solution
Within the URL https://www.inipec.gov.it/cerca-pec/-/pecs/companies to invoke click() on the reCAPTCHA checkbox you need to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired element to be clickable.
You can use the following solution:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
# Click the reCAPTCHA checkbox: wait for the reCAPTCHA anchor iframe, switch
# into it, then wait for the checkmark element to become clickable.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
# `options=` is the keyword the updated solution in this file uses; the older
# `chrome_options=` spelling is deprecated.
driver = webdriver.Chrome(executable_path=r'C:\WebDrivers\chromedriver.exe', options=options)
driver.get("https://www.inipec.gov.it/cerca-pec/-/pecs/companies")
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[name^='a-'][src^='https://www.google.com/recaptcha/api2/anchor?']")))
# XPath attributes are addressed with "@": [@class='...'].
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='recaptcha-checkbox goog-inline-block recaptcha-checkbox-unchecked rc-anchor-checkbox']/div[@class='recaptcha-checkbox-checkmark']"))).click()
I resolved this, you can try this with your landing website url.
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.common.exceptions import SessionNotCreatedException
# Open the reCAPTCHA demo page, extract a substring of the widget's markup and
# type it into the (normally hidden) g-recaptcha-response text area.
options = webdriver.ChromeOptions()
# NOTE(review): `download_dir` is not defined anywhere in this snippet; it has
# to be assigned before this line or a NameError is raised.
prefs = {"download.default_directory": download_dir}
options.add_experimental_option("prefs", prefs)
options.add_argument("--no-sandbox")
driver = webdriver.Chrome("/usr/bin/chromedriver", options=options)
driver.get("https://www.google.com/recaptcha/api2/demo")
driver.maximize_window()
# XPath attributes are addressed with "@": [@class='...'].
price = driver.find_element_by_xpath("//div[@class='g-recaptcha']")
price_content = price.get_attribute('innerHTML')
# Slice out the text between ";k=" and "&co" in the widget's inner HTML.
start = str(price_content).find(";k=")+len(";k=")
end = str(price_content).find("&co")
driver.implicitly_wait(20)
# Reset the inline display style so Selenium can interact with the text area.
driver.execute_script("document.getElementById('g-recaptcha-response').style.display = '';")
recaptcha_text_area = driver.find_element_by_id("g-recaptcha-response")
recaptcha_text_area.clear()
recaptcha_text_area.send_keys(price_content[start:end])
#.....................................................................................
# The submit button is only looked up here; this snippet never clicks it.
button = driver.find_element_by_id("recaptcha-demo-submit")
I have script on selenium 3 and it works fine:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
# Selenium 3 flow: start Chrome with a managed chromedriver, open ya.ru, type
# a query into the search field and press the search button.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(chromedriver_path)
driver.get('https://ya.ru/')

query_field = driver.find_element_by_name('text')
query_field.send_keys('some text')

submit_button = driver.find_element_by_class_name('search2__button')
submit_button.click()
Now I reworked it for Selenium 4, but now the browser closes on its own when the code ends:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Selenium 4 flow: same search as before, with the driver path wrapped in a
# Service object and elements located through By.
chromedriver_path = ChromeDriverManager().install()
s = Service(chromedriver_path)
driver = webdriver.Chrome(service=s)
driver.maximize_window()
driver.get('https://ya.ru/')

query_field = driver.find_element(By.NAME, 'text')
query_field.send_keys('some text')

submit_button = driver.find_element(By.CLASS_NAME, 'search2__button')
submit_button.click()
I want to keep browser open.
I found this answer here.
You need to set the "detach" option to True when starting chromedriver:
# Start Chrome with the "detach" experimental option so the browser window
# stays open after the script ends.
from selenium.webdriver import ChromeOptions, Chrome

opts = ChromeOptions()
opts.add_experimental_option("detach", True)
# Selenium 4 takes the options object through `options=`; `chrome_options=`
# is the legacy keyword.
driver = Chrome(options=opts)
One simple way would be to add a dummy input() at the end of the code.
# Block on console input so the script does not reach its end until the user
# responds.
exit_prompt = "Enter anything on the console to exit."
on_hold = input(exit_prompt)
Can you try this code out, it didn't auto close for me. Don't forget to change the path to the location of the chromedriver.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Run the ya.ru search with an explicitly located chromedriver binary and a
# default ChromeOptions object.
op = webdriver.ChromeOptions()
ser = Service("path/to/chromedriver.exe")
driver = webdriver.Chrome(service=ser, options=op)
driver.maximize_window()
driver.get('https://ya.ru/')

query_field = driver.find_element(By.NAME, 'text')
query_field.send_keys('some text')

submit_button = driver.find_element(By.CLASS_NAME, 'search2__button')
submit_button.click()