Python Selenium Chrome Driver - nseindia - getting 403 - python

enter image description here
''' Trying to fetch Equity Derivatives data from NSE
https://www.nseindia.com/->Market Data-> Derivatives Market
Works until the click action: the browser navigates to Derivatives Market but then throws an
access denied error as below
<h1>
Access Denied
</h1>
You don't have permission to access "http://www.nseindia.com/market-data/equity-derivatives-watch" on this server.
<p>
Reference #18.386dcc17.1603823463.54b06d7
</p>
'''
from selenium import webdriver
from selenium.webdriver import ActionChains
from bs4 import BeautifulSoup
import time
# Open nseindia.com, hover "Market Data", click "Derivatives Market" and dump
# the resulting page. Start-up flags below are the ones tried so far.
# By is imported here because the snippet's import header does not bring it in.
from selenium.webdriver.common.by import By

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.add_argument('--disable-gpu')
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)
driver.get("https://www.nseindia.com/")
# XPath attribute tests are spelled `@id`; find_element(By.XPATH, ...) replaces
# the find_element_by_xpath helper that Selenium 4 removed.
marketdata = driver.find_element(By.XPATH, "//*[@id='main_navbar']/ul/li[3]/a")
derivativesmarket = driver.find_element(By.XPATH, "//*[@id='main_navbar']/ul/li[3]/div/div[1]/div/div[1]/ul/li[3]/a")
# Hover the top-level menu first so the submenu entry can be clicked.
actions = ActionChains(driver)
actions.move_to_element(marketdata).move_to_element(derivativesmarket).click().perform()
# Wait for the navigation BEFORE taking the snapshot; the original slept only
# after page_source had already been captured.
time.sleep(7)
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
# soup = BeautifulSoup(html,'lxml')
print(soup.prettify())
# (screenshot placeholder from the original post: [enter image description here][1])

Add
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
and use
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Then access the element like so:
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.h1")))
Reference:- Access Denied You don't have permission to access "site" on this server using ChromeDriver and Chrome through Selenium Python

Related

Upload image on Facebook Marketplace with selenium (python)

I am trying to automate the creation of ads on Facebook Marketplace.
I succeed in logging in and navigating to the correct page.
But I don't know how to upload an image with Selenium.
Indeed, the element which handle the uploading of image is not an input type=file but a div which has a role of a button which open the windows file window in order to choose a file.
This is the html of the element :
<div class="x1i10hfl x1qjc9v5 xjbqb8w xjqpnuy xa49m3k xqeqjp1 x2hbi6w x13fuv20 xu3j5b3 x1q0q8m5 x26u7qi x972fbf xcfux6l x1qhh985 xm0m39n x9f619 x1ypdohk xdl72j9 x2lah0s xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r x2lwn1j xeuugli xexx8yu x4uap5 x18d9i69 xkhd6sd x1n2onr6 x16tdsg8 x1hl2dhg xggy1nq x1ja2u2z x1t137rt x1o1ewxj x3x9cwd x1e5q0jg x13rtm0m x1q0g3np x87ps6o x1lku1pv x1a2a7pz x78zum5 x1iyjqo2" role="button" tabindex="0">
I already tried this code :
driver.find_element(By.XPATH, element_xpath).send_keys(absolute_path)
But it doesn't work
Is there someone who already tried and succeeded in ?
Uploading file with Selenium is done by sending the uploaded file to a special element. This is not an element you are clicking as a user via GUI to upload elements. The element actually receiving uploaded files normally matching this XPath:
//input[@type='file']
This is the fully working code - I tried this on my PC with my FB account uploading some document. I've erased the screenshot details for privacy reasons, but it clearly worked
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Log in to Facebook, open the Marketplace "create item" page and upload an
# image by sending its path to the page's <input type="file"> element.
options = Options()
options.add_argument("start-maximized")
options.add_argument("--disable-infobars")
options.add_argument("--disable-extensions")
# Pass the argument 1 to allow and 2 to block
options.add_experimental_option(
    "prefs", {"profile.default_content_setting_values.notifications": 2}
)
# Raw string: "\w" and "\c" are not valid escape sequences, so the plain
# literal only worked by accident (and warns on newer Python versions).
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.facebook.com/"
driver.get(url)
# my_username / my_password are placeholders; define them before running.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "[name='email']"))).send_keys(my_username)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "[name='pass']"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "[name='login']"))).click()
driver.get("https://www.facebook.com/marketplace/create/item")
# XPath attribute tests are spelled `@type`. The file path goes straight to the
# file input; the button-like div the user sees is never clicked.
wait.until(EC.presence_of_element_located((By.XPATH, "//input[@type='file']"))).send_keys("C:/Users/my_user/Downloads/doch.jpeg")
This is the screenshot of what this code does:

Why is the get_attribute() function in selenium returning an empty string when inspecting the webpage shows the attribute?

I am trying to grab the src attribute from the video tag from this webpage. This shows where I see the video tag when I am inspecting the image. The XPath for the tag in Safari is "//*[@id="player"]/div[2]/div[4]/video"
This is my code:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import os
# Open the player page in Safari and print the <video> element's src attribute.
os.environ["SELENIUM_SERVER_JAR"] = "selenium-server-standalone-2.41.0.jar"
browser = webdriver.Safari()
browser.get("https://mplayer.me/default.php?id=MTc3ODc3")
# Wait up to 20 s for the <video> tag to become visible, then read its src.
video_locator = (By.TAG_NAME, "video")
video_is_visible = EC.visibility_of_element_located(video_locator)
video_element = WebDriverWait(browser, 20).until(video_is_visible)
print(video_element.get_attribute("src"))
browser.quit()
Using .text instead of .get_attribute() also returns an empty string.
I have to use safari and not chrome to get the src link because chrome uses a blob storage design due to which scraping via chrome shows "blob:https://mplayer.me/d420cb30-ed6e-4772-b169-ed33a5d3ee9f" instead of "https://wwwx18.gogocdn.stream/videos/hls/6CjH7KUeu18L4Y7ls0ohCw/1668685924/177877/81aa0af3891f4ef11da3f67f0d43ade6/ep.1.1657688313.m3u8" which is the link I want to get.
You can get a link to m3u8 file in Chrome from logs using Desired Capabilities
Here is one of the possible solutions to do this:
import json
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
# Load the player page in headless Chrome with performance logging enabled and
# print every logged request URL that ends in ".m3u8".
options = webdriver.ChromeOptions()
options.add_argument('--headless')
# Set the logging preference on the options object. The original mutated the
# shared DesiredCapabilities.CHROME dict and passed it via the deprecated
# `desired_capabilities=` keyword.
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
service = Service(executable_path="path/to/your/chromedriver.exe")
driver = webdriver.Chrome(service=service, options=options)
driver.get('https://mplayer.me/default.php?id=MTc3ODc3')
logs = driver.get_log('performance')
for log in logs:
    # Each entry's 'message' is a JSON string; only some events carry a 'request'.
    data = json.loads(log['message'])['message']['params'].get('request')
    if data and data['url'].endswith('.m3u8'):
        print(data['url'])
driver.quit()
Output:
https://wwwx18.gogocdn.stream/videos/hls/myv1spZ0483oSfvbo4bcbQ/1668706324/177877/81aa0af3891f4ef11da3f67f0d43ade6/ep.1.1657688313.m3u8
Tested on Win 10, Python 3.9.10, Selenium 4.5.0

Hover the Mouse to the tab and scrape the products

Hello I am trying to use selenium to automatically scrape the products titles and prices, i am using ActionChains and move_to_element, but somehow it gave me timeout exception, Is there a better way to do it?
titles in the tab
https://denago.com/collections/ebikes
#For Dynamic webpage, import selenium
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Set up headless Chrome, hover the first navigation entry and print the
# titles of the menu items (class "mm-title").
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# chromedriver was copied to /usr/bin above, so it is found on PATH; passing
# the driver path positionally is deprecated.
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(10)
driver.get('https://denago.com/collections/ebikes')
ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[6]/div/header/nav/ul/li[1]/a/span")))
ActionChains(driver).move_to_element(ourbike).perform()
titles = driver.find_elements(By.CLASS_NAME, 'mm-title')
# Iterate the elements directly; the loop body must be indented to run.
for title in titles:
    print(title.text)
There are a couple of problems:
Browser opens at the default size which is small and the element you are searching for to hover is absent on the page. So you need to set options.add_argument('window-size=1200,1980').
There is a message about cookies that overlaps elements on the page. It's better to close it: driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
The element you tried to hover could not be found by the XPATH you used. It can easily be found with (By.XPATH, '(//li[@itemid="m9RVB"])'), but there are two such elements on the page and the first is hidden. So you need to hover the second one, so add [2] to the locator: (By.XPATH, '(//li[@itemid="m9RVB"])[2]')
So, here is the code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Set up headless Chrome with a window large enough for the desktop navigation,
# dismiss the cookie dialog, hover the menu entry and print the menu titles.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('window-size=1200,1980')
# Let Selenium find chromedriver on PATH; passing the path positionally is deprecated.
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(10)
driver.get('https://denago.com/collections/ebikes')
# Close the cookie dialog, which overlaps elements on the page.
driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
# XPath attribute tests are spelled `@itemid`; [2] selects the second match
# because the first one is hidden.
ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, '(//li[@itemid="m9RVB"])[2]')))
ActionChains(driver).move_to_element(ourbike).perform()
titles = driver.find_elements(By.CLASS_NAME, 'mm-title')
for title in titles:
    print(title.text)
driver.quit()
I think you are looking something like this:
# Needed libs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Print the name and price of every product on the e-bikes collection page.
# We create the driver
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
# We maximize the window, because if not the page will be different
driver.maximize_window()
# We navigate to the url
driver.get('https://denago.com/collections/ebikes')
# We wait for the first title, I think it is enough
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//h5)[1]")))
# We get all the title elements (only their count is used below)
titles = driver.find_elements(By.XPATH, '//h5')
# XPath positions are 1-based, so count from 1; attribute tests are spelled `@class`.
for position in range(1, len(titles) + 1):
    product_name = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//h5)[{position}]'))).text
    product_price = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//div[@class="price"])[{position}]'))).text
    print(f"Product {position}: {product_name} - Price: {product_price}")
driver.quit()
There are 5 bikes on that page. Here is a more pythonic (and more selenium..ish) way of getting those titles (and other info on each bike, if you want):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Open the e-bikes collection page, accept the cookie dialog if it shows up,
# and print the title of each product card.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 25)
driver.get('https://denago.com/collections/ebikes')
# Best effort: the script carries on whether or not the cookie button appears.
try:
    wait.until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
    print('accepted cookies')
except Exception:
    print('no cookie button!')
# XPath attribute tests are spelled `@class`.
bikes = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="grid-view-item product-card"]//h5/a')))
for bike in bikes:
    print(bike.text.strip())
Printout in terminal:
accepted cookies
DENAGO CITY MODEL 1 STEP-THRU EBIKE
DENAGO CITY MODEL 1 TOP-TUBE EBIKE
DENAGO COMMUTE MODEL 1 STEP-THRU EBIKE
DENAGO FAT TIRE STEP-THRU EBIKE
DENAGO COMMUTE MODEL 1 TOP-TUBE EBIKE
Selenium docs: https://www.selenium.dev/documentation/

Udemy website keeps on loading while trying to Web Scrape with Selenium and Python

I am starting to learn about web scraping. For practice, I am trying to get a list of all the course names that appear in this query: "https://www.udemy.com/courses/search/?src=ukw&q=api+python". The problem is that when I start the script the page does not load and eventually the window gets closed. I think maybe Udemy has some type of protection against automation.
This is my code:
from selenium import webdriver
import time
# Open the Udemy search results page and collect its <h3> elements.
# By is imported here because the snippet's import header only brings in
# webdriver and time.
from selenium.webdriver.common.by import By

website = "https://www.udemy.com/courses/search/?src=ukw&q=api+python"
path = "/"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-logging'])
driver = webdriver.Chrome(options=chrome_options)
driver.get(website)
time.sleep(5)
# find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name, which
# Selenium 4 removed.
matches = driver.find_elements(By.TAG_NAME, "h3")
The reason behind udemy website not loading completely may be due to the fact that Selenium driven ChromeDriver initiated Chrome Browser gets detected as a bot and further navigation is getting blocked.
Solution
An easier hack to evade the detection would be to add the following argument:
--disable-blink-features=AutomationControlled
So effectively your code block will be:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Start Chrome with the automation hints disabled, open the Udemy search page,
# wait for the results heading and save a screenshot.
options = Options()
options.add_argument("start-maximized")
# Both switches go in ONE call: add_experimental_option stores one value per
# option name, so a second "excludeSwitches" call would replace the first and
# "enable-automation" would no longer be excluded.
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.get('https://www.udemy.com/courses/search/?src=ukw&q=api+python')
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h1[contains(., 'results for')]")))
driver.save_screenshot("udemy.png")
Saved Screenshot:

Hi, I am having problems with getting through to a site using DDos protection by cloudflare, when using python and selenium

I am planning to set up an automated process on a website, but I can't get past the message 'Checking your browser before accessing sit.no' when I try to reach the site using Selenium. The message is from Cloudflare.
I have been trying some code that I found here, but it didn't work:
1.
from selenium import webdriver
# Attempt 1: exclude the "enable-automation" switch and turn off the
# automation extension before opening the site.
url = 'https://www.sit.no/'
experimental_settings = (
    ("excludeSwitches", ["enable-automation"]),
    ('useAutomationExtension', False),
)
options = webdriver.ChromeOptions()
for setting_name, setting_value in experimental_settings:
    options.add_experimental_option(setting_name, setting_value)
driver = webdriver.Chrome(options=options)
driver.get(url)
2.
from selenium import webdriver
# Attempt 2: start Chrome with the AutomationControlled blink feature disabled.
url = 'https://www.sit.no/'
blink_flag = "--disable-blink-features=AutomationControlled"
options = webdriver.ChromeOptions()
options.add_argument(blink_flag)
driver = webdriver.Chrome(options=options)
driver.get(url)
3.
import undetected_chromedriver as uc
# Attempt 3: use undetected_chromedriver's Chrome class in place of Selenium's.
url = 'https://www.sit.no/'
driver = uc.Chrome()
driver.get(url)
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import subprocess
#other imports
# Start Chrome with remote debugging on port 9222, then attach Selenium to
# that running browser through its debugger address.
chrome_launch_command = '"C:\\Program Files\\yourpathtochrome\\chrome.exe" --remote-debugging-port=9222'
subprocess.Popen(chrome_launch_command, shell=True)

debugger_address = "127.0.0.1:9222"
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", debugger_address)
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get('https://www.sit.no/')
# Block until Enter is pressed.
input()
First, open the website manually and complete the browser check. Then close all Chrome browsers and replace the path in the code with the path to your chrome.exe. This will work.

Categories

Resources