Element not found Selenium , site React - python

Selenium does not find the accept cookies button.
Tested: xpath, class and css
Error
Command
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd
import csv
# Build the Chrome options. NOTE: the Options() instance created on the first
# line is immediately replaced by the ChromeOptions() instance on the second.
options = Options()
options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
navegador = webdriver.Chrome(options=options)
navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
# Implicit wait of 30 s for element lookups, then an unconditional 30 s pause.
navegador.implicitly_wait(30)
sleep(30)
# This is the lookup reported as failing; the answer that follows attributes
# it to the button being inside a shadow root.
navegador.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
navegador.implicitly_wait(30)

# Locate the element that hosts the cookie banner, take its shadow root, and
# search for the accept button from there instead of from the document.
# XPath attribute tests are written with '@' (e.g. @id), not '#'.
elem = navegador.find_element(By.XPATH, "//div[@id='hotmart-cookie-policy']").shadow_root
elem.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
The button is inside a shadow DOM, so you need to get the shadow root of its host element and search from there.
If the above doesn't work, try this one instead.
# The snippet calls time.sleep(), so the module itself must be in scope
# (the question's script only has `from time import sleep`).
import time

navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
# Fixed pause to give the page time to render the cookie banner.
time.sleep(10)
# Host element of the banner; XPath attribute tests use '@id', not '#id'.
elem = navegador.find_element(By.XPATH, "//div[@id='hotmart-cookie-policy']")
# Reach into the host's shadow root from JavaScript and return the button.
script = '''return arguments[0].shadowRoot.querySelector(".cookie-policy-accept-all.hot-button.hot-button--primary")'''
elem1 = navegador.execute_script(script, elem)
elem1.click()

Related

Select first element of srcset with python selenium

Using selenium in Python, I have been able to successfully access some url's of an image I want to download. However, the image link is stored within a srcset image attribute. When I use get_attribute('srcset'), it returns a string with the 4 links. I just want the one. How would I go about doing this? Could I possibly just crop the string afterwards?
Here's the site that I am scraping from:
https://www.politicsanddesign.com/
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Start a headless Chrome session and open the page.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# Read the raw srcset attribute of the first responsive image
# (XPath attribute tests are written with '@class').
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# NOTE: img_url is still the whole comma-separated srcset string at this
# point, not a single URL -- this is what the question is about.
driver.get(img_url)
And here is what the img_url object looks like:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80 400w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=800&fm=jpg&q=80 800w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1200&fm=jpg&q=80 1200w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1800&fm=jpg&q=80 1800w
But I'd like it to just be:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80
The image seems to have an attribute called currentSrc which holds only the current value.
# Ask for currentSrc instead of srcset so a single URL comes back.
# XPath attribute tests use '@class', not '#class'.
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("currentSrc")
driver.get(img_url)
You can simply split the value extracted from that web element,
as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Start a headless Chrome session and open the page.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# Read the raw srcset attribute (XPath attribute tests use '@class').
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# One list entry per srcset candidate; each entry is still "<url> <width>w".
img_urls = img_url.split(",")
Now img_urls is a list containing 4 entries (one per srcset candidate, each still carrying its width descriptor such as " 400w"), so you can use it as follows:
# Each entry produced by splitting the srcset looks like
# " //host/path?query 800w": split() drops the surrounding whitespace and the
# width descriptor, and the "https:" prefix is needed because the URLs are
# protocol-relative.
driver.get("https:" + img_urls[0].split()[0]) #open the first URL
driver.get("https:" + img_urls[1].split()[0]) #open the second URL
driver.get("https:" + img_urls[2].split()[0]) #open the third URL
My inefficient solution:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# TODO: wrap all of this in a function or loop so it runs for every div.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# Raw srcset of the first responsive image (XPath attribute tests use '@class').
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# Keep only the first candidate and drop its width descriptor, whatever the
# width is, then add a scheme because the URL is protocol-relative.
# The whole srcset string is not a URL, so it is no longer passed to
# driver.get() before this step.
img_url2 = 'https:' + img_url.split(',', 1)[0].split()[0]
driver.get(img_url2)

Get link video from website by selenium. How to get the link?

I want to get the video link from the website https://www.ofw.su/family-feud-july-29-2022
but I can't. This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of its <video> element.

    Args:
        link: URL of the page to load.

    Returns:
        The value of the ``src`` attribute of the element matched by the
        absolute XPath below (also printed to stdout).
    """
    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    # Use the Chrome profile stored at E:\profile.
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,options=options)
    driver.get(link)
    # Fixed pause to let the page load.
    time.sleep(2)
    # NOTE: this is the lookup that yields no link in the question; the answer
    # that follows says the <video> is inside an iframe.
    url_video = driver.find_element_by_xpath("/html/body/div/div[2]/div[3]/video").get_attribute('src')
    print(url_video)
    return url_video


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
I didn't get any links
The element you are trying to access is inside the iframe.
So, in order to access elements inside the iframe you have to switch to that iframe as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of the embedded <video>.

    The video element is inside an iframe, so the driver switches into that
    frame before looking the element up.

    Args:
        link: URL of the page to load.

    Returns:
        The value of the video element's ``src`` attribute (also printed to
        stdout).
    """
    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    # Use the Chrome profile stored at E:\profile.
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,options=options)
    driver.get(link)
    # Fixed pause to let the page load.
    time.sleep(2)
    # Switch into the player iframe first (XPath attribute tests use '@class');
    # the XPath below is then evaluated inside that frame's document.
    iframe = driver.find_element_by_xpath("//iframe[@class='embed-responsive-item']")
    driver.switch_to.frame(iframe)
    url_video = driver.find_element_by_xpath("/html/body/div/div[2]/div[3]/video").get_attribute('src')
    print(url_video)
    return url_video


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
When you finish working with elements inside the iframe, in order to switch to the regular content you should do that with the following code:
# Leave the iframe and return to the top-level document.
driver.switch_to.default_content()
Also, you should use explicit waits instead of hardcoded delays such as time.sleep(2), and use relative locators rather than absolute XPaths like /html/body/div/div[2]/div[3]/video.

Clear CAPTCHA Success From HTML

I'm trying to scrape some site data and have cleared the CAPTCHA I'm triggering manually - however I continue to load the CAPTCHA success page after I close and reopen my session:
Code:
import urllib, os, urllib.request, time, requests, random, pandas as pd
from datetime import date
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from google_trans_new import google_translator
# Reuse a persistent Chrome profile directory (the question adds it to try
# to retain cookies between sessions).
chrome_options = Options()
chrome_options.add_argument("user-data-dir=C:\\environments\\selenium")
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://ca.indeed.com/")

# "What" field: select all, delete, then type the query.
# XPath attribute tests are written with '@id', not '#id'.
search_company = driver.find_element(By.XPATH,"//*[@id='text-input-what']")
search_company.send_keys(Keys.CONTROL + "a")
search_company.send_keys(Keys.DELETE)
search_company.send_keys("Sales")

# "Where" field: same clear-then-type sequence.
search_loc = driver.find_element(By.XPATH,"//*[@id='text-input-where']")
search_loc.send_keys(Keys.CONTROL + "a")
search_loc.send_keys(Keys.DELETE)
search_loc.send_keys("Quebec")

# Submit the search form.
click_search = driver.find_element(By.XPATH,"//*[@id='jobsearch']/button")
click_search.click()
After running this block, I run:
# URL the Selenium-driven browser is currently on.
page = driver.current_url
# Fetch that URL again with requests (TLS certificate verification disabled).
# NOTE(review): only the URL is handed over; nothing from the browser session
# (e.g. its cookies) is passed to this call -- possibly why the CAPTCHA page
# comes back. Confirm.
html = requests.get(page,verify=False)
soup = BeautifulSoup(html.content, 'html.parser', from_encoding = 'utf-8')
# Bare expression; presumably displays the parsed document in a notebook/REPL.
soup
And I can't avoid the HTML, and thus have nothing to scrape:
hCaptcha solve page
How do I stop returning the CAPTCHA success page and revert back to the page I'm trying to scrape? I've added my environment to try and retain the cookies but I'm at a loss on how to proceed.

Do I have to use pyautogui to fill in a text box with the requested text, or can I just use Selenium? Every time I try Selenium, my XPath isn't correct. Code below.

Code:
from lib2to3.pgen2 import driver
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Start Chrome through an explicit chromedriver Service.
s=Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service=s)
driver.implicitly_wait(0.5)
driver.maximize_window()
driver.get("https://greenhillsschool.myschoolapp.com/app#login")
# The XPath must be one string literal: single quotes inside the
# double-quoted Python string, and '@id' for the attribute test.
driver.find_element(By.XPATH, "//*[@id='Username']").send_keys("rpatel@greenhillsschool.org")
'''
usernamebox.send_keys("email")
next = driver.find_element_by_xpath("Next")
next.click()
'''

Selenium python chrome webdriver

I'm trying to use Selenium to scrape some data from the mouser.com website, but after sending some data in the search bar the website returns "access denied". I need help to bypass it. I tried using a user agent, but I get the same result.
import time
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
# Start Chrome with an explicit chromedriver path; no options are passed here.
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe')
driver.implicitly_wait(1)
#def get_comp_type(comp_pn):
url ='https://www.mouser.com/'
driver.get(url)
print(driver.title)
# Wait up to 10 s for the header search box to become clickable.
wait = WebDriverWait(driver, timeout=10)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".form-control.headerSearchBox.search-input.js-search-autosuggest.as-input")))
# Look the same search box up again, focus it, type the query and press Enter.
elem = driver.find_element_by_css_selector(".form-control.headerSearchBox.search-input.js-search-autosuggest.as-input")
elem.click()
elem.send_keys("myString")
elem.send_keys(Keys.RETURN)
# Fixed 1 s pause after submitting.
time.sleep(1)
# The class is named Options (plural); importing "Option" raises ImportError.
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent

# Pick a random User-Agent string and hand it to Chrome as a command-line
# argument, then start the driver with those options.
options = Options()
ua = UserAgent()
user_agent = ua.random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe',options=options)
You didn't use your user agent anywhere in options. Also, you need to enable JavaScript and cookies as well.

Categories

Resources