Select first element of srcset with python selenium - python

Using Selenium in Python, I have been able to successfully access the URLs of an image I want to download. However, the image links are stored in the image's srcset attribute. When I use get_attribute('srcset'), it returns a single string containing all 4 links. I just want the first one. How would I go about doing this? Could I possibly just crop the string afterwards?
Here's the site that I am scraping from:
https://www.politicsanddesign.com/
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Launch headless Chrome and open the page that hosts the responsive image.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# XPath attribute tests are written with "@" ("#class" is not valid XPath).
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# At this point img_url is the whole srcset attribute string, not one URL.
driver.get(img_url)
And here is what the img_url object looks like:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80 400w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=800&fm=jpg&q=80 800w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1200&fm=jpg&q=80 1200w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1800&fm=jpg&q=80 1800w
But I'd like it to just be:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80

The image seems to have an attribute called currentSrc, which holds only the currently selected URL.
# Read the single URL exposed by the image's currentSrc attribute instead of
# the multi-candidate srcset string. XPath attribute tests use "@", not "#".
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("currentSrc")
driver.get(img_url)

You can simply split the value extracted from that web element,
as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Launch headless Chrome and open the page that hosts the responsive image.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# XPath attribute tests are written with "@" ("#class" is not valid XPath).
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# Each comma-separated srcset candidate looks like "<url> <width>w". Keep only
# the URL part, and give protocol-relative URLs ("//host/...") an explicit
# scheme so that every entry of img_urls can be passed to driver.get().
img_urls = []
for candidate in img_url.split(","):
    parts = candidate.split()
    if not parts:
        # Skip empty fragments such as the one left by a trailing comma.
        continue
    url = parts[0]
    img_urls.append("https:" + url if url.startswith("//") else url)
Now img_urls is a list with one entry per srcset candidate (4 for the srcset shown in the question), so you can use it as follows:
# Navigate the browser to individual entries of img_urls, one after another.
driver.get(img_urls[0]) #open the first URL
driver.get(img_urls[1]) #open the second URL
driver.get(img_urls[2]) #open the third URL

My inefficient solution:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# WILL NEED TO EVENTUALLY FIGURE OUT HOW TO WRAP ALL OF THIS INTO A FUNCTION OR LOOP TO DO IT FOR ALL DIV OBJECTS
# Launch headless Chrome and open the page that hosts the responsive image.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# XPath attribute tests are written with "@" ("#class" is not valid XPath).
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# The raw srcset string is a list of candidates, not a URL, so it is not
# passed to driver.get(). Take the first candidate and drop its width
# descriptor, whatever that width is, instead of splitting on ' 400w'.
first_candidate = img_url.split(",", 1)[0].split()[0]
# Protocol-relative URLs ("//host/...") need an explicit scheme.
img_url2 = 'https:' + first_candidate if first_candidate.startswith("//") else first_candidate
driver.get(img_url2)

Related

'NoneType' object has no attribute 'click'. Why i am getting this error

Why am I unable to use the click function inside my class?
# from selenium import webdriver
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
#
# options = Options()
# options.add_argument('--headless')
# options.add_argument('--no-sandbox')
# options.add_argument('--disable-dev-shm-usage')
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
#
# driver.get("https://python.org")
# print(driver.title)
####
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchWindowException
from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
import RFOS
# Build the Chrome options; the headless and no-sandbox switches are left
# disabled below, only --disable-dev-shm-usage is passed.
options = Options()
# options.add_argument('--headless')
# options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
import pandas as pd
from datetime import datetime
#driver = RFOS.OpenBrowser('chrome')
# Module-level driver; chromedriver is resolved through webdriver_manager.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
class SetWindow():
    """Open the configured URL and click the element with id 'grouped-demo'."""

    def test(self):
        """Load the URL from the properties file, wait, then click the element.

        Uses the module-level ``driver``.
        """
        url = RFOS.fetchDataFromPropertiesFile("Credentials.properties","Login", "url_ChatGpt")
        driver.get(url)
        driver.maximize_window()
        time.sleep(60)
        # XPath attribute tests are written with "@" ("#id" is not valid XPath).
        driver.find_element_by_xpath("//*[@id='grouped-demo']").click()
        #search = driver.find_element(by=By.NAME, value="q")
        #search.send_keys("Hey, Tecadmin")
        #search.send_keys(Keys.RETURN)
# Run the scenario once, keep the browser open for a while, then close it.
ChatGpt = SetWindow()
ChatGpt.test()
time.sleep(123)
driver.close()
I wanted to click on the search bar of https://www.futurepedia.io/
I get an error I cannot identify: whenever I try to click on the search bar of any website using this code, it throws this kind of error.
enter image description here: error
The code
# Attribute test written with "@", as XPath requires.
driver.find_element_by_xpath("//*[@id='grouped-demo']")
appears to return None. You're then attempting to call .click() on it, which fails.
You just need to add some handling for the case it returns None. Something like:
# Look the element up once, then only click when something was returned.
# XPath attribute tests are written with "@" ("#id" is not valid XPath).
# NOTE(review): stock Selenium is documented to raise NoSuchElementException
# for a missing element rather than return None - confirm which behaviour the
# driver used here actually has.
element = driver.find_element_by_xpath("//*[@id='grouped-demo']")
if element is None:
    # Handle None case
    ...
else:
    element.click()

Selecting value from dropdown-box is not possible with selenium?

I try to select the value "Ukrainian Division" in the dropdown box of the following site:
https://www.cyberarena.live/schedule-efootball
with the following code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
if __name__ == '__main__':
    # Seconds to sleep after loading the page, before looking up the dropdowns.
    WAIT = 3
    options = Options()
    options.add_experimental_option ('excludeSwitches', ['enable-logging'])
    options.add_argument("start-maximized")
    options.add_argument('window-size=1920x1080')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    srv=Service(ChromeDriverManager().install())
    driver = webdriver.Chrome (service=srv, options=options)
    # Plain string: the original f-string had no placeholders.
    link = "https://www.cyberarena.live/schedule-efootball"
    driver.get (link)
    time.sleep(WAIT)
    # Wrap the second <select> element on the page and pick an option by label.
    select = Select(driver.find_elements(By.XPATH,"//select")[1])
    select.select_by_visible_text('Ukrainian Division')
    # select.select_by_value("1")
    # Keep the browser open until the user presses Enter.
    input("Press!")
    driver.quit()
But unfortunately, nothing happens - the options are not selected with this code.
I also tried it with select_by_value with this line
select.select_by_value("1")
instead of
select.select_by_visible_text('Ukrainian Division')
but this doesn't work either.
How can I select this option from the dropdown box?
I tried your code and I also could not use the Selenium Select object there. I don't know why. But we can still do that directly, with regular Selenium commands.
The following code is working:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" in a normal literal are invalid escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.cyberarena.live/schedule-efootball'
driver.get(url)
# Wait up to 20 seconds for each element to become clickable before clicking.
wait = WebDriverWait(driver, 20)
# First click the dropdown whose text contains 'Division' ...
wait.until(EC.element_to_be_clickable((By.XPATH, "//select[contains(.,'Division')]"))).click()
# ... then click the <div> whose text contains 'Ukrainian'.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(text(),'Ukrainian')]"))).click()
The result is:

Element not found Selenium , site React

Selenium does not find the accept cookies button.
Tested: xpath, class and css
Error
Command
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd
import csv
# Only one options object is needed; the original also created an Options()
# instance that was overwritten on the next line and never used.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
navegador = webdriver.Chrome(options=options)
navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
navegador.implicitly_wait(30)
sleep(30)
# Look the accept-all button up directly from the top-level document.
navegador.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
navegador.implicitly_wait(30)
# Get the shadow root of the cookie-policy host element, then search inside
# it. XPath attribute tests are written with "@" ("#id" is not valid XPath).
elem=navegador.find_element(By.XPATH,"//div[@id='hotmart-cookie-policy']").shadow_root
elem.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
You need to find the shadow root and then find from there.
Since the above didn't work try this one.
navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
# The question's script imports sleep with "from time import sleep".
sleep(10)
# XPath attribute tests are written with "@" ("#id" is not valid XPath).
elem=navegador.find_element(By.XPATH,"//div[@id='hotmart-cookie-policy']")
# Query the button through the host element's shadowRoot in JavaScript.
script='''return arguments[0].shadowRoot.querySelector(".cookie-policy-accept-all.hot-button.hot-button--primary")'''
elem1= navegador.execute_script(script, elem)
elem1.click()

Get link video from website by selenium. How to get the link?

I want to get the video link from the website https://www.ofw.su/family-feud-july-29-2022
but I can't. This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of the page's video element.

    The video element is looked up with an absolute XPath in the top-level
    document. The browser is left open after the call.
    """
    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,options=options)
    driver.get(link)
    time.sleep(2)
    url_video = driver.find_element_by_xpath("/html/body/div/div[2]/div[3]/video").get_attribute('src')
    print(url_video)
    return url_video


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
I didn't get any links
The element you are trying to access is inside the iframe.
So, in order to access elements inside the iframe you have to switch to that iframe as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of the embedded video.

    The driver switches into the 'embed-responsive-item' iframe before the
    video element is looked up. The browser is left open after the call.
    """
    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,options=options)
    driver.get(link)
    time.sleep(2)
    # XPath attribute tests are written with "@" ("#class" is not valid XPath).
    iframe = driver.find_element_by_xpath("//iframe[@class='embed-responsive-item']")
    # Subsequent lookups are resolved inside the iframe's document.
    driver.switch_to.frame(iframe)
    url_video = driver.find_element_by_xpath("/html/body/div/div[2]/div[3]/video").get_attribute('src')
    print(url_video)
    return url_video


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
When you finish working with elements inside the iframe, in order to switch to the regular content you should do that with the following code:
driver.switch_to.default_content()
Also, you should use explicit waits instead of hardcoded delays time.sleep(2) and use relative locators, not the absolute XPaths like this /html/body/div/div[2]/div[3]/video

Selenium python chrome webdriver

I'm trying to use Selenium to scrape some data from the mouser.com website, but after sending some data in the search bar the website responds with "access denied". I need help to bypass it. I tried using a user agent, but got the same result.
import time
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent
# Start Chrome with an explicit chromedriver path and a 1-second implicit wait.
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe')
driver.implicitly_wait(1)
#def get_comp_type(comp_pn):
url ='https://www.mouser.com/'
driver.get(url)
print(driver.title)
# Wait up to 10 seconds for the header search box to become clickable.
wait = WebDriverWait(driver, timeout=10)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".form-control.headerSearchBox.search-input.js-search-autosuggest.as-input")))
# Look the same search box up again, type the query and submit it with Enter.
elem = driver.find_element_by_css_selector(".form-control.headerSearchBox.search-input.js-search-autosuggest.as-input")
elem.click()
elem.send_keys("myString")
elem.send_keys(Keys.RETURN)
time.sleep(1)
# The class is named "Options"; importing "Option" fails before anything runs.
from selenium.webdriver.chrome.options import Options
options = Options()
from fake_useragent import UserAgent
ua = UserAgent()
user_agent = ua.random
print(user_agent)
# Hand the randomly chosen user agent to Chrome as a command-line switch.
options.add_argument(f'user-agent={user_agent}')
driver = webdriver.Chrome(executable_path='C:/Users/amuri/AppData/Local/Microsoft/WindowsApps/PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0/site-packages/chromedriver.exe',options=options)
You didn't use your user agent anywhere in options. Also, you need to set JavaScript and cookies as well.

Categories

Resources