Get link video from website by selenium. How to get the link? - python

I want to get the video link from the website https://www.ofw.su/family-feud-july-29-2022
but I can't. This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of the page's <video>.

    Loads the page, pauses two seconds, looks the <video> element up by an
    absolute XPath in the top-level document, then prints and returns its
    ``src`` attribute.

    Args:
        link: URL of the page to open.

    Returns:
        The value of the <video> element's ``src`` attribute.
    """
    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,options=options)
    driver.get(link)
    time.sleep(2)
    # NOTE: this lookup runs against the top-level document only; the reply
    # that follows this snippet reports that the <video> sits inside an
    # iframe, which is why no link is found here.
    url_video = driver.find_element_by_xpath("/html/body/div/div[2]/div[3]/video").get_attribute('src')
    print(url_video)
    return url_video


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
I didn't get any links

The element you are trying to access is inside the iframe.
So, in order to access elements inside the iframe you have to switch to that iframe as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from random import randint
import random
import string
import os
def get(link):
    """Open *link* in Chrome and return the ``src`` of the embedded <video>.

    The player is rendered inside ``<iframe class="embed-responsive-item">``,
    so the driver must switch into that frame before the <video> element can
    be located.

    Args:
        link: URL of the page that embeds the video player.

    Returns:
        The value of the <video> element's ``src`` attribute (also printed).

    Raises:
        selenium.common.exceptions.TimeoutException: if the iframe or the
            <video> element does not show up within 20 seconds.
    """
    # Local imports keep this snippet self-contained.
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    CHROMEDRIVER_PATH = 'chromedriver.exe'
    options = webdriver.ChromeOptions()
    options.add_argument("user-data-dir=E:\\profile")
    options.add_argument("--disable-notifications")
    #options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=options)
    try:
        driver.get(link)
        # Explicit waits replace the fixed time.sleep(2): each one returns as
        # soon as its condition holds and raises if it never does.
        wait = WebDriverWait(driver, 20)
        wait.until(EC.frame_to_be_available_and_switch_to_it(
            (By.XPATH, "//iframe[@class='embed-responsive-item']")))
        # Relative locator: any <video> inside the player frame, instead of
        # an absolute path that breaks on the slightest layout change.
        video = wait.until(EC.presence_of_element_located((By.XPATH, "//video")))
        url_video = video.get_attribute('src')
        print(url_video)
        return url_video
    finally:
        # Always release the browser process, even when a wait times out.
        driver.quit()


link = "https://www.ofw.su/family-feud-july-29-2022"
get(link)
When you finish working with elements inside the iframe, in order to switch to the regular content you should do that with the following code:
# Leave the iframe and return to the top-level document.
driver.switch_to.default_content()
Also, you should use explicit waits (WebDriverWait) instead of hardcoded delays such as time.sleep(2), and relative locators instead of absolute XPaths like /html/body/div/div[2]/div[3]/video

Related

How can I get the links of the apps from a certain developer? So far I have scraped the web objects but am unable to get the actual links.

I am trying to extract the links of all application from a particular developer present on the playstore.
import time
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver. common.by import By
# Start Chrome with the driver path returned by ChromeDriverManager().install().
chromedriver_binary = ChromeDriverManager().install()
driver = webdriver.Chrome(executable_path=chromedriver_binary)

# Load the developer page and collect every element with class 'ULeU3b'.
developer_page = "https://play.google.com/store/apps/dev?id=5305197572942248936"
driver.get(developer_page)
l1 = driver.find_elements(By.CLASS_NAME, 'ULeU3b')
You are close to the solution.
Inside the elements you located there are `a` elements containing the links.
All you need to do here is wait for all those elements to become visible, get the list of those elements, iterate over the list and extract the links.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Browser configuration: start maximized and suppress notification prompts.
options = Options()
options.add_argument("start-maximized")
options.add_argument('--disable-notifications')
# Raw string so the backslashes in the Windows path are never read as
# escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://play.google.com/store/apps/dev?id=5305197572942248936"
driver.get(url)
# Wait (up to 20 s) until every <a> inside a '.ULeU3b' element is visible,
# then print the target of each link.
links = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".ULeU3b a")))
for link in links:
    print(link.get_attribute("href"))
The result is:
https://play.google.com/store/apps/details?id=com.tatamotors.eguruibcrm
https://play.google.com/store/apps/details?id=com.T1.Primarun
https://play.google.com/store/apps/details?id=com.tata.skoolman
https://play.google.com/store/apps/details?id=com.ttl.tatafleetman

Select first element of srcset with python selenium

Using selenium in Python, I have been able to successfully access some url's of an image I want to download. However, the image link is stored within a srcset image attribute. When I use get_attribute('srcset'), it returns a string with the 4 links. I just want the one. How would I go about doing this? Could I possibly just crop the string afterwards?
Here's the site that I am scraping from:
https://www.politicsanddesign.com/
Here is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Headless Chrome session on the site being scraped.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
# srcset holds every candidate URL with its width descriptor in one string,
# so img_url is not a single URL at this point.
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
driver.get(img_url)
And here is what the img_url object looks like:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80 400w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=800&fm=jpg&q=80 800w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1200&fm=jpg&q=80 1200w, //images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=1800&fm=jpg&q=80 1800w
But I'd like it to just be:
//images.ctfassets.net/00vgtve3ank7/6f38yjnNcU1d6dw0jt1Uhk/70dfbf208b22f7b1c08b7421f910bb36/2020_HOUSE_VA-04_D-MCEACHIN..jpg?w=400&fm=jpg&q=80
The image seems to have an attribute called currentSrc which hold only the current value.
# currentSrc holds only the current value, unlike srcset, which lists every
# candidate.
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("currentSrc")
driver.get(img_url)
You can simply split the value extracted from that web element.
As following:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# Headless Chrome session on the site being scraped.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# srcset is a comma-separated list of "URL descriptor" candidates, e.g.
# "//host/a.jpg?w=400 400w, //host/a.jpg?w=800 800w". Keep only the URL of
# each candidate and give protocol-relative URLs a scheme so driver.get()
# accepts them. (Splitting on "," assumes the URLs contain no commas.)
img_urls = []
for candidate in (img_url or "").split(","):
    parts = candidate.split()
    if not parts:
        continue
    candidate_url = parts[0]
    if candidate_url.startswith("//"):
        candidate_url = "https:" + candidate_url
    img_urls.append(candidate_url)
Now img_urls is a list containing 4 URLs (one per srcset candidate), so you can use it as follows:
# Open the first, second and third entries of img_urls, one after another.
for position in (0, 1, 2):
    driver.get(img_urls[position])
My inefficient solution:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import pyautogui
import time
# WILL NEED TO EVENTUALLY FIGURE OUT HOW TO WRAP ALL OF THIS INTO A FUNCTION OR LOOP TO DO IT FOR ALL DIV OBJECTS
# Headless Chrome session on the site being scraped.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
driver.get('https://www.politicsanddesign.com/')
img_url = driver.find_element(By.XPATH, "//div[@class = 'responsive-image-wrapper']/img").get_attribute("srcset")
# The raw srcset value is a list of "URL descriptor" pairs, not a URL, so it
# is never passed to driver.get(). The first whitespace-delimited token is
# the first candidate's URL, whatever its width descriptor is; it is
# protocol-relative ("//host/..."), hence the explicit scheme.
img_url2 = 'https:' + img_url.split(None, 1)[0]
driver.get(img_url2)

Element not found Selenium , site React

Selenium does not find the accept cookies button.
Tested: xpath, class and css
Error
Command
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd
import csv
# Build the Chrome options. The second assignment replaces the object
# created by the first one.
options = Options()
options = webdriver.ChromeOptions()
# Exclude Chrome's 'enable-logging' switch.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
navegador = webdriver.Chrome(options=options)
navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
# Set a 30 s implicit wait for element lookups, then pause a fixed 30 s.
navegador.implicitly_wait(30)
sleep(30)
# NOTE(review): this searches the top-level document. The reply below says
# the button has to be reached through a shadow root, which would explain
# why this lookup does not find it.
navegador.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
navegador.implicitly_wait(30)
# Locate the shadow host first, then search for the button inside its
# shadow root rather than in the top-level document.
elem=navegador.find_element(By.XPATH,"//div[@id='hotmart-cookie-policy']").shadow_root
elem.find_element(By.CSS_SELECTOR, ".cookie-policy-accept-all.hot-button.hot-button--primary").click()
You need to find the shadow root and then find from there.
Since the above didn't work try this one.
from selenium.webdriver.support.ui import WebDriverWait

navegador.get('https://app-vlc.hotmart.com/market/search?categoryId=25&page=1&userLanguage=PT_BR')
# Poll for up to 30 s instead of sleeping a fixed 10 s: the wait returns as
# soon as its condition yields a truthy value.
wait = WebDriverWait(navegador, 30)
# WebDriverWait ignores NoSuchElementException by default, so this keeps
# retrying until the shadow host exists.
elem = wait.until(lambda drv: drv.find_element(By.XPATH, "//div[@id='hotmart-cookie-policy']"))
# Return null (not a JavaScript error) while the shadow root or the button
# is still missing, so the wait below simply polls again.
script = '''const root = arguments[0].shadowRoot;
return root ? root.querySelector(".cookie-policy-accept-all.hot-button.hot-button--primary") : null;'''
elem1 = wait.until(lambda drv: drv.execute_script(script, elem))
elem1.click()

copy and paste in selenium python

I'm new to Selenium. I am trying to copy something from one page to another. The page that I copy from is set up so that clicking on the text once copies it automatically, but nothing is being copied and I am not sure why.
chrome_driver = '/Applications/chromedriver'
driver = webdriver.Chrome(chrome_driver)
driver.get('https://tempail.com/en/')
time.sleep(5)
# Per the question text, clicking this element copies its text.
driver.find_element_by_xpath('//*[@id="eposta_adres"]').click()
driver.get('https://www.instagram.com/')
driver.find_element_by_xpath('//*[@id="user_first_name"]').send_keys('Scott')
driver.find_element_by_xpath('//*[@id="user_email"]').click()
# NOTE(review): `act` is never defined in this snippet; presumably it is
# meant to be an ActionChains(driver) instance -- confirm.
# NOTE(review): ActionChains provides send_keys(), not send_key(), and
# "COMMAND + v" would be sent as literal text rather than as a shortcut.
act.key_down(Keys.META).send_key("COMMAND + v").key_up(Keys.META).perform()
On my Linux machine, this works in both Firefox and Chrome:
item.send_keys(Keys.CONTROL, "v")
Like
# Locate the target field, then send Ctrl+V to it.
item = driver.find_element_by_xpath('//*[@id="user_email"]')
item.send_keys(Keys.CONTROL, "v")
I tried to test it with your code, but you use an XPath that I can't find on Instagram,
so I tested with the Search... field on the current page:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
#from webdriver_manager.firefox import GeckoDriverManager
import time
driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get('https://tempail.com/en/')
time.sleep(5)
# Click the e-mail address element, as in the question, so the page copies
# it.
driver.find_element_by_xpath('//*[@id="eposta_adres"]').click()
driver.get('https://stackoverflow.com/questions/71543113/copy-and-paste-in-selenium-python/')
# Send Ctrl+V to the search field (name="q") of the page just loaded.
item = driver.find_element_by_xpath('//*[@name="q"]')
item.send_keys(Keys.CONTROL, "v")
BTW: I described this a long time ago in
Selenium: How to send clipboad to field in browser — furas.pl.
I also show there how to use the pyperclip module to work with the clipboard.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
#from webdriver_manager.firefox import GeckoDriverManager
import time
import pyperclip
driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get('https://stackoverflow.com/questions/71543113/copy-and-paste-in-selenium-python/')
item = driver.find_element_by_xpath('//*[@name="q"]')
# Alternative: read the clipboard in Python and type its content directly.
#text = pyperclip.paste() # get text from clipboard
#item.clear()
#item.send_keys(text)
pyperclip.copy("Hello World") # put text in clipboard
# Send Ctrl+V to the search field.
item.send_keys(Keys.CONTROL, "v")

Value of CSS Property Selenium returning None for all images

I'm trying to scrape all of the images on this site. However, when I run my script and try to get the CSS attribute of 'background-image' to extract the url of each web element, the result is printing out "None". I have no idea why it would be returning None as I print out the web element and the attribute does exist. Any help would be greatly appreciated!
import re
import selenium
import io
import pandas as pd
import urllib.request
import urllib.parse
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
import time
from _datetime import datetime
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def parse_style_attribute(style_string):
    """Return the URL of the ``background-image`` in a CSS style string.

    Accepts the three ways CSS lets a URL be written: ``url("...")``,
    ``url('...')`` and ``url(...)``. Declarations before or after
    ``background-image`` are ignored.

    Args:
        style_string: Text of a ``style`` attribute, e.g.
            ``'background-image: url("https://host/a.jpg");'``. May be
            ``None`` or empty.

    Returns:
        The URL as a string, or ``None`` when there is no ``background-image``
        declaration or it carries no ``url(...)`` value (e.g. ``none``).
    """
    if not style_string or 'background-image' not in style_string:
        return None
    # [^;]*? keeps the search inside the background-image declaration;
    # \1 requires the closing quote (if any) to match the opening one.
    match = re.search(
        r'background-image\s*:[^;]*?url\(\s*(["\']?)(.*?)\1\s*\)',
        style_string,
    )
    return match.group(2) if match else None
# Set up the browser window for the website to be scraped.
options = webdriver.ChromeOptions()
options.headless=False
# 2 == block notification prompts.
prefs = {"profile.default_content_setting_values.notifications" : 2}
options.add_experimental_option("prefs", prefs)
# Pass the options to the driver; otherwise the preferences above are
# silently ignored.
driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3", options=options)
driver.maximize_window()
driver.get("https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
time.sleep(1)
# Element lookups wait up to 12 s before giving up.
driver.implicitly_wait(12)
time.sleep(1)
# Collect the gallery tiles and read each tile's computed background-image.
images = driver.find_elements_by_xpath('//*[@class="media-viewer-tile-gallery-v2-TileGallery__entryInner--JaADY "]')
image_url = [image.value_of_css_property("background-image") for image in images]
print("Total Number of images: ", len(images))
# Show the markup of the first tile, if any tile was found at all.
if images:
    firstimage = images[0].get_attribute("innerHTML")
    print(firstimage)
for url in image_url:
    print(url)
try this. it works for me.
# attach your code as set browser option
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
driver.get(
    "https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
# Wait (up to 20 s) until the gallery image elements are present in the DOM.
images = WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located(
        (By.XPATH, '//*[@class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]')))
# Computed background-image value of every gallery element, in page order.
image_url = [image.value_of_css_property("background-image") for image in images]
print(image_url)

Categories

Resources