I wrote a script to scrape hotel images from TripAdvisor. I am able to iterate through all of them, but I can't tell when I am finished scrolling within the popup window, so the script stays inside the loop infinitely. I am unable to come up with a condition to break out of the loop so that I can then parse through all of the image URLs. What should my if condition be in order to leave the loop? Any help is greatly appreciated!
# import dependencies
import re
import selenium
import io
import pandas as pd
import urllib.request
import urllib.parse
import requests
import time
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# set Chrome options before creating the driver so they actually take effect
options = webdriver.ChromeOptions()
options.headless = False
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3", options=options)
driver.maximize_window()
#open up website
driver.get(
    "https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
image_url = []
end = False
while not end:
    old_image_length = len(image_url)
    #wait until elements are found and then store all web elements in a list
    images = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]')))
    #iterate through visible images and add their URLs to the list
    for index, image in enumerate(images):
        image_url.append(images[index].value_of_css_property("background-image"))
    new_image_length = len(image_url)
    #move to the last of the currently visible images
    driver.execute_script("arguments[0].scrollIntoView();", images[-1])
    #wait one second
    time.sleep(1)
    #if no new images were collected on this pass, we are done scrolling
    if old_image_length == new_image_length:
        end = True
#clean the list to produce bare links
for i in range(len(image_url)):
    url_start = image_url[i].find('url("') + len('url("')
    url_end = image_url[i].find('")')
    print(image_url[i][url_start:url_end])
#print(image_url)
@Rishiraj Kanugo You can check the visibility of the last element in the popup to make sure that the popup is fully scrolled to the bottom: if (element.isVisible()) { System.out.println("popup is fully scrolled down"); }
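In Python, that visibility check might look like the sketch below (untested against the live page; Selenium's is_displayed() only covers CSS visibility, so a small JavaScript bounding-box test is used to check "in the viewport", and the class name is the one from the question):
from selenium.webdriver.common.by import By

# XPath copied from the question; TripAdvisor may change this class name
GALLERY_TILE_XPATH = '//*[@class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]'

def popup_fully_scrolled(driver):
    tiles = driver.find_elements(By.XPATH, GALLERY_TILE_XPATH)
    if not tiles:
        return False
    # the popup is fully scrolled once the last tile's box fits in the viewport
    return driver.execute_script(
        "var r = arguments[0].getBoundingClientRect();"
        "return r.top >= 0 && r.bottom <= window.innerHeight;",
        tiles[-1])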
Related
I am working on a script to gather information from Newegg to look at changes in graphics card prices over time. Currently, my script opens a Newegg search for RTX 3080s through Chromedriver and then clicks the link for Desktop Graphics Cards to narrow down my search. The part I am struggling with is developing a for loop that lets me iterate through all 8 search result pages. I know that I could do this by simply changing the page number in the URL, but since this is an exercise I'm using to learn relative XPath better, I want to do it using the pagination buttons at the bottom of the page. I know that each button should contain inner text of "1, 2, 3, 4, etc.", but whenever I use text() = {item} in my for loop, it doesn't click the button. The script runs and doesn't return any exceptions, but it doesn't do what I want it to. Below I have attached the HTML for the page as well as my current script. Any suggestions or hints are appreciated.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
options = Options()
PATH = 'C://Program Files (x86)//chromedriver.exe'
driver = webdriver.Chrome(PATH)
url = 'https://www.newegg.com/p/pl?d=RTX+3080'
driver.maximize_window()
driver.get(url)
card_path = '/html/body/div[8]/div[3]/section/div/div/div[1]/div/dl[1]/dd/ul[2]/li/a'
desktop_graphics_cards = driver.find_element(By.XPATH, card_path)
desktop_graphics_cards.click()
time.sleep(5)
graphics_card = []
shipping_cost = []
price = []
total_cost = []
for item in range(9):
    try:
        #next_page_click = driver.find_element(By.XPATH("//button[text() = '{item + 1}']"))
        print(next_page_click)
        next_page_click.click()
    except:
        pass
The pagination buttons are out of the initially visible area.
In order to click these elements you will have to scroll the page until the element appears.
Also, you will need to click the next-page buttons starting from 2 up to 9 (inclusive), while you are trying to do this with numbers from 1 up to 9.
I think this should work better:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time
options = Options()
PATH = 'C://Program Files (x86)//chromedriver.exe'
driver = webdriver.Chrome(PATH)
url = 'https://www.newegg.com/p/pl?d=RTX+3080'
actions = ActionChains(driver)
driver.maximize_window()
driver.get(url)
card_path = '/html/body/div[8]/div[3]/section/div/div/div[1]/div/dl[1]/dd/ul[2]/li/a'
desktop_graphics_cards = driver.find_element(By.XPATH, card_path)
desktop_graphics_cards.click()
time.sleep(5)
graphics_card = []
shipping_cost = []
price = []
total_cost = []
for item in range(2, 10):
    try:
        # By.XPATH is a locator constant, not a callable: pass it together with the expression
        next_page_click = driver.find_element(By.XPATH, f"//button[text() = '{item}']")
        actions.move_to_element(next_page_click).perform()
        time.sleep(2)
        #print(next_page_click) - printing a web element itself will not give you usable information
        next_page_click.click()
        #let the next page load, it takes some time
        time.sleep(5)
    except:
        pass
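If you would rather not use ActionChains, scrolling the button into view with a short JavaScript call (an alternative suggestion, not part of the original answer) works as well:
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_page_click)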
I want to change the timeframe in fxblue technical analysis from 1h (the default value) to 5m, but I can't click its pop-up button.
Here is the code I tried:
import pandas as pd
import numpy as np
import csv
import os
import time
from selenium import webdriver
driver = webdriver.Chrome(os.getcwd() + '/chromedriver')
url = "https://www.fxblue.com/market-data/technical-analysis/EURUSD"
driver.get(url)
time.sleep(5)
timestamp = driver.find_element_by_xpath('//*[@id="TimeframeContainer"]').click()
At this point I can see the pop-up with the timeframes, but I couldn't find a way to change the timeframe.
The elements in the Timeframe pop-up are inside an iframe. You need to switch to that frame to interact with the elements it contains.
# Imports required
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.get("https://www.fxblue.com/market-data/technical-analysis/EURUSD")
wait = WebDriverWait(driver,30)
# Click on timestamp button.
timestamp = wait.until(EC.element_to_be_clickable((By.ID,"txtTimeframe")))
timestamp.click()
# Switch to iframe.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[contains(@class,'DialogInnerIframe')]")))
# click on M5 button.
fivemin = wait.until(EC.element_to_be_clickable((By.XPATH,"//div[@class='TimeframeItem' and text()='M5']")))
fivemin.click()
# Switch to default content to interact with other elements.
driver.switch_to.default_content()
Clicking on that element opens a dialog, so you need to select and click the desired element on that dialog.
The dialog is inside an iframe, so you have to switch to that iframe.
After the desired option is selected and the dialog is closed, you have to switch back from the iframe to the default content.
Also, you should use explicit waits instead of hardcoded pauses.
Your code may be something like this:
import pandas as pd
import numpy as np
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(os.getcwd() + '/chromedriver')
wait = WebDriverWait(driver, 20)
url = "https://www.fxblue.com/market-data/technical-analysis/EURUSD"
driver.get(url)
wait.until(EC.visibility_of_element_located((By.ID, "TimeframeContainer"))).click()
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe.DialogDragBar")))
driver.find_element_by_xpath("//div[#tf='300']").click()
driver.switch_to.default_content()
I've been attempting to use Selenium to go through elements on SoundCloud's website and am having trouble interacting with the input tags. When I try to write in the input tag of the class "headerSearch__input" with the send_keys command, I get back the error "Message: element not interactable". Could someone please explain to me what I'm doing wrong?
from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import requests
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
driver = webdriver.Chrome(executable_path='/Users/quanahbennett/PycharmProjects/SeleniumTest/chromedriver')
url= "https://soundcloud.com/"
driver.get(url)
#time.sleep(30)
wait = WebDriverWait(driver, 30)
#link = driver.find_elements_by_link_text("Sign in")
#link[0].click()
#driver.execute_script("arguments[0].click();", link[0])
#SUCCESFUL LOGIN BUTTON PUSH
#please = driver.find_element_by_css_selector('button.frontHero__loginButton')
#please.click()
attempt = driver.find_element_by_css_selector('input.headerSearch__input')
time.sleep(10)
attempt.send_keys('Hello')
breakpoint()
#driver.quit()
The locator input.headerSearch__input matches two different elements in the DOM. It's important to find unique locators.
Also close the cookie pop-up first, and then try to interact with the elements.
Try like below and confirm:
driver.get("https://soundcloud.com/")
wait = WebDriverWait(driver,30)
# Click on Accept cookies button
wait.until(EC.element_to_be_clickable((By.ID,"onetrust-accept-btn-handler"))).click()
search_field = wait.until(EC.element_to_be_clickable((By.XPATH,"//div[#id='content']//input")))
search_field.send_keys("Sample text")
I'm trying to scrape all of the images on this site. However, when I run my script and try to get the CSS property 'background-image' to extract the URL of each web element, the result prints out "None". I have no idea why it would return None, since when I print out the web element the attribute does exist. Any help would be greatly appreciated!
import re
import selenium
import io
import pandas as pd
import urllib.request
import urllib.parse
import requests
import time
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def parse_style_attribute(style_string):
    if 'background-image' in style_string:
        style_string = style_string.split(' url("')[1].replace('");', '')
        return style_string
    return None
#setup opening url window of website to be scraped
options = webdriver.ChromeOptions()
options.headless = False
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)
#driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3") #possible issue by not including the file extension
# driver.maximize_window()
# time.sleep(5)
# driver.get("""https://www.tripadvisor.com/""") #get the information from the page
driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3", options=options)
driver.maximize_window()
driver.get("https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
time.sleep(1)
#waits for that amount of time
driver.implicitly_wait(12)
#find the searchbar and then plug in the key
#driver.find_element_by_xpath('//*[@class="typeahead_input"]').send_keys("Washington D.C.", Keys.ENTER)
#wait
time.sleep(1)
#list all of the hotels in that page
images = driver.find_elements_by_xpath('//*[@class="media-viewer-tile-gallery-v2-TileGallery__entryInner--JaADY "]')
image_url = []
for i in range(len(images)):
    image_url.append(images[i].value_of_css_property("background-image"))
print("Total Number of images: ", len(images))
# print(images)
firstimage = images[0].get_attribute("innerHTML")
print(firstimage)
for i in range(len(image_url)):
    print(image_url[i])
Try this. It works for me.
# keep your existing browser-options setup code above this point
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
driver.get(
"https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
images = WebDriverWait(driver, 20).until(
EC.presence_of_all_elements_located(
(By.XPATH, '//*[@class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]')))
image_url = []
for index, image in enumerate(images):
    image_url.append(images[index].value_of_css_property("background-image"))
print(image_url)
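The values collected this way still carry the CSS url("...") wrapper; a small helper along the lines of the cleanup loop used elsewhere in this thread (a sketch, not part of the original answer) reduces them to bare links:
def strip_css_url(value):
    # value looks like: url("https://media-cdn.tripadvisor.com/...")
    start = value.find('url("') + len('url("')
    end = value.find('")')
    return value[start:end]

for raw in image_url:
    print(strip_css_url(raw))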
I am currently scrolling through a TripAdvisor page to scrape images and need to scroll until the bottom of the page, but I keep getting the error:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document.
I am assuming it is because it is trying to go through the page very fast, but even when I increase the implicit wait time, it does not solve the issue. I also tried making sure the new location is visible first before parsing through to get the URL, but that also did not do any good. Any help would be greatly appreciated!
# import dependencies
import re
import selenium
import io
import pandas as pd
import urllib.request
import urllib.parse
import requests
import time
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# set Chrome options before creating the driver so they actually take effect
options = webdriver.ChromeOptions()
options.headless = False
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome("/Users/rishi/Downloads/chromedriver 3", options=options)
driver.maximize_window()
#open up website
driver.get(
"https://www.tripadvisor.com/Hotel_Review-g28970-d84078-Reviews-Hyatt_Regency_Washington_on_Capitol_Hill-Washington_DC_District_of_Columbia.html#/media/84078/?albumid=101&type=2&category=101")
image_url = []
end = False
while not end:
    #wait until elements are found and then store all web elements in a list
    images = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located(
            (By.XPATH, '//*[@class="media-viewer-dt-root-GalleryImageWithOverlay__galleryImage--1Drp0"]')))
    #iterate through visible images and acquire their url from the background-image style
    for index, image in enumerate(images):
        image_url.append(images[index].value_of_css_property("background-image"))
    #if you are at the end of the page then leave loop
    # if(length == end_length):
    #     end = True
    #move to next visible images in the array
    driver.execute_script("arguments[0].scrollIntoView();", images[-1])
    #time.sleep(1)
    #wait until the new web element is visible
    driver.implicitly_wait(10)
    #WebDriverWait(driver, 20).until(EC.visibility_of_element_located(images[-1]))
#clean the list to produce bare links
for i in range(len(image_url)):
    url_start = image_url[i].find('url("') + len('url("')
    url_end = image_url[i].find('")')
    print(image_url[i][url_start:url_end])
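A common way to work around the StaleElementReferenceException here (a sketch of the usual re-locate-and-retry pattern, not a verified fix for this exact page) is to read each tile's CSS property right after locating it, deduplicate, and simply skip any tile that goes stale mid-read; the next pass over the freshly located list will pick it up again:
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By

def collect_visible_urls(driver, xpath, seen):
    """Gather background-image values, skipping tiles that go stale mid-read."""
    fresh = []
    for tile in driver.find_elements(By.XPATH, xpath):
        try:
            value = tile.value_of_css_property("background-image")
        except StaleElementReferenceException:
            # the gallery re-rendered under us; this tile reappears next pass
            continue
        if value not in seen:
            seen.add(value)
            fresh.append(value)
    return fresh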