I've rceated a script to scrape different collection names from a webpage traversing multiple pages. The script can parse first 13 names from each page out of 100 names. One such collection name looks like Pudgy Penguins. How can I capture 100 names instead of only 13 from different pages of that site using selenium?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://opensea.io/rankings"
def scroll_to_the_bottom():
check_height = driver.execute_script("return document.body.scrollHeight;")
while True:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
try:
WebDriverWait(driver,5).until(lambda driver: driver.execute_script("return document.body.scrollHeight;") > check_height)
check_height = driver.execute_script("return document.body.scrollHeight;")
except TimeoutException:
break
def get_collection_names(link):
driver.get(link)
while True:
scroll_to_the_bottom()
for item in WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,"[role='listitem'] [class$='Ranking--row']"))):
collection_name = WebDriverWait(item,10).until(EC.visibility_of_element_located((By.CSS_SELECTOR,"[class$='Ranking--collection-name']"))).text
yield collection_name
try:
button = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH,"//button[contains(#class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")))
driver.execute_script('arguments[0].click();',button)
WebDriverWait(driver,10).until(EC.staleness_of(item))
except Exception as e:
return
if __name__ == '__main__':
driver = webdriver.Chrome()
for collection_name in get_collection_names(link):
print(collection_name)
Scrolling to the bottom of every page seems not to have any effect on the number of results the script produces.
I have checked your description and source code and I think there are many elements. So it doesn't load at one time. For solving this, scroll down to the bottom step by step. So, I have changed function scroll_to_the_bottom as below :
def scroll_to_the_bottom() :
H = driver.execute_script('return document.body.scrollHeight;')
h = 0
while True :
h += 300
if h >= H :
break
driver.execute_script("window.scrollTo({}, {});".format(0, h))
time.sleep(1)
So, embed above code into your code, we can change it as below :
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://opensea.io/rankings"
def get_collection_names(link):
driver.get(link)
unique_items = set()
while True:
item = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,"[class$='Ranking--collection-name']")))
H = driver.execute_script('return document.body.scrollHeight;')
h = 0
while True :
h += 300
if h >= H :
break
for element in WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"[class$='Ranking--collection-name']"))):
if element.text not in unique_items:
yield element.text
unique_items.add(element.text)
driver.execute_script("window.scrollTo(0, {});".format(h))
time.sleep(1)
try:
button = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH,"//button[contains(#class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")))
driver.execute_script('arguments[0].click();',button)
WebDriverWait(driver,10).until(EC.staleness_of(item))
except Exception as e:
return
if __name__ == '__main__':
driver = webdriver.Chrome()
for item in get_collection_names(link):
print(item)
driver.quit()
Hope to be helpful for you. Thanks.
Related
Ok this is a bit embarrassing because I've asked a similar question on here sometime ago, but I tried the suggested solution ie (wait till element clickable), but it didn't work. So here's my code snipped.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementClickInterceptedException, ElementNotInteractableException, TimeoutException, WebDriverException, NoSuchElementException
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from time import sleep
import re
import pandas as pd
def get_links(link):
driver = webdriver.Firefox()
driver.get(link)
driver.implicitly_wait(50)
sleep(5)
_flag = True
knt = 0
while _flag:
try:
WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
WebDriverWait(driver, 50).until(EC.element_to_be_clickable((By.XPATH, "//a[#class='event__more event__more--static']")))
driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click()
print("works here!")
print("clicked....................................")
sleep(5)
_flag = True
#tmp = driver.find_elements_by_xpath("//span[contains(text(), 'NBA - Pre-season')]")
#if len(tmp) > 0:
#print("Found them!")
#_flag = False
if knt > 5: # For testing
print("Nuff clicked")
_flag = False
except(ElementNotInteractableException):
print("Error!")
_flag = False
driver.close()
return None
link = "https://www.flashscore.com/basketball/usa/nba/results/"
_ = get_links(link)
For some reason I keep getting an ElementClickInterceptedException Error at the driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click() line. Any help can do please
Your element overlap with other element, it cause the ElementClickInterceptedException error appear.
Before perform you code, please close the cookies popup with this code snippet:
def get_links(link):
driver = webdriver.Firefox()
driver.get(link)
driver.implicitly_wait(50)
sleep(5)
#here, close popup
if(len(driver.find_elements_by_id('onetrust-accept-btn-handler'))>0):
driver.find_element_by_id('onetrust-accept-btn-handler').click()
_flag = True
knt = 0
while _flag:
....
....
And remove this line:
WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
This is invalid xpath expression, and no needed, have handled by if(popup accept) condition the above.
What I'm trying to do is making nike product auto buyer the problem is after selecting size it doesn't let me click through selenium I even tried to click manually but nothing pops up this is my code where I try to click (not full code):
from selenium import webdriver
from selenium.common.exceptions import JavascriptException
from selenium.webdriver import ChromeOptions
import re
from bs4 import BeautifulSoup
import requests
import json
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os
user = os.environ['USERNAME']
snkrsurl = "https://www.nike.com/t/air-zoom-pegasus-38-womens-running-shoe-wide-gg8GBK/CW7358-500" #input("Please input your SNKRS url \n")
size = float(input("Please input size \n"))
options = ChromeOptions()
options.add_experimental_option('excludeSwitches',['enable-logging'])
options.add_experimental_option("useAutomationExtension", False)
options.add_experimental_option("detach",True)
options.add_argument("--disable-notifications")
chrome = webdriver.Chrome(options=options)
if "https://" in snkrsurl:
pass
elif "http://" in snkrsurl:
pass
else:
snkrsurl = "http://"+snkrsurl
chrome.get(snkrsurl)
with requests.Session() as session:
soup = BeautifulSoup(session.get(snkrsurl).text, features="lxml")
script = soup.find("script", string=re.compile('INITIAL_REDUX_STATE')).string
redux = json.loads(script[script.find('{'):-1])
products = redux["Threads"]["products"]
wait = WebDriverWait(chrome, 15)
def step1(i,v):
for key, product in products.items():
if float(product["skus"][i]["nikeSize"]) == v:
print("Found")
if v.is_integer():
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="gen-nav-footer"]/nav/button'))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(int(v))))).click()
chrome.execute_script("window.scroll(0,609)")
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[text()="Add to Bag"]'))).click()
break
else:
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="gen-nav-footer"]/nav/button'))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(v)))).click()
e = chrome.find_element_by_css_selector("#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn")
chrome.execute_script("arguments[0].scrollIntoView(true);")
e.click()
break
else:
pass
for i,v in products.items():
global length
length = len(v['skus'])
break
for i in range(length):
length -=1
step1(length,size)
I use window.scroll to go to that element because if I don't it throws error saying element is not interactable and yes checkout is being only clickable from real chrome.
Thanks
but I am trying to write a unit test for my website that runs through all the links and returns an A ok or no go if the site is working. But I am having trouble with the program it's not able to constantly click the link in the site navigation bar. I've tried multiple waits implicit. Explicit, expected condition but the page loads and half the time it will click the link and go to that part of the site and the other half the program just stops and nothing is clicked.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
PATH = "C:\Program Files (x86)\chromedriver.exe"
drive = webdriver.Chrome(PATH)
drive.get("https://www.blackhempfamily.com/")
wait = WebDriverWait(drive, 10)
link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Why Black Hemp?")))
link.click()
Would be a better tag to use.
wait.until(EC.element_to_be_clickable((By.XPATH, "//p[text()='Why Black Hemp?']")))
The element you're searching for is not a link. It's a paragraph (p). I added a sleep call to give the page more load time.
Try this code:
time.sleep(3)
wait = WebDriverWait(drive, 10)
#link = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Why Black Hemp?")))
link = drive.find_element_by_xpath('//*[#id="idh09fqo2label"]')
link.click()
So, it took a while ... but, I think that I was able to figure this out. The actions that you need to do are:
Click "Why Black Hemp?"
Wait until the page stops scrolling
Scroll to the top of the page
Wait until the page stops scrolling
**Attempt to scroll down so you can get the nav bar to display
Repeat until your heart is content / Test Passes with "A-OK"
In order for this to be achieved, you need to have the following imports
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeWebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions
from selenium.common.exceptions import WebDriverException
import time
Step 1 - Click your "Why Black Hemp?" nav bar element
chrome_driver.find_element(By.XPATH, "//nav[contains(#id, 'navContainer')]//p[text()='Why Black Hemp?']/../../..").click()
Step 2 - Check to see if our page is still scrolling
# Checks to see if our page is still scrolling
while is_same_position == False:
windowPosition1 = chrome_driver.execute_script("return document.body.scrollHeight;")
time.sleep(2)
windowPosition2 = chrome_driver.execute_script("return document.body.scrollHeight;")
if(windowPosition1 == windowPosition2):
is_same_position = True
final_window_position = windowPosition1
Step 3 - Scroll to the top of the page
chrome_driver.execute_script("window.scrollTo(0, {0})".format((0 - final_window_position)))
Step 4 - Check to see if our page is still scrolling
# Checks to see if our page is still scrolling
while is_same_position == False:
windowPosition1 = chrome_driver.execute_script("return document.body.scrollHeight;")
time.sleep(2)
windowPosition2 = chrome_driver.execute_script("return document.body.scrollHeight;")
if(windowPosition1 == windowPosition2):
is_same_position = True
Step 5 - Attempt to scroll down until our header tag does not have the style of visibility: hidden
# Scrolls down until our nav bar is displayed
for scrollNum in range(10):
chrome_driver.execute_script("window.scrollTo(0, {0})".format(scrollNum * 100 + 200))
time.sleep(2)
if is_displayed(chrome_driver, "//header[contains(#style, 'visibility: hidden')]") == False:
break
Step 6 - Repeat until your heart is content
MAIN CODE - For Reference
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeWebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions
from selenium.common.exceptions import WebDriverException
import time
def get_chrome_driver():
"""This sets up our Chrome Driver and returns it as an object"""
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("window-size=1500,1000")
# Removes the "This is being controlled by automation" alert / notification
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
path_to_chrome = "F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
return webdriver.Chrome(executable_path = path_to_chrome,
options = chrome_options)
def wait_displayed(driver : ChromeWebDriver, xpath : str, int = 3):
try:
DriverWait(driver, int).until(
DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
)
except:
raise WebDriverException(f'Timeout: Failed to find {xpath}')
def is_displayed(driver : ChromeWebDriver, xpath : str, int = 3):
try:
webElement = DriverWait(driver, int).until(
DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
)
return True if webElement != None else False
except:
return False
# Gets our chrome driver and opens our site
chrome_driver = get_chrome_driver()
chrome_driver.get("https://www.blackhempfamily.com/")
# Repeats this 5 times
for repeat in range(5):
print("Attempt to click our link. Try #{0}".format(repeat + 1))
is_same_position = False
final_window_position = 0
# Checks to see if our website's elements display
wait_displayed(chrome_driver, "//nav[contains(#id, 'navContainer')]")
wait_displayed(chrome_driver, "//nav[contains(#id, 'navContainer')]//p[text()='Why Black Hemp?']")
wait_displayed(chrome_driver, "//nav[contains(#id, 'navContainer')]//p[text()='Shop Black Hemp']")
# Clicks our "Why Black Hemp?" tab
chrome_driver.find_element(By.XPATH, "//nav[contains(#id, 'navContainer')]//p[text()='Why Black Hemp?']/../../..").click()
# Checks to see if our page is still scrolling
while is_same_position == False:
windowPosition1 = chrome_driver.execute_script("return document.body.scrollHeight;")
time.sleep(2)
windowPosition2 = chrome_driver.execute_script("return document.body.scrollHeight;")
if(windowPosition1 == windowPosition2):
is_same_position = True
final_window_position = windowPosition1
# Checks to see if our "Natural Moisture" text displays
wait_displayed(chrome_driver, "(//h2//span[contains(., 'Natural Moisture')]/../..)[1]")
# Scrolls back to the top of the page
chrome_driver.execute_script("window.scrollTo(0, {0})".format((0 - final_window_position)))
is_same_position = False
# Checks to see if our page is still scrolling
while is_same_position == False:
windowPosition1 = chrome_driver.execute_script("return document.body.scrollHeight;")
time.sleep(2)
windowPosition2 = chrome_driver.execute_script("return document.body.scrollHeight;")
if(windowPosition1 == windowPosition2):
is_same_position = True
# Scrolls down until our nav bar is displayed
for scrollNum in range(10):
chrome_driver.execute_script("window.scrollTo(0, {0})".format(scrollNum * 100 + 200))
time.sleep(2)
if is_displayed(chrome_driver, "//header[contains(#style, 'visibility: hidden')]") == False:
break
chrome_driver.quit()
chrome_driver.stop_client()
print('Congratulations! You clicked your link multiple times!')
Try it with xpath instead, and with element to be located (not clickable), as it is a paragraph. This worked for me:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
PATH = "C:\Program Files (x86)\chromedriver.exe"
drive = webdriver.Chrome(PATH)
drive.get("https://www.blackhempfamily.com/")
linkWait = EC.element_to_be_located((By.XPATH, "//div/p[contains(., 'Why Black Hemp?')]"))
WebDriverWait(drive, 10).until(linkWait)
link = drive.find_element_by_xpath("//div/p[contains(., 'Why Black Hemp?')]")
link.click()
I have written a script in python to extract and paste 400-500 lines of text from one browser to another. I am using send_keys() to put the text content into the text area. It is writing line by line (2 lines / second) which is resulting in a few minutes to complete the operation. Is there any other method in Selenium to write faster (like how we paste manually in 1 second)?
My code
<code>
import time
import re
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementNotVisibleException
from selenium.webdriver.common.keys import Keys
def init_driver(uname,pwd):
driver = webdriver.Chrome()
driver.wait = WebDriverWait(driver, 5)
driver.get("https://ops.stg1.xxxxxyyyxxxx.com/login.jsp")
box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_username")))
box.send_keys(uname)
box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_password")))
box.send_keys(pwd)
button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "login")))
button.click()
return driver
def copy():
with open("Tag_input.txt") as f:
for line in f:
url = line.strip()
driver.get(url)
k=re.findall('\=(\d+)',url)
print(k[0])
a=k[0]
driver.wait = WebDriverWait(driver, 10)
time.sleep(10)
PC = driver.find_elements_by_xpath("//textarea[#name='messagingMap.PRIMARY_CONTENT.message']")
PC.send_keys(Keys.CONTROL, "a")
PC.send_keys(Keys.CONTROL, "c")
print("Copied Primary content !!")
for tag in PC:
varPC = tag.text
url1 = "http://jona.ca/blog/unclosed-tag-finder"
driver.get(url1)
driver.wait = WebDriverWait(driver, 10)
time.sleep(10)
text_area = driver.find_element_by_id("unclosed-tag-finder-input")
text_area.send_keys(Keys.CONTROL, "v")
button = driver.find_element_by_xpath("//input[#value='Submit']")
button.click()
result = driver.find_element_by_xpath("//pre[#id='unclosed-tag-finder-results']")
res_list = list(result)
print(res_list)
op = result.text
print(op)
writer = csv.writer(open('Tag_OP.csv','a+'))
z = zip(k,result)
print(z)
writer.writerows(k)
writer.writerows(result)
k = k.pop()
print("List cleared",k[0])
driver.wait = WebDriverWait(driver, 10)
time.sleep(10)
return driver
if __name__ == "__main__":
driver = init_driver("abdul.salam#xxxyyxxx.com","xxyyxx")
copy()
time.sleep(25)
driver.quit()
</code>
You might try using Ctrl+A to select the text, Ctrl+C to copy it, move to new browser Ctrl+A to select all text in your target field (so that you'll replace it), Ctrl+V to paste. I could imagine that it may be faster, but I haven't done any benchmarking myself.
This question popped right up when I did a search. It has more details, but, for instance, your paste would look like this:
driver.find_element_by_id("unclosed-tag-finder-input").sendKeys(Keys.chord(Keys.CONTROL,"v"));
I try to crawl the wechat public accounts includes the key word through "http://weixin.sogou.com/"
But i find i must use twice ActionChains(driver).move_to_element(nextpage).click().perform(),it can still work,and go to the next page !
who can tell me why and how to fix ! Thank you!
The source code are as follow , and sorry the comments are in the Chinese .
# coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
key = u"江南大学" #搜索的关键词
driver = webdriver.Chrome()
driver.get("http://weixin.sogou.com/")
assert u'搜狗微信' in driver.title
elem = driver.find_element_by_id("upquery")
elem.clear()
elem.send_keys(key)
button = driver.find_element_by_class_name("swz2") #搜索公众号
button.click()
WebDriverWait(driver,10).until(
EC.title_contains(key)
)
count = 0
while True:
for i in range(10):
try:
wechat_name = driver.find_element_by_xpath("//*[#id=\"sogou_vr_11002301_box_{}\"]/div[2]/h3".format(i)).text
print wechat_name
wechat_id = driver.find_element_by_xpath("//*[#id=\"sogou_vr_11002301_box_{}\"]/div[2]/h4/span/label".format(i)).text
print wechat_id
wechat_intro = driver.find_element_by_xpath("//*[#id=\"sogou_vr_11002301_box_{}\"]/div[2]/p[1]/span[2]".format(i)).text
print wechat_intro
print "*************************"
count += 1
except:
pass
try:
nextpage = driver.find_element_by_xpath("//*[#id=\"sogou_next\"]") #下一页的按钮
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click().
actions.perform()
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click().
actions.perform()
except Exception,e:
print e
break
driver.quit()
print count
You can chain your action, so no need to do perform after each action.
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click(nextpage)
actions.perform()
OR
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click(nextpage).perform()