This is my code:
from selenium import webdriver

proxylist = ['58.12.12.12:80', '69.12.12.12:80']
weblist = ['https://www.google.com', 'https://www.facebook.com', 'https://www.yahoo.com', 'https://aol.com']

for s in range(len(proxylist)):
    service_args = ['--proxy=%s' % (proxylist[s]), '--proxy-type=socks5']
    driver = webdriver.PhantomJS('phantomjs.exe', service_args=service_args)
    for s in weblist:
        driver.get(s)
The idea is that the browser will first use proxylist[0] to visit those sites. If proxylist[0] times out at weblist[2], then proxylist[1] should continue the job with weblist[3]. I think I should use try and except, but I don't know where to put them. Glad for any help!
Try something like this. Basically, we are switching the inner and the outer loops and adding a try/except:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException

for url in weblist:
    for i in range(len(proxylist)):
        try:
            service_args = ['--proxy=%s' % (proxylist[i]), '--proxy-type=socks5']
            driver = webdriver.PhantomJS('phantomjs.exe', service_args=service_args)
            driver.get(url)
            break  # this proxy worked, move on to the next URL
        except TimeoutException:
            print('timed out')
The try/except for a timeout looks something like this:
try:
    driver.set_page_load_timeout(1)
    driver.get("http://www.example.com")
except TimeoutException as ex:
    print("Exception has been thrown. " + str(ex))
For your code, adding it would be something like:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException

proxylist = ['58.12.12.12:80', '69.12.12.12:80']
weblist = ['https://www.google.com', 'https://www.facebook.com', 'https://www.yahoo.com', 'https://aol.com']

def test():
    temp_count_proxy = 0
    driver_opened = 0
    for url in weblist:
        if temp_count_proxy >= len(proxylist):  # >= so we never index past the end of proxylist
            print("Out of proxy")
            return
        if driver_opened == 0:
            service_args = ['--proxy={}'.format(proxylist[temp_count_proxy]), '--proxy-type=socks5']
            driver = webdriver.PhantomJS('phantomjs.exe', service_args=service_args)
            driver_opened = 1
        try:
            driver.set_page_load_timeout(2)
            driver.get(url)
        except TimeoutException as ex:
            driver.close()
            driver_opened = 0
            temp_count_proxy += 1
            continue

test()
Just be careful: if it fails to get one URL, it will switch proxies and get the next URL (as you requested), but it will not retry the same URL.
If you want it to switch proxies and retry the current URL on failure, use the following:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException

proxylist = ['58.12.12.12:80', '69.12.12.12:80']
weblist = ['https://www.google.com', 'https://www.facebook.com', 'https://www.yahoo.com', 'https://aol.com']

def test():
    temp_count_proxy = 0
    driver_opened = 0
    for url in weblist:
        while True:
            if temp_count_proxy >= len(proxylist):  # >= so we never index past the end of proxylist
                print("Out of proxy")
                return
            if driver_opened == 0:
                service_args = ['--proxy={}'.format(proxylist[temp_count_proxy]), '--proxy-type=socks5']
                driver = webdriver.PhantomJS('phantomjs.exe', service_args=service_args)
                driver_opened = 1
            try:
                driver.set_page_load_timeout(2)
                driver.get(url)
                # Your code to process the page goes here
            except TimeoutException as ex:
                driver.close()
                driver_opened = 0
                temp_count_proxy += 1
                continue  # retry the same URL with the next proxy
            break  # success, move on to the next URL

test()
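One hedged footnote on cleanup: both variants leave the last driver running when the function returns, and PhantomJS processes can linger after close(). A minimal sketch of the same rotation guarded by try/finally, using a None sentinel instead of the driver_opened flag, so the driver is always shut down (same proxylist/weblist as above):

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

def test():
    temp_count_proxy = 0
    driver = None  # None means "no driver currently open"
    try:
        for url in weblist:
            while True:
                if temp_count_proxy >= len(proxylist):
                    print("Out of proxy")
                    return
                if driver is None:
                    service_args = ['--proxy={}'.format(proxylist[temp_count_proxy]), '--proxy-type=socks5']
                    driver = webdriver.PhantomJS('phantomjs.exe', service_args=service_args)
                try:
                    driver.set_page_load_timeout(2)
                    driver.get(url)
                except TimeoutException:
                    driver.quit()  # quit() ends the session and kills the browser process
                    driver = None
                    temp_count_proxy += 1
                    continue
                break
    finally:
        if driver is not None:
            driver.quit()  # always shut down the last driver

test()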
OK, this is a bit embarrassing because I asked a similar question on here some time ago, and I tried the suggested solution (i.e. wait until the element is clickable), but it didn't work. So here's my code snippet.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import ElementClickInterceptedException, ElementNotInteractableException, TimeoutException, WebDriverException, NoSuchElementException
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from time import sleep
import re
import pandas as pd

def get_links(link):
    driver = webdriver.Firefox()
    driver.get(link)
    driver.implicitly_wait(50)
    sleep(5)
    _flag = True
    knt = 0
    while _flag:
        try:
            WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
            WebDriverWait(driver, 50).until(EC.element_to_be_clickable((By.XPATH, "//a[@class='event__more event__more--static']")))
            driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click()
            print("works here!")
            print("clicked....................................")
            sleep(5)
            _flag = True
            #tmp = driver.find_elements_by_xpath("//span[contains(text(), 'NBA - Pre-season')]")
            #if len(tmp) > 0:
            #    print("Found them!")
            #    _flag = False
            if knt > 5:  # For testing
                print("Nuff clicked")
                _flag = False
        except ElementNotInteractableException:
            print("Error!")
            _flag = False
    driver.close()
    return None

link = "https://www.flashscore.com/basketball/usa/nba/results/"
_ = get_links(link)
For some reason I keep getting an ElementClickInterceptedException error at the driver.find_element_by_xpath("//*[contains(text(), 'Show more matches')]").click() line. Any help would be appreciated!
Your element overlaps with another element, which is what causes the ElementClickInterceptedException error.
Before performing your code, please close the cookies popup with this code snippet:
def get_links(link):
    driver = webdriver.Firefox()
    driver.get(link)
    driver.implicitly_wait(50)
    sleep(5)
    # here, close the popup
    if len(driver.find_elements_by_id('onetrust-accept-btn-handler')) > 0:
        driver.find_element_by_id('onetrust-accept-btn-handler').click()
    _flag = True
    knt = 0
    while _flag:
        ....
        ....
And remove this line:
WebDriverWait(driver, 50).until(EC.invisibility_of_element((By.XPATH, "//a[contains(class='ot-buttons-fw')]")))
It is an invalid XPath expression, and it is not needed; the popup is already handled by the if condition above.
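As an aside, if you did want to wait for that banner to disappear, a syntactically valid form of that XPath would pass the @class attribute to contains(). A hedged sketch (the class name is copied from the snippet above; whether it matches the live page is an assumption):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# contains() takes a node or attribute as its first argument: @class, not class=
WebDriverWait(driver, 50).until(
    EC.invisibility_of_element_located((By.XPATH, "//a[contains(@class, 'ot-buttons-fw')]"))
)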
I want to fetch data from both classes at the same time, but when I hit run, both classes open in one tab. After adding driver.execute_script(f"window. open {link}, 'new window')") I get an error for the Keeps class. I want to open two tabs at one time; trying to open a new tab shows an error, and I can only print the Cardano class, not the Keeps class. Is there any way to run both classes at the same time in different tabs in a stable way? Please help, I am a beginner in Selenium.
# Modules
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from threading import *
import pandas as pd

# Code
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.minimize_window()
wait = WebDriverWait(driver, 30)
df = pd.read_excel('chains.xlsx', sheet_name='first')
chains_links = list(df['links'])

class Cardano(Thread):
    def run(self):
        cardano = 1
        for link in chains_links:
            if 'cardanoscan' in link:
                driver.get(link)
                sleep(0.5)
                try:
                    status = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/button"))).text
                    saturation = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[3]/div[1]/div/div/div/div/div[1]"))).text
                except:
                    status = None
                    saturation = None
                if status == "Active" and saturation != "0" and saturation != None:
                    print(
                        f"Cardano {cardano}: is {status} and Staturation is {saturation}")
                else:
                    print(f"Something Wrong with Cardano {cardano}")
                cardano = cardano + 1

class Keeps(Thread):
    def run(self):
        keeps = 1
        for link in chains_links:
            if "allthekeeps" in link:
                driver.execute_script(f"window.open {link}, 'new window')")
                sleep(0.5)
                try:
                    fault = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div/div[2]/div/div/div[1]/div[2]/div[4]/div[2]/div"))).text
                except:
                    fault = None
                if fault != None and fault == "0":
                    print(f"Keeps {keeps}: is active with {fault} faults:")
                else:
                    print(
                        f"Something wrong with Keeps {keeps}: {fault} Faults founded")
                keeps = keeps + 1

t1 = Cardano()
t2 = Keeps()
t1.start()
t2.start()
t1.join()
t2.join()
driver.close()
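A note on the window.open line in the question: window.open is a JavaScript function, so the f-string has to produce a valid function call, parentheses and quotes included. A minimal hedged sketch of opening a URL in a new tab and switching the driver to it (the URL is a placeholder; also note that a single WebDriver session is not thread-safe, so sharing one driver across two Thread subclasses is itself fragile):

from selenium import webdriver

driver = webdriver.Chrome()
link = "https://example.com"  # placeholder URL

# the f-string must produce valid JavaScript: window.open('<url>', '_blank')
driver.execute_script(f"window.open('{link}', '_blank');")

# Selenium keeps pointing at the old tab until you switch handles
driver.switch_to.window(driver.window_handles[-1])
print(driver.title)
driver.quit()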
I've created a script to scrape different collection names from a webpage, traversing multiple pages. The script can only parse the first 13 names out of the 100 on each page. One such collection name looks like Pudgy Penguins. How can I capture all 100 names instead of only 13 from the different pages of that site using Selenium?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://opensea.io/rankings"

def scroll_to_the_bottom():
    check_height = driver.execute_script("return document.body.scrollHeight;")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            WebDriverWait(driver, 5).until(lambda driver: driver.execute_script("return document.body.scrollHeight;") > check_height)
            check_height = driver.execute_script("return document.body.scrollHeight;")
        except TimeoutException:
            break

def get_collection_names(link):
    driver.get(link)
    while True:
        scroll_to_the_bottom()
        for item in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "[role='listitem'] [class$='Ranking--row']"))):
            collection_name = WebDriverWait(item, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class$='Ranking--collection-name']"))).text
            yield collection_name
        try:
            button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//button[contains(@class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")))
            driver.execute_script('arguments[0].click();', button)
            WebDriverWait(driver, 10).until(EC.staleness_of(item))
        except Exception as e:
            return

if __name__ == '__main__':
    driver = webdriver.Chrome()
    for collection_name in get_collection_names(link):
        print(collection_name)
Scrolling to the bottom of every page seems not to have any effect on the number of results the script produces.
I have checked your description and source code, and I think the page holds many elements that are lazy-loaded, so they don't all load at one time. To solve this, scroll down to the bottom step by step. I have changed the scroll_to_the_bottom function as below:
def scroll_to_the_bottom():
    H = driver.execute_script('return document.body.scrollHeight;')
    h = 0
    while True:
        h += 300
        if h >= H:
            break
        driver.execute_script("window.scrollTo({}, {});".format(0, h))
        time.sleep(1)
Embedding the code above into yours, it becomes:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = "https://opensea.io/rankings"

def get_collection_names(link):
    driver.get(link)
    unique_items = set()
    while True:
        item = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "[class$='Ranking--collection-name']")))
        H = driver.execute_script('return document.body.scrollHeight;')
        h = 0
        while True:
            h += 300
            if h >= H:
                break
            for element in WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[class$='Ranking--collection-name']"))):
                if element.text not in unique_items:
                    yield element.text
                    unique_items.add(element.text)
            driver.execute_script("window.scrollTo(0, {});".format(h))
            time.sleep(1)
        try:
            button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//button[contains(@class,'Buttonreact__StyledButton')][./i[contains(.,'arrow_forward')]]")))
            driver.execute_script('arguments[0].click();', button)
            WebDriverWait(driver, 10).until(EC.staleness_of(item))
        except Exception as e:
            return

if __name__ == '__main__':
    driver = webdriver.Chrome()
    for item in get_collection_names(link):
        print(item)
    driver.quit()
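A small note on the design choice of a fixed 300-pixel step: a hedged variant is to scroll by the viewport height instead, so the step adapts to the window size (window.innerHeight is a standard browser property; the one-second pause is kept from the code above):

import time

def scroll_in_viewport_steps(driver, pause=1.0):
    # scroll one viewport at a time so lazy-loaded rows have a chance to render
    total = driver.execute_script("return document.body.scrollHeight;")
    step = driver.execute_script("return window.innerHeight;")
    pos = 0
    while pos < total:
        pos += step
        driver.execute_script("window.scrollTo(0, arguments[0]);", pos)
        time.sleep(pause)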
Hope this helps. Thanks.
I am trying to crawl the WeChat public accounts that include a keyword, through "http://weixin.sogou.com/".
But I find that I must call ActionChains(driver).move_to_element(nextpage).click().perform() twice; only then does it work and go to the next page!
Can anyone tell me why, and how to fix it? Thank you!
The source code is as follows; sorry, the comments were in Chinese (translated into English below).
# coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

key = u"江南大学"  # the search keyword
driver = webdriver.Chrome()
driver.get("http://weixin.sogou.com/")
assert u'搜狗微信' in driver.title

elem = driver.find_element_by_id("upquery")
elem.clear()
elem.send_keys(key)
button = driver.find_element_by_class_name("swz2")  # the "search official accounts" button
button.click()
WebDriverWait(driver, 10).until(
    EC.title_contains(key)
)

count = 0
while True:
    for i in range(10):
        try:
            wechat_name = driver.find_element_by_xpath("//*[@id=\"sogou_vr_11002301_box_{}\"]/div[2]/h3".format(i)).text
            print wechat_name
            wechat_id = driver.find_element_by_xpath("//*[@id=\"sogou_vr_11002301_box_{}\"]/div[2]/h4/span/label".format(i)).text
            print wechat_id
            wechat_intro = driver.find_element_by_xpath("//*[@id=\"sogou_vr_11002301_box_{}\"]/div[2]/p[1]/span[2]".format(i)).text
            print wechat_intro
            print "*************************"
            count += 1
        except:
            pass
    try:
        nextpage = driver.find_element_by_xpath("//*[@id=\"sogou_next\"]")  # the next-page button
        actions = ActionChains(driver)
        actions.move_to_element(nextpage)
        actions.click()
        actions.perform()
        # performed a second time, because once is not enough (see the question)
        actions = ActionChains(driver)
        actions.move_to_element(nextpage)
        actions.click()
        actions.perform()
    except Exception, e:
        print e
        break

driver.quit()
print count
You can chain your actions, so there is no need to call perform() after each action.
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click(nextpage)
actions.perform()
OR
actions = ActionChains(driver)
actions.move_to_element(nextpage)
actions.click(nextpage).perform()
Why does my code:
self.driver.find_element_by_xpath("//*[text()[contains(.,'%s')]]" % ('Sorry'))
get stuck and never pass this line? Even if I do something like:
driver.implicitly_wait(30)
self.driver.find_element_by_xpath("//*[text()[contains(.,'%s')]]" % ('Sorry'))
Complete code:
# gets stuck here
if self.is_text_present('Hello'):
    print 'Hello'

# rest of code

def is_text_present(self, text):
    try:
        self.driver.find_element_by_xpath('//*[contains(text(), "%s")]' % (text))
    except NoSuchElementException, e:
        return False
    return True
Your XPath can be simplified to
"//*[contains(text(),'%s')]" % ('Sorry')
Perhaps try something like:
import contextlib
import selenium.webdriver as webdriver
import selenium.webdriver.support.ui as ui

with contextlib.closing(webdriver.Firefox()) as driver:
    ...
    # Set up a WebDriverWait instance that will poll for up to 10 seconds
    wait = ui.WebDriverWait(driver, 10)
    # wait.until returns the value of the callback
    elt = wait.until(
        lambda driver: driver.find_element_by_xpath(
            "//*[contains(text(),'%s')]" % ('Sorry')
        ))
From the docs:
This waits up to 10 seconds before throwing a TimeoutException or, if it finds the element, will return it in 0 - 10 seconds.
To debug the problem, you might try saving the HTML source to a file right before calling find_element_by_xpath, so you can see what the driver is seeing. Is the XPath valid for that HTML?
import time

def is_text_present(self, text):
    with open('/tmp/debug.html', 'w') as f:
        time.sleep(5)  # crude, but perhaps effective enough for debugging here
        f.write(self.driver.page_source)
    try:
        self.driver.find_element_by_xpath('//*[contains(text(), "%s")]' % (text))
    except NoSuchElementException, e:
        return False
    return True
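Combining both suggestions, a hedged sketch of an is_text_present that polls for up to ten seconds instead of relying on implicit waits (WebDriverWait and TimeoutException are standard Selenium APIs; the method name and self.driver attribute are taken from your snippet):

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait

def is_text_present(self, text):
    try:
        # poll for up to 10 seconds for any element containing the text
        WebDriverWait(self.driver, 10).until(
            lambda driver: driver.find_element_by_xpath(
                "//*[contains(text(), '%s')]" % (text)
            ))
    except TimeoutException:
        return False
    return True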