Web scraping data from TradingView using Selenium - Python

I need to web-scrape data from a TradingView chart in an infinite loop, but I keep running into this error: StaleElementReferenceException.
I have tried making the program wait explicitly using the following function:
exceptions = (NoSuchElementException, StaleElementReferenceException)

def locate(path, type="xpath", time=5):
    global chrome
    global exceptions
    if type == "xpath":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.XPATH, path))
        )
    if type == "link_text":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.LINK_TEXT, path))
        )
    if type == "name":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.NAME, path))
        )
    return element
Here is the full code that I have written:
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

driver = "D:\\Repositories\\Bot\\chromedriver v89.0.4389.23.exe"
exceptions = (NoSuchElementException, StaleElementReferenceException)

def locate(path, type="xpath", time=5):
    global chrome
    global exceptions
    if type == "xpath":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.XPATH, path))
        )
    if type == "link_text":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.LINK_TEXT, path))
        )
    if type == "name":
        element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
            expected_conditions.presence_of_element_located((By.NAME, path))
        )
    return element

def login():
    global chrome
    chrome.get("https://in.tradingview.com/chart/iVucV9D0/")
    chrome.maximize_window()
    locate("Sign in", "link_text").click()
    locate(
        "/html/body/div[11]/div/div[2]/div/div/div/div/div/div/div[1]/div[4]/div/span"
    ).click()
    locate("username", "name").send_keys("myemail")
    locate("password", "name").send_keys("mypassword" + Keys.ENTER)
    time.sleep(3)
    locate("/html/body/div[6]/div/div/div[2]/div/div/div[1]/div[2]/form/button").click()

def buy():
    buyprice = locate(
        "/html/body/div[2]/div[5]/div/div[1]/div[1]/div[5]/div/div[2]/div[1]/div[3]/div[2]/div[3]/div[2]/span"
    ).text
    if buyprice != "n/a":
        return float(buyprice)
    else:
        return "na"

def sell():
    sellprice = locate(
        "/html/body/div[2]/div[5]/div/div[1]/div[1]/div[5]/div/div[2]/div[1]/div[3]/div[2]/div[6]/div[2]/span"
    ).text
    if sellprice != "n/a":
        return float(sellprice)
    else:
        return "na"

with webdriver.Chrome(driver) as chrome:
    login()
    while True:
        if buy() != "na":
            print("Supertrend Buy detected")
            # execute rest of the code
        if sell() != "na":
            print("Supertrend Sell Detected")
            # execute rest of the code
Can someone please help me?
PS: I am using Python 3.9 and Selenium 3.141.0.

I've found that a lot of these weird issues can be solved simply by downgrading to an older chromedriver and/or Chrome/Chromium, or by switching to Firefox/geckodriver. The range of compatibility between Selenium, the webdriver, and the browser is narrow and unforgiving.
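If you'd rather pin a known-good driver build than manage the .exe by hand, something like the sketch below may help. It assumes webdriver-manager 3.x, where a `version` keyword existed (an assumption to verify against your installed release), and that your local Chrome matches the 89.0.4389.23 build named in the question:

# A minimal sketch, assuming webdriver-manager 3.x; the `version` kwarg
# is an assumption and was renamed in later releases.
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

driver_path = ChromeDriverManager(version="89.0.4389.23").install()
with webdriver.Chrome(driver_path) as chrome:
    chrome.get("https://in.tradingview.com/")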

Related

MultiThreading with Selenium in Python

I want to fetch data from both classes at the same time, but when I run the script, both classes load their pages in one tab. After adding driver.execute_script(f"window.open {link}, 'new window')") to open a second tab, I get an error for the Keeps class; I can only print the Cardano class, not the Keeps class. Is there any way to run both classes at the same time, in different tabs, in a stable way? Please help, I am a beginner in Selenium.
# Modules
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from threading import *
import pandas as pd

# Code
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.minimize_window()
wait = WebDriverWait(driver, 30)

df = pd.read_excel('chains.xlsx', sheet_name='first')
chains_links = list(df['links'])

class Cardano(Thread):
    def run(self):
        cardano = 1
        for link in chains_links:
            if 'cardanoscan' in link:
                driver.get(link)
                sleep(0.5)
                try:
                    status = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/button"))).text
                    saturation = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[3]/div[1]/div/div/div/div/div[1]"))).text
                except:
                    status = None
                    saturation = None
                if status == "Active" and saturation != "0" and saturation != None:
                    print(
                        f"Cardano {cardano}: is {status} and Saturation is {saturation}")
                else:
                    print(f"Something wrong with Cardano {cardano}")
                cardano = cardano + 1

class Keeps(Thread):
    def run(self):
        keeps = 1
        for link in chains_links:
            if "allthekeeps" in link:
                driver.execute_script(f"window.open {link}, 'new window')")
                sleep(0.5)
                try:
                    fault = wait.until(EC.visibility_of_element_located(
                        (By.XPATH, "/html/body/div/div[2]/div/div/div[1]/div[2]/div[4]/div[2]/div"))).text
                except:
                    fault = None
                if fault != None and fault == "0":
                    print(f"Keeps {keeps}: is active with {fault} faults:")
                else:
                    print(
                        f"Something wrong with Keeps {keeps}: {fault} faults found")
                keeps = keeps + 1

t1 = Cardano()
t2 = Keeps()
t1.start()
t2.start()
t1.join()
t2.join()
driver.close()
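Two things stand out, offered as a note rather than a definitive fix: f"window.open {link}, 'new window')" is not valid JavaScript (window.open is a function call), and a single WebDriver instance is not thread-safe, so two threads driving one driver will interleave unpredictably. A hedged sketch of both corrections, with one browser per thread and reusing chains_links from your script (helper names are illustrative):

from threading import Thread

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

def open_in_new_tab(driver, link):
    # Pass the URL as an argument; window.open is a JS function call.
    driver.execute_script("window.open(arguments[0], '_blank');", link)
    driver.switch_to.window(driver.window_handles[-1])

class Worker(Thread):
    def __init__(self, links):
        super().__init__()
        self.links = links

    def run(self):
        # One driver per thread: WebDriver instances are not thread-safe.
        driver = webdriver.Chrome(ChromeDriverManager().install())
        try:
            for link in self.links:
                driver.get(link)
                # ... scrape status/saturation/fault here ...
        finally:
            driver.quit()

t1 = Worker([l for l in chains_links if 'cardanoscan' in l])
t2 = Worker([l for l in chains_links if 'allthekeeps' in l])
t1.start(); t2.start()
t1.join(); t2.join()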

scraping with selenium: can't click on clickable text

I am trying to scrape some data from Yahoo Finance; for each stock, I want to get the historical data. Taking the Apple stock as an example, I should go to https://finance.yahoo.com/quote/AAPL/history?p=AAPL and choose "MAX" from "Time Period".
I believe the script I wrote so far is getting the date element, but somehow clicking on it so I can choose "MAX" is not working.
Here is my whole script:
# using linux here
import os

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException

project_path = os.getcwd()
driver_path = project_path + "/" + "chromedriver"
yahoo_finance = "https://finance.yahoo.com/quote/"
driver = webdriver.Chrome(driver_path)

def get_data(symbol='AAPL'):
    stock_history_link = yahoo_finance + symbol + '/history?p=' + symbol
    driver.get(stock_history_link)
    date_picker = '//div[contains(@class, "D(ib)") and contains(@class, "Pos(r)") and contains(@class, "Cur(p)")' \
                  'and contains(@class, "O(n):f")]'
    try:
        print("I am inside")
        date_picker_2 = "//div[@class='Pos(r) D(ib) O(n):f Cur(p)']"
        date_picker_element = driver.find_element_by_xpath(date_picker_2)
        print("date_picker_element: ", date_picker_element)
        date_picker_element.click()
        try:
            print("I will be waiting for the date")
            my_dropdown = WebDriverWait(driver, 100).until(
                EC.presence_of_element_located((By.ID, 'dropdown-menu'))
            )
            print(my_dropdown)
            print("I am not waiting anymore")
        except TimeoutException as e:
            print("wait timed out")
            print(e)
    except WebDriverException:
        print("Something went wrong while trying to pick the max date")

if __name__ == '__main__':
    try:
        get_data()
    except:
        pass
    # finally:
    #     driver.quit()
To click the Max button, just open the dropdown and target it:
driver.get("https://finance.yahoo.com/quote/AAPL/history?p=AAPL")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//span[@class='C($linkColor) Fz(14px)']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-value='MAX']"))).click()
Element:
<button class="Py(5px) W(45px) Fz(s) C($tertiaryColor) Cur(p) Bd Bdc($seperatorColor) Bgc($lv4BgColor) Bdc($linkColor):h Bdrs(3px)" data-value="MAX"><span>Max</span></button>
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
You have the wrong xpath for date_picker_2:
date_picker_2 = '//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/div[1]/div/div/div/span'
Using requests:
import requests
import datetime
end = int(datetime.datetime.strptime(datetime.date.today().isoformat(), "%Y-%m-%d").timestamp())
url = f"https://finance.yahoo.com/quote/AAPL/history?period1=345427200&period2={end}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true"
requests.get(url)
Gets you to the same end page.
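If the goal is the table itself rather than the browser state, a hedged follow-on (my addition, not part of the answer above; it assumes the history table is present in the served HTML and that Yahoo accepts a browser-like User-Agent):

import io

import pandas as pd
import requests

url = "https://finance.yahoo.com/quote/AAPL/history?p=AAPL"
# The User-Agent value is an assumption; Yahoo tends to reject bare clients.
html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
tables = pd.read_html(io.StringIO(html))  # parses every <table> on the page
print(tables[0].head())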

How to stop selenium scraper from redirecting to another internal weblink of the scraped website?

I was wondering if anyone knows of a way to instruct a Selenium script to avoid visiting/redirecting to an internal page that wasn't part of the code. Essentially, my code opens this page:
https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20
It keeps clicking the "show more" button until there is none (at the end of the page), by which point it should have collected the links of all the products listed on the page it scrolled through, and then visit each one respectively.
What happens instead: it successfully clicks "show more" until the end of the page, but then it visits a weird promotion page of the same website instead of following each of the gathered links and scraping further data points from each of those newly opened pages.
In a nutshell, I would incredibly appreciate it if someone could explain how to avoid this automated redirection! Here is the code, in case someone can nudge me in the right direction :)
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException
import json
import selenium.common.exceptions as exception
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

webdriver = '/Users/karimnabil/projects/selenium_js/chromedriver-1'
driver = Chrome(webdriver)
driver.implicitly_wait(5)
url = 'https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20'
driver.get(url)

links_list = []
coins_list = []
all_names = []
all_cryptos = []
all_links = []
all_twitter = []
all_locations = []
all_categories = []
all_categories2 = []

wait = WebDriverWait(driver, 2)
sign_in = driver.find_element_by_xpath("//li[@class='nav-item nav-guest']/a")
sign_in.click()
time.sleep(2)
user_name = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@name='login']")))
user_name.send_keys("karimnsaber95@gmail.com")
password = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@name='password']")))
password.send_keys("PleomaxCW@2")
signIn_Leave = driver.find_element_by_xpath("//div[@class='form-group text-center']/button")
signIn_Leave.click()
time.sleep(3)

while True:
    try:
        loadMoreButton = driver.find_element_by_xpath("//button[@class='btn btn-outline-primary']")
        time.sleep(2)
        loadMoreButton.click()
        time.sleep(2)
    except exception.StaleElementReferenceException:
        print('stale element')
        break
print('no more elements to show')

try:
    company_links = driver.find_elements_by_xpath("//div[@class='companies-list items-infinity']/div[position() > 3]/div[@class='media-body']/div[@class='title']/a")
    for link in company_links:
        links_list.append(link.get_attribute('href'))
except:
    pass

try:
    with open("links_list.json", "w") as f:
        json.dump(links_list, f)
    with open("links_list.json", "r") as f:
        links_list = json.load(f)
except:
    pass

try:
    for link in links_list:
        driver.get(link)
        name = driver.find_element_by_xpath("//div[@class='title']/h1").text
        try:
            show_more_coins = driver.find_element_by_xpath("//a[@data-original-title='Show more']")
            show_more_coins.click()
            time.sleep(1)
        except:
            pass
        try:
            categories = driver.find_elements_by_xpath("//div[contains(@class, 'categories-list')]/a")
            categories_list = []
            for category in categories:
                categories_list.append(category.text)
        except:
            pass
        try:
            top_page_categories = driver.find_elements_by_xpath("//ol[@class='breadcrumb']/li/a")
            top_page_categories_list = []
            for category in top_page_categories:
                top_page_categories_list.append(category.text)
        except:
            pass
        coins_links = driver.find_elements_by_xpath("//div[contains(@class, 'company-coins')]/a")
        all_coins = []
        for coin in coins_links:
            all_coins.append(coin.get_attribute('href'))
        try:
            location = driver.find_element_by_xpath("//div[@class='addresses mt-3']/div/div/div/div/a").text
        except:
            pass
        try:
            twitter = driver.find_element_by_xpath("//div[@class='links mt-2']/a[2]").get_attribute('href')
        except:
            pass
        try:
            print('-----------')
            print('Company name is: {}'.format(name))
            print('Potential Categories are: {}'.format(categories_list))
            print('Potential top page categories are: {}'.format(top_page_categories_list))
            print('Supporting Crypto is: {}'.format(all_coins))
            print('Registered location is: {}'.format(location))
            print('Company twitter profile is: {}'.format(twitter))
            time.sleep(1)
        except:
            pass
        all_names.append(name)
        all_categories.append(categories_list)
        all_categories2.append(top_page_categories_list)
        all_cryptos.append(all_coins)
        all_twitter.append(twitter)
        all_locations.append(location)
except:
    pass

df = pd.DataFrame(list(zip(all_names, all_categories, all_categories2, all_cryptos, all_twitter, all_locations)), columns=['Company name', 'Categories1', 'Categories2', 'Supporting Crypto', 'Twitter Handle', 'Registered Location'])
CryptoWerk_Data = df.to_csv('CryptoWerk4.csv', index=False)
Redirects happen for two reasons: in your case it is either JavaScript executed when you click the "load more" button for the last time, or an HTTP 3xx response, which is the least likely here.
So you need to identify when this JavaScript code is executed, send an ESC key press before it loads, and then execute the rest of your script.
You could also scrape the links and append them to your list before clicking the load more button; each time it is clicked, add an if statement that verifies the link of the page you're on. If it is that of the promotion page, execute the rest of your code; else, click load more.
while page_is_same:
    scrape_elements_add_to_list()
    click_load_more()
    current_link = verify_current_page_link()
    if current_link != link_of_scraped_page:
        page_is_same = False
        # rest of the code here
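A concrete version of that check, as a sketch rather than a drop-in fix, reusing driver, time, and the exception alias from your script and keying off driver.current_url:

listing_url = "https://cryptwerk.com/companies/?coins=1,6,11,2,3,8,17,7,13,4,25,29,24,32,9,38,15,30,43,42,41,12,40,44,20"
driver.get(listing_url)

while True:
    try:
        driver.find_element_by_xpath("//button[@class='btn btn-outline-primary']").click()
        time.sleep(2)
    except exception.StaleElementReferenceException:
        break
    # If the last click triggered the promotion redirect, return to the
    # listing page and stop clicking.
    if "/companies/" not in driver.current_url:
        driver.back()
        break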

what happens when find_elements can't find the class?

I am trying to find a particular class on a website. The class is sometimes present and sometimes absent.
When the class is present, it takes a few seconds for the script to locate the element (logo). When the class is not present, the script runs for a long time and then ends.
Why is that? Is there any way to speed it up when the class doesn't exist?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep

chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()
driver.implicitly_wait(300)

input_email = driver.find_element_by_xpath("//input[@type='email']")
input_email.send_keys('example@gmail.com')
input_password = driver.find_element_by_xpath("//input[@type='password']")
input_password.send_keys('example')
click_login = driver.find_element_by_xpath("//button[@type='submit']")
click_login.click()

driver.find_element_by_id("schedule-today").click()
sleep(2)

logo = driver.find_elements_by_xpath("//*[contains(@class, 'lbl_lesson_status label label-info lbl_lesson_open')]")
if not logo:
    print("empty")

f = open("reserved_date", "a+")
for i in logo:
    opendate = i.get_attribute("data-t-start-local")
    f.write((opendate) + '\n')
    print(opendate)
driver.close()
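A side note before the answer below (my addition): driver.implicitly_wait(300) makes every lookup that matches nothing, including find_elements_by_xpath, poll for up to 300 seconds before returning an empty list, which is exactly the long run described. A hedged sketch of the usual workaround, shrinking the implicit wait just around the optional element:

# Probe for the optional element with a short implicit wait, then restore it.
driver.implicitly_wait(5)  # seconds; tune to how quickly the page renders
logo = driver.find_elements_by_xpath(
    "//*[contains(@class, 'lbl_lesson_status label label-info lbl_lesson_open')]")
driver.implicitly_wait(300)  # restore only if later lookups still need the long wait
if not logo:
    print("empty")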
You need to add waits, and wrap each lookup in try/except so that, for example, if an element is not found the script prints a message and quits.
I coded it up for you!
Try this code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import *
from selenium.webdriver.common.keys import Keys
import time

chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()

try:
    input_email = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='email']")))
    input_email.send_keys('example@gmail.com')
except (TimeoutException, NoSuchElementException):
    print('There is No Email Input!')
    quit()

try:
    input_password = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='password']")))
    input_password.send_keys('example')
except (TimeoutException, NoSuchElementException):
    print('There is No Password Input!')
    quit()

try:
    click_login = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@type='submit']")))
    click_login.click()
except (TimeoutException, NoSuchElementException):
    print('There is No Login Button!')
    quit()

try:
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#schedule-today"))).click()
    time.sleep(2)
except (TimeoutException, NoSuchElementException):
    print("Can't Find schedule-today id!")
    quit()

try:
    # presence_of_all_elements_located returns a list, so it can be iterated
    logo = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//*[contains(@class, 'lbl_lesson_status label label-info lbl_lesson_open')]")))
    f = open("reserved_date", "a+")
    for i in logo:
        opendate = i.get_attribute("data-t-start-local")
        f.write((opendate) + '\n')
        print(opendate)
except (TimeoutException, NoSuchElementException):
    print("Can't Find Logo Button!")
    quit()

driver.close()

Selenium Python: element not clickable at point (X, Y), another element would receive the click (the element is neither a button nor a link)

This is a link to an e-commerce website which I would like to crawl. I am searching for a way to click on the Most Helpful, Positive, Negative, Most Recent, and By Certified Buyers sections and scrape the values. Heads up: it's not a button, so ActionChains and JavaScript code are not working on it.
I want to move from one section to another either by clicking or by any other method. I tried the JavaScript executor and ActionChains, but I am unable to get it to work.
My XPath for this is:
path = '//div[@class="o5jqS-"]/div[X]//div[contains(@class,"_3MuAT6")]'
which actually returns an element. The "X" value is replaced in a loop with 1 to 5, 1 signifying "Most Helpful" and 5 signifying "By Certified Buyers".
My code is below:
for j in range(0, 5):
    new_xpath = xpath_hash["FirstPageReviews"]["TitleOfReviewType"].replace("[X]", "[" + str(j + 1) + "]")
    new_xpath1 = xpath_hash["FirstPageReviews"]["TitleElement"].replace("[X]", "[" + str(j + 1) + "]")
    title_element = driver.find_element_by_xpath(new_xpath1)
    driver.execute_script("arguments[0].click();", title_element)
    # ActionChains(driver).move_to_element(title_element).click().perform()
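One thing worth ruling out first (my note, not part of the answer below): "another element would receive the click" usually means the target is covered by an overlay or sits off-screen, so scrolling it into the viewport before the JS click sometimes resolves it:

# Scroll the tab into the viewport so no sticky header/overlay covers it.
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", title_element)
driver.execute_script("arguments[0].click();", title_element)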
You could use my code, which is based on the page object pattern; I tried it and it worked:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select

class SeleniumBaseClass(object):
    def __init__(self, driver):
        self.driver = driver

    def open(self, URL):
        self.driver.get(URL)

    def driverURLChange(self, URL):
        print("change URL" + URL)
        self.driver.get(URL)

    def currentUrl(self):
        print("URL " + self.driver.current_url)
        return self.driver.current_url

    def locateElement(self, loc):
        try:
            print(loc)
            element = WebDriverWait(self.driver, 10).until(EC.visibility_of_element_located(loc))
            return element
        except:
            print("cannot find {0} element".format(loc))
            return None

    def waitForElementInvisible(self, loc):
        # load-spinner
        try:
            element = WebDriverWait(self.driver, 10).until(EC.invisibility_of_element_located(loc))
            return True
        except:
            print("cannot invisibility_of_element {0} element".format(loc))
            return False

    def send_key_with_Element(self, loc, value):
        self.locateElement(loc).clear()
        self.locateElement(loc).send_keys(value)

    def click_with_Element(self, loc):
        self.locateElement(loc).click()

    def clickElementsBySendKey(self, loc, value):
        self.locateElement(loc).send_keys(value)

customdriver = SeleniumBaseClass(webdriver.Chrome())
customdriver.open("https://www.flipkart.com/sony-mdr-zx110-wired-headphones/p/itmehuh6zm9s7kgz?pid=ACCDZRSEYPFHAT76&srno=s_1_1&otracker=search&lid=LSTACCDZRSEYPFHAT76TANM1F&qH=a684a6245806d98f")

HelpfulTab = (By.XPATH, "//div[contains(text(),'Most Helpful')]")
PositiveTab = (By.XPATH, "//div[contains(text(),'Positive')]")
customdriver.click_with_Element(PositiveTab)
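To walk all five sections with the same helper, a short sketch (the tab labels are taken from the question; verify them against the page's actual text):

# Locators for each review filter tab, keyed by the visible label.
tab_labels = ["Most Helpful", "Positive", "Negative", "Most Recent", "By Certified Buyers"]
for label in tab_labels:
    tab = (By.XPATH, "//div[contains(text(),'{0}')]".format(label))
    customdriver.click_with_Element(tab)
    # ... scrape the values shown for this tab before moving on ...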
