MultiThreading with Selenium in Python

MultiThreading with Selenium in Python - python

I want to fetch data from both classes at one time but after hit the run both classes open in one tab after adding driver.execute_script(f"window. open {link}, 'new window')") showing an error for keeps class I want to open 2 tabs at one time. Trying to open a new tab but showing an error only I can print Cardano class not keeps class. is there any way to run both classes at the same time in different tabes with the stable condition? please help I am a beginner in selenium.
//Modules
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from threading import *
import pandas as pd
//Code
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.minimize_window()
wait = WebDriverWait(driver, 30)
df = pd.read_excel('chains.xlsx', sheet_name='first')
chains_links = list(df['links'])
class Cardano(Thread):
def run(self):
cardano = 1
for link in chains_links:
if 'cardanoscan' in link:
driver.get(link)
sleep(0.5)
try:
status = wait.until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/button"))).text
saturation = wait.until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[2]/main/div/div/div[2]/div/div/div[3]/div[1]/div/div/div/div/div[1]"))).text
except:
status = None
saturation = None
if status == "Active" and saturation != "0" and saturation != None:
print(
f"Cardano {cardano}: is {status} and Staturation is {saturation}")
else:
print(f"Something Wrong with Cardano {cardano}")
cardano = cardano + 1
class Keeps(Thread):
def run(self):
keeps = 1
for link in chains_links:
if "allthekeeps" in link:
driver.execute_script(f"window.open {link}, 'new window')")
sleep(0.5)
try:
fault = wait.until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div/div[2]/div/div/div[1]/div[2]/div[4]/div[2]/div"))).text
except:
fault = None
if fault != None and fault == "0":
print(f"Keeps {keeps}: is active with {fault} faults:")
else:
print(
f"Something wrong with Keeps {keeps}: {fault} Faults founded")
keeps = keeps + 1
t1 = Cardano()
t2 = Keeps()
t1.start()
t2.start()
t1.join()
t2.join()
driver.close()

Related

Accessing an element for scraping data

I want to access the highlighted element. This is part of the html to access the sub comments section in 9gag website. I'm using this meme https://9gag.com/gag/a5EAv9O as an example input for the program.
I used the following code to access but it doesn't work.
sub_com_html = item.find_element(By.CSS_SELECTOR, '//*/div/section/section[2]').Get_attribute("innerHTML")
Edit:
I'm able to access the section now and print some subcomments. Thanks to #Arundeep Chohan for correcting my silly mistake. But there's an issue. It’s accessing the sub comments section but its repeating the sub comments for different main comments. This screenshot is part of the output with main comment and sub comments as list. You can see that it’s repeating same data which is wrong. It's also giving the sub comments for only a few of the comments and skipping the rest. Theoretically it should work fine but I don't understand what's going wrong here.
This is the whole code I'm working with now. The goal is to scrape all the comments and sub comments of a meme.
import csv
from email.mime import image
from re import T
from tkinter import SCROLL, Image
from unittest import result
import instanceof as instanceof
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import undetected_chromedriver as uc
if __name__ == '__main__':
options = Options()
# options.headless = True
driver = uc.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.maximize_window()
driver.get("https://9gag.com/gag/a5EAv9O")
time.sleep(5)
# click on I accept cookies
actions = ActionChains(driver)
try:
consent_button = driver.find_element(By.XPATH, '//*[#id="qc-cmp2-ui"]/div[2]/div/button[2]')
actions.move_to_element(consent_button).click().perform()
except:
pass
for i in range(31):
actions.click()
actions.send_keys(Keys.ARROW_DOWN).perform()
time.sleep(4)
# click on fresh comments section
fresh_comments = driver.find_element(By.XPATH, '//*[#id="page"]/div[1]/section[2]/section/header/div/button[2]')
actions.move_to_element(fresh_comments).click(on_element=fresh_comments).perform()
time.sleep(5)
# click on lood more comments button to load all the comments
fresh_comments = driver.find_element(By.CSS_SELECTOR, '.comment-list__load-more')
actions.move_to_element(fresh_comments).click(on_element=fresh_comments).perform()
miN = 1000
results = []
comments = {}
while miN <= 20000:
window = 'window.scrollTo(0,' + str(miN) + ')'
driver.execute_script(window)
time.sleep(3)
# Dealing with all comments
try:
# Scrape the main comments
try:
All_comments = driver.find_elements(By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view")
except:
All_comments = driver.find_elements(By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view")
del_comm_cnt = 1
for item in All_comments:
try:
html = item.get_attribute("innerHTML")
if "comment-list-item__text" in html:
comment = item.find_element(By.CSS_SELECTOR, "div.comment-list-item__text").text
elif "comment-list-item__deleted-text" in html:
comment = item.find_element(By.CSS_SELECTOR, "div.comment-list-item__deleted-text").text
comment = comment + str(del_comm_cnt)
del_comm_cnt += 1
if(comments.get(comment) == None):
sub_coms_list = []
comments[comment] = ""
# get sub comments
if "comment-list-item__replies" in html:
# item.find_element(By.CSS_SELECTOR, "div.comment-list-item__replies").click()
sub_comments = item.find_element(By.CSS_SELECTOR, "div.comment-list-item__replies")
actions.move_to_element(sub_comments).click(on_element=sub_comments).perform()
sub_com_section = item.find_element(By.XPATH, '//*/div/section/section[2]')
sub_com_html = sub_com_section.get_attribute("innerHTML")
#sub_coms = sub_com_section.find_elements(By.CSS_SELECTOR, "section.comment-list-item__wrapper comment-list-item__wrapper_reply")
sub_coms = sub_com_section.find_elements(By.CSS_SELECTOR, "div.comment-list-item__text")
for com in sub_coms:
sub_coms_list.append(com.text)
comments[comment] = sub_coms_list
except:
pass
except:
pass
miN = miN + 1500
driver.quit()
for i in comments:
print(i, "\n", comments[i], "\n\n")

Webscraping Data from Tradin View using selenium

I need to web scrape data from a trading view chart using an infinite loop, but I keep running into this error - StaleElementReferenceException.
I have tried making the program wait explicitly using the following function -
exceptions = (NoSuchElementException, StaleElementReferenceException)
def locate(path, type="xpath", time=5):
global chrome
global exceptions
if type == "xpath":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.XPATH, path))
)
if type == "link_text":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.LINK_TEXT, path))
)
if type == "name":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.NAME, path))
)
return element
Here is the full code that I have written:
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
driver = "D:\\Repositories\\Bot\\chromedriver v89.0.4389.23.exe"
exceptions = (NoSuchElementException, StaleElementReferenceException)
def locate(path, type="xpath", time=5):
global chrome
global exceptions
if type == "xpath":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.XPATH, path))
)
if type == "link_text":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.LINK_TEXT, path))
)
if type == "name":
element = WebDriverWait(chrome, time, ignored_exceptions=exceptions).until(
expected_conditions.presence_of_element_located((By.NAME, path))
)
return element
def login():
global chrome
chrome.get("https://in.tradingview.com/chart/iVucV9D0/")
chrome.maximize_window()
locate("Sign in", "link_text").click()
locate(
"/html/body/div[11]/div/div[2]/div/div/div/div/div/div/div[1]/div[4]/div/span"
).click()
locate("username", "name").send_keys("myemail")
locate("password", "name").send_keys("mypassword" + Keys.ENTER)
time.sleep(3)
locate("/html/body/div[6]/div/div/div[2]/div/div/div[1]/div[2]/form/button").click()
def buy():
buyprice = locate(
"/html/body/div[2]/div[5]/div/div[1]/div[1]/div[5]/div/div[2]/div[1]/div[3]/div[2]/div[3]/div[2]/span"
).text
if buyprice != "n/a":
return float(buyprice)
else:
return "na"
def sell():
sellprice = locate(
"/html/body/div[2]/div[5]/div/div[1]/div[1]/div[5]/div/div[2]/div[1]/div[3]/div[2]/div[6]/div[2]/span"
).text
if sellprice != "n/a":
return float(sellprice)
else:
return "na"
with webdriver.Chrome(driver) as chrome:
login()
while True:
if buy() != "na":
print("Supertrend Buy detected")
# execute rest of the code
if sell() != "na":
print("Supertrend Sell Detected")
# execute rest of the code
Can someone please help me?
PS: I am using python 3.9 and selenium version 3.141.0

I've found that a lot of these weird issues can be solved simply by downgrading to an older chromedriver and/or chrome/chromium, or switching to firefox/geckodriver. The range of compatibility between Selenium, the webdriver, and the browser is very narrow and unforgiving.

scraping with selenium cant click on clickable text

I am trying to scrape some data from yahoo finance, for each stock, I want to get the historical data. Taking the Apple stock. I should go to https://finance.yahoo.com/quote/AAPL/history?p=AAPL and choose "MAX" from "Time Period". so
I believe the script I wrote so far is getting the date element, but somehow clicking on it to be able to choose "MAX" is not working.
here is my whole script:
# using linux here
project_path = os.getcwd()
driver_path = project_path + "/" + "chromedriver"
yahoo_finance = "https://finance.yahoo.com/quote/"
driver = webdriver.Chrome(driver_path)
def get_data(symbol='AAPL'):
stock_history_link = yahoo_finance + symbol + '/history?p=' + symbol
driver.get(stock_history_link)
date_picker = '//div[contains(#class, "D(ib)") and contains(#class, "Pos(r)") and contains(#class, "Cur(p)")' \
'and contains(#class, "O(n):f")]'
try:
print("I am inside")
date_picker_2 = "//div[#class='Pos(r) D(ib) O(n):f Cur(p)']"
date_picker_element = driver.find_element_by_xpath(date_picker_2)
print("date_picker_element: ", date_picker_element)
date_picker_element.click()
try:
print("I will be waiting for the date")
my_dropdown = WebDriverWait(driver, 100).until(
EC.presence_of_element_located((By.ID, 'dropdown-menu'))
)
print(my_dropdown)
print("I am not waiting anymore")
except TimeoutException as e:
print("wait timed out")
print(e)
except WebDriverException:
print("Something went wrong while trying to pick the max date")
if __name__ == '__main__':
try:
get_data()
except:
pass
# finally:
# driver.quit()

To click the button with Max just open it up and target it.
driver.get("https://finance.yahoo.com/quote/AAPL/history?p=AAPL")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//span[#class='C($linkColor) Fz(14px)']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#data-value='MAX']"))).click()
Element:
<button class="Py(5px) W(45px) Fz(s) C($tertiaryColor) Cur(p) Bd Bdc($seperatorColor) Bgc($lv4BgColor) Bdc($linkColor):h Bdrs(3px)" data-value="MAX"><span>Max</span></button>
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

You have the wrong xpath for the date_picker_2:
date_picker_2 = '//*[#id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/div[1]/div/div/div/span'
Using requests:
import requests
import datetime
end = int(datetime.datetime.strptime(datetime.date.today().isoformat(), "%Y-%m-%d").timestamp())
url = f"https://finance.yahoo.com/quote/AAPL/history?period1=345427200&period2={end}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true"
requests.get(url)
Gets you to the same end page.

Python Selenium - Cannot close tab using google shortcuts

I am working on a little selenium project, but I got some issues.
So what I need to do is to click on a link to open it in a new tab, and whenever I have taken the information I need to close that tab and go into the next one. driver.close() does not work as it gives me the error: Message: no such window: target window already closed. So I intstead tried this (saw this while researching):
driver.find_element_by_tag_name('html').send_keys(Keys.CONTROL + 'w'), and I also tried with adding Keys.F4, but nothing worked.
It seems to work for other people, so why not for me?
Code:
def cpuFunc():
i = 0
print("Launching CPU")
cpu = webdriver.Chrome('chromedriver.exe',options=option)
cpu.get('https://www.komplett.se/category/11204/datorutrustning/datorkomponenter/processor')
cpu.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div/div/div[2]/form/div/div[1]/button').click()
#while i < 10:
# cpu.find_element_by_tag_name('html').send_keys(Keys.END)
# i += 1
# time.sleep(0.5)
#print("At bottom: CPU")
cpu.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
time.sleep(0.5)
link = cpu.find_element_by_xpath(f'/html/body/main/div/div[2]/div[5]/div[2]/form/div[1]/a')
ActionChains(cpu).key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
time.sleep(1)
window = cpu.window_handles[-1]
cpu.switch_to.window(window)
title = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/section/div/section/div[1]/h1/span").text
price = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/section/div/section/div[3]/div[2]/div[1]/div/div/div[1]/div[1]/div[1]/span").text
btn = cpu.find_element_by_xpath('/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/button')
time.sleep(0.5)
cpu.execute_script("arguments[0].click();", btn)
core = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[2]/td").text
thread = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[3]/td").text
cache = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[4]/td").text
clock = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[7]/td").text
turbo = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[8]/td").text
socket = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[9]/td").text
wattage = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[10]/td").text
cpu.find_element_by_tag_name('html').send_keys(Keys.CONTROL + 'w') # Here it shall close
time.sleep(60000)
enter code here

You can simply use ActionChains & Keys.
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
If MacOs:
step_1 = ActionChains(cpu)
step_1.send_keys(Keys.COMMAND + 'w')
If Windows:
step_1 = ActionChains(cpu)
step_1.send_keys(Keys.CONTROL + 'w')
Hope it helps, if issues please comment.

Driver.close() worked for me after also fixed a lot of those full xpaths which are easily breakable and added webdriver waits for stability in finding elements.
wait = WebDriverWait(cpu, 10)
cpu.get('https://www.komplett.se/category/11204/datorutrustning/datorkomponenter/processor')
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[#class='btn-large primary'][#type='submit']"))).click()
link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "form > div:nth-child(1) > a")))
ActionChains(cpu).key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
time.sleep(1)
window = cpu.window_handles[-1]
cpu.switch_to.window(window)
title = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-main-info__info > h1 > span"))).text
price = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-price > span"))).text
print(title,price)
btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "section.product-section.technical-details.col-xs-12 > button")))
btn.click()
table2 = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "table:nth-child(2) > tbody")))
core = table2.find_element_by_xpath("./tr[2]/td").text
thread = table2.find_element_by_xpath("./tr[3]/td").text
cache = table2.find_element_by_xpath("./tr[4]/td").text
clock = table2.find_element_by_xpath("./tr[7]/td").text
turbo = table2.find_element_by_xpath("./tr[8]/td").text
socket = table2.find_element_by_xpath("./tr[9]/td").text
wattage = table2.find_element_by_xpath("./tr[10]/td").text
cpu.close()
print(core,thread,cache,clock,turbo,socket,wattage)
Import
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

what happens when find_elements can't find the class?

I am trying to find a particular class on a website. The class is sometimes present and sometimes it is absent.
So when the class is present, it takes a few seconds for the script to locate the element(logo). When the class is not present,the script runs for a long time and then end.
Why is that? is there any way to speed it up when the class doesn't exist?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()
driver.implicitly_wait(300)
input_email = driver.find_element_by_xpath("//input[#type='email']")
input_email.send_keys('example#gmail.com')
input_password = driver.find_element_by_xpath("//input[#type='password']")
input_password.send_keys('example')
click_login = driver.find_element_by_xpath("//button[#type='submit']")
click_login.click()
driver.find_element_by_id("schedule-today").click()
sleep(2)
logo = driver.find_elements_by_xpath( "//*[contains(#class, 'lbl_lesson_status label label-info lbl_lesson_open')]" );
if not logo:
print("empty")
f = open("reserved_date", "a+")
for i in logo:
opendate = i.get_attribute("data-t-start-local");
f.write((opendate)+'\n')
print(opendate)
driver.close()

You Need To Add Wait And Add Try Except for example if element not found throw message and quit that script
I Simply Code For You!
Try This Code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import *
from selenium.webdriver.common.keys import Keys
import time
chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()
try:
input_email = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//input[#type='email']")))
input_email.send_keys('example#gmail.com')
except (TimeoutException,NoSuchElementException):
print('There is No Email Input!')
quit()
try:
input_password = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//input[#type='password']")))
input_password.send_keys('example')
except (TimeoutException,NoSuchElementException):
print('There is No Password Input!')
quit()
try:
click_login = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//button[#type='submit']")))
click_login.click()
except (TimeoutException,NoSuchElementException):
print('There is No Login Button!')
quit()
try:
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#schedule-today")))
time.sleep(2)
except (TimeoutException,NoSuchElementException):
print("Can't Find schedule-today id!")
quit()
try:
logo = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//*[contains(#class, 'lbl_lesson_status label label-info lbl_lesson_open')]")))
f = open("reserved_date", "a+")
for i in logo:
opendate = i.get_attribute("data-t-start-local");
f.write((opendate)+'\n')
print(opendate)
except (TimeoutException,NoSuchElementException):
print("Can't Find Logo Button!")
quit()
driver.close()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

MultiThreading with Selenium in Python - python

Related

Accessing an element for scraping data

Webscraping Data from Tradin View using selenium

scraping with selenium cant click on clickable text

Python Selenium - Cannot close tab using google shortcuts

what happens when find_elements can't find the class?

Categories

Resources