I am running Selenium code on the DNCA website to scrape some of the document links. I am trying to get the links for each value in the drop-down, for each section shown on this page. My code works fine, but when I run the same code with headless = True, I get the following error:
ElementClickInterceptedException: element click intercepted: Element <li data-original-index="0">...</li> is not clickable at point (226, 250). Other element would receive the click: <div class="col-md-12">...</div>
(Session info: headless chrome=104.0.5112.81)
Code:
import sys
import pandas as pd
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

def get_active_row(active_tab, fund_id):
    active_row = active_tab.find_elements(By.XPATH, ".//tr[@style='' or @style='display: table-row;'][@fund-id = '{}']".format(fund_id))
    try:
        assert len(active_row) == 1
        active_row = active_row[0]
        return active_row
    except AssertionError as asserr:
        print(asserr, ' -- More than one active row for the fund id: ', fund_id)
        sys.exit(1)
    except Exception as err:
        print(err, ' -- fund id:', fund_id)
        sys.exit(1)
def scrap(driver):
    tab_list = driver.find_element(By.XPATH, "//ul[contains(@role, 'tablist')]")
    tab_list_names = tab_list.find_elements(By.XPATH, './/li')
    data_list = []
    for loc, tab_name in enumerate(tab_list_names):
        if loc < 20:
            tab_name.click()
            html = driver.page_source
            soup = BeautifulSoup(html)
            bs_active_tab = soup.find('div', {'class': 'tab-pane table-datas active'})
            bs_headers = bs_active_tab.find('thead')
            headers = [i.text for i in bs_headers.find_all('td')]
            active_tab = driver.find_element(By.XPATH, "//div[contains(@class, 'tab-pane table-datas active')]")
            unique_fund_ids = [i_fund.get_attribute('fund-id') for i_fund in active_tab.find_elements(By.XPATH, ".//tr[@style]") if i_fund.get_attribute('fund-id') != '-']
            lookup = set()
            unique_fund_ids = [x for x in unique_fund_ids if x not in lookup and lookup.add(x) is None]
            for fund_id in unique_fund_ids:  # Iterate over each fund
                active_row = get_active_row(active_tab, fund_id)
                active_row.find_element(By.XPATH, './/button').click()
                isin_list = [i.text for i in active_row.find_elements(By.XPATH, './/li')]
                for pos, isin_val in enumerate(isin_list):
                    isin_selected = active_row.find_elements(By.XPATH, './/li')[pos]
                    isin_selected.click()
                    active_row = get_active_row(active_tab, fund_id)
                    fund_name = ''
                    for pos_inner, td in enumerate(active_row.find_elements(By.XPATH, ".//td")):
                        a_tag = td.find_elements(By.XPATH, ".//a")
                        if len(a_tag) == 1:
                            a_tag = a_tag[0]
                            if pos_inner == 0:
                                fund_name = a_tag.text
                            link = a_tag.get_attribute('href')
                            data_list.append([tab_name.text, fund_name, isin_val, headers[pos_inner], link])
                        else:
                            data_list.append([tab_name.text, fund_name, isin_val, headers[pos_inner], ''])
                    active_row = get_active_row(active_tab, fund_id)
                    active_row.find_element(By.XPATH, './/button').click()
                isin_selected_to_close = active_row.find_elements(By.XPATH, './/li')[0]
                isin_selected_to_close.click()
            tlg_tr_tab = active_tab.find_element(By.XPATH, ".//tr[@fund-id='-']")
            for tlg_pos_inner, tlg_td in enumerate(tlg_tr_tab.find_elements(By.XPATH, ".//td")):
                tlg_a_tag = tlg_td.find_elements(By.XPATH, ".//a")
                if len(tlg_a_tag) == 1:
                    tlg_a_tag = tlg_a_tag[0]
                    tlg_link = tlg_a_tag.get_attribute('href')  # Get document link
                    data_list.append([tab_name.text, 'Toute la gamme', '', headers[tlg_pos_inner], tlg_link])
                else:
                    data_list.append([tab_name.text, 'Toute la gamme', '', headers[tlg_pos_inner], ''])
    dataset_links = pd.DataFrame(data_list, columns=['Tab', 'Fund Name', 'ISIN', 'Type', 'Link'])
    driver.quit()
Can someone please explain why it works fine with headless = False but not with headless = True?
In headless mode the default window size is very small, significantly smaller than the window size in regular mode.
So, to overcome this problem you need to set the window size.
It can be done in the following ways:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
Or just
driver.set_window_size(1920, 1080)
Both approaches should work.
I prefer the first way :)
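As a side note (my assumption, not part of the original answer): Chrome 109+ ships a rebuilt headless mode that renders much closer to a regular window, which can also help with click-interception issues. A minimal sketch:

options = Options()
options.add_argument("--headless=new")  # assumes Chrome 109+; older versions only accept plain --headless
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)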
Related
I have a problem with going back to the previous site.
lenPL = len(self.productList)
counter = 0
# here I iterate over all products that meet the conditions
for productCard in self.productList:
    productCard.click()
    self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
    self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
    self.driver.execute_script("window.history.go(-1)")
In the previous line I want to go back and select another product.
CatPage.py
class CatPage:
    def __init__(self, driver):
        self.driver = driver
    allProducts = (By.XPATH, "//div[@class='category-products']//li[contains(@class,'item')]/div/div")
    testprod = (By.XPATH, "//img[@alt='RustykalneUchwyty.pl']")
    def getAllGroupedToNotifi(self):
        productToTest = []
        for product in self.driver.find_elements(*CatPage.allProducts):
            if product.find_element(By.XPATH, "//div[@class='description-wrapper']/div[@class='btn-border btn-border-product-list']"):
                productToTest.append(product)
        print('CatPage.productToTest**', productToTest)
        return productToTest
ProdPage.py
class ProdPage:
    def __init__(self, driver, productList):
        self.driver = driver
        self.productList = productList
    def signUp(self):
        lenPL = len(self.productList)
        counter = 0
        for productCard in self.productList:
            productCard.click()
            self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
            self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
            self.driver.execute_script("window.history.go(-1)")
            # counter += 1
            # if counter == lenPL:
            #     break
            time.sleep(2)
        return True
I need info about Selenium: how to go back and select another element.
Here is another way I tried to go back and open another product, but it still doesn't work:
def test_e2e(self, setup):
    # scroll down to load the listing
    self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    # go to the category
    category = CatPage(self.driver)
    productsToTestList = category.getAllGroupedToNotifi()
    productCard = ProdPage(self.driver, productsToTestList)
    counter = 0
    while counter < 5:
        productsToTestList[counter].click()
        self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@seart.pl")
        self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
        counter += 1
        self.driver.execute_script("window.history.go(-1)")
        time.sleep(5)
    # productCard.signUp()
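For reference, a minimal sketch of the usual fix (my assumption, reusing the locator from CatPage.allProducts): anything located before window.history.go(-1) goes stale once the page changes, so the tiles have to be re-located on every pass.

# Sketch, not the original code: take a fresh element list after every back-navigation.
locator = (By.XPATH, "//div[@class='category-products']//li[contains(@class,'item')]/div/div")
count = len(self.driver.find_elements(*locator))
for i in range(count):
    product = self.driver.find_elements(*locator)[i]  # re-located, so never stale
    product.click()
    self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
    self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
    self.driver.back()  # same effect as window.history.go(-1)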
I'm currently using Selenium to take the product information from Schneider Electric, and this is the error I am receiving:
selenium.common.exceptions.NoSuchElementException: Message:
no such element: Unable to locate element:
{"method":"xpath","selector":"/html/body/div[2]/main/div[5]/ul/li/div/div/div/div/div/ul/li[1]/div/div/div[2]/div[2]/section/div/product-cards-wrapper//div/ul/li[1]/product-card/article/div/div[1]/product-card-main-info//div/pes-router-link[2]/a/h3"}
Currently, the website I am trying to pull this information from is this URL:
https://www.se.com/us/en/product-range/63426-powerlogic-accusine-pcs%2B/?N=4176697776&No=0&Nrpp=12
The XPath is for the description of their products, which according to my inspection and findings is this:
/html/body/div[2]/main/div[5]/ul/li/div/div/div/div/div/ul/li[1]/div/div/div[2]/div[2]/section/div/product-cards-wrapper//div/ul/li[1]/product-card//article/div/div[1]/product-card-main-info//div/pes-router-link[2]/a/h3
Any ideas??
Current Code:
def page_function():
    driver.get('https://www.se.com/us/en/product-range/63426-powerlogic-accusine-pcs%2B/?N=4176697776&No=12&Nrpp=12')
    driver.maximize_window()
    # gets the amount of items in the search bar
    print("Number of products:", 69)
    # for loop to read the product name and descriptions
    # product = driver.find_element(By.CSS_SELECTOR, ".search-item")
    # product = product.text
    # print(product)
    pr = "]/product-card//article/div/div[2]/div[1]/pes-product-price/p/span[1]"
    nam = "]/product-card//article/div/div[1]/product-card-main-info//div/pes-router-link[1]/a"
    des = "]/product-card//article/div/div[1]/product-card-main-info//div/pes-router-link[2]/a/h3"
    # des_path = "#search-items > .search-item .details > a > .row.pt-5.pb-sm-5 > .multilines-3.text-truncate-multilines.xs-single-col-8.col-12 > .font-weight-bold.text-dark"
    follow_loop = range(1, 70)
    for x in follow_loop:
        y = x
        if x > 61:
            y = x - 60
        elif x > 49:
            y = x - 48
        elif x > 37:
            y = x - 36
        elif x > 25:
            y = x - 24
        elif x > 13:
            y = x - 12
        else:
            print("")
        if (x % 13) == 0:
            driver.delete_all_cookies()
            next_arrow = driver.find_element(By.CLASS_NAME, "page-links__arrow page-links__arrow--next js-page-link js-page-link-next")
            driver.execute_script("arguments[0].click();", next_arrow)
        xpath = "/html/body/div[2]/main/div[5]/ul/li/div/div/div/div/div/ul/li[1]/div/div/div[2]/div[2]/section/div/product-cards-wrapper//div/ul/li["
        xpath += str(y)
        xpath += des
        driver.implicitly_wait(5)
        description.append(driver.find_element(By.XPATH, xpath))
        xpath2 = xpath.replace(des, '')
        xpath2 += pr
        unit_price.append(driver.find_element(By.XPATH, xpath2).text)
        xpath3 = xpath2.replace(pr, '')
        xpath3 += nam
        name.append(driver.find_element(By.XPATH, xpath3).text)
The product description is within a #shadow-root (open)
Solution
To extract the desired text you need to use shadowRoot.querySelector(), and you can use the following Locator Strategy:
driver.get("https://www.se.com/us/en/product-range/63426-powerlogic-accusine-pcs%2B/?N=4176697776&No=0&Nrpp=12")
time.sleep(5)
description = driver.execute_script('''return document.querySelector("product-cards-wrapper.hydrated").shadowRoot.querySelector("product-card.hydrated").shadowRoot.querySelector("product-card-main-info.hydrated").shadowRoot.querySelector("pes-router-link.description.hydrated a > h3")''')
print(description.text)
Console Output:
Active harmonic filter - 60 A 380..480 V AC - IP00 enclosure
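As a possible alternative (a sketch on my part, assuming Selenium 4+ with a Chromium-based driver), the shadow_root property of a WebElement crosses each shadow boundary without hand-written JavaScript:

from selenium.webdriver.common.by import By

# Each .shadow_root hop pierces one #shadow-root (open) boundary.
wrapper = driver.find_element(By.CSS_SELECTOR, "product-cards-wrapper.hydrated")
card = wrapper.shadow_root.find_element(By.CSS_SELECTOR, "product-card.hydrated")
info = card.shadow_root.find_element(By.CSS_SELECTOR, "product-card-main-info.hydrated")
description = info.shadow_root.find_element(By.CSS_SELECTOR, "pes-router-link.description.hydrated a > h3")
print(description.text)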
References
You can find a couple of relevant detailed discussions in:
How to locate the First name field within shadow-root (open) within the website https://www.virustotal.com using Selenium and Python
How to get past a cookie agreement page using Python and Selenium?
Unable to locate the Sign In element within #shadow-root (open) using Selenium and Python
I have this code and I want to iterate through all the divs inside the div. This is my first time writing Python and my first bot, so I am using Selenium, the first library I came across.
all_urls_div = browser.find_elements_by_class_name("DPiy6, qF0y9, Igw0E, IwRSH, eGOV_, _4EzTm")
j = 0
for i in range(len(all_urls_div)):
    chat = all_urls_div.find_elements_by_class_name("DPiy6, qF0y9, Igw0E, IwRSH, eGOV_, _4EzTm")
    chat[j].click()
    lastMessageBlock = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div")
    j = j + 1
    act = ActionChains(browser)
    act.move_to_element(lastMessageBlock).perform()
    threeDots = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div/button[1]").click()
    deleteLastMessage = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div/div[2]/div/div[2]/div[4]/button").click()
    browser.find_element_by_xpath("/html/body/div[6]/div/div/div/div[2]/button[1]").click()
    textArea = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[2]/div/div/div[2]/textarea")
    textArea.clear()
    browser.execute_script("arguments[0].value='" + message + "'", textArea)
    textArea.send_keys(Keys.SPACE)
    sendButton = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[2]/div/div/div[3]/button").click()
    browser.execute_script("arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;",
                           general_path_block)
I tried to write a loop, but after the first iteration an exception pops up. What should I do?
File "D:\PROJECTS\InstaBotByHushchadi\main.py", line 356, in resend_message_in_direct
chat = all_urls_div.find_elements_by_class_name("DPiy6, qF0y9, Igw0E, IwRSH, eGOV_, _4EzTm")
AttributeError: 'list' object has no attribute 'find_elements_by_class_name'
This exception
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
is because you have performed
chat.click()
for the first element, which may have redirected you to some other page/link, causing the other web elements to become stale.
Fix:
Redefine the list again on each iteration:
all_urls_div = general_path_chats_grid.find_elements_by_tag_name("div")
j = 0
for i in range(len(all_urls_div)):
    # re-locate the chat list on every iteration so the references stay fresh
    chat = general_path_chats_grid.find_elements_by_tag_name("div")
    chat[j].click()
    lastMessageBlock = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div")
    j = j + 1
    act = ActionChains(browser)
    act.move_to_element(lastMessageBlock).perform()
    threeDots = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div/button[1]").click()
    deleteLastMessage = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div/div/div[2]/div/div[2]/div[4]/button").click()
    browser.find_element_by_xpath("/html/body/div[6]/div/div/div/div[2]/button[1]").click()
    textArea = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[2]/div/div/div[2]/textarea")
    textArea.clear()
    browser.execute_script("arguments[0].value='" + message + "'", textArea)
    textArea.send_keys(Keys.SPACE)
    sendButton = browser.find_element_by_xpath(
        "/html/body/div[1]/section/div/div[2]/div/div/div[2]/div[2]/div/div[2]/div/div/div[3]/button").click()
    browser.execute_script("arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;",
                           general_path_block)
I want to scrape Home team and Away team from this page https://www.flashscore.com/match/hY5c1Bhh/#match-summary/match-summary
# Get HomeTeam
_ht = driver.find_element_by_xpath('//*[contains(@class, "home")]')
ht = _ht.find_element_by_xpath('//*[contains(@class, "participantName")]')
_homeName = ht.text
# Get AwayTeam
_at = driver.find_element_by_xpath('//*[contains(@class, "away")]')
at = _at.find_element_by_xpath('//*[contains(@class, "participantName")]')
_awayName = at.text
Output
Longford
Longford
Try to store both of them in a list, like this:
teams = driver.find_elements(By.CSS_SELECTOR, "div[class^='participantName'] a")
print("Home team : ", teams[0].text)
print("Away team : ", teams[1].text)
You are missing the . when trying to locate an element inside another element. Without the leading dot, the //* expression searches from the document root rather than from the element you already found, so both lookups return the first participantName on the page.
So your code should be:
# Get HomeTeam
_ht = driver.find_element_by_xpath('//*[contains(@class, "home")]')
ht = _ht.find_element_by_xpath('.//*[contains(@class, "participantName")]')
_homeName = ht.text
# Get AwayTeam
_at = driver.find_element_by_xpath('//*[contains(@class, "away")]')
at = _at.find_element_by_xpath('.//*[contains(@class, "participantName")]')
_awayName = at.text
How do I select a specific HTML table cell using python pandas? For example, from the table below, the tag 2.4.33 needs to be picked by the pandas code after parsing the HTML page.
root#1ec99b8b97af:/opt# python lookuptag.py
Id Tag Created Layers Size Delete
0 bb84b573f76 2.4.33 2 years ago 22 179.6 MB Delete
1 bb84b573f76 2.4.33-t2 2 years ago 22 179.6 MB Delete
2 5c97c0e3531 v8-2.4.33 1 year ago 22 180.7 MB Delete
Here is my Python pandas code; I can print the HTML table using this code:
import requests
import pandas as pd
url = 'http://docker-registry:8080/repo/tags/httpd'
html = requests.get(url).content
df_list = pd.read_html(html, header =0, flavor = 'bs4')
df = df_list[-1]
print(df)
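Once read_html has produced the DataFrame shown above, picking the row whose Tag is 2.4.33 is a plain boolean filter; a sketch, assuming the Tag column is parsed as strings:

match = df[df['Tag'] == '2.4.33']   # rows whose Tag column is exactly '2.4.33'
print(match)
print(match['Id'].iloc[0])          # e.g. the image id from that row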
def FetchTable(context, tablexpath):
    url = 'https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league'
    tables = pd.read_html(url)
    table = tables[0].applymap(str)
    return table
def LookupValueInColumnTwoKeys(context, source_table, reference_column_1, reference_value_1, reference_column_2, reference_value_2, lookup_column):
    lookup_column = lookup_column.replace(' ', '')
    reference_value_2 = reference_value_2.replace(' ', '')
    reference_value_1 = reference_value_1.replace(' ', '')
    referenceindex = 0
    referenceindex1 = 0
    referenceindexfound = False
    referenceindexfound1 = False
    lookupcolumnindex = 0
    rowindex = 0
    rowindexfound = False
    lookupcolumnindexfound = False
    for headers in source_table.columns:
        if referenceindexfound == False:
            referenceindex = referenceindex + 1
        if referenceindexfound1 == False:
            referenceindex1 = referenceindex1 + 1
        if lookupcolumnindexfound == False:
            lookupcolumnindex = lookupcolumnindex + 1
        if headers == reference_column_1:
            referenceindexfound = True
        if headers == reference_column_2:
            referenceindexfound1 = True
        if headers == lookup_column:
            lookupcolumnindexfound = True
        if referenceindexfound and lookupcolumnindexfound and referenceindexfound1:
            break
    for tablerow in source_table.values:
        print(tablerow)
        if rowindexfound == False:
            print(tablerow[referenceindex-1])
            print(tablerow[referenceindex1-1])
            if tablerow[referenceindex-1].find(reference_value_1) != -1 and tablerow[referenceindex1-1].find(reference_value_2) != -1:
                rowindexfound = True
                # rowindex = rowindex + 1
            else:
                rowindex = rowindex + 1
        else:
            break
    print("source table" + source_table.values[rowindex][lookupcolumnindex-1])
    return source_table.values[rowindex][lookupcolumnindex-1]
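For what it's worth, the whole two-key scan above collapses to a boolean mask in idiomatic pandas; a sketch, assuming exact matches are acceptable in place of the substring .find() used above:

def lookup_two_keys(source_table, ref_col_1, ref_val_1, ref_col_2, ref_val_2, lookup_column):
    # keep rows where both reference columns match, then read the lookup column
    mask = (source_table[ref_col_1] == ref_val_1) & (source_table[ref_col_2] == ref_val_2)
    return source_table.loc[mask, lookup_column].iloc[0]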
Another file:
from behave import *
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from readTableDataFromDB import readTableDataFromDB
from pandacode import WebTableValidationHelper as pandacode
from selenium.webdriver.chrome.options import Options
context.driver.get("https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league")
matrix = pandacode.FetchTable(context, "//*[@class='table table-sm standings-widget-table text-center mb-0 border-bottom']")
ismatrixequal = pandacode.VerifyTable(context, matrix, matrix)
#print(ismatrixequal)
lookupvalue = pandacode.LookupValueFromColumnSingleKey(context,matrix,"TEAM", "Delhi Capitals", "PT")
print(lookupvalue)
Another snippet:
def LookupValueFromColumnSingleKey1(context, source_table, reference_column_1, rowName, columnName):
    referenceindex = 0
    referenceindexfound = False
    columnindex = 0
    rowindex = 0
    rowindexfound = False
    columnindexfound = False
    for headers in source_table.columns:
        if referenceindexfound == False:
            referenceindex = referenceindex + 1
        if columnindexfound == False:
            columnindex = columnindex + 1
        if headers == reference_column_1:
            referenceindexfound = True
        if headers == columnName:
            columnindexfound = True
        if referenceindexfound and columnindexfound:
            break
    for tablerow in source_table.values:
        # print(tablerow)
        if rowindexfound == False:
            rowindex = rowindex + 1
            for tupledata in tablerow:
                # if tupledata.find(rowName) != -1:
                if tupledata.lower() == rowName.lower():
                    print(tupledata)
                    rowindexfound = True
    # print("source table" + source_table.values[rowindex-1][columnindex-1])
    # print(source_table[columnindex][rowindex])
    return source_table.values[rowindex-1][columnindex-1]
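The single-key variant reduces the same way; a sketch, assuming the key is matched case-insensitively in reference_column_1 rather than scanned across every column as above:

def lookup_single_key(source_table, reference_column_1, rowName, columnName):
    # case-insensitive exact match on the reference column, then read the target column
    mask = source_table[reference_column_1].str.lower() == rowName.lower()
    return source_table.loc[mask, columnName].iloc[0]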