Python Selenium List Comprehension not Iterating - python

When I run this, everything works up until the list comprehension at the bottom of the code, where I iterate through a list of WebElements. When I print the variable stonk_data, I get a list of the same length as all_stonks, but every element is a repeat of the first element of all_stonks.
Can anyone tell me why this is happening?
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time

investing_url = 'https://www.investing.com/equities/united-states'

def get_driver():
    ffox_options = Options()
    #ffox_options.add_argument('--headless')
    s = Service('/home/l/geckodriver')
    driver = Firefox(service=s, options=ffox_options)
    return driver

def get_stonks(driver):
    driver.get(investing_url)
    stonks_filter = driver.find_element(By.ID, 'stocksFilter')
    return stonks_filter

def select_all_us_stonks(driver):
    stonks_page_filter = get_stonks(driver)
    action.move_to_element(stonks_page_filter).perform()
    action.click(on_element=stonks_page_filter).perform()
    action.send_keys_to_element(stonks_page_filter, Keys.UP).perform()
    action.send_keys_to_element(stonks_page_filter, Keys.ENTER).perform()
    time.sleep(10)

def get_all_stonks(driver):
    #stonks_table_container = driver.find_element(By.ID, 'marketInnerContent')
    stonks_table_selector = driver.find_elements(By.ID, 'marketInnerContent')
    stonks_table = driver.find_elements(By.TAG_NAME, 'table')
    stonks_table_body = driver.find_elements(By.TAG_NAME, 'tbody')
    stonks_table_rows = driver.find_elements(By.XPATH, '//table/tbody/tr')
    stonks_table_rows_data = driver.find_elements(By.XPATH, '//tr/td')
    stonks_table_rows_namelink = driver.find_elements(By.XPATH, '//tr/td[2]')
    return stonks_table_rows_namelink

def pull_all_stonks(stonk):
    stonks_table_name_and_link = stonk.find_element(By.XPATH, '//tr/td[2]/a')
    stonk_name = stonks_table_name_and_link.text
    stonk_link = stonks_table_name_and_link.get_attribute('href')
    return {
        'stonk_name': stonk_name,
        'stonk_link': stonk_link
    }

if __name__ == '__main__':
    driver = get_driver()
    action = ActionChains(driver)
    select_all_us_stonks(driver)
    all_stonks = get_all_stonks(driver)
    print(f'Pulling {len(all_stonks)} stonks')
    print(all_stonks)
    #the list comprehension in question
    stonk_data = [pull_all_stonks(stonk) for stonk in all_stonks]
    print(stonk_data)

Instead of trying to work out what went wrong in your code, I just modified both functions, pull_all_stonks and get_all_stonks.
def pull_all_stonks(stonk):
    stonk_name, stonk_link = (this_stonk := stonk.find_elements_by_tag_name("td")[1]).text, this_stonk.find_element_by_tag_name('a').get_attribute("href")
    return {
        'stonk_name': stonk_name,
        'stonk_link': stonk_link
    }

def get_all_stonks(driver):
    stonks = []
    for stonk in driver.find_element_by_id("cross_rate_markets_stocks_1").find_element_by_tag_name('tbody').find_elements_by_tag_name("tr"):
        stonks.append(stonk)
    return stonks
which now returns
{'stonk_name': 'Ford Motor', 'stonk_link': 'https://www.investing.com/equities/ford-motor-co'}
{'stonk_name': 'Apple', 'stonk_link': 'https://www.investing.com/equities/apple-computer-inc'}
{'stonk_name': 'Ascent Solar', 'stonk_link': 'https://www.investing.com/equities/ascent-solar-tech'}
{'stonk_name': 'Bank of America', 'stonk_link': 'https://www.investing.com/equities/bank-of-america'}
{'stonk_name': 'AT&T', 'stonk_link': 'https://www.investing.com/equities/at-t'}
{'stonk_name': 'AMD', 'stonk_link': 'https://www.investing.com/equities/adv-micro-device'}
{'stonk_name': 'NVIDIA', 'stonk_link': 'https://www.investing.com/equities/nvidia-corp'}
{'stonk_name': 'Marvell', 'stonk_link': 'https://www.investing.com/equities/marvell-technology-group-ltd'}
{'stonk_name': 'Petroleo Brasileiro Petrobras ADR', 'stonk_link': 'https://www.investing.com/equities/petroleo-bras'}
#etc...
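For what it's worth, the behaviour described in the question is what you get when a WebElement is searched with an absolute XPath: '//tr/td[2]/a' starts from the document root even when it is called on stonk, so every iteration matches the same first link on the page. A minimal sketch of a relative-XPath version of pull_all_stonks (my own rewrite, untested against the live page, keeping the question's Selenium 4 style):

from selenium.webdriver.common.by import By

def pull_all_stonks(stonk):
    # Each element in all_stonks is already the second <td> of a row, so a
    # relative XPath (note the leading dot) scopes the search to that cell
    # instead of restarting from the document root like '//tr/td[2]/a' does.
    name_and_link = stonk.find_element(By.XPATH, './/a')
    return {
        'stonk_name': name_and_link.text,
        'stonk_link': name_and_link.get_attribute('href'),
    }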

Related

How to back and select another product in product list [selenium python]

I have a problem with going back to the previous page.
lenPL = len(self.productList)
counter = 0
Here I iterate over all products that meet the conditions:
for productCard in self.productList:
    productCard.click()
    self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
    self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
    self.driver.execute_script("window.history.go(-1)")
In the previous line I want to go back and select another product.
CatPage.py
class CatPage:
    def __init__(self, driver):
        self.driver = driver

    allProducts = (By.XPATH, "//div[@class='category-products'] // li[contains(@class,'item')]/div/div")
    testprod = (By.XPATH, "//img[@alt='RustykalneUchwyty.pl']")

    def getAllGroupedToNotifi(self):
        productToTest = []
        for product in (self.driver.find_elements(*CatPage.allProducts)):
            if product.find_element(By.XPATH, "//div[@class='description-wrapper']/div[@class='btn-border btn-border-product-list']"):
                productToTest.append(product)
        print('CatPage.productToTest**', productToTest)
        return productToTest
ProdPage.py
class ProdPage:
    def __init__(self, driver, productList):
        self.driver = driver
        self.productList = productList

    def signUp(self):
        lenPL = len(self.productList)
        counter = 0
        for productCard in self.productList:
            productCard.click()
            self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
            self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
            self.driver.execute_script("window.history.go(-1)")
            # counter =+1
            # if counter == lenPL:
            #     break
            time.sleep(2)
        return True
I'm looking for info about Selenium: how to go back and select another element.
Here is another way I tried to go back and open another product, but it still doesn't work:
def test_e2e(self, setup):
    #scroll down so the listing loads
    self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    #go to the category page
    category = CatPage(self.driver)
    productsToTestList = category.getAllGroupedToNotifi()
    productCard = ProdPage(self.driver, productsToTestList)
    counter = 0
    while counter < 5:
        productsToTestList[counter].click()
        self.driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@seart.pl")
        self.driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
        counter += 1
        self.driver.execute_script("window.history.go(-1)")
        time.sleep(5)
    # productCard.signUp()
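No answer was posted here, but the symptom matches the usual stale-element pattern: after window.history.go(-1) the listing page is re-rendered, so WebElements collected before the first click no longer point at live DOM nodes. A minimal sketch of one common workaround, re-locating the products by index after every navigation (the locator and form fields are taken from the question; the helper itself is my own and untested against the actual shop):

from selenium.webdriver.common.by import By

def sign_up_for_first_n_products(driver, n=5):
    # Re-query the product list on every iteration instead of reusing
    # WebElements that were found before the page was navigated away from.
    products_locator = (By.XPATH, "//div[@class='category-products']//li[contains(@class,'item')]/div/div")
    for i in range(n):
        products = driver.find_elements(*products_locator)
        if i >= len(products):
            break
        products[i].click()
        driver.find_element(By.XPATH, "//input[@id='email-notification']").send_keys("test@test.com")
        driver.find_element(By.XPATH, "//input[@value='Powiadom o dostępności']").click()
        driver.back()  # same effect as window.history.go(-1)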

Selenium ElementClickInterceptedException when headless = True

I am running a Selenium script on the DNCA website to scrape some of the document links. I am trying to get the link for each value in the drop-down of every section shown on the page. My code works fine, but when I run the same code with headless = True, I get the following error:
ElementClickInterceptedException: element click intercepted: Element <li data-original-index="0">...</li> is not clickable at point (226, 250). Other element would receive the click: <div class="col-md-12">...</div>
(Session info: headless chrome=104.0.5112.81)
Code:
def get_active_row(active_tab, fund_id):
    active_row = active_tab.find_elements(By.XPATH, ".//tr[@style='' or @style='display: table-row;'][@fund-id = '{}']".format(fund_id))
    try:
        assert len(active_row) == 1
        active_row = active_row[0]
        return active_row
    except AssertionError as asserr:
        print(asserr, ' -- More than one active row for the fund id: ', fund_id)
        sys.exit(1)
    except Exception as err:
        print(err, ' -- fund id:', fund_id)
        sys.exit(1)

def scrap(driver):
    tab_list = driver.find_element(By.XPATH, "//ul[contains(@role, 'tablist')]")
    tab_list_names = tab_list.find_elements(By.XPATH, './/li')
    data_list = []
    for loc, tab_name in enumerate(tab_list_names):
        if loc < 20:
            tab_name.click()
            html = driver.page_source
            soup = BeautifulSoup(html)
            bs_active_tab = soup.find('div', {'class': 'tab-pane table-datas active'})
            bs_headers = bs_active_tab.find('thead')
            headers = [i.text for i in bs_headers.find_all('td')]
            active_tab = driver.find_element(By.XPATH, "//div[contains(@class, 'tab-pane table-datas active')]")
            unique_fund_ids = [i_fund.get_attribute('fund-id') for i_fund in active_tab.find_elements(By.XPATH, ".//tr[@style]") if i_fund.get_attribute('fund-id') != '-']
            lookup = set()
            unique_fund_ids = [x for x in unique_fund_ids if x not in lookup and lookup.add(x) is None]
            for fund_id in unique_fund_ids: #Iterate over each fund
                active_row = get_active_row(active_tab, fund_id)
                active_row.find_element(By.XPATH, './/button').click()
                isin_list = [i.text for i in active_row.find_elements(By.XPATH, './/li')]
                for pos, isin_val in enumerate(isin_list):
                    isin_selected = active_row.find_elements(By.XPATH, './/li')[pos]
                    isin_selected.click()
                    active_row = get_active_row(active_tab, fund_id)
                    fund_name = ''
                    for pos_inner, td in enumerate(active_row.find_elements(By.XPATH, ".//td")):
                        a_tag = td.find_elements(By.XPATH, ".//a")
                        if len(a_tag) == 1:
                            a_tag = a_tag[0]
                            if pos_inner == 0:
                                fund_name = a_tag.text
                            link = a_tag.get_attribute('href')
                            data_list.append([tab_name.text, fund_name, isin_val, headers[pos_inner], link])
                        else:
                            data_list.append([tab_name.text, fund_name, isin_val, headers[pos_inner], ''])
                active_row = get_active_row(active_tab, fund_id)
                active_row.find_element(By.XPATH, './/button').click()
                isin_selected_to_close = active_row.find_elements(By.XPATH, './/li')[0]
                isin_selected_to_close.click()
            tlg_tr_tab = active_tab.find_element(By.XPATH, ".//tr[@fund-id='-']")
            for tlg_pos_inner, tlg_td in enumerate(tlg_tr_tab.find_elements(By.XPATH, ".//td")):
                tlg_a_tag = tlg_td.find_elements(By.XPATH, ".//a")
                if len(tlg_a_tag) == 1:
                    tlg_a_tag = tlg_a_tag[0]
                    tlg_link = tlg_a_tag.get_attribute('href') #Get document link
                    data_list.append([tab_name.text, 'Toute la gamme', '', headers[tlg_pos_inner], tlg_link])
                else:
                    data_list.append([tab_name.text, 'Toute la gamme', '', headers[tlg_pos_inner], ''])
    dataset_links = pd.DataFrame(data_list, columns=['Tab', 'Fund Name', 'ISIN', 'Type', 'Link'])
    driver.quit()
Can someone please explain why it works fine with headless = False but not with headless = True?
In headless mode the default screen size is very small, significantly smaller than the screen size in regular mode.
So, to overcome this problem, you need to set the screen size.
It can be done in the following ways:
options = Options()
options.add_argument("--headless")
options.add_argument("window-size=1920, 1080")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
Or just
driver.set_window_size(1920, 1080)
Both approaches should work.
I prefer the first way :)
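If setting the window size alone does not fully resolve the interception, a common complementary pattern (my addition, not part of the answer above) is to wait for the element to be clickable and scroll it into view before clicking. A minimal sketch, reusing a locator in the style of the question:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def safe_click(driver, locator, timeout=20):
    # Wait until the element is clickable, scroll it to the middle of the
    # viewport, then click; this avoids most ElementClickInterceptedExceptions.
    element = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
    element.click()

# e.g. safe_click(driver, (By.XPATH, "//ul[contains(@role, 'tablist')]//li[1]"))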

selenium multithreading python

I wrote a Selenium script to check the case statuses of USCIS cases, and I want to speed it up because I am trying to check more than 500 cases every time.
How can I use multithreading from the concurrent.futures library in Python to do this?
import re
import json
from datetime import date
from unittest import case
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from multiprocessing import Pool
import concurrent.futures

options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# browser = webdriver.Chrome('PATH', options=options)
cases = []

def getStatus(CN):
    browser = webdriver.Chrome('PATH', options=options)
    browser.get("https://egov.uscis.gov/casestatus/landing.do")
    serachField = browser.find_element_by_xpath('/html/body/div[2]/form/div/div[1]/div/div[1]/fieldset/div[1]/div[4]/input')
    serachField.click()
    serachField.send_keys(CN)
    searchButton = browser.find_element_by_xpath('/html/body/div[2]/form/div/div[1]/div/div[1]/fieldset/div[2]/div[2]/input')
    searchButton.click()
    try:
        outputFieldHeading = browser.find_element_by_xpath('/html/body/div[2]/form/div/div[1]/div/div/div[2]/div[3]/h1')
        outputField = browser.find_element_by_xpath('/html/body/div[2]/form/div/div[1]/div/div/div[2]/div[3]/p')
        dateMatch = re.search(r'\w+\s\d+,\s\d+', outputField.text)
        try:
            formMatch = re.search(r'([I][-]\d+,^)|([I][-]\d+\w)', outputField.text)
            formNumber = formMatch.group()
        except:
            formNumber = "Form Unknown"
        cases.append({'caseNumber': CN, 'currentDate': today, 'Date': dateMatch.group(), 'FormNumber': formNumber, 'Status': outputFieldHeading.text, 'Description': outputField.text})
        print(f"{CN} : {outputFieldHeading.text} : {dateMatch.group()} : {formNumber}")
        return f"{CN} : {outputFieldHeading.text} : {dateMatch.group()} : {formNumber}"
    except NoSuchElementException:
        cases.append({'caseNumber': CN, 'currentDate': today, 'Date': "Unknown", 'FormNumber': "Unknown Form", 'Status': "Not Found", 'Description': ""})
        print(f"{CN} : Not Found")
        return f"{CN} : Not Found"
        pass
if __name__ == '__main__':
casenumbers = ["EAC2134250100", "EAC2134250101", "EAC2134250102", "EAC2134250103", "EAC2134250104", "EAC2134250105", "EAC2134250106", "EAC2134250107", "EAC2134250108", "EAC2134250109", "EAC2134250110", "EAC2134250111", "EAC2134250112", "EAC2134250113", "EAC2134250114", "EAC2134250115", "EAC2134250116", "EAC2134250117", "EAC2134250118", "EAC2134250119", "EAC2134250120", "EAC2134250121", "EAC2134250122", "EAC2134250123", "EAC2134250124", "EAC2134250125", "EAC2134250126", "EAC2134250127", "EAC2134250128", "EAC2134250129", "EAC2134250130", "EAC2134250131", "EAC2134250132", "EAC2134250133", "EAC2134250134", "EAC2134250135", "EAC2134250136", "EAC2134250137", "EAC2134250138", "EAC2134250139", "EAC2134250140", "EAC2134250141", "EAC2134250142", "EAC2134250143", "EAC2134250144", "EAC2134250145", "EAC2134250146", "EAC2134250147", "EAC2134250148", "EAC2134250149", "EAC2134250150", "EAC2134250151", "EAC2134250152", "EAC2134250153", "EAC2134250154", "EAC2134250155", "EAC2134250156", "EAC2134250157", "EAC2134250158", "EAC2134250159", "EAC2134250160", "EAC2134250161", "EAC2134250162", "EAC2134250163", "EAC2134250164", "EAC2134250165", "EAC2134250166", "EAC2134250167", "EAC2134250168", "EAC2134250169", "EAC2134250170", "EAC2134250171", "EAC2134250172", "EAC2134250173", "EAC2134250174", "EAC2134250175", "EAC2134250176", "EAC2134250177", "EAC2134250178", "EAC2134250179", "EAC2134250180", "EAC2134250181", "EAC2134250182", "EAC2134250183", "EAC2134250184", "EAC2134250185", "EAC2134250186", "EAC2134250187", "EAC2134250188", "EAC2134250189", "EAC2134250190", "EAC2134250191", "EAC2134250192", "EAC2134250193", "EAC2134250194", "EAC2134250195", "EAC2134250196", "EAC2134250197", "EAC2134250198", "EAC2134250199", "EAC2134250200", "EAC2134250201", "EAC2134250202", "EAC2134250203", "EAC2134250204", "EAC2134250205", "EAC2134250206", "EAC2134250207", "EAC2134250208", "EAC2134250209", "EAC2134250210", "EAC2134250211", "EAC2134250212", "EAC2134250213", "EAC2134250214", "EAC2134250215", "EAC2134250216", "EAC2134250217", "EAC2134250218", "EAC2134250219", "EAC2134250220", "EAC2134250221", "EAC2134250222", "EAC2134250223", "EAC2134250224", "EAC2134250225", "EAC2134250226", "EAC2134250227", "EAC2134250228", "EAC2134250229", "EAC2134250230", "EAC2134250231", "EAC2134250232", "EAC2134250233", "EAC2134250234", "EAC2134250235", "EAC2134250236", "EAC2134250237", "EAC2134250238", "EAC2134250239", "EAC2134250240", "EAC2134250241", "EAC2134250242", "EAC2134250243", "EAC2134250244", "EAC2134250245", "EAC2134250246", "EAC2134250247", "EAC2134250248", "EAC2134250249", "EAC2134250250", "EAC2134250251", "EAC2134250252", "EAC2134250253", "EAC2134250254", "EAC2134250255", "EAC2134250256", "EAC2134250257", "EAC2134250258", "EAC2134250259", "EAC2134250260", "EAC2134250261", "EAC2134250262", "EAC2134250263", "EAC2134250264", "EAC2134250265", "EAC2134250266", "EAC2134250267", "EAC2134250268", "EAC2134250269", "EAC2134250270", "EAC2134250271", "EAC2134250272", "EAC2134250273", "EAC2134250274", "EAC2134250275", "EAC2134250276", "EAC2134250277", "EAC2134250278", "EAC2134250279", "EAC2134250280", "EAC2134250281", "EAC2134250282", "EAC2134250283", "EAC2134250284", "EAC2134250285", "EAC2134250286", "EAC2134250287", "EAC2134250288", "EAC2134250289", "EAC2134250290", "EAC2134250291", "EAC2134250292", "EAC2134250293", "EAC2134250294", "EAC2134250295", "EAC2134250296", "EAC2134250297", "EAC2134250298", "EAC2134250299", "EAC2134250300", "EAC2134250301", "EAC2134250302", "EAC2134250303", "EAC2134250304", "EAC2134250305", "EAC2134250306", "EAC2134250307", 
"EAC2134250308", "EAC2134250309", "EAC2134250310", "EAC2134250311", "EAC2134250312", "EAC2134250313", "EAC2134250314", "EAC2134250315", "EAC2134250316", "EAC2134250317", "EAC2134250318", "EAC2134250319", "EAC2134250320", "EAC2134250321", "EAC2134250322", "EAC2134250323", "EAC2134250324", "EAC2134250325", "EAC2134250326", "EAC2134250327", "EAC2134250328", "EAC2134250329", "EAC2134250330", "EAC2134250331", "EAC2134250332", "EAC2134250333", "EAC2134250334", "EAC2134250335", "EAC2134250336", "EAC2134250337", "EAC2134250338", "EAC2134250339", "EAC2134250340", "EAC2134250341", "EAC2134250342", "EAC2134250343", "EAC2134250344", "EAC2134250345", "EAC2134250346", "EAC2134250347", "EAC2134250348", "EAC2134250349", "EAC2134250350", "EAC2134250351", "EAC2134250352", "EAC2134250353", "EAC2134250354", "EAC2134250355", "EAC2134250356", "EAC2134250357", "EAC2134250358", "EAC2134250359", "EAC2134250360", "EAC2134250361", "EAC2134250362", "EAC2134250363", "EAC2134250364", "EAC2134250365", "EAC2134250366", "EAC2134250367", "EAC2134250368", "EAC2134250369", "EAC2134250370", "EAC2134250371", "EAC2134250372", "EAC2134250373", "EAC2134250374", "EAC2134250375", "EAC2134250376", "EAC2134250377", "EAC2134250378", "EAC2134250379", "EAC2134250380", "EAC2134250381", "EAC2134250382", "EAC2134250383", "EAC2134250384", "EAC2134250385", "EAC2134250386", "EAC2134250387", "EAC2134250388", "EAC2134250389", "EAC2134250390", "EAC2134250391", "EAC2134250392", "EAC2134250393", "EAC2134250394", "EAC2134250395", "EAC2134250396", "EAC2134250397", "EAC2134250398", "EAC2134250399", "EAC2134250400", "EAC2134250401", "EAC2134250402", "EAC2134250403", "EAC2134250404", "EAC2134250405", "EAC2134250406", "EAC2134250407", "EAC2134250408", "EAC2134250409", "EAC2134250410", "EAC2134250411", "EAC2134250412", "EAC2134250413", "EAC2134250414", "EAC2134250415", "EAC2134250416", "EAC2134250417", "EAC2134250418", "EAC2134250419"]
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
results = [executor.submit(getStatus, x) for x in casenumbers]
print(results)
This is not working and I get nothing printed in the terminal. How can I improve this code? Thanks.
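No answer is attached here, but two things stand out in the snippet as posted: today is never defined (so each worker raises a NameError that gets swallowed by its Future), and results is a list of Future objects, so printing it only shows Future reprs rather than case statuses. A minimal sketch, under those assumptions, of how the executor part is usually wired up (getStatus and the driver path are taken from the question):

import concurrent.futures
from datetime import date

today = date.today()  # getStatus() refers to `today`, so it must exist

if __name__ == '__main__':
    casenumbers = ["EAC2134250100", "EAC2134250101"]  # shortened; use the full list from the question
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(getStatus, cn) for cn in casenumbers]
        # as_completed() yields each Future as it finishes; .result() re-raises
        # any exception from the worker instead of hiding it silently.
        for future in concurrent.futures.as_completed(futures):
            try:
                print(future.result())
            except Exception as exc:
                print(f"lookup failed: {exc}")

It is probably also worth calling browser.quit() at the end of getStatus, so that 500 lookups do not leave 500 Chrome processes running.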

Web Scraping ESPN Data With Selenium

I'm trying to scrape some data off ESPN and run some calculations off the scraped data. Ideally, I would like to iterate through a dataframe, grab each player's name with Selenium, send the player's name into the search box, and tell Selenium to click the player's name. I was able to do this successfully with one player. I'm not quite sure how to iterate through all the players in my data frame.
The second part of the code is where I'm struggling. For some reason I am not able to get the data; Selenium isn't able to find any of the elements. I don't think I'm doing it properly. If I am able to scrape the required data, I would like to plug it into a calculation and append the calculated projected points to my dataframe, dfNBA.
Can someone please help me with my code and point me in the right direction? I'm trying to get more efficient at writing Python code, but right now I'm stuck.
Thanks
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

#sample data
pp = {'Player Name': ['Donovan Mitchell', 'Kawhi Leonard', 'Rudy Gobert', 'Paul George', 'Reggie Jackson', 'Jordan Clarkson'],
      'Fantasy Score': [46.0, 50.0, 40.0, 44.0, 25.0, 26.5]}

#Creating a dataframe from dictionary
dfNBA = pd.DataFrame(pp)

#Scraping ESPN
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.espn.com/")

#Clicking the search button
driver.find_element_by_xpath("//a[@id='global-search-trigger']").click()

#sending data to the search button
driver.find_element_by_xpath("//input[@placeholder='Search Sports, Teams or Players...']").send_keys(dfNBA.iloc[0,:].values[0])
WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".search_results__details")))
playerPage = driver.find_element_by_css_selector(".search_results__details").click()

#Scraping data from last 10 games
points = driver.find_element_by_xpath(".//div[@class='Table__TD']")[13]
#rebs = driver.find_element_by_xpath("//*[@id='fittPageContainer'']/div[2]/div[5]/div/div[1]/div[1]/section/div/div[3]/div/div/div[2]/table/tbody/tr[1]/td[7]")
#asts = driver.find_element_by_xpath("//*[@id='fittPageContainer']/div[2]/div[5]/div/div[1]/div[1]/section/div/div[3]/div/div/div[2]/table/tbody/tr[1]/td[8]")
#blks = driver.find_element_by_xpath("//*[@id='fittPageContainer']/div[2]/div[5]/div/div[1]/div[1]/section/div/div[3]/div/div/div[2]/table/tbody/tr[1]/td[9]")
#stls = driver.find_element_by_xpath("//*[@id='fittPageContainer']/div[2]/div[5]/div/div[1]/div[1]/section/div/div[3]/div/div/div[2]/table/tbody/tr[1]/td[10]")
#tnvrs = driver.find_element_by_xpath("//*[@id='fittPageContainer']/div[2]/div[5]/div/div[1]/div[1]/section/div/div[3]/div/div/div[2]/table/tbody/tr[1]/td[12]")
#projectedPoints = points+(rebs*1.2)+(asts*1.5)+(blks*3)+(stls*3)-(tnvrs*1)
print(points)
I think Selenium is a bit overkill when there's a viable API option.
Give this a try. Note that in the overview, the L10 games figure refers to the last 10 regular-season games; my code here does the last 10 games including playoffs. If you only want the regular season, let me know and I can adjust it. I also added a variable here so that if you wanted, for example, just the last 5 games or the last 15 games, you could do that too.
import requests
import pandas as pd

previous_games = 10

pp = {'Player Name': ['Donovan Mitchell', 'Kawhi Leonard', 'Rudy Gobert', 'Paul George', 'Reggie Jackson', 'Jordan Clarkson'],
      'Fantasy Score': [46.0, 50.0, 40.0, 44.0, 25.0, 26.5]}

#Creating a dataframe from dictionary
dfNBA = pd.DataFrame(pp)

search_api = 'https://site.api.espn.com/apis/search/v2'
for idx, row in dfNBA.iterrows():
    playerName = row['Player Name']
    payload = {'query': '%s' %playerName}
    results = requests.get(search_api, params=payload).json()['results']
    for each in results:
        if each['type'] == 'player':
            playerID = each['contents'][0]['uid'].split('a:')[-1]
            break

    player_api = 'https://site.web.api.espn.com/apis/common/v3/sports/basketball/nba/athletes/%s/gamelog' %playerID
    playload = {'season':'2021' }
    jsonData_player = requests.get(player_api, params=payload).json()

    #Scraping data from last x games
    last_x_gameIDs = list(jsonData_player['events'].keys())
    last_x_gameIDs.sort()
    last_x_gameIDs = last_x_gameIDs[-1*previous_games:]

    gamelog_dict = {}
    seasonTypes = jsonData_player['seasonTypes']
    for gameID in last_x_gameIDs:
        for each in seasonTypes:
            categories = each['categories']
            for category in categories:
                if category['type'] == 'total':
                    continue
                events = category['events']
                for event in events:
                    if gameID == event['eventId']:
                        gamelog_dict[gameID] = event['stats']

    labels = jsonData_player['labels']

    # Aggregate totals
    for k, v in gamelog_dict.items():
        v = dict(zip(labels, v))
        gamelog_dict[k] = v

    stats = pd.DataFrame(gamelog_dict.values())

    points = stats['PTS'].astype(float).sum() / previous_games
    rebs = stats['REB'].astype(float).sum() / previous_games
    asts = stats['AST'].astype(float).sum() / previous_games
    blks = stats['BLK'].astype(float).sum() / previous_games
    stls = stats['STL'].astype(float).sum() / previous_games
    tnvrs = stats['TO'].astype(float).sum() / previous_games

    projectedPoints = float(points)+(float(rebs)*1.2)+(float(asts)*1.5)+(float(blks)*3)+(float(stls)*3)-(float(tnvrs)*1)
    print('%s: %.02f' %(playerName, projectedPoints))
Output:
Donovan Mitchell: 42.72
Kawhi Leonard: 52.25
Rudy Gobert: 38.47
Paul George: 44.18
Reggie Jackson: 24.21
Jordan Clarkson: 25.88
Here's some code to accomplish (I think) what you want. You need to wait for the table elements to appear, fix your xpath, and choose the right elements from the table array.
pp = {'Player Name': ['Donovan Mitchell', 'Kawhi Leonard', 'Rudy Gobert', 'Paul George', 'Reggie Jackson', 'Jordan Clarkson'],
      'Fantasy Score': [46.0, 50.0, 40.0, 44.0, 25.0, 26.5]}

#Creating a dataframe from dictionary
dfNBA = pd.DataFrame(pp)

#Scraping ESPN
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.espn.com/")

#Clicking the search button
driver.find_element_by_xpath("//a[@id='global-search-trigger']").click()

#sending data to the search button
driver.find_element_by_xpath("//input[@placeholder='Search Sports, Teams or Players...']").send_keys(dfNBA.iloc[0,:].values[0])
WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".search_results__details")))
playerPage = driver.find_element_by_css_selector(".search_results__details").click()

#Scraping data from last 10 games
WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//td[@class='Table__TD']")))
points = driver.find_elements_by_xpath("//td[@class='Table__TD']")[12].text
rebs = driver.find_elements_by_xpath("//td[@class='Table__TD']")[6].text
asts = driver.find_elements_by_xpath("//td[@class='Table__TD']")[7].text
blks = driver.find_elements_by_xpath("//td[@class='Table__TD']")[8].text
stls = driver.find_elements_by_xpath("//td[@class='Table__TD']")[9].text
tnvrs = driver.find_elements_by_xpath("//td[@class='Table__TD']")[11].text

projectedPoints = float(points)+(float(rebs)*1.2)+(float(asts)*1.5)+(float(blks)*3)+(float(stls)*3)-(float(tnvrs)*1)
print(projectedPoints)
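To address the iteration part of the question (neither Selenium snippet above loops over the dataframe), one way to wrap the same search flow in a loop is sketched below. The selectors and cell indices are the ones from the answer above; the loop and the clearing of the search box between players are my additions, untested against the live site:

projected = []
for name in dfNBA['Player Name']:
    # open the search box and look up this player
    driver.find_element_by_xpath("//a[@id='global-search-trigger']").click()
    search = driver.find_element_by_xpath("//input[@placeholder='Search Sports, Teams or Players...']")
    search.clear()
    search.send_keys(name)
    WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".search_results__details")))
    driver.find_element_by_css_selector(".search_results__details").click()

    # wait for the stats table, then pull the same cells as the answer above
    WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//td[@class='Table__TD']")))
    cells = driver.find_elements_by_xpath("//td[@class='Table__TD']")
    points, rebs, asts, blks, stls, tnvrs = (float(cells[i].text) for i in (12, 6, 7, 8, 9, 11))
    projected.append(points + rebs*1.2 + asts*1.5 + blks*3 + stls*3 - tnvrs*1)

dfNBA['Projected Points'] = projected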

Pandas read_html select specific table values

How do I select a specific HTML table value (a TH/TD cell) using Python pandas? For example, from the table below, the tag 2.4.33 needs to be picked out and returned by the pandas code after parsing the HTML page.
root@1ec99b8b97af:/opt# python lookuptag.py
Id Tag Created Layers Size Delete
0 bb84b573f76 2.4.33 2 years ago 22 179.6 MB Delete
1 bb84b573f76 2.4.33-t2 2 years ago 22 179.6 MB Delete
2 5c97c0e3531 v8-2.4.33 1 year ago 22 180.7 MB Delete
Here is my Python pandas code; I can print the HTML table using the code below:
import requests
import pandas as pd
url = 'http://docker-registry:8080/repo/tags/httpd'
html = requests.get(url).content
df_list = pd.read_html(html, header =0, flavor = 'bs4')
df = df_list[-1]
print(df)
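For what it's worth, once the table is in a DataFrame, picking out the row whose Tag column equals 2.4.33 is usually just boolean indexing. A minimal sketch against the df printed above (column names taken from that output):

# Select the row(s) whose Tag column is exactly '2.4.33'
match = df[df['Tag'] == '2.4.33']
print(match)

# Or pull a single cell, e.g. the image Id for that tag
if not match.empty:
    print(match.iloc[0]['Id'])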
def FetchTable(context, tablexpath):
    url = 'https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league'
    tables = pd.read_html(url)
    table = tables[0].applymap(str)
    return table

def LookupValueInColumnTwoKeys(context, source_table, reference_column_1, reference_value_1, reference_column_2, reference_value_2, lookup_column):
    lookup_column = lookup_column.replace(' ', '')
    reference_value_2 = reference_value_2.replace(' ', '')
    reference_value_1 = reference_value_1.replace(' ', '')
    referenceindex = 0
    referenceindex1 = 0
    referenceindexfound = False
    referenceindexfound1 = False
    lookupcolumnindex = 0
    rowindex = 0
    rowindexfound = False
    lookupcolumnindexfound = False
    for headers in source_table.columns:
        if referenceindexfound == False:
            referenceindex = referenceindex + 1
        if referenceindexfound1 == False:
            referenceindex1 = referenceindex1 + 1
        if lookupcolumnindexfound == False:
            lookupcolumnindex = lookupcolumnindex + 1
        if headers == reference_column_1:
            referenceindexfound = True
        if headers == reference_column_2:
            referenceindexfound1 = True
        if headers == lookup_column:
            lookupcolumnindexfound = True
        if referenceindexfound == True & lookupcolumnindexfound == True & referenceindexfound1 == True:
            break
    for tablerow in source_table.values:
        print(tablerow)
        if rowindexfound == False:
            print(tablerow[referenceindex-1])
            print(tablerow[referenceindex1-1])
            if tablerow[referenceindex-1].find(reference_value_1) != -1 and tablerow[referenceindex1-1].find(reference_value_2) != -1:
                rowindexfound = True
                #rowindex=rowindex+1
            else:
                rowindex = rowindex + 1
        else:
            break
    print("source table" + source_table.values[rowindex][lookupcolumnindex-1])
    return source_table.values[rowindex][lookupcolumnindex-1]
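As an aside, the two-key lookup implemented above can usually be written much more compactly with pandas boolean indexing. A hedged sketch, assuming the same all-string DataFrame that FetchTable returns:

def lookup_value_two_keys(source_table, ref_col_1, ref_val_1, ref_col_2, ref_val_2, lookup_column):
    # Keep the rows where both reference columns contain their reference values,
    # then return the first match from the lookup column (or None).
    mask = (source_table[ref_col_1].str.contains(ref_val_1, regex=False)
            & source_table[ref_col_2].str.contains(ref_val_2, regex=False))
    matches = source_table.loc[mask, lookup_column]
    return matches.iloc[0] if not matches.empty else None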
Another file:
from behave import *
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from readTableDataFromDB import readTableDataFromDB
from pandacode import WebTableValidationHelper as pandacode
from selenium.webdriver.chrome.options import Options

context.driver.get("https://www.espncricinfo.com/table/series/8048/season/2020/indian-premier-league")
matrix = pandacode.FetchTable(context, "//*[@class='table table-sm standings-widget-table text-center mb-0 border-bottom']")
ismatrixequal = pandacode.VerifyTable(context, matrix, matrix)
#print(ismatrixequal)
lookupvalue = pandacode.LookupValueFromColumnSingleKey(context, matrix, "TEAM", "Delhi Capitals", "PT")
print(lookupvalue)
Another piece of code:
def LookupValueFromColumnSingleKey1(context, source_table, reference_column_1, rowName, columnName):
    referenceindex = 0
    referenceindexfound = False
    columnindex = 0
    rowindex = 0
    rowindexfound = False
    columnindexfound = False
    for headers in source_table.columns:
        if referenceindexfound == False:
            referenceindex = referenceindex + 1
        if columnindexfound == False:
            columnindex = columnindex + 1
        if headers == reference_column_1:
            referenceindexfound = True
        if headers == columnName:
            columnindexfound = True
        if referenceindexfound == True & columnindexfound == True:
            break
    for tablerow in source_table.values:
        #print(tablerow)
        if rowindexfound == False:
            rowindex = rowindex + 1
            for tupledata in tablerow:
                #if tupledata.find(rowName)!= -1:
                if tupledata.lower() == rowName.lower():
                    print(tupledata)
                    rowindexfound = True
    #print("source table"+source_table.values[rowindex-1][columnindex-1])
    #print(source_table[columnindex][rowindex])
    return source_table.values[rowindex-1][columnindex-1]
