I'm unable to scrape the table from https://solanabeach.io/validators. For some reason, I can't access it using the following code snippet. Does anyone have an idea why I'm unable to scrape the table?
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
# Scrape the validators table from solanabeach.io.
# The table is rendered client-side by JavaScript, so the DOM must not be
# read immediately after get(): wait explicitly for a <tbody> to appear,
# otherwise BeautifulSoup sees an empty shell and findAll('tbody') yields [].
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument("--enable-javascript")
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
driver.get("https://solanabeach.io/validators")  # plain string: there were no f-string placeholders
# Block until at least one table body exists in the rendered DOM.
WebDriverWait(driver, 30).until(
    EC.presence_of_element_located((By.TAG_NAME, "tbody")))
api = BeautifulSoup(driver.page_source, 'html.parser')
table = api.findAll('tbody')
print(table)
driver.quit()
You don't need to use BeautifulSoup here. You can simply use Selenium methods.
import re
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Click "show all validators", then print the table header and every row.
# NOTE: the scraped original had XPath attribute tests garbled as `#class`;
# the XPath attribute axis is `@class`.
PATH = "chrome driver path"
driver = webdriver.Chrome(PATH)
url = 'https://solanabeach.io/validators'
driver.get(url)
driver.maximize_window()
# Wait for the client-side app to render the expand button.
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//button[contains(@class,'ShowValidatorsButton')]")))
showbutton = driver.find_element(By.XPATH, "//button[contains(@class,'ShowValidatorsButton')]")
showbutton.click()
sleep(10)  # give the expanded table time to populate
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//table[@class='table table-bordered maintable "
                                                "table-striped-even']//tbody/tr")))
columnHeader = driver.find_element(By.XPATH, "//table[@class='table table-bordered maintable "
                                             "table-striped-even']//thead")
print("---------------------------------------------------------------------------------------")
# Collapse all whitespace in the header text into '|' separators.
print(re.sub(r"\s+", '|', columnHeader.text.strip()))
print("---------------------------------------------------------------------------------------")
textInPage = driver.find_elements(By.XPATH, "//table[@class='table table-bordered maintable "
                                            "table-striped-even']//tbody/tr")
for element in textInPage:
    print(element.text)
    print("---------------------------------------------------------------------------------------")
driver.quit()
Output:
---------------------------------------------------------------------------------------
#|VALIDATOR|STAKE|CUMULATIVE|STAKE|COMMISSION|LAST|VOTE
---------------------------------------------------------------------------------------
1
Chorus One
1.7.14
15,017,730(29319)
3.78 %
3.8 %
8 %
106,531,468
---------------------------------------------------------------------------------------
2
Everstake
1.8.2
14,657,551(110561)
3.69 %
7.5 %
7 %
106,531,468
---------------------------------------------------------------------------------------
3
Certus One
1.7.14
11,346,746(15086)
2.86 %
10.3 %
10 %
106,531,468
---------------------------------------------------------------------------------------
4
Staking Facilities
1.7.14
8,949,137(3657)
2.25 %
12.6 %
8 %
106,531,467
---------------------------------------------------------------------------------------
5
Bison Trails
1.7.15
7,487,862(2830)
1.89 %
14.5 %
8 %
106,531,468
---------------------------------------------------------------------------------------
6
DokiaCapital
1.7.14
7,051,193(2478)
1.78 %
16.2 %
5 %
106,531,469
---------------------------------------------------------------------------------------
7
Kraken
1.7.14
6,671,025(105)
1.68 %
17.9 %
100 %
106,531,452
---------------------------------------------------------------------------------------
8
EVw8...3DvU
1.7.15
6,600,000(4)
1.66 %
19.6 %
100 %
106,531,468
---------------------------------------------------------------------------------------
9
47e2...GueY
1.8.2
6,536,593(541)
1.65 %
21.2 %
8 %
106,531,468
---------------------------------------------------------------------------------------
10
4khY...eP5A
1.7.15
5,799,900(1)
1.46 %
22.7 %
100 %
106,531,468
---------------------------------------------------------------------------------------
11
0% Fee to 2022 💸 | melea.xyz One ◎
1.8.2
5,560,661(6157)
1.40 %
24.1 %
0 %
106,531,468
---------------------------------------------------------------------------------------
12
Shinobi Systems 🚀 stakeview.app
1.8.2
5,232,571(8695)
1.32 %
25.4 %
2 %
106,531,468
---------------------------------------------------------------------------------------
And so on.
Related
So I am trying to scrape this page:
https://www.tripadvisor.com/CheapFlightsHome
but when ever I try to click on the choosing the flight class element it just gives this error:
File "e:\code\Python\non machine learning projects\web scrabbing\Projects\flight-anlaysis\flight-anlaysis.py", line 128, in <module>
extra_info("Economy" , 2 , 0 , 3)
File "e:\code\Python\non machine learning projects\web scrabbing\Projects\flight-anlaysis\flight-anlaysis.py", line 79, in extra_info
drop_down_btn = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH , '//span[@class = "ui_icon caret-down open-close"]')))
File "C:\Users\user\anaconda3\envs\mix\lib\site-packages\selenium\webdriver\support\wait.py", line 95, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
the Python code:
def extra_info(wclass, noa, nos, noc):
    """Open TripAdvisor's passenger/class picker and apply the selections.

    wclass: cabin-class label, e.g. "Economy".
    noa / nos / noc: number of adults / seniors / children to add.

    Fixes vs. the original: XPath attribute tests use @class (the scrape
    garbled them to #class); list.append was called with four arguments;
    enumerate() yields (index, item) in that order; the limit check compared
    WebElements instead of the nos/noc counts; the plus-button lookups now
    search relative to their counter div instead of the whole document.
    """
    # Click the summary button to open the picker popover.
    mbtn = driver.find_element(By.XPATH, '//span[@class = "summaryContainer target"]')
    mbtn.click()
    time.sleep(2)
    mdiv = driver.find_element(By.XPATH, '//div[@class = "prw_rup prw_flights_cos_passenger_picker cosPassengerPopover"]')
    time.sleep(2)
    mmdiv = mdiv.find_element(By.XPATH, './/div[@class = "popoverContents"]')
    wclassbtn = mmdiv.find_element(By.XPATH, './/div[@class = "picker-inner localizationCosStrings"]')
    drop_down_btn = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//span[@class = "ui_icon caret-down open-close"]')))
    time.sleep(5)
    drop_down_btn.click()
    # Cabin-class <li> options, paired with their visible labels.
    eco = driver.find_element(By.XPATH, '//li[text() = "Economy"]')
    peco = driver.find_element(By.XPATH, '//li[text() = "Premium Economy"]')
    bus = driver.find_element(By.XPATH, '//li[text() = "Business Class"]')
    fc = driver.find_element(By.XPATH, '//li[text() = "First Class"]')
    plane_grades = [eco, peco, bus, fc]
    grade_labels = ["Economy", "Premium Economy", "Business Class", "First Class"]
    # Choose the grade whose label matches the requested class.
    for i, label in enumerate(grade_labels):
        if label == wclass:
            plane_grades[i].click()
            break
    # Ticket counters for each passenger category.
    adult_counter_div = driver.find_element(By.XPATH, '//div[@class = "adultCounter counter"]')
    senior_counter_div = driver.find_element(By.XPATH, '//div[@class = "seniorCounter counter"]')
    child_counter_div = driver.find_element(By.XPATH, '//div[@class = "childrenCounter counter"]')
    # Enforce the site's ticket limits on the requested counts.
    if noa <= 6 and nos <= 6 and noc <= 5:
        add_adult_btn = adult_counter_div.find_element(By.XPATH, './/span[@class = "ui_icon plus-circle enabled"]')
        add_senior_btn = senior_counter_div.find_element(By.XPATH, './/span[@class = "ui_icon plus-circle enabled"]')
        add_child_btn = child_counter_div.find_element(By.XPATH, './/span[@class = "ui_icon plus-circle enabled"]')
        # Click each plus button the requested number of times.
        for _ in range(noa):
            add_adult_btn.click()
        for _ in range(nos):
            add_senior_btn.click()
        for _ in range(noc):
            add_child_btn.click()
    else:
        print('MORE THAN THE LIMIT')
Thanks.
please try to use WebDriverWait with Expected Conditions
from selenium.webdriver.support import expected_conditions as EC
...
...
...
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH , '//span[#class = "ui_icon caret-down open-close"]'))).click()
Find more details here - https://selenium-python.readthedocs.io/waits.html
To send the character sequence Bangalore within the From field on tripadvisor you need to induce WebDriverWait for the element_to_be_clickable() and you can use either of the following locator strategies:
# Type the origin city into the "From where?" field and pick the first
# typeahead suggestion. XPath attribute tests use @, not # (scrape artifact).
driver.get('https://www.tripadvisor.com/CheapFlightsHome')
from_where = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='From where?']")))
from_where.click()
from_where.clear()
from_where.send_keys("Bangalore")
# click() returns None, so don't rebind from_where to its result.
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='ui_typeahead_results']/ul/li"))).click()
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:
I'm newbie with web scraping and using selenium I would like to make click over a SVG element to get access to the information showed by a modal window.
Making click over a point or cross in the basketball court of this webpage: https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart You will get a modal window with information like you can see in the next picture:
I have made this development which works find because find the "svg" element printing the values of his attributes "x" and "y":
# Walk team A's period list and print the (x, y) position of every shot marker.
b = self.driver
b.set_window_size(300, 300)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
team_a = b.find_element_by_css_selector('g.team-A')
period_list = team_a.find_element_by_css_selector('g.period-list')
periods = period_list.find_elements_by_css_selector("g.period-item")
for quarter in periods:
    shot_list = quarter.find_element_by_css_selector("g.shot-list")
    for item in shot_list.find_elements_by_css_selector("g.shot-item"):
        # Look the <svg> up once and reuse it for both attributes.
        marker = item.find_element_by_tag_name('svg')
        print(f"x: {marker.get_attribute('x')} - y: {marker.get_attribute('y')}")
    print("\n")
You can see exit of the code here:
x: 40.5 - y: 99.5
x: 151.5 - y: 211.5
x: 34.5 - y: 125.5
x: 35.5 - y: 121.5
x: 157.5 - y: 204.5
x: 59.5 - y: 122.5
x: 32 - y: 142
x: 40 - y: 121
x: 27.5 - y: 117.5
x: 164 - y: 124
x: 80.5 - y: 7.5
x: 49.5 - y: 111.5
x: 135.5 - y: 42.5
x: 34.5 - y: 67.5
x: 27.5 - y: 117.5
x: 138 - y: 54
x: 22 - y: 140
x: 119.5 - y: 32.5
x: 135.5 - y: 42.5
x: 154.5 - y: 186.5
x: 37.5 - y: 106.5
x: 39 - y: 117
x: 31 - y: 114
x: 40.5 - y: 117.5
x: 22 - y: 5
x: 46.5 - y: 4.5
x: 20 - y: 125
x: 148.5 - y: 197.5
x: 71.5 - y: 169.5
x: 118 - y: 230
x: 30.5 - y: 263.5
x: 25 - y: 124
x: 135.5 - y: 213.5
x: 82.5 - y: 128.5
x: 40 - y: 119
x: 158.5 - y: 131.5
x: 50.5 - y: 174.5
x: 166.5 - y: 82.5
x: 26 - y: 149
x: 36 - y: 133
x: 114.5 - y: 239.5
x: 48 - y: 222
x: 127.5 - y: 226.5
x: 23 - y: 132
x: 110.5 - y: 107.5
x: 114 - y: 138
x: 15 - y: 260
x: 137.5 - y: 131.5
x: 34 - y: 118
x: 75 - y: 65
x: 54.5 - y: 167.5
x: 30.5 - y: 127.5
But, If I try to make click over "svg" component adding this code:
point = item.find_element_by_tag_name('svg')
point.click()
Finally, my code will be:
# NOTE(review): this is the failing attempt from the question. The click
# below raises "Element ... is not clickable at point ... Other element
# would receive the click: <p class='cc_message'>" (see the error that
# follows) — presumably a cookie banner overlays the marker; it must be
# dismissed and the chart scrolled into view first.
b = self.driver
b.set_window_size(300, 300)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = item.find_element_by_tag_name('svg')
point.click()
print("\n")
I've got this error:
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <svg class="shot-miss icon icon-miss clickable" x="40.5" y="99.5" width="16" height="16" viewBox="0 0 30 30" title="">...</svg> is not clickable at point (44, 165). Other element would receive the click: <p class="cc_message">...</p>
(Session info: chrome=72.0.3626.121)
(Driver info: chromedriver=2.44.609551 (5d576e9a44fe4c5b6a07e568f1ebc753f1214634),platform=Linux 5.4.0-88-generic x86_64)
How is that possible? What am I doing wrong? How can I get the content of the modal window?
Edit I (solution provided by @Prophet):
Now, my code is:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# NOTE(review): Edit I — the cookie banner is accepted and the page is
# scrolled, yet per the error that follows the click still lands on
# <th class="player">; apparently at this scroll position the stats table
# covers the marker — TODO confirm scroll target.
b = self.driver
wait = WebDriverWait(b, 20)
b.set_window_size(1920, 1080)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.cc_btn_accept_all"))).click()
b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = item.find_element_by_tag_name('svg')
point.click()
print("\n")
And I've got this error:
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <svg class="shot-miss icon icon-miss clickable" x="151.5" y="211.5" width="16" height="16" viewBox="0 0 30 30" title="">...</svg> is not clickable at point (805, 312). Other element would receive the click: <th class="player"></th>
(Session info: chrome=72.0.3626.121)
(Driver info: chromedriver=2.44.609551 (5d576e9a44fe4c5b6a07e568f1ebc753f1214634),platform=Linux 5.4.0-88-generic x86_64)
Edit II (solution provided by @cruisepandey):
I have edited my code and now I've got this code:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# NOTE(review): Edit II — this attempt times out on the explicit wait (see
# the TimeoutException below); "//*[name()='svg']" matches the first svg in
# the document, which is not necessarily a clickable shot marker.
b = self.driver
b.set_window_size(1920, 1080)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
WebDriverWait(b, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg']"))).click()
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = item.find_element_by_xpath("//*[name()='svg']")
point.click()
print("\n")
But, It doesn't work :( I've got this error:
Traceback (most recent call last):
File "/home/josecarlos/Workspace/python/basketmetrics/test/test_shot_chart_get_data.py", line 27, in test_something
WebDriverWait(b, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg']"))).click()
File "/home/josecarlos/Workspace/python/basketmetrics/venv/python/lib/python3.8/site-packages/selenium/webdriver/support/wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Edit III:
I have tried to access to the "svg" tag through his absoloute XPath route. Using the tools for developers of Firefox or Chrome, we can get this absolute Xpath. In my case, I have got the XPath of the element in blue:
This is the route:
/html/body/div[3]/div[3]/div/section/div[2]/div/div/ul[2]/li[6]/div/div[1]/div/div[2]/div[2]/div[2]/svg/g[1]/g/g[1]/g/g[1]/svg
If I try to acces to this route in my code I've got an error
# NOTE(review): Edit III — the absolute XPath below cannot match: svg/g
# elements live in the SVG namespace, so plain /svg and /g location steps
# never select them; they must be addressed as *[name()='svg'] etc.
# (hence the NoSuchElementException that follows).
b = self.driver
wait = WebDriverWait(b, 20)
b.set_window_size(300, 300)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
#WebDriverWait(b, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg']"))).click()
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = b.find_element_by_xpath("/html/body/div[3]/div[3]/div/section/div[2]/div/div/ul[2]/li[6]/div/div[1]/div/div[2]/div[2]/div[2]/svg/g[1]/g/g[1]/g/g[1]/svg")
point.click()
print("\n")
The error that I've got is this:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div[3]/div[3]/div/section/div[2]/div/div/ul[2]/li[6]/div/div[1]/div/div[2]/div[2]/div[2]/svg/g[1]/g/g[1]/g/g[1]/svg"}
(Session info: chrome=72.0.3626.121)
(Driver info: chromedriver=2.44.609551 (5d576e9a44fe4c5b6a07e568f1ebc753f1214634),platform=Linux 5.4.0-88-generic x86_64)
What am I doing wrong? What happend?
Edit IV:
I have modified in my code the string who gives me access to the svg but it doesn't work.
# Edit IV attempt, with the scrape's `#class` garbling repaired (@class) and
# the loop bodies re-indented. SVG elements must be addressed with
# *[name()='svg'] because they are namespaced.
b = self.driver
wait = WebDriverWait(b, 20)
b.set_window_size(1920, 1080)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
#WebDriverWait(b, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg']"))).click()
for quarter in periods:
    shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
    for item in shots:
        print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
        point = b.find_element_by_xpath("//div[@class='shot-chart_canvas']/*[name()='svg']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='svg']")
        point.click()
    print("\n")
self.assertEqual(True, True, "Error!!!")
And the error that I get is:
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <svg class="shot-miss icon icon-miss clickable" x="40.5" y="99.5" width="16" height="16" viewBox="0 0 30 30" title="">...</svg> is not clickable at point (687, 917). Other element would receive the click: <div class="cc_banner cc_container cc_container--open">...</div>
(Session info: chrome=72.0.3626.121)
(Driver info: chromedriver=2.44.609551 (5d576e9a44fe4c5b6a07e568f1ebc753f1214634),platform=Linux 5.4.0-88-generic x86_64)
Sorry, but I don't understand what happend :(((
Edit V:
I have to retrive the information of all the crosses and points. I've got two teams and inside this tag we've got an array of periods and inside of each period we've got all the shoots and of each shot-item I need to retrieve the information. So, how can I make an Xpath loop to retrieve the information of each shoot?
g tag is under svg tag. so for locating g.team-A
Please use the below xpath :
//*[name()='g' and @class='team-A']
this is an xpath expression.
so the possible fix for this :
point = item.find_element_by_tag_name('svg')
point.click()
to use this :
point = item.find_element_by_xpath("//*[name()='svg']")
point.click()
What I would suggest here is to have a explicit wait defined and then you can try to click on it.
Code-trial :
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg']"))).click()
for this explicit wait, you'd have to import these as well :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Update :
# Scroll the shot chart into view, click one shot marker, and print the
# modal's text. XPath attribute tests use @class (the scrape garbled them
# to #class); namespaced SVG nodes are matched with *[name()=...].
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
ele = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.shot-chart_canvas")))
driver.execute_script("arguments[0].scrollIntoView(true);", ele)
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[name()='svg' and @class='chart']//*[name()='g']//descendant::*[name()='g' and @class='shot-item']"))).click()
a = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.player-profile"))).get_attribute('innerText')
print(a)
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
output :
Q1 09:520 0
Alexandra Stolyar
2pt jump shot missed
View Player Profile
FG 2Pts 3Pts FT Pts
In this game
2/6
33.33%
1/4
25%
1/2
50%
3/3
100%
8
Update 2 :
# Click every shot marker in turn, print the modal text, then close the
# modal before moving on. @class (not #class) in the XPath predicates.
driver.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
ele = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.shot-chart_canvas")))
driver.execute_script("arguments[0].scrollIntoView(true);", ele)
all_points = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//*[name()='svg' and @class='chart']//*[name()='g']//descendant::*[name()='g' and @class='shot-item']")))
print(len(all_points))
for point in all_points:
    point.click()
    a = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.player-profile"))).get_attribute('innerText')
    print(a)
    # Dismiss the modal so the next marker is clickable again.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.icon--close"))).click()
    time.sleep(1)
Imports :
119
Q1 09:520 0
Alexandra Stolyar
2pt jump shot missed
View Player Profile
FG 2Pts 3Pts FT Pts
In this game
2/6
33.33%
1/4
25%
1/2
50%
3/3
100%
8
Q1 09:350 0
Karina Nizamova
3pt jump shot missed
View Player Profile
FG 2Pts 3Pts FT Pts
In this game
1/7
14.29%
1/3
33.33%
0/4
0%
2/3
66.67%
4
The elements you are trying to click on are initially out of the visible screen. Also there is a "accept cookies" banner on the bottom.
You need to close the cookies banner and scroll the page up in order to make your code working.
Please try this:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Dismiss the cookie banner, scroll to the bottom, then print and click each
# of team A's shot markers (300x300 window variant).
b = self.driver
wait = WebDriverWait(b, 20)
b.set_window_size(300, 300)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.cc_btn_accept_all"))).click()
b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = item.find_element_by_tag_name('svg')
point.click()
print("\n")
I would also recommend to use a normal window size, as following:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Same as the previous snippet but with a normal 1920x1080 window, which the
# answer recommends so more of the chart is inside the viewport.
b = self.driver
wait = WebDriverWait(b, 20)
b.set_window_size(1920, 1080)
b.get("https://www.fiba.basketball/euroleaguewomen/21-22/game/1310/MBA-Moscow-ZVVZ-USK-Praha#tab=shot_chart")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a.cc_btn_accept_all"))).click()
b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
periods = b.find_element_by_css_selector('g.team-A').find_element_by_css_selector('g.period-list').find_elements_by_css_selector("g.period-item")
for quarter in periods:
shots = quarter.find_element_by_css_selector("g.shot-list").find_elements_by_css_selector("g.shot-item")
for item in shots:
print(f"x: {item.find_element_by_tag_name('svg').get_attribute('x')} - y: {item.find_element_by_tag_name('svg').get_attribute('y')}")
point = item.find_element_by_tag_name('svg')
point.click()
print("\n")
UPD
Svg, rect,g etc are special tag names.
/svg or /g Xpath will not work. You will need to use /*[name()='svg'] or /*[name()='g'] respectively.
Also, the absolute paths are strongly NOT recommended. You need to use better locators.
For example instead of
/html/body/div[3]/div[3]/div/section/div[2]/div/div/ul[2]/li[6]/div/div[1]/div/div[2]/div[2]/div[2]/svg/g[1]/g/g[1]/g/g[1]/svg
This will work better:
//div[@class='shot-chart_canvas']/*[name()='svg']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='svg']
So instead of
point = b.find_element_by_xpath("/html/body/div[3]/div[3]/div/section/div[2]/div/div/ul[2]/li[6]/div/div[1]/div/div[2]/div[2]/div[2]/svg/g[1]/g/g[1]/g/g[1]/svg")
point.click()
Try this:
# Relative locator for the first shot marker; @class, not #class.
point = b.find_element_by_xpath("//div[@class='shot-chart_canvas']/*[name()='svg']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='g']/*[name()='g'][1]/*[name()='svg']")
point.click()
Hey guys, this is my first time asking a question here, so please don't be too harsh — and my English isn't the best either.
I have a problem: I am trying to web-scrape companies. I am able to get all the company names, but when I try to scrape the phone number and other data, I only get the first 4 results. I hope someone can help me — thanks in advance.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC, wait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support.expected_conditions import presence_of_element_located
# Accept the cookie dialog on gelbeseiten.de and print every company name
# and phone number on the results page.
PATH = "C:/Program Files (x86)/Chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://www.gelbeseiten.de/Suche/KFZ/50968")
try:
    # Wait for the results container to exist.
    main = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "gs_treffer"))
    )
except Exception:
    # Original had `driver.quit` without parentheses, which is a no-op;
    # it must be called to actually close the browser.
    driver.quit()
element = driver.find_element_by_id('cmpbntyestxt')  # cookie "yes" button
element.click()
Firma = driver.find_elements_by_css_selector("h2")
for firma in Firma:
    print(firma.text)
PLZ = driver.find_elements_by_class_name("mod-AdresseKompakt__phoneNumber")
for plz in PLZ:
    print(plz.text)
driver.quit()
You need to scroll into each element panel, and then you can interact with them.
You can try the below solution :
# Accept the cookie dialog, then scroll each result card into view before
# reading its title and phone number (lazy-loaded entries only render once
# visible). XPath attribute tests use @, not # (scrape artifact).
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(50)
driver.get("https://www.gelbeseiten.de/Suche/KFZ/50968")
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//span[@id='cmpbntyestxt']/.."))).click()
lngth = len(driver.find_elements(By.XPATH, "//h2[@data-wipe-name='Titel']"))
print(lngth)
j = 0
scrolling = 0
for item in range(lngth):
    # Re-locate on every pass: scrolling makes new cards render.
    elements = driver.find_elements(By.XPATH, "//h2[@data-wipe-name='Titel']")
    lst = driver.find_elements(By.XPATH, "//div[@id='gs_treffer']/child::article")
    driver.execute_script("arguments[0].scrollIntoView(true);", lst[scrolling])
    time.sleep(1)
    print(elements[j].text)
    print(elements[j].find_element(By.XPATH, ".//following-sibling::address/p[2]").text)
    j = j + 1
    scrolling = scrolling + 1
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output :
"C:\Program Files\Python39\python.exe" C:/Users/***/PycharmProjects/SeleniumSO/Chrome.py
16
Scherschel Andreas
0221 38 59 93
Scherschel Andreas Dipl.-Ing. KFZ-Sachverst.
0221 38 59 93
La Linea Franca GmbH,Volvo Vertragshändler KFZ-Handel
0221 8 00 60-0
Kuehler Willems Services UG
0221 3 76 11 11
Autoservice Baum
0221 38 59 31
Mückl u. Rehse GbR Ingenieurbüro
0221 93 46 49-0
Procar Automobile GmbH & Co. KG - Köln Süd
0221 37 69 80
Allianz Versicherung Wolfgang Georg
0221 3 40 49 51
Pohlen Robert Autoservice
0221 37 84 72
Barmenia Versicherung - Norin Amraei
01522 6 22 70 04
Auto-Kühlerbau
0221 3 76 11 11
Barmenia Versicherung - Anders OHG
0221 9 85 49 30
Barmenia Versicherung - Kevin Wimmer
0176 70 29 27 17
Barmenia Versicherung - Yazdan Izadi
0221 9 85 49 30
GTÜ Prùfstelle Köln Bayenthal
0221 93 46 49-0
Continentale: T & T Versicherungsservice
0221 34 02 91 10
Process finished with exit code 0
You can scrape all the data from the landing page using requests module as well.
import json
import requests
from bs4 import BeautifulSoup
link = 'https://www.gelbeseiten.de/Suche/KFZ/50968'

def get_lazyloaded_details(soup):
    """Collect [name, email, phone, detail link] for each lazy-loaded entry.

    Lazy-loaded <article> tags carry their payload as JSON in the
    data-lazyloaddata attribute.
    """
    all_details = []
    for lazyload_data in soup.select("article[data-lazyloaddata]"):
        container = json.loads(lazyload_data.get("data-lazyloaddata"))
        name = container['name']
        # Initialise so a missing button type can't raise NameError (or
        # silently carry over the previous entry's values).
        email = phone = detail_page_link = None
        for item in container['trefferButtonListList']['trefferButtonListList']:
            for elem in item:
                if elem['gcLink']['text'] == 'E-Mail':
                    email = elem['gcLink']['href'].split("?")[0].split(":")[1]
                elif elem['gcLink']['text'] == 'Anrufen':
                    phone = elem['gcLink']['href'].split(":")[1]
                elif elem['gcLink']['text'] == 'Mehr Details':
                    detail_page_link = elem['gcLink']['href']
        all_details.append([name, email, phone, detail_page_link])
    return all_details

with requests.Session() as s:
    s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
    resp = s.get(link)
    soup = BeautifulSoup(resp.text, "lxml")
    all_details = get_lazyloaded_details(soup)
    # Non-lazy entries render their data directly in the HTML.
    for item in soup.select("article:not([data-lazyloaddata])"):
        name = item.select_one("h2[data-wipe-name]").get_text(strip=True)
        email = item.select_one("a.contains-icon-email").get("href").split("?")[0].split(":")[1]
        phone = item.select_one("p.mod-AdresseKompakt__phoneNumber").text
        detail_page_link = item.select_one("a.contains-icon-details").get("href")
        all_details.append([name, email, phone, detail_page_link])
for element in all_details:
    print(element)
Output (truncated):
['Scherschel Andreas', 'info@scherschel.com', '0221 38 59 93', 'https://www.gelbeseiten.de/gsbiz/5cac50a0-a60e-49ef-8937-2f0a7bcad241']
['Scherschel Andreas Dipl.-Ing. KFZ-Sachverst.', 'info@scherschel.com', '0221 38 59 93', 'https://www.gelbeseiten.de/gsbiz/5cac50a0-a60e-49ef-8937-2f0a7bcad241']
['La Linea Franca GmbH,Volvo Vertragshändler KFZ-Handel', 'buschdorf@lalinea.de', '0221800600', 'https://www.gelbeseiten.de/gsbiz/1db12570-6d7c-4fe2-ab3d-d0c02e74324a']
['Kuehler Willems Services UG', 'info@kuehler-willems.eu', '02213761111', 'https://www.gelbeseiten.de/gsbiz/54d91f8c-eebe-41e3-962e-acdbfac6be46']
I am trying to scrape data from www.blocket.se with selenium. All is going good but when i try to get a phone number, its not working. At first there is a button with "Visa telefonnummer", so i press it with selenium. Then phone number appears but when i try to get it with selenium it only gets "Visa telefonnummer" Can you please help me with getting it?
Thanks for answers
driver.get(property_link)  # link is ok
driver.find_element_by_class_name("iXyZie").click()  # click the button; the phone number appears on screen
phone_number = driver.find_element_by_class_name("fXYiwE").text  # still yields "Visa telefonnummer": per the HTML below, the revealed number is rendered in a different element
This is the button before click
<div class="ShowPhoneNumberButton__StyledButton-sc-1fetcgp-0 cMGXxG">
<button type="button" class="Buttonstyles__BaseButton-hz08m4-0 iXyZie"><span class="Buttonstyles__ChildrenWrapper-hz08m4-3 fXYiwE"><svg viewBox="0 0 32 32" color="#363433" width="24" height="24" class="ShowPhoneNumberButton__StyledIconCall-sc-1fetcgp-1 ljfHXc"><defs><path id="iconCall_svg__a" d="M15.466 21.406l5.32-1.401 5.64 8.054-.402.573a8.281 8.281 0 01-2.04 2.02c-1.422.995-2.976 1.494-4.61 1.494-1.428 0-2.916-.38-4.433-1.142-3.098-1.554-6.28-4.645-9.46-9.187C2.302 17.275.487 13.227.085 9.786-.34 6.17.845 3.273 3.506 1.409A8.287 8.287 0 016.103.183L6.78 0l5.64 8.055-3.136 4.52 6.184 8.83zm7.37 7.607a6.501 6.501 0 001.123-.991l-4.011-5.728-5.32 1.4L6.845 12.58l3.136-4.52L5.97 2.332a6.475 6.475 0 00-1.317.716c-2.05 1.436-2.92 3.625-2.584 6.506.363 3.108 2.062 6.849 5.05 11.116 2.987 4.267 5.92 7.143 8.718 8.547 2.594 1.302 4.947 1.232 6.999-.204zm-7.325-12.865a1.5 1.5 0 110-3 1.5 1.5 0 010 3zm6 0a1.5 1.5 0 110-3 1.5 1.5 0 010 3zm6-3a1.5 1.5 0 110 3 1.5 1.5 0 010-3z"></path></defs><use fill="currentColor" xlink:href="#iconCall_svg__a" fill-rule="evenodd" transform="translate(1)"></use></svg><div class="TextCallout1__TextCallout1Wrapper-qzrnab-0 lkIkoj">Visa telefonnummer</div></span></button>
</div>
This is the button after click
<div class="ShowPhoneNumberButton__StyledButton-sc-1fetcgp-0 cMGXxG"><span class="Buttonstyles__ChildrenWrapper-hz08m4-3 fXYiwE">Ring 08-551 157 97</span></div>
wait = WebDriverWait(self.driver, 30)
# Click the "Visa telefonnummer" button (@class, not #class in the XPath),
# then read the revealed phone anchor that replaces it.
wait.until(EC.presence_of_element_located((By.XPATH, "//button[@class='Buttonstyles__BaseButton-hz08m4-0 iXyZie']//span[@class='Buttonstyles__ChildrenWrapper-hz08m4-3 fXYiwE']"))).click()
phone_number = wait.until(EC.presence_of_element_located((By.XPATH, "//a[@class='Buttonstyles__BaseButton-hz08m4-0-a Buttonstyles__BaseAnchor-hz08m4-1 hmzYTq']")))
print(phone_number.text)  # Python 3 print call (original used a py2 print statement)
Note please add below imports :
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as Wait
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Bug fix: the original passed an unquoted identifier to __getattribute__,
# which is a SyntaxError — and __getattribute__ cannot reveal the number anyway.
# Click the reveal button first, then re-read the span, which now holds the number.
driver.find_element(By.CLASS_NAME, "fXYiwE").click()
phone_number = driver.find_element(By.CLASS_NAME, "fXYiwE").text
Please try this.
I've tried to get a list of all values from a dropdown.
The problem is that the dropdown is only activated after clicking on it, so the list of values appears only after the click.
Now I've got code like this:
# NOTE(review): an absolute XPath like this is brittle — any change to the page
# layout breaks it; prefer a relative locator anchored on a stable attribute.
browser.find_element_by_xpath("/html/body/div[3]/div[8]/div[2]/div[1]/div[4]").click()
It enables me to activate the dropdown, but I don't know how to extract all of its possible values.
HTML:
<option value="">-</option><option value="0401">Aleksandrowski</option><option value="2001">Augustowski</option>
Use Select:
As per the best practices, you can add WebDriverWait and expected_conditions as follows:
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select

# NOTE(review): absolute XPath kept from the original, but it is fragile —
# prefer a relative locator anchored on a stable attribute.
locator = "/html/body/div[3]/div[8]/div[2]/div[1]/div[3]/div/div/select[1]"
# Bug fix: until() takes the already-constructed condition object. The original
# passed a (condition, locator) tuple, so the expected condition was never
# actually evaluated.
botton_to_select = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, locator)))
botton_to_select.click()
# Bug fix: 'browserdriver' was undefined (NameError) — the driver variable is 'driver'.
select = Select(driver.find_element_by_xpath(locator))
# Print the visible text and the value attribute of every <option>.
for item in select.options:
    print(item.get_attribute('innerText'), item.get_attribute('value'))
Hope this helps you!
I suggest not using absolute XPath; use relative XPath instead. The following code retrieves the drop-down values.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://polska.e-mapa.net/")
# Hoist the repeated WebDriverWait(driver, 20) into one reusable waiter.
wait = WebDriverWait(driver, 20)
# Bug fix: XPath attribute tests use '@class', not '#class' — '#' is invalid
# XPath and raises InvalidSelectorException.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='searchLabel']/i"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[text()='Działki']"))).click()
# Open the 'Powiat:' (county) dropdown so its options are rendered.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(.,'Powiat:')]/select"))).click()
items = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(.,'Powiat:')]/select/option")))
# Print the inner HTML (the county name) of every option.
for item in items:
    print(item.get_attribute('innerHTML'))
Output printed on the console:
Aleksandrowski
Augustowski
Bartoszycki
Bełchatowski
Będziński
Bialski
Biała Podlaska
Białobrzeski
Białogardzki
Białostocki
Białystok
Bielski
Bielski
Bielsko-Biała
Bieruńsko-Lędziński
Bieszczadzki
Biłgorajski
Bocheński
Bolesławiecki
Braniewski
Brodnicki
Brzeski
Brzeski
Brzeziński
Brzozowski
Buski
Bydgoski
Bydgoszcz
Bytom
Bytowski
Chełm
Chełmiński
Chełmski
Chodzieski
Chojnicki
Chorzów
Choszczeński
Chrzanowski
Ciechanowski
Cieszyński
Czarnkowsko-Trzcianecki
Częstochowa
Częstochowski
Człuchowski
Dąbrowa Górnicza
Dąbrowski
Dębicki
Drawski
Działdowski
Dzierżoniowski
Elbląg
Elbląski
Ełcki
Garwoliński
Gdańsk
Gdański
Gdynia
Giżycki
Gliwice
Gliwicki
Głogowski
Głubczycki
Gnieźnieński
Goleniowski
Golubsko-Dobrzyński
Gołdapski
Gorlicki
Gorzowski
Gorzów Wielkopolski
Gostyniński
Gostyński
Górowski
Grajewski
Grodziski
Grodziski
Grójecki
Grudziądz
Grudziądzki
Gryficki
Gryfiński
Hajnowski
Hrubieszowski
Iławski
Inowrocławski
Janowski
Jarociński
Jarosławski
Jasielski
Jastrzębie-Zdrój
Jaworski
Jaworzno
Jelenia Góra
Jeleniogórski
Jędrzejowski
Kaliski
Kalisz
Kamiennogórski
Kamieński
Kartuski
Katowice
Kazimierski
Kędzierzyńsko-Kozielski
Kępiński
Kętrzyński
Kielce
Kielecki
Kluczborski
Kłobucki
Kłodzki
Kolbuszowski
Kolneński
Kolski
Kołobrzeski
Konecki
Konin
Koniński
Koszalin
Koszaliński
Kościański
Kościerski
Kozienicki
Krakowski
Kraków
Krapkowicki
Krasnostawski
Kraśnicki
Krosno
Krośnieński
Krośnieński
Krotoszyński
Kutnowski
Kwidzyński
Legionowski
Legnica
Legnicki
Leski
Leszczyński
Leszno
Leżajski
Lęborski
Lidzbarski
Limanowski
Lipnowski
Lipski
Lubaczowski
Lubański
Lubartowski
Lubelski
Lubiński
Lublin
Lubliniecki
Lwówecki
Łańcucki
Łaski
Łęczycki
Łęczyński
Łobeski
Łomża
Łomżyński
Łosicki
Łowicki
Łódzki Wschodni
Łódź
Łukowski
Makowski
Malborski
Miechowski
Mielecki
Międzychodzki
Międzyrzecki
Mikołowski
Milicki
Miński
Mławski
Mogileński
Moniecki
Mrągowski
Mysłowice
Myszkowski
Myślenicki
Myśliborski
Nakielski
Namysłowski
Nidzicki
Niżański
Nowodworski
Nowodworski
Nowomiejski
Nowosądecki
Nowosolski
Nowotarski
Nowotomyski
Nowy Sącz
Nyski
Obornicki
Olecki
Oleski
Oleśnicki
Olkuski
Olsztyn
Olsztyński
Oławski
Opatowski
Opoczyński
Opole
Opolski
Opolski
Ostrołęcki
Ostrołęka
Ostrowiecki
Ostrowski
Ostrowski
Ostródzki
Ostrzeszowski
Oświęcimski
Otwocki
Pabianicki
Pajęczański
Parczewski
Piaseczyński
Piekary Śląskie
Pilski
Pińczowski
Piotrkowski
Piotrków Trybunalski
Piski
Pleszewski
Płock
Płocki
Płoński
Poddębicki
Policki
Polkowicki
Poznań
Poznański
Proszowicki
Prudnicki
Pruszkowski
Przasnyski
Przemyski
Przemyśl
Przeworski
Przysuski
Pszczyński
Pucki
Puławski
Pułtuski
Pyrzycki
Raciborski
Radom
Radomski
Radomszczański
Radziejowski
Radzyński
Rawicki
Rawski
Ropczycko-Sędziszowski
Ruda Śląska
Rybnicki
Rybnik
Rycki
Rypiński
Rzeszowski
Rzeszów
Sandomierski
Sanocki
Sejneński
Sępoleński
Siedlce
Siedlecki
Siemianowice Śląskie
Siemiatycki
Sieradzki
Sierpecki
Skarżyski
Skierniewice
Skierniewicki
Sławieński
Słubicki
Słupecki
Słupsk
Słupski
Sochaczewski
Sokołowski
Sokólski
Sopot
Sosnowiec
Stalowowolski
Starachowicki
Stargardzki
Starogardzki
Staszowski
Strzelecki
Strzelecko-Drezdenecki
Strzeliński
Strzyżowski
Sulęciński
Suski
Suwalski
Suwałki
Szamotulski
Szczecin
Szczecinecki
Szczycieński
Sztumski
Szydłowiecki
Średzki
Średzki
Śremski
Świdnicki
Świdnicki
Świdwiński
Świebodziński
Świecki
Świętochłowice
Świnoujście
Tarnobrzeg
Tarnobrzeski
Tarnogórski
Tarnowski
Tarnów
Tatrzański
Tczewski
Tomaszowski
Tomaszowski
Toruń
Toruński
Trzebnicki
Tucholski
Turecki
Tychy
Wadowicki
Wałbrzych
Wałbrzyski
Wałecki
Warszawa
Warszawski Zachodni
Wąbrzeski
Wągrowiecki
Wejherowski
Węgorzewski
Węgrowski
Wielicki
Wieluński
Wieruszowski
Włocławek
Włocławski
Włodawski
Włoszczowski
Wodzisławski
Wolsztyński
Wołomiński
Wołowski
Wrocław
Wrocławski
Wrzesiński
Wschowski
Wysokomazowiecki
Wyszkowski
Zabrze
Zambrowski
Zamojski
Zamość
Zawierciański
Ząbkowicki
Zduńskowolski
Zgierski
Zgorzelecki
Zielona Góra
Zielonogórski
Złotoryjski
Złotowski
Zwoleński
Żagański
Żarski
Żniński
Żory
Żuromiński
Żyrardowski
Żywiecki