Python Web scraping using selenium to extract tables data - python

I am trying to web scrape this website using PYTHON selenium.
www.shareinvestor.com/fundamental/factsheet.html?counter=Z74.SI
But the selenium seem to hang and not loading.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver as web
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe')
driver = webdriver.Firefox(firefox_binary=binary)
import time
driver.get("http://www.shareinvestor.com/fundamental/factsheet.html?counter=Z74.SI")
WebDriverWait(driver, 20)
time.sleep(5)
print(driver.page_source)
I am interested in the table as shown below.
What is wrong with my code? I am not able to get any output.

Related

Python Scrape Issue - Sublime, Chrome

First off - no experience with Python, Sublime Text or Selenium - please be kind..
I am having an issue building a scraper in Python - using Sublime text, Selenium and Chrome. I have updated to the latest Python, Pip and downloaded the correct Chrome Driver. The webpage pops up fine, but get errors.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
#from selenium.webdriver.common.ui import WebDriverWait - commented out due to error
#from selenium.webdriver.common import expected_conditions as EC - commented out due to error
import time
driver = webdriver.Chrome("C:\Program Files (x86)\chromedriver.exe")
driver.get ("https://www.royalcaribbean.com/account/cruise-planner/category/pt_beverage/product/3222?bookingId=1429192&shipCode=NV&sailDate=20220907")
print(driver.find_element(by=By.CLASS_NAME, value='ng-tns-c210-4 text-promo-1').text)***
wait = WebDriverWait(driver, 20)
driver.get("https://www.royalcaribbean.com/account/cruise-planner/category/pt_beverage/product/3222?bookingId=1429192&shipCode=NV&sailDate=20220907")
elem=wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.ng-tns-c210-4.text-promo-1')))
print(elem.text)
Your class name is actually multiple class names so using css selector.
Outputs:
$80.99
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

How to upload file using Selenium with Python

I want to attach file through below link. How can I do that using selenium python?
My code:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
driver=webdriver.Chrome(executable_path="C:\Program Files (x86)\Drivers\chromedriver.exe")
driver.get("https://www.tascoutsourcing.com/job-openings/quality-assurance-engineer/")
driver.maximize_window()
time.sleep(10)
driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
driver.find_element_by_xpath("//*[#id='content']/div[2]/div[2]/div/div/div/div[1]/div/div/div/button").click()
driver.find_element_by_xpath("//*[#id='content']/div[2]/div[2]/div/div/div/section/div/form/div[3]/div[3]/div").click()
paul, please try to run below code - I have ran the same code in my system and it is working for me.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import time
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
action = ActionChains(driver)
driver.get("https://www.tascoutsourcing.com/job-openings/quality-assurance-engineer/")
# Click on Apply Button
Apply_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[text()='Apply Now']/parent::button")))
action.move_to_element(Apply_btn).click().perform()
time.sleep(2)
Upload_File = driver.find_element_by_xpath('/html/body/label/input').send_keys("YourFilePath")
Do let me know if it shows error.

How to click on onclick link with image? Python Selenium

I'm just learning how to webscrape dynamically using Selenium in Python. I'm currently trying to click on a link within the webpage to page forward over search results.
So far this is the code that I'm using:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
elem = driver.find_element_by_css_selector("img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
This is the HTML that corresponds with the element I'd like to click on:
`<img src="/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif" width="81" height="16" border="0">`
From my somewhat limited knowledge of HTML this seems like the link is actually embedded in the gif which is why I tried to use the CSS selector that goes along with that image. But this did not work.
Any guidance would be greatly appreciated!
Update:
I changed my code by adding the following import
from selenium.webdriver.common.by import By
And I changed the following:
elem = driver.find_element(By.CSS_SELECTOR, "img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
Now I get an error for "no such element."
There is an iframe.You need to switch to iframe first to access the element.Try below code.use WebDriverWait to handle dynamic element.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.NAME, 'ventana02')))
elem = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
(By.XPATH, "//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()
EDITED
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
driver.switch_to.frame(0)
elem=WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()

Export the Pdf using the selenium webdriver and python

In my code, I am trying to click on the View Application button containing the pdf. But it is unable to click and export the View Application PDF.
I want the View Application PDF should be exported automatically through selenium webdriver in python.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)
WebDriverWait(driver,
20).until(EC.element_to_be_clickable((By.XPATH,"//div[#class='search-
pro-details']//a[contains(.,'Search Project Details')]"))).click()
Registered_Project_radio= WebDriverWait(driver,
10).until(EC.element_to_be_clickable((By.ID,"Promoter")))
driver.execute_script("arguments[0].click();",Registered_Project_radio)
Application = driver.find_element_by_id("CertiNo")
Application.send_keys("P50500000005")
Search = WebDriverWait(driver,
10).until(EC.element_to_be_clickable((By.ID,"btnSearch")))
driver.execute_script("arguments[0].click();",Search)
View = [item.get_attribute('href') for item in
driver.find_elements_by_tag_name("a") if
item.get_attribute('href') is not None]
View = View[0]
request = urllib.request.Request(View)
btn = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,
"//a[#class='btn btn-md btn-success' and #id='btnShow_2017']")))
driver.execute_script("arguments[0].click();",btn)

How to select php filter and downloading file with Selenium

I've been trying to select the filter for "pitchers" and download to Excel from here: https://www.rotowire.com/baseball/stats.php
I've tried the following but am getting an error/don't sure how to select the necessary items
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Firefox()
driver.get("https://www.rotowire.com/baseball/stats.php")
elem = driver.find_elements_by_xpath("//div[contains(#class,'filter-tab is-selected')]")
Ideally (for now), the script runs and downloads the file locally.
This downloads the pitcher data. It seems that there is some type of hidden html in the website. That's why the code finds the entire table first, then the excel button.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("https://www.rotowire.com/baseball/stats.php")
pitchers = driver.find_element_by_xpath("//div[#data-name='P']")
pitchers.click()
player_stats_elem = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//div[#data-pos='p']")))
excel_download = WebDriverWait(player_stats_elem, 5).until(EC.presence_of_element_located((By.XPATH, ".//img[#alt='Excel']/..")))
excel_download.click()

Categories

Resources