How to get the links (href) of several matches using python Selenium? - python

If you visit this site,
https://www.premierleague.com/results
You will be able to see several match results. If you click on each match, you will be directed to another website.
My question is how can I get the href (link) of each match.
# Asker's attempt: collect the "href" of every element matched by the XPath.
# NOTE(review): the match rows on this page appear to expose their link in a
# "data-href" attribute rather than "href" - confirm in the browser inspector.
links = driver.find_elements(By.XPATH, '//*[@id="mainContent"]/div[3]/div[1]')
for link in links:
    x = link.get_attribute("href")
    List.append(x)
This is what I have so far and it is not working.

I see elements like
<div data-href="//www.premierleague.com/match/66686" ...>
and you could search
//div[@data-href]
and later use get_attribute("data-href")
Full working code
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
#import time
url = 'https://www.premierleague.com/results'

#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)

wait = WebDriverWait(driver, 10)
#time.sleep(5)

# close popup window with "Accept All Cookies"
button = wait.until(EC.visibility_of_element_located((By.XPATH, '//button[text()="Accept All Cookies"]')))
button.click()

# select every <div> that has a "data-href" attribute ("@" addresses an attribute in XPath)
all_items = driver.find_elements(By.XPATH, '//div[@data-href]')
print('len(all_items):', len(all_items))

# print the link stored on each matched element
for item in all_items:
    print(item.get_attribute('data-href'))
Result:
len(all_items): 40
//www.premierleague.com/match/66686
//www.premierleague.com/match/66682
//www.premierleague.com/match/66687
//www.premierleague.com/match/66689
//www.premierleague.com/match/66691
//www.premierleague.com/match/66684
//www.premierleague.com/match/66705
//www.premierleague.com/match/66677
//www.premierleague.com/match/66674
//www.premierleague.com/match/66675
//www.premierleague.com/match/66676
//www.premierleague.com/match/66679
//www.premierleague.com/match/66672
//www.premierleague.com/match/66678
//www.premierleague.com/match/66680
//www.premierleague.com/match/66681
//www.premierleague.com/match/66673
//www.premierleague.com/match/66633
//www.premierleague.com/match/66584
//www.premierleague.com/match/66513
//www.premierleague.com/match/66637
//www.premierleague.com/match/66636
//www.premierleague.com/match/66635
//www.premierleague.com/match/66666
//www.premierleague.com/match/66670
//www.premierleague.com/match/66668
//www.premierleague.com/match/66665
//www.premierleague.com/match/66667
//www.premierleague.com/match/66669
//www.premierleague.com/match/66654
//www.premierleague.com/match/66656
//www.premierleague.com/match/66659
//www.premierleague.com/match/66657
//www.premierleague.com/match/66655
//www.premierleague.com/match/66652
//www.premierleague.com/match/66660
//www.premierleague.com/match/66661
//www.premierleague.com/match/66653
//www.premierleague.com/match/66658
//www.premierleague.com/match/66524

Related

unable to send information or click specific buttons on the website(selenium python)

I think this is more of my inability to read html of a specific website
I am trying to operate some things on this website:
https://fred.stlouisfed.org/series/DGS10
I am having 2 issues with it.
I tried to input a date range, and I also tried to set the date range to the maximum, using either of the approaches below (as long as I get 30 years of data or more, I am OK). Below is my attempt to input a specific date and simulate pressing the Enter key on the keyboard.
# Type a start date into the date-range input, then send the Enter key code.
range_search_bar = driver.find_element(By.XPATH, "//*[@id=\"input-cosd\"]")
range_search_bar.clear()
range_search_bar.send_keys("1980-10-10")
range_search_bar.send_keys(u'\ue007')
and below is an attempt to click the "max" date range button instead of inputting a date - I tried to do this first since this is simpler than the first code.
# Click the "max" date-range button instead of typing a date.
max_range_button = driver.find_element(By.XPATH, "//*[@id=\"zoom-all\"]")
max_range_button.click()
Unfortunately neither of them seem to change the range of the date...
I am suspecting that they are in a different iframe? But I could not find the iframe change...
I also tried to click the "download" button and then click the type "CSV(data)" button(which initiates the download) by doing the below code.
# Click the download button, then the CSV entry that starts the download.
download_10_button = driver.find_element(By.XPATH, "//*[@id=\"download-button\"]/span")
download_10_button.click()
download_csv_button = driver.find_element(By.XPATH, "//*[@id=\"download-data-csv\"]")
download_csv_button.click()
But I am getting this error : ElementNotInteractableException: element not interactable
Any ideas on what I might be doing wrong?
Thanks!
I can download file if I use time.sleep(...) because JavaScript needs time to load data and to open menu when it clicks Download
from selenium import webdriver
# By is required for the By.XPATH locators used below
from selenium.webdriver.common.by import By
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time

url = 'https://fred.stlouisfed.org/series/DGS10'

#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
# fixed pause so the page's JavaScript has time to load the data
time.sleep(5)

# switch the chart to the maximum date range
max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
max_range_button.click()
time.sleep(5)

# open the download menu
download_10_button = driver.find_element(By.XPATH, '//*[@id="download-button"]/span')
download_10_button.click()
# give the menu time to open before clicking the CSV entry
time.sleep(2)

download_csv_button = driver.find_element(By.XPATH, '//*[@id="download-data-csv"]')
download_csv_button.click()
Or you may use waits for this.
But I still needed sleep to wait for loading all data.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time
url = 'https://fred.stlouisfed.org/series/DGS10'

#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
# fixed pause kept on purpose: the author still needed it for the data to load
time.sleep(5)

# switch the chart to the maximum date range
max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
max_range_button.click()
time.sleep(5)

# wait (up to 10 s) until each control is visible before clicking it
download_10_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-button"]/span')))
download_10_button.click()

download_csv_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-data-csv"]')))
download_csv_button.click()
And the same with putting date
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from selenium.common.exceptions import NoSuchElementException, TimeoutException
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time
url = 'https://fred.stlouisfed.org/series/DGS10'

#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
# fixed pause so the page's JavaScript has time to load the data
time.sleep(5)

#max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
#max_range_button.click()

# type the start date into the date-range input and confirm with Enter
range_search_bar = driver.find_element(By.XPATH, '//*[@id="input-cosd"]')
range_search_bar.clear()
range_search_bar.send_keys("1980-10-10")
range_search_bar.send_keys(Keys.ENTER) # u'\ue007')
time.sleep(5)

# wait (up to 10 s) until each control is visible before clicking it
download_10_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-button"]/span')))
download_10_button.click()

download_csv_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-data-csv"]')))
download_csv_button.click()

How to get all links from a webpage using selenium?

I am trying to webscrape a site using Python, Selenium, Beautifulsoup.
When I try to get all the links, it returns an invalid string.
This is what I have tried
Can someone help me please?
from time import sleep
from selenium.webdriver.common.by import By
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('https://www.hirist.com/c/filter/mobile-applications-jobs-in-cochin%20kochi_trivandrum%20thiruvananthapuram-5-70_75-0-0-1-0-0-0-0-2.html?ref=homepagecat')
# fixed pause to let the page finish loading
sleep(10)

# select the job-feed wrapper <div> elements
links = driver.find_elements(by=By.XPATH, value='.//div[@class="jobfeed-wrapper multiple-wrapper"]')
for link in links:
    # NOTE: the attribute value is not stored or printed here;
    # the print below outputs the WebElement object itself
    link.get_attribute('href')
    print(link)
The problem is your XPath selection: you select the <div>, which does not have an href attribute. Select its first <a> as well, like .//div[@class="jobfeed-wrapper multiple-wrapper"]/a, and it will work:
# select the <a> child of each job-feed wrapper and print its href
links = driver.find_elements(by=By.XPATH, value='.//div[@class="jobfeed-wrapper multiple-wrapper"]/a')
for link in links:
    print(link.get_attribute('href'))
Example
Instead of time.sleep, use WebDriverWait to check whether specific elements are available.
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'https://www.hirist.com/c/filter/mobile-applications-jobs-in-cochin%20kochi_trivandrum%20thiruvananthapuram-5-70_75-0-0-1-0-0-0-0-2.html?ref=homepagecat'

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get(url)

# wait up to 10 s for the job links to be present instead of sleeping a fixed time
wait = WebDriverWait(driver, 10)
links = wait.until(EC.presence_of_all_elements_located((By.XPATH, './/div[@class="jobfeed-wrapper multiple-wrapper"]/a')))

for link in links:
    print(link.get_attribute('href'))
Output
https://www.hirist.com/j/xforia-technologies-android-developer-javakotlin-10-15-yrs-1011605.html?ref=cl&jobpos=1&jobversion=2
https://www.hirist.com/j/firminiq-system-ios-developer-swiftobjective-c-3-10-yrs-1011762.html?ref=cl&jobpos=2&jobversion=2
https://www.hirist.com/j/firminiq-system-android-developer-kotlin-3-10-yrs-1011761.html?ref=cl&jobpos=3&jobversion=2
https://www.hirist.com/j/react-native-developer-mobile-app-designing-3-5-yrs-1009438.html?ref=cl&jobpos=4&jobversion=2
https://www.hirist.com/j/flutter-developer-iosandroid-apps-2-3-yrs-1008214.html?ref=cl&jobpos=5&jobversion=2
https://www.hirist.com/j/accubits-technologies-react-native-developer-ios-android-platforms-3-7-yrs-1003520.html?ref=cl&jobpos=6&jobversion=2
https://www.hirist.com/j/appincubator-react-native-developer-iosandroid-platform-2-7-yrs-1001957.html?ref=cl&jobpos=7&jobversion=2
You didn't declare the path to chromedriver on your computer. Check where chromedriver is located, then try
driver = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH)

Error while clicking next button with Selenium Python

I am new in Python and I am trying to Selenium, however, I have a problem with my code, I can not click the next button on pagination. This is my code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path='chromedriver.exe')
data = []
page_url = "https://pdb.irb.hr/search?q=&qId=&type=title&limit=2&page=1"
driver.get(page_url)
time.sleep(2)

links = driver.find_elements_by_xpath(
    '//*[@id="searchResultList"]/li[2]/p[1]/a')

# outer loop: one pass per result page; inner loop: visit each matched link
for x in range(20):
    for i in range(len(links)):
        # look the links up again by index on every pass, then open one
        driver.find_elements_by_xpath(
            '//*[@id="searchResultList"]/li[2]/p[1]/a')[i].click()
        time.sleep(2)
        title = driver.find_element_by_xpath(
            "//html/body/div[2]/div/h2").text
        print(title)
        data.append((title))
        driver.implicitly_wait(5)
        # go back to the result list
        driver.execute_script("window.history.go(-1)")
        driver.implicitly_wait(5)
    # this is the "next" click the question is about
    driver.find_element_by_css_selector('a[rel="next"]').click()
driver.close()
Can somebody tell me what I am doing wrong and how to target that next button?
Thank you
The only problem is that you have to scroll the page until the page numbers become visible.
The code below is tested and works fine; it was able to navigate through the next 20 pages.
import time
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# raw string: in a normal literal "\u" starts a unicode escape and
# '\user1' would be a SyntaxError
driver = webdriver.Chrome(executable_path=r'c:\user1\chromedriver.exe')
data = []
page_url = "https://pdb.irb.hr/search?q=&qId=&type=title&limit=2&page=1"
driver.get(page_url)
driver.maximize_window()
time.sleep(2)

links = driver.find_elements_by_xpath('//*[@id="searchResultList"]/li[2]/p[1]/a')
print(len(links))

# outer loop: one pass per result page; inner loop: visit each matched link
for x in range(20):
    for i in range(len(links)):
        driver.find_elements_by_xpath('//*[@id="searchResultList"]/li[2]/p[1]/a')[i].click()
        time.sleep(2)
        title = driver.find_element_by_xpath("//html/body/div[2]/div/h2").text
        print(title)
        data.append((title))
        driver.implicitly_wait(5)
        # go back to the result list
        driver.execute_script("window.history.go(-1)")
        driver.implicitly_wait(5)
    # below code added to make pagination visible for selection
    button = driver.find_element_by_css_selector('a[rel="next"]')
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
    next_btn = driver.find_element_by_css_selector('a[rel="next"]')
    next_btn.click()
driver.close()
The next page button is below some other element there, so to click on it you have to scroll to it first.
Try this:
from selenium.webdriver.common.action_chains import ActionChains
# Move the pointer to the "next" pagination link, then click it.
next_link = driver.find_element_by_css_selector('a[rel="next"]')
ActionChains(driver).move_to_element(next_link).perform()
next_link.click()

How to click on the second link in google search result list using selenium web driver

I want to click on the second link in result in google search area using selenium-web-driver
( Need a method that suits for any google search result )
example page
This is my code, How can I modify the if statement
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
# options must be set before the driver is created to take effect
chrome_options.add_argument("--start-maximized")

# raw string so the backslashes in the Windows path are not read as escapes
driver = webdriver.Chrome(r'C:\Windows\chromedriver.exe', options=chrome_options)
wait = WebDriverWait(driver, 10)
def google(text):
    """Handle a spoken command using the module-level ``driver``.

    If ``text`` contains "first link", wait for the result links on the
    current page and print their text. Otherwise open Google and submit
    ``text`` (with all spaces removed) as the search query.
    """
    if "first link" in text:
        RESULTS_LOCATOR = "//div/h3/a"
        # wait until at least one result link is visible
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, RESULTS_LOCATOR)))
        page1_results = driver.find_elements(By.XPATH, RESULTS_LOCATOR)
        for item in page1_results:
            print(item.text)
        # driver.find_element_by_class_name().click()
    else:
        ggwp = text.replace(" ", "")
        driver.get("https://www.google.com")
        driver.find_element_by_xpath('//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input').send_keys(ggwp)
        driver.find_element_by_xpath('//*[@id="tsf"]/div[2]/div[1]/div[3]/center/input[1]').send_keys(Keys.ENTER)
The second link can be placed in different divs.
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
import os

# chromedriver is expected in the current working directory
browser = webdriver.Chrome(executable_path=os.path.join(os.getcwd(), "chromedriver"))
link = 'http://www.google.com'
browser.get(link)

# search keys
search = browser.find_element_by_name('q')
search.send_keys("python")
search.send_keys(Keys.RETURN)

# click second link
# The link may sit in a different result <div>, so try each position in turn.
# XPath positions are 1-based, so index 0 can never match and is skipped.
for i in range(1, 10):
    try:
        browser.find_element_by_xpath('//*[@id="rso"]/div['+str(i)+']/div/div[2]/div/div/div[1]/a').click()
        break
    except WebDriverException:
        # no clickable link at this position; try the next one
        continue

Facing issues while clicking on some links in a webpage

I've written a script in Python to click on some categories in a webpage. I managed to click on the first two categories but got stuck when it came to initiating the final click. I've given links to two images in which I have marked where to click.
This is the first link where there is a sign (marked with pencil) to click on to enter the second portion.
This is the second link where I get stuck when I try to click on the names (I've marked those names with pencil)
This is the site link.
Script I've tried with so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get("replace_with_above_link")

# first click: wait until the element is clickable, then click it
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "i4ewOd-pzNkMb-ornU0b-b0t70b-Bz112c"))).click()

# second click: the second visible element with role="checkbox"
post = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[role='checkbox']")))[1]
post.click()

# final step: click every visible item that matches the selector
for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".HzV7m-pbTTYe-JNdkSc .suEOdc"))):
    item.click()
driver.quit()
My intention is to click the names cyclically. Thanks in advance.
Try below code to click each item in list:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(URL)

# first click: wait until the element is clickable, then click it
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "i4ewOd-pzNkMb-ornU0b-b0t70b-Bz112c"))).click()

# second click: the second visible element with role="checkbox"
post = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[role='checkbox']")))[1]
post.click()

# click each item except the first one in the list
for item in wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".HzV7m-pbTTYe-JNdkSc .suEOdc")))[1:]:
    item.click()
    # NOTE(review): presumably this button closes the opened item and the wait
    # below confirms it is gone before the next click - verify on the page
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".HzV7m-tJHJj-LgbsSe-Bz112c.qqvbed-a4fUwd-LgbsSe-Bz112c"))).click()
    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".qqvbed-p83tee")))
driver.quit()

Categories

Resources