Page navigation click without using current_url python selenium - python

How to navigate through each page without using driver.current_url? In my full code, I get a bunch of errors once I navigate through the page for a loop. Without it, it runs fine but can only go through one page. I want to navigate through as many pages. Any help appreciated, thanks.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
driver_service = Service(executable_path="C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window() # load web driver
wait = WebDriverWait(driver, 5)
url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template+ '?page={}'
for page in range(2,5):
link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[#data-automation='jobTitle']")]
for job in link_job:
driver.get(job)
try:
quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[#data-automation='job-detail-apply' and #target='_self'])")))
quick_apply.click()
#sleep(3)
except:
print("No records found " + job)
pass
sleep(3)
driver.get(template.format(page))

If I understand you correctly you want to determine dynamically how many pages there are and loop over each of them.
I have managed to achieve this by using a while loop and look on each page if the "Next" button at the bottom is visible. If not, the last page was reached and you can exit the loop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from time import sleep
driver_service = Service(executable_path="C:\\Users\\Stefan\\bin\\chromedriver.exe")
driver = webdriver.Chrome(service=driver_service)
driver.maximize_window() # load web driver
wait = WebDriverWait(driver, 5)
url_test = driver.get('https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA')
url_template = driver.current_url
template = url_template+ '?page={}'
page = 1
while True:
# check if "Next" button is visible
# -> if not, the last page was reached
try:
driver.find_element(By.XPATH, "//a[#title='Next']")
except:
# last page reached
break
link_job = [x.get_attribute('href') for x in driver.find_elements(By.XPATH, "//a[#data-automation='jobTitle']")]
for job in link_job:
driver.get(job)
try:
quick_apply = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, "(//a[#data-automation='job-detail-apply' and #target='_self'])")))
quick_apply.click()
#sleep(3)
except:
print("No records found " + job)
pass
sleep(3)
page += 1
driver.get(template.format(page))
driver.close()

Seems your problem is with StaleElementException when you getting back from job page to jobs search results page.
The simplest approach to overcome this problem is to keep the jobs search results page url.
Actually I changed your code only with this point and it works.
I also changed driver.find_elements(By.XPATH, "//a[#data-automation='jobTitle']") with wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[#data-automation='jobTitle']"))) for better performance.
The code below works, but the web site itself responds badly.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 10)
url = 'https://www.seek.com.au/data-jobs-in-information-communication-technology/in-All-Perth-WA?page={p}'
for p in range(1,20):
driver.get(url)
link_job = [x.get_attribute('href') for x in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[#data-automation='jobTitle']")))]
for job in link_job:
driver.get(job)
try:
wait.until(EC.element_to_be_clickable((By.XPATH, "(//a[#data-automation='job-detail-apply' and #target='_self'])"))).click()
print("applied")
except:
print("No records found " + job)
pass
driver.get(url)

Related

ElementClickInterceptedException Error Selenium

I keep getting the ElementClickInterceptedException on this script I'm writing, I'm supposed to click a link that will open a new window, scrape from the new window and close it and move to the next link to scrape, but it just won't work, it gives the error after max 3 link clicks. I saw a similar question here and I tried using wait.until(EC.element_to_be_clickable()) and also maximized my screen but still did not work for me. Here is the site I am scraping from trying to scrape all the games for each day and here is a chunk of the code I'm using
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
from time import sleep
l = "https://www.flashscore.com/"
options = FirefoxOptions()
#options.add_argument("--headless")
driver = webdriver.Firefox(executable_path="geckodriver.exe",
firefox_options=options)
driver.install_addon('C:\\Windows\\adblock_plus-3.10.1-an+fx.xpi')
driver.maximize_window()
driver.get(l)
driver.implicitly_wait(5)
cnt = 0
sleep(5)
wait = WebDriverWait(driver, 20)
a = driver.window_handles[0]
b = driver.window_handles[1]
driver.switch_to.window(a)
# Close Adblock tab
if 'Adblock' in driver.title:
driver.close()
driver.switch_to.window(a)
else:
driver.switch_to.window(b)
driver.close()
driver.switch_to.window(a)
var1 = driver.find_elements_by_xpath("//div[#class='leagues--live ']/div/div")
knt = 0
for i in range(len(var1)):
if (var1[i].get_attribute("id")):
knt += 1
#sleep(2)
#driver.switch_to.window(driver.window_handles)
var1[i].click()
sleep(2)
#var2 = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[contains(#classs, 'event__match event__match--last event__match--twoLine')]")))
print(len(driver.window_handles))
driver.switch_to.window(driver.window_handles[1])
try:
sleep(4)
driver.close()
driver.switch_to.window(a)
#sleep(3)
except(Exception):
print("Exception caught")
#WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CLASS_NAME, "event__match event__match--last event__match--twoLine")))
sleep(10)
driver.close()
Any ideas to help please.
It looks like the element you are trying to click on is covered by a banner ad or something else like a cookie message.
To fix this you can scroll down to the last element using the following code:
driver.execute_script('\
let items = document.querySelectorAll(\'div[title="Click for match detail!"]\'); \
items[items.length - 1].scrollIntoView();'
)
Add it before clicking on the desired element in the loop.
I tried to make a working example for you but it works on chromedriver not gecodriver:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
service = Service(executable_path='your\path\to\chromedriver.exe')
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 5)
url = 'https://www.flashscore.com/'
driver.get(url)
# accept cookies
wait.until(EC.presence_of_element_located((By.ID, 'onetrust-accept-btn-handler'))).click()
matches = driver.find_elements(By.CSS_SELECTOR, 'div[title="Click for match detail!"]')
for match in matches:
driver.execute_script('\
let items = document.querySelectorAll(\'div[title="Click for match detail!"]\'); \
items[items.length - 1].scrollIntoView();'
)
match.click()
driver.switch_to.window(driver.window_handles[1])
print('get data from open page')
driver.close()
driver.switch_to.window(driver.window_handles[0])
driver.quit()
It works in both normal and headless mode

How to extract the comments count correctly

I am trying to extract number of youtube comments and tried several methods.
My Code:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
DRIVER_PATH = <your chromedriver path>
wd = webdriver.Chrome(executable_path=DRIVER_PATH)
url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
wd.get(url)
wait = WebDriverWait(wd, 100)
time.sleep(40)
v_title = wd.find_element_by_xpath('//*[#id="container"]/h1/yt-formatted-string').text
print("title Is ")
print(v_title)
comments_xpath = '//h2[#id="count"]/yt-formatted-string/span[1]'
v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
#wd.find_element_by_xpath(comments_xpath)
print(len(v_comm_cnt))
I get the following error:
selenium.common.exceptions.TimeoutException: Message:
I get correct value for title but not for comment_cnt. Can any one please guide me what is wrong with my code?
Please note that comments count path - //h2[#id="count"]/yt-formatted-string/span[1] point to correct place if I search the value in inspect element.
Updated answer
Well, it was tricky!
There are several issues here:
This page has some bad java scripts on it making the Selenium webdriver driver.get() method to wait until the timeout for the page loaded while it looks like the page is loaded. To overcome that I used Eager page load strategy.
This page has several blocks of code for the same areas so as sometimes one of them is used (visible) and sometimes the second. This makes working with element locators difficultly. So, here I am waiting for visibility of title element from one of that blocks. In case it was visible - I'm extracting the text from there, otherwise I'm waiting for the visibility of the second element (it comes immediately) and extracting the text from there.
There are several ways to make page scrolling. Not all of them worked here. I found the one that is working and scrolling not too much.
The code below is 100% working, I run it several times.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
options = Options()
options.add_argument("--start-maximized")
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
s = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, desired_capabilities=caps, service=s)
url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
driver.get(url)
driver.maximize_window()
wait = WebDriverWait(driver, 10)
title_xpath = "//div[#class='style-scope ytd-video-primary-info-renderer']/h1"
alternative_title = "//*[#id='title']/h1"
v_title = ""
try:
v_title = wait.until(EC.visibility_of_element_located((By.XPATH, title_xpath))).text
except:
v_title = wait.until(EC.visibility_of_element_located((By.XPATH, alternative_title))).text
print("Title is " + v_title)
comments_xpath = "//div[#id='title']//*[#id='count']//span[1]"
driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
try:
v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
except:
pass
v_comm_cnt = driver.find_element(By.XPATH, comments_xpath).text
print("Video has " + v_comm_cnt + " comments")
The output is:
Title is Music for when you are stressed 🍀 Chil lofi | Music to Relax, Drive, Study, Chill
Video has 834 comments
Process finished with exit code 0

Selenium python getting stuck post driver.get('URL') for Internet Explorer

My selenium script is stuck post driver.get("url") and is not moving forward, later it error out.i am using below code. post executing this is paused for long, I have tried all options from the advance setting of the IE browser
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from tqdm import tqdm
email='XXXXX'
password='XXXXX'
options = webdriver.IeOptions()
options.ignore_protected_mode_settings = True
driver = webdriver.Ie('C:\Program Files (x86)\selenium-
3.141.0\selenium\webdriver\ie\IEdriverServer.exe')
driver.get('https://s2fs.axisbank.com/EFTClient/Account/Login.htm')
email_box = driver.find_element_by_name('username')
email_box.send_keys(email)
pass_box = driver.find_element_by_name('password')
pass_box.send_keys(password)
submit_button = driver.find_element_by_id('loginSubmit')
submit_button.click()
time.sleep(3)
File2393= driver.find_element_by_link_text('Checkbox For Item 919020028802393.csv')
File2393.click()
time.sleep(1)
File3303= driver.find_element_by_link_text('Checkbox For Item 920020034873303.csv')
File3303.click()
time.sleep(1)
download = driver.find_element_by_class('icomoon icon-download2 toolbar-button')
download.click()
print("File is been downloaded")
You are missing a wait / delay before accessing the first element on the page.
You can simply add a sleep there, like this:
driver.get('https://s2fs.axisbank.com/EFTClient/Account/Login.htm')
time.sleep(10)
email_box = driver.find_element_by_name('username')
email_box.send_keys(email)
But it is better to use explicit waits
well, this URL :-
https://s2fs.axisbank.com/EFTClient/Account/Login.htm
is not loading at all in my browser, but if it works for you then you can try with Explicit wait as below :
options = webdriver.IeOptions()
options.ignore_protected_mode_settings = True
driver = webdriver.Ie('C:\Program Files (x86)\selenium-3.141.0\selenium\webdriver\ie\IEdriverServer.exe')
driver.maximize_window()
driver.implicitly_wait(30)
driver.get("https://s2fs.axisbank.com/EFTClient/Account/Login.htm")
wait = WebDriverWait(driver, 10)
email_box = wait.until(EC.element_to_be_clickable((By.NAME, "username")))
email_box.send_keys('email')
These would be the imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I think, the site is not reachable, you can try to use the correct URL to access the page,
Accessing the element, you could use the explicitWait
email_box = WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH,"element_XPATH")))
email_box.send_Keys("UserName")
import
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

Unable to load main page to webscrape and next page doesn't change url

I'm trying to scrape data from: http://sekolah.data.kemdikbud.go.id/
The first landing page can be accessed by going to the url above and clicking on the orange button "Cari sekolah".
The resulting page has the following url: http://sekolah.data.kemdikbud.go.id/index.php/chome/pencarian/. Unfortunately, when url is launched, the result is a page stating specified url cannot be found.
Also, how do i go to next pages if when i click the 2nd, 3rd, etc pages the url doesnt change?
Grateful if you can help, my code so far - havent gotten far:
from selenium import webdriver
from selenium.webdriver import Chrome
import pandas as pd
option = webdriver.ChromeOptions()
option.add_argument('--incognito')
webdriver = "/Users/rainer/Desktop/learnpython/web/chromedriver"
driver = Chrome(executable_path=webdriver, chrome_options=option)
url = 'http://sekolah.data.kemdikbud.go.id/index.php/chome/pencarian/'
driver.get(url)
Induce WebDriverWait() and wait for element_to_be_clickable()
WebDriverWait(driver,15).until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Cari Sekolah']"))).click()
Import following libraries.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Once you reach the next page you need to click each page button to go next page.For an example I have provided range to 15 page you can increase the count.
url="http://sekolah.data.kemdikbud.go.id/"
driver.get(url)
WebDriverWait(driver,15).until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Cari Sekolah']"))).click()
for i in range(2,15):
try:
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//ul[#id='list']//a[text()='" + str(i) + "']"))).click()
print("page number #" + str(i))
time.sleep(1)
except:
print("No such element")
break
Complete code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
option = webdriver.ChromeOptions()
option.add_argument('--incognito')
driver=webdriver.Chrome(options=option)
url="http://sekolah.data.kemdikbud.go.id/"
driver.get(url)
WebDriverWait(driver,15).until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Cari Sekolah']"))).click()
for i in range(2,15):
try:
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, "//ul[#id='list']//a[text()='" + str(i) + "']"))).click()
print("page number #" + str(i))
time.sleep(1)
except:
print("No such element")
break

How to click on the second link in google search result list using selenium web driver

I want to click on the second link in result in google search area using selenium-web-driver
( Need a method that suits for any google search result )
example page
This is my code, How can I modify the if statement
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
global driver
driver = webdriver.Chrome('C:\Windows\chromedriver.exe', options=chrome_options)
chrome_options.add_argument("--start-maximized")
wait = WebDriverWait(driver, 10)
def google(text):
if "first link" in text:
RESULTS_LOCATOR = "//div/h3/a"
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.XPATH, RESULTS_LOCATOR)))
page1_results = driver.find_elements(By.XPATH, RESULTS_LOCATOR)
for item in page1_results:
print(item.text)
# driver.find_element_by_class_name().click()
else:
ggwp=text.replace(" ", "")
driver.get("https://www.google.com")
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input').send_keys(ggwp)
driver.find_element_by_xpath('//*[#id="tsf"]/div[2]/div[1]/div[3]/center/input[1]').send_keys(Keys.ENTER)
Second link can by placed in different div-s.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
browser = webdriver.Chrome(executable_path=os.path.abspath(os.getcwd()) + "/chromedriver")
link = 'http://www.google.com'
browser.get(link)
# search keys
search = browser.find_element_by_name('q')
search.send_keys("python")
search.send_keys(Keys.RETURN)
# click second link
for i in range(10):
try:
browser.find_element_by_xpath('//*[#id="rso"]/div['+str(i)+']/div/div[2]/div/div/div[1]/a').click()
break
except:
pass

Categories

Resources