How to scrape Next button on Linkedin with Selenium using Python?

How to scrape Next button on Linkedin with Selenium using Python? - python

I am trying to scrape LinkedIn website using Selenium. I can't parse Next button. It resists as much as it can. I've spent a half of a day to adress this, but all in vain.
I tried absolutely various options, with text and so on. Only work with start ID but scrape other button.
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//button[#aria-label='Далее']"}
This is quite common for this site:
//*[starts-with(#id,'e')]
My code:
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from time import sleep
chrome_driver_path = Service("E:\programming\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=chrome_driver_path)
url = "https://www.linkedin.com/feed/"
driver.get(url)
SEARCH_QUERY = "python developer"
LOGIN = "EMAIL"
PASSWORD = "PASSWORD"
sleep(10)
sign_in_link = driver.find_element(By.XPATH, '/html/body/div[1]/main/p[1]/a')
sign_in_link.click()
login_input = driver.find_element(By.XPATH, '//*[#id="username"]')
login_input.send_keys(LOGIN)
sleep(1)
password_input = driver.find_element(By.XPATH, '//*[#id="password"]')
password_input.send_keys(PASSWORD)
sleep(1)
enter_button = driver.find_element(By.XPATH, '//*[#id="organic-div"]/form/div[3]/button')
enter_button.click()
sleep(25)
lens_button = driver.find_element(By.XPATH, '//*[#id="global-nav-search"]/div/button')
lens_button.click()
sleep(5)
search_input = driver.find_element(By.XPATH, '//*[#id="global-nav-typeahead"]/input')
search_input.send_keys(SEARCH_QUERY)
search_input.send_keys(Keys.ENTER)
sleep(5)
people_button = driver.find_element(By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button')
people_button.click()
sleep(5)
page_button = driver.find_element(By.XPATH, "//button[#aria-label='Далее']")
page_button.click()
sleep(60)
Chrome inspection of button
Next Button

OK, there are several issues here:
The main problem why your code not worked is because the "next" pagination is initially even not created on the page until you scrolling the page, so I added the mechanism, to scroll the page until that button can be clicked.
it's not good to create locators based on local language texts.
You should use WebDriverWait expected_conditions explicit waits, not hardcoded pauses.
I used mixed locators types to show that sometimes it's better to use By.ID and sometimes By.XPATH etc.
the following code works:
import time
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.linkedin.com/feed/"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(#href,'login')]"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "username"))).send_keys(my_email)
wait.until(EC.element_to_be_clickable((By.ID, "password"))).send_keys(my_password)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[contains(#class,'search-global')]")))
search_input.click()
search_input.send_keys("python developer" + Keys.ENTER)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="search-reusables__filters-bar"]/ul/li[1]/button'))).click()
wait = WebDriverWait(driver, 4)
while True:
try:
next_btn = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.artdeco-pagination__button.artdeco-pagination__button--next")))
next_btn.location_once_scrolled_into_view
time.sleep(0.2)
next_btn.click()
break
except:
driver.execute_script("window.scrollBy(0, arguments[0]);", 600)

Related

ElementClickInterceptedException Error Selenium

I keep getting the ElementClickInterceptedException on this script I'm writing, I'm supposed to click a link that will open a new window, scrape from the new window and close it and move to the next link to scrape, but it just won't work, it gives the error after max 3 link clicks. I saw a similar question here and I tried using wait.until(EC.element_to_be_clickable()) and also maximized my screen but still did not work for me. Here is the site I am scraping from trying to scrape all the games for each day and here is a chunk of the code I'm using
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
from time import sleep
l = "https://www.flashscore.com/"
options = FirefoxOptions()
#options.add_argument("--headless")
driver = webdriver.Firefox(executable_path="geckodriver.exe",
firefox_options=options)
driver.install_addon('C:\\Windows\\adblock_plus-3.10.1-an+fx.xpi')
driver.maximize_window()
driver.get(l)
driver.implicitly_wait(5)
cnt = 0
sleep(5)
wait = WebDriverWait(driver, 20)
a = driver.window_handles[0]
b = driver.window_handles[1]
driver.switch_to.window(a)
# Close Adblock tab
if 'Adblock' in driver.title:
driver.close()
driver.switch_to.window(a)
else:
driver.switch_to.window(b)
driver.close()
driver.switch_to.window(a)
var1 = driver.find_elements_by_xpath("//div[#class='leagues--live ']/div/div")
knt = 0
for i in range(len(var1)):
if (var1[i].get_attribute("id")):
knt += 1
#sleep(2)
#driver.switch_to.window(driver.window_handles)
var1[i].click()
sleep(2)
#var2 = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[contains(#classs, 'event__match event__match--last event__match--twoLine')]")))
print(len(driver.window_handles))
driver.switch_to.window(driver.window_handles[1])
try:
sleep(4)
driver.close()
driver.switch_to.window(a)
#sleep(3)
except(Exception):
print("Exception caught")
#WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.CLASS_NAME, "event__match event__match--last event__match--twoLine")))
sleep(10)
driver.close()
Any ideas to help please.

It looks like the element you are trying to click on is covered by a banner ad or something else like a cookie message.
To fix this you can scroll down to the last element using the following code:
driver.execute_script('\
let items = document.querySelectorAll(\'div[title="Click for match detail!"]\'); \
items[items.length - 1].scrollIntoView();'
)
Add it before clicking on the desired element in the loop.
I tried to make a working example for you but it works on chromedriver not gecodriver:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
service = Service(executable_path='your\path\to\chromedriver.exe')
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 5)
url = 'https://www.flashscore.com/'
driver.get(url)
# accept cookies
wait.until(EC.presence_of_element_located((By.ID, 'onetrust-accept-btn-handler'))).click()
matches = driver.find_elements(By.CSS_SELECTOR, 'div[title="Click for match detail!"]')
for match in matches:
driver.execute_script('\
let items = document.querySelectorAll(\'div[title="Click for match detail!"]\'); \
items[items.length - 1].scrollIntoView();'
)
match.click()
driver.switch_to.window(driver.window_handles[1])
print('get data from open page')
driver.close()
driver.switch_to.window(driver.window_handles[0])
driver.quit()
It works in both normal and headless mode

Selenium - bypass ads Google_Vignette

I'm trying to crawl a site and am running into a google ad. I think I've found the iframe of it but I can't find the element to click to remove the ad. I've spent about 7 hours now and think this is over my head. Help v much appreciated.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path ='C:\/Users\/gblac\/OneDrive\/Desktop\/Chromedriver.exe')
url = 'https://free-mp3-download.net/'
driver.get(url)
WebDriverWait(driver, 4)
search = driver.find_element(By.ID,'q')
search.send_keys('testing songs')
search.click()
button = driver.find_element(By.ID,'snd')
button.click()
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,"results_t")));
results = driver.find_element(By.ID,'results_t').find_elements(By.TAG_NAME,'tr')
results[0].find_element(By.TAG_NAME,'a').click()
# The code to remove the ad would go here
# driver.find_elements(By.CSS_SELECTOR,'[text()="Close"]').click()

Add the below code block in your code - before searching any text:
time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("google-auto-placed");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.execute_script("""
const elements = document.getElementsByClassName("adsbygoogle adsbygoogle-noablate");
while (elements.length > 0) elements[0].remove();
""")
time.sleep(1)
driver.find_element(By.ID,"q").send_keys("tamil songs")
driver.find_element(By.ID,"snd").click()
It will close the 2 ad blocks in that page, but if you refresh or move forward and backward, the ads will display again, then you have to remove those ad blocks again using the above code, please add the code for that condition.
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME,'container'))).click()
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID,"results_t")))
results = driver.find_element(By.ID,'results_t').find_elements(By.TAG_NAME,'tr')
results[0].find_element(By.TAG_NAME,'a').click()
time.sleep(2)
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(1)
# handling captcha
iframe_captcha = driver.find_element(By.XPATH,".//iframe[#title='reCAPTCHA']")
driver.switch_to.frame(iframe_captcha)
time.sleep(1)
driver.find_element(By.CSS_SELECTOR, ".recaptcha-checkbox-border").click()
time.sleep(2)
driver.switch_to.default_content()
driver.find_element(By.XPATH, ".//button[contains(text(),'Download')]").click()

Unable to click button - Python / Selenium & bs4

Im trying to click a button and tried already several different methods with no avail.
Method 1:
WebDriverWait(driver, 20).until(EC.invisibility_of_element((By.CLASS_NAME, 'sc-gsDKAQ fWOgSr')))
driver.execute_script("arguments[0].click();", WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CLASS_NAME, 'sc-gsDKAQ fWOgSr'))))
Method 2:
driver.find_element(by=By.CLASS_NAME, value='sc-gsDKAQ fWOgSr').click()
Method 3:
btn = driver.find_element(by=By.CLASS_NAME, value='sc-gsDKAQ fWOgSr')
btn.click()
Error messages:
M1: Timeout
M2: NoSuchElementException
M3: NoSuchElementException
I tried the same by looking with Xpath, no success. Had another Error in the past, which stated that "String" cannot be interacted with.

That element is in a #shadow root element, so it's a bit fiddly to reach. Also, page seems to react at mouse movements (and only load after it detect some mouse movement). The following code seems to work: setup is on Linux, but you can adapt it to your own, just observe the imports and the part after defining the browser:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
import time as t
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(browser)
url = 'https://www.immobilienscout24.at/regional/wien/wien/wohnung-kaufen'
browser.get(url)
page_title = WebDriverWait(browser, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[title='Zur Homepage']")))
actions.move_to_element(page_title).perform()
parent_div = WebDriverWait(browser, 20000).until(EC.presence_of_element_located((By.ID, "usercentrics-root")))
shadowRoot = browser.execute_script("return arguments[0].shadowRoot", parent_div)
try:
button = WebDriverWait(shadowRoot, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid='uc-accept-all-button']")))
button.click()
print('clicked')
except Exception as e:
print(e)
print('no click button')
This will click the button, and print in the terminal:
clicked

Scraping a web content executing an event but unexpected click event appeared

So I am trying to make a get request, access the a web execute an event ( in this case press a button to extend a table and scrape the content.
This is my code:
def exec_func():
wait = WebDriverWait(driver, 20)
wait.until(expected_conditions.visibility_of_element_located(
(By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
element = driver.find_element_by_xpath("//a[#ng-click='listLengthMax = 3000']")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
element.click()
return driver.page_source
However an ElementClickInterceptedError arised ! This seems to happen because some other element obscures the element i am looking forward to click.
This is the web I am interested in
How could I scrape the content of the table avoiding the issue with this blocking

You have two options
Option 1: scroll into view the element
def exec_func():
wait = WebDriverWait(driver, 20)
wait.until(expected_conditions.visibility_of_element_located(
(By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
element = driver.find_element_by_xpath("//a[#ng-click='listLengthMax = 3000']")
element.location_once_scrolled_into_view #scroll into view the element first
actions = ActionChains(driver)
actions.move_to_element(element).perform()
element.click()
return driver.page_source
Option 2: Accept the cookie
def exec_func():
wait = WebDriverWait(driver, 20)
#accept the cookie
wait.until(expected_conditions.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"[id^='sp_message_iframe_']")))
wait.until(expected_conditions.element_to_be_clickable((By.XPATH,"//button[#title='Accept']"))).click()
driver.switch_to.default_content()
#=================================
wait.until(expected_conditions.visibility_of_element_located(
(By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
element = driver.find_element_by_xpath("//a[#ng-click='listLengthMax = 3000']")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
element.click()
return driver.page_source

This code is working fine for me.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
def exec_func():
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.implicitly_wait(10)
driver.set_page_load_timeout(10)
driver.maximize_window()
driver.get("https://www.telegraph.co.uk/markets-hub/assets/shares/")
wait = WebDriverWait(driver, 20)
wait.until(expected_conditions.visibility_of_element_located(
(By.CSS_SELECTOR, "div.section-box.kurser-table-container")))
element = driver.find_element_by_xpath("//a[#ng-click='listLengthMax = 3000']")
actions = ActionChains(driver)
actions.move_to_element(element).perform()
element.click()
return driver.page_source
print(exec_func())

Seleniumbase TimeoutException only when using chromium from a script

I have a url login script that was working fine for a few days but then quit, that used the chromium webdriver
driver = webdriver.Chrome('/snap/bin/chromium.chromedriver', options=option)
I can access the URL using Firefox or Chromium without my script (just normal). Or Firefox in a script.
But using Chromium in a script, it clicks the login button and just hangs, that eventually leads to a timeout
selenium.common.exceptions.TimeoutException: Message:
If I open a new tab and try to login without the script but manually, it still hangs. A login is impossible (on my target site) within a launched browser from selenium, only.
#!/usr/bin/python3
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from seleniumbase import BaseCase
import time
import random
user_name = 'user'
password = 'password'
list = [
]
minptime = 4
maxptime = 24
list_length = len(list)
print('Array length ', list_length)
class MyMeClass(BaseCase):
def method_a():
option = webdriver.ChromeOptions()
option.add_argument('--disable-notifications')
option.add_argument("--mute-audio")
driver = webdriver.Chrome('/snap/bin/chromium.chromedriver', options=option)
driver.get("https://me.com/")
print(driver.title)
element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.login-btn.btn-shadow#login-fake-btn[data-testid='login-fake-btn']"))).click()
driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/form/input[1]').send_keys(user_name)
driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/form/input[2]').send_keys(password)
time.sleep(5)
driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/form/button').click() #button click in question
time.sleep(8)
driver.get(url)
print(driver.current_url)
return driver
driver = MyMeClass.method_a()
button I am accessing
How do I use/unblock the use of this login button in chromium in a script?

Try below code:
with contains
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Log in')]"))).click()
Class Name
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#class='btn-login btn-shadow']"))).click()
Note : please add below imports to your solution
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
Working Solution:
driver.get(" your url ")
wait = WebDriverWait(driver,30)
element = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#id='login-fake-btn']")))
print element.text
element.click()
wait = WebDriverWait(driver,30)
element = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[#id='email']"))).send_keys("Test")
wait = WebDriverWait(driver,30)
element = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[#id='password']"))).send_keys("Test")
element1 = wait.until(EC.presence_of_element_located((By.XPATH, "//div[#id='login-overlay']//div//form//button")))
element1.click()
output :

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to scrape Next button on Linkedin with Selenium using Python? - python

Related

ElementClickInterceptedException Error Selenium

Selenium - bypass ads Google_Vignette

Unable to click button - Python / Selenium & bs4

Scraping a web content executing an event but unexpected click event appeared

Seleniumbase TimeoutException only when using chromium from a script

Categories

Resources