Clicking each link under a specific div - python selenium - python

I am trying to click each link in the ListNews div in the below website (chinalaborwatch).
I have done a bit of research and the following should have worked, but instead, it only clicks on one link and then it stops.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path=r"C:\webdrivers\chromedriver.exe")
driver.get("http://www.chinalaborwatch.org/news")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,'/html/body/form/div[5]/div/div[2]'))).click()
What am I missing?
thanks!

You could get the url list firstly.Then visit them and scrape the data what you want:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver = webdriver.Chrome()
driver.get("http://www.chinalaborwatch.org/news")
element_list = driver.find_elements_by_css_selector('#form1 > div:nth-child(5) > div > div.ListNews > div')
url_list = [element.find_element_by_tag_name('a').get_attribute('href') for element in element_list] # get all the url
for i in url_list:
driver.get(i) # switch the url
# then it is your work,scrape the text you want.

Related

WebScraping with Selenium Python Question

I have this code :
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver import EdgeOptions
import os
os.environ['PATH'] += "C:\\Users\\czoca\\PycharmProjects\\pythonProject4\\chromedriver.exe"
driver = webdriver.Chrome()
driver.get("https://www.teintes.fr/")
driver.implicitly_wait(10)
myelement = driver.find_element(By.XPATH, "/html/body/div[6]/div[2]/div[1]/div[2]/div[2]/button[1]/p")
myelement = driver.find_element(By.NAME, "Carens III")
myelement1.click()
myelement.click()
Everything seems ok I am following some tutorials and documentation, for the XPATH and I tried other attributes..
But I have a pop up to consent to the web terms that i have to press Autorizate. But it wont click on it. Any ideia why?enter image description here
It is the button "Autoriser"
You can try this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
url = 'https://www.teintes.fr/'
driver.get(url)
accept_cookies_button = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, "//*[text()='Autoriser']")))
accept_cookies_button.click()
Advices:
Use recognizables selectors
It is a good tip to wait for the element you will use

Unable to select drop down using Python Selenium

I am trying to scrape this website: http://sekolah.data.kemdikbud.go.id/
I want to select "Jenjang" field, "SMA" value. After that, need to click "Cari Sekolah" button
Unfortunately, my code does not work. I manage to select SMA but then can't click "Cari Sekolah" to start the query. Anyone knows how to fix this.
Here is my code:
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.webdriver.support.ui import Select
option = webdriver.ChromeOptions()
option.add_argument('--incognito')
webdriver = "/Users/rs26/Desktop/learnpython/web/chromedriver"
driver = Chrome(executable_path=webdriver, chrome_options=option)
url="http://sekolah.data.kemdikbud.go.id/"
driver.get(url)
wait = WebDriverWait(driver,15)
select_element = Select(driver.find_element_by_id("bentuk"))
select_element.select_by_value("SMA")
wait.until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Cari Sekolah']"))).click()
Please find below solution to select from custom dropdown
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# # Solution 1:
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.get('http://sekolah.data.kemdikbud.go.id/')
driver.maximize_window()
element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "select2-bentuk-container")))
element.click()
list=WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//input[#class='select2-search__field']")))
list.send_keys("SMA")
select=WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "select2-results__option")))
select.click()
You can use form button[type=submit] css selector to click.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,"form button[type=submit]"))).click()

How to click on onclick link with image? Python Selenium

I'm just learning how to webscrape dynamically using Selenium in Python. I'm currently trying to click on a link within the webpage to page forward over search results.
So far this is the code that I'm using:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
elem = driver.find_element_by_css_selector("img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
This is the HTML that corresponds with the element I'd like to click on:
`<img src="/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif" width="81" height="16" border="0">`
From my somewhat limited knowledge of HTML this seems like the link is actually embedded in the gif which is why I tried to use the CSS selector that goes along with that image. But this did not work.
Any guidance would be greatly appreciated!
Update:
I changed my code by adding the following import
from selenium.webdriver.common.by import By
And I changed the following:
elem = driver.find_element(By.CSS_SELECTOR, "img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
Now I get an error for "no such element."
There is an iframe.You need to switch to iframe first to access the element.Try below code.use WebDriverWait to handle dynamic element.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.NAME, 'ventana02')))
elem = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
(By.XPATH, "//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()
EDITED
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
driver.switch_to.frame(0)
elem=WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()

Selenium Python Script throws no element error exception even though the x path is right?

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
driver=webdriver.Chrome()
driver.get("https://paytm.com/")
driver.maximize_window()
driver.find_element_by_class_name("login").click()
driver.implicitly_wait(10)
driver.find_element_by_xpath("//md-input-container[#class='md-default-theme md-input-invalid']/input[#id='input_0']").send_keys("99991221212")
In the above code, I have verified the xpath using fire bug its highlighting the correct element. But when the script run its failing? Can you help me folks?
In selenium each frame is treated individually. Since the login is in a separate iframe element, you need to switch to it first using:
iframe = driver.find_elements_by_tag_name('iframe')[0]
driver.switch_to_frame(iframe)
Before trying to interact with it's elements.
Or in this case, you would wait for the frame to exist, and it would be:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://paytm.com/")
driver.maximize_window()
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "login"))).click()
wait.until(EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, "iframe")))
_input = wait.until(EC.visibility_of_element_located((By.ID,"input_0")))
_input.send_keys("99991221212")
You should try using WebDriverWait to wait until input element visible on the page as below :-
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://paytm.com/")
driver.maximize_window()
wait = WebDriverWait(driver, 10)
wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "login"))).click()
#now switch to iframe first
wait.until(EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, "iframe")))
input = wait.until(EC.visibility_of_element_located((By.ID, "input_0")))
input.send_keys("99991221212")
Hope it helps...:)

Python, Selenium, and Beautiful Soup for URL

I am trying to write a script using Selenium to access pastebin do a search and print out in text the URL results. I need the visible URL results and nothing else.
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">pastebin.com/VYQTSbzY</div>
Current script is:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
browser = webdriver.Firefox()
browser.get('http://www.pastebin.com')
search = browser.find_element_by_name('q')
search.send_keys("test")
search.send_keys(Keys.RETURN)
soup=BeautifulSoup(browser.page_source)
for link in soup.find_all('a'):
print link.get('href',None),link.get_text()
You don't actually need BeautifulSoup. selenium itself is very powerful at locating element:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
browser = webdriver.Firefox()
browser.get('http://www.pastebin.com')
search = browser.find_element_by_name('q')
search.send_keys("test")
search.send_keys(Keys.RETURN)
# wait for results to appear
wait = WebDriverWait(browser, 10)
results = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.gsc-resultsbox-visible")))
# grab results
for link in results.find_elements_by_css_selector("a.gs-title"):
print link.get_attribute("href")
browser.close()
Prints:
http://pastebin.com/VYQTSbzY
http://pastebin.com/VYQTSbzY
http://pastebin.com/VAAQCjkj
...
http://pastebin.com/fVUejyRK
http://pastebin.com/fVUejyRK
Note the use of an Explicit Wait which helps to wait for the search results to appear.

Categories

Resources