I am looking to scrape hashtags generated from a site using the Selenium WebDriver. Since the site uses a (user-agent) shadow DOM, I decided to just copy the hashtags using the button already on the site that copies them to my clipboard. However, I am failing to locate the <button>.
This is the HTML:
<button type="button" id="copyBtn" data-clipboard-target="#hashtag_textarea" class="btn btn-success">Copy to clipboard</button>
Why can't Selenium find the button? What am I doing wrong?
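An aside on the shadow DOM remark: an open shadow root can be reached from Selenium through JavaScript, though user-agent shadow roots are typically closed, which is why falling back to the copy button is reasonable. A minimal sketch, assuming an open root and a hypothetical div.widget host selector:

# Sketch: piercing an *open* shadow root via JavaScript.
# 'div.widget' is a hypothetical host selector; a closed (user-agent)
# shadow root returns None here and cannot be pierced this way.
# With Chrome + Selenium 3 the returned root behaves like an element.
host = browser.find_element_by_css_selector('div.widget')
shadow_root = browser.execute_script('return arguments[0].shadowRoot', host)
if shadow_root is not None:
    inner = shadow_root.find_element_by_css_selector('textarea')
    print(inner.get_attribute('value'))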
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome('/Users/user/Documents/docs/chromedriver')
browser.get('https://seekmetrics.com/hashtag-generator')
delay = 15
print ("Headless Chrome Initialized")
print ("\n")
try:
element = wait(browser, delay).until(EC.element_to_be_clickable((By.CLASS_NAME, 'el-input__inner')))
element.click()
element.send_keys('love')
element.send_keys(Keys.ENTER)
wait(browser, delay)
browser.find_element_by_id('copyBtn').click()
print('Page is ready!')
# print(hashtags.text)
# print (browser.page_source)
except TimeoutException:
print("Loading took too much time!")
browser.quit()
You don't need to click the button; just get the textarea's value. But you need to wait until the textarea is located.
element.send_keys('love')
element.send_keys(Keys.ENTER)
# wait until hashtags generated
hashtags = wait(browser, delay).until(EC.presence_of_element_located((By.ID, 'hashtag_textarea')))
print(hashtags.get_attribute('value'))
print('Page is ready!')
After inputting a value into the textbox, the page refreshes, and it takes some amount of time. During that time your code tries to click the button, which is not yet clickable (or not even loaded into the DOM). Instead of doing that, wait for the button until it becomes clickable. Check the following code sample:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome(r'C:\Python27\Scripts\chromedriver')
browser.get('https://seekmetrics.com/hashtag-generator')
delay = 15
print ("Headless Chrome Initialized")
print ("\n")
try:
element = wait(browser, delay).until(EC.element_to_be_clickable((By.CLASS_NAME, 'el-input__inner')))
element.click()
element.send_keys('love')
element.send_keys(Keys.ENTER)
wait(browser, delay)
button = wait(browser, delay).until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Copy to clipboard']")))
button.click()
print('Page is ready!')
# print(hashtags.text)
# print (browser.page_source)
except TimeoutException:
print("Loading took too much time!")
browser.quit()
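To actually get at what the button copied, you still have to read the clipboard back into Python; a small sketch, assuming the third-party pyperclip package is installed:

# Sketch: read whatever the copy button placed on the clipboard.
# Assumes 'pyperclip' is installed (pip install pyperclip).
import pyperclip

hashtags = pyperclip.paste()
print(hashtags)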
Hope this helps you.
I need to accept cookies on a specific website but I keep getting the NoSuchElementException. This is the code for entering the website:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
chrome_options = Options()
driver = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
page_url = 'https://www.boerse.de/historische-kurse/Erdgaspreis/XD0002745517'
driver.get(page_url)
time.sleep(10)
I tried accepting the cookie button using the following:
driver.find_element_by_class_name('message-component message-button no-children focusable button global-font sp_choice_type_11 last-focusable-el').click()
driver.find_element_by_xpath('//*[@id="notice"]').click()
driver.find_element_by_xpath('/html/body/div/div[2]/div[4]/div/button').click()
I got the XPaths by copying both the XPath and the full XPath of the element in Google Chrome.
I am a beginner when it comes to selenium, just wanted to use it for a short workaround. Would appreciate some help.
The button Zustimmen is in an iframe, so first you'd have to switch to that iframe; then you can interact with the button.
Code:
driver.maximize_window()
page_url = 'https://www.boerse.de/historische-kurse/Erdgaspreis/XD0002745517'
driver.get(page_url)
wait = WebDriverWait(driver, 30)
try:
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[starts-with(@id,'sp_message_iframe')]")))
    wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Zustimmen']"))).click()
    print('Clicked successfully')
except:
    print('Could not click')
    pass
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
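One more note: frame_to_be_available_and_switch_to_it leaves the driver inside the iframe, so switch back to the top-level document before interacting with the rest of the page:

# Return to the top-level document once the consent iframe is handled.
driver.switch_to.default_content()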
I've got a similar problem with a page. I've tried to address the iframe and the "accept all" button with Selenium (as suggested by @cruisebandey above).
However, the pop-up on this page seems to work differently:
https://www.kreiszeitung-wochenblatt.de/hanstedt/c-panorama/mega-faslams-umzug-in-hanstedt_a270327
This is what I've tried:
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(executable_path="C:\\Users\\***\\chromedriver.exe")
driver.maximize_window()
try:
    driver.get("https://www.kreiszeitung-wochenblatt.de/hanstedt/c-panorama/mega-faslams-umzug-in-hanstedt_a270327")
except:
    print('Site not found')

wait = WebDriverWait(driver, 10)

try:
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '/html/body/iframe')))
except:
    print('paywall-layover not found')

try:
    cookie = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="consentDialog"]/div[3]/div/div[2]/div/div[2]/div/div[1]/div[2]/div')))
    cookie.click()
except:
    print('Button to accept all not found')
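When it's unclear which frame actually hosts the dialog, one way to narrow it down is to list every iframe on the page and inspect its attributes. A small debugging sketch:

# Debugging sketch: enumerate all iframes so you can see which one
# actually contains the consent dialog.
for i, frame in enumerate(driver.find_elements_by_tag_name('iframe')):
    print(i, frame.get_attribute('id'), frame.get_attribute('src'))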
I'm trying to print all the links, but I get an error: element is not clickable at point (781,748) because another element obscures it.
Here is the updated code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time

#driver = webdriver.Chrome(executable_path='chromedriver.exe')
driver = webdriver.Firefox(executable_path='geckodriver')
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)

driver.get("https://www.architectes-pour-tous.fr/")
driver.find_element_by_xpath("//button[contains(@class,'decline-button')]").click()
driver.find_element_by_xpath(".//a[@id='pager']").click()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.image-projet img")))
time.sleep(1)
for img in driver.find_elements_by_css_selector('div.image-projet img'):
    print(img.get_attribute('href'))
driver.find_element_by_css_selector('button.agree-button').click()
pager = driver.find_element_by_xpath('//*[@id="pager"]')
actions.move_to_element(pager).perform()
time.sleep(0.5)
pager.click()
You have to accept/decline the cookies before accessing any element on the page.
driver.find_element_by_xpath("//button[contains(#class,'decline-button')]").click();
driver.find_element_by_xpath(".//a[#id='pager']").click();
The element you trying to access is initially out of the visible screen so you have to scroll to it before clicking it.
Also possibly you will have to close the accept cookies pop-up prior to clicking this element.
Also, I'm quite sure you are getting no links with

for a in driver.find_elements_by_xpath('.//a'):
    print(a.get_attribute('href'))

since you're trying to do that before the page has loaded.
Also, if you are trying to get the search results links you have to use another locator.
So I would suggest changing your code as follows:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
#driver = webdriver.Chrome(executable_path='chromedriver.exe')
driver = webdriver.Firefox(executable_path='geckodriver')
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
driver.get("https://www.architectes-pour-tous.fr/")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.image-projet img")))
time.sleep(1)
for img in driver.find_elements_by_css_selector('div.image-projet img'):
    print(img.get_attribute('href'))
driver.find_element_by_css_selector('button.agree-button').click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
pager = driver.find_element_by_xpath('//*[@id="pager"]')
actions.move_to_element(pager).perform()
time.sleep(0.5)
pager.click()
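As an aside, instead of scrolling the whole window you can ask the browser to scroll the element itself into view:

# Alternative: scroll the pager element itself into view via JavaScript.
driver.execute_script("arguments[0].scrollIntoView(true);", pager)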
How do I click on multiple tags in Yahoo search results using Selenium? For example, I search for states and multiple results show up. I want to click on each link.
Here's what the XPath links look like:
#//*[@id="yui_3_10_0_1_1607785449630_1057"]
#//*[@id="yui_3_10_0_1_1607785449630_1057"]
#//*[@id="yui_3_10_0_1_1607785449630_1189"]
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import random
import pyautogui as pg  # assumption: 'pg.press' below refers to pyautogui

for i in range(1):
    sleep(2)
    driver = webdriver.Firefox()
    driver.get("http://www.yahoo.com/")
    search = driver.find_element_by_id("ybar-sbq")
    my_choice = random.choice(["Atos stock"])
    search.send_keys(my_choice)
    sleep(2)
    pg.press('enter')
    sleep(4)
    for i in range(1):
        try:
            #//*[@id="yui_3_10_0_1_1608697269369_685"]
            sleep(2)
            driver.find_elements_by_xpath('//*[@id^="yui_3"]')
            #driver.find_elements_by_xpath("//*[@id^='yui_3']").click()
            sleep(2)
            driver.execute_script("window.scrollBy(0, 10)")
            sleep(2)
            driver.execute_script("window.scrollBy(0, 50)")
            sleep(2)
            driver.execute_script("window.scrollBy(0, 100)")
            sleep(5)
            #driver.back()
            print('done')
            sleep(2)
        except:
            print('error')
            continue

driver.close()
driver.quit()
This should get all the elements whose IDs start with yui_3.
elems=driver.find_elements_by_css_selector("[id^='yui_3']")
print(len(elems))
Now, clicking on them may or may not make the remaining elements stale. If it does, re-find the list each time, as in the sketch below.
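A sketch of that pattern:

# Sketch: fresh lookup on each iteration, so a click that triggers
# navigation does not leave us holding stale references.
elems = driver.find_elements_by_css_selector("[id^='yui_3']")
for i in range(len(elems)):
    fresh = driver.find_elements_by_css_selector("[id^='yui_3']")  # re-find
    if i >= len(fresh):
        break
    fresh[i].click()
    driver.back()  # assumes the click navigated away; drop this if it did not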
I am trying to simulate a click from this page (http://www.oddsportal.com/baseball/usa/mlb/results/) to the last page number found at the bottom. The click I use on the icon in my code seems to work, but I can't get it to scrape the page data I actually want after simulating this click. Instead, it just scrapes the data from the first, original URL. Any help on this would be greatly appreciated.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
url='http://www.oddsportal.com/baseball/usa/mlb/results/'
driver = webdriver.Chrome()
driver.get(url)
timeout=5
while True:
    try:
        element_present = EC.presence_of_element_located((By.LINK_TEXT, '»|'))
        WebDriverWait(driver, timeout).until(element_present)
        last_page_link = driver.find_element_by_link_text('»|')
        last_page_link.click()
        element_present2 = EC.presence_of_element_located((By.XPATH, ".//th[@class='first2 tl']"))
        WebDriverWait(driver, timeout).until(element_present2)
        content = driver.page_source
        soup = BeautifulSoup(content, 'lxml')
        dates2 = soup.find_all('th', {'class': 'first2'})
        dates2 = [element.text for element in dates2]
        dates2 = dates2[1:]
        driver.quit()
    except TimeoutException:
        print('Timeout Error!')
        driver.quit()
        continue
    break

print(dates2)
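For what it's worth, a likely culprit in the code above is that .//th[@class='first2 tl'] is already present on the first page, so the second wait can succeed before the new page has rendered. One way around that is to wait for an element from the old page to go stale after the click; a sketch:

# Sketch: keep a reference to an element on the current page, click,
# then wait for that reference to go stale before reading page_source.
old_header = driver.find_element_by_xpath(".//th[@class='first2 tl']")
last_page_link.click()
WebDriverWait(driver, timeout).until(EC.staleness_of(old_header))
content = driver.page_source  # now reflects the last results page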
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def wait(dr, x):
    element = WebDriverWait(dr, 50).until(
        EC.presence_of_all_elements_located((By.XPATH, x))
    )
    return element

from selenium import webdriver
browser = webdriver.Firefox()
browser.get("http://www.dinamalar.com/user_comments.asp? uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")

for elem in wait(browser, '//*[@id="commsec"]/div[2]/div[1]'):
    print(elem.text)
This is the link from which I need to extract all the comments: http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D
But my code extracts only the first 10 comments. After clicking the button, the next 10 comments are loaded dynamically. How can I extract all of these comments using Python and Selenium?
The idea would be to look for how many "more ideas" elements are present on the page. Every time you click the button and load more comments, one more "more ideas" red button becomes present. Implementation:
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
browser = webdriver.Firefox()
wait = WebDriverWait(browser, 10)
browser.get("http://www.dinamalar.com/user_comments.asp?uid=14701&name=%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%E0%AE%9A%E0%AF%86%E0%AE%B2%E0%AF%8D%E0%AE%B5%E0%AE%A9%E0%AF%8D")
# initial wait for the page to load
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".morered")))
pages = 1
while True:
    browser.find_elements_by_css_selector(".morered")[-1].click()

    # wait for more "load more" buttons to be present
    try:
        wait.until(lambda browser: len(browser.find_elements_by_css_selector(".morered")) > pages)
    except TimeoutException:
        break  # no more data loaded, exit the loop

    print("Comments loaded: %d" % len(browser.find_elements_by_css_selector(".dateg")))
    pages += 1

browser.close()
Note that I've also removed that extra space inside the URL.
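Once the loop exits, all of the comments are in the DOM and can be read in one pass (this would go right before the browser.close() at the end). A sketch; .dateg is just the counting selector from above, so swap in whichever element actually holds the comment text:

# Sketch: after the loading loop, print every loaded comment.
# '.dateg' is the counting selector used above; adjust if the comment
# text lives in a different element.
for comment in browser.find_elements_by_css_selector(".dateg"):
    print(comment.text)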