How to click and download with python selenium - python

I am trying to download CSV data from Google Trends using Selenium (Python).
Previously, I printed the page source and extracted the data I wanted from it afterwards.
That worked for a while, but it no longer works.
Now I try to click the download button to get the CSV file, but nothing happens.
Do you have any idea what is going wrong in this case?
I got the button's XPath from Firebug + FirePath (Firefox plugins).
html/body/div[2]/div[2]/div/md-content/div/div/div[1]/trends-widget/ng-include/widget/div/div/div/widget-actions/div/button[1]
I tried both the Chrome driver and the Firefox driver.
This is the code; pass it one argument: the word whose search trend you want to get.
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
def run_text_extract(search_word):
    """Open Google Trends for *search_word* and click the first CSV export button.

    The function is best-effort: every failure is reported on stdout instead
    of being raised, and the browser is always shut down before returning.

    Args:
        search_word: Term appended to the Google Trends "explore" URL.

    Returns:
        None.
    """
    # Imported here because the script's top-level imports do not include it.
    from selenium.common.exceptions import TimeoutException

    print(search_word)
    driver = None
    try:
        # The driver binary must be passed by keyword; the first positional
        # parameter of webdriver.Firefox is the profile, not the executable.
        driver = webdriver.Firefox(
            executable_path='/home/noah/Desktop/Google_Trend_downloader/chromedriver/geckodriver')
        # driver = webdriver.Chrome('/home/noah/Desktop/Google_Trend_downloader/chromedriver')
        driver.get("https://trends.google.com/trends/explore?date=all&geo=TH&q=" + search_word)

        # The widgets are rendered dynamically, so wait for the export buttons
        # instead of looking up a brittle absolute XPath immediately.
        export_buttons = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, ".widget-actions-item.export")))
        export_buttons[0].click()

        try:
            # Python binding for what Java spells manage().deleteAllCookies().
            driver.delete_all_cookies()
            # NOTE(review): clear_cache is not defined in the visible source;
            # confirm it exists, otherwise this falls into the handler below.
            clear_cache(driver)
        except Exception as ex:
            print("Here 5")
            print("Exception has been thrown. " + str(ex))

        time.sleep(2)  # short pause before the browser is closed
        print("======== END_OF_FILE ===============")
    except TimeoutException as ex:
        print("Exception has been thrown. " + str(ex))
    except Exception as ex:
        # Report instead of silently swallowing, but keep the no-raise contract.
        print("run_text_extract failed: " + repr(ex))
    finally:
        if driver is not None:
            driver.quit()
if __name__ == '__main__':
    # Entry point: expects exactly one command-line argument, the search word.
    if len(sys.argv) < 2:
        sys.exit("usage: python " + sys.argv[0] + " <search_word>")
    run_text_extract(sys.argv[1])
    time.sleep(8)

I have navigated to the link you have provided.
If you search for any term, a "download CSV" button appears on the right-hand side of each widget. However, there are three such buttons on the page, all with the same class (CSS selector), so you need to collect all matching elements and pick the one you want by index. In your case, I assume you want to click the first one, so the code below should work. If you want to click the second or third button, change the index accordingly.
def run_text_extract(search_word):
    """Download the first Google Trends CSV export for *search_word* with Firefox.

    A Firefox profile is configured so that "text/csv" responses are saved to
    the download directory without a prompt, then the first export button on
    the page is clicked.

    Args:
        search_word: Term appended to the Google Trends "explore" URL.

    Raises:
        selenium.common.exceptions.TimeoutException: if no export button
            appears within 30 seconds.
    """
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    profile = webdriver.FirefoxProfile()
    # folderList=2 makes Firefox use the custom directory given below.
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.download.dir", 'C:\\Python27')
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

    # Raw string: single backslashes (doubling them inside r'' keeps both).
    driver = webdriver.Firefox(
        firefox_profile=profile,
        executable_path=r'C:\Python27\geckodriver.exe')
    driver.get("https://trends.google.com/trends/explore?date=all&geo=TH&q=" + search_word)

    # Wait for the export buttons instead of sleeping a fixed 7 seconds; the
    # condition only succeeds with a non-empty list, so index 0 is safe.
    export_buttons = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".widget-actions-item.export")))
    # Several widgets expose the same button; change the index to pick another.
    export_buttons[0].click()


run_text_extract("selenium")

Related

Find 'element by' not working in normal execution

I have the following code as part of a data download function in selenium / chrome driver
# Locate the first element with class "mt-n1", click it, then set the
# driver's implicit wait to 5 seconds.
first_result = driver.find_element_by_class_name("mt-n1")
first_result.click()
driver.implicitly_wait(5)
When I 'step through' the code it works fine, but in normal execution it fails to action this link on the web page, therefore does not open the next webpage as required by the function
I have inspected the web page and identified the xpath, so I amended the code as follows
# XPath attribute tests are written with "@"; "#class" is not valid XPath.
driver.find_element_by_xpath("//div[@class = 'mt-n1 flex-auto']").click()
driver.implicitly_wait(5)
This produces the same result, i.e. only works if I step through the code one line at a time
Hopefully just something obvious I'm not seeing?
The full code is given below for ref.
# Download file from github function
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Folder Chrome saves downloads into (Windows path, no leading slash).
workDir = r"C:\Users\Oem\Desktop"
### Set the download directory
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_experimental_option("prefs", {"download.default_directory": workDir})
### Open a web-browser for automated control
driver = webdriver.Chrome(options=chromeOptions)
try:
    # implicitly_wait() is not a pause: it sets how long *later* element
    # look-ups keep polling, so it is configured once, before any look-up.
    driver.implicitly_wait(5)
    driver.get("https://github.com")
    driver.fullscreen_window()
    time.sleep(5)
    assert "GitHub" in driver.title
    elem = driver.find_element_by_class_name("header-search-input")
    elem.clear()
    elem.send_keys("acca-edx/python-practical-automation")
    elem.send_keys(Keys.RETURN)
    assert "No results found." not in driver.page_source
    ### Go to the first result.
    # Find the results on the page using "mt-n1" and click on it.
    driver.find_element_by_class_name("mt-n1").click()
    # XPath attribute tests use "@", not "#".
    driver.find_element_by_xpath("//a[@title='detection.xlsx']").click()
    # Find the raw-url to download the file we are after.
    driver.find_element_by_id('raw-url').click()
    time.sleep(10)  # leave time for the download to finish
finally:
    # Always shut the browser down, even when a look-up or assert fails.
    driver.quit()
If your code works when you step through it line by line, the web page probably needs more time to render. Try this: it waits up to 50 seconds, or until the element is present in the DOM.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
# chromeOptions is the options object built in the question's script.
driver = webdriver.Chrome(options=chromeOptions)
try:
    # Poll for up to 50 seconds until the element is present in the DOM.
    element = WebDriverWait(driver, 50).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'mt-n1')))
except TimeoutException as e:
    # do something if element is not found
    print("Timed out waiting for the 'mt-n1' element: " + str(e))
else:
    # The element exists now, so the click from the question can proceed.
    element.click()

Getting Trouble in Dismissing Ads from Website using Selenium

I am doing web automation with the Selenium library: I navigate to a page in order to read its title. However, when I try to click the Find button, an ad suddenly pops up, interrupts the flow, and prevents the Find button from being clicked. How can I close that ad so that I can move on to the next page and get its title?
Here is my code:
#Using Selenium to move towards the next pages by clicking on button
#Libs Included
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
#Path to Chrome Driver
path='chromedriver.exe'
driver=webdriver.Chrome(path)
try:
    #Main_Url Page
    main_url='https://www.zameen.com/'
    #Getting the MainPage
    driver.get(main_url)
    print(driver.title)
    #Selecting the Drop Down Menu First (click() returns nothing worth keeping)
    driver.find_element_by_class_name('eedc221b').click()
    #Collect the names of all cities listed in the drop-down
    Cities=driver.find_elements_by_class_name("d92d11c7")
    list_of_cities=[city.text for city in Cities]
    #print("List of Cities are: \n",list_of_cities)
    #Pick the first location through its button, then press the Find button
    driver.find_element_by_css_selector("button[aria-label='"+Cities[0].text+"']").click()
    time.sleep(3)
    driver.find_element_by_css_selector("a[aria-label='Find button'][class='c3901770 _22dc5e0a']").click()
    WebDriverWait(driver,10).until(EC.presence_of_element_located((By.TAG_NAME,"html")))
    print("Tilte of next Page is: {0}".format(driver.title))
    time.sleep(5)
finally:
    #Single shutdown point: runs on success and on any failure above
    driver.quit()
The ad's close button can be located with the CSS selector used below:
# Path to Chrome Driver
path = 'chromedriver.exe'
driver = webdriver.Chrome(path)
wait = WebDriverWait(driver, 10)
# Main_Url Page
main_url = 'https://www.zameen.com/'
driver.maximize_window()
# Getting the MainPage
driver.get(main_url)
try:
    # Wait up to 10 seconds for the ad's close icon to become clickable.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "img.close_cross_big"))).click()
except Exception:
    # Best effort: carry on if the close icon could not be clicked. Catching
    # Exception (not a bare except) keeps Ctrl-C and SystemExit working.
    print("could not click")
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
and then you can continue with the rest of your code.

selecting from dropdown menu with selenium

I'm trying to select from a dropdown with Selenium in Python.
I can't get it working. I have tried "clicking" on different links via their XPath, and that works, but I can't figure out the dropdown menu.
This is the code I have tried using:
path = r"C:\Program Files\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get("http://elpris.dk")
# XPath attribute tests use "@"; "#id" is not valid XPath.
driver.find_element_by_xpath("""//*[@id="btnSelectProfile"]""").click()
The webpage is written using Angular JS, which loads data dynamically. So, use WebDriverWait so that the page gets loaded properly.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Open the page, then click the profile button once it becomes clickable;
# any failure is printed rather than raised.
path = r"C:\Program Files\chromedriver_win32\chromedriver.exe"
delay = 15
driver = webdriver.Chrome(path)
wait = WebDriverWait(driver, delay)
driver.get("http://elpris.dk")
profile_button_locator = (By.ID, 'btnSelectProfile')
try:
    wait.until(EC.element_to_be_clickable(profile_button_locator)).click()
except Exception as e:
    print(e)
Another option is to add some sleep to wait for the data to get loaded properly like below:
import time
path = r"C:\Program Files\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get("http://elpris.dk")
time.sleep(3)  # fixed pause so the dynamically loaded content can appear
# XPath attribute tests use "@"; "#id" is not valid XPath.
driver.find_element_by_xpath("""//*[@id="btnSelectProfile"]""").click()
Then the click will work.

Script fails to keep clicking on load more button

I've written a script in Python using Selenium that should keep clicking the MORE button to load more items until there are no new items left to load on the page. However, the script below clicks the MORE button at the bottom of the page only once.
Link to that site
This is my try so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(link)

# Locators reused on every pass through the loop.
startup_links = (By.CSS_SELECTOR, ".results .name a.startup-link")
more_button = (By.CSS_SELECTOR, "[class='more']")

while True:
    # Print the href of every result link currently present.
    anchors = wait.until(EC.presence_of_all_elements_located(startup_links))
    for anchor in anchors:
        print(anchor.get_attribute("href"))
    # Scroll to the "more" button and click it; any failure ends the loop.
    try:
        button = wait.until(EC.visibility_of_element_located(more_button))
        driver.execute_script("arguments[0].scrollIntoView();", button)
        button.click()
    except Exception:
        break
driver.quit()
How can I keep clicking that MORE button until no such button is left to click, and then parse the links, as I have already tried to do with the for loop?
I've managed to solve the problem by applying Andersson's logic to my existing script. This is what the modified script looks like.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = "https://angel.co/companies?company_types[]=Startup&company_types[]=Private+Company&company_types[]=Mobile+App&locations[]=1688-United+States"
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
try:
    driver.get(link)
    # Keep clicking "more" until the button can no longer be found.
    while True:
        try:
            loadmore = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[class='more']")))
            # Click through JavaScript, then wait for the old button to go
            # stale, i.e. to be detached from the DOM.
            driver.execute_script("arguments[0].click();", loadmore)
            wait.until(EC.staleness_of(loadmore))
        except Exception:
            break
    # Everything is loaded: print every result link once.
    for elems in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".results .name a.startup-link"))):
        print(elems.get_attribute("href"))
finally:
    # Shut the browser down even if the final wait for links times out.
    driver.quit()
why not just?
// Repeat for as long as at least one element with class "more" is found.
while (driver.FindElements(By.ClassName("more")).Count > 0)
{
// Look the element up again and click it.
driver.FindElement(By.ClassName("more")).Click();
// Placeholder: add some delay here so the lazy-loaded content can finish loading
}
This is a C# example, but I'm pretty sure the same can be done in Python as well.

Python selenium to select first element from the dropdown list

The below piece of code clicks the file menu on a page which contain excel worksheet.
from selenium import webdriver
# Headless session: open the (dummy) workbook URL and click the File menu.
driver = webdriver.PhantomJS()
driver.set_window_size(1120, 550)
workbook_url = r"foo%20Data%20235.xlsx&DefaultItemOpen=3"  # dummy link
driver.get(workbook_url)
file_menu = driver.find_element_by_css_selector('#jewel-button-middle > span')
file_menu.click()  # responsible for clicking the file menu
driver.quit()
I don't know how to click the first option, i.e. the "Download a Snapshot" option, in the popup menu, and I am unable to inspect the elements of the popup (dropdown) menu. I want the xlsx file to be downloaded.
The idea is to load the page with PhantomJS, wait for the contents of the workbook to load, and collect all the parameters needed for a request to the download file handler endpoint, which we can then send with the requests package.
Full working solution:
import json
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
WORKBOOK_TYPE = "PublishedItemsSnapshot"

# Step 1: load the viewer page and read the three values the download
# handler needs (workbook URI, session id, file name).
driver = webdriver.PhantomJS()
try:
    driver.maximize_window()
    driver.get('http://www.cbe.org.eg/en/EconomicResearch/Publications/_layouts/xlviewer.aspx?id=/MonthlyStatisticaclBulletinDL/External%20Sector%20Data%20235.xlsx&DefaultItemOpen=1#')
    wait = WebDriverWait(driver, 10)
    # Wait for this element before reading the values from the page.
    wait.until(EC.presence_of_element_located((By.ID, "ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_rowHeadersDiv")))
    # get workbook uri
    hidden_input = wait.until(EC.presence_of_element_located((By.ID, "ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_m_workbookContextJson")))
    workbook_uri = json.loads(hidden_input.get_attribute('value'))['EncryptedWorkbookUri']
    # get session id
    session_id = driver.find_element_by_id("ctl00_PlaceHolderMain_m_excelWebRenderer_ewaCtl_m_workbookId").get_attribute("value")
    # get workbook filename (XPath attribute tests use "@", not "#")
    workbook_filename = driver.find_element_by_xpath("//h2[contains(@class, 's4-mini-header')]/span[contains(., '.xlsx')]").text
finally:
    # quit() rather than close(), and in a finally block, so the browser is
    # shut down even when a look-up above fails.
    driver.quit()

# Step 2: fetch the workbook from the file handler endpoint.
print("Downloading workbook '%s'..." % workbook_filename)
response = requests.get(
    "http://www.cbe.org.eg/en/EconomicResearch/Publications/_layouts/XlFileHandler.aspx",
    params={
        'id': workbook_uri,
        'sessionId': session_id,
        'workbookFileName': workbook_filename,
        'workbookType': WORKBOOK_TYPE,
    },
    stream=True,  # stream the body so iter_content does not need it all in memory
)
# Fail loudly on an HTTP error instead of saving an error page as .xlsx.
response.raise_for_status()
with open(workbook_filename, 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            f.write(chunk)
It is easier to inspect such elements (dropdowns that close) using Firefox: open the developer tools, select the inspect option from the Firebug toolbar (marked with a red square in the picture), and then hover the mouse cursor over the element.
As for the question, the locator you are looking for is ('[id*="DownloadSnapshot"] > span')
from selenium import webdriver
from selenium.webdriver.common.by import By  # required by the locators below
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.PhantomJS()
driver.set_window_size(1120, 550)
driver.get(r"foo%20Data%20235.xlsx&DefaultItemOpen=3") # dummy link
wait = WebDriverWait(driver, 10)
# Wait until the loading title is no longer visible before using the menu.
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, '[id*="loadingTitleText"]')))
driver.find_element_by_css_selector('#jewel-button-middle > span').click() # responsible for clicking the file menu
# Wait for the "Download a Snapshot" entry to become visible in the menu.
download = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '[id*="DownloadSnapshot"] > span')))
driver.get_screenshot_as_file('fileName')  # screenshot of the opened menu
download.click()
I observed that the File menu does not show any options until the Excel workbook is completely loaded, so I added a wait until the workbook has loaded.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from selenium.webdriver.common.action_chains import ActionChains
browser = webdriver.PhantomJS()
browser.maximize_window()
browser.get('http://www.cbe.org.eg/en/EconomicResearch/Publications/_layouts/xlviewer.aspx?id=/MonthlyStatisticaclBulletinDL/External%20Sector%20Data%20235.xlsx&DefaultItemOpen=1#')
wait = WebDriverWait(browser, 10)
# Wait for cell B59 to be visible as the signal that the workbook has
# loaded; the element itself is not needed. XPath attributes use "@".
wait.until(EC.visibility_of_element_located((By.XPATH, "//td[@data-range='B59']")))
# Open the File menu once its button is clickable.
element = wait.until(EC.element_to_be_clickable((By.ID, 'jewel-button-middle')))
element.click()
eleDownload = wait.until(EC.element_to_be_clickable((By.XPATH,"//span[text()='Download a Snapshot']")))
eleDownload.click()
sleep(5)  # leave time for the download before shutting down
browser.quit()
Find the element by id or tag, inspect its options in a loop, select the one you want, and then click it.

Categories

Resources