I installed geckodriver since Selenium no longer drives recent versions of Firefox without it. Because of that, I had to modify code that had previously worked for scraping a website, and I am now having trouble selecting items in a dropdown list. In the code below, everything works up until "browser.select"; I get the error: 'WebDriver' object has no attribute 'select'. I am using Spyder on a Mac.
import time
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
firefox_capabilities = DesiredCapabilities.FIREFOX
firefox_capabilities['marionette'] = True
firefox_capabilities['binary'] = '/Applications/anaconda/lib/python3.6/site-packages/selenium/webdriver/firefox'
browser = webdriver.Firefox(capabilities=firefox_capabilities)
browser.get("https://sonuc.ysk.gov.tr/module/GirisEkrani.jsf")
time.sleep(2)
browser.find_element_by_id('closeMessageButton').click()
browser.find_element_by_id('j_id112:secimSorgulamaForm:j_id115:secimSecmeTable:0:secimId').click()
browser.find_element_by_id('j_id112:secimSorgulamaForm:j_id142').click()
# Loop through provinces
time.sleep(4)
il_sayisi = len(browser.find_element_by_id("j_id48:j_id49:j_id108:cmbSecimCevresi").find_elements_by_tag_name('option'))-1
for j in range(1, il_sayisi):
    j = j + 1
    iller = browser.find_element_by_id("j_id48:j_id49:j_id108:cmbSecimCevresi").find_elements_by_tag_name('option')
    browser.select_dropdown('j_id48:j_id49:j_id108:cmbSecimCevresi', iller[j].value)
    time.sleep(2)
    ilce_sayisi = len(browser.find_element_by_id("j_id48:j_id49:j_id120:cmbIlceSecimKurulu").find_elements_by_tag_name('option')) - 1
    for i in range(0, ilce_sayisi):
        i = i + 1
        ilceler = browser.find_element_by_id("j_id48:j_id49:j_id120:cmbIlceSecimKurulu").find_elements_by_tag_name('option')
        browser.select('j_id48:j_id49:j_id120:cmbIlceSecimKurulu', ilceler[i].value)
        time.sleep(5)
        browser.find_element_by_id('j_id48:j_id49:j_id192').click()
        time.sleep(5)
        browser.find_element_by_id("j_id48:tabloBilgileriPanel:j_id440").click()
        time.sleep(5)
        browser.find_element_by_id("j_id1114:j_id1115:j_id1121").click()
        time.sleep(7)
[EDIT: The last part of the code was edited to the following and now works]:
from selenium.webdriver.support.ui import Select  # needed for Select below

iller = browser.find_element_by_id("j_id48:j_id49:j_id108:cmbSecimCevresi")
iller_options = iller.find_elements_by_tag_name('option')
i_options = {option.text.strip(): option.get_attribute("value")
             for option in iller_options if option.get_attribute("value").isdigit()}
for k in sorted(list(i_options.keys()))[4:81]:
    # iller
    iller = browser.find_element_by_id("j_id48:j_id49:j_id108:cmbSecimCevresi")
    iller_options = iller.find_elements_by_tag_name('option')
    i_options = {option.text.strip(): option.get_attribute("value")
                 for option in iller_options if option.get_attribute("value").isdigit()}
    iller_select = Select(iller)
    iller_select.select_by_value(i_options[k])
    time.sleep(5)
You need to do something like below:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
selectEle = driver.find_element_by_id('<id_of_select_control_containing_these_options>')
select = Select(selectEle)
# select by visible text
select.select_by_visible_text('ADANA')
# select by value
select.select_by_value('1')
So, to use the select methods in Selenium WebDriver, you first need to create an object of the Select class. Then you can use the methods below:
select_by_index(index)
select_by_value(value)
select_by_visible_text(text)
Let me know if it helps.
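For instance, applied to the province dropdown in the question, a rough sketch would be (IDs taken from the question; untested against the live site):

from selenium.webdriver.support.ui import Select

dropdown_id = "j_id48:j_id49:j_id108:cmbSecimCevresi"
select = Select(browser.find_element_by_id(dropdown_id))
# collect the option values first, since selecting may reload the page
values = [o.get_attribute("value") for o in select.options if o.get_attribute("value").isdigit()]
for value in values:
    # re-locate the <select> on each pass; a page refresh invalidates old references
    select = Select(browser.find_element_by_id(dropdown_id))
    select.select_by_value(value)
    time.sleep(2)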
My code:
from selenium.webdriver.common.by import By
from selenium import webdriver
import pandas as pd
url = 'https://www.tajeran-group.de/fahrzeuge/'
PATH = 'C:\\Users\\czoca\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Python 3.6\\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get(url)
driver.maximize_window()  # maximize the window
driver.implicitly_wait(10)  # implicit wait of up to 10 seconds
dealers = driver.find_elements(By.XPATH, '/html/body/div[1]/div[4]/div/div[3]/div[1]/div/div[1]')
for n in dealers:
    name = n.find_element(By.XPATH, "/html/body/div[1]/div[4]/div/div[3]/div[1]/div/div[1]/div[1]/h3/a")
    km = n.find_element(By.XPATH, "/html/body/div[1]/div[4]/div/div[3]/div[1]/div/div[1]/div[2]/div/div[2]/div/div[1]/ul/li[1]/span")
    firstreg = n.find_element(By.XPATH, "/html/body/div[1]/div[4]/div/div[3]/div[1]/div/div[1]/div[2]/div/div[2]/div/div[1]/ul/li[2]/span")
    print(name.text, km.text, firstreg.text)
    #print(email.text)
I tried adding an "s" to "element" and it did not work; trying just print(n.text) only gives me one result. The website is https://www.tajeran-group.de/fahrzeuge/ and I want to get all the info for each car. Any ideas? Thanks!
Try with something like this:
dealers = driver.find_elements(By.XPATH, "//div[@class='uk-card uk-card-small uk-box-shadow-small uk-card-default uk-margin-bottom']//div[@class='uk-card-header']")
or at least to match all elements not only the first one:
/html/body/div[1]/div[4]/div/div[3]/div[1]/div/div[*]
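Putting the two together, a sketch of the loop (the card class comes from the XPath above; the inner relative paths are adapted from your absolute ones and may need adjusting):

cards = driver.find_elements(By.XPATH, "//div[@class='uk-card uk-card-small uk-box-shadow-small uk-card-default uk-margin-bottom']")
for card in cards:
    # the leading "." keeps the search relative to this card
    name = card.find_element(By.XPATH, ".//h3/a").text
    details = [s.text for s in card.find_elements(By.XPATH, ".//ul/li/span")]
    print(name, details)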
I am trying to build a web scraper that will go through a website's pages and download the excel files from a dropdown menu at the bottom of the page.
The webpages only allow me to download the 50 locations that are displayed on each page and I cannot download all of them at once.
I am able to download the first page's Excel file, but the following pages yield nothing else.
I get the following output after running the code I have provided below.
Skipped a page
No more pages.
If I exclude the lines where it asks to download the pages, it is able to go through each page until the end successfully.
I'll provide an example below for what I am trying to get accomplished.
I would appreciate any help and advice! Thank you!
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
state = 'oklahoma'
rent_to_own = 'rent to own'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx')
industry = driver.find_element(By.ID, "txtKeyword")
industry.send_keys(rent_to_own)
location = driver.find_element(By.ID, "txtLocation")
location.send_keys(state)
driver.find_element(By.ID, "btnSubmit").click()
driver.implicitly_wait(3)
def web_scrape():
    more_drawer = driver.find_element(By.XPATH, "//div[@class='more-drawer']//a[@href='/toolkit/jobs/find-businesses.aspx?keyword="+rent_to_own+"&ajax=0&location="+state+"&lang=en&Desfillall=y#Des']")
    more_drawer.click()
    driver.implicitly_wait(5)
    get_50 = Select(driver.find_element(By.ID, 'ViewPerPage'))
    get_50.select_by_value('50')
    driver.implicitly_wait(5)
    filter_description = driver.find_element(By.XPATH, "//ul[@class='filters-list']//a[@href='/toolkit/jobs/find-businesses.aspx?keyword="+rent_to_own+"&ajax=0&location="+state+"&lang=en&Desfillall=y&pagesize=50&currentpage=1&descfilter=Furniture~B~Renting ~F~ Leasing']")
    filter_description.click()
    while True:
        try:
            download_excel = Select(driver.find_element(By.ID, 'ResultsDownload'))
            download_excel.select_by_value('Excel')
            driver.implicitly_wait(20)
            first_50 = driver.find_element(By.XPATH, "//div[@id='relatedOccupations']//a[@onclick='hideMoreRelatedOccupations()']")
            first_50.click()
            driver.implicitly_wait(20)
            next_page = driver.find_element(By.XPATH, "//div[@class='pagination-wrap']//div//a[@class='next-page']")
            next_page.click()
            driver.implicitly_wait(20)
            print("Skipped a page.")
        except:
            print("No more pages.")
            return
web_scrape()
Below is something that works, though I think the way I went about it could still be improved. I stuck with Selenium, but you don't even need to open the webpage: you can scrape with Beautiful Soup using the correct URL params instead. Also, writing every item into Excel one cell at a time is probably not the fastest way; a better approach would be to collect everything with pandas and create the Excel workbook at the end. Anyway, if you have any questions let me know.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import openpyxl as xl
import os
import math
cwd = os.getcwd() #Or whatever dir you want
filename = '\\test123.xlsx'  # escape the backslash; a single '\t' here would be a tab character
location = 'oklahoma'
keyword = 'rent to own'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx?keyword=' + keyword + '&ajax=0&location=' + location + '&radius=50&pagesize=50&currentpage=1&lang=en')
driver.implicitly_wait(3)
wb = xl.Workbook()
ws = wb.worksheets[0]
#get number of pages
ret = driver.find_element(By.ID, 'recordNumber')
lp = math.ceil(float(ret.text)/50)
r = 1
for i in range(1, lp + 1):  # +1 so the last page is included; range() excludes the stop value
    print(i)
    driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx?keyword=' + keyword + '&ajax=0&location=' + location + '&radius=50&pagesize=50&currentpage=' + str(i) + '&lang=en')
    table_id = driver.find_elements(By.CLASS_NAME, 'res-table')[0]
    rows = table_id.find_elements(By.TAG_NAME, "tr")
    for count, row in enumerate(rows, start=1):
        cols = row.find_elements(By.TAG_NAME, "td")
        refs = row.find_elements(By.TAG_NAME, "a")
        for c, ref in enumerate(refs, start=1):
            ws.cell(row=r, column=c).value = '=HYPERLINK("{}", "{}")'.format(ref.get_attribute("href"), ref.text)
        for c, col in enumerate(cols, start=1):
            if c > 1:
                ws.cell(row=r, column=c).value = col.text
        r += 1
wb.save(cwd + filename)
print('done')
This returns an Excel file with 750+ rows of data, links included.
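For reference, the pandas variant mentioned above might look roughly like this (a sketch, untested; it collects plain text only and drops the hyperlink formulas):

import pandas as pd

records = []
for i in range(1, lp + 1):
    driver.get('https://www.careeronestop.org/toolkit/jobs/find-businesses.aspx?keyword=' + keyword + '&ajax=0&location=' + location + '&radius=50&pagesize=50&currentpage=' + str(i) + '&lang=en')
    table = driver.find_elements(By.CLASS_NAME, 'res-table')[0]
    for row in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text for td in row.find_elements(By.TAG_NAME, "td")]
        if cells:
            records.append(cells)
# one write at the end instead of one cell at a time
pd.DataFrame(records).to_excel(cwd + filename, index=False, header=False)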
I am working on a little selenium project, but I got some issues.
So what I need to do is click on a link to open it in a new tab, take the information I need, close that tab, and move on to the next one. driver.close() does not work, as it gives me the error: Message: no such window: target window already closed. So I instead tried this (saw it while researching):
driver.find_element_by_tag_name('html').send_keys(Keys.CONTROL + 'w'), and I also tried adding Keys.F4, but nothing worked.
It seems to work for other people, so why not for me?
Code:
def cpuFunc():
    i = 0
    print("Launching CPU")
    cpu = webdriver.Chrome('chromedriver.exe', options=option)
    cpu.get('https://www.komplett.se/category/11204/datorutrustning/datorkomponenter/processor')
    cpu.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div/div/div[2]/form/div/div[1]/button').click()
    #while i < 10:
    #    cpu.find_element_by_tag_name('html').send_keys(Keys.END)
    #    i += 1
    #    time.sleep(0.5)
    #print("At bottom: CPU")
    cpu.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.HOME)
    time.sleep(0.5)
    link = cpu.find_element_by_xpath('/html/body/main/div/div[2]/div[5]/div[2]/form/div[1]/a')
    ActionChains(cpu).key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
    time.sleep(1)
    window = cpu.window_handles[-1]
    cpu.switch_to.window(window)
    title = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/section/div/section/div[1]/h1/span").text
    price = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/section/div/section/div[3]/div[2]/div[1]/div/div/div[1]/div[1]/div[1]/span").text
    btn = cpu.find_element_by_xpath('/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/button')
    time.sleep(0.5)
    cpu.execute_script("arguments[0].click();", btn)
    core = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[2]/td").text
    thread = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[3]/td").text
    cache = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[4]/td").text
    clock = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[7]/td").text
    turbo = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[8]/td").text
    socket = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[9]/td").text
    wattage = cpu.find_element_by_xpath("/html/body/div[2]/main/div[2]/div[2]/div[3]/div/div[2]/div/section[2]/div/div/div/table[2]/tbody/tr[10]/td").text
    cpu.find_element_by_tag_name('html').send_keys(Keys.CONTROL + 'w')  # Here it shall close
    time.sleep(60000)
You can simply use ActionChains & Keys.
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
If macOS:
step_1 = ActionChains(cpu)
step_1.send_keys(Keys.COMMAND + 'w')
step_1.perform()  # the chain only runs once perform() is called
If Windows:
step_1 = ActionChains(cpu)
step_1.send_keys(Keys.CONTROL + 'w')
step_1.perform()
Hope this helps; if you run into issues, please comment.
driver.close() worked for me after I also fixed a lot of those full XPaths, which are easily breakable, and added WebDriverWaits for stability in finding elements.
wait = WebDriverWait(cpu, 10)
cpu.get('https://www.komplett.se/category/11204/datorutrustning/datorkomponenter/processor')
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@class='btn-large primary'][@type='submit']"))).click()
link = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "form > div:nth-child(1) > a")))
ActionChains(cpu).key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
time.sleep(1)
window = cpu.window_handles[-1]
cpu.switch_to.window(window)
title = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-main-info__info > h1 > span"))).text
price = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-price > span"))).text
print(title,price)
btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "section.product-section.technical-details.col-xs-12 > button")))
btn.click()
table2 = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "table:nth-child(2) > tbody")))
core = table2.find_element_by_xpath("./tr[2]/td").text
thread = table2.find_element_by_xpath("./tr[3]/td").text
cache = table2.find_element_by_xpath("./tr[4]/td").text
clock = table2.find_element_by_xpath("./tr[7]/td").text
turbo = table2.find_element_by_xpath("./tr[8]/td").text
socket = table2.find_element_by_xpath("./tr[9]/td").text
wattage = table2.find_element_by_xpath("./tr[10]/td").text
cpu.close()
print(core,thread,cache,clock,turbo,socket,wattage)
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
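One more note on the original error: after a tab is closed, the driver still points at the dead window handle, so the very next command raises "no such window" unless you switch back first. A minimal sketch of the pattern (variable names follow the question's code):

main = cpu.current_window_handle
ActionChains(cpu).key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
cpu.switch_to.window(cpu.window_handles[-1])  # work in the new tab
# ... scrape the product page ...
cpu.close()  # closes only the current tab
cpu.switch_to.window(main)  # re-attach to the original tab before the next command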
Hello, I'm trying to scrape some info from the following page:
http://verify.sos.ga.gov/verification/
My code is the following:
import sys
reload(sys)
sys.setdefaultencoding('utf8')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import time
import csv
url = 'http://verify.sos.ga.gov/verification/'
def init_Selenium():
    global driver
    driver = webdriver.Chrome("/Users/rodrigopeniche/Downloads/chromedriver")
    driver.get(url)

def select_profession():
    select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
    options = select.options
    for index in range(1, len(options) - 1):
        select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
        select.select_by_index(index)
        select_license_type()

def select_license_type():
    select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
    options = select.options
    for index in range(1, len(options) - 1):
        select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
        select.select_by_index(index)
        search_button = driver.find_element_by_id('sch_button')
        driver.execute_script('arguments[0].click();', search_button)
        scrap_licenses_results()

def scrap_licenses_results():
    table_rows = driver.find_elements_by_tag_name('tr')
    for index, row in enumerate(table_rows):
        if index < 9:
            continue
        else:
            attributes = row.find_elements_by_xpath('td')
            try:
                name = attributes[0].text
                license_number = attributes[1].text
                profession = attributes[2].text
                license_type = attributes[3].text
                status = attributes[4].text
                address = attributes[5].text
                license_details_page_link = attributes[0].find_element_by_id('datagrid_results__ctl3_name').get_attribute('href')
                driver.get(license_details_page_link)
                data_rows = driver.find_elements_by_class_name('rdata')
                issued_date = data_rows[len(data_rows) - 3].text
                expiration_date = data_rows[len(data_rows) - 2].text
                last_renewal_day = data_rows[len(data_rows) - 1].text
                print name, license_number, profession, license_type, status, address, issued_date, expiration_date, last_renewal_day
                driver.back()
            except:
                pass

init_Selenium()
select_profession()
When I execute the script it works for the first iteration but fails in the second one. The exact place where the error is raised is in the scrap_licenses_results() function, in the attributes = row.find_elements_by_xpath('td') line.
Any help will be appreciated.
The StaleElementReferenceException is due to the list of rows being gathered before the loop iterates. Initially, you created a list of all rows, named table_rows.
table_rows = driver.find_elements_by_tag_name('tr')
Now in the loop, during the first iteration your first row element is fresh and can be found by the driver. At the end of the first iteration you call driver.back(), which changes/refreshes the HTML DOM. All previously gathered references are lost, and every row in your table_rows list is now stale; hence the exception in the second iteration.
You have to move the find-row operation into the loop, so that a fresh reference is found each time on the target page. The pseudocode would look something like this:
total_rows = len(driver.find_elements_by_tag_name('tr'))
for i in range(1, total_rows + 1):
    # XPath indexes are 1-based; re-find the row on every pass
    row = driver.find_element_by_xpath('//tr[%d]' % i)
    # ... further code ...
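Applied to your scrap_licenses_results(), the re-find pattern would look roughly like this (a sketch, untested; the row offset and the link lookup by tag name are assumptions, so adjust them to the real page):

def scrap_licenses_results():
    total = len(driver.find_elements_by_tag_name('tr'))
    for index in range(9, total):  # skip the header rows, as in your code
        # Re-locate the row every iteration: driver.back() refreshed the DOM,
        # so any reference gathered before the navigation is stale
        row = driver.find_elements_by_tag_name('tr')[index]
        attributes = row.find_elements_by_xpath('td')
        if len(attributes) < 6:
            continue
        name = attributes[0].text
        license_number = attributes[1].text
        link = attributes[0].find_element_by_tag_name('a').get_attribute('href')
        driver.get(link)
        # ... scrape the details page as before ...
        driver.back()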
I have written a script in Python to extract and paste 400-500 lines of text from one browser to another. I am using send_keys() to put the text content into the text area. It writes line by line (2 lines per second), so the operation takes a few minutes to complete. Is there any other method in Selenium to write faster (like a manual paste, which takes about a second)?
My code
import time
import re
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementNotVisibleException
from selenium.webdriver.common.keys import Keys
def init_driver(uname, pwd):
    driver = webdriver.Chrome()
    driver.wait = WebDriverWait(driver, 5)
    driver.get("https://ops.stg1.xxxxxyyyxxxx.com/login.jsp")
    box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_username")))
    box.send_keys(uname)
    box = driver.wait.until(EC.presence_of_element_located((By.NAME, "j_password")))
    box.send_keys(pwd)
    button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "login")))
    button.click()
    return driver

def copy():
    with open("Tag_input.txt") as f:
        for line in f:
            url = line.strip()
            driver.get(url)
            k = re.findall(r'\=(\d+)', url)
            print(k[0])
            a = k[0]
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
            # find_element (singular), so send_keys can be called on it
            PC = driver.find_element_by_xpath("//textarea[@name='messagingMap.PRIMARY_CONTENT.message']")
            PC.send_keys(Keys.CONTROL, "a")
            PC.send_keys(Keys.CONTROL, "c")
            print("Copied Primary content !!")
            varPC = PC.text
            url1 = "http://jona.ca/blog/unclosed-tag-finder"
            driver.get(url1)
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
            text_area = driver.find_element_by_id("unclosed-tag-finder-input")
            text_area.send_keys(Keys.CONTROL, "v")
            button = driver.find_element_by_xpath("//input[@value='Submit']")
            button.click()
            result = driver.find_element_by_xpath("//pre[@id='unclosed-tag-finder-results']")
            op = result.text
            print(op)
            writer = csv.writer(open('Tag_OP.csv', 'a+'))
            # pair the page id with each result line and append to the CSV
            writer.writerows(zip(k, op.splitlines()))
            driver.wait = WebDriverWait(driver, 10)
            time.sleep(10)
    return driver

if __name__ == "__main__":
    driver = init_driver("abdul.salam@xxxyyxxx.com", "xxyyxx")
    copy()
    time.sleep(25)
    driver.quit()
You might try using Ctrl+A to select the text and Ctrl+C to copy it, then move to the new browser, Ctrl+A to select all text in your target field (so that you replace it), and Ctrl+V to paste. I could imagine that it may be faster, but I haven't done any benchmarking myself.
This question popped right up when I did a search. It has more details, but, for instance, your paste would look like this (the linked answer is Java; the Python equivalent is):
driver.find_element_by_id("unclosed-tag-finder-input").send_keys(Keys.CONTROL, "v")