Selenium: Selecting from Multiple Drop-Downs at Once - python

I am building a web scraper that has to try a combination of multiple drop-down menu options and gather data from each combination.
So basically there're 5 drop-downs. I have to gather data from all of the possible combinations of the drop-down options. For each combination, I have to press a button to pull up the page with all the data on it. I am storing all the data in a dictionary.
This is the website: http://siops.datasus.gov.br/filtro_rel_ges_covid_municipal.php?S=1&UF=12;&Municipio=120001;&Ano=2020&Periodo=20
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Browser setup: headless Chrome driven by a local chromedriver binary
path = '/Users/admin/desktop/projects/scraper/chromedriver'
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=path)
website = 'http://siops.datasus.gov.br/filtro_rel_ges_covid_municipal.php'
driver.get(website)

# Initial test: print the page title, followed by an empty line
print(driver.title)
print()

# Dictionary the scraped values are stored in: {name: [total_balance, paid_expenses]}
totals = {}

# Drop-down menus.
# NOTE: both <select> elements are looked up here, before either selection is
# made. This is the arrangement the question reports as failing.
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
uf_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))

### THIS IS WHERE THE ERROR IS OCCURRING ###
# Choose from the drop-down menus
uf_select.select_by_value('29')
year_select.select_by_value('2020')

# Submit button on the page
submit_button = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/form/div[2]/div/input[2]')
submit_button.click()

# Pull the data from the report page
nameof = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[1]/tbody/tr[2]').text
total_balance = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[3]/tbody/tr[9]/td[2]').text
paid_expenses = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[4]/tbody/tr[11]/td[4]').text

# Update the dictionary with the new info
totals.update({nameof: [total_balance, paid_expenses]})
totals.update({'this is a test': ['testing stuff']})

# Print the final dictionary and quit
print(totals)
driver.quit()
For some reason, this code does not work when trying 1 possible combination (selecting value 29 from the UF drop-down, as well as value 2020 from the year_select drop-down). If I comment out one of the two drop-down selections, then it works perfectly fine.
How do I try multiple combinations of drop-down options during a single iteration?

try this instead.
# Drop Down Menus
# Choose from the drop down menus.
# Each <select> is located immediately before it is used, so the reference
# is looked up fresh right before the selection is made.
uf_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))
uf_select.select_by_value('29')
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
year_select.select_by_value('2020')
This works for me. With your example I get a stale element reference error, which means that the element disappears. I haven't checked, but maybe the checkbox is somehow updated and loses its reference when the other one is selected.

Related

Selenium scraping Issues with site having an popup window with endless scroll

I am trying to scrape a website that populates a list of providers. the site makes you go through a list of options and then finally it populates a list of providers through a pop up that has an endless/continuous scroll.
i have tried:
from selenium.webdriver.common.action_chains import ActionChains

# Look the target up first, then move the mouse pointer onto it
target = driver.find_element_by_id("my-id")
ActionChains(driver).move_to_element(target).perform()
but this code didn't work.
I tried something similar to this:
driver.execute_script("arguments[0].scrollIntoView();", list )
but this didnt move anything. it just stayed on the first 20 providers.
i tried this alternative:
main = driver.find_element_by_id('mainDiv')
recentList = main.find_elements_by_class_name('nameBold')
for entry in recentList:
    # Scroll this entry into view, then wait before moving to the next one
    driver.execute_script("arguments[0].scrollIntoView(true);", entry)
    time.sleep(20)
but ended up with this error message:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
The code that worked the best was this one:
while True:
    # Scroll down to bottom
    element_inside_popup = driver.find_element_by_xpath('//*[@id="mainDiv"]')
    element_inside_popup.send_keys(Keys.END)
    # Wait to load page
    time.sleep(3)
but this is an endless scroll that i dont know how to stop since "while True:" will always be true.
Any help with this would be great and thanks in advance.
This is my code so far:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
import pandas as pd

PATH = '/Users/AnthemScraper/venv/chromedriver'
driver = webdriver.Chrome(PATH)

# location for the website
driver.get('https://shop.anthem.com/sales/eox/abc/ca/en/shop/plans/medical/snq?execution=e1s13')
print(driver.title)

# entering the zipcode
search = driver.find_element_by_id('demographics.zip5')
search.send_keys(90210)

# making the scraper sleep for 5 seconds while the page loads
time.sleep(5)

# entering first name and DOB then hitting next
search = driver.find_element_by_id('demographics.applicants0.firstName')
search.send_keys('juelz')
search = driver.find_element_by_id('demographics.applicants0.dob')
search.send_keys('01011990')
driver.find_element_by_xpath('//*[@id="button/shop/getaquote/next"]').click()

# hitting the next button
driver.find_element_by_xpath('//*[@id="hypertext/shop/estimatesavings/skipthisstep"]').click()

# making the scraper sleep for 2 seconds while the page loads
time.sleep(2)

# clicking the no option to view all the health plans
driver.find_element_by_xpath('//*[@id="radioNoID"]').click()
driver.find_element_by_xpath('/html/body/div[4]/div[11]/div/button[2]/span').click()

# making the scraper sleep for 2 seconds while the page loads
time.sleep(2)
driver.find_element_by_xpath('//*[@id="hypertext/shop/medical/showmemydoctorlink"]/span').click()
time.sleep(2)

# section to choose the specialist. here we are choosing all
find_specialist = driver.find_element_by_xpath('//*[@id="specializedin"]')
# this is the method for a dropdown
select_provider = Select(find_specialist)
select_provider.select_by_visible_text('All Specialties')

# choosing the distance. Here we click on 50 miles
choose_mile_radius = driver.find_element_by_xpath('//*[@id="distanceInMiles"]')
select_provider = Select(choose_mile_radius)
select_provider.select_by_visible_text('50 miles')
driver.find_element_by_xpath('/html/body/div[4]/div[11]/div/button[2]/span').click()

# handling the endless scroll
# NOTE: this loop has no exit condition (the problem the question asks about),
# so as written the statements after it are never reached.
while True:
    time.sleep(20)
    # Scroll down to bottom
    element_inside_popup = driver.find_element_by_xpath('//*[@id="mainDiv"]')
    element_inside_popup.send_keys(Keys.END)
    # Wait to load page
    time.sleep(3)

# block below allows us to grab the majority of the data. we would have to
# split it up in pandas since this info is nested in with classes
time.sleep(5)
main = driver.find_element_by_id('mainDiv')
sections = main.find_elements_by_class_name('firstRow')
pcp_info = []
#print(section.text)
for pcp in sections:
    # the site stores the information inside inner classes which make it difficult to scrape.
    # the solution would be to pull the entire text in the block and hope to clean it afterwards
    # innerText allows to pull just the text inside the blocks
    first_blox = pcp.find_element_by_class_name('table_content_colone').get_attribute('innerText')
    second_blox = pcp.find_element_by_class_name('table_content_coltwo').get_attribute('innerText')
    # creating columns and rows and assigning them
    pcp_items = {
        'first_block' : [first_blox],
        'second_block' : [second_blox]
    }
    pcp_info.append(pcp_items)

df = pd.DataFrame(pcp_info)
print(df)
df.to_csv('yerp.csv',index=False)
#driver.quit()

Selenium - frame issue with google forms

Would appreciate a help with selenium.
Trying to fill in a Google form for several entries: I need to input the first row of a df, then click "Submit", open a new form and run again for the second row, and so on up to the n-th row.
Got stuck with NoSuchFrameException: Unable to locate frame with index error after the first entry. Read on Selenium docs that one can locate a window's frame in console and it gives me nothing (F12 --> find frame (any combination tried) --> no matches). No such thing in google form (or my search is wrong hands down)
Haven't got anything on the issue so tried frame(0) - no luck.
Any tips would be appreciated. The whole code is below
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time
import pandas as pd

options = Options()
options.binary_location = FirefoxBinary(r"C:\Program Files\Mozilla Firefox\firefox.exe")
driver = webdriver.Firefox(executable_path=r'C:\WebDriver\bin\geckodriver.exe', firefox_options=options)
driver.implicitly_wait(10)

# Only one of `sep` / `delimiter` may be given: recent pandas raises ValueError
# when both are specified. Older pandas let `delimiter` take precedence, so
# ';' is what was actually in effect.
reg = pd.read_csv(r'C:\Users\User\Desktop\Form.csv', header=0, sep=';')
reg_2 = reg.values.tolist()
driver.get('https://docs.google.com/forms/d/e/1FAIpQLSd9FQ33H5SMHelf9O1jjHl7FtLTtaTdFuC4dUFv-educaFiJA/viewform?vc=0&c=0&w=1&flr=0&gxids=7628')

# NOTE(review): the indentation below was reconstructed; confirm the nesting
# of the inner loop against the original post.
try:
    for row in reg_2:
        element_count = 0
        for element in range(len(row)):
            # Locate the three text inputs of the form
            first = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input")
            last = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input")
            mail = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input")
            # Fill them from the current CSV row
            last.send_keys(row[0])
            first.send_keys(row[1])
            mail.send_keys(row[2])
            submit = driver.find_element_by_xpath('//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div/span/span')
            submit.click()
            time.sleep(3)
            element_count +=1
            driver.switch_to.frame(0)
            driver.switch_to.default_content()
finally:
    # Always close the browser, even when a lookup above fails
    driver.quit()
driver.switch_to.frame(0)
driver.switch_to.default_content()
Remove these two lines of code, because the Google form doesn't have any iframe in it.
If you want to submit again, click the "Submit another response" link:
driver.find_element_by_xpath('//a[contains(text(),"Submit another response")]').click()
Well, in fact after deleting the old form and starting anew the thing worked in the end. Had to change several other elements. Also added password and password_confirm field:
# NOTE(review): the indentation below was reconstructed; confirm the nesting
# against the original post.
while len(reg_2) > element_count:
    try:
        for row in reg_2:
            # Locate the five text inputs of the form
            first = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input")
            last = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input")
            mail = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input")
            password = driver.find_element_by_xpath('/html/body/div/div[2]/form/div[2]/div/div[2]/div[4]/div/div/div[2]/div/div[1]/div/div[1]/input')
            password_confirm = driver.find_element_by_xpath('/html/body/div/div[2]/form/div[2]/div/div[2]/div[5]/div/div/div[2]/div/div[1]/div/div[1]/input')
            # Fill them from the current row; the password is typed twice
            last.send_keys(row[0])
            first.send_keys(row[1])
            mail.send_keys(row[2])
            password.send_keys(row[3])
            password_confirm.send_keys(row[3])
            submit = driver.find_element_by_xpath('//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div/span/span')
            submit.click()
            time.sleep(3)
            element_count +=1
            #driver.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div/div[4]/a').click()
            # Follow the first link in the response-links container to open a fresh form
            driver.find_element_by_css_selector('.freebirdFormviewerViewResponseLinksContainer > a:nth-child(1)').click()
    finally:
        driver.quit()

Selenium selecting a dropdown for options

I have been trying for the past couple of days to select a dropdown and at least print out the options available, but I just cannot get it to work.
I am getting this error when I run the module.
Traceback (most recent call last):
File "sel_test_elements2.py", line 20, in
print ([o.text for o in select_element.options])
AttributeError: 'FirefoxWebElement' object has no attribute 'options'
Currently my code looks like this.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

# Define Global Variables
url = "https://games.pcaha.ca/teams/4329"
csv_file = "game_schedule_4329.csv"
games = []

# create a new Firefox session
driver = webdriver.Firefox()
driver.get(url)
driver.implicitly_wait(30)

# Locate the Sector and create a Select object
# NOTE: this is a plain web element, not a Select, so it has no `.options`
# attribute; the line below raises the AttributeError shown in the traceback.
select_element = driver.find_element_by_css_selector(".team-filters")

# this will print out strings available for selection on select_element, used in visible text below
print ([o.text for o in select_element.options])
The issue you face is the fact that this website is using react and doesn't use default Select and Options. They have a custom dropdown implemented, so the way to interact with it is the same as interaction with regular web elements, Select and Options won't work in this case.
I modified your code and it works for me in Chrome:
from selenium.webdriver import Chrome
from time import sleep

# Define Global Variables
url = "https://games.pcaha.ca/teams/4329"
csv_file = "game_schedule_4329.csv"
games = []

# create a new Chrome session
driver = Chrome()
driver.get(url)
driver.implicitly_wait(30)
sleep(3) # make sure svgs load before interaction

# Click on arrow down (the second svg inside the filters) to open the menu.
# click() returns None, so its result is not kept.
driver.find_elements_by_css_selector(".team-filters svg")[1].click()

# Collect options: every div whose id contains 'react-select-2'
options = driver.find_elements_by_xpath("//div[contains(@id, 'react-select-2')]")

# Print text from options
print([o.text for o in options])
Note: when manually opening the dropdown in your browser and trying to use web inspector, it closes, so in order to get the html inside a dropdown, you can use something like:
# Grab the menu container, then read its markup straight from the DOM
dropdown = driver.find_element_by_css_selector(
    "div.css-kj6f9i-menu"
)
dropdown_html = dropdown.get_attribute(
    'innerHTML'
)
I hope it helped. Good luck!
I have used something similar in a small script I have written; maybe it can give you a hint on how to go about it.
Approach 1: this selects the last of the available options.
The variable options in the code below holds the options available for the dropdown.
# Drop down selection, you have to change the id appropriately
select_datebox = driver.find_element_by_id('jrnyDateSrchTxt')
select_datebox.click()
time.sleep(2)
# Collect the <option> children of the drop-down
options = select_datebox.find_elements_by_tag_name('option')
# Negative indexing picks the last option directly
options[-1].click()
Approach 2: entering the option via a variable
# Drop down selection, you have to change the id appropriately
date_dropdown = driver.find_element_by_id("jrnyDateSrchTxt")
select = Select(date_dropdown)
time.sleep(1)
# Date selection: choose the option whose value equals datadate
select.select_by_value(datadate)
time.sleep(2)

Selenium - View results from form submission

I'm trying to fill out a form using Selenium and view the results. After the results are returned, I'll perform additional actions.
I am able to fill in my search terms into the field I want to, but when I try to click the "filter" button on the page, nothing happens.
Based on what I've read, it looks like I have to pass the __VIEWSTATE parameter into the next page request, but I can't figure out how to pass this parameter using Selenium.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
driver.get("http://mywebsite.com/FilterForm.aspx")

# Enter search term
elem = driver.find_element_by_xpath('//*[@id="SearchTerm"]')
elem.send_keys("Sample Search")

# Extract __VIEWSTATE value
elem = driver.find_element_by_xpath('//*[@id="__VIEWSTATE"]')
viewstate = elem.get_attribute("value")

# Filter Results
elem = driver.find_element_by_xpath('//*[@id="FilterResults"]')
elem.submit()

Python Selenium Screen Scrape

I am trying to screen scrape a website (snippet below)
The website takes an input, navigates to a second page and takes more inputs and finally displays a table. I fail at this step:
driver.find_element_by_xpath("//select[@id='agencies']/option[@value='13156']").click()
The error I get is:
selenium.common.exceptions.NoSuchElementException: Message: 'Unable to locate element:
Which is strange because I do see the element (Commented out Display id). Any help/pointers, please?
(I tried requests/RoboBrowser -- can't seem to get the post to work but failed there as well)
from selenium import webdriver
from selenium import selenium
from bs4 import BeautifulSoup

driver = webdriver.Firefox()
url = 'http://www.ucrdatatool.gov/Search/Crime/Local/OneYearofData.cfm'
driver.get(url)

# First page: pick the state option with value '1', then press "Next"
driver.find_element_by_xpath("//select[@id='state']/option[@value='1']").click()
#driver.find_element_by_xpath("//select[@id='groups']/option[@value='8']").click()
driver.find_element_by_xpath("//input[@type='submit' and @value='Next']").click()
driver.implicitly_wait(5) # seconds

# Display id tags
#elementsAll = driver.find_elements_by_xpath('//*[@id]')
#for elements in elementsAll:
#    print("id: ", repr(elements))
#    print("idName: ",elements.get_attribute("id"))
#    driver.implicitly_wait(5) # seconds

# Second page: choose group, year and agency by option value
driver.find_element_by_xpath("//select[@id='groups']/option[@value='2']").click()
driver.find_element_by_xpath("//select[@id='year']/option[@value=1986]").click()
driver.find_element_by_xpath("//select[@id='agencies']/option[@value='13156']").click()
Update -- the below works on Selenium. I intended to choose all options in the list box and save the query results...Thanks for the pointer, Alecxe!
# Select every entry of the 'agencies' list box by its visible text
select = Select(driver.find_element_by_id('agencies'))
for option in select.options:
    select.select_by_visible_text(option.text)

# Same for the 'groups' list box
select = Select(driver.find_element_by_id('groups'))
for option in select.options:
    select.select_by_visible_text(option.text)

# Pick the year, then request the table
driver.find_element_by_xpath("//select[@id='year']/option[@value=1985]").click()
driver.find_element_by_xpath("//input[@type='submit' and @value='Get Table']").click()
There is no option with 13156 value in select with agencies id. There are values from 102 to 522, you can see them by printing:
[element.get_attribute('value') for element in driver.find_elements_by_xpath('//select[@id="agencies"]/option')]
Also, instead of finding options by value, use Select and get options by text:
from selenium.webdriver.support.ui import Select

# Wrap the 'agencies' <select> so its options can be inspected and chosen
select = Select(driver.find_element_by_id('agencies'))
# print() call form, matching the other snippets on this page
print(select.options)
select.select_by_visible_text('Selma Police Dept')

Categories

Resources