I am building a web scraper that has to try a combination of multiple drop-down menu options and gather data from each combination.
So basically there're 5 drop-downs. I have to gather data from all of the possible combinations of the drop-down options. For each combination, I have to press a button to pull up the page with all the data on it. I am storing all the data in a dictionary.
This is the website: http://siops.datasus.gov.br/filtro_rel_ges_covid_municipal.php?S=1&UF=12;&Municipio=120001;&Ano=2020&Periodo=20
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
# Scrape one UF/year combination from the SIOPS COVID report filter page
# and store two figures from the resulting report in a dictionary.
# General Stuff about the website
path = '/Users/admin/desktop/projects/scraper/chromedriver'
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path=path)
website = 'http://siops.datasus.gov.br/filtro_rel_ges_covid_municipal.php'
driver.get(website)
# Initial Test: printing the title
print(driver.title)
print()
# Dictionary to Store stuff in
totals = {}
# Drop Down Menus
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
uf_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))
### THIS IS WHERE THE ERROR IS OCCURRING ###
# Choose from the drop down menus
# NOTE(review): year_select was located before the UF selection below; if
# choosing a UF re-renders the form, that earlier reference may be stale.
uf_select.select_by_value('29')
year_select.select_by_value('2020')
# Submit button on the page
submit_button = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/form/div[2]/div/input[2]')
submit_button.click()
# Pulling data from the webpage
nameof = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[1]/tbody/tr[2]').text
total_balance = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[3]/tbody/tr[9]/td[2]').text
paid_expenses = driver.find_element(By.XPATH, '//*[@id="arearelatorio"]/div[1]/div/table[4]/tbody/tr[11]/td[4]').text
# Update Dictionary with the new info
totals.update({nameof: [total_balance, paid_expenses]})
totals.update({'this is a test': ['testing stuff']})
# Print the final Dictionary and quit
print(totals)
driver.quit()
For some reason, this code does not work when trying 1 possible combination (selecting value 29 from the UF drop-down, as well as value 2020 from the year_select drop-down). If I comment out one of the two drop-down selections, then it works perfectly fine.
How do I try multiple combinations of drop-down options during a single iteration?
try this instead.
# Drop Down Menus
### THIS IS WHERE THE ERROR IS OCCURRING ###
# Choose from the drop down menus
# Locate each <select> immediately before using it, so the reference is
# obtained after any change caused by the previous selection.
uf_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbUF"]'))
uf_select.select_by_value('29')
year_select = Select(driver.find_element(By.XPATH, '//*[@id="cmbAno"]'))
year_select.select_by_value('2020')
This works for me. With your example I get a stale element reference error, which means that the element has disappeared. Haven't checked, but maybe the drop-down is somehow updated and loses its reference when you select from the other one.
Related
I am trying to scrape a website that populates a list of providers. the site makes you go through a list of options and then finally it populates a list of providers through a pop up that has an endless/continuous scroll.
i have tried:
from selenium.webdriver.common.action_chains import ActionChains
# Move the mouse pointer onto the element whose id is "my-id".
target = driver.find_element_by_id("my-id")
ActionChains(driver).move_to_element(target).perform()
but this code didn't work.
I tried something similar to this:
# Ask the browser to scroll the given element into view via JavaScript.
# NOTE(review): `list` is presumably a previously located WebElement; the
# name shadows the builtin `list`.
driver.execute_script("arguments[0].scrollIntoView();", list )
but this didnt move anything. it just stayed on the first 20 providers.
i tried this alternative:
# Scroll every currently loaded provider name into view, one after another.
main = driver.find_element_by_id('mainDiv')
recentList = main.find_elements_by_class_name('nameBold')
# NOTE(review): the loop body's indentation was lost in the original text;
# the pause is assumed to belong inside the loop -- confirm.
for list in recentList :
    driver.execute_script("arguments[0].scrollIntoView(true);", list)
    time.sleep(20)
but ended up with this error message:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
The code that worked the best was this one:
# Keep pressing END inside the pop-up so it loads further providers.
# As written, this loop has no exit condition.
while True:
    # Scroll down to bottom
    element_inside_popup = driver.find_element_by_xpath('//*[@id="mainDiv"]')
    element_inside_popup.send_keys(Keys.END)
    # Wait to load page
    time.sleep(3)
but this is an endless scroll that i dont know how to stop since "while True:" will always be true.
Any help with this would be great and thanks in advance.
This is my code so far:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
import pandas as pd
# Walk through the Anthem quote flow, open the provider search pop-up,
# scroll it, then dump two text blocks per provider row to a CSV file.
PATH = '/Users/AnthemScraper/venv/chromedriver'
driver = webdriver.Chrome(PATH)
#location for the website
driver.get('https://shop.anthem.com/sales/eox/abc/ca/en/shop/plans/medical/snq?execution=e1s13')
print(driver.title)
#entering the zipcode
search = driver.find_element_by_id('demographics.zip5')
search.send_keys(90210)
#making the scraper sleep for 5 seconds while the page loads
time.sleep(5)
#entering first name and DOB then hitting next
search = driver.find_element_by_id('demographics.applicants0.firstName')
search.send_keys('juelz')
search = driver.find_element_by_id('demographics.applicants0.dob')
search.send_keys('01011990')
driver.find_element_by_xpath('//*[@id="button/shop/getaquote/next"]').click()
#hitting the next button
driver.find_element_by_xpath('//*[@id="hypertext/shop/estimatesavings/skipthisstep"]').click()
#making the scraper sleep for 2 seconds while the page loads
time.sleep(2)
#clicking the no option to view all the health plans
driver.find_element_by_xpath('//*[@id="radioNoID"]').click()
driver.find_element_by_xpath('/html/body/div[4]/div[11]/div/button[2]/span').click()
#making the scraper sleep for 2 seconds while the page loads
time.sleep(2)
driver.find_element_by_xpath('//*[@id="hypertext/shop/medical/showmemydoctorlink"]/span').click()
time.sleep(2)
#section to choose the specialist. here we are choosing all
find_specialist=\
    driver.find_element_by_xpath('//*[@id="specializedin"]')
#this is the method for a dropdown
select_provider = Select(find_specialist)
select_provider.select_by_visible_text('All Specialties')
#choosing the distance. Here we click on 50 miles
choose_mile_radius=\
    driver.find_element_by_xpath('//*[@id="distanceInMiles"]')
select_provider = Select(choose_mile_radius)
select_provider.select_by_visible_text('50 miles')
driver.find_element_by_xpath('/html/body/div[4]/div[11]/div/button[2]/span').click()
#handling the endless scroll
# NOTE(review): this loop has no exit condition, so the scraping code below
# it is never reached as written. One possible stop rule would be to break
# once the number of 'firstRow' elements stops growing between passes.
while True:
    time.sleep(20)
    # Scroll down to bottom
    element_inside_popup = driver.find_element_by_xpath('//*[@id="mainDiv"]')
    element_inside_popup.send_keys(Keys.END)
    # Wait to load page
    time.sleep(3)
#block below allows us to grab the majority of the data. we would have to split it up in pandas since this info
#is nested in with classes
time.sleep(5)
main = driver.find_element_by_id('mainDiv')
sections = main.find_elements_by_class_name('firstRow')
pcp_info = []
#print(section.text)
for pcp in sections:
    #the site stores the information inside inner classes which make it difficult to scrape.
    #the solution would be to pull the entire text in the block and hope to clean it aftewards
    #innerText allows to pull just the text inside the blocks
    first_blox = pcp.find_element_by_class_name('table_content_colone').get_attribute('innerText')
    second_blox = pcp.find_element_by_class_name('table_content_coltwo').get_attribute('innerText')
    #creating columns and rows and assigning them
    pcp_items = {
        'first_block' : [first_blox],
        'second_block' : [second_blox]
    }
    pcp_info.append(pcp_items)
df = pd.DataFrame(pcp_info)
print(df)
df.to_csv('yerp.csv',index=False)
#driver.quit()
I would appreciate some help with Selenium.
I am trying to fill in a Google form for several entries: I need to input the first row of a df, then click "Submit", open a new form, and repeat for the second row and so on up to the n-th row.
I got stuck with a "NoSuchFrameException: Unable to locate frame with index" error after the first entry. I read in the Selenium docs that one can locate a window's frame in the console, but that gives me nothing (F12 --> find frame (any combination tried) --> no matches). There is no such thing in the Google form (or my search is simply wrong).
I haven't found anything on the issue, so I tried frame(0) - no luck.
Any tips would be appreciated. The whole code is below
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time
import pandas as pd
# Fill a Google form once per CSV row using Firefox.
# NOTE(review): the original indentation was lost; the nesting below is a
# best-effort reconstruction -- confirm against the author's script.
options = Options()
options.binary_location = FirefoxBinary(r"C:\Program Files\Mozilla Firefox\firefox.exe")
driver = webdriver.Firefox(executable_path=r'C:\WebDriver\bin\geckodriver.exe', firefox_options=options)
driver.implicitly_wait(10)
reg = pd.read_csv(r'C:\Users\User\Desktop\Form.csv', header=0, delimiter=';', sep=r'\s*;\s*')
reg_2 = reg.values.tolist()
driver.get('https://docs.google.com/forms/d/e/1FAIpQLSd9FQ33H5SMHelf9O1jjHl7FtLTtaTdFuC4dUFv-educaFiJA/viewform?vc=0&c=0&w=1&flr=0&gxids=7628')
try:
    for row in reg_2:
        element_count = 0
        for element in range(len(row)):
            first = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input")
            last = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input")
            mail = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input")
            last.send_keys(row[0])
            first.send_keys(row[1])
            mail.send_keys(row[2])
            submit = driver.find_element_by_xpath('//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div/span/span')
            submit.click()
            time.sleep(3)
            element_count +=1
            # These two calls are where the NoSuchFrameException is raised.
            driver.switch_to.frame(0)
            driver.switch_to.default_content()
finally:
    # Always close the browser, even if a lookup above fails.
    driver.quit()
driver.switch_to.frame(0)
driver.switch_to.default_content()
Remove these two lines of code; the Google form doesn't have any iframe in it.
If you want to submit again, click the "Submit another response" link:
# Click the link whose text contains "Submit another response"
# (presumably shown after a submission -- confirm on the live form).
driver.find_element_by_xpath('//a[contains(text(),"Submit another response")]').click()
Well, in fact after deleting the old form and starting anew the thing worked in the end. Had to change several other elements. Also added password and password_confirm field:
# Revised form-filling loop: one submission per row, then follow the
# response link back to a blank form.
# NOTE(review): the original indentation was lost; the nesting below is a
# best-effort reconstruction. `element_count`, `reg_2` and `driver` are
# expected to be defined by earlier code.
while len(reg_2) > element_count:
    try:
        for row in reg_2:
            first = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input")
            last = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input")
            mail = driver.find_element_by_xpath("/html/body/div/div[2]/form/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input")
            password = driver.find_element_by_xpath('/html/body/div/div[2]/form/div[2]/div/div[2]/div[4]/div/div/div[2]/div/div[1]/div/div[1]/input')
            password_confirm = driver.find_element_by_xpath('/html/body/div/div[2]/form/div[2]/div/div[2]/div[5]/div/div/div[2]/div/div[1]/div/div[1]/input')
            last.send_keys(row[0])
            first.send_keys(row[1])
            mail.send_keys(row[2])
            password.send_keys(row[3])
            password_confirm.send_keys(row[3])
            submit = driver.find_element_by_xpath('//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div/span/span')
            submit.click()
            time.sleep(3)
            element_count +=1
            #driver.find_element_by_xpath('/html/body/div[1]/div[2]/div[1]/div/div[4]/a').click()
            driver.find_element_by_css_selector('.freebirdFormviewerViewResponseLinksContainer > a:nth-child(1)').click()
    finally:
        driver.quit()
I have been trying for the past couple of days to select a dropdown and at least print out the options available, but I just cannot get it to work.
I am getting the this error when I run the module.
Traceback (most recent call last):
File "sel_test_elements2.py", line 20, in <module>
print ([o.text for o in select_element.options])
AttributeError: 'FirefoxWebElement' object has no attribute 'options'
Currently my code looks like this.
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
# The snippet uses `webdriver` but its import was missing from the listing.
from selenium import webdriver

# Define Global Variables
url = "https://games.pcaha.ca/teams/4329"
csv_file = "game_schedule_4329.csv"
games = []
# create a new Firefox session
driver = webdriver.Firefox()
driver.get(url)
driver.implicitly_wait(30)
# Locate the Sector and create a Select object
# NOTE(review): no Select wrapper is actually created here; select_element
# is a plain WebElement, which is why `.options` raises AttributeError.
select_element = driver.find_element_by_css_selector(".team-filters")
# this will print out strings available for selection on select_element, used in visible text below
print ([o.text for o in select_element.options])
The issue you face is the fact that this website is using react and doesn't use default Select and Options. They have a custom dropdown implemented, so the way to interact with it is the same as interaction with regular web elements, Select and Options won't work in this case.
I modified your code and it works for me in Chrome:
from selenium.webdriver import Chrome
from time import sleep
# Open the custom (non-<select>) drop-down and print its option texts.
# Define Global Variables
url = "https://games.pcaha.ca/teams/4329"
csv_file = "game_schedule_4329.csv"
games = []
# create a new Chrome session
driver = Chrome()
driver.get(url)
driver.implicitly_wait(30)
sleep(3) # make sure svgs load before interaction
# Click on arrow down (click() returns None, so its result is not kept)
driver.find_elements_by_css_selector(".team-filters svg")[1].click()
# Collect options
options = driver.find_elements_by_xpath("//div[contains(@id, 'react-select-2')]")
# Print text from options
print([o.text for o in options])
Note: when manually opening the dropdown in your browser and trying to use web inspector, it closes, so in order to get the html inside a dropdown, you can use something like:
# Grab the markup of the open drop-down menu so it can be inspected offline.
menu_selector = "div.css-kj6f9i-menu"
dropdown = driver.find_element_by_css_selector(menu_selector)
dropdown_html = dropdown.get_attribute('innerHTML')
I hope it helped. Good luck!
I have used something similar in a small script I have written; maybe it can give you a hint on how to go about it.
Approach 1: select the last of the available options.
The variable options in the code below holds the options available for the dropdown.
# Open the drop-down (adjust the id to match your page) ...
select_datebox = driver.find_element_by_id('jrnyDateSrchTxt')
select_datebox.click()
time.sleep(2)
# ... collect its <option> children and click the final one.
options = select_datebox.find_elements_by_tag_name('option')
options[-1].click()
Approach 2: entering the option via a variable
# Wrap the drop-down (adjust the id to match your page) in a Select helper.
date_dropdown = driver.find_element_by_id("jrnyDateSrchTxt")
select = Select(date_dropdown)
time.sleep(1)
# Pick the option whose value attribute equals `datadate`.
select.select_by_value(datadate)
time.sleep(2)
I'm trying to fill out a form using Selenium and view the results. After the results are returned, I'll perform additional actions.
I am able to fill in my search terms into the field I want to, but when I try to click the "filter" button on the page, nothing happens.
Based on what I've read, it looks like I have to pass the __VIEWSTATE parameter into the next page request, but I can't figure out how to pass this parameter using Selenium.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Fill in the search field of an ASP.NET filter form and submit it.
driver = webdriver.Chrome()
driver.get("http://mywebsite.com/FilterForm.aspx")
# Enter search term
elem = driver.find_element_by_xpath('//*[@id="SearchTerm"]')
elem.send_keys("Sample Search")
# Extract __VIEWSTATE value
elem = driver.find_element_by_xpath('//*[@id="__VIEWSTATE"]')
viewstate = elem.get_attribute("value")
# Filter Results
elem = driver.find_element_by_xpath('//*[@id="FilterResults"]')
elem.submit()
I am trying to screen scrape a website (snippet below)
The website takes an input, navigates to a second page and takes more inputs and finally displays a table. I fail at this step:
# Click the <option> with value 13156 inside the "agencies" <select>.
driver.find_element_by_xpath("//select[@id='agencies']/option[@value='13156']").click()
The error I get is:
selenium.common.exceptions.NoSuchElementException: Message: 'Unable to locate element:
Which is strange because I do see the element (Commented out Display id). Any help/pointers, please?
(I tried requests/RoboBrowser -- can't seem to get the post to work but failed there as well)
from selenium import webdriver
from selenium import selenium
from bs4 import BeautifulSoup
# Step through the UCR data tool: pick a state, go to the next page, then
# pick a group, a year and an agency.
driver = webdriver.Firefox()
url = 'http://www.ucrdatatool.gov/Search/Crime/Local/OneYearofData.cfm'
driver.get(url)
driver.find_element_by_xpath("//select[@id='state']/option[@value='1']").click()
#driver.find_element_by_xpath("//select[@id='groups']/option[@value='8']").click()
driver.find_element_by_xpath("//input[@type='submit' and @value='Next']").click()
driver.implicitly_wait(5) # seconds
# Display id tags
#elementsAll = driver.find_elements_by_xpath('//*[@id]')
#for elements in elementsAll:
# print("id: ", repr(elements))
# print("idName: ",elements.get_attribute("id"))
# driver.implicitly_wait(5) # seconds
driver.find_element_by_xpath("//select[@id='groups']/option[@value='2']").click()
driver.find_element_by_xpath("//select[@id='year']/option[@value=1986]").click()
# This is the lookup that raises NoSuchElementException in the question.
driver.find_element_by_xpath("//select[@id='agencies']/option[@value='13156']").click()
Update -- the below works on Selenium. I intended to choose all options in the list box and save the query results...Thanks for the pointer, Alecxe!
# Select every option in the "agencies" list box, then every option in
# "groups", choose a year and request the table.
select = Select(driver.find_element_by_id('agencies'))
for options in select.options:
    select.select_by_visible_text(options.text)
select = Select(driver.find_element_by_id('groups'))
for options in select.options:
    select.select_by_visible_text(options.text)
driver.find_element_by_xpath("//select[@id='year']/option[@value=1985]").click()
driver.find_element_by_xpath("//input[@type='submit' and @value='Get Table']").click()
There is no option with 13156 value in select with agencies id. There are values from 102 to 522, you can see them by printing:
# List the value attribute of every <option> in the "agencies" <select>.
[element.get_attribute('value') for element in driver.find_elements_by_xpath('//select[@id="agencies"]/option')]
Also, instead of finding options by value, use Select and get options by text:
from selenium.webdriver.support.ui import Select
# Wrap the "agencies" <select>, show its options, then choose one by label.
select = Select(driver.find_element_by_id('agencies'))
# print as a function call, matching the print() usage elsewhere on this page
print(select.options)
select.select_by_visible_text('Selma Police Dept')