I need to get data from this website: https://www.eex.com/en/market-data/natural-gas/spot
But changing the date in my script is not working.
I need to get data for every available date, so I need to change the date with Selenium.
Please help, I'm new to Python.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup as bs
import pandas as pd
from selenium_stealth import stealth
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url ="https://www.eex.com/en/market-data/natural-gas/spot"
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--headless")
chrome_options.add_argument("start-maximized")
browser = webdriver.Chrome(executable_path="chromedriver1/chromedriver", options=chrome_options)
browser.get("https://www.eex.com/en/market-data/natural-gas/spot")
time.sleep(10)
date_picker = WebDriverWait(browser, 20).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="symbolheader_ngs"]/div/div/div/input')))
date_picker.send_keys("2023-01-23")
time.sleep(20)
page_source = browser.page_source
s = bs(page_source)
table = s.select('table')[1]
final_list = []
for row in table.select('tr'):
    final_list.append([x.text for x in row.find_all(['td', 'th'])])
final_df = pd.DataFrame(final_list[2:], columns = final_list[:1])
final_df.columns = ['Spot', 'Last Price', 'Last Volume', 'End of Day Index', 'Volume Exchange','del']
df=final_df.drop('del',axis=1)
browser.quit()
df.to_excel('final_df.xlsx', index = False)
You need to clear the input -> enter the date -> press Enter. You also want to wait for the clickability, not the visibility, of the element. Lastly, you need to pick a date that actually has data.
from selenium.webdriver.common.keys import Keys
date_picker = WebDriverWait(browser, 30).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="symbolheader_ngs"]//*/input'))
)
date_picker.clear()
date_picker.send_keys("2023-01-20")
date_picker.send_keys(Keys.ENTER)
If you comment out chrome_options.add_argument("--headless") you will see it happen.
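Since your goal is every available date, you can wrap those steps in a loop over a date range. Here is a minimal sketch, assuming the table refreshes after Enter; extract_table is a hypothetical helper holding your existing BeautifulSoup parsing:
from datetime import date, timedelta
d = date(2023, 1, 2)
while d <= date(2023, 1, 20):
    if d.weekday() < 5:  # skip weekends, which have no trading data
        date_picker = WebDriverWait(browser, 30).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="symbolheader_ngs"]//*/input')))
        date_picker.clear()
        date_picker.send_keys(d.strftime("%Y-%m-%d"))
        date_picker.send_keys(Keys.ENTER)
        time.sleep(5)  # crude; waiting for the table contents to change would be more robust
        # extract_table(browser.page_source)  # hypothetical helper with your parsing logic
    d += timedelta(days=1)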
I need to improve the script above to extract daily data from this site. However, I am not getting any data except for the "Spot" column!
Thanks for the help!
UPDATE: Now I can't change the date.
A few small tweaks so that all columns can be extracted. The main idea is that the extraction logic needs to be checked against how the HTML DOM is actually structured.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup as bs
import pandas as pd
def get_df(page_source):
    soup = bs(page_source, 'html.parser')
    table = soup.select('table')[1]
    table_header = table.find("tr", {"class": "mv-quote-header-row"})
    table_body = table.select('tbody')
    result = {}
    for e_header in table_header.find_all('th'):
        if e_header.text:
            result[e_header.text] = []
    for e_r in table_body[0].find_all('tr'):
        # {'class': False} matches only cells without a class attribute, which skips the mv-quote-button cells
        r1 = [e.text for e in e_r.find_all('td', {'class': False})]
        result['Spot'].append(r1[0])
        result['Last Price'].append(r1[1])
        result['Last Volume'].append(r1[2])
        result['End of Day Index'].append(r1[3])
        result['Volume Exchange'].append(r1[4])
    df = pd.DataFrame(result)
    return df
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
#chrome_options.add_argument("--headless")
chrome_options.add_argument("start-maximized")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
#webdriver_service = Service()
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
browser.get("https://www.eex.com/en/market-data/natural-gas/spot")
page_source = browser.page_source
final_df = get_df(page_source)
browser.quit()
final_df.to_excel('final_df.xlsx', index = False)
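As an aside, since the target is a plain HTML table, pandas can usually parse it straight from the rendered source, which avoids the manual row loop. A sketch, assuming the table keeps the same index as soup.select('table')[1]:
from io import StringIO
tables = pd.read_html(StringIO(page_source))  # parses every <table> in the rendered HTML
final_df = tables[1]  # same index as soup.select('table')[1] above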
I'm trying to automate a data pull from the NOAA website (I was using requests, but there seems to be a bug, so I'm trying to use Selenium to automate the pulling of reports).
from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')
# Select type of data
t = Select(driver.find_element(By.CSS_SELECTOR, '#selectedDataset'))
t.select_by_visible_text('Daily Summaries')
The basic drop-down (data type, code above) and input field (search term) are fairly straightforward, no problems. Where I'm struggling is with the date range picker. I can get it to open into the calendar with this:
driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer').click()
I can't for the life of me get it to open the year or month drop-downs. I've tried Select, ActionChains, and a number of other things, such as:
driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer > div > div > div.noaa-datepicker-start-container.center.clearfix')
Select(driver.find_element(By.CSS_SELECTOR, '#dp1662812794185 > div > div'))
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#select.ui-datepicker-year")))
start_field = driver.find_element(By.CSS_SELECTOR, '#dp1662493859959 > div > div > div > select.ui-datepicker-year')
ActionChains(driver).move_to_element(start_field).click().send_keys('2020').perform()
I've also Googled extensively and haven't found anything that seems to work. Generally what happens is that it gives me a NoSuchElementException. I've actually gone line by line through the inspect pane to copy each of the elements in desperation to see if any would work (none did). Here are some other things I've tried:
TimeOutException when use link_text with explicit wait in selenium webdriver with python
Select DropDown value using Python Selenium
https://selenium-python.readthedocs.io/locating-elements.html#locating-elements
UnexpectedTagNameException: Message: Select only works on <select> elements, not on <li>
Error selecting li element from a dropdown using Selenium
https://www.swtestacademy.com/datepicker-using-selenium/
https://www.selenium.dev/selenium/docs/api/py/webdriver_support/selenium.webdriver.support.expected_conditions.html
How to select a specific date from a calender, using python-selenium?
Is the problem that Selenium is still looking for the selector on the background page and hasn't switched to the pop-up? It doesn't appear to be an iFrame (I think), but if that's the case, how do I direct Selenium to look at the pop-up instead?
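For what it's worth, a quick way to rule the iframe theory in or out is to count the frames on the page and try switching into one. A generic diagnostic sketch, not specific to the NOAA page:
frames = driver.find_elements(By.TAG_NAME, 'iframe')
print(f'found {len(frames)} iframe(s)')
if frames:
    driver.switch_to.frame(frames[0])  # look for the date picker inside the frame
    # ... try locating the element here ...
    driver.switch_to.default_content()  # switch back afterwards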
REVISED CODE (Thanks to Barry's solution below):
from selenium import webdriver
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument('window-size=1280,720')
webdriver_service = Service('C:/Program Files (x86)/Google/chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(driver)
wait = WebDriverWait(driver, 10)
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')
start_date = '2020-01-01'
end_date = '2020-12-31'
station = 'USW00014739'
# Select type of data
t = Select(driver.find_element(By.CSS_SELECTOR, '#selectedDataset'))
t.select_by_visible_text('Daily Summaries')
# Select date range
dataset = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))))
dataset.select_by_index(3)
daterange = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]')))
daterange.click()
months = {'01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun', '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'}
def select_date(calendar, date):
    container = '.noaa-datepicker-start-container' if calendar == 'start' else '.noaa-datepicker-end-container'
    day_select = date[8:]
    day_select = day_select[1:2] if day_select[0] == '0' else day_select
    parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, container)))
    year = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
    year.select_by_visible_text(date[:4])
    month = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
    month.select_by_visible_text(months.get(date[5:7]))
    day = parent.find_element(By.XPATH, f'//a[text() = "{day_select}" ]')
    day.click()

if int(end_date[8:]) > int(start_date[8:]):
    select_date('end', end_date)
    select_date('start', start_date)
else:
    select_date('start', start_date)
    select_date('end', end_date)
driver.find_element(By.CSS_SELECTOR, '#noaa-daterange-form > button.noaa-daterange-btn.noaa-daterange-applybtn').click()
driver.find_element(By.CSS_SELECTOR, '#selectedSearchString').send_keys(station)
driver.find_element(By.CSS_SELECTOR, '#searchSubmit').click()
driver.close()
This is one way of selecting the date on that page (I'm selecting just the start date, you can mirror the code for the end date as well):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(browser)
wait = WebDriverWait(browser, 20)
url = 'https://www.ncdc.noaa.gov/cdo-web/search'
browser.get(url)
dataset = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))))
dataset.select_by_index(3)
daterange = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]')))
daterange.click()
start_date_parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".noaa-datepicker-start-container")))
start_year = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
start_year.select_by_visible_text('2009')
print('selected 2009')
start_month = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
start_month.select_by_visible_text('Jul')
print('selected July')
start_day = start_date_parent.find_element(By.XPATH, '//a[text() = "13" ]')
start_day.click()
print('selected the 13th')
This will select 2009-07-13 as the starting date, and also print in the terminal:
selected 2009
selected July
selected the 13th
You should now be able to write the code for the end date as well, select/input the info in 'Search for'/'Search term', and click Apply.
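A sketch of that mirrored end-date code, assuming the same markup inside the end container (the leading dot in the day XPath scopes the search to that container):
end_date_parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.noaa-datepicker-end-container')))
end_year = Select(end_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
end_year.select_by_visible_text('2009')
end_month = Select(end_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
end_month.select_by_visible_text('Aug')
end_day = end_date_parent.find_element(By.XPATH, './/a[text() = "13"]')  # the dot keeps the search inside this container
end_day.click()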
Selenium documentation: https://www.selenium.dev/documentation/
I'm trying to scrape companies' contact data from this website:
https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=4
I can do this with the following code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
company_list= [] #create empty list
driver = webdriver.Chrome('/Users/rieder/Anaconda3/chromedriver_win32/chromedriver.exe') #define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1') # open Website
driver.find_element_by_id("cookiesNotificationConfirm").click(); #accept cookies
driver.find_element_by_xpath("//*[#id='content']/section[3]/div/div/form/div/div[2]/div[2]/table/tr[2]/td[1]/a").click(); #click on the first company namelink
contact_data = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[3]/div[4]/section[6]/div/div[2]/div[2]/div/div"))) #get the contactdata from the company you chose before
for cn in contact_data:
    company_list.append(cn.text) # this stores the text in the list
driver.back() #navigate to previous site
time.sleep(5) #wait for the pop-up window to appear
driver.find_element_by_xpath("/html/body/div[15]/div[3]/div[3]/div[1]/button[1]").click(), #deny the websites popup
time.sleep(5) #wait for the popup to vanish
driver.find_element_by_xpath("//*[#id='content']/section[3]/div/div/form/div/div[2]/div[2]/table/tr[3]/td[1]/a").click(); #click on the next company namelink
contact_data2 = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[3]/div[4]/section[6]/div/div[2]/div[2]/div/div"))) #get the contactdata from the company you chose before
for cn in contact_data2:
    company_list.append(cn.text) # this stores the text in the list
print(company_list) #show the list
My output is this:
['GUTex GmbH\nGerhard-Unland-Str. 1\n26683\nSaterland\nDeutschland', 'Robert Bosch GmbH\nRobert-Bosch-Platz 1\n70839\nGerlingen\nDeutschland']
Problem:
I want my code to do this for the whole list on page 1 and then move on to the next page and repeat, until I have, for example, 100 addresses in the list. I would do this with a while loop, but my XPaths for finding the address are too specific, so it would always loop over the same companies.
Thanks a lot in advance.
Try the code below for a one-page data extract. Then update the code to iterate over the next pages' records.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
company_list= [] #create empty list
driver = webdriver.Chrome() #define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1') # open Website
if len(driver.find_elements_by_id("cookiesNotificationConfirm")) > 0:
    driver.find_element_by_id("cookiesNotificationConfirm").click() # accept cookies
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
elementsSize = len(driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]'))
# To iterate over the company list and click on the company name then capture the address on navigated page
# come back to previous page and repeat the same.
for i in range(elementsSize):
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
    elements = driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]/a')
    company_name = elements[i].text
    elements[i].click() # click on the company name link
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,
        '//*[@id="contactInformation"]//div[@class="companyContactBox"]'))) # wait for the contact data of the chosen company
    contact_data = driver.execute_script("return document.getElementsByClassName('companyContactBox')[0].innerText")
    company_list.append(company_name + " : " + contact_data)
    driver.back() # navigate back to the previous page
print(company_list)
Thanks to Dilip Meghwal's comment above I could finish my code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
company_list= [] #create empty list
count = 25
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications" : 2}
chrome_options.add_experimental_option("prefs",prefs)
driver = webdriver.Chrome('/Users/rieder/Anaconda3/chromedriver_win32/chromedriver.exe', chrome_options=chrome_options) #define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1') # open Website
if len(driver.find_elements_by_id("cookiesNotificationConfirm")) > 0:
    driver.find_element_by_id("cookiesNotificationConfirm").click() # accept cookies
while len(company_list) < 1000:
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
    elementsSize = len(driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]'))
    # Iterate over the company list: click on each company name, capture the address on the detail page,
    # come back to the previous page and repeat.
    for i in range(elementsSize):
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
        elements = driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]/a')
        company_name = elements[i].text
        elements[i].click() # click on the company name link
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="contactInformation"]//div[@class="companyContactBox"]'))) # wait for the contact data
        contact_data = driver.execute_script("return document.getElementsByClassName('companyContactBox')[0].innerText")
        company_list.append(contact_data)
        driver.back() # navigate back to the previous page
        time.sleep(5)
    driver.find_element_by_xpath("//*[@id='content']/section[3]/div/div/form/div/div[2]/div[2]/div[2]/div/button[2]").click() # go to the next page
company_list = [w.replace('\n', ', ') for w in company_list]
print(company_list)
df_company_name = pd.DataFrame(company_list, columns =['Name'])
df_company_name.to_excel("company_name.xlsx")
I have a problem in Selenium scraping the data I want by selecting a specific date from the date picker on a website. However, the code I tried below (e.g. picking 11 April 2019) only generates data for the latest date (24 April 2019). How can I make it access the date that I want?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
import time # needed for the time.sleep calls below
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome('chromedriver.exe', options=options)
driver.set_window_size(1024, 768)
actions = ActionChains(driver)
ticker = '00001'
y = '2019'
m = '4'
d = '11'
year_list = {'2018': '//*[@id="date-picker"]/div[1]/b[1]/ul/li[1]/button', '2019': '//*[@id="date-picker"]/div[1]/b[1]/ul/li[2]/button'}
month_list = {str(i): f'//*[@id="date-picker"]/div[1]/b[2]/ul/li[{i}]/button' for i in range(1, 13)}
day_list = {str(i): f'//*[@id="date-picker"]/div[1]/b[3]/ul/li[{i}]/button' for i in range(1, 32)}
year = year_list[y]
month = month_list[m]
day = day_list[d]
ccass = driver.get('http://www.hkexnews.hk/sdw/search/searchsdw_c.aspx')
popup_datepicker = driver.find_element_by_xpath('//*[@id="txtShareholdingDate"]').click()
actions.move_to_element(popup_datepicker).click()
ccass_year = driver.find_element_by_xpath(year)
actions.move_to_element(ccass_year)
actions.double_click(ccass_year)
time.sleep(0.5)
actions.move_to_element(popup_datepicker).click()
ccass_month = driver.find_element_by_xpath(month)
actions.move_to_element(ccass_month)
actions.double_click(ccass_month)
time.sleep(0.5)
actions.move_to_element(popup_datepicker).click()
ccass_day = driver.find_element_by_xpath(day)
actions.move_to_element(ccass_day)
actions.double_click(ccass_day)
time.sleep(0.5)
ccass_search = driver.find_element_by_xpath('//*[@id="txtStockCode"]').send_keys(ticker)
ccass_search_click = driver.find_element_by_xpath('//*[@id="btnSearch"]').click()
You could set the value using JavaScript.
Try this:
driver.execute_script("document.getElementById('txtShareholdingDate').value='2019/4/11'")
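One caveat: setting the value through JavaScript bypasses the page's own event handlers, so if the table only refreshes on a change event you may need to dispatch one yourself. A sketch, assuming standard DOM events are what the page listens for:
driver.execute_script("""
    var el = document.getElementById('txtShareholdingDate');
    el.value = '2019/4/11';
    el.dispatchEvent(new Event('change', { bubbles: true }));
""")
driver.find_element_by_xpath('//*[@id="btnSearch"]').click() # then trigger the search as before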
Hello guys, I've been trying to web scrape some pages that contain values that change all the time, but so far I'm not able to get the prices. Can anybody help me? This is how far I've got:
import requests
import bs4
from urllib.request import Request, urlopen as uReq
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
my_url = 'https://www.cryptocompare.com/'
binary = FirefoxBinary('C:/Program Files/Mozilla Firefox/firefox.exe')
options = Options()
options.set_headless(headless=True)
options.binary = binary
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
driver = webdriver.Firefox(firefox_options=options, capabilities=cap, executable_path="C:/Users/Genti/AppData/Local/Programs/Python/Python36-32/Lib/site-packages/selenium/geckodriver.exe")
browser = webdriver.Firefox(firefox_binary=binary)
browser.get(my_url)
html = browser.execute_script("return document.documentElement.outerHTML")
sel_soup = soup(html, 'html.parser')
prices = sel_soup.findAll("td", {"class":"price"})
print(prices)
You can try the code below to get currency names and prices:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import datetime
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
my_url = 'https://www.cryptocompare.com/'
binary = FirefoxBinary('C:/Program Files/Mozilla Firefox/firefox.exe')
options = Options()
options.set_headless(headless=True)
options.binary = binary
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
driver = webdriver.Firefox(firefox_options=options, capabilities=cap, executable_path="C:/Users/Genti/AppData/Local/Programs/Python/Python36-32/Lib/site-packages/selenium/geckodriver.exe")
driver.get(my_url)
names = [name.text.split('\n')[0] for name in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'desktop-name')))]
prices = [price.text for price in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'current-price-value')))]
print(datetime.datetime.now())
for name, price in zip(names, prices):
    print(name + " - " + price)
In case you want all 10 prices, you'd have to store all the prices in a list, like this:
all_prices = driver.find_elements_by_css_selector("td[class='price'] div")
Then just iterate through the list to get the values:
for price in all_prices:
    print(price.text)
Let me know if you are facing any difficulties.
If you want to use BeautifulSoup and not Selenium WebDriver:
prices = sel_soup.select("td[class^='price'] > div")
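For example, reusing the sel_soup object already built from the rendered page source in the question:
for price_div in sel_soup.select("td[class^='price'] > div"):
    print(price_div.text.strip())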