Can't click on an element - python

Code that I am using:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

wd = webdriver.Chrome()
url = 'https://www.dailyfx.com/economic-calendar#next-seven-days'
wd.get(url)
time.sleep(20)

try:
    wd.find_element(By.XPATH, "/html/body/div[7]/div/div/button/img").click()
except:
    print('No Calendar Advertisement')

try:
    wd.find_element(By.XPATH, "/html/body/div[1]/div[2]/div/div/div[2]/button").click()
except:
    print('No Cookies Button')

time.sleep(3)

try:
    wd.find_element(By.XPATH, "/html/body/div[1]/div[1]/div/div/div[1]/span").click()
except:
    print('No App Advertisement')

# Clear calendar filter
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[1]/div[2]").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/label").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[2]/label").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[3]/label").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[4]/label").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[5]/label").click()
# Selecting only United States
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/span").click()
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[2]/div[2]/div[2]/div[2]/div[1]/div/div/div[1]/label").click()
# Closing calendar filter
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[3]/div/div[1]/div[1]/div[2]").click()
# Working part:
wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[4]/div[5]/table/tbody/tr[13]/td[1]/div/div[1]").click()
https://www.dailyfx.com/economic-calendar#next-seven-days
So, I am accessing this website and trying to click on an element. As you can see, the website shows some economic news, and when you click on one, it shows a graphic with information, which is my goal: opening the graph. For some reason, I can only open the graphic when the table data cell is td[1] (which only happens for the first economic news items). When the table data cell changes to td[3] (economic news further in the future), I can't open the graphic anymore.

This code works:

wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[4]/div[5]/table/tbody/tr[13]/td[1]/div/div[1]").click()

When changed to td[3], it doesn't work:

wd.find_element(By.XPATH, "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[4]/div[5]/table/tbody/tr[93]/td[3]/div/div[1]").click()

I tried clicking on multiple different elements, but it still doesn't work when trying to click on td[3] elements. I tried to open the graphic of an economic news item, but it only works for td[1], not for td[3].
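A minimal sketch of one thing worth trying (an addition here, not from the original post, and not verified against the live page): explicitly wait for the td[3] cell to become clickable and scroll it into view before clicking, since rows further down the calendar may sit outside the viewport or render late.

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Hypothetical target: the same tr[93]/td[3] cell quoted above
xpath = "/html/body/div[5]/div/div[4]/div[2]/div/div/div[1]/div/div/div[4]/div[5]/table/tbody/tr[93]/td[3]/div/div[1]"
cell = WebDriverWait(wd, 10).until(EC.element_to_be_clickable((By.XPATH, xpath)))
wd.execute_script("arguments[0].scrollIntoView(true);", cell)
cell.click()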

Related

Selenium Python Pass Date parameter using Calendar

I am trying to pull data from the following page:
https://www.lifeinscouncil.org/industry%20information/ListOfFundNAVs
I have to select the name of the company, select a date from the calendar, and click the "get data" button.
I am trying to achieve this using Selenium WebDriver with Chrome in Python, and I am stuck on how to pass the date parameter to the page.
It seems the page does a postback after a date is selected from the calendar.
The date needs to be selected from the calendar, otherwise the data is not returned by the webpage.
I have tried using a requests POST as well, but I am not able to get the NAV data.
I need to iterate this over a period of 5 years on a daily (trading days) basis.
PS: I am bad at understanding DOM elements and have basic knowledge of Python and coding. By profession I am a data analyst.
Thanks in advance.
Kiran Jain
Edit: adding my current code below:
from selenium import webdriver

url = 'https://www.lifeinscouncil.org/industry%20information/ListOfFundNAVs'
opt = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
opt.add_argument("--start-maximized")
# opt.add_argument("--headless")
opt.add_argument("--disable-notifications")
opt.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(options=opt)
driver.get(url)

insurer = driver.find_element_by_id("MainContent_drpselectinscompany")
nav_date = driver.find_element_by_id('MainContent_txtdateselect')
get_data_btn = driver.find_element_by_id('MainContent_btngetdetails')
options = insurer.find_elements_by_tag_name("option")
data = []
row = {'FirmName', 'SFIN', 'fundName', 'NAVDate', 'NAV'}

for option in options:
    print('here')
    print(option.get_attribute("value") + ' ' + option.text)
    if option.text != '--Select Insurer--':
        option.click()
        driver.find_element_by_id("MainContent_imgbtncalender").click()  # Calendar icon
        driver.find_element_by_link_text("June").click()  # Month
        driver.find_element_by_link_text("25").click()  # Date
        # Located again because clicking the date reloads the page
        get_data_btn = driver.find_element_by_id('MainContent_btngetdetails')
        get_data_btn.click()
        print('clicked')

driver.quit()
The date is in an "a" tag. You can try to select the date using link text:
driver.find_element_by_id("MainContent_imgbtncalender").click()  # Calendar icon
driver.find_element_by_link_text("27").click()  # Date
As per your comment, I tried to traverse through the dates, but it only worked for that particular month. I tried to use send_keys() on the date text box and it's not working. Below is the code to traverse one month:
import time  # for the sleeps below; driver is the Chrome instance from above

driver.get("https://www.lifeinscouncil.org/industry%20information/ListOfFundNAVs")
driver.find_element_by_id("MainContent_drpselectinscompany").click()
driver.find_element_by_xpath("//option[starts-with(text(),'Aditya')]").click()
driver.find_element_by_id("MainContent_imgbtncalender").click()
driver.find_element_by_link_text("1").click()
driver.find_element_by_id("MainContent_btngetdetails").click()

dateval = 2
while True:
    if dateval == 32:
        break
    try:
        driver.find_element_by_id("MainContent_imgbtncalender").click()
        driver.find_element_by_link_text(str(dateval)).click()
        driver.find_element_by_id("MainContent_btngetdetails").click()
        dateval += 1
        time.sleep(2)
    except:
        driver.switch_to.default_content()
        dateval += 1
        time.sleep(2)

time.sleep(5)
driver.quit()
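Since the goal is to iterate this over roughly 5 years of trading days, here is a small sketch of generating weekday dates with the standard library (the date range is only an example; holidays would still need separate handling, and each date still has to be picked through the calendar widget as above):

from datetime import date, timedelta

def weekday_dates(start, end):
    # Yield Monday-to-Friday dates between start and end (inclusive)
    current = start
    while current <= end:
        if current.weekday() < 5:  # 0 = Monday ... 4 = Friday
            yield current
        current += timedelta(days=1)

# d.strftime("%B") and str(d.day) give the "June" / "25" link texts used above
for d in weekday_dates(date(2016, 1, 1), date(2020, 12, 31)):
    print(d.isoformat())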

selenium click() not working on closing pop-up

I've been working on a fake "bet bot" in order to learn Selenium, but I'm having trouble closing a pop-up that sometimes shows up on the website that I want to get the odds from.
My approach is to use the function submit_bets() with a filtered games list in the form:
"League|team 1|team 2|Date|Probability in %|prediction (1, X or 2)"
I get the data from here. Then, for each of the filtered games, I open the league's bet page on the betting website and go through all the games there to find the filtered game and get the real odds. For each filtered game in filtered_games I need to open the betting website's page, and if the pop-up shows up, I can't get the data.
def submit_bets(filtered_games):
    driver = webdriver.Chrome(PATH)
    f = codecs.open("bets.txt", "r", encoding='utf-8')
    for line in filtered_games:
        l = line.split("|")
        print(l)
        driver.get(leagues_to_links.get(l[0]))
        scroll_down(driver)
        time.sleep(2)
        try:
            button = driver.find_element(By.XPATH, "/html/body/div[1]/div/section[2]/div[7]/div/div/div[1]/button")
            driver.execute_script("arguments[0].scrollIntoView(true)", button)
            button.click()
        except:
            print("no button")
        games = driver.find_elements_by_class_name("events-list__grid__event")
        for i in games:
            game = str(i.text).split("\n")
            try:
                if forebet_teams_to_betano.get(l[1]) in game[2] and forebet_teams_to_betano.get(l[2]) in game[3]:
                    print(game)
                    if str(l[5]) == "1":
                        print("1")
                        print(str(game[7]))
                    elif str(l[5]) == "X":
                        print("X")
                        print(str(game[9]))
                    else:
                        print("2")
                        print(str(game[11]))
            except:
                print("")
In this link you can find the HTML of the page when the pop-up shows up:
GitHub page with the HTML
In this link you can find the page files; you might have to refresh it a few times to get the pop-up.
Thank you for your time, and feel free to leave any tips to improve my code.
My solution:
# Closing popup for Portuguese betting site
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

URL = "https://www.betano.pt/sport/futebol/ligas/17083r/"

# Browser options
options = Options()
options.headless = True
firefox_profile = webdriver.FirefoxProfile()
firefox_profile.set_preference("browser.privatebrowsing.autostart", True)
browser = webdriver.Firefox(firefox_profile=firefox_profile)
browser.get(URL)

##### Copy this part into your own code #####
try:
    # Click pop-up close button
    browser.find_element_by_xpath('//button[@class="sb-modal__close__btn uk-modal-close-default uk-icon uk-close"]').click()
    print("Pop-up closed.")
except:
    print("Pop-up button not found.")
#########
This closes the pop-up. Keep in mind this relies on finding the button by its very specific class name. You'll need to adapt the try/except at the end into your own code.
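As a usage sketch, one way to adapt it (using the same find_element_by_xpath API as the rest of this code) is to wrap the try/except in a small helper and call it right after each driver.get(...) inside submit_bets():

def close_popup_if_present(driver):
    # Class name taken from the snippet above; it is site-specific and may change
    try:
        driver.find_element_by_xpath('//button[@class="sb-modal__close__btn uk-modal-close-default uk-icon uk-close"]').click()
        print("Pop-up closed.")
    except Exception:
        print("Pop-up button not found.")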

Why do I only get first page data when using selenium?

I want to crawl reviews from IMDb using Python. The page only displays 25 reviews until I click the "load more" button. I use the Python package Selenium to click the "load more" button automatically, which is successful. But why can't I get the data after "load more"? I just get the first 25 reviews repeatedly.
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time

seed = 'https://www.imdb.com/title/tt4209788/reviews'
movie_review = requests.get(seed)
PATIENCE_TIME = 60
LOAD_MORE_BUTTON_XPATH = '//*[@id="browse-itemsprimary"]/li[2]/button/span/span[2]'
driver = webdriver.Chrome('D:/chromedriver_win32/chromedriver.exe')
driver.get(seed)

while True:
    try:
        loadMoreButton = driver.find_element_by_xpath("//button[@class='ipl-load-more__button']")
        review_soup = BeautifulSoup(movie_review.text, 'html.parser')
        review_containers = review_soup.find_all('div', class_='imdb-user-review')
        print('length: ', len(review_containers))
        for review_container in review_containers:
            review_title = review_container.find('a', class_='title').text
            print(review_title)
        time.sleep(2)
        loadMoreButton.click()
        time.sleep(5)
    except Exception as e:
        print(e)
        break

print("Complete")
I want all the reviews, but now I can only get the first 25.
You have several issues in your script. A hardcoded wait is very inconsistent and certainly the worst option to rely on. The way you have written your scraping logic within the while True: loop slows the parsing process by collecting the same items over and over again. Moreover, every title produces a huge line gap in the output, which needs to be properly stripped. I've slightly changed your script to reflect the suggestions above.
Try this to get the required output:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

URL = "https://www.imdb.com/title/tt4209788/reviews"

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(URL)
soup = BeautifulSoup(driver.page_source, 'lxml')

while True:
    try:
        driver.find_element_by_css_selector("button#load-more-trigger").click()
        wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".ipl-load-more__load-indicator")))
        soup = BeautifulSoup(driver.page_source, 'lxml')
    except Exception:
        break

for elem in soup.find_all(class_='imdb-user-review'):
    name = elem.find(class_='title').get_text(strip=True)
    print(name)

driver.quit()
Your code is fine. Great, even. But you never fetch the 'updated' HTML for the web page after hitting the 'Load More' button. That's why you are getting the same 25 reviews listed every time.
When you use Selenium to control the web browser, you are clicking the 'Load More' button. This creates an XHR request (more commonly called an AJAX request) that you can see in the 'Network' tab of your web browser's developer tools.
The bottom line is that JavaScript (which runs in the web browser) updates the page. But in your Python program, you only get the HTML for the page once, statically, using the requests library.
seed = 'https://www.imdb.com/title/tt4209788/reviews'
movie_review = requests.get(seed)  # <-- SEE HERE? This is always the same HTML. You fetched it once in the beginning.
PATIENCE_TIME = 60
To fix this problem, you need to use Selenium to get the innerHTML of the div box containing the reviews, and then have BeautifulSoup parse that HTML again. We want to avoid picking up the entire page's HTML again and again, because it takes computational resources to parse the whole updated page over and over.
So, find the div on the page that contains the reviews and parse it again with BeautifulSoup. Something like this should work:
while True:
    try:
        allReviewsDiv = driver.find_element_by_xpath("//div[@class='lister-list']")
        allReviewsHTML = allReviewsDiv.get_attribute('innerHTML')
        loadMoreButton = driver.find_element_by_xpath("//button[@class='ipl-load-more__button']")
        review_soup = BeautifulSoup(allReviewsHTML, 'html.parser')
        review_containers = review_soup.find_all('div', class_='imdb-user-review')
        pdb.set_trace()
        print('length: ', len(review_containers))
        for review_container in review_containers:
            review_title = review_container.find('a', class_='title').text
            print(review_title)
        time.sleep(2)
        loadMoreButton.click()
        time.sleep(5)
    except Exception as e:
        print(e)
        break

selenium, webdriver.page_source not refreshing after click

I am trying to copy a web page's list of addresses for a given community service to a new document so I can geocode all of the locations in a map. Instead of being able to get a list of all the parcels at once, I can only download one page at a time, and each page is limited to 25 parcel numbers. As such, this would be extremely time-consuming.
I want to develop a script that will look at the page source (everything, including the 25 addresses contained in a table tag), click the next-page button, copy the next page, and so on until the max page is reached. Afterwards, I can format the text to be geocoding-compatible.
The code below does all of this, except it only copies the first page over and over again, even though I can clearly see that the program has successfully navigated to the next page:
# Open chrome
br = webdriver.Chrome()
raw_input("Navigate to web page. Press enter when done: ")
pg_src = br.page_source.encode("utf")
soup = BeautifulSoup(pg_src)
max_page = 122  # int(max_page)

# Open a text doc to write the results to
f = open(r'C:\Geocoding\results.txt', 'w')

# Write results page by page until max page number is reached
pg_cnt = 1  # start on 1 as we should already have the first page
while pg_cnt < max_page:
    tble_elems = soup.findAll('table')
    soup = BeautifulSoup(str(tble_elems))
    f.write(str(soup))
    time.sleep(5)
    pg_cnt += 1
    # Clicks the next button
    br.find_element_by_xpath("//div[@class='next button']").click()
    # Give some time for the page to load
    time.sleep(5)
    # Get the new page source (THIS IS THE PART THAT DOESN'T SEEM TO BE WORKING)
    page_src = br.page_source.encode("utf")
    soup = BeautifulSoup(pg_src)

f.close()
I faced the same problem. The problem, I think, is that some JavaScript has not completely loaded. All you need to do is wait until the element is loaded. The code below worked for me:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

delay = 10  # seconds
try:
    myElem = WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'legal-attribute-row')))
except:
    print("Loading took too much time!")
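As a rough sketch of how that wait could slot into the paging loop from the question (the class name waited on is the one from the snippet above and would have to match an element on your own page), note that page_source is re-read on every iteration so the new page actually gets parsed:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup

delay = 10  # seconds
while pg_cnt < max_page:
    f.write(str(soup.findAll('table')))  # write the tables for the page we are on
    br.find_element_by_xpath("//div[@class='next button']").click()
    # wait for the next page's rows instead of a fixed sleep
    WebDriverWait(br, delay).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'legal-attribute-row')))
    soup = BeautifulSoup(br.page_source, 'html.parser')  # re-parse the fresh source
    pg_cnt += 1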

web scraping a site without direct access

Any help is appreciated in advance.
The deal is, I have been trying to scrape data from this website (https://www.mptax.mp.gov.in/mpvatweb/leftMenu.do), but direct access to the website is not possible. Instead of the data I need, I get "invalid access". To access the website I must go to (https://www.mptax.mp.gov.in/mpvatweb/index.jsp) and then click on 'dealer search' from the dropdown menu while hovering over dealer information.
I am looking for a solution in Python.
Here's something I tried. I have just started web scraping:
import requests
from bs4 import BeautifulSoup

with requests.session() as request:
    MAIN = "https://www.mptax.mp.gov.in/mpvatweb/leftMenu.do"
    INITIAL = "https://www.mptax.mp.gov.in/mpvatweb/"
    page = request.get(INITIAL)
    jsession = page.cookies["JSESSIONID"]
    print(jsession)
    print(page.headers)
    result = request.post(INITIAL, headers={"Cookie": "JSESSIONID=" + jsession + "; zoomType=0", "Referer": INITIAL})
    page1 = request.get(MAIN, headers={"Referer": INITIAL})
    soup = BeautifulSoup(page1.content, 'html.parser')
    data = soup.find_all("tr", class_="whitepapartd1")
    print(data)
The deal is, I want to scrape data about firms based on their firm name.
Thanks for telling me a way, @Arnav and @Arman. Here's the final code:
from selenium import webdriver  # to work with the website
from bs4 import BeautifulSoup  # to scrape data
from selenium.webdriver.common.action_chains import ActionChains  # to initiate hovering
from selenium.webdriver.common.keys import Keys  # to input values

PROXY = "10.3.100.207:8080"  # IP:PORT or HOST:PORT
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)

# Ask for input
company_name = input("tell the company name")

# Open the website
browser = webdriver.Chrome(chrome_options=chrome_options)
browser.get("https://www.mptax.mp.gov.in/mpvatweb/")

# Hover to show the dropdown menu
element_to_hover_over = browser.find_element_by_css_selector("#mainsection > form:nth-child(2) > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(3) > td:nth-child(3) > a:nth-child(1)")
hover = ActionChains(browser).move_to_element(element_to_hover_over)
hover.perform()

# Click on dealer search from the dropdown menu
browser.find_element_by_css_selector("#dropmenudiv > a:nth-child(1)").click()

# We are now on the left-menu page
# Click on the radio button
browser.find_element_by_css_selector("#byName").click()

# Input the company name
inputElement = browser.find_element_by_css_selector("#showNameField > td:nth-child(2) > input:nth-child(1)")
inputElement.send_keys(company_name)

# Submit the form
inputElement.submit()

# Now we are on the dealer search page
# Scrape the data
soup = BeautifulSoup(browser.page_source, "lxml")

# Get the list of values we need
list = soup.find_all('td', class_="tdBlackBorder")

# Check the length of 'list' and on that basis decide what to print
if len(list) != 0:
    # company name at index 9, TIN no. at index 10,
    # registration status at index 11, circle name at index 15
    name = list[9].get_text()
    tin = list[10].get_text()
    status = list[11].get_text()
    circle = list[15].get_text()
    # Make a dictionary
    Company_Details = {"TIN": tin, "Firm name": name, "Circle_Name": circle, "Registration_Status": status}
    print(Company_Details)
else:
    Company_Details = {"VAT RC No": "Not found in database"}
    print(Company_Details)

# Close Chrome
browser.stop_client()
browser.close()
browser.quit()
Would you mind using a browser?
You can use a browser and access the link at xpath (//*[@id="dropmenudiv"]/a[1]).
You might have to download and put chromedriver in the mentioned directory if you haven't used chromedriver before. You can also use selenium + phantomjs if you want to do headless browsing (without the browser opening up each time); see the sketch after the example below.
from selenium import webdriver

xpath = '//*[@id="dropmenudiv"]/a[1]'
browser = webdriver.Chrome('/usr/local/bin/chromedriver')
browser.set_window_size(1120, 550)
browser.get('https://www.mptax.mp.gov.in/mpvatweb')
link = browser.find_element_by_xpath(xpath)
link.click()
url = browser.current_url
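On the headless note above, a minimal sketch using headless Chrome instead (PhantomJS has since been deprecated in Selenium; the chromedriver path is the same assumption as in the example above):

from selenium import webdriver

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")  # no browser window opens
browser = webdriver.Chrome('/usr/local/bin/chromedriver', chrome_options=chrome_options)
browser.get('https://www.mptax.mp.gov.in/mpvatweb')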
