I am trying to get the name and address of each restaurant from this website: https://www.kravekar.com/restaurants
The issue is that each time I return to the main page, I get this error:
Element <div class="restaurant-card">...</div> is not clickable at point (1129, 435). Other element would receive the click: <i class="fa fa-spinner fa-pulse"></i>
I tried adding a driver refresh and a time.sleep, but it is not working; I get the same error on the third iteration.
So far this is my reproducible code:
driver.get('https://www.kravekar.com/restaurants')

comment_button = driver.find_elements(by=By.CSS_SELECTOR, value="div.restaurant-card")
result = []
for btn in comment_button:
    btn.click()
    try:
        name = driver.find_element(by=By.XPATH, value='//*[@id="restaurant_menu_head"]/div/div/div[2]/div[1]/div/div/h4')
        name = name.text
        print(name)
        address = driver.find_element(by=By.XPATH, value='//*[@id="restaurant_menu_head"]/div/div/div[2]/div[1]/div/div/div/span')
        address = address.text
        print(address)
    except:
        print("No address or name")
    driver.execute_script("window.history.go(-1)")
When you call btn.click() or driver.execute_script("window.history.go(-1)"), the references to the elements you collected on the main page can become stale. So it is better to store the URLs of all the restaurants from the home page first, and then loop over the stored URLs.
driver.get('https://www.kravekar.com/restaurants')

cards = driver.find_elements(By.CSS_SELECTOR, ".restaurant-card-wrapper")
urls = [card.get_attribute('href') for card in cards]
names = [card.find_element(By.CSS_SELECTOR, ".restaurant-card-title").text for card in cards]

for idx, url in enumerate(urls):
    try:
        driver.get(url)
        # name = driver.find_element(By.CSS_SELECTOR, "#tab_menu_info h4.media-heading").text
        print(names[idx])
        address = driver.find_element(By.CSS_SELECTOR, "#tab_menu_info .restaurant_menu_info-addresss").text
        print(address)
    except:
        print("No address or name")
which outputs
Arby's
51171 Highway 6 & 24, Glenwood Springs, CO 81601
Springs Bar and Grill
722 Grand Ave, Glenwood Springs, CO 81601
etc.
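Alternatively, if you do want to click the cards in place, you could wait for the loading spinner from the original error message to disappear before each click. A minimal sketch, assuming the spinner is the i.fa-spinner element named in the error and that By is already imported:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
# Wait until the spinner overlay is gone, then until the card is actually clickable
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "i.fa-spinner")))
card = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.restaurant-card")))
card.click()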
Hello, I'm practicing Selenium on a practice form; this is the link for it:
https://demoqa.com/automation-practice-form
If you visit the page and inspect the dropdown menus for State and City, you will find that they consist only of div elements. I tried the following, which obviously didn't work:
dropdown = Select(d.find_element("xpath", '//*[@id="state"]'))
dropdown.select_by_index(0)
This is the error message:
Select only works on <select> elements, not on <div>
Can someone show me how to loop through the values of the menu, or is there another solution?
This code is working:
search_url = 'https://demoqa.com/automation-practice-form'
driver = webdriver.Chrome(options=options, executable_path=os.path.join(os.environ['USERPROFILE'], "Desktop") + '\\Python\\available Tender\\chromedriver\\chromedriver.exe')
driver.get(search_url)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# Remove the ad banner and the "Current Address" wrapper that cover the dropdown
element1 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[@id='adplus-anchor']")))
driver.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
""", element1)
element2 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[@id='currentAddress-wrapper']")))
driver.execute_script("""
    var element = arguments[0];
    element.parentNode.removeChild(element);
""", element2)

# Open the State dropdown, wait for its menu to render, then click the desired option
driver.find_element(By.XPATH, '//*[@id="state"]/div/div[2]/div').click()
e1 = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class,'menu')]")))
e1.find_element(By.XPATH, "//div[contains(text(),'NCR')]").click()
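If you also want to loop through every value in the menu rather than pick one by text, here is a rough sketch; it assumes, as above, that the menu is the div whose class contains 'menu' and that each option entry is a descendant div whose id contains 'option', which may need adjusting to the form's actual markup:
# Open the dropdown once to read all option labels
driver.find_element(By.XPATH, '//*[@id="state"]/div/div[2]/div').click()
menu = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class,'menu')]")))
options = [opt.text for opt in menu.find_elements(By.XPATH, ".//div[contains(@id,'option')]")]
print(options)

for value in options:
    # The menu closes after each selection, so re-open it before clicking the next option
    driver.find_element(By.XPATH, '//*[@id="state"]/div/div[2]/div').click()
    menu = WebDriverWait(driver, 4).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class,'menu')]")))
    menu.find_element(By.XPATH, ".//div[contains(text(),'" + value + "')]").click()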
url = 'http://www.mtv.de/charts/c6mc86/single-top-100?expanded=true'
chromedriver = Service("/usr/local/bin/chromedriver")
op = webdriver.ChromeOptions()
browser = webdriver.Chrome(service=chromedriver, options=op)
browser.get(url)
timeout = 60
browser.implicitly_wait(20)
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(5)
try:
    WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, '/html/body/div[1]/main/div/section/div/div/div/object')))
    print('========================')
except TimeoutException:
    browser.quit()

items = browser.switch_to.frame(browser.find_element(By.TAG_NAME, 'object'))
print(items)
itembox = items.find_elements(By.CLASS_NAME, 'charts-marslnet')
# print(itembox)
for item in itembox:
    print(item.text)
I have been trying to scrape the song name, artist and URL for each song from this website, but I am unable to access the HTML inside the <object> tag under the #document section. I am not able to figure out why I can't access it. Any insights on what the issue with my code might be, or on what should be done to access this HTML inside the #document section, would be very helpful.
(Screenshot: the HTML inside the <object> tag's #document section)
You can grab it from the direct url:
import requests
from bs4 import BeautifulSoup

url = 'https://mtv.marsl.net/demo/showdbcharts.php?c=4'

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

acts = soup.find_all('div', {'class': 'cmn-act'})
for each in acts:
    title = each.find_next('div', {'class': 'cmn-title'}).text.strip()
    artist = each.find_next('div', {'class': 'cmn-artist'}).text.strip()
    link = each.find_next('a', href=True)['href']
    print(f'{title}\n{artist}\n{link}\n\n')
Output:
abcdefu
Gayle
https://www.mtv.de/musikvideos/r9d9sl/abcdefu
Wenn ich will
Gzuz & Bonez MC
https://www.mtv.de/musikvideos/7evkst/10von10
10von10
Pajel
https://www.mtv.de/musikvideos/7evkst/10von10
Shivers
Ed Sheeran
https://www.mtv.de/musikvideos/miq9lq/shivers
Heat Waves
Glass Animals
https://www.mtv.de/musikvideos/l9rv5d/heat-waves
...
I am trying to get some data from Google, but I get the "Before you continue" Google popup. I am trying to make Selenium locate the button, click it, and return to getting the data, but even though I have the button ID in the code, it doesn't find it.
"""
Search on Google and returns the list of PAA questions in SERP.
"""
def newSearch(browser, query):
    if lang == "en":
        browser.get("https://www.google.com?hl=en")
        WebDriverWait(browser, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe")))
        agree = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="L2AGLb"]/span/span')))
        agree.click()
        browser.switch_to.default_content()
        searchbox = browser.find_element_by_xpath("//input[@aria-label='Search']")
    else:
        browser.get("https://www.google.com?hl=es")
        searchbox = browser.find_element_by_xpath("//input[@aria-label='Buscar']")
    searchbox.send_keys(query)
    sleepBar(2)
    tabNTimes()
    if lang == "en":
        searchbtn = browser.find_elements_by_xpath("//input[@aria-label='Google Search']")
    else:
        searchbtn = browser.find_elements_by_xpath("//input[@aria-label='Buscar con Google']")
    try:
        searchbtn[-1].click()
    except:
        searchbtn[0].click()
    sleepBar(2)
    paa = browser.find_elements_by_xpath("//span/following-sibling::div[contains(@class,'match-mod-horizontal-padding')]")
    hideGBar()
    return paa
Try clicking the inner div of the button itself. HTML of the agree popup:
<button id="L2AGLb" class="tHlp8d" data-ved="0ahUKEwj89p7Swob1AhVBxhoKHS0gDxIQiZAHCCE">
<div class="QS5gu sy4vM" role="none">
Acepto
</div>
</button>
Your selector should look like this:
(By.CSS_SELECTOR, "#L2AGLb > div")
Here is a working full example:
def test_google_...(self):
    driver = self.driver
    if self.LANGUAGE == "en":
        driver.get("https://www.google.com?hl=en")
    else:
        driver.get("https://www.google.com?hl=es")

    # Wait for the consent button's inner div and click it
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#L2AGLb > div"))
    )
    driver.find_element(By.CSS_SELECTOR, "#L2AGLb > div").click()

    # Wait for the search box, then type the query and submit
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'input[name="q"]'))
    )
    query = "your search query"
    driver.find_element(By.CSS_SELECTOR, 'input[name="q"]').send_keys(query)
    driver.find_element(By.CSS_SELECTOR, 'input[name="q"]').send_keys(Keys.RETURN)
    ...
I am trying to scrape user reviews of beer on beeradvocate.com to analyze user attitudes towards different beer types, but I only get results for the first few pages; the rest are empty.
Situation:
There are 500 different types of beer, and each beer has a different number of ratings and reviews.
The site only shows 1 page of results for guests; to see all the information, you need to log in.
My approach:
Get the beer link and the number of ratings for each beer, to define the loop range for each beer.
Log in using a requests session and POST.
def review_scrape(beer_link, number_of_ratings):
    reviews = []
    rate = []
    for pages_i in range(0, int(number_of_ratings), 25):  # site shows 25 results/page
        session = requests.session()  # Start the session
        payload = {'login': 'suzie102', 'password': ''}
        page1 = session.post("https://www.beeradvocate.com/community/login/login", data=payload)
        url = beer_link + '/?view=beer&sort=&start=%d' % (pages_i)
        page1 = session.get(url)
        time.sleep(3)
        soup1 = lxml.html.fromstring(page1.text)
        rate_i = soup1.xpath('//span[@class = "muted"]/text()')[8::3]
        print(url)
        reviews_i = soup1.xpath('//div/text()')
        reviews.append(reviews_i)
        print(len(reviews))
        rate.append(rate_i)
    return rate, reviews
Results:
There is only one problem that I've seen.
url = beer_link+'/?view=beer&sort=&start=%d'%(pages_i)
The / is redundant; what you need is
url = beer_link+'?view=beer&sort=&start=%d'%(pages_i)
That is why there is //?view in your printed links.
I can see that there are "next" anchor links leading to the next page. I would recommend a while loop or recursion.
Other than that, I can't see what is missing from your script. Everything else looks in order and it should work.
If you could give us more details, we might have more to work with.
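For example, a rough sketch of the while-loop idea, reusing the logged-in requests session; the "next" anchor selector and the helper name review_scrape_all_pages are assumptions, so adjust them to the site's actual markup:
import time
import lxml.html
from urllib.parse import urljoin

def review_scrape_all_pages(session, beer_link):
    rate, reviews = [], []
    url = beer_link + '?view=beer&sort=&start=0'
    while url:
        soup = lxml.html.fromstring(session.get(url).text)
        rate.append(soup.xpath('//span[@class="muted"]/text()')[8::3])
        reviews.append(soup.xpath('//div/text()'))
        # Follow the "next" anchor if it exists; stop when there is no next page
        next_href = soup.xpath('//a[contains(text(), "next")]/@href')
        url = urljoin(url, next_href[0]) if next_href else None
        time.sleep(3)
    return rate, reviews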
Update: thanks to everyone's comments, I tried scraping it with Selenium instead. It works now.
def webstite_scrape_p2(beer_link, number_of_ratings):
    driver = webdriver.Chrome('/home/sam/Downloads/chromedriver')
    url = 'https://www.beeradvocate.com/community/login/'
    driver.get(url)
    loginelement = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//form[@class="xenForm formOverlay"]//dd//input[@name ="login"]')))
    loginelement.send_keys('suzie102')
    pwelement = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//form[@class="xenForm formOverlay"]//dl[@class ="ctrlUnit"]//dd//ul//li[@id = "ctrl_pageLogin_registered_Disabler"]//input[@name ="password"]')))
    pwelement.send_keys('')
    page_click = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//form[@class="xenForm formOverlay"]//dl[@class ="ctrlUnit submitUnit"]//dd//input[@type ="submit"]')))
    page_click.click()

    rate = []
    reviews = []
    avg_user = []
    for link, i in zip(beer_link, number_of_rev):
        for pages_i in tqdm(range(0, int(i), 25)):  # site shows 25 results/page
            new_url = link + '?view=beer&sort=&start=%d' % (pages_i)
            print(new_url)
            driver.get(new_url)
            #print(driver.find_element_by_name("hideRatings").is_selected())
            #check_box = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//form[@style="display:inline;margin:0;padding:0;"]//input[@type = "checkbox"]')))
            #check_box.click()
            time.sleep(5)
            driver.get(new_url)
            page_source = driver.page_source
            soup = BeautifulSoup(page_source, 'html.parser')
            rate_i = [i.get_text() for i in soup.find_all('span', class_="muted")][8::3]
            rate.append(rate_i)
            reviews_i = [i.get_text() for i in soup.find_all('div')]
            reviews.append(reviews_i)
            avg_i = [i.get_text() for i in soup.find_all('span', class_="BAscore_norm")]
            avg_user.append(avg_i)
    return rate, reviews, avg_user
So I'm not sure if this is practically valid, but I was wondering if there's a way in Selenium to wait for one <a> tag out of two, based on its href value or the text it contains.
What I'm trying to do is open this page https://www.coingecko.com/en/exchanges, iterate through the exchange links, visit each one of them, and then click on the About tab of each of those newly opened pages, as they contain the info to be extracted. The code actually worked up until about halfway through, when it failed to identify the tab properly, throwing a StaleElementException and element-not-found errors, since I was locating it by link text.
The problem is that the About tab changes position from one page to the other, so it's either //ul[@role='tablist']/li[3] or li[2]; that's why I'm trying to wait for and click the right element based on its href value, since the href of one of the <a> tags on the page contains the text #about ---> //ul[@role='tablist']/li[3]/a
Apologies if it wasn't straightforward, but I was trying to pinpoint what the issue was until recently :)
This is the code I've attempted so far, if anyone can kindly point me in the right direction:
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException

webdriver = '/Users/karimnabil/projects/selenium_js/chromedriver-1'
driver = Chrome(webdriver)
num_of_pages = 4

exchanges_list = []
names_list = []
websites_list = []
emails_list = []
years_list = []
countries_list = []
twitter_list = []

for i in range(num_of_pages):
    url = 'https://www.coingecko.com/en/exchanges?page=' + str(i+1)
    driver.get(url)
    links = driver.find_elements_by_xpath("//tbody[@data-target='exchanges-list.tableRows']/tr/td[2]/div/span[2]/a")
    links = [url.get_attribute('href') for url in links]
    time.sleep(0.5)
    for link in links:
        driver.get(link)
        wait = WebDriverWait(driver, 2)
        wait.until(EC.text_to_be_present_in_element_value((By.XPATH, "//ul[@role='tablist']/li[position()=2 or position()=3]/a"), '#about'))
        try:
            name = driver.find_element_by_xpath("//div[@class='exchange-details-header-content']/div/h1").text
            website = driver.find_element_by_xpath("//div[@class='row no-gutters']/div[8]/a").get_attribute('href')
            email = driver.find_element_by_xpath("//div[@class='row no-gutters']/div[9]/a").get_attribute('href')
            year_est = driver.find_element_by_xpath("//div[@class='row no-gutters']/div[10]").text
            inc_country = driver.find_element_by_xpath("//div[@class='row no-gutters']/div[12]").text
            twitter = driver.find_element_by_xpath("//div[@class='row no-gutters']/div[16]/div[2]/div[2]/a").get_attribute('title')
        except:
            pass
        try:
            print('---------------')
            print('exchange name is : {}'.format(name))
            print('exchange website is : {}'.format(website))
            print('exchange email is : {}'.format(email))
            print('exchange established in year: {}'.format(year_est))
            print('exchange incorporated in : {}'.format(inc_country))
            print('exchange twitter handle is: {}'.format(twitter))
        except:
            pass
        try:
            names_list.append(name)
            websites_list.append(website)
            emails_list.append(email)
            years_list.append(year_est)
            countries_list.append(inc_country)
            twitter_list.append(twitter)
        except:
            pass

df = pd.DataFrame(list(zip(names_list, websites_list, emails_list, years_list, countries_list, twitter_list)), columns=['Ex_Names', 'Website', 'Support Email', 'Inc Year', 'Inc Country', 'Twitter Handle'])
CoinGecko2_data = df.to_csv('CoinGecko4.csv', index=False)
If you know the href, just wait for: //a[contains(@href, 'my-href')]
I am not sure if there is a built-in condition for this, but you can create your own custom wait. Here is an example:
https://seleniumbyexamples.github.io/waitcustom
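Applied to your case, a minimal sketch (assuming the About tab is the <a> under //ul[@role='tablist'] whose href contains 'about', as you described):
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Wait for the tab whose href contains 'about', whether it sits under li[2] or li[3], then click it
about_tab = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//ul[@role='tablist']//a[contains(@href, 'about')]"))
)
about_tab.click()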