Print out selenium text variable - python
I have a function that extracts data from a Twitter page, but when the script completes I receive no output. The function is meant to return various pieces of information from a tweet. I'm just trying to print out the second tweet on the page.
card definition
Function
def get_tweet_data(card):
    username = card.find_element_by_xpath('.//span').text
    handle = card.find_element_by_xpath('.//span[contains(text(), "@")]').text
    try:
        postdate = card.find_element_by_xpath('.//time').get_attribute('datetime')
    except NoSuchElementException:
        return
    comment = card.find_element_by_xpath('.//div[2]/div[2]/div[1]').text
    responding = card.find_element_by_xpath('.//div[2]/div[2]/div[2]').text
    text = comment + responding  # add both text fields together
    reply_cnt = card.find_element_by_xpath('.//div[@data-testid="reply"]').text
    retweet_cnt = card.find_element_by_xpath('.//div[@data-testid="retweet"]').text
    like_cnt = card.find_element_by_xpath('.//div[@data-testid="like"]').text
    tweet = (username, handle, postdate, text, reply_cnt, retweet_cnt, like_cnt)
    return tweet
Command line arguments
python twitter.py get_tweet_data(1)
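(A side note on that invocation: the shell cannot call a Python function directly, and whatever you pass arrives only as strings in sys.argv, so the script itself has to parse the argument and print the result. A minimal sketch, assuming the driver and the get_tweet_data function from above are already set up:)

import sys

if __name__ == "__main__":
    index = int(sys.argv[1]) - 1  # e.g. `python twitter.py 2` for the second tweet
    cards = driver.find_elements_by_xpath('//div[@data-testid="tweet"]')
    print(get_tweet_data(cards[index]))  # without print(), nothing reaches stdout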
So, this one took a while, but I was able to get the information for you. When I went through Twitter's HTML, six different XPath calls were needed:
# Count of number of Tweets
(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])
# First Card
(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])[1]
# Twitter Card Likes, Retweets, Replies
(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])[1]//div[contains(@aria-label, 'likes')]
# Twitter's Text Content
(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])[1]//div[@lang]
# Twitter's DateTime
(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])[1]//time[@datetime]
# Twitter href is the Twitter Account Poster
((//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']//div[@data-testid='tweet'])[1]//a[@role='link'])[1]
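Each of these can be sanity-checked in isolation before writing any helper functions; a quick sketch, assuming the imports and chrome_driver from the main program further down:

tweet_xpath = ("(//main[@role='main']//div[@data-testid='primaryColumn']"
               "//section[@aria-labelledby='accessible-list-0']"
               "//div[contains(@aria-label, 'Timeline:')]"
               "//div[contains(@style, 'position: absolute; width: 100%;')]"
               "//article[@role='article']//div[@data-testid='tweet'])")
# the count of matches should equal the number of tweet cards currently loaded
print(len(chrome_driver.find_elements(By.XPATH, tweet_xpath)))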
Once I determined the proper XPath calls, I then created a class to store my data:
class Twitter_Info:
    """This class contains the information regarding the Twitter Card"""
    CardNumber : int
    Likes : int
    Retweets : int
    Replies : int
    ContentInfo : str
    PostDate : str
    PosterAccount : str

    def print_info(self):
        print(f'Card Number: {self.CardNumber}')
        print(f'Poster Account: {self.PosterAccount}')
        print(f'Tweet Date: {self.PostDate}')
        print(f'Likes: {self.Likes}')
        print(f'Replies: {self.Replies}')
        print(f'Retweets: {self.Retweets}')
        print(f'Tweet Content: {self.ContentInfo}')
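(As an aside, bare class-level annotations like these declare types but never assign anything, so an attribute only exists once the caller sets it. A @dataclass variant with defaults would make the record a bit safer to construct; a sketch, not part of the original answer, with a hypothetical name to avoid clashing:)

from dataclasses import dataclass

@dataclass
class TwitterInfoRecord:  # hypothetical alternative to Twitter_Info
    CardNumber: int = 0
    Likes: int = 0
    Retweets: int = 0
    Replies: int = 0
    ContentInfo: str = ""
    PostDate: str = ""
    PosterAccount: str = ""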
With the class in place, I added several helper methods to handle the task at hand:
wait_for_tweets_to_load
number_of_tweets_displayed
scroll_to_card
get_card_likes_retweets_replies
get_card_text_content
get_card_datetime
get_card_poster_info
Once these were in place, I was able to scroll to each card and scrape the data.
MAIN PROGRAM - For Reference
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions
from selenium.common.exceptions import WebDriverException
import time
class Twitter_Info:
    """This class contains the information regarding the Twitter Card"""
    CardNumber : int
    Likes : int
    Retweets : int
    Replies : int
    ContentInfo : str
    PostDate : str
    PosterAccount : str

    def print_info(self):
        print(f'Card Number: {self.CardNumber}')
        print(f'Poster Account: {self.PosterAccount}')
        print(f'Tweet Date: {self.PostDate}')
        print(f'Likes: {self.Likes}')
        print(f'Replies: {self.Replies}')
        print(f'Retweets: {self.Retweets}')
        print(f'Tweet Content: {self.ContentInfo}')
def get_chrome_driver():
    """This sets up our Chrome Driver and returns it as an object"""
    path_to_chrome = r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
    chrome_options = webdriver.ChromeOptions()

    # Browser is displayed in a custom window size
    chrome_options.add_argument("window-size=1500,1000")

    return webdriver.Chrome(executable_path = path_to_chrome,
                            options = chrome_options)
def wait_displayed(driver : ChromeDriver, xpath: str, timeout: int = 5):
    try:
        DriverWait(driver, timeout).until(
            DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
        )
    except:
        raise WebDriverException(f'Timeout: Failed to find {xpath}')
def is_displayed(driver : ChromeDriver, xpath: str, timeout: int = 5):
    try:
        webElement = DriverWait(driver, timeout).until(
            DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
        )
        return webElement is not None
    except:
        return False
def scroll_to_element(driver : ChromeDriver, xpath: str, timeout: int = 5):
    try:
        webElement = DriverWait(driver, timeout).until(
            DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
        )
        driver.execute_script("arguments[0].scrollIntoView();", webElement)
    except:
        raise WebDriverException(f'Timeout: Failed to find {xpath}\nResult: Failed to Scroll')
def wait_for_tweets_to_load(driver : ChromeDriver):
    if is_displayed(driver, "//main[@role='main']//div[@data-testid='primaryColumn']//div[contains(@aria-label, 'Loading Tweets')]"):
        for counter in range(10):
            if is_displayed(driver, "//main[@role='main']//div[@data-testid='primaryColumn']//div[contains(@aria-label, 'Loading Tweets')]") and counter == 9:
                raise Exception("Page Failed To Load Tweets")
            elif not is_displayed(driver, "//main[@role='main']//div[@data-testid='primaryColumn']//div[contains(@aria-label, 'Loading Tweets')]"):
                break
            else:
                time.sleep(3)
def number_of_tweets_displayed(driver : ChromeDriver):
    """Note: This number will change dynamically when we scroll down on the page ( new Tweets will start loading )"""
    xpath = "{0}{1}{2}".format("(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]",
                               "//article[@role='article']//div[@data-testid='tweet'])")
    return len(driver.find_elements(By.XPATH, xpath))
def scroll_to_card(driver : ChromeDriver, card_number : int):
    xpath = "{0}{1}{2}".format("(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]",
                               "//article[@role='article']//div[@data-testid='tweet'])")
    scroll_to_element(driver, xpath = f'{xpath}[{card_number}]')
def get_card_likes_retweets_replies(driver : ChromeDriver, card_number : int):
    xpath = "{0}{1}{2}".format("(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]",
                               "//article[@role='article']//div[@data-testid='tweet'])")
    xpath = f'{xpath}[{card_number}]//div[contains(@aria-label, "likes")]'
    return driver.find_element(By.XPATH, xpath).get_attribute('aria-label').split(',')
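# For reference, the aria-label returned above reads like
# "2827 replies, 841 Retweets, 1106 likes" (inferred from the sample output below),
# so split(',') yields one chunk per metric and the leading number can be peeled off:
#   "2827 replies".strip().split(' ')[0]  ->  '2827'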
def get_card_text_content(driver : ChromeDriver, card_number : int):
    xpath = "{0}{1}{2}".format("(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]",
                               "//article[@role='article']//div[@data-testid='tweet'])")
    xpath = f'{xpath}[{card_number}]//div[@lang]'
    return driver.find_element(By.XPATH, xpath).text
def get_card_datetime(driver : ChromeDriver, card_number : int):
    xpath = "{0}{1}{2}".format("(//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]",
                               "//article[@role='article']//div[@data-testid='tweet'])")
    xpath = f'{xpath}[{card_number}]//time[@datetime]'
    return driver.find_element(By.XPATH, xpath).get_attribute('datetime')
def get_card_poster_info(driver : ChromeDriver, card_number : int):
    xpath = "{0}{1}{2}".format("((//main[@role='main']//div[@data-testid='primaryColumn']//section[@aria-labelledby='accessible-list-0']",
                               "//div[contains(@aria-label, 'Timeline:')]//div[contains(@style, 'position: absolute; width: 100%;')]//article[@role='article']",
                               "//div[@data-testid='tweet'])")
    xpath = f'{xpath}[{card_number}]//a[@role="link"])[1]'
    return driver.find_element(By.XPATH, xpath).get_attribute('href')
# Gets our chrome driver and opens our site
chrome_driver = get_chrome_driver()
chrome_driver.get("https://twitter.com/bbc")
wait_displayed(chrome_driver, "//div[@data-testid='placementTracking']//div[@role='button']//span[text()='Follow']")
wait_displayed(chrome_driver, "//section[@aria-label='Sign up']")
wait_displayed(chrome_driver, "//aside[@aria-label='Who to follow']")
wait_for_tweets_to_load(chrome_driver)

# Get number of Tweets that are displayed
numberOfTweetsDisplayed = number_of_tweets_displayed(chrome_driver)
twitter_cards = []

# Scrape Card Information
for cards in range(numberOfTweetsDisplayed):
    scroll_to_card(chrome_driver, (cards + 1))
    twitter_card = Twitter_Info()
    twitter_card.CardNumber = cards + 1

    # Get the Like | Retweet | Replies Info
    raw_info = get_card_likes_retweets_replies(chrome_driver, (cards + 1))
    twitter_card.Replies = raw_info[0].strip().split(' ')[0]
    twitter_card.Retweets = raw_info[1].strip().split(' ')[0]
    twitter_card.Likes = raw_info[2].strip().split(' ')[0]

    # Get the rest of our data
    twitter_card.ContentInfo = get_card_text_content(chrome_driver, (cards + 1))
    twitter_card.PostDate = get_card_datetime(chrome_driver, (cards + 1))
    twitter_card.PosterAccount = get_card_poster_info(chrome_driver, (cards + 1))

    # Display our information and add it to our list
    twitter_card.print_info()
    twitter_cards.append(twitter_card)
    print(f'Added Card Number {(cards + 1)} successfully')
    print('========================================================\n')

# Print how many twitter cards were scraped
print(f'Twitter Cards Added: {len(twitter_cards)}')
chrome_driver.quit()
chrome_driver.service.stop()
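One small hardening note on the listing above: if any locator times out mid-run, the raised exception skips the quit() call and leaves Chrome open. A minimal sketch of guarding the teardown (scrape_all_cards is a hypothetical wrapper around the scrape loop above):

try:
    scrape_all_cards(chrome_driver)  # hypothetical wrapper for the scrape loop above
finally:
    chrome_driver.quit()  # quit() also shuts the driver service down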
SAMPLE OUTPUT
Card Number: 1
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-06-22T11:22:53.000Z
Likes: 1106
Replies: 2827
Retweets: 841
Tweet Content: We’ve always been here to celebrate diversity. But we need to do more, and we will.
This is our commitment to long-term change. #RightTheScript
Read more about our £100m commitment here: https://bbc.in/37OPMLv
Added Card Number 1 successfully
========================================================
Card Number: 2
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-11-16T17:01:00.000Z
Likes: 100
Replies: 10
Retweets: 36
Tweet Content: More than 100 intact sarcophagi, dating back 2,500 years, have been unearthed near Cairo.
Added Card Number 2 successfully
========================================================
Card Number: 3
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-11-15T16:01:00.000Z
Likes: 68
Replies: 5
Retweets: 16
Tweet Content: With Cornish wildlife facing so many threats from humans, these residents do whatever they can to help
#Cornwall with
#simon_reeve
| 8:10pm |
#bbctwo
&
#bbciplayer
.
Added Card Number 3 successfully
========================================================
Card Number: 4
Poster Account: https://twitter.com/bbcasiannetwork
Tweet Date: 2020-11-14T09:44:41.000Z
Likes: 133
Replies: 7
Retweets: 33
Tweet Content: Happy Diwali and Bandi Chhor Divas!
Added Card Number 4 successfully
========================================================
Card Number: 5
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-11-13T22:18:26.000Z
Likes: 443
Replies: 13
Retweets: 86
Tweet Content: It's the clash of the tennis titans
#Andy_Murray
and... er,
#petercrouch
?
#ChildrenInNeed
Added Card Number 5 successfully
========================================================
Card Number: 6
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-11-13T20:57:23.000Z
Likes: 426
Replies: 25
Retweets: 109
Tweet Content: The official video for this year's star-studded
#bbccin
single, 'Stop Crying Your Heart Out' is here!
Watch now and don't forget to download the song to support #ChildrenInNeed
https://bbc.in/32I60EZ
Added Card Number 6 successfully
========================================================
Card Number: 7
Poster Account: https://twitter.com/BBC
Tweet Date: 2020-11-13T15:37:06.000Z
Likes: 18
Replies: 7
Retweets: 7
Tweet Content: It's time for #ChildrenInNeed
2020!
Starting RIGHT NOW on
#BBCOne
&
#BBCiPlayer
http://bbc.in/3kuv1cG
Added Card Number 7 successfully
========================================================
Twitter Cards Added: 7
Related
Scrape "Button" tag with Selenium
Here is my code currently. I am trying to scrape the Lyft price estimate. The data is in the "button" tag, which does not show up in the HTML returned by the code below. How can I get this data to show up?

import requests
from selenium import webdriver
import bs4

PATH = 'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(PATH)

oLat = 33.8026087
oLong = -84.3369491999999
dLat = 33.79149
dLong = -84.32312

url = ("https://ride.lyft.com/ridetype?origin=" + str(oLat) + "%2C" + str(oLong)
       + "&destination=" + str(dLat) + "%2C" + str(dLong)
       + "&ride_type=&offerProductId=standard")
driver.get(url)

content = driver.page_source
soup = bs4.BeautifulSoup(content)
print(soup)
print(url)

I tried this additional code, but it doesn't find the span and class for some reason, and I'm not sure why:

import requests
from selenium import webdriver
import bs4
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

PATH = 'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(PATH)

oLat = 33.7885662
oLong = -84.326684
dLat = 33.4486296
dLong = -84.4550443

url = ("https://ride.lyft.com/ridetype?origin=" + str(oLat) + "%2C" + str(oLong)
       + "&destination=" + str(dLat) + "%2C" + str(dLong)
       + "&ride_type=&offerProductId=standard")
driver.get(url)

spanThing = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "span.sc-7e9e68d9-0 lctkqn")))
print(spanThing)
driver.quit()
To extract the page source you need to induce WebDriverWait for the visibility_of_element_located() of a static element, and you can use the following locator strategies:

oLat = 33.8026087
oLong = -84.3369491999999
dLat = 33.79149
dLong = -84.32312

url = ("https://ride.lyft.com/ridetype?origin=" + str(oLat) + "%2C" + str(oLong)
       + "&destination=" + str(dLat) + "%2C" + str(dLong)
       + "&ride_type=&offerProductId=standard")
driver.get(url)

WebDriverWait(driver, 20).until(EC.visibility_of_element_located(
    (By.XPATH, "//span[contains(., 'Sign up / Log in to request ride')]")))
print(driver.page_source)
driver.quit()

Note: you have to add the following imports:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Console output (the full page source prints; heavily truncated here):

<html lang="en-US" class="js-focus-visible" data-js-focus-visible=""><head><meta name="viewport" content="width=device-width"><script type="module"> if (window.performance) { ... } </script> [...] <meta name="description" content="Request a Lyft ride in a web browser on your phone, tablet, or laptop – no app download required. Get a ride from a friendly driver in minutes."> [...] </body></html>
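From there, the printed source can be handed to BeautifulSoup to dig out the button contents the question was after; a short sketch (what the buttons actually contain depends on the live page):

from bs4 import BeautifulSoup

soup = BeautifulSoup(driver.page_source, "html.parser")
for button in soup.find_all("button"):
    # print each button's visible text; the price estimate should be among these
    print(button.get_text(strip=True))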
How can I scrape a mobile number and email from hidden HTML tags?
I am working on a small project to scrape some data from a website. Everything seems to work well, but I can't scrape the mobile number: it shows me a blank output in some cases and full HTML tags with the mobile phone in other cases. I want to scrape the phone number along with the other data; everything gets scraped correctly except the mobile phone. Here is the output I get:

Name: Klinik Seeschau AG
Address: Bernrainstrasse 17, 8280 Kreuzlingen
Phone:

Here is my code:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time

# initialize the Chrome driver
driver = webdriver.Chrome()

# navigate to the URL
driver.get("https://www.local.ch/en/")

# Searching for "Clinic"
def search_query(query):
    search = driver.find_element("name", "what")
    search.clear()
    time.sleep(3)
    search.send_keys(query)
    time.sleep(3)
    search.send_keys(Keys.RETURN)
    time.sleep(3)

# extract the source code
def source():
    source_code = driver.page_source
    # Sleep for 3 seconds
    time.sleep(3)
    # parse the source code with BeautifulSoup
    soup = BeautifulSoup(source_code, "html.parser")
    time.sleep(3)

# Extracting the data
def datasearch():
    searchResult = driver.find_element(By.CLASS_NAME, "search-header-results")
    data = searchResult.text
    print(f"there's {data}\n")
    time.sleep(2)

# Get the phone_numbers elements
def data_scrape():
    # data = driver.find_element(By.CLASS_NAME, "col-xs-12.col-md-8")
    # Loop over the cards and extract phone numbers
    components = driver.find_elements(By.CSS_SELECTOR, ".js-entry-card-container.row.lui-margin-vertical-xs.lui-sm-margin-vertical-m")
    for component in components:
        name = component.find_element(By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title").text
        addre = component.find_element(By.CSS_SELECTOR, ".card-info-address").text
        phone = component.find_element(By.CLASS_NAME, "lui-sm-margin-left-xxs").text
        print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")

search_query("Clinique")
source()
datasearch()
data_scrape()
time.sleep(2)
driver.quit()
Here is one way to get that information, based on your existing code; only the phone line in data_scrape() changes:

def data_scrape():
    components = driver.find_elements(By.CSS_SELECTOR, ".js-entry-card-container.row.lui-margin-vertical-xs.lui-sm-margin-vertical-m")
    for component in components:
        name = component.find_element(By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title").text
        addre = component.find_element(By.CSS_SELECTOR, ".card-info-address").text
        phone = component.find_element(By.XPATH, './/a[@title="Call"]').get_attribute('href').split('tel:')[1] if component.find_element(By.XPATH, './/a[@title="Call"]') else None
        print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")

Result in terminal:

there's 2013 results for Clinique in Switzerland, in French

Name: HerzKlinik Hirslanden
Address: Witellikerstrasse 40, 8008 Zürich
Phone: +41443879700

Name: Berner Klinik Montana - Clinique Bernoise Montana
Address: Impasse Palace Bellevue 1, 3963 Crans-Montana
Phone: +41274855288

Name: PZM Psychiatriezentrum Münsingen AG
Address: Hunzigenallee 1, 3110 Münsingen
Phone: +41317208111

Name: Spitalzentrum Biel AG
Address: Vogelsang 84, 2502 Biel/Bienne
Phone: +41323242424

Name: LipoFilling
Address: Dammstrasse 29, 8702 Zollikon
Phone: +41443971717

Name: Adipositas und StoffwechselZentrum Zürich
Address: Witellikerstrasse 36, 8008 Zürich
Phone: +41443874000

Name: Maison Tóā - Clinique esthétique
Address: Voie du Chariot 6, 1003 Lausanne
Phone: +41217917070

Name: Klinik Seeschau AG
Address: Bernrainstrasse 17, 8280 Kreuzlingen
Phone: +41716775353

Name: Clinica Holistica Engiadina SA
Address: Plaz 40, 7542 Susch
Phone: +41813002030

Name: Kantonsspital Baselland Liestal
Address: Rheinstrasse 26, 4410 Liestal
Phone: +41619252525

You can find Selenium documentation here.
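The underlying cause is worth spelling out: Selenium's .text only returns text the browser renders as visible, so a number hidden behind a reveal control comes back empty. Reading an attribute, as the href trick above does, sidesteps that; get_attribute('textContent') is another option, since it returns the DOM text whether or not it is displayed. A small sketch reusing the same locator (inside the loop, so component is assumed to be defined):

call_link = component.find_element(By.XPATH, './/a[@title="Call"]')
phone_from_href = call_link.get_attribute('href').split('tel:')[1]  # from the tel: link
phone_from_dom = call_link.get_attribute('textContent').strip()     # DOM text, even if hidden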
Message: element click intercepted
I am trying to get the name of the restaurant and the address from this website: https://www.kravekar.com/restaurants

The issue is that each time I return to the main page, I get this error:

Element <div class="restaurant-card">...</div> is not clickable at point (1129, 435). Other element would receive the click: <i class="fa fa-spinner fa-pulse"></i>

I tried to implement a driver refresh and a time sleep, but it is not working; I get the same error on the third iteration. So far this is my reproducible code:

driver.get('https://www.kravekar.com/restaurants')

comment_button = driver.find_elements(by=By.CSS_SELECTOR, value="div.restaurant-card")
result = []

for btn in comment_button:
    btn.click()
    try:
        name = driver.find_element(by=By.XPATH, value='//*[@id="restaurant_menu_head"]/div/div/div[2]/div[1]/div/div/h4')
        name = name.text
        print(name)
        address = driver.find_element(by=By.XPATH, value='//*[@id="restaurant_menu_head"]/div/div/div[2]/div[1]/div/div/div/span')
        address = address.text
        print(address)
    except:
        print("No address or name")
    driver.execute_script("window.history.go(-1)")
When you do btn.click() or driver.execute_script("window.history.go(-1)"), it is possible that the reference to the correct webpage is lost. So it is better to store the URLs of all the restaurants right from the home page, and then loop over the stored URLs:

driver.get('https://www.kravekar.com/restaurants')

cards = driver.find_elements(By.CSS_SELECTOR, ".restaurant-card-wrapper")
urls = [card.get_attribute('href') for card in cards]
names = [card.find_element(By.CSS_SELECTOR, ".restaurant-card-title").text for card in cards]

for idx, url in enumerate(urls):
    try:
        driver.get(url)
        # name = driver.find_element(By.CSS_SELECTOR, "#tab_menu_info h4.media-heading").text
        print(names[idx])
        address = driver.find_element(By.CSS_SELECTOR, "#tab_menu_info .restaurant_menu_info-addresss").text
        print(address)
    except:
        print("No address or name")

which outputs:

Arby's
51171 Highway 6 & 24, Glenwood Springs, CO 81601
Springs Bar and Grill
722 Grand Ave, Glenwood Springs, CO 81601

etc.
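A related hedge: the bare except above will also swallow timing problems, so if a detail page is merely slow rather than missing the element, an explicit wait gives the data a chance to appear before giving up. A sketch using the same selector as the answer:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

address = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "#tab_menu_info .restaurant_menu_info-addresss"))
).text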
Handling Date picker using Selenium
<input placeholder="MM/DD/YYYY" autocomplete="on" type="text" class="form-control" value="01/01/2020" style="height: 40px; color: (25, 25, 25); font-weight: bold; font-size: 14px; background: >

error: Message: element not interactable
Can you check this:

# You can select the datepicker based on the XPath index [1][2]
date_input = driver.find_element_by_xpath("((//input[@type='text']))")
date_input.click()
date_input.send_keys(Keys.CONTROL, "a")
date_input.send_keys(Keys.BACKSPACE)
date_input.send_keys("02/14/2020", Keys.RETURN)
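Since the reported error is "element not interactable", it may also help to wait until the field is genuinely clickable before typing; a sketch along those lines (the locator is the one from the answer above):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

date_input = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
date_input.click()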
This is my Python script for the date picker. Hope this can be useful in some way.

from selenium import webdriver

# set chromedriver.exe path
driver = webdriver.Chrome(executable_path="C:\\chromedriver.exe")
driver.implicitly_wait(0.5)

# launch URL
driver.get("https://jqueryui.com/datepicker/")

# switch to frame
l = driver.find_element_by_xpath("//iframe[@class='demo-frame']")
driver.switch_to.frame(l)

# identify element inside frame
d = driver.find_element_by_id("datepicker")
d.click()

# identify list of all dates
m = driver.find_elements_by_xpath("//table/tbody/tr/td")

# iterate over list
for i in m:
    # verify required date then click
    if i.text == '3':
        i.click()
        break

# get selected date
s = d.get_attribute('value')
print("Date entered is: ")
print(s)

# browser quit
driver.quit()
How do I get the link inside href?
I am building a bot and want to get the href part out (which is /VegSpringRoll/status/1205121838302420993) from the HTML of twitter.com below:

<a class="css-4rbku5 css-18t94o4 css-901oao r-1re7ezh r-1loqt21 r-1q142lx r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-3s2u2q r-qvutc0" title="9:46 PM · Dec 12, 2019" href="/VegSpringRoll/status/1205121838302420993" dir="auto" aria-label="Dec 12" role="link" data-focusable="true"></a>

My script is:

class TwitterBot:
    def __init__(self, username, password):
        self.username = username
        self.password = password
        self.bot = webdriver.Firefox()

    def login(self):
        bot = self.bot
        bot.get('https://twitter.com/login')
        time.sleep(1)
        email = bot.find_element_by_class_name('js-username-field.email-input.js-initial-focus')
        password = bot.find_element_by_class_name('js-password-field')
        email.clear()
        password.clear()
        email.send_keys(self.username)
        password.send_keys(self.password)
        password.send_keys(Keys.RETURN)
        time.sleep(1)

    def like_tweet(self, hashtag):
        bot = self.bot
        bot.get('https://twitter.com/search?q=%23' + hashtag + '&src=type')
        time.sleep(1)
        for i in range(1, 10):
            bot.execute_script('window.scrollTo(0,document.body.scrollHeight)')  # this scrolls 1 time only
            time.sleep(1)
        tweets = bot.find_elements_by_class_name('css-4rbku5 css-18t94o4 css-901oao r-1re7ezh r-1loqt21 r-1q142lx r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-3s2u2q r-qvutc0')
        links = [elem.get_attribute('href') for elem in tweets]
        print(links)

Everything works until the tweets part, but nothing gets printed. Would anybody please assist?
Compound class names are not permitted in Selenium's find_elements_by_class_name, so you have to use a CSS selector or XPath instead. The following code should work:

tweets = bot.find_elements_by_css_selector('.css-4rbku5.css-18t94o4.css-901oao.r-1re7ezh.r-1loqt21.r-1q142lx.r-1qd0xha.r-a023e6.r-16dba41.r-ad9z0x.r-bcqeeo.r-3s2u2q.r-qvutc0')
links = [elem.get_attribute('href') for elem in tweets]
print(links)

Please read this discussion to get more info.
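As a design note, those css-* and r-* class names are generated by Twitter's build pipeline and change without warning, so a selector keyed to something stable tends to survive longer. A hedged alternative sketch that keys on the status URL pattern instead (untested against the live page):

# every tweet permalink anchor carries "/status/" in its href
tweets = bot.find_elements_by_css_selector("a[href*='/status/']")
links = [elem.get_attribute('href') for elem in tweets]
print(links)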