Instagram Selenium Web scraping - #Followers - python

I am scraping an Instagram page where I need to get the user's:
number of posts
Number of followers
I managed to log in to Instagram, search for the user (in this example 'leonardodicaprio'), and go to his page. However, I am not able to select the text.
Can someone help please?
Thanks!
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
class InstatestSpider(scrapy.Spider):
    """Log in to Instagram through Selenium and open one influencer profile.

    All browser automation runs inside ``__init__``; the class attributes
    below are the usual Scrapy spider settings.
    """

    name = 'instatest'
    allowed_domains = ['www.instagram.com']
    start_urls = ['https://www.instagram.com/accounts/login']

    def __init__(self, *args, **kwargs):
        """Drive Chrome through the login flow and screenshot the profile.

        Args:
            *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)

        chrome_option = Options()
        #chrome_option.add_argument("--headless")
        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_option)
        driver.set_window_size(1920, 1080)
        driver.get("https://www.instagram.com/accounts/login")
        logging.info('Website opened...')

        # XPath attribute tests are written with "@"; "[#name=...]" is not
        # valid XPath and can never match the login inputs.
        username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="username"]')))
        password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="password"]')))
        username.clear()
        username.send_keys("username")
        logging.info('Typing Username...')
        password.clear()
        password.send_keys("password")
        logging.info('Typing Password...')

        # click() returns None, so the results are deliberately not bound.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))).click()
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT save password...')
        # A second dialog also carries a "Not Now" button, found by its text.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT turn notifications on...')
        logging.info('Logging Successful...')

        influencer = "leonardodicaprio"
        driver.get("https://www.instagram.com/" + influencer + "/")
        time.sleep(5)
        driver.save_screenshot('Influencer_Home_Page.png')
P.S: For the number of followers I want to get the exact number to the nearest digit as found in the title attribute in the selector. Please see picture below:
insta
Getting this error when running:
error in jupyterlab
# Shared explicit wait: each lookup below blocks for at most 20 seconds.
wait = WebDriverWait(driver, 20)

# Post counter: visible text of the span inside the "profile_posts" link.
posts_locator = (By.CSS_SELECTOR, "a[href$='profile_posts'] span")
posts_span = wait.until(EC.visibility_of_element_located(posts_locator))
number_of_post = posts_span.text
print(number_of_post)

# Follower counter: read from the span's title attribute rather than its text.
followers_locator = (By.CSS_SELECTOR, "a[href$='followed_by_list'] span")
followers_span = wait.until(EC.visibility_of_element_located(followers_locator))
number_of_follower = followers_span.get_attribute('title')
print(number_of_follower)
error
Updated code:
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
class InstatestSpider(scrapy.Spider):
    """Log in to Instagram through Selenium and print one profile's counters.

    All browser automation runs inside ``__init__``; the class attributes
    below are the usual Scrapy spider settings.
    """

    name = 'instatest'
    allowed_domains = ['www.instagram.com']
    start_urls = ['https://www.instagram.com/accounts/login']

    def __init__(self, *args, **kwargs):
        """Log in, open the profile, print post/follower counts, screenshot.

        Args:
            *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)

        chrome_option = Options()
        #chrome_option.add_argument("--headless")
        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_option)
        driver.set_window_size(1920, 1080)
        driver.get("https://www.instagram.com/accounts/login")
        logging.info('Website opened...')

        # XPath attribute tests are written with "@"; "[#name=...]" is not
        # valid XPath and can never match the login inputs.
        username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="username"]')))
        password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="password"]')))
        username.clear()
        username.send_keys("username")
        logging.info('Typing Username...')
        password.clear()
        password.send_keys("password")
        logging.info('Typing Password...')

        # click() returns None, so the results are deliberately not bound.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))).click()
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT save password...')
        # A second dialog also carries a "Not Now" button, found by its text.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT turn notifications on...')
        logging.info('Logging Successful...')

        influencer = "leonardodicaprio"
        driver.get("https://www.instagram.com/" + influencer + "/")
        time.sleep(5)

        wait = WebDriverWait(driver, 20)
        # Post count is the visible text of the span in the posts link.
        number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
        print(number_of_post)
        # Follower count is taken from the span's title attribute.
        number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
        print(number_of_follower)
        driver.save_screenshot('Influencer_Home_Page.png')

You can use the CSS selectors below to get the number of posts and the number of followers. To read the title attribute, use .get_attribute().
# One explicit wait (20 second timeout) is reused for both lookups.
wait = WebDriverWait(driver, 20)

# Number of posts: text of the span inside the link ending in "profile_posts".
post_count_ready = EC.visibility_of_element_located(
    (By.CSS_SELECTOR, "a[href$='profile_posts'] span")
)
number_of_post = wait.until(post_count_ready).text
print(number_of_post)

# Number of followers: the title attribute of the span inside the link
# ending in "followed_by_list".
follower_count_ready = EC.visibility_of_element_located(
    (By.CSS_SELECTOR, "a[href$='followed_by_list'] span")
)
number_of_follower = wait.until(follower_count_ready).get_attribute('title')
print(number_of_follower)
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Related

Why getting exception time out error or element not found on this line By.CSS_SELECTOR, "#typeahead-input-control-35 .up-menu-item-text"

Problem: I don't know why I am getting these errors on this line: wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#typeahead-input-control-35 .up-menu-item-text"))).click(). I do not get any error if I run this code in a separate file, but when I merge it into my main code, this line does not work.
Problem line wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#typeahead-input-control-35 .up-menu-item-text"))).click()
Code that works fine if run in a separate file
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Standalone Upwork job search: accept the cookie banner, open the advanced
# search dialog, enter each keyword and pick its typeahead suggestion.
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('F:\\work\\chromedriver_win32\\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.upwork.com/nx/jobs/search/?sort=recency"
driver.get(url)
keys = ["Web Scraping", "Selenium Webdriver", "Data Scraping", "Selenium", "Beautiful Soup", "Scrapy", "Data Extraction", "Automation"]

# Cookie banner: wait until it is clickable, pause, then wait again and click.
cookie_button = (By.CSS_SELECTOR, 'button#onetrust-accept-btn-handler')
wait.until(EC.element_to_be_clickable(cookie_button))
time.sleep(5)
wait.until(EC.element_to_be_clickable(cookie_button)).click()

# XPath attribute tests use "@"; "contains(#title, ...)" is not valid XPath.
wait.until(EC.element_to_be_clickable((By.XPATH, '//button[contains(@title,"Advanced Search")]'))).click()
search_input = (By.XPATH, '//input[contains(@aria-labelledby,"tokenizer-label")]')
wait.until(EC.element_to_be_clickable(search_input)).clear()
wait.until(EC.element_to_be_clickable(search_input)).click()
time.sleep(3)

# NOTE(review): the source lost its indentation; the loop is assumed to cover
# typing the keyword and clicking its suggestion, with the final search
# button clicked once afterwards -- confirm against the original post.
for key in keys:
    search_field = wait.until(EC.element_to_be_clickable(search_input))
    search_field.click()
    # Type one character at a time with a short pause between keystrokes.
    for character in key:
        search_field.send_keys(character)
        time.sleep(0.05)
    time.sleep(2)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#typeahead-input-control-35 .up-menu-item-text"))).click()
    time.sleep(2)

wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'[data-test="modal-advanced-search-search-btn"]'))).click()
line not work in this code
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from fake_useragent import UserAgent
import pyttsx3
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def main():
    """Log in to Upwork, tick the search filters and run a keyword search.

    Starts Chrome with automation indicators disabled and a random user
    agent, signs in, applies a fixed list of filters and then enters every
    keyword in the advanced search dialog.

    NOTE(review): the source lost its indentation, so the extent of the
    ``range(20)`` loop is a guess; everything after it is treated as the
    loop body. Confirm against the original script.
    """
    options = Options()
    service = Service('F:\\work\\chromedriver_win32\\chromedriver.exe')
    options.add_argument("start-maximized")
    options.add_argument('--disable-blink-features=AutomationControlled') #Adding the argument
    options.add_experimental_option("excludeSwitches",["enable-automation"])#Disable chrome contrlled message (Exclude the collection of enable-automation switches)
    # The original set this option twice; once is enough.
    options.add_experimental_option('useAutomationExtension', False) #Turn-off useAutomationExtension
    prefs = {"credentials_enable_service": False,
             "profile.password_manager_enabled": False}
    options.add_experimental_option("prefs", prefs)
    ua = UserAgent()
    userAgent = ua.random
    options.add_argument(f'user-agent={userAgent}')
    driver = webdriver.Chrome(service=service , options=options)
    wait = WebDriverWait(driver, 10)
    url = 'https://www.upwork.com/nx/jobs/search/?sort=recency'
    driver.get(url)
    time.sleep(7)

    # XPath attribute tests use "@"; the "#class"/"#id" forms in the
    # original are not valid XPath.
    logbtn = driver.find_element(By.XPATH,'//a[contains(@class,"nav-item login-link d-none d-lg-block px-20")]')
    logbtn.click()
    time.sleep(7)
    pop_one = driver.find_element(By.XPATH, '//*[@id="onetrust-accept-btn-handler"]')
    pop_one.click()
    time.sleep(7)

    # Placeholder credentials; the address needs "@", not "#".
    search_box = driver.find_element(By.NAME, "login[username]")
    search_box.send_keys('my_mailaccountid@gmail.com')
    time.sleep(7)
    Login_button = driver.find_element(By.ID, "login_password_continue")
    Login_button.submit()
    time.sleep(7)
    pass_box = driver.find_element(By.ID, "login_password")
    pass_box.send_keys('myupworkpassword000')
    Login_btn = driver.find_element(By.ID, "login_control_continue")
    Login_btn.submit()
    time.sleep(7)
    closebtn = driver.find_element(By.XPATH, '//*[@id="main"]/div/div/aside/div/div[1]/div[1]/section/div[2]/div[2]/div/div/div/div[3]/div/div[2]/div[2]/div[2]/div[3]/div/button')
    closebtn.click()
    url = 'https://www.upwork.com/nx/jobs/search/?sort=recency'
    driver.get(url)
    time.sleep(7)

    # (XPath to click, seconds to sleep afterwards), in the original order.
    filter_clicks = [
        ('//span[contains(text(),"Intermediate")]', 3),
        ('//span[contains(text(),"Expert")]', 3),
        ('//span[contains(text(),"Less than 5")]', 3),
        ('//span[contains(text(),"5 to 10")]', 3),
        ('//span[contains(text(),"Payment verified")]', 7),
        ('//strong[contains(text(),"Job type")]', 7),
        ('//span[contains(text(),"Hourly")]', 3),
        ('//span[contains(text(),"Less than $100")]', 3),
        ('//span[contains(text(),"$100 to $500")]', 3),
    ]
    keys = ["Web Scraping", "Selenium Webdriver", "Data Scraping", "Selenium", "Beautiful Soup", "Scrapy", "Data Extraction", "Automation"]
    search_input = (By.XPATH, '//input[contains(@aria-labelledby,"tokenizer-label")]')

    for _ in range(20):
        for xpath, pause in filter_clicks:
            driver.find_element(By.XPATH, xpath).click()
            time.sleep(pause)

        wait.until(EC.element_to_be_clickable((By.XPATH, '//button[contains(@title,"Advanced Search")]'))).click()
        wait.until(EC.element_to_be_clickable(search_input)).clear()
        wait.until(EC.element_to_be_clickable(search_input)).click()
        time.sleep(3)
        for key in keys:
            search_field = wait.until(EC.element_to_be_clickable(search_input))
            search_field.click()
            # Type one character at a time with a short pause between keys.
            for character in key:
                search_field.send_keys(character)
                time.sleep(0.05)
            time.sleep(7)
            # NOTE(review): the numeric suffix in this id looks generated and
            # may differ between sessions -- verify it in the logged-in page.
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#typeahead-input-control-35 .up-menu-item-text"))).click()
            time.sleep(7)
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'[data-test="modal-advanced-search-search-btn"]'))).click()


main()

How do I press load more button while scraping comments on Instagram with Selenium Python

I'm working on a project that can scrape comments off posts on instagram and write them into an excel file.
Here's my code:
from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Scrape commenter names and comment texts from a list of Instagram posts
# and collect them in a pandas DataFrame.
url = [
    "https://www.instagram.com/p/CcVTqRtJ2gj/",
    "https://www.instagram.com/p/CcXpLHepve-/",
]
user_names = []
user_comments = []

# Raw string keeps the backslash literal without relying on "\c" being an
# unrecognised escape sequence; the resulting path is unchanged.
driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url[0])
time.sleep(3)

username = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"input[name='password']")))
username.clear()
username.send_keys("username")
password.clear()
password.send_keys("pwd")
# click() returns None, so both names below end up bound to None.
Login_button = (
    WebDriverWait(driver, 2)
    .until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']")))
    .click()
)
time.sleep(4)
not_now = (
    WebDriverWait(driver, 30)
    .until(
        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))
    )
    .click()
)

# An XPath attribute test needs "@"; "[class='...']" looks for a child
# element named <class> and never matches the button's class attribute.
LOAD_MORE_XPATH = "//button[@class='wpO6b ']"

for n in url:
    try:
        driver.get(n)
        time.sleep(3)
        load_more_comment = driver.find_element(By.XPATH, LOAD_MORE_XPATH)
        print("Found {}".format(str(load_more_comment)))
        i = 0
        # Expand the comment list at most 10 times per post.
        while load_more_comment.is_displayed() and i < 10:
            load_more_comment.click()
            time.sleep(1.5)
            load_more_comment = driver.find_element(By.XPATH, LOAD_MORE_XPATH)
            print("Found {}".format(str(load_more_comment)))
            i += 1
    except Exception as e:
        # Best effort: when the button cannot be found or clicked, report it
        # and scrape whatever comments are already loaded.
        print(e)

    # Collect per post, so that dropping the first entry only affects this
    # post. The original popped index 0 of the shared lists, which failed on
    # an empty list and removed another post's comment on later URLs.
    post_names = []
    post_comments = []
    for c in driver.find_elements(By.CLASS_NAME, "gElp9 "):
        container = c.find_element(By.CLASS_NAME, "C4VMK")
        name = container.find_element(By.CLASS_NAME, "_6lAjh ").text
        content = container.find_element(By.CLASS_NAME, "MOdxS ").text
        content = content.replace("\n", " ").strip()
        post_names.append(name)
        post_comments.append(content)
        print(content)

    # The first entry is skipped, as in the original -- presumably it is the
    # post caption rather than a comment; verify on a live page.
    user_names.extend(post_names[1:])
    user_comments.extend(post_comments[1:])
    # export(user_names, user_comments)

driver.close()
df = pd.DataFrame(list(zip(user_names, user_comments)), columns=["Name", "Comments"])
# df.to_excel("Anime Content Engagement.xlsx")
print(df)
And the load-more-comments part, doesn't seem to work.
Since there is more than one button with the same class name, I'm not able to choose the right button to click on. I'm a beginner, so if anyone has a solution for this, it would be great.
you can select by aria-label text:
# Locate the svg icon by its aria-label; replace TEXT with the label shown in the page.
driver.find_element_by_css_selector("svg._8-yf5[aria-label='TEXT']")
I believe the label text changes according to Instagram's language setting, so use the text that appears in your own session.

Selenium For Loop Stuck on a popup message

I have an application that is almost working as intended. The problem arises after it runs through the loop on the 5th instance. The search states there are two results which results in the same end result. When this occurs I'd like to select the first of the two.
The popup messages looks like the following:
I'm using the following code to create the list and then loop:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Scrape the RxClass category list, then save each category's result table
# to its own CSV file.

# Raw string so the backslashes are literal instead of relying on "\P",
# "\C" and "\c" being unrecognised escapes; the path itself is unchanged.
CHROMEDRIVER_PATH = r'C:\Program Files\Chrome Driver\chromedriver.exe'

URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    driver.get(URL)
    category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]
    classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.propText")))]

    # Split "Name (count)" into a name column and a count column.
    dfObj = pd.DataFrame(category)
    dfObj.columns =['Category']
    dfObj.dropna(inplace = True)
    new = dfObj["Category"].str.split("(", n = 1, expand = True)
    dfObj["New Category"]= new[0]
    dfObj["Count"]= new[1]
    dfObj.drop(columns =["Category"], inplace = True)
    dfObj['Count'] = dfObj['Count'].str.rstrip(')')
    dfObj['IsNumber'] = dfObj['Count'].str.isnumeric()
    # Keep the explicit "== True": isnumeric() yields NaN for missing counts,
    # and the comparison turns those into False instead of breaking the mask.
    dfObj = dfObj[(dfObj['IsNumber'] == True)]
    searchcat = dfObj['New Category'].tolist()
    print(searchcat)
    dfObj.to_csv('tabledf.csv',index=False)
    time.sleep(8)
finally:
    # Close the browser even when one of the waits above times out.
    driver.quit()

for search in searchcat:
    page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        driver.get(page)
        time.sleep(4)
        table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
        time.sleep(4)
        # File name is the first 30 characters of the category name.
        filename = search[0:30]+'table.csv'
        # Second table on the page, without its last column.
        pd.read_html(driver.page_source)[1].iloc[:,:-1].to_csv(filename,index=False)
        time.sleep(4)
    finally:
        # Without this, a timeout on the table wait left the browser open.
        driver.quit()
The loop will continue to run if I manually click each search result. However, I would like for selenium to always select the first option. How would I go about this?
Updated Code:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
import time
# Scrape the RxClass category list, then save each category's result table
# (plus class id/name/type columns) to its own CSV file. One browser is
# shared by every request and closed by the context manager.
# The raw string keeps the Windows path's backslashes literal.
with webdriver.Chrome(r'C:\Program Files\Chrome Driver\chromedriver.exe') as driver:
    URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
    driver.get(URL)
    category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]

    # Split "Name (count)" into a name column and a count column.
    dfObj = pd.DataFrame(category)
    dfObj.columns =["Category"]
    dfObj.dropna(inplace = True)
    new = dfObj["Category"].str.split("(", n = 1, expand = True)
    dfObj["New Category"]= new[0]
    dfObj["Count"]= new[1]
    dfObj.drop(columns =["Category"], inplace = True)
    dfObj["Count"] = dfObj["Count"].str.rstrip(')')
    dfObj["IsNumber"] = dfObj["Count"].str.isnumeric()
    # Keep the explicit "== True": isnumeric() yields NaN for missing counts,
    # and the comparison turns those into False instead of breaking the mask.
    dfObj = dfObj[(dfObj["IsNumber"] == True)]
    searchcat = dfObj["New Category"].tolist()
    dfObj.to_csv('tabledf.csv',index=False)
    time.sleep(3)

    for search in searchcat:
        page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
        # Reuse the shared driver. The original started a new Chrome here on
        # every iteration, shadowing the context-managed one and leaking it.
        driver.get(page)
        table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))

        # When the option dialog shows up within one second, pick its first
        # entry; otherwise carry on.
        modal_wait = WebDriverWait(driver, 1)
        try:
            modal_el = modal_wait.until(EC.visibility_of_element_located((By.ID, 'optionModal')))
            modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
        except TimeoutException:
            pass

        # File name is the first 30 characters of the category name.
        filename = search[0:30]+'table.csv'
        classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(2)")))]
        classname = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(1)")))]
        classtype = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(3)")))]
        # Second table on the page, without its last column.
        df = pd.read_html(driver.page_source)[1].iloc[:,:-1]
        df["ClassID"] = pd.Series(classid)
        df["ClassName"] = pd.Series(classname)
        df["ClassType"] = pd.Series(classtype)
        df.to_csv(filename,index=False)
        time.sleep(4)
    # No explicit driver.quit(): leaving the with-block quits the browser.
First off, I suggest that you use the with context manager. It handles opening and closing the driver (Chrome) by itself, which ensures that the browser is still closed if an exception is raised.
To do so, use:
# Context-manager form; the "..." stands for the code that uses the driver.
with webdriver.Chrome() as driver:
...
In your code I see you close/open a new browser for each URL. This is not needed and not doing so will speed up your script. Just use driver.get() to change the URL.
For your main issue, just add a portion of code that detects the modal and chooses the first option. Something along these lines:
# Give the option dialog one second to become clickable; when it does,
# click its first ".uloption" entry, otherwise carry on silently.
modal_wait = WebDriverWait(driver, 1)
try:
    modal_ready = EC.element_to_be_clickable((By.ID, 'optionModal'))
    modal_el = modal_wait.until(modal_ready)
    first_option = modal_el.find_element(By.CSS_SELECTOR, '.uloption')
    first_option.click()
except TimeoutException:
    pass
You must include the following imports:
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

how to get the dm text in instagram from selenium?

I want to get the text of the sender on my console, I tried beautiful soup for scraping but it didn't work. I had used several other features like XPath and different class names on selenium but not able to resolve this issue.
Here, is my code,
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time
# Log in to Instagram, open a direct-message thread with each listed user
# and print the message text found there.
myemail = "<username>"
mypassword = "<password>"
friendusernames = ["<>sender username"]
PATH = "C:/Chromedriver.exe"
driver = webdriver.Chrome(PATH)
url = "https://www.instagram.com/"
driver.get(url)

usernamebox = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.NAME, 'username')))
usernamebox.send_keys(myemail)
# Locators use the By-based API throughout, matching the waits.
passwordbox = driver.find_element(By.NAME, 'password')
passwordbox.send_keys(mypassword)
loginbutton = driver.find_element(By.CSS_SELECTOR, '.Igw0E')
loginbutton.click()
print("Logging in")

dmbtn = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.xWeGp')))
dmbtn.click()
notificationsnotnow = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.HoLwm')))
notificationsnotnow.click()

# NOTE(review): the source lost its indentation; reading the message is
# assumed to happen once per friend inside the loop -- confirm.
for friendusername in friendusernames:
    searchuser = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.EQ1Mr')))
    searchuser.click()
    searchuserbox = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.j_2Hd')))
    searchuserbox.send_keys(friendusername)
    time.sleep(3)
    firstuser = driver.find_element(
        By.XPATH, '/html/body/div[5]/div/div/div[2]/div[2]/div[1]/div')
    firstuser.click()
    pressingnext = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.rIacr')))
    pressingnext.click()

    # '_7UhW9 > span' is a CSS selector, not a class name, so it is located
    # with By.CSS_SELECTOR, and waited for because the thread loads after the
    # click. Other classes tried earlier: .xLCgt, .MMzanKV-D4, .p1tLr, .hjZTB
    names = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '._7UhW9 > span'))).text
    print(names)
    time.sleep(1)
I want this text on my console
How, can I do so??
Since every page on the internet has HTML in it, I would inspect the page with right click and find out which tag belongs to the message in the chat, then I'd find the tag's XPath or class and finally get its innerText for retrieving the string.

svg tag scraping from funnels

I am trying to scrape data from here but getting error.
I have taken code from here Scraping using Selenium and python
This code was working perfectly fine but now I am getting error
wait.until(EC.visibility_of_element_located((By.LINK_TEXT, "All Boards")))
raise TimeoutException(message, screen, stacktrace)
After clicking on pe-funnel link , you can try with this code :
# SVG nodes are matched through name(); the attribute test needs "@"
# ("#text-anchor" is not valid XPath).
text_locator = (By.XPATH, "//*[name()='text' and @text-anchor='end']")
wait.until(EC.visibility_of_element_located(text_locator))
all_data = driver.find_elements(*text_locator)
print(len(all_data))
# Print the text of every matching SVG <text> node.
for data in all_data:
    print(data.text)
UPDATE1 :
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.select import Select
import time
# Log in to the CleverTap dashboard, open the "pe-funnel" board and print
# the values shown in each Highcharts funnel.
driver = webdriver.Chrome(executable_path = r'D:/Automation/chromedriver.exe')
driver.maximize_window()
driver.get("https://eu1.dashboard.clevertap.com/login.html")
wait = WebDriverWait(driver, 20)
action = ActionChains(driver)
driver.switch_to.default_content()
wait.until(EC.element_to_be_clickable((By.NAME, "email"))).send_keys("abhishe***")
wait.until(EC.element_to_be_clickable((By.NAME,"password"))).send_keys("***")
wait.until(EC.element_to_be_clickable((By.ID,"submitBtn"))).click()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div.ct-breadcrumb')))
driver.switch_to.default_content()

# Hover the first sidebar entry so that the "All Boards" link shows up.
action.move_to_element(driver.find_element(By.CSS_SELECTOR, "div.sidebar__brand+ul>li:first-child>a")).perform()
wait.until(EC.visibility_of_element_located((By.LINK_TEXT, "All Boards")))
wait.until(EC.element_to_be_clickable((By.LINK_TEXT,"All Boards"))).click()
wait.until(EC.element_to_be_clickable((By.LINK_TEXT,"pe-funnel"))).click()
time.sleep(1)
driver.execute_script("window.scrollTo(0,100)")

# SVG nodes are matched through name(); attribute tests need "@"
# ("#class" is not valid XPath).
charts_locator = (By.XPATH, "//*[name()='svg' and @class='highcharts-root']")
values_locator = (By.XPATH, "//*[name()='tspan' and @class='highcharts-text-outline']")

wait.until(EC.presence_of_all_elements_located(charts_locator))
all_charts = driver.find_elements(*charts_locator)
length_of_list = len(all_charts)
print(length_of_list)

i=0
while(i<len(all_charts)):
    # Re-locate the charts on every pass: the page is navigated back and
    # refreshed at the end of each iteration.
    wait.until(EC.presence_of_all_elements_located(charts_locator))
    all_charts = driver.find_elements(*charts_locator)
    all_charts[i].click()
    i=i+1
    try:
        print("Switch to frame")
        wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID,"wiz-iframe-intent")))
        print("Switched to frame")
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='OK' and @class='annoucement-popup__btn']"))).click()
        print("Clicked on Ok button")
    except Exception:
        # Best effort: the announcement popup does not always appear.
        # "except Exception" (not a bare except) lets Ctrl-C through.
        print("in catch block")
    finally:
        # Always return to the top-level document, also when the OK button
        # wait fails after the switch into the iframe.
        driver.switch_to.default_content()
    print("last of CATCH BLOCK")
    driver.execute_script("window.scrollTo(0,1100)")
    ActionChains(driver).move_to_element(driver.find_element(By.CSS_SELECTOR, "input[data-introp='View your analysis']")).click().perform()
    wait.until(EC.presence_of_all_elements_located(values_locator))
    all_values = driver.find_elements(*values_locator)
    for values in all_values:
        print(values.text)
    # Go back to the board and reload it before opening the next chart.
    driver.execute_script("window.history.go(-1)")
    driver.refresh()

Categories

Resources