I am having some trouble scrolling down to the end of the second web page. The first scroll works fine, but the second won't run.
This issue seems to be happening on lines 33 and 34.
Please see the code below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.common.keys import Keys

def main():
    n = 1
    # LMS -> PSYC1101 -> Quiz
    driver = webdriver.Chrome("/usr/local/bin/chromedriver")
    driver.get("https://lms.uwa.edu.au/webapps/portal/execute/tabs/tabAction?tab_tab_group_id=_1_1")
    window_before = driver.window_handles[0]
    username = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "login-user"))).send_keys("username")
    password = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "login-pass"))).send_keys("password")
    login = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "form_button_0"))).click()
    time.sleep(10)
    units = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[4]/table/tbody/tr/td/div/div[2]/table/tbody/tr/td[2]/a/span"))).click()
    psych = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[5]/div/div/div/div/div/div/div/div[2]/div/div[2]/div/div[2]/ul/li[4]/a"))).click()
    page = driver.find_element_by_tag_name("html")
    page.send_keys(Keys.END)  # first scroll: works fine
    time.sleep(1)
    quizzes = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[5]/div[2]/nav/div/div[2]/div[1]/div[2]/ul/li[22]/a/span"))).click()
    # Quiz One -> Begin -> Scroll -> Continue -> Submit -> Submit2 -> View Results
    quizOne = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[5]/div[2]/div/div/div/div/div[2]/ul/li[2]/div[1]/h3/a/span"))).click()
    time.sleep(1)
    begin = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div/div/div/div[4]/div/div[2]/div/div[1]/div[17]/button"))).click()
    window_after = driver.window_handles[0]
    driver.switch_to.window(window_after)
    page = driver.find_element_by_tag_name("html")
    page.send_keys(Keys.END)  # second scroll: this is the one that won't run
    # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    Continue = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "/html/body/div/div/main/div[5]/button[1]"))).click()
    submit = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div/header/div/div/button[2]"))).click()
    submit2 = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div/ic-modal[2]/ic-modal-main/div[2]/button[2]"))).click()
    viewResults = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "/html/body/div/ic-modal[3]/ic-modal-main/div/button"))).click()

main()
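A hedged guess at the cause: window_handles[0] is the original window, so if the begin click opens the quiz in a new window, the script never actually switches to it, and the fresh <html> lookup then targets the wrong page. A minimal sketch of the window switch and scroll reworked under that assumption (scrolling with JavaScript also avoids relying on the <html> element accepting key events):

    # Sketch, assuming the quiz opens in a new window: switch to the newest
    # handle instead of index 0, then scroll with JavaScript, which does not
    # depend on the <html> element receiving key events.
    window_after = driver.window_handles[-1]
    driver.switch_to.window(window_after)
    WebDriverWait(driver, 10).until(
        lambda d: d.execute_script("return document.readyState") == "complete")
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")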
I'm working on scraping the memes and all their comments from 9gag.
I used the code below, but I am only getting a few extra comments:
actions = ActionChains(driver)
link = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
actions.move_to_element(link).click(on_element=link).perform()
I would also like to access the subcomments under a comment by simulating a click on "view more replies".
From the HTML I found that this XPath holds the comments section, but I'm not sure how to iterate through each comment in this element and simulate these clicks:
element = driver.find_element(By.XPATH, "//div[@class='vue-recycle-scroller ready page-mode direction-vertical']")
The code below should run directly, provided the necessary libraries are installed, in case you want to test it.
Please help me with the following tasks:
Getting all the comments from "view all comments"
Iterating through each comment section and clicking on "view more replies" to get all the subcomments
My Code
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import undetected_chromedriver as uc

if __name__ == '__main__':
    options = Options()
    # options.headless = True
    options.add_argument("start-maximized")  # ensure window is full-screen
    driver = uc.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get("https://9gag.com/gag/a5EAv9O")

    prev_h = 0
    for i in range(10):
        height = driver.execute_script("""
            function getActualHeight() {
                return Math.max(
                    Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
                    Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
                    Math.max(document.body.clientHeight, document.documentElement.clientHeight)
                );
            }
            return getActualHeight();
        """)
        driver.execute_script(f"window.scrollTo({prev_h},{prev_h + 200})")
        time.sleep(1)
        prev_h += 200
        if prev_h >= height:
            break
    time.sleep(5)

    title = driver.title[:-7]
    try:
        upvotes_count = \
            driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[0]
        comments_count = \
            driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[3]
        upvotes_count = int(upvotes_count) if len(upvotes_count) <= 3 else int("".join(upvotes_count.split(',')))
        comments_count = int(comments_count) if len(comments_count) <= 3 else int("".join(comments_count.split(',')))
        date_posted = driver.find_element(By.XPATH, "//p[@class='message']")
        date_posted = date_posted.text.split("·")[1].strip()
        # actions = ActionChains(driver)
        # link = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
        # actions.move_to_element(link).click(on_element=link).perform()
        element = driver.find_element(By.XPATH,
                                      "//div[@class='vue-recycle-scroller ready page-mode direction-vertical']")
        print(element.text)
        driver.quit()
    except (NoSuchElementException, Exception) as err:
        print(err)
Edit:
I managed to make the code work better. It scrolls through the page until it has seen all the comments, and it clicks on "view more replies" when there are subcomments.
But it's only able to read comments from the middle to the end; maybe as the page is scrolled down, the initial comments are hidden dynamically, and I don't know how to overcome this. Also, clicking on "view more replies" stops after some clicks, throwing the error:
selenium.common.exceptions.MoveTargetOutOfBoundsException: Message: move target out of bounds
Here's the updated code
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import undetected_chromedriver as uc

def scroll_page(scrl_hgt):
    prev_h = 0
    for i in range(10):
        height = driver.execute_script("""
            function getActualHeight() {
                return Math.max(
                    Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
                    Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
                    Math.max(document.body.clientHeight, document.documentElement.clientHeight)
                );
            }
            return getActualHeight();
        """)
        driver.execute_script(f"window.scrollTo({prev_h},{prev_h + scrl_hgt})")
        time.sleep(1)
        prev_h += scrl_hgt
        if prev_h >= height:
            break

if __name__ == '__main__':
    options = Options()
    # options.headless = True
    driver = uc.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.maximize_window()
    driver.get("https://9gag.com/gag/a5EAv9O")
    time.sleep(5)

    # click on "I accept" cookies
    actions = ActionChains(driver)
    consent_button = driver.find_element(By.XPATH, '//*[@id="qc-cmp2-ui"]/div[2]/div/button[2]')
    actions.move_to_element(consent_button).click().perform()
    scroll_page(150)
    time.sleep(2)

    # click on the fresh comments section
    fresh_comments = driver.find_element(By.XPATH, '//*[@id="page"]/div[1]/section[2]/section/header/div/button[2]')
    actions.move_to_element(fresh_comments).click(on_element=fresh_comments).perform()
    time.sleep(5)

    # getting metadata
    title = driver.title[:-7]
    upvotes_count = driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[0]
    comments_count = driver.find_element(By.XPATH, "//meta[@property='og:description']").get_attribute("content").split(' ')[3]
    upvotes_count = int(upvotes_count) if len(upvotes_count) <= 3 else int("".join(upvotes_count.split(',')))
    comments_count = int(comments_count) if len(comments_count) <= 3 else int("".join(comments_count.split(',')))
    date_posted = driver.find_element(By.XPATH, "//p[@class='message']")
    date_posted = date_posted.text.split("·")[1].strip()
    time.sleep(3)

    # click on the load more comments button to load all the comments
    load_more_comments = driver.find_element(By.XPATH, "//button[@class='comment-list__load-more']")
    actions.move_to_element(load_more_comments).click(on_element=load_more_comments).perform()
    scroll_page(500)

    print([my_elem.text for my_elem in driver.find_elements(By.CSS_SELECTOR, "div.comment-list-item__text")])

    comments = driver.find_elements(By.CSS_SELECTOR, "div.vue-recycle-scroller__item-view")
    for item in comments:
        html = item.get_attribute("innerHTML")
        if "comment-list-item__text" in html:
            print(item.find_element(By.CSS_SELECTOR, "div.comment-list-item__text").text)
        elif "comment-list-item__deleted-text" in html:
            print(item.find_element(By.CSS_SELECTOR, "div.comment-list-item__deleted-text").text)
        # get sub comments
        if "comment-list-item__replies" in html:
            # item.find_element(By.CSS_SELECTOR, "div.comment-list-item__replies").click()
            sub_comments = item.find_element(By.CSS_SELECTOR, "div.comment-list-item__replies")
            actions.move_to_element(sub_comments).click(on_element=sub_comments).perform()
            time.sleep(2)
    driver.quit()
PS: My goal is to get every single comment and all of its sub-comments (whether they are text, image, gif, etc.) in the order they appear, and save them somewhere so that I can recreate the comments section again.
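One pattern that may address both problems, sketched here as a guess: because the comment list is a virtualized vue-recycle-scroller, off-screen items are removed from the DOM, so comments have to be harvested incrementally while scrolling rather than all at once at the end. Scrolling each "view more replies" toggle into view before clicking should also avoid the move-target-out-of-bounds error. The selectors are the ones from the code above; the text-based de-duplication is an assumption:

    import time
    from selenium.webdriver.common.by import By

    def harvest_comments(driver, scroll_step=300, max_rounds=50):
        """Collect comment texts while scrolling, since the virtualized
        scroller drops off-screen items from the DOM."""
        seen = {}  # text -> order index; assumes text is unique enough to de-duplicate
        for _ in range(max_rounds):
            # expand any visible "view more replies" toggles first
            for toggle in driver.find_elements(By.CSS_SELECTOR, "div.comment-list-item__replies"):
                try:
                    # scroll the element into view before clicking, to avoid
                    # MoveTargetOutOfBoundsException with ActionChains
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", toggle)
                    toggle.click()
                    time.sleep(1)
                except Exception:
                    pass
            # harvest whatever comment items are currently rendered
            for item in driver.find_elements(By.CSS_SELECTOR, "div.comment-list-item__text"):
                text = item.text
                if text and text not in seen:
                    seen[text] = len(seen)
            prev = driver.execute_script("return window.pageYOffset;")
            driver.execute_script(f"window.scrollBy(0, {scroll_step});")
            time.sleep(1)
            if driver.execute_script("return window.pageYOffset;") == prev:
                break  # reached the bottom
        return [t for t, _ in sorted(seen.items(), key=lambda kv: kv[1])]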
To extract and print the comment texts you need to induce WebDriverWait for visibility_of_all_elements_located(), and you can use the following locator strategies:
driver.get("https://9gag.com/gag/a5EAv9O")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.comment-list__load-more"))).click()
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.comment-list-item__text")))])
Console Output:
['Man, the battle of the cults is getting interesting now.', 'rent free in your head', 'Sorry saving all my money up for the Joe Biden Depends Multipack and the Karmella knee pads.', "It's basically a cult now.", "I'll take one. I'm not even American", '', 'that eagle looks familiar.', "Who doesn't want a trump card?"]
Note: you have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I'm trying to log in to a site with Selenium. On this site, the 'enter' field is disabled and is only enabled after you start typing. The problem is that when I enter the username with send_keys, the button's enabled status won't change to true. What can I do?
import time
from bs4 import BeautifulSoup
from fake_useragent import UserAgent  # assumed source of `ua`
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

ua = UserAgent()

t1 = time.perf_counter()
url = 'https://auth.iupp.com.br/login?client_id=2raqh4sjj73q10efeguphmk4gn&nonce=5149eba248ed491cbde001d686e688dd&redirect_uri=https%3A%2F%2Fwww.iupp.com.br%2Fauth%2Fcallback&response_type=token&scope=profile%20email%20openid%20aws.cognito.signin.user.admin%20webpremios.campaigns%2F40455&state=dc6544eec0244aa0be61d3b8aeded338'
op = webdriver.ChromeOptions()
op.add_argument('headless')
op.add_argument("user-agent=" + ua.random)
op.add_argument("incognito")
driver = webdriver.Chrome(options=op)
driver.get(url)
driver.implicitly_wait(5)
username = driver.find_element_by_xpath(".//*[@id='username']")
username.send_keys("45143080581")
print('0')
element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, "btnContinue")))
print('1')
driver.find_element_by_id("btnContinue").click()
bs = BeautifulSoup(driver.page_source, 'html.parser')
driver.quit()
t2 = time.perf_counter()
print(f'tempo:{t2-t1}')
PS: the output shows both 0 and 1 printed.
wait = WebDriverWait(driver, 30)
t1 = time.perf_counter()
driver.get('https://auth.iupp.com.br/login?client_id=2raqh4sjj73q10efeguphmk4gn&nonce=5149eba248ed491cbde001d686e688dd&redirect_uri=https%3A%2F%2Fwww.iupp.com.br%2Fauth%2Fcallback&response_type=token&scope=profile%20email%20openid%20aws.cognito.signin.user.admin%20webpremios.campaigns%2F40455&state=dc6544eec0244aa0be61d3b8aeded338')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#root > div > div.alertLDGPBackground > div > div > div > div.col-12.col-md-auto.txt-center > a'))).click()
print('0')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#username'))).send_keys('45143080581')
print('1')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#btnContinue'))).click()
bs = BeautifulSoup(driver.page_source, 'html.parser')
driver.quit()
t2 = time.perf_counter()
print(f'tempo:{t2-t1}')
You should click the accept button when the page loads, to make sure the other elements are clickable.
Then just wait for each element to be clickable using explicit waits.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
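If the button still stays disabled after send_keys (some front ends only enable it on a JavaScript input event rather than on simulated keystrokes), a possible workaround, sketched here under that assumption, is to set the value and dispatch the event manually:

    # Assumes the page listens for an 'input' event to enable the button;
    # '#username' and the test value are taken from the snippets above.
    field = driver.find_element(By.CSS_SELECTOR, '#username')
    driver.execute_script(
        "arguments[0].value = arguments[1];"
        "arguments[0].dispatchEvent(new Event('input', {bubbles: true}));",
        field, "45143080581",
    )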
I have an application that is almost working as intended. The problem arises after it runs through the loop on the 5th iteration: the search states there are two results, both of which lead to the same end result. When this occurs I'd like to select the first of the two.
The popup message looks like the following:
I'm using the following code to create the list and then loop:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# service = Service(r'C:\Program Files\Chrome Driver\chromedriver.exe')
URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver = webdriver.Chrome(r'C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(URL)

category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]
classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.propText")))]

dfObj = pd.DataFrame(category)
dfObj.columns = ['Category']
dfObj.dropna(inplace=True)
new = dfObj["Category"].str.split("(", n=1, expand=True)
dfObj["New Category"] = new[0]
dfObj["Count"] = new[1]
dfObj.drop(columns=["Category"], inplace=True)
dfObj['Count'] = dfObj['Count'].str.rstrip(')')
dfObj['IsNumber'] = dfObj['Count'].str.isnumeric()
dfObj = dfObj[(dfObj['IsNumber'] == True)]
searchcat = dfObj['New Category'].tolist()
print(searchcat)
dfObj.to_csv('tabledf.csv', index=False)
time.sleep(8)
driver.quit()

for search in searchcat:
    page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
    driver = webdriver.Chrome(r'C:\Program Files\Chrome Driver\chromedriver.exe')
    driver.get(page)
    time.sleep(4)
    table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
    time.sleep(4)
    filename = search[0:30] + 'table.csv'
    pd.read_html(driver.page_source)[1].iloc[:, :-1].to_csv(filename, index=False)
    time.sleep(4)
    driver.quit()
The loop will continue to run if I manually click each search result. However, I would like Selenium to always select the first option. How would I go about this?
Updated Code:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time

with webdriver.Chrome(r'C:\Program Files\Chrome Driver\chromedriver.exe') as driver:
    URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
    driver.get(URL)
    category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img+a")))]

    dfObj = pd.DataFrame(category)
    dfObj.columns = ["Category"]
    dfObj.dropna(inplace=True)
    new = dfObj["Category"].str.split("(", n=1, expand=True)
    dfObj["New Category"] = new[0]
    dfObj["Count"] = new[1]
    dfObj.drop(columns=["Category"], inplace=True)
    dfObj["Count"] = dfObj["Count"].str.rstrip(')')
    dfObj["IsNumber"] = dfObj["Count"].str.isnumeric()
    dfObj = dfObj[(dfObj["IsNumber"] == True)]
    searchcat = dfObj["New Category"].tolist()
    dfObj.to_csv('tabledf.csv', index=False)
    time.sleep(3)

    for search in searchcat:
        page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
        driver.get(page)  # reuse the same browser instead of opening a new one
        table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
        modal_wait = WebDriverWait(driver, 1)
        try:
            modal_el = modal_wait.until(EC.visibility_of_element_located((By.ID, 'optionModal')))
            modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
        except TimeoutException:
            pass
        filename = search[0:30] + 'table.csv'
        classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(2)")))]
        classname = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(1)")))]
        classtype = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(3)")))]
        df = pd.read_html(driver.page_source)[1].iloc[:, :-1]
        df["ClassID"] = pd.Series(classid)
        df["ClassName"] = pd.Series(classname)
        df["ClassType"] = pd.Series(classtype)
        df.to_csv(filename, index=False)
        time.sleep(4)
First off, I suggest that you use the with context manager. It will handle opening/closing the driver (Chrome) by itself, and it ensures that the driver is still closed if any exception is raised.
To do so, use:
with webdriver.Chrome() as driver:
    ...
In your code I see you close and open a new browser for each URL. This is not needed, and avoiding it will speed up your script. Just use driver.get() to change the URL.
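For instance, a minimal sketch of that pattern, reusing the searchcat list built in the question's code:

    with webdriver.Chrome() as driver:
        for search in searchcat:
            # one browser instance, many page loads
            driver.get(f"https://mor.nlm.nih.gov/RxClass/search?query={search}")
            # ... scrape the page here ...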
For your main issue, just add a portion of code that will detect the modal and choose the first option. Something along these lines:
modal_wait = WebDriverWait(driver, 1)
try:
    modal_el = modal_wait.until(EC.element_to_be_clickable((By.ID, 'optionModal')))
    modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
except TimeoutException:
    pass
You must include the following imports:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
I am scraping an Instagram page where I need to get the user's:
number of posts
number of followers
I managed to log in on Instagram, then search for the user (in this example 'leonardodicaprio'), then go to his page. I am not able to select the text, though.
Can someone help please?
Thanks!
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time

class InstatestSpider(scrapy.Spider):
    name = 'instatest'
    allowed_domains = ['www.instagram.com']
    start_urls = ['https://www.instagram.com/accounts/login']

    def __init__(self):
        chrome_option = Options()
        # chrome_option.add_argument("--headless")
        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_option)
        driver.set_window_size(1920, 1080)
        driver.get("https://www.instagram.com/accounts/login")
        logging.info('Website opened...')

        # username = driver.find_element_by_name("username")
        # username = driver.find_element(By.XPATH, '//input[@name="username"]')
        username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="username"]')))
        password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="password"]')))
        username.clear()
        username.send_keys("username")
        logging.info('Typing Username...')
        password.clear()
        password.send_keys("password")
        logging.info('Typing Password...')
        Login_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))).click()
        alert_1 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT save password...')
        alert_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()  # search for a text="Not Now"
        logging.info('Do NOT turn notifications on...')
        logging.info('Logging Successful...')

        influencer = "leonardodicaprio"
        driver.get("https://www.instagram.com/" + influencer + "/")
        time.sleep(5)
        driver.save_screenshot('Influencer_Home_Page.png')
P.S.: For the number of followers I want to get the exact number to the nearest digit, as found in the title attribute in the selector (screenshot omitted).
Getting this error when running (screenshot of the error in JupyterLab omitted):
wait = WebDriverWait(driver, 20)
number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
print(number_of_post)
number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
print(number_of_follower)
Updated code:
# -*- coding: utf-8 -*-
import scrapy
from scrapy_splash import SplashRequest
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time

class InstatestSpider(scrapy.Spider):
    name = 'instatest'
    allowed_domains = ['www.instagram.com']
    start_urls = ['https://www.instagram.com/accounts/login']

    def __init__(self):
        chrome_option = Options()
        # chrome_option.add_argument("--headless")
        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_option)
        driver.set_window_size(1920, 1080)
        driver.get("https://www.instagram.com/accounts/login")
        logging.info('Website opened...')

        # username = driver.find_element_by_name("username")
        # username = driver.find_element(By.XPATH, '//input[@name="username"]')
        username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="username"]')))
        password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@name="password"]')))
        username.clear()
        username.send_keys("username")
        logging.info('Typing Username...')
        password.clear()
        password.send_keys("password")
        logging.info('Typing Password...')
        Login_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))).click()
        alert_1 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
        logging.info('Do NOT save password...')
        alert_2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()  # search for a text="Not Now"
        logging.info('Do NOT turn notifications on...')
        logging.info('Logging Successful...')

        influencer = "leonardodicaprio"
        driver.get("https://www.instagram.com/" + influencer + "/")
        time.sleep(5)

        wait = WebDriverWait(driver, 20)
        number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
        print(number_of_post)
        number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
        print(number_of_follower)
        driver.save_screenshot('Influencer_Home_Page.png')
You can use the below CSS_SELECTORs to get the number of posts and the number of followers. To get the title attribute, you can use .get_attribute():
wait = WebDriverWait(driver, 20)
number_of_post = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='profile_posts'] span"))).text
print(number_of_post)
number_of_follower = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[href$='followed_by_list'] span"))).get_attribute('title')
print(number_of_follower)
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
I want to get the text of the sender on my console. I tried Beautiful Soup for scraping, but it didn't work. I have also tried several other approaches with Selenium, like XPath and different class names, but was not able to resolve this issue.
Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time

myemail = "<username>"
mypassword = "<password>"
friendusernames = ["<sender username>"]
PATH = "C:/Chromedriver.exe"
driver = webdriver.Chrome(PATH)
url = "https://www.instagram.com/"
driver.get(url)

usernamebox = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.NAME, 'username')))
usernamebox.send_keys(myemail)
passwordbox = driver.find_element_by_name('password')
passwordbox.send_keys(mypassword)
loginbutton = driver.find_element_by_css_selector('.Igw0E')
loginbutton.click()
print("Logging in")

dmbtn = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.xWeGp')))
dmbtn.click()
notificationsnotnow = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.HoLwm')))
notificationsnotnow.click()

for friendusername in friendusernames:
    searchuser = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.EQ1Mr')))
    searchuser.click()
    searchuserbox = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.j_2Hd')))
    searchuserbox.send_keys(friendusername)
    time.sleep(3)
    firstuser = driver.find_element_by_xpath(
        '/html/body/div[5]/div/div/div[2]/div[2]/div[1]/div')
    firstuser.click()
    pressingnext = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '.rIacr')))
    pressingnext.click()
    names = driver.find_element_by_css_selector(
        '._7UhW9 > span').text
    # names = driver.find_element_by_class_name('xLCgt').text
    # names = driver.find_element_by_class_name('MMzanKV-D4').text
    # names = driver.find_element_by_class_name('p1tLr').text
    # names = driver.find_element_by_class_name('hjZTB').text
    print(names)
    time.sleep(1)
I want this text on my console. How can I do so?
Since every page on the internet has HTML in it, I would inspect the page with right click, find out which tag belongs to the message in the chat, then find that tag's XPath or class, and finally get its innerText to retrieve the string.
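A minimal sketch of that approach, assuming the message bubbles are the '_7UhW9 > span' elements already used in the question (Instagram's obfuscated class names change frequently, so the selector may need refreshing against the live page):

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Wait until at least one message bubble is rendered, then read each
    # bubble's innerText to retrieve the message string.
    messages = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "._7UhW9 > span")))
    for message in messages:
        print(message.get_attribute("innerText"))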