I am trying to scrape Twitter followers and the bio of the followers from a certain Twitter account.
The account has more than 10000 followers. I have ran the code many times but it scrapes like 5000,7000 sometimes 9000 followers and then throws StaleElementRefrenceException.
I am a beginner, so it would be of great help if you suggest where to make what changes in the code, so it won't throw the exception.
import csv
from getpass import getpass
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
def get_followers_and_bio(cardd):
screen_name = cardd.find_element_by_xpath('./div//span').text
username = cardd.find_element_by_xpath('.//span[contains(text(), "#")]').text
i = cardd.text.split("\n").index('Follow')
bio = cardd.text.split("\n")[i+1:]
user = (screen_name, username, bio)
return user
# Create instance of the web driver
driver = webdriver.Chrome(ChromeDriverManager().install())
# Navigate to login screen
driver.get('https://www.twitter.com/login')
driver.maximize_window()
sleep(5)
username = driver.find_element_by_xpath('//input[#name="text"]')
username.send_keys('myemail#gmail.com')
username.send_keys(Keys.RETURN)
sleep(10)
username1 = driver.find_element_by_xpath('//input[#name="text"]')
username1.send_keys('myusername')
username1.send_keys(Keys.RETURN)
my_password = getpass()
password = driver.find_element_by_xpath('//input[#name="password"]')
password.send_keys(my_password)
password.send_keys(Keys.RETURN)
sleep(5)
# Find search input and search for term or user
search_input = driver.find_element_by_xpath('//input[#aria-label="Search query"]')
search_input.send_keys('#username')
search_input.send_keys(Keys.RETURN)
sleep(5)
driver.find_element_by_link_text('People').click()
sleep(5)
driver.find_element_by_link_text('#username').click()
sleep(5)
# Opening user's followers list
driver.find_element_by_xpath("//a[#href='/username/followers']").click()
sleep(5)
# Get all followers and their bio on the page
followers_list = []
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
while scrolling:
cards = driver.find_elements_by_xpath('//div[#data-testid="UserCell"]')
for card in cards:
data = get_followers_and_bio(card)
if data:
followers_list.append(data)
scroll_attempt = 0
while True:
# Check scroll position
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
sleep(2)
curr_position = driver.execute_script("return window.pageYOffset;")
if last_position == curr_position:
scroll_attempt += 1
# End of scroll region
if scroll_attempt >= 5:
scrolling = False
break
else:
sleep(3) # Attempt to scroll again
else:
last_position = curr_position
break
Related
I am on VS Code and my Selenium Instagram Bot's intentional design is to read from a list of profiles from a .txt file, visit those profiles, follow and like a specified number of their posts(if they are private, it just follows them) then goes on to the next profile in the list, all the while using different pre-made bot accounts who's usernames are also on a list, so the code may iterate over them once a number of profiles have been engaged with by a single bot.
I am able to iterate over target profiles, but right now I am just having problems with locating elements and having them to be clicked by the bot. I got it to work on 1 profile, after going to the next profile, it simply didn't do anything and seems to can't find the follow button to click again(I can't recreate this, after some changes were made lol, just getting back into Python after briefly touching on it in school). I still haven't even seen the bot like a post too. Although, the XPATHS on the Log In and the Pop Ups seems to work. It's now just not interacting with the profiles.
~
Any insights would be highly appreciated!
Source Code:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time, random
from selenium.webdriver.common.keys import Keys
profilePath = (r'C:\Users\****\AppData\Roaming\Mozilla\Firefox\Profiles\75d4lwz2.3rd')
options = Options()
service = Service('geckodrivere.exe')
firefox = webdriver.Firefox(options=options, service=service)
wait = WebDriverWait(firefox, 20)
file = open('scrape_archivepages.txt', 'r')
data = file.read()
igUsers = data.split('\n')
file.close()
file2 = open('botlist.txt', 'r')
data2 = file2.read()
bots = data2.split('\n')
file2.close()
def startLogIn(user_, pass_,):
firefox.get('https://www.instagram.com/')
while True:
try:
cookiesAccept = firefox.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/button[2]')
time.sleep(4)
cookiesAccept.click()
time.sleep(4)
break
except:
pass
username = firefox.find_element(By.XPATH, '//*[#id="loginForm"]/div/div[1]/div/label/input')
password = firefox.find_element(By.XPATH, '//*[#id="loginForm"]/div/div[2]/div/label/input')
username.click()
username.send_keys(user_)
time.sleep(random.randint(1, 2))
password.click()
password.send_keys(pass_)
time.sleep(random.randint(1, 2))
log_in=firefox.find_element(By.XPATH, '//*[#id="loginForm"]/div/div[3]')
log_in.click()
time.sleep(5)
# while True:
# try:
# credentials= firefox.find_element(By.XPATH, '//button[text()="Not Now"]')
# time.sleep(3)
# credentials.click()
# break
# except:
# pass
# while True:
# try:
# notifications = firefox.find_element(By.XPATH, '//button[text()="Not Now"]')
# time.sleep(3)
# notifications.click()
# break
# except:
# pass
def interact(igUserLink, n):
firefox.get(igUserLink)
time.sleep(2)
#while True:
# try:
follow = firefox.find_element(By.CSS_SELECTOR, '#mount_0_0_0I > div > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > section > main > div > header > section > div.x6s0dn4.x78zum5.x1q0g3np.xs83m0k.xeuugli.x1n2onr6 > div._ab8w._ab94._ab99._ab9f._ab9k._ab9p._abb3._abcm > div > div._ab8w._ab94._ab99._ab9f._ab9m._ab9o._abb0._abcm > button > div > div')
time.sleep(2)
private = firefox.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/article/div[1]/div/h2')
if(bool(private)):
print('lol')
follow.click()
time.sleep(2)
##xpath of header from IG saying profile is private
if (not(bool(private))):
print('here')
follow.click()
time.sleep(2)
time.sleep(2)
c = 0
numPosts = firefox.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/div[1]/div[2]/section/main/div/header/section/ul/li[1]/div/span/span')
numPosts = int(numPosts.text)
if n == 0:
#do nothing
if n <= numPosts:
media = firefox.find_element(By.XPATH, '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/div[1]/div[2]/section/main/div/div[2]/article/div/div/div[1]/div[1]')
media.click()
time.sleep(1)
like = firefox.find_element(By.NAME, 'Like')
next = firefox.find_element(By.NAME, 'Next')
while(c<n):
like.click()
time.sleep(3)
c=c+1
next.click()
#break
#except:
#pass
def VibeFinderInteract(listOfBots, passw, userLink):
for userbot in listOfBots:
startLogIn(userbot, passw)
for u in userLink:
interact(u, 2)
print('')
VibeFinderInteract(bots, 'samepasswordforallthebots', igUsers)
Hi I am trying to write a sign-up bot for UEFA.com using Selenium as requests I find to be too difficult for me to try so I am just working on automating the sign-up process even if it is a lot slower.
I am able to get to the final stage where I click on Create an Account, but faced with a reCaptcha which only appears after clicking on Create an Account. And after solving the captcha there is no 'Submit' button but it will automatically submit the details for you.
I am able to get the captcha token returned from 2captcha solving service, and inputted it into the innerHTML of the g-response-token field using javascript. However I do not know how to submit the captcha and the form.
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
import pyautogui
from twocaptcha import TwoCaptcha
import random
import os
from random import randint
import sys
firstnames = []
lastnames = []
API_Key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
# Open Names File
with open('firstnames.txt', 'r') as f:
for name in f:
name = name.strip()
firstnames.append(name)
with open('lastnames.txt', 'r') as e:
for name in e:
name = name.strip()
lastnames.append(name)
with open('proxies.txt') as f:
proxy = f.readlines()
proxy_rand = randint(1, 35)
s_proxy = str(proxy[proxy_rand])
p_strip = s_proxy.rstrip()
# Proxy Input and Format
bare_proxy = p_strip.split(':')
username = bare_proxy[2]
password = bare_proxy[3]
ip = bare_proxy[0]
port = bare_proxy[1]
options = {
'proxy': {
'http': f'http://{username}:{password}#{ip}:{port}',
'https': f'https://{username}:{password}#{ip}:{port}',
'no_proxy': 'localhost,127.0.0.1'
}
}
os.environ['PATH'] += 'C:/SeleniumDrivers'
homepage_URL = 'https://www.uefa.com/tickets/'
driver = webdriver.Chrome(seleniumwire_options=options)
driver.get(homepage_URL)
# Accessing Register Page
reject_cookies = driver.find_element(By.ID, 'onetrust-reject-all-handler')
reject_cookies.click()
time.sleep(1)
login_button = driver.find_element(By.CSS_SELECTOR, "a[class='btn btn-secondary tickets__btn js-tracking-card']")
login_button.click()
time.sleep(10)
create_account = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[4]/a')
create_account.click()
time.sleep(10)
# Inputting SignUp Details
letters = 'abcdefghijklmnopqrstuvwxyz'
a = random.choice(letters)
b = random.choice(letters)
c = random.choice(letters)
d = random.choice(letters)
email = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[6]/input')
email.send_keys(f'{a}{b}{c}{d}#nottingham.pro')
time.sleep(2)
password = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[7]/input')
password.send_keys('19741002Rw!')
time.sleep(2)
first_name = driver.find_element(By.XPATH, '//*[#id="gigya-textbox-130722358975432270"]')
first_range = len(firstnames) - 1
random_first = randint(1, first_range)
f_name = firstnames[random_first]
first_name.send_keys(f'{f_name}')
time.sleep(2)
last_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[9]/input')
last_range = len(lastnames) - 1
random_last = randint(1, first_range)
l_name = lastnames[random_last]
last_name.send_keys(f'{l_name}')
time.sleep(2)
day_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[1]/input')
day = randint(1, 28)
day_of_birth.send_keys(f'{day}')
time.sleep(2)
month_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[2]/input')
month = randint(1, 12)
month_of_birth.send_keys(f'{month}')
time.sleep(2)
year_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[3]/input')
year = randint(1940, 2000)
year_of_birth.send_keys(f'{year}')
driver.execute_script("window.scrollTo(0, 500)")
time.sleep(2)
pyautogui.moveTo(353, 619)
time.sleep(2)
pyautogui.click()
time.sleep(5)
current_url = driver.current_url
print(current_url)
g_key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
def SolveCaptcha():
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
api_key = os.getenv(g_key, 'a733edea49a8327795d56edc9f06d391')
solver = TwoCaptcha(api_key)
try:
result = solver.recaptcha(
sitekey=g_key,
url=current_url)
except Exception as e:
print(e)
else:
return result
result = SolveCaptcha()
code = result['code']
print(code)
token = f'document.getElementById("g-recaptcha-response").innerHTML="{code}";'
driver.execute_script(token)
time.sleep(10000)
As you can see by the end of the code I have managed to input the captcha token but not sure how to submit as there is no submit button
I have tried to look for a callback function but can't seem to find it when I inspect the page.
submit the first form on the page:
driver.execute_script('document.forms[0].submit()')
I have written a script which can scrape followers' usernames. But the issue is I am getting all the usernames in the first attempt but when I try to scroll the page using javascript the page keeps on going down instead of going once and scraping the ids and then going down.Although I am getting date till 34th username but later its just messed up. I am sharing the code here you can use your own username and password to check what is the issue with the code. If you copy paste this code (entering your username and password in the empty string) it will run on your PC completely fine.
import warnings
warnings.filterwarnings('ignore')
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from getpass import getpass
from time import sleep
chrome_options = Options()
driver=webdriver.Chrome(ChromeDriverManager().install(),chrome_options=chrome_options)
driver.maximize_window()
website='https://twitter.com/i/flow/login'
driver.get(website)
print('website getting')
sleep(5)
username = driver.find_element_by_xpath('//input[#name="text"]')
username.send_keys('')
print('username running')
username.send_keys(Keys.RETURN)
sleep(3)
password = driver.find_element_by_xpath('//input[#name="password"]')
print('password running')
sleep(2)
password.send_keys('')
password.send_keys(Keys.RETURN)
website='https://twitter.com/MehroozW/followers'
driver.get(website)
print('got it')
import warnings
warnings.filterwarnings('ignore')
import csv
data = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
count = 1
i = 1
while scrolling:
for i in range(1,190):
try:
follower_username = driver.find_element_by_xpath(f'//div[#data-testid="primaryColumn"]/div[1]/section/div[1]/div[1]/div[{count}]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/a/div[1]/div[1]/span').text
print('index', count, follower_username)
data.append(follower_username)
count +=1
sleep(1)
except Exception:
pass
scroll_attempt = 0
while True:
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
sleep(2)
curr_position = driver.execute_script("return window.pageYOffset;")
print('curr_position',curr_position, last_position, scroll_attempt, scrolling)
if last_position == curr_position:
scroll_attempt += 1
# end of scroll region
if scroll_attempt >= 3:
scrolling = False
break
else:
sleep(2) # attempt another scroll
else:
last_position = curr_position
break
data
I have tried to write something in this email box:
Here is my part of the code that sends it:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import random
import pyperclip
while True:
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')
#Get email Site
driver.get('https://temp-mail.org/en/');
time.sleep(5)
#Click Button
#bt = driver.find_element_by_link_text('Change')
#bt.click()
#Copy Email
element = driver.find_element_by_id('mail')
emailtextvalue = element.text
time.sleep(5)
pyperclip.copy(emailtextvalue)
time.sleep(2)
#Enter Email
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')
time.sleep(2)
driver.get('https://www.pointsprizes.com/ref/18429434')
time.sleep(5)
#This is the problem
entermail = driver.find_elements_by_name('email').send_keys(emailtextvalue)
time.sleep(2)
# enter code here
This is supposed to take a random email and put it on the email box on the point prize email box! But send keys will not work!
while True:
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')
#Get email Site
driver.get('https://temp-mail.org/en/');
time.sleep(5)
#Click Button
#bt = driver.find_element_by_link_text('Change')
#bt.click()
#Copy Email
element = driver.find_element_by_id('mail')
emailtextvalue = element.get_attribute("value")
time.sleep(5)
#If you dont use you dont need copy string
pyperclip.copy(emailtextvalue)
time.sleep(2)
#Enter Email
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')
time.sleep(2)
driver.get('https://www.pointsprizes.com/ref/18429434')
time.sleep(5)
#This is the problem
entermail = driver.find_elements_by_name('email')[0].send_keys(emailtextvalue)
time.sleep(2)
I changed your code to this
Try this:
entermail = driver.find_element_by_xpath("//*[#id='wrapper']/div[4]/div/div/div[1]/form/div[1]/input[1]")
driver.implicity_wait(5)
entermail.send_keys(emailtextvalue)
Edit: I get X path like
Edit 2: driver.find_elements_by_name('email')[0] can work
I am attempting to identify an HTML Button with Xpath and have attempted both the relative and absolute Xpath without success. I am attempting to click the button.
The relative path:
click = webdriver.find.element_by_xpath("//onboarding-mobile-fixed-bottom-container/div[1]/div/sprout-button/button").click()
Absolute path: /html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-business-phone/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button
absolute = webdriver.find.element_by_xpath("/html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-business-phone/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button").click()
Even when using the absolute xpath (I know, frowned upon practice) I can't get the button to click.
For reference, I am automating: site: https://account.kabbage.com/onboarding/data/number-of-employees; Username: testingoverflow#aol.com; Pw: Kabbage123
(click finish applying; finish applying; working on the continue box)
Any help is much appreciated!!
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep, strftime
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import csv
import xlrd
info = "info.xlsx"
openwb = xlrd.open_workbook(info)
inputws = openwb.sheet_by_index(0)
print(inputws.nrows)
print(inputws.ncols)
print(inputws.cell_value(1,0))
email_log = inputws.cell_value(2,0)
businesslog = inputws.cell_value(2,1)
firstname = inputws.cell_value(2,2)
lastname = inputws.cell_value(2,3)
phone = int(inputws.cell_value(2,4))
employees = int(inputws.cell_value(2,5))
business_types = inputws.cell_value(2,6)
print(email_log)
print(businesslog)
print(firstname)
print(lastname)
print(phone)
sleep(1)
chromedriver_path = 'C:/Users/Documents/Scale/Programs/chromedriver.exe'
webdriver = webdriver.Chrome(executable_path=chromedriver_path)
webdriver.get('https://app.kabbage.com/signup/create_account')
sleep(1)
#input email
input_emails = webdriver.find_element_by_xpath('//*[#id="CreateAccount.EmailAddress_inner"]').send_keys(email_log)
sleep(1)
#re-input email
reinput = webdriver.find_element_by_xpath('//*[#id="CreateAccount.ConfirmEmail_inner"]').send_keys(email_log)
# Password
passwrd = webdriver.find_element_by_xpath('//*[#id="CreateAccount.CreatePassword"]')
sleep(1)
passwrd.send_keys('Paycheck11!!')
sleep(1)
button_started = webdriver.find_element_by_class_name("btn-full-width").click()
sleep(5)
#ApplyNow
#apply = webdriver.find_element_by_class_name('spr-btn spr-btn-primary')
#apply = webdriver.find_elements_by_class_name("spr-btn-primary").click()
#xpath("//div[#class='fc-day-content' and text()='15']")
applynow = webdriver.find_element_by_xpath("//sprout-button/button[contains(#class, 'spr-btn-primary')]").click()
sleep(5)
applyfinal = webdriver.find_element_by_xpath("//sprout-button/button[contains(#class, 'spr-btn-primary')]").click()
sleep(5)
business_name = webdriver.find_element_by_xpath('//*[#id="businessName-input"]').send_keys(businesslog)
business_send = webdriver.find_element_by_xpath("/html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-business-name/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button").click()
sleep(5)
first_name = webdriver.find_element_by_xpath('//*[#id="lastName-input"]').send_keys(lastname)
last_name = webdriver.find_element_by_xpath('//*[#id="firstName-input"]').send_keys(firstname)
names_send = webdriver.find_element_by_xpath("/html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-personal-name/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button").click()
sleep(5)
phone_num = webdriver.find_element_by_xpath('//*[#id="businessPhone-input"]').send_keys(phone)
phone_check = webdriver.find_element_by_xpath('//html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-business-phone/section/form/kbg-consent-box/div/sprout-checkbox/div/label').click()
#phone_send = names_send = webdriver.find_element_by_xpath("/html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-personal-name/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button").click()
phone_submits = webdriver.find_element_by_xpath("/html/body/cfapp-root/main/cfapp-spa-host/main/onboarding-root/div/div[1]/main/onboarding-business-phone/section/form/onboarding-next-button/onboarding-mobile-fixed-bottom-container/div[2]/sprout-button/button").click()
sleep(5)
num_empl = webdriver.find_element_by_xpath('//*[#id="numberOfEmployees-input"]').send_keys(employees)
#emp_submit = webdriver.find_element_by_xpath("//sprout-button/button[contains(#class, 'spr-btn-block')][2]").click()
sending = webdriver.find.element_by_xpath("//button[#class='spr-btn spr-btn-primary' and contains(text(),'Continue')]").click()
You can use any of these Xpaths:
Correct relative XPath for Continue button
Xpath 1:
*//onboarding-next-button//onboarding-mobile-fixed-bottom-container//div[2]//sprout-button//button[contains(text(),Continue)]
Xpath 2:
*//sprout-button[#class='desktop-button']//button[contains(text(),Continue)]
Give this a go:
webdriver.find_element_by_xpath("//button[#class="spr-btn spr-btn-primary" and contains(text(),'Continue')]").click()