How to solve invisible captcha on Selenium

How to solve invisible captcha on Selenium - python

Hi I am trying to write a sign-up bot for UEFA.com using Selenium as requests I find to be too difficult for me to try so I am just working on automating the sign-up process even if it is a lot slower.
I am able to get to the final stage where I click on Create an Account, but faced with a reCaptcha which only appears after clicking on Create an Account. And after solving the captcha there is no 'Submit' button but it will automatically submit the details for you.
I am able to get the captcha token returned from 2captcha solving service, and inputted it into the innerHTML of the g-response-token field using javascript. However I do not know how to submit the captcha and the form.
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
import pyautogui
from twocaptcha import TwoCaptcha
import random
import os
from random import randint
import sys
firstnames = []
lastnames = []
API_Key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
# Open Names File
with open('firstnames.txt', 'r') as f:
for name in f:
name = name.strip()
firstnames.append(name)
with open('lastnames.txt', 'r') as e:
for name in e:
name = name.strip()
lastnames.append(name)
with open('proxies.txt') as f:
proxy = f.readlines()
proxy_rand = randint(1, 35)
s_proxy = str(proxy[proxy_rand])
p_strip = s_proxy.rstrip()
# Proxy Input and Format
bare_proxy = p_strip.split(':')
username = bare_proxy[2]
password = bare_proxy[3]
ip = bare_proxy[0]
port = bare_proxy[1]
options = {
'proxy': {
'http': f'http://{username}:{password}#{ip}:{port}',
'https': f'https://{username}:{password}#{ip}:{port}',
'no_proxy': 'localhost,127.0.0.1'
}
}
os.environ['PATH'] += 'C:/SeleniumDrivers'
homepage_URL = 'https://www.uefa.com/tickets/'
driver = webdriver.Chrome(seleniumwire_options=options)
driver.get(homepage_URL)
# Accessing Register Page
reject_cookies = driver.find_element(By.ID, 'onetrust-reject-all-handler')
reject_cookies.click()
time.sleep(1)
login_button = driver.find_element(By.CSS_SELECTOR, "a[class='btn btn-secondary tickets__btn js-tracking-card']")
login_button.click()
time.sleep(10)
create_account = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[4]/a')
create_account.click()
time.sleep(10)
# Inputting SignUp Details
letters = 'abcdefghijklmnopqrstuvwxyz'
a = random.choice(letters)
b = random.choice(letters)
c = random.choice(letters)
d = random.choice(letters)
email = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[6]/input')
email.send_keys(f'{a}{b}{c}{d}#nottingham.pro')
time.sleep(2)
password = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[7]/input')
password.send_keys('19741002Rw!')
time.sleep(2)
first_name = driver.find_element(By.XPATH, '//*[#id="gigya-textbox-130722358975432270"]')
first_range = len(firstnames) - 1
random_first = randint(1, first_range)
f_name = firstnames[random_first]
first_name.send_keys(f'{f_name}')
time.sleep(2)
last_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[9]/input')
last_range = len(lastnames) - 1
random_last = randint(1, first_range)
l_name = lastnames[random_last]
last_name.send_keys(f'{l_name}')
time.sleep(2)
day_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[1]/input')
day = randint(1, 28)
day_of_birth.send_keys(f'{day}')
time.sleep(2)
month_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[2]/input')
month = randint(1, 12)
month_of_birth.send_keys(f'{month}')
time.sleep(2)
year_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[3]/input')
year = randint(1940, 2000)
year_of_birth.send_keys(f'{year}')
driver.execute_script("window.scrollTo(0, 500)")
time.sleep(2)
pyautogui.moveTo(353, 619)
time.sleep(2)
pyautogui.click()
time.sleep(5)
current_url = driver.current_url
print(current_url)
g_key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
def SolveCaptcha():
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
api_key = os.getenv(g_key, 'a733edea49a8327795d56edc9f06d391')
solver = TwoCaptcha(api_key)
try:
result = solver.recaptcha(
sitekey=g_key,
url=current_url)
except Exception as e:
print(e)
else:
return result
result = SolveCaptcha()
code = result['code']
print(code)
token = f'document.getElementById("g-recaptcha-response").innerHTML="{code}";'
driver.execute_script(token)
time.sleep(10000)
As you can see by the end of the code I have managed to input the captcha token but not sure how to submit as there is no submit button
I have tried to look for a callback function but can't seem to find it when I inspect the page.

submit the first form on the page:
driver.execute_script('document.forms[0].submit()')

Related

Python Selenium .send_keys() only sending first character of my string

I was trying to automate a post to Facebook using Python Selenium, and it was 90% complete. The only issue is that the string I give is "test," but when Facebook posts, it just sends the first character of "test," which is "t."
This is the code:
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
#fetching hashtags
def hashtags(hash_idea):
url = 'http://best-hashtags.com/hashtag/' + hash_idea
try:
req = Request(url, headers={'User-Agent' : 'Mozilla/5.0'})
page = urlopen(req, timeout=10)
page_html = page.read()
page.close()
page_soup = soup(page_html, 'html.parser')
result = page_soup.find('div',{'class':'tag-box tag-box-v3 margin-bottom-40'})
tags = result.decode()
start_index = tags.find('#')
end_index = tags.find('</p1>')
tags = tags[start_index:end_index]
return tags
except:
print('Something went wrong While Fetching hashtags')
def login(username, password):
try:
url = 'https://facebook.com'
driver.get(url)
user = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'email')))
user.send_keys(username)
pas = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'pass')))
pas.send_keys(password)
login_btn = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME,'login')))
login_btn.click()
except:
print('Something went wrong while login process')
def upload(img_path,caption):
try:
btn1 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
btn1.click()
btn2= WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
btn2.click()
btn3 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
btn3.click()
pyautogui.write(img_path)
pyautogui.press('enter')
cap = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
cap.send_keys(caption)
sleep(5) # this is mandatory while doing some thing with bot
btn_post = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
btn_post.click()
except:
print('Something Went Wrong While posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
I wanted to automate the post with the text I provided in the code.

You can try several alternatives
In the definition of cap replace presence_of_element_located with element_to_be_clickable.
Do what in 1. and moreover add
cap = ...
cap.clear()
cap.click()
cap.send_keys(caption)
Do what in 1. and moreover use ActionChains
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
cap = ...
actions.move_to_element(cap) # move the mouse to the middle of element
actions.click()
actions.send_keys(caption).perform()
If none works, then you can always send one character at a time
[cap.send_keys(c) for c in caption]

How to handle StaleElementReferenceException in Selenium Python?

I am trying to scrape Twitter followers and the bio of the followers from a certain Twitter account.
The account has more than 10000 followers. I have ran the code many times but it scrapes like 5000,7000 sometimes 9000 followers and then throws StaleElementRefrenceException.
I am a beginner, so it would be of great help if you suggest where to make what changes in the code, so it won't throw the exception.
import csv
from getpass import getpass
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
def get_followers_and_bio(cardd):
screen_name = cardd.find_element_by_xpath('./div//span').text
username = cardd.find_element_by_xpath('.//span[contains(text(), "#")]').text
i = cardd.text.split("\n").index('Follow')
bio = cardd.text.split("\n")[i+1:]
user = (screen_name, username, bio)
return user
# Create instance of the web driver
driver = webdriver.Chrome(ChromeDriverManager().install())
# Navigate to login screen
driver.get('https://www.twitter.com/login')
driver.maximize_window()
sleep(5)
username = driver.find_element_by_xpath('//input[#name="text"]')
username.send_keys('myemail#gmail.com')
username.send_keys(Keys.RETURN)
sleep(10)
username1 = driver.find_element_by_xpath('//input[#name="text"]')
username1.send_keys('myusername')
username1.send_keys(Keys.RETURN)
my_password = getpass()
password = driver.find_element_by_xpath('//input[#name="password"]')
password.send_keys(my_password)
password.send_keys(Keys.RETURN)
sleep(5)
# Find search input and search for term or user
search_input = driver.find_element_by_xpath('//input[#aria-label="Search query"]')
search_input.send_keys('#username')
search_input.send_keys(Keys.RETURN)
sleep(5)
driver.find_element_by_link_text('People').click()
sleep(5)
driver.find_element_by_link_text('#username').click()
sleep(5)
# Opening user's followers list
driver.find_element_by_xpath("//a[#href='/username/followers']").click()
sleep(5)
# Get all followers and their bio on the page
followers_list = []
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
while scrolling:
cards = driver.find_elements_by_xpath('//div[#data-testid="UserCell"]')
for card in cards:
data = get_followers_and_bio(card)
if data:
followers_list.append(data)
scroll_attempt = 0
while True:
# Check scroll position
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
sleep(2)
curr_position = driver.execute_script("return window.pageYOffset;")
if last_position == curr_position:
scroll_attempt += 1
# End of scroll region
if scroll_attempt >= 5:
scrolling = False
break
else:
sleep(3) # Attempt to scroll again
else:
last_position = curr_position
break

Bypass Yahoo reCAPTCHA by 2captcha API

I'm trying to create yahoo account using python selenium, and while creating i have to bypass a recaptcha. I'm using 2Captcha API to automate solving captchas
My issue is i can't solve recaptcha
based on my tests, i noticed that yahoo is using entreprise captcha, not sure if it's a V2 or V3
Here the API Documentation : https://2captcha.com/2captcha-api
here is my code :
import os
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import random
from twocaptcha import TwoCaptcha
opt = Options()
opt.add_argument("--disable-infobars")
opt.add_argument("start-maximized")
# Pass the argument 1 to allow and 2 to block
opt.add_experimental_option("excludeSwitches", ["enable-logging"])
opt.add_experimental_option("prefs", {
"profile.default_content_setting_values.media_stream_mic": 2,
"profile.default_content_setting_values.media_stream_camera": 2,
"profile.default_content_setting_values.geolocation": 2,
"profile.default_content_setting_values.notifications": 2
})
executable_path = r'chromedriver'
os.environ["webdriver.chrome.driver"] = executable_path
global driver
driver = webdriver.Chrome(r'chromedriver', options=opt)
time.sleep(5)
driver.get("https://login.yahoo.com/account/create")
# Fname and Lname
time.sleep(6)
driver.find_element_by_xpath("//input[#name='firstName']").send_keys("fname")
time.sleep(3)
driver.find_element_by_xpath("//input[#name='lastName']").send_keys("lname")
# Email
time.sleep(3)
numberid = random.randint(100000, 900000)
driver.find_element_by_xpath("//input[#name='yid']").send_keys("fname" + str(numberid) + "lname")
# Password
time.sleep(3)
driver.find_element_by_xpath("//input[#name='password']").send_keys("TestEPWD.")
######## number region +
FC = '(+212)'
option_el = driver.find_element_by_xpath("//option[contains(text(),'%s')]" % FC)
option_el.click()
driver.find_element_by_xpath("//input[#name='phone']").send_keys('684838340')
# Choose date
Month = random.randint(1, 12)
Months = "//option[#value='{}']".format(Month)
monthselect = driver.find_element_by_xpath(Months)
monthselect.click()
time.sleep(3)
Day = random.randint(1, 27)
driver.find_element_by_xpath("//input[#name='dd']").send_keys(Day)
time.sleep(3)
Year = random.randint(1975, 2000)
driver.find_element_by_xpath("//input[#name='yyyy']").send_keys(Year)
time.sleep(3)
list = ["Man", "Woman"]
item = random.choice(list)
driver.find_element_by_xpath("//input[#name='freeformGender']").send_keys(item)
time.sleep(3)
driver.find_element_by_xpath("//button[#name='signup']").click()
time.sleep(5)
# CAPTCHA PART :
api_key = os.getenv('APIKEY_2CAPTCHA', 'mycaptchaAPI')
solver = TwoCaptcha(api_key)
yy = driver.current_url
try:
result = solver.recaptcha(
sitekey="6LeGXAkbAAAAAAMGHQaxwylqpyvtF2jJMrvJff1h",
url=yy,
entreprise=1,
version='v3',
score=0.2
)
except Exception as e:
print(e)
else:
print('result: ' + str(result))

How can I write in this text box using python webdriver?

I have tried to write something in this email box:
Here is my part of the code that sends it:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import random
import pyperclip
while True:
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')
#Get email Site
driver.get('https://temp-mail.org/en/');
time.sleep(5)
#Click Button
#bt = driver.find_element_by_link_text('Change')
#bt.click()
#Copy Email
element = driver.find_element_by_id('mail')
emailtextvalue = element.text
time.sleep(5)
pyperclip.copy(emailtextvalue)
time.sleep(2)
#Enter Email
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')
time.sleep(2)
driver.get('https://www.pointsprizes.com/ref/18429434')
time.sleep(5)
#This is the problem
entermail = driver.find_elements_by_name('email').send_keys(emailtextvalue)
time.sleep(2)
# enter code here
This is supposed to take a random email and put it on the email box on the point prize email box! But send keys will not work!

while True:
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')
#Get email Site
driver.get('https://temp-mail.org/en/');
time.sleep(5)
#Click Button
#bt = driver.find_element_by_link_text('Change')
#bt.click()
#Copy Email
element = driver.find_element_by_id('mail')
emailtextvalue = element.get_attribute("value")
time.sleep(5)
#If you dont use you dont need copy string
pyperclip.copy(emailtextvalue)
time.sleep(2)
#Enter Email
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')
time.sleep(2)
driver.get('https://www.pointsprizes.com/ref/18429434')
time.sleep(5)
#This is the problem
entermail = driver.find_elements_by_name('email')[0].send_keys(emailtextvalue)
time.sleep(2)
I changed your code to this

Try this:
entermail = driver.find_element_by_xpath("//*[#id='wrapper']/div[4]/div/div/div[1]/form/div[1]/input[1]")
driver.implicity_wait(5)
entermail.send_keys(emailtextvalue)
Edit: I get X path like
Edit 2: driver.find_elements_by_name('email')[0] can work

how to make scrapy crawler work properly when dealing with multilevel webpage crawling

I am learning crawling skills, and I want to do as follows:
login to a specific webpage (done)
go to a page that contains the links that I need
for each link in that page, crawl its content.
The problem is I have tested my code for a single link, it worked, but when I tried it for the multilevel job. It failed in a way I could not understand: It can only crawl some part of each link. I am wondering if there is some logical mistake in my code, please help. Below is the code
import scrapy
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
class BaiduSpider(scrapy.Spider):
name = 'baidu'
allowed_domains = ['baidu.com']
start_urls = ['http://tieba.baidu.com']
main_url = 'http://tieba.baidu.com/f?kw=%E5%B4%94%E6%B0%B8%E5%85%83&ie=utf-8'
username = ""
password = ""
def __init__(self, username=username, password=password):
#options = webdriver.ChromeOptions()
#options.add_argument('headless')
#options.add_argument('window-size=1200x600')
self.driver = webdriver.Chrome()#chrome_options=options)
self.username = username
self.password = password
# checked
def logIn(self):
elem = self.driver.find_element_by_css_selector('#com_userbar > ul > li.u_login > div > a')
elem.click()
wait = WebDriverWait(self.driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,'#TANGRAM__PSP_10__footerULoginBtn')))
elem = self.driver.find_element_by_css_selector('#TANGRAM__PSP_10__footerULoginBtn')
elem.click()
elem = self.driver.find_element_by_css_selector('#TANGRAM__PSP_10__userName')
elem.send_keys(self.username)
elem = self.driver.find_element_by_css_selector('#TANGRAM__PSP_10__password')
elem.send_keys(self.password)
self.driver.find_element_by_css_selector('#TANGRAM__PSP_10__submit').click()
# basic checked
def parse(self, response):
self.driver.get(response.url)
self.logIn()
# wait for hand input verify code
time.sleep(20)
self.driver.get('http://tieba.baidu.com/f?kw=%E5%B4%94%E6%B0%B8%E5%85%83&ie=utf-8')
# try first page first
for url in self.driver.find_elements_by_css_selector('a.j_th_tit'):
#new_url = response.urljoin(url)
new_url = url.get_attribute("href")
yield scrapy.Request(url=new_url, callback=self.parse_sub)
# checked
def pageScroll(self, url):
self.log('I am scrolling' + url)
self.driver.get(url)
SCROLL_PAUSE_TIME = 0.5
SCROLL_LENGTH = 1200
page_height = int(self.driver.execute_script("return document.body.scrollHeight"))
scrollPosition = 0
while scrollPosition < page_height:
scrollPosition = scrollPosition + SCROLL_LENGTH
self.driver.execute_script("window.scrollTo(0, " + str(scrollPosition) + ");")
time.sleep(SCROLL_PAUSE_TIME)
time.sleep(1.2)
def parse_sub(self, response):
self.log('I visited ' + response.url)
self.pageScroll(response.url)
for sel in self.driver.find_elements_by_css_selector('div.l_post.j_l_post.l_post_bright'):
name = sel.find_element_by_css_selector('.d_name').text
try:
content = sel.find_element_by_css_selector('.j_d_post_content').text
except: content = ''
replys = []
for i in sel.find_elements_by_xpath('.//div[#class="lzl_cnt"]'):
user1 = i.find_element_by_xpath('.//a[#username]')
user1 = self.driver.execute_script("return arguments[0].firstChild.textContent", user1)
try:
user2 = i.find_element_by_xpath('.//span[#class="lzl_content_main"]/a[#username]')
user2 = self.driver.execute_script("return arguments[0].firstChild.textContent", user2)
except: user2 = name
span = i.find_element_by_xpath('.//span[#class="lzl_content_main"]')
reply = self.driver.execute_script('return arguments[0].lastChild.textContent;', span)
replys.append(tuple(user1, user2, reply))
yield {"topic": response.css(".core_title_txt::text").extract(), "name":name, "content":content, "replys":replys}
#follow to next page
#next_sel = self.driver.find_element_by_css_selector('#thread_theme_7 a:nth-child(3)')
#next_url_name = next_sel.text
#if next_sel and next_url_name == '下一页':
# next_url = next_sel.get_attribute('href')
# yield scrapy.Request(url=next_url, callback=self.parse_sub)

Seems like you are using a hardcoded container for the link instead of a generic one and hence getting back just one link in
for url in self.driver.find_elements_by_css_selector('a.j_th_tit')
This - j_th_tit - seems to be a dynamically generated class name and might not be the same for all anchor(a) tags.
You could try
for url in self.driver.find_elements_by_css_selector('a')
for getting all links of the page.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to solve invisible captcha on Selenium - python

submit the first form on the page: driver.execute_script('document.forms[0].submit()')

Related

Python Selenium .send_keys() only sending first character of my string

How to handle StaleElementReferenceException in Selenium Python?

Bypass Yahoo reCAPTCHA by 2captcha API

How can I write in this text box using python webdriver?

how to make scrapy crawler work properly when dealing with multilevel webpage crawling

Categories

Resources