Bypass Yahoo reCAPTCHA by 2captcha API - python

I'm trying to create yahoo account using python selenium, and while creating i have to bypass a recaptcha. I'm using 2Captcha API to automate solving captchas
My issue is i can't solve recaptcha
based on my tests, i noticed that yahoo is using entreprise captcha, not sure if it's a V2 or V3
Here the API Documentation : https://2captcha.com/2captcha-api
here is my code :
import os
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import random
from twocaptcha import TwoCaptcha
opt = Options()
opt.add_argument("--disable-infobars")
opt.add_argument("start-maximized")
# Pass the argument 1 to allow and 2 to block
opt.add_experimental_option("excludeSwitches", ["enable-logging"])
opt.add_experimental_option("prefs", {
"profile.default_content_setting_values.media_stream_mic": 2,
"profile.default_content_setting_values.media_stream_camera": 2,
"profile.default_content_setting_values.geolocation": 2,
"profile.default_content_setting_values.notifications": 2
})
executable_path = r'chromedriver'
os.environ["webdriver.chrome.driver"] = executable_path
global driver
driver = webdriver.Chrome(r'chromedriver', options=opt)
time.sleep(5)
driver.get("https://login.yahoo.com/account/create")
# Fname and Lname
time.sleep(6)
driver.find_element_by_xpath("//input[#name='firstName']").send_keys("fname")
time.sleep(3)
driver.find_element_by_xpath("//input[#name='lastName']").send_keys("lname")
# Email
time.sleep(3)
numberid = random.randint(100000, 900000)
driver.find_element_by_xpath("//input[#name='yid']").send_keys("fname" + str(numberid) + "lname")
# Password
time.sleep(3)
driver.find_element_by_xpath("//input[#name='password']").send_keys("TestEPWD.")
######## number region +
FC = '(+212)'
option_el = driver.find_element_by_xpath("//option[contains(text(),'%s')]" % FC)
option_el.click()
driver.find_element_by_xpath("//input[#name='phone']").send_keys('684838340')
# Choose date
Month = random.randint(1, 12)
Months = "//option[#value='{}']".format(Month)
monthselect = driver.find_element_by_xpath(Months)
monthselect.click()
time.sleep(3)
Day = random.randint(1, 27)
driver.find_element_by_xpath("//input[#name='dd']").send_keys(Day)
time.sleep(3)
Year = random.randint(1975, 2000)
driver.find_element_by_xpath("//input[#name='yyyy']").send_keys(Year)
time.sleep(3)
list = ["Man", "Woman"]
item = random.choice(list)
driver.find_element_by_xpath("//input[#name='freeformGender']").send_keys(item)
time.sleep(3)
driver.find_element_by_xpath("//button[#name='signup']").click()
time.sleep(5)
# CAPTCHA PART :
api_key = os.getenv('APIKEY_2CAPTCHA', 'mycaptchaAPI')
solver = TwoCaptcha(api_key)
yy = driver.current_url
try:
result = solver.recaptcha(
sitekey="6LeGXAkbAAAAAAMGHQaxwylqpyvtF2jJMrvJff1h",
url=yy,
entreprise=1,
version='v3',
score=0.2
)
except Exception as e:
print(e)
else:
print('result: ' + str(result))

Related

How to solve invisible captcha on Selenium

Hi I am trying to write a sign-up bot for UEFA.com using Selenium as requests I find to be too difficult for me to try so I am just working on automating the sign-up process even if it is a lot slower.
I am able to get to the final stage where I click on Create an Account, but faced with a reCaptcha which only appears after clicking on Create an Account. And after solving the captcha there is no 'Submit' button but it will automatically submit the details for you.
I am able to get the captcha token returned from 2captcha solving service, and inputted it into the innerHTML of the g-response-token field using javascript. However I do not know how to submit the captcha and the form.
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
import pyautogui
from twocaptcha import TwoCaptcha
import random
import os
from random import randint
import sys
firstnames = []
lastnames = []
API_Key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
# Open Names File
with open('firstnames.txt', 'r') as f:
for name in f:
name = name.strip()
firstnames.append(name)
with open('lastnames.txt', 'r') as e:
for name in e:
name = name.strip()
lastnames.append(name)
with open('proxies.txt') as f:
proxy = f.readlines()
proxy_rand = randint(1, 35)
s_proxy = str(proxy[proxy_rand])
p_strip = s_proxy.rstrip()
# Proxy Input and Format
bare_proxy = p_strip.split(':')
username = bare_proxy[2]
password = bare_proxy[3]
ip = bare_proxy[0]
port = bare_proxy[1]
options = {
'proxy': {
'http': f'http://{username}:{password}#{ip}:{port}',
'https': f'https://{username}:{password}#{ip}:{port}',
'no_proxy': 'localhost,127.0.0.1'
}
}
os.environ['PATH'] += 'C:/SeleniumDrivers'
homepage_URL = 'https://www.uefa.com/tickets/'
driver = webdriver.Chrome(seleniumwire_options=options)
driver.get(homepage_URL)
# Accessing Register Page
reject_cookies = driver.find_element(By.ID, 'onetrust-reject-all-handler')
reject_cookies.click()
time.sleep(1)
login_button = driver.find_element(By.CSS_SELECTOR, "a[class='btn btn-secondary tickets__btn js-tracking-card']")
login_button.click()
time.sleep(10)
create_account = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[4]/a')
create_account.click()
time.sleep(10)
# Inputting SignUp Details
letters = 'abcdefghijklmnopqrstuvwxyz'
a = random.choice(letters)
b = random.choice(letters)
c = random.choice(letters)
d = random.choice(letters)
email = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[6]/input')
email.send_keys(f'{a}{b}{c}{d}#nottingham.pro')
time.sleep(2)
password = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[7]/input')
password.send_keys('19741002Rw!')
time.sleep(2)
first_name = driver.find_element(By.XPATH, '//*[#id="gigya-textbox-130722358975432270"]')
first_range = len(firstnames) - 1
random_first = randint(1, first_range)
f_name = firstnames[random_first]
first_name.send_keys(f'{f_name}')
time.sleep(2)
last_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[9]/input')
last_range = len(lastnames) - 1
random_last = randint(1, first_range)
l_name = lastnames[random_last]
last_name.send_keys(f'{l_name}')
time.sleep(2)
day_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[1]/input')
day = randint(1, 28)
day_of_birth.send_keys(f'{day}')
time.sleep(2)
month_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[2]/input')
month = randint(1, 12)
month_of_birth.send_keys(f'{month}')
time.sleep(2)
year_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[3]/input')
year = randint(1940, 2000)
year_of_birth.send_keys(f'{year}')
driver.execute_script("window.scrollTo(0, 500)")
time.sleep(2)
pyautogui.moveTo(353, 619)
time.sleep(2)
pyautogui.click()
time.sleep(5)
current_url = driver.current_url
print(current_url)
g_key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'
def SolveCaptcha():
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
api_key = os.getenv(g_key, 'a733edea49a8327795d56edc9f06d391')
solver = TwoCaptcha(api_key)
try:
result = solver.recaptcha(
sitekey=g_key,
url=current_url)
except Exception as e:
print(e)
else:
return result
result = SolveCaptcha()
code = result['code']
print(code)
token = f'document.getElementById("g-recaptcha-response").innerHTML="{code}";'
driver.execute_script(token)
time.sleep(10000)
As you can see by the end of the code I have managed to input the captcha token but not sure how to submit as there is no submit button
I have tried to look for a callback function but can't seem to find it when I inspect the page.
submit the first form on the page:
driver.execute_script('document.forms[0].submit()')

How to handle StaleElementReferenceException in Selenium Python?

I am trying to scrape Twitter followers and the bio of the followers from a certain Twitter account.
The account has more than 10000 followers. I have ran the code many times but it scrapes like 5000,7000 sometimes 9000 followers and then throws StaleElementRefrenceException.
I am a beginner, so it would be of great help if you suggest where to make what changes in the code, so it won't throw the exception.
import csv
from getpass import getpass
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
def get_followers_and_bio(cardd):
screen_name = cardd.find_element_by_xpath('./div//span').text
username = cardd.find_element_by_xpath('.//span[contains(text(), "#")]').text
i = cardd.text.split("\n").index('Follow')
bio = cardd.text.split("\n")[i+1:]
user = (screen_name, username, bio)
return user
# Create instance of the web driver
driver = webdriver.Chrome(ChromeDriverManager().install())
# Navigate to login screen
driver.get('https://www.twitter.com/login')
driver.maximize_window()
sleep(5)
username = driver.find_element_by_xpath('//input[#name="text"]')
username.send_keys('myemail#gmail.com')
username.send_keys(Keys.RETURN)
sleep(10)
username1 = driver.find_element_by_xpath('//input[#name="text"]')
username1.send_keys('myusername')
username1.send_keys(Keys.RETURN)
my_password = getpass()
password = driver.find_element_by_xpath('//input[#name="password"]')
password.send_keys(my_password)
password.send_keys(Keys.RETURN)
sleep(5)
# Find search input and search for term or user
search_input = driver.find_element_by_xpath('//input[#aria-label="Search query"]')
search_input.send_keys('#username')
search_input.send_keys(Keys.RETURN)
sleep(5)
driver.find_element_by_link_text('People').click()
sleep(5)
driver.find_element_by_link_text('#username').click()
sleep(5)
# Opening user's followers list
driver.find_element_by_xpath("//a[#href='/username/followers']").click()
sleep(5)
# Get all followers and their bio on the page
followers_list = []
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
while scrolling:
cards = driver.find_elements_by_xpath('//div[#data-testid="UserCell"]')
for card in cards:
data = get_followers_and_bio(card)
if data:
followers_list.append(data)
scroll_attempt = 0
while True:
# Check scroll position
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
sleep(2)
curr_position = driver.execute_script("return window.pageYOffset;")
if last_position == curr_position:
scroll_attempt += 1
# End of scroll region
if scroll_attempt >= 5:
scrolling = False
break
else:
sleep(3) # Attempt to scroll again
else:
last_position = curr_position
break

I am very new to scraping please bear with me and this is my 1st project. I am trying to scrape a site using selenium

"problem lines"
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
print(radio_label_list)
time.sleep(1)
website I'm scraping https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb
label image
I was not able to print the radio buttons label according to checked button. I don't know what is the mistake and where I did it. could anyone help on this. It will be helpful for me to learn. Change tariff links given below links,
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
class telekommobiles:
def __init__(self):
self.url="https://www.telekom.de/mobilfunk/geraete/smartphone?page=1&pageFilter=promotion"
self.country='DE'
self.currency='GBP'
self.VAT='Included'
self.shipping = 'free shipping within 3-4 weeks'
self.Pre_PromotionPrice ='N/A'
self.color ='N/A'
def telekom(self):
#try:
driver=webdriver.Chrome()
driver.maximize_window()
driver.get(self.url)
today = date.today()
time.sleep(5)
cookies = driver.find_element_by_css_selector('button.cl-btn.cl-btn--accept-all').click()
print("cookies accepted")
links_prod_check = []
prod_models = []
prod_manufacturer =[]
prod_memorys = []
product_colors =[]
product_price_monthly_payments = []
product_price_one_time_payments =[]
product_links = []
containers = driver.find_elements_by_css_selector('div[class="styles_item__12Aw4"]')
i = 1
for container in containers:
p_links =container.find_element_by_tag_name('a').get_attribute('href')
i = i + 1
product_links.append(p_links)
#print(p_links)
for links in product_links:
driver.get(links)
#time.sleep(5)
#print(driver.current_url)
#links_prod_check.append(driver.current_url)
coloroptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH,"//li[#data-qa='list_ColorVariant']")))
#print(coloroptions)
for i in range(len(coloroptions)):
coloroption = driver.find_elements_by_xpath("//li[#data-qa='list_ColorVariant']")
coloroption[i].click()
#print(coloroption[i])
time.sleep(3)
memoryoptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH,"//span[#class='phx-radio__element']")))
for i in range(len(memoryoptions)):
memoryoption = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
try:
memoryoption[i].click()
except:
pass
time.sleep(5)
change_traiff = driver.find_element_by_css_selector('button[class="phx-link phx-list-of-links__link js-mod tracking-added"]').click()
time.sleep(3)
#looping for each section
section_loops = driver.find_elements_by_css_selector('section[class="tariff-catalog--layer"]')
#print(len(section_loops))
for section_loop in section_loops:
#print(section_loop)
time.sleep(5)
#Headings
heading_1 = section_loop.find_element_by_css_selector('h2[class="page-title page-title--lowercase"]').text
print(heading_1)
# looping for each separate boxes
each_box_subcontainers = section_loop.find_elements_by_css_selector('.phx-tariff-box__section')
#print(len(each_box_subcontainers))
for subcontainer in each_box_subcontainers:
#print(subcontainer)
looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH,"//span[#class='phx-radio__element']")))
#print(looping_for_tariff)
for i in range(len(looping_for_tariff)):
#print(i)
try:
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
for_tariff_loop[i].click()
time.sleep(3)
except:
pass
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
print(radio_label_list)
time.sleep(1)
change_traiff_close_button = driver.find_element_by_css_selector('span[class="icon-after-yellow-close right close popup-close-tr js-popup-close"]').click()
telekom_de=telekommobiles()
telekom_de.telekom()
You are trying to find element within an element. Finding radio_label_list using for_tariff_loop[i], xpath for radio_label_list will become like below:
//span[#class='phx-radio__element']//span[#class="phx-radio__label"]
Which does not exist in the DOM.
I tried the last part of the code. And was able to print the Memory size like below. Do try and confirm:
Replaced css-selector for radio_label_list with this xpath ./following-sibling::span
looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//span[#class='phx-radio__element']")))
# print(looping_for_tariff)
for i in range(len(looping_for_tariff)):
# print(i)
try:
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
for_tariff_loop[i].click()
time.sleep(3)
except:
pass
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_xpath("./following-sibling::span").text
print(radio_label_list)
time.sleep(1)
As per the comments, check this code:
driver.get("https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb")
wait = WebDriverWait(driver,30)
wait.until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Accept All']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH,"//ul[contains(#class,'phx-tariff-notification-box-new__element--desktop-tablet')]/li[2]/button"))).click()
length = len(driver.find_elements_by_class_name("phx-tariff-box__section"))
for i in range(length):
print("----------------------------------------------------------------------------------------------------------")
options = driver.find_elements_by_class_name("phx-tariff-box__section")
datas = options[i].find_element_by_xpath(".//div[contains(#class,'phx-tariff-box__volume')]").get_attribute("innerText")
print("data: {}".format(datas))
len_types = len(options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label"))
types = options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label")
if len(types) == 0:
price = options[i].find_element_by_xpath(".//p[#data-qa='block_TariffPrice']").get_attribute("innerText")
print(price)
else:
for j in range(len_types):
types[j].click()
time.sleep(2)
options = driver.find_elements_by_class_name("phx-tariff-box__section")
types = options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label")
try:
types[j].find_element_by_xpath("./input[#checked]")
type = types[j].find_element_by_xpath("./span[2]").get_attribute("innerText")
price = options[i].find_element_by_xpath(".//p[#data-qa='block_TariffPrice']").get_attribute("innerText")
print(f"{type}: {price}")
except:
pass

perform() and reset_actions() in ActionChains not working selenium python

This is the code that habe no error:
perform() and reset_actions()
but these two functions have to work combinedly
import os
import time
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import random
# Setting the chrome_options
global chrome_options
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument('--profile-directory=Default')
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument('disable-infobars')
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
google_search = [
"1.' driver.switch_to.active_element' ",
"2.this code is a one of important snippet for facebook automation.",
]
random_google_search = random.choice(google_search)
# Setting the Chrome Driver
global driver
driver = webdriver.Chrome("chromedriver.exe", chrome_options=chrome_options)
# Setting the Actions
global actions
actions = ActionChains(driver)
#the loop Running
def navigation():
time.sleep(5)
actions.reset_actions()
driver.get("https://google.com")
actions.send_keys(random_profile_post)
total_tab = 3
sleep_time = 1
implicitly_wait_time = 4
actions.reset_actions()
driver.implicitly_wait(implicitly_wait_time)
time.sleep(sleep_time)
for i in range(total_tab):
actions.send_keys(Keys.TAB)
print("Pressing * " + str(i + 1) + " * No Tab")
actions.send_keys(Keys.ENTER)
actions.perform()
for i in range(10):
navigation()
print("Pressing * " + str(i + 1) + " * st navigation function")
I am working with navigation() functions:
in the loop area
actions.send_keys(Keys.TAB)
actions.reset_actions()
I need to reset action but it's not reseating previous preform()
What will be the batter way to do that.
Please watch the youtube video for more clear understanding.
The issue is fixed already but will be in the next releases. Check the issue #6837 in the github.
For now you can use temporary solution.
def perform_actions():
""" Perform and reset actions """
actions.perform()
actions.reset_actions()
for device in actions.w3c_actions.devices:
device.clear_actions()
# the loop Running
def navigation():
time.sleep(5)
driver.get("https://google.com")
actions.send_keys("A")
total_tab = 4
sleep_time = 1
implicitly_wait_time = 4
# actions.reset_actions()
driver.implicitly_wait(implicitly_wait_time)
time.sleep(sleep_time)
for i in range(total_tab):
actions.send_keys(Keys.TAB)
print("Pressing * " + str(i + 1) + " * No Tab")
actions.send_keys(Keys.ENTER)
perform_actions()
print()
You can just instantiate it inside the function. If this does not work you can try to throw it all inside def, aside from the global lines, which should be deleted.
#the loop Running
def navigation():
actions = ActionChains(driver)
.
.
.
You should store ActionChains in a variable, so you can store you action in a object.
def navigateGroupPostBtn():
navigateGroupJoinBtnActions = ActionChains(driver)
total_tab = 23
sleepTime = 1
implicitlyWaitTime = 20
time.sleep(sleepTime)
for i in range(total_tab):
driver.implicitly_wait(implicitlyWaitTime)
navigateGroupJoinBtnActions.send_keys(Keys.TAB)
print("Pressing * " + str(i + 1) + " * No Tab")
navigateGroupJoinBtnActions.send_keys(Keys.ENTER)
navigateGroupJoinBtnActions.perform_actions()
print("Navigate Groum Join Btn Successfully ")
navigateGroupJoinBtnActions = ActionChains(driver)

How to scrape this site for topics that I answer in

Question
How can I modify my script to successfully show me the number of answers that I have made by topics.
Code
This was a script that I tried
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
def get_topics('Juan-Gallardo'):
url = "http://www.quora.com/" + 'Juan-Gallardo' + "/topics"
browser = webdriver.Chrome()
browser.get(url)
time.sleep(2)
bod = browser.find_element_by_tag_name("body")
no_of_pagedowns = 40
while no_of_pagedowns:
bod.send_keys(Keys.PAGE_DOWN)
time.sleep(0.3)
no_of_pagedowns-=1
topics = [t.text.encode('ascii', 'replace') for t in browser.find_elements_by_class_name("name_text")]
counts = [c.text.encode('ascii', 'replace').split(' ')[0] for c in browser.find_elements_by_class_name("name_meta")]
li = [[topics[i], int(counts[i])] for i in xrange(len(topics)) if counts[i] != '']
browser.quit()
return li
Errors
You need to define an argument for the get_topics() function:
def get_topics(user):
url = "http://www.quora.com/" + user + "/topics"
...
Then, call the function this way:
get_topics('Juan-Gallardo')

Categories

Resources