Here is my code:
import requests

DEVICE = {
    'instagram_version': '26.0.0.10.86',
    'android_version': 24,
    'android_release': '7.0',
    'dpi': '640dpi',
    'resolution': '1440x2560',
    'manufacturer': 'samsung',
    'device': 'SM-G930F',
    'model': 'herolte',
    'cpu': 'samsungexynos8890'
}
USER_AGENT_BASE = (
    'Instagram {instagram_version} '
    'Android ({android_version}/{android_release}; '
    '{dpi}; {resolution}; {manufacturer}; '
    '{device}; {model}; {cpu}; en_US)'
)
user_agent = USER_AGENT_BASE.format(**DEVICE)
REQUEST_HEADERS = {'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
sess = requests.Session()
sess.headers.update(REQUEST_HEADERS)
sess.headers.update({'User-Agent': user_agent})
response = sess.post(LOGIN_URL, data=data)
assert response.status_code == 200
Instead of building user_agent by hand, I need to use the fake_headers module to generate random headers. How do I do that?
You can set the headers on a request like this:
import urllib.request
REQUEST_HEADERS = {'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
urllib.request.Request("your link", headers=REQUEST_HEADERS)
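If the goal is specifically random headers, the fake_headers package provides a Headers class whose generate() method returns a dict of browser-like headers. A minimal sketch, assuming that package and reusing LOGIN_URL and data from the question (note that Instagram's private API may reject a browser-style User-Agent, so this only illustrates wiring the generated headers into the session):
import requests
from fake_headers import Headers

# Generate a random set of browser-like headers (Chrome on Windows here).
header_gen = Headers(browser='chrome', os='win', headers=True)
random_headers = header_gen.generate()

sess = requests.Session()
sess.headers.update(random_headers)
# Keep the form-encoded content type for the login POST.
sess.headers.update({'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
response = sess.post(LOGIN_URL, data=data)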
Hello, I am trying to print only the code value here, but I couldn't.
import requests
token = 'token'
id = "35633560231"
headers = {
    'Authorization': 'Bearer ' + token,
    'Accept': 'application/json',
}
response = requests.get('https://5sim.net/v1/user/check/' + id, headers=headers)
datas = response.json()
smss = datas['sms']
print(smss)
print(smss) outputs:
[{'created_at': '2022-09-09T14:25:01.486075Z', 'date': '2022-09-09T14:25:01.481586Z', 'sender': 'Amazon', 'text': "625172 is your Amazon OTP. Don't share it with anyone.", 'code': '625172'}]
I want to get only the code value. I tried smss = smss['code'], but it didn't work.
datas['sms'] is a list, and the JSON object you want is at index 0.
This should work:
sms=[{'created_at': '2022-09-09T14:25:01.486075Z', 'date': '2022-09-09T14:25:01.481586Z', 'sender': 'Amazon', 'text': "625172 is your Amazon OTP. Don't share it with anyone.", 'code': '625172'}]
print(f"Code: {sms[0]['code']}")
output:
Code: 625172
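If the 'sms' list ever contains more than one message, you can loop over it instead of hard-coding index 0. A small sketch based on the question's datas variable and the response shape shown above:
# Print the code of every SMS entry returned by the 5sim endpoint.
for sms in datas['sms']:
    print(f"Code: {sms.get('code')}")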
I am trying to scrape the servers list from https://www.astrill.com/member-zone/tools/vpn-servers, which is for members only. Username, password and captcha are required. Everything works if I log in with a browser and copy the 'PHPSESSID' cookie, but I want to log in with Python. I am downloading the captcha and entering it manually, but I am still not able to log in. Login URL: https://www.astrill.com/member-zone/log-in
Could anybody help me, please?
import os
from urllib.request import urlretrieve

import bs4
import requests

# BASE_FOLDER is defined elsewhere in the script.
SERVERS_URL = 'https://www.astrill.com/member-zone/tools/vpn-servers'
LOGIN_URL = 'https://www.astrill.com/member-zone/log-in'

def get_capcha(url):
    print(f'Scraping url: {url}')
    try:
        response = requests.get(url)
        response.raise_for_status()
    except Exception as e:
        print(type(e), e)
    if response.status_code == 200:
        print('Success!')
        page = response.content
        soup = bs4.BeautifulSoup(page, 'html.parser')
        captcha_url = soup.find('img', alt='captcha')['src']
        captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
        id = soup.find(id='csrf_token')
        print(id['value'])
        print(f'Captcha: {captcha_url}')
        print(response.headers)
        urlretrieve(captcha_url, captcha_file)
        return id['value']

def login(url, id):
    captcha_text = input('Captcha: ')
    print(id)
    payload = {
        'action': 'log-in',
        'username': 'myusername#a.com',
        'password': '1111111',
        'captcha': captcha_text,
        '_random': 'l4r1b7hf4g',
        'csrf_token': id
    }
    session = requests.session()
    post = session.post(url, data=payload)
    r = session.get(SERVERS_URL)
    print(r.text)
    print(r.cookies)

if __name__ == '__main__':
    id = get_capcha(LOGIN_URL)
    login(LOGIN_URL, id)
First of all, I was not sure which payload fields to POST. They can easily be discovered with Firefox Developer Tools (Network tab): you can see there what your browser actually posts. The second thing I discovered was that I need to request the captcha file within the same session, with my headers and cookies. So my code now looks like the following, and it works! (Some header fields can probably be removed.)
# SERVERS_URL, LOGIN_URL, BASE_FOLDER and the imports are the same as in the question.
session = requests.Session()

cookies = {}
headers = {
    'Host': 'www.astrill.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Content-Length': '169',
    'Origin': 'https://www.astrill.com',
    'Connection': 'keep-alive',
    'Referer': 'https://www.astrill.com/member-zone/log-in',
}
payload = {
    'action': 'log-in',
    'username': 'myusername#a.com',
    'password': '1111111',
    'remember_me': 0,
    'captcha': '',
    '_random': 'somerandom1',
    'csrf_token': ''
}

def get_capcha(url):
    print(f'Scraping url: {url}')
    try:
        response = session.get(url)
        response.raise_for_status()
    except Exception as e:
        print(type(e), e)
    if response.status_code == 200:
        print('Success!')
        page = response.content
        soup = bs4.BeautifulSoup(page, 'html.parser')
        captcha_url = soup.find('img', alt='captcha')['src']
        captcha_file = os.path.join(BASE_FOLDER, 'captcha.jpg')
        payload['csrf_token'] = soup.find(id='csrf_token')['value']
        print(f'csrf_token: {payload["csrf_token"]}')
        print(f'Captcha: {captcha_url}')
        cookies.update(response.cookies)
        # Fetch the captcha image within the same session, with the same
        # headers and cookies (see the note above).
        captcha_img = session.get(captcha_url, headers=headers, cookies=cookies)
        file = open(captcha_file, "wb")
        file.write(captcha_img.content)
        file.close()
        payload['captcha'] = input('Captcha: ')
    return

def login(url):
    post = session.post(url, data=payload, headers=headers, cookies=cookies)
    print(post.text)
    r = session.get(SERVERS_URL, cookies=cookies)
    print(r.text)
    print(r.cookies)

def main():
    get_capcha(LOGIN_URL)
    login(LOGIN_URL)

if __name__ == '__main__':
    main()
I did some research on Python requests, passing XML parameters, but without much success with this specific page: I cannot log in. If someone has already had a similar experience, any direction or help is welcome. My code is as follows:
import requests
from bs4 import BeautifulSoup

def main():
    # 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?timestamp=1478706683443?redirect=true'
    # 'LOGIN:Login'
    s = requests.Session()
    headers = {
        'Accept': 'application/xml, text/xml, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'Keep-Alive',
        'Content-Type': 'text/xml',
        'Host': 'plataformafinanceira.xxxxxxxxbr.corp',
        'Referer': 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?redirect=true',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E)'
    }
    url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html?timestamp=1478706683443?redirect=true'
    r = s.get(url, headers=headers, verify=False)
    print('************************************')
    print(r.status_code)
    print(r.cookies.get_dict())
    print('************************************')
    cookies = r.cookies.get_dict()
    xml = '''
<?xml version="1.0" encoding="ISO-8859-1"?><request>
<login type="group">
<row>
<id_user><![CDATA[x050432]]></id_user>
<ds_password><![CDATA[NDY0NnBvcnQ=]]></ds_password>
<version><![CDATA[2]]></version>
</row>
</login>
</request>
'''
    payload = {
        'id_user': 'x050432',
        'txtcd_Pwd': '4646port',
        'version': '2'
    }
    url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/common/callService.do?name=LOGIN:Login-%3Elogin'
    r = s.post(url, headers=headers, auth=('x050432', '4646port'), cookies=cookies, verify=False)
    print('++++++++++++++++++++++++++++++++++++')
    print(r.status_code)
    print(r.cookies.get_dict())
    print('++++++++++++++++++++++++++++++++++++')
    # r = s.post(url, headers=headers, auth=('x050432', '4646port'), data=payload, cookies=cookies)
    # r = s.post(url, headers=headers, data=payload, cookies=cookies, verify=False)
    # url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/iframePrincipal.html?funcao=index&timestamp=1562604252980'
    # r = s.post(url, headers=headers, cookies=cookies, verify=False)
    # r = s.post(url, headers=headers, auth=('x050432', '4646port'), data=payload, cookies=cookies)
    # print(r.status_code)
    # print(r.cookies.get_dict())
    # # print(r.text)
    with open('portal.html', 'w') as f:
        f.write(r.text)
    # print(r.text)
    # InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
    # url = 'https://plataformafinanceira.xxxxxxxxbr.corp/xxxxxxxxcdc/login/login.html'

if __name__ == '__main__':
    main()
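One detail that stands out in the code above: the xml string is built but never sent, even though the Content-Type header is text/xml. A hedged sketch of what posting it as the request body might look like (to be placed inside main() after the callService.do url is set; this assumes the service expects exactly the XML document shown, which I cannot verify):
    # Send the XML document as the raw request body; encode it to match its
    # ISO-8859-1 declaration. The text/xml Content-Type is already in headers.
    r = s.post(url, headers=headers, data=xml.strip().encode('iso-8859-1'),
               cookies=cookies, verify=False)
    print(r.status_code)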
In the end, I want to scrape the following webpage when doing an advanced search for the last two days:
https://launchstudio.bluetooth.com/Listings/Search
The results seem to be generated by JavaScript that calls an API:
bt.apiUrl = 'https://platformapi.bluetooth.com/';
$.ajax({
    method: 'post',
    contentType: "application/json; charset=utf-8",
    dataType: "json",
    url: bt.apiUrl + 'api/platform/Listings/Search',
    data: JSON.stringify(this.searchCriteria),
    success: function (data) {
        if (data.length) {
            var listings = data;
            console.log('listing count: ' + listings.length);
            this.listings = listings;
        }
    }.bind(this)
});
I found the following API documentation:
https://platformapi.bluetooth.com/Help/Api/POST-api-Platform-Listings-Search
However, my simple script returns a 404:
import requests
import json
payload = {
    'UserId': '',
    'MemberId': '',
    'SearchString': '',
    'SearchQualificationsAndDesigns': 1,
    'SearchDeclarationOnly': 1,
    'SearchEndProductList': 1,
    'SearchPRDProductList': 1,
    'SearchMyCompany': 1,
    'BQAApprovalStatusId': 9,
    'BQALockStatusId': 10,
    'ProductTypeId': 1,
    'SpecName': 1,
    'ListingDateEarliest': "2017-11-17T09:43:09.2031162-06:00",
    'ListingDateLatest': "2017-11-18T09:43:09.2031162-06:00",
    'Layers': [],
    'MaxResults': 11,
    'IncludeTestData': 1
}
url = 'https://platformapi.bluetooth.com/api/Platform/Listings/Search'
headers = {'Content-type': 'application/json', 'Accept': 'text/json'}
r = requests.post(url, data=json.dumps(payload), headers=headers)
print (r.status_code)
Can anyone see why? I'm using Python 3.5, by the way.
import requests
import json
payload = {
    "searchString": "bluetooth",
    "searchQualificationsAndDesigns": True,
    "searchDeclarationOnly": True,
    "searchEndProductList": False,
    "searchPRDProductList": True,
    "searchMyCompany": False,
    "productTypeId": 0,
    "specName": 0,
    "bqaApprovalStatusId": -1,
    "bqaLockStatusId": -1,
    "listingDateEarliest": "",
    "listingDateLatest": "",
    "userId": 0,
    "memberId": None,
    "layers": [],
    "maxResults": 5000
}
url = 'https://platformapi.bluetooth.com/api/Platform/Listings/Search'
headers = {'Content-type': 'application/json; charset=utf-8', 'Accept': 'text/json'}
r = requests.post(url, data=json.dumps(payload), headers=headers)
print (r.status_code)
#print r.json()
Result:
200
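To narrow the search to the last two days, as the original question intended, the listingDateEarliest and listingDateLatest fields of the working payload above can be filled in with ISO-8601 timestamps. A sketch, assuming the API accepts the same timestamp format the question used:
from datetime import datetime, timedelta, timezone

# Limit the search window to the last two days, then reuse the working request.
now = datetime.now(timezone.utc)
payload['listingDateEarliest'] = (now - timedelta(days=2)).isoformat()
payload['listingDateLatest'] = now.isoformat()

r = requests.post(url, data=json.dumps(payload), headers=headers)
listings = r.json()
print(len(listings))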
I am having trouble sending a topic downstream message using Firebase. Everything works fine when I send to a single user or to multiple users using tokens; my code looks like this:
import json
import urllib2

notif = {
    'to': 'TOKEN',
    'data': {'msg': 'whatever'},
}
opener = urllib2.build_opener()
data = json.dumps(notif)
req = urllib2.Request(
    FCM_URL,
    data=data,
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'key=' + API_KEY,
    }
)
response = opener.open(req)
However, if I replace the recipient with a topic, so that the code becomes
notif = {
    'to': '/topic/MY_TOPIC',
    'data': {'msg': 'whatever'},
}
opener = urllib2.build_opener()
data = json.dumps(notif)
req = urllib2.Request(
    FCM_URL,
    data=data,
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'key=' + API_KEY,
    }
)
response = opener.open(req)
{"multicast_id":id,"success":0,"failure":1,"canonical_ids":0,"results":[{"error":"InvalidRegistration"}]}
Is there something I am missing? I should point out that sending topic messages from the Firebase console works fine.
Any help is more than welcome.
Best & thanks!
Alex
Ah, so silly...
I was missing the 's' in 'topics'; the correct form is:
notif = {
    'to': '/topics/MY_TOPIC',
    'data': {'msg': 'whatever'},
}
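For completeness, here is a sketch of the same corrected request written with the requests library instead of urllib2 (FCM_URL and API_KEY are the same placeholders as in the question):
import json
import requests

notif = {
    'to': '/topics/MY_TOPIC',  # note the plural 'topics'
    'data': {'msg': 'whatever'},
}
response = requests.post(
    FCM_URL,
    data=json.dumps(notif),
    headers={
        'Content-Type': 'application/json',
        'Authorization': 'key=' + API_KEY,
    },
)
print(response.json())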
Hope it helps someone anyway!
Best,
A