Python requests: setting cookies on a request does not work - python

I want to crawl data from this website: 'http://www.stcn.com/article/search.html?search_type=all&page_time=1'. The site requires cookies from its homepage first, so I first get the cookies it needs from 'http://www.stcn.com/article/search.html' and set them on the request, but after many attempts it still does not work.
My code looks like this:
import requests
headers = {
'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36','Host':'www.stcn.com'}
def _getStcnCookie(keyWords='all'):
    """Fetch the stcn.com search page once and return its cookies as a dict.

    Parameters
    ----------
    keyWords : str
        Search keyword sent as the ``keyword`` query parameter.

    Returns
    -------
    dict or None
        Cookie name/value pairs, or ``None`` when the page does not
        answer with HTTP 200.
    """
    url = "http://www.stcn.com/article/search.html"
    # Pass the query explicitly as params=: the original handed the dict to
    # requests.get() positionally, which also lands in `params` but is easy
    # to misread as a request body.
    params = {'keyword': keyWords}
    r = requests.get(url, params=params, headers=headers, timeout=10)
    if r.status_code != 200:
        return None
    return requests.utils.dict_from_cookiejar(r.cookies)
def searchStcnData(url, keyWords):
    """POST the stcn.com search endpoint with cookies captured from the search page.

    Parameters
    ----------
    url : str
        Full search URL (with ``search_type``/``page_time`` query string).
    keyWords : str
        Term to search for.

    Returns
    -------
    The decoded JSON payload of the response (raises if the body is not JSON).
    """
    myHeader = dict.copy(headers)
    # The endpoint appears to answer JSON only for XHR-style requests.
    myHeader['X-Requested-With'] = 'XMLHttpRequest'
    cookies = _getStcnCookie(keyWords=keyWords)
    print(cookies)
    # Use the keyWords parameter instead of the hard-coded 'Paxlovid' so the
    # function works for any search term (same result for the call below).
    data = {'keyword': keyWords, 'page_time': 1, 'search_type': 'all'}
    # One request is enough: the original issued the same POST twice (once
    # through a Session, once through requests.post) and built a CookieJar
    # it never used.
    with requests.Session() as s:
        s.headers.update(myHeader)
        s.cookies.update(cookies or {})
        r = s.post(url, data=data, timeout=5)
    result = r.json()
    print(result)
    return result


searchStcnData('http://www.stcn.com/article/search.html?search_type=all&page_time=1', 'Paxlovid')
I've tried options 1, 2, and 3 to no avail.
I set cookies in Postman, and only set 'advanced-stcn_web=5sdfitvu42qggmnjvop4dearj4' can get the data, like this :
{
"state": 1,
"msg": "操作成功",
"data": "<li class=\"\">\n <div class=\"content\">\n <div class=\"tt\">\n <a href=\"/article/detail/769123.html\" target=\"_blank\">\n ......
"page_time": 2
}

Related

problem with coinex futures api signature and authorization in python

When I want to send a request to the CoinEx futures API I get an authorization failure. Can you send me the correct code for Python 3.9?
coinex docs:
Authorization
The authorization process is as follows
The input parameter string of the http message is as follows:
market=BTCUSD&type=buy&price=680&amount=1.0&timestamp=1550743431000
Paste the secret_key to the end of the above string as:
market=BTCUSD&type=buy&price=680&amount=1.0&timestamp=1550743431000&secret_key=B51068CF10B34E7789C374AB932696A05E0A629BE7BFC62F
Note: secret_key parameter is not required to send the http message body, this step is just for calculating the sha256 signature.
Perform sha256 on the above string, convert it to hexadecimal lowercase, and the length is 64 bits, and then add this signature to the http header as follows:
Authorization: a174066d9ccbeb33803c2a84e20792d31bed5a6e3da8fca23e38fc8dbb917a13
Add AccessId in the http header, and the server will look for the corresponding user information according to AccessId: 4DA36FFC61334695A66F8D29020EB589
After receiving the http message, the server finds the user’s secret key according to the AccessId, and performs the same operation as above to determine whether the received signature is equal to the signature calculated by itself. If they are equal, the authorization succeeds, otherwise it fails.
import time
import hashlib
import requests
access_id = '5#######################8'
secret_key = 'C##########################################7'
base_url = 'https://api.coinex.com/perpetual/v1'
def get_sign(params, secret_key):
    """Return the CoinEx sha256 signature for *params*.

    Builds ``k1=v1&k2=v2&...&secret_key=KEY`` in the dict's insertion
    order and hashes it; the hex digest is returned in lowercase.
    """
    pairs = ['{0}={1}'.format(key, params[key]) for key in params]
    payload = '&'.join(pairs) + '&secret_key=' + secret_key
    # hexdigest() is already lowercase; .lower() keeps the contract explicit.
    return hashlib.sha256(payload.encode()).hexdigest().lower()
def Adjust_Leverage():
    """POST /market/adjust_leverage for BTCUSDT and return the raw response text."""
    request_headers = {
        'Content-Type': 'application/json; charset=utf-8',
        'Accept': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'
    }
    # Insertion order matters: the same order is used to build the signature.
    body = {
        'market': 'BTCUSDT',
        'leverage': '10',
        'position_type': 1,
        'timestamp': int(time.time() * 1000)}
    request_headers['Authorization'] = get_sign(body, secret_key)
    request_headers['AccessId'] = access_id
    response = requests.post(
        url=f'{base_url}/market/adjust_leverage',
        headers=request_headers,
        json=body)
    return response.text
def Market_Order():
    """POST /order/put_market for BTCUSDT and return the raw response text.

    Bug fixed: the signature header was sent as lowercase 'authorization',
    inconsistent with the quoted API docs and with Adjust_Leverage, both of
    which use 'Authorization'.
    """
    header = {
        'Content-Type': 'application/json; charset=utf-8',
        'Accept': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'
    }
    timestamp = int(time.time() * 1000)
    # Insertion order matters: the same order is used to build the signature.
    params = {
        'market': 'BTCUSDT',
        'side': 1,
        'amount': '10',
        'timestamp': timestamp}
    # Capitalized header key, consistent with the docs and Adjust_Leverage.
    header['Authorization'] = get_sign(params, secret_key)
    header['AccessId'] = access_id
    res = requests.post(
        url=f'{base_url}/order/put_market',
        headers=header,
        json=params
    )
    return res.text
# Demo: only the leverage call is exercised; Market_Order() is defined but never invoked.
print(Adjust_Leverage())
I also tried the version of get_sign below, but I got an authorization failure again:
def get_sign(params, secret_key):
    """Sign *params* the CoinEx way: sha256 over 'k=v&...&secret_key=KEY'."""
    joined = '&'.join(f'{k}={v}' for k, v in params.items())
    raw = f'{joined}&secret_key={secret_key}'.encode()
    return hashlib.sha256(raw).hexdigest()

How to get a correct session_id? (Scrapy, Python)

There is an url: https://maps.leicester.gov.uk/map/Aurora.svc/run?inspect_query=QPPRN&inspect_value=ROH9385&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24&nocache=f73eee56-45da-f708-87e7-42e82982370f&resize=always
It returns the coordinates. To get the coordinates it makes 3 requests (I suppose):
the url mentioned above
requesting session_id
getting coordinates using the previously mentioned session_id.
I am getting session_id in the 2nd step, but it is wrong. I can't get coordinates in step 3 using it. How can I know that the problem is in session_id? When I insert the session_id taken from the browser - my code works fine and coordinates are received.
Here are the requests in browser:
Here is the correct response from browser:
And this is what I'm getting with my code:
Here is my code (it is for Scrapy framework):
import inline_requests


# NOTE(review): the paste mangled '@' into '#'; inline_requests is meant to be
# applied as a decorator so the method body can `yield` Requests synchronously.
@inline_requests.inline_requests
def get_map_data(self, response):
    """Get map coordinates: load the map page, request a session id, open the
    script map for that session, then query the PPRN location.
    """
    map_referer = ("https://maps.leicester.gov.uk/map/Aurora.svc/run?inspect_query=QPPRN&"
                   "inspect_value=ROH9385&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript"
                   "%24&nocache=f73eee56-45da-f708-87e7-42e82982370f&resize=always")
    response = yield scrapy.Request(
        url=map_referer,
        meta=response.meta,
        method='GET',
        dont_filter=True,
    )
    time_str = str(int(time.time() * 1000))
    headers = {
        'Referer': response.url,
        'Accept': 'application/javascript, */*; q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
        'Host': 'maps.leicester.gov.uk',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36'
    }
    response.meta['handle_httpstatus_all'] = True
    url = ('https://maps.leicester.gov.uk/map/Aurora.svc/RequestSession?userName=inguest'
           '&password=&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24&'
           f'callback=_jqjsp&_{time_str}=')
    reqest_session_response = yield scrapy.Request(
        url=url,
        meta=response.meta,
        method='GET',
        headers=headers,
        dont_filter=True,
    )
    session_id = re.search(r'"SessionId":"([^"]+)', reqest_session_response.text)
    session_id = session_id.group(1) if session_id else None
    print(session_id)
    pprn = 'ROH9385'  # hard coded for the current page
    if session_id and pprn:
        # Missing step in the original code: the script map must be opened for
        # this session before FindValueLocation can resolve anything under it.
        yield scrapy.Request(
            url=('https://maps.leicester.gov.uk/map/Aurora.svc/OpenScriptMap'
                 f'?sessionId={session_id}'),
            meta=reqest_session_response.meta,
            method='GET',
            dont_filter=True,
        )
        time_str = str(int(time.time() * 1000))
        url = ('https://maps.leicester.gov.uk/map/Aurora.svc/FindValue'
               f'Location?sessionId={session_id}&value={pprn}&query=QPPRN&callback=_jqjsp'
               f'&_{time_str}=')
        coords_response = yield scrapy.Request(
            url=url,
            method='GET',
            meta=reqest_session_response.meta,
            dont_filter=True,
        )
        print(coords_response.text)
Could you please correct my code so that it could get coordinates?
The website creates a sessionId first, then use the sessionId creates a layer on server (I guess). Then you can start requesting, otherwise it can't find the map layer under that sessionId.
import requests

# Step 1: create a session — the server builds the map layer under this id.
session_url = "https://maps.leicester.gov.uk/map/Aurora.svc/RequestSession?userName=inguest&password=&script=%5CAurora%5Cw3%5CPLANNING%5Cw3PlanApp_MG.AuroraScript%24"
session_payload = requests.get(session_url, verify=False).json()
sid = session_payload["Session"]["SessionId"]

# Step 2: open the script map for that session id.
open_map_url = f"https://maps.leicester.gov.uk/map/Aurora.svc/OpenScriptMap?sessionId={sid}"
requests.get(open_map_url, verify=False)

# Step 3: only now can the location be looked up under this session.
find_url = f"https://maps.leicester.gov.uk/map/Aurora.svc/FindValueLocation?sessionId={sid}&value=ROH9385&query=QPPRN"
result = requests.get(find_url, verify=False).json()
print(result)

How to login to instacart using requests?

So I tried the following rough version:
import requests
from bs4 import BeautifulSoup

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}

# One session so cookies from the first GET travel with the login POST.
session = requests.Session()
home_page = session.get('http://www.instacart.com', headers=headers)

# Scrape the CSRF token the login form expects.
parsed = BeautifulSoup(home_page.content, 'html.parser')
token = parsed.find('meta', {'name': 'csrf-token'}).get('content')

login_payload = {"user": {"email": "user#gmail.com", "password": "xxxxx"},
                 "authenticity_token": token}
login_response = session.post('https://www.instacart.com/accounts/login', headers=headers, data=login_payload)
print(login_response)
I always get the following error:
<Response [400]>
apparent_encoding:'ascii'
connection:<requests.adapters.HTTPAdapter object at 0x0000021F3FF8F940>
content:b'{"status":400,"error":"There was a problem in the JSON you submitted: Empty input () at line 1, column 1"}'
What am I doing wrong?
Actually you were missing the correct Params for the POST request.
I've made a GET request to the main site to collect the necessary authenticity_token which is used within the POST request. and then made the POST request for the correct login url.
import requests
from bs4 import BeautifulSoup

# Query-string parameters expected by the login endpoint.
params = {
    'source': 'web',
    'cache_key': 'undefined'
}
# JSON body; 'authenticity_token' is filled in after scraping the home page.
data = {
    'email': 'email#email.com',
    'grant_type': 'password',
    'password': 'yourpassword',
    'scope': '',
    'signup_v3_endpoints_web': 'null'
}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
}


def main(url):
    """GET the home page for the csrf-token, then POST the JSON login request."""
    with requests.Session() as req:
        landing = req.get(url, headers=headers)
        soup = BeautifulSoup(landing.content, 'html.parser')
        data['authenticity_token'] = soup.find(
            "meta", {'name': 'csrf-token'}).get("content")
        result = req.post(
            "https://www.instacart.com/v3/dynamic_data/authenticate/login",
            params=params, json=data, headers=headers).json()
        print(result)


main("https://www.instacart.com")

python handling incoming from url

I'm sending the request below to the URL and getting the response from it:
import requests

url = "http://localhost/dat.txt"
headers = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'Sec-Fetch-Dest': 'document',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
}

# Plain GET; the empty dict mirrors the generated snippet's payload argument.
response = requests.get(url, headers=headers, data={})
print(response.text.encode('utf8'))
Below is the response data that I get -
mohame4|nameon#example.com|passsd!##$4|head,customer|manager,devlop
mohame3|nameon3#example.com|passsd!##$4|head,customer|manager,devlop
I do this with the data
# Bug fix: iterating a string yields single CHARACTERS, so every
# split('|') produced a one-element list and the bare `except: pass`
# silently hid the IndexErrors. Iterate over lines instead.
for i in response.text.splitlines():
    try:
        i = i.strip().split('|')
        userna = i[0]
        emaill = i[1]
        passd = i[2]
        rol1 = i[3]
        rol2 = i[4]
    except IndexError:
        # Skip malformed or blank lines instead of swallowing every error.
        pass
How can I make rol1 as
this head,customer
to
rol1=['head','customer']
Simply split the string you're getting:
rol1 = i[3].split(',')
You could do this more... gracefully, though, using iterable unpacking:
username, email, password, rol1, rol2 = i.strip().split('|')
rol1 = rol1.split(',')
Thanks to all the helpers, especially @ForceBru.
import requests

url = "http://localhost/dat.txt"
response = requests.request("GET", url)
print(response.text)
# splitlines() avoids the empty trailing element that split('\n') leaves
# when the payload ends with a newline (which made i[3] raise IndexError).
for line in response.text.splitlines():
    fields = line.strip().split('|')
    # Guard against short/blank records before indexing column 3.
    if len(fields) > 3:
        print(fields[3].split(","))

python post url with requests.Session

I am trying to download a file with python using "requests.Session" in order to take cookies into account.
The following code returns the source code of the second webpage but does not download the file... even though I have included the headers and the parameters...
I am running out of ideas to find the problem here...
s = requests.Session()
url_euronex = "https://www.euronext.com/equities/directory"
response = s.get(url_euronex)
# Proceed only when the directory page loaded successfully.
if response.status_code == 200:
    content = response.text
    # The form key sits between these two fixed markers in the page source.
    start = content.index('formKey=nyx_pd_filter_values:') + 29
    stop = content.index('dataTableInitCallback') - 3
    token = content[start:stop]
    url = "https://www.euronext.com/fr/popup/data/download?ml=nyx_pd_stocks&cmd=default&formKey=nyx_pd_filter_values%3A" + str(token)
    print(url)
    headers = {
        'Host': 'www.euronext.com',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': url,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': '135',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'}
    Params = {
        'Query string': {'ml': 'nyx_pd_stocks', 'cmd': 'default',
                         'formKey': 'nyx_pd_filter_values:' + str(token)},
        'Form data': {'format': '3', 'layout': '2', 'decimal_separator': '1',
                      'date_format': '1', 'op': 'Go',
                      'form_build_id': 'form-64080cce1044e288464d174290cb40e9',
                      'form_id': 'nyx_download_form'}}
    data = [{'url': url, 'params': Params, 'method': 'post'}]
    r = s.post(url, json=data, headers=headers)
    if r.status_code == 200:
        print('coucou')
        print(r.text)

Categories

Resources