# Request headers intended for www.namestation.com.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "http://www.namestation.com/domain-search?autosearch=1",
    "Origin": "http://www.namestation.com",
    "Host": "www.namestation.com",
    "Content-Type": "application/json; charset=UTF-8",
    "Connection": "keep-alive",
}

# Build an opener whose cookie processor is backed by the jar `cj`.
cj = cookielib.CookieJar()
cookie_handler = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_handler)
opener.addHeaders(headers)?
Your opener should have an attribute addheaders, which is a list of tuples. By default it contains the user agent.
# Append one more (name, value) header tuple to the opener's existing list.
opener.addheaders.append(('Host', 'www.namestation.com'))
Something like this may work:
def opener():
    """Build a cookie-aware urllib2 opener carrying the namestation.com headers.

    Returns:
        The opener created by ``urllib2.build_opener``; its ``addheaders``
        list is replaced with the header tuples listed below.
    """
    cookie_jar = cookielib.CookieJar()
    # Process handlers
    # (local name differs from the function name so it does not shadow it)
    url_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    url_opener.addheaders = [
        ('Accept', 'application/json, text/javascript, */*; q=0.01'),
        ('X-Requested-With', 'XMLHttpRequest'),
        ('Referer', 'http://www.namestation.com/domain-search?autosearch=1'),
        ('Host', 'www.namestation.com'),
        ('Content-Type', 'application/json; charset=UTF-8'),
        ('Connection', 'keep-alive'),
    ]
    return url_opener
You should convert the dict to a list of (name, value) tuples first.
# The attribute the opener reads is `addheaders` (all lowercase); it holds a
# list of (name, value) tuples, which is exactly what dict.items() yields.
opener.addheaders = list(headers.items())
Related
How can I login to this site and get cookies after login by Python? I tried to use requests but failed.
import requests
url = 'https://shahid.mbc.net/en/widgets/login'
credentials = {
    'email': 'mail#mail.com',
    'password': 'pass',
}
# NOTE(review): the credentials are attached as the body of a GET request;
# a login endpoint presumably expects a POST — confirm against the site.
response = requests.get(url, data=credentials)
# Show what came back: first the response headers, then the cookies.
for received in (response.headers, response.cookies):
    print(received)
Thank you.
You have to replace the email, password and API key with your own to log in successfully. You can find the API key in the browser's network tab: in Google Chrome, open DevTools > Network > XHR.
import requests
# Log in through https://login.mbc.net/accounts.login with a requests Session
# and print three fields of the JSON reply.
with requests.Session() as req:
    # Headers copied from a browser request, including a captured cookie
    # string; they are installed as session-wide defaults below.
    headers = {
        'authority': 'login.mbc.net',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua': '"Chromium";v="88", "Google Chrome";v="88", ";Not A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36',
        'content-type': 'application/x-www-form-urlencoded',
        'accept': '*/*',
        'origin': 'https://shahid.mbc.net',
        'sec-fetch-site': 'same-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://shahid.mbc.net/',
        'accept-language': 'en-US,en;q=0.9',
        'cookie': '_gcl_au=1.1.1199633606.1614921781; WZRK_G=38c72e930fdb4b74837aa643dc47b107; _ga=GA1.1.1880367528.1614921781; _scid=a233719a-6581-477a-a7db-7111775ad5d2; gmid=gmid.ver4.AcbHoSuw-Q.WFFC0rz1YY3Z8Jkf3PSY2-d85Q20q4ngU541yDeCdCPz9Jb4_6zdRw9_rHKK7NHL.4vpNb7p4gJcxVMMRVoaPaKM774BnYDXqsNqgEqF8-DNz6t5OzbeAT9DpKFeAh6cnMbi_rbYE-mVIQT7isOAPIg.sc3; ucid=ctWpLfyUS5YIrftLu6u8-w; hasGmid=ver4; gig_bootstrap_3_Pm0x4fe9XSy6gv04PewESwqZ_HLgUCbXwWWPHCbGmUGFbW1xyHa42dFt0XTVay0T=login_ver4; _fbp=fb.1.1614921782478.439841241; _sctr=1|1614880800000; PAPVisitorId=JL2A8d7cVVZ64SlP4IkRS6AFl9t5nmdv; _ga_9ZLGVMG0QJ=GS1.1.1614921780.1.1.1614924710.2; WZRK_S_65W-567-675Z=%7B%22p%22%3A12%2C%22s%22%3A1614921781%2C%22t%22%3A1614924710%7D',
    }
    # Form fields of the login request; passed via data=, so they are sent
    # form-encoded.
    data = {
        'loginID': 'Your email',  # registered Email
        'password': 'your password',
        'sessionExpiration': '0',
        'targetEnv': 'jssdk',
        'include': 'profile,data',
        'includeUserInfo': 'true',
        'lang': 'en',
        'APIKey': '3_Pm0x4fe9XSy6gv04PewESwqZ_HLgUCbXwWWPHCbGmUGFbW1xyHa42dFt0XTVay0T',  # Your apikey
        'sdk': 'js_latest',
        'authMode': 'cookie',
        'pageURL': 'https://login.mbc.net/accounts.login',
        'sdkBuild': '11903',
        'format': 'json',
    }
    req.headers.update(headers)
    r = req.post('https://login.mbc.net/accounts.login', data=data)
    # Decode the JSON body once and reuse it for all three lookups.
    result = r.json()
    print(result['statusCode'])  # 200
    print(result['isRegistered'])  # True
    print(result['profile'])  # {'email': 'XXX#yyyy.com'}
For a successful login, the output will be:
statusCode= 200
isRegistered= True
profile= {'email': 'XXX#yyyy.com'}
My request:
# python 3.7.3
import requests
from requests import Session
session = Session()
# Warm up the session with a HEAD request to the product page.
session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')

# Copy the session's cookies into a fresh jar (jar -> dict -> jar).
session_cookie_dict = requests.utils.dict_from_cookiejar(session.cookies)
cookies = requests.utils.cookiejar_from_dict(session_cookie_dict)

# Form fields identifying the product and item to look up.
inventory_form = {
    'productId': '3074457345616736172',
    'itemId': '3074457345616736949',
    'isGunFlag': 'false',
}

# Headers for the inventory-status POST.
ajax_headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'content-length': '72',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.basspro.com',
    'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
    'x-requested-with': 'XMLHttpRequest',
}

response = session.post(
    url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
    data=inventory_form,
    cookies=cookies,
    headers=ajax_headers,
)
print(response.text)
Output:
<input type="hidden" class="relativeToAbsolute" value="true" />
/*
{
"onlineInventory": {
"status": "Status Not Available",
"image": "widget_product_info/outofstock_icon.svg",
"altText": "Status Not Available",
"isDropShip": false,
"availableDate":""
},
"inStoreInventory": {
"stores": [],
"checkStoreText": "Check Store Availability",
"isInStoreInventory": true,
"isPickupInventory": false
}
}
*/
My output when inspecting and running the same AJAX request via browser:
/*
{
"onlineInventory": {
"status": "Backordered",
"image": "widget_product_info/backordered_icon.svg",
"altText": "Backordered",
"isDropShip": false,
"quantity": 0,
"availableDate":"May 1-8"
},
"inStoreInventory": {
"stores": [{
id: '715839555',
name: '83',
gunRestricted: 'false',
dsName: 'TX - Round Rock',
status: 'Unavailable',
statusText: 'Out of Stock',
image: 'widget_product_info/outofstock_icon.svg',
altText: 'Out of Stock',
availableDate: '',
availableQuantity: '',
availableQuantityDisplay: 'false',
cityState: 'Round Rock, TX',
ISPavailableDate: '',
ISPavailableQuantity: '',
pickupTime: 'by 2:00pm',
offerISPOnBPS: 'Yes',
offerISPOnCAB: 'No'}],
"checkStoreText": "Change Store",
"isInStoreInventory": true,
"isPickupInventory": true
}
}
*/
I tried assigning cookies this way as well:
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
# Plain GET made without a Session; keep the cookies from its response.
r = requests.get(url)
cookies = r.cookies
# fails to pass the right cookie
If I instead copy the cookie verbatim from an inspected GET request at https://www.basspro.com/shop/en/blazer-brass-handgun-ammo and put that into the POST headers, it works. How do I get cookies to work properly programmatically?
EDIT:
Here's my attempt at just using Session() for cookies:
# python 3.7.3
import requests
from requests import Session
session = Session()
# Load the product page first so the session holds whatever cookies it sets.
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
# Alternative warm-up request, currently disabled:
# session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')

# Form fields identifying the product and item to look up.
inventory_form = {
    'productId': '3074457345616736172',
    'itemId': '3074457345616736949',
    'isGunFlag': 'false',
}

# Headers for the inventory-status POST.
ajax_headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'content-length': '72',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.basspro.com',
    'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
    'x-requested-with': 'XMLHttpRequest',
}

# No cookies= argument here: only the session's own cookie jar is used.
response = session.post(
    url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
    data=inventory_form,
    headers=ajax_headers,
)
print(response.text)
I get the same result as before ("status": "Status Not Available", etc.)
Here's my attempt at the second solution:
# python 3.7.3
import requests
from requests import Session
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
# Fetch the product page without a Session and keep its cookies.
r = requests.get(url)
cookies = r.cookies  # the type is RequestsCookieJar

# Form fields identifying the product and item to look up.
inventory_form = {
    'productId': '3074457345616736172',
    'itemId': '3074457345616736949',
    'isGunFlag': 'false',
}

# Headers for the inventory-status POST.
ajax_headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'content-length': '72',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.basspro.com',
    'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
    'x-requested-with': 'XMLHttpRequest',
}

# Hand the cookies from the GET response to a standalone POST.
response = requests.post(
    url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
    data=inventory_form,
    cookies=cookies,
    headers=ajax_headers,
)
print(response.text)
Again, I get the same result as before. What am I doing wrong?
Can you try it like this:
session = Session()
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
Then make all the following calls with
session.xxx
and do not pass the cookies parameter to them.
Another way, which I have tested:
cookies = r.cookies # the type is RequestsCookieJar
requests.post(.... cookies=cookies...)
Finally, I tested the following and it works.
Please compare it carefully with your code:
from requests import Session
session = Session()
# One user-agent string shared by both requests.
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'

# Step 1: load the product page with the browser user-agent.
r1 = session.get(
    "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo",
    headers={'user-agent': agent},
)

# Step 2: query the online-inventory view, passing the cookies from step 1.
inventory_form = {
    'productId': '3074457345616736172',
    'catalogId': '3074457345616676768',
    'storeId': '715838534',
    'langId': -1,
}
ajax_headers = {
    'user-agent': agent,
    'x-requested-with': 'XMLHttpRequest',
}
response = session.post(
    url='https://www.basspro.com/shop/BPSGetOnlineInventoryStatusByIDView',
    data=inventory_form,
    headers=ajax_headers,
    cookies=r1.cookies,
)
print(response.text)
I have the following code, but it gives me 200 OK with the first-page response (the default state of the drop-downs). Please note that the drop-down lists are dynamic and progressive until the final search button appears. Can someone tell me what is wrong with my code?
def process(ghatno):
    """Load the Nashik eASR page, post its search form and dump the exchange.

    The page is fetched once to collect its hidden form fields, the search
    form is posted back through the same session (so the cookies from the
    first request are sent automatically), the text of the reply's table
    rows is printed, and the request/response headers are written to a
    debug file.

    Args:
        ghatno: String appended to the progress message.
    """
    post_url = 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik'
    print("Please wait...getting details of :" + ghatno)

    with requests.Session() as session:
        r = session.get(url=post_url)
        pprint.pprint(r.headers)
        gethead = r.headers

        # Hidden fields that have to be echoed back with the form.
        soup = BeautifulSoup(r.text, 'html.parser')
        viewstate = soup.select('input[name="__VIEWSTATE"]')[0]['value']
        csrftoken = soup.select('input[name="__CSRFTOKEN"]')[0]['value']
        eventvalidation = soup.select('input[name="__EVENTVALIDATION"]')[0]['value']
        viewgen = soup.select('input[name="__VIEWSTATEGENERATOR"]')[0]['value']

        # NOTE(review): ghatno is only printed; the survey number posted is
        # the hard-coded 363 — confirm whether it should come from ghatno.
        # NOTE(review): the drop-downs on this page are filled in step by
        # step, so intermediate postbacks in this same session may be needed
        # before the final search — verify against the site.
        data = {
            '__CSRFTOKEN': csrftoken,
            '__EVENTARGUMENT': '',
            '__EVENTTARGET': '',
            '__LASTFOCUS': '',
            '__SCROLLPOSITION': '0',
            '__SCROLLPOSITIONY': '0',
            '__EVENTVALIDATION': eventvalidation,
            '__VIEWSTATE': viewstate,
            '__VIEWSTATEGENERATOR': viewgen,
            'ctl00$ContentPlaceHolder5$ddlLanguage': 'en-US',
            'ctl00$ContentPlaceHolder5$btnSearchCommonSr': 'Search',
            'ctl00$ContentPlaceHolder5$ddlTaluka': '2',
            'ctl00$ContentPlaceHolder5$ddlVillage': '25',
            'ctl00$ContentPlaceHolder5$ddlYear': '20192020',
            'ctl00$ContentPlaceHolder5$grpSurveyLocation': 'rdbSurveyNo',
            'ctl00$ContentPlaceHolder5$txtCommonSurvey': 363,
        }
        headers = {
            'Host': 'igrmaharashtra.gov.in',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0',
            'Referer': 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }
        # Pass the dict itself so requests form-encodes it (a JSON string is
        # not a form post), and post through the session so the cookies
        # from the GET above go along with it.
        r = session.post(url=post_url, data=data, headers=headers)

    # Parse the reply's HTML directly, keeping only <tr> elements; text that
    # has already been through get_text() has no tags left to select.
    rows = BeautifulSoup(r.text, 'html.parser', parse_only=SoupStrainer('tr'))
    print(rows.get_text())
    pprint.pprint(r.headers)
    print(r.text)

    getpost = r.headers
    getpostrequest = r.request.headers
    request_body = r.request.body  # body of the POST request that was sent
    # Context manager closes the debug file even if a write fails.
    with open('/var/www/html/nashik/hiren.txt', 'w') as f:
        f.write(str(gethead))
        f.write(str(getpostrequest))
        f.write(str(request_body))
        f.write(str(getpost))
My response is as below :
Response header - (GET Request)
{'Content-Length': '5994', 'X-AspNet-Version': '4.0.30319', 'Set-Cookie': 'ASP.NET_SessionId=24wwh11lwvzy5gf0xlzi1we4; path=/; HttpOnly, __CSRFCOOKIE=d7b10286-fc9f-4ed2-863d-304737df8758; path=/; HttpOnly', 'Content-Encoding': 'gzip', 'Vary': 'Accept-Encoding', 'X-Powered-By': 'ASP.NET', 'Server': 'Microsoft-IIS/8.0', 'Cache-Control': 'private', 'Date': 'Thu, 02 May 2019 08:21:48 GMT', 'Content-Type': 'text/html; charset=utf-8'}
Request header - (POST Request)
{'Content-Length': '3726', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate', 'Host': 'igrmaharashtra.gov.in', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0', 'Connection': 'keep-alive', 'Referer': 'http://igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Nashik', 'Cookie': '__CSRFCOOKIE=d7b10286-fc9f-4ed2-863d-304737df8758; ASP.NET_SessionId=24wwh11lwvzy5gf0xlzi1we4', 'Content-Type': 'application/x-www-form-urlencoded'}
Response header - (POST Request)
{'Content-Length': '7834', 'X-AspNet-Version': '4.0.30319', 'Content-Encoding': 'gzip', 'Vary': 'Accept-Encoding', 'X-Powered-By': 'ASP.NET', 'Server': 'Microsoft-IIS/8.0', 'Cache-Control': 'private', 'Date': 'Fri, 03 May 2019 10:21:45 GMT', 'Content-Type': 'text/html; charset=utf-8'}
**Default Page Selected Drop Down is returned **
नाशिक and
- - Select Taluka - - INSTEAD of option value "2" i.e इगतपुरी once option "2" is selected I want value "25" in next drop down before I put my final survey "363" for results.
Please note I tried Mechanize browser too, but no luck !!
Finally the solution is to do post requests multiple times in same "session" with same "cookie" and iterate through them. It works now !
For fun, I'm trying to use Python requests to log on to my school's student portal. This is what I've come up with so far. I'm trying to be very explicit on the headers, because I'm getting a 200 status code (the code you also get when failing to login) instead of a 302 (successful login).
import sys
import os
import requests
def login(username, password):
    """Post the portal sign-in form, then fetch and print the guardian home page.

    Args:
        username: Sent as the ``account`` form field.
        password: Sent as both the ``pw`` and ``translatorpw`` form fields.
    """
    sign_in_url = '(link)/home.html#sign-in-content'
    home_url = '(link)guardian/home.html'

    form_fields = {
        'translator_username': '',
        'translator_password': '',
        'translator_ldappassword': '',
        'returnUrl': '',
        'serviceName': 'PS Parent Portal',
        'serviceTicket': '',
        'pcasServerUrl': '\/',
        'credentialType': 'User Id and Password Credential',
        'account': username,
        'pw': password,
        'translatorpw': password,
    }

    # NOTE: assembled here but not handed to the POST below.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'max-age=0',
        'connection': 'keep-alive',
        'content-type': 'application/x-www-form-urlencoded',
        'host': '(link)',
        'origin': '(link)',
        'referer': '(link)guardian/home.html',
        'upgrade-insecure-requests': '1',
    }

    with requests.Session() as session:
        sign_in = session.post(sign_in_url, data=form_fields)
        if sign_in.status_code == 302:
            print(sign_in.text)
        # This line is printed for every status code.
        print('Authentication error', sign_in.status_code)
        home_page = session.get(home_url)
        print(home_page.text)
def main():
    """Call login() with placeholder credentials."""
    login('myname', 'mypass')


# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()
Using Chrome to examine the network requests, all of these headers are under 'Request Headers' in addition to a long cookie number, content-length, and user-agent.
The forms are as follows:
pstoken:(token)
contextData:(text)
translator_username:
translator_password:
translator_ldappassword:
returnUrl:(url)guardian/home.html
serviceName:PS Parent Portal
serviceTicket:
pcasServerUrl:\/
credentialType:User Id and Password Credential
account:f
pw:(id)
translatorpw:
Am I missing something with the headers/form names? Is it a problem with cookies?
If I look at p.request.headers, this is what is sent:
{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36', 'accept-encoding': 'gzip, deflate, br', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'connection': 'keep-alive', 'accept-language': 'en-US,en;q=0.9', 'cache-control': 'max-age=0', 'content-type': 'application/x-www-form-urlencoded', 'host': '(url)', 'origin': '(url)', 'referer': '(url)guardian/home.html', 'upgrade-insecure-requests': '1', 'Content-Length': '263'}
p.text gives me the HTML of the login page
Tested with PowerAPI, requests, Mechanize, and RoboBrowser. All fail.
What response do you expect? You are using a wrong way to analyze your response.
with requests.Session() as s:
    p = s.post(url, data=values)
    # Only the body print depends on the 302 status.
    if p.status_code == 302:
        print(p.text)
    # Runs for every status code.
    print('Authentication error', p.status_code)
    r = s.get('(link)guardian/home.html')
    print(r.text)
In your code, you print 'Authentication error' regardless of status_code. I think it should at least look like this:
with requests.Session() as s:
    # requests follows redirects by default, so a 302 reply would be replaced
    # by the page it points to; switch that off to see the 302 itself.
    p = s.post(url, data=values, allow_redirects=False)
    if p.status_code == 302:
        # Redirect received: print its body, then load the home page with
        # the same session.
        print(p.text)
        r = s.get('(link)guardian/home.html')
        print(r.text)
    else:
        print('Authentication error', p.status_code)
In Python, how can I obtain headers and payload information for a particular website to make requests via requests.Session()?
e.g.:
# Example set of request headers for www.testsite.com.
headers = {
    "Host": "www.testsite.com",
    "Accept": "application/json",
    "Proxy-Connection": "keep-alive",
    "X-Requested-With": "XMLHttpRequest",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-us",
    "Content-Type": "application/x-www-form-urlencoded",
    "Origin": "http://www.testsite.com",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Mobile/11D257",
    "Referer": "http://www.testsite.com/mobile",
}
Thank you in advance and will be sure to upvote and accept answer
Most of those headers are automatically supplied by the requests module. Here is an example:
import requests
from pprint import pprint
with requests.Session() as session:
    # First request asks httpbin to set the cookie name=joe.
    session.get('http://httpbin.org/cookies/set?name=joe')
    # Second request asks httpbin to report the cookies it received.
    reply = session.get('http://httpbin.org/cookies')
    # Show the headers that were actually sent with the second request.
    sent_headers = dict(reply.request.headers)
    pprint(sent_headers)
    echoed_cookies = reply.json()['cookies']
    assert echoed_cookies['name'] == 'joe'
The output of the pprint() call is this:
{'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Cookie': 'name=joe',
'User-Agent': 'python-requests/2.9.1'}
As you can see, s.get() fills in several headers.
A response object has a headers attribute:
import requests
with requests.Session() as session:
    # The headers the server sent back are on the response object.
    response = session.get("http://google.es")
    print(response.headers)
Output:
>> {
'Date': 'Tue, 22 Aug 2017 00:37:13 GMT',
'Expires': '-1',
'Cache-Control': 'private, max-age=0',
'Content-Type': 'text/html; charset=ISO-8859-1',
...
}