I am using the requests module.
I have figured out how to submit data to a login form on a website and retrieve the session key, but I can't see an obvious way to use this session key in subsequent requests.
Can someone fill in the ellipsis in the code below or suggest another approach?
>>> import requests
>>> login_data = {'formPosted': '1', 'login_email': 'me#example.com', 'password': 'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>>
>>> r.text
'You are being redirected here'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>>
>>> r2 = requests.get('https://localhost/profile_data.json', ...)
You can easily create a persistent session using:
s = requests.Session()
After that, continue with your requests as you would:
s.post('https://localhost/login.py', login_data)
# logged in! cookies saved for future requests.
r2 = s.get('https://localhost/profile_data.json', ...)
# cookies sent automatically!
# do whatever, s will keep your cookies intact :)
For more about Sessions: https://requests.readthedocs.io/en/latest/user/advanced/#session-objects
the other answers help to understand how to maintain such a session. Additionally, I want to provide a class which keeps the session maintained over different runs of a script (with a cache file). This means a proper "login" is only performed when required (timout or no session exists in cache). Also it supports proxy settings over subsequent calls to 'get' or 'post'.
It is tested with Python3.
Use it as a basis for your own code. The following snippets are release with GPL v3
import pickle
import datetime
import os
from urllib.parse import urlparse
import requests
class MyLoginSession:
"""
a class which handles and saves login sessions. It also keeps track of proxy settings.
It does also maintine a cache-file for restoring session data from earlier
script executions.
"""
def __init__(self,
loginUrl,
loginData,
loginTestUrl,
loginTestString,
sessionFileAppendix = '_session.dat',
maxSessionTimeSeconds = 30 * 60,
proxies = None,
userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
debug = True,
forceLogin = False,
**kwargs):
"""
save some information needed to login the session
you'll have to provide 'loginTestString' which will be looked for in the
responses html to make sure, you've properly been logged in
'proxies' is of format { 'https' : 'https://user:pass#server:port', 'http' : ...
'loginData' will be sent as post data (dictionary of id : value).
'maxSessionTimeSeconds' will be used to determine when to re-login.
"""
urlData = urlparse(loginUrl)
self.proxies = proxies
self.loginData = loginData
self.loginUrl = loginUrl
self.loginTestUrl = loginTestUrl
self.maxSessionTime = maxSessionTimeSeconds
self.sessionFile = urlData.netloc + sessionFileAppendix
self.userAgent = userAgent
self.loginTestString = loginTestString
self.debug = debug
self.login(forceLogin, **kwargs)
def modification_date(self, filename):
"""
return last file modification date as datetime object
"""
t = os.path.getmtime(filename)
return datetime.datetime.fromtimestamp(t)
def login(self, forceLogin = False, **kwargs):
"""
login to a session. Try to read last saved session from cache file. If this fails
do proper login. If the last cache access was too old, also perform a proper login.
Always updates session cache file.
"""
wasReadFromCache = False
if self.debug:
print('loading or generating session...')
if os.path.exists(self.sessionFile) and not forceLogin:
time = self.modification_date(self.sessionFile)
# only load if file less than 30 minutes old
lastModification = (datetime.datetime.now() - time).seconds
if lastModification < self.maxSessionTime:
with open(self.sessionFile, "rb") as f:
self.session = pickle.load(f)
wasReadFromCache = True
if self.debug:
print("loaded session from cache (last access %ds ago) "
% lastModification)
if not wasReadFromCache:
self.session = requests.Session()
self.session.headers.update({'user-agent' : self.userAgent})
res = self.session.post(self.loginUrl, data = self.loginData,
proxies = self.proxies, **kwargs)
if self.debug:
print('created new session with login' )
self.saveSessionToCache()
# test login
res = self.session.get(self.loginTestUrl)
if res.text.lower().find(self.loginTestString.lower()) < 0:
raise Exception("could not log into provided site '%s'"
" (did not find successful login string)"
% self.loginUrl)
def saveSessionToCache(self):
"""
save session to a cache file
"""
# always save (to update timeout)
with open(self.sessionFile, "wb") as f:
pickle.dump(self.session, f)
if self.debug:
print('updated session cache-file %s' % self.sessionFile)
def retrieveContent(self, url, method = "get", postData = None, **kwargs):
"""
return the content of the url with respect to the session.
If 'method' is not 'get', the url will be called with 'postData'
as a post request.
"""
if method == 'get':
res = self.session.get(url , proxies = self.proxies, **kwargs)
else:
res = self.session.post(url , data = postData, proxies = self.proxies, **kwargs)
# the session has been updated on the server, so also update in cache
self.saveSessionToCache()
return res
A code snippet for using the above class may look like this:
if __name__ == "__main__":
# proxies = {'https' : 'https://user:pass#server:port',
# 'http' : 'http://user:pass#server:port'}
loginData = {'user' : 'usr',
'password' : 'pwd'}
loginUrl = 'https://...'
loginTestUrl = 'https://...'
successStr = 'Hello Tom'
s = MyLoginSession(loginUrl, loginData, loginTestUrl, successStr,
#proxies = proxies
)
res = s.retrieveContent('https://....')
print(res.text)
# if, for instance, login via JSON values required try this:
s = MyLoginSession(loginUrl, None, loginTestUrl, successStr,
#proxies = proxies,
json = loginData)
Check out my answer in this similar question:
python: urllib2 how to send cookie with urlopen request
import urllib2
import urllib
from cookielib import CookieJar
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# input-type values from the html form
formdata = { "username" : username, "password": password, "form-id" : "1234" }
data_encoded = urllib.urlencode(formdata)
response = opener.open("https://page.com/login.php", data_encoded)
content = response.read()
EDIT:
I see I've gotten a few downvotes for my answer, but no explaining comments. I'm guessing it's because I'm referring to the urllib libraries instead of requests. I do that because the OP asks for help with requests or for someone to suggest another approach.
The documentation says that get takes in an optional cookies argument allowing you to specify cookies to use:
from the docs:
>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(cookies_are='working')
>>> r = requests.get(url, cookies=cookies)
>>> r.text
'{"cookies": {"cookies_are": "working"}}'
http://docs.python-requests.org/en/latest/user/quickstart/#cookies
Upon trying all the answers above, I found that using "RequestsCookieJar" instead of the regular CookieJar for subsequent requests fixed my problem.
import requests
import json
# The Login URL
authUrl = 'https://whatever.com/login'
# The subsequent URL
testUrl = 'https://whatever.com/someEndpoint'
# Logout URL
testlogoutUrl = 'https://whatever.com/logout'
# Whatever you are posting
login_data = {'formPosted':'1',
'login_email':'me#example.com',
'password':'pw'
}
# The Authentication token or any other data that we will receive from the Authentication Request.
token = ''
# Post the login Request
loginRequest = requests.post(authUrl, login_data)
print("{}".format(loginRequest.text))
# Save the request content to your variable. In this case I needed a field called token.
token = str(json.loads(loginRequest.content)['token']) # or ['access_token']
print("{}".format(token))
# Verify Successful login
print("{}".format(loginRequest.status_code))
# Create your Requests Cookie Jar for your subsequent requests and add the cookie
jar = requests.cookies.RequestsCookieJar()
jar.set('LWSSO_COOKIE_KEY', token)
# Execute your next request(s) with the Request Cookie Jar set
r = requests.get(testUrl, cookies=jar)
print("R.TEXT: {}".format(r.text))
print("R.STCD: {}".format(r.status_code))
# Execute your logout request(s) with the Request Cookie Jar set
r = requests.delete(testlogoutUrl, cookies=jar)
print("R.TEXT: {}".format(r.text)) # should show "Request Not Authorized"
print("R.STCD: {}".format(r.status_code)) # should show 401
Save only required cookies and reuse them.
import os
import pickle
from urllib.parse import urljoin, urlparse
login = 'my#email.com'
password = 'secret'
# Assuming two cookies are used for persistent login.
# (Find it by tracing the login process)
persistentCookieNames = ['sessionId', 'profileId']
URL = 'http://example.com'
urlData = urlparse(URL)
cookieFile = urlData.netloc + '.cookie'
signinUrl = urljoin(URL, "/signin")
with requests.Session() as session:
try:
with open(cookieFile, 'rb') as f:
print("Loading cookies...")
session.cookies.update(pickle.load(f))
except Exception:
# If could not load cookies from file, get the new ones by login in
print("Login in...")
post = session.post(
signinUrl,
data={
'email': login,
'password': password,
}
)
try:
with open(cookieFile, 'wb') as f:
jar = requests.cookies.RequestsCookieJar()
for cookie in session.cookies:
if cookie.name in persistentCookieNames:
jar.set_cookie(cookie)
pickle.dump(jar, f)
except Exception as e:
os.remove(cookieFile)
raise(e)
MyPage = urljoin(URL, "/mypage")
page = session.get(MyPage)
snippet to retrieve json data, password protected
import requests
username = "my_user_name"
password = "my_super_secret"
url = "https://www.my_base_url.com"
the_page_i_want = "/my_json_data_page"
session = requests.Session()
# retrieve cookie value
resp = session.get(url+'/login')
csrf_token = resp.cookies['csrftoken']
# login, add referer
resp = session.post(url+"/login",
data={
'username': username,
'password': password,
'csrfmiddlewaretoken': csrf_token,
'next': the_page_i_want,
},
headers=dict(Referer=url+"/login"))
print(resp.json())
This will work for you in Python;
# Call JIRA API with HTTPBasicAuth
import json
import requests
from requests.auth import HTTPBasicAuth
JIRA_EMAIL = "****"
JIRA_TOKEN = "****"
BASE_URL = "https://****.atlassian.net"
API_URL = "/rest/api/3/serverInfo"
API_URL = BASE_URL+API_URL
BASIC_AUTH = HTTPBasicAuth(JIRA_EMAIL, JIRA_TOKEN)
HEADERS = {'Content-Type' : 'application/json;charset=iso-8859-1'}
response = requests.get(
API_URL,
headers=HEADERS,
auth=BASIC_AUTH
)
print(json.dumps(json.loads(response.text), sort_keys=True, indent=4, separators=(",", ": ")))
Related
I am following the fastapi docs to implement an user authentication system. Here is a minimal example of app.py:
# import lines and utilities omitted
#app.post("/token", response_model=Token)
async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
user = authenticate_user(form_data.username, form_data.password)
if not user:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Incorrect username or password",
headers={"WWW-Authenticate": "Bearer"},
)
access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
access_token = create_access_token(
data={"sub": user.username}, expires_delta=access_token_expires
)
return {"access_token": access_token, "token_type": "bearer"}
#app.get("/user/me", response_model=UserRead)
def read_user(*, session=Depends(get_session), current_user=Depends(get_current_user)):
user = session.get(User, current_user.username)
if not user:
raise HTTPException(status_code=404, detail="User not found")
return user
When the user calls /users/me, it returns the current user. This works completely well as described in the tutorial in the SwaggerUI. But, I am not able to perform the same authorization and operation via python requests. Here is a following python code which I used:
import requests
backend_url = "http://localhost:8000/"
login_data = {'username':'user1', 'password':'secret1'}
s = requests.Session()
s.post(backend_url + "token", login_data) # response 200
s.get(backend_url + "user/me") # response 401
I am looking for some ways by which I can reuse the access_token returned by fastapi.
I found the answer in docs of fastapi itself:
import requests
backend_url = "http://localhost:8000/"
login_data = {'username':'user1', 'password':'secret1'}
session = requests.Session()
response = session.post(backend_url + "token", login_data)
response = json.loads(response.content.decode('utf-8'))
session.headers.update({"Authorization": 'Bearer ' + response['access_token']})
session.get(backend_url + "user/me")
You are not using the session variable s = requests.Session() to send HTTP requests.
So the post and get methods are getting sent independently from each other.
Try using
s = requests.Session()
s.post(backend_url + "token", login_data) # use the session post method
s.get(backend_url + "user/me") # use the session get method
I am using requests and cfscrape library to login to https://kissanime.to/Login
'''Login to website'''
def login(self, usr, pw):
login_url = 'https://kissanime.to/Login'
sess = requests.Session()
# login credentials
payload = {
'username': usr,
'password': pw,
'redirect': ''
}
# Creating cfscrape instance of the session
scraper_sess = cfscrape.create_scraper(sess)
a = scraper_sess.post(login_url, data=payload)
print(a.text)
print(a.status_code)
a.text gives me the same login page
a.status_code gives me 200
That means my login is not working at all. Am I missing something? According to chrome's network monitor, I should also get status code 302
POST DATA image:
I solved it using mechanicalsoup
Code:
import mechanicalsoup
'''Login to website'''
def login(self, usr, pw):
login_url = 'https://kissanime.to/Login'
# Creating cfscrape instance
self.r = cfscrape.create_scraper()
login_page = self.r.get(login_url)
# Creating a mechanicalsoup browser instance with
# response object of cfscrape
browser = mechanicalsoup.Browser(self.r)
soup = BeautifulSoup(login_page.text, 'html.parser')
# grab the login form
login_form = soup.find('form', {'id':'formLogin'})
# find login and password inputs
login_form.find('input', {'name': 'username'})['value'] = usr
login_form.find('input', {'name': 'password'})['value'] = pw
browser.submit(login_form, login_page.url)
This content is from Requests Documentation:
Many web services that require authentication accept HTTP Basic Auth. This is the simplest kind, and Requests supports it straight out of the box.
requests.get('https://api.github.com/user', auth=HTTPBasicAuth('user', 'pass'))
You have to send the payload as JSON..
import requests,json
'''Login to website'''
def login(self, usr, pw):
login_url = 'https://kissanime.to/Login'
sess = requests.Session()
# login credentials
payload = {
'username': usr,
'password': pw,
'redirect': ''
}
# Creating cfscrape instance of the session
scraper_sess = cfscrape.create_scraper(sess)
a = scraper_sess.post(login_url, data=json.dumps(payload))
print(a.text)
print(a.status_code)
Reference: http://docs.python-requests.org/en/master/user/authentication/
So I have been looking around and have managed to cobble together some code that lets me login to the website, http://forums.somethingawful.com
It works, I can see from the response that it works.
When I try using the same urllib2 opener that I created for the above login, to visit this part of the site http://forums.somethingawful.com/attachment.php?attachmentid=300 (which I need to be logged in to view) to open this page, I get a response of "ÿØÿà"
EDIT: http://i.imgur.com/PmWl1s4.png
I have included a screenshot of what the target page looks like when logged in, if this is anymore help
Any ideas why?
"""
# Script to log in to website and store cookies.
# run as: python web_login.py USERNAME PASSWORD
#
# sources of code include:
#
# http://stackoverflow.com/questions/2954381/python-form-post-using-urllib2-also-question-on-saving-using-cookies
# http://stackoverflow.com/questions/301924/python-urllib-urllib2-httplib-confusion
# http://www.voidspace.org.uk/python/articles/cookielib.shtml
#
# mashed together by Martin Chorley
#
# Licensed under a Creative Commons Attribution ShareAlike 3.0 Unported License.
# http://creativecommons.org/licenses/by-sa/3.0/
"""
import urllib, urllib2
import cookielib
import sys
import urlparse
from BeautifulSoup import BeautifulSoup as bs
class WebLogin(object):
def __init__(self, username, password):
# url for website we want to log in to
self.base_url = 'http://forums.somethingawful.com/'
# login action we want to post data to
# could be /login or /account/login or something similar
self.login_action = '/account.php?'
# file for storing cookies
self.cookie_file = 'login.cookies'
# user provided username and password
self.username = username
self.password = password
# set up a cookie jar to store cookies
self.cj = cookielib.MozillaCookieJar(self.cookie_file)
# set up opener to handle cookies, redirects etc
self.opener = urllib2.build_opener(
urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler(debuglevel=0),
urllib2.HTTPSHandler(debuglevel=0),
urllib2.HTTPCookieProcessor(self.cj)
)
# pretend we're a web browser and not a python script
self.opener.addheaders = [('User-agent',
('Chrome/16.0.912.77'))
]
# open the front page of the website to set and save initial cookies
response = self.opener.open(self.base_url)
self.cj.save()
# try and log in to the site
response = self.login()
response2 = self.opener.open("http://forums.somethingawful.com/attachment.php?attachmentid=300")
print response2.read() + "LLLLLL"
# method to do login
def login(self):
# parameters for login action
# may be different for different websites
# check html source of website for specifics
login_data = urllib.urlencode({
'action': 'login',
'username': 'username',
'password': 'password'
})
# construct the url
login_url = self.base_url + self.login_action
# then open it
response = self.opener.open(login_url, login_data)
# save the cookies and return the response
self.cj.save()
return response
if __name__ == "__main__":
username = "username"
password = "password"
# initialise and login to the website
test = WebLogin(username, password)
Try this instead:
import urllib2,cookielib
def login(username,password):
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
url1 = "http://forums.somethingawful.com/attachment.php?attachmentid=300"
url2 = "http://forums.somethingawful.com/account.php?action=loginform"
data = "&username="+username+"&password="+password
socket = opener.open(url1)
socket = opener.open(url2,data)
return socket.read()
P.S.: I wrote it as a standalone function; you can integrate it into your class if it works for you. In addition, the call to opener.open(url1) might be redundant; would need a valid pair of username/password in order to verify that...
I was using Mechanize module a while ago, and now try to use Requests module.
(Python mechanize doesn't work when HTTPS and Proxy Authentication required)
I have to go through proxy-server when I access the Internet.
The proxy-server requires authentication. I wrote the following codes.
import requests
from requests.auth import HTTPProxyAuth
proxies = {"http":"192.168.20.130:8080"}
auth = HTTPProxyAuth("username", "password")
r = requests.get("http://www.google.co.jp/", proxies=proxies, auth=auth)
The above codes work well when proxy-server requires basic authentication.
Now I want to know what I have to do when proxy-server requires digest authentication.
HTTPProxyAuth seems not to be effective in digest authentication (r.status_code returns 407).
No need to implement your own! in most cases
Requests has built in support for proxies, for basic authentication:
proxies = { 'https' : 'https://user:password#proxyip:port' }
r = requests.get('https://url', proxies=proxies)
see more on the docs
Or in case you need digest authentication HTTPDigestAuth may help.
Or you might need try to extend it like yutaka2487 did bellow.
Note: must use ip of proxy server not its name!
I wrote the class that can be used in proxy authentication (based on digest auth).
I borrowed almost all codes from requests.auth.HTTPDigestAuth.
import requests
import requests.auth
class HTTPProxyDigestAuth(requests.auth.HTTPDigestAuth):
def handle_407(self, r):
"""Takes the given response and tries digest-auth, if needed."""
num_407_calls = r.request.hooks['response'].count(self.handle_407)
s_auth = r.headers.get('Proxy-authenticate', '')
if 'digest' in s_auth.lower() and num_407_calls < 2:
self.chal = requests.auth.parse_dict_header(s_auth.replace('Digest ', ''))
# Consume content and release the original connection
# to allow our new request to reuse the same one.
r.content
r.raw.release_conn()
r.request.headers['Authorization'] = self.build_digest_header(r.request.method, r.request.url)
r.request.send(anyway=True)
_r = r.request.response
_r.history.append(r)
return _r
return r
def __call__(self, r):
if self.last_nonce:
r.headers['Proxy-Authorization'] = self.build_digest_header(r.method, r.url)
r.register_hook('response', self.handle_407)
return r
Usage:
proxies = {
"http" :"192.168.20.130:8080",
"https":"192.168.20.130:8080",
}
auth = HTTPProxyDigestAuth("username", "password")
# HTTP
r = requests.get("http://www.google.co.jp/", proxies=proxies, auth=auth)
r.status_code # 200 OK
# HTTPS
r = requests.get("https://www.google.co.jp/", proxies=proxies, auth=auth)
r.status_code # 200 OK
I've written a Python module (available here) which makes it possible to authenticate with a HTTP proxy using the digest scheme. It works when connecting to HTTPS websites (through monkey patching) and allows to authenticate with the website as well. This should work with latest requests library for both Python 2 and 3.
The following example fetches the webpage https://httpbin.org/ip through HTTP proxy 1.2.3.4:8080 which requires HTTP digest authentication using user name user1 and password password1:
import requests
from requests_digest_proxy import HTTPProxyDigestAuth
s = requests.Session()
s.proxies = {
'http': 'http://1.2.3.4:8080/',
'https': 'http://1.2.3.4:8080/'
}
s.auth = HTTPProxyDigestAuth('user1', 'password1')
print(s.get('https://httpbin.org/ip').text)
Should the website requires some kind of HTTP authentication, this can be specified to HTTPProxyDigestAuth constructor this way:
# HTTP Basic authentication for website
s.auth = HTTPProxyDigestAuth(('user1', 'password1'),
auth=requests.auth.HTTPBasicAuth('user1', 'password0'))
print(s.get('https://httpbin.org/basic-auth/user1/password0').text))
# HTTP Digest authentication for website
s.auth = HTTPProxyDigestAuth(('user1', 'password1'),,
auth=requests.auth.HTTPDigestAuth('user1', 'password0'))
print(s.get('https://httpbin.org/digest-auth/auth/user1/password0').text)
This snippet works for both types of requests (http and https). Tested on the current version of requests (2.23.0).
import re
import requests
from requests.utils import get_auth_from_url
from requests.auth import HTTPDigestAuth
from requests.utils import parse_dict_header
from urllib3.util import parse_url
def get_proxy_autorization_header(proxy, method):
username, password = get_auth_from_url(proxy)
auth = HTTPProxyDigestAuth(username, password)
proxy_url = parse_url(proxy)
proxy_response = requests.request(method, proxy_url, auth=auth)
return proxy_response.request.headers['Proxy-Authorization']
class HTTPSAdapterWithProxyDigestAuth(requests.adapters.HTTPAdapter):
def proxy_headers(self, proxy):
headers = {}
proxy_auth_header = get_proxy_autorization_header(proxy, 'CONNECT')
headers['Proxy-Authorization'] = proxy_auth_header
return headers
class HTTPAdapterWithProxyDigestAuth(requests.adapters.HTTPAdapter):
def proxy_headers(self, proxy):
return {}
def add_headers(self, request, **kwargs):
proxy = kwargs['proxies'].get('http', '')
if proxy:
proxy_auth_header = get_proxy_autorization_header(proxy, request.method)
request.headers['Proxy-Authorization'] = proxy_auth_header
class HTTPProxyDigestAuth(requests.auth.HTTPDigestAuth):
def init_per_thread_state(self):
# Ensure state is initialized just once per-thread
if not hasattr(self._thread_local, 'init'):
self._thread_local.init = True
self._thread_local.last_nonce = ''
self._thread_local.nonce_count = 0
self._thread_local.chal = {}
self._thread_local.pos = None
self._thread_local.num_407_calls = None
def handle_407(self, r, **kwargs):
"""
Takes the given response and tries digest-auth, if needed.
:rtype: requests.Response
"""
# If response is not 407, do not auth
if r.status_code != 407:
self._thread_local.num_407_calls = 1
return r
s_auth = r.headers.get('proxy-authenticate', '')
if 'digest' in s_auth.lower() and self._thread_local.num_407_calls < 2:
self._thread_local.num_407_calls += 1
pat = re.compile(r'digest ', flags=re.IGNORECASE)
self._thread_local.chal = requests.utils.parse_dict_header(
pat.sub('', s_auth, count=1))
# Consume content and release the original connection
# to allow our new request to reuse the same one.
r.content
r.close()
prep = r.request.copy()
requests.cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
prep.prepare_cookies(prep._cookies)
prep.headers['Proxy-Authorization'] = self.build_digest_header(prep.method, prep.url)
_r = r.connection.send(prep, **kwargs)
_r.history.append(r)
_r.request = prep
return _r
self._thread_local.num_407_calls = 1
return r
def __call__(self, r):
# Initialize per-thread state, if needed
self.init_per_thread_state()
# If we have a saved nonce, skip the 407
if self._thread_local.last_nonce:
r.headers['Proxy-Authorization'] = self.build_digest_header(r.method, r.url)
r.register_hook('response', self.handle_407)
self._thread_local.num_407_calls = 1
return r
session = requests.Session()
session.proxies = {
'http': 'http://username:password#proxyhost:proxyport',
'https': 'http://username:password#proxyhost:proxyport'
}
session.trust_env = False
session.mount('http://', HTTPAdapterWithProxyDigestAuth())
session.mount('https://', HTTPSAdapterWithProxyDigestAuth())
response_http = session.get("http://ww3.safestyle-windows.co.uk/the-secret-door/")
print(response_http.status_code)
response_https = session.get("https://stackoverflow.com/questions/13506455/how-to-pass-proxy-authentication-requires-digest-auth-by-using-python-requests")
print(response_https.status_code)
Generally, the problem of proxy autorization is also relevant for other types of authentication (ntlm, kerberos) when connecting using the protocol HTTPS. And despite the large number of issues (since 2013, and maybe there are earlier ones that I did not find):
in requests: Digest Proxy Auth, NTLM Proxy Auth, Kerberos Proxy Auth
in urlib3: NTLM Proxy Auth, NTLM Proxy Auth
and many many others,the problem is still not resolved.
The root of the problem in the function _tunnel of the module httplib(python2)/http.client(python3). In case of unsuccessful connection attempt, it raises an OSError without returning a response code (407 in our case) and additional data needed to build the autorization header. Lukasa gave a explanation here.
As long as there is no solution from maintainers of urllib3 (or requests), we can only use various workarounds (for example, use the approach of #Tey' or do something like this).In my version of workaround, we pre-prepare the necessary authorization data by sending a request to the proxy server and processing the received response.
You can use digest authentication by using requests.auth.HTTPDigestAuth instead of requests.auth.HTTPProxyAuth
For those of you that still end up here, there appears to be a project called requests-toolbelt that has this plus other common but not built in functionality of requests.
https://toolbelt.readthedocs.org/en/latest/authentication.html#httpproxydigestauth
This works for me. Actually, don't know about security of user:password in this soulution:
import requests
import os
http_proxyf = 'http://user:password#proxyip:port'
os.environ["http_proxy"] = http_proxyf
os.environ["https_proxy"] = http_proxyf
sess = requests.Session()
# maybe need sess.trust_env = True
print(sess.get('https://some.org').text)
import requests
import os
# in my case I had to add my local domain
proxies = {
'http': 'proxy.myagency.com:8080',
'https': 'user#localdomain:password#proxy.myagency.com:8080',
}
r=requests.get('https://api.github.com/events', proxies=proxies)
print(r.text)
Here is an answer that is not for http Basic Authentication - for example a transperant proxy within organization.
import requests
url = 'https://someaddress-behindproxy.com'
params = {'apikey': '123456789'} #if you need params
proxies = {'https': 'https://proxyaddress.com:3128'} #or some other port
response = requests.get(url, proxies=proxies, params=params)
I hope this helps someone.
I am using the requests module.
I have figured out how to submit data to a login form on a website and retrieve the session key, but I can't see an obvious way to use this session key in subsequent requests.
Can someone fill in the ellipsis in the code below or suggest another approach?
>>> import requests
>>> login_data = {'formPosted': '1', 'login_email': 'me#example.com', 'password': 'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>>
>>> r.text
'You are being redirected here'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>>
>>> r2 = requests.get('https://localhost/profile_data.json', ...)
You can easily create a persistent session using:
s = requests.Session()
After that, continue with your requests as you would:
s.post('https://localhost/login.py', login_data)
# logged in! cookies saved for future requests.
r2 = s.get('https://localhost/profile_data.json', ...)
# cookies sent automatically!
# do whatever, s will keep your cookies intact :)
For more about Sessions: https://requests.readthedocs.io/en/latest/user/advanced/#session-objects
the other answers help to understand how to maintain such a session. Additionally, I want to provide a class which keeps the session maintained over different runs of a script (with a cache file). This means a proper "login" is only performed when required (timout or no session exists in cache). Also it supports proxy settings over subsequent calls to 'get' or 'post'.
It is tested with Python3.
Use it as a basis for your own code. The following snippets are release with GPL v3
import pickle
import datetime
import os
from urllib.parse import urlparse
import requests
class MyLoginSession:
"""
a class which handles and saves login sessions. It also keeps track of proxy settings.
It does also maintine a cache-file for restoring session data from earlier
script executions.
"""
def __init__(self,
loginUrl,
loginData,
loginTestUrl,
loginTestString,
sessionFileAppendix = '_session.dat',
maxSessionTimeSeconds = 30 * 60,
proxies = None,
userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
debug = True,
forceLogin = False,
**kwargs):
"""
save some information needed to login the session
you'll have to provide 'loginTestString' which will be looked for in the
responses html to make sure, you've properly been logged in
'proxies' is of format { 'https' : 'https://user:pass#server:port', 'http' : ...
'loginData' will be sent as post data (dictionary of id : value).
'maxSessionTimeSeconds' will be used to determine when to re-login.
"""
urlData = urlparse(loginUrl)
self.proxies = proxies
self.loginData = loginData
self.loginUrl = loginUrl
self.loginTestUrl = loginTestUrl
self.maxSessionTime = maxSessionTimeSeconds
self.sessionFile = urlData.netloc + sessionFileAppendix
self.userAgent = userAgent
self.loginTestString = loginTestString
self.debug = debug
self.login(forceLogin, **kwargs)
def modification_date(self, filename):
"""
return last file modification date as datetime object
"""
t = os.path.getmtime(filename)
return datetime.datetime.fromtimestamp(t)
def login(self, forceLogin = False, **kwargs):
"""
login to a session. Try to read last saved session from cache file. If this fails
do proper login. If the last cache access was too old, also perform a proper login.
Always updates session cache file.
"""
wasReadFromCache = False
if self.debug:
print('loading or generating session...')
if os.path.exists(self.sessionFile) and not forceLogin:
time = self.modification_date(self.sessionFile)
# only load if file less than 30 minutes old
lastModification = (datetime.datetime.now() - time).seconds
if lastModification < self.maxSessionTime:
with open(self.sessionFile, "rb") as f:
self.session = pickle.load(f)
wasReadFromCache = True
if self.debug:
print("loaded session from cache (last access %ds ago) "
% lastModification)
if not wasReadFromCache:
self.session = requests.Session()
self.session.headers.update({'user-agent' : self.userAgent})
res = self.session.post(self.loginUrl, data = self.loginData,
proxies = self.proxies, **kwargs)
if self.debug:
print('created new session with login' )
self.saveSessionToCache()
# test login
res = self.session.get(self.loginTestUrl)
if res.text.lower().find(self.loginTestString.lower()) < 0:
raise Exception("could not log into provided site '%s'"
" (did not find successful login string)"
% self.loginUrl)
def saveSessionToCache(self):
"""
save session to a cache file
"""
# always save (to update timeout)
with open(self.sessionFile, "wb") as f:
pickle.dump(self.session, f)
if self.debug:
print('updated session cache-file %s' % self.sessionFile)
def retrieveContent(self, url, method = "get", postData = None, **kwargs):
"""
return the content of the url with respect to the session.
If 'method' is not 'get', the url will be called with 'postData'
as a post request.
"""
if method == 'get':
res = self.session.get(url , proxies = self.proxies, **kwargs)
else:
res = self.session.post(url , data = postData, proxies = self.proxies, **kwargs)
# the session has been updated on the server, so also update in cache
self.saveSessionToCache()
return res
A code snippet for using the above class may look like this:
if __name__ == "__main__":
# proxies = {'https' : 'https://user:pass#server:port',
# 'http' : 'http://user:pass#server:port'}
loginData = {'user' : 'usr',
'password' : 'pwd'}
loginUrl = 'https://...'
loginTestUrl = 'https://...'
successStr = 'Hello Tom'
s = MyLoginSession(loginUrl, loginData, loginTestUrl, successStr,
#proxies = proxies
)
res = s.retrieveContent('https://....')
print(res.text)
# if, for instance, login via JSON values required try this:
s = MyLoginSession(loginUrl, None, loginTestUrl, successStr,
#proxies = proxies,
json = loginData)
Check out my answer in this similar question:
python: urllib2 how to send cookie with urlopen request
import urllib2
import urllib
from cookielib import CookieJar
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# input-type values from the html form
formdata = { "username" : username, "password": password, "form-id" : "1234" }
data_encoded = urllib.urlencode(formdata)
response = opener.open("https://page.com/login.php", data_encoded)
content = response.read()
EDIT:
I see I've gotten a few downvotes for my answer, but no explaining comments. I'm guessing it's because I'm referring to the urllib libraries instead of requests. I do that because the OP asks for help with requests or for someone to suggest another approach.
The documentation says that get takes in an optional cookies argument allowing you to specify cookies to use:
from the docs:
>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(cookies_are='working')
>>> r = requests.get(url, cookies=cookies)
>>> r.text
'{"cookies": {"cookies_are": "working"}}'
http://docs.python-requests.org/en/latest/user/quickstart/#cookies
Upon trying all the answers above, I found that using "RequestsCookieJar" instead of the regular CookieJar for subsequent requests fixed my problem.
import requests
import json
# The Login URL
authUrl = 'https://whatever.com/login'
# The subsequent URL
testUrl = 'https://whatever.com/someEndpoint'
# Logout URL
testlogoutUrl = 'https://whatever.com/logout'
# Whatever you are posting
login_data = {'formPosted':'1',
'login_email':'me#example.com',
'password':'pw'
}
# The Authentication token or any other data that we will receive from the Authentication Request.
token = ''
# Post the login Request
loginRequest = requests.post(authUrl, login_data)
print("{}".format(loginRequest.text))
# Save the request content to your variable. In this case I needed a field called token.
token = str(json.loads(loginRequest.content)['token']) # or ['access_token']
print("{}".format(token))
# Verify Successful login
print("{}".format(loginRequest.status_code))
# Create your Requests Cookie Jar for your subsequent requests and add the cookie
jar = requests.cookies.RequestsCookieJar()
jar.set('LWSSO_COOKIE_KEY', token)
# Execute your next request(s) with the Request Cookie Jar set
r = requests.get(testUrl, cookies=jar)
print("R.TEXT: {}".format(r.text))
print("R.STCD: {}".format(r.status_code))
# Execute your logout request(s) with the Request Cookie Jar set
r = requests.delete(testlogoutUrl, cookies=jar)
print("R.TEXT: {}".format(r.text)) # should show "Request Not Authorized"
print("R.STCD: {}".format(r.status_code)) # should show 401
Save only required cookies and reuse them.
import os
import pickle
from urllib.parse import urljoin, urlparse
login = 'my#email.com'
password = 'secret'
# Assuming two cookies are used for persistent login.
# (Find it by tracing the login process)
persistentCookieNames = ['sessionId', 'profileId']
URL = 'http://example.com'
urlData = urlparse(URL)
cookieFile = urlData.netloc + '.cookie'
signinUrl = urljoin(URL, "/signin")
with requests.Session() as session:
try:
with open(cookieFile, 'rb') as f:
print("Loading cookies...")
session.cookies.update(pickle.load(f))
except Exception:
# If could not load cookies from file, get the new ones by login in
print("Login in...")
post = session.post(
signinUrl,
data={
'email': login,
'password': password,
}
)
try:
with open(cookieFile, 'wb') as f:
jar = requests.cookies.RequestsCookieJar()
for cookie in session.cookies:
if cookie.name in persistentCookieNames:
jar.set_cookie(cookie)
pickle.dump(jar, f)
except Exception as e:
os.remove(cookieFile)
raise(e)
MyPage = urljoin(URL, "/mypage")
page = session.get(MyPage)
snippet to retrieve json data, password protected
import requests
username = "my_user_name"
password = "my_super_secret"
url = "https://www.my_base_url.com"
the_page_i_want = "/my_json_data_page"
session = requests.Session()
# retrieve cookie value
resp = session.get(url+'/login')
csrf_token = resp.cookies['csrftoken']
# login, add referer
resp = session.post(url+"/login",
data={
'username': username,
'password': password,
'csrfmiddlewaretoken': csrf_token,
'next': the_page_i_want,
},
headers=dict(Referer=url+"/login"))
print(resp.json())
This will work for you in Python;
# Call JIRA API with HTTPBasicAuth
import json
import requests
from requests.auth import HTTPBasicAuth
JIRA_EMAIL = "****"
JIRA_TOKEN = "****"
BASE_URL = "https://****.atlassian.net"
API_URL = "/rest/api/3/serverInfo"
API_URL = BASE_URL+API_URL
BASIC_AUTH = HTTPBasicAuth(JIRA_EMAIL, JIRA_TOKEN)
HEADERS = {'Content-Type' : 'application/json;charset=iso-8859-1'}
response = requests.get(
API_URL,
headers=HEADERS,
auth=BASIC_AUTH
)
print(json.dumps(json.loads(response.text), sort_keys=True, indent=4, separators=(",", ": ")))