Create an HTTP object from a string in Python

I have a device that sends the following HTTP message to my Raspberry Pi:
POST /sinvertwebmonitor/InverterService/InverterService.asmx/CollectInverterData HTTP/1.1
Host: www.automation.siemens.com
Content-Type: application/x-www-form-urlencoded
Content-Length: 349

xmlData=<rd><m>xxxxx</m><s>yyyyyy</s><d t="1483019400" l="600"><p i="1">460380AE</p><p i="2">43655DE7</p><p i="3">4212C986</p><p i="4">424805BC</p><p i="5">4604E3D1</p><p i="6">441F616A</p><p i="7">4155E7F5</p><p i="8">E1</p><p i="9">112</p><p i="C">153</p><p i="D">4</p><p i="E">11ABAC</p><p i="F">22A48C</p><p i="10">0</p></d></rd>
I cannot change anything on the device.
On the Raspberry Pi I'm running a script that listens on a socket and receives the message.
This works so far, and the received message is the one above.
Now I would like to create an HTTP object from this message and then comfortably extract the headers, content, and so on.
Similar to the following example:
r = requests.get('https://www.google.com')
r.status_code
However, I don't want to "get" a URL; I just want to read the string I already have.
Pseudo-example:
r = requests.read(hereComesTheString)
r.status_code
I hope the problem is understandable.
I would be glad to get some hints.
Thanks and best regards,
Christoph

You use the status_code property in your example, but what you are receiving is a request, not a response. However, you can still create a simple object for accessing the data in the request.
It is probably easiest to create your own custom class:
import mimetools
from StringIO import StringIO
request = """POST /sinvertwebmonitor/InverterService/InverterService.asmx/CollectInverterData HTTP/1.1
Host: www.automation.siemens.com
Content-Type: application/x-www-form-urlencoded
Content-Length: 349

xmlData=<rd><m>xxxxx</m><s>yyyyyy</s><d t="1483019400" l="600"><p i="1">460380AE</p><p i="2">43655DE7</p><p i="3">4212C986</p><p i="4">424805BC</p><p i="5">4604E3D1</p><p i="6">441F616A</p><p i="7">4155E7F5</p><p i="8">E1</p><p i="9">112</p><p i="C">153</p><p i="D">4</p><p i="E">11ABAC</p><p i="F">22A48C</p><p i="10">0</p></d></rd>"""
class Request:
    def __init__(self, request):
        stream = StringIO(request)
        request = stream.readline()          # request line, e.g. "POST /path HTTP/1.1"
        words = request.split()
        [self.command, self.path, self.version] = words
        self.headers = mimetools.Message(stream, 0)  # parse the header block
        self.content = stream.read()                 # whatever follows is the body

    def __getitem__(self, key):
        return self.headers.get(key, '')
r = Request(request)

print(r.command)
print(r.path)
print(r.version)
for header in r.headers:
    print(header, r[header])
print(r.content)
This outputs:
POST
/sinvertwebmonitor/InverterService/InverterService.asmx/CollectInverterData
HTTP/1.1
('host', 'www.automation.siemens.com')
('content-type', 'application/x-www-form-urlencoded')
('content-length', '349')
xmlData=<rd><m>xxxxx</m><s>yyyyyy</s><d t="1483019400" l="600"><p i="1">460380AE</p><p i="2">43655DE7</p><p i="3">4212C986</p><p i="4">424805BC</p><p i="5">4604E3D1</p><p i="6">441F616A</p><p i="7">4155E7F5</p><p i="8">E1</p><p i="9">112</p><p i="C">153</p><p i="D">4</p><p i="E">11ABAC</p><p i="F">22A48C</p><p i="10">0</p></d></rd>
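Since the body is application/x-www-form-urlencoded, you can go one step further and pull the XML out of r.content and parse it. A minimal sketch; splitting on the first '=' assumes the device sends exactly one xmlData field, as in the capture above:
import xml.etree.ElementTree as ET

# r.content looks like 'xmlData=<rd>...</rd>'; keep everything after the first '='
xml_string = r.content.split('=', 1)[1]
root = ET.fromstring(xml_string)
for p in root.iter('p'):
    print(p.get('i'), p.text)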

If you're using a plain socket server, then you need to implement an HTTP server yourself so that you can split the request and respond according to the protocol.
It's probably easier just to use an existing HTTP server and app server. Flask is ideal for this:
from flask import Flask
from flask import request

app = Flask(__name__)

@app.route("/sinvertwebmonitor/InverterService/InverterService.asmx/CollectInverterData", methods=['POST'])
def dataCollector():
    data = request.form['xmlData']
    print(data)
    # parseData. Take a look at ElementTree
    return 'OK'  # a Flask view has to return a response

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=80)
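For a quick local check, the endpoint above could be exercised like this (a sketch; it assumes the app is running on port 80 of the same machine, and the xmlData value is a stub):
import requests

# Hypothetical smoke test for the Flask endpoint above; payload is a stub.
url = ('http://localhost/sinvertwebmonitor/InverterService/'
       'InverterService.asmx/CollectInverterData')
r = requests.post(url, data={'xmlData': '<rd><m>xxxxx</m></rd>'})
print(r.status_code)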

Thanks Alden. Below is your code with a few changes so that it works with Python 3.
import email
from io import StringIO
request = """POST /sinvertwebmonitor/InverterService/InverterService.asmx/CollectInverterData HTTP/1.1
Host: www.automation.siemens.com
Content-Type: application/x-www-form-urlencoded
Content-Length: 349

xmlData=<rd><m>xxxxx</m><s>yyyyyy</s><d t="1483019400" l="600"><p i="1">460380AE</p><p i="2">43655DE7</p><p i="3">4212C986</p><p i="4">424805BC</p><p i="5">4604E3D1</p><p i="6">441F616A</p><p i="7">4155E7F5</p><p i="8">E1</p><p i="9">112</p><p i="C">153</p><p i="D">4</p><p i="E">11ABAC</p><p i="F">22A48C</p><p i="10">0</p></d></rd>"""
class Request:
    def __init__(self, request):
        stream = StringIO(request)
        request = stream.readline()          # request line, e.g. "POST /path HTTP/1.1"
        words = request.split()
        [self.command, self.path, self.version] = words
        # Parse the rest of the stream (headers + body), not the request line
        self.headers = email.message_from_string(stream.read())
        self.content = self.headers.get_payload()

    def __getitem__(self, key):
        return self.headers.get(key, '')


r = Request(request)

print(r.command)
print(r.path)
print(r.version)
for header in r.headers:
    print(header, r[header])
print(r.content)
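As an alternative on Python 3, the standard library's own HTTP parser can be reused by feeding it the raw bytes. This is only a sketch, not the code above; the class name and the sample request are made up for illustration:
from http.server import BaseHTTPRequestHandler
from io import BytesIO

class ParsedRequest(BaseHTTPRequestHandler):
    """Parse a raw request string without a socket (illustrative sketch)."""
    def __init__(self, raw_bytes):
        self.rfile = BytesIO(raw_bytes)
        self.raw_requestline = self.rfile.readline()
        self.error_code = self.error_message = None
        self.parse_request()  # sets command, path, request_version, headers

    def send_error(self, code, message=None, explain=None):
        # record errors instead of writing them to a client socket
        self.error_code, self.error_message = code, message

pr = ParsedRequest(b'GET /index.html HTTP/1.1\r\nHost: example.com\r\n\r\n')
print(pr.command, pr.path, pr.request_version)
print(pr.headers['Host'])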

Related

POST method in Python: errno 104

I am trying to query a website in Python. I need to use a POST method (according to what happens in my browser when I monitor it with the developer tools).
If I query the website with cURL, it works well:
curl -i --data "param1=var1&param2=var2" http://www.test.com
I get this header:
HTTP/1.1 200 OK
Date: Tue, 26 Sep 2017 08:46:18 GMT
Server: Apache/1.3.33 (Unix) mod_gzip/1.3.26.1a mod_fastcgi/2.4.2 PHP/4.3.11
Transfer-Encoding: chunked
Content-Type: text/html
But when I do it in Python 3, I get an error 104.
Here is what I have tried so far. First, with urllib (taking inspiration from this thread to manage to use a POST method instead of GET):
import re
from urllib import request as ur
# URL to handle request
url = "http://www.test.com"
data = "param1=var1&param2=var2"
# Build a request dictionary
preq = [re.findall(r"[^=]+", i) for i in re.findall(r"[^&]+", data)]
dreq = {i[0]: i[1] if len(i) == 2 else "" for i in preq}
# Initiate request & add method
ureq = ur.Request(url)
ureq.get_method = lambda: "POST"
# Send request
req = ur.urlopen(ureq, data=str(dreq).encode())
I did basically the same with requests:
import re
import requests
# URL to handle request
url = "http://www.test.com"
data = "param1=var1&param2=var2"
# Build a request dictionary
preq = [re.findall(r"[^=]+", i) for i in re.findall(r"[^&]+", data)]
dreq = {i[0]: i[1] if len(i) == 2 else "" for i in preq}
# Initiate request & add method
req = requests.post(url, data=dreq)
In both cases, I get the same errno 104 error:
ConnectionResetError: [Errno 104] Connection reset by peer
I don't understand this, since the same request works with cURL. I guess I have misunderstood something about Python requests, but so far I'm stuck. Any hint would be appreciated!
I've just figured out that I did not pass the data in the right format. I thought it needed to be stored in a dict; that is not the case, and it is therefore much simpler than what I tried previously.
With urllib:
req = ur.urlopen(ureq, data=str(data).encode())
With requests:
req = requests.post(url, data=data)
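To make the difference concrete, here is a sketch (example.com stands in for the real site):
import requests

# Both calls send the body param1=var1&param2=var2; with the dict form,
# Requests also sets Content-Type: application/x-www-form-urlencoded itself.
requests.post('http://example.com', data='param1=var1&param2=var2')
requests.post('http://example.com', data={'param1': 'var1', 'param2': 'var2'})

# The failing attempts sent str(dreq).encode(), i.e. the Python repr of a
# dict ("{'param1': 'var1', ...}"), which the server cannot parse as a form.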

Flask test doesn't populate request.authorization when username and password are URL encoded

I'm trying to use the following test:
def post_webhook(self, payload, **kwargs):
    webhook_username = 'test'
    webhook_password = 'test'
    webhook_url = 'http://{}:{}@localhost:8000/webhook_receive'
    webhook_receive = self.app.post(
        webhook_url.format(webhook_username, webhook_password),
        referrer='http://localhost:8000',
        json=payload)
    return webhook_receive.status_code
However, the main issue is that request.authorization is None. If I launch the server and use curl -X POST <webhook_url> or requests.post(<webhook_url>), then request.authorization is properly populated.
I'm trying to figure out how to fix this problem.
Using the snippet code and the Flask Test Client, the following pytest code works for me. I send the HTTP Basic Auth the same way curl and HTTPie send it: in the Authorization header, with the user and password encoded in base64.
import base64

from app import app

def test_secret_endpoint():
    client = app.test_client()

    # testing a route without authentication required
    rv = client.get('/')
    assert rv.status_code == 200

    # testing a secured route without credentials
    rv = client.post('/secret-page')
    assert rv.status_code == 401

    # testing a secured route with valid credentials
    value = base64.encodestring('admin:secret').replace('\n', '')
    headers = {'Authorization': 'Basic {}'.format(value)}
    rv = client.post('/secret-page', headers=headers)
    assert rv.status_code == 200
    assert 'This is secret' in rv.data
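Note that base64.encodestring only exists on Python 2; on Python 3 the same header would be built roughly like this (same placeholder credentials):
import base64

# Python 3 equivalent of the credential line above; admin:secret is a placeholder
value = base64.b64encode(b'admin:secret').decode('ascii')
headers = {'Authorization': 'Basic {}'.format(value)}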
The route definitions are:
@app.route('/')
def index():
    return 'Hello World :)'

@app.route('/secret-page', methods=['POST'])
@requires_auth
def secret_page():
    return 'This is secret'
The request header sending the credentials looks something like this:
POST /secret-page HTTP/1.1
Accept: */*
Authorization: Basic YWRtaW46c2VjcmV0
Connection: keep-alive
Content-Length: 0
Host: localhost:5000
...

Python POST binary data

I am writing some code to interface with Redmine, and I need to upload some files as part of the process, but I am not sure how to make a POST request from Python containing a binary file.
I am trying to mimic this curl command:
curl --data-binary "@image.png" -H "Content-Type: application/octet-stream" -X POST -u login:password http://redmine/uploads.xml
My Python attempt is below, but it does not seem to work. I am not sure whether the problem is related to encoding the file or whether something is wrong with the headers.
import urllib2, os

FilePath = r"C:\somefolder\somefile.7z"  # raw string so the backslashes aren't escapes
FileData = open(FilePath, "rb")
length = os.path.getsize(FilePath)

password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, 'http://redmine/', 'admin', 'admin')
auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)

request = urllib2.Request(r'http://redmine/uploads.xml', FileData)
request.add_header('Content-Length', '%d' % length)
request.add_header('Content-Type', 'application/octet-stream')

try:
    response = urllib2.urlopen(request)
    print response.read()
except urllib2.HTTPError as e:
    error_message = e.read()
    print error_message
I have access to the server, and it looks like an encoding error:
...
invalid byte sequence in UTF-8
Line: 1
Position: 624
Last 80 unconsumed characters:
7z¼¯'ÅÐз2^Ôøë4g¸R<süðí6kĤª¶!»=}jcdjSPúá-º#»ÄAtD»H7Ê!æ½]j):
(further down)
Started POST "/uploads.xml" for 192.168.0.117 at 2013-01-16 09:57:49 -0800
Processing by AttachmentsController#upload as XML
WARNING: Can't verify CSRF token authenticity
Current user: anonymous
Filter chain halted as :authorize_global rendered or redirected
Completed 401 Unauthorized in 13ms (ActiveRecord: 3.1ms)
Basically, what you are doing is correct. Looking at the Redmine docs you linked to, it seems that the suffix after the dot in the URL denotes the type of posted data (.json for JSON, .xml for XML), which agrees with the response you get: Processing by AttachmentsController#upload as XML. I guess maybe there's a bug in the docs, and to post binary data you should try using the http://redmine/uploads URL instead of http://redmine/uploads.xml.
Btw, I highly recommend the very good and very popular Requests library for HTTP in Python. It's much better than what's in the standard lib (urllib2). It supports authentication as well, but I skipped it for brevity here.
import requests

with open('./x.png', 'rb') as f:
    data = f.read()

res = requests.post(url='http://httpbin.org/post',
                    data=data,
                    headers={'Content-Type': 'application/octet-stream'})

# let's check if what we sent is what we intended to send...
import json
import base64

assert base64.b64decode(res.json()['data'][len('data:application/octet-stream;base64,'):]) == data
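For completeness, the curl command from the question maps onto Requests roughly like this (a sketch; login, password, and the Redmine URL are the question's placeholders):
import requests

# Hedged equivalent of:
#   curl --data-binary "@image.png" -H "Content-Type: application/octet-stream"
#        -X POST -u login:password http://redmine/uploads.xml
with open('image.png', 'rb') as f:
    res = requests.post('http://redmine/uploads.xml',
                        data=f.read(),
                        headers={'Content-Type': 'application/octet-stream'},
                        auth=('login', 'password'))
print(res.status_code)
print(res.text)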
UPDATE
To find out why this works with Requests but not with urllib2, we have to examine the difference in what's being sent. To see this, I'm sending the traffic to an HTTP proxy (Fiddler) running on port 8888:
Using Requests
import requests

data = 'test data'
res = requests.post(url='http://localhost:8888',
                    data=data,
                    headers={'Content-Type': 'application/octet-stream'})
we see
POST http://localhost:8888/ HTTP/1.1
Host: localhost:8888
Content-Length: 9
Content-Type: application/octet-stream
Accept-Encoding: gzip, deflate, compress
Accept: */*
User-Agent: python-requests/1.0.4 CPython/2.7.3 Windows/Vista
test data
and using urllib2
import urllib2
data = 'test data'
req = urllib2.Request('http://localhost:8888', data)
req.add_header('Content-Length', '%d' % len(data))
req.add_header('Content-Type', 'application/octet-stream')
res = urllib2.urlopen(req)
we get
POST http://localhost:8888/ HTTP/1.1
Accept-Encoding: identity
Content-Length: 9
Host: localhost:8888
Content-Type: application/octet-stream
Connection: close
User-Agent: Python-urllib/2.7
test data
I don't see any differences that would warrant the different behavior you observe. Having said that, it's not uncommon for HTTP servers to inspect the User-Agent header and vary behavior based on its value. Try changing the headers sent by Requests one by one, making them the same as those sent by urllib2, and see when it stops working.
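If you want to run that experiment, one way is to pin Requests' headers to the urllib2 capture and then change them back one at a time (values copied from the two traces above; the proxy URL is the same Fiddler setup):
import requests

# Start from the exact urllib2 header set and diverge one header at a time.
headers = {
    'Accept-Encoding': 'identity',
    'Content-Type': 'application/octet-stream',
    'Connection': 'close',
    'User-Agent': 'Python-urllib/2.7',
}
res = requests.post('http://localhost:8888', data='test data', headers=headers)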
This has nothing to do with a malformed upload. The HTTP error clearly specifies 401 Unauthorized, and the log tells you the CSRF token can't be verified. Try sending a valid CSRF token with the upload.
More about csrf tokens here:
What is a CSRF token ? What is its importance and how does it work?
You need to add a Content-Disposition header, something like this (I used mod_python here, but the principle should be the same):
request.headers_out['Content-Disposition'] = 'attachment; filename=%s' % myfname
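Translated to the Requests upload from the accepted answer, that suggestion would look something like this (a sketch; the filename, URL, and credentials are stand-ins):
import requests

headers = {
    'Content-Type': 'application/octet-stream',
    'Content-Disposition': 'attachment; filename=somefile.7z',  # stand-in name
}
with open('somefile.7z', 'rb') as f:
    res = requests.post('http://redmine/uploads.xml', data=f.read(),
                        headers=headers, auth=('admin', 'admin'))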
You can use unirest; it provides an easy way to make a POST request.
import unirest

def callback(response):
    print "code:" + str(response.code)
    print "******************"
    print "headers:" + str(response.headers)
    print "******************"
    print "body:" + str(response.body)
    print "******************"
    print "raw_body:" + str(response.raw_body)

# consume async post request
def consumePOSTRequestASync():
    params = {'test1': 'param1', 'test2': 'param2'}
    # we need to pass a dummy file because unirest does not provide a
    # flag to switch between application/x-www-form-urlencoded and
    # multipart/form-data
    params['dummy'] = open('dummy.txt', 'r')
    url = 'http://httpbin.org/post'
    headers = {"Accept": "application/json"}
    # call the post service with headers and params
    unirest.post(url, headers=headers, params=params, callback=callback)

# post async request multipart/form-data
consumePOSTRequestASync()

How to construct a webob.Request or a WSGI 'environ' dict from raw HTTP request byte stream?

Suppose I have a byte stream with the following in it:
POST /mum/ble?q=huh
Content-Length: 18
Content-Type: application/json; charset="utf-8"
Host: localhost:80

["do", "re", "mi"]
Is there a way to produce a WSGI-style 'environ' dict from it?
Hopefully I've overlooked an easy answer, and it is as easy to achieve as the opposite operation.
Consider:
>>> import json
>>> from webob import Request
>>> r = Request.blank('/mum/ble?q=huh')
>>> r.method = 'POST'
>>> r.content_type = 'application/json'
>>> r.charset = 'utf-8'
>>> r.body = json.dumps(['do', 're', 'mi'])
>>> print str(r) # Request's __str__ method gives raw HTTP bytes back!
POST /mum/ble?q=huh
Content-Length: 18
Content-Type: application/json; charset="utf-8"
Host: localhost:80

["do", "re", "mi"]
Reusing Python's standard library code for this purpose is a bit tricky (it was not designed to be reused that way!), but it should be doable, e.g.:
import cStringIO
from wsgiref import simple_server, util

input_string = """POST /mum/ble?q=huh HTTP/1.0
Content-Length: 18
Content-Type: application/json; charset="utf-8"
Host: localhost:80

["do", "re", "mi"]
"""

class FakeHandler(simple_server.WSGIRequestHandler):
    def __init__(self, rfile):
        self.rfile = rfile
        self.wfile = cStringIO.StringIO()  # for error msgs
        self.server = self
        self.base_environ = {}
        self.client_address = ['?', 80]
        self.raw_requestline = self.rfile.readline()
        self.parse_request()

    def getenv(self):
        env = self.get_environ()
        util.setup_testing_defaults(env)
        env['wsgi.input'] = self.rfile
        return env

handler = FakeHandler(rfile=cStringIO.StringIO(input_string))
wsgi_env = handler.getenv()
print wsgi_env
Basically, we need to subclass the request handler to fake out the construction process that's normally performed for it by the server (rfile and wfile built from the socket to the client, and so on). This isn't quite complete, I think, but it should be close, and I hope it proves helpful!
Note that I've also fixed your example HTTP request: without an HTTP/1.0 or 1.1 at the end of the raw request line, a POST is considered ill-formed and causes an exception and a resulting error message on handler.wfile.
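Once you have the environ dict, getting back to a webob.Request is the easy part, since webob's constructor accepts a WSGI environ directly. A short sketch continuing from the code above:
from webob import Request

# wsgi_env comes from FakeHandler.getenv() above; wsgi.input is positioned
# at the start of the body, so webob can read it using Content-Length.
req = Request(wsgi_env)
print(req.method)
print(req.path_qs)
print(req.body)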

How do you get default headers in a urllib2 Request?

I have a Python web client that uses urllib2. It is easy enough to add HTTP headers to my outgoing requests: I just create a dictionary of the headers I want to add and pass it to the Request initializer.
However, other "standard" HTTP headers get added to the request along with the custom ones I explicitly add. When I sniff the request using Wireshark, I see headers besides the ones I add myself. My question is: how do I get access to these headers? I want to log every request (including the full set of HTTP headers), and I can't figure out how.
Any pointers?
In a nutshell: how do I get all the outgoing headers from an HTTP request created by urllib2?
If you want to see the literal HTTP request that is sent out, and therefore see every last header exactly as it is represented on the wire, then you can tell urllib2 to use your own version of an HTTPHandler that prints out (or saves, or whatever) the outgoing HTTP request.
import httplib, urllib2

class MyHTTPConnection(httplib.HTTPConnection):
    def send(self, s):
        print s  # or save them, or whatever!
        httplib.HTTPConnection.send(self, s)

class MyHTTPHandler(urllib2.HTTPHandler):
    def http_open(self, req):
        return self.do_open(MyHTTPConnection, req)

opener = urllib2.build_opener(MyHTTPHandler)
response = opener.open('http://www.google.com/')
The result of running this code is:
GET / HTTP/1.1
Accept-Encoding: identity
Host: www.google.com
Connection: close
User-Agent: Python-urllib/2.6
The urllib2 library uses OpenerDirector objects to handle the actual opening. Fortunately, the Python library provides defaults so you don't have to build your own. It is, however, these OpenerDirector objects that are adding the extra headers.
To see what they are after the request has been sent (so that you can log it, for example):
req = urllib2.Request(url='http://google.com')
response = urllib2.urlopen(req)
print req.unredirected_hdrs
(produces {'Host': 'google.com', 'User-agent': 'Python-urllib/2.5'}, etc.)
unredirected_hdrs is where the OpenerDirector dumps its extra headers. Simply looking at req.headers will show only your own headers; the library leaves those unmolested for you.
If you need to see the headers before you send the request, you'll need to subclass the OpenerDirector in order to intercept the transmission.
Hope that helps.
EDIT: I forgot to mention that, once the request has been sent, req.header_items() will give you a list of tuples of ALL the headers, with both your own and the ones added by the OpenerDirector. I should have mentioned this first since it's the most straightforward :-) Sorry.
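For example (a minimal sketch, Python 2 since it uses urllib2):
import urllib2

req = urllib2.Request('http://www.google.com')
urllib2.urlopen(req)
# After the request has gone out, header_items() shows your own headers
# plus the ones the OpenerDirector added.
print(req.header_items())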
EDIT 2: After your question about an example of defining your own handler, here's the sample I came up with. The concern in any monkeying with the request chain is that we need to be sure the handler is safe for multiple requests, which is why I'm uncomfortable just replacing the definition of putheader on the HTTPConnection class directly.
Sadly, because the internals of HTTPConnection and AbstractHTTPHandler are very internal, we have to reproduce much of the code from the Python library to inject our custom behaviour. Assuming I've not goofed below and this works as well as it did in my 5 minutes of testing, please be careful to revisit this override if you update your Python version to a new revision number (i.e. 2.5.x to 2.5.y, or 2.5 to 2.6, etc.).
I should therefore mention that I am on Python 2.5.1. If you have 2.6 or, particularly, 3.0, you may need to adjust this accordingly.
Please let me know if this doesn't work. I'm having waaaayyyy too much fun with this question:
import urllib2
import httplib
import socket

class CustomHTTPConnection(httplib.HTTPConnection):
    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        self.stored_headers = []

    def putheader(self, header, value):
        self.stored_headers.append((header, value))
        httplib.HTTPConnection.putheader(self, header, value)

class HTTPCaptureHeaderHandler(urllib2.AbstractHTTPHandler):
    def http_open(self, req):
        return self.do_open(CustomHTTPConnection, req)

    http_request = urllib2.AbstractHTTPHandler.do_request_

    def do_open(self, http_class, req):
        # All code here lifted directly from the python library
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        h = http_class(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise urllib2.URLError(err)

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason

        # This is the line we're adding
        req.all_sent_headers = h.stored_headers
        return resp

my_handler = HTTPCaptureHeaderHandler()
opener = urllib2.OpenerDirector()
opener.add_handler(my_handler)
req = urllib2.Request(url='http://www.google.com')
resp = opener.open(req)
print req.all_sent_headers
shows: [('Accept-Encoding', 'identity'), ('Host', 'www.google.com'), ('Connection', 'close'), ('User-Agent', 'Python-urllib/2.5')]
How about something like this:
import urllib2
import httplib

old_putheader = httplib.HTTPConnection.putheader
def putheader(self, header, value):
    print header, value
    old_putheader(self, header, value)
httplib.HTTPConnection.putheader = putheader

urllib2.urlopen('http://www.google.com')
A low-level solution:
import httplib

class HTTPConnection2(httplib.HTTPConnection):
    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        self._request_headers = []
        self._request_header = None

    def putheader(self, header, value):
        self._request_headers.append((header, value))
        httplib.HTTPConnection.putheader(self, header, value)

    def send(self, s):
        self._request_header = s
        httplib.HTTPConnection.send(self, s)

    def getresponse(self, *args, **kwargs):
        response = httplib.HTTPConnection.getresponse(self, *args, **kwargs)
        response.request_headers = self._request_headers
        response.request_header = self._request_header
        return response
Example:
conn = HTTPConnection2("www.python.org")
conn.request("GET", "/index.html", headers={
    "User-agent": "test",
    "Referer": "/",
})
response = conn.getresponse()
response.status and response.reason:
200 OK
response.request_headers:
[('Host', 'www.python.org'), ('Accept-Encoding', 'identity'), ('Referer', '/'), ('User-agent', 'test')]
response.request_header:
GET /index.html HTTP/1.1
Host: www.python.org
Accept-Encoding: identity
Referer: /
User-agent: test
Another solution, which uses the idea from How do you get default headers in a urllib2 Request? but doesn't copy code from the std-lib:
import httplib
import urllib2

class HTTPConnection2(httplib.HTTPConnection):
    """
    Like httplib.HTTPConnection but stores the request headers.
    Used in HTTPConnection3(), see below.
    """
    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        self.request_headers = []
        self.request_header = ""

    def putheader(self, header, value):
        self.request_headers.append((header, value))
        httplib.HTTPConnection.putheader(self, header, value)

    def send(self, s):
        self.request_header = s
        httplib.HTTPConnection.send(self, s)


class HTTPConnection3(object):
    """
    Wrapper around HTTPConnection2.
    Used in HTTPHandler2(), see below.
    """
    def __call__(self, *args, **kwargs):
        """
        instance made in urllib2.HTTPHandler.do_open()
        """
        self._conn = HTTPConnection2(*args, **kwargs)
        self.request_headers = self._conn.request_headers
        self.request_header = self._conn.request_header
        return self

    def __getattribute__(self, name):
        """
        Redirect attribute access to the local HTTPConnection() instance.
        """
        if name == "_conn":
            return object.__getattribute__(self, name)
        else:
            return getattr(self._conn, name)


class HTTPHandler2(urllib2.HTTPHandler):
    """
    A HTTPHandler which stores the request headers.
    Uses HTTPConnection3, see above.

    >>> opener = urllib2.build_opener(HTTPHandler2)
    >>> opener.addheaders = [("User-agent", "Python test")]
    >>> response = opener.open('http://www.python.org/')

    Get the request headers as a list built with HTTPConnection.putheader():
    >>> response.request_headers
    [('Accept-Encoding', 'identity'), ('Host', 'www.python.org'), ('Connection', 'close'), ('User-Agent', 'Python test')]

    >>> response.request_header
    'GET / HTTP/1.1\\r\\nAccept-Encoding: identity\\r\\nHost: www.python.org\\r\\nConnection: close\\r\\nUser-Agent: Python test\\r\\n\\r\\n'
    """
    def http_open(self, req):
        conn_instance = HTTPConnection3()
        response = self.do_open(conn_instance, req)
        response.request_headers = conn_instance.request_headers
        response.request_header = conn_instance.request_header
        return response
EDIT: Updated the source.
See urllib2.py:do_request (line 1044 (1067)) and urllib2.py:do_open (line 1073),
and (line 293) self.addheaders = [('User-agent', client_version)] (only 'User-agent' is added).
It sounds to me like you're looking for the headers of the response object, which include Connection: close, etc. These headers live in the object returned by urlopen. Getting at them is easy enough:
from urllib2 import urlopen
req = urlopen("http://www.google.com")
print req.headers.headers
req.headers is an instance of httplib.HTTPMessage.
It should send the default HTTP headers (as specified by w3.org) alongside the ones you specify. You can use a tool like Wireshark if you would like to see them in their entirety.
Edit:
If you would like to log them, you can use WinPcap to capture packets sent by specific applications (in your case, Python). You can also specify the type of packets and many other details.
-John
