How can I unit test Django messages? - python

In my django application, I'm trying to write a unit test that performs an action and then checks the messages in the response.
As far as I can tell, there is no nice way of doing this.
I'm using the CookieStorage storage method, and I'd like to do something similar to the following:
response = self.client.post('/do-something/', follow=True)
self.assertEquals(response.context['messages'][0], "fail.")
The problem is that all I get back when I print response.context['messages'] is the storage object itself:
<django.contrib.messages.storage.cookie.CookieStorage object at 0x3c55250>
How can I turn this into something useful, or am I doing it all wrong?

I found a really easy approach:
response = self.client.post('/foo/')
messages = list(response.context['messages'])
self.assertEqual(len(messages), 1)
self.assertEqual(str(messages[0]), 'my message')
If you need to check for messages on a response that has no context, you can use the following:
from django.contrib.messages import get_messages
messages = list(get_messages(response.wsgi_request))
self.assertEqual(len(messages), 1)
self.assertEqual(str(messages[0]), 'my message')
The fallback storage doesn't support indexing; it is, however, iterable.

From the Django documentation:
Outside of templates, you can use get_messages()
So, you could write something like:
from django.contrib.messages import get_messages
[...]
messages = [m.message for m in get_messages(response.wsgi_request)]
self.assertIn('My message', messages)
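Each message also carries a level, so you can assert on severity as well. A short sketch using the standard level constants (assuming a response variable as in the snippets above):
from django.contrib import messages
from django.contrib.messages import get_messages

msgs = list(get_messages(response.wsgi_request))
# Message objects expose .message, .level and .extra_tags
self.assertEqual(msgs[0].level, messages.ERROR)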

This works for me (displays all messages):
print [m.message for m in list(response.context['messages'])]
Also, here are a couple of utility methods I have in a test class inherited from Django's TestCase. If you'd prefer to have them as functions, remove the self arguments and replace the self.fail() calls with a raise.
def assert_message_count(self, response, expect_num):
    """
    Asserts that exactly the given number of messages have been sent.
    """
    actual_num = len(response.context['messages'])
    if actual_num != expect_num:
        self.fail('Message count was %d, expected %d' %
                  (actual_num, expect_num))

def assert_message_contains(self, response, text, level=None):
    """
    Asserts that there is exactly one message containing the given text.
    """
    messages = response.context['messages']
    matches = [m for m in messages if text in m.message]
    if len(matches) == 1:
        msg = matches[0]
        if level is not None and msg.level != level:
            self.fail('There was one matching message but with a different '
                      'level: %s != %s' % (msg.level, level))
    elif len(matches) == 0:
        messages_str = ", ".join('"%s"' % m for m in messages)
        self.fail('No message contained text "%s", messages were: %s' %
                  (text, messages_str))
    else:
        self.fail('Multiple messages contained text "%s": %s' %
                  (text, ", ".join(('"%s"' % m) for m in matches)))

def assert_message_not_contains(self, response, text):
    """ Assert that no message contains the given text. """
    messages = response.context['messages']
    matches = [m for m in messages if text in m.message]
    if len(matches) > 0:
        self.fail('Message(s) contained text "%s": %s' %
                  (text, ", ".join(('"%s"' % m) for m in matches)))

Update
My original answer was written when Django was still 1.1 or so. This answer is no longer relevant. See @daveoncode's answer for a better solution.
Original Answer
I did an experiment to test this. I changed the MESSAGE_STORAGE setting in one of my projects to 'django.contrib.messages.storage.cookie.CookieStorage' and executed a test that I had written to check for messages. It worked.
The key difference from what you were doing is the way I retrieved messages. See below:
def test_message_sending(self):
    data = dict(...)
    response = self.client.post(reverse('my_view'), data)
    messages = self.user.get_and_delete_messages()
    self.assertTrue(messages)
    self.assertEqual('Hey there!', messages[0])
This may be worth a try.

I created a Python class to simplify message testing:
class TestMessageCase(TestCase):
    """ Class inherited from TestCase to add a function to test messages returned by a response """

    def assertMessageContains(self, response, messages_list, debug=False):
        """
        Function to test messages returned by the response view

        :param response: The response of the view
        :param messages_list: An ordered list of messages to test
        :type messages_list: list of string
        :param debug: Show all response messages
        :type debug: bool
        """
        response_messages = list(response.context['messages'])
        if debug:
            print(
                " ---------------------------------------------------------------------\n",
                "|              DEBUG MESSAGES RETURNED BY THE RESPONSE               |\n",
                "---------------------------------------------------------------------"
            )
            for i in range(len(response_messages)):
                print(f"Message n°{i + 1} :\n{response_messages[i]}\n\n")
            print(
                " ---------------------------------------------------------------------\n",
                "|                             END DEBUG                              |\n",
                "---------------------------------------------------------------------"
            )
        self.assertEqual(len(response_messages), len(messages_list))
        for i in range(len(response_messages)):
            self.assertEqual(str(response_messages[i]), messages_list[i])
And in a test function:
response = self.client.post('/foo/')
self.assertMessageContains(response, ["Foo"])

A simpler version of stalemate's one:
class TestCaseMessagesMixture(object):
    def assertMessageCount(self, response, expect_num):
        """
        Asserts that exactly the given number of messages have been sent.
        """
        actual_num = len(response.context['messages'])
        if actual_num != expect_num:
            self.fail('Message count was %d, expected %d' %
                      (actual_num, expect_num))

    def assertMessageEqual(self, response, text):
        """
        Asserts that the response includes the message text.
        """
        messages = [m.message for m in response.context['messages']]
        if text not in messages:
            self.fail(
                'No message with text "%s", messages were: %s' %
                (text, messages)
            )

    def assertMessageNotEqual(self, response, text):
        """
        Asserts that the response does not include the message text.
        """
        messages = [m.message for m in response.context['messages']]
        if text in messages:
            self.fail(
                'Message with text "%s" found, messages were: %s' %
                (text, messages)
            )
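Since it is a plain mixin, you combine it with Django's TestCase yourself. A minimal sketch (the URL and message text are hypothetical):
from django.test import TestCase

class MyViewTests(TestCaseMessagesMixture, TestCase):
    def test_success_message(self):
        response = self.client.post('/foo/', follow=True)
        self.assertMessageCount(response, 1)
        self.assertMessageEqual(response, 'my message')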

Test helpers for validating response message count and content:
def get_response_messages(self, response):
    from django.contrib.messages import get_messages
    return list(get_messages(response.wsgi_request))

def check_response_messages(self, response, message_index=None, message_value=None, exp_count=None):
    messages = self.get_response_messages(response)
    if exp_count is not None:
        self.assertEqual(len(messages), exp_count)
    if message_index is not None:
        message = messages[message_index]
        self.assertIn(message_value, str(message))
They can be used like this:
message_value = "You can not switch to another type of account"
self.check_response_messages(response, exp_count=1, message_index=0, message_value=message_value)

Related

Parse Email Reply from Thread in Zapier

I'm looking at parsing out just the most recent reply/message from an email thread as part of a zap.
I've found this link, but how do I use it within a Zap? https://github.com/zapier/email-reply-parser
i.e. when I pick up a thread from gmail how do I just extract the most recent message?
Is this possible in Code by Zapier and if so how?
E.g.
Input:
Yes that is fine, I will email you in the morning.
On Fri, Nov 16, 2012 at 1:48 PM, Zapier wrote:
Our support team just commented on your open Ticket:
"Hi Royce, can we chat in the morning about your question?"
Output, i.e. the parsed email:
Yes that is fine, I will email you in the morning.
First off: it's not possible to use that in a code step directly. Python code steps don't have access to external packages.
That said, that package is just Python code, and there's nothing stopping you from copying all of the important code into the Code step and using it that way.
It's worth noting that the linked code is pretty old and looks to be unmaintained, so it's unlikely to work without modifications.
I had a go at adapting this: https://github.com/zapier/email-reply-parser, which seemed to work as well.
"""
email_reply_parser is a python library port of GitHub's Email Reply Parser.
For more information, visit https://github.com/zapier/email-reply-parser
"""
import re
class EmailReplyParser(object):
""" Represents a email message that is parsed.
"""
#staticmethod
def read(text):
""" Factory method that splits email into list of fragments
text - A string email body
Returns an EmailMessage instance
"""
return EmailMessage(text).read()
#staticmethod
def parse_reply(text):
""" Provides the reply portion of email.
text - A string email body
Returns reply body message
"""
return EmailReplyParser.read(text).reply
class EmailMessage(object):
""" An email message represents a parsed email body.
"""
SIG_REGEX = re.compile(r'(--|__|-\w)|(^Sent from my (\w+\s*){1,3})')
QUOTE_HDR_REGEX = re.compile('On.*wrote:$')
QUOTED_REGEX = re.compile(r'(>+)')
HEADER_REGEX = re.compile(r'^\*?(From|Sent|To|Subject):\*? .+')
_MULTI_QUOTE_HDR_REGEX = r'(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)'
MULTI_QUOTE_HDR_REGEX = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL | re.MULTILINE)
MULTI_QUOTE_HDR_REGEX_MULTILINE = re.compile(_MULTI_QUOTE_HDR_REGEX, re.DOTALL)
def __init__(self, text):
self.fragments = []
self.fragment = None
self.text = text.replace('\r\n', '\n')
self.found_visible = False
def read(self):
""" Creates new fragment for each line
and labels as a signature, quote, or hidden.
Returns EmailMessage instance
"""
self.found_visible = False
is_multi_quote_header = self.MULTI_QUOTE_HDR_REGEX_MULTILINE.search(self.text)
if is_multi_quote_header:
self.text = self.MULTI_QUOTE_HDR_REGEX.sub(is_multi_quote_header.groups()[0].replace('\n', ''), self.text)
# Fix any outlook style replies, with the reply immediately above the signature boundary line
# See email_2_2.txt for an example
self.text = re.sub('([^\n])(?=\n ?[_-]{7,})', '\\1\n', self.text, re.MULTILINE)
self.lines = self.text.split('\n')
self.lines.reverse()
for line in self.lines:
self._scan_line(line)
self._finish_fragment()
self.fragments.reverse()
return self
#property
def reply(self):
""" Captures reply message within email
"""
reply = []
for f in self.fragments:
if not (f.hidden or f.quoted):
reply.append(f.content)
return '\n'.join(reply)
def _scan_line(self, line):
""" Reviews each line in email message and determines fragment type
line - a row of text from an email message
"""
is_quote_header = self.QUOTE_HDR_REGEX.match(line) is not None
is_quoted = self.QUOTED_REGEX.match(line) is not None
is_header = is_quote_header or self.HEADER_REGEX.match(line) is not None
if self.fragment and len(line.strip()) == 0:
if self.SIG_REGEX.match(self.fragment.lines[-1].strip()):
self.fragment.signature = True
self._finish_fragment()
if self.fragment \
and ((self.fragment.headers == is_header and self.fragment.quoted == is_quoted) or
(self.fragment.quoted and (is_quote_header or len(line.strip()) == 0))):
self.fragment.lines.append(line)
else:
self._finish_fragment()
self.fragment = Fragment(is_quoted, line, headers=is_header)
def quote_header(self, line):
""" Determines whether line is part of a quoted area
line - a row of the email message
Returns True or False
"""
return self.QUOTE_HDR_REGEX.match(line[::-1]) is not None
def _finish_fragment(self):
""" Creates fragment
"""
if self.fragment:
self.fragment.finish()
if self.fragment.headers:
# Regardless of what's been seen to this point, if we encounter a headers fragment,
# all the previous fragments should be marked hidden and found_visible set to False.
self.found_visible = False
for f in self.fragments:
f.hidden = True
if not self.found_visible:
if self.fragment.quoted \
or self.fragment.headers \
or self.fragment.signature \
or (len(self.fragment.content.strip()) == 0):
self.fragment.hidden = True
else:
self.found_visible = True
self.fragments.append(self.fragment)
self.fragment = None
class Fragment(object):
""" A Fragment is a part of
an Email Message, labeling each part.
"""
def __init__(self, quoted, first_line, headers=False):
self.signature = False
self.headers = headers
self.hidden = False
self.quoted = quoted
self._content = None
self.lines = [first_line]
def finish(self):
""" Creates block of content with lines
belonging to fragment.
"""
self.lines.reverse()
self._content = '\n'.join(self.lines)
self.lines = None
#property
def content(self):
return self._content.strip()
return {'emailstring': EmailReplyParser.parse_reply(input_data['body'])}
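To sanity-check the pasted code outside Zapier (where input_data and the top-level return don't exist), you can call the parser directly. A sketch with made-up email text:
sample = ("Yes that is fine, I will email you in the morning.\n\n"
          "On Fri, Nov 16, 2012 at 1:48 PM, Zapier wrote:\n"
          "> Our support team just commented on your open Ticket:\n"
          "> \"Hi Royce, can we chat in the morning about your question?\"\n")
print(EmailReplyParser.parse_reply(sample))
# Should print only the top reply:
# Yes that is fine, I will email you in the morning.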

Why are my ZenDesk macros being updated, but no change actually going through?

I was trying to bulk edit the signature of my personal macros on ZenDesk, and the only way to do that is via the API. So I wrote this quick Python script to try to do it:
import sys
import time
import logging
import requests
import re
start_time = time.time()
# Set up logging
logger = logging.getLogger()
log_handler = logging.StreamHandler(sys.stdout)
log_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s - %(funcName)s - line %(lineno)d"))
log_handler.setLevel(logging.DEBUG)
logger.addHandler(log_handler)
logger.setLevel(logging.DEBUG)
def doTheGet(url, user, pwd):
    response = requests.get(url, auth=(user + "/token", pwd))
    if response.status_code != 200:
        logger.error("Status: %s (%s) Problem with the request. Exiting. %f seconds elapsed" % (response.status_code, response.reason, time.time() - start_time))
        exit()
    data = response.json()
    return data

def doThePut(url, updated_data, user, pwd):
    response = requests.put(url, json="{'macro': {'actions': %r}}" % updated_data, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
    if response.status_code != 200:
        logger.error("Status: %s (%s) Problem with the request. Exiting. %f seconds elapsed" % (response.status_code, response.reason, time.time() - start_time))
        exit()
    data = response.json()
    return data
def getMacros():
    macros = {}
    data = doTheGet("https://mydomain.zendesk.com/api/v2/macros.json", "me@mydomain.com", "111tokenZZZ")

    def getMacros(macro_list, page, page_count):
        if not page:
            for macro in macro_list:
                if macro["restriction"] and macro["active"]:
                    if macro["restriction"]["type"] == "User":
                        macros[macro["id"]] = macro["actions"]
        else:
            for macro in macro_list:
                if macro["restriction"] and macro["active"]:
                    if macro["restriction"]["type"] == "User":
                        macros[macro["id"]] = macro["actions"]
            page_count += 1
            new_data = doTheGet(page, "me@mydomain.com", "111tokenZZZ")
            new_macs = new_data["macros"]
            new_next_page = new_data["next_page"]
            getMacros(new_macs, new_next_page, page_count)

    macs = data["macros"]
    current_page = 1
    next_page = data["next_page"]
    getMacros(macs, next_page, current_page)
    return macros
def updateMacros():
    macros = getMacros()
    regular = "RegEx to match signature to be replaced$"  # since some macros already have the updated signature
    for macro in macros:
        for action in macros[macro]:
            if action["field"] == "comment_value":
                if re.search(regular, action["value"][1]):
                    ind = action["value"][1].rfind("\n")
                    action["value"][1] = action["value"][1][:ind] + "\nNew signature"
    return macros

macs = updateMacros()
for mac in macs:
    doThePut("https://mydomain.zendesk.com/api/v2/macros/%d.json" % (mac), macs[mac], "me@mydomain.com", "111tokenZZZ")
Now, everything's running as expected, and I get no errors. When I go to my macros on ZenDesk and sort them by last updated, I do see that the script did something, since they show as being last updated today. However, nothing changes on them. I made sure the data I'm sending over is edited (updateMacros is doing its job). I made sure the requests send back an OK response. So I'm sending updated data, getting back a 200 response, but the response sent back shows me the macros as they were before, with zero changes.
The only thing that occurs to me as maybe being wrong is the format of the data I'm sending over, or something of the sort. But even then, I'd expect the response not to be a 200...
What am I missing here?
Looks like you're double-encoding the JSON data in your PUT request:
response = requests.put(url, json="{'macro': {'actions': %r}}" % updated_data, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
The json parameter expects an object, which it then dutifully encodes as JSON and sends as the body of the request; this is merely a convenience; the implementation is simply,
if not data and json is not None:
    # urllib3 requires a bytes-like body. Python 2's json.dumps
    # provides this natively, but Python 3 gives a Unicode string.
    content_type = 'application/json'
    body = complexjson.dumps(json)
    if not isinstance(body, bytes):
        body = body.encode('utf-8')
(source: https://github.com/kennethreitz/requests/blob/master/requests/models.py#L424)
Since the value is always passed through json.dumps(), if you pass a string representing already-encoded JSON it will itself be encoded:
"{\'macro\': {\'actions\': [{\'field\': \'comment_value\', \'value\': [\'channel:all\', \'Spiffy New Sig that will Never Be Saved\']}]}}"
ZenDesk, upon being given JSON it doesn't expect, updates the updated_at field and... does nothing else. You can verify this by passing an empty string - same result.
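You can see the double encoding for yourself with nothing but the json module; a minimal sketch:
import json

payload = {'macro': {'actions': []}}
once = json.dumps(payload)   # '{"macro": {"actions": []}}' - a JSON object, what the API wants
twice = json.dumps(once)     # '"{\\"macro\\": {\\"actions\\": []}}"' - a JSON *string*, which is what
print(once)                  # requests produces when you pass already-encoded JSON via json=
print(twice)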
Note that you're also relying on Python's repr formatting to fill in your JSON; that's probably a bad idea too. Instead, let's just reconstruct our macro object and let requests encode it:
response = requests.put(url, json={'macro': {'actions': updated_data}}, headers={"Content-Type": "application/json"}, auth=(user + "/token", pwd))
This should do what you expect.
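If you want to confirm the change actually landed, you can re-fetch a macro with the existing doTheGet helper and inspect its actions; a sketch reusing the hypothetical credentials from the question:
# Sketch: re-fetch one macro after the PUT and eyeball its actions.
check = doTheGet("https://mydomain.zendesk.com/api/v2/macros/%d.json" % mac,
                 "me@mydomain.com", "111tokenZZZ")
print(check["macro"]["actions"])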

Retrieve data from Klout - exception KloutHTTPError

I am trying to retrieve a Klout ID using the Klout Python API. I am taking usernames from a Mongo database and trying to retrieve the Klout ID for each one. I've noticed that I can retrieve IDs only for some users (they, or some of their followers, are registered on Klout). Thus I want to create a try-except in order to handle the raised KloutHTTPError (klout.api.KloutHTTPError: ERROR: HTTP Error 404: Not Found) for the users for which Klout cannot return a Klout ID.
My code:
for cursor in collection.find().limit(100):
    name = cursor['database'].get('name')
    print name
    kloutId = k.identity.klout(screenName=name).get('id')
    score = k.user.score(kloutId=kloutId).get('score')
    print "User's klout score is: %s" % (score)

# By default all communication is not secure (HTTP). An optional secure parameter
# can be specified for secure (HTTPS) communication
k = Klout('...', secure=True)

# Optionally a timeout parameter (seconds) can also be sent with all calls
score = k.user.score(kloutId=kloutId, timeout=5).get('score')
I added the following change and it works fine:
for cursor in collection.find().limit(10000):
    try:
        name = cursor['user']['name']
        print name.encode('utf-8')
        kloutId = k.identity.klout(screenName=name).get('id')
        score = k.user.score(kloutId=kloutId).get('score')
        print "User's klout score is: %s" % (score)

        # By default all communication is not secure (HTTP). An optional secure parameter
        # can be specified for secure (HTTPS) communication
        k = Klout('...', secure=True)

        # Optionally a timeout parameter (seconds) can also be sent with all calls
        score = k.user.score(kloutId=kloutId, timeout=5).get('score')
        counter = counter + 1
    except (KloutHTTPError, UnicodeEncodeError) as e:
        print "Oops! No Klout ID was found for that name, or there was a unicode exception. Try again... ", counter

How to abstract from pycurl the correct way

I would like to write a pseudo-module that performs a long-running GET request (much like the one used to consume the Twitter Streaming API), without requiring the caller to pass in all the parameters every time they want to make that same GET request.
In my module.py I have
class viewResults():
    def __init__(self, username, password, keyname, consume):
        self.buffer = ""
        self.consume = consume
        self.conn = pycurl.Curl()
        self.conn.setopt(pycurl.USERPWD, "%s:%s" % (username, password))
        self.conn.setopt(pycurl.URL, "http://crowdprocess.no.de/" + keyname + "/results")
        self.conn.setopt(pycurl.WRITEFUNCTION, self.on_receive)
        # self.conn.setopt(pycurl.VERBOSE, 1)
        # self.conn.setopt(pycurl.DEBUGFUNCTION, self.debug)
        self.conn.perform()

    # def debug(self, debug_type, debug_message):
    #     print 'type: ' + str(debug_type) + ' message' + str(debug_message)

    def on_receive(self, data):
        self.buffer += data
        if data.endswith("\r\n") and self.buffer.strip():
            content = json.loads(self.buffer)
            self.consume(content)
            self.buffer = ""
And on index.py I have
from module import viewResults
def consume(content):
print content
viewResults('username','password','keyname',consume)
So I wanted to pass only the parameters username, password, keyname and the "consume" function that should be called when the buffer is full of valid JSON data...
What's happening is that the request is actually made; if VERBOSE is on I can see all the data arriving, but that "higher level consume" function gets nothing...
How can I achieve this ?
Thanks.
As I understand it, you want to archive debug data?
Create your custom debug function to store your data: custom_debug(debug_type, debug_msg)
>>> import human_curl as hurl
>>> import json
>>> r = hurl.get("http://crowdprocess.no.de/" + keyname + "/results",
...              debug=custom_debug, auth=('username', 'password'))
>>> consume(json.loads(r.content))
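custom_debug here is your own callback; a minimal sketch that just archives each debug event (the two-argument signature mirrors the custom_debug(debug_type, debug_msg) mentioned above):
def custom_debug(debug_type, debug_msg):
    # Sketch: append each debug event to a file so the raw stream is archived.
    with open("stream_debug.log", "a") as f:
        f.write("type: %s message: %s\n" % (debug_type, debug_msg))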
I could not see anywhere in your posted code that on_receive(self, data) prints anything. Add sys.stderr.write("%s\n" % data) to it:
def on_receive(self, data):
    # -- print data to stderr --
    import sys
    sys.stderr.write("%s\n" % data)
    # -- end --
    self.buffer += data
    if data.endswith("\r\n") and self.buffer.strip():
        content = json.loads(self.buffer)
        self.consume(content)
        self.buffer = ""

Python - seek in http response stream

Using urllib (or urllib2) to do what I want seems hopeless.
Any solution?
I'm not sure how the C# implementation works, but, as internet streams are generally not seekable, my guess would be it downloads all the data to a local file or in-memory object and seeks within it from there. The Python equivalent of this would be to do as Abafei suggested and write the data to a file or StringIO and seek from there.
However, if, as your comment on Abafei's answer suggests, you want to retrieve only a particular part of the file (rather than seeking backwards and forwards through the returned data), there is another possibility. urllib2 can be used to retrieve a certain section (or 'range' in HTTP parlance) of a webpage, provided that the server supports this behaviour.
The range header
When you send a request to a server, the parameters of the request are given in various headers. One of these is the Range header, defined in section 14.35 of RFC2616 (the specification defining HTTP/1.1). This header allows you to do things such as retrieve all data starting from the 10,000th byte, or the data between bytes 1,000 and 1,500.
Server support
There is no requirement for a server to support range retrieval. Some servers will return the Accept-Ranges header (section 14.5 of RFC2616) along with a response to report if they support ranges or not. This could be checked using a HEAD request. However, there is no particular need to do this; if a server does not support ranges, it will return the entire page and we can then extract the desired portion of data in Python as before.
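If you did want to check up front, a HEAD request looks like this (a sketch, Python 2 to match the script below; urllib2 needs a small subclass to issue HEAD):
import urllib2

class HeadRequest(urllib2.Request):
    def get_method(self):
        return "HEAD"

response = urllib2.urlopen(HeadRequest("http://www.python.org/"))
print response.headers.get("accept-ranges", "not advertised")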
Checking if a range is returned
If a server returns a range, it must send the Content-Range header (section 14.16 of RFC2616) along with the response. If this is present in the headers of the response, we know a range was returned; if it is not present, the entire page was returned.
Implementation with urllib2
urllib2 allows us to add headers to a request, thus allowing us to ask the server for a range rather than the entire page. The following script takes a URL, a start position, and (optionally) a length on the command line, and tries to retrieve the given section of the page.
import sys
import urllib2

# Check command line arguments.
if len(sys.argv) < 3:
    sys.stderr.write("Usage: %s url start [length]\n" % sys.argv[0])
    sys.exit(1)

# Create a request for the given URL.
request = urllib2.Request(sys.argv[1])

# Add the header to specify the range to download.
if len(sys.argv) > 3:
    start, length = map(int, sys.argv[2:])
    request.add_header("range", "bytes=%d-%d" % (start, start + length - 1))
else:
    request.add_header("range", "bytes=%s-" % sys.argv[2])

# Try to get the response. This will raise a urllib2.URLError if there is a
# problem (e.g., invalid URL).
response = urllib2.urlopen(request)

# If a content-range header is present, partial retrieval worked.
if "content-range" in response.headers:
    print "Partial retrieval successful."

    # The header contains the string 'bytes', followed by a space, then the
    # range in the format 'start-end', followed by a slash and then the total
    # size of the page (or an asterisk if the total size is unknown). Let's get
    # the range and total size from this.
    range, total = response.headers['content-range'].split(' ')[-1].split('/')

    # Print a message giving the range information.
    if total == '*':
        print "Bytes %s of an unknown total were retrieved." % range
    else:
        print "Bytes %s of a total of %s were retrieved." % (range, total)

# No header, so partial retrieval was unsuccessful.
else:
    print "Unable to use partial retrieval."

# And for good measure, let's check how much data we downloaded.
data = response.read()
print "Retrieved data size: %d bytes" % len(data)
Using this, I can retrieve the final 2,000 bytes of the Python homepage:
blair@blair-eeepc:~$ python retrieverange.py http://www.python.org/ 17387
Partial retrieval successful.
Bytes 17387-19386 of a total of 19387 were retrieved.
Retrieved data size: 2000 bytes
Or 400 bytes from the middle of the homepage:
blair@blair-eeepc:~$ python retrieverange.py http://www.python.org/ 6000 400
Partial retrieval successful.
Bytes 6000-6399 of a total of 19387 were retrieved.
Retrieved data size: 400 bytes
However, the Google homepage does not support ranges:
blair@blair-eeepc:~$ python retrieverange.py http://www.google.com/ 1000 500
Unable to use partial retrieval.
Retrieved data size: 9621 bytes
In this case, it would be necessary to extract the data of interest in Python prior to any further processing.
It may work best just to write the data to a file (or even to a string, using StringIO), and to seek in that file (or string).
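For completeness, the "download once, then seek locally" approach is only a few lines; a sketch, Python 2 to match the script above:
import urllib2
from StringIO import StringIO

# Download the whole page once, then seek within the in-memory copy.
data = StringIO(urllib2.urlopen("http://www.python.org/").read())
data.seek(6000)
print data.read(400)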
I did not find any existing implementations of a file-like interface with seek() to HTTP URLs, so I rolled my own simple version: https://github.com/valgur/pyhttpio. It depends on urllib.request but could probably easily be modified to use requests, if necessary.
The full code:
import cgi
import time
import urllib.request
from io import IOBase
from sys import stderr


class SeekableHTTPFile(IOBase):
    def __init__(self, url, name=None, repeat_time=-1, debug=False):
        """Allow a file accessible via HTTP to be used like a local file by utilities
        that use `seek()` to read arbitrary parts of the file, such as `ZipFile`.
        Seeking is done via the 'range: bytes=xx-yy' HTTP header.

        Parameters
        ----------
        url : str
            A HTTP or HTTPS URL
        name : str, optional
            The filename of the file.
            Will be filled from the Content-Disposition header if not provided.
        repeat_time : int, optional
            In case of HTTP errors wait `repeat_time` seconds before trying again.
            Negative value or `None` disables retrying and simply passes on the exception (the default).
        """
        super().__init__()
        self.url = url
        self.name = name
        self.repeat_time = repeat_time
        self.debug = debug
        self._pos = 0
        self._seekable = True
        with self._urlopen() as f:
            if self.debug:
                print(f.getheaders())
            self.content_length = int(f.getheader("Content-Length", -1))
            if self.content_length < 0:
                self._seekable = False
            if f.getheader("Accept-Ranges", "none").lower() != "bytes":
                self._seekable = False
            if name is None:
                header = f.getheader("Content-Disposition")
                if header:
                    value, params = cgi.parse_header(header)
                    self.name = params["filename"]

    def seek(self, offset, whence=0):
        if not self.seekable():
            raise OSError
        if whence == 0:
            self._pos = 0
        elif whence == 1:
            pass
        elif whence == 2:
            self._pos = self.content_length
        self._pos += offset
        return self._pos

    def seekable(self, *args, **kwargs):
        return self._seekable

    def readable(self, *args, **kwargs):
        return not self.closed

    def writable(self, *args, **kwargs):
        return False

    def read(self, amt=-1):
        if self._pos >= self.content_length:
            return b""
        if amt < 0:
            end = self.content_length - 1
        else:
            end = min(self._pos + amt - 1, self.content_length - 1)
        byte_range = (self._pos, end)
        self._pos = end + 1
        with self._urlopen(byte_range) as f:
            return f.read()

    def readall(self):
        return self.read(-1)

    def tell(self):
        return self._pos

    def __getattribute__(self, item):
        attr = object.__getattribute__(self, item)
        if not object.__getattribute__(self, "debug"):
            return attr

        if hasattr(attr, '__call__'):
            def trace(*args, **kwargs):
                a = ", ".join(map(str, args))
                if kwargs:
                    a += ", ".join(["{}={}".format(k, v) for k, v in kwargs.items()])
                print("Calling: {}({})".format(item, a))
                return attr(*args, **kwargs)

            return trace
        else:
            return attr

    def _urlopen(self, byte_range=None):
        header = {}
        if byte_range:
            header = {"range": "bytes={}-{}".format(*byte_range)}
        while True:
            try:
                r = urllib.request.Request(self.url, headers=header)
                return urllib.request.urlopen(r)
            except urllib.error.HTTPError as e:
                if self.repeat_time is None or self.repeat_time < 0:
                    raise
                print("Server responded with " + str(e), file=stderr)
                print("Sleeping for {} seconds before trying again".format(self.repeat_time), file=stderr)
                time.sleep(self.repeat_time)
A potential usage example (note the ZipFile import from the standard zipfile module):
from zipfile import ZipFile

url = "https://www.python.org/ftp/python/3.5.0/python-3.5.0-embed-amd64.zip"
f = SeekableHTTPFile(url, debug=True)
zf = ZipFile(f)
zf.printdir()
zf.extract("python.exe")
Edit: There is actually a mostly identical, if slightly more minimal, implementation in this answer: https://stackoverflow.com/a/7852229/2997179
