I'm trying to test this code:
def read_classes(file):
    """Return the raw bytes of the classes payload located via *file*.

    If ``CLASSES`` occurs in the path string *file*, the path is read
    directly in binary mode. Otherwise *file* is opened as a zip archive and
    the member named ``CLASSES`` is read from it.

    ``CLASSES`` and ``ZipFile`` are looked up at module level.
    """
    if CLASSES in file:
        # A context manager closes the handle deterministically; the bare
        # ``open(...).read()`` left closing to the garbage collector.
        with open(file, "rb") as handle:
            return handle.read()
    with ZipFile(file, "r") as archive:
        return archive.read(CLASSES)
What is important for me is, when the provided file contains CLASSES in its name, open will be called, otherwise, ZipFile will be used. The first part I was able to test already, however, I cannot mock ZipFile in order to return a mocked object (archive) - which I then can assert that had the read method called. This is what I've been trying so far:
@patch('zipfile.ZipFile')
def test_givenFile_whenReadClasses_expectArchiveCalled(self, mock_zipfile):
    """Attempt to verify that read_classes reads "classes.file" from the archive.

    ``mock_zipfile`` is the mock injected by the ``@patch`` decorator.

    NOTE(review): as posted, this attempt still reaches the real ``ZipFile``.
    ``read_classes`` calls the bare name ``ZipFile``, so the patch presumably
    has to target that name in the module defining ``read_classes`` rather
    than ``zipfile.ZipFile`` -- confirm against that module's imports. Also,
    ``with ZipFile(...) as archive`` binds the value returned by
    ``__enter__()``, not the constructor's return value configured below.
    """
    file = 'sample.file'
    archive = Mock()
    mock_zipfile.return_value = archive
    read_classes(file)
    archive.read.assert_called_once_with("classes.file")
When I do that, it continues to execute the original ZipFile constructor, giving me:
IOError: [Errno 2] No such file or directory: 'sample.file'
Straight to the point:
@patch('zipfile.ZipFile')
def test_givenFile_whenReadClasses_expectArchiveCalled(self, mocked_zip_file):
    """read_classes must call ``read`` on the object the ZipFile context yields.

    ``mocked_zip_file`` is the mock injected by ``@patch`` in place of the
    ``ZipFile`` class.
    """
    # NOTE(review): patch() must target the name read_classes actually looks
    # up; if its module does ``from zipfile import ZipFile``, patch
    # '<that_module>.ZipFile' instead -- confirm against the module's imports.
    file = 'file'
    # ``with ZipFile(...) as archive`` binds the result of __enter__(), so the
    # ``read`` mock is taken from __enter__'s return value.
    mocked_read = mocked_zip_file.return_value.__enter__.return_value.read

    # Pass the local defined above (the posted code used an undefined name).
    read_classes(file)

    # NOTE(review): 'another_file' must equal the member name read_classes
    # passes to archive.read() (its CLASSES constant) -- verify.
    mocked_read.assert_called_once_with('another_file')
Related
I have a function that loads data from a data.json file; it is defined in models.py as follows:
def load_data():
    """Load and return the value stored under the ``'data'`` key of data.json.

    The file is looked up next to this module (``Path(__file__).parent``).

    Raises:
        FileNotFoundError: if data.json does not exist.
        KeyError: if the document has no ``'data'`` key.
    """
    file_path = Path(__file__).parent / 'data.json'
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)['data']
loaded_data = load_data()
I use loaded_data throughout all the functions defined in models.py. The data.json file contains a JSON array.
My test_models.py is as follow:
from unittest.mock import patch
from models import ... (a list of function to test)
# For replacing models.load_data()
mock_data = []
def get_mock_data():
    """Return the module-level ``mock_data`` list used in place of real data."""
    return mock_data
#patch('models.load_data', side_effect= get_mock_restaurants)
class TestRestaurantsModel(unittest.TestCase):
However, somehow the real models.load_data still gets executed. I know this because I changed the file_path to randomabc.json and got a FileNotFoundError. How do I prevent the execution of models.load_data? I do not really need to mock models.load_data; I just need to prevent its execution during the test and assign mock data to models.loaded_data.
For Reference
I have a python class which is supposed to unpack an archive and recursively iterate over the directory structure and then return the files for further processing. In my case I want to hash those files. I'm struggling with returning the files. Here is my take.
I created an unzip function and a function which creates a log-file with the paths of all the files which were unpacked. Then I want to access this log-file and return ALL of the files so I can use them in another python class for further processing. This doesn't seem to work yet.
Structure of log-file:
/home/usr/Downloads/outdir/XXX.log
/home/usr/Downloads/outdir/Code/XXX.py
/home/usr/Downloads/outdir/Code/XXX.py
/home/usr/Downloads/outdir/Code/XXX.py
Code of interest:
#staticmethod
def read_received_files(from_log):
with open(from_log, 'r') as data:
data = data.readlines()
for lines in data:
\\ This does not seem to work zet
read_files = open(lines.strip())
return read_files
I believe that's what you're looking for:
#staticmethod
def read_received_files(from_log):
    """Open every file whose path is listed in the log at *from_log*.

    The log is read as text, one path per line. Surrounding whitespace is
    stripped and blank lines are skipped.

    Returns:
        A list of open file objects, in log order. The caller owns them and
        is responsible for closing each one.

    Raises:
        OSError: if the log or a listed file cannot be opened. Files already
            opened by this call are closed before the error propagates.
    """
    files = []
    try:
        with open(from_log, 'r') as data:
            for line in data:
                path = line.strip()
                if not path:
                    # e.g. a trailing empty line; open('') would raise
                    continue
                files.append(open(path))
    except Exception:
        # Do not leak the handles collected so far.
        for handle in files:
            handle.close()
        raise
    return files
You returned while still iterating, which prevented the other files from being opened.
Since you are primarily after the meta data and hash of the files stored in the zip file, but not the file itself, there is no need to extract the files to the file system.
Instead you can use the ZipFile.open() method to access the contents of the file through a file-like object. Meta data could be gathered using the ZipInfo object for each file. Here's an example which gets file name and file size as meta data, and the hash of the file.
import hashlib
import zipfile
from collections import namedtuple
def get_files(archive):
    """Yield name, size and MD5 digest for each file stored in a zip archive.

    Nothing is extracted to the file system; member contents are streamed
    straight out of the archive.

    Args:
        archive: path or file-like object accepted by ``zipfile.ZipFile``.

    Yields:
        ``FileInfo(filename, size, hash)`` named tuples, one per
        non-directory member, in archive order. ``size`` is the uncompressed
        size and ``hash`` the hexadecimal MD5 digest of the contents.
    """
    FileInfo = namedtuple('FileInfo', ('filename', 'size', 'hash'))
    with zipfile.ZipFile(archive) as zf:
        for info in zf.infolist():
            if info.filename.endswith('/'):  # exclude directories
                continue
            digest = hashlib.md5()
            # Close each member stream when done, and hash in chunks so a
            # large member is never held in memory as a whole.
            with zf.open(info) as member:
                for chunk in iter(lambda: member.read(64 * 1024), b''):
                    digest.update(chunk)
            yield FileInfo(info.filename, info.file_size, digest.hexdigest())
for f in get_files('some_file.zip'):
print('{}: {} {} bytes'.format(f.hash, f.filename, f.size))
I wanted to ask if it's possible to create PDF/XLS documents as temporary files. I'm doing that to send them using flask afterwards. For pdf/xls files creation I use reportlab and xlsxwriter packages respectively. When I save document using their methods, I get the "Python temporary file permission denied" error. When I try to close using the tempfile methods, files become corrupted. Is there any way to overcome this? Or any other suitable solution?
EDIT:
Some code snippets:
import xlswriter
import tempfile
from flask import after_this_request
#app.route('/some_url', method=['POST'])
def create_doc_function():
#after_this_request
def cleanup(response):
temp.close()
return response
temp = tempfile.TemporaryFile()
book = xlsxwriter.Workbook(temp.name)
# some actions here ...
book.close() # raises "Python temporaty file permission denied" error.
# If missed, Excel book is gonna be corrupted,
# i.e. blank, which make sense
return send_file(temp, as_attachment=True,
attachment_filename='my_document_name.xls')
Similar story with pdf files.
Use tempfile.mkstemp() which will create a standard temp file on disk which will persist until removed:
import tempfile
import os
handle, filepath = tempfile.mkstemp()
# mkstemp() returns an OS-level descriptor opened for reading and writing;
# wrap it in a binary read/write file object (os.fdopen() alone defaults to
# read-only text mode, which cannot be used to write the document).
f = os.fdopen(handle, 'w+b')
...
EDIT
tempfile.TemporaryFile() will be destroyed as soon as it's closed, which is why your code above is failing.
You can use NamedTemporaryFile and have it deleted via a context manager (or the atexit module). It may do the dirty job for you. Example 1:
import os
from tempfile import NamedTemporaryFile
# define class, because everyone loves objects
class FileHandler:
    """Context manager that owns a NamedTemporaryFile for its lifetime."""

    def __init__(self):
        """Create the backing temporary file.

        NamedTemporaryFile is called with its defaults, so no ``delete``
        argument needs to be passed.
        """
        self.file = NamedTemporaryFile()

    def write_into(self, btext):
        """Write the bytes *btext* into the temporary file."""
        self.file.write(btext)

    def __enter__(self):
        """Enter the ``with`` block; the handler itself is the managed object."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the temporary file at the end of the ``with`` block.

        Prints whether the file's path exists immediately before and
        immediately after ``close()``.
        """
        print('Calling __exit__:')
        existed_before = os.path.exists(self.file.name)
        print(f'File exists = {existed_before}')
        self.file.close()
        existed_after = os.path.exists(self.file.name)
        print(f'File exists = {existed_after}')
# use the context manager 'with' to create a new instance and do something
with FileHandler() as fh:
fh.write_into(b'Hi happy developer!')
print(f'\nIn this point {fh.file.name} does not exist (exists = {os.path.exists(fh.file.name)})')
Output:
Calling __exit__:
File exists = True
File exists = False
In this point D:\users\fll2cj\AppData\Local\Temp\tmpyv37sp58 does not exist (exists = False)
Or you can use the atexit module, which calls a registered function when the program (cmd) exits. Example 2:
import os, atexit
from tempfile import NamedTemporaryFile
class FileHandler:
    """Owns a NamedTemporaryFile that is closed when the interpreter exits."""

    def __init__(self):
        """Create the temporary file and schedule its cleanup at exit."""
        temp_file = NamedTemporaryFile()
        self.file = temp_file
        # run _cleanup() when the program quits
        atexit.register(self._cleanup)

    def write_into(self, btext):
        """Write the bytes *btext* into the temporary file."""
        self.file.write(btext)

    def _cleanup(self):
        """Close the temporary file.

        The file was created without ``delete=False``, so closing it also
        removes it.
        """
        self.file.close()
# create new instance and do whatever you need
fh = FileHandler()
fh.write_into(b'Hi happy developer!')
# now the file still exists, but when the program quits, _cleanup() is called and the file is closed and automatically deleted.
I have the following view code that attempts to "stream" a zipfile to the client for download:
import os
import zipfile
import tempfile
from pyramid.response import FileIter
def zipper(request):
_temp_path = request.registry.settings['_temp']
tmpfile = tempfile.NamedTemporaryFile('w', dir=_temp_path, delete=True)
tmpfile_path = tmpfile.name
## creating zipfile and adding files
z = zipfile.ZipFile(tmpfile_path, "w")
z.write('somefile1.txt')
z.write('somefile2.txt')
z.close()
## renaming the zipfile
new_zip_path = _temp_path + '/somefilegroup.zip'
os.rename(tmpfile_path, new_zip_path)
## re-opening the zipfile with new name
z = zipfile.ZipFile(new_zip_path, 'r')
response = FileIter(z.fp)
return response
However, this is the Response I get in the browser:
Could not convert return value of the view callable function newsite.static.zipper into a response object. The value returned was .
I suppose I am not using FileIter correctly.
UPDATE:
Since updating with Michael Merickel's suggestions, the FileIter function is working correctly. However, still lingering is a MIME type error that appears on the client (browser):
Resource interpreted as Document but transferred with MIME type application/zip: "http://newsite.local:6543/zipper?data=%7B%22ids%22%3A%5B6%2C7%5D%7D"
To better illustrate the issue, I have included a tiny .py and .pt file on Github: https://github.com/thapar/zipper-fix
FileIter is not a response object, just like your error message says. It is an iterable that can be used for the response body, that's it. Also the ZipFile can accept a file object, which is more useful here than a file path. Let's try writing into the tmpfile, then rewinding that file pointer back to the start, and using it to write out without doing any fancy renaming.
import os
import zipfile
import tempfile
from pyramid.response import FileIter
def zipper(request):
    """Build a zip archive in a temporary file and return it as the response.

    The temporary file is created in the directory configured under the
    ``'_temp'`` registry setting. The archive is written into it, the file
    pointer is rewound, and the file is attached to ``request.response`` as
    its ``app_iter`` with content type ``application/zip``.
    """
    _temp_path = request.registry.settings['_temp']
    fp = tempfile.NamedTemporaryFile('w+b', dir=_temp_path, delete=True)
    try:
        ## creating zipfile and adding files
        # The context manager closes the archive even if adding a file fails.
        with zipfile.ZipFile(fp, "w") as z:
            z.write('somefile1.txt')
            z.write('somefile2.txt')
    except Exception:
        # Do not leak the temporary file when the archive cannot be built.
        fp.close()
        raise
    # rewind fp back to start of the file
    fp.seek(0)
    response = request.response
    response.content_type = 'application/zip'
    response.app_iter = FileIter(fp)
    return response
I changed the mode on NamedTemporaryFile to 'w+b' as per the docs to allow the file to be written to and read from.
The current Pyramid version has two convenience classes for this use case: FileResponse and FileIter. The snippet below will serve a static file. I ran this code - the downloaded file is named "download", like the view name. To change the file name (and more), set the Content-Disposition header or have a look at the arguments of pyramid.response.Response.
from pyramid.response import FileResponse
#view_config(name="download")
def zipper(request):
    """Return a ``FileResponse`` for the file at ``path``, built for *request*."""
    path = 'path_to_file'
    return FileResponse(path, request) #passing request is required
docs:
http://docs.pylonsproject.org/projects/pyramid/en/latest/api/response.html#
hint: extract the Zip logic from the view if possible
I am wondering whether there is a way to upload a zip file to django web server and put the zip's files into django database WITHOUT accessing the actual file system in the process (e.g. extracting the files in the zip into a tmp dir and then load them)
Django provides a function to convert python File to Django File, so if there is a way to convert ZipExtFile to python File, it should be fine.
thanks for help!
Django model:
from django.db import models
class Foo(models.Model):
    """Model holding a single uploaded file stored under ``'somewhere'``."""

    # Must be declared on a models.Model subclass: the usage code builds
    # ``Foo(file=...)`` and calls ``full_clean()``, which a plain class lacks.
    file = models.FileField(upload_to='somewhere')
Usage:
from zipfile import ZipFile
from django.core.exceptions import ValidationError
from django.core.files import File
from io import BytesIO
# Copy one archive member into an in-memory buffer; the archive and the
# member stream are closed as soon as the bytes have been read.
with ZipFile('zipFile') as z, z.open('subfile') as istream:
    ostream = BytesIO(istream.read())
tmp = Foo(file=File(ostream))
try:
    tmp.full_clean()
except ValidationError as e:
    # Catch Django's ValidationError (the posted ``Validation`` is not a
    # defined name); ``as`` and ``print()`` work on Python 2.6+ and 3.
    print(e)
Output:
{'file': [u'This field cannot be blank.']}
[SOLUTION] Solution using an ugly hack:
As correctly pointed out by Don Quest, file-like classes such as StringIO or BytesIO should represent the data as a virtual file. However, Django File's constructor only accepts the built-in file type and nothing else, although the file-like classes would have done the job as well. The hack is to set the attributes of Django's File manually:
# Wrap the member's bytes in an in-memory file and fill in by hand the
# attributes Django's File needs.
buf = bytearray(OPENED_ZIP_OBJECT.read(FILE_NAME))  # was "bytesarray": NameError
tmp_file = BytesIO(buf)
dummy_file = File(tmp_file) # this line actually fails
dummy_file.name = SOME_RANDOM_NAME
dummy_file.size = len(buf)
dummy_file.file = tmp_file
# dummy file is now valid
Please keep commenting if you have a better solution (except for custom storage)
There's an easier way to do this:
from django.core.files.base import ContentFile
uploaded_zip = zipfile.ZipFile(uploaded_file, 'r') # ZipFile
for filename in uploaded_zip.namelist():
with uploaded_zip.open(filename) as f: # ZipExtFile
my_django_file = ContentFile(f.read())
Using this, you can convert a file that was uploaded to memory directly to a django file. For a more complete example, let's say you wanted to upload a series of image files inside of a zip to the file system:
# some_app/models.py
class Photo(models.Model):
image = models.ImageField(upload_to='some/upload/path')
...
# Upload code
from some_app.models import Photo
# Save every member of the uploaded archive as a new Photo.
for filename in uploaded_zip.namelist():
    with uploaded_zip.open(filename) as f:  # ZipExtFile
        new_photo = Photo()
        # ``save=True`` is an argument of the field file's save(), not of
        # ContentFile(); the posted line also lacked a closing parenthesis.
        new_photo.image.save(filename, ContentFile(f.read()), save=True)
Without knowing too much about Django, I can tell you to take a look at the "io" package.
You could do something like:
from zipfile import ZipFile
from io import BytesIO

zname, zipextfile = 'zipcontainer.zip', 'file_in_archive'
# ZipExtFile.read() returns bytes, so the in-memory copy must be a BytesIO
# (StringIO only accepts str). The archive and the member stream are closed
# once the data has been copied.
with ZipFile(zname) as archive, archive.open(zipextfile) as istream:
    ostream = BytesIO(istream.read())
And then do whatever you would like to do with your "virtual" ostream Stream/File.
I've used the following Django file class to avoid the need to read the ZipExtFile into another data structure (StringIO or BytesIO) while properly implementing what Django needs in order to save the file directly.
from django.core.files.base import File
class DjangoZipExtFile(File):
    """Django ``File`` subclass wrapping an already-open zip member.

    Exposes the member's stream, name and uncompressed size through the
    attributes set in ``__init__`` so it can be handed to a storage backend
    without first copying the data into another buffer.
    """

    def __init__(self, zipextfile, zipinfo):
        # zipextfile: the open member stream; zipinfo: the object providing
        # its ``filename`` and ``file_size``.
        # NOTE(review): File.__init__ is not called here -- confirm that no
        # attribute it would set is needed.
        self.file = zipextfile
        self.zipinfo = zipinfo
        self.mode = 'r'
        self.name = zipinfo.filename
        self._size = zipinfo.file_size

    def seek(self, position):
        """Delegate non-zero seeks to the wrapped stream; ``seek(0)`` does nothing."""
        if position != 0:
            #this will raise an unsupported operation
            return self.file.seek(position)
        #TODO if we have already done a read, reopen file
#TODO if we have already done a read, reopen file
zipextfile = archive.open(path, 'r')
zipinfo = archive.getinfo(path)
djangofile = DjangoZipExtFile(zipextfile, zipinfo)
storage = DefaultStorage()
result = storage.save(djangofile.name, djangofile)