How to zip an html file from a stream/rendered dictionary?

How to zip an html file from a stream/rendered dictionary? - python

I am having trouble downloading an html file through the flask send_file.
Basically, to download an html file alone, it works perfectly. by giving the stream to the send_file function as a parameter
However; I need to put this file into a zip along with other unrelated files. There, in the write function, neither the stream nor the string (result_html) work. I need somehow to transform it directly to an html file and put in the zip file
I don't see how I could do this for the moment. I have the data (output) as a dict...
Thank you if you have any pointers
from flask import render_template, send_file
from io import BytesIO
result_html = render_template('myResult.html', **output)
result_stream = BytesIO(str(result_html).encode())
with ZipFile("zipped_result.zip", "w") as zf:
zf.write(result_html)
# zf.write(other_files)
send_file(zf, as_attachment=True, attachment_filename="myfile.zip")

If I understand you correctly, it is sufficient to write the zip file in a stream and add the result of the rendering as a character string to the zip file. The stream can then be transmitted via send_file.
from flask import render_template, send_file
from io import BytesIO
from zipfile import ZipFile
# ...
#app.route('/download')
def download():
output = { 'name': 'Unknown' }
result_html = render_template('result.html', **output)
stream = BytesIO()
with ZipFile(stream, 'w') as zf:
zf.writestr('result.html', result_html)
# ...
stream.seek(0)
return send_file(stream, as_attachment=True, attachment_filename='archive.zip')

Related

How to send zip file with send_file flask framework

I am having an issue trying to download download in-memory ZIP-FILE object using Flask send_file. my zip exists in memory and is full of text documents but when I try with this code
the result I get is: it downloads like it is supposed to but it downloads an empty zip file! it's like it is copying nothing ... I have no idea how to solve this problem.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import send_file
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
return send_file(fileobj, mimetype='zip', as_attachment=True,
attachment_filename='%s.zip' % os.path.basename(FILEPATH))

I had the exact same issue with the Flask send_file method.
Details:
Flask version 2.0.1
OS: Windows 10
Solution
I figured out a workaround to this i.e. instead of the send_file method, this can be done by returning a Response object with the data. Replace the return statement in your code with the following and this should work.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import Response # Changed line
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
# Changed line below
return Response(fileobj.getvalue(),
mimetype='application/zip',
headers={'Content-Disposition': 'attachment;filename=your_filename.zip'})

Python Flask send_file StringIO blank files

I want to process a Pandas dataframe and send it to download as a CSV without a temp file. The best way to accomplish this I've seen is to use StringIO. Using the code below, a file downloads with the proper name, however the file is completely blank, and no error is shown. Why doesn't the file contain data?
#app.route('/test_download', methods = ['POST'])
def test_download():
buffer = StringIO()
buffer.write('Just some letters.')
buffer.seek(0)
return send_file(
buffer,
as_attachment=True,
download_name='a_file.txt',
mimetype='text/csv'
)

The issue here is that in Python 3 you need to use StringIO with csv.write and send_file requires BytesIO, so you have to do both.
#app.route('/test_download')
def test_download():
row = ['hello', 'world']
proxy = io.StringIO()
writer = csv.writer(proxy)
writer.writerow(row)
# Creating the byteIO object from the StringIO Object
mem = io.BytesIO()
mem.write(proxy.getvalue().encode())
# seeking was necessary. Python 3.5.2, Flask 0.12.2
mem.seek(0)
proxy.close()
return send_file(
mem,
as_attachment=True,
download_name='test.csv',
mimetype='text/csv'
)
Prior to Flask 2.0, download_name was called attachment_filename.

Use BytesIO to write bytes.
from io import BytesIO
from flask import Flask, send_file
app = Flask(__name__)
#app.route('/test_download', methods=['POST'])
def test_download():
# Use BytesIO instead of StringIO here.
buffer = BytesIO()
buffer.write(b'Just some letters.')
# Or you can encode it to bytes.
# buffer.write('Just some letters.'.encode('utf-8'))
buffer.seek(0)
return send_file(
buffer,
as_attachment=True,
download_name='a_file.txt',
mimetype='text/csv'
)
Prior to Flask 2.0, download_name was called attachment_filename.

make_response
To get Flask to download a csv file to the user, we pass a csv string to the make_response function, which returns a
Response object.
Then we add a Header which tells the browser to accept the file as a download.
The Mimetype also must be set to text/csv in order to get the web browser to save it in something other than an html document.
from flask import Flask, make_response
app = Flask(__name__)
#app.route('/test_download', methods=['POST'])
def test_download():
with StringIO() as buffer:
# forming a StringIO object
buffer = StringIO()
buffer.write('Just some letters.')
# forming a Response object with Headers to return from flask
response = make_response(buffer.getvalue())
response.headers['Content-Disposition'] = 'attachment; filename=namaste.csv'
response.mimetype = 'text/csv'
# return the Response object
return response
P.S. It is preferred to use python's built-in csv library to deal with csv files
References
https://matthewmoisen.com/blog/how-to-download-a-csv-file-in-flask/
https://www.geeksforgeeks.org/stringio-module-in-python/
https://docs.python.org/3/library/csv.html
Namaste 🙏

if someone use python 2.7 with Flask and got the error about the module StringIO by importing it. This post can help you to solve your problem.
If you are importing String IO module, you can just change the import syntax by using this : from io import StringIO instead from StringIO import StringIO.
You can Also use from io import BytesIO if you are using image or some others ressource.
Thank you

Send with multiple CSVs using Flask?

I have an app that takes in some information, performs some calculations using pandas, and turns the final pandas data frame into a CSV that is then downloaded using the Flask app. How do I download multiple CSVs within one view? It seems that I can only return a single response at a time.
An example snippet:
def serve_csv(dataframe,filename):
buffer = StringIO.StringIO()
dataframe.to_csv(buffer, encoding='utf-8', index=False)
buffer.seek(0)
return send_file(buffer,
attachment_filename=filename,
mimetype='text/csv')
def make_calculation(arg1, arg2):
'''Does some calculations.
input: arg1 - string, arg2- string
returns: a pandas data frame'''
#app.route('test_app', methods=['GET', 'POST'])
def test_app():
form = Form1()
if form.validate_on_submit():
calculated_dataframe = make_calculation(str(form.input_1.data), str(form.input_2.data))
return serve_csv(calculated_dataframe, 'Your_final_output.csv')
return render_template('test_app.html', form=form)
So let's say in that example above that make_calculation returned two pandas data frames. How would I print both of them to a CSV?

This is all the code you need using the Zip files. It will return a zip file with all of your files.
In my program everything I want to zip is in an output folder so i just use os.walk and put it in the zip file with write. Before returning the file you need to close it, if you don't close it will return an empty file.
import zipfile
import os
from flask import send_file
#app.route('/download_all')
def download_all():
zipf = zipfile.ZipFile('Name.zip','w', zipfile.ZIP_DEFLATED)
for root,dirs, files in os.walk('output/'):
for file in files:
zipf.write('output/'+file)
zipf.close()
return send_file('Name.zip',
mimetype = 'zip',
attachment_filename= 'Name.zip',
as_attachment = True)
In the html I simply call the route:
DOWNLOAD ALL
I hope this helped somebody. :)

You could return a MIME Multipart response, a zip file, or a TAR ball (please note the linked RFC is somewhat out of date, but is easier to quickly get up to speed with because it's in HTML; the official one is here).
If you choose to do a MIME multipart response, a good starting point might be to look at the MultipartEncoder and MultipartDecoder in requests toolbelt; you may be able to use them directly, or at least subclass/compose using those to get your desired behavior. Zip files and TAR balls can be implemented using standard library modules.
An alternative would be to design your API so that you were returning JSON, use a header (or XML element or JSON field) to indicate that additional CSVs could be obtained by another request, or similar.

Building on #desfido's answer above, here would be some code implementation that does not involve using zip, and instead downloads two different files:
from requests_toolbelt import MultipartEncoder
def make_calculation(arg1, arg2):
'''Does some calculations.
input: arg1 - string, arg2- string
puts results in two different dataframes
and stores them in two different files,
returns the names of those two files'''
return filename1, filename2
#app.route('test_app', methods=['GET', 'POST'])
def test_app():
form = Form1()
if form.validate_on_submit():
f1, f2 = make_calculation(str(form.input_1.data), str(form.input_2.data))
m = MultipartEncoder({
'field1': (f1, open(f1, 'rb'), 'text/plain'),
'field2': (f2, open(f2, 'rb'), 'text/plain')
})
return Response(m.to_string(), mimetype=m.content_type)
return render_template('test_app.html', form=form)

Also you may try this, using zip module --
import zipfile
from os.path import basename
UPLOAD_PATH = <upload_location>
base_files = ["file1.csv", "file2.csv"]
with zipfile.ZipFile(UPLOAD_PATH + 'Test.zip', 'w') as zipF:
for file in base_files:
zipF.write(UPLOAD_PATH + file, basename(UPLOAD_PATH + file), compress_type=zipfile.ZIP_DEFLATED)
zipF.close()
return send_file(METAFILE_UPLOADS+'Test.zip', mimetype='zip', attachment_filename='Test.zip', as_attachment=True)

How to use pyramid.response.FileIter

I have the following view code that attempts to "stream" a zipfile to the client for download:
import os
import zipfile
import tempfile
from pyramid.response import FileIter
def zipper(request):
_temp_path = request.registry.settings['_temp']
tmpfile = tempfile.NamedTemporaryFile('w', dir=_temp_path, delete=True)
tmpfile_path = tmpfile.name
## creating zipfile and adding files
z = zipfile.ZipFile(tmpfile_path, "w")
z.write('somefile1.txt')
z.write('somefile2.txt')
z.close()
## renaming the zipfile
new_zip_path = _temp_path + '/somefilegroup.zip'
os.rename(tmpfile_path, new_zip_path)
## re-opening the zipfile with new name
z = zipfile.ZipFile(new_zip_path, 'r')
response = FileIter(z.fp)
return response
However, this is the Response I get in the browser:
Could not convert return value of the view callable function newsite.static.zipper into a response object. The value returned was .
I suppose I am not using FileIter correctly.
UPDATE:
Since updating with Michael Merickel's suggestions, the FileIter function is working correctly. However, still lingering is a MIME type error that appears on the client (browser):
Resource interpreted as Document but transferred with MIME type application/zip: "http://newsite.local:6543/zipper?data=%7B%22ids%22%3A%5B6%2C7%5D%7D"
To better illustrate the issue, I have included a tiny .py and .pt file on Github: https://github.com/thapar/zipper-fix

FileIter is not a response object, just like your error message says. It is an iterable that can be used for the response body, that's it. Also the ZipFile can accept a file object, which is more useful here than a file path. Let's try writing into the tmpfile, then rewinding that file pointer back to the start, and using it to write out without doing any fancy renaming.
import os
import zipfile
import tempfile
from pyramid.response import FileIter
def zipper(request):
_temp_path = request.registry.settings['_temp']
fp = tempfile.NamedTemporaryFile('w+b', dir=_temp_path, delete=True)
## creating zipfile and adding files
z = zipfile.ZipFile(fp, "w")
z.write('somefile1.txt')
z.write('somefile2.txt')
z.close()
# rewind fp back to start of the file
fp.seek(0)
response = request.response
response.content_type = 'application/zip'
response.app_iter = FileIter(fp)
return response
I changed the mode on NamedTemporaryFile to 'w+b' as per the docs to allow the file to be written to and read from.

current Pyramid version has 2 convenience classes for this use case- FileResponse, FileIter. The snippet below will serve a static file. I ran this code - the downloaded file is named "download" like the view name. To change the file name and more set the Content-Disposition header or have a look at the arguments of pyramid.response.Response.
from pyramid.response import FileResponse
#view_config(name="download")
def zipper(request):
path = 'path_to_file'
return FileResponse(path, request) #passing request is required
docs:
http://docs.pylonsproject.org/projects/pyramid/en/latest/api/response.html#
hint: extract the Zip logic from the view if possible

Downloading and unzipping a .zip file without writing to disk

I have managed to get my first python script to work which downloads a list of .ZIP files from a URL and then proceeds to extract the ZIP files and writes them to disk.
I am now at a loss to achieve the next step.
My primary goal is to download and extract the zip file and pass the contents (CSV data) via a TCP stream. I would prefer not to actually write any of the zip or extracted files to disk if I could get away with it.
Here is my current script which works but unfortunately has to write the files to disk.
import urllib, urllister
import zipfile
import urllib2
import os
import time
import pickle
# check for extraction directories existence
if not os.path.isdir('downloaded'):
os.makedirs('downloaded')
if not os.path.isdir('extracted'):
os.makedirs('extracted')
# open logfile for downloaded data and save to local variable
if os.path.isfile('downloaded.pickle'):
downloadedLog = pickle.load(open('downloaded.pickle'))
else:
downloadedLog = {'key':'value'}
# remove entries older than 5 days (to maintain speed)
# path of zip files
zipFileURL = "http://www.thewebserver.com/that/contains/a/directory/of/zip/files"
# retrieve list of URLs from the webservers
usock = urllib.urlopen(zipFileURL)
parser = urllister.URLLister()
parser.feed(usock.read())
usock.close()
parser.close()
# only parse urls
for url in parser.urls:
if "PUBLIC_P5MIN" in url:
# download the file
downloadURL = zipFileURL + url
outputFilename = "downloaded/" + url
# check if file already exists on disk
if url in downloadedLog or os.path.isfile(outputFilename):
print "Skipping " + downloadURL
continue
print "Downloading ",downloadURL
response = urllib2.urlopen(downloadURL)
zippedData = response.read()
# save data to disk
print "Saving to ",outputFilename
output = open(outputFilename,'wb')
output.write(zippedData)
output.close()
# extract the data
zfobj = zipfile.ZipFile(outputFilename)
for name in zfobj.namelist():
uncompressed = zfobj.read(name)
# save uncompressed data to disk
outputFilename = "extracted/" + name
print "Saving extracted file to ",outputFilename
output = open(outputFilename,'wb')
output.write(uncompressed)
output.close()
# send data via tcp stream
# file successfully downloaded and extracted store into local log and filesystem log
downloadedLog[url] = time.time();
pickle.dump(downloadedLog, open('downloaded.pickle', "wb" ))

Below is a code snippet I used to fetch zipped csv file, please have a look:
Python 2:
from StringIO import StringIO
from zipfile import ZipFile
from urllib import urlopen
resp = urlopen("http://www.test.com/file.zip")
myzip = ZipFile(StringIO(resp.read()))
for line in myzip.open(file).readlines():
print line
Python 3:
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
# or: requests.get(url).content
resp = urlopen("http://www.test.com/file.zip")
myzip = ZipFile(BytesIO(resp.read()))
for line in myzip.open(file).readlines():
print(line.decode('utf-8'))
Here file is a string. To get the actual string that you want to pass, you can use zipfile.namelist(). For instance,
resp = urlopen('http://mlg.ucd.ie/files/datasets/bbc.zip')
myzip = ZipFile(BytesIO(resp.read()))
myzip.namelist()
# ['bbc.classes', 'bbc.docs', 'bbc.mtx', 'bbc.terms']

My suggestion would be to use a StringIO object. They emulate files, but reside in memory. So you could do something like this:
# get_zip_data() gets a zip archive containing 'foo.txt', reading 'hey, foo'
import zipfile
from StringIO import StringIO
zipdata = StringIO()
zipdata.write(get_zip_data())
myzipfile = zipfile.ZipFile(zipdata)
foofile = myzipfile.open('foo.txt')
print foofile.read()
# output: "hey, foo"
Or more simply (apologies to Vishal):
myzipfile = zipfile.ZipFile(StringIO(get_zip_data()))
for name in myzipfile.namelist():
[ ... ]
In Python 3 use BytesIO instead of StringIO:
import zipfile
from io import BytesIO
filebytes = BytesIO(get_zip_data())
myzipfile = zipfile.ZipFile(filebytes)
for name in myzipfile.namelist():
[ ... ]

I'd like to offer an updated Python 3 version of Vishal's excellent answer, which was using Python 2, along with some explanation of the adaptations / changes, which may have been already mentioned.
from io import BytesIO
from zipfile import ZipFile
import urllib.request
url = urllib.request.urlopen("http://www.unece.org/fileadmin/DAM/cefact/locode/loc162txt.zip")
with ZipFile(BytesIO(url.read())) as my_zip_file:
for contained_file in my_zip_file.namelist():
# with open(("unzipped_and_read_" + contained_file + ".file"), "wb") as output:
for line in my_zip_file.open(contained_file).readlines():
print(line)
# output.write(line)
Necessary changes:
There's no StringIO module in Python 3 (it's been moved to io.StringIO). Instead, I use io.BytesIO]2, because we will be handling a bytestream -- Docs, also this thread.
urlopen:
"The legacy urllib.urlopen function from Python 2.6 and earlier has been discontinued; urllib.request.urlopen() corresponds to the old urllib2.urlopen.", Docs and this thread.
Note:
In Python 3, the printed output lines will look like so: b'some text'. This is expected, as they aren't strings - remember, we're reading a bytestream. Have a look at Dan04's excellent answer.
A few minor changes I made:
I use with ... as instead of zipfile = ... according to the Docs.
The script now uses .namelist() to cycle through all the files in the zip and print their contents.
I moved the creation of the ZipFile object into the with statement, although I'm not sure if that's better.
I added (and commented out) an option to write the bytestream to file (per file in the zip), in response to NumenorForLife's comment; it adds "unzipped_and_read_" to the beginning of the filename and a ".file" extension (I prefer not to use ".txt" for files with bytestrings). The indenting of the code will, of course, need to be adjusted if you want to use it.
Need to be careful here -- because we have a byte string, we use binary mode, so "wb"; I have a feeling that writing binary opens a can of worms anyway...
I am using an example file, the UN/LOCODE text archive:
What I didn't do:
NumenorForLife asked about saving the zip to disk. I'm not sure what he meant by it -- downloading the zip file? That's a different task; see Oleh Prypin's excellent answer.
Here's a way:
import urllib.request
import shutil
with urllib.request.urlopen("http://www.unece.org/fileadmin/DAM/cefact/locode/2015-2_UNLOCODE_SecretariatNotes.pdf") as response, open("downloaded_file.pdf", 'w') as out_file:
shutil.copyfileobj(response, out_file)

I'd like to add my Python3 answer for completeness:
from io import BytesIO
from zipfile import ZipFile
import requests
def get_zip(file_url):
url = requests.get(file_url)
zipfile = ZipFile(BytesIO(url.content))
files = [zipfile.open(file_name) for file_name in zipfile.namelist()]
return files.pop() if len(files) == 1 else files

write to a temporary file which resides in RAM
it turns out the tempfile module ( http://docs.python.org/library/tempfile.html ) has just the thing:
tempfile.SpooledTemporaryFile([max_size=0[,
mode='w+b'[, bufsize=-1[, suffix=''[,
prefix='tmp'[, dir=None]]]]]])
This
function operates exactly as
TemporaryFile() does, except that data
is spooled in memory until the file
size exceeds max_size, or until the
file’s fileno() method is called, at
which point the contents are written
to disk and operation proceeds as with
TemporaryFile().
The resulting file has one additional
method, rollover(), which causes the
file to roll over to an on-disk file
regardless of its size.
The returned object is a file-like
object whose _file attribute is either
a StringIO object or a true file
object, depending on whether
rollover() has been called. This
file-like object can be used in a with
statement, just like a normal file.
New in version 2.6.
or if you're lazy and you have a tmpfs-mounted /tmp on Linux, you can just make a file there, but you have to delete it yourself and deal with naming

Adding on to the other answers using requests:
# download from web
import requests
url = 'http://mlg.ucd.ie/files/datasets/bbc.zip'
content = requests.get(url)
# unzip the content
from io import BytesIO
from zipfile import ZipFile
f = ZipFile(BytesIO(content.content))
print(f.namelist())
# outputs ['bbc.classes', 'bbc.docs', 'bbc.mtx', 'bbc.terms']
Use help(f) to get more functions details for e.g. extractall() which extracts the contents in zip file which later can be used with with open.

All of these answers appear too bulky and long. Use requests to shorten the code, e.g.:
import requests, zipfile, io
r = requests.get(zip_file_url)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("/path/to/directory")

Vishal's example, however great, confuses when it comes to the file name, and I do not see the merit of redefing 'zipfile'.
Here is my example that downloads a zip that contains some files, one of which is a csv file that I subsequently read into a pandas DataFrame:
from StringIO import StringIO
from zipfile import ZipFile
from urllib import urlopen
import pandas
url = urlopen("https://www.federalreserve.gov/apps/mdrm/pdf/MDRM.zip")
zf = ZipFile(StringIO(url.read()))
for item in zf.namelist():
print("File in zip: "+ item)
# find the first matching csv file in the zip:
match = [s for s in zf.namelist() if ".csv" in s][0]
# the first line of the file contains a string - that line shall de ignored, hence skiprows
df = pandas.read_csv(zf.open(match), low_memory=False, skiprows=[0])
(Note, I use Python 2.7.13)
This is the exact solution that worked for me. I just tweaked it a little bit for Python 3 version by removing StringIO and adding IO library
Python 3 Version
from io import BytesIO
from zipfile import ZipFile
import pandas
import requests
url = "https://www.nseindia.com/content/indices/mcwb_jun19.zip"
content = requests.get(url)
zf = ZipFile(BytesIO(content.content))
for item in zf.namelist():
print("File in zip: "+ item)
# find the first matching csv file in the zip:
match = [s for s in zf.namelist() if ".csv" in s][0]
# the first line of the file contains a string - that line shall de ignored, hence skiprows
df = pandas.read_csv(zf.open(match), low_memory=False, skiprows=[0])

It wasn't obvious in Vishal's answer what the file name was supposed to be in cases where there is no file on disk. I've modified his answer to work without modification for most needs.
from StringIO import StringIO
from zipfile import ZipFile
from urllib import urlopen
def unzip_string(zipped_string):
unzipped_string = ''
zipfile = ZipFile(StringIO(zipped_string))
for name in zipfile.namelist():
unzipped_string += zipfile.open(name).read()
return unzipped_string

Use the zipfile module. To extract a file from a URL, you'll need to wrap the result of a urlopen call in a BytesIO object. This is because the result of a web request returned by urlopen doesn't support seeking:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
zip_url = 'http://example.com/my_file.zip'
with urlopen(zip_url) as f:
with BytesIO(f.read()) as b, ZipFile(b) as myzipfile:
foofile = myzipfile.open('foo.txt')
print(foofile.read())
If you already have the file downloaded locally, you don't need BytesIO, just open it in binary mode and pass to ZipFile directly:
from zipfile import ZipFile
zip_filename = 'my_file.zip'
with open(zip_filename, 'rb') as f:
with ZipFile(f) as myzipfile:
foofile = myzipfile.open('foo.txt')
print(foofile.read().decode('utf-8'))
Again, note that you have to open the file in binary ('rb') mode, not as text or you'll get a zipfile.BadZipFile: File is not a zip file error.
It's good practice to use all these things as context managers with the with statement, so that they'll be closed properly.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to zip an html file from a stream/rendered dictionary? - python

Related

How to send zip file with send_file flask framework

Python Flask send_file StringIO blank files

Send with multiple CSVs using Flask?

How to use pyramid.response.FileIter

Downloading and unzipping a .zip file without writing to disk

Categories

Resources