Python convert list of strings to zip archive of files - python

In a python script, I have a list of strings where each string in the list will represent a text file. How would I convert this list of strings to a zip archive of files?
For example:
list = ['file one content \nblah \nblah', 'file two content \nblah \nblah']
I have tried variations of the following so far
import zipfile
from io import BytesIO
from datetime import datetime
from django.http import HttpResponse
def converting_strings_to_zip():
list = ['file one content \nblah \nblah', 'file two content \nblah \nblah']
mem_file = BytesIO()
with zipfile.ZipFile(mem_file, "w") as zip_file:
for i in range(2):
current_time = datetime.now().strftime("%G-%m-%d")
file_name = 'some_file' + str(current_time) + '(' + str(i) + ')' + '.txt'
zip_file.writestr(file_name, str.encode(list[i]))
zip_file.close()
current_time = datetime.now().strftime("%G-%m-%d")
response = HttpResponse(mem_file, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename="'str(current_time) + '.zip"'
return response
But just leads to 0kb zip files

Was able to solve it by swapping a couple of lines (not resorting to saving the lists as files, then zipping files on the hard drive of the server, then loading the zip into memory and piping to the client like current answers suggest).
import zipfile
from io import BytesIO
from datetime import datetime
from django.http import HttpResponse
def converting_strings_to_zip():
list = ['file one content \nblah \nblah', 'file two content \nblah \nblah']
mem_file = BytesIO()
zip_file = zipfile.ZipFile(mem_file, 'w', zipfile.ZIP_DEFLATED)
for i in range(2):
current_time = datetime.now().strftime("%G-%m-%d")
file_name = 'some_file' + str(current_time) + '(' + str(i) + ')' + '.txt'
zip_file.writestr(file_name, str.encode(list[i]))
zip_file.close()
current_time = datetime.now().strftime("%G-%m-%d")
response = HttpResponse(mem_file.getvalue(), content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename="'str(current_time) + '.zip"'
return response

You could simply write each of the string into it's own file inside a folder you mkdir and then zip it with the zipfile library or by using shutil which is part of python standard library.
Once you have written the string into a directory of your choice you can do:
import shutil
shutil.make_archive('strings_archive.zip', 'zip', 'folder_to_zip')
reference.

Related

Django: Download a temporary image

I am currently trying to create a function within a Django app to download a pandas dataframe as an image. I wanted to create the image as a temporary file, download it, then delete it. Does anyone know how to integrate tempfile into this code?
Views.py
def download_png(request, study):
Names(study) #Funciton to get (name) variable
Retrieve(study) #Function to get (data) variable
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
pngfilename = str(name) + "_" + str(current_time) + ".png"
temp = tempfile.NamedTemporaryFile(suffix=".png")
fig = temp.write(df2img.plot_dataframe(data))
filepath = temp.name
response = HttpResponse(df2img.save_dataframe(fig=fig, filename=filepath), content_type=mimetypes.guess_type(filepath))
response['Content-Disposition'] = "attachment; filename=%s" % pngfilename
return response
Update 06/30/22:
I had a lot of difficulty integrating the tempfile module because it requires data to be converted to bytes-like objects. Instead I simply resolved on deleting the file after creating it.
def download_png(request, study):
Names(study)
Retrieve(study)
pngfilename = str(name) + "_" + str(current_time) + ".png"
mime_type, _ = mimetypes.guess_type(pngfilename)
fig = df2img.plot_dataframe(data)
df2img.save_dataframe(fig=fig, filename=pngfilename)
response = FileResponse(open(pngfilename, 'rb'), content_type=mime_type)
response['Content-Disposition'] = "attachment; filename=%s" % pngfilename
os.remove(pngfilename)
return response
This is how you can use the tempfile library:
tempfile = tempfile.mktemp(suffix='.png')
with open(temp, 'w') as f:
f.write(df2img.plot_dataframe(data))
do_something_with_your_file(temp)
os.unlink(temp)
Edit: was just reading the tempfile documentation and apparently tempfile.mktemp is prone to race conditions, so you should either use tempfile.mkstemp like:
fd, tempf = tempfile.mkstemp()
Or there is a context manager:
with tempfile.NamedTemporaryFile() as tmp:
with open(tmp.name, 'w') as f:
f.write('some data')
# do something with your tempfile file
And then it will automatically delete the tempfile afterwards.

Error opening photos stored in zip file in Django

I'm going to create a zip file from some of the image files stored on my server.
I've used the following function to do this:
def create_zip_file(user, examination):
from lms.models import StudentAnswer
f = BytesIO()
zip = zipfile.ZipFile(f, 'w')
this_student_answer = StudentAnswer.objects.filter(student_id=user.id, exam=examination)
for answer in this_student_answer:
if answer.answer_file:
answer_file_full_path = answer.answer_file.path
fdir, fname = os.path.split(answer_file_full_path)
zip.writestr(fname, answer_file_full_path)
zip.close() # Close
zip_file_name = "student-answers_"+ str(examination.id)+"_" + str(user.id) + "_" + date=datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.zip'
response = HttpResponse(f.getvalue(), content_type="application/x-zip-compressed")
response['Content-Disposition'] = 'attachment; filename=%s' % zip_file_name
return response
Everything is fine and all photos are made in zip file but there is only one problem.
The problem is that the photos won't open and this error will appear in Windows:
Its look like we don't support this file format.
What is wrong with my codes?
To append data from file you have to use
write(filename)
Using writestr(filename) you add only string from variable filename but not from file.

How to set path with glob when the fileName is use for csv?

I'm looking for a solution about path making for glob and for pandas to_csv anyone have a solution ?
My code :
from glob import glob
import json
import pandas as pd
PathIn = 'c:\\Users\\***\\PycharmProjects\\Project\\In'
PathOut = 'c:\\Users\\***\\PycharmProjects\\Project\\Out'
for fileName in glob(PathIn + '*.json', recursive=True):
with open(fileName, 'rb') as f:
json_dict = json.load(f)
print(json_dict)
.
.
.
.
.
.
df.to_csv(PathOut + fileName + '.csv', sep=";")
He doesn't print me my JSON file so don't take any file in my In. And I don't have any CSV in my Output.
the key here is you want to create the output file in the relevant user dir based on the input file, so you could instead just get a list of the users dirs and iterate over each of them settting the in and output file then search the json files and create the csv in the coresponding dir. something like.
import json
from glob import glob
import os.path as op
basepath = r'C:\Users\***\PycharmProjects'
_in = 'In'
_out = 'Out'
suffix = '\*.json'
output_suffix = '.csv'
for path in glob(basepath):
in_dir = op.join(path, _in)
out_dir = op.join(path, _out)
for json_file in glob(in_dir + suffix, recursive=True):
in_file_name = op.basename(json_file)
out_file_name = in_file_name.split('.')[0] + output_suffix
output_file = op.join(out_dir, out_file_name)
with open(json_file) as jf:
json_data = json.load(jf)
print(json_data)
###do some stuff with the json
with open(output_file, 'w') as of:
of.write("some data or json stuff")
Just slightly modifying your code I think you missed a \ when writing the path for searching in the input directory.
For the output directory you need to build your filename by replacing the extension .json with .csv. There are many ways to do that:
for fileName in glob(PathIn + '\*.json', recursive=True):
with open(fileName, 'rb') as f:
json_dict = json.load(f)
print(json_dict)
out_file_name = os.path.split(fileName)[0] + '.csv'
out_file_dir = os.path.join(PathOut, out_file_name)
# Here do something with your output file

Unable to download all documents from eml file

I have a .eml file with 3 attachments in it. I was able to download one of the attachment but unable to download all the attachments.
import os
import email
import base64
# Get list of all files
files = [f for f in os.listdir('.') if os.path.isfile(f)]
# Create output directory
if os.path.exists("output"):
pass
else:
os.makedirs("output")
for eml_file in files:
if eml_file.endswith(".eml"):
with open(eml_file) as f:
email = f.read()
ext=".docx"
if ext is not "":
# Extract the base64 encoding part of the eml file
encoding = email.split(ext+'"')[-1]
if encoding:
# Remove all whitespaces
encoding = "".join(encoding.strip().split())
encoding = encoding.split("=", 1)[0]
# Convert base64 to string
if len(encoding) % 4 != 0: #check if multiple of 4
while len(encoding) % 4 != 0:
encoding = encoding + "="
try:
decoded = base64.b64decode(encoding)
except:
print(encoding)
for i in range(100):
print('\n')
# Save it as docx
path = os.path.splitext(eml_file)[0]
if path:
path = os.path.join("output", path + ext)
try:
os.remove(path)
except OSError:
pass
with open(path, "wb") as f:
f.write(decoded)
else:
print("File not done: " + eml_file)
How can I download all the attachments?
edit: I have initialized the eml_file still not downloading all files.
You import the email module. So why do you ignore it and try to write an email parser yourself? In addition:
You can use glob to list all files with a given extension.
Use should have used not operator in the condition: (if not os.path.exists("output"): os.makedirs("output")), but even this is not necessary, because makedirs has exist_ok parameter.
import os
import glob
import email
from email import policy
indir = '.'
outdir = os.path.join(indir, 'output')
os.makedirs(outdir, exist_ok=True)
files = glob.glob(os.path.join(indir, '*.eml'))
for eml_file in files:
# This will not work in Python 2
msg = email.message_from_file(open(eml_file), policy=policy.default)
for att in msg.iter_attachments():
# Tabs may be added for indentation and not stripped automatically
filename = att.get_filename().replace('\t', '')
# Here we suppose for simplicity sake that each attachment has a valid unique filename,
# which, generally speaking, is not true.
with open(os.path.join(outdir, filename), 'wb') as f:
f.write(att.get_content())

Zip and server multiple in-memory files

I am currently using the code below to generate a word document and then serve this on the web using cherrypy.
tpl.get_docx().save(iostream)
cherrypy.response.headers['Content-Type'] = (
'application/vnd.openxmlformats-officedocument'
'.wordprocessingml.document'
)
cherrypy.response.headers['Content-Disposition'] = (
'attachment; filename={fname}.docx'.format(
fname='SP' + kwargs['sp'] + '-'+ kwargs['WO'] + ' ' + kwargs['site'] + ' - ' + 'RPC Report' +'.docx'
)
)
iostream.seek(0)
return file_generator(iostream)
I plan to create more documents, then zip them in-memory and then serve them on the web. how could this be implmented, i have tried using the zipfile library, it seems complicated zipping in-memory files.
the following example i google around may solve my issue, but not sure how to use it.
import zipfile
import StringIO
zipped_file = StringIO.StringIO()
with zipfile.ZipFile(zipped_file, 'w') as zip:
for i, file in enumerate(files):
file.seek(0)
zip.writestr("{}.csv".format(i), file.read())
zipped_file.seek(0)
After hours of persistance, i got this working, yeahhhh
iostream = BytesIO()
tpl.get_docx().save(iostream)
iostream1 = BytesIO()
tpl1.get_docx().save(iostream1)
zip_output = StringIO.StringIO()
file = zipfile.ZipFile(zip_output, "w")
file.writestr("test.docx", iostream.getvalue())
file.writestr("test1.docx", iostream1.getvalue())
file.close()
cherrypy.response.headers['Content-Type'] = 'application/zip'
cherrypy.response.headers['Content-Disposition'] = 'attachment; filename="test.zip"'
return zip_output.getvalue()

Categories

Resources