I have an application with the back-end written in Python that converts html files to pdf files. To do this it implements wkhtmltopdf (https://wkhtmltopdf.org/). It currently works perfectly for creating a single PDF file from an html file and outputs that to the user.
However, I need to be able to create multiple separate PDF files and then merge the files together into a single PDF.
I have been trying to do this using PyPDF2's PdfFileMerger class (https://pythonhosted.org/PyPDF2/PdfFileMerger.html) but haven't been able to get it working. I keep getting the error: 'bytes' object has no attribute 'seek'.
Here is my current code:
def multi_test_sheet(request, equipment_id):
    """Render two test-sheet templates to PDF and try to merge them with PyPDF2.

    This is the failing version from the question, kept as posted; the
    review notes below mark the lines that look wrong.
    """
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    from io import BytesIO
    from PyPDF2 import PdfFileReader, PdfFileMerger
    # NOTE(review): same authentication check as the one at the top of the function.
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    equipment = Equipment.objects.filter(pk=equipment_id).first()
    # NOTE(review): `job` is never assigned in this function; `equipment`
    # was presumably intended -- confirm.
    if not job:
        raise Http404("test sheet error. Error code: get job failed")
    # NOTE(review): PdfFileWriter is not in the PyPDF2 import above and
    # `pdf_write` is not used afterwards.
    pdf_write = PdfFileWriter()
    user_properties=UserProperties.objects.get(user=request.user)
    # Template context shared by both test-sheet templates.
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": datetime.now().strftime("%b-%d-%Y %H:%M"),
        "now_date": datetime.now().date()
    }
    html_sheet = render_to_string('jobs/test_sheet_gear1.html', context)
    html_sheet2 = render_to_string('jobs/test_sheet_gear2.html', context)
    # With None as the output path the PDF is presumably returned as bytes
    # (that matches the error quoted in the question).
    pdf_content1 = pdfkit.from_string(html_sheet, None)
    pdf_content2 = pdfkit.from_string(html_sheet2, None)
    pdfadder = PdfFileMerger(strict=False)
    # The reported "'bytes' object has no attribute 'seek'" presumably comes
    # from these calls: the raw results are appended without being wrapped in
    # a file-like object.
    pdfadder.append(pdf_content1)
    pdfadder.append(pdf_content2)
    # NOTE(review): `pdf_adder` (with underscore) is a different name from
    # `pdfadder` created above.
    pdf_adder.write("combined_sheets.pdf")
    response = HttpResponse(pdf_adder, content_type="application/pdf")
    response["Content-Disposition"] = f"filename={equipment.site_id}.pdf"
    return response
I resolved this by hiring someone. The problem was that the objects being passed to PyPDF2's PdfFileMerger were not being recognized as PDF file objects.
To resolve that, save the files (I place them in a folder called interm) by passing an output path as the second argument of pdfkit.from_string(), then open the newly created files with open() and assign them to separate variables, and finally merge those variables with PdfFileMerger.
def multi_test_sheet(request, equipment_id):
    """Build both test-sheet PDFs for one piece of equipment and return them merged.

    Each template is rendered to HTML, written to the ``interm`` folder as a
    PDF by pdfkit, and the resulting files are merged with PyPDF2.

    Args:
        request: The current request; unauthenticated users get the login page.
        equipment_id: Primary key of the ``Equipment`` row to print.

    Returns:
        An ``application/pdf`` response containing the merged sheets, or the
        rendered login page when the user is not authenticated.

    Raises:
        Http404: If no equipment with ``equipment_id`` exists.
    """
    from PyPDF2 import PdfFileMerger

    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})

    equipment = Equipment.objects.filter(pk=equipment_id).first()
    if not equipment:
        raise Http404("test sheet error. Error code: get job failed")

    user_properties = UserProperties.objects.get(user=request.user)
    today = datetime.now()
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": today.strftime("%b-%d-%Y %H:%M"),
        "now_date": today.date(),
    }

    # Prefix every interim file with the user's pk so that the file that is
    # written is the same file that is read back, and two users do not
    # overwrite each other's sheets.
    # NOTE: pdfkit.from_string(html, False) returns the PDF as bytes; wrapping
    # those in io.BytesIO would avoid the interim files altogether.
    prefix = f"interm/{user_properties.pk}"
    templates = ("jobs/test_sheet_gear1.html", "jobs/test_sheet_gear2.html")
    sheet_paths = []
    for index, template_name in enumerate(templates, start=1):
        sheet_path = f"{prefix}test_sheet_gear{index}.pdf"
        pdfkit.from_string(render_to_string(template_name, context), sheet_path)
        sheet_paths.append(sheet_path)
    combined_path = f"{prefix}combined_sheets.pdf"

    merger = PdfFileMerger(strict=False)
    try:
        for sheet_path in sheet_paths:
            # append() accepts a path and opens the file itself.
            merger.append(sheet_path, import_bookmarks=False)
        merger.write(combined_path)
    finally:
        # Releases the input files the merger opened.
        merger.close()

    # Read the result fully so no file handle outlives the request.
    with open(combined_path, "rb") as combined_file:
        pdf_bytes = combined_file.read()

    response = HttpResponse(pdf_bytes, content_type="application/pdf")
    response["Content-Disposition"] = f'inline; filename="{equipment.site_id}.pdf"'
    return response
To the new readers, I'd really appreciate it if you could suggest how I could improve my question. Thanks!
I have two working PDF generating views. I would like to create a view that combines these two views & merges the output PDFs produced by the two views to generate one PDF containing the output PDFs of the two views.
I would also like to specify that these two views use different approaches to generate PDFs. The first one renders an HTML template to create PDF & the second creates a PDF from scratch using Reportlab.
I am new to Django, How can I do this?
View 1
class GenerateAllDocs(View):
    """View 1: fill an HTML template from a form and return it as a PDF."""

    def allDocGen(request):
        """Show the form on GET; on a valid POST return the rendered PDF.

        Args:
            request: The current request. A truthy ``download`` query
                parameter switches the response from inline display to a
                file download.

        Returns:
            The PDF response, a plain 'Not Found' response when
            ``render_to_pdf`` yields nothing, or the form page.
        """
        if request.method == 'POST':
            all_doc_gen_form = GenerateAllForms(request.POST)
            if all_doc_gen_form.is_valid():
                data = {
                    'some_Field_1': all_doc_gen_form.cleaned_data['some_Field_1'],
                    'some_Field_2': all_doc_gen_form.cleaned_data['some_Field_2'],
                }
                # render_to_pdf loads and renders the template itself, so no
                # separate get_template()/render() call is needed here.
                pdf = render_to_pdf('PDF_templates/att_pg_pdf_template.html', data)
                if pdf:
                    response = HttpResponse(pdf, content_type='application/pdf')
                    # NOTE(review): 'zzzzz' is not a key of `data`, so this
                    # formats None into the name -- placeholder to replace.
                    filename = "something - %s.pdf" % (data.get('zzzzz'))
                    # The name contains spaces, so it must be quoted.
                    content = 'inline; filename="%s"' % (filename)
                    if request.GET.get('download'):
                        # The original string lacked the '=' after 'filename'.
                        content = 'attachment; filename="%s"' % (filename)
                    response['Content-Disposition'] = content
                    return response
                return HttpResponse('Not Found')
        # GET requests and invalid POSTs fall through to a fresh form.
        all_doc_gen_form = GenerateAllForms()
        return render(request, 'form_UI_templates/pg_att_form_UI_template.html', {'all_doc_gen_form':all_doc_gen_form})
View 2
def template_PDF_view(request):
    """View 2: draw a one-page PDF with ReportLab and return it as a download.

    Args:
        request: The current request (not otherwise used).

    Returns:
        An ``application/pdf`` attachment response named ``somefilename.pdf``.
    """
    # Create the HttpResponse object with the appropriate PDF headers.
    # A Content-Disposition header carries exactly one disposition type; the
    # original 'attachment; inline;' named two.
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"'

    # The context manager closes the buffer even if drawing fails.
    with BytesIO() as buffer:
        # Create the PDF object, using the BytesIO object as its "file."
        p = canvas.Canvas(buffer, pagesize=letter)

        # Draw things on the PDF. Here's where the PDF generation happens.
        p.drawImage('D:/worrk/PyDjango/mysite - PROD VERSION/main/static/images/cerfaImg.jpg',0,0, width=8.27 * inch, height= 11.69 * inch)
        p.drawString(40, 724, " ".join('XX-XXX-XX'.replace('-','').upper()))  # car_licence_plate
        p.drawString(193, 149, 'hello 123')  # address_city

        # Close the PDF object cleanly.
        p.showPage()
        p.save()

        # Copy the finished PDF out of the buffer into the response.
        response.write(buffer.getvalue())
    return response
EDIT #1
Following suggestion by ktowen, here is my updated code. I created a function based view for my view 2 and call that function inside of view 1 & then attempt to merge the two PDFs created by the views. This still doesn't work - error message is written after code.
def create_cerfa(request):
    """Draw the CERFA page with ReportLab and return it as a PDF attachment."""
    pdf_stream = BytesIO()
    sheet = canvas.Canvas(pdf_stream)
    sheet.drawImage(
        'D:/worrk/PyDjango/mysite - PROD VERSION/main/static/images/cerfaImg.jpg',
        0,
        0,
        width=8.27 * inch,
        height=11.69 * inch,
    )
    # Each value is upper-cased and written with a space between characters.
    spaced_plate = " ".join('AZ-343-BT'.replace('-', '').upper())
    spaced_vin = " ".join('VF77JNFUC9J177958').upper()
    sheet.drawString(40, 724, spaced_plate)
    sheet.drawString(178, 724, spaced_vin)
    sheet.save()

    # Take the bytes out before closing the stream.
    payload = pdf_stream.getvalue()
    pdf_stream.close()

    http_response = HttpResponse(content_type='application/pdf')
    http_response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"'
    http_response.write(payload)
    return http_response
def createAttestation(request):
    """Render the attestation PDF, build the CERFA PDF and try to merge them.

    This is the failing version from EDIT #1, kept as posted so that it
    still matches the traceback that follows it.
    """
    if request.method == 'POST':
        all_doc_gen_form = GenerateAllForms(request.POST)
        if all_doc_gen_form.is_valid():
            data1 = all_doc_gen_form.cleaned_data['data1']
            data12 = all_doc_gen_form.cleaned_data['data12']
            template = get_template('PDF_templates/att_pg_pdf_template.html')
            data = {
                'data1': data1,
                'data12': data12,
            }
            # NOTE(review): `html` is not used afterwards; render_to_pdf is
            # given the template path and data again on the next line.
            html = template.render(data)
            pdf = render_to_pdf('PDF_templates/att_pg_pdf_template.html', data)
            cerfa = create_cerfa(request)
            # Both results are wrapped in HttpResponse objects here.
            pdf1resp = HttpResponse(pdf, content_type = 'application/pdf')
            pdf2resp = HttpResponse(cerfa, content_type = 'application/pdf')
            pdfs = [pdf1resp, pdf2resp]
            merger = PdfFileMerger()
            for item in pdfs:
                # The traceback reports "'HttpResponse' object has no
                # attribute 'seek'" for this call: each item is an
                # HttpResponse, not a file-like object.
                merger.append(item)
            # NOTE(review): write() is called without an output target -- confirm.
            merger.write()
            if pdf:
                response = HttpResponse(merger, content_type = 'application/pdf')
                return response
    all_doc_gen_form = GenerateAllForms()
    return render(request, 'form_UI_templates/pg_att_form_UI_template.html', {'all_doc_gen_form':all_doc_gen_form})
Error Message
Traceback (most recent call last):
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\exception.py", line 34, in inner
response = get_response(request)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "D:\worrk\PyDjango\mysite - PROD VERSION\main\views.py", line 460, in createAttestation
merger.append(item)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\PyPDF2\merger.py", line 203, in append
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\PyPDF2\merger.py", line 133, in merge
pdfr = PdfFileReader(fileobj, strict=self.strict)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\PyPDF2\pdf.py", line 1084, in __init__
self.read(stream)
File "C:\Users\Work\AppData\Local\Programs\Python\Python38-32\lib\site-packages\PyPDF2\pdf.py", line 1689, in read
stream.seek(-1, 2)
Exception Type: AttributeError at /tst2/
Exception Value: 'HttpResponse' object has no attribute 'seek'
EDIT #2
So I've narrowed down the problem to the actual code of merging the PDFs. I tested the approach given in EDIT 2 by ktowen.
I commented the merging part and then returned the PDFs as follows
return FileResponse(pdf1, as_attachment=True, content_type='application/pdf')
I checked it with PDF1 & PDF2 - Both are getting generated, but I don't understand what is wrong with the merging part? & how can I fix it?
Minor Edit - #2.1
Here's my Render to PDF function
from __future__ import print_function
from io import BytesIO
from django.http import HttpResponse
from django.template.loader import get_template
from xhtml2pdf import pisa
def render_to_pdf(template_src, context_dict=None):
    """Render a Django template to PDF with xhtml2pdf.

    Args:
        template_src: Template path passed to ``get_template``.
        context_dict: Template context; defaults to an empty dict. ``None``
            is used as the default so no dict is shared between calls.

    Returns:
        An ``application/pdf`` HttpResponse holding the document, or ``None``
        when pisa reports an error.
    """
    if context_dict is None:
        context_dict = {}
    template = get_template(template_src)
    html = template.render(context_dict)
    result = BytesIO()
    # pisa writes the generated PDF into `result`; `pdf` is its status object.
    pdf = pisa.pisaDocument(BytesIO(html.encode("ISO-8859-1")), result)
    if not pdf.err:
        return HttpResponse(result.getvalue(), content_type='application/pdf')
    return None
EDIT #3
Here I modified the PDF definition inside the render_to_pdf function as follows.
# pisa writes the generated PDF into `result`.
pdf = pisa.pisaDocument(BytesIO(html.encode("ISO-8859-1")), result)
# `pdf` is rebound from pisa's return value to the raw bytes held in `result`.
pdf = result.getvalue()
Now error is 'utf-8' codec can't decode byte 0x93 in position 10: invalid start byte
So to resolve this error, I tried changing the encoding inside the render_to_pdf function. Since I use French a lot in this project, I tested recommended encodings like latin1, UTF-8, & the default ISO-8859-1 but still get the same error.
Following is the full error message
Traceback (most recent call last):
File "C:\Users\Work\AppData\Roaming\Python\Python38\site-packages\django\core\handlers\exception.py", line 47, in inner
response = get_response(request)
File "C:\Users\Work\AppData\Roaming\Python\Python38\site-packages\django\core\handlers\base.py", line 179, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "D:\worrk\PyDjango\mysite - PROD VERSION\main\views.py", line 596, in createAttestation
pdf1 = PdfFileReader(open(p1,'rb'))
Exception Type: UnicodeDecodeError at /tst2/
Exception Value: 'utf-8' codec can't decode byte 0x93 in position 10: invalid start byte
You can extract the pdf generation logic to two functions and use PyPDF2 PdfFileMerger to merge the two pdfs.
This is the general idea:
from PyPDF2 import PdfFileMerger
def merged_PDF_view(request):
    """Merge the PDFs from ``get_pdf1`` and ``get_pdf2`` into one download.

    Args:
        request: The current request (not otherwise used).

    Returns:
        An ``application/pdf`` attachment response with both documents.
    """
    from io import BytesIO

    # Create the HttpResponse object with the appropriate PDF headers.
    # Only one disposition type is allowed, so 'inline' is dropped.
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="somefilename.pdf"'

    # get_pdf1()/get_pdf2() must return seekable file-like objects (or paths);
    # PdfFileMerger cannot read raw bytes or response objects.
    pdf_merger = PdfFileMerger()
    # The original wrote to `buffer` without ever creating it.
    buffer = BytesIO()
    try:
        pdf_merger.append(get_pdf1())
        pdf_merger.append(get_pdf2())
        pdf_merger.write(buffer)
    finally:
        pdf_merger.close()

    response.write(buffer.getvalue())
    return response
EDIT 1
Try with this
class WillThisWork(View):  # I hope so :|
    """Merge the PDFs returned by two existing views into one download."""

    def merged_PDF(request):
        """Call both PDF views, merge their bodies and return ``hello.pdf``.

        Args:
            request: The current request, forwarded to both views.

        Returns:
            A FileResponse attachment with the merged document.
        """
        pdf_merger = PdfFileMerger()
        buffer = BytesIO()
        try:
            for pdf_view in (createAttestation, create_cerfa):
                # The views return response objects, which PdfFileMerger
                # cannot read. Joining the response's chunks yields the body
                # bytes; BytesIO makes them seekable.
                # NOTE(review): assumes each view answers this request with a
                # PDF body rather than an HTML page -- confirm.
                pdf_merger.append(BytesIO(b"".join(pdf_view(request))))
            pdf_merger.write(buffer)
        finally:
            pdf_merger.close()
        # Rewind so FileResponse streams from the start.
        buffer.seek(0)
        return FileResponse(buffer, as_attachment=True, filename='hello.pdf')
EDIT 2
Based on your edit, try this:
def create_cerfa_pdf():
    """Draw the CERFA page into memory and return the rewound stream."""
    pdf_stream = BytesIO()
    sheet = canvas.Canvas(pdf_stream)
    sheet.drawImage(
        'D:/worrk/PyDjango/mysite - PROD VERSION/main/static/images/cerfaImg.jpg',
        0,
        0,
        width=8.27 * inch,
        height=11.69 * inch,
    )
    # Each value is upper-cased and written with a space between characters.
    spaced_plate = " ".join('AZ-343-BT'.replace('-', '').upper())
    spaced_vin = " ".join('VF77JNFUC9J177958').upper()
    sheet.drawString(40, 724, spaced_plate)
    sheet.drawString(178, 724, spaced_vin)
    sheet.save()
    # Rewind so the caller reads from the first byte.
    pdf_stream.seek(0)
    return pdf_stream
def create_cerfa(request):
    """Serve the stream from ``create_cerfa_pdf`` as a file download."""
    return FileResponse(
        create_cerfa_pdf(),
        as_attachment=True,
        filename="somefilename.pdf",
    )
def createAttestation(request):
    """On a valid POST, merge the attestation PDF with the CERFA PDF.

    Args:
        request: The current request carrying the ``GenerateAllForms`` data.

    Returns:
        A FileResponse attachment with the merged PDF, a 500 response when
        the attestation could not be rendered, or the form page for GET
        requests and invalid submissions.
    """
    if request.method == 'POST':
        all_doc_gen_form = GenerateAllForms(request.POST)
        if all_doc_gen_form.is_valid():
            data = {
                'data1': all_doc_gen_form.cleaned_data['data1'],
                'data12': all_doc_gen_form.cleaned_data['data12'],
            }
            rendered = render_to_pdf('PDF_templates/att_pg_pdf_template.html', data)
            if rendered is None:
                return HttpResponse('PDF generation failed', status=500)
            # PdfFileMerger needs a seekable stream. render_to_pdf may hand
            # back an HttpResponse (bytes in .content) or raw bytes, so
            # either one is wrapped in BytesIO.
            pdf1 = BytesIO(getattr(rendered, 'content', rendered))
            pdf2 = create_cerfa_pdf()

            merger = PdfFileMerger()
            filestream = BytesIO()
            try:
                for item in (pdf1, pdf2):
                    merger.append(item)
                merger.write(filestream)
            finally:
                merger.close()
            # Rewind so FileResponse streams from the start.
            filestream.seek(0)
            return FileResponse(filestream, as_attachment=True)
    # GET requests and invalid POSTs fall through to a fresh form.
    all_doc_gen_form = GenerateAllForms()
    return render(request, 'form_UI_templates/pg_att_form_UI_template.html', {'all_doc_gen_form':all_doc_gen_form})
So I solved it quite a while ago...
In approach 1, I was using an HTML template, filling in data from the form, and converting it to PDF; in approach 2 I was using ReportLab. So I just rewrote approach 1 using ReportLab, and no merging was needed. The canvas showPage() method creates the page breaks.
I have a scenario where a user uploads some data, Django does some processing in pandas and returns a potentially large txt file. I've got this working but I'm unsure about the scalability of the approach and want to know if there's a better way.
Adapting the "Outputting CSV" section of the Django docs, I have the following:
class MyClass(LoginRequiredMixin, FormView):
    """Form view that answers a valid upload with a generated text file."""

    template_name = 'myapp/mytemplate.html'
    form_class = MyForm
    success_url = '/'  # Replace with your URL or reverse().

    def post(self, request, *args, **kwargs):
        """Validate the upload and return the generated text as a download.

        Returns:
            A ``text/plain`` attachment response when the form is valid,
            otherwise the result of ``form_invalid`` for the bound form.
        """
        # post() is only dispatched for POST requests, so no method check is
        # needed and every path returns a response.
        form = MyForm(request.POST, request.FILES)
        if not form.is_valid():
            return self.form_invalid(form)

        filename = "my-file.txt"
        content = 'any string generated by django'
        response = HttpResponse(content, content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="{0}"'.format(filename)
        return response
In practice I will need to output a text file of perhaps 1000 lines, built by looping over numerous dataframes, should I just build an extremely long text string (content), or is there a better way? In pure python I am more used to creating txt file output using:
# The context manager closes the file even if a write raises.
with open('some_file.txt', 'w') as f:
    f.write("text")
    f.write("text")
Which seems more intuitive.
As requested by comments, updated to show exactly the code I was trying in Django which was returning an empty text file:
class MyClass(LoginRequiredMixin,FormView):
    """Form view from the question that returns an empty text file (kept as posted)."""
    template_name = 'myapp/mytemplate.html'
    form_class = MyForm
    success_url = '/' # Replace with your URL or reverse().
    def post(self, request, *args, **kwargs):
        """Write a file on a valid POST and return it as a download."""
        if request.method == 'POST':
            form = MyForm(request.POST, request.FILES)
            if form.is_valid() :
                f = open( 'some_file.txt', 'w+')
                f.write("text")
                # NOTE(review): the handle is passed on right after write(),
                # with no seek(0) or flush, so reading presumably starts at
                # the end of the file -- the likely cause of the empty
                # download. The file is also opened in text mode; confirm
                # what FileResponse expects.
                return FileResponse(f, as_attachment=True, filename='some_file.txt')
It's very simple:
# Fragment meant to sit inside a view function: create a plain-text response,
# mark it as a download, then write the body into it.
response = HttpResponse(content_type='text/plain')
response['Content-Disposition'] = 'attachment; filename="filename.txt"'
# More write() calls can follow to add further text.
response.write('Hello')
return response
https://docs.djangoproject.com/en/3.2/howto/outputting-csv/
It's same as CSV, just change extension to .txt
Example:
# Fragment meant to sit inside a view function: the response object is handed
# to csv.writer as the output it writes rows to.
response = HttpResponse(content_type='text/plain')
response['Content-Disposition'] = 'attachment; filename="filename.txt"'
writer = csv.writer(response)
writer.writerow(['Hello'])
def pdf_invoice(request, id=None):
    """Render the voucher template to PDF and return the result.

    NOTE(review): ``page_size``, ``title`` and ``init_data`` are presumably
    set by the elided "some code" section -- confirm.
    """
    # some code
    voucher_context = {
        'pagesize': page_size,
        'title': title,
        'init_data': init_data,
    }
    return render_to_pdf('voucher_pdf/voucher_pdf.html', voucher_context)
def render_to_pdf(template_src, context_dict):
    """Render a template to PDF with pisa and wrap the outcome in a response.

    Returns an ``application/pdf`` response on success; when pisa reports an
    error, returns a response showing the escaped HTML instead.
    """
    markup = get_template(template_src).render(context_dict)
    output = BytesIO()
    status = pisa.CreatePDF(BytesIO(markup.encode("ISO-8859-1")), output)
    if status.err:
        return HttpResponse('We had some errors<pre>%s</pre>' % escape(markup))
    return HttpResponse(output.getvalue(), content_type='application/pdf')
The PDF print options appear when I call the pdf_invoice view through its URL, but I need the print dialog to open automatically. Is there any solution? If the question is unclear, do let me know.
I would suggest something, maybe you can do this "request.get_full_path()" for pdf_invoice's url.
This is the download view I have created. It's giving an error.
def download(request):  # Download view
    # NOTE(review): the original indentation is lost; the class is nested
    # under the function here because the answer that follows describes it
    # as a "class under function". As written, download() only defines the
    # class and has no return statement of its own.
    class GeneratePdf(View):
        def get(self, request, *args, **kwargs):
            # NOTE(review): `data` is not defined anywhere in this snippet.
            pdf = render_to_pdf('pdf/invoice.html', data)
            return HttpResponse(pdf, content_type='application/pdf')
There is a structural error here: the class is defined under the function.
You can try this:
def download(request):
    """Render the invoice template to PDF and return it as ``mypdf.pdf``.

    NOTE(review): ``data`` is not defined in this snippet; it must come from
    the enclosing module -- confirm.
    """
    http_response = HttpResponse(
        render_to_pdf('pdf/invoice.html', data),
        content_type='application/pdf',
    )
    http_response['Content-Disposition'] = 'attachment; filename="mypdf.pdf"'
    return http_response