How to save pdfs - python

How could I save dfa as a pdf?
So far I was able to have each of the elements saved separately in each file. How now can I have the whole file saved as a single pdf?
dfa = pd.DataFrame({'STREAM':['EAGLE','HAWK','HAWK','HAWK','EAGLE','HAWK','EAGLE'],'MAT':['A','D','F','D','C','C','E'],'KIS':['B','D','E','D','A','C','D'],'GEO':['B','C','E','E','F','A','B']})
dfa.to_csv('results.csv',index=False)
students_data = csv.reader(open("results.csv", 'r'))
for row in students_data:
STREAM = row[0]
MAT = row[1]
GEO = row[2]
KIS = row[3]
c = canvas.Canvas(MAT +".pdf")
c.drawString(60, 700, "STREAM: " + STREAM)
c.drawString(60, 600, "MAT: " + MAT)
c.drawString(60, 500, "KIS: " + KIS)
c.drawString(60, 400, "GEO: " + GEO)
c.save()

My suggestion would be to create a blank PDF as your template page and then
merge the new PDF to it
packet = io.BytesIO()
# Create the initial canvas.
c = canvas.Canvas(packet)
# your code for adding to the canvas
packet.seek(0)
new_pdf = PdfFileReader(packet)
# Import The Template
template = PdfFileReader(open('path_to_template'), "rb")
output = PdfFileWriter()
# add the created PDF as a watermark to the template
page = template.getPage(0)
page.mergePage(new_pdf.getPage(0))
output.addPage(page)
# finally, write "output" to a real file
outputStream = open(output_path, "wb")
output.write(outputStream)
outputStream.close()
you will need these imports
from PyPDF2 import PdfFileWriter, PdfFileReader
import io

Related

Python add watermark to pdf files

I wrote code to add watermark to pdf files however, the resulting files do not contain the watermark.
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
folder_path = 'D:/pdf/input/'
output_folder = 'D:/pdf/output/'
image_path = 'D:/pdf/watermark.jpg'
x, y = 50, 50
page_number = 1
if not os.path.exists(output_folder):
os.makedirs(output_folder)
for pdf_file in os.listdir(folder_path):
if pdf_file.endswith(".pdf"):
pdf_reader = PdfReader(open(os.path.join(folder_path, pdf_file), "rb"))
pdf_writer = PdfWriter()
for page_num in range(len(pdf_reader.pages)):
pdf_page = pdf_reader.pages[page_num]
pdf_writer.add_page(pdf_page)
if page_num == page_number - 1:
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
can.drawImage(image_path, x, y)
can.save()
packet.seek(0)
new_pdf = PdfReader(packet)
pdf_page.merge_page(new_pdf.pages[0])
with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
pdf_writer.write(output_file)
print("Image added to the specified page of all pdf files and saved in the output folder.")
I've tried changing the image position and page number, but still no effect. I expect watermark to be added at a specific place in the pdf file.
FIXED
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
folder_path = 'D:/pdf/pliki/'
output_folder = 'D:/pdf/output/'
sign_path = 'D:/pdf/obraz/podpis.png'
x_position = 300
y_position = 260
width = 150
height = 150
if not os.path.exists(output_folder):
os.makedirs(output_folder)
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
can.drawImage(sign_path, x_position, y_position, width, height, [112,113,112,113,112,113])
can.save()
packet.seek(0)
new_pdf = PdfReader(packet)
for pdf_file in os.listdir(folder_path):
if pdf_file.endswith(".pdf"):
pdf_reader = PdfReader(open(os.path.join(folder_path, pdf_file), "rb"))
pdf_merged = pdf_reader.pages[0]
pdf_merged.merge_page(new_pdf.pages[0])
pdf_writer = PdfWriter()
for i in range(len(pdf_reader.pages)):
if i == 0:
pdf_writer.add_page(pdf_merged)
else:
pdf_writer.add_page(pdf_reader.pages[i])
with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
pdf_writer.write(output_file)
print("Watermark added to the specified page of all pdf files and saved in the output folder.")

Text rotated when merging pdf pages using Pypdf2 and Reportlab

I'm trying to merge two pages
one from reportlab that has the text I wish and another one is my source pdf
But when I merge those two pages, my text is rotated 90 degree
Pdf created using Report lab -> Overlay Created using Reportlab
when Merged with Source pdf -> Source Pdf
Code that I have Used :
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
packet = io.BytesIO()
c = canvas.Canvas(packet)
c.drawString(0,0,"Hello World")
c.save()
packet.seek(0)
packet_pdf = PdfFileReader(packet)
input_pdf = PdfFileReader(open("Source.pdf", "rb"))
output = PdfFileWriter()
page = input_pdf.getPage(0)
page.mergePage(packet_pdf.getPage(0))
output.addPage(page)
outputStream = open("destination.pdf", "wb")
output.write(outputStream)
outputStream.close()
reference : Add text to Existing PDF using Python
Refered to this and created an own solution -> Python PyPDF2 merge rotated pages
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen.canvas import Canvas
page_to_merge = 0 #Refers to the First page of PDF
xcoor = 250 #To be changed according to your pdf
ycoor = 650 #To be changed according to your pdf
input_pdf = PdfFileReader(open("Source.pdf", "rb"))
page_count = input_pdf.getNumPages()
inputpdf_page_to_be_merged = input_pdf.getPage(page_to_merge)
packet = io.BytesIO()
c = Canvas(packet,pagesize=(inputpdf_page_to_be_merged.mediaBox.getWidth(),inputpdf_page_to_be_merged.mediaBox.getHeight()))
c.drawString(xcoor,ycoor,"Hello World")
c.save()
packet.seek(0)
overlay_pdf = PdfFileReader(packet)
overlay = overlay_pdf.getPage(0)
output = PdfFileWriter()
for PAGE in range(page_count):
if PAGE == page_to_merge:
inputpdf_page_to_be_merged.mergeRotatedTranslatedPage(overlay,
inputpdf_page_to_be_merged.get('/Rotate') or 0,
overlay.mediaBox.getWidth()/2, overlay.mediaBox.getWidth()/2)
output.addPage(inputpdf_page_to_be_merged)
else:
Page_in_pdf = input_pdf.getPage(PAGE)
output.addPage(Page_in_pdf)
outputStream = open("destination.pdf", "wb")
output.write(outputStream)
outputStream.close()

PyPDF2 & ReportLab editing a PDF and merging multiple pages

I'm trying to add some text (page numbers) to an existing PDF file.
Using PyPDF2 package iterating through the original file, creating a canvas, then merging the two files. My problem is that once the program is finished, the new pdf file only has the last page from the original pdf, not all the pages.
eg. If the original pdf has 33 pages, the new pdf only has the last page but with the correct numbering.
Maybe the code can do a better job at explainng:
def test(location, reference, destination):
file = open(location, "rb")
read_pdf = PyPDF2.PdfFileReader(file)
for i in range (0, read_pdf.getNumPages()):
page = read_pdf.getPage(i)
pageReference = "%s_%s"%(reference,format(i+1, '03d'))
width = getPageSizeW(page)
height = getPageSizeH(page)
pagesize = (width, height)
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize = pagesize)
can.setFillColorRGB(1,0,0)
can.drawString(height*3.5, height*2.75, pageReference)
can.save()
packet.seek(0)
new_pdf = PyPDF2.PdfFileReader(packet)
#add new pdf to old pdf
output = PyPDF2.PdfFileWriter()
page.mergePage(new_pdf.getPage(0))
output.addPage(page)
outputStream = open(destination, 'wb')
output.write(outputStream)
print(pageReference)
outputStream.close()
file.close()
def getPageSizeH(p):
h = float(p.mediaBox.getHeight()) * 0.352
return h
def getPageSizeW(p):
w = float(p.mediaBox.getWidth()) * 0.352
return w
Also if anyone has any ideas on how to insert the references on the top right in a better way, it would be appreciated.
I'm not an expert at PyPDF2 but it looks like the only area in your function where you have PyPDF2.PdfFileWriter() is in your for loop, so I suspect you are initiating a new file and adding to it each time in your for loop, which may cause the end result what you see.

Writing a CSV file into a PDF document

I have a python script that parses data from an SQLite DB file and writes it to a .csv file. I would like to write this data to a forensic style report in PDF format. I have already been able to create a template pdf with a heading, date, case number, and short paragraph on details. I was wondering how should I write the .csv file data into a table in the PDF. As shown i have tried iterating through the .csv file after reading it with csv.reader. I can write the initial headings into the file but it will not pull the data from the .csv file and write it. Can anyone point me in the right direction.
# Script to generate a PDF report after data has been parsed into .csv file
# import statements
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import portrait
import csv
# PDF document layout
canvas = canvas.Canvas("H:\College Fourth Year\Development Project\Final Year Project 2018\Forensic Reports\SMS Report.pdf", pagesize=letter)
canvas.setLineWidth(.3)
canvas.setFont('Helvetica', 12)
canvas.drawString(30,750,'LYIT MOBILE FORENSICS DIVISION')
canvas.drawString(500,750,"Date: 12/02/2018")
canvas.line(500,747,595,747)
canvas.drawString(500,725,'Case Number:')
canvas.drawString(580,725,"10")
canvas.line(500,723,595,723)
# Introduction text
line1 = 'This forensic report on SMS data has been compiled by the forensic'
line2 = 'examiner in conclusion to the investigation into the RTA'
line3 = 'case which occured on 23/01/2018'
textObject = canvas.beginText(30, 700)
lines = [line1, line2, line3]
for line in lines:
textObject.textLine(line)
canvas.drawText(textObject)
# File that must be written to report
data_file = 'H:\College Fourth Year\Development Project\Final Year Project 2018\ExtractedEvidence\smsInfo.csv'
c = canvas
# Function for importing data
def import_Data(data_file):
smsInfo = csv.reader(open(data_file, "r"))
for row in smsInfo:
ID = row[0]
Incoming_Number = row[1]
Date_And_Time = row[2]
Read = row[3]
Sent_Replied = row[4]
Body = row[5]
Seen = [6]
pdf_filename = 'SMS Data Report.pdf'
generate_report(ID, Incoming_Number, Date_And_Time, Read, Sent_Replied, Body, Seen)
def generate_report(ID, Date_And_Time, Read, Sent_Replied, Body, Seen, pdf_filename):
#c = canvas.Canvas(pdf_filename, pagesize=portrait(letter))
import_Data(data_file)
canvas.save()
print("Forensic Report Generated!")
Maybe you could try something different. Recently I had to make a similar PDF files using data from csv file. I hope this help you:
# import statements
import requests
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, inch, landscape, legal, letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Image, Spacer, PageBreak, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet
import csv
import os
# Get de work directory
cwd = os.getcwd()
# Introduction text
line1 = 'This forensic report on SMS data has been compiled by the forensic'
line2 = 'examiner in conclusion to the investigation into the RTA'
line3 = 'case which occured on 23/01/2018'
#PDF document layout
table_style = TableStyle([('ALIGN',(1,1),(-2,-2),'RIGHT'),
('TEXTCOLOR',(1,1),(-2,-2),colors.red),
('VALIGN',(0,0),(0,-1),'TOP'),
('TEXTCOLOR',(0,0),(0,-1),colors.blue),
('ALIGN',(0,-1),(-1,-1),'CENTER'),
('VALIGN',(0,-1),(-1,-1),'MIDDLE'),
('TEXTCOLOR',(0,-1),(-1,-1),colors.green),
('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
('BOX', (0,0), (-1,-1), 0.25, colors.black),
])
styles = getSampleStyleSheet()
styleNormal = styles['Normal']
styleHeading = styles['Heading1']
styleHeading2 = styles['Heading2']
styleHeading.alignment = 1 # centre text (TA_CENTRE)
#Configure style and word wrap
s = getSampleStyleSheet()
s = s["BodyText"]
s.wordWrap = 'CJK'
# File that must be written to report
with open ('smsInfo.csv', 'rb') as csvfile:
reader = csv.reader(csvfile)
lista = list(reader)
headers = lista[0]
conteo = 1
for numRecord in range(1,len(lista)):
record1 = lista[numRecord]
data = list()
emptyRecords = list()
records = list()
header = list()
countRecords = 0
for line in record1:
if line == '':
emptyRecords.append(line)
else:
records.append(line)
header.append(headers[countRecords])
data.append([str(headers[countRecords]), str(line)])
countRecords = countRecords + 1
data2 = [[Paragraph(cell, s) for cell in row] for row in data]
t = Table(data2)
t.setStyle(table_style)
elements = []
# Name of file
fileName = cwd + '\\' + 'SMS Data Report' + '-' + str(conteo) + '.pdf'
conteo = conteo + 1
archivo_pdf = SimpleDocTemplate(fileName, pagesize = letter, rightMargin = 40, leftMargin = 40, topMargin = 40, bottomMargin = 28)
#Send the data and build the file
elements.append(Paragraph(line1, styleNormal))
elements.append(Paragraph(line2, styleNormal))
elements.append(Paragraph(line3, styleNormal))
elements.append(Spacer(inch, .25*inch))
elements.append(t)
archivo_pdf.build(elements)
print 'Forensic Report Generated:', fileName
This code generate files like this one:
PDF report

QR Code code inserting '[' and ']', how to stop this?

I'm trying to loop through a list of ~3,000 URLs and create QR codes for them. In one column I have the URLs and in another column I have what I want the QR code file names to be named when output as images.
The problem is the URLs that get converted to QR codes and my file names both come out encased in brackets.
For example:
URL Filename
www.abel.com Abel
Comes out as:
URL in QR Code Filename of QR Code
[www.abel.com] [Abel]
Here's my code so far:
import csv
import qrcode
import pandas as pd
df = pd.read_csv('QR_Python_Test.csv')
i = 1
x = df.iloc[[i]]
print(
x.QR_Code_Name.values)
for i in df.index:
z = df.iloc[[i]]
x = str(z.Link_Short.values)
qr = qrcode.QRCode(version=5, error_correction=qrcode.constants.ERROR_CORRECT_L,box_size=5,border=2,)
qr.add_data(x)
qr.make(fit=True)
img = qr.make_image()
file_name = str(z.QR_Code_Name.values) + ".png"
print('Saving %s' % file_name)
image_file = open(file_name, "w")
img.save(file_name)
image_file.close()
file.close()
And some sample data:
URL Filename
www.apple.com Apple
www.google.com Google
www.microsoft.com Microsoft
www.linux.org Linux
Thank you for your help,
Me
If your DataFrame contains the correct information, you can use DataFrame.itertuples
also separate the functions
reading the data from the file
generating the qr-code
saving the files
That way, you can test each of these individually
def generate_images(df):
for row in df.itertuples():
yield row.Filename, generate_qr(row.URL)
def generate_qr(url):
qr = qrcode.QRCode(version=5, error_correction=qrcode.constants.ERROR_CORRECT_L,box_size=5,border=2,)
qr.add_data(url)
qr.make(fit=True)
return qr.make_image()
def save_qr_code(qr_codes):
for filename, qr_code in qr_codes:
filename = filename + '.png'
print('saving to file %s' % (filename,)
with open(filename, 'wb') as file:
qr_code.save(file)
df = pd.read_csv('my_data.csv')
qr_codes = generate_images(df)
save_qr_code(qr_codes)

Categories

Resources