I'm trying to merge two pages
one from reportlab that has the text I wish and another one is my source pdf
But when I merge those two pages, my text is rotated by 90 degrees.
Pdf created using Report lab -> Overlay Created using Reportlab
when Merged with Source pdf -> Source Pdf
Code that I have Used :
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
# Build a one-page overlay PDF in memory that holds the text to stamp.
packet = io.BytesIO()
c = canvas.Canvas(packet)
c.drawString(0, 0, "Hello World")
c.save()
packet.seek(0)  # rewind so the reader parses the overlay from its first byte
packet_pdf = PdfFileReader(packet)

# Keep the source file open until the output has been written, then close it
# deterministically instead of leaking the handle.
with open("Source.pdf", "rb") as source_file:
    input_pdf = PdfFileReader(source_file)
    output = PdfFileWriter()
    page = input_pdf.getPage(0)
    # NOTE: this plain merge is the step that yields the rotated text
    # described above; presumably the source page carries a /Rotate entry
    # -- confirm against the actual file.
    page.mergePage(packet_pdf.getPage(0))
    output.addPage(page)
    with open("destination.pdf", "wb") as outputStream:
        output.write(outputStream)
reference : Add text to Existing PDF using Python
Referred to this and created my own solution -> Python PyPDF2 merge rotated pages
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen.canvas import Canvas
page_to_merge = 0  # Refers to the first page of the PDF
xcoor = 250  # To be changed according to your pdf
ycoor = 650  # To be changed according to your pdf

# Keep the source file open while its pages are read and written out.
with open("Source.pdf", "rb") as source_file:
    input_pdf = PdfFileReader(source_file)
    page_count = input_pdf.getNumPages()
    inputpdf_page_to_be_merged = input_pdf.getPage(page_to_merge)

    # Create the overlay with the same media-box size as the target page.
    packet = io.BytesIO()
    target_box = inputpdf_page_to_be_merged.mediaBox
    c = Canvas(packet, pagesize=(target_box.getWidth(), target_box.getHeight()))
    c.drawString(xcoor, ycoor, "Hello World")
    c.save()
    packet.seek(0)
    overlay_pdf = PdfFileReader(packet)
    overlay = overlay_pdf.getPage(0)

    output = PdfFileWriter()
    for page_index in range(page_count):
        if page_index == page_to_merge:
            # Merge the overlay using the target page's own /Rotate value
            # (0 when the page has none).
            # NOTE(review): both translation arguments use the overlay
            # *width*, exactly as in the original snippet -- confirm this is
            # intended before changing the second one to the height.
            inputpdf_page_to_be_merged.mergeRotatedTranslatedPage(
                overlay,
                inputpdf_page_to_be_merged.get('/Rotate') or 0,
                overlay.mediaBox.getWidth() / 2,
                overlay.mediaBox.getWidth() / 2,
            )
            output.addPage(inputpdf_page_to_be_merged)
        else:
            # Every other page is copied through unchanged.
            output.addPage(input_pdf.getPage(page_index))

    with open("destination.pdf", "wb") as outputStream:
        output.write(outputStream)
Related
I wrote code to add watermark to pdf files however, the resulting files do not contain the watermark.
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
folder_path = 'D:/pdf/input/'
output_folder = 'D:/pdf/output/'
image_path = 'D:/pdf/watermark.jpg'
x, y = 50, 50
page_number = 1  # 1-based number of the page that should get the image

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for pdf_file in os.listdir(folder_path):
    if pdf_file.endswith(".pdf"):
        # Close the input file deterministically once the output is written.
        with open(os.path.join(folder_path, pdf_file), "rb") as source_file:
            pdf_reader = PdfReader(source_file)
            pdf_writer = PdfWriter()
            for page_num in range(len(pdf_reader.pages)):
                pdf_page = pdf_reader.pages[page_num]
                # NOTE: the page is handed to the writer *before* the image
                # is merged into it. This ordering is the behaviour the
                # question is about (the FIXED version merges first);
                # presumably the writer keeps its own copy of the page --
                # confirm against the PyPDF2 version in use.
                pdf_writer.add_page(pdf_page)
                if page_num == page_number - 1:
                    # Draw the image on a letter-sized in-memory page.
                    packet = io.BytesIO()
                    can = canvas.Canvas(packet, pagesize=letter)
                    can.drawImage(image_path, x, y)
                    can.save()
                    packet.seek(0)
                    new_pdf = PdfReader(packet)
                    pdf_page.merge_page(new_pdf.pages[0])
            with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
                pdf_writer.write(output_file)

print("Image added to the specified page of all pdf files and saved in the output folder.")
I've tried changing the image position and page number, but still no effect. I expect watermark to be added at a specific place in the pdf file.
FIXED
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
folder_path = 'D:/pdf/pliki/'
output_folder = 'D:/pdf/output/'
sign_path = 'D:/pdf/obraz/podpis.png'
x_position = 300
y_position = 260
width = 150
height = 150

os.makedirs(output_folder, exist_ok=True)

# Render the image once onto a letter-sized in-memory page; the same
# overlay page is then merged into the first page of every input file.
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
# The sixth positional argument of the original call is passed by name here.
can.drawImage(sign_path, x_position, y_position, width, height,
              mask=[112, 113, 112, 113, 112, 113])
can.save()
packet.seek(0)
new_pdf = PdfReader(packet)

for pdf_file in os.listdir(folder_path):
    if not pdf_file.endswith(".pdf"):
        continue
    # Keep the input open until the output is written, then close it.
    with open(os.path.join(folder_path, pdf_file), "rb") as source_file:
        pdf_reader = PdfReader(source_file)
        # Merge the overlay into the first page BEFORE adding it to the writer.
        pdf_merged = pdf_reader.pages[0]
        pdf_merged.merge_page(new_pdf.pages[0])
        pdf_writer = PdfWriter()
        for index, source_page in enumerate(pdf_reader.pages):
            pdf_writer.add_page(pdf_merged if index == 0 else source_page)
        with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
            pdf_writer.write(output_file)

print("Watermark added to the specified page of all pdf files and saved in the output folder.")
I'm trying to crop a PDF file, but with different dimensions for each page. I have managed to crop it using PyPDF2, but I'm not sure how to apply different dimensions to the second (and final) page. This is what I have so far, using the dimensions for the first page. What do I need to add?
from PyPDF2 import PdfFileWriter, PdfFileReader
print_on = True
output = PdfFileWriter()

# `input_pdf` replaces the original name `input`, which shadowed the builtin.
with open('/Users/Downloads/lol.pdf', 'rb') as source_file:
    input_pdf = PdfFileReader(source_file)
    for i in range(input_pdf.getNumPages()):
        page = input_pdf.getPage(i)
        # The same crop box is applied to every page; per-page dimensions
        # (what the question asks for) are not handled here.
        page.cropBox.upperLeft = (63.389830508474574, 643.7972508591065)
        page.cropBox.lowerRight = (561.8644067796611, 483.2096219931271)
        output.addPage(page)
    with open('/Users/Downloads/result.pdf', 'wb') as outputStream:
        output.write(outputStream)

# Report completion only after the result file has actually been written.
print('Done')
I need to combine (merge/overlay) 2 pdf files like second on first by each page. I've tried the code
import fitz
# Open the background document and the text-only document.
doc1 = fitz.open(background)
doc2 = fitz.open(only_text_path)
# Per the note that follows, this concatenates doc2 after doc1; it does not
# overlay the pages.
doc1.insertPDF(doc2)
but it only concatenates doc1 + doc2, doesn't overlay
Is there a way to do this using fitz (the PyMuPDF library)?
I found some code that uses PyPDF2, but it is slow and not very stable:
from PyPDF2 import PdfFileWriter, PdfFileReader
output = PdfFileWriter()

# Both inputs stay open until the merged result has been written.
with open(background, "rb") as f, open(only_text_path, 'rb') as f2:
    empty_pdf = PdfFileReader(f)
    text_pdf = PdfFileReader(f2)
    text_page_count = text_pdf.getNumPages()
    for i in range(empty_pdf.getNumPages()):
        empty_page = empty_pdf.getPage(i)
        # Overlay the matching text page when one exists; a background page
        # without a counterpart is kept as-is instead of raising.
        if i < text_page_count:
            empty_page.mergePage(text_pdf.getPage(i))
        output.addPage(empty_page)
    with open(merge_result_path, "wb") as out_pdf:
        output.write(out_pdf)
Thanks to #KJ
I used this code, it's working
doc1 = fitz.open(empty_path)
doc2 = fitz.open(only_text_path)

# Overlay page i of doc2 onto page i of doc1. show_pdf_page takes the source
# document and page number directly, so no temporary one-page document is
# built (and left unclosed) per page. Only pages present in both documents
# are processed.
for i in range(min(doc1.page_count, doc2.page_count)):
    page = doc1.load_page(i)
    page.show_pdf_page(
        page.rect,
        doc2,
        pno=i,
        keep_proportion=True,
        overlay=True,
        oc=0,
        rotate=0,
        clip=None,
    )

doc1.save(merge_result_path, encryption=fitz.PDF_ENCRYPT_KEEP)
I'm trying to make unique edits to individual pages in a pre-existing pdf. However, the edits remain the same.
I've tried using FPDF (wasn't sure of how to edit a pre-existing pdf with this) and then am now trying PYPDF2 with reportlab.
#
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
def WriteOnPdf(targetpdf, pageTopicsDict):
    """Stamp text onto every page of *targetpdf* and write "destination.pdf".

    Args:
        targetpdf: Path of the existing PDF to read.
        pageTopicsDict: Object indexed by page index (0-based) that yields
            the text for that page.

    NOTE: as written, the overlay PDF is rendered once, before the loop,
    from ``pageTopicsDict[0]``, and that single overlay page is merged into
    every page -- so all pages receive the first entry's text. This is the
    behaviour reported below; the stated fix is to build the packet/canvas
    inside the loop.
    """
    packet = io.BytesIO()
    # Create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont('Helvetica', 13)
    can.drawString(5, 730, pageTopicsDict[0])
    can.save()
    # Move to the beginning of the in-memory buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    # Read the existing PDF; keep it open until the output is written.
    with open(targetpdf, "rb") as existing_file:
        existing_pdf = PdfFileReader(existing_file)
        # Add the "watermark" (which is the new pdf) on the existing page
        for i in range(existing_pdf.numPages):
            print(i, pageTopicsDict[i])
            # NOTE: the canvas was already saved and `new_pdf` already
            # parsed above, so this draw call does not reach the overlay
            # that is merged on the next lines.
            can.drawString(5, 730, pageTopicsDict[i])
            page = existing_pdf.getPage(i)
            page.mergePage(new_pdf.getPage(0))  # index out of range if not set to 0.
            output.addPage(page)
        # Finally, write "output" to a real file
        with open("destination.pdf", "wb") as outputStream:
            output.write(outputStream)
# Sample input: page index -> text expected on that page.
dummyDict = {0: "abc", 1: "de, fg", 2: "hijklmn"}
WriteOnPdf ("test.pdf", dummyDict)
Expected: pdf with "abc" on top left hand corner of page 0, "de, fg" on page 1, "hijklmn" on page 2...
Actual: all pages have "abc"
Solved; initialized the packet and relevant variables in the for loop instead of outside.
I'm trying to add some text (page numbers) to an existing PDF file.
Using PyPDF2 package iterating through the original file, creating a canvas, then merging the two files. My problem is that once the program is finished, the new pdf file only has the last page from the original pdf, not all the pages.
eg. If the original pdf has 33 pages, the new pdf only has the last page but with the correct numbering.
Maybe the code can do a better job of explaining:
def test(location, reference, destination):
    """Stamp a reference label on each page of a PDF and write *destination*.

    Args:
        location: Path of the PDF to read.
        reference: Prefix of the label; each page gets
            ``"<reference>_<NNN>"`` with a 1-based, zero-padded page number.
        destination: Path of the PDF to write.

    NOTE: the writer is created and *destination* is rewritten inside the
    loop, so each iteration replaces the previous output with a single page.
    This matches the reported result (only the last page survives); creating
    the writer once before the loop and writing once after it is the
    suggested remedy.
    """
    with open(location, "rb") as file:
        read_pdf = PyPDF2.PdfFileReader(file)
        for i in range(0, read_pdf.getNumPages()):
            page = read_pdf.getPage(i)
            pageReference = "%s_%s" % (reference, format(i + 1, '03d'))
            width = getPageSizeW(page)
            height = getPageSizeH(page)
            pagesize = (width, height)
            # Render the label onto an in-memory page of the computed size.
            packet = io.BytesIO()
            can = canvas.Canvas(packet, pagesize=pagesize)
            can.setFillColorRGB(1, 0, 0)
            can.drawString(height * 3.5, height * 2.75, pageReference)
            can.save()
            packet.seek(0)
            new_pdf = PyPDF2.PdfFileReader(packet)
            # add new pdf to old pdf
            output = PyPDF2.PdfFileWriter()
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            with open(destination, 'wb') as outputStream:
                output.write(outputStream)
                print(pageReference)
def getPageSizeH(p):
    """Return the media-box height of page *p* multiplied by 0.352.

    The factor is presumably a points-to-millimetres conversion
    (1 pt is about 0.353 mm) -- confirm the intended unit with the caller.
    """
    return float(p.mediaBox.getHeight()) * 0.352
def getPageSizeW(p):
    """Return the media-box width of page *p* multiplied by 0.352.

    The factor is presumably a points-to-millimetres conversion
    (1 pt is about 0.353 mm) -- confirm the intended unit with the caller.
    """
    return float(p.mediaBox.getWidth()) * 0.352
Also if anyone has any ideas on how to insert the references on the top right in a better way, it would be appreciated.
I'm not an expert at PyPDF2 but it looks like the only area in your function where you have PyPDF2.PdfFileWriter() is in your for loop, so I suspect you are initiating a new file and adding to it each time in your for loop, which may cause the end result what you see.