Python add watermark to pdf files - python

I wrote code to add a watermark to PDF files; however, the resulting files do not contain the watermark.
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# --- Configuration ---------------------------------------------------------
folder_path = 'D:/pdf/input/'        # directory scanned for source PDFs
output_folder = 'D:/pdf/output/'     # directory that receives the processed copies
image_path = 'D:/pdf/watermark.jpg'  # image drawn onto the overlay page
x, y = 50, 50                        # position handed to drawImage
page_number = 1                      # 1-based number of the page to stamp

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Zero-based index of the page that should receive the image.
target_index = page_number - 1

for pdf_file in os.listdir(folder_path):
    if not pdf_file.endswith(".pdf"):
        continue

    pdf_reader = PdfReader(open(os.path.join(folder_path, pdf_file), "rb"))
    pdf_writer = PdfWriter()

    for page_num, pdf_page in enumerate(pdf_reader.pages):
        # NOTE(review): the page is handed to the writer *before* the overlay
        # is merged into it. If add_page stores its own copy of the page (as
        # recent PyPDF2 releases appear to do), the merge below never reaches
        # the output, which would match the missing watermark reported for
        # this version. Confirm against the installed PyPDF2.
        pdf_writer.add_page(pdf_page)
        if page_num != target_index:
            continue

        # Render the image into an in-memory, single-page PDF ...
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=letter)
        can.drawImage(image_path, x, y)
        can.save()
        packet.seek(0)

        # ... and merge that page onto the page read from the source file.
        new_pdf = PdfReader(packet)
        pdf_page.merge_page(new_pdf.pages[0])

    with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
        pdf_writer.write(output_file)

print("Image added to the specified page of all pdf files and saved in the output folder.")
I've tried changing the image position and the page number, but it had no effect. I expect the watermark to be added at a specific place in the PDF file.

FIXED
import os
import io
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# --- Configuration ---------------------------------------------------------
folder_path = 'D:/pdf/pliki/'          # directory scanned for source PDFs
output_folder = 'D:/pdf/output/'       # directory that receives the stamped copies
sign_path = 'D:/pdf/obraz/podpis.png'  # image used as the watermark/signature
x_position = 300                       # placement of the image on the page
y_position = 260
width = 150                            # size the image is drawn at
height = 150

os.makedirs(output_folder, exist_ok=True)

# Build the one-page overlay once; the same overlay page is merged into
# every document, so there is no need to re-render it per file.
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
can.drawImage(
    sign_path, x_position, y_position, width, height,
    # Colour range that ReportLab should treat as transparent
    # (min/max pairs per channel).
    mask=[112, 113, 112, 113, 112, 113],
)
can.save()
packet.seek(0)
new_pdf = PdfReader(packet)
overlay_page = new_pdf.pages[0]

for pdf_file in os.listdir(folder_path):
    if not pdf_file.endswith(".pdf"):
        continue

    # Keep the source file open until the output has been written (the reader
    # may still need it), then let the context manager close it so handles
    # are not leaked for every processed PDF.
    with open(os.path.join(folder_path, pdf_file), "rb") as input_file:
        pdf_reader = PdfReader(input_file)
        pdf_writer = PdfWriter()

        for index, page in enumerate(pdf_reader.pages):
            if index == 0:
                # Merge first, add afterwards: the writer must receive the
                # page that already carries the watermark.
                page.merge_page(overlay_page)
            pdf_writer.add_page(page)

        with open(os.path.join(output_folder, pdf_file), "wb") as output_file:
            pdf_writer.write(output_file)

print("Watermark added to the specified page of all pdf files and saved in the output folder.")

Related

Converting PDF page to JPG returns blank

I have a function that asks the user for a PDF file and receives the page number the user wishes to convert into an image. The function usually works fine; however, with a few PDFs it does not work: the image that is returned is blank and is 4 megabytes in size. Apparently it has something to do with the size of the file. Is there a way to solve this problem?
from PyPDF2 import PdfFileReader, PdfFileWriter
from tkinter.filedialog import askopenfilename
from pdf2image import convert_from_path
import os
import PIL
# Disable Pillow's pixel-count limit so very large rendered pages can be loaded.
PIL.Image.MAX_IMAGE_PIXELS = None


def convert_pdf(page_number):
    """Extract one page of a user-chosen PDF and save it as a JPEG.

    The user picks the PDF through a file dialog. The requested page is first
    written to a one-page ``*_subset.pdf`` next to the source file, and that
    subset is then rendered with pdf2image. Every rendered page is saved in
    the current working directory as ``Page_<n><subset name>.jpg``.

    Args:
        page_number: Page index passed straight to ``getPage``.
    """
    pdf_file_path = askopenfilename()
    pdf = PdfFileReader(pdf_file_path)

    # NOTE(review): every '.pdf' occurrence is removed and then five more
    # characters are cut from the end of the path; kept exactly as-is so the
    # generated file names do not change.
    n = pdf_file_path.replace('.pdf', '')[:-5]
    nome = f'{n}_subset.pdf'

    # Write the single requested page to its own PDF.
    subset_writer = PdfFileWriter()
    subset_writer.addPage(pdf.getPage(page_number))
    with open(nome, 'wb') as subset_file:
        subset_writer.write(subset_file)

    # Render the subset and store each resulting image as a JPEG.
    rendered_pages = convert_from_path(nome, poppler_path=r'C:\Program Files\poppler-0.68.0\bin')
    name = os.path.basename(nome).split('/')[-1][:-4]
    for i, page in enumerate(rendered_pages, start=1):
        image_name = "Page_" + str(i) + f"{name}.jpg"
        page.save(image_name, "JPEG")
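The solution to this problem was to change the DPI parameter of the convert_from_path function. It is important to keep the default DPI for PDFs that already convert correctly, since I found that certain images become really small, and therefore unreadable, at a lower DPI. The reduced DPI is therefore only used as a fallback when the normal conversion fails: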
# First attempt: render at pdf2image's default DPI, which keeps pages legible.
try:
    pages = convert_from_path(nome, poppler_path=r'C:\Program Files\poppler-0.68.0\bin')
except Exception:
    # Fallback for documents that cannot be rendered at the default DPI:
    # lift Pillow's pixel limit and retry at a much lower resolution.
    # `except Exception` (instead of a bare `except`) lets Ctrl-C and
    # interpreter shutdown propagate.
    PIL.Image.MAX_IMAGE_PIXELS = None
    pages = convert_from_path(nome, dpi=25, poppler_path=r'C:\Program Files\poppler-0.68.0\bin')
# Counter used when naming the output images (same start value on both paths).
i = 1

PyPDF2 creates unreadable PDF

My code merges two PDFs, but when I try to set the user's desktop as the destination of the new PDF, the result is an unreadable PDF.
from PyPDF2 import *
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import LETTER
from datetime import datetime
from PyPDF2 import PdfFileReader, PdfFileMerger
import io
from tkinter.filedialog import askopenfilename
import os
from tkinter import *
# Create and hide the Tk root window so only the file dialog is shown.
root = Tk()
root.withdraw()

# CREATE THE CARB PDF PAGE ##################################
# Take a single timestamp so day/month/year always belong to the same instant.
today = datetime.now()
currentDay = today.day
currentMonth = today.month
currentYear = today.year

existing_pdf = "CARB.pdf"
c = canvas.Canvas(existing_pdf, pagesize=LETTER)
c.drawString(10, 170, "TEST")
c.drawString(45, 160, "TEST")
c.drawString(25, 150, "TEST")
c.drawString(50, 140, "___________ MONTH __________ YEAR")
c.drawString(80, 140, str(currentMonth))
c.drawString(200, 140, str(currentYear))
c.save()
#########################################################

# ASK THE USER FOR THE PDF TO EXTEND ######################
filename = askopenfilename()
if not filename:
    # The dialog returns an empty string when it is cancelled.
    raise SystemExit("No PDF selected.")
name = os.path.splitext(os.path.basename(filename))[0]

# MERGE THE CREATED PDF WITH THE EXISTING PDF ##############
# The merger accepts file names directly and opens the files itself; the
# generated page is referenced through `existing_pdf` so the name used for
# writing and for reading cannot drift apart.
merger = PdfFileMerger(strict=True)
merger.append(filename)
merger.append(existing_pdf)

# Write the *merger* to the desktop. Writing a freshly created, empty
# PdfFileWriter instead produces a PDF without any pages, which is the
# unreadable file described above.
desktop_pdf = os.path.join(os.path.expanduser("~"), "Desktop", name + "-CARB.pdf")
with open(desktop_pdf, "wb") as outputStream:
    merger.write(outputStream)
merger.close()

How to downgrade a pdf version from 1.7 (Acrobat 8.x) to 1.4 (Acrobat 5.x) in Python

I have a script that modifies PDF files so that they comply with some specifications required for uploading them to another app (grayscale or black and white, 300 dpi, letter sized, etc.). I'm using pdf2image, img2pdf, PIL.Image and fitz.
The problem is that when I'm done modifying the files, the pdf version upgrades from 1.4 to 1.7 and I need it to be specifically 1.4. After reading online, I found out that PyPDF2 automatically converts pdf files to 1.3. I tried that thinking that 1.3 could work, but to my surprise it did not. It HAS to be 1.4. Here is my code if it helps:
import os
from os.path import join
from tempfile import TemporaryDirectory
from pdf2image import convert_from_path
from img2pdf import convert
import PIL.Image as Image
import fitz
from PyPDF2 import PdfFileWriter, PdfFileReader
#Here's where the source pdf is located.
# Folder that holds the source PDFs; the processed files are written back
# into the same folder under the same names.
pdf_input = os.path.join("PDF")

with TemporaryDirectory() as temp_dir:
    for file in os.listdir(pdf_input):
        pdfName = os.fsdecode(file)
        pdf_to_open = os.path.join(pdf_input, pdfName)

        # Step 1: rasterise every page of the PDF into a grayscale PNG.
        images = convert_from_path(
            pdf_to_open,
            dpi=282,  # For some reason, if I put 300dpi I end up with 325 dpi.
            output_folder=temp_dir,
            grayscale=True,
            fmt="png",
            thread_count=4,
        )

        # Step 2: store each page under a predictable name and reduce it to
        # a 1-bit black-and-white image, overwriting the PNG in place.
        image_list = []
        for page_number, page_image in enumerate(images, start=1):
            path = join(temp_dir, "page_" + str(page_number) + ".png")
            image_list.append(path)
            page_image.save(path, "PNG")
            image_file = Image.open(path).convert('1')
            image_file.save(path)

        # Step 3: assemble the page images into a PDF that replaces the source.
        if not os.path.exists(pdf_input):
            os.mkdir(pdf_input)
        pdfPath = os.path.join(pdf_input, pdfName)
        with open(pdfPath, "bw") as gray_pdf:
            gray_pdf.write(convert(image_list))

        # Step 4: place every page onto a Letter-sized page.
        # NOTE(review): `gray_pdf` is the (already closed) file object from
        # step 3, not a path string; presumably fitz resolves it through its
        # name. Confirm against the installed PyMuPDF version.
        src = fitz.open(gray_pdf)
        doc = fitz.open()
        for ipage in src:
            fmt = fitz.paper_rect("Letter")
            page = doc.new_page(width=fmt.width, height=fmt.height)
            page.show_pdf_page(page.rect, src, ipage.number)
        src.close()
        doc.save(gray_pdf)

        # Step 5: copy all pages through PyPDF2 and overwrite the file again
        # (the attempted version downgrade).
        infile = PdfFileReader(pdfPath, 'rb')
        output = PdfFileWriter()
        for i in range(infile.getNumPages()):
            output.addPage(infile.getPage(i))
        with open(pdfPath, 'wb') as f:
            output.write(f)
I managed to do it using Ghostscript. I have no idea how Ghostscript works internally, but this code worked just fine:
import sys
import ghostscript
# Source file and destination of the re-written copy.
pdfPath = "path/pdfName.pdf"
newPdfPath = "path/NEW_pdfName.pdf"

# Ghostscript receives an argv-style list: a program-name slot first,
# followed by the switches and finally the input file.
args = ["downgradePDF"]
# Re-write the document with the pdfwrite device at compatibility level 1.4.
args += ["-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4"]
# Batch-mode switches.
args += ["-dNOPAUSE", "-dQUIET", "-dBATCH"]
# Output file first, then the input file.
args += ["-sOutputFile=" + newPdfPath, pdfPath]

ghostscript.Ghostscript(*args)

Text rotated when merging pdf pages using Pypdf2 and Reportlab

I'm trying to merge two pages:
one created with ReportLab that contains the text I want, and another one from my source PDF.
But when I merge those two pages, my text is rotated by 90 degrees.
Pdf created using Report lab -> Overlay Created using Reportlab
when Merged with Source pdf -> Source Pdf
Code that I have Used :
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
# Render the overlay text into an in-memory PDF.
# NOTE(review): the canvas uses ReportLab's default page size and the merge
# below does not look at the source page's /Rotate entry; presumably that is
# why the text ends up rotated on rotated source pages. Confirm with the
# actual Source.pdf.
packet = io.BytesIO()
c = canvas.Canvas(packet)
c.drawString(0, 0, "Hello World")
c.save()
packet.seek(0)
packet_pdf = PdfFileReader(packet)

# Stamp the overlay onto the first page of the source document.
input_pdf = PdfFileReader(open("Source.pdf", "rb"))
page = input_pdf.getPage(0)
page.mergePage(packet_pdf.getPage(0))

# Write the single stamped page to the destination file.
output = PdfFileWriter()
output.addPage(page)
outputStream = open("destination.pdf", "wb")
output.write(outputStream)
outputStream.close()
Reference: Add text to Existing PDF using Python
I referred to that and created my own solution, based on: Python PyPDF2 merge rotated pages
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen.canvas import Canvas
page_to_merge = 0  # Refers to the first page of the PDF (zero-based index)
xcoor = 250        # To be changed according to your pdf
ycoor = 650        # To be changed according to your pdf

# Keep the source file open until the output has been written, and let the
# context managers close both files even if something fails in between.
with open("Source.pdf", "rb") as source_file:
    input_pdf = PdfFileReader(source_file)
    page_count = input_pdf.getNumPages()
    inputpdf_page_to_be_merged = input_pdf.getPage(page_to_merge)

    # Draw the text on a canvas that has the same size as the target page.
    packet = io.BytesIO()
    target_box = inputpdf_page_to_be_merged.mediaBox
    c = Canvas(packet, pagesize=(target_box.getWidth(), target_box.getHeight()))
    c.drawString(xcoor, ycoor, "Hello World")
    c.save()
    packet.seek(0)
    overlay_pdf = PdfFileReader(packet)
    overlay = overlay_pdf.getPage(0)

    # Merge the overlay rotated by the page's own /Rotate value (0 when the
    # entry is absent), using half the overlay width for both translation
    # arguments, so the text follows the page's orientation.
    inputpdf_page_to_be_merged.mergeRotatedTranslatedPage(
        overlay,
        inputpdf_page_to_be_merged.get('/Rotate') or 0,
        overlay.mediaBox.getWidth() / 2,
        overlay.mediaBox.getWidth() / 2,
    )

    # Copy all pages to the output, substituting the stamped page.
    output = PdfFileWriter()
    for PAGE in range(page_count):
        if PAGE == page_to_merge:
            output.addPage(inputpdf_page_to_be_merged)
        else:
            output.addPage(input_pdf.getPage(PAGE))

    with open("destination.pdf", "wb") as outputStream:
        output.write(outputStream)

How to save pdfs

How could I save dfa as a pdf?
So far I was able to have each of the elements saved separately in each file. How now can I have the whole file saved as a single pdf?
dfa = pd.DataFrame({'STREAM':['EAGLE','HAWK','HAWK','HAWK','EAGLE','HAWK','EAGLE'],'MAT':['A','D','F','D','C','C','E'],'KIS':['B','D','E','D','A','C','D'],'GEO':['B','C','E','E','F','A','B']})
dfa.to_csv('results.csv',index=False)

# Read the rows back by column name. DictReader consumes the header line
# itself, so no "MAT.pdf" is produced from the header, and looking values up
# by name keeps KIS and GEO from being swapped (the CSV column order is
# STREAM, MAT, KIS, GEO).
with open("results.csv", 'r', newline='') as results_file:
    students_data = csv.DictReader(results_file)
    for row in students_data:
        STREAM = row['STREAM']
        MAT = row['MAT']
        KIS = row['KIS']
        GEO = row['GEO']

        # One PDF per row, named after the MAT value. Rows that share a MAT
        # value write to the same file, so the later row replaces the earlier.
        c = canvas.Canvas(MAT +".pdf")
        c.drawString(60, 700, "STREAM: " + STREAM)
        c.drawString(60, 600, "MAT: " + MAT)
        c.drawString(60, 500, "KIS: " + KIS)
        c.drawString(60, 400, "GEO: " + GEO)
        c.save()
My suggestion would be to create a blank PDF as your template page and then
merge the new PDF into it:
packet = io.BytesIO()
# Create the initial canvas.
c = canvas.Canvas(packet)
# your code for adding to the canvas
# Finish the canvas (call this once, after all drawing) so the PDF data is
# actually written into `packet` before it is read back.
c.save()
packet.seek(0)
new_pdf = PdfFileReader(packet)

# Import the template. The "rb" mode belongs to open(), not to PdfFileReader;
# the file stays open until the output has been written.
with open('path_to_template', "rb") as template_file:
    template = PdfFileReader(template_file)
    output = PdfFileWriter()

    # add the created PDF as a watermark to the template
    page = template.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)

    # finally, write "output" to a real file
    with open(output_path, "wb") as outputStream:
        output.write(outputStream)
You will need these imports:
from PyPDF2 import PdfFileWriter, PdfFileReader
import io

Categories

Resources