Combine PDF using PyPDF2

Combine PDF using PyPDF2 - python

Does anyone have ideas on how to combine two PDFs into a single A4 PDF file? I can combine them in landscape orientation, but not in portrait. Below is my code using Python 3 and PyPDF2.
(Assuming there are two PDF files in the "output" directory, named "left.pdf" and "right.pdf".)
import sys import os import PyPDF2
from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict from PyPDF2 import PdfFileMerger, PageObject
inpfn = 'output.pdf'
outfn = 'output/right.pdf'
reader = PdfReader(inpfn)
writer = PdfWriter(outfn)
writer.addpage(adjust3(reader.pages[0]))
writer.trailer.Info = IndirectPdfDict(reader.Info or {})
writer.write()
merger = PdfFileMerger()
path_to_files = r'output/'
for root, dirs, file_names in os.walk(path_to_files):
for file_name in file_names:
merger.append(path_to_files + file_name)
merger.write("file_gabungan.pdf")
merger.close()

Related

Getting this error: RuntimeError: Proxy error(FileNotFoundException): Could not find file 'C:\Users\user\stuff\tests\pythonlearn.pdf'

import os
import aspose.words as aw
rootdir = 'C:/Users/user/stuff/tests'
for subdir, dirs, files in os.walk(rootdir):
for file in files:
a = os.path.join(subdir, file)
doc = aw.Document(a)
doc.save("utput.docx")
doc = aw.Document("Output.docx")
doc.save("output.pdf")
This is my program.
I am trying to run Python over a folder containing PDF files and decrypt them one by one, by converting each file to Word and then back to PDF. What am I doing wrong?

Don't use os.walk; use os.listdir(rootdir) instead. Also make sure that the file you save and the file you then open have the same name (your code saves "utput.docx" but opens "Output.docx").
Example:
import os
import aspose.words as aw

root = "C:/Users/user/stuff/tests"
for item in os.listdir(root):
    # os.listdir() returns bare names, so build the full path before
    # touching the file; otherwise it is looked up in the current directory.
    source = os.path.join(root, item)
    if not os.path.isfile(source) or not item.lower().endswith(".pdf"):
        continue
    # Derive the output names from the input name so that each converted
    # file is kept instead of being overwritten by the next iteration.
    stem = os.path.splitext(item)[0]
    docx_name = f"{stem}.docx"
    doc = aw.Document(source)
    doc.save(docx_name)
    # Re-open exactly the file that was just saved.
    doc = aw.Document(docx_name)
    doc.save(f"{stem}_output.pdf")
[EDIT]
The code above can't find files in other folders, so I decided to use glob to find the PDF files.
Here:
import os
import aspose.words as aw
import glob

# Set base directory
os.chdir("C:/Users/user/stuff/tests")
# Get all PDF files in a list
pdf_files = glob.glob("*.pdf")
for pdf_file in pdf_files:
    # Derive the output names from the input name so that each converted
    # file is kept instead of being overwritten by the next iteration.
    stem = os.path.splitext(pdf_file)[0]
    docx_name = f"{stem}.docx"
    doc = aw.Document(pdf_file)
    doc.save(docx_name)
    # Re-open exactly the file that was just saved.
    doc = aw.Document(docx_name)
    doc.save(f"{stem}_output.pdf")
[EDIT-2]
First take all .pdf files in one list :
pdf_files = glob.glob("*.pdf")
other_pdf_files = glob.glob('*/*.pdf')
all_pdf_files=(*pdf_files,*other_pdf_files)
Secondly, you need to use PyPDF2 to get rid of password.
Get unencrypted pdfs by sending all pdf files into decrypt_pdf (don't forget to specify the password). For example: (More detail here and here)
from PyPDF2 import PdfFileReader, PdfFileWriter
def decrypt_pdf(input_path, output_path, password):
    """Write an unencrypted copy of a password-protected PDF.

    Args:
        input_path: Path of the encrypted PDF to read.
        output_path: Path the decrypted copy is written to.
        password: Password used to unlock ``input_path``.
    """
    with open(input_path, 'rb') as input_file:
        reader = PdfFileReader(input_file)
        reader.decrypt(password)
        writer = PdfFileWriter()
        for i in range(reader.getNumPages()):
            writer.addPage(reader.getPage(i))
        # Open the destination only once every page has been collected, so a
        # failure while reading the input does not leave an empty output file
        # behind. The input stays open because it is still needed by write().
        with open(output_path, 'wb') as output_file:
            writer.write(output_file)
You can run other parts in the same way.
for files in all_pdf_files:
doc = aw.Document(files)
...

create bookmarks for separate pdf files using the filename

Hi, I am trying to merge multiple PDFs with the filename of each PDF as its bookmark. I intend to do this by merging the PDFs once the bookmark has been created on each PDF individually. I have managed to merge all the PDFs, and I have managed to make a bookmark for an individual PDF using its filename; however, I have not been able to add a bookmark to multiple PDFs at once. Please could anyone advise on how this could be done? Below is the code for making the bookmark from the PDF name:
import os
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
directory = r'C:filepath\\21-07398.pdf'
source_dir = os.getcwd()
pathname, extension = os.path.splitext(directory)
filename = pathname.split('\\')
filename1 = filename[-1]
print(filename1)
reader = PdfFileReader(directory)
writer = PdfFileWriter()
n = reader.getNumPages()
for i in range(n):
writer.addPage(reader.getPage(i))
writer.addBookmark(filename1, 0, parent=None)
with open("./result/result.pdf", "wb") as gg:
writer.write(gg)

https://pypdf2.readthedocs.io/en/latest/user/merging-pdfs.html
https://pypdf2.readthedocs.io/en/latest/modules/PdfMerger.html?highlight=bookmark#PyPDF2.PdfMerger.add_bookmark
You can merge the pdfs and create the bookmarks as you go.
from PyPDF2 import PdfMerger

merger = PdfMerger()
# files is supposed to be a list of your filenames.
for file in files:
    # Passing outline_item to append() puts the bookmark on the first page of
    # the file being appended. Using the loop index as the page number instead
    # would point bookmark i at page i of the merged document, which is only
    # right when every input has exactly one page.
    merger.append(file, outline_item=str(file))
# Replace "merged.pdf" with your output file name.
merger.write("merged.pdf")
merger.close()

Python script to merge PDF files with a blank page

I have the following script compiled from others' suggestions, but I can't seem to get it to run properly. I need to merge several 3-page bill files into a single file for printing, adding a blank page between each bill file so that each bill prints properly (we don't want the first page of one bill printed on the back of the previous bill).
# If the file errors with "no module PyPDF2" then from command line, run pip install PyPDF2
import os
from os import listdir, mkdir, startfile
from os.path import isfile, join, exists
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
#Input file path and print the pdf files in that path
path = input("Enter the folder location: ")
pdffiles = [f for f in listdir(path) if isfile(join(path, f)) and '.pdf' in f]
print('\nList of PDF Files:\n')
for file in pdffiles:
print(file)
def add_blank_to_end(pdffiles: list) -> list:
names = []
for f in pdffiles:
pdf_in = open(f, 'rb')
pdf_file = PdfFileReader(pdf_in)
output = PdfFileWriter()
output.appendPagesFromReader(pdf_file)
output.addBlankPage()
names.append(f'b{f}')
outputStream = open(f'b{f}', 'wb')
output.write(outputStream)
return names
#Append the pdf files
def merge_pdfs(pdffiles: list):
merger = PdfFileMerger()
for f in pdffiles:
merger.append(f)
merger.write("document-output.pdf")
with_blank = add_blank_to_end(pdffiles)
merge_pdfs(with_blank)

# If the file errors with "no module PyPDF2" then from command line, run pip install PyPDF2
import os
from os import listdir, mkdir, startfile
from os.path import isfile, join, exists
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
# Input the folder path and print the PDF files found in that folder
path = input("Enter the folder location")
# Match on the file extension: a plain substring test would also accept
# names such as "report.pdf.bak".
pdffiles = [f for f in listdir(path) if isfile(join(path, f)) and f.lower().endswith('.pdf')]
print('\nList of PDF Files:\n')
for file in pdffiles:
    print(file)
def add_blank_to_end(pdffiles: list) -> list:
    """Write a copy of each PDF with one blank page appended.

    Each name in ``pdffiles`` is read from the module-level ``path`` folder,
    and the padded copy is written to the current working directory as
    ``b<name>``.

    Returns:
        The names of the padded copies, in the same order as ``pdffiles``.
    """
    names = []
    for f in pdffiles:
        padded_name = f'b{f}'
        # Context managers close both files even if PyPDF2 raises. The input
        # is kept open until write() has finished because the writer still
        # refers to the reader's pages at that point.
        with open(join(path, f), 'rb') as pdf_in:
            output = PdfFileWriter()
            output.appendPagesFromReader(PdfFileReader(pdf_in))
            output.addBlankPage()
            with open(padded_name, 'wb') as pdf_out:
                output.write(pdf_out)
        names.append(padded_name)
    return names
def merge_pdfs(pdffiles: list):
    """Concatenate the given PDFs, in order, into ``document-output.pdf``.

    Args:
        pdffiles: Paths of the PDF files to merge, in output order.
    """
    merger = PdfFileMerger()
    try:
        for f in pdffiles:
            merger.append(f)
        merger.write("document-output.pdf")
    finally:
        # Release the file handles the merger opened, even if a step failed.
        merger.close()
with_blank = add_blank_to_end(pdffiles)
merge_pdfs(with_blank)

Python: JPEGs from folder to a Multipage-PDF with img2pdf

I'm a newbie and I'm trying to create a multipage PDF with img2pdf (recursively), but only the last picture is saved in the PDF file.
from pathlib import Path
import os
import img2pdf
main_dir = Path(Path.cwd(),'MAIN')
for subfolder in main_dir.iterdir():
if subfolder.is_file():
continue
for filename in subfolder.iterdir():
#pdf as foldername
pdf_name = os.path.basename(subfolder)
#write image-file to pdf file
with open(pdf_name+".pdf", "wb") as f:
f.write(img2pdf.convert(str(filename)))
When I test it with print(filename) all images are going through the loop.
Maybe someone can tell me where my misconception is.

The img2pdf module can directly take a list of image file names as an argument and convert them into a single PDF. Take a look at the documentation here.
from pathlib import Path
import os
import img2pdf

main_dir = Path(Path.cwd(), 'your/path/to/images/folder')
for subfolder in main_dir.iterdir():
    if subfolder.is_file():
        continue
    # iterdir() already yields paths that include the subfolder, in arbitrary
    # order; sort them so the pages follow the file names, and skip anything
    # that is not a regular file.
    imgs = sorted(str(entry) for entry in subfolder.iterdir() if entry.is_file())
    if not imgs:
        continue  # nothing to convert in this folder
    # One PDF per subfolder, named after the folder
    pdf_name = os.path.basename(subfolder)
    with open(pdf_name + ".pdf", "wb") as f:
        f.write(img2pdf.convert(imgs))

Batch rotate PDF files with PyPDF2

I've been working on code to batch rotate PDF files inside a folder, but I can't find a way to iterate over the files and change the destination folder of the rotated file.
My intention is to save the new file with the same name in another folder.
from os import listdir
from PyPDF2 import PdfReader, PdfWriter
# Collect files
root = "C:\z_PruebPy\pdf"
archs = []
for x in listdir(root):
archs.append(root + x)
# Batch rotate
for arch in archs:
pdf_in = open(arch, "rb")
reader = PdfReader(pdf_in)
writer = PdfWriter()
for page in reader.pages:
page.rotate_clockwise(270)
writer.add_page(page)
with open(arch, "wb") as pdf_out: # ????????
writer.write(pdf_out)
pdf_in.close()

You have to open the output file in the new location and pass that file object to the writer's write() call.
Also you don't need to create a list and iterate on the list, just iterate on os.listdir results.
Finally you had unused variables, like loc.
I cleaned your code a bit.
So this should work, assuming you create the output folder :
from os import listdir, makedirs
from PyPDF2 import PdfReader, PdfWriter

input_dir = "C:\\z_PruebPy\\pdf\\"
output_dir = "C:\\z_PruebPy\\output_pdf\\"
# Create the destination folder if it is missing, so open() below cannot
# fail just because the folder does not exist yet.
makedirs(output_dir, exist_ok=True)
for fname in listdir(input_dir):
    if not fname.lower().endswith(".pdf"):  # ignore non-pdf files (also accepts ".PDF")
        continue
    reader = PdfReader(input_dir + fname)
    writer = PdfWriter()
    for page in reader.pages:
        # page.rotate_clockwise(270) # (before pypdf3.0 - deprecated - thanks to Maciejg for the update)
        page.rotate(270)
        writer.add_page(page)
    # Same file name, different folder: the original file is left untouched.
    with open(output_dir + fname, "wb") as pdf_out:
        writer.write(pdf_out)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Combine PDF using PyPDF2 - python

Related

Getting this error: RuntimeError: Proxy error(FileNotFoundException): Could not find file 'C:\Users\user\stuff\tests\pythonlearn.pdf'

create bookmarks for separate pdf files using the filename

Python script to merge PDF files with a blank page

Python: JPEGs from folder to a Multipage-PDF with img2pdf

Batch rotate PDF files with PyPDF2

Categories

Resources