I have a folder that contains PDF, PNG, and JPEG files. I'm trying to convert the PDF files to images, and this is the code I've tried:
from pdf2image import convert_from_path, convert_from_bytes
from pdf2image.exceptions import (
PDFInfoNotInstalledError,
PDFPageCountError,
PDFSyntaxError
)
# Render every page of one PDF and save each page as a numbered JPEG
# (image0.jpg, image1.jpg, ...) in the current working directory.
pages = convert_from_path('41117 UIs in eng.pdf')
for page_index, page in enumerate(pages):
    page.save(f'image{page_index}.jpg', "JPEG")
Clearly this code only handles a single PDF file; I want to adapt it to process several PDF files that are mixed with other file types in the same folder.
Please help.
You could try something like this (this script finds the PDF files in the current working directory, i.e. the directory you run your Python program from):
import os
from pdf2image import convert_from_path, convert_from_bytes
from pdf2image.exceptions import (
PDFInfoNotInstalledError,
PDFPageCountError,
PDFSyntaxError
)
# Collect every PDF in the current working directory; other file types
# (PNG, JPEG, ...) are ignored. The extension check is case-insensitive so
# files such as "scan.PDF" are picked up too.
pdf_files = [filename for filename in os.listdir('.')
             if filename.lower().endswith('.pdf')]

for pdf_file in pdf_files:
    # A single unreadable PDF should not abort the whole batch: report it
    # and move on to the next file.
    try:
        images = convert_from_path(pdf_file)
    except (PDFPageCountError, PDFSyntaxError) as err:
        print(f"Skipping {pdf_file}: {err}")
        continue
    print(pdf_file)
    # One JPEG per page. The source PDF name is used as a prefix so pages
    # from different PDFs never overwrite each other.
    for i, image in enumerate(images):
        fname = pdf_file + '_image' + str(i) + '.jpg'
        image.save(fname, "JPEG")
Related
I'm a newbie and I'm trying to create a multipage PDF with img2pdf (recursively), but only the last picture is saved in the PDF file.
from pathlib import Path
import os
import img2pdf
# Visit every sub-folder of ./MAIN and write its images to "<folder name>.pdf"
# in the current working directory.
main_dir = Path.cwd() / 'MAIN'
for subfolder in main_dir.iterdir():
    if not subfolder.is_file():
        # the PDF is named after the folder
        pdf_name = subfolder.name
        for filename in subfolder.iterdir():
            # NOTE: the PDF is reopened in "wb" mode for every image, which
            # truncates it each time, so only the last image remains.
            with open(pdf_name + ".pdf", "wb") as f:
                f.write(img2pdf.convert(str(filename)))
When I test it with print(filename) all images are going through the loop.
Maybe someone can tell me where my misconception is.
The img2pdf module can take a list of image file names directly as an argument and convert them into a single PDF. Take a look at the documentation here.
from pathlib import Path
import os
import img2pdf
# For every sub-folder of main_dir, merge all files it contains into one
# multi-page PDF named "<folder name>.pdf" in the current working directory.
main_dir = Path(Path.cwd(), 'your/path/to/images/folder')
for subfolder in main_dir.iterdir():
    if subfolder.is_file():
        continue
    # iterdir() already yields full paths, so no extra join is needed.
    # Nested directories are skipped, and the list is sorted so the page
    # order is deterministic instead of depending on the file system.
    imgs = sorted(str(entry) for entry in subfolder.iterdir()
                  if entry.is_file())
    if not imgs:
        # Nothing to convert: avoid leaving an empty, invalid PDF behind.
        continue
    pdf_name = os.path.basename(subfolder)
    # Open the output once per folder and write all pages in a single call.
    with open(pdf_name + ".pdf", "wb") as f:
        f.write(img2pdf.convert(imgs))
This Python script creates a PDF file from a list of images.
(There is a series of folders inside a parent folder, and each folder contains several images.)
My current problem is that I need the following:
all image files contained in each folder should be merged, folder by folder, and saved as a PDF file;
each merged file should be saved as a single PDF;
each PDF file should be named after its folder.
Example))
enter image description here
Root(Current) Folder
Folder1 : FileA.tif , FileB.gif
Folder2 : FileC.tif
Folder3 : FileD.tif
Result Folder
Folder1.pdf (Contains FileA.tif and FileB.gif combined into one pdf)
Folder2.pdf (Contains FileC.tif as pdf)
Folder3.pdf (Contains FileD.tif as pdf)
((MY Python script))
import os
import re
from fpdf import FPDF
import img2pdf
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops below cannot be determined from here; comments describe each
# statement on its own.
# A single FPDF document and a single image list are created once for the
# whole run.
pdf = FPDF()
imagelist = []
TDPath = "D:\\Data\\ToDo" # Root (current) folder that is walked for images
RPath = "D:\\Data\\Result" # Result folder; NOTE(review): not referenced again in this snippet
# Collect image paths: walk TDPath and its sub-folders.
for dirpath, dirnames, filenames in os.walk(TDPath):
# Keep only names matching the extension pattern. NOTE(review): re.match
# anchors at the start only (there is no trailing '$'), and '[.]jpg' is
# listed twice in the alternation.
for filename in [f for f in filenames if re.match('.*([.]jpg|[.]png|[.]tif|[.]gif|[.]jpeg|[.]bmp|[.]jpg)', f)]:
full_path = os.path.join(dirpath, filename)
imagelist.append(full_path)
imagelist.sort()
# Add one page per collected image to the shared `pdf` object.
for image in imagelist:
pdf.add_page()
pdf.image(image, 0, 0, 200)
# Save the PDF: the output path is TDPath with ".pdf" appended, not a
# per-folder name and not a location under RPath.
pdf.output(TDPath+".pdf", "F")
# This message follows the add_page/output calls in the listing although
# its text says "Converting to PDF....".
print("\nFound " + str(len(imagelist)) + " image files. Converting to PDF....\n")
I am trying to convert all the PDFs in a directory into images, but I have an issue: only one PDF is converted, not all of them.
import matplotlib
import pytesseract
import os
import argparse
import cv2
from PIL import Image
import PyPDF2
from wand.image import Image as wi
# NOTE(review): indentation was lost in this listing, and `src_path`,
# `count` and `args` are not defined in the visible snippet.
# Iterate over every entry name in src_path.
for filename in os.listdir(src_path):
count = count + 1
# apply tesseract OCR and write to text file in specified target directory
# NOTE(review): no OCR call or use of target_path is visible in this snippet.
target_path = args.trg_dir
# check if a file is a directory
# NOTE(review): the bare name is tested here, whereas the open() call below
# joins it with src_path.
if os.path.isdir(filename):
pass
else:
# check if a file is a pdf
try:
PyPDF2.PdfFileReader(open(os.path.join(src_path, filename), "rb"))
# NOTE(review): the body of this except clause is missing in the listing.
except PyPDF2.utils.PdfReadError:
else:
# The PDF is opened by its bare name (not joined with src_path) at 300 dpi
# and converted to JPEG.
pdf = wi(filename=filename, resolution=300)
pdfimage = pdf.convert("jpeg")
# Page counter used for the output file names.
i=1
for img in pdfimage.sequence:
page = wi(image=img)
# The output name is built from the counter alone ("1.jpg", "2.jpg", ...);
# the source PDF's name is not part of it.
page.save(filename=str(i)+".jpg")
i +=1
If I understand correctly, try:
# Convert every PDF found in src_path into one JPEG per page, written to the
# current working directory as "<pdf name>_<page index>.jpg".
files = [file for file in os.listdir(src_path) if file.endswith(".pdf")]
for file in files:
    # os.listdir() returns bare names, so join with src_path before opening.
    # The path must be passed as `filename=`: the first positional parameter
    # of wand's Image is `image`, not a path.
    with wi(filename=os.path.join(src_path, file), resolution=300) as img_pdf:
        for page, img in enumerate(img_pdf.sequence):
            # Wrap each page in its own Image and release it after saving.
            # save() likewise needs `filename=`; its first positional
            # parameter is a file object.
            with wi(image=img) as page_img:
                page_img.save(filename=f"{file}_{page}.jpg")
Does anybody know how to upload an image (using filedialog.askopenfile) and then store the uploaded image in an existing folder on my computer? All the examples available on the internet require image paths, and I get an error whenever I provide the file path for the uploaded image. Am I doing something wrong?
import cv2
import os
from tkinter.filedialog import askopenfile
# Ask the user to pick a PNG, JPG or GIF file.
# NOTE(review): askopenfile presumably returns an opened file object rather
# than a path string, yet the result is handed straight to cv2.imread --
# confirm; tkinter.filedialog.askopenfilename returns the path instead.
filename = askopenfile(title ='open', filetypes=(("PNGs", "*.png"),("JPGs", "*.jpg"), ("GIFs", "*.gif")))
img = cv2.imread(filename)
path = "/Users/mac/desktop/test" # => Folder path
# NOTE(review): the closing parenthesis of cv2.imwrite(...) is missing, and
# the value read by cv2.imread (`img`) is passed to os.path.join where a
# file name would be expected; no image argument is given to imwrite.
cv2.imwrite(os.path.join(path, img)
I am generating a gif with images2gif from some .png pictures. I am using the following script:
__author__ = 'Robert'
from images2gif import writeGif
from PIL import Image
import os
# Sorted list of the .png entries in the 'prueba' directory. os.listdir
# yields bare names only, without the directory part.
file_names = sorted((fn for fn in os.listdir('/home/manager/Desktop/sf_linux_shared/project/prueba') if fn.endswith('.png')))
# e.g. ['animationframa.png', 'animationframb.png', ...]
# Each bare name is opened as given, so it is looked up relative to the
# current working directory rather than in the 'prueba' directory.
images = [Image.open(fn) for fn in file_names]
size = (150,150)
# Call thumbnail() on every frame with the 150x150 size.
for im in images:
im.thumbnail(size, Image.ANTIALIAS)
# Python 2 print statement: shows writeGif's docstring.
print writeGif.__doc__
filename = "my_gif.GIF"
# Write all frames to my_gif.GIF with duration=0.2.
writeGif(filename, images, duration=0.2)
however I have the following error:
IOError: [Errno 2] No such file or directory: 'cosa.png'
cosa.png is one of the pictures I want to create the gif with. The problem seems to be in:
images = [Image.open(fn) for fn in file_names]
but I cannot detect it
open(filename) looks in the current working directory if filename is not an
absolute path. The current working directory is the directory from which the script is run.
Either use os.chdir to change the working directory, or make all your filenames absolute:
# Build absolute paths so the images can be opened regardless of the
# directory the script is run from.
WORKDIR = '/home/manager/Desktop/sf_linux_shared/project/prueba'
# The original listing spelled this name "WORDIR", which raises NameError.
file_names = sorted(os.path.join(WORKDIR, fn)
                    for fn in os.listdir(WORKDIR) if fn.endswith('.png'))