Convert Microsoft Word document to PDF using Python

Convert Microsoft Word document to PDF using Python - python

I have tons of Word and Excel files. I want to convert the many Word files in a folder and its subfolders to PDF, and I tried the following code.
The code does nothing (no Word file is converted to PDF), although it raises no error.
What could be the problem? Is there another solution?
This is my code:
import os
from win32com import client
# Question listing: walks `path` and tries to save every matching Word file as PDF.
# NOTE(review): not a raw string, so the "\t" in "\test" is read as a tab character.
path = 'D:\programing\test'
word_file_names = []
word = client.DispatchEx("Word.Application")
for dirpath, dirnames, filenames in os.walk(path):
print (dirpath)
for f in filenames:
# NOTE(review): `re` is used here but is not imported in this listing.
if f.lower().endswith(".docx") and re.search('Addendum', f):
new_name = f.replace(".docx", r".pdf")
# NOTE(review): list.append() returns None, so in_file and new_file are both None.
in_file = word_file_names.append(dirpath + "\\" + f)
new_file = word_file_names.append(dirpath + "\\" + new_name)
doc = word.Documents.Open(in_file)
# 17 is passed as the FileFormat argument of SaveAs.
doc.SaveAs(new_file, FileFormat = 17)
doc.Close()
if f.lower().endswith(".doc") and re.search('Addendum', f):
new_name = f.replace(".doc", r".pdf")
# NOTE(review): same list.append() problem as in the .docx branch above.
in_file = word_file_names.append(dirpath + "\\" + f)
new_file = word_file_names.append(dirpath + "\\" + new_name)
doc = word.Documents.Open(in_file)
doc.SaveAs(new_file, FileFormat = 17)
doc.Close()
word.Quit()

This is way easier:
from docx2pdf import convert
convert(word_path, pdf_path)

You can use comtypes,
from comtypes.client import CreateObject
import os
# Convert every Word document directly inside `folder` to a PDF saved next to
# it as "<original name>.pdf" (e.g. "report.docx" -> "report.docx.pdf").
folder = "folder path"
wdFormatPDF = 17  # format code handed to SaveAs to request PDF output

# List the folder before starting Word so a bad path cannot leave Word running.
files = os.listdir(folder)
word_files = [f for f in files if f.lower().endswith((".doc", ".docx"))]

wdToPDF = CreateObject("Word.Application")
try:
    for word_file in word_files:
        word_path = os.path.join(folder, word_file)
        # The input always ends in .doc/.docx, so the suffix is always appended.
        pdf_path = word_path + ".pdf"
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        pdfCreate = wdToPDF.Documents.Open(word_path)
        try:
            pdfCreate.SaveAs(pdf_path, wdFormatPDF)
        finally:
            # Close each document so they do not pile up in the Word instance.
            pdfCreate.Close()
finally:
    # Always shut Word down, even when a conversion fails.
    wdToPDF.Quit()

I solved this problem; the fixed code is as follows:
import os
import win32com.client
import re
# Convert every .doc/.docx file under `path` (including subfolders) to a PDF
# with the same base name, written next to the original document.
path = r'D:\programing\test'
WD_FORMAT_PDF = 17  # FileFormat value passed to SaveAs for PDF output

word = win32com.client.Dispatch('Word.Application')
try:
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            # splitext keeps the check and the rename consistent: "A.DOCX"
            # matches case-insensitively and still becomes "A.pdf".
            stem, ext = os.path.splitext(f)
            if ext.lower() not in ('.doc', '.docx'):
                continue
            in_file = os.path.join(dirpath, f)
            new_file = os.path.join(dirpath, stem + '.pdf')
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(new_file, FileFormat=WD_FORMAT_PDF)
            finally:
                doc.Close()
finally:
    # Quit Word even if a document fails to open or save.
    word.Quit()

Related

How to detect folder path i ran script for?

The script below unzips a zip file, renames the extracted subtitles to match the TV show episode names, and then converts them to UTF-8.
Here is the problem:
I want to run this script on Linux from inside any TV show folder. The path given to the fixing function should be detected from the folder I run the Python script in, because it is not a constant path: there are many TV show folders.
Sorry for my English.
import zipfile
import os
import re
from chardet import detect
import fnmatch
def find(pattern, path):
    """Return the paths of all files below *path* whose name matches *pattern*.

    Args:
        pattern: Shell-style wildcard applied to the bare file name
            (for example ``'*.zip'``).
        path: Directory to search recursively.

    Returns:
        A list of matching file paths, each joined onto the directory it was
        found in; empty when nothing matches.
    """
    result = []
    for root, _dirs, files in os.walk(path):
        # fnmatch.filter applies the same matching as fnmatch.fnmatch per name.
        result.extend(os.path.join(root, name) for name in fnmatch.filter(files, pattern))
    return result
def get_encoding_type(file):
    """Return the ``'encoding'`` entry that ``detect`` reports for the raw bytes of *file*."""
    with open(file, 'rb') as handle:
        guess = detect(handle.read())
    return guess['encoding']
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
    """Re-encode a text file and normalise its line endings to CRLF.

    Args:
        filename: File to read, decoded with *encoding_from*.
        newFilename: File to write; may be the same path as *filename*.
        encoding_from: Encoding of the input file.
        encoding_to: Encoding of the output file (UTF-8 by default).
    """
    # Read everything before opening the output: when both names are the same
    # file, opening it for writing truncates it and nothing would be left to read.
    with open(filename, 'r', encoding=encoding_from) as fr:
        lines = fr.readlines()
    # newline='' writes '\r\n' verbatim on every platform instead of letting
    # text mode translate the '\n' a second time.
    with open(newFilename, 'w', encoding=encoding_to, newline='') as fw:
        for line in lines:
            # Strip only the line terminator, so a final line without one
            # keeps its last character.
            fw.write(line.rstrip('\n') + '\r\n')
def fixing(path=None):
    """Replace the subtitles in a TV-show folder with those from its zip archive.

    Steps: delete the existing ``.srt``/``.ass`` files, extract the first
    ``*.zip`` found under *path* and delete it, rename each extracted subtitle
    after the video whose ``SxxEyy`` tag it contains (``<video name>.ar.<ext>``),
    then re-encode every subtitle to UTF-8.

    Args:
        path: Folder to process. Defaults to the current working directory, so
            the script can be started from inside any show folder.
    """
    if path is None:
        path = os.getcwd()
    subtitle_exts = ('.srt', '.ass')
    video_exts = ('.mkv', '.avi', '.mp4')

    oldsrtfiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(oldsrtfiles), ' old subtitles found')
    for oldsrt in oldsrtfiles:
        # os.path.join works whether or not `path` ends with a separator.
        os.remove(os.path.join(path, oldsrt))
        print(f'{oldsrt} removed')

    archives = find('*.zip', path)
    if archives:
        filename = archives[0]
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path)
        print('files extarcted')
        # Remove the archive only after it has been closed.
        os.remove(filename)
        print("Zip File Removed!")
    else:
        print('no zip file found')

    newsrtFiles = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    print(len(newsrtFiles), ' subtitles found')
    showsTitles = [f for f in os.listdir(path) if f.endswith(video_exts)]
    print(len(showsTitles), ' tv show found')

    pattern = re.compile(r'S(\d{1,2})E(\d{1,2})')
    for show in showsTitles:
        match = pattern.search(show)
        if match is None:
            # Video without a season/episode tag: nothing to pair it with.
            continue
        SEneeded = match.group(0)
        stem = os.path.splitext(show)[0]
        for i, sub in enumerate(newsrtFiles):
            if SEneeded not in sub:
                continue
            # Same rule for .srt and .ass: "<video stem>.ar<subtitle ext>".
            new_name = f'{stem}.ar{os.path.splitext(sub)[1]}'
            os.rename(os.path.join(path, sub), os.path.join(path, new_name))
            newsrtFiles[i] = new_name

    forencoding = [f for f in os.listdir(path) if f.endswith(subtitle_exts)]
    for newsrtfile in forencoding:
        full_path = os.path.join(path, newsrtfile)
        from_codec = get_encoding_type(full_path)
        print(from_codec)
        # Convert into a temporary file and swap it in, so the source is never
        # opened for writing while it is still being read.
        tmp_path = full_path + '.tmp'
        correctSubtitleEncoding(full_path, tmp_path, from_codec, encoding_to='UTF-8')
        os.replace(tmp_path, full_path)

Use this function to get the current working directory:
os.getcwd()
To call this function, you need to import the os module:
import os

How to move and rename multiple files to a specific folder?

I have a small problem with a tool I built in Python.
This tool works classifying files by filenames and making folders by a word in every filename and then moving all the files to the corresponding folder.
Files:
09052019_6_filetype1_currenttime_randomnumber.xml
09052019_2_filetype2_currenttime_randomnumber.xml
09052019_9_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
Actual results:
filetype1_Status_6(folder)
09052019_6_filetype1_currenttime_randomnumber.xml
filetype2_Status_2(folder)
09052019_2_filetype2_currenttime_randomnumber.xml
filetype3_Status_9(folder)
09052019_9_filetype3_currenttime_randomnumber.xml
filetype3_Status_1(folder)
09052019_1_filetype3_currenttime_randomnumber.xml
09052019_1_filetype3_currenttime_randomnumber.xml
Code Version 1.0
#!/usr/bin/python3
# v1.0
# Importing modules
import os
import shutil
import sys
# Path of input and output files
src = input('Input files: ')
dest = input('Destination files: ')
os.chdir(dest)
def classify():
    """Move every entry of ``src`` into a folder named ``<topic>_Status_<status>``.

    The status and topic are the second and third ``_``-separated fields of the
    file name. Folders are created on demand, relative to the current directory.
    """
    for entry in os.listdir(src):
        parts = entry.split('_')
        foldername = '_'.join((parts[2], 'Status', parts[1]))
        if not os.path.exists(foldername):
            os.mkdir(foldername)
        shutil.move(os.path.join(src, entry), foldername)
print('Sorting out files, please wait...')
classify()
print('¡DONE!')
Improvement
But in the v2.0 I would like to "improve" it a little more, just keeping the same usability but changing filenames from original name to "Message_*.xml" and it works but only moving one file, not all of them.
Current results:
filetype1_Status_6(folder)
Message_.xml
filetype2_Status_2(folder)
Message.xml
filetype3_Status_9(folder)
Message_.xml
filetype3_Status_1(folder)
Message_.xml
Expected results:
filetype1_Status_6(folder)
Message_.xml
filetype2_Status_2(folder)
Message.xml
filetype3_Status_9(folder)
Message_.xml
filetype3_Status_1(folder)
Message_.xml
Message_1.xml
Code Version 2.0
#!/usr/bin/python3
# v2.0
# Importing modules
import os
import shutil
import sys
# Path of input and output files
src = input('Input files: ')
dest = input('Destination files: ')
os.chdir(dest)
# Copies every entry of `src` into a folder named "<topic>_Status_<status>",
# where status and topic are the 2nd and 3rd "_"-separated fields of the name.
def classify():
for f in os.listdir(src):
splitname = f.split('_')
status = splitname[1]
topic = splitname[2]
foldername = topic + '_' + 'Status_' + status
# NOTE(review): the target name is identical ('Message_.xml') for every file
# that maps to the same folder, so each copy lands on the same path.
newFileName = foldername + '\\' + 'Message_' + '.xml'
if not os.path.exists(foldername):
os.mkdir(foldername)
shutil.copy(os.path.join(src, f), newFileName)
print('Sorting out files, please wait...')
classify()
print('¡DONE!')

You are naming everything Message_ so you will never get multiple files. You need to parse the names in the folder and then increment the filenames accordingly.
# Goes inside the loop of classify(): `foldername`, `src` and `f` come from there.
os.makedirs(foldername, exist_ok=True)
# Probe Message_0.xml, Message_1.xml, ... until an unused name is found.
msgInt = 0
newFileName = os.path.join(foldername, 'Message_{}.xml'.format(msgInt))
while os.path.isfile(newFileName):
    msgInt += 1
    newFileName = os.path.join(foldername, 'Message_{}.xml'.format(msgInt))
shutil.copy(os.path.join(src, f), newFileName)
Now if you already have message_0.xml in your folder, you will get a message_1.xml instead, and so on.

How would I exclude directories from os.listdir results?

I'm making a script that will encode files within a directory using b64/b16 and I'm using os.listdir to do so, but it also lists directories which causes problems since now it's trying to encode directories as if it were a file.
How would I be able to exclude directories from os.listdir results?
import os
import sys
import base64
import codecs
import time
import string
import glob
#C:\\Users\\Fedora\\Desktop\\Win 10
# Question listing: encodes every entry of `path` and writes "<name>.crypt" next to it.
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# NOTE(review): os.listdir returns directory names as well as file names.
dirs = os.listdir(path)
files = []
filecount = 0
fileprogress = 0
for file in dirs:
files.append(file)
filecount = filecount + 1
for x in files:
# Clears the console before printing the progress for this entry.
os.system("cls")
fileprogress = fileprogress + 1
print("File " + str(fileprogress) + "/" + str(filecount))
print("Encrypting " + x + "...")
# NOTE(review): this open() is reached for directories too, since nothing filters them out.
inputfile = open(path + "\\" + x, "rb")
data = inputfile.read()
inputfile.close()
# Alternating base16/base64 passes; this is encoding, no key is involved.
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
data = base64.b64encode(data)
data = base64.b16encode(data)
outputfile = open(path + "\\" + x + ".crypt", "wb")
outputfile.write(data)
outputfile.close()

use filter
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same folder the entries are checked against.
dirs = os.listdir(filepath)
# list() because filter() is lazy in Python 3 and can only be iterated once.
files = list(filter(lambda x: os.path.isfile(os.path.join(filepath, x)), dirs))
or list comprehension with os.path.isfile()
filepath = "C:\\Users\\Fedora\\Desktop\\Win 10"
# List the same folder the entries are checked against.
dirs = os.listdir(filepath)
# Keep regular files only; directories are dropped.
files = [x for x in dirs if os.path.isfile(os.path.join(filepath, x))]

You can use os.path.isdir function to check if the current file is a directory.
Also, it is much better to use string formatting operations instead of string concatenation: not
print("File " + str(fileprogress) + "/" + str(filecount))
but
print("File {}/{}".format(fileprogress, filecount))
Such code is much easier to understand and modify.

Instead of using os.listdir(), you can use os.walk, which returns separate lists for files and directories.
python-oswalk-example
import os
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# os.walk yields (directory, sub-directory names, file names) for every folder.
# A separate loop name keeps `path` pointing at the start directory.
for (dirpath, dirs, files) in os.walk(path):
    print(dirpath)
    print(dirs)
    print(files)
pythoncentral os-walk
#Import the os module, for the os.walk function
import os
# Directory the walk starts from
path = "C:\\Users\\Fedora\\Desktop\\Win 10"
# Print each directory that is visited, followed by its files indented by a tab.
for current_dir, _subdirs, names in os.walk(path):
    print('Found directory: %s' % current_dir)
    for entry in names:
        print('\t%s' % entry)

Zipping file with shutil module

I use the code below to move files into their specific folders, but I don't know how to zip those folders at the end.
Note: I want to use the shutil module to zip the folders.
import shutil
import os
# Question listing: sorts the files in /tmp/ into music/picture/video folders by extension.
source="/tmp/"
destination1="/tmp/music/"
destination2="/tmp/picture/"
destination3="/tmp/video/"
if not os.path.exists(destination1):
os.makedirs(destination1)
if not os.path.exists(destination2):
os.makedirs(destination2)
if not os.path.exists(destination3):
os.makedirs(destination3)
for f in os.listdir(source):
# Extension checks are case-sensitive, so both spellings are listed by hand.
if f.endswith(".MP3") or f.endswith(".wma") or f.endswith(".WMA") or f.endswith(".mp3") :
shutil.move(source + f,destination1)
if f.endswith(".png") or f.endswith(".PNG") or f.endswith(".jpg") or f.endswith(".JPG") or f.endswith(".GIF") or f.endswith(".gif"):
shutil.move(source + f,destination2)
if f.endswith(".MP4") or f.endswith(".mp4") or f.endswith(".WMV") or f.endswith(".FLV") or f.endswith(".flv") or f.endswith(".wmv"):
shutil.move(source + f,destination3)
#now zipping:
# NOTE(review): the last argument is the single string 'musicvideopicture', not three folder names.
shutil.make_archive("archive",'zip',"/tmp/","music"+"video"+"picture")

"music"+"video"+"picture"
gives you
'musicvideopicture'
the simplest way will be make dir /tmp/archive/ and there music, video, pictures,
and then
shutil.make_archive("archive",'zip',"/tmp/archive")
Edit:
consider using gztar :)
Edit2:
import shutil
import os
# Sort the files in `source` into music/picture/video folders under
# /tmp/archive/ by extension, then pack that folder into archive.tar.gz.
source = "/tmp/"
dest_base = "/tmp/archive/"
destination1 = dest_base + "music/"
destination2 = dest_base + "picture/"
destination3 = dest_base + "video/"
audio_ext = ('mp3', 'wma')
pictu_ext = ('png', 'jpg', 'gif')
video_ext = ('mp4', 'wmv', 'flv', 'avi')

# One (extensions, folder) pair per category.
targets = (
    (audio_ext, destination1),
    (pictu_ext, destination2),
    (video_ext, destination3),
)
for _exts, folder in targets:
    os.makedirs(folder, exist_ok=True)

for f in os.listdir(source):
    src_path = os.path.join(source, f)
    if not os.path.isfile(src_path):
        # Skip directories, including the archive folder itself.
        continue
    # splitext yields '' for names without an extension, so a file that is
    # literally called "mp3" is not mistaken for audio.
    ext = os.path.splitext(f)[1][1:].lower()
    for exts, folder in targets:
        if ext in exts:
            shutil.move(src_path, folder)
            break

#now zipping:
shutil.make_archive("archive", 'gztar', "/tmp/archive")

get Path from xml-files (Python)

I have 300 XML files. Each file contains a path (see the code below), and I want to make a list (.csv) of these paths with Python.
<da:AdminData>
<da:Datax />
<da:DataID>223</da:DataID>
<da:Date>2013-08-19</da:Date>
<da:Time>13:27:25</da:Time>
<da:Modification>2013-08-19</da:Modification>
<da:ModificationTime>13:27:25</da:ModificationTime>
**<da:Path>D:\08\06\xxx-aaa_20130806_111339.dat</da:Path>**
<da:ID>xxx-5225-fff</da:ID>
I wrote the following code, but it does not work for subdirectories:
import os, glob, re, time, shutil
# Question listing: collects the <da:Path> value of each XML file into one text file.
# NOTE(review): 'D:' has no trailing backslash; presumably the root of drive D is meant (r'D:\\' style path) — confirm.
xmlpath = r'D:'
outfilename = "result.csv"
# NOTE(review): `list` shadows the built-in; this pattern has no recursive component,
# so only files directly in xmlpath are matched.
list = glob.glob(os.path.join(xmlpath,'*.xml'))
output = ""
for file in list :
# NOTE(review): the file handle is never closed.
fh = open(file)
text = fh.read()
pattern = "<da:Path>(.*)</da:Path>"
pattern = re.compile(pattern);
# search() returns the first match only, i.e. one path per file.
a = pattern.search(text)
if a:
output += '\n' + a.group(1)
# NOTE(review): `outfile` is not defined in this listing; the name assigned above is `outfilename`.
logfile = open(outfile, "w")
logfile.write(output)
logfile.close()

To glob recursively, it is best to use a combination of os.walk and fnmatch.fnmatch. Example:
import os
import fnmatch
def recursive_glob(rootdir, pattern):
    """Return every file below *rootdir* whose name matches *pattern*.

    Args:
        rootdir: Directory to walk recursively.
        pattern: Shell-style wildcard matched against the bare file name.

    Returns:
        A list of matching paths, each joined onto the directory it was found in.
    """
    matching_files = []
    for d, _, fnames in os.walk(rootdir):
        matching_files.extend(
            os.path.join(d, fname) for fname in fnames
            if fnmatch.fnmatch(fname, pattern)
        )
    return matching_files


# A raw string literal cannot end in a single backslash (r"D:\" is a syntax
# error), so the drive root is written with an escaped backslash instead.
xmlfiles = recursive_glob("D:\\", "*.xml")

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Convert Microsoft Word document to PDF using Python - python

This is way easier: from docx2pdf import convert convert(word_path, pdf_path)

Related

How to detect folder path i ran script for?

How to move and rename multiple files to a specific folder?

How would I exclude directories from os.listdir results?

Zipping file with shutil module

get Path from xml-files (Python)

Categories

Resources