I have the code below. The folder it points to contains an Excel file called "Matched_Results.xls" with two columns (the original name of the PDF file, and the desired new name), as well as all of the original PDF files. How do I run this code so that all of my PDFs get renamed?
def rename_file(file_to_rename, source_file):
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xls')
    # excel file to get new filename????
    sheet = wb.sheet_by_index(0)
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        col = 2  # 'john smith' col number
        if row_value[col] == filename:
            new_filename = f'{row_value[col+1]}'  # format as you want
            p.rename(Path(p.parent, new_filename + p.suffix))  # rename
            break

def get_paths_in_directory(directory):
    return Path(directory).glob('*.pdf')

if __name__ == "__main__":
    source_file = r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xlsx'  # excel file to get new filename
    source_directory = r'C:\Users\Chris Lee\Desktop\File_Rename_Python'  # directory where your files to rename are
    # iterate all pdf files in the given directory
    paths = get_paths_in_directory(source_directory)
    for file_to_rename in paths:
        rename_file(str(file_to_rename), source_file)
The point of if __name__ == "__main__": is to check whether the code was run directly rather than imported. Since you are not importing it:
def rename_file(file_to_rename, source_file):
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xls')
    # wb should be the below variable instead
    # wb = xlrd.open_workbook(source_file)
    # excel file to get new filename????
    sheet = wb.sheet_by_index(0)
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        col = 2  # 'john smith' col number
        if row_value[col] == filename:
            new_filename = f'{row_value[col+1]}'  # format as you want
            p.rename(Path(p.parent, new_filename + p.suffix))  # rename
            print("the if was matched!", new_filename, Path(p.parent, new_filename + p.suffix))
            break

def get_paths_in_directory(directory):
    return Path(directory).glob('*.pdf')

if __name__ == "__main__":
    source_file = r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xlsx'  # excel file to get new filename
    source_directory = r'C:\Users\Chris Lee\Desktop\File_Rename_Python'  # directory where your files to rename are
    # iterate all pdf files in the given directory
    paths = get_paths_in_directory(source_directory)
    for file_to_rename in paths:
        rename_file(str(file_to_rename), source_file)
You can replace everything after and including if __name__ == "__main__": with:
def my_func():
    source_file = r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xlsx'  # excel file to get new filename
    source_directory = r'C:\Users\Chris Lee\Desktop\File_Rename_Python'  # directory where your files to rename are
    # iterate all pdf files in the given directory
    paths = get_paths_in_directory(source_directory)
    for file_to_rename in paths:
        rename_file(str(file_to_rename), source_file)
and call the function with:
my_func()
so your whole Jupyter cell would look like:
def rename_file(file_to_rename, source_file):
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xls')
    # excel file to get new filename????
    sheet = wb.sheet_by_index(0)
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        col = 3  # 'john smith' col number
        if row_value[col] == filename:
            new_filename = f'{row_value[col+1]}'  # format as you want
            p.rename(Path(p.parent, new_filename + p.suffix))  # rename
            print("the if was matched!", new_filename, Path(p.parent, new_filename + p.suffix))
            break

def get_paths_in_directory(directory):
    return Path(directory).glob('*.pdf')

def my_func():
    source_file = r'C:\Users\Chris Lee\Desktop\File_Rename_Python\Matched_Results.xlsx'  # excel file to get new filename
    source_directory = r'C:\Users\Chris Lee\Desktop\File_Rename_Python'  # directory where your files to rename are
    # iterate all pdf files in the given directory
    paths = get_paths_in_directory(source_directory)
    for file_to_rename in paths:
        rename_file(str(file_to_rename), source_file)

my_func()
provided you have all the needed imports.
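For reference, the only imports this snippet relies on are these two:

from pathlib import Path  # used for Path(), .stem, .parent and .rename()
import xlrd                # used to read the Matched_Results workbook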
EDIT: Here is a function I put together from some threads on SO. You will need to figure out how you want to rename your files, create that algorithm, and then implement it inside the if statement.
import os

def rename_func(directory):
    d = os.fsencode(directory)
    for file in os.listdir(d):
        filename = os.fsdecode(file)
        if filename.endswith(".pdf"):
            # this is what your file will be renamed to
            os.rename(os.path.join(directory, filename),
                      os.path.join(directory, 'renamed_file.pdf'))

rename_func(r"C:\my_dir")
Does Python have any built-in functionality to add a number to a filename if it already exists?
My idea is that it would work the way certain OSes work: if a file is output to a directory where a file of that name already exists, it would append a number to the name or increment it.
I.e., if "file.pdf" exists, it will create "file2.pdf", and next time "file3.pdf".
I ended up writing my own simple function for this. Primitive, but gets the job done:
import os

def uniquify(path):
    filename, extension = os.path.splitext(path)
    counter = 1

    while os.path.exists(path):
        path = filename + " (" + str(counter) + ")" + extension
        counter += 1

    return path
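A quick usage sketch, assuming a hypothetical report.pdf already exists in the current directory:

path = uniquify('report.pdf')   # returns 'report (1).pdf' because report.pdf is taken
with open(path, 'w') as f:      # the returned name did not exist when checked
    f.write('placeholder contents')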
In a way, Python has this functionality built into the tempfile module. Unfortunately, you have to tap into a private global variable, tempfile._name_sequence. This means that officially, tempfile makes no guarantee that in future versions _name_sequence even exists -- it is an implementation detail.
But if you are okay with using it anyway, this shows how you can create uniquely named files of the form file#.pdf in a specified directory such as /tmp:
import tempfile
import itertools as IT
import os

def uniquify(path, sep = ''):
    def name_sequence():
        count = IT.count()
        yield ''
        while True:
            yield '{s}{n:d}'.format(s = sep, n = next(count))

    orig = tempfile._name_sequence
    with tempfile._once_lock:
        tempfile._name_sequence = name_sequence()
        path = os.path.normpath(path)
        dirname, basename = os.path.split(path)
        filename, ext = os.path.splitext(basename)
        fd, filename = tempfile.mkstemp(dir = dirname, prefix = filename, suffix = ext)
        tempfile._name_sequence = orig
    return filename

print(uniquify('/tmp/file.pdf'))
I was trying to implement the same thing in my project, but @unutbu's answer seemed too 'heavy' for my needs, so I finally came up with the following code:
import os

index = ''
while True:
    try:
        os.makedirs('../hi' + index)
        break
    except WindowsError:
        if index:
            index = '(' + str(int(index[1:-1]) + 1) + ')'  # Add 1 to the number in brackets
        else:
            index = '(1)'
        pass  # Go and try to create it again
Just in case someone stumbled upon this and requires something simpler.
If all files being numbered isn't a problem, and you know beforehand the name of the file to be written, you could simply do:
import os

counter = 0
filename = "file{}.pdf"
while os.path.isfile(filename.format(counter)):
    counter += 1

filename = filename.format(counter)
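A minimal follow-up showing the free name being used (the contents written here are just a placeholder):

with open(filename, "w") as f:   # filename is now the first free fileN.pdf name
    f.write("placeholder contents")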
Recently I encountered the same thing and here is my approach:
import os

file_name = "file_name.txt"
if os.path.isfile(file_name):
    expand = 1
    while True:
        expand += 1
        new_file_name = file_name.split(".txt")[0] + str(expand) + ".txt"
        if os.path.isfile(new_file_name):
            continue
        else:
            file_name = new_file_name
            break
Let's say you already have these files on disk: foo.txt, foo_1.txt, foo_2.txt and bar.txt (but no baz.txt).
This function generates the next available filename that does not already exist, by adding a _1, _2, _3, ... suffix before the extension if necessary:
import os

def nextnonexistent(f):
    fnew = f
    root, ext = os.path.splitext(f)
    i = 0
    while os.path.exists(fnew):
        i += 1
        fnew = '%s_%i%s' % (root, i, ext)
    return fnew

print(nextnonexistent('foo.txt'))  # foo_3.txt
print(nextnonexistent('bar.txt'))  # bar_1.txt
print(nextnonexistent('baz.txt'))  # baz.txt
Since the tempfile hack A) is a hack and B) still requires a decent amount of code anyway, I went with a manual implementation. You basically need:
A way to safely create a file if and only if it does not exist (this is what the tempfile hack affords us).
A generator for filenames.
A wrapping function to hide the mess.
I defined a safe_open that can be used just like open:
import errno
import itertools
import os
import platform

def iter_incrementing_file_names(path):
    """
    Iterate incrementing file names. Start with path and add " (n)" before the
    extension, where n starts at 1 and increases.

    :param path: Some path
    :return: An iterator.
    """
    yield path
    prefix, ext = os.path.splitext(path)
    for i in itertools.count(start=1, step=1):
        yield prefix + ' ({0})'.format(i) + ext

def safe_open(path, mode):
    """
    Open path, but if it already exists, add " (n)" before the extension,
    where n is the first number found such that the file does not already
    exist.

    Returns an open file handle. Make sure to close!

    :param path: Some file name.
    :return: Open file handle... be sure to close!
    """
    flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY
    if 'b' in mode and platform.system() == 'Windows':
        flags |= os.O_BINARY
    for filename in iter_incrementing_file_names(path):
        try:
            file_handle = os.open(filename, flags)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise
        else:
            return os.fdopen(file_handle, mode)

# Example
with safe_open("some_file.txt", "w") as fh:
    print("Hello", file=fh)
I haven't tested this yet but it should work, iterating over possible filenames until the file in question does not exist at which point it breaks.
import itertools
import os

def increment_filename(fn):
    fn, extension = os.path.splitext(fn)
    yield fn + extension
    for n in itertools.count(start=1, step=1):
        yield '%s%d%s' % (fn, n, extension)

# original_filename is the name you want to start from
for filename in increment_filename(original_filename):
    if not os.path.isfile(filename):
        break
This works for me.
The initial file name is 0.yml; if it exists, the number is incremented until a free name is found:
import os
import itertools

def increment_filename(file_name):
    fid, extension = os.path.splitext(file_name)
    yield fid + extension
    for n in itertools.count(start=1, step=1):
        new_id = int(fid) + n
        yield "%s%s" % (new_id, extension)

def get_file_path():
    target_file_path = None
    for file_name in increment_filename("0.yml"):
        file_path = os.path.join('/tmp', file_name)
        if not os.path.isfile(file_path):
            target_file_path = file_path
            break
    return target_file_path
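A quick usage sketch (the YAML line written here is only a placeholder):

target = get_file_path()          # e.g. '/tmp/0.yml', or '/tmp/1.yml' if 0.yml is taken
if target:
    with open(target, 'w') as f:
        f.write('key: value\n')   # placeholder contents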
import os

class Renamer():
    def __init__(self, name):
        self.extension = name.split('.')[-1]
        self.name = name[:-len(self.extension) - 1]
        self.filename = self.name

    def rename(self):
        i = 1
        if os.path.exists(self.filename + '.' + self.extension):
            while os.path.exists(self.filename + '.' + self.extension):
                self.filename = '{} ({})'.format(self.name, i)
                i += 1
        return self.filename + '.' + self.extension
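Usage would look something like this (report.pdf is just a hypothetical file name):

new_name = Renamer('report.pdf').rename()   # 'report (1).pdf' if report.pdf already exists
print(new_name)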
I found that an os.path.exists() check did what I needed. I'm using dictionary-to-CSV saving as an example, but the same logic could work for any file type:
import os
import numpy as np

def smart_save(filename, dictionary):
    od = filename + '_'  # added underscore before number for clarity
    for i in np.arange(0, 500, 1):  # I set an arbitrary upper limit of 500
        d = od + str(i)
        if os.path.exists(d + '.csv'):
            pass
        else:
            with open(d + '.csv', 'w') as f:  # or any saving operation you need
                for key in dictionary.keys():
                    f.write("%s,%s\n" % (key, dictionary[key]))
            break
Note: this appends a number (starting at 0) to the file name by default, but it's easy to shift that around.
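For example, with a hypothetical results dictionary:

smart_save('experiment', {'accuracy': 0.93, 'loss': 0.12})
# the first call writes experiment_0.csv, the next one experiment_1.csv, and so on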
This function checks whether the file name already exists, using a regex expression and recursion:
import os
import re

def validate_outfile_name(input_path):
    filename, extension = os.path.splitext(input_path)
    if os.path.exists(input_path):
        output_path = ""
        pattern = r'\([0-9]\)'
        match = re.search(pattern, filename)
        if match:
            version = filename[match.start() + 1]
            try:
                new_version = int(version) + 1
            except:
                new_version = 1
            output_path = f"{filename[:match.start()]}({new_version}){extension}"
            output_path = validate_outfile_name(output_path)
        else:
            version = 1
            output_path = f"{filename}({version}){extension}"
        return output_path
    else:
        return input_path
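For example (results.csv is a hypothetical name):

out = validate_outfile_name('results.csv')   # returns 'results(1).csv' if results.csv already exists
print(out)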
I've implemented a similar solution with pathlib. It creates file names that match the pattern path/<file-name>-\d\d.ext. Perhaps this solution can help...
import pathlib
from toolz import itertoolz as itz

def file_exists_add_number(path_file_name, digits=2):
    pfn = pathlib.Path(path_file_name)
    parent = pfn.parent  # parent-dir of file
    stem = pfn.stem      # file-name w/o extension
    suffix = pfn.suffix  # NOTE: extension starts with '.' (dot)!

    try:
        # search for files ending with '-\d\d.ext'
        last_file = itz.last(parent.glob(f"{stem}-{digits * '?'}{suffix}"))
    except:
        curr_no = 1
    else:
        curr_no = int(last_file.stem[-digits:]) + 1

    # int to string and add leading zeros
    curr_no = str(curr_no).zfill(digits)
    path_file_name = parent / f"{stem}-{curr_no}{suffix}"
    return str(path_file_name)
Please note: this solution starts at 01 and will only find existing files matching the -\d\d pattern!
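A usage sketch (data/report.pdf is a hypothetical path):

print(file_exists_add_number('data/report.pdf'))   # 'data/report-01.pdf' on the first call
print(file_exists_add_number('data/report.pdf'))   # 'data/report-02.pdf' once report-01.pdf has been created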
import os

def create_file():
    counter = 0
    filename = "file"
    while os.path.isfile(f"dir/{filename}{counter}.txt"):
        counter += 1
    print(f"{filename}{counter}.txt")
A little bit late, but something like this should still work properly; maybe it will be useful for someone.
You can use a simple counter loop to do this (an image downloader as an example for you):
import requests

def image_downloader():
    image_url = 'some_image_url'
    for count in range(10):
        image_data = requests.get(image_url).content
        with open(f'image_{count}.jpg', 'wb') as handler:
            handler.write(image_data)
Files will increment properly. Result is:
image.jpg
image_0.jpg
image_1.jpg
image_2.jpg
image_3.jpg
image_4.jpg
image_5.jpg
image_6.jpg
image_7.jpg
image_8.jpg
image_9.jpg
An easy way to create a new file if a file with that name is already in your folder:
import os

# wb is an already-created workbook object (e.g. an openpyxl Workbook)
if 'sample.xlsx' in os.listdir('testdir/'):
    i = 2
    while os.path.exists(f'testdir/sample ({i}).xlsx'):
        i += 1
    wb.save(filename=f"testdir/sample ({i}).xlsx")
else:
    wb.save(filename=f"testdir/sample.xlsx")
Below is the data in CFS_Config.txt. This text file records where the documents are stored, so that directory paths are not hardcoded in the program. For instance, if the program is moved to another environment, we only need to change the directory paths in the CFS_Config.txt file.
Folder Path = ../dataprep/source_documents
ED Notes name = ED Notes
ED Notes output = ../dataprep/ED_Notes
The Python code below reads the configuration from the CFS_Config.txt mentioned earlier and also generates a text file.
The problem encountered is that I am told the ../dataprep/ED_Notes path was not found. Please do take a look at the code; if more code is needed I will try my best to provide it, thanks!!! :((
from preprocessing import ednotes_extractor
import os

def read_config():
    # open existing file to read configuration
    cfs_config_txt = open("..\CFS_Config.txt", "r")
    file_list = []
    root_dir = ""
    ednotes_name = ""
    ednotes_output = ""

    for line in cfs_config_txt:
        file_list.append(line)

    if "Folder Path = " in file_list[0]:
        root_dir = str(file_list[0])
        root_dir = root_dir.replace("Folder Path = ", "")
        root_dir = root_dir.replace("\n", "")

    if "ED Notes name = " in file_list[1]:
        ednotes_name = str(file_list[1])
        ednotes_name = ednotes_name.replace("ED Notes name = ", "")
        ednotes_name = ednotes_name.replace("\n", "")

    if "ED Notes output = " in file_list[2]:
        ednotes_output = str(file_list[2])
        ednotes_output = ednotes_output.replace("ED Notes output = ", "")
        ednotes_output = ednotes_output + ".txt"
        ednotes_output = ednotes_output.replace("\n", "")

    return root_dir, ednotes_name, ednotes_output

def convert_txt(choices):
    root_dir, ednotes_name, ednotes_output = read_config()

    if (choices == 1):
        # open new file to write string data textfile
        text_file = open(ednotes_output, 'w', encoding='utf-8')
        text_file.write("cat_id|content\n")

        for filename in os.listdir(root_dir):
            source_directory = root_dir + '/' + filename
            arr = ednotes_extractor.get_ednotes(source_directory)

            # open existing file to append the items in the array to the previously written textfile
            text_file = open(ednotes_output, 'a', encoding='utf-8')
            for item in arr:
                text_file.write("%s\n" % item)

    elif (choices == 2):
        print("Hi")
I'm trying to replicate the exporting of a Code Module from an Excel sheet in Python.
The following works in VBA:
Public Sub ExportModules()
    Dim wb As Workbook
    Set wb = ThisWorkbook
    Dim D As String
    Dim N
    D = ThisWorkbook.Path

    For Each VBComp In wb.VBProject.VBComponents
        If (VBComp.Type = 1) Then
            N = D + "\" + VBComp.Name + ".txt"
            VBComp.Export N
        End If
    Next

End Sub
And I have the following in Python:
import os
import sys
import glob
from win32com.client import Dispatch

scripts_dir = 'folder address'
com_instance = Dispatch("Excel.Application")
com_instance.Visible = False
com_instance.DisplayAlerts = False

for script_file in glob.glob(os.path.join(scripts_dir, "*.xlsm")):
    print "Processing: %s" % script_file
    (file_path, file_name) = os.path.split(script_file)
    objworkbook = com_instance.Workbooks.Open(script_file)
    for xlmodule in objworkbook.VBProject.VBComponents:
        xlmodule.Export('export file name')
My question is, what do I have to do in Python to replicate the Export of the file as per the VBA code?
Use oletools. The xltrail blog (linked below) provides a good way to extract .bas files from .xlsm or other Excel files:
import os
import shutil
from oletools.olevba3 import VBA_Parser

EXCEL_FILE_EXTENSIONS = ('xlsb', 'xls', 'xlsm', 'xla', 'xlt', 'xlam',)

def parse(workbook_path):
    vba_path = workbook_path + '.vba'
    vba_parser = VBA_Parser(workbook_path)
    vba_modules = vba_parser.extract_all_macros() if vba_parser.detect_vba_macros() else []

    for _, _, _, content in vba_modules:
        decoded_content = content.decode('latin-1')
        lines = []
        if '\r\n' in decoded_content:
            lines = decoded_content.split('\r\n')
        else:
            lines = decoded_content.split('\n')
        if lines:
            name = lines[0].replace('Attribute VB_Name = ', '').strip('"')
            content = [line for line in lines[1:] if not (
                line.startswith('Attribute') and 'VB_' in line)]
            if content and content[-1] == '':
                content.pop(len(content) - 1)
            lines_of_code = len(content)
            non_empty_lines_of_code = len([c for c in content if c])
            if non_empty_lines_of_code > 0:
                if not os.path.exists(os.path.join(vba_path)):
                    os.makedirs(vba_path)
                with open(os.path.join(vba_path, name + '.bas'), 'w') as f:
                    f.write('\n'.join(content))

if __name__ == '__main__':
    for root, dirs, files in os.walk('.'):
        for f in dirs:
            if f.endswith('.vba'):
                shutil.rmtree(os.path.join(root, f))

        for f in files:
            if f.endswith(EXCEL_FILE_EXTENSIONS):
                parse(os.path.join(root, f))
I have tried it and it works great.
Ref: https://www.xltrail.com/blog/auto-export-vba-commit-hook
I'm working on a program to split excel files into sections of 1000. I can't seem to get it to create a second excel file, as xlsxwriter doesn't create the second file.
from os.path import join, dirname, abspath
from xlrd.sheet import ctype_text
import csv
import os
import sys
import xlrd
import xlsxwriter
import xlwt

file_paths = sys.argv[1:]
draganddrop = ''.join(file_paths)

beginGrab = 0
counting = 0
endGrab = 1000
thousands = 0

if draganddrop == "":
    fileName = raw_input("\nInput the file with extension\n>")
else:
    fileName = draganddrop

stopPoint = fileName.index('.')
prepRev = fileName[stopPoint:]
preName = fileName[:stopPoint]

if prepRev == ".csv":
    excelFile = xlsxwriter.Workbook(preName + '.xlsx')
    worksheet = excelFile.add_worksheet()
    with open(fileName, 'rb') as f:
        content = csv.reader(f)
        for index_col, data_in_col in enumerate(content):
            for index_row, data_in_cell in enumerate(data_in_col):
                worksheet.write(index_col, index_row, data_in_cell)
    excelFile.close()
    fileName = (preName + '.xlsx')
    delMe = 1
    print("Temporary Convert to xlsx done.\n")

stopPoint = fileName.index('.')
prepRev = fileName[0:stopPoint]

fname = join(dirname(abspath(__file__)), fileName)
xl_workbook = xlrd.open_workbook(fname)
sheet_names = xl_workbook.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])

book = xlwt.Workbook(encoding="utf-8")
worksheet = book.add_sheet("Results", cell_overwrite_ok=True)

workbook = xlrd.open_workbook(fileName)
for sheet in workbook.sheets():
    for row in range(sheet.nrows):
        row = int(row)
        if (int(row) > 1000):
            subDivide = int(row) / 1000
            while (thousands != subDivide + 1):
                thousands = thousands + 1
                counting = 0
                totalName = preName + "_" + str(thousands) + ".xlsx"
                print(totalName)
                excelFile = xlsxwriter.Workbook(str(totalName))
                worksheet = excelFile.add_worksheet()
                with open(totalName, 'rb') as f:
                    col = xl_sheet.col_slice(0, 1, 10101010)
                    for idx, cell_obj in enumerate(col, start=beginGrab):
                        counting = counting + 1
                        if (counting == 1000):
                            break
                        cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
                        cell_obj_str = str(cell_obj)
                        telePhone = (cell_obj_str[7:19])
                        worksheet.write(idx + 1, 0, "1" + telePhone)
                worksheet.write(0, 0, "Telephone Number")
                beginGrab = thousands * 1000
                endGrab = beginGrab + 1000
            excelFile.close()
            excelFile = None
        else:
            print("Mate, this is Tiny!")

print("Ding! Job Done!")
I've been rubber ducking this and I can't find where I'm at fault.
EDIT:
SOLVED!!
By creating a sheet and then closing it, the program can then grasp it. I will probably make a git issue about this.
if prepRev == ".csv":
    totalName = preName + '.xlsx'
    excelFile = xlsxwriter.Workbook(totalName)
    excelFile.close()

Closing it lets open see it while it still contains the same info.

excelFile = xlsxwriter.Workbook(totalName)
worksheet = excelFile.add_worksheet()
with open(fileName, 'rb') as f:
Doesn't the save/close line need to be within the while loop? Otherwise it looks like it will only save either the first/last item:
while (thousands != subDivide + 1):
    # ... write file ...
excelFile.close()
That line is probably the reason why you cannot read back your file and your script crashes:
fname = join(dirname(abspath('__file__')), '%s' % fileName)
'__file__' shouldn't have quotes. I'd do:
fname = join(dirname(abspath(__file__)), fileName)