Python: Compare file name after deleting some characters from the name - python

Python:
I'm trying to compare file names in a directory after stripping n characters from each name. If the stripped name already exists, a number should be appended to the end of the name.
I wrote code that renames all the files in the directory, but I'm having trouble doing the comparison first and then renaming only when the stripped name collides with an existing file name.
import os
# NOTE(review): the indentation of this snippet was lost; the statements after
# the def are the body of main() and of its for / try / except blocks.
def main():
i = 0
# NOTE(review): "\U" inside a non-raw string literal is treated as a unicode
# escape by Python 3 -- a raw string or doubled backslashes is presumably needed.
for filename in os.listdir("C:\\Users\User\Desktop\Tests"):
try:
# NOTE(review): dirName is not assigned anywhere in this snippet, and the
# result of this comparison is discarded, so it does not select a branch.
dirName != filename
print (filename)
# NOTE(review): an except clause expects an exception class, not a boolean
# expression; an if/else on the comparison is presumably what was intended.
except dirName == filename:
dst ="dup" + str(i) + ".txt"
src = dirName
dst ='Test'+ dst
# rename() function will
# rename all the files
os.rename(src, dst)
i += 1
# Driver Code
if __name__ == '__main__':
# Calling main() function
main()
I can get it to rename the files directly, but I am unable to compare the file names first and rename only when two names are the same. I'm new to Python!

# NOTE(review): the indentation of this snippet was lost; the lines after the
# def form the bodies of the three nested loops.
def main():
i = 0
for root, dirs, files in os.walk("C:\\Users\User\Desktop\Tests"):
for filename in files:
for filename2 in files :
# NOTE(review): both loops iterate the same list, so each name is also
# compared with itself and that pair always falls through to the rename.
if filename != filename2: # if your file name is not repetitious you will pass it and compare next one
#print(filename)
continue
# if a repetitious filename found your code for rename it will come up
# NOTE(review): i is never incremented in this function, so dst is built
# from the same number every time.
dst ="dup" + str(i) + ".txt"
# NOTE(review): src is the bare file name and is not joined with root.
src = filename
dst ='Test'+ dst
os.rename(src, dst)

Related

Shutil - Move and Rename

In my Python script I am moving a bunch of files from different subdirectories to one location. The trouble is that several of the files have the same name.
The code I am currently running is:
# Collect every .log file found under dir_path into FILE_LOCATION_PATH.
for root, dirs, files in os.walk(dir_path):
    for file in files:
        if file.endswith('.log'):
            # shutil.move raises shutil.Error when a file with this name
            # already exists in the destination directory.
            shutil.move(os.path.join(root, file), FILE_LOCATION_PATH)
The trouble with this is it will throw the error eventually of:
File "downloader.py", line 78, in <module>
shutil.move(os.path.join(root, file), FILE_LOCATION_PATH)
File "/usr/lib/python3.6/shutil.py", line 548, in move
raise Error("Destination path '%s' already exists" % real_dst)
shutil.Error: Destination path '/home/a.log' already exists
I can negate this by changing my move line to copy such as the below
# copy instead of move: an existing file of the same name is overwritten
shutil.copy(os.path.join(root, file), FILE_LOCATION_PATH)
This will however replace any files with this name with the latest copy. I am trying to figure out a way I can rename any files with the same name to follow a naming convention like this
a.log
a_1.log
a_2.log
Any suggestions on how best to approach this or sample code. I am new to Python and trying to complete my first practical script.
If you are fine with filename like filename.log, filename.log_1 ... then code is just 4 lines more to your original code:
import shutil
import os
FILE_LOCATION_PATH='/destination/directory'
dir_path='/source/directory'
# Move every .log file under dir_path into FILE_LOCATION_PATH, appending
# _1, _2, ... to the full file name when that name is already taken there.
for root, dirs, files in os.walk(dir_path):
    for file in files:
        if file.endswith('.log'):
            count = 1
            destination_file = os.path.join(FILE_LOCATION_PATH, file)
            # Probe file.log_1, file.log_2, ... until an unused name is found.
            while os.path.exists(destination_file):
                destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
                count += 1
            shutil.move(os.path.join(root, file), destination_file)
Probably this is what you need
import re
import os
import shutil
FILE_LOCATION_PATH = 'dst'
def increment_file_name(f_name):
    """Return *f_name* with its numeric ``_N`` counter increased by one.

    A name without a counter gets ``_1``. The counter sits in front of the
    extension, and any directory part of *f_name* is kept as is.

        a.txt   -> a_1.txt
        a_1.txt -> a_2.txt

    Raises:
        ValueError: if the file-name component of *f_name* cannot be parsed
            (for example when it is empty).
    """
    dirname, basename = os.path.split(f_name)
    # Groups: name, optional "_<digits>" counter, optional ".<extension>".
    # The name group is lazy, so it stays as short as the rest allows.
    m = re.match(r"^(.+?)(_(\d+))?(\.(.+))??$", basename)
    if m is None:
        raise ValueError("no file name to increment in {!r}".format(f_name))
    name = m.group(1)
    counter = int(m.group(3) or 0)
    ext = m.group(5)

    new_name = "{}_{}".format(name, counter + 1)
    if ext:
        new_name = "{}.{}".format(new_name, ext)
    # Only re-attach a directory when the input actually had one.
    if dirname:
        return os.path.join(dirname, new_name)
    return new_name
def safe_move(path):
    """Move *path* into FILE_LOCATION_PATH without overwriting a file there.

    The file keeps its name unless that name already exists in the
    destination; in that case increment_file_name() is applied repeatedly
    until an unused name is found.
    """
    f_name = os.path.basename(path)
    while os.path.exists(os.path.join(FILE_LOCATION_PATH, f_name)):
        f_name = increment_file_name(f_name)
    shutil.move(path, os.path.join(FILE_LOCATION_PATH, f_name))
# Move every .log file found anywhere under 'src'.
for root, dirs, files in os.walk('src'):
    for file in files:
        if file.endswith('.log'):
            safe_move(os.path.join(root, file))

Python rename file based on file in same directory

I have a file directory structure like this:
/folder
aaa.pdf
bbb.xml
stamped.pdf
Where PDF and XML file names have no patterns to them, except that every folder has stamped.pdf in it (stamped.pdf needs to be ignored).
I want to rename the .xml file in the directory to match the .pdf file name, so I end up with:
/folder
aaa.pdf
aaa.xml
stamped.pdf
Python so far (not renaming anything yet, just trying to get the filenames at this point)
import os
pdf = ('.pdf')
xml = ('.xml')
stamped = ('stamped.pdf')
# Walk 'folder' and print which XML file would be renamed after which PDF.
for folderName, subfolders, filenames in os.walk('folder'):
    print('The current folder is ' + folderName)
    for filename in filenames:
        namefile = os.path.splitext(filename)[0]
        if (filename.endswith(pdf) and filename != stamped):
            pdfname = namefile
            print('PDF File Name: ' + pdfname)
        if filename.endswith(xml):
            # NOTE(review): pdfname is only bound after the PDF branch above
            # has run, so an .xml listed before the .pdf hits an unbound name.
            print('RENAME XML FILE NAME: ' + namefile + 'TO: ' + pdfname)
        else:
            print('')
    print('')
Right now I'm just printing values before I get into the renaming.
In the script above, pdfname is undefined in the XML conditional, because the pdfname variable isn't set/available in the XML conditional.
How can I pass the pdfname variable so that it can be used to rename the XML file in the same directory?
Thanks!
import os
# In every folder below the current directory, rename the XML file so that
# its base name matches the folder's PDF (stamped.pdf is ignored).
for parent, _, files in os.walk('.'):
    if not files:
        continue
    pdf_file = None
    xml_file = None
    for filename in files:
        lowered = filename.lower()
        if lowered.endswith('.pdf') and lowered != 'stamped.pdf':
            pdf_file = filename
        elif lowered.endswith('.xml'):
            xml_file = filename
    # Both a PDF (to take the name from) and an XML (to rename) are needed;
    # without this guard splitext(None) raises TypeError.
    if pdf_file is None or xml_file is None:
        continue
    new_xml_filename = '{}/{}.xml'.format(parent, os.path.splitext(pdf_file)[0])
    xml_file = '{}/{}'.format(parent, xml_file)
    if os.path.exists(new_xml_filename):
        print('cannot rename %s without overwriting an existing file. skipping' % xml_file)
        continue
    os.rename(xml_file, new_xml_filename)
    print('renamed {} -> {}'.format(xml_file, new_xml_filename))

Change a filename?

I have some files in a folder named like this: test_1999.0000_seconds.vtk. What I would like to do is change the name of each file to test_1999.0000.vtk.
You can use os.rename
os.rename("test_1999.0000_seconds.vtk", "test_1999.0000.vtk")
import os
currentPath = os.getcwd()  # get the current working directory
unWantedString = "_seconds"
matchingFiles = []
for path, subdirs, files in os.walk(currentPath):
    for f in files:
        if f.endswith(".vtk"):  # To group the vtk files
            # os.path.join picks the right separator for the platform
            matchingFiles.append(os.path.join(path, f))
print(matchingFiles)
for mf in matchingFiles:
    folder, name = os.path.split(mf)
    # Only look at the file name, so that a directory whose name happens to
    # contain the substring is left untouched.
    if unWantedString in name:
        oldName = mf
        newName = os.path.join(folder, name.replace(unWantedString, ''))  # remove the substring from the name
        os.rename(oldName, newName)  # rename the old file to the name without the substring

Organizing data by filetype

I am trying to sort a large number of files based off of their file extension. A lot of the files are .doc, .docx, .xls, etc.
This is what I was thinking in my head, but if there is a simpler way to do things, let me know! I do have multiple files with the same extension, so I don't want it to create a new folder for that extension every time and overwrite the previous file. I also have a much larger list, but for this example I don't believe all of them are needed. The OS is MacOS.
import os, shutil
extList = ['.doc', '.docx', '.xls']
# For every listed extension, copy the matching files from filepath into
# <path>/<ext>_folder. path and filepath are expected to be defined earlier.
for ext in extList:
    ext_folder = path + '/' + ext + '_folder'
    # exist_ok: re-running must not fail because the folder is already there
    os.makedirs(ext_folder, exist_ok=True)
    for file in os.listdir(filepath):
        if file.endswith(ext):
            print(file)
            # copyfile needs both the source file and the destination file
            shutil.copyfile(filepath + '/' + file, ext_folder + '/' + file)
Also, if I run into a file that I do not have on my list, I would like it to go into a folder named 'noextlist'.
Here is what I was able to create quickly
import os, re, shutil
DocFolder = r'...'#Your doc folder path
DocxFolder = r'...'#Your docx folder path
XlsFolder = r'...'#Your xls folder path
MiscFolder = r'...'#Your misc folder path
# Walk the tree and copy each file into the folder that matches its
# extension; anything unrecognised goes to MiscFolder.
for root, dirs, files in os.walk(r'...'):  # Your folder path you want to sort
    for file in files:
        if file.endswith(".doc"):
            targetFolder = DocFolder
        elif file.endswith(".docx"):
            targetFolder = DocxFolder
        elif file.endswith(".xls"):
            targetFolder = XlsFolder
        else:
            targetFolder = MiscFolder
        sourceFolder = os.path.join(root, file)
        print(sourceFolder)
        shutil.copy2(sourceFolder, targetFolder)
Edit: The main part here is the `for root, dirs, files in os.walk(...)` loop. It allows the program to traverse the provided path, searching all files including the ones in subfolders, and sort them accordingly.
import errno
import shutil
from os import listdir, mkdir
from os.path import splitext, join
# set for fast lookup
extList = set(['.doc', '.docx', '.xls'])
# source path
filepath = ...
# dest path
path = ...
# Copy every entry of filepath into <path>/<ext>_folder when its extension
# is listed in extList, otherwise into <path>/noextlist_folder.
for f in listdir(filepath):
    # extract extension from file name
    ext = splitext(f)[1]
    if ext in extList:
        dir_ = join(path, "{}_folder".format(ext))
    else:
        dir_ = join(path, "noextlist_folder")
    # Create the target folder on first use, for both kinds of folder.
    try:
        mkdir(dir_)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise  # raise if any other error than "already exists"
    dest = join(dir_, f)
    shutil.copy2(join(filepath, f), dest)
If I understand correctly, you like your solution but you need a way to rename files with duplicate names so that the extras don't disappear. You can check if the destination file already exists and construct a variant name by adding _1, _2, etc. to the filename until you find something unused.
newpathname = path + '/' + ext + '_folder' + "/" + file
# Split once, before the loop, so every candidate is derived from the
# original name (a_1, a_2, ...) and not from the previous candidate.
base, suffix = os.path.splitext(newpathname)
n = 0
while os.path.exists(newpathname):
    n += 1
    newpathname = "%s_%d%s" % (base, n, suffix)
shutil.copyfile(filepath + "/" + file, newpathname)
But your code has some other glitches, so here's a rewritten scanner. It uses os.walk() to descend into several levels of subdirectories (you don't say if that's needed or not), and it collects files of all extensions in one pass. And it constructs variant names as before.
import os, shutil
extList = ['.doc', '.docx', '.xls']
from os.path import join as joinpath
# Make sure the destination directories exist
# Make sure the destination directories exist
for ext in extList:
    extdir = joinpath(path, ext[1:] + "_folder")
    if not os.path.exists(extdir):
        os.mkdir(extdir)

# Copy every file with a listed extension into its extension folder.
for dirname, _dirs, files in os.walk(filepath):
    for file in files:
        base, ext = os.path.splitext(file)
        if ext not in extList:
            continue
        destpath = joinpath(path, ext[1:] + "_folder")
        n = 0
        newpathname = joinpath(destpath, file)
        # If the new name is in use, find an unused variant
        while os.path.exists(newpathname):
            n += 1
            newfile = "%s_%d%s" % (base, n, ext)
            # the variant belongs in the same extension folder
            newpathname = joinpath(destpath, newfile)
        shutil.copy(joinpath(dirname, file), newpathname)  # or other copy method

Move file to a folder or make a renamed copy if it exists in the destination folder

I have a piece of code i wrote for school:
import os
import shutil

source = "/home/pi/lab"
dest = os.environ["HOME"]
# Move each file from source into the sub-folder of dest named after its
# extension (c, cpp or sh); other files are left where they are.
for file in os.listdir(source):
    for ext in ("c", "cpp", "sh"):
        if file.endswith("." + ext):
            # os.listdir returns bare names, so join with source first
            shutil.move(os.path.join(source, file), dest + "/" + ext)
            break
What this code does is look for files in a source directory and, if a file has one of the listed extensions, move it to the matching directory. This part works. What I still need: if a file of the same name already exists in the destination folder, add 1 at the end of the file name, before the extension, and if there are multiple copies keep incrementing the number ("1++").
Like this: test1.c,test2.c, test3.c
I tried using os.isfile(filename) but this only looks at the source directory. and I get a true or false.
To test if the file exists in the destination folder you should os.path.join the dest folder with the file name
import os
import shutil
source = "/home/pi/lab"
dest = os.environ["HOME"]
# Avoid using the reserved word 'file' for a variable - renamed it to 'filename' instead
# The loop variable is called 'filename' rather than 'file', which was a
# builtin type name in Python 2.
for filename in os.listdir(source):
    # os.path.splitext splits the name from the extension; the extension
    # keeps its leading '.'
    name, extension = os.path.splitext(filename)
    if extension == ".c":
        # First try the file name as is; if it is taken, insert 1, 2, 3, ...
        # before the extension until an unused name is found.
        # This should be moved to a separate function to avoid code
        # duplication when handling the different file extensions
        dest_filename = os.path.join(dest, filename)
        i = 0
        while os.path.isfile(dest_filename):
            i += 1
            dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
        shutil.copy(os.path.join(source, filename), dest_filename)
    elif extension == ".cpp":
        ...
    # Handle other extensions
If you want to have put the renaming logic in a separate function using glob and re this is one way:
import glob
import re
...
def rename_file(source_filename, source_ext, source_dir=None, dest_dir=None):
    """Copy a source file into the destination under the next free number.

    Existing copies are expected to be named ``<name><number><ext>``
    (a1.c, a2.c, ...). The new copy gets the highest number found plus one,
    or 1 when no numbered copy exists yet.

    Args:
        source_filename: file name without its extension.
        source_ext: extension including the leading dot.
        source_dir: directory holding the original file; defaults to the
            module-level ``source``.
        dest_dir: directory receiving the copy; defaults to the module-level
            ``dest``.

    Returns:
        The path the file was copied to.
    """
    if source_dir is None:
        source_dir = source
    if dest_dir is None:
        dest_dir = dest

    # In a glob, "[0-9]*" means "one digit, then anything", so this only
    # narrows the candidates; the regex below does the exact check.
    filename_pattern = os.path.join(
        glob.escape(dest_dir),
        "%s[0-9]*%s" % (glob.escape(source_filename), glob.escape(source_ext)),
    )
    existing_files = glob.glob(filename_pattern)

    # Escape both parts (the '.' of the extension must be literal) and match
    # the whole base name, so that the captured group is never empty.
    numbered = re.compile(
        "%s([0-9]+)%s" % (re.escape(source_filename), re.escape(source_ext))
    )
    matches = (numbered.fullmatch(os.path.basename(p)) for p in existing_files)
    # default=0: no numbered copy yet, so the first one becomes <name>1<ext>
    max_index = max((int(m.group(1)) for m in matches if m), default=0)

    source_full_path = os.path.join(source_dir, source_filename + source_ext)
    dest_full_path = os.path.join(
        dest_dir, "%s%d%s" % (source_filename, max_index + 1, source_ext)
    )
    shutil.copy(source_full_path, dest_full_path)
    return dest_full_path
...
# If the file already exists i.e. replace the while loop in the else statement
rename_file(name, extension)
I didn't test the code, but something like this should do the job:
i = 0
filename = "a.txt"
fname, ext = os.path.splitext(filename)  # ext keeps its leading '.'
# Try a.txt, a1.txt, a2.txt, ... and stop at the first name that is not an
# existing file.
while os.path.isfile(filename):
    i += 1
    filename = fname + str(i) + ext

Categories

Resources