I'm coding a script that either zips the hall folder or you can choose files with specific extension such as ".txt". I use sys.argv to pass the folder path and extension, if needed. However, when I pass the path the only thing I get is this error:
fileEx = sys.argv[2]
IndexError: list index out of range
How can I make the "file extension" command line optional?
My script:
import zipfile, os, sys
if len(sys.argv) < 2:
folder_path = sys.argv[1]
fileEx = None
else:
folder_path = sys.argv[1]
fileEx = sys.argv[2]
def zipf(folder_path, fileEx=None):
folder_path = os.path.abspath(folder_path)
number = 1
while True:
zipfilename= os.path.basename(folder_path) + "_" + str(number) +'.zip'
if not os.path.exists(zipfilename):
break
number = number + 1
# creat the zip file
print(f'creating {zipfilename}')
backupZip = zipfile.ZipFile(zipfilename, 'w')
# walking through folders
for foldername , subfolders , filenames in os.walk(folder_path):
print(f'Adding files in {foldername}. . . ')
#adding the folder
backupZip.write(foldername)
# adding all files in the folder to zipfile
for filename in filenames:
newBase = os.path.basename(folder_path) + '_'
if filename.startswith(newBase) and filename.endswith('.zip'):
continue
if fileEx:
if filename.endswith(fileEx):
os.chdir(foldername)
backupZip.write(os.path.join(foldername, filename))
else:
backupZip.write(os.path.join(foldername, filename))
backupZip.close()
zipf("folder_path, fileEx")
Related
I have this function here that renames (if necessary) all files in a given folder and returns all filenames as absolute paths:
I forgot to mention: filepath is always an absolute path.
import re
import os
# This function renames all files in a given
# folder and return them as absolute paths
def rename_get_Files(filepath):
files = os.listdir(filepath)
files_list = []
counter = 1
files_renamed = 0
for filename in files:
# If the file does not start with 'Offer_Pic_' > rename it and add to the files_list
if not re.search('Offer_Pic_.+', filename):
file_name, file_extension = os.path.splitext(filepath+filename)
print(file_extension)
new_filename = "Offer_Pic_" + str(counter) + file_extension
old_filename = filepath + filename
new_filename = filepath + new_filename
# rename() function will
# rename all the files
os.rename(old_filename, new_filename)
counter += 1
print(f'This is the new filename: ' + new_filename)
files_list.append(new_filename)
files_renamed += 1
else:
# Append the absolute paths of all already correctly named files to the files_list
files_list.append(os.path.abspath(filename))
print(files_list)
print(f'We have renamed ' + str(files_renamed) + ' files.')
return files_list
However, when I call the function from another one to use these absolute paths:
pictures = rename_get_Files(filepath)
print(pictures)
... it returns the paths being inside the script's working directory.
Because of that, the next function of my script crashes because it assumes that the files are in the working directory of the script - which they are not as they were not moved from their initial location (filepath).
Please help me to keep the absolute file paths.
I tried to regain the absolute path of the pictures, but the wrong one (inside script dir keeps being returned)
for pic in pictures:
abs_path_pic = os.path.abspath(pic)
print(pic)
print(abs_path_pic)
pictureBox.send_keys(abs_path_pic)
I found the error: initially no absolute path was returned by os.
I added to the end of the function:
path = os.path.abspath(filepath)
files_list = [entry.path for entry in os.scandir(path) if entry.is_file()]
The whole function now looks like this:
# This function renames all files in a given folder and return them as absolute paths
def rename_get_files(filepath):
print(f'this is the given filepath: {filepath}')
files = os.listdir(filepath)
files_list = []
print(f'these are our initial files: {files}')
counter = 1
files_renamed = 0
for filename in files:
# If the file does not start with 'Offer_Pic_' > rename it and add to the files_list
if not re.search('Offer_Pic_.+', filename):
file_name, file_extension = os.path.splitext(filepath+filename)
print(file_extension)
new_filename = "Offer_Pic_" + str(counter) + file_extension
old_filename = filepath + "/" + filename
print(old_filename)
new_filename = filepath + "/" + new_filename
print(new_filename)
# rename() function will
# rename all the files
os.rename(old_filename, new_filename)
counter += 1
print(f'This is the new filename: ' + new_filename)
files_list.append(os.path.abspath(new_filename))
files_renamed += 1
else:
# Append the absolute paths of all already correctly named files to the files_list
files_list.append(os.path.abspath(filename))
print(f'We have renamed ' + str(files_renamed) + ' files.')
path = os.path.abspath(filepath)
files_list = [entry.path for entry in os.scandir(path) if entry.is_file()]
print(f'this is our files_list, no files were renamed: {files_list}')
return files_list
Now it works perfectly! Thank you all for your help!
I wrote a loop which ignores all sub-directories which contain .txt files within them.
src = raw_input("Enter source disk location: ")
src = os.path.abspath(src)
dst = raw_input("Enter first destination to copy: ")
dst = os.path.abspath(dst)
dest = raw_input("Enter second destination to move : ")
dest = os.path.abspath(dest)
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)'
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = [] # do not recurse into subdirectories
continue
files = [os.path.join(dir, f) for f in files ]
for f in files:
part1 = os.path.dirname(f)
part2 = os.path.dirname(os.path.dirname(part1))
part3 = os.path.split(part1)[1]
path_miss1 = os.path.join(dst, "missing_txt")
path_miss = os.path.join(path_miss1, part3)
path_missing = os.path.join(dest, "missing_txt")
searchFileName = re.search(path_patter, part3)#### update
if searchFileName:#####update
try:
if not os.path.exists(path_miss):
os.makedirs(path_miss)
else:
pass
if os.path.exists(path_miss):
distutils.dir_util.copy_tree(part1, path_miss)
else:
debug_status += "missing_file\n"
pass
if (get_size(path_miss)) == 0:
os.rmdir(path_miss)
else:
pass
if not os.path.exists(path_missing):
os.makedirs(path_missing)
else:
pass
if os.path.exists(path_missing):
shutil.move(part1, path_missing)
else:
pass
if (get_size(path_missing)) == 0:
os.rmdir(path_missing)
else:
pass
except Exception:
pass
else:
continue
How to modify this code to compare directory name with regular expression in this case. (it has to ignore directories with .txt files)
import os
import re
def createEscapedPattern(path,pattern):
newPath = os.path.normpath(path)
newPath = newPath.replace("\\","\\\\\\\\")
return newPath + "\\\\\\\\" + pattern
def createEscapedPath(path):
newPath = os.path.normpath(path)
return newPath.replace("\\","\\\\")
src = 'C:\\Home\\test'
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)$'
p = re.compile(createEscapedPattern(src,path_patter))
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = []
continue
if any(p.match(createEscapedPath(dir)) for f in files):
for f in files:
print createEscapedPath(dir + "/" + f)
p = re.compile(createEscapedPattern(dir,path_patter))
There are a couple of things i did here and hope this example helps
I wrote this for windows fs so used two path convert functions.
This script ignores dirs with .txt files like you implemented it
This script will start at the directory you start the script and will only print file names if the pattern matches. This is done for all subdirectory's that are not ignored by the previous rule.
Used regex in python and made it compile again for each directory so you get: 'directory/(\S+)(\d+)(\d+)_(\d+)__(\d+)(\d+)(\d+)$'
Currently trying to develop a catch all script that when pointed to a folder it will look through all of the subfolders and anything that is a .gdb will be moved to a new folder. Some GDBS are zipped and have a folder which then contains the GDB inside. If the GDB files have the same name once extracted, they will be appended with the suffix _2, _3, _4 and then moved to the folder with the other GDBs and won't conflict with any filenames.
Everything works except the renaming. Whereby it will rename one ok and then start placing folders inside of another gdb, before eventually failing with. WindowsError: [Error 183] Cannot create a file when that file already exists.
import shutil, errno, re, os, zipfile, os.path, sys
def unzip(source_filename, dest_dir):
with zipfile.ZipFile(source_filename) as zf:
for member in zf.infolist():
words = member.filename.split('/')
path = dest_dir
for word in words[:-1]:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir, ''): continue
path = os.path.join(path, word)
zf.extract(member, path)
results = []
input_path = sys.argv[1]
output_path = sys.argv[2]
if input_path not in output_path:
for path, subdirs, files in os.walk(input_path):
for f in files:
if f.endswith(".zip") or f.endswith(".ZIP"):
unzip(os.path.join(path,f), path)
if input_path not in output_path:
for path, subdirs, files in os.walk(input_path):
counter = 2
for dir_name in subdirs:
if os.path.join(path, dir_name).endswith(".gdb") or os.path.join(path, dir_name).endswith(".GDB"):
if os.path.exists(output_path + "\\" + dir_name):
print "Moving " + os.path.join(path, dir_name) + " To " + output_path + "\\" + str(counter) + dir_name
os.rename(os.path.join(path, dir_name), output_path + "\\" + dir_name[:-4] + "_" + str(counter) + ".gdb")
counter + 1
else:
print "Moving " + os.path.join(path, dir_name) + " To " + output_path
shutil.move(os.path.join(path, dir_name), output_path)
else:
print "########################################"
print "########################################"
print "Please select a different output path"
print "The output path should not be contained"
print " In the input path"
print "########################################"
print "########################################"
Any help anyone could give would be greatly appreciated. Cheers!
I am trying to sort a large number of files based off of their file extension. A lot of the files are .doc, .docx, .xls, etc.
This is what I was thinking in my head, but if there is a simpler way to do things, let me know! I do have multiple files with the same extension, so I don't want it to create a new folder for that extension every time and overwrite the previous file. I also have a much larger list, but for this example I don't believe all of them are needed. The OS is MacOS.
import os, shutil
extList = ['.doc', '.docx', '.xls']
for ext in extList:
os.mkdir(path + '/' + ext +'_folder')
for file in os.listdir(filepath):
if file.endswith(ext): #missing an indent
print(file)
shutil.copyfile(file + '/' + ext +'_folder' + file)
Also, if I run into a file that I do not have on my list, I would like it to go into a folder named 'noextlist'.
Here is what I was able to create quickly
import os, re, shutil
DocFolder = r'...'#Your doc folder path
DocxFolder = r'...'#Your docx folder path
XlsFolder = r'...'#Your xls folder path
MiscFolder = r'...'#Your misc folder path
for root, dirs, files in os.walk(r'...'): #Your folder path you want to sort
for file in files:
if file.endswith(".doc"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,DocFolder)
elif file.endswith(".docx"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,DocxFolder)
elif file.endswith(".xls"):
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,XlsFolder)
else:
sourceFolder = os.path.join(root,file)
print sourceFolder
shutil.copy2(sourceFolder,MiscFolder)
Edit:The main function here is the for root,dirs,files in os.walk This allows the program to transverse through the provided path to search all files including the ones in the sub folder and sort it out accordingly.
import errno
import shutil
from os import listdir, mkdir
from os.path import splitext, join
# set for fast lookup
extList = set(['.doc', '.docx', '.xls'])
# source path
filepath = ...
# dest path
path = ...
for f in listdir(filepath):
# extract extension from file name
ext = splitext(f)[1]
if ext in extList:
dir_ = join(path, "{}_folder".format(ext))
try:
mkdir(dir_)
except OSError as e:
if ex.errno != errno.EEXIST:
raise # raise if any other error than "already exists"
dest = join(dir_, f)
else:
dest = join(path, "noextlist_folder", f)
shutil.copy2(join(filepath, f), dest)
If I understand correctly, you like your solution but you need a way to rename files with duplicate names so that the extras don't disappear. You can check if the destination file already exists and construct a variant name by adding _1, _2, etc. to the filename until you find something unused.
newpathname = path + '/' + ext +'_folder' + "/" + file
n = 0
while os.path.exists(newpathname):
n += 1
base, ext = os.path.splitext(newpathname)
newpathname = "%s_%d%s" % (base, n, ext)
shutil.copyfile(filepath+"/"+file, newpathname)
But your code has some other glitches, so here's a rewritten scanner. It uses os.walk() to descend into several levels of subdirectories (you don't say if that's needed or not), and it collects files of all extensions in one pass. And it constructs variant names as before.
import os, shutil
extList = ['.doc', '.docx', '.xls']
from os.path import join as joinpath
# Make sure the destination directories exist
for ext in extList:
extdir = joinpath(path, ext[1:]+"_folder")
if not os.path.exists(extdir):
os.mkdir(extdir)
for dirname, _dirs, files in os.walk(filepath):
for file in files:
base, ext = os.path.splitext(file)
if ext not in extList:
continue
destpath = joinpath(path, ext[1:]+"_folder")
n = 0
newpathname = joinpath(destpath, file)
# If the new name is in use, find an unused variant
while os.path.exists(newpathname):
n += 1
newfile = "%s_%d%s" % (base, n, ext)
newpathname = joinpath(path, newfile)
sh.copy(joinpath(dirname, file), newpathname) # or other copy method
I have a piece of code i wrote for school:
import os
source = "/home/pi/lab"
dest = os.environ["HOME"]
for file in os.listdir(source):
if file.endswith(".c")
shutil.move(file,dest+"/c")
elif file.endswith(".cpp")
shutil.move(file,dest+"/cpp")
elif file.endswith(".sh")
shutil.move(file,dest+"/sh")
what this code is doing is looking for files in a source directory and then if a certain extension is found the file is moved to that directory. This part works. If the file already exists in the destination folder of the same name add 1 at end of the file name, and before the extension and if they are multiples copies do "1++".
Like this: test1.c,test2.c, test3.c
I tried using os.isfile(filename) but this only looks at the source directory. and I get a true or false.
To test if the file exists in the destination folder you should os.path.join the dest folder with the file name
import os
import shutil
source = "/home/pi/lab"
dest = os.environ["HOME"]
# Avoid using the reserved word 'file' for a variable - renamed it to 'filename' instead
for filename in os.listdir(source):
# os.path.splitext does exactly what its name suggests - split the name and extension of the file including the '.'
name, extension = os.path.splitext(filename)
if extension == ".c":
dest_filename = os.path.join(dest, filename)
if not os.path.isfile(dest_filename):
# We copy the file as is
shutil.copy(os.path.join(source, filename) , dest)
else:
# We rename the file with a number in the name incrementing the number until we find one that is not used.
# This should be moved to a separate function to avoid code duplication when handling the different file extensions
i = 0
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
while os.path.isfile(dest_filename):
i += 1
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
shutil.copy(os.path.join(source, filename), dest_filename)
elif extension == ".cpp"
...
# Handle other extensions
If you want to have put the renaming logic in a separate function using glob and re this is one way:
import glob
import re
...
def rename_file(source_filename, source_ext):
filename_pattern = os.path.join(dest, "%s[0-9]*%s"
% (source_filename, source_ext))
# Contains file such as 'a1.c', 'a2.c', etc...
existing_files = glob.glob(filename_pattern)
regex = re.compile("%s([0-9]*)%s" % (source_filename, source_ext))
# Retrieve the max of the index used for this file using regex
max_index = max([int(match.group(1))
for match in map(regex.search, existing_files)
if match])
source_full_path = os.path.join(source, "%s%s"
% (source_filename, source_ext))
# Rebuild the destination filename with the max index + 1
dest_full_path = os.path.join(dest, "%s%d%s"
% (source_filename,
(max_index + 1),
source_ext))
shutil.copy(source_full_path, dest_full_path)
...
# If the file already exists i.e. replace the while loop in the else statement
rename_file(name, extension)
I din't test the code. But something like this should do the job:-
i = 0
filename = "a.txt"
while True:
if os.isfile(filename):
i+= 1
break
if i:
fname, ext = filename.split('.')
filename = fname + str(i) + '.' + ext