can some one please provide me with an explanation of the code especially the use of maxversions and statements following the line "for f in files:".
I want to understand what xrange(MAXVERSION) means? What is the use of indexing i.e
for index in xrange(MAXVERSIONS): backup = '%s.%2.2d' % (destpath, index)
The code:
!/usr/bin/env python
import sys,os, shutil, filecmp
MAXVERSIONS=100
BAKFOLDER = '.bak'
def backup_files(tree_top, bakdir_name=BAKFOLDER):
top_dir = os.path.basename(tree_top)
tree_top += os.sep
for dir, subdirs, files in os.walk(tree_top):
if os.path.isabs(bakdir_name):
relpath = dir.replace(tree_top,'')
backup_dir = os.path.join(bakdir_name, top_dir, relpath)
else:
backup_dir = os.path.join(dir, bakdir_name)
if not os.path.exists(backup_dir):
os.makedirs(backup_dir)
subdirs[:] = [d for d in subdirs if d != bakdir_name]
for f in files:
filepath = os.path.join(dir, f)
destpath = os.path.join(backup_dir, f)
for index in xrange(MAXVERSIONS):
backup = '%s.%2.2d' % (destpath, index)
abspath = os.path.abspath(filepath)
if index > 0:
old_backup = '%s.%2.2d' % (destpath, index-1)
if not os.path.exists(old_backup): break
abspath = os.path.abspath(old_backup)
try:
if os.path.isfile(abspath) and filecmp.cmp(abspath, filepath, shallow=False):
continue
except OSError:
pass
try:
if not os.path.exists(backup):
print 'Copying %s to %s...' % (filepath, backup)
shutil.copy(filepath, backup)
except (OSError, IOError), e:
pass
if __name__=="__main__":
if len(sys.argv)<2:
sys.exit("Usage: %s [directory] [backup directory]" % sys.argv[0])
tree_top = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[1])))
if len(sys.argv)>=3:
bakfolder = os.path.abspath(os.path.expanduser(os.path.expandvars(sys.argv[2])))
else:
bakfolder = BAKFOLDER
if os.path.isdir(tree_top):
backup_files(tree_top, bakfolder)
The script tries to recursively copy the contents of a directory (defaults to current directory) to a backup directory (defaults to .bak in the current directory);
for each filename.ext, it creates a duplicate named filename.ext.00; if filename.ext.00 already exists, it creates filename.ext.01 instead, and so on.
xrange() is a generator which returns all numbers in 0..(MAXVERSION-1), so MAXVERSION controls how many version-suffixes to try, ie how many old versions of the file to keep.
Related
I am trying to write a python script that removes txt files that have less than "wordLimit" words, where "wordLimit" is a given number. The files should be removed from all the directories and subdirectories at a certain path. The directory structure should stay the same.
My version always prints "Error while deleting file"
import os, glob
wordLimit = 1000
directory = os.getcwd()
def shouldArticleBeRemoved(filepath, minWords):
number = 0
filename = os.path.basename(filepath)
with open(filename, 'r+') as f:
for line in f:
words = line.split()
number += len(words)
if number < minWords:
return True
else: return False
def iterateCheckAndRemove():
notRemoved = 0
removed = 0
fileList = glob.glob('C:/Users/HP/Desktop/Articles/*/*/*.txt', recursive=True)
for filePath in fileList:
try:
if shouldArticleBeRemoved(filePath, wordLimit):
os.remove(filePath)
removed += 1
else: notRemoved +=1
except OSError:
notRemoved +=1
print("Error while deleting file")
print(removed)
iterateCheckAndRemove()
Try this. It's much simpler:-
import os
def main():
wordLimit = 1000
for r, _, f in os.walk(r'C:\Users\HP\Desktop\Articles'):
for _f in f:
apath = os.path.join(r, _f)
_, ext = os.path.splitext(apath)
if ext == '.txt':
try:
rflag = False
with open(apath) as text:
if len(text.read().split()) < wordLimit:
rflag = True
if rflag:
os.remove(apath)
print(f'{apath} was deleted')
except Exception as e:
print(f'Error while processing {apath} -> {e}')
if __name__ == '__main__':
main()
I am trying to write a python2 function that will recursively traverse through the whole directory structure of a given directory, and print out the results.
All without using os.walk
This is what I have got so far:
test_path = "/home/user/Developer/test"
def scanning(sPath):
output = os.path.join(sPath, 'output')
if os.path.exists(output):
with open(output) as file1:
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(sPath):
path = os.path.join(sPath, name)
if os.path.isdir(path):
print "'", name, "'"
print_directory_contents(path)
scanning(test_path)
This is what I currently get, the script doesn't enter the new folder:
' test2'
'new_folder'
The issue is that it does not go further down than one directory. I would also like to able to indicate visually what is a directory, and what is a file
Try this:
import os
test_path = "YOUR_DIRECTORY"
def print_directory_contents(dir_path):
for child in os.listdir(dir_path):
path = os.path.join(dir_path, child)
if os.path.isdir(path):
print("FOLDER: " + "\t" + path)
print_directory_contents(path)
else:
print("FILE: " + "\t" + path)
print_directory_contents(test_path)
I worked on windows, verify if still working on unix.
Adapted from:
http://codegists.com/snippet/python/print_directory_contentspy_skobnikoff_python
Try this out with recursion
it is much simple and less code
import os
def getFiles(path="/var/log", files=[]):
if os.path.isfile(path):
return files.append(path)
for item in os.listdir(path):
item = os.path.join(path, item)
if os.path.isfile(item):
files.append(item)
else:
files = getFiles(item, files)
return files
for f in getFiles("/home/afouda/test", []):
print(f)
Try using a recursive function,
def lastline(fil):
with open(fil) as f:
for li in f.readlines():
if li.startswith("Final Value:"):
print(li)
## If it still doesnt work try putting 'dirs=[]' here
def lookforfiles(basepath):
contents = os.listdir(basepath)
dirs = []
i = 0
while i <= len(contents):
i += 1
for n in contents:
f = os.path.join(basepath, n)
if os.path.isfile(f):
lastline(f)
print("\n\nfile %s" % n)
elif os.path.isdir(f):
print("Adding dir")
if f in dirs:
pass
else:
dirs.append(f)
else:
for x in dirs:
print("dir %s" % x)
lookforfiles(x)
sorry if this doesn't fit your example precisely but I had a hard time understanding what you were trying to do.
This question is a duplicate of Print out the whole directory tree.
TL;TR: Use os.listdir.
I wrote a loop which ignores all sub-directories which contain .txt files within them.
src = raw_input("Enter source disk location: ")
src = os.path.abspath(src)
dst = raw_input("Enter first destination to copy: ")
dst = os.path.abspath(dst)
dest = raw_input("Enter second destination to move : ")
dest = os.path.abspath(dest)
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)'
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = [] # do not recurse into subdirectories
continue
files = [os.path.join(dir, f) for f in files ]
for f in files:
part1 = os.path.dirname(f)
part2 = os.path.dirname(os.path.dirname(part1))
part3 = os.path.split(part1)[1]
path_miss1 = os.path.join(dst, "missing_txt")
path_miss = os.path.join(path_miss1, part3)
path_missing = os.path.join(dest, "missing_txt")
searchFileName = re.search(path_patter, part3)#### update
if searchFileName:#####update
try:
if not os.path.exists(path_miss):
os.makedirs(path_miss)
else:
pass
if os.path.exists(path_miss):
distutils.dir_util.copy_tree(part1, path_miss)
else:
debug_status += "missing_file\n"
pass
if (get_size(path_miss)) == 0:
os.rmdir(path_miss)
else:
pass
if not os.path.exists(path_missing):
os.makedirs(path_missing)
else:
pass
if os.path.exists(path_missing):
shutil.move(part1, path_missing)
else:
pass
if (get_size(path_missing)) == 0:
os.rmdir(path_missing)
else:
pass
except Exception:
pass
else:
continue
How to modify this code to compare directory name with regular expression in this case. (it has to ignore directories with .txt files)
import os
import re
def createEscapedPattern(path,pattern):
newPath = os.path.normpath(path)
newPath = newPath.replace("\\","\\\\\\\\")
return newPath + "\\\\\\\\" + pattern
def createEscapedPath(path):
newPath = os.path.normpath(path)
return newPath.replace("\\","\\\\")
src = 'C:\\Home\\test'
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)$'
p = re.compile(createEscapedPattern(src,path_patter))
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = []
continue
if any(p.match(createEscapedPath(dir)) for f in files):
for f in files:
print createEscapedPath(dir + "/" + f)
p = re.compile(createEscapedPattern(dir,path_patter))
There are a couple of things i did here and hope this example helps
I wrote this for windows fs so used two path convert functions.
This script ignores dirs with .txt files like you implemented it
This script will start at the directory you start the script and will only print file names if the pattern matches. This is done for all subdirectory's that are not ignored by the previous rule.
Used regex in python and made it compile again for each directory so you get: 'directory/(\S+)(\d+)(\d+)_(\d+)__(\d+)(\d+)(\d+)$'
I have a piece of code i wrote for school:
import os
source = "/home/pi/lab"
dest = os.environ["HOME"]
for file in os.listdir(source):
if file.endswith(".c")
shutil.move(file,dest+"/c")
elif file.endswith(".cpp")
shutil.move(file,dest+"/cpp")
elif file.endswith(".sh")
shutil.move(file,dest+"/sh")
what this code is doing is looking for files in a source directory and then if a certain extension is found the file is moved to that directory. This part works. If the file already exists in the destination folder of the same name add 1 at end of the file name, and before the extension and if they are multiples copies do "1++".
Like this: test1.c,test2.c, test3.c
I tried using os.isfile(filename) but this only looks at the source directory. and I get a true or false.
To test if the file exists in the destination folder you should os.path.join the dest folder with the file name
import os
import shutil
source = "/home/pi/lab"
dest = os.environ["HOME"]
# Avoid using the reserved word 'file' for a variable - renamed it to 'filename' instead
for filename in os.listdir(source):
# os.path.splitext does exactly what its name suggests - split the name and extension of the file including the '.'
name, extension = os.path.splitext(filename)
if extension == ".c":
dest_filename = os.path.join(dest, filename)
if not os.path.isfile(dest_filename):
# We copy the file as is
shutil.copy(os.path.join(source, filename) , dest)
else:
# We rename the file with a number in the name incrementing the number until we find one that is not used.
# This should be moved to a separate function to avoid code duplication when handling the different file extensions
i = 0
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
while os.path.isfile(dest_filename):
i += 1
dest_filename = os.path.join(dest, "%s%d%s" % (name, i, extension))
shutil.copy(os.path.join(source, filename), dest_filename)
elif extension == ".cpp"
...
# Handle other extensions
If you want to have put the renaming logic in a separate function using glob and re this is one way:
import glob
import re
...
def rename_file(source_filename, source_ext):
filename_pattern = os.path.join(dest, "%s[0-9]*%s"
% (source_filename, source_ext))
# Contains file such as 'a1.c', 'a2.c', etc...
existing_files = glob.glob(filename_pattern)
regex = re.compile("%s([0-9]*)%s" % (source_filename, source_ext))
# Retrieve the max of the index used for this file using regex
max_index = max([int(match.group(1))
for match in map(regex.search, existing_files)
if match])
source_full_path = os.path.join(source, "%s%s"
% (source_filename, source_ext))
# Rebuild the destination filename with the max index + 1
dest_full_path = os.path.join(dest, "%s%d%s"
% (source_filename,
(max_index + 1),
source_ext))
shutil.copy(source_full_path, dest_full_path)
...
# If the file already exists i.e. replace the while loop in the else statement
rename_file(name, extension)
I din't test the code. But something like this should do the job:-
i = 0
filename = "a.txt"
while True:
if os.isfile(filename):
i+= 1
break
if i:
fname, ext = filename.split('.')
filename = fname + str(i) + '.' + ext
I'm obviously doing something very wrong. I'd like to find files, that are in one directory but not in second directory (for instance xxx.phn in one directory and xxx.wav in second directory...
IT seems that I cannot detect, when file is NOT present in second directory (it's always showing like all files are)... I don't get any file displayed, although they exist...
import shutil, random, os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
basename = os.path.splitext(fname)[0]
if (not os.path.exists(dest_folder+'/'+basename + '.' + dest_ext) ):
print str(i)+': No duplicate for: '+fname
i=i+1
print str(i)+' files found'
Can I suggest that you make the filename you're looking at checking and print it before checking whether it exists..
dest_fname = dest_folder+'/'+basename + '.' + dest_ext
print "dest exists? %s" % dest_fname
os.path.exists(dest_fname)
Also as an aside please join paths using the join() method. (If you really want the basename without the leading path elements there's a basename() function).
I tried your program out and it worked for two simple flat directories. Here are the directory contents:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
b\a.csv
b\c.csv
And result of your script with a txt b csv as parameters. If your result was different, maybe you used different parameters?
0: No duplicate for: b.txt
1 files found
But when I added subdirectories:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
a\c\d.txt
a\c\e.txt # Missing from b\c directory
b\a.csv
b\c.csv
b\c\d.csv
Your script gives:
0: No duplicate for: b.txt
1: No duplicate for: d.txt # Error here
2: No duplicate for: e.txt
3 files found
To work with sub-directories you need to compute the path relative to the source directory, and then add it to the destination directory. Here's the result with a few other minor cleanups and prints to see what is going on. Note that fname is always just the file name and needs to be joined with d to get the whole path:
#!python2
import os, sys
if len(sys.argv) < 4:
print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
"""
sys.exit(-1)
folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]
i = 0
for d, ds, fs in os.walk(folder):
for fname in fs:
relpath = os.path.relpath(os.path.join(d,fname),folder)
relbase = os.path.splitext(relpath)[0]
path_to_check = os.path.join(dest_folder,relbase+'.'+dest_ext)
if not os.path.exists(path_to_check):
print '{}: No duplicate for: {}, {} not found.'.format(i,os.path.join(folder,relpath),path_to_check)
i += 1
print i,'files found'
Output:
0: No duplicate for: a\b.txt, b\b.csv not found.
1: No duplicate for: a\c\e.txt, b\c\e.csv not found.
2 files found
What you're doing is looking for are matching files, not duplicate ones. One problem is that you're not using use the source_ext argument when searching. Another is I think the command-line argument handling is messed-up. Here's a corrected version that accomplishes what you're trying to do:
import os
import sys
if len(sys.argv) != 5:
print("usage: python "
"del_orphans_dir1_dir2.py " # argv[0] (script name)
"source_folder " # argv[1]
"source_ext " # argv[2]
"dest_folder " # argv[3]
"dest_ext") # argv[4]
sys.exit(2) # command line error
source_folder, source_ext, dest_folder, dest_ext = sys.argv[1:6]
dest_ext = dest_ext if dest_ext.startswith('.') else '.'+dest_ext # check dot
found = 0
for d, ds, fs in os.walk(source_folder):
for i, fname in enumerate(fs, start=1):
basename, ext = os.path.splitext(fname)
if ext == source_ext:
if os.path.exists(os.path.join(dest_folder, basename+dest_ext)):
found += 1
else:
print '{}: No matching file found for: {}'.format(i, fname)
print '{} matches found'.format(found)
sys.exit(0)