Exclude hidden files in Python

Well, there's a thing I have to do: I have to count files with or without hidden files, with or without recursion, and with or without a certain extension (all chosen by the user via a CLI). The problem is with hidden files.
My method:
if namespace.recursive == True:
    for files in os.walk(top=namespace.path, topdown=True):
        for i in files[2]:
            countF += 1
    print('Number of files in directory (with recursion): ', countF)
else:
    p = Path(namespace.path)
    for subdirs in p.iterdir():
        if subdirs.is_file():
            count += 1
    print('Number of files in directory (without recursion): ', count)
This counts files WITH the hidden ones.
What I want to do: I want this method to count files WITHOUT the hidden ones. But if the user passes the -h parameter, I want to count ONLY hidden files. So I tried to write a check method for it:
def check_attributes(filename):
    if os.path.isfile(filename):
        return win32api.GetFileAttributes(filename) & win32con.FILE_ATTRIBUTE_HIDDEN
    else:
        return 0
and then I tried to modify my method and add, after
for i in files[2]:
something like:
if check_attributes(f) == 0:  # if it's not hidden - then count
But it still counts the hidden files. I want to understand how to do it right.
Thank you so much in advance for every answer!
EDIT: full function with checking
def countFiles():
    countF = int(0)
    count = int(0)
    c = int(0)
    try:
        if namespace.extension == '.':
            if namespace.recursive == True:
                if namespace.hidden == False:
                    for files in os.walk(top=namespace.path, topdown=True):
                        for i in files[2]:
                            if check_attributes(i) == 0:
                                countF += 1
                    print('Number of files in directory (with recursion): ', countF)
            else:
                if namespace.hidden == False:
                    p = Path(namespace.path)
                    for subdirs in p.iterdir():
                        if subdirs.is_file():
                            count += 1
                    print('Number of files in directory (without recursion): ', count)
        else:
            if namespace.recursive == True:
                for files in os.walk(namespace.path):
                    for f in files[2]:
                        if os.path.splitext(f)[1] == namespace.extension:
                            c += 1
                print('Number of files with extension ' + namespace.extension + ' in directory (with recursion):', c)
            else:
                for files in os.listdir(namespace.path):
                    if os.path.splitext(files)[1] == namespace.extension:
                        c += 1
                print('Number of files with extension ' + namespace.extension + ' in directory (without recursion): ', c)
    except Exception as e:
        print('Error:\n', e)
        sys.exit(0)

In your original code, there are multiple boolean args creating different paths. From what I can tell, your extension == '.' path was the only one where check_attributes was being called, so that might have been the issue. I decided to take a crack at rewriting it. The way I rewrote it has two phases: 1. get the files, either recursively or not, then 2. filter the files with the args provided. Here's what I came up with:
import argparse
import os

import win32api
import win32con


def count_files(args):
    files = []
    # Get the files differently based on whether recursive or not.
    if args.recursive:
        # Note here I changed how you're iterating. os.walk yields tuples, so you can
        # unpack the tuple in your for loop. current_dir is the current dir it's in
        # while walking and found_files are all the files in that dir.
        for current_dir, dirs, found_files in os.walk(top=args.path, topdown=True):
            files += [os.path.join(current_dir, found_file) for found_file in found_files]
    else:
        # Note the os.path.join with the dir each file is in. It's important to store the
        # absolute path of each file.
        files += [os.path.join(args.path, found_file) for found_file in os.listdir(args.path)
                  if os.path.isfile(os.path.join(args.path, found_file))]

    filtered_files = []
    for found_file in files:
        print(found_file)
        if not args.hidden and (win32api.GetFileAttributes(found_file) & win32con.FILE_ATTRIBUTE_HIDDEN):
            continue  # hidden == False and the file has the hidden attribute, go to the next one
        if args.extension and not found_file.endswith(args.extension):
            continue  # File doesn't end in the provided extension
        filtered_files.append(found_file)

    print(f'Length: {len(filtered_files)}')
    return len(filtered_files)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Count files in a directory.')
    # Note that I took advantage of some other argparse features here like
    # required vs optional arguments and boolean flags.
    parser.add_argument('path')
    parser.add_argument('--recursive', action='store_true', default=False)
    parser.add_argument('--hidden', action='store_true', default=False)
    parser.add_argument('--extension', type=str)
    args = parser.parse_args()
    count_files(args)
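If you want to try count_files without going through the command line, you can build the args namespace by hand. This is only an illustrative sketch with a made-up path, assuming the same Windows-only pywin32 setup as above:
from argparse import Namespace

# Hypothetical example; roughly equivalent to running:
#   python count_files.py C:\some\folder --recursive --extension .txt
args = Namespace(path=r'C:\some\folder', recursive=True, hidden=False, extension='.txt')
count_files(args)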

Related

Looping through folders and comparing files using win32

Looking to use win32 to compare multiple Word docs. The naming convention is the same, except the modified doc has test.docx added to the file name. Below is the code I have, but it is coming up with "pywintypes.com_error: (-2147023170, 'The remote procedure call failed.', None, None)". Any ideas on how I can get this to work? I have around 200 docs to compare, so Python seems to be the way to do it.
import win32com.client
from docx import Document
import os


def get_docx_list(dir_path):
    '''
    :param dir_path:
    :return: List of docx files in the current directory
    '''
    file_list = []
    for path, dir, files in os.walk(dir_path):
        for file in files:
            if file.endswith("docx") == True and str(file[0]) != "~":  # Locate the docx documents and exclude temporary files
                file_root = path + "\\" + file
                file_list.append(file_root)
    print("The directory found a total of {0} related files!".format(len(file_list)))
    return file_list


def main():
    modified_path = r"C:\...\Replaced\SWI\\"
    original_path = r"C:\...\Replaced\SWI original\\"
    for i, file in enumerate(get_docx_list(modified_path), start=1):
        print(f"{i}、Files in progress:{file}")
        for i, files in enumerate(get_docx_list(original_path), start=1):
            Application = win32com.client.gencache.EnsureDispatch("Word.Application")
            Application.CompareDocuments(
                Application.Documents.Open(modified_path + file),
                Application.Documents.Open(str(original_path) + files))
            Application.ActiveDocument.SaveAs(FileName=modified_path + files + "Comparison.docx")
    Application.Quit()


if __name__ == '__main__':
    main()
For anyone chasing a solution for bulk Word comparisons, below is the code I successfully ran through a few hundred docs. Delete the print statements once you have the naming convention sorted.
import win32com.client
import os


def main():
    # path directories
    modified_path = r"C:\Users\Admin\Desktop\Replaced\SOP- Plant and Equipment\\"
    original_path = r"C:\Users\Admin\Desktop\Replaced\SOP - Plant and Equipment Original\\"
    save_path = r"C:\Users\Admin\Desktop\Replaced\TEST\\"
    file_list1 = os.listdir(modified_path)
    file_list2 = os.listdir(original_path)

    # text counter
    Number = 0

    # loop through files and compare
    for file in file_list1:
        for files in file_list2:
            # if files match do comparison; naming convention to be changed
            if files[:-5] + " test.docx" == file:
                Number += 1
                print(f"The program has completed {Number} of a total of {len(file_list1)} related files!")
                try:
                    Application = win32com.client.gencache.EnsureDispatch("Word.Application")
                    Application.CompareDocuments(
                        Application.Documents.Open(modified_path + file),
                        Application.Documents.Open(str(original_path) + files))
                    Application.ActiveDocument.ActiveWindow.View.Type = 3
                    Application.ActiveDocument.SaveAs(FileName=save_path + files[:-5] + " Comparison.docx")
                except:
                    Application.Quit()
                    pass


if __name__ == '__main__':
    main()

Finding large files, but the output is unexpected

I have this program to find large files:
import os, time, shelve

start = time.time()
root = '/'
# errors = set()
# dirs = set()
while True:
    try:
        root = os.path.abspath(root)  # ensure it's an abspath
        # set the baseline as 100M
        # consider the shift
        baseline = 100 * 2**20  # 2**20 is 1M
        # set up to collect the large files
        large_files = []
        # root is a better choice as the concept
        for foldername, subfolders, files in os.walk(root):
            for f in files:
                # print(f"{foldername}, {f}")
                abspath = os.path.join(foldername, f)
                size = os.path.getsize(abspath)
                if size >= baseline:
                    large_files.append((os.path.basename(abspath), size))
                    print(abspath, size/(2**20))
        # write the large files to shelf
        shelf = shelve.open('/root/large_files.db')
        shelf["large_files"] = large_files
        shelf.close()
        if subfolders == []:
            end = time.time()
            break
    except (PermissionError, FileNotFoundError) as e:
        # errors.add(e)
        pass
It consistently outputs identical results:
[root#iz2ze9wve43n2nyuvmsfx5z ~]# python3 search_large_files.py
/dev/core 134217726.0078125
/dev/core 134217726.0078125
/dev/core 134217726.0078125
....
However, I see no reason why
print(abspath, size/(2**20))
would do this constantly.
What might the problem be in my code?
You have an infinite outer loop with while True:, and apparently /dev/core is the only file in your filesystem that exceeds the file size specified by baseline, so it would keep outputting the same file over and over again.
Remove while True: and un-indent the block inside and your code would work.
Note that your if subfolders == []: condition is outside your for foldername, subfolders, files in os.walk(root): loop and would therefore not be useful. You should record the end time unconditionally anyway so you should simply remove the if condition and the break statement as well.
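For reference, this is roughly what the corrected script could look like with the while True: loop, the break, and the if subfolders == []: check removed (a sketch, not the poster's exact code; the per-file try/except around os.path.getsize is my addition so one unreadable file doesn't abort the whole walk):
import os, time, shelve

start = time.time()
root = os.path.abspath('/')      # ensure it's an abspath
baseline = 100 * 2**20           # 100 MiB
large_files = []

for foldername, subfolders, files in os.walk(root):
    for f in files:
        abspath = os.path.join(foldername, f)
        try:
            size = os.path.getsize(abspath)
        except (PermissionError, FileNotFoundError):
            continue             # skip unreadable or vanished files
        if size >= baseline:
            large_files.append((os.path.basename(abspath), size))
            print(abspath, size / 2**20)

# write the large files to the shelf once, after the walk
shelf = shelve.open('/root/large_files.db')
shelf["large_files"] = large_files
shelf.close()

end = time.time()
print('took', end - start, 'seconds')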

Multiple Paths Traversed and File Types Displayed in a Maya Menu with Python

I'm new here, so bear that in mind, and I hope my questions are clearly asked so you can help me out. I am trying to alter Brent Tyler's Dropbox script so that I will be able to list Python scripts under Python, MEL scripts under Mel, and so on (eventually plugins and other files too, but not for now).
OK, so my directory is like so:
sf = C:/users/scripts/  (contains a.py + b.mel)
pf = C:/users/scripts/Python/c.py
mf = C:/users/scripts/Mel/d.mel
(These are the folders my scripts will be placed in.)
Code:
absoluteFiles = []
relativeFiles = []
folders = []
allFiles = []
currentFile = ''

for root, dirs, files in os.walk(sf):
    for x in files:
        correct = root.replace('\\', '/')
        currentFile = (correct + '/' + x)
        allFiles.append(currentFile)
        if currentFile.endswith('.mel'):
            relativeFiles.append(currentFile.replace((mf + '/'), ""))
        if currentFile.endswith('.py'):
            relativeFiles.append(currentFile.replace((pf + '/'), ""))
relativeFiles.sort()

for relativeFile in relativeFiles:
    split = relativeFile.split('/')
    fileName = split[-1].split('.')
    i = 0
    while i < (len(split)):
        ### Create Folders ###
        if i == 0 and len(split) != 1:
            if cmds.menu(split[i], ex=1) == 0:
                cmds.menuItem(split[i], p=PadraigsTools, bld=1, sm=1, to=1, l=split[i])
        if i > 0 and i < (len(split)-1):
            if cmds.menu(split[i], ex=1) == 0:
                cmds.menuItem(split[i], p=split[i-1], bld=1, sm=1, to=1, l=split[i])
        ### Create .mel Files ###
        if fileName[-1] == 'mel':
            if i == len(split)-1 and len(split) > 1:
                scriptName = split[-1].split('.')
                temp1 = 'source ' + '"' + sf + '/' + relativeFile + '"; ' + scriptName[0]
                command = '''mel.eval(''' + "'" + temp1 + '''')'''
                cmds.menuItem(split[i], p=split[i-1], c=command, l=split[i])
            if i == len(split)-1 and len(split) == 1:
                scriptName = split[-1].split('.')
                temp1 = 'source ' + '"' + sf + '/' + relativeFile + '"; ' + scriptName[0]
                command = '''mel.eval(''' + "'" + temp1 + '''')'''
                cmds.menuItem(split[i], p=Mel, c=command, l=split[i])
        ### Create .py Files ###
        if fileName[-1] == 'py':
            if i == len(split)-1 and len(split) > 1:
                command = 'import ' + fileName[0] + '\n' + fileName[0] + '.' + fileName[0] + '()'
                cmds.menuItem(split[i], p=split[i-1], c=command, l=split[i])
            if i == len(split)-1 and len(split) == 1:
                command = 'import ' + fileName[0] + '\n' + fileName[0] + '.' + fileName[0] + '()'
                cmds.menuItem(split[i], p=Python, c=command, l=split[i])
        i += 1
So far I can print out individually (sf, pf, mf) to the corresponding directory, but I can't list out everything at once, and the files under sf will not show at all. Regarding the folders created, it ends up very odd: sometimes I get a duplicate folder as a submenu, and if I use sf it gives me C:/.
After days and hours of research trying to mend this script, I have found no answer, including:
from itertools import chain
paths = (mf, sf, pf)
for path, dirs, files in chain.from_iterable(os.walk(path) for path in paths):
::QUESTION::
Is there a way I can put this together sanely, so that new folders show up on refresh as submenus with their contents, and the files show up and can be executed from their corresponding submenu?
I would appreciate any help possible, including downvotes haha. And bear in mind I don't want you to hand me the answer on a golden spoon, because then I won't know what was corrected or needs to be :)
Thanks Greatly
-- Padraig
There are a couple of things you can do to simplify this a bit.
First, it's a good idea to make this as data-driven as possible so you don't have to rewrite it if your needs change. This does more or less what you do, but collects the results into a dictionary where the keys are the root paths you supplied and the values are lists of relative paths:
def find_files(root, extensions=('mel', 'py')):

    def clean_path(*p):
        return "/".join(p).replace('\\', '/')

    for root, _, files in os.walk(root):
        used = [f for f in files if f.split(".")[-1] in extensions]
        for u in used:
            yield clean_path(root, u)


def relativize(abs, roots):
    low_roots = map(str.lower, roots)  # all lower for comparison
    for root, low_root in zip(roots, low_roots):
        if abs.lower().startswith(low_root):
            return root, abs[len(root):]
    return ("", abs)


relative_paths = find_files('c:/users/scripts')
root_dict = {}
for item in relative_paths:
    folder, file = relativize(item, ('C:/users/scripts/Python/', 'C:/users/scripts/Mel/', 'C:/users/scripts/'))
    if not folder in root_dict:
        root_dict[folder] = []
    root_dict[folder].append(file)
So now you have a dictionary with a bunch of root folders and lists of relative paths (files that were not in any relative path you supplied are keyed to empty string and show up as absolute paths). You can make the menus in a very generic way because they are all in the same format. If you need the entire list, you can get it like this:
results = []
for each_root in root_dict:
    for relpath in root_dict[each_root]:
        results.append(each_root + relpath)
For creating the actual menus, you want to use a single function and bind it to the filename for each menu item as you make it. This is a slightly tricky topic (more detail here). The easy way to do this is to use a functools.partial object, which bundles a command and a bunch of arguments into an object which looks like a function: you can create a partial and attach it to the command of your menu items so they all call the same function with their individual arguments. Here's a simple example using the variables from above and a menubar; you can see how to adapt it to other kinds of menus pretty easily:
from functools import partial


# call this on every button selection
def test(filepath, ignore):
    # maya will send "test(name, False)"; we just ignore the 'False'
    print "Here's where I would reload", filepath


example = cmds.window(title='example')
menubar = cmds.menuBarLayout()

for name in root_dict:  # the root folders collected above
    menuname = name
    if menuname:
        menuname = menuname.split("/")[-2]  # we used trailing slashes
    else:
        menuname = "root"
    cmds.menu(label=menuname)
    file_names = root_dict[name]
    file_names.sort()
    for fn in file_names:
        mi = cmds.menuItem(label=fn, command=partial(test, fn))
    cmds.setParent(menubar)

Python: detecting an existing file with os.path.exists

I'm obviously doing something very wrong. I'd like to find files that are in one directory but not in a second directory (for instance xxx.phn in one directory and xxx.wav in the second directory)...
It seems that I cannot detect when a file is NOT present in the second directory (it always looks as though all files are there)... I don't get any files displayed, although they exist...
import shutil, random, os, sys

if len(sys.argv) < 4:
    print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
    """
    sys.exit(-1)

folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]

i = 0
for d, ds, fs in os.walk(folder):
    for fname in fs:
        basename = os.path.splitext(fname)[0]
        if not os.path.exists(dest_folder + '/' + basename + '.' + dest_ext):
            print str(i) + ': No duplicate for: ' + fname
            i = i + 1
print str(i) + ' files found'
Can I suggest that you build the filename you're checking and print it before checking whether it exists:
dest_fname = dest_folder+'/'+basename + '.' + dest_ext
print "dest exists? %s" % dest_fname
os.path.exists(dest_fname)
Also, as an aside, please join paths using the os.path.join() method. (If you really want the base name without the leading path elements, there's an os.path.basename() function.)
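For example (a small sketch reusing the variables from the script above, in the same Python 2 style):
import os

dest_fname = os.path.join(dest_folder, basename + '.' + dest_ext)  # portable, no manual '/' joining
print "basename example: %s" % os.path.basename('/some/dir/xxx.phn')  # -> xxx.phn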
I tried your program out and it worked for two simple flat directories. Here are the directory contents:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
b\a.csv
b\c.csv
And here is the result of your script with a txt b csv as parameters. If your result was different, maybe you used different parameters?
0: No duplicate for: b.txt
1 files found
But when I added subdirectories:
a\a.txt
a\b.txt # Missing from b directory
a\c.txt
a\c\d.txt
a\c\e.txt # Missing from b\c directory
b\a.csv
b\c.csv
b\c\d.csv
Your script gives:
0: No duplicate for: b.txt
1: No duplicate for: d.txt # Error here
2: No duplicate for: e.txt
3 files found
To work with sub-directories you need to compute the path relative to the source directory, and then add it to the destination directory. Here's the result with a few other minor cleanups and prints to see what is going on. Note that fname is always just the file name and needs to be joined with d to get the whole path:
#!python2
import os, sys

if len(sys.argv) < 4:
    print """usage: python del_orphans_dir1_dir2.py source_folder source_ext dest_folder dest_ext
    """
    sys.exit(-1)

folder = sys.argv[1]
ext = sys.argv[2]
dest_folder = sys.argv[3]
dest_ext = sys.argv[4]

i = 0
for d, ds, fs in os.walk(folder):
    for fname in fs:
        relpath = os.path.relpath(os.path.join(d, fname), folder)
        relbase = os.path.splitext(relpath)[0]
        path_to_check = os.path.join(dest_folder, relbase + '.' + dest_ext)
        if not os.path.exists(path_to_check):
            print '{}: No duplicate for: {}, {} not found.'.format(i, os.path.join(folder, relpath), path_to_check)
            i += 1
print i, 'files found'
Output:
0: No duplicate for: a\b.txt, b\b.csv not found.
1: No duplicate for: a\c\e.txt, b\c\e.csv not found.
2 files found
What you're looking for are matching files, not duplicate ones. One problem is that you're not using the source_ext argument when searching. Another is that, I think, the command-line argument handling is messed up. Here's a corrected version that accomplishes what you're trying to do:
import os
import sys

if len(sys.argv) != 5:
    print("usage: python "
          "del_orphans_dir1_dir2.py "  # argv[0] (script name)
          "source_folder "             # argv[1]
          "source_ext "                # argv[2]
          "dest_folder "               # argv[3]
          "dest_ext")                  # argv[4]
    sys.exit(2)  # command line error

source_folder, source_ext, dest_folder, dest_ext = sys.argv[1:5]
dest_ext = dest_ext if dest_ext.startswith('.') else '.' + dest_ext  # check dot

found = 0
for d, ds, fs in os.walk(source_folder):
    for i, fname in enumerate(fs, start=1):
        basename, ext = os.path.splitext(fname)
        if ext == source_ext:
            if os.path.exists(os.path.join(dest_folder, basename + dest_ext)):
                found += 1
            else:
                print '{}: No matching file found for: {}'.format(i, fname)

print '{} matches found'.format(found)
sys.exit(0)

Why did the extract function stop extracting?

Can someone explain, and help me resolve, why my function stopped extracting .tgz files when I added a counter? The counter creates folders with different names, to keep an extracted folder from overwriting the previous one when I extract another .tgz file in the same directory. What am I doing wrong? Thanks! Below are the two functions... the first function extracts files properly; the second extracts into a numbered folder and quits.
Works:
def extract(tar_url, extract_path='.'):
    print tar_url
    tar = tarfile.open(tar_url, 'r')
    for item in tar:
        tar.extract(item, extract_path)
        if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
            extract(item.name, "./" + item.name[:item.name.rfind('/')])
Does not work:
global counter
counter = 1

def extract(tar_url, extract_path='.'):
    global counter
    print tar_url
    tar = tarfile.open(tar_url, 'wb')  # changed from r to wb 6/12
    for item in tar:
        tar.extract(item, extract_path + "_%d" % counter)
        counter += 1
        if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
            extract(item.name, "./" + item.name[:item.name.rfind('/')])
Here is how I call it in main (I'm using easygui):
direct = diropenbox(msg="Choose path to place extracted files!", title='SQA Extractor', default='c:\\Extracted')
msg = "Are you sure you want to extract?"
title = "Confirm"
os.chdir(direct)
try:
    for root, dirname, files in os.walk(directory):
        for file1 in files:
            if file1.endswith(".tgz") or file1.endswith(".tar"):
                extract(os.path.join(root, file1))
Perhaps it was this change that broke your code:
tar = tarfile.open(tar_url, 'r')
Changed to:
tar = tarfile.open(tar_url, 'wb')  # changed from r to wb 6/12
Does the extract path with the counter exist?
for item in tar:
    os.mkdir(extract_path + "_%d" % counter)
    tar.extract(item, extract_path + "_%d" % counter)
    counter += 1
    if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1:
        extract(item.name, "./" + item.name[:item.name.rfind('/')])
The original version relies on the created folder names matching the relative paths specified in the archive. In the new version, the recursive call tries to put the files into a folder without a 'tag' number, after extracting the other files at that level into one that does.
Try adding the tag to the path name used for the recursive call as well.
BTW, the Python-idiomatic spelling of item.name.find(".tar") != -1 is '.tar' in item.name.
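Putting those suggestions together, the broken version might be patched roughly like this (a sketch in the question's Python 2 style, not tested against the original archives; it tags one folder per archive rather than per extracted item, goes back to read mode, and uses os.path.dirname instead of the rfind slicing):
import os
import tarfile

counter = 1

def extract(tar_url, extract_path='.'):
    global counter
    print tar_url
    tar = tarfile.open(tar_url, 'r')          # back to read mode
    tagged_path = extract_path + "_%d" % counter
    counter += 1
    if not os.path.isdir(tagged_path):
        os.makedirs(tagged_path)              # make sure the tagged folder exists
    for item in tar:
        tar.extract(item, tagged_path)
        if '.tgz' in item.name or '.tar' in item.name:
            # recurse into the nested archive, which now sits under the tagged folder
            nested = os.path.join(tagged_path, item.name)
            extract(nested, os.path.dirname(nested))
    tar.close()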
