Printing final (leaf?) nodes in directory listing Python - python

I can walk the directory and print just folder/directory names but I would like to exclude folder names of directories that contain other directories. For some reason I am calling that a "final node" in the tree structure but I could well be fooling myself, wouldn't be the first time. =) On reviewing the list of other answers, perhaps this is called a "leaf node"?
import os

# Question listing: collects the name of EVERY directory below the current
# working directory (not only leaf directories) and prints them sorted.
chosen_path = os.getcwd()
FoldersFound = []
for root, dirs, files in os.walk(chosen_path, topdown=True):
    # `dirs` holds the names of the sub-directories directly inside `root`.
    FoldersFound.extend(dirs)
FoldersFound.sort()
for FolderName in FoldersFound:
    # Parenthesised single argument prints the same on Python 2 and 3.
    print(FolderName)

This will print the full names of the directories that have no child directories:
# `here` is the directory to scan; it must be defined before this loop runs.
for root, dirs, files in os.walk(here):
    # No sub-directories at this level means `root` is a leaf directory.
    if not dirs:
        print('%s is a leaf' % root)
To print only the base name, replace root with os.path.basename(root)
To put them in a list use:
# Full paths of all directories under `here` that contain no sub-directories.
# `here` must be defined by the surrounding code.
folders = [root for root, dirs, files in os.walk(here) if not dirs]
Likewise to put only the basename in the list, replace root with os.path.basename(root)

This is a solution using "os.listdir":
import os


def print_leaf_dir(pathname, dirname):
    """Print the path of every leaf directory below pathname/dirname.

    A leaf is a directory that contains no sub-directories (files are
    ignored). `dirname` may be '' to start at `pathname` itself.
    """
    current = os.path.join(pathname, dirname)
    dirnames = [
        subfolder
        for subfolder in os.listdir(current)
        if os.path.isdir(os.path.join(current, subfolder))
    ]
    if dirnames:
        # Not a leaf: descend into each sub-directory.
        for subfolder in dirnames:
            print_leaf_dir(current, subfolder)
    else:
        print(current)


if __name__ == '__main__':
    print_leaf_dir(r'C:\TEMP', '')

Related

Recursive listing of files in a directory matching a pattern

I am trying to recursively list all file names that are in sub directories called Oracle (but not list files in other sub directories).
I have the following code:
# Question listing, kept as asked. `files` holds the files directly inside
# `root`, so this prints the files that sit next to an "Oracle" folder rather
# than the files inside it.
for root, dirs, files in os.walk(r"Y:\Data\MXD_DC\DataSourceChange", topdown=True):
    for name in dirs:
        if fnmatch.fnmatch(name, 'Oracle'):
            for filename in files:
                fullpath = os.path.join(root, filename)
                print("FullPath is: " + fullpath)
I can only get it to list all file names of all sub directories. It does not even go to the sub directory called Oracle.
Currently, when you find a directory named Oracle, you list the files that are at the same level in the hierarchy instead of listing the files contained in the Oracle folder, because the tuple returned by os.walk contains directories and files at the same level.
You have 2 ways to list the expected files:
only use dirnames from walk and use listdir once you have found an Oracle folder
for root, dirs, files in os.walk(r"Y:\Data\MXD_DC\DataSourceChange", topdown=True):
    for name in dirs:
        if name == 'Oracle':
            # List the contents of the Oracle folder itself, not of `root`.
            path = os.path.join(root, name)
            for filename in os.listdir(path):
                fullpath = os.path.join(path, filename)
                print("FullPath is: " + fullpath)
ignore dirnames, use last component from root and test if it is Oracle:
for root, dirs, files in os.walk(r"Y:\Data\MXD_DC\DataSourceChange", topdown=True):
    # When the directory being listed is itself named Oracle, `files` are
    # exactly the files it contains.
    if os.path.basename(root) == 'Oracle':
        for filename in files:
            fullpath = os.path.join(root, filename)
            print("FullPath is: " + fullpath)
if you want to list the files in a particular directory you can use
import os
# Returns the names of the entries in the "Oracle" directory (the path is
# relative to the current working directory). The result is discarded unless
# it is assigned or printed.
os.listdir("Oracle")
to print the directories from a script use this
import os
# os.listdir returns the names of every entry in the "Oracle" directory.
# The parenthesised form prints the same on Python 2 and Python 3.
print("%s" % os.listdir("Oracle"))

How to traverse directory tree and continue loop?

I'm trying to learn python. I'm trying to traverse a directory and print directory paths, digits extracted from directory names and file names, but my loop does not continue: it prints this info only from the first directory. How to continue this loop?
#!/usr/bin/python
# Question listing: walks the pictures folder and, for sub-directory names
# that contain digits, prints a joined path, the digits and file names.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the loops below cannot be recovered with certainty -- confirm before running.
import os
import re
#change directory
os.chdir('/Users/me/Pictures/')
# On each iteration `dirs` and `files` are the children of `root`.
for root, dirs, files in os.walk(os.getcwd()):
for i in dirs:
# First run of digits in the sub-directory name, or None when there is none.
val = re.search('\d+', i)
if val:
a = os.path.join(root, i)
for f in files:
print a, val.group(0), f
You are matching the names of subdirectories, not the name of the current directory. If you only have one level of nested directories with digits your if statement will be wrong as soon as you enter into those subdirectories.
I don't think you understand that in each iteration, root is the path to the directory being listed, and dirs is a list of names of subdirectories.
Match against root instead:
for root, dirs, files in os.walk(os.getcwd()):
    # `root` is the directory currently being listed; test its own name.
    dirname = os.path.basename(root)
    val = re.search(r'\d+', dirname)
    if val:
        for f in files:
            # `root` is the full path of the matching directory; this snippet
            # defines no `a`, so printing `a` would raise NameError.
            print("%s %s %s" % (root, val.group(0), f))

searching for a filename with extension and printing its relative path

I have the code below, which prints the names of the files that match the search criterion (file extension *.org). How can I print the relative path of each file found? Thanks in advance.
def get_filelist() :
    """Return the extension-less names of files ending in 'Org' under the share.

    Question listing, kept as asked: the print below shows `dirs` (the
    sub-directories of `root`), not the path of the file.
    """
    directory = "\\\\networkpath\\123\\abc\\"
    filelist = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('Org'):
                print(str(dirs) +"\\" + str(file)) #prints empty list [] followed by filename
                filelist.append(os.path.splitext(file)[0])
    return (filelist)
Please see me as novice in python
files and dirs list the children of root. dirs thus lists siblings of file. You want to print this instead:
# Path of the match relative to the directory being searched. Without the
# second argument relpath() is relative to the current working directory,
# which on Windows raises ValueError when that is on a different drive/share.
print(os.path.relpath(os.path.join(root, file), directory))
you need to use os.path.join:
def get_filelist(directory="\\\\networkpath\\123\\abc\\", suffix='org'):
    """Print and return the full paths of files whose names end with `suffix`.

    directory: root of the tree to walk (defaults to the network share).
    suffix: case-sensitive ending of the file name; the default is the
        lower-case 'org'.
    Returns a list of os.path.join(root, name) for every matching file.
    """
    filelist = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            if name.endswith(suffix):
                # `root` is the directory that contains `name`.
                fullpath = os.path.join(root, name)
                print(fullpath)
                filelist.append(fullpath)
    return filelist

How to remove all empty files within folder and its sub folders?

I am trying to remove all empty files in a folder, and there are folders within the folder so it needs to check inside those folders too:
e.g
remove all empty files within C:\folder1\folder1 and C:\folder1\folder2 etc
import sys
import os


def main():
    """Delete empty files below the directory given as first CLI argument."""
    getemptyfiles(sys.argv[1])


def getemptyfiles(rootdir):
    """Print and delete every zero-byte file below `rootdir`.

    Directories named RECYCLER or RECYCLED are not entered. Files whose size
    cannot be read or that cannot be removed are skipped.
    """
    for root, dirs, files in os.walk(rootdir):
        # Removing names from `dirs` in place stops os.walk descending into them.
        for d in ['RECYCLER', 'RECYCLED']:
            if d in dirs:
                dirs.remove(d)
        for f in files:
            fullname = os.path.join(root, f)
            try:
                if os.path.getsize(fullname) == 0:
                    print(fullname)
                    os.remove(fullname)
            except OSError:
                # WindowsError exists only on Windows; OSError is its portable
                # base class, so the handler also works on other platforms.
                continue
This will work with a bit of adjusting:
The os.remove() statement could fail so you might want to wrap it with try...except as well. WindowsError is platform specific. Filtering the traversed directories is not strictly necessary but helpful.
The for loop uses dir to find all files, but not directories, in the current directory and all subfolders recursively. Then the second line checks to see if the length of each file is less than 1 byte before deleting it.
rem Delete every zero-length file under C:\folder1, including subfolders.
cd /d C:\folder1
rem "delims=" keeps paths that contain spaces in one piece; /a-d lists files only.
for /F "usebackq delims=" %%A in (`dir /b /s /a-d`) do (
    rem %%~zA expands to the size of the file in bytes.
    if %%~zA LSS 1 del "%%A"
)
import os

# Keep asking until the user enters an existing directory.
while True:
    path = input("Enter the path")
    if os.path.isdir(path):
        break
    else:
        print("Entered path is wrong!")

# Delete every zero-byte file below the chosen directory.
for root, dirs, files in os.walk(path):
    for name in files:
        filename = os.path.join(root, name)
        if os.stat(filename).st_size == 0:
            print(" Removing ", filename)
            os.remove(filename)
I first remove the empty files; afterwards, following this answer (https://stackoverflow.com/a/6215421/2402577), I remove the empty folders.
In addition, I added topdown=False in os.walk() to walk from leaf to root, since the default behavior of os.walk() is to walk from root to leaf.
So folders that contain only empty folders or empty files are removed as well.
import os


def remove_empty_files_and_folders(dir_path) -> None:
    """Delete zero-byte files and then-empty directories below `dir_path`.

    The tree is walked bottom-up (topdown=False), so a directory that becomes
    empty once its children are cleaned is removed when its parent is visited.
    `dir_path` itself is never removed.
    """
    for root, dirnames, files in os.walk(dir_path, topdown=False):
        for f in files:
            full_name = os.path.join(root, f)
            if os.path.getsize(full_name) == 0:
                os.remove(full_name)
        for dirname in dirnames:
            full_path = os.path.join(root, dirname)
            # Leave symlinked directories alone: resolving them could remove
            # an empty directory that lives outside the tree being cleaned.
            if os.path.islink(full_path):
                continue
            if not os.listdir(full_path):
                os.rmdir(full_path)
I hope this can help you
#encoding = utf-8
import os
docName = []
def listDoc(path):
    """Recursively delete every zero-byte file below `path`."""
    for doc in os.listdir(path):
        docPath = os.path.join(path, doc)
        if os.path.isfile(docPath):
            # Compare with the number 0; the letter `o` is an undefined name.
            if os.path.getsize(docPath) == 0:
                os.remove(docPath)
        elif os.path.isdir(docPath):
            listDoc(docPath)
# Run listDoc on C:\folder1 and, through its recursion, on all sub-folders.
listDoc(r'C:\folder1')

How can I get a specific folder with a specific folders sibling in python

I would like to find the paths of specific folders that have a specific set of sibling folders.
ex:
I want to find all folders named : zeFolder with siblings folders brotherOne and brotherTwo
|-dad1|---brotherOne|---brotherFour|---zeFolder (not match)
|-dad2|---brotherOne|---brotherTwo|---zeFolder (♥♥♥Match♥♥♥)
[...]
Below is my code, but with this solution I find all the folders.
import os

# Question listing, kept as asked: reports every directory named 'totolo'
# without looking at its sibling directories.
for root, dirs, files in os.walk("/"):
    #print (dirs)
    for name in dirs:
        if name == 'totolo':
            print ('finded')
            print(os.path.join(root, name))
I don't know how to use Conditional Statements to do that
Thanks for your help.
Basically it sounds like you want to find a specific set of subfolders so using sets is both natural and makes this a fairly easy thing to do. Their use also removes order dependencies when checking for equality.
import os

start_path = '/'
target = 'zeFolder'
siblings = ['brotherOne', 'brotherTwo']
# A set makes the comparison independent of the order os.walk lists names in.
sought = set([target] + siblings)
for root, dirs, files in os.walk(start_path):
    # Equality means `root` contains exactly the sought folders and no others.
    if sought == set(dirs):
        print('found')
What about using lists
import os

folder = 'zeFolder'
brothers = ['brotherOne', 'brotherTwo']
for dirpath, dirnames, filenames in os.walk('/'):
    # Match when the folder and all of its required siblings sit in `dirpath`;
    # additional sub-directories are allowed.
    if folder in dirnames and all(brother in dirnames for brother in brothers):
        print('matches on %s' % os.path.join(dirpath, folder))
or sets
import os

folder = 'zeFolder'
brothers = set(['brotherOne', 'brotherTwo', folder])
for dirpath, dirnames, filenames in os.walk('/'):
    # issuperset: every required name is present; extra directories are allowed.
    if set(dirnames).issuperset(brothers):
        print('matches on %s' % os.path.join(dirpath, folder))
Both run at same speed for me.
import os
import glob

# Print every entry under dad1 whose name ends with each brother's name.
for pattern in (r"dad1/*brotherOne", r"dad1/*brotherTwo"):
    filelist = glob.glob(pattern)
    for f in filelist:
        print(f)
You could also try the glob technique. And do whatever action you'd like to in the for loop.

Categories

Resources