How to traverse directory tree and continue loop? - python

I'm trying to learn python. I'm trying to traverse a directory and print directory paths, digits extracted from directory names and file names, but my loop does not continue: it prints this info only from the first directory. How to continue this loop?
#!/usr/bin/python
import os
import re
#change directory
os.chdir('/Users/me/Pictures/')
for root, dirs, files in os.walk(os.getcwd()):
for i in dirs:
val = re.search('\d+', i)
if val:
a = os.path.join(root, i)
for f in files:
print a, val.group(0), f

You are matching the names of subdirectories, not the name of the current directory. If you only have one level of nested directories with digits your if statement will be wrong as soon as you enter into those subdirectories.
I don't think you understand that in each iteration, root is the path to the directory being listed, and dirs is a list of names of subdirectories.
Match against root instead:
# On every iteration `root` is the directory currently being listed, so the
# digits are extracted from the base name of `root` itself rather than from
# the names of its subdirectories.
for root, dirs, files in os.walk(os.getcwd()):
    dirname = os.path.basename(root)
    val = re.search(r'\d+', dirname)
    if val:
        for f in files:
            # `root` is already the full path of the matching directory
            # (the earlier draft printed `a`, which is never defined here).
            # %-formatting keeps the output identical on Python 2 and 3.
            print('%s %s %s' % (root, val.group(0), f))

Related

How to search for file meeting condition in directory?

In a directory, I am searching for files that meet a certain condition. I am using code provided by another Stack Overflow post, but it's sending me into an infinite loop for a reason I can't see.
I iterate through directories in a subdirectory. Then, I iterate through the subsubdirectories in the subdirectory, looking for ones that meet a condition. Here is my code:
import os
rootdir ="/mnt/data/SHAVE_cases"
cases = []
for subdirs, dirs, files in os.walk(rootdir):
for subdir in subdirs:
print("1")
# now we are in the file subdirectory.
# we need to see if these subsubdirectories start with "multi"
for subdirs2, d, f in os.walk(rootdir + subdir):
for s in subdirs2:
if s.startswith("multi"):
cases.append(s)
Here is another code following the advice of msi
for pathname, subdirs, files in os.walk(rootdir):
for subdir in subdirs:
for path, sub, f in os.walk(rootdir + subdir):
print(sub)
It still returns an infinite loop.
I solved this using the answer from this question.
To search through the subdirectories for a name, we use find_dir().
import os
def find_dir(name, start):
    """Return the absolute path of the first directory named *name* under *start*.

    The tree rooted at *start* is traversed with ``os.walk``; the first
    directory whose list of subdirectories contains *name* determines the
    result.  ``None`` is returned when no such directory is found.
    """
    for root, dirs, files in os.walk(start):
        if name in dirs:
            return os.path.abspath(os.path.join(root, name))
    # Explicit so callers can rely on ``is None`` for "not found".
    return None
I've changed my code completely. Here is a piece of it, so you can see find_dir() in action.
for key in dictionary:
name = "goal_dir"
if find_dir(name, key) != None:
# perform analysis on the directory

searching for a filename with extension and printing its relative path

I have the code below, which prints the names of the files that match the search criterion (file extension *.org). How can I print the relative path of each file found? Thanks in advance.
def get_filelist() :
directory = "\\\\networkpath\\123\\abc\\"
filelist = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('Org'):
print(str(dirs) +"\\" + str(file)) #prints empty list [] followed by filename
filelist.append(os.path.splitext(file)[0])
return (filelist)
Please see me as novice in python
files and dirs list the children of root. dirs thus lists siblings of file. You want to print this instead:
print(os.path.relpath(os.path.join(root, file)))
you need to use os.path.join:
def get_filelist(directory="\\\\networkpath\\123\\abc\\", suffix='org'):
    """Collect the full paths of all files under *directory* ending in *suffix*.

    Each matching path (``os.path.join(root, name)``) is printed and also
    appended to the returned list.  The comparison is case-sensitive, so the
    default ``'org'`` only matches a lower-case ending.

    Both parameters default to the values that were previously hard-coded,
    so a call without arguments behaves as before.
    """
    filelist = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            if name.endswith(suffix):
                full_path = os.path.join(root, name)
                print(full_path)
                filelist.append(full_path)
    return filelist

How to remove all empty files within folder and its sub folders?

I am trying to remove all empty files in a folder, and there are folders within the folder so it needs to check inside those folders too:
e.g
remove all empty files within C:\folder1\folder1 and C:\folder1\folder2 etc
import sys
import os
def main():
getemptyfiles(sys.argv[1])
def getemptyfiles(rootdir):
    """Print and delete every zero-byte file found under *rootdir*.

    Directories named ``RECYCLER`` or ``RECYCLED`` are not descended into.
    Files whose size cannot be read or that cannot be removed are skipped.
    """
    for root, dirs, files in os.walk(rootdir):
        # Removing entries from `dirs` in place stops os.walk from
        # descending into them (default top-down traversal).
        for d in ['RECYCLER', 'RECYCLED']:
            if d in dirs:
                dirs.remove(d)
        for f in files:
            fullname = os.path.join(root, f)
            try:
                if os.path.getsize(fullname) == 0:
                    print(fullname)
                    os.remove(fullname)
            except OSError:
                # OSError instead of WindowsError: the latter name only
                # exists on Windows, where it is an OSError anyway.
                continue
This will work with a bit of adjusting:
The os.remove() statement could fail so you might want to wrap it with try...except as well. WindowsError is platform specific. Filtering the traversed directories is not strictly necessary but helpful.
The for loop uses dir to find all files, but not directories, in the current directory and all subfolders recursively. Then the second line checks to see if the length of each file is less than 1 byte before deleting it.
cd /d C:\folder1
for /F "usebackq" %%A in (`dir/b/s/a-d`) do (
if %%~zA LSS 1 del %%A
)
import os
while(True):
path = input("Enter the path")
if(os.path.isdir(path)):
break
else:
print("Entered path is wrong!")
for root,dirs,files in os.walk(path):
for name in files:
filename = os.path.join(root,name)
if os.stat(filename).st_size == 0:
print(" Removing ",filename)
os.remove(filename)
I do first remove empty files, afterwards by following this answer (https://stackoverflow.com/a/6215421/2402577), I have removed the empty folders.
In addition, I added topdown=False in os.walk() to walk from leaf to root, since the default behavior of os.walk() is to walk from root to leaf.
So empty folders that also contains empty folders or files are removed as well.
import os
def remove_empty_files_and_folders(dir_path) -> None:
    """Delete zero-byte files and then empty directories below *dir_path*.

    The tree is walked bottom-up (``topdown=False``) so that a directory
    which becomes empty once its children are removed is itself removed
    when its parent is processed.  *dir_path* itself is never removed.

    Symbolic links to directories are left alone: resolving them (as
    ``os.path.realpath`` would) could delete an empty directory that lives
    outside *dir_path*.
    """
    for root, dirnames, files in os.walk(dir_path, topdown=False):
        for f in files:
            full_name = os.path.join(root, f)
            if os.path.getsize(full_name) == 0:
                os.remove(full_name)
        for dirname in dirnames:
            full_path = os.path.join(root, dirname)
            if os.path.islink(full_path):
                # os.walk lists symlinks to directories in `dirnames`;
                # never follow them when deleting.
                continue
            if not os.listdir(full_path):
                os.rmdir(full_path)
I hope this can help you
#encoding = utf-8
import os
docName = []
def listDoc(path):
    """Recursively delete every zero-byte file in *path* and its subdirectories.

    Entries are read with ``os.listdir``; regular files of size 0 are
    removed and directories are processed by a recursive call.
    """
    for doc in os.listdir(path):
        docPath = os.path.join(path, doc)
        if os.path.isfile(docPath):
            # Compare against the integer 0 (the earlier draft used the
            # letter ``o``, which raises NameError).
            if os.path.getsize(docPath) == 0:
                os.remove(docPath)
        elif os.path.isdir(docPath):
            listDoc(docPath)
listDoc(r'C:\folder1')

Printing final (leaf?) nodes in directory listing Python

I can walk the directory and print just folder/directory names but I would like to exclude folder names of directories that contain other directories. For some reason I am calling that a "final node" in the tree structure but I could well be fooling myself, wouldn't be the first time. =) On reviewing the list of other answers, perhaps this is called a "leaf node"?
import os
chosen_path = (os.getcwd())
FoldersFound =[]
for root, dirs, files in os.walk(chosen_path, topdown=True):
for name in dirs:
FoldersFound.append(name)
FoldersFound.sort()
for FolderName in FoldersFound:
print FolderName
This will print the full names of the directories that have no child directories:
for root, dirs, files in os.walk(here):
if not dirs:
print '%s is a leaf' % root
To print only the base name, replace root with os.path.basename(root)
To put them in a list use:
folders = []
for root, dirs, files in os.walk(here):
if not dirs:
folders.append(root)
Likewise to put only the basename in the list, replace root with os.path.basename(root)
This is a solution using "os.listdir":
import os
def print_leaf_dir(pathname, dirname):
dirnames = [subfolder for subfolder in os.listdir(os.path.join(pathname, dirname)) if os.path.isdir(os.path.join(pathname, dirname, subfolder))]
if(dirnames):
for subfolder in dirnames:
print_leaf_dir(os.path.join(pathname, dirname), subfolder)
else:
print(os.path.join(pathname, dirname))
if(__name__ == '__main__'):
print_leaf_dir(r'C:\TEMP', '')

os.walk without digging into directories below

How do I limit os.walk to only return files in the directory I provide it?
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
return outputList
Don't use os.walk.
Example:
import os
root = "C:\\"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
print item
Use the walklevel function.
import os
def walklevel(some_dir, level=1):
    """Like ``os.walk``, but descend at most *level* levels below *some_dir*.

    Yields the same ``(root, dirs, files)`` tuples as ``os.walk``.  With
    ``level=0`` only *some_dir* itself is yielded; with ``level=1`` its
    immediate subdirectories are yielded as well, and so on.

    Depth is measured by counting ``os.path.sep`` in each path with trailing
    separators stripped, so a trailing separator on *some_dir* and a path
    consisting only of separators (the filesystem root) are both handled.

    Raises AssertionError if *some_dir* is not a directory.
    """
    sep = os.path.sep
    top = some_dir.rstrip(sep)
    if some_dir and not top:
        # The path was made up solely of separators: keep it as the root
        # instead of collapsing it to the empty string.
        top = sep
    assert os.path.isdir(top)
    num_sep = top.rstrip(sep).count(sep)
    for root, dirs, files in os.walk(top):
        yield root, dirs, files
        num_sep_this = root.rstrip(sep).count(sep)
        if num_sep + level <= num_sep_this:
            # Emptying `dirs` in place tells os.walk not to go deeper.
            del dirs[:]
It works just like os.walk, but you can pass it a level parameter that indicates how deep the recursion will go.
I think the solution is actually very simple.
use
break
to only do first iteration of the for loop, there must be a more elegant way.
for root, dirs, files in os.walk(dir_name):
for f in files:
...
...
break
...
The first time you iterate over os.walk, it returns a tuple (root, dirs, files) for the current directory, then on the next loop the contents of the next directory.
Take original script and just add a break.
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
break
return outputList
The suggestion to use listdir is a good one. The direct answer to your question in Python 2 is root, dirs, files = os.walk(dir_name).next().
The equivalent Python 3 syntax is root, dirs, files = next(os.walk(dir_name))
You could use os.listdir() which returns a list of names (for both files and directories) in a given directory. If you need to distinguish between files and directories, call os.stat() on each name.
If you have more complex requirements than just the top directory (eg ignore VCS dirs etc), you can also modify the list of directories to prevent os.walk recursing through them.
ie:
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
dirs[:] = [d for d in dirs if is_good(d)]
for f in files:
do_stuff()
Note - be careful to mutate the list, rather than just rebind it. Obviously os.walk doesn't know about the external rebinding.
for path, dirs, files in os.walk('.'):
print path, dirs, files
del dirs[:] # go only one level deep
Felt like throwing my 2 pence in.
baselevel = len(rootdir.split(os.path.sep))
for subdirs, dirs, files in os.walk(rootdir):
curlevel = len(subdirs.split(os.path.sep))
if curlevel <= baselevel + 1:
[do stuff]
The same idea with listdir, but shorter:
[f for f in os.listdir(root_dir) if os.path.isfile(os.path.join(root_dir, f))]
Since Python 3.5 you can use os.scandir instead of os.listdir. Instead of strings you get an iterator of DirEntry objects in return. From the docs:
Using scandir() instead of listdir() can significantly increase the performance of code that also needs file type or file attribute information, because DirEntry objects expose this information if the operating system provides it when scanning a directory. All DirEntry methods may perform a system call, but is_dir() and is_file() usually only require a system call for symbolic links; DirEntry.stat() always requires a system call on Unix but only requires one for symbolic links on Windows.
You can access the name of the object via DirEntry.name which is then equivalent to the output of os.listdir
You could also do the following:
for path, subdirs, files in os.walk(dir_name):
for name in files:
if path == ".": #this will filter the files in the current directory
#code here
In Python 3, I was able to do this:
import os
dir = "/path/to/files/"
#List all files immediately under this folder:
print ( next( os.walk(dir) )[2] )
#List all folders immediately under this folder:
print ( next( os.walk(dir) )[1] )
The root folder changes for every directory os.walk finds. I solved that by checking whether root is equal to the starting directory (root == dir_name):
def _dir_list(self, dir_name, whitelist):
outputList = []
for root, dirs, files in os.walk(dir_name):
if root == dir_name: #This only meet parent folder
for f in files:
if os.path.splitext(f)[1] in whitelist:
outputList.append(os.path.join(root, f))
else:
self._email_to_("ignore")
return outputList
import os
def listFiles(self, dir_name):
names = []
for root, directory, files in os.walk(dir_name):
if root == dir_name:
for name in files:
names.append(name)
return names
This is how I solved it
if recursive:
items = os.walk(target_directory)
else:
items = [next(os.walk(target_directory))]
...
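There is a catch when using listdir: it returns bare names, so the argument to os.path.isdir() must be a full path (or a path that is valid relative to the current working directory), not just the name. To pick subdirectories you do: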
for dirname in os.listdir(rootdir):
if os.path.isdir(os.path.join(rootdir, dirname)):
print("I got a subdirectory: %s" % dirname)
The alternative is to change to the directory to do the testing without the os.path.join().
You can use this snippet
for root, dirs, files in os.walk(directory):
if level > 0:
# do some stuff
else:
break
level-=1
create a list of excludes, use fnmatch to skip the directory structure and do the process
excludes= ['a\*\b', 'c\d\e']
for root, directories, files in os.walk('Start_Folder'):
if not any(fnmatch.fnmatch(nf_root, pattern) for pattern in excludes):
for root, directories, files in os.walk(nf_root):
....
do the process
....
same as for 'includes':
if **any**(fnmatch.fnmatch(nf_root, pattern) for pattern in **includes**):
Why not simply use a range and os.walk combined with the zip? Is not the best solution, but would work too.
For example like this:
# your part before
for count, (root, dirs, files) in zip(range(0, 1), os.walk(dir_name)):
# logic stuff
# your later part
Works for me on python 3.
Also: a break is simpler too, by the way. (Look at the answer from @Pieter.)
A slight change to Alex's answer, but using __next__():
print(next(os.walk('d:/'))[2])
or
print(os.walk('d:/').__next__()[2])
with the [2] being the file in root, dirs, file mentioned in other answers
This is a nice python example
def walk_with_depth(root_path, depth):
    """Walk *root_path* with ``os.walk``, limited to *depth* levels.

    Yields ``[root, dirs, files]`` lists; ``dirs`` is a copy of the list
    os.walk produced, so mutating it does not affect the traversal.

    * ``depth < 0``  -- no limit, the whole tree is walked.
    * ``depth == 0`` -- nothing is yielded.
    * ``depth > 0``  -- *root_path* plus directories up to *depth* levels
      below it are yielded.

    Depth is measured by counting ``os.path.sep`` with trailing separators
    stripped on both sides of the comparison, so passing *root_path* with a
    trailing separator gives the same result as passing it without one.
    """
    if depth < 0:
        for root, dirs, files in os.walk(root_path):
            yield [root, dirs[:], files]
        return
    elif depth == 0:
        return

    sep = os.path.sep
    base_depth = root_path.rstrip(sep).count(sep)
    for root, dirs, files in os.walk(root_path):
        yield [root, dirs[:], files]
        # Strip here too: the first `root` is `root_path` verbatim and may
        # carry a trailing separator that would otherwise count as a level.
        cur_depth = root.rstrip(sep).count(sep)
        if base_depth + depth <= cur_depth:
            # Emptying the real list in place stops os.walk from descending.
            del dirs[:]

Categories

Resources