Print out the whole directory tree - python

The code I have now:
import os
# os.listdir returns only the immediate entries of 'Dir'; it does not
# descend into subdirectories.  (The earlier ``Tree = {}`` was a dead store.)
Tree = os.listdir('Dir')
>>> print(Tree)
['New Folder', 'Textfile1.txt', 'Textfile2.txt']
That doesn't print out the files in the subdirectories. (New Folder is a subdirectory).
My question is, how can I output all the files in the directory and the files in subdirectories?

import os
def Test1(rootDir):
    """Print every directory and file below ``rootDir``, one path per line.

    The tree is traversed with ``os.walk``; for each visited directory the
    sub-directory paths are printed first, followed by the file paths.
    """
    for root, dirs, files in os.walk(rootDir):
        # print() with a single argument behaves the same on Python 2 and 3.
        for d in dirs:
            print(os.path.join(root, d))
        for f in files:
            print(os.path.join(root, f))
OR:
import os
def Test2(rootDir):
    """Print every entry below ``rootDir`` depth-first, one path per line.

    Each entry returned by ``os.listdir`` is printed, and directories are
    descended into immediately, so a directory's contents follow it.
    """
    for name in os.listdir(rootDir):
        path = os.path.join(rootDir, name)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(path)
        if os.path.isdir(path):
            Test2(path)
For the test file tree:
E:\TEST
│--A
│ │--A-A
│ │ │--A-A-A.txt
│ │--A-B.txt
│ │--A-C
│ │ │--A-B-A.txt
│ │--A-D.txt
│--B.txt
│--C
│ │--C-A.txt
│ │--C-B.txt
│--D.txt
│--E
Running the following code:
# Raw strings keep the backslash literal ('\T' is not a valid escape).
Test1(r'E:\TEST')
print('=======================================')
Test2(r'E:\TEST')
You can see that the two results are ordered differently:
>>>
E:\TEST\A
E:\TEST\C
E:\TEST\E
E:\TEST\B.txt
E:\TEST\D.txt
E:\TEST\A\A-A
E:\TEST\A\A-C
E:\TEST\A\A-B.txt
E:\TEST\A\A-D.txt
E:\TEST\A\A-A\A-A-A.txt
E:\TEST\A\A-C\A-B-A.txt
E:\TEST\C\C-A.txt
E:\TEST\C\C-B.txt
=======================================
E:\TEST\A
E:\TEST\A\A-A
E:\TEST\A\A-A\A-A-A.txt
E:\TEST\A\A-B.txt
E:\TEST\A\A-C
E:\TEST\A\A-C\A-B-A.txt
E:\TEST\A\A-D.txt
E:\TEST\B.txt
E:\TEST\C
E:\TEST\C\C-A.txt
E:\TEST\C\C-B.txt
E:\TEST\D.txt
E:\TEST\E
>>>
To save them in a list:
import os
files = []


def Test1(rootDir):
    """Append ``rootDir`` and every path below it to the module-level ``files``.

    The names unpacked from ``os.walk`` must not be called ``files``: that
    would make ``files`` local to this function, so the first ``append``
    would raise UnboundLocalError and the result list would never be filled.
    """
    files.append(rootDir)
    for root, dirnames, filenames in os.walk(rootDir):
        for d in dirnames:
            files.append(os.path.join(root, d))
        for f in filenames:
            files.append(os.path.join(root, f))
import os
# ``rootDir`` does not exist at module level, so the list starts empty and
# each call records its own starting directory instead.
files = []


def Test2(rootDir):
    """Append ``rootDir`` and every path below it to ``files``, depth-first."""
    files.append(rootDir)
    for name in os.listdir(rootDir):
        path = os.path.join(rootDir, name)
        if os.path.isdir(path):
            # The recursive call records ``path`` itself before its contents.
            Test2(path)
        else:
            files.append(path)

From recipe 577091 on the Python Cookbook, you might use or learn from the TREE Emulator there.
import sys, os
FILES = False
def main():
    """Command-line entry point: ``<script> <directory> [/F]``.

    A second argument of ``/F`` (case-insensitive) sets the module-level
    ``FILES`` flag.  The usage line is printed when no directory is given or
    when the directory cannot be read; other errors are no longer swallowed.
    """
    global FILES
    if len(sys.argv) > 2 and sys.argv[2].upper() == '/F':
        FILES = True
    usage = 'Usage: {} <directory>'.format(os.path.basename(sys.argv[0]))
    if len(sys.argv) < 2:
        print(usage)
        return
    try:
        tree(sys.argv[1])
    except OSError:
        # e.g. the directory does not exist or is not accessible
        print(usage)
def tree(path):
    """Print the absolute form of ``path`` followed by its directory tree.

    Prints 'No subfolders exist' afterwards when the top directory has no
    sub-directories.
    """
    top = os.path.abspath(path)
    listing = listdir(top)
    subdirs, filenames = listing[0], listing[1]
    print(top)
    walk(top, subdirs, filenames)
    if not subdirs:
        print('No subfolders exist')
def walk(root, dirs, files, prefix=''):
    """Print one level of the tree below ``root`` and recurse into ``dirs``.

    ``files`` are printed only when the module-level ``FILES`` flag is set.
    ``prefix`` is the text already accumulated for this nesting level.
    """
    if FILES and files:
        file_prefix = prefix + ('|' if dirs else ' ') + ' '
        for name in files:
            print(file_prefix + name)
        print(file_prefix)
    # NOTE(review): these prefixes look narrower than the 4-column '+---'
    # marker; the spacing may have been collapsed -- confirm against the recipe.
    dir_prefix, walk_prefix = prefix + '+---', prefix + '| '
    for _pos, neg, name in enumerate2(dirs):
        if neg == -1:
            # last sub-directory at this level
            dir_prefix, walk_prefix = prefix + '\\---', prefix + ' '
        print(dir_prefix + name)
        path = os.path.join(root, name)
        try:
            # Separate names: the parameters being iterated are not rebound.
            sub_dirs, sub_files = listdir(path)[:2]
        except OSError:
            # Unreadable directory: show its name but do not descend.
            continue
        walk(path, sub_dirs, sub_files, walk_prefix)
def listdir(path):
    """Return ``(dirs, files, links)``: the entry names of ``path`` by kind.

    Each name is classified by the first test that succeeds, in the order
    directory, regular file, symbolic link; other entries are dropped.
    """
    buckets = ([], [], [])
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if os.path.isdir(full):
            slot = 0
        elif os.path.isfile(full):
            slot = 1
        elif os.path.islink(full):
            slot = 2
        else:
            continue
        buckets[slot].append(entry)
    return buckets
def enumerate2(sequence):
    """Yield ``(index, index - len(sequence), item)`` for each item.

    The second value is the negative index of the item, so the last item
    is yielded with ``-1``.
    """
    total = len(sequence)
    position = 0
    for item in sequence:
        yield position, position - total, item
        position += 1
if __name__ == '__main__':
main()

Here is yet another version appropriate for python3
Example output:
pyvarstar/
|-- .bashrc
|-- README
|-- vstars -> versions/vstars_20170804/
|-- versions/
|   |-- vstars_20170804/
|   |   |-- lib/
|   |   |   |-- vstars/
|   |   |-- bin/
|   |   |   |-- getcoords
|   |   |   |-- find_burst
The code:
def realname(path, root=None):
    """Return the display name of ``path``.

    ``path`` is joined onto ``root`` when ``root`` is given.  The result is
    the base name, or ``'name -> target'`` when the path is a symbolic link.
    """
    full = path if root is None else os.path.join(root, path)
    name = os.path.basename(full)
    if not os.path.islink(full):
        return name
    return '%s -> %s' % (name, os.readlink(full))
def ptree(startpath, depth=-1):
    """Print the tree below ``startpath`` in a ``tree``-like layout.

    ``depth`` limits how many levels below ``startpath`` are shown; the
    default of ``-1`` means no limit.
    """
    prefix = 0
    if startpath != '/':
        if startpath.endswith('/'):
            startpath = startpath[:-1]
        prefix = len(startpath)
    for root, dirs, files in os.walk(startpath):
        level = root[prefix:].count(os.sep)
        if depth > -1 and level > depth:
            # Too deep: nothing below here is printed, so stop descending.
            dirs[:] = []
            continue
        # Each completed level is four columns wide, the width of '|-- '.
        indent = ''
        if level > 0:
            indent = '|   ' * (level - 1) + '|-- '
        subindent = '|   ' * level + '|-- '
        print('{}{}/'.format(indent, realname(root)))
        # print dir only if symbolic link; otherwise, will be printed as root
        for d in dirs:
            if os.path.islink(os.path.join(root, d)):
                print('{}{}'.format(subindent, realname(d, root=root)))
        for f in files:
            print('{}{}'.format(subindent, realname(f, root=root)))

Use os.walk:
>>> import os
>>> print(os.walk.__doc__)
Directory tree generator.
For each directory in the directory tree rooted at top (including top
itself, but excluding '.' and '..'), yields a 3-tuple
...

If you want to limit your printed tree to some given depth, probably because you have a lot of nested folders, then you don't want to use os.walk. Instead you want to stop iterating through the nested folders as soon as you've reached your desired depth.
from typing import Union, Optional
from pathlib import Path
def ptree(startpath: Union[str, Path],
          max_depth:int = 1,
          quick_glance: Optional[int] = None,
          _current_depth:int = 0) -> None:
    """
    Recursively print the directory tree up to a given `max_depth`, optionally
    limiting each directory to the first `quick_glance` files and dirs.

    Parameters
    ----------
    startpath: Union[str, Path]
        The filepath at which to start.
    max_depth: int
        The maximum depth of nested directories to explore.
    quick_glance: Optional[int]
        If specified, limits exploration to the first however-many files and dirs.
    _current_depth: int
        So that we can track our depth as we call the function recursively.
    """
    if _current_depth == 0:
        print(startpath)
    else:
        # last non-empty path component, i.e. the directory's own name
        print(f'{"--"*_current_depth}{[d for d in startpath.split(os.sep) if d][-1]}')

    _current_depth += 1
    if _current_depth > max_depth:
        return None

    ls = os.listdir(startpath)
    files = [f for f in ls if os.path.isfile(os.path.join(startpath, f))]
    dirs = [d for d in ls if os.path.isdir(os.path.join(startpath, d))]
    if quick_glance:
        files = files[:quick_glance]
        dirs = dirs[:quick_glance]

    # Plain loops: the calls are made for their side effects only, so there
    # is no reason to build throwaway lists of ``None``.
    for f in files:
        print(f'{".."*_current_depth}{f}')
    for d in dirs:
        ptree(os.path.join(startpath, d), max_depth, quick_glance, _current_depth)
    return None

Related

Python traverse over folder, and get file in folder (linux) [duplicate]

I want to navigate from the root directory to all other directories within and print the same.
Here's my code:
#!/usr/bin/python
import os
import fnmatch
# Print each directory visited by os.walk, then its files prefixed by "...".
# print() is used so the snippet runs on Python 3 as well as Python 2.
for root, _dirs, files in os.walk("."):
    print(root)
    print("")
    for items in fnmatch.filter(files, "*"):
        print("..." + items)
    print("")
And here's my O/P:
.
...Python_Notes
...pypy.py
...pypy.py.save
...classdemo.py
....goutputstream-J9ZUXW
...latest.py
...pack.py
...classdemo.pyc
...Python_Notes~
...module-demo.py
...filetype.py
./packagedemo
...classdemo.py
...__init__.pyc
...__init__.py
...classdemo.pyc
Above, . and ./packagedemo are directories.
However, I need to print the O/P in the following manner:
A
---a.txt
---b.txt
---B
------c.out
Above, A and B are directories and the rest are files.
This will give you the desired result
#!/usr/bin/python
import os
# traverse root directory, and list directories as dirs and files as files
for root, dirs, files in os.walk("."):
    # Number of path components; "." has one, "./a" has two, and so on.
    depth = len(root.split(os.sep))
    print((depth - 1) * '---', os.path.basename(root))
    for name in files:
        print(depth * '---', name)
try this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""FileTreeMaker.py: ..."""
__author__ = "legendmohe"
import os
import argparse
import time
class FileTreeMaker(object):
    """Render a directory tree as text using box-drawing characters."""

    def _is_listed(self, parent_path, name):
        """Return True when ``name`` will produce an output line."""
        if any(exclude_name in name for exclude_name in self.exn):
            return False
        full_path = os.path.join(parent_path, name)
        if os.path.isdir(full_path):
            return name not in self.exf
        return os.path.isfile(full_path)

    def _recurse(self, parent_path, file_list, prefix, output_buf, level):
        """Append one line per listed entry of ``file_list`` to ``output_buf``.

        Directories come first and are shown as ``[name]``, followed by
        their own contents.  Recursion stops once ``level`` reaches
        ``self.max_level`` (``-1`` means unlimited).
        """
        if len(file_list) == 0 \
                or (self.max_level != -1 and self.max_level <= level):
            return
        # Filter before numbering: the closing marker must go to the last
        # entry that is actually printed, not to one that is skipped.
        visible = [name for name in file_list
                   if self._is_listed(parent_path, name)]
        visible.sort(key=lambda f: os.path.isfile(os.path.join(parent_path, f)))
        last_idx = len(visible) - 1
        for idx, sub_path in enumerate(visible):
            full_path = os.path.join(parent_path, sub_path)
            is_last = idx == last_idx
            idc = "┗━" if is_last else "┣━"
            if os.path.isdir(full_path):
                output_buf.append("%s%s[%s]" % (prefix, idc, sub_path))
                tmp_prefix = prefix + (" " if is_last else "┃ ")
                self._recurse(full_path, os.listdir(full_path), tmp_prefix, output_buf, level + 1)
            else:
                output_buf.append("%s%s%s" % (prefix, idc, sub_path))

    def make(self, args):
        """Build the tree text for ``args.root`` and return it.

        Reads ``root``, ``exclude_folder``, ``exclude_name``, ``max_level``
        and ``output`` from ``args``; the text is also written to
        ``args.output`` when that is a non-empty file name.
        """
        self.root = args.root
        self.exf = args.exclude_folder
        self.exn = args.exclude_name
        self.max_level = args.max_level
        print("root:%s" % self.root)
        buf = []
        path_parts = self.root.rsplit(os.path.sep, 1)
        buf.append("[%s]" % (path_parts[-1],))
        self._recurse(self.root, os.listdir(self.root), "", buf, 0)
        output_str = "\n".join(buf)
        if len(args.output) != 0:
            with open(args.output, 'w') as of:
                of.write(output_str)
        return output_str
if __name__ == "__main__":
    # (flags, keyword options) for every supported command-line option
    option_specs = [
        (("-r", "--root"), dict(help="root of file tree", default=".")),
        (("-o", "--output"), dict(help="output file name", default="")),
        (("-xf", "--exclude_folder"), dict(nargs='*', help="exclude folder", default=[])),
        (("-xn", "--exclude_name"), dict(nargs='*', help="exclude name", default=[])),
        (("-m", "--max_level"), dict(help="max level", type=int, default=-1)),
    ]
    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()
    print(FileTreeMaker().make(args))
you will get this:
root:.
[.]
┣━[.idea]
┃ ┣━[scopes]
┃ ┃ ┗━scope_settings.xml
┃ ┣━.name
┃ ┣━Demo.iml
┃ ┣━encodings.xml
┃ ┣━misc.xml
┃ ┣━modules.xml
┃ ┣━vcs.xml
┃ ┗━workspace.xml
┣━[test1]
┃ ┗━test1.txt
┣━[test2]
┃ ┣━[test2-2]
┃ ┃ ┗━[test2-3]
┃ ┃ ┣━test2
┃ ┃ ┗━test2-3-1
┃ ┗━test2
┣━folder_tree_maker.py
┗━tree.py
A recursive walk through a directory that visits ALL files and ALL directories below the starting folder - offered because the code above is not as simple as it could be (IMHO):
# os.walk yields one (directory, sub-directory names, file names) tuple per
# directory in the tree; join each name onto its directory for a full path.
for current_dir, subdir_names, file_names in os.walk(rootFolderPath):
    for file_name in file_names:
        doSomethingWithFile(os.path.join(current_dir, file_name))
    for subdir_name in subdir_names:
        doSomewthingWithDir(os.path.join(current_dir, subdir_name))
There are more suitable functions for this in the os package, but if you have to use os.walk, here is what I came up with:
def walkdir(dirname):
    """Print the tree below ``dirname``, indenting with '---' per path level.

    The indentation of a directory is the number of path components that
    precede its own name; files are indented one level further.
    """
    for cur, _dirs, files in os.walk(dirname):
        pref = ''
        head, tail = os.path.split(cur)
        while head:
            pref += '---'
            parent = os.path.split(head)[0]
            if parent == head:
                # Reached a filesystem root such as '/', which splits to
                # itself; without this check absolute paths loop forever.
                break
            head = parent
        print(pref + tail)
        for f in files:
            print(pref + '---' + f)
output:
>>> walkdir('.')
.
---file3
---file2
---my.py
---file1
---A
------file2
------file1
---B
------file3
------file2
------file4
------file1
---__pycache__
------my.cpython-33.pyc
You could also recursively walk through a folder and list all its contents using pathlib.Path():
from pathlib import Path
def check_out_path(target_path, level=0):
    """Recursively print the contents of a pathlib.Path, one tab per level.

    The name of ``target_path`` is printed at ``level`` tabs; its files are
    printed one tab deeper and its sub-directories are handled recursively.
    """
    tabs = '\t' * level
    print(tabs + target_path.name)
    for child in target_path.iterdir():
        if child.is_dir():
            check_out_path(child, level + 1)
            continue
        print(tabs + '\t' + child.name)
my_path = Path(r'C:\example folder')
check_out_path(my_path)
Output:
example folder
folder
textfile3.txt
textfile1.txt
textfile2.txt
You can use os.walk, and that is probably the easiest solution, but here is another idea to explore:
import sys, os
# Set by main() when '/F' is given: also list the files of every folder.
FILES = False


def main():
    """Command-line entry point: ``<script> <directory> [/F]``.

    The usage line is printed when no directory is given or when it cannot
    be read; other errors are no longer swallowed by a bare ``except``.
    """
    global FILES
    if len(sys.argv) > 2 and sys.argv[2].upper() == '/F':
        FILES = True
    usage = 'Usage: {} <directory>'.format(os.path.basename(sys.argv[0]))
    if len(sys.argv) < 2:
        print(usage)
        return
    try:
        tree(sys.argv[1])
    except OSError:
        print(usage)


def tree(path):
    """Print the absolute form of ``path`` followed by its directory tree."""
    path = os.path.abspath(path)
    dirs, files = listdir(path)[:2]
    print(path)
    walk(path, dirs, files)
    if not dirs:
        print('No subfolders exist')


def walk(root, dirs, files, prefix=''):
    """Print one level of the tree below ``root`` and recurse into ``dirs``."""
    if FILES and files:
        file_prefix = prefix + ('|' if dirs else ' ') + ' '
        for name in files:
            print(file_prefix + name)
        print(file_prefix)
    # NOTE(review): these prefixes look narrower than the 4-column '+---'
    # marker; the spacing may have been collapsed -- confirm against the recipe.
    dir_prefix, walk_prefix = prefix + '+---', prefix + '| '
    for _pos, neg, name in enumerate2(dirs):
        if neg == -1:
            # last sub-directory at this level
            dir_prefix, walk_prefix = prefix + '\\---', prefix + ' '
        print(dir_prefix + name)
        path = os.path.join(root, name)
        try:
            # Separate names: the parameters being iterated are not rebound.
            sub_dirs, sub_files = listdir(path)[:2]
        except OSError:
            # Unreadable directory: show its name but do not descend.
            continue
        walk(path, sub_dirs, sub_files, walk_prefix)


def listdir(path):
    """Return ``(dirs, files, links)``: the entry names of ``path`` by kind."""
    dirs, files, links = [], [], []
    for name in os.listdir(path):
        path_name = os.path.join(path, name)
        if os.path.isdir(path_name):
            dirs.append(name)
        elif os.path.isfile(path_name):
            files.append(name)
        elif os.path.islink(path_name):
            links.append(name)
    return dirs, files, links


def enumerate2(sequence):
    """Yield ``(index, index - len(sequence), item)``; the last item gets -1."""
    length = len(sequence)
    for count, value in enumerate(sequence):
        yield count, count - length, value


if __name__ == '__main__':
    main()
You might recognize the following documentation from the TREE command in the Windows terminal:
Graphically displays the folder structure of a drive or path.
TREE [drive:][path] [/F] [/A]
/F Display the names of the files in each folder.
/A Use ASCII instead of extended characters.
This does it for folder names:
def printFolderName(init_indent, rootFolder):
    """Print the name of ``rootFolder`` and of every folder below it.

    Each line starts with ``init_indent`` and is indented further in
    proportion to the folder's depth below ``rootFolder``.
    """
    root_levels = rootFolder.count(os.sep)
    for root, _dirs, _files in os.walk(rootFolder):
        # depth of this folder relative to rootFolder
        levels = root.count(os.sep) - root_levels
        indent = ' ' * (levels * 2)
        print(init_indent + indent + root.split(os.sep)[-1])
#!/usr/bin/python
import os
def tracing(a, indent="---"):
    """Print the entries below directory ``a``, indenting '---' per level.

    Entries are listed in sorted order and a directory's contents follow it.
    ``indent`` is the prefix for the current level; each nested level gets
    one more '---', so siblings stay aligned.
    """
    for item in sorted(os.listdir(a)):
        # listdir returns bare names; join them onto ``a`` before testing
        # or recursing, otherwise they are resolved against the cwd.
        path = os.path.join(a, item)
        print(indent + item)
        if os.path.isdir(path):
            tracing(path, indent + "---")
i = "---"
tracing(".")
Would be the best way
import os
def traverse_dir_recur(directory):
    """Print the path of every non-directory entry below ``directory``.

    Paths are built with ``os.path.join``, so ``directory`` works with or
    without a trailing separator.
    """
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        if os.path.isdir(path):
            traverse_dir_recur(path)
        else:
            print(path)
Given a folder name, walk through its entire hierarchy recursively.
#! /usr/local/bin/python3
# findLargeFiles.py - given a folder name, walk through its entire hierarchy
# - print folders and files within each folder
import os
def recursive_walk(folder):
    """Print every folder below ``folder`` followed by its file names.

    ``os.walk`` already visits every sub-folder, so no explicit recursion
    is needed; calling this function again with the bare sub-folder names
    would resolve them against the current working directory.
    """
    for folderName, _subfolders, filenames in os.walk(folder):
        print('\nFolder: ' + folderName + '\n')
        for filename in filenames:
            print(filename + '\n')
recursive_walk('/name/of/folder')
Try this:
import os
# One scan instead of three: the first tuple produced by os.walk holds the
# top directory, its immediate sub-directory names and its file names.
root_name, dir_names, file_names = next(os.walk("."))
Here the path is assumed to be ".", the directory that contains the files and other directories of interest.
os.walk is a generator, so each next() call retrieves only its first result: the top directory together with the names of its immediate subdirectories and files.
This stores the directory and file names found directly in "." in dir_names and file_names respectively; to go deeper, iterate over os.walk instead of calling next().
Do try this; easy one
#!/usr/bin/python
import os
# Creating an empty list that will contain the already traversed paths
donePaths = []


def direct(path):
    """Print the tree below ``path``; deeper levels get longer dash prefixes.

    For each directory its files are printed first, then each sub-directory
    followed at once by that sub-directory's own contents.  Every traversed
    sub-directory is recorded in the module-level ``donePaths`` list.
    """
    for paths, dirs, files in os.walk(path):
        count = paths.count(os.sep)
        for ele1 in files:
            print('---------' * count, ele1)
        for ele2 in dirs:
            print('---------' * count, ele2)
            absPath = os.path.join(paths, ele2)
            # recursively calling the direct function on each directory
            direct(absPath)
            # adding the paths to the list that got traversed
            donePaths.append(absPath)
        # Sub-directories are handled by the recursive calls above, so only
        # the first tuple from os.walk is needed.
        break
# raw_input exists only on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input
path = read_line("Enter any path to get the following Dir Tree ...\n")
direct(path)
========OUTPUT below========
/home/test
------------------ b.txt
------------------ a.txt
------------------ a
--------------------------- a1.txt
------------------ b
--------------------------- b1.txt
--------------------------- b2.txt
--------------------------- cde
------------------------------------ cde.txt
------------------------------------ cdeDir
--------------------------------------------- cdeDir.txt
------------------ c
--------------------------- c.txt
--------------------------- c1
------------------------------------ c1.txt
------------------------------------ c2.txt
Let's say you have an arbitrary parent directory with subdirectories as such:
/home/parent_dir
├── 0_N
├── 1_M
├── 2_P
├── 3_R
└── 4_T
Here is how you can estimate what percentage of the parent's total number of files each subdirectory contains:
from os import listdir as osl
from os import walk as osw
from os.path import join as osj
def subdir_summary(parent_dir):
    """Print each sub-directory's file count and share of the parent's total.

    Files are counted recursively for the total and for each sub-directory,
    so the percentages refer to the same quantity.  Plain files directly in
    ``parent_dir`` are included in the total but get no line of their own.
    """
    from os.path import isdir

    parent_dir_len = sum(len(files) for _, _, files in osw(parent_dir))
    print(f"Total files in parent: {parent_dir_len}")
    for subdir in sorted(osl(parent_dir)):
        subdir_path = osj(parent_dir, subdir)
        if not isdir(subdir_path):
            # listing a plain file would raise NotADirectoryError
            continue
        subdir_files_len = sum(len(files) for _, _, files in osw(subdir_path))
        # An empty tree has no meaningful share; avoid dividing by zero.
        percent = int(100*(subdir_files_len / parent_dir_len)) if parent_dir_len else 0
        print(subdir, subdir_files_len, f"{percent}%")
subdir_summary("/home/parent_dir")
It will print in terminal as follows:
Total files in parent: 5876
0_N 3254 55%
1_M 509 8%
2_P 1187 20%
3_R 594 10%
4_T 332 5%
import os
# Print the entries of the working directory and, indented by "---", the
# entries of each of its sub-directories (one level deep).
os.chdir('/your/working/path/')
base_dir = os.getcwd()
marks = "---"
for entry in sorted(os.listdir(base_dir)):
    print(entry)
    entry_path = os.path.join(base_dir, entry)
    # Only directories can be listed; plain files have no children.
    if os.path.isdir(entry_path):
        for child in sorted(os.listdir(entry_path)):
            print(marks + child)

Show path tree with files

import os
# Question code, kept as posted.  As the asker describes below, files are
# written only for the last folder: the ``for f in filenames`` loop ran
# after the os.walk loop had finished instead of inside it.
path = "G:\krunker\mod"
abcde = open("path.txt", "w")
for dirpath, dirnames, filenames in os.walk(path):
# depth of dirpath below path: separators left after removing the prefix
directory_level = dirpath.replace(path, "")
directory_level = directory_level.count(os.sep)
indent = " " * 4
print("{}{}/".format(indent*directory_level, os.path.basename(dirpath)), file=abcde)
for f in filenames:
print("{}{}".format(indent*(directory_level+1), f), file=abcde)
abcde.close()
I want it to print the files in every single folder of the path but it does only on the last
The indentation is not correct. The second for loop also has to be inside the first for loop.
Correct code:
import os
path = "/home/user/my_folder/tools"
indent = " " * 4
# The with-block closes path.txt even if the walk raises part-way through.
with open("path.txt", "w") as abcde:
    for dirpath, dirnames, filenames in os.walk(path):
        # Depth below ``path``: count separators after the leading prefix
        # (a slice removes only the prefix, not later repeats of it).
        directory_level = dirpath[len(path):].count(os.sep)
        print("{}{}/".format(indent*directory_level, os.path.basename(dirpath)), file=abcde)
        # The file loop belongs inside the directory loop.
        for f in filenames:
            print("{}{}".format(indent*(directory_level+1), f), file=abcde)
A part of path.txt content:
tools/
.gitignore
README.md
__init__.py
requirements3.txt
test.py
path.txt
.git/
description
hooks/
commit-msg.sample
info/
exclude
refs/
heads/
master
You can also use a recursive function that lists each subfolder's contents until there are no more subfolders:
from os import walk
output = open("path.txt", "w")
def listFiles(path, indent):
    """Write the tree below ``path`` to ``output``, indented with tabs.

    Only the first tuple produced by ``walk`` is used (the direct children
    of ``path``); sub-folders are handled by the recursive calls.
    """
    first = next(walk(path), None)
    if first is None:
        return
    _opened, folders, files = first
    tabs = "\t" * indent
    for name in files:
        output.write(tabs + name + "\n")
    for folder in folders:
        output.write(tabs + folder + "/\n")
        listFiles(path + "/" + folder, indent + 1)
source = "/my/path/to/my/folder"
print(source + "/")
listFiles(source, 1)
There's an example with a little code project folder.
/my/path/to/my/folder/
input.txt
main.py
output/
error.cpp
trying.cpp
logo.cpp
You can use this kind of code to simplify
import os
folder = r"C:\path\to\find\files"
# Every file below ``folder``.  To keep only specific files, add a condition
# inside the brackets, e.g. ``if file.endswith(".txt")``.
x = [os.path.join(r, file) for r, d, f in os.walk(folder) for file in f]
# Every sub-folder below ``folder`` (loop variable does not reuse ``folder``).
y = [os.path.join(r, sub) for r, d, f in os.walk(folder) for sub in d]
print(x)  # files in the main directory and its subdirectories
print(y)  # folders in the main directory and its subdirectories

Python: How to get the full path of a file in order to move it?

I had files that were in zips. I unzipped them with Zip-7 so they are in folders with the zip file names.
Each of these folders has either a .otf or .ttf (some have both) that I want out of them and moved to another folder.
I have tried a few methods of getting the full path of the files but every one of them leaves out the folder that the file is actually in.
Here is my latest try:
import os
import shutil
from pathlib import Path
# Question code, kept as posted.  ``f`` is a bare file name, so
# ``Path(f).absolute()`` resolves it against the current working directory
# and drops ``root``, the folder the file is actually in.
result = []
for root, dirs, files in os.walk("."):
for d in dirs:
continue
for f in files:
if f.endswith(".otf"):
print(f)
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
elif f.endswith(".ttf"):
print(f)
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
else:
continue
Other attempts:
# parent_dir = Path(f).parents[1]
# shutil.move(f, parent_dir)
#print("OTF: " + f)
# fn = f
# f = f[:-4]
# f += '\\'
# f += fn
# result.append(os.path.realpath(f))
#os.path.relpath(os.path.join(root, f), "."))
I know this is something simple but I just can't figure it out. Thanks!
You should join the file name with the path name root:
for root, dirs, files in os.walk("."):
    for f in files:
        # endswith accepts a tuple, so both font types share one branch.
        if f.endswith((".otf", ".ttf")):
            # Join with ``root`` so the path includes the containing folder.
            p = Path(os.path.join(root, f)).absolute()
            # parents[1] is the folder above the one that holds the file
            parent_dir = p.parents[1]
            p.rename(parent_dir / p.name)
for root, dirs, files in os.walk("."):
    for f in files:
        # ``f`` is only a name; join it with ``root`` first, otherwise
        # abspath resolves it against the current working directory.
        print(os.path.abspath(os.path.join(root, f)))
You can use os.path.abspath() to get the full path of a file.
You would still need to filter for the file types you want.

Python - match directories with pattern (regular expression)

I wrote a loop which ignores all sub-directories which contain .txt files within them.
# Question code, kept as posted.  It walks ``src``, skips every directory
# that contains a .txt file (and everything below it), and for the remaining
# files copies the containing directory under ``dst`` and moves it under
# ``dest`` when the directory name matches ``path_patter``.
src = raw_input("Enter source disk location: ")
src = os.path.abspath(src)
dst = raw_input("Enter first destination to copy: ")
dst = os.path.abspath(dst)
dest = raw_input("Enter second destination to move : ")
dest = os.path.abspath(dest)
path_patter = '(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)'
for dir, dirs, files in os.walk(src):
if any(f.endswith('.txt') for f in files):
dirs[:] = [] # do not recurse into subdirectories
continue
files = [os.path.join(dir, f) for f in files ]
for f in files:
# part1: directory holding the file; part3: that directory's own name
part1 = os.path.dirname(f)
part2 = os.path.dirname(os.path.dirname(part1))
part3 = os.path.split(part1)[1]
path_miss1 = os.path.join(dst, "missing_txt")
path_miss = os.path.join(path_miss1, part3)
path_missing = os.path.join(dest, "missing_txt")
searchFileName = re.search(path_patter, part3)#### update
if searchFileName:#####update
try:
if not os.path.exists(path_miss):
os.makedirs(path_miss)
else:
pass
if os.path.exists(path_miss):
distutils.dir_util.copy_tree(part1, path_miss)
else:
debug_status += "missing_file\n"
pass
if (get_size(path_miss)) == 0:
os.rmdir(path_miss)
else:
pass
if not os.path.exists(path_missing):
os.makedirs(path_missing)
else:
pass
if os.path.exists(path_missing):
shutil.move(part1, path_missing)
else:
pass
if (get_size(path_missing)) == 0:
os.rmdir(path_missing)
else:
pass
# NOTE(review): every exception raised in the try body is discarded here
except Exception:
pass
else:
continue
How to modify this code to compare directory name with regular expression in this case. (it has to ignore directories with .txt files)
import os
import re
def createEscapedPattern(path, pattern):
    """Return normalised ``path``, a separator and ``pattern`` as one string.

    Every backslash in the normalised path, and the separator placed before
    ``pattern``, is written as four backslashes.
    """
    four_backslashes = "\\" * 4
    escaped = four_backslashes.join(os.path.normpath(path).split("\\"))
    return escaped + four_backslashes + pattern
def createEscapedPath(path):
    """Return normalised ``path`` with every backslash doubled."""
    return "\\\\".join(os.path.normpath(path).split("\\"))
src = 'C:\\Home\\test'
# Raw string: \S and \d are regex classes, not string escapes.
path_patter = r'(\S+)_(\d+)_(\d+)_(\d+)__(\d+)_(\d+)_(\d+)$'
p = re.compile(createEscapedPattern(src, path_patter))
for dirpath, dirs, files in os.walk(src):
    if any(f.endswith('.txt') for f in files):
        # ignore this directory and everything below it
        dirs[:] = []
        continue
    # The match does not depend on the individual file, so test it once.
    if files and p.match(createEscapedPath(dirpath)):
        for f in files:
            print(createEscapedPath(dirpath + "/" + f))
    # Recompile so the pattern is anchored at the directory just visited.
    p = re.compile(createEscapedPattern(dirpath, path_patter))
There are a couple of things i did here and hope this example helps
I wrote this for windows fs so used two path convert functions.
This script ignores dirs with .txt files like you implemented it
This script starts at the directory given in src and prints file names only if the pattern matches. This is done for all subdirectories that are not ignored by the previous rule.
Used regex in python and made it compile again for each directory so you get: 'directory/(\S+)(\d+)(\d+)_(\d+)__(\d+)(\d+)(\d+)$'

Flatten complex directory structure in Python

I want to move files from a complex directory structure to just one place. For example i have this deep hierarchy:
foo/
foo2/
1.jpg
2.jpg
...
I want it to be:
1.jpg
2.jpg
...
My current solution:
def move(destination):
    """Move files found below ``destination`` up into ``destination``.

    Runs inside the ``directory(destination)`` context and moves only the
    paths from ``glob_recursive`` that contain ``destination`` joined with a
    backslash.
    """
    marker = os.path.join(destination, '\\')

    def is_in_parent(candidate):
        return candidate.find(marker) > -1

    with directory(destination):
        for found in filter(is_in_parent, glob_recursive(path='.')):
            shutil.move(found, destination)
Definitions: directory and glob_recursive. Note, that my code only moves files to their common parent directory, not an arbitrary destination.
How can i move all files from a complex hierarchy to a single place succinctly and elegantly?
I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination
import os
import itertools
import shutil
def move(destination):
    """Move every file in the sub-directories of ``destination`` into it.

    The first ``os.walk`` result (``destination`` itself) is skipped, so
    files already at the top level are left alone.  All paths are collected
    before anything is moved.
    """
    below_top = itertools.islice(os.walk(destination), 1, None)
    all_files = [
        os.path.join(root, name)
        for root, _dirs, names in below_top
        for name in names
    ]
    for path in all_files:
        shutil.move(path, destination)
Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:
def move(destination):
    """Move every file in the sub-directories of ``destination`` into it.

    The walker's first result (``destination`` itself) is discarded, so
    files already at the top level are left alone.  All paths are collected
    before anything is moved.
    """
    walker = os.walk(destination)
    next(walker, None)  # skip the top-level directory
    all_files = []
    for root, _dirs, names in walker:
        all_files.extend(os.path.join(root, name) for name in names)
    for path in all_files:
        shutil.move(path, destination)
this would do, it also renames files if they collide (I commented out the actual move and replaced with a copy):
import os
import sys
import string
import shutil
#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    """Yield the path of every file below ``path``.

    When ``path`` is not a directory it is yielded unchanged as the only
    item.
    """
    if not os.path.isdir(path):
        yield path
        return
    for root, _dirs, names in os.walk(path):
        for name in names:
            yield os.path.join(root, name)
destination = "./newdir/"
fromdir = "./test/"
for f in getfiles(fromdir):
    # str.split works on Python 2 and 3; string.split() is Python 2 only.
    filename = f.split('/')[-1]
    if os.path.isfile(destination+filename):
        # Name collision: use the path relative to fromdir, flattened with "_".
        filename = f.replace(fromdir,"",1).replace("/","_")
    #os.rename(f, destination+filename)
    shutil.copy(f, destination+filename)
Run recursively through directory, move the files and launch move for directories:
import shutil
import os
def move(destination, depth=None):
    """Move every file nested below ``destination`` up into ``destination``.

    ``depth`` is the list of directory names leading from ``destination``
    to the directory currently being processed; callers normally omit it.
    """
    if not depth:
        depth = []
    # os.path.join takes separate components, not a list.
    current = os.path.join(destination, *depth)
    for name in os.listdir(current):
        path = os.path.join(current, name)
        if os.path.isfile(path):
            # Files at the top level are already where they belong.
            if depth:
                shutil.move(path, destination)
        else:
            move(destination, depth + [name])
import os.path, shutil
def move(src, dest):
    """Move every path found by ``glob_recursive(path=src)`` into ``dest``.

    Paths whose containing directory already is ``dest`` are skipped; the
    original predicate was inverted and compared the file with ``dest``
    itself.
    """
    def not_in_dest(candidate):
        parent = os.path.dirname(candidate) or os.curdir
        return not os.path.samefile(parent, dest)

    # Decide what to move before moving anything.
    files_to_move = [f for f in glob_recursive(path=src) if not_in_dest(f)]
    for f in files_to_move:
        shutil.move(f, dest)
Source for glob_recursive. Does not change name of file, if they collide.
samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.
def splitPath(p):
    """Split path ``p`` into the list of its components.

    Components are peeled off with ``os.path.split`` until the head or the
    tail comes back empty, so an absolute path starts with ``''``.
    """
    parts = []
    while True:
        head, tail = os.path.split(p)
        parts.append(tail)
        if not (head and tail):
            break
        p = head
    parts.reverse()
    return parts
def safeprint(s):
    """Print ``s``, falling back to a UTF-8 round trip when encoding fails.

    On UnicodeEncodeError the text is encoded as UTF-8; on Python 3 the
    bytes are decoded with the encoding of ``sys.stdout`` before printing,
    otherwise the bytes are printed as they are.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        raw = s.encode('utf8')
        print(raw.decode(sys.stdout.encoding) if sys.version_info >= (3,) else raw)
def flatten(root, doit):
    """Move every file below ``root`` into ``root`` and remove the sub-folders.

    With ``doit`` false nothing is changed on disk; the same messages are
    printed as a simulation.  The tree is walked bottom-up so that each
    folder is processed after its own sub-folders.
    """
    SEP = "¦"
    REPL = "?"
    moved = 0
    removed = 0
    if not doit:
        print("Simulating:")
    for path, dirs, files in os.walk(root, topdown=False):
        if path == root:
            continue
        for f in files:
            # Hierarchy-based name: the folder components after the first,
            # each followed by SEP, then the file name.
            hierarchy = "".join(
                part.replace(SEP, REPL) + SEP for part in splitPath(path)[1:]
            )
            newName = hierarchy + f.replace(SEP, REPL)
            safeprint("Moved: "+ newName )
            if doit:
                shutil.move(os.path.join(path, f), os.path.join(root, f))
                # Uncomment, if you want filenames to be based on folder hierarchy.
                #shutil.move(os.path.join(path, f), os.path.join(root, newName))
            moved += 1
        safeprint("Removed: "+ path)
        if doit:
            os.rmdir(path)
        removed += 1
    print("Done." if doit else "Simulation complete.")
    print("Moved files:", moved)
    print("Removed folders:", removed)
directory_path = r"C:\Users\jd\Documents\myFtpData"
flatten(directory_path, True)
Adding on to the answers, I believe my answer will satisfy all your needs, the other answers fail when there is a subdirectory and file with the same filename as the upper directory.
This was SOLVED here, Also look at my Github Repo for Structured File Copy and Flattened File Copy:
import os, fnmatch, shutil
PATTERN = '*.txt' # shell-style wildcard pattern (not a regex) selecting the files
INPUT_FOLDER = "A" # os.getcwd()
# Work with the absolute path of the input folder from here on.
INPUT_FOLDER = os.path.abspath(INPUT_FOLDER)
# When True, "_included" is appended to the name of the output folder.
include_input_foldername = False
prepend = "_included" if include_input_foldername else ""
OUTPUT_FOLDER = f"Structured_Copy_{os.path.basename(INPUT_FOLDER)}{prepend}"
# exist_ok=True: an output folder that already exists is not an error.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def find(pattern, path):
    """Return the paths of all files below ``path`` whose name matches ``pattern``.

    ``pattern`` is a shell-style wildcard as understood by ``fnmatch``
    (for example ``'*.txt'``), not a regular expression.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if fnmatch.fnmatch(name, pattern)
    ]
all_files = find(PATTERN, INPUT_FOLDER)
for each_path in all_files:
    # Paths are taken relative to the input folder, or to its parent when the
    # input folder's own name is to be kept in the copy.
    if include_input_foldername:
        relative_path = os.path.relpath(each_path, os.path.dirname(INPUT_FOLDER))
    else:
        relative_path = os.path.relpath(each_path, INPUT_FOLDER)
    flattened_relative_fullpath = os.path.join(OUTPUT_FOLDER, relative_path)
    # Recreate the file's sub-folder structure below the output folder.
    os.makedirs(os.path.dirname(flattened_relative_fullpath), exist_ok=True)
    shutil.copy(each_path, flattened_relative_fullpath)
    print(f"Copied {each_path} to {flattened_relative_fullpath}")
print(f"Finished Copying {len(all_files)} Files from : {INPUT_FOLDER} to : {OUTPUT_FOLDER}")

Categories

Resources