python get files recursively - python

I have a folder structure:
I am using os.walk(path) to get all the files from the "test" folder. I would like to get all files except the folder "B" and the files inside it.
test (root-folder)
t1.txt
t2.txt
A
f.txt
B
f1.txt
C
f4.txt
list1 = ['A', 'C']
result = [os.path.join(dp, f) for dp, dn, filenames in os.walk(path) for f in filenames if os.path.splitext(f)[1] == '.txt']
for items in result:
for fname in list1:
if fname in items.lower():
result.remove(items)
print(result)
I tried this, but it only affects the files in A and C, and the files in the main folder are not handled as I expect. Can you help? Where am I going wrong?
Thank you

A possible solution is to use the glob library:
import glob
from pathlib import Path

# Top-level folders (relative to the current directory) whose files are skipped.
dir_to_exclude = ['B', 'C']

# With recursive=True, '**' matches the current directory and every
# subdirectory, so this collects all .txt files in the tree.
files = glob.glob('**/*.txt', recursive=True)

# Path.parts splits on the platform's own separator, so the filter works on
# POSIX ('/') as well as on Windows ('\\').
files_paths = [f for f in files if Path(f).parts[0] not in dir_to_exclude]
# Derive the bare names from the already-filtered list instead of filtering twice.
files_names = [Path(f).name for f in files_paths]

print(f'List of file names with path: {files_paths}')
print(f'List of file names: {files_names}')

I think this should work
# Collect every .txt file under `path` that does not lie in the forbidden folder.
file_paths = []
forbidden_path = GetForbiddenPath()
for root, dirs, files in os.walk(path):
    for name in files:
        file_path = os.path.join(root, name)
        # Skip anything inside the forbidden folder (the check must be negative:
        # testing `in` would keep ONLY the forbidden files).
        # NOTE(review): this is a substring test, so a forbidden path of "B"
        # would also match "AB" - confirm what GetForbiddenPath() returns.
        if forbidden_path in file_path:
            continue
        if os.path.splitext(file_path)[1] == '.txt':
            file_paths.append(file_path)

Related

How to traverse through all subfolders inside a folder for renaming using glob function? [duplicate]

I have a folder structure:
I am using os.walk(path) to get all the files from the "test" folder. I would like to all files except the folder "B" and the files inside it.
test (root-folder)
t1.txt
t2.txt
A
f.txt
B
f1.txt
C
f4.txt
list1 = ['A', 'C']
result = [os.path.join(dp, f) for dp, dn, filenames in os.walk(path) for f in filenames if os.path.splitext(f)[1] == '.txt']
for items in result:
for fname in list1:
if fname in items.lower():
result.remove(items)
print(result)
I tried it, but it takes only the A and C. Not the files in main folder? Can you help? Where am i wrong?
Thank you
Possible solution is to use glob library:
import glob
from pathlib import Path

# Top-level folders (relative to the current directory) whose files are skipped.
dir_to_exclude = ['B', 'C']

# With recursive=True, '**' matches the current directory and every
# subdirectory, so this collects all .txt files in the tree.
files = glob.glob('**/*.txt', recursive=True)

# Path.parts splits on the platform's own separator, so the filter works on
# POSIX ('/') as well as on Windows ('\\').
files_paths = [f for f in files if Path(f).parts[0] not in dir_to_exclude]
# Derive the bare names from the already-filtered list instead of filtering twice.
files_names = [Path(f).name for f in files_paths]

print(f'List of file names with path: {files_paths}')
print(f'List of file names: {files_names}')
I think this should work
# Collect every .txt file under `path` that does not lie in the forbidden folder.
file_paths = []
forbidden_path = GetForbiddenPath()
for root, dirs, files in os.walk(path):
    for name in files:
        file_path = os.path.join(root, name)
        # Skip anything inside the forbidden folder (the check must be negative:
        # testing `in` would keep ONLY the forbidden files).
        # NOTE(review): this is a substring test, so a forbidden path of "B"
        # would also match "AB" - confirm what GetForbiddenPath() returns.
        if forbidden_path in file_path:
            continue
        if os.path.splitext(file_path)[1] == '.txt':
            file_paths.append(file_path)

Combining multiple CSV files from multiple subfolders into one file

I'm trying to combine multiple files located in a directory. Each of the files is located three subfolders down from the main folder (each subfolder contains another folder or a file), and I am unable to combine all of them. The best I can do is combine the ones in each bottom-most subfolder. I can get a list of every specific file I want to combine by scanning the tree, but I can't combine them. I've gone through several methods and tutorials and can't find a way to do this. The code I have is below:
import pandas as pd
import os
import glob
os.getcwd()
path_of_the_directory = 'C:\\Users\\user\\Downloads\\top_folder'
ext = ('.csv')
for files in os.listdir(path_of_the_directory):
if files.endswith(ext):
print(files)
else:
continue
def list_files(dir):
r = []
for root, dirs, files in os.walk(dir):
for name in files:
filepath = root + os.sep + name
if filepath.endswith(".csv"):
r.append(os.path.join(root, name))
return r
print(r)
files = []
for file in r:
#for dir, dir_name, file_list in os.walk(path):
files.append(os.path.join(path,file))
combined_df = pd.concat([pd.read_csv(file) for file in files])
df = pd.concat([pd.read_csv(f) for f in files])
df.to_csv("merged.csv")
print(files)
list_files(data_dir)
data_dir = r'C:\\Users\\user\\Downloads\top_folder'
sub_folders = os.listdir(data_dir)
sub_folders
path = os.path.join(data_dir, sub_folders[2])
os.chdir(path)
files = glob.glob(path + ".\*\*.csv")
files
df = pd.concat([pd.read_csv(f) for f in chat_files])
df.to_csv("merged.csv")
Any help or direction would be extremely appreciated.

Python: How to get the full path of a file in order to move it?

I had files that were in zips. I unzipped them with 7-Zip, so they are now in folders named after the zip files.
Each of these folders has either a .otf or .ttf (some have both) that I want out of them and moved to another folder.
I have tried a few methods of getting the full path of the files but every one of them leaves out the folder that the file is actually in.
Here is my latest try:
import os
import shutil
from pathlib import Path
result = []
for root, dirs, files in os.walk("."):
for d in dirs:
continue
for f in files:
if f.endswith(".otf"):
print(f)
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
elif f.endswith(".ttf"):
print(f)
p = Path(f).absolute()
parent_dir = p.parents[1]
p.rename(parent_dir / p.name)
else:
continue
Other attempts:
# parent_dir = Path(f).parents[1]
# shutil.move(f, parent_dir)
#print("OTF: " + f)
# fn = f
# f = f[:-4]
# f += '\\'
# f += fn
# result.append(os.path.realpath(f))
#os.path.relpath(os.path.join(root, f), "."))
I know this is something simple but I just can't figure it out. Thanks!
You should join the file name with the path name root:
import os
from pathlib import Path

# Move every .otf/.ttf file found below the current directory one level up.
for root, dirs, files in os.walk("."):
    for f in files:
        # str.endswith accepts a tuple, so both font types share one branch.
        if not f.endswith((".otf", ".ttf")):
            continue
        # os.walk yields bare file names; join with root to get the file's
        # real location before turning it into an absolute path.
        p = Path(os.path.join(root, f)).absolute()
        # parents[0] is the folder holding the file, parents[1] is one level up.
        # NOTE: a font sitting directly in "." is therefore moved into the
        # parent of the current directory.
        parent_dir = p.parents[1]
        p.rename(parent_dir / p.name)
import os

# Print the absolute path of every file below the current directory.
for root, dirs, files in os.walk("."):
    for f in files:
        # f is only the bare file name, so abspath(f) alone would resolve it
        # against the current directory and drop the folder it really lives
        # in; join it with root first.
        print(os.path.abspath(os.path.join(root, f)))
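You can use os.path.abspath() to get the full path of a file. Because os.walk yields bare file names, join each name with root first (os.path.join(root, f)); otherwise the name is resolved against the current directory.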
You would also need to still filter for the certain file types.

Flatten complex directory structure in Python

I want to move files from a complex directory structure to just one place. For example i have this deep hierarchy:
foo/
foo2/
1.jpg
2.jpg
...
I want it to be:
1.jpg
2.jpg
...
My current solution:
def move(destination):
for_removal = os.path.join(destination, '\\')
is_in_parent = lambda x: x.find(for_removal) > -1
with directory(destination):
files_to_move = filter(is_in_parent,
glob_recursive(path='.'))
for file in files_to_move:
shutil.move(file, destination)
Definitions: directory and glob_recursive. Note, that my code only moves files to their common parent directory, not an arbitrary destination.
How can i move all files from a complex hierarchy to a single place succinctly and elegantly?
I don't like testing the name of the file about to be moved to see if we're already in the destination directory. Instead, this solution only scans the subdirectories of the destination
import os
import itertools
import shutil
def move(destination):
    """Move every file found in the subdirectories of *destination* into it.

    Files that already sit directly in *destination* are left alone. The
    emptied subdirectories are not removed.

    Args:
        destination: Directory whose tree is flattened in place.

    Raises:
        shutil.Error: If a file with the same name already exists directly
            in *destination* (shutil.move refuses to overwrite it).
    """
    # The first os.walk() entry is *destination* itself; skipping it means
    # only the subdirectories are scanned.
    sub_walk = itertools.islice(os.walk(destination), 1, None)
    # Collect all paths before moving anything so the tree is not modified
    # while it is still being walked.
    all_files = [
        os.path.join(root, filename)
        for root, _dirs, files in sub_walk
        for filename in files
    ]
    for filename in all_files:
        shutil.move(filename, destination)
Explanation: os.walk walks recursively the destination in a "top down" manner. whole filenames are constructed with the os.path.join(root, filename) call. Now, to prevent scanning files at the top of the destination, we just need to ignore the first element of the iteration of os.walk. To do that I use islice(iterator, 1, None). One other more explicit way would be to do this:
def move(destination):
    """Move every file found in the subdirectories of *destination* into it.

    Files that already sit directly in *destination* are left alone. The
    emptied subdirectories are not removed.

    Args:
        destination: Directory whose tree is flattened in place.

    Raises:
        shutil.Error: If a file with the same name already exists directly
            in *destination* (shutil.move refuses to overwrite it).
    """
    walker = os.walk(destination)
    # Explicitly discard the first entry: it describes *destination* itself,
    # whose files are already where they belong.
    next(walker, None)
    # Gather every path first so nothing is moved while the walk is running.
    all_files = []
    for root, _dirs, files in walker:
        all_files.extend(os.path.join(root, filename) for filename in files)
    for filename in all_files:
        shutil.move(filename, destination)
This would do it; it also renames files if their names collide (I commented out the actual move and replaced it with a copy):
import os
import sys
import string
import shutil
# Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    """Yield the path of every file below *path*.

    Args:
        path: A directory to walk recursively, or any other path.

    Yields:
        For a directory, ``os.path.join(root, name)`` for each file that
        os.walk reports. For anything that is not a directory, *path*
        itself, unchanged (it is not checked for existence).
    """
    if not os.path.isdir(path):
        yield path
        return
    for root, dirs, files in os.walk(path):
        for name in files:
            yield os.path.join(root, name)
destination = "./newdir/"
fromdir = "./test/"

# shutil.copy does not create missing folders, so make sure the target exists.
os.makedirs(destination, exist_ok=True)

for f in getfiles(fromdir):
    # os.path.basename replaces string.split(f, '/'), which was removed in
    # Python 3, and also copes with '\\' separators on Windows.
    filename = os.path.basename(f)
    if os.path.isfile(os.path.join(destination, filename)):
        # Name collision: fold the sub-path into the file name instead,
        # e.g. ./test/a/x.txt -> a_x.txt
        filename = os.path.relpath(f, fromdir).replace(os.sep, "_")
    #os.rename(f, destination+filename)
    shutil.copy(f, os.path.join(destination, filename))
Walk the directory recursively: move the files, and call move again for each subdirectory:
import shutil
import os
def move(destination, depth=None):
    """Recursively move all files below *destination* into *destination*.

    Args:
        destination: Directory that receives the files.
        depth: Path components (relative to *destination*) of the folder
            currently being processed. Callers normally omit it; it is
            filled in by the recursive calls.

    Raises:
        shutil.Error: If a file with the same name already exists directly
            in *destination* (shutil.move refuses to overwrite it).
    """
    if not depth:
        depth = []
    # os.path.join takes the components as separate arguments, not as a list.
    current_dir = os.path.join(destination, *depth)
    for file_or_dir in os.listdir(current_dir):
        # listdir returns bare names; test and move the full path, otherwise
        # the name would be resolved against the current working directory.
        full_path = os.path.join(current_dir, file_or_dir)
        if os.path.isfile(full_path):
            # Files at depth 0 are already in the destination.
            if depth:
                shutil.move(full_path, destination)
        elif os.path.isdir(full_path):
            move(destination, list(depth) + [file_or_dir])
import os.path, shutil
def move(src, dest):
    """Move everything glob_recursive reports under *src* into *dest*.

    Entries that already sit directly in *dest* are skipped.

    Args:
        src: Root of the tree to collect from.
        dest: Directory that receives the files; it must exist, because
            os.path.samefile() stats both paths.
    """
    def not_in_dest(path):
        # samefile() is True when both arguments are the same directory, so
        # the result is negated to keep only entries located outside *dest*.
        # The entry's parent folder is compared, not the entry itself.
        return not os.path.samefile(os.path.dirname(path) or os.curdir, dest)

    # NOTE(review): assumes glob_recursive yields file paths - confirm.
    # Materialise the list first so no file is moved while the tree is
    # still being scanned.
    files_to_move = [f for f in glob_recursive(path=src) if not_in_dest(f)]
    for f in files_to_move:
        shutil.move(f, dest)
Source for glob_recursive. Does not change name of file, if they collide.
samefile is a safe way to compare paths. But it doesn't work on Windows, so check How to emulate os.path.samefile behaviour on Windows and Python 2.7?.
def splitPath(p):
    """Split path *p* into a list of its components.

    The path is peeled from the right with os.path.split() until either
    the head or the tail comes back empty.

    Examples:
        ``"a/b/c"`` -> ``["a", "b", "c"]``
        ``"/a/b"``  -> ``["", "a", "b"]`` (the root shows up as ``""``)
        ``"a/b/"``  -> ``[""]`` (a trailing separator yields an empty tail
        immediately, which stops the split)
    """
    parts = []
    # Iterative rather than recursive, so very deep paths cannot hit the
    # recursion limit.
    while True:
        head, tail = os.path.split(p)
        parts.append(tail)
        if not (head and tail):
            break
        p = head
    parts.reverse()
    return parts
def safeprint(s):
    """Print *s*, falling back to an escaped form if stdout cannot encode it.

    Args:
        s: Text to print.

    If the normal print() raises UnicodeEncodeError, the characters the
    console encoding cannot represent are written as backslash escapes
    (for example ``\\xe9``) instead of aborting the program.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        if sys.version_info >= (3,):
            # stdout may have no encoding (e.g. a replaced stream); ASCII is
            # the safe common denominator then.
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            # Encode and decode with the SAME codec: decoding UTF-8 bytes with
            # the console codec gives mojibake or a fresh exception.
            print(s.encode(encoding, "backslashreplace").decode(encoding))
        else:
            print(s.encode('utf8'))
def flatten(root, doit):
    """Move every file below *root* directly into *root* and delete the subfolders.

    Args:
        root: Directory to flatten.
        doit: When true, files are moved and folders removed. When false,
            nothing on disk is touched; the function only prints what it
            would do (simulation).

    For every file a line ``Moved: <name>`` is printed, where ``<name>`` is
    the folder hierarchy joined with ``¦`` plus the file name; a ``¦``
    already present in a folder or file name is replaced by ``?`` in that
    printed name. Totals are printed at the end.

    NOTE(review): files are moved to ``os.path.join(root, f)``, so two files
    with the same name in different folders end up at the same target path.
    """
    SEP = "¦"
    REPL = "?"
    folderCount = 0
    fileCount = 0
    if not doit:
        print("Simulating:")
    # topdown=False reports the deepest folders first, so a folder's
    # subfolders have already been handled when the folder itself is removed.
    for path, dirs, files in os.walk(root, topdown=False):
        if path == root:
            # Files directly in root are already where they belong.
            continue
        for f in files:
            sp = splitPath(path)
            # Hierarchy-based name: every path component except the first,
            # each followed by SEP, then the file name.
            np = "".join(element.replace(SEP, REPL) + SEP for element in sp[1:])
            f2 = f.replace(SEP, REPL)
            newName = np + f2
            safeprint("Moved: "+ newName )
            if doit:
                shutil.move(os.path.join(path, f), os.path.join(root, f))
                # Uncomment, if you want filenames to be based on folder hierarchy.
                #shutil.move(os.path.join(path, f), os.path.join(root, newName))
            fileCount += 1
        safeprint("Removed: "+ path)
        if doit:
            os.rmdir(path)
        folderCount += 1
    if doit:
        print("Done.")
    else:
        print("Simulation complete.")
    print("Moved files:", fileCount)
    print("Removed folders:", folderCount)
directory_path = r"C:\Users\jd\Documents\myFtpData"
flatten(directory_path, True)
Adding on to the answers, I believe my answer will satisfy all your needs, the other answers fail when there is a subdirectory and file with the same filename as the upper directory.
This was SOLVED here, Also look at my Github Repo for Structured File Copy and Flattened File Copy:
import os, fnmatch, shutil
PATTERN = '*.txt' # fnmatch-style wildcard (not a regex) selecting the files to copy
INPUT_FOLDER = "A" # folder to copy from; use os.getcwd() for the current directory
INPUT_FOLDER = os.path.abspath(INPUT_FOLDER)
# When True, relative paths are computed from the parent of INPUT_FOLDER, so
# the copied tree starts with the input folder's own name.
include_input_foldername = False
# Suffix appended to the output folder name when the option above is enabled.
prepend = "_included" if include_input_foldername else ""
OUTPUT_FOLDER = f"Structured_Copy_{os.path.basename(INPUT_FOLDER)}{prepend}"
# The output folder is created relative to the current working directory.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def find(pattern, path):
    """Return the paths of all files under *path* whose name matches *pattern*.

    Args:
        pattern: Shell-style wildcard as understood by the fnmatch module
            (e.g. ``*.txt``); this is not a regular expression.
        path: Directory to search recursively.

    Returns:
        A list of ``os.path.join(root, name)`` strings in os.walk order;
        empty when nothing matches.
    """
    return [
        os.path.join(root, name)
        for root, dirs, files in os.walk(path)
        # fnmatch.filter keeps the names matching the pattern, in order.
        for name in fnmatch.filter(files, pattern)
    ]
all_files = find(PATTERN, INPUT_FOLDER)

# Relative paths are measured from the parent of INPUT_FOLDER when the input
# folder's own name should appear in the copy, otherwise from INPUT_FOLDER
# itself. The base does not change per file, so compute it once.
base_folder = os.path.dirname(INPUT_FOLDER) if include_input_foldername else INPUT_FOLDER

for each_path in all_files:
    relative_path = os.path.relpath(each_path, base_folder)
    flattened_relative_fullpath = os.path.join(OUTPUT_FOLDER, relative_path)
    # Recreate the file's sub-folder inside OUTPUT_FOLDER before copying.
    os.makedirs(os.path.dirname(flattened_relative_fullpath), exist_ok=True)
    shutil.copy(each_path, flattened_relative_fullpath)
    print(f"Copied {each_path} to {flattened_relative_fullpath}")

print(f"Finished Copying {len(all_files)} Files from : {INPUT_FOLDER} to : {OUTPUT_FOLDER}")

Simple list comprehension

I want a flat list of files:
files = [files for (subdir, dirs, files) in os.walk(rootdir)]
But I get,
files = [['filename1', 'filename2']]
when I want
files = ['filename1', 'filename2']
How do I prevent looping through that tuple? Thanks!
Both of these work:
[f for (subdir, dirs, files) in os.walk(rootdir) for f in files]
sum([files for (subdir, dirs, files) in os.walk(rootdir)], [])
Sample output:
$ find /tmp/test
/tmp/test
/tmp/test/subdir1
/tmp/test/subdir1/file1
/tmp/test/subdir2
/tmp/test/subdir2/file2
$ python
>>> import os
>>> rootdir = "/tmp/test"
>>> [f for (subdir, dirs, files) in os.walk(rootdir) for f in files]
['file1', 'file2']
>>> sum([files for (subdir, dirs, files) in os.walk(rootdir)], [])
['file1', 'file2']
for (subdir, dirs, f) in os.walk(rootdir): files.extend(f)
files = [filename for (subdir, dirs, files) in os.walk(rootdir) for filename in files]
import os, glob
files = [file for file in glob.glob('*') if os.path.isfile(file)]
if your files have extensions, then even simpler:
import glob
files = glob.glob('*.*')

Categories

Resources