How to get list files matching name in Python - python

I am trying to get a list of files in a directory which match a certain name e.g in Bash the Code would be :
BASH
FOLDER="MainProject"
FILES=`find "$FOLDER" -name "Localizations*swift"`
Python
import os
def read_files():
path = 'MainProject/'
folders = []
for r, d, f in os.walk(path):
for folder in d:
folders.append(os.path.join(r, folder))
for f in folders:
print(f)
Please note I am new to python hence I am struggling with this.

If you only need files in a single directory (no depth):
from glob import glob
import os
def read_files():
path = 'MainProject/'
print(list(glob(os.path.join(path, "Localizations*swift"))))
If you only need file names (and not directory names) recursively:
from fnmatch import fnmatch
import os
def read_files():
path = 'MainProject/'
for r, d, f in os.walk(path):
if fnmatch(f, "Localizations*swift"):
print(os.path.join(r, f))

If You want to look at the current dir:
path = 'MainProject/'
f_name = 'Localizations*swift'
all_files = os.listdir(path)
matching_files = [file for file in all_files if file==f_name]
if You want to 'walk' through the current and all subdirs, make use of the os.walk function.
Also You can use some regex, to match file names
import os
for root, dirs, files in os.walk(path, topdown=False):
for name in files:
# if name.find(f_name)>-1: # name contains f_name
if name==f_name:
print(os.path.join(root, name))

Related

Iterate through directories, subdirectories, to get files with specialized ext. , copy in new directory

I have a question and I went all the other topics through with similar problems but I didn't get my solved.
I have a folder where two subfolders are. Inside of them are a lot of files, but I need just files with extension .trl. I need to copy them and save them in a new folder that is already created.
My code don't give me an error but I don't see any result. What I'm doing wrong?
import os
import shutil
import fnmatch
directory = "/home/.../test_daten"
ext = ('.trl')
dest_dir = "/home/.../test_korpus"
for root, dirs, files in os.walk(directory):
for extension in ext:
for filename in fnmatch.filter(files, extension+'.trl'):
source = (os.path.join(root, filename))
shutil.copy2(source, dest_dir)
Use os.walk() and find all files. Then use endswith() to find the files ending with .trl.
Sample code:
import os, shutil;
directory = "/home/.../test_daten";
dest_dir = "/home/.../test_korpus";
filelist = [];
for root, dirs, files in os.walk(directory):
for file in files:
filelist.append(os.path.join(root,file));
for trlFile in filelist:
if trlFile.endswith(".trl"):
shutil.copy(trlFile, dest_dir);
Maybe your problem is fnmatch.filter(files, extension+'.trl'). You have extension from for extension in ext: which will loop though the variable ext and give you a letter from it each time. Your extension+'.trl will be ..trl, t.trl, l.trl
import os
import shutil
directory = "/home/.../test_daten"
dest_dir = "/home/.../test_korpus"
for root, _, files in os.walk(directory): #Get the root, files
for x in files:
if x.endswith('.trl'): #Check the extension
shutil.copy2(f'{root}/{x}', dest_dir)
import os
import shutil
import fnmatch
source = "/home/.../test_daten"
ext = '*.trl'
target = "/home/.../test_korpus"
for root, _, files in os.walk(source):
for filename in fnmatch.filter(files, ext):
shutil.copy2(os.path.join(root, filename), target)

list only files of a directory and ignore the subdirectories with python

I would like to list only the files of a directory and skip the subdirectories.
The current code lists also the files in the subdirectories but that's not what I want:
import os
list_files = []
for root, dirs, files in os.walk(input_folder):
for filename in files:
joined = os.path.join(input_folder, filename)
list_files.append(joined)
The files in the directory to process look like this:
/headfolder/subfolder/subfile.pdf #ignore this directory
/headfolder/subfolder2/subfile.pdf #ignore this directory
/headfolder/file.pdf #get this path
/headfolder/file2.pdf #get this path
desired output:
['/headfolder/file.pdf','/headfolder/file2.pdf ']
files=[i for i in os.listdir() if os.path.isfile(i)]
Edit:
for list all files in specific direcotry
import os
from os import listdir
from os.path import isfile, join
input_folder = 'headfolder/'
list_files = []
files = [f for f in listdir(input_folder) if isfile(join(input_folder, f))]
for filename in files:
joined = os.path.join(input_folder, filename)
list_files.append(joined)
print(list_files)

Search all files with same name in a directory python

I Have a Question :
I need to get paths of a file in a directory, I have a folder that contains other folders and other folders etc.... and each of them contains a file "tv.sas7bdat" I need to get every path to that file.
Thank you !!!
You can try the following code, where PATH stands for the parent directory
import os
def getAlldirInDiGui(path,resultList):
filesList=os.listdir(path)
for fileName in filesList:
fileAbpath=os.path.join(path,fileName)
if os.path.isdir(fileAbpath):
getAlldirInDiGui(fileAbpath,resultList)
else:
if fileName=='tv.sas7bdat':
resultList.append(fileAbpath)
resultList = []
PATH = ""
getAlldirInDiGui(PATH,resultList)
You can use os.walk()
import os
for root, dirs, files in os.walk(os.getcwd()):
for f in files:
if f.find("tv.sas7bdat")>=0:
print(root,f)
If I get your problem right you can achieve your goal using Pythons's os.walk function, like so:
import os
for root, dirs, files in os.walk("<starting folder here>", topdown=False):
for name in files:
if name == "tv.sas7bdat":
print(os.path.join(root, name))
p.s: as for comments in your question, next time please provide as many details possible in your question and provide code of your attempt, see the asking guidelines
Hope fully below code should work for you:
import glob
initial_path = "c:\<intital folder location>"
files = [file for file in glob.glob(initial_path+ "tv.sas7bdat" , recursive=True)]
for f in files:
print(f)
You could use the os python package combined with a recursive function to search through a certain directory
import os
from os.path import isfile, join, isdir
def get_files_path(directory, paths):
for item in os.listdir(directory):
if isfile(join(directory, item)) and item == "tv.sas7bda":
paths.append(directory + item)
elif isdir(directory+item):
get_files_path(directory + item, paths)
return paths
directory_to_search = "./"
get_files_path(directory_to_search , [])

How to stop os.walk from walking all sub-directories down?

Using:
def simpleFunc(dirName):
import os
fileList=[]
for dir, dirs, filenames in os.walk(dirName):
for filename in filenames:
filepath=dir+'/'+filename
fileList.append(filepath)
print fileList
simpleFunc(os.path.dirname('/entire/path/to/file.ext'))
The problem is that os.walk just doesn't stop at /entire/path/to/ directory level but goes all the way down to the lowest sub-directory it can find. So if there is /entire/path/to/subdir1/subdir2/subdir3 then in additional to /entire/path/to/ all three sub-directories will be searched: ./subdir1/, ././subdir2/, ./././subdir3/.
Question: how to make sure the function stops at the directory level specified: /entire/path/to/ and doesn't go all the way down?
Based on what you've written, if you just want to search the specified directory. It might be better to use os.listdir and then just filter on os.path.isfile, e.g., like this:
def simple_func(dirpath):
paths = os.listdir(dirpath)
# create absolute paths
abs_paths = [os.path.join(dirpath, p) for p in paths]
# filter for paths that are files
file_list = [p for p in paths if os.path.isfile(p)]
return file_list
That way you don't have to deal with stopping the recursion and it's pretty clear which files you want. You might want to profile to see whether the multiple isfile calls hurt anything.
from os import listdir
from os.path import isfile, join
path='/entire/path/to/'
files = [ join(path,f) for f in listdir(path) if isfile(join(path,f)) ]
files return ur files. no need to declare new filelist
Sounds like you should use os.listdir instead.
import os
my_path = '/entire/path/to/files/'
file_list = []
for filename in os.listdir(my_path):
filepath = os.path.join(my_path, filename)
if os.path.isfile(filepath):
fileList.append(filepath)
You need to take the only first item from the os.walk generator:
import os
root, _, files = next(os.walk('/entire/path/to'))
and then append root dir to each filename:
files = map(lambda f: os.path.join(root, f), files)
or just:
files = [os.path.join(root, f) for f in files]
You can tell os.walk which directory to use explicitly
def simpleFunc(dirName):
import os
# set the dir name in a var
dirName = '/higher_lever_dir/'
for dir, dirs, filenames in os.walk(dirName):
# this line forces loop to look ONLY in the specified dir
dirs[:] = [d for d in dirs if d in dirName]
for filename in filenames:
filepath=dir+'/'+filename
fileList.append(filepath)
print fileList
simpleFunc(os.path.dirname('/entire/path/to/file.ext'))
Setting dirs[:] = [d for d in dirs if d in dirName] will exclude files in subdirectories that may exist in /higher_level_dir/

Find all files in a directory with extension .txt in Python

This question's answers are a community effort. Edit existing answers to improve this post. It is not currently accepting new answers or interactions.
How can I find all the files in a directory having the extension .txt in python?
You can use glob:
import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
print(file)
or simply os.listdir:
import os
for file in os.listdir("/mydir"):
if file.endswith(".txt"):
print(os.path.join("/mydir", file))
or if you want to traverse directory, use os.walk:
import os
for root, dirs, files in os.walk("/mydir"):
for file in files:
if file.endswith(".txt"):
print(os.path.join(root, file))
Use glob.
>>> import glob
>>> glob.glob('./*.txt')
['./outline.txt', './pip-log.txt', './test.txt', './testingvim.txt']
Something like that should do the job
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.txt'):
print(file)
You can simply use pathlibs glob 1:
import pathlib
list(pathlib.Path('your_directory').glob('*.txt'))
or in a loop:
for txt_file in pathlib.Path('your_directory').glob('*.txt'):
# do something with "txt_file"
If you want it recursive you can use .glob('**/*.txt')
1The pathlib module was included in the standard library in python 3.4. But you can install back-ports of that module even on older Python versions (i.e. using conda or pip): pathlib and pathlib2.
Something like this will work:
>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']
import os
path = 'mypath/path'
files = os.listdir(path)
files_txt = [i for i in files if i.endswith('.txt')]
I like os.walk():
import os
for root, dirs, files in os.walk(dir):
for f in files:
if os.path.splitext(f)[1] == '.txt':
fullpath = os.path.join(root, f)
print(fullpath)
Or with generators:
import os
fileiter = (os.path.join(root, f)
for root, _, files in os.walk(dir)
for f in files)
txtfileiter = (f for f in fileiter if os.path.splitext(f)[1] == '.txt')
for txt in txtfileiter:
print(txt)
Here's more versions of the same that produce slightly different results:
glob.iglob()
import glob
for f in glob.iglob("/mydir/*/*.txt"): # generator, search immediate subdirectories
print f
glob.glob1()
print glob.glob1("/mydir", "*.tx?") # literal_directory, basename_pattern
fnmatch.filter()
import fnmatch, os
print fnmatch.filter(os.listdir("/mydir"), "*.tx?") # include dot-files
Try this this will find all your files recursively:
import glob, os
os.chdir("H:\\wallpaper")# use whatever directory you want
#double\\ no single \
for file in glob.glob("**/*.txt", recursive = True):
print(file)
Python v3.5+
Fast method using os.scandir in a recursive function. Searches for all files with a specified extension in folder and sub-folders. It is fast, even for finding 10,000s of files.
I have also included a function to convert the output to a Pandas Dataframe.
import os
import re
import pandas as pd
import numpy as np
def findFilesInFolderYield(path, extension, containsTxt='', subFolders = True, excludeText = ''):
""" Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
if type(containsTxt) == str: # if a string and not in a list
containsTxt = [containsTxt]
myregexobj = re.compile('\.' + extension + '$') # Makes sure the file extension is at the end and is preceded by a .
try: # Trapping a OSError or FileNotFoundError: File permissions problem I believe
for entry in os.scandir(path):
if entry.is_file() and myregexobj.search(entry.path): #
bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]
if len(bools)== len(containsTxt):
yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path
elif entry.is_dir() and subFolders: # if its a directory, then repeat process as a nested function
yield from findFilesInFolderYield(entry.path, extension, containsTxt, subFolders)
except OSError as ose:
print('Cannot access ' + path +'. Probably a permissions error ', ose)
except FileNotFoundError as fnf:
print(path +' not found ', fnf)
def findFilesInFolderYieldandGetDf(path, extension, containsTxt, subFolders = True, excludeText = ''):
""" Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
fileSizes, accessTimes, modificationTimes, creationTimes , paths = zip(*findFilesInFolderYield(path, extension, containsTxt, subFolders))
df = pd.DataFrame({
'FLS_File_Size':fileSizes,
'FLS_File_Access_Date':accessTimes,
'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
'FLS_File_Creation_Date':creationTimes,
'FLS_File_PathName':paths,
})
df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)
return df
ext = 'txt' # regular expression
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path, ext, containsTxt, subFolders = True)
path.py is another alternative: https://github.com/jaraco/path.py
from path import path
p = path('/path/to/the/directory')
for f in p.files(pattern='*.txt'):
print f
To get all '.txt' file names inside 'dataPath' folder as a list in a Pythonic way:
from os import listdir
from os.path import isfile, join
path = "/dataPath/"
onlyTxtFiles = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith(".txt")]
print onlyTxtFiles
Python has all tools to do this:
import os
the_dir = 'the_dir_that_want_to_search_in'
all_txt_files = filter(lambda x: x.endswith('.txt'), os.listdir(the_dir))
I did a test (Python 3.6.4, W7x64) to see which solution is the fastest for one folder, no subdirectories, to get a list of complete file paths for files with a specific extension.
To make it short, for this task os.listdir() is the fastest and is 1.7x as fast as the next best: os.walk() (with a break!), 2.7x as fast as pathlib, 3.2x faster than os.scandir() and 3.3x faster than glob.
Please keep in mind, that those results will change when you need recursive results. If you copy/paste one method below, please add a .lower() otherwise .EXT would not be found when searching for .ext.
import os
import pathlib
import timeit
import glob
def a():
path = pathlib.Path().cwd()
list_sqlite_files = [str(f) for f in path.glob("*.sqlite")]
def b():
path = os.getcwd()
list_sqlite_files = [f.path for f in os.scandir(path) if os.path.splitext(f)[1] == ".sqlite"]
def c():
path = os.getcwd()
list_sqlite_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".sqlite")]
def d():
path = os.getcwd()
os.chdir(path)
list_sqlite_files = [os.path.join(path, f) for f in glob.glob("*.sqlite")]
def e():
path = os.getcwd()
list_sqlite_files = [os.path.join(path, f) for f in glob.glob1(str(path), "*.sqlite")]
def f():
path = os.getcwd()
list_sqlite_files = []
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".sqlite"):
list_sqlite_files.append( os.path.join(root, file) )
break
print(timeit.timeit(a, number=1000))
print(timeit.timeit(b, number=1000))
print(timeit.timeit(c, number=1000))
print(timeit.timeit(d, number=1000))
print(timeit.timeit(e, number=1000))
print(timeit.timeit(f, number=1000))
Results:
# Python 3.6.4
0.431
0.515
0.161
0.548
0.537
0.274
import os
import sys
if len(sys.argv)==2:
print('no params')
sys.exit(1)
dir = sys.argv[1]
mask= sys.argv[2]
files = os.listdir(dir);
res = filter(lambda x: x.endswith(mask), files);
print res
To get an array of ".txt" file names from a folder called "data" in the same directory I usually use this simple line of code:
import os
fileNames = [fileName for fileName in os.listdir("data") if fileName.endswith(".txt")]
This code makes my life simpler.
import os
fnames = ([file for root, dirs, files in os.walk(dir)
for file in files
if file.endswith('.txt') #or file.endswith('.png') or file.endswith('.pdf')
])
for fname in fnames: print(fname)
Use fnmatch: https://docs.python.org/2/library/fnmatch.html
import fnmatch
import os
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
print file
A copy-pastable solution similar to the one of ghostdog:
def get_all_filepaths(root_path, ext):
"""
Search all files which have a given extension within root_path.
This ignores the case of the extension and searches subdirectories, too.
Parameters
----------
root_path : str
ext : str
Returns
-------
list of str
Examples
--------
>>> get_all_filepaths('/run', '.lock')
['/run/unattended-upgrades.lock',
'/run/mlocate.daily.lock',
'/run/xtables.lock',
'/run/mysqld/mysqld.sock.lock',
'/run/postgresql/.s.PGSQL.5432.lock',
'/run/network/.ifstate.lock',
'/run/lock/asound.state.lock']
"""
import os
all_files = []
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
all_files.append(os.path.join(root, filename))
return all_files
You can also use yield to create a generator and thus avoid assembling the complete list:
def get_all_filepaths(root_path, ext):
import os
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
yield os.path.join(root, filename)
I suggest you to use fnmatch and the upper method. In this way you can find any of the following:
Name.txt;
Name.TXT;
Name.Txt
.
import fnmatch
import os
for file in os.listdir("/Users/Johnny/Desktop/MyTXTfolder"):
if fnmatch.fnmatch(file.upper(), '*.TXT'):
print(file)
Here's one with extend()
types = ('*.jpg', '*.png')
images_list = []
for files in types:
images_list.extend(glob.glob(os.path.join(path, files)))
Functional solution with sub-directories:
from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk
print(*chain(*(map(partial(path.join, root), filter(filenames, "*.txt")) for root, _, filenames in walk("mydir"))))
In case the folder contains a lot of files or memory is an constraint, consider using generators:
def yield_files_with_extensions(folder_path, file_extension):
for _, _, files in os.walk(folder_path):
for file in files:
if file.endswith(file_extension):
yield file
Option A: Iterate
for f in yield_files_with_extensions('.', '.txt'):
print(f)
Option B: Get all
files = [f for f in yield_files_with_extensions('.', '.txt')]
use Python OS module to find files with specific extension.
the simple example is here :
import os
# This is the path where you want to search
path = r'd:'
# this is extension you want to detect
extension = '.txt' # this can be : .jpg .png .xls .log .....
for root, dirs_list, files_list in os.walk(path):
for file_name in files_list:
if os.path.splitext(file_name)[-1] == extension:
file_name_path = os.path.join(root, file_name)
print file_name
print file_name_path # This is the full path of the filter file
Many users have replied with os.walk answers, which includes all files but also all directories and subdirectories and their files.
import os
def files_in_dir(path, extension=''):
"""
Generator: yields all of the files in <path> ending with
<extension>
\param path Absolute or relative path to inspect,
\param extension [optional] Only yield files matching this,
\yield [filenames]
"""
for _, dirs, files in os.walk(path):
dirs[:] = [] # do not recurse directories.
yield from [f for f in files if f.endswith(extension)]
# Example: print all the .py files in './python'
for filename in files_in_dir('./python', '*.py'):
print("-", filename)
Or for a one off where you don't need a generator:
path, ext = "./python", ext = ".py"
for _, _, dirfiles in os.walk(path):
matches = (f for f in dirfiles if f.endswith(ext))
break
for filename in matches:
print("-", filename)
If you are going to use matches for something else, you may want to make it a list rather than a generator expression:
matches = [f for f in dirfiles if f.endswith(ext)]

Categories

Resources