Read specific folder's name from folder's path Python - python

I'm trying to read the folder name at a specific position in a file path. My current code:
import os
# search for and input multiple files
def get_files(source):
    """Return the path of every file found under ``source``.

    The tree is walked recursively with ``os.walk``; each file name is joined
    with the directory it was found in.

    Args:
        source: Directory to search.

    Returns:
        A list of file paths. Empty when ``source`` holds no files or does
        not exist (``os.walk`` yields nothing in that case).
    """
    matches = []
    for root, _dirnames, filenames in os.walk(source):
        matches.extend(os.path.join(root, filename) for filename in filenames)
    return matches
# For each file path, record its folder path and a customer name taken from a
# fixed position in that folder path, collect the record in `metadata`, and
# print `metadata`.
def parse(files):
for file in files:
xml_information = {}
metadata = []
# Get the file path
filepath = os.path.dirname(file)
xml_information['file_path'] = '%s' % filepath
# Get customer name
# NOTE(review): index 5 exists only when the folder path has at least six
# backslash-separated components. The base path passed in below splits into
# five (indices 0-4), so a file sitting directly in that folder has no
# component 5 and this line raises IndexError.
customer = filepath.split("\\")[5]
xml_information['customer_name'] = '%s' % customer
metadata.append(xml_information)
print(metadata)
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(get_files(path))
My program searches through folders, finds the files, and reports back their folder paths. However, I would also like to read the folder name at the sixth position in the path, which is the customer name. When I run customer = filepath.split("\\")[5], it reports an error:
Traceback (most recent call last):
File "*hidden*", line 33, in <module>
parse(get_files(path))
File "*hidden*", line 26, in parse
customer = filepath.split("\\")[5]
~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range
However, when I run with customer = filepath.split("\\")[4], the program works and reads the last folder specified in path which is Lania Thompson - Searching Project Files. The result is as follows:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Lania Thompson - Searching Project Files'}]
My expected result is Hazor Ltd:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Hazor Ltd'}]
The names are made up, except for my own name.

So I have worked out the code using the pathlib library. The code is:
import os
from pathlib import Path
# search for and input multiple files
def get_files(source):
    """Collect the full path of every file below ``source``.

    Args:
        source: Root directory; it is searched recursively via ``os.walk``.

    Returns:
        A list with one entry per file, each built by joining the containing
        directory with the file name. Empty when no files are found.
    """
    matches = []
    for root, _dirnames, filenames in os.walk(source):
        for filename in filenames:
            matches.append(os.path.join(root, filename))
    return matches
def parse(files, customer_index=5):
    """Print and return one customer record per file in ``files``.

    Each record maps ``'Customer'`` to the folder name found at position
    ``customer_index`` of the file's directory, counted as in ``Path.parts``
    (the drive/root is component 0). The default of 5 is the position that
    was previously hard-coded as ``f.parts[5]``.

    Args:
        files: Iterable of file paths, e.g. the result of ``get_files``.
        customer_index: Position of the wanted folder name in the path.

    Returns:
        The list of records that was printed. Files whose directory is too
        shallow to have a component at ``customer_index`` are left out.
    """
    metadata = []
    for file in files:
        # Index the folder, not the file: for a file that sits directly in the
        # top folder, the component at this position would be the file name.
        folders = Path(file).parent.parts
        if len(folders) <= customer_index:
            continue
        # A fresh dict per file, so records never alias each other.
        metadata.append({'Customer': folders[customer_index]})
    print(metadata)
    return metadata
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(get_files(path))
Change the index 5 (as in f.parts[5]) to the position of the folder whose name you want to read.

Related

Os path Join Two args

I need help finding a file inside a folder by name. I can do this with one file name; how could I do it with two file names?
This is the code used
# Folder that is searched for the reference-price files.
path = r"Z:/Equities/ReferencePrice/"
# NOTE(review): `files` is never filled or read in this snippet.
files = []
# The two pattern strings are concatenated, so only names that contain
# "OptionOnEquitiesReferencePriceFile" followed by ".txt" are matched.
for file in glob.glob(os.path.join(path ,"*OptionOnEquitiesReferencePriceFile*"+"*.txt*")):
# Each match is read with ';' as the delimiter; `df` is rebound on every match.
df = pd.read_csv(file, delimiter = ';')
The first file's name contains
"OptionOnEquitiesReferencePriceFile"
and the second file's name contains
"BDRReferencePrice"
How can I add the second name, so that the search matches one or the other, or both?
I don't think you can do that in a straightforward way, so here's an alternative solution (with a function) that you can use:
import os
from fnmatch import fnmatch
# folder path :
# here in this path I have many files: some start with 'other',
# some with 'test', and some have random names.
# in the example I'm fetching only the 'test' and 'other' patterns
dir_path = './test_dir'
def find_by_patterns(patterns, path):
    """Return the paths of all files under ``path`` that match any pattern.

    Args:
        patterns: Iterable of ``fnmatch``-style patterns, e.g.
            ``['test*.txt', 'other*.txt']``. Patterns are matched against the
            bare file name, not the full path.
        path: Directory to search recursively.

    Returns:
        A list of matching file paths (containing directory joined with the
        file name). Empty when nothing matches or ``patterns`` is empty.
    """
    # Materialise once: the patterns are re-used for every file, which a
    # one-shot iterator would not survive.
    patterns = list(patterns)
    results = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            # any() stops at the first hit and is simply False for an empty
            # pattern list, where max([]) would raise ValueError.
            if any(fnmatch(name, pattern) for pattern in patterns):
                results.append(os.path.join(root, name))
    return results
# printing the results
print(find_by_patterns(['test*.txt', 'other*.txt'], dir_path))
output:
['./test_dir/other1.txt', './test_dir/other2.txt', './test_dir/test1.txt', './test_dir/test2.txt', './test_dir/test3.txt']

How to rename all files to include the directory name?

I'm trying to use a For loop in the code below to go through a list of files and rename them with the file directory's name.
import re # add this to your other imports
import os
# Attempt to rename files so that the new name starts with the directory name.
# os.walk yields (dirpath, dirnames, filenames) tuples, so `files` is bound to
# that whole tuple on each pass.
for files in os.walk("."):
# NOTE(review): this iterates over the three tuple members (one string and two
# lists), not over individual file names.
for f_new in files:
# NOTE(review): `files` is a tuple and has no .split(); a path string was
# presumably intended here.
folder = files.split(os.sep)[-2]
print(folder)
# Captures the literal "Position" and the digits that follow it.
name_elements = re.findall(r'(Position)(\d+)', f_new)[0]
# int() drops leading zeros, e.g. "014" becomes "14".
name = name_elements[0] + str(int(name_elements[1]))
print(name) # just for demonstration
dst = folder + '_' + name
print(dst)
# NOTE(review): the source name is hard-coded instead of taken from the loop
# variable, and `dst` carries no file extension.
os.rename('Position014 (RGB rendering) - 1024 x 1024 x 1 x 1 - 3 ch (8 bits).tif', dst)
Use pathlib
Path.rglob: This is like calling Path.glob() with '**/' added in front of the given relative pattern:
.parent or .parents[0]: An immutable sequence providing access to the logical ancestors of the path
If you want different parts of the path, index parents[] differently
file.parents[0].stem returns 'test1' or 'test2' depending on the file
file.parents[1].stem returns 'photos'
file.parents[2].stem returns 'stack_overflow'
.stem: The final path component, without its suffix
.suffix: The file extension of the final component
.rename: Rename this file or directory to the given target
The following code finds only .tiff files. Use *.* to get all files.
If you only want the first 10 characters of file_name:
file_name = file_name[:10]
from pathlib import Path


def rename_with_dir_prefix(p, pattern='*.tiff'):
    """Prefix every matching file under ``p`` with its directory's name.

    A file ``<p>/test1/test.tiff`` becomes ``<p>/test1/test1_test.tiff``.

    Args:
        p: Root folder; it is searched recursively with ``Path.rglob``.
        pattern: Glob pattern selecting the files to rename.

    Returns:
        The list of new paths, in the order the files were renamed.
    """
    # get all files in subdirectories matching the pattern; the list is built
    # before any rename so that renaming cannot disturb the directory scan
    files = list(Path(p).rglob(pattern))
    # print files example
    # [WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test.tiff')]
    renamed = []
    # iterate through files
    for file in files:
        file_path = file.parent  # get only path
        # get the directory name; .name rather than .stem, because .stem would
        # cut a directory such as 'set.v2' down to 'set'
        dir_name = file.parent.name
        file_name = file.stem  # get the file name
        suffix = file.suffix  # get the file extension
        file_name_new = f'{dir_name}_{file_name}{suffix}'  # make the new file name
        target = file_path / file_name_new
        file.rename(target)  # rename the file
        renamed.append(target)
    return renamed


if __name__ == '__main__':
    # set path to files
    p = Path('e:/PythonProjects/stack_overflow/photos/')
    rename_with_dir_prefix(p)
    # output files renamed
    # [WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test1_test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test2_test.tiff')]

file exists or not through matching list of file names provided

i have files in folders and subfolders.
folder structure is like this
2020(folder)
-01(sub folder)
--14(sub-sub folder)
----abc1-2020-01-14.csv
----abc2-2020-01-14.csv
-02(subfolder in 2020)
--17(sub-sub folder in 02)
----abc1-2020-02-17.csv
----abc4-2020-02-17.csv
now i have list of file names.
li = ['abc1','abc2','abc3','abc4']
I want to know whether these files exist in the directory or not. Each subdirectory should have all 4 files; if it does not, the code must return the path where a particular file does not exist.
what i have tried
# Attempt to report, for every CSV under BASE_PATH, whether its name starts
# with one of the expected prefixes in `li`.
BASE_PATH = r'2020/'
# recursive=True lets "**" match any number of nested folders.
all_files = glob.glob(BASE_PATH + "**/*.csv",recursive=True)
li = ['abc1','abc2','abc3','abc4']
for filename in all_files:
# h is the (head, tail) pair: containing folder and file name.
h = os.path.split(filename)
# NOTE(review): the value formatted into the message is the os.path module
# itself, not the file name.
print("Head of '% s:'" % os.path, h[0])
print("Tail of '% s:'" % os.path, h[1], "\n")
for fn in li:
# NOTE(review): `head_tail` is not defined anywhere in this snippet; `h` from
# above is presumably what was meant.
if (head_tail[1].startswith(fn)):
# NOTE(review): os.path is a module and cannot be called.
print('True', os.path(filename))
else:
print('False', os.path(filename))
any help would be great.

Best way to search through folders and delete files if they exist in a list?

I've created a list that contains file paths to files that I want to delete. What's the most Pythonic way to search through a folder and its subfolders for these files, then delete them?
Currently I'm looping through the list of file paths, then walking through a directory and comparing the files in the directory to the file that is in the list. There has to be a better way.
# For every entry in the delete list, walk the whole folder tree and compare
# each .jpg file name against "<entry>.jpg". (Python 2 print statements.)
for x in features_to_delete:
name_checker = str(x) + '.jpg'
print 'this is name checker {}'.format(name_checker)
# The complete tree is walked again for every entry of features_to_delete.
for root, dir2, files in os.walk(folder):
print 'This is the root directory at the moment:{} The following are files inside of it'.format(root)
for b in files:
if b.endswith('.jpg'):
local_folder = os.path.join(folder, root)
print 'Here is name of file {}'.format(b)
print 'Here is name of name checker {}'.format(name_checker)
if b == name_checker:
# NOTE(review): `counter` is not initialised anywhere in this snippet.
counter += 1
print '{} needs to be deleted..'.format(b)
# The actual deletion is commented out, so nothing is removed.
#os.remove(os.path.join(local_folder, b))
# NOTE(review): `day_folder` is not defined in this snippet; `local_folder`
# is presumably what was meant.
print 'Removed {} \n'.format(os.path.join(day_folder, b))
else:
print 'This file can stay {} \n'.format(b)
else:
pass
So to clarify, what I'm doing now is looping through the entire list of features to delete, every iteration I'm also looping through every single file in the directory and all sub directories and comparing that file to the file that is currently looping in the features to delete list. It takes a very long time and seems like a terrible way to go about doing it.
You should only visit each directory once. You can use sets to compare the list of file names in a given directory to your delete list. The lists of contained and not-contained files become simple one-step operations. If you don't care about printing out the file names, it's rather compact:
# Names to remove, built once so that every directory is visited only once.
delete_set = {str(x) + '.jpg' for x in features_to_delete}
for root, _dirs, files in os.walk(folder):
    # The set intersection yields exactly the doomed names in this directory.
    doomed_here = delete_set & set(files)
    for delete_name in doomed_here:
        os.remove(os.path.join(root, delete_name))
But if you want to print as you go, you have to add a few intermediate variables
# Names to remove, built once so that every directory is visited only once.
delete_set = {str(x) + '.jpg' for x in features_to_delete}
for root, dirs, files in os.walk(folder):
    files = set(files)
    delete_these = delete_set & files  # present here and on the delete list
    keep_these = files - delete_set    # present here but not on the list
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('This is the root directory at the moment:{} The following are files inside of it'.format(root))
    print('delete these: {}'.format('\n '.join(delete_these)))
    print('keep these: {}'.format('\n '.join(keep_these)))
    for delete_name in delete_these:
        os.remove(os.path.join(root, delete_name))
Create a function to separate the recursive glob like functionality from your own deletion logic. Then just iterate over the list and delete any that match your blacklist.
You can make a set to give improved performance matching the file names. The larger the list the greater the improvement, but for smaller lists it might be negligible.
from fnmatch import fnmatch
import os
from os import path
def globber(rootpath, wildcard):
    """Yield the full path of every file under ``rootpath`` matching ``wildcard``.

    Args:
        rootpath: Directory to search recursively with ``os.walk``.
        wildcard: ``fnmatch``-style pattern, tested against the bare file
            name (not the full path).

    Yields:
        The containing directory joined with each matching file name.
    """
    for root, _dirs, files in os.walk(rootpath):
        for file in files:
            if fnmatch(file, wildcard):
                yield path.join(root, file)
features_to_delete = ['blah', 'oh', 'xyz']
# A set gives constant-time membership tests for the names to remove.
todelete = {'%s.jpg' % x for x in features_to_delete}
print(todelete)
for f in globber('/home/prooney', "*.jpg"):
    # globber yields full paths while todelete holds bare file names, so the
    # membership test must look at the last path component only; comparing
    # the full path would never match.
    if path.basename(f) in todelete:
        print('deleting file: %s' % f)
        os.remove(f)
Please look if this code helps you. I included a timer that compares the time of the two different approaches.
import os
from timeit import default_timer as timer
# Timing comparison of two ways to delete listed .jpg files (Python 2 print
# statements). First approach: one full walk of the tree per list entry.
features_to_delete = ['a','b','c']
start = timer()
for x in features_to_delete:
name_checker = str(x) + '.jpg'
print 'this is name checker {}'.format(name_checker)
folder = '.'
for root, dir2, files in os.walk(folder):
print 'This is the root directory at the moment:{} The following are files inside of it'.format(root)
for b in files:
if b.endswith('.jpg'):
# `root` already starts with `folder`, so the join repeats the prefix; this
# is why the sample output shows paths such as "././foo/bar/b.jpg".
local_folder = os.path.join(folder, root)
print 'Here is name of file {}'.format(b)
print 'Here is name of name checker {}'.format(name_checker)
# NOTE(review): counter is re-assigned to 0 right before the comparison, so
# it does not accumulate across files.
counter = 0
if b == name_checker:
counter += 1
print '{} needs to be deleted..'.format(b)
os.remove(os.path.join(local_folder, b))
print 'Removed {} \n'.format(os.path.join(local_folder, b))
else:
print 'This file can stay {} \n'.format(b)
else:
pass
end = timer()
print(end - start)
# Second approach: a single walk, intersecting each directory's file names
# with the delete list.
start = timer()
features_to_delete = ['d','e','f']
# NOTE(review): `matches` is never used below.
matches = []
folder = '.'
# NOTE(review): rebinding `x` does not change the list; the comprehension on
# the line after this loop is what actually appends '.jpg'.
for x in features_to_delete:
x = str(x) + '.jpg'
features_to_delete = [e + '.jpg' for e in features_to_delete]
print 'features' + str(features_to_delete)
for root, dirnames, filenames in os.walk(folder):
for filename in set(filenames).intersection(features_to_delete):#fnmatch.filter(filenames, features_to_delete)# fnmatch.filter(filenames, features_to_delete):
local_folder = os.path.join(folder, root)
os.remove(os.path.join(local_folder, filename))
print 'Removed {} \n'.format(os.path.join(local_folder, filename))
end = timer()
print(end - start)
Test
$ touch foo/bar/d.jpg
$ touch foo/bar/b.jpg
$ python deletefiles.py
this is name checker a.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker a.jpg
This file can stay d.jpg
Here is name of file b.jpg
Here is name of name checker a.jpg
This file can stay b.jpg
this is name checker b.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker b.jpg
This file can stay d.jpg
Here is name of file b.jpg
Here is name of name checker b.jpg
b.jpg needs to be deleted..
Removed ././foo/bar/b.jpg
this is name checker c.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker c.jpg
This file can stay d.jpg
0.000916957855225
features['d.jpg', 'e.jpg', 'f.jpg']
Removed ././foo/bar/d.jpg
0.000241994857788

Getting paths of each file of a directory into an Array in python

I'm trying to collect the path of every file under the Data folder into a list, files[]. My code reads the files directly in Data and in its immediate subfolders, but it does not go any deeper: it cannot reach a subfolder of a subfolder without my writing another loop. What I want is a loop with unlimited depth, so that I get the paths of all files under Data, however deeply they are nested.
For example this is what I get:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt']
This is what I want but it can still go into deeper folders:
['Data/DataReader.py', 'Data/DataReader - Copy.py', 'Data/Dat/DataReader.py', 'Data/fge/er.txt', 'Data/fge/Folder/dummy.png', 'Data/fge/Folder/AnotherFolder/data.dat']
This is my current path, what would i need to add or change?
import os
from os import walk
# Collect file paths under 'Data' by hand: first the top level, then one
# level of sub-folders. (Python 2 print statements.)
files = []
folders = []
# walk() yields (directory path, sub-directory names, file names); with the
# names chosen here, `dirpath` receives the sub-directory names and
# `filename` the list of file names.
for (dirname, dirpath, filename) in walk('Data'):
folders.extend(dirpath)
files.extend(filename)
# Leaving the loop here means only the first directory yielded by walk() is
# processed.
break
# Prefix every collected file name with 'Data/'.
filecount = 0
for i in files:
i = 'Data/' + i
files[filecount] = i
filecount += 1
# Prefix every collected folder name with 'Data/'.
foldercount = 0
for i in folders:
i = 'Data/' + i
folders[foldercount] = i
foldercount += 1
subfolders = []
subf_files = []
# Repeat the same one-level listing for each folder found above; nothing
# below that second level is visited.
for i in folders:
for (dirname, dirpath, filename) in walk(i):
subfolders.extend(dirpath)
subf_files.extend(filename)
break
# NOTE(review): subf_files_count is assigned but never used.
subf_files_count = 0
for a in subf_files:
a = i + '/'+a
# NOTE(review): this self-assignment has no effect.
files = files
files.append(a)
print files
subf_files = []
print files
print folders
Thanks a lot!
I don't understand what you are trying to do, especially why you break out of the walk after the first element:
import os
files = []
folders = []
# os.walk already descends into every sub-directory, at any depth, so a
# single pass collects the whole tree.
for path, dirnames, filenames in os.walk('Data'):
    folders.extend(os.path.join(path, name) for name in dirnames)
    files.extend(os.path.join(path, name) for name in filenames)
# print(...) with a single argument behaves the same on Python 2 and 3; the
# bare print statement is a SyntaxError on Python 3.
print(files)
print(folders)

Categories

Resources