I'm trying to use a For loop in the code below to go through a list of files and rename them with the file directory's name.
import re # add this to your other imports
import os
# Rename every "Position<N>" file to "<folder>_Position<N><ext>".
# os.walk yields (dirpath, dirnames, filenames) tuples, so the directory and
# the file names have to be unpacked separately.
for root, _dirs, filenames in os.walk("."):
    # Name of the directory that directly contains the files.
    folder = os.path.basename(os.path.abspath(root))
    for f_new in filenames:
        match = re.search(r'(Position)(\d+)', f_new)
        if match is None:
            # Not a "Position<N>" file: leave it untouched.
            continue
        print(folder)
        # int() drops the zero padding, e.g. "Position014" -> "Position14".
        name = match.group(1) + str(int(match.group(2)))
        print(name)  # just for demonstration
        # Keep the original extension so the file type is not lost.
        dst = folder + '_' + name + os.path.splitext(f_new)[1]
        print(dst)
        # Rename the file that was actually found, inside its own directory,
        # instead of one hard-coded file name in the working directory.
        os.rename(os.path.join(root, f_new), os.path.join(root, dst))
Use pathlib
Path.rglob: This is like calling Path.glob() with '**/' added in front of the given relative pattern:
.parent or .parents[0]: An immutable sequence providing access to the logical ancestors of the path
If you want different parts of the path, index parents[] differently:
file.parents[0].stem returns 'test1' or 'test2' depending on the file
file.parents[1].stem returns 'photos'
file.parents[2].stem returns 'stack_overflow'
.stem: The final path component, without its suffix
.suffix: The file extension of the final component
.rename: Rename this file or directory to the given target
The following code finds only .tiff files. Use *.* to match every file that has an extension.
If you only want the first 10 characters of file_name:
file_name = file_name[:10]
from pathlib import Path

# set path to files
p = Path('e:/PythonProjects/stack_overflow/photos/')

# get all files in subdirectories with a tiff extension; list() takes a
# snapshot up front so the renames below cannot disturb the directory scan
files = list(p.rglob('*.tiff'))

# print files example:
# [WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test.tiff')]

# iterate through files
for file in files:
    file_path = file.parent  # get only path
    # get the directory name (.stem drops anything after the last dot;
    # use .name instead if directory names may contain dots)
    dir_name = file.parent.stem
    file_name = file.stem  # get the file name
    suffix = file.suffix  # get the file extension
    file_name_new = f'{dir_name}_{file_name}{suffix}'  # make the new file name
    file.rename(file_path / file_name_new)  # rename the file

# output files renamed:
# [WindowsPath('e:/PythonProjects/stack_overflow/photos/test1/test1_test.tiff'), WindowsPath('e:/PythonProjects/stack_overflow/photos/test2/test2_test.tiff')]
Related
I'm trying to read the folder name at the specific place from the file path. My current code:
import os
# search for and input multiple files
def get_files(source):
    """Return the path of every file found under *source*, recursively.

    Each entry is the walked directory joined with the file name, in the
    order in which os.walk reports them.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(source)
        for entry in entries
    ]
def parse(files):
    """Print a metadata record (directory path and customer name) per file path.

    files: iterable of file path strings, e.g. the list built by get_files().
    """
    for file in files:
        xml_information = {}
        metadata = []
        # Get the file path
        filepath = os.path.dirname(file)
        xml_information['file_path'] = '%s' % filepath
        # Get customer name
        # NOTE(review): index 5 is the sixth backslash-separated component of
        # the directory; a directory with fewer components raises IndexError
        # (presumably files lying directly in the search root - confirm).
        customer = filepath.split("\\")[5]
        xml_information['customer_name'] = '%s' % customer
        metadata.append(xml_information)
        print(metadata)
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(get_files(path))
My program searches through folders, finds the files, and reports back their folder paths. However, I would like to read the folder path as well as the folder name in the sixth position, which is the customer name. When I run customer = filepath.split("\\")[5], it reports an error:
Traceback (most recent call last):
File "*hidden*", line 33, in <module>
parse(get_files(path))
File "*hidden*", line 26, in parse
customer = filepath.split("\\")[5]
~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range
However, when I run with customer = filepath.split("\\")[4], the program works and reads the last folder specified in path which is Lania Thompson - Searching Project Files. The result is as follows:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Lania Thompson - Searching Project Files'}]
My expected result is Hazor Ltd:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Hazor Ltd'}]
The names are made up except from my name
So I have worked out the code using the pathlib library. The code is:
import os
from pathlib import Path
# search for and input multiple files
def get_files(source):
    """Collect the full path of each file below *source* (recursive walk)."""
    found = []
    for folder, _subdirs, names in os.walk(source):
        # One extend per directory instead of one append per file.
        found.extend(os.path.join(folder, name) for name in names)
    return found
def parse(files, level=5):
    """Print the customer folder name for every path in *files*.

    The customer is the path component at position *level* (0-based, as in
    ``Path.parts``) of the directory that contains the file.  The default of
    5 is the position that was previously hard-coded; pass another number to
    read the folder name from a different place.

    files: iterable of file path strings (e.g. the result of get_files()).
    level: index into the directory's ``parts`` tuple.

    For each file a one-element list ``[{'Customer': name}]`` is printed.
    ``name`` is None when the file's directory is too shallow to have a
    component at *level* (for example a file lying directly in the search
    root), instead of picking up an unrelated file or folder name.
    """
    for file in files:
        xml_information = {}
        metadata = []
        # Get the file path
        filepath = os.path.dirname(file)
        # Get customer name straight from the directory's components; there is
        # no need to list the directory tree again for every single file.
        parts = Path(filepath).parts
        xml_information['Customer'] = parts[level] if len(parts) > level else None
        metadata.append(xml_information)
        print(metadata)
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(get_files(path))
change the number [5] in xml_information['Customer'] = f.parts[5] for the place you want to get the folder's name from.
I need help finding files inside a folder by name. I can do this with one file name; how can I do it with two file names?
This is the code used
path = r"Z:/Equities/ReferencePrice/"
files = []
for file in glob.glob(os.path.join(path ,"*OptionOnEquitiesReferencePriceFile*"+"*.txt*")):
df = pd.read_csv(file, delimiter = ';')
the first file contains the name
"OptionOnEquitiesReferencePriceFile"
the Second file contains the name
"BDRReferencePrice"
How do I add the second file name, so that the search matches one name, the other, or both?
I don't think you can do that in a straightforward way, so here's an alternative solution (with a function) that you can use:
import os
from fnmatch import fnmatch
# folder path :
# in this path I have many files: some start with 'other',
# some with 'test', and some have random names.
# in the example I'm fetching only the 'test' and 'other' patterns
dir_path = './test_dir'
def find_by_patterns(patterns, path):
    """Return the paths of all files under *path* whose name matches a pattern.

    patterns: an iterable of fnmatch-style patterns (e.g. ['test*.txt',
        'other*.txt']); a single pattern string is accepted as well.
    path: directory that is searched recursively with os.walk.

    Returns a list of ``os.path.join(root, name)`` strings; the list is empty
    when nothing matches or when *patterns* is empty.
    """
    if isinstance(patterns, str):
        # A bare string would otherwise be iterated character by character.
        patterns = [patterns]
    # Materialise once so a generator argument survives the repeated scans.
    patterns = tuple(patterns)
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(path)
        for name in files
        # any() short-circuits and, unlike max(), accepts an empty sequence.
        if any(fnmatch(name, pattern) for pattern in patterns)
    ]
# printing the results
print(find_by_patterns(['test*.txt', 'other*.txt'], dir_path))
output:
['./test_dir/other1.txt', './test_dir/other2.txt', './test_dir/test1.txt', './test_dir/test2.txt', './test_dir/test3.txt']
I've created a list that contains file paths to files that I want to delete. What's the most Pythonic way to search through a folder and its subfolders for these files, then delete them?
Currently I'm looping through the list of file paths, then walking through a directory and comparing the files in the directory to the file that is in the list. There has to be a better way.
for x in features_to_delete:
name_checker = str(x) + '.jpg'
print 'this is name checker {}'.format(name_checker)
for root, dir2, files in os.walk(folder):
print 'This is the root directory at the moment:{} The following are files inside of it'.format(root)
for b in files:
if b.endswith('.jpg'):
local_folder = os.path.join(folder, root)
print 'Here is name of file {}'.format(b)
print 'Here is name of name checker {}'.format(name_checker)
if b == name_checker:
counter += 1
print '{} needs to be deleted..'.format(b)
#os.remove(os.path.join(local_folder, b))
print 'Removed {} \n'.format(os.path.join(day_folder, b))
else:
print 'This file can stay {} \n'.format(b)
else:
pass
So to clarify, what I'm doing now is looping through the entire list of features to delete, every iteration I'm also looping through every single file in the directory and all sub directories and comparing that file to the file that is currently looping in the features to delete list. It takes a very long time and seems like a terrible way to go about doing it.
You should only visit each directory once. You can use sets to compare the list of file names in a given directory to your delete list. Finding the contained and not-contained files then becomes a simple one-step operation. If you don't care about printing out the file names, it's rather compact:
delete_set = set(str(x) + '.jpg' for x in features_to_delete)
for root, dirs, files in os.walk(folder):
for delete_name in delete_set.intersection(files):
os.remove(os.path.join(root, delete_name))
But if you want to print as you go, you have to add a few intermediate variables
delete_set = set(str(x) + '.jpg' for x in features_to_delete)
for root, dirs, files in os.walk(folder):
files = set(files)
delete_these = delete_set & files
keep_these = files - delete_set
print 'This is the root directory at the moment:{} The following are files inside of it'.format(root)
print 'delete these: {}'.format('\n '.join(delete_these))
print 'keep these: {}'.format('\n '.join(keep_these))
for delete_name in delete_these:
os.remove(os.path.join(root, delete_name))
Create a function to separate the recursive glob like functionality from your own deletion logic. Then just iterate over the list and delete any that match your blacklist.
You can make a set to give improved performance matching the file names. The larger the list the greater the improvement, but for smaller lists it might be negligible.
from fnmatch import fnmatch
import os
from os import path
def globber(rootpath, wildcard):
    """Yield the path of each file under *rootpath* whose name matches *wildcard*.

    The tree is walked recursively; *wildcard* is an fnmatch-style pattern
    that is tested against the bare file name only.
    """
    for folder, _subdirs, names in os.walk(rootpath):
        matching = (name for name in names if fnmatch(name, wildcard))
        for name in matching:
            yield path.join(folder, name)
features_to_delete = ['blah', 'oh', 'xyz']
# Bare file names (not paths) that should be removed; a set gives O(1) lookups.
todelete = {'%s.jpg' % x for x in features_to_delete}
print(todelete)
for f in globber('/home/prooney', "*.jpg"):
    # globber yields full paths, so only the file-name component can be
    # compared with the blacklist; comparing the whole path never matches.
    if path.basename(f) in todelete:
        print('deleting file: %s' % f)
        os.remove(f)
Please look if this code helps you. I included a timer that compares the time of the two different approaches.
import os
from timeit import default_timer as timer
features_to_delete = ['a','b','c']
start = timer()
for x in features_to_delete:
name_checker = str(x) + '.jpg'
print 'this is name checker {}'.format(name_checker)
folder = '.'
for root, dir2, files in os.walk(folder):
print 'This is the root directory at the moment:{} The following are files inside of it'.format(root)
for b in files:
if b.endswith('.jpg'):
local_folder = os.path.join(folder, root)
print 'Here is name of file {}'.format(b)
print 'Here is name of name checker {}'.format(name_checker)
counter = 0
if b == name_checker:
counter += 1
print '{} needs to be deleted..'.format(b)
os.remove(os.path.join(local_folder, b))
print 'Removed {} \n'.format(os.path.join(local_folder, b))
else:
print 'This file can stay {} \n'.format(b)
else:
pass
end = timer()
print(end - start)
# Second approach: walk the tree once and intersect each directory listing
# with the set of names to delete.
start = timer()
features_to_delete = ['d', 'e', 'f']
folder = '.'
# Build the ".jpg" file names once (rebinding a loop variable would not
# change the list).
features_to_delete = [str(e) + '.jpg' for e in features_to_delete]
print('features' + str(features_to_delete))
# Hoisted out of the walk so the set is built a single time.
delete_set = set(features_to_delete)
for root, dirnames, filenames in os.walk(folder):
    for filename in delete_set.intersection(filenames):
        # root already starts with folder; the join is kept so the printed
        # paths match the first approach (e.g. "././foo/bar/d.jpg").
        local_folder = os.path.join(folder, root)
        os.remove(os.path.join(local_folder, filename))
        print('Removed {} \n'.format(os.path.join(local_folder, filename)))
end = timer()
print(end - start)
Test
$ touch foo/bar/d.jpg
$ touch foo/bar/b.jpg
$ python deletefiles.py
this is name checker a.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker a.jpg
This file can stay d.jpg
Here is name of file b.jpg
Here is name of name checker a.jpg
This file can stay b.jpg
this is name checker b.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker b.jpg
This file can stay d.jpg
Here is name of file b.jpg
Here is name of name checker b.jpg
b.jpg needs to be deleted..
Removed ././foo/bar/b.jpg
this is name checker c.jpg
This is the root directory at the moment:. The following are files inside of it
This is the root directory at the moment:./.idea The following are files inside of it
This is the root directory at the moment:./foo The following are files inside of it
This is the root directory at the moment:./foo/bar The following are files inside of it
Here is name of file d.jpg
Here is name of name checker c.jpg
This file can stay d.jpg
0.000916957855225
features['d.jpg', 'e.jpg', 'f.jpg']
Removed ././foo/bar/d.jpg
0.000241994857788
I have some files in a folder named like this: test_1999.0000_seconds.vtk. What I would like to do is change the name of each file to the form test_1999.0000.vtk.
You can use os.rename
os.rename("test_1999.0000_seconds.vtk", "test_1999.0000.vtk")
import os
currentPath = os.getcwd()  # get the current working directory
unWantedString = "_seconds"
matchingFiles = []
for path, subdirs, files in os.walk(currentPath):
    for f in files:
        if f.endswith(".vtk"):  # To group the vtk files
            # os.path.join picks the right separator on every platform
            matchingFiles.append(os.path.join(path, f))
print(matchingFiles)
for mf in matchingFiles:
    # Work on the file name only, so a directory whose name happens to
    # contain the unwanted string is not altered in the target path.
    folder, fileName = os.path.split(mf)
    if unWantedString in fileName:
        oldName = mf
        # remove the substring from the file name
        newName = os.path.join(folder, fileName.replace(unWantedString, ''))
        os.rename(oldName, newName)  # rename the old file to the new name
I am writing a Python function to change the extension of a list of files into another extension, like txt into rar, that's just an idle example. But I'm getting an error. The code is:
import os
def dTask():
#Get a file name list
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
#Change the extensions
for file_name in file_list:
entry_pos = 0;
#Filter the file name first for '.'
for position in range(0, len(file_name)):
if file_name[position] == '.':
break
new_file_name = file_name[0:position]
#Filtering done !
#Using the name filtered, add extension to that name
new_file_name = new_file_name + '.rar'
#rename the entry in the file list, using new file name
print 'Expected change from: ', file_list[entry_pos]
print 'into File name: ', new_file_name
os.rename(file_list[entry_pos], new_file_name)
++entry_pos
Error:
>>> dTask()
Expected change from: New Text Document (2).txt
into File name: New Text Document (2).rar
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
dTask()
File "C:\Users\B\Desktop\dTask.py", line 19, in dTask
os.rename(file_list[entry_pos], new_file_name)
WindowsError: [Error 2] The system cannot find the file specified
I can succeed in getting the file name with another extension in variable level as you can see in the print-out, but not in reality because I can not end this process in OS level. The error is coming from os.rename(...). Any idea how to fix this ?
As the others have already stated, you either need to provide the path to those files or switch the current working directory so the os can find the files.
++entry_pos doesn't do anything. There is no increment operator in Python. Prefix + is just there for symmetry with prefix -. Prefixing something with two + is just two no-ops. So you're not actually doing anything (and after you change it to entry_pos += 1, you're still resetting it to zero in each iteration).
Also, your code is very inelegant - for example, you are using a separate index into file_list and fail to keep it in sync with the iteration variable file_name, even though you could just use that one! Here is how this can be done better:
-
def rename_by_ext(to_ext, path):
    """Give every entry in directory *path* the extension *to_ext*.

    to_ext: the new extension, with or without the leading dot.
    path: directory whose entries (everything os.listdir returns, so
        sub-directories too) are renamed in place.

    Only the last extension is replaced ("b.tar.gz" -> "b.tar" + to_ext).
    Raises ValueError if *to_ext* is empty; OSError from os.listdir or
    os.rename propagates unchanged.
    """
    if not to_ext:
        raise ValueError("to_ext must not be empty")
    if not to_ext.startswith('.'):
        to_ext = '.' + to_ext
    print("Renaming files in %s" % path)
    for file_name in os.listdir(path):
        root, _old_ext = os.path.splitext(file_name)
        new_name = root + to_ext
        # Report the real target name, not the name the file already has.
        print("Renaming %s to %s" % (file_name, new_name))
        os.rename(os.path.join(path, file_name), os.path.join(path, new_name))
rename_by_ext('.rar', '...')
os.rename really doesn't like variables. Use shutil. Example taken from How to copy and move files with Shutil.
import shutil
import os
source_dir = "/tmp/"
source = os.listdir(source_dir)
destination = "/tmp/newfolder/"
# Without an existing target directory shutil.move would rename the first
# file to "newfolder" instead of moving it into a folder.
if not os.path.isdir(destination):
    os.makedirs(destination)
for files in source:
    if files.endswith(".txt"):
        # os.listdir returns bare names; join them with the source directory
        # so the move works regardless of the current working directory.
        shutil.move(os.path.join(source_dir, files), destination)
In your case:
import shutil
shutil.move(file_list[entry_pos], new_file_name)
You also want to double backslashes to escape them in Python strings, so instead of
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
you want
file_list = os.listdir('C:\\Users\\B\\Desktop\\sil\\sil2')
Or use forward slashes - Python magically treats them as path separators on Windows.
You must use the full path for the rename.
import os
def dTask(directory=r'C:\Users\B\Desktop\sil\sil2', new_ext='.rar'):
    """Change the extension of every entry in *directory* to *new_ext*.

    directory: folder whose entries are renamed; defaults to the folder that
        was previously hard-coded (raw string, so the backslashes are literal).
    new_ext: the extension to apply, with or without the leading dot.

    Each entry is renamed through its full path, so the call works from any
    working directory.  Only the last extension is replaced, and a name
    without any dot simply gets the new extension appended.
    """
    if not new_ext.startswith('.'):
        new_ext = '.' + new_ext
    # Iterate over the names directly; a separate index into the listing
    # would have to be kept in sync by hand.
    for file_name in os.listdir(directory):
        base, _old_ext = os.path.splitext(file_name)
        new_file_name = base + new_ext
        print('Expected change from: %s' % file_name)
        print('into File name: %s' % new_file_name)
        os.rename(os.path.join(directory, file_name),
                  os.path.join(directory, new_file_name))
If you aren't in the directory C:\Users\B\Desktop\sil\sil2, then Python certainly won't be able to find those files.
import os
def extChange(path, newExt, oldExt=""):
    """Rename entries of directory *path* so they end in ``.newExt``.

    path: directory to process (with or without a trailing separator).
    newExt: extension to apply, e.g. "txt"; a leading dot is tolerated.
    oldExt: when given, only entries whose current extension is ``.oldExt``
        are renamed; when empty (the default) every entry is renamed.

    Extensions of any length are handled, because the name is split with
    os.path.splitext instead of cutting off a fixed four characters.
    """
    newExt = newExt.lstrip(".")
    oldExt = oldExt.lstrip(".")
    for entry in os.listdir(path):
        stem, ext = os.path.splitext(entry)
        if oldExt != "" and ext != "." + oldExt:
            # A filter was requested and this entry does not match it.
            continue
        # os.path.join copes with both "dir" and "dir/" style arguments.
        os.rename(os.path.join(path, entry),
                  os.path.join(path, stem + "." + newExt))
now call it:
extChange("C:/testfolder/","txt","lua") # newExt="txt", oldExt="lua": renames every .lua file in C:/testfolder to .txt
extChange("C:/testfolder/","txt") #leaving the last parameter out will change all files in C:/testfolder to .txt