How to erase certain names in files with Python - python

I am trying to erase certain parts of the names of subfolders that were downloaded with weird names containing useless information (in this case, these are movie files, so things like 720p, BluRay, etc.).
My code so far is this:
os.chdir(r"E:\Users\Lucas\HD externo\Filmes_séries_documentários\Diretores")
lista = ["1080p","720p", "BDrip", "BRRip", "Xvid", "XViD","XviD", "ETRG", "Woody Allen", "DVDRip", "avi", "x264", "AC3-JYK", "BluRay", "DC", ".....anoXmous", "EXTENDED", "pt", "BR"]
for i in range(len(os.listdir())):
for b in os.listdir():
os.chdir(r"E:\Users\Lucas\HD externo\Filmes_séries_documentários\Diretores\\" + os.listdir()[i])
for c in range(len(os.listdir())):
for item in lista:
if item in os.listdir()[i]:
a = os.listdir()[i].replace(item, "")
os.rename(os.listdir()[i], a)

It's a good idea to use os.walk when looking through a directory structure.
If you want to remove the file, I'd use os.remove(filename)
import os

# Top-level folder to scan. A raw string literal cannot end in a single
# backslash, so the trailing separator is left off.
top_dir = r"C:\Windows\Path"
# Substrings that mark a file for deletion.
lista = ['all', 'of', 'the', 'things']

for root, dirs, files in os.walk(top_dir):
    for file in files:
        # Delete each file at most once, even when several substrings match;
        # a second os.remove() on the same path would raise FileNotFoundError.
        if any(part in file for part in lista):
            os.remove(os.path.join(root, file))

Related

Python - move all files from one folder to another if their file names contain specified words

I have a folder with many files named like homeXXX_roomXXX_high.csv or homeXXX_roomXXX_low.csv, where the XXX part is replaced with a three-digit number.
I want to use some code to move the files into separate folders based on the number next to "home" in the filename. For example, I want to specify that files with names starting home101, home103, home320, home553, etc. should all be moved into folder A whereas those starting with home555, home431, home105 should go to FolderB.
I have this code so far:
import shutil
import os
source = '/path/to/source_folder'
dest1 = '/path/to/FolderA'
dest2 = '/path/to/FolderB'
files = os.listdir(source)
for f in files:
if (f.startswith("home101") or f.startswith("home103")):
shutil.move(f, dest1)
elif (f.startswith("home431") or f.startswith("home555")):
shutil.move(f, dest2)
However, it's tedious to specify all the if and else cases. I'd like to use some kind of structured data, such as a list, to specify groups of "home" numbers and the corresponding folder paths. How can I do this in Python?
it seems like you can use another for, it would look something like this:
import shutil
import os

source = '/path/to/source_folder'
dest1 = '/path/to/FolderA'
dest2 = '/path/to/FolderB'
list1 = ["home101", "home103"]
list2 = ["home431", "home555"]

# str.startswith accepts a tuple of prefixes, so each group needs one test.
prefixes1 = tuple(list1)
prefixes2 = tuple(list2)

files = os.listdir(source)
for f in files:
    # os.listdir returns bare names; join with the source folder so the move
    # works regardless of the current working directory.
    src_path = os.path.join(source, f)
    # if/elif guarantees a file is moved at most once, even if it happens to
    # match a prefix in both groups.
    if f.startswith(prefixes1):
        shutil.move(src_path, dest1)
    elif f.startswith(prefixes2):
        shutil.move(src_path, dest2)
You can also create a function:
def check_and_move(file, list_of_patterns, destination):
    """Move *file* to *destination* if its name starts with any pattern.

    Args:
        file: Bare file name (resolved against the current working
            directory) or a full path to the file.
        list_of_patterns: Iterable of filename prefixes to test.
        destination: Folder (or target path) passed to ``shutil.move``.

    Returns:
        True if the file was moved, False if no pattern matched.
    """
    # Match on the file name only, so callers may pass a full path.
    name = os.path.basename(file)
    # A tuple lets startswith test every prefix in one call and ensures the
    # file is moved only once even when several prefixes match.
    if name.startswith(tuple(list_of_patterns)):
        shutil.move(file, destination)
        return True
    return False
and the code will get cleaner because you avoid repetition :)
for f in files:
check_and_move(f, list1, dest1)
check_and_move(f, list2, dest2)
# etc...
You can make an array for folderA that contains the "home+number"
FolderAGroup = ['home101', 'home103', 'homeXXX', 'homeXXX']
And if they get split like you say with a "_" use this code to filter them
Won't work if they are not split like that.
files = os.listdir(source)
# Set membership is an exact match on the whole "homeXXX" token.
folder_a_homes = set(FolderAGroup)
for f in files:
    # Get the first part of the filename before the _
    home_number = f.split('_', 1)[0]
    # os.listdir returns bare names; join with the source folder so the move
    # does not depend on the current working directory.
    src_path = os.path.join(source, f)
    # Check if the home number is in the FolderA group
    if home_number in folder_a_homes:
        shutil.move(src_path, dest1)
    else:
        shutil.move(src_path, dest2)
You can expand with more elif statements if you would want more folders.
If the names homexxx are incremental, you could try something like this:
# "home0" .. "home99" go to the first destination, "home100" .. "home199"
# to the second.
home_names_list_1 = ["home" + str(i) for i in range(100)]
home_names_list_2 = ["home" + str(i) for i in range(100, 200)]

for file in files:
    # Compare the whole "homeNNN" token (the text before the first "_")
    # rather than a prefix: with startswith, "home1" would also match
    # "home101_...". Assumes names look like homeNNN_roomNNN_... as in the
    # question.
    home_name = file.split("_", 1)[0]
    if home_name in home_names_list_1:
        print("move somewhere")
    elif home_name in home_names_list_2:
        print("move somewhere else")
    else:
        # Reached only when neither group matched; every file is examined.
        print(" did not move because did not match anything")

How to get the full file path including the directory?

I have a quite complex problem. I have multiple filenames in a list, the root directory of those files is the same: mother_directory. However, every file is in a different subdirectory. Now I have a script which is processing some files and I need to know the exact full path, including the subdirectories, of every file. I know that I could use os.walk, but that will make my function too nested, as inside this function I'm planning to use another function which uses those full paths.
This is the file structure:
mother_directory:
|_child1:
20211011.xml
20211001.xml
|_child2:
20211002.xml
This is my current code:
mother_path = r'c:\data\user1\Desktop\mother_directory'
blue_dates = ['20211011', '20211012', '20211013', '20211001', '20211002']
red_dates = ['20211011', '20211009', '20211008', '20211001', '20211002']
file_names = ['20211011.xml', '20211001.xml', '20211002.xml']
def process_files(x):
if x in red_dates:
match_file = [s for s in file_names if x in s]
file_path = os.path.join(mother_path, match_file [0])
print(file_path)
for x in blue_dates:
process_files(x)
My current output:
c:\data\user1\Desktop\mother_directory\20211011.xml
c:\data\user1\Desktop\mother_directory\20211001.xml
c:\data\user1\Desktop\mother_directory\20211002.xml
When I run my function I want my desired output to be like this:
c:\data\user1\Desktop\mother_directory\child1\20211011.xml
c:\data\user1\Desktop\mother_directory\child1\20211001.xml
c:\data\user1\Desktop\mother_directory\child2\20211002.xml
I added a condition, I believe it will work now.
def process_files(x):
    """Print the full path(s) of the known file whose name contains date *x*.

    Nothing is printed when *x* is not in ``red_dates`` or when no entry of
    ``file_names`` contains *x*. Otherwise the tree under ``mother_path`` is
    walked and the path of every file named exactly like the first matching
    entry is printed.
    """
    if x not in red_dates:
        return
    match_file = [s for s in file_names if x in s]
    if not match_file:
        # No known file for this date; indexing [0] would raise IndexError.
        return
    target = match_file[0]
    for root, dirs, files in os.walk(mother_path):
        # Exact name comparison: a substring test would also fire for files
        # such as "copy_of_<target>" and print a path that does not exist.
        if target in files:
            print(os.path.join(root, target))

Is there a way to dynamically generate a dictionary from looping thru a directory of files

I have some files like below in a directory where I'm looping thru directory to processed each one.
tenents.txt
people.txt
customers.txt
clients'.txt
I'm trying to dynamically create a list of dictionaries like the one below. I have searched high and low and tried similar examples on here, but I can't get the desired output. I need someone's help in identifying what I'm doing wrong:
expected output:
[{'input' : 'tenents.txt' , 'config':'tenents.json'},{'input' : 'people.txt' , 'config':'people.json'}
{'input' : 'customers.txt' , 'config':'customers.json'},{'input' : 'clients.txt' , 'config':'clients.json'}]
actual output from my code:
[{'config': 'tenents.json','input': 'tenents.txt'}]
[{'config': 'people.json','input': 'people.txt'}]
[{'config': 'customers.json','input': 'customers.txt'}]
[{'config': 'clients.json','input': 'clients.txt'}]
Also, why are the keys in the wrong order from left to right? Why is the config key coming before the input key even after I try to sort it?
Snippet of my code that's not working:
import os
from pprint import pprint
filelist = []
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".txt"):
keys = ['input', 'config']
filename = os.path.join(root, file)
cname = filename.rstrip('.txt') + '.json'
names = [[filename, cname]]
filelist = [{k: v for k, v in zip(keys, n)} for n in names]
pprint(filelist)
I'll appreciate someone's help. I'm using anaconda python 3.
The following code seems to work:
file_list = []
suffix = '.txt'
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith(suffix):
            # Swap only the trailing extension; str.replace would also
            # rewrite a ".txt" that appears earlier in the name.
            config_name = file[:-len(suffix)] + '.json'
            file_list.append({'input': file, 'config': config_name})
# Print once, after the walk, so the output is a single combined list.
print(file_list)
I just created a file list like you did, and appended a dictionary to it for each file with the respective values.

how can i rename files with different names at once in python

path = '/Users/my/path/tofile'
files = os.listdir(path)
names= ["GAT4", "LO", "sds"]
for filename in files:
if files.startswith("sample" + str[i]):
original_file= os.path.join(path, filename)
new_file= os.path.join(path, names.join([str(i), '.html']))
os.rename(original_file, new_file)
I have many files and I want to rename all of them using Python code that changes each name to a given name from a list.
For example, I have a list x = [sample1, sample236, GAT988] and my files are named like: exp1.html exp2.html exp3.html
How can I make the file name become GAT988.html instead of exp3.html?
thank you.

Rename multiple files inside multiple folders

So I have a lot of folders with a certain name. In each folder I have +200 items. The items inside the folders have names like:
CT.34562346.246.dcm
RD.34562346.dcm
RN.34562346.LAO.dcm
And some along that style.
I now wish to rename all files inside all folders so that the number (34562346) is replaced with the name of the folder. So for example in the folder named "1" the files inside should become:
CT.1.246.dcm
RD.1.dcm
RN.1.LAO.dcm
So only the large number is replaced. And yes, all files are similar like this. It would be the number after the first . that should be renamed.
So far I have:
import os
base_dir = "foo/bar/" #In this dir I have all my folders
dir_list = []
for dirname in os.walk(base_dir):
dir_list.append(dirname[0])
This one just lists the entire paths of all folders.
dir_list_split = []
for name in dir_list[1:]: #The 1 is because it lists the base_dir as well
x = name.split('/')[2]
dir_list_split.append(x)
This one extracts the name of each folder.
And then the next thing would be to enter the folders and rename them. And I'm kind of stuck here ?
The pathlib module, which was new in Python 3.4, is often overlooked. I find that it often makes code simpler than it would otherwise be with os.walk.
In this case, .glob('**/*.*') looks recursively through all of the folders and subfolders that I created in a sample folder called example. The *.* part means that it considers all files.
I put path.parts in the loop to show you that pathlib arranges to parse pathnames for you.
I check that the string constant '34562346' is in its correct position in each filename first. If it is, then I simply replace it with the item from .parts that is the next level of folder 'up' the folder tree.
Then I can replace the rightmost element of .parts with the newly altered filename to create the new pathname and then do the rename. In each case I display the new pathname, if it was appropriate to create one.
>>> from pathlib import Path
>>> from os import rename
>>> for path in Path('example').glob('**/*.*'):
... path.parts
... if path.parts[-1][3:11]=='34562346':
... new_name = path.parts[-1].replace('34562346', path.parts[-2])
... new_path = '/'.join(list(path.parts[:-1])+[new_name])
... new_path
... ## rename(str(path), new_path)
... else:
... 'no change'
...
('example', 'folder_1', 'id.34562346.6.a.txt')
'example/folder_1/id.folder_1.6.a.txt'
('example', 'folder_1', 'id.34562346.wax.txt')
'example/folder_1/id.folder_1.wax.txt'
('example', 'folder_2', 'subfolder_1', 'ty.34562346.90.py')
'example/folder_2/subfolder_1/ty.subfolder_1.90.py'
('example', 'folder_2', 'subfolder_1', 'tz.34562346.98.py')
'example/folder_2/subfolder_1/tz.subfolder_1.98.py'
('example', 'folder_2', 'subfolder_2', 'doc.34.34562346.implication.rtf')
'no change'
This will rename files in subdirectories too:
import os

rootdir = "foo" + os.sep + "bar"
for subdir, dirs, files in os.walk(rootdir):
    # Name of the folder that directly contains the files.
    foldername = os.path.basename(subdir)
    for file in files:
        # Work on the file name only. Scanning the full path for digits
        # would pick up digits in a folder name (e.g. folder "1") first and
        # leave the file unchanged.
        parts = file.split(".")
        # Expected shape is "CT.34562346.246.dcm": the field after the first
        # "." is the number to replace. Skip anything that does not fit.
        if len(parts) < 3 or not parts[1].isdigit():
            continue
        parts[1] = foldername
        newfile = ".".join(parts)
        if newfile != file:
            os.rename(os.path.join(subdir, file),
                      os.path.join(subdir, newfile))
Split each file name on the . and replace the second item with the folder name, then join on .'s again for the new file name. Here's some sample code that demonstrates the concept.
# Demo data: target folder names and the original file names.
folder_name = ['1', '2']
file_names = ['CT.2345.234.dcm', 'BG.234234.222.dcm', "RA.3342.221.dcm"]

for folder in folder_name:
    new_names = []
    for original in file_names:
        fields = original.split('.')
        # Keep the first field, substitute the folder for the second, and
        # carry the remaining fields over unchanged.
        renamed = '.'.join([fields[0], folder] + fields[2:])
        new_names.append(renamed)
    print(new_names)
Output
['CT.1.234.dcm', 'BG.1.222.dcm', 'RA.1.221.dcm']
['CT.2.234.dcm', 'BG.2.222.dcm', 'RA.2.221.dcm']

Categories

Resources