Find files within a changed subdirectory in Python

Find files within a changed subdirectory in Python - python

I have a text-file full of filenames. Like:
C:\Folder\Subfolder_01\file_1001.csv
C:\Folder\Subfolder_02\file_3030.xls
...
I want to check whether the files still exist (which is easy) or whether the name of the subfolder has changed. Some subfolders were renamed by adding a string in front of the name, starting with a 4-digit number (e.g. C:\Folder\Subfolder_02\file_3030.xls has changed to C:\Folder\2019 - Subfolder_02\file_3030.xls).
I tried to solve this with pathlib.glob(). It's possible to do this for one specific file 'by hand' like
list(file.parent.parent.glob('* - Subfolder_02\file_3030.xls'))
which returns a list with the new file name. But I failed to do this in a loop, building the glob pattern from variables.
This is what I have so far, but my attempt to concatenate the glob with other variables (using +) fails for obvious reasons:
import pathlib
file = pathlib.Path(file_names.txt)
lines=[]
with open(file,'r') as f:
# reading the txt-file line by line
for line in f:
line = line.replace("\r", "").replace("\n", "")
lines.append(line)
for file in lines:
file = pathlib.Path(file)
# check if file exists ...
if file.exists():
print('OK - ' + file.name)
# ... if not, find new location
else:
new_files = list(file.parent.parent.glob('* - ') + file.name)
print(files_files)

I would set your top directory as a path and use that to glob the files under the directory if you can't find the file in its original location. Using ** in the glob will search all folders.
from pathlib import Path


def find_moved_files(list_file, parent_dir):
    """Report where each file named in *list_file* currently lives.

    Args:
        list_file: Text file containing one file path per line.
        parent_dir: Top-level directory that is searched recursively for
            files that are no longer at their recorded location.

    Returns:
        A dict mapping every non-blank line of *list_file* to a list of
        ``Path`` objects: the original path if it still exists, otherwise
        every file below *parent_dir* with the same file name (an empty
        list if nothing was found).
    """
    parent_dir = Path(parent_dir)
    # splitlines() parses the file into a list without trailing newlines.
    with Path(list_file).open() as f:
        entries = f.read().splitlines()

    locations = {}
    for entry in entries:
        if not entry.strip():
            continue  # ignore blank lines in the listing
        orig = Path(entry)
        # Still in location, no need to look further.
        if orig.exists():
            print(f"{orig.absolute()} is still in place.")
            locations[entry] = [orig]
            continue
        # Compare names instead of building a glob pattern from the name,
        # so file names containing '[' or '*' are matched literally.
        matches = sorted(
            candidate
            for candidate in parent_dir.rglob("*")
            if candidate.name == orig.name
        )
        locations[entry] = matches
        if not matches:
            print(f"{orig.absolute()} was not found under {parent_dir.absolute()}")
            continue
        if len(matches) > 1:
            print("Multiple Matches Found")
        for match in matches:
            print(f"{orig.absolute()} might be in {match.absolute()}")
    return locations


if __name__ == "__main__":
    # Set top level directory as desired.
    find_moved_files("file_names.txt", Path("."))

Try watchdog
For example:
import os
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
# Raw string: "\F" is not a valid escape sequence in a normal literal.
RESOURCES_PATH = r"C:\Folder"


class dirs_watcher(FileSystemEventHandler):
    """Report sub-directory name changes directly below RESOURCES_PATH.

    The handler keeps the directory listing seen at the previous event in
    ``cur_dirs`` and compares it with a fresh listing on every
    ``on_modified`` call.
    """

    def __init__(self):
        super().__init__()
        # Take the baseline listing BEFORE the observer starts, otherwise
        # an early event would reach on_modified with no cur_dirs set.
        self.cur_dirs = os.listdir(RESOURCES_PATH)
        self.observe()

    def observe(self):
        """Create an Observer, schedule this handler on RESOURCES_PATH and start it."""
        self.observer = Observer()
        self.my_watch = self.observer.schedule(self, path=RESOURCES_PATH, recursive=True)
        self.observer.start()

    def on_modified(self, event=None):
        """Print which entries disappeared and which appeared since the last event."""
        self.new_dirs = os.listdir(RESOURCES_PATH)
        old = set(self.cur_dirs) - set(self.new_dirs)
        new = set(self.new_dirs) - set(self.cur_dirs)
        # Only report when the listing actually differs from the previous one.
        if old or new:
            print("{} changed to {}".format(old, new))
        self.cur_dirs = self.new_dirs  # update cur_dirs
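on_modified is triggered whenever something inside the watched directory changes. By keeping a list of the sub-directories from the previous event and comparing it with the current listing, you can work out which folder names disappeared and which ones appeared.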

Related

move files to subdirectories that are named on part of the filenames

I have a few data files in a directory, and I want to move them to the subdirectories based on their filenames. Let's say we created the first directory named "20220322_170444," and it should contain the first four files only because in the next file the "el" is less than the previous one, so the second folder, let's say is "20220322_170533", then it should contain next eight files until the el becomes less again than the previous name.
example data
files =[
'cfrad.20220322_170444.122_COW1_v2_s02_el3.40_SUR.nc',
'cfrad.20220322_170456.550_COW1_v2_s03_el4.22_SUR.nc',
'cfrad.20220322_170508.975_COW1_v2_s04_el5.09_SUR.nc',
'cfrad.20220322_170521.397_COW1_v2_s05_el5.99_SUR.nc',
'cfrad.20220322_170533.811_COW1_v2_s06_el0.45_SUR.nc',
'cfrad.20220322_170546.228_COW1_v2_s07_el1.20_SUR.nc',
'cfrad.20220322_170558.648_COW1_v2_s08_el1.90_SUR.nc',
'cfrad.20220322_170611.072_COW1_v2_s09_el2.61_SUR.nc',
'cfrad.20220322_170623.503_COW1_v2_s10_el3.40_SUR.nc',
'cfrad.20220322_170635.923_COW1_v2_s11_el4.21_SUR.nc',
'cfrad.20220322_170648.341_COW1_v2_s12_el5.09_SUR.nc',
'cfrad.20220322_170700.765_COW1_v2_s13_el5.99_SUR.nc',
'cfrad.20220322_170713.179_COW1_v2_s14_el0.45_SUR.nc',
'cfrad.20220322_170725.604_COW1_v2_s15_el1.20_SUR.nc',
'cfrad.20220322_170738.030_COW1_v2_s16_el1.90_SUR.nc',
'cfrad.20220322_170750.461_COW1_v2_s17_el2.61_SUR.nc',
'cfrad.20220322_170802.877_COW1_v2_s18_el3.40_SUR.nc',
'cfrad.20220322_170815.301_COW1_v2_s19_el4.22_SUR.nc',
'cfrad.20220322_170827.715_COW1_v2_s20_el8.01_SUR.nc',
'cfrad.20220322_170840.144_COW1_v2_s21_el11.02_SUR.nc']
for file in files:
np.savetxt(fname=file, X=np.array([1,1]))
What I tried is
import numpy as np
from datetime import datetime
import glob, os, re
import shutil
sweeps = []
temp = []
for i, file in enumerate(files[:19]):
match_str = re.search(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}', file)
res = datetime.strptime(match_str.group(), '%Y%m%d_%H%M%S')
print(res.strftime("%Y%m%d_%H%M%S"))
el_pos = int(files[i].find('el'))
st_pos = files[i][el_pos+1:el_pos+3]
el_pos1 = int(files[i+1].find('el'))
end_pos = files[i+1][el_pos1+1:el_pos1+3]
# print(files[i][s_pos+1:s_pos+3],files[i+1][s_pos1+1:s_pos1+3])
temp.append(files[i])
print("len(files):",len(files),i)
print(st_pos,end_pos)
# print()
if st_pos>end_pos:
print("temp len: ", len(temp))
sweeps.append(temp)
temp = []
elif len(files)-i==2:
print('entered')
sweeps.append(temp)
I now have a list named sweeps that contains the desired files. How can I now move these files into directories? The directories should be named based on the date, as I stated above. I also have the date string available as res.strftime("%Y%m%d_%H%M%S"), which can be used to create the directories.

Some string splitting can do this for you.
import shutil
import os
files = [
"cfrad.20220322_170444.122_COW1_v2_s02_el3.40_SUR.nc",
"cfrad.20220322_170456.550_COW1_v2_s03_el4.22_SUR.nc",
"cfrad.20220322_170508.975_COW1_v2_s04_el5.09_SUR.nc",
"cfrad.20220322_170521.397_COW1_v2_s05_el5.99_SUR.nc",
"cfrad.20220322_170533.811_COW1_v2_s06_el0.45_SUR.nc",
"cfrad.20220322_170546.228_COW1_v2_s07_el1.20_SUR.nc",
"cfrad.20220322_170558.648_COW1_v2_s08_el1.90_SUR.nc",
"cfrad.20220322_170611.072_COW1_v2_s09_el2.61_SUR.nc",
"cfrad.20220322_170623.503_COW1_v2_s10_el3.40_SUR.nc",
"cfrad.20220322_170635.923_COW1_v2_s11_el4.21_SUR.nc",
"cfrad.20220322_170648.341_COW1_v2_s12_el5.09_SUR.nc",
"cfrad.20220322_170700.765_COW1_v2_s13_el5.99_SUR.nc",
"cfrad.20220322_170713.179_COW1_v2_s14_el0.45_SUR.nc",
"cfrad.20220322_170725.604_COW1_v2_s15_el1.20_SUR.nc",
"cfrad.20220322_170738.030_COW1_v2_s16_el1.90_SUR.nc",
"cfrad.20220322_170750.461_COW1_v2_s17_el2.61_SUR.nc",
"cfrad.20220322_170802.877_COW1_v2_s18_el3.40_SUR.nc",
"cfrad.20220322_170815.301_COW1_v2_s19_el4.22_SUR.nc",
"cfrad.20220322_170827.715_COW1_v2_s20_el8.01_SUR.nc",
"cfrad.20220322_170840.144_COW1_v2_s21_el11.02_SUR.nc",
]
for f in files:
with open(f, "w") as of:
of.write("\n")
basepath = "."


def elevation_of(filename):
    """Return the elevation angle encoded in the ``_el<angle>_`` field as a float.

    The whole value is parsed (e.g. ``5.99``), not just its integer part,
    so a drop such as 5.99 -> 5.09 is still recognised as a new sweep.
    """
    return float(filename.split("_el", 1)[1].split("_", 1)[0])


def assign_sweep_dirs(filenames):
    """Pair every file name with the directory name of the sweep it belongs to.

    A new sweep starts at the first file and whenever the elevation is
    lower than that of the previous file; the sweep directory is named
    after the timestamp field (second dot-separated field) of the file
    that starts it.

    Returns:
        A list of ``(filename, directory_name)`` tuples in input order.
    """
    assignments = []
    previous_el = float("inf")  # forces the first file to open a sweep
    current_dir = None
    for name in filenames:
        el = elevation_of(name)
        if el < previous_el:
            current_dir = name.split(".")[1]
        assignments.append((name, current_dir))
        # Always remember the latest elevation, not only at sweep starts.
        previous_el = el
    return assignments


def move_into_sweep_dirs(filenames, basepath="."):
    """Move each file below *basepath* into its sweep directory, creating it as needed."""
    announced = None
    for name, sweep_dir in assign_sweep_dirs(filenames):
        target_dir = os.path.join(basepath, sweep_dir)
        if sweep_dir != announced:
            print(sweep_dir)
            # Create the directory under basepath, where the files are moved to.
            os.makedirs(target_dir, exist_ok=True)
            announced = sweep_dir
        shutil.move(os.path.join(basepath, name), os.path.join(target_dir, name))


if __name__ == "__main__":
    move_into_sweep_dirs(files, basepath)

Os path Join Two args

i need help to find a file inside a folder by name, i can do this with one file name, how could i do this with two file name?
This is the code used
path = r"Z:/Equities/ReferencePrice/"
files = []
for file in glob.glob(os.path.join(path ,"*OptionOnEquitiesReferencePriceFile*"+"*.txt*")):
df = pd.read_csv(file, delimiter = ';')
the first file contains the name
"OptionOnEquitiesReferencePriceFile"
the Second file contains the name
"BDRReferencePrice"
how to place the second file how to search between one or the other or both

I don't think you can do that in a straightforward way with a single glob pattern, so here's an alternative solution (a function) that you can use:
import os
from fnmatch import fnmatch
# folder path :
# in this folder I have many files: some start with 'other',
# some with 'test', and some have random names.
# In this example I'm fetching only the 'test' and 'other' patterns.
dir_path = './test_dir'
def find_by_patterns(patterns, path):
    """Return the paths of all files below *path* matching any of *patterns*.

    Args:
        patterns: Iterable of fnmatch-style patterns (e.g. ``'test*.txt'``)
            that are tested against the bare file name.
        path: Directory that is walked recursively.

    Returns:
        A list of ``os.path.join(root, name)`` strings; empty when nothing
        matches or when *patterns* is empty.
    """
    patterns = list(patterns)
    results = []
    for root, dirs, files in os.walk(path):
        for name in files:
            # any() short-circuits and, unlike max([]), accepts no patterns.
            if any(fnmatch(name, pattern) for pattern in patterns):
                results.append(os.path.join(root, name))
    return results
# printing the results
print(find_by_patterns(['test*.txt', 'other*.txt'], dir_path))
output:
['./test_dir/other1.txt', './test_dir/other2.txt', './test_dir/test1.txt', './test_dir/test2.txt', './test_dir/test3.txt']

How to extract the file name from a file path?

I have the following code:
os.listdir("staging")
# Seperate filename from extension
sep = os.sep
# Change the casing
for n in os.listdir("staging"):
print(n)
if os.path.isfile("staging" + sep + n):
filename_one, extension = os.path.splitext(n)
os.rename("staging" + sep + n, "staging" + sep + filename_one.lower() + extension)
# Show the new file names
print ('\n--------------------------------\n')
for n in os.listdir("staging"):
print (n)
# Remove the blanks, -, %, and /
for n in os.listdir("staging"):
print (n)
if os.path.isfile("staging" + sep + n):
filename_zero, extension = os.path.splitext(n)
os.rename("staging" + sep + n , "staging" + sep + filename_zero.replace(' ','_').replace('-','_').replace('%','pct').replace('/','_') + extension)
# Show the new file names
print ('\n--------------------------------\n')
for n in os.listdir("staging"):
print (n)
"""
In order to fix all of the column headers and to solve the encoding issues and remove nulls,
first read in all of the CSV's to python as dataframes, then make changes and rewrite the old files
"""
import os
import glob
import pandas as pd
files = glob.glob(os.path.join("staging" + "/*.csv"))
print(files)
# Create an empty dictionary to hold the dataframes from csvs
dict_ = {}
# Write the files into the dictionary
for file in files:
dict_[file] = pd.read_csv(file, header = 0, dtype = str, encoding = 'cp1252').fillna('')
In the dictionary, the dataframes are named as "folder/name(csv)" what I would like to do is remove the prefix "staging/" from the keys in the dictionary.
How can I do this?

If all you want to do is truncate the file paths to just the filename, you can use os.path.basename:
for file in files:
fname = os.path.basename(file)
dict_[fname] = (pd.read_csv(file, header=0, dtype=str, encoding='cp1252')
.fillna(''))
Example:
os.path.basename('Desktop/test.txt')
# 'test.txt'

import os
# Windows-style path; os.path.split only treats "\\" as a separator on Windows.
pathname = 'c:\\hello\\dickins\\myfile.py'
# head is everything before the last separator, tail is the final component.
head, tail = os.path.split(pathname)
# print() as a function call works on Python 3 (and on Python 2 for one argument).
print(head)
print(tail)

This article here worked out just fine for me
import os
inputFilepath = 'path/to/file/foobar.txt'
# Drop the directory part, then separate the stem from the extension.
filename_w_ext = os.path.basename(inputFilepath)
filename, file_extension = os.path.splitext(filename_w_ext)
# filename = foobar
# file_extension = .txt
# os.path.split needs the path as a string; note that this rebinds
# `filename` to the full file name including its extension.
path, filename = os.path.split(inputFilepath)
# path = path/to/file
# filename = foobar.txt
Hope it helps someone searching for this answer

In the same spirit as truncating the file paths, use pathlib from the Python standard library. It turns the path into an easy-to-use object.
from pathlib import Path
path = Path('Desktop/folder/test.txt')
path.name  # test.txt
path.stem  # test
path.suffix  # .txt
path.parent.name  # folder
# .name returns a plain str, so go up with .parent twice before asking for the name.
path.parent.parent.name  # Desktop

As ColdSpeed said, you can use "os.path.basename" to truncate a file to its name, but I think what you are refering to is the ability to pycache the data?
For Example here is my Directory:
You see the pycache folder? that initializes it as a module.
Then, you can import a file from that module (for example the staging.txt file and operate on it.)
I use the IpConfig.txt File from the assets folder level (or should be) and take a line of information out of it.
import pygame as pyg
import sys
import os
import math
import ssl
import socket as sock
import ipaddress as ipad
import threading
import random
print("Modules Installed!")
class two:
# Find out how to refer to class super construct
def main(Display, SecSock, ipadd, clock):
# I have code here that has nothing to do with the question...
def __init__():
print("Initializing[2]...")
# Initialization of Pygame and SSL Socket goes here
searchQuery = open("IpConfig.txt", 'r') #Opening the File IpConfig(Which now should open on the top level of the game files)
step2 = searchQuery.readlines()# read the file
ipadd = step2[6] # This is what you should have or something similar where you reference the line you want to copy or manipulate.
main(gameDisplay, SSLSock, ipadd, clock)# Im having issues here myself - (main() is not defined it says)
print(ipadd)
print("Server Certificate Configuration Enabled...")
__init__() # Start up the procedure

How to batch process a folder of videos using MoviePy

I wrote a MoviePy script that takes an input video, does some processing, and outputs a video file. I want to run this through an entire folder of videos. Any help or direction is appreciated.
Here's what I tried...
for f in *; do python resize.py $f; done
and resize.py source code here:
from moviepy.editor import *
clip = VideoFileClip(input)
clip1 = clip.rotate(270)
clip2 = clip1.crop(x_center=540,y_center=960,width=1080,height=608)
clip3 = clip2.resize(width=1920)
clip3.write_videofile(output,codec='libx264')
Really wasn't sure what to put for "input" and "output" in my .py file.
Thanks,
Evan

I know you have an answer on Github, but I'll add my own solution.
First, you'll want to put your code inside a function:
def process_video(input, output):
    """Rotate, crop and resize one video and write the result to *output*.

    Args:
        input: Full path of the source video, as a string.
        output: Path of the video file to write.
    """
    clip = VideoFileClip(input)
    try:
        clip1 = clip.rotate(270)
        clip2 = clip1.crop(x_center=540, y_center=960, width=1080, height=608)
        clip3 = clip2.resize(width=1920)
        clip3.write_videofile(output, codec='libx264')
    finally:
        # Release the reader so a batch run does not accumulate open files.
        clip.close()
Then, you can have a function that returns a list of file paths, and a list of final file names to use with the above function (note that the final file names will be the same as the original file names but with "output" in front):
import os
def get_video_paths(folder_path):
    """Build the input and output paths for every file in *folder_path*.

    Args:
        folder_path: Directory to scan, e.g. "Users/documents/folder1/".
            A trailing separator is optional.

    Returns:
        A tuple ``(path_name_list, final_name_list)``: the full paths of
        the source files and, in the same order, the full paths of the
        output files (same folder, file name prefixed with "output").
    """
    path_name_list = []
    final_name_list = []
    for name in os.listdir(folder_path):
        source = os.path.join(folder_path, name)
        # Put any sanity checks here: skip macOS metadata and anything
        # that is not a regular file (e.g. sub-directories).
        if name == ".DS_Store" or not os.path.isfile(source):
            continue
        path_name_list.append(source)
        # Change the format of the output file names below
        final_name_list.append(os.path.join(folder_path, "output" + name))
    return path_name_list, final_name_list
Finally, at the bottom, we get the input folder, and utilise the above two functions:
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")
Just watch out, because this will crash if there are any files in the folder that can't be read as a movie. For instance, on mac, the OS puts a ".DS_Store" file in each folder, which will crash the program. I've put an area for a sanity check to ignore certain filenames.
Complete code:
import os
from moviepy.editor import *
def process_video(input, output):
    """Rotate, crop and resize one video and write the result to *output*.

    Args:
        input: Full path of the source video, as a string.
        output: Path of the video file to write.
    """
    clip = VideoFileClip(input)
    try:
        clip1 = clip.rotate(270)
        clip2 = clip1.crop(x_center=540, y_center=960, width=1080, height=608)
        clip3 = clip2.resize(width=1920)
        clip3.write_videofile(output, codec='libx264')
    finally:
        # Release the reader so a batch run does not accumulate open files.
        clip.close()
def get_video_paths(folder_path):
    """Build the input and output paths for every file in *folder_path*.

    Args:
        folder_path: Directory to scan, e.g. "Users/documents/folder1/".
            A trailing separator is optional.

    Returns:
        A tuple ``(path_name_list, final_name_list)``: the full paths of
        the source files and, in the same order, the full paths of the
        output files (same folder, file name prefixed with "output").
    """
    path_name_list = []
    final_name_list = []
    for name in os.listdir(folder_path):
        source = os.path.join(folder_path, name)
        # Put any sanity checks here: skip macOS metadata and anything
        # that is not a regular file (e.g. sub-directories).
        if name == ".DS_Store" or not os.path.isfile(source):
            continue
        path_name_list.append(source)
        final_name_list.append(os.path.join(folder_path, "output" + name))
    return path_name_list, final_name_list
if __name__ == "__main__":
video_folder = input("What folder would you like to process? ")
path_list, final_name_list = get_video_paths(video_folder)
for path, name in zip(path_list, final_name_list):
process_video(path, name)
print("Finished")

I responded on your Github issue #542, but I copied it here for future reference!
First off, the below example isn't ironclad, but it should do what you need.
You can achieve this via something like this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert all media assets located in a specified directory."""
import glob
import os
from optparse import OptionParser
from moviepy.editor import VideoFileClip
def get_dir_files(dir_path, patterns=None):
    """Get all absolute paths for pattern matched files in a directory.

    Args:
        dir_path (str): The path to of the directory containing media assets.
        patterns (list of str): The list of patterns/file extensions to match.
            Falls back to common video extensions when empty or not a
            list/tuple.

    Returns:
        (list of str): A list of all pattern-matched files in a directory.
    """
    if not patterns or not isinstance(patterns, (list, tuple)):
        print('No patterns list passed to get_dir_files, defaulting to patterns.')
        patterns = ['*.mp4', '*.avi', '*.mov', '*.flv']

    # Resolve the directory once; reusing dir_path for the glob expression
    # would append every later pattern to the previous pattern's path.
    base_dir = os.path.abspath(dir_path)
    files = []
    for pattern in patterns:
        files.extend(glob.glob(os.path.join(base_dir, pattern)))
    return files
def modify_clip(path, output):
    """Handle conversion of a video file.

    Args:
        path (str): The path to the video file to be converted.
        output (str): The filename to associate with the converted file.
    """
    source = VideoFileClip(path)
    try:
        clip = source.rotate(270)
        clip = clip.crop(x_center=540, y_center=960, width=1080, height=608)
        clip = clip.resize(width=1920)
        clip.write_videofile(output, codec='libx264')
    finally:
        # Release the reader so converting many files does not leak handles.
        source.close()
    print('File: {} should have been created.'.format(output))
if __name__ == '__main__':
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    status = 'Failed!'
    parser = OptionParser(version='%prog 1.0.0')
    parser.add_option('-p', '--path', action='store', dest='dir_path',
                      default='.', type='string',
                      help='the path of the directory of assets, defaults to .')
    options, args = parser.parse_args()
    print('Running against directory path: {}'.format(options.dir_path))
    # Ask for confirmation before converting anything.
    path_correct = ask('Is that correct?').lower()
    if path_correct.startswith('y'):
        dir_paths = get_dir_files(options.dir_path)
        for dir_path in dir_paths:
            # Output is written to the current working directory.
            output_filename = 'converted_' + os.path.basename(dir_path)
            modify_clip(path=dir_path, output=output_filename)
        status = 'Successful!'
    print('Conversion {}'.format(status))
With the above example, you can simply drop that into the directory of assets you wish to convert and run: python this_file.py and it should convert the files for you in the same directory with the name prepended with: converted_
Likewise, you can drop that file anywhere and run it against an absolute path:
python this_file.py -p /Users/thisguy/media and it will convert all files with the extensions: ['*.mp4', '*.avi', '*.mov', '*.flv']
Either way, let me know if you have any questions (or if this resolves your issue) and I'll do my best to help you out!
Thanks for using moviepy!

python os.rename(...) won't work !

I am writing a Python function to change the extension of a list of files into another extension, like txt into rar, that's just an idle example. But I'm getting an error. The code is:
import os
def dTask():
#Get a file name list
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
#Change the extensions
for file_name in file_list:
entry_pos = 0;
#Filter the file name first for '.'
for position in range(0, len(file_name)):
if file_name[position] == '.':
break
new_file_name = file_name[0:position]
#Filtering done !
#Using the name filtered, add extension to that name
new_file_name = new_file_name + '.rar'
#rename the entry in the file list, using new file name
print 'Expected change from: ', file_list[entry_pos]
print 'into File name: ', new_file_name
os.rename(file_list[entry_pos], new_file_name)
++entry_pos
Error:
>>> dTask()
Expected change from: New Text Document (2).txt
into File name: New Text Document (2).rar
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
dTask()
File "C:\Users\B\Desktop\dTask.py", line 19, in dTask
os.rename(file_list[entry_pos], new_file_name)
WindowsError: [Error 2] The system cannot find the file specified
I can succeed in getting the file name with another extension in variable level as you can see in the print-out, but not in reality because I can not end this process in OS level. The error is coming from os.rename(...). Any idea how to fix this ?

As the others have already stated, you either need to provide the path to those files or switch the current working directory so the os can find the files.
++entry_pos doesn't do anything. There is no increment operator in Python. Prefix + is just there for symmetry with prefix -. Prefixing something with two + is just two no-ops. So you're not actually doing anything (and after you change it to entry_pos += 1, you're still resetting it to zero in each iteration).
Also, your code is very inelegant - for example, you are using a separate index into file_list and fail to keep that in sync with the iteration variable file_name, even though you could just use that one! Here is how this can be done better:
-
def rename_by_ext(to_ext, path):
    """Give every entry in directory *path* the extension *to_ext*.

    Args:
        to_ext: New extension, with or without the leading dot.
        path: Directory whose entries are renamed.

    Entries that already carry the target name are left alone, and an
    entry is skipped rather than renamed onto an existing file.
    """
    if not to_ext.startswith('.'):
        to_ext = '.' + to_ext
    print("Renaming files in", path)
    for file_name in os.listdir(path):
        root, ext = os.path.splitext(file_name)
        new_name = root + to_ext
        if new_name == file_name:
            continue
        target = os.path.join(path, new_name)
        if os.path.exists(target):
            # os.rename would silently replace the existing file on POSIX.
            print("Skipping", file_name, "because", new_name, "already exists")
            continue
        print("Renaming", file_name, "to", new_name)
        os.rename(os.path.join(path, file_name), target)
rename_by_ext('.rar', '...')

os.rename really doesn't like variables. Use shutil. Example taken from How to copy and move files with Shutil.
import shutil
import os
def move_txt_files(source_dir, destination):
    """Move every ``.txt`` file from *source_dir* into *destination*.

    Args:
        source_dir: Directory whose ``.txt`` files are moved.
        destination: Target directory; created if it does not exist.

    Returns:
        The list of new file paths as returned by ``shutil.move``.
    """
    os.makedirs(destination, exist_ok=True)
    moved = []
    for name in os.listdir(source_dir):
        # os.listdir yields bare names; join them with the directory so
        # the move does not depend on the current working directory.
        candidate = os.path.join(source_dir, name)
        if name.endswith(".txt") and os.path.isfile(candidate):
            moved.append(shutil.move(candidate, destination))
    return moved


if __name__ == "__main__":
    move_txt_files("/tmp/", "/tmp/newfolder/")
In your case:
import shutil
shutil.move(file_list[entry_pos], new_file_name)

You also want to double backslashes to escape them in Python strings, so instead of
file_list = os.listdir('C:\Users\B\Desktop\sil\sil2')
you want
file_list = os.listdir('C:\\Users\\B\\Desktop\\sil\\sil2')
Or use forward slashes - Python magically treats them as path separators on Windows.

You must use the full path for the rename.
import os
def dTask(directory=r'C:\Users\B\Desktop\sil\sil2', new_ext='.rar'):
    """Replace the extension of every entry in *directory* with *new_ext*.

    Args:
        directory: Folder whose entries are renamed. The default is a raw
            string because ``\\U`` in a normal literal is an invalid escape.
        new_ext: Extension to apply, including the leading dot.
    """
    # Get a file name list
    file_list = os.listdir(directory)
    # Change the extensions
    for file_name in file_list:
        # Drop the current extension (text after the last '.'), if any.
        stem, _ = os.path.splitext(file_name)
        # Using the name filtered, add extension to that name
        new_file_name = stem + new_ext
        print('Expected change from: ', file_name)
        print('into File name: ', new_file_name)
        # Use the loop variable itself and the full path for the rename.
        os.rename(os.path.join(directory, file_name),
                  os.path.join(directory, new_file_name))

If you aren't in the directory C:\Users\B\Desktop\sil\sil2, then Python certainly won't be able to find those files.

import os
def extChange(path, newExt, oldExt=""):
    """Rename files in directory *path* so that they end in ``.newExt``.

    Args:
        path: Directory containing the files; a trailing separator is optional.
        newExt: Extension to apply, with or without a leading dot.
        oldExt: If given, only files whose current extension is
            ``.oldExt`` are renamed; if empty, every file is renamed.
    """
    new_suffix = "." + newExt.lstrip(".")
    old_suffix = "." + oldExt.lstrip(".") if oldExt else ""
    for entry in os.listdir(path):
        source = os.path.join(path, entry)
        if not os.path.isfile(source):
            continue
        # splitext copes with extensions of any length, unlike slicing 4 chars.
        stem, suffix = os.path.splitext(entry)
        if old_suffix and suffix != old_suffix:
            continue
        target = os.path.join(path, stem + new_suffix)
        if target != source:
            os.rename(source, target)
now call it:
extChange("C:/testfolder/","txt","lua") #this will change all .lua files in C:/testfolder to .txt files
extChange("C:/testfolder/","txt") #leaving the last parameter out will change all files in C:/testfolder to .txt

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Find files within a changed subdirectory in Python - python

Related

move files to subdirectories that are named on part of the filenames

Os path Join Two args

How to extract the file name from a file path?

How to batch process a folder of videos using MoviePy

python os.rename(...) won't work !

Categories

Resources