I want to find the newest directory in root directory:
Currently this code finds all directories in root path and also finds modified time:
import os
from datetime import datetime

root = r'D:\drivers'

def totimeformat(tm):
    """Format a datetime as 'YYYY-MM-DD HH:MM:SS'."""
    return tm.strftime('%Y-%m-%d %H:%M:%S')

dirs = []
# FIX: `directories` was never defined; list the entries of `root`.
for dir in os.listdir(root):
    d = os.path.join(root, dir)
    if os.path.isdir(d):
        # FIX: use getmtime (modification time, as the description says)
        # instead of getatime (last-access time), and separate the path
        # from the timestamp with a space instead of an empty string.
        print(d + ' ' + totimeformat(datetime.fromtimestamp(os.path.getmtime(d))))
        dirs.append(d)
Your code is mostly good as is. You just need to change it to reflect the logic you describe:
import os
from datetime import datetime

root = r'D:\drivers'

def totimeformat(tm):
    """Format a datetime as 'YYYY-MM-DD HH:MM:SS'."""
    return tm.strftime('%Y-%m-%d %H:%M:%S')

dirs = []
newestdir = None
newesttime = 0
# FIX: `directories` was never defined; list the entries of `root`.
for dir in os.listdir(root):
    d = os.path.join(root, dir)
    if os.path.isdir(d):
        # FIX: getmtime (modification time) matches the goal of finding the
        # newest directory; getatime is only the last-access time.
        t = os.path.getmtime(d)
        if t > newesttime:
            newestdir = d
            newesttime = t
# FIX: guard against an empty root (`newestdir` would still be None and the
# concatenation would raise TypeError), and use a space as the separator.
if newestdir is not None:
    print(newestdir + ' ' + totimeformat(datetime.fromtimestamp(newesttime)))
You can create list with tuples (timestamp, dir) and use sorted() to sort by timestamp.
After sorting: first element is the oldest dir, last element is the newest dir.
import os
from datetime import datetime

def totimeformat(tm):
    """Format a datetime as 'YYYY-MM-DD HH:MM:SS'."""
    return tm.strftime('%Y-%m-%d %H:%M:%S')

root = r'D:\drivers'

dirs = []
for dir in os.listdir(root):
    d = os.path.join(root, dir)
    if os.path.isdir(d):
        # NOTE(review): getatime is last-access time; use getmtime if
        # "newest" should mean last-modified — confirm intent.
        dirs.append((os.path.getatime(d), d))

# Sort once and reuse; tuples compare by timestamp first.
dirs.sort()
newest = dirs[-1]
# FIX: the oldest entry is index 0, not 1 (index 1 is the second-oldest).
oldest = dirs[0]
# FIX: use a space separator instead of concatenating an empty string.
print(newest[1] + ' ' + totimeformat(datetime.fromtimestamp(newest[0])))
print(oldest[1] + ' ' + totimeformat(datetime.fromtimestamp(oldest[0])))
You can also get the three newest dirs with sorted(dirs)[-3:]
You can use os.scandir to get all the information of all the files in the given directory in one system call for much better efficiency, and use the max function with a key function that returns the modified time of a given entry to find the directory with the newest modified time:
os.path.join(root, max((f for f in os.scandir(root) if f.is_dir()), key=lambda f: f.stat().st_mtime).name)
Related
Lets say some paths like these:
C:/Test/path_i_need/test2/test3/test4
C:/Test/test2/path_i_need/test3
C:/Test/test2/test3/path_i_need/test4
How I can extract the path that i need in each of the scenarios using python, for example:
C:/Test/path_i_need
C:/Test/test2/path_i_need
C:/Test/test2/test3/path_i_need
So basically i don't know how many sub folder are before the path_i_need or after it, I only need that path, i dont care whats after.
You could do a DFS (depth-first search) from the root directory until you find all the paths you're looking for:
from os import listdir, path

ROOT_DIR = "./example"
FLAG = "example1"

found_dirs = []

def find_dirs(p):
    """Depth-first search from `p`, appending to the module-level
    `found_dirs` every path whose final component equals FLAG.

    Matching directories are NOT descended into (the original behavior).
    """
    subdirs = listdir(p)
    for subdir in subdirs:
        curdir = path.join(p, subdir)
        if subdir == FLAG:
            found_dirs.append(curdir)
        # FIX: `elsif` is a SyntaxError in Python; the keyword is `elif`.
        elif path.isdir(curdir):
            find_dirs(curdir)

if __name__ == "__main__":
    # Guarded so importing this module does not trigger a filesystem walk.
    find_dirs(ROOT_DIR)
Try this, without using os module or any imports:
# The marker directory can sit at any depth; keep everything up to and
# including its first occurrence in each path.
paths = """
C:/Test/path_i_need/test2/test3/test4
C:/Test/test2/path_i_need/test3
C:/Test/test2/test3/path_i_need/test4
""".strip().split('\n')

need_this_path = 'path_i_need'
len_that_which_i_need = len(need_this_path)

extracted_paths = []
for candidate in paths:
    # Cut each path right after the first occurrence of the marker.
    cut_at = candidate.index(need_this_path) + len_that_which_i_need
    extracted_paths.append(candidate[:cut_at])

print(*extracted_paths, sep='\n')
Outputs:
C:/Test/path_i_need
C:/Test/test2/path_i_need
C:/Test/test2/test3/path_i_need
I have Logs_26052021.tar.xz
Description: 26052021--> date: 26, month: 05, year: 2021
And want to permanently remove this kind of files from location using python script.
its on Unix server, version: Linux localhost 3.10.0-1160.21.1.el7.x86_64
what should I do.
Thanks In Advance
using proper separation into methods. Tested on python 3.9.6
import os
from datetime import datetime
def get_files_from_path(path: str) -> list:
    """Recursively collect full paths of files named 'Logs_*.tar.xz' under `path`.

    Returns a (possibly empty) list; a non-existent `path` simply yields
    an empty walk, not an error.
    """
    result = []
    for subdir, dirs, files in os.walk(path):
        for filename in files:
            if filename.startswith('Logs_') and filename.endswith('.tar.xz'):
                # os.path.join is the portable replacement for the manual
                # `subdir + os.sep + filename`; built only for matches.
                result.append(os.path.join(subdir, filename))
    return result
def get_old_files(filelist: list, max_days=184) -> list:
    """Return the entries of `filelist` whose embedded DDMMYYYY date is more
    than `max_days` days before now.

    File names are expected to look like 'Logs_<DDMMYYYY>.tar.xz'.
    Raises ValueError (via strptime) if the date portion is malformed.
    """
    currentdate = datetime.now()
    result = []
    for file in filelist:
        # FIX: parse the basename, not the whole path, so a directory
        # component containing 'Logs_' cannot corrupt the date string.
        basename = os.path.basename(file)
        datestr = basename.split('Logs_')[1].split('.tar.xz')[0]
        filedate = datetime.strptime(datestr, '%d%m%Y')
        tdelta = currentdate - filedate
        if tdelta.days > max_days:
            result.append(file)
    return result
def delete_files(filelist: list):
    """Permanently remove every file named in `filelist`."""
    for filepath in filelist:
        os.remove(filepath)
# Driver: collect Logs_*.tar.xz files under 'testing', keep only those older
# than the default cutoff (184 days), then delete that subset.
logfiles = get_files_from_path('testing')
oldfiles = get_old_files(logfiles)
delete_files(oldfiles)
related documentation:
Iterate over files
strptime behavior
timedelta for subtraction of dates
File deletion
Found Answer,
find /path/Logs/ -name "*.log.*" -type f -mtime +180
But while executing this command getting SyntaxError: invalid syntax for "*.log.*"
How should I execute this one using python ?
I'm looking to create a script that will look through specific directories and its subdirectories, and if it contains a folder called trash, and it is older than 3 months it will delete that trash folder and its contents.
The code i have already tried is shown below. It contains a couple of options of how to get the time of 3 months. I have also used os.walk to try to traverse the directories, but i have not managed to target just the trash folders and then remove them.
import os, sys, time
from datetime import datetime
from datetime import timedelta
from subprocess import call
# FIX: removed `import path` -- no such stdlib module exists, and the name
# was immediately shadowed by the string assignment below anyway.

now = time.time()
cutoff = now - (30)  # (91 * 86400)  # alternative epoch-seconds cutoff (unused)
# FIX: timedelta's positional args are (days, seconds, microseconds,
# milliseconds, minutes, hours) -- timedelta(0,0,0,0,0,1) was ONE HOUR,
# not three months. Use an explicit ~3-month (91-day) span.
three_months = datetime.today() - timedelta(days=91)
path = '/users/shoot_station_5/documents/folderfordeletetest/'

for root, d_names, f_names in os.walk(path):
    print(root, d_names, f_names)
    for _dir in d_names:
        time_thing = os.path.getmtime(os.path.join(root, _dir))
        time_thing1 = datetime.fromtimestamp(time_thing)
        # Candidate: a directory named 'trash' last modified >= 91 days ago.
        if time_thing1 <= three_months and _dir == 'trash':
            print('good')
Below code delete a directory called trash that are older than 3 months and all its contents.
import os
import datetime
import shutil
def Delete_Folder(filesPath, No_of_Days):
    """Delete every directory named 'trash' (case-insensitive) under
    `filesPath` whose modification time is more than `No_of_Days` days old,
    together with all of its contents.
    """
    # Hoist the reference time: one timestamp for the whole run.
    today = datetime.datetime.today()
    for root, d_names, f_names in os.walk(filesPath):
        # Iterate a copy so we can prune d_names while looping.
        for _dir in list(d_names):
            if _dir.lower() == "trash":
                DName = os.path.join(root, _dir)
                modified_date = datetime.datetime.fromtimestamp(os.path.getmtime(DName))
                duration = today - modified_date
                if duration.days > No_of_Days:
                    shutil.rmtree(DName)
                    # FIX: prune the deleted dir so os.walk does not try to
                    # descend into a tree that no longer exists.
                    d_names.remove(_dir)
# Call Function
# Remove any 'trash' directories under C:\Sample older than 90 days.
NoDays = 90
filesPath = r"C:\Sample"
Delete_Folder(filesPath,NoDays)
based on this script:
#!/usr/bin/python
# run by crontab
# purge /tmp of regular files whose ctime is more than a week old
import os, sys, time
from subprocess import call

now = time.time()
cutoff = now - (7 * 86400)  # one week ago, in epoch seconds
files = os.listdir("/tmp")
for name in files:
    full = os.path.join("/tmp", name)
    # Only regular files are candidates; directories etc. are left alone.
    if os.path.isfile(full):
        # delete file if older than a week (by inode-change time)
        if os.stat(full).st_ctime < cutoff:
            os.remove(full)
we can delete files in a path based on their time modified, but how can we delete folders in other folders based on their time modification?
It means there are many folders in the main folder, but we need to keep the main folders and subfolders and only delete folders whose modification time is older than a specific time.
You can try something along these lines
import shutil, os, time
# Root directory whose old files and subdirectories will be purged.
top_dir = '/tmp'
now = time.time()
# Epoch-seconds threshold: anything modified before this is "old".
cutoff = now - (7 * 86400)
def del_old_files_and_dirs(top_dir, cutoff_time):
    """Delete every file and directory under `top_dir` whose modification
    time is older than `cutoff_time` (epoch seconds).

    An old directory is removed with its entire contents; `top_dir` itself
    is never removed.
    """
    # Bottom-up walk: children are visited before their parents, so every
    # subtree is already covered by this single traversal.
    # FIX: the original recursed into newer directories from inside the same
    # walk, re-traversing subtrees the bottom-up walk had already processed.
    for root, dirs, files in os.walk(top_dir, topdown=False):
        for cdir in dirs:
            fdir = os.path.join(root, cdir)
            if os.path.getmtime(fdir) < cutoff_time:
                shutil.rmtree(fdir)
        for cfile in files:
            ffile = os.path.join(root, cfile)
            if os.path.getmtime(ffile) < cutoff_time:
                os.remove(ffile)
del_old_files_and_dirs(top_dir, cutoff)
I'm writing yet another python purge script. This is replacing a very old bash script with tons of find -delete which take up to 9h to purge our video backend.
I know there are tons of those either on Stack Overflow or right in Google, but the thing is I have a few more constraints, which left me writing what I find to be poor/inefficient code.
consider the following dir structure:
/data/channel1/video_800/0001/somefile_800_001.ts
/data/channel1/video_800/0001/somefile_800_002.ts
/data/channel1/video_800/0002/somediffile_800_001.ts
/data/channel1/video_800/0002/somediffile_800_002.ts
/data/channel1/video_800.m3u8
/data/channel1/video_900/0001/someotherfile_900_001.ts
/data/channel1/video_900/0002/afile_900_001.ts
/data/channel1/video_900/0003/bfile_900_001.ts
/data/channel1/video_900/0003/cfile_900_001.ts
/data/channel1/video_900.m3u8
/data/channel2/video_800/0001/againsomefile_800_001.ts
/data/channel2/video_800/0001/againsomefile_800_001.ts
/data/channel2/video_800.m3u8
/data/sport_channel/video_1000/0001/somefile.ts
/data/sport_channel/video_1000/0001/somefile2.ts
First thing that interests me is the channel name since there is a rule for channel* and one for sport*.
Second thing is the end of the video dirs that equals the bitrate... 800, 900, 1000 since these can have different retention days.
Finally I'm going through everything and removing files based on bitrate and extension.
The below code works but is overly complicated and I'm sure not very Pythonic. Since what I care about most in this case is performance, I'm sure there is a more efficient way to do this. Stacking for loop in for loop is not only poor design but also gets me a 'find_files' is too complex [mccabe] warning in my pymode.
** Left the remove function out of the code example but it's just a plain try:except using os.rmdir and os.remove
I'm open to all suggestions to improving my code.
Thanks!
#!/usr/bin/python
import os
import time
import fnmatch
# Root directory containing all channel folders.
path = '/data'
# Bitrate lists passed to find_files() below; each job pairs a list with
# its own retention period in days.
debits_short = ['200', '700', '1000', '1300', '2500']
debits_long = ['400', '1800']
def find_files(chan_name, debits, duration):
    """Remove expired stream files for channels matching `chan_name`.

    For every channel directory under `path` matching the glob-style
    pattern `chan_name`, delete .ts segments containing '_<debit>_' and
    playlists ending in '<debit>.m3u8' that are older than `duration`
    days, then remove any directories left empty. Relies on the
    module-level `path` and a `remove()` helper defined elsewhere.
    """
    time_in_secs = time.time() - (duration * 24 * 60 * 60)
    for channel in os.listdir(path):
        # Guard clause instead of nesting everything under the match.
        if not fnmatch.fnmatch(channel, chan_name):
            continue
        # FIX: `path + channel` produced '/datachannel1' (missing
        # separator), so the walk below never found anything.
        channel_path = os.path.join(path, channel)
        # One walk per channel instead of one walk per bitrate.
        for root, dirs, files in os.walk(channel_path, topdown=False):
            for filename in files:
                fullpath = os.path.join(root, filename)
                # A deletable file is either a segment ('_<debit>_'...'.ts')
                # or a playlist ending in '<debit>.m3u8'.
                is_segment = filename.endswith('.ts') and any(
                    '_' + debit + '_' in filename for debit in debits)
                is_playlist = any(
                    filename.endswith(debit + '.m3u8') for debit in debits)
                if ((is_segment or is_playlist)
                        and os.path.isfile(fullpath)
                        and os.stat(fullpath).st_mtime <= time_in_secs):
                    remove(fullpath)
            # Remove empty dirs (bottom-up walk, so children go first).
            for dir in dirs:
                dirpath = os.path.join(root, dir)
                if not os.listdir(dirpath):
                    remove(dirpath)
# Purge jobs: 'channel*' dirs use the long bitrate list with a 3-day
# retention; 'sport*' dirs use the short list with a 7-day retention.
find_files('channel*', debits_long, 3)
find_files('sport*', debits_short, 7)
Here's a possible approach:
import os
import glob
import time
class Purge(object):
    """Delete expired stream files (.ts segments and .m3u8 playlists) in
    every channel directory matching `channel_pattern` under `basedir`.

    A file is deleted only when its extension, age and bitrate ("debit")
    all match. Set `test_mode=True` to print candidates without deleting.
    """

    removable_extensions = ['ts', 'm3u8']

    def __init__(self, basedir, channel_pattern, debits,
                 older_than_days, test_mode=False):
        self.basedir = basedir
        self.channel_pattern = channel_pattern
        self.debits = debits
        # Files modified at or before this epoch timestamp are "old".
        self.older_than_secs = time.time() - 24*60*60*older_than_days
        self.test_mode = test_mode  # If `True`, do not delete files.

    def delete_file(self, filepath):
        """Best-effort removal: a vanished or locked file is not an error."""
        try:
            os.remove(filepath)
        except OSError:
            pass

    def file_for_deletion(self, filepath):
        """Return `True` if a file meets all conditions for deletion."""
        filename, ext = os.path.splitext(os.path.basename(filepath))
        condition_ext = ext[1:] in self.removable_extensions
        condition_old = os.stat(filepath).st_mtime <= self.older_than_secs
        # Matches either a '<x>_<debit>_<n>' segment stem or a stem ending
        # in the debit (playlists like 'video_800' from 'video_800.m3u8').
        condition_deb = any(
            '_{}_'.format(d) in filename or filename.endswith(d)
            for d in self.debits
        )
        return all((condition_ext, condition_old, condition_deb))

    def purge_channel(self, channel_dir):
        """Walk one channel tree and delete every matching file."""
        for root, dirs, files in os.walk(channel_dir):
            for name in files:
                filepath = os.path.join(root, name)
                if self.file_for_deletion(filepath):
                    # FIX: Python 2 `print filepath` statement replaced by
                    # the function call, consistent with the rest of the file.
                    print(filepath)
                    if not self.test_mode:
                        self.delete_file(filepath)
        #TODO: delete empty directories here.

    def purge(self):
        """Run the purge over every channel directory matching the pattern."""
        channels = glob.glob(os.path.join(self.basedir, self.channel_pattern))
        for channel_dir in channels:
            self.purge_channel(channel_dir)
if __name__ == '__main__':
    # Configure a single purge job; `test_mode=True` makes it a dry run
    # that only prints the files it would delete.
    p = Purge(
        basedir=r'path/to/data',     # All channel folders live here.
        channel_pattern='channel*',  # `glob` pattern.
        debits=['400', '1800'],
        older_than_days=7,
    )
    p.test_mode = True
    p.purge()