How to compare ctime properly? - python

I have a program that gets the modified date/time of directories and files. I then want to get the date/time from 30 seconds ago and compare that to the modified date/time.
If the modified time is less than 30 seconds ago, I want to trigger an alert. My code triggers the alert even when the modification occurred more than 30 seconds ago.
Is there a way I can only trigger an alert if the modification occurred less than 30 seconds ago?
import os.path
import time, stat
import sys

share_dir = 'C:/mydir'
source_dir = r'' + share_dir + '/'

def trigger():
    print("Triggered")

def check_dir():
    while True:
        for currentdir, dirs, files in os.walk(source_dir):
            for file in files:
                currentfile = os.path.join(currentdir, file)
                # get modified time for files
                ftime = os.stat(currentfile)[stat.ST_MTIME]
                past = time.time() - 30  # last 30 seconds
                if time.ctime(ftime) >= time.ctime(past):
                    print(time.ctime(ftime) + " > " + time.ctime(past))
                    print("Found modification in last 30 seconds for file =>", currentfile, time.ctime(ftime))
                    trigger()
                    sys.exit()
                else:
                    print('No recent modifications.' + currentfile)
            for folder in dirs:
                currentfolder = os.path.join(currentdir, folder)
                # get modified time for directories
                dtime = os.stat(currentfolder)[stat.ST_MTIME]
                past = time.time() - 30  # last 30 seconds
                if time.ctime(dtime) >= time.ctime(past):
                    print(time.ctime(dtime) + " > " + time.ctime(past))
                    print("Found modification in last 30 seconds for folder =>", currentfolder, time.ctime(dtime))
                    trigger()
                    sys.exit()
                else:
                    print('No recent modifications: ' + currentfolder)
        time.sleep(4)

if __name__ == "__main__":
    check_dir()

I do this on a large-scale file system. I personally store the rounded mtime of each file in SQLite3 (I had weird things happen with any other sort of operation, and rounding was more consistent).
The core problem is that time.ctime() returns a human-readable string such as 'Tue Mar 24 10:05:01 2020', and >= on two such strings compares them alphabetically, not chronologically. I'm also unsure why you're not just doing a pure math solution: take the current time, take the mtime of the file, find the difference between them, and if it's less than or equal to thirty seconds, you get a hit.
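A minimal sketch of the difference (the timestamps here are only illustrative):

import time

ftime = time.time() - 120   # pretend the file was modified 2 minutes ago
past = time.time() - 30     # threshold: 30 seconds ago

# String comparison: alphabetical order of "Day Mon DD HH:MM:SS YYYY",
# which has no reliable relationship to which instant is later.
print(time.ctime(ftime) >= time.ctime(past))

# Numeric comparison of epoch seconds is what you actually want.
print(time.time() - ftime <= 30)   # False: modified more than 30 seconds ago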
I redid some of the code. I recommend trying this:
import os.path
import time, stat
import sys

def trigger():
    print("Triggered")

def check_dir(source_dir):
    for currentdir, dirs, files in os.walk(source_dir):
        for file in files:
            currentfile = os.path.join(currentdir, file)
            # get modified time for files
            ftime = os.path.getmtime(currentfile)
            if time.time() - ftime <= 30:
                print("Found modification in last 30 seconds for file =>", currentfile, time.ctime(ftime))
                trigger()
                sys.exit(0)
            else:
                print('No recent modifications: ' + currentfile)
        for folder in dirs:
            currentfolder = os.path.join(currentdir, folder)
            # get modified time for directories
            dtime = os.stat(currentfolder)[stat.ST_MTIME]
            if time.time() - dtime <= 30:
                print("Found modification in last 30 seconds for folder =>", currentfolder, time.ctime(dtime))
                trigger()
                sys.exit(0)
            else:
                print('No recent modifications: ' + currentfolder)

if __name__ == "__main__":
    check_dir('yourdirectoryhere')
I did some light testing on my own system and it seemed to work perfectly. You might want to add back the while loop, but it should work.
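If you do want continuous polling, a minimal sketch (reusing check_dir from above; the 4-second poll interval is carried over from the original script):

import time

POLL_INTERVAL = 4  # seconds between scans, as in the original while loop

if __name__ == "__main__":
    # check_dir() calls sys.exit() on the first hit, so this loop ends
    # as soon as a recent modification is found.
    while True:
        check_dir('yourdirectoryhere')
        time.sleep(POLL_INTERVAL)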

Related

Monitoring Folders - Python

I am trying to write a Python script which will monitor folders. The files are being written into the folder by a third-party GUI-based program. Some exported files grow in situ, and others are written to a tmp folder elsewhere before being copied into the target folder. In the tmp-folder cases, an empty folder is placed at the target destination until the file is ready to move. There may be multiples of these empty folders, but they are only created after the previous one has been populated.
The code below appears to work well until there are zero-size files/folders.
I think the main issue lies in zero_files. I'm providing the rest for context.
import os
import datetime
import time
import itertools

print('Starting to Monitor File growth')
print(datetime.datetime.now())
print("")

path = os.path.normpath(r'C:\Users\ed\Desktop\Test_Run')
check_rate = 180
# time in seconds between checks

print("Waiting for a moment before starting monitoring")
print("")
time.sleep(60)
# wait for the first files to appear

def get_directory_size(directory):
    """Returns the `directory` size in bytes."""
    total = 0
    try:
        # print("[+] Getting the size of", directory)
        for entry in os.scandir(directory):
            if entry.is_file():
                # if it's a file, use stat() function
                total += entry.stat().st_size
            elif entry.is_dir():
                # if it's a directory, recursively call this function
                try:
                    total += get_directory_size(entry.path)
                except FileNotFoundError:
                    pass
    except NotADirectoryError:
        # if `directory` isn't a directory, get the file size then
        return os.path.getsize(directory)
    except PermissionError:
        # if for whatever reason we can't open the folder, return 0
        return 0
    return total

def folder_growing(path):
    sizes = [1, 2]
    while sizes[-1] > sizes[-2]:
        time.sleep(check_rate)
        sizes.append(get_directory_size(path))
        print('Monitoring Folder')

def zero_files(path):
    files = os.listdir(path)
    a = []
    for i in files:
        file_size = a.append(os.path.getsize(f'{path}\\{i}'))
    a.sort()
    try:
        while a[-1] == 0:
            file_size = a.append(os.path.getsize(f'{path}\\{i}'))
            a.sort()
            print("test")
            time.sleep(120)
    except FileNotFoundError:
        pass

print(f"***Checking folders every", (int(check_rate/60)), "mins***")
get_directory_size(path)
folder_growing(path)
time.sleep(120)
zero_files(path)

wait = 10
print('No Folder Growth Detected')
print("")
print(f"***Waiting ", (int(wait/60)), "mins for Safety***")
time.sleep(wait)
print("")
print(datetime.datetime.now())
print("Done")

How to delete files based on the creation time for each day in python

I have a system that generates 3 files per minute, every day. The files generated before 4 AM and after 10 PM on any given day are insignificant and consume a lot of space, and I want to get rid of them. Files are generated continuously, so there is almost a month of data files stored in a subfolder for each day. How can I remove the insignificant data files with Python?
My code is as follows:
from datetime import date, timedelta

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = datetime.datetime(2020, 3, 5, 00, 00)
end_date = datetime.datetime(2020, 3, 8, 23, 59)

for single_date in daterange(start_date, end_date):
    fpath = r"C:\Users\basantrp\Desktop\Data Trimming"
    os.chdir(fpath)
    for root, dirs, files in os.walk(fpath):
        for f in files:
            st = os.path.getmtime(fpath)
            print(datetime.datetime.fromtimestamp(st))
            if datetime.datetime.fromtimestamp(st) < (start_date + datetime.timedelta(0, 18000)):
                os.unlink(f)
But this doesn't seem to work. The output from datetime.datetime.fromtimestamp(st) is 2020-03-19 00:16:10.550944, which is not what I want: it is quite close to the time the program was run.
# importing the required modules
import os
import shutil
import time

# main function
def main():
    # initializing the count
    deleted_folders_count = 0
    deleted_files_count = 0

    # specify the path
    path = "/PATH_TO_DELETE"

    # specify the days
    days = 30

    # converting days to seconds
    # time.time() returns current time in seconds
    seconds = time.time() - (days * 24 * 60 * 60)

    # checking whether the file is present in path or not
    if os.path.exists(path):
        # iterating over each and every folder and file in the path
        for root_folder, folders, files in os.walk(path):
            # comparing the days
            if seconds >= get_file_or_folder_age(root_folder):
                # removing the folder
                remove_folder(root_folder)
                deleted_folders_count += 1  # incrementing count
                # breaking after removing the root_folder
                break
            else:
                # checking folder from the root_folder
                for folder in folders:
                    # folder path
                    folder_path = os.path.join(root_folder, folder)
                    # comparing with the days
                    if seconds >= get_file_or_folder_age(folder_path):
                        # invoking the remove_folder function
                        remove_folder(folder_path)
                        deleted_folders_count += 1  # incrementing count

                # checking the current directory files
                for file in files:
                    # file path
                    file_path = os.path.join(root_folder, file)
                    # comparing the days
                    if seconds >= get_file_or_folder_age(file_path):
                        # invoking the remove_file function
                        remove_file(file_path)
                        deleted_files_count += 1  # incrementing count
        else:
            # if the path is not a directory
            # comparing with the days
            if seconds >= get_file_or_folder_age(path):
                # invoking the file
                remove_file(path)
                deleted_files_count += 1  # incrementing count
    else:
        # file/folder is not found
        print(f'"{path}" is not found')

    print(f"Total folders deleted: {deleted_folders_count}")
    print(f"Total files deleted: {deleted_files_count}")

def remove_folder(path):
    # removing the folder
    if not shutil.rmtree(path):
        # success message
        print(f"{path} is removed successfully")
    else:
        # failure message
        print(f"Unable to delete the {path}")

def remove_file(path):
    # removing the file
    if not os.remove(path):
        # success message
        print(f"{path} is removed successfully")
    else:
        # failure message
        print(f"Unable to delete the {path}")

def get_file_or_folder_age(path):
    # getting ctime of the file/folder
    # time will be in seconds
    ctime = os.stat(path).st_ctime

    # returning the time
    return ctime

if __name__ == '__main__':
    main()
You need to adjust the following two variables in the above code based on your requirements.
days = 30
path = "/PATH_TO_DELETE"
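The script above removes entries older than a given number of days. For the hour-window part of the original question (keep only files generated between 4 AM and 10 PM), a minimal sketch could look like this, assuming each file's mtime reflects when it was generated (the path is the placeholder from the question):

import os
import datetime

root_dir = r"C:\Users\basantrp\Desktop\Data Trimming"  # placeholder path from the question

for root, dirs, files in os.walk(root_dir):
    for name in files:
        full_path = os.path.join(root, name)
        mtime = datetime.datetime.fromtimestamp(os.path.getmtime(full_path))
        # keep files modified between 04:00 and 22:00, delete everything else
        if not (4 <= mtime.hour < 22):
            os.unlink(full_path)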

How can I improve the speed of data downloading from an ftp server?

I am writing a Python script that downloads data from the National Digital Forecast Database (NDFD) server. The FTP server's files are organized as Year/YearMonth/YearMonthDay, and I have to download one file from each day's folder, go up a folder, download the next day, and so on. My current code is very slow: it takes around 20 seconds to download one day's data, which translates to about 2 hours for one year. I would like it to be much faster. My code is below.
from ftplib import FTP
import ftplib
import os
import datetime as dt
import pandas as pd
import time

def ndfd_download(keyword, days_, forecast_hour):
    # search for the files between 30 minutes on either side
    # of the forecast hour; the -40 is to convert 100 to 60 minutes
    time_start = int(float(forecast_hour) * 100 - 30 - 40)
    time_end = int(float(forecast_hour) * 100 + 30)

    print('Starting connection to NOAA database')

    # Try connecting to the NCDC server
    try:
        ftp = FTP('nomads.ncdc.noaa.gov')
        ftp.login()
        print('Connect successful')
    except ftplib.all_errors as e:
        errorcode_string = str(e).split(None, 1)[0]
        print(errorcode_string)

    ftp.cwd('/NDFD/')
    print('Current working directory is %s' % ftp.pwd())

    # go through all the days
    for day_ in days_:
        start = time.time()

        # get year, month, day information from day_
        year = "{:02d}".format(day_.year)
        year_month = "{:02d}".format(day_.year) + "{:02d}".format(day_.month)
        year_month_day = "{:02d}".format(day_.year) + "{:02d}".format(day_.month) + "{:02d}".format(day_.day)

        try:
            # Change to the desired NDFD directory to get your data
            # print('Changing directory to "/NDFD/{}/{}/"'.format(month, day))
            ftp.cwd('/NDFD/{}/{}/'.format(year_month, year_month_day))

            # getting names of all files in the current working directory
            all_files = ftp.nlst()

            # filtering all the files with desired keyword
            all_files = [key for key in (all_files) if key.startswith(keyword)]

            # creating a directory to store the data
            directoryName = '{}/{}/{}'.format(year, year_month, year_month_day)
            if not os.path.exists(directoryName):
                os.makedirs(directoryName)

            # Move into the folder
            directoryPath = '%s/%s' % (os.getcwd(), directoryName)
            os.chdir(directoryPath)

            print('Downloading data for {}'.format(year_month_day))

            # go through all the files in the directory
            for f in all_files:
                # get the last 4 characters of file name
                # they contain the time of forecast
                file_time = float(f[-4:])

                # check if time of forecast is within our bounds
                if (file_time <= time_end and file_time >= time_start):
                    # open a new file
                    file = open(f, 'wb')
                    try:
                        # save the file with the same name
                        ftp.retrbinary('RETR %s' % f, file.write)
                        # print('Successfully downloaded: {}'.format(f))
                    except ftplib.all_errors as e:
                        errorcode_string = str(e).split(None, 1)[0]
                        print('Error', errorcode_string)
                    file.close()

            # going 3 directories up
            os.chdir("../../..")
        except ftplib.error_perm as e:
            errorcode_string = str(e).split(None, 1)[0]
            print('Error', e)

        print(time.time() - start)

    ftp.close()

if __name__ == "__main__":
    keyword = "YAUZ98"
    years = [2018]
    for year in years:
        month = 1
        day = 30
        days_ = []
        # no_of_days = 366 if calendar.isleap(year) else 365
        no_of_days = 100
        t = dt.datetime(year, month, day)
        for i in range(no_of_days):
            days_.append((t))
            t = t + dt.timedelta(days=1)
        forecast_hour = '14'
        ndfd_download(keyword, days_, forecast_hour)
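Since most of the per-day time is spent waiting on the network, one common way to speed this up is to download several days concurrently, with each worker holding its own FTP connection (a single ftplib.FTP object is not safe to share between threads). A minimal sketch of the idea, reusing days_, keyword, and forecast_hour from the script above; download_one_day is a hypothetical helper that would hold the per-day logic from ndfd_download, and the worker count is arbitrary:

from concurrent.futures import ThreadPoolExecutor
from ftplib import FTP

def download_one_day(day_, keyword, forecast_hour):
    # Hypothetical helper: open a connection, fetch one day's files, close.
    ftp = FTP('nomads.ncdc.noaa.gov')
    ftp.login()
    try:
        # ... the per-day cwd/nlst/retrbinary logic from ndfd_download ...
        pass
    finally:
        ftp.close()

# Fetch up to 8 days in parallel, one connection per worker thread.
with ThreadPoolExecutor(max_workers=8) as pool:
    for day_ in days_:
        pool.submit(download_one_day, day_, keyword, forecast_hour)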

Moving files by creation/modification date with Python

I am new to programming, even more so with Python, so please excuse any ignorance on my part. I am trying to write a script for myself that will move files that have been modified in the last 24 hours. So far I have come up with this:
import datetime
import os
import shutil

src = "C:\Users\Student\Desktop\FolderA"
dst = "C:\Users\Student\Desktop\FolderB"

now = dt.datetime.now()
before = now - dt.timedelta(hours=24)

def mins_since_mod(fname):
    return (os.path.getmtime(fname))

for fname in os.listdir(src):
    if mins_since_mod > before:
        src_fname = os.path.join(src, fname)
        os.path.join(dst, fname)
        shutil.move(src_fname, dst)
I know I'm close to the solution, but I can't seem to figure out how to get this to work. I looked around the community here and was not able to find a solution to my problem. Thank you for any leads or suggestions.
There are a few things to change. First, you can't compare the datetime in before to the Unix timestamp that getmtime() returns. It's easier to just use that directly. Also, you actually need to pass the (full) filename to mins_since_mod() for it to do anything.
Here's something that should work, changing the name of mins_since_mod() to reflect what it does better:
import time
import os
import shutil

SECONDS_IN_DAY = 24 * 60 * 60

src = r"C:\Users\Student\Desktop\FolderA"
dst = r"C:\Users\Student\Desktop\FolderB"

now = time.time()
before = now - SECONDS_IN_DAY

def last_mod_time(fname):
    return os.path.getmtime(fname)

for fname in os.listdir(src):
    src_fname = os.path.join(src, fname)
    if last_mod_time(src_fname) > before:
        dst_fname = os.path.join(dst, fname)
        shutil.move(src_fname, dst_fname)
Hey mate, I have actually just done something like this myself. I found that there were a few issues with the time comparison, as well as some issues in comparing and moving folders.
Try this:
import os
import shutil
import time
import datetime

def filter_by_date(src_folder, archive_date):
    os.chdir(src_folder)
    delay_time = 24 * 60 * 60
    archive_period = archive_date - delay_time
    return [
        name for name in os.listdir(u'.')
        if os.path.isdir(name)
        # compare epoch seconds with epoch seconds
        and os.path.getmtime(name) < archive_period
    ]

if __name__ == '__main__':
    folders = filter_by_date("C:/Users/Student/Desktop/FolderA", time.time())
    for files in folders:
        print files
        try:
            shutil.copytree(files, os.path.join("C:/Users/Student/Desktop/New", files))
        except OSError as e:
            print('\nDirectory not copied. Error: %s' % e)
        except shutil.Error as e:
            try:
                files = files.encode('UTF-8')
                dst_path = os.path.join('C:/Users/Student/Desktop/FolderB/', files)
                shutil.copytree(files, dst_path)
            finally:
                print('\nDirectory not copied. Error: %s' % e)
    print "\nCompleted"
This will ensure that any file name (including Chinese, Russian, and Japanese names) and any folder (directory or sub-directory) is copied. It will also keep all file attributes.

Python processing a range of numbered (dated) files in a directory

I am trying to find a range of specific files in a directory using python 2.7.
I have many files in a directory that are named like AB_yyyyjjjhhmmss_001.txt, where y is year, j is Julian day, h is hour, and so on. Each timestamp corresponds to the time some data was taken, not necessarily the time the file was created or manipulated. I'd like to pick out a range of time, say from 2013305010000 to 2013306123000, and process those files.
I have something like,
import glob

def get_time(start_time=None, end_time=None):
    if start_time == None:
        start_time = input("start: ")
    if end_time == None:
        end_time = input("end: ")
    duration = str(start_time) + "-" + str(end_time)
    listing = glob.glob("*_[" + duration + "]_*")
I learned that [ ] only matches a single character, so I am totally off track here. I also tried the {start_time..end_time} combo, to no avail.
If all files have the same structure, you can simply write:
import os
import re
import sys

start = sys.argv[1]
end = sys.argv[2]

for filename in os.listdir('test'):
    if start <= filename.split('_')[1] <= end:
        print "Process %s" % filename
Example:
$ ls test
AB_2013105010000_001.txt AB_2013305010000_001.txt AB_2013306103000_001.txt
AB_2013306123000_001.txt AB_2013316103000_001.txt
$ python t.py 2013305010000 2013306123000
Process AB_2013305010000_001.txt
Process AB_2013306103000_001.txt
Process AB_2013306123000_001.txt
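This works because the timestamp in the filename is fixed-width and zero-padded, so plain string comparison orders the filenames the same way numeric comparison of the timestamps would:

>>> "2013305010000" <= "2013306103000" <= "2013306123000"
True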
I might try
import re
import os
import datetime

def filename_to_datetime(filename):
    filedate = re.match(r'.*(\d{13}).*', filename)
    if filedate:
        return datetime.datetime.strptime(filedate.group(1), '%Y%j%H%M%S')
    else:
        raise ValueError("File has wrong format!")

def get_time(start_time, end_time):
    return [filename for filename in os.listdir('.') if
            start_time < filename_to_datetime(filename) < end_time]
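A quick usage sketch (the bounds are the example timestamps from the question; note that filename_to_datetime raises ValueError for any file in the directory that lacks a 13-digit timestamp):

import datetime

start = datetime.datetime.strptime('2013305010000', '%Y%j%H%M%S')
end = datetime.datetime.strptime('2013306123000', '%Y%j%H%M%S')
print(get_time(start, end))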
