Copy files with their creation date in a specific range - python

I am currently trying to copy files from one folder to another using shutil, but I can't seem to get it to work: the process says it has finished, but nothing happens.
So far I have added a raw_input prompt that lets the user choose the file extension.
The next criterion I am looking to add is a date range, so that I can choose, for example,
17/07/2020 to 04/08/2020, or just the current day's date.
*UPDATED CODE
import os
import shutil
import os.path, time
from pip._vendor.distlib.compat import raw_input
os.chdir('C://')
src = ("C:/Users/eldri/OneDrive/Desktop/")
dst = ("C:/Users/eldri/OneDrive/Desktop/output")
ext = raw_input("[+] File format: ")
created = (" last modified: %s" % time.ctime(os.path.getmtime(src)))
start = raw_input("[+] Date start: ")
end = raw_input("[+] Date end: ")
def date_to_num(date):
    """Convert a ``DD/MM/YYYY`` date string into the sortable integer ``YYYYMMDD``.

    Day and month may be written with or without a leading zero, so
    ``"3/2/2020"`` and ``"03/02/2020"`` both give ``20200203``.

    Raises:
        ValueError: if *date* is not three ``/``-separated integers.
    """
    day, month, year = (int(part) for part in date.split('/'))
    # Weight the fields instead of concatenating the raw strings, so the
    # result stays correctly ordered when day/month are not zero-padded.
    return year * 10000 + month * 100 + day


def date_in_range(date, start, end):
    """Return True when *date* lies between *start* and *end*, inclusive.

    All three arguments are ``DD/MM/YYYY`` strings. The bounds are inclusive
    so that a single-day range (``start == end``) matches that day.
    """
    return date_to_num(start) <= date_to_num(date) <= date_to_num(end)
for filename in os.listdir(src):
if filename.endswith('.'+ext) and created.startswith(start) and created.endswith(end):
shutil.copy( src + filename, dst)
print("[+] File transferred "+filename + created)
else:
print("[+] File not transferred "+filename + created)
print("[+] Transfer complete")
I was looking at maybe pandas? but not sure as still quite new to python.
example on terminal
file extension = .csv
startdate = 12/05/2020
enddate = 07/08/2020
once the user has input these fields it would copy only the required files over.
The current output of the created files are:
[+] File transferred BASE1011.xls last modified: Fri Jul 17 10:11:40 2020
[+] File transferred BASE1112.xls last modified: Fri Jul 17 10:11:40 2020
[+] File transferred BASE1213.xls last modified: Fri Jul 17 10:11:40 2020
[+] File transferred BASE1314.xls last modified: Fri Jul 17 10:11:40 2020
[+] File transferred BASE1415.xls last modified: Fri Jul 17 10:11:40 2020
I want these to be in an easier format for user input as explained above:
example: start 12/05/2020 end date = 07/08/2020
Thank you for your help, I am not the best at python but I am trying to learn so any help would be amazing.
Thanks

I've carried on your work using time.ctime(os.path.getmtime(src + filename)) and created a function dateRange(createdDate, startDate, endDate) that uses datetime to convert the strings into datetime objects and returns True if the file's date falls between the start and end dates, and False otherwise.
import os
import shutil
import time
from datetime import datetime
src = "C:/Users/eldri/OneDrive/Desktop/"
dst = "C:/Users/eldri/OneDrive/Desktop/output"
ext = input("[+] File format: ") # "txt"
start = input("[+] Date start: ") # "01/07/2020"
end = input("[+] Date end: ") # "30/07/2020"
def dateRange(createdDate, startDate, endDate):
    """Determine whether a file timestamp falls within a date range.

    Args:
        createdDate: timestamp in ``time.ctime()`` format,
            e.g. ``"Fri Jul 17 10:11:40 2020"``.
        startDate: first day of the range, as ``DD/MM/YYYY``.
        endDate: last day of the range, as ``DD/MM/YYYY``.

    Returns:
        True if the calendar day of *createdDate* lies between *startDate*
        and *endDate*, both inclusive; False otherwise.

    Raises:
        ValueError: if any argument does not match its expected format.
    """
    created = datetime.strptime(createdDate, '%a %b %d %H:%M:%S %Y').date()
    first = datetime.strptime(startDate, '%d/%m/%Y').date()
    last = datetime.strptime(endDate, '%d/%m/%Y').date()
    # Compare whole days. The bounds parse to midnight, so comparing full
    # timestamps would exclude every file modified during the end day and
    # would make a one-day range (start == end) impossible to match.
    return first <= created <= last
# Make sure the destination is a directory; otherwise shutil.copy would
# write every match to a single file named after dst.
os.makedirs(dst, exist_ok=True)

for filename in os.listdir(src):
    path = os.path.join(src, filename)
    # ctime-formatted modification time, the format dateRange() parses.
    created = time.ctime(os.path.getmtime(path))
    wanted = (
        os.path.isfile(path)  # skip sub-directories such as dst itself
        and filename.endswith('.' + ext)
        and dateRange(created, start, end)
    )
    if wanted:
        shutil.copy(path, dst)
        print("[+] File transferred " + filename + " " + created)
    else:
        print("[+] File not transferred " + filename + " " + created)
print("[+] Transfer complete")
I've added examples at the end of the ext, start, and end variables to provide an idea of the format

For a specific range you can:
create a function that parses the date into a number:
def date_to_num(date):
    """Convert a ``DD/MM/YYYY`` date string into the sortable integer ``YYYYMMDD``.

    Day and month may be written with or without a leading zero, so
    ``"3/2/2020"`` and ``"03/02/2020"`` both give ``20200203``.

    Raises:
        ValueError: if *date* is not three ``/``-separated integers.
    """
    day, month, year = (int(part) for part in date.split('/'))
    # Weight the fields instead of concatenating the raw strings, so the
    # result stays correctly ordered when day/month are not zero-padded.
    return year * 10000 + month * 100 + day


def date_in_range(date, start, end):
    """Return True when *date* lies between *start* and *end*, inclusive.

    All three arguments are ``DD/MM/YYYY`` strings. The bounds are inclusive
    so that a single-day range (``start == end``) matches that day.
    """
    return date_to_num(start) <= date_to_num(date) <= date_to_num(end)
And then use it like that:
date_in_range("03/02/2020", "01/01/2020", "05/05/2020")

Related

Why does this script move all files instead of just the newest files?

import os
import datetime
import shutil
source = 'C:/Users/user/Desktop/Files to move/'
destination = 'C:/Users/user/Desktop/Delete Logs/'
today = datetime.datetime.today() # Get current time
# Create log file with datestamp
file=open(logging_path+datetime.datetime.today().strftime('%d-%m-%Y')+'.txt', 'a')
# Move files
allfiles = os.listdir(source)
for f in allfiles:
# Check last modified time
t = os.stat(os.path.join(f, source))[8]
filetime = datetime.datetime.fromtimestamp(t) - today
# Is file less than a day old? If yes, move.
if filetime.days <= 1:
print(os.path.join(f, source), filetime.days)
file.write(os.path.join(f, source) + ' created ' + str(-1 * filetime.days)+' day(s) ago has moved\n')
shutil.move(source + f, destination + f)
Like the title says, I wrote this trying to move files less than a day old to a new location on a different disk but it moves all files in the location instead of the newest files. What am I doing wrong?
Your code works fine; you just need to add a '-' sign in the last condition, because filetime.days is negative (-1 for a file modified within the last day). Your condition becomes:
if -filetime.days <= 1:
You have also swapped source and the file name in the os.path.join call just after the for loop:
import os
import datetime
import shutil
source = 'f2/'
destination = 'f1/'
today = datetime.datetime.today()  # Get current time

# Create log file with datestamp.
# NOTE(review): logging_path is not defined in this snippet; it must be set
# earlier in the real script -- confirm before running.
log_name = logging_path + today.strftime('%d-%m-%Y') + '.txt'

# Move files
allfiles = os.listdir(source)
# The with-block guarantees the log is flushed and closed even if a move fails.
with open(log_name, 'a') as log_file:
    for f in allfiles:
        # Directory first, file name second -- the reverse order yields a
        # path that does not exist.
        path = os.path.join(source, f)
        # Check last modified time (index 8 of the stat result is st_mtime)
        t = os.stat(path)[8]
        # mtime minus now is negative for any existing file, hence the
        # sign flip in the condition below.
        filetime = datetime.datetime.fromtimestamp(t) - today
        print(filetime.days, type(filetime.days), t, f)
        # Is file less than a day old? If yes, move.
        if -filetime.days <= 1:  # ==============> Here
            print(path, filetime.days)
            log_file.write(path + ' created ' + str(-1 * filetime.days) + ' day(s) ago has moved\n')
            shutil.move(path, os.path.join(destination, f))

How to remove 6 month before logs using python script

I have Logs_26052021.tar.xz
Description: 26052021--> date: 26, month: 05, year: 2021
And want to permanently remove this kind of files from location using python script.
its on Unix server, version: Linux localhost 3.10.0-1160.21.1.el7.x86_64
what should I do.
Thanks In Advance
Here is a solution with the work separated into functions. Tested on Python 3.9.6.
import os
from datetime import datetime
def get_files_from_path(path: str) -> list:
    """Collect the archived log files found anywhere below *path*.

    Args:
        path: directory to search recursively.

    Returns:
        Paths (prefixed with *path*) of every file whose name starts with
        ``Logs_`` and ends with ``.tar.xz``. Empty if *path* does not exist.
    """
    result = []
    for subdir, _dirs, files in os.walk(path):
        for filename in files:
            if not (filename.startswith('Logs_') and filename.endswith('.tar.xz')):
                continue
            # os.path.join avoids a doubled separator when subdir already
            # ends with one.
            result.append(os.path.join(subdir, filename))
    return result
def get_old_files(filelist: list, max_days=184) -> list:
    """Select the log archives whose embedded date is older than *max_days*.

    Args:
        filelist: paths to files named ``Logs_DDMMYYYY.tar.xz``.
        max_days: age threshold in days (default 184, roughly six months).

    Returns:
        The entries of *filelist*, in order, whose date is more than
        *max_days* days before now. Entries whose name does not follow the
        ``Logs_DDMMYYYY.tar.xz`` pattern are left out.
    """
    prefix, suffix = 'Logs_', '.tar.xz'
    currentdate = datetime.now()
    result = []
    for file in filelist:
        # Parse the file name only: a parent directory that contains
        # "Logs_" must not be mistaken for the date part.
        name = os.path.basename(file)
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        datestr = name[len(prefix):-len(suffix)]
        try:
            filedate = datetime.strptime(datestr, '%d%m%Y')
        except ValueError:
            # Unreadable date: never report the file as old, so it cannot
            # be deleted by mistake, and keep processing the others.
            continue
        if (currentdate - filedate).days > max_days:
            result.append(file)
    return result
def delete_files(filelist: list):
    """Permanently delete every path in *filelist*.

    Raises:
        OSError: propagated from ``os.remove`` (e.g. missing file or
            insufficient permissions); files listed after the failing one
            are not removed.
    """
    for file in filelist:
        os.remove(file)
if __name__ == '__main__':
    # Deletion is irreversible, so run the clean-up only when this file is
    # executed as a script, never as a side effect of importing it.
    logfiles = get_files_from_path('testing')
    oldfiles = get_old_files(logfiles)
    delete_files(oldfiles)
related documentation:
Iterate over files
strptime behavior
timedelta for subtraction of dates
File deletion
Found Answer,
find /path/Logs/ -name "*.log.*" -type f -mtime +180
But while executing this command getting SyntaxError: invalid syntax for "*.log.*"
How should I execute this one using python ?

Incrementing a file name in python

I am writing code that generates a new text file named with today's date each time it is run. For example, today's file name would be 2020-10-05. I would like to increment it so that if the program is run more than once on the same day, the name becomes 2020-10-05_1, _2, etc.
I have this code that I found from another question and i've tried tinkering with it but I'm still stuck. The problem is here they convert the file name to an int 1,2,3 and this way it works but this isn't the result I want.
def incrementfile():
todayday = datetime.datetime.today().date()
output_folder = "//10.2.30.61/c$/Qlikview_Tropal/Raport/"
highest_num = 0
for f in os.listdir(output_folder):
if os.path.isfile(os.path.join(output_folder, f)):
file_name = os.path.splitext(f)[0]
try:
file_num = int(file_name)
if file_num > highest_num:
highest_num = file_num
except ValueError:
print("The file name %s is not an integer. Skipping" % file_name)
output_file = os.path.join(output_folder, str(highest_num + 1) + f"{todayday}" + ".txt")
return output_file
How can I modify this code so that the output I get in the end is something like 2020-10-05_0, _1, _2 etc.. ?
Thanks !
I strongly recommend you to use pathlib instead of os.path.join. This is more convenient.
def incrementfile(output_folder="/tmp"):
    """Return a not-yet-existing path ``<today>_<n>.txt`` inside *output_folder*.

    Args:
        output_folder: directory in which the file will be created
            (defaults to ``/tmp``; pass your own output folder).

    Returns:
        A ``pathlib.Path``. ``n`` starts at the number of entries whose name
        begins with today's date, plus one, and is increased until the name
        is unused.
    """
    # Imported locally so the snippet is self-contained.
    import datetime
    import pathlib

    td = datetime.date.today()
    path = pathlib.Path(output_folder)
    inc = len(list(path.glob(f"{td}*"))) + 1
    outfile = path / f"{td}_{inc}.txt"
    # Counting alone can land on an existing name once an earlier file has
    # been deleted, so keep incrementing until the name is free.
    while outfile.exists():
        inc += 1
        outfile = path / f"{td}_{inc}.txt"
    return outfile
Not a direct answer to your question, but instead of using _1, _2 etc, you could use a full timestamp with date and current time, which would avoid duplication, EG:
from datetime import datetime
# Build a file-name-safe timestamp: ':' becomes '-' and the space between
# date and time becomes '_', both in a single translate() pass.
t = str(datetime.now()).translate(str.maketrans(": ", "-_"))
print(t)
Example output:
2020-10-05_13-06-53.825870
I think this will work-
import os
import datetime
#assuming files will be .txt format
def incrementfile(output_folder="//10.2.30.61/c$/Qlikview_Tropal/Raport/"):
    """Return the first unused file name ``YYYY-MM-DD_<n>.txt`` for today.

    Args:
        output_folder: directory whose existing files are checked.

    Returns:
        The bare file name (not joined with *output_folder*), with ``n``
        starting at 0 and counting up until no file of that name exists.
    """
    files = set(os.listdir(output_folder))
    today = datetime.date.today().strftime('%Y-%m-%d')
    current_num = 0
    # Rebuild the candidate from the date each time; appending to the
    # previous candidate would produce names like "..._0_1_2".
    while f"{today}_{current_num}.txt" in files:
        current_num += 1
    return f"{today}_{current_num}.txt"

Python FTP get the most recent file by date

I am using ftplib to connect to an ftp site. I want to get the most recently uploaded file and download it. I am able to connect to the ftp server and list the files, I also have put them in a list and got the datefield converted. Is there any function/module which can get the recent date and output the whole line from the list?
#!/usr/bin/env python
import ftplib
import os
import socket
import sys
HOST = 'test'
def main():
try:
f = ftplib.FTP(HOST)
except (socket.error, socket.gaierror), e:
print 'cannot reach to %s' % HOST
return
print "Connect to ftp server"
try:
f.login('anonymous','al#ge.com')
except ftplib.error_perm:
print 'cannot login anonymously'
f.quit()
return
print "logged on to the ftp server"
data = []
f.dir(data.append)
for line in data:
datestr = ' '.join(line.split()[0:2])
orig-date = time.strptime(datestr, '%d-%m-%y %H:%M%p')
f.quit()
return
if __name__ == '__main__':
main()
RESOLVED:
data = []
f.dir(data.append)
datelist = []
filelist = []
for line in data:
col = line.split()
datestr = ' '.join(line.split()[0:2])
date = time.strptime(datestr, '%m-%d-%y %H:%M%p')
datelist.append(date)
filelist.append(col[3])
combo = zip(datelist,filelist)
who = dict(combo)
for key in sorted(who.iterkeys(), reverse=True):
print "%s: %s" % (key,who[key])
filename = who[key]
print "file to download is %s" % filename
try:
f.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
except ftplib.err_perm:
print "Error: cannot read file %s" % filename
os.unlink(filename)
else:
print "***Downloaded*** %s " % filename
return
f.quit()
return
One problem, is it possible to retrieve the first element from the dictionary? what I did here is that the for loop runs only once and exits thereby giving me the first sorted value which is fine, but I don't think it is a good practice to do it in this way..
For those looking for a full solution for finding the latest file in a folder:
MLSD
If your FTP server supports MLSD command, a solution is easy:
entries = list(ftp.mlsd())
entries.sort(key = lambda entry: entry[1]['modify'], reverse = True)
latest_name = entries[0][0]
print(latest_name)
LIST
If you need to rely on an obsolete LIST command, you have to parse a proprietary listing it returns.
Common *nix listing is like:
-rw-r--r-- 1 user group 4467 Mar 27 2018 file1.zip
-rw-r--r-- 1 user group 124529 Jun 18 15:31 file2.zip
With a listing like this, this code will do:
from dateutil import parser
# ...
lines = []
ftp.dir("", lines.append)

latest_time = None
latest_name = None

for line in lines:
    # Eight splits leave the ninth token as "everything that remains", so
    # file names containing spaces are kept whole.
    tokens = line.split(maxsplit=8)
    if len(tokens) < 9:
        # Not a file entry (e.g. a "total 12" summary line).
        continue
    time_str = " ".join(tokens[5:8])
    # Named "modified" rather than "time" to avoid shadowing the time module.
    modified = parser.parse(time_str)
    if (latest_time is None) or (modified > latest_time):
        latest_name = tokens[8]
        latest_time = modified

print(latest_name)
This is a rather fragile approach.
MDTM
A more reliable, but far less efficient, approach is to use the MDTM command to retrieve the timestamps of individual files/folders:
names = ftp.nlst()
latest_time = None
latest_name = None
for name in names:
time = ftp.voidcmd("MDTM " + name)
if (latest_time is None) or (time > latest_time):
latest_name = name
latest_time = time
print(latest_name)
For an alternative version of the code, see the answer by @Paulo.
Non-standard -t switch
Some FTP servers support a proprietary non-standard -t switch for NLST (or LIST) command.
lines = ftp.nlst("-t")
latest_name = lines[-1]
See How to get files in FTP folder sorted by modification time.
Downloading found file
No matter what approach you use, once you have the latest_name, you download it as any other file:
with open(latest_name, 'wb') as f:
ftp.retrbinary('RETR '+ latest_name, f.write)
See also
Get the latest FTP folder name in Python
How to get FTP file's modify time using Python ftplib
Why don't you use next dir option?
ftp.dir('-t',data.append)
With this option the file listing is time ordered from newest to oldest. Then just retrieve the first file in the list to download it.
With NLST, like shown in Martin Prikryl's response,
you should use sorted method:
ftp = FTP(host="127.0.0.1", user="u",passwd="p")
ftp.cwd("/data")
file_name = sorted(ftp.nlst(), key=lambda x: ftp.voidcmd(f"MDTM {x}"))[-1]
If you have all the dates in time.struct_time (strptime will give you this) in a list then all you have to do is sort the list.
Here's an example :
#!/usr/bin/python
import time
dates = [
"Jan 16 18:35 2012",
"Aug 16 21:14 2012",
"Dec 05 22:27 2012",
"Jan 22 19:42 2012",
"Jan 24 00:49 2012",
"Dec 15 22:41 2012",
"Dec 13 01:41 2012",
"Dec 24 01:23 2012",
"Jan 21 00:35 2012",
"Jan 16 18:35 2012",
]
def main():
    """Parse the sample ``dates`` and print them, first as given, then sorted.

    Each string is parsed with ``time.strptime``; the resulting
    ``struct_time`` values compare field by field starting with the year,
    so a plain ``sort()`` orders them from oldest to newest.
    """
    datelist = [time.strptime(date, '%b %d %H:%M %Y') for date in dates]
    # print() with one argument behaves the same on Python 2 and Python 3.
    print(datelist)
    datelist.sort()
    print(datelist)
if __name__ == '__main__':
main()
I don't know what your FTP server's listing looks like, but your example was not working for me. I changed some lines related to the date-sorting part:
import sys
from ftplib import FTP
import os
import socket
import time
# Connects to the ftp
ftp = FTP(ftpHost)
ftp.login(yourUserName,yourPassword)
data = []
datelist = []
filelist = []
ftp.dir(data.append)
for line in data:
col = line.split()
datestr = ' '.join(line.split()[5:8])
date = time.strptime(datestr, '%b %d %H:%M')
datelist.append(date)
filelist.append(col[8])
combo = zip(datelist,filelist)
who = dict(combo)
for key in sorted(who.iterkeys(), reverse=True):
print "%s: %s" % (key,who[key])
filename = who[key]
print "file to download is %s" % filename
try:
ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
except ftplib.err_perm:
print "Error: cannot read file %s" % filename
os.unlink(filename)
else:
print "***Downloaded*** %s " % filename
ftp.quit()

How to append a file's creation date to its filename?

I would like to create a Python script that
appends the file's creation date to the end of the filename, while retaining the original file name (Report), for a batch of PDF documents.
directory = T:\WISAARD_Web Portal Projects\PortalLogging\WebLogExpert
filenames = Report.pdf
import os,time
root = "/home"
path = os.path.join(root, "dir1")
os.chdir(path)
for files in os.listdir("."):
    if files.endswith(".pdf"):
        f, ext = os.path.splitext(files)
        # NOTE: getmtime() is the last-modification time, used here as a
        # stand-in for the creation date.
        # Format as YYYY-MM-DD. Picking fields out of time.ctime() would put
        # the HH:MM:SS part, colons included, into the file name.
        filedate = time.strftime("%Y-%m-%d", time.localtime(os.path.getmtime(files)))
        newname = f + filedate + ext
        try:
            os.rename(files, newname)
        except OSError as e:
            # Report the failure and carry on with the remaining files.
            print(e)
        else:
            print("ok: renamed %s to %s " % (files, newname))

Categories

Resources