I have the following script -
import os
import stat
import zipfile
from datetime import datetime, timedelta
import logging

logfile = 'D:\\logfiles\\MasterLogsArchive\\archive.log'
logging.basicConfig(filename=logfile, format='%(asctime)s %(message)s', level=logging.DEBUG)

try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except ImportError:
    compression = zipfile.ZIP_STORED

modes = {zipfile.ZIP_DEFLATED: 'deflated',
         zipfile.ZIP_STORED: 'stored',
         }

def modified_date(filename):
    return datetime.fromtimestamp(os.stat(filename)[stat.ST_MTIME])

def find_between(s, first, last):
    try:
        start = s.index(first) + len(first)
        end = s.index(last, start)
        return s[start:end]
    except ValueError:
        return ""

move_date = datetime.now() - timedelta(minutes=2)
src = "D:\\program files (x86)\\TIDAL\\Scheduler\\Master\\log"

for filename in os.listdir(src):
    full_filename = os.path.join(src, filename)
    scheduler = os.path.join(src, 'scheduler.out')
    if modified_date(full_filename) < move_date and filename.startswith('Master'):
        filedate = find_between(filename, '-', '.')[:-7]
        date = filedate[:-2]
        year = filedate[:-6]
        month = filedate[4:-4]
        day = filedate[6:-2]
        hour = filedate[8:]
        dest = "D:\\logfiles\\MasterLogsArchive\\" + date
        if not os.path.exists(dest):
            os.makedirs(dest)
        zf = dest + '\\' + 'Master%s%s%s-%s.zip' % (year, month, day, hour)
        ## add Master Logs
        if os.path.isfile(full_filename):
            if os.path.isfile(zf):
                try:
                    logging.info('%s is archived' % full_filename)
                    zip = zipfile.ZipFile(zf, mode='a')
                    zip.write(full_filename, compress_type=compression)
                    os.remove(full_filename)
                finally:
                    zip.close()
            else:
                try:
                    logging.info('%s is archived' % full_filename)
                    zip = zipfile.ZipFile(zf, mode='w')
                    zip.write(full_filename, compress_type=compression)
                    os.remove(full_filename)
                finally:
                    zip.close()
The problem I'm having is that the archive entry keeps the full path, which I don't want; I only want the bare file name inside the zip. If I change zip.write to use filename instead of full_filename, it then complains that it can't find the file.
So how do I tell write which folder to read the file from?
The actual write needs to be changed to the following -
zip.write(full_filename, os.path.basename(full_filename), compress_type=compression)
Got the answer from here -
How can I zip file with a flattened directory structure using Zipfile in Python?
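For reference, here is a minimal self-contained sketch of the same idea: the second argument to ZipFile.write (arcname) controls the name stored inside the archive, so passing the basename flattens the directory structure (the path below is hypothetical):

import os
import zipfile

full_filename = 'D:\\logfiles\\example\\Master20180216-01.log'  # hypothetical path
with zipfile.ZipFile('archive.zip', mode='a', compression=zipfile.ZIP_DEFLATED) as zf:
    # arcname controls the name recorded inside the archive
    zf.write(full_filename, arcname=os.path.basename(full_filename))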
import os
import datetime
import shutil

source = 'C:/Users/user/Desktop/Files to move/'
destination = 'C:/Users/user/Desktop/Delete Logs/'
today = datetime.datetime.today()  # Get current time

# Create log file with datestamp (logging_path is assumed to be defined elsewhere)
file = open(logging_path + datetime.datetime.today().strftime('%d-%m-%Y') + '.txt', 'a')

# Move files
allfiles = os.listdir(source)
for f in allfiles:
    # Check last modified time
    t = os.stat(os.path.join(f, source))[8]
    filetime = datetime.datetime.fromtimestamp(t) - today
    # Is file less than a day old? If yes, move.
    if filetime.days <= 1:
        print(os.path.join(f, source), filetime.days)
        file.write(os.path.join(f, source) + ' created ' + str(-1 * filetime.days) + ' day(s) ago has moved\n')
        shutil.move(source + f, destination + f)
Like the title says, I wrote this to move files less than a day old to a new location on a different disk, but it moves all the files in the location instead of just the newest ones. What am I doing wrong?
Your code almost works; you just need to negate filetime.days in the condition. Because filetime is computed as the file's modification time minus today, it is negative for a recent file, so filetime.days comes out as -1. The condition should be:
if -filetime.days <= 1:
You have also swapped source and the file name in os.path.join just after the for loop; it should be os.path.join(source, f).
import os
import datetime
import shutil

source = 'f2/'
destination = 'f1/'
today = datetime.datetime.today()  # Get current time

# Create log file with datestamp (logging_path is assumed to be defined elsewhere)
file = open(logging_path + datetime.datetime.today().strftime('%d-%m-%Y') + '.txt', 'a')

# Move files
allfiles = os.listdir(source)
for f in allfiles:
    # Check last modified time
    t = os.stat(os.path.join(source, f))[8]
    filetime = datetime.datetime.fromtimestamp(t) - today
    print(filetime.days, type(filetime.days), t, f)
    # Is file less than a day old? If yes, move.
    if -filetime.days <= 1:  # ==============> Here
        print(os.path.join(source, f), filetime.days)
        file.write(os.path.join(source, f) + ' created ' + str(-1 * filetime.days) + ' day(s) ago has moved\n')
        shutil.move(source + f, destination + f)
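A sketch of an arguably clearer variant that avoids the sign juggling entirely by comparing the modification time against a cutoff (the directory names are hypothetical):

import os
import shutil
from datetime import datetime, timedelta

source = 'f2/'       # hypothetical source directory
destination = 'f1/'  # hypothetical destination directory
cutoff = datetime.now() - timedelta(days=1)

for name in os.listdir(source):
    path = os.path.join(source, name)
    # move the file only if it was modified within the last 24 hours
    if os.path.isfile(path) and datetime.fromtimestamp(os.path.getmtime(path)) >= cutoff:
        shutil.move(path, os.path.join(destination, name))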
I have a folder with multiple PDFs with datestamps at the end of their names e.g.
hello_20200820.pdf
hello_20200821.pdf
hello_20200822.pdf
hello_20200717.pdf
I am trying to write a function to remove all the PDFs in the folder other than the TWO most recent PDFs.
The code I have written, however, deletes only the 3rd most recent file, or the oldest file if there are fewer than 3. How can I fix this and remove ALL PDFs with the name 'hello' other than the two most recent?
Here is my code so far:
import os
from datetime import datetime

def remove_old_pdf(wsp, folder):
    date_diff = float('inf')
    today = datetime.now()
    filename = ''
    files = os.listdir('PDFs/' + folder)
    # print(files)
    for file in files:
        if file.endswith('.pdf') and wsp in file:
            date_str = file.split('_')[1].split('.')[0]
            curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
            if date_diff == float('inf') or curr_diff < date_diff:
                date_diff = curr_diff
                filename = file
                # print(filename)
        else:
            pass
    print(filename)
    files.remove(filename)
    # print(files)
    for file in files:
        if file.endswith('.pdf') and wsp in file:
            date_str = file.split('_')[1].split('.')[0]
            curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
            filename = file
        else:
            pass
    if filename in files:
        files.remove(filename)
        print(filename)
    else:
        print('lol')
    # print(files)
    for file in files:
        if file.endswith('.pdf') and wsp in file:
            date_str = file.split('_')[1].split('.')[0]
            curr_diff = today - datetime.strptime(date_str, '%Y%m%d')
            filename = file
        else:
            pass
    delFile = 'PDFs/' + folder + '/' + filename
    finalFiles = os.listdir('PDFs/' + folder)
    if filename in finalFiles:
        os.remove('PDFs/' + folder + '/' + filename)
        print('Deleted ' + filename + '.')
    else:
        print("No PDFs deleted")
You can use glob to list all the files that match, restrict to the first n-2 and delete those:
import os
from glob import glob

dryrun = True  # change this to False to actually delete

wc = 'hello_????????.pdf'
for name in sorted(glob(wc))[:-2]:
    print(f'delete {name}{" (DRY-RUN)" if dryrun else ""}')
    if not dryrun:
        os.unlink(name)
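This works because the zero-padded YYYYMMDD datestamps sort lexicographically in the same order as chronologically, so sorted() leaves the two newest files at the end of the list and the [:-2] slice spares them.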
Note: personally I always prefer to have globs that are as strict as possible. So I often define something like:
wildcards = {
    'Y': '[12][0-9][0-9][0-9]',
    'm': '[01][0-9]',
    'd': '[0-3][0-9]',
    'H': '[0-2][0-9]',
    'M': '[0-5][0-9]',
    'S': '[0-5][0-9]',
}

# and then:
ymdglob = ''.join([wildcards[datepart] for datepart in 'Ymd'])
wc = f'hello_{ymdglob}.pdf'
# etc.
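If the datestamp in the name can't be trusted, here is a sketch of the same cleanup keyed on modification time instead (the pattern is hypothetical):

import os
from glob import glob

# keep the two most recently modified files, delete the rest
for name in sorted(glob('hello_*.pdf'), key=os.path.getmtime)[:-2]:
    os.unlink(name)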
I'm trying to pull a file from S3 based on the id and date in the filename:
Naming convention:
The filenames all follow the pattern ID_NAME_DATE.csv
example : 9919USEN_File_20180216.csv
example : 9919GBEN_File_20180211.csv
Code:
import boto3
import re

def downloadFiletest():
    # connect to s3
    client = boto3.resource(u's3', aws_access_key_id=u'KEY',
                            aws_secret_access_key=u'TOKEN')
    # used for downloading
    s3 = boto3.client(u's3', aws_access_key_id=u'KEY',
                      aws_secret_access_key=u'TOKEN')
    dateIdReg = '[0-9]{8}'
    dateSuffix = re.compile(dateIdReg)
    print(u"= S3 Client Connected =")
    # configure s3 bucket
    bucket = client.Bucket(u'us-eu-Bucket')
    b_folder = "/folder/example/"
    c_folder = b_folder.lower() + '/'
    files_not_found = True
    for cList in bucket.objects.filter(Prefix=b_folder):
        cFiles = cList.key
        print('file : ', cFiles)
        for fileId in cFiles.lower():
            files_not_found = False
            f = fileId.rstrip()
            print(f)
            fileidreg = '[0-9]{4}[a-zA-Z]{4}'
            FileID = re.compile(fileidreg)
            if FileID.match(f) and dateSuffix.match(f):
                print(u'cList.key.lower(): ', cList.key.lower())
                old_file = cList.key
                dot_index = old_file.find(u'.')
                print(u'old dot file name: ', dot_index)
                file_ext = old_file[dot_index:]
                cfile = fileId + '_file_' + dateSuffix + file_ext
                tmp_path = "/tmp/folder/" + cfile
                b_path = cVal + cfile
                print(u'b path : ', b_path)
                s3.download_file("us-eu-Bucket", b_path, tmp_path)
                print("TEMP PATH: ", tmp_path)
    if files_not_found:
        print("ALERT", "No file in {0}/{1}".format(bucket, b_folder))

downloadFiletest()
Error:
It skips over the for fileId in cFiles.lower(): loop and the script exits.
Goal:
Pull the file from S3 and download it to tmp_path so it can be used as desired.
When pulling a file, I'd like the script to pick it based on the ID and the date. For instance:
Rule: Pseudo:
If S3 has the files 9919USEN_File_20180216.csv and 9919USEN_File_20180217.csv, it should pick 9919USEN_File_20180217.csv to download. Also, if 991USEN_File_2018.csv is in S3, it should not be picked, as it matches neither fileidreg = '[0-9]{4}[a-zA-Z]{4}' nor dateIdReg = '[0-9]{8}'.
Rule: Visual:
9919USEN_File_20180217.csv > 9919USEN_File_20180216.csv [due to date]
9919USEN_File_20180217.csv > 991USEN_File_2018.csv [Due to Incorrect ID and Date]
Solution
The issue was the way the code was structured. I've reorganized it and put it inside a try/except block. I've also used fileIDPrefix.search instead of fileIDPrefix.match, since match only anchors at the start of the string, which wasn't right for the problem at hand.
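A quick illustration of the search/match difference (the key below is hypothetical):

import re

fileIDPrefix = re.compile('[0-9]{4}[a-zA-Z]{4}')
key = 'folder/example/9919USEN_File_20180216.csv'  # hypothetical S3 key

print(fileIDPrefix.match(key))   # None: match only tries at the very start of the string
print(fileIDPrefix.search(key))  # finds '9919USEN' anywhere in the string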
The final solution:
import sys
import boto3
import re

# connect to s3
client = boto3.resource(u's3', aws_access_key_id=u'KEY',
                        aws_secret_access_key=u'TOKEN')
# used for downloading
s3 = boto3.client(u's3', aws_access_key_id=u'KEY',
                  aws_secret_access_key=u'TOKEN')

bucketname = u'us-eu-Bucket'  # bucket name, taken from the question

def downloadFiletest():
    date = '[0-9]{8}'                   # fileDate regex
    dateSuffix = re.compile(date)       # regex used to check the date of the file
    reg = '[0-9]{4}[a-zA-Z]{4}'         # filename regex
    fileIDPrefix = re.compile(reg)      # checks the fileID of the filename
    folder = u"/folder/example/"        # directory
    bucket = client.Bucket(bucketname)  # bucket
    try:
        for cList in bucket.objects.filter(Prefix=folder):  # filter to the folder
            filenames = cList.key  # key of the file we would like to use
            print(filenames)
            # specific locations of the fileID and the date within the key
            fileID = filenames[33:41]
            fileDate = filenames[51:59]
            # check the length of each value, to be verified later
            lenf = len(fileID)
            lenG = len(fileDate)
            old_file = cList.key
            dot_index = old_file.find(u'.')
            file_ext = old_file[dot_index:]
            # check that the key matches our specified rules; if it does, proceed
            if fileIDPrefix.search(filenames) and dateSuffix.search(filenames):
                filename = fileID + u'_file_' + fileDate + file_ext
                tmp_path = "/tmp/mpcmt/" + filename
                file_path = folder + filename
                s3.download_file(bucketname, file_path, tmp_path)
                return filename, tmp_path, fileID, fileDate
            # check the number of chars in the key to see if it matches what is expected
            if dot_index != 59:
                print('File has wrong fileID or wrong date')
            if lenG != 8:
                print('File has wrong fileDate format')
            if lenf != 8:
                print('File has wrong fileID')
    except Exception as e:  # report an error if the file doesn't exist
        print("ALERT", "No file in {0}/{1}".format(bucket, folder))
        # There was some issue / error / problem and that is why the program is exiting.
        print("No file in {0}/{1}".format(bucket, folder), file=sys.stderr)
        print("Exception: %s" % str(e), file=sys.stderr)
        sys.exit(1)

downloadFiletest()
I am trying to create a script that will move only new or updated files from the past 24 hours into a new folder. So far I have a script that moves files in general; any leads or suggestions would be greatly appreciated.
import os, shutil

source = os.listdir('C:\Users\Student\Desktop\FolderA')
destination = 'C:\Users\Student\Desktop\FolderB'
os.chdir('C:\Users\Student\Desktop\FolderA')
for files in os.listdir("C:\Users\Student\Desktop\FolderA"):
    if files.endswith(".txt"):
        src = os.path.join("C:\Users\Student\Desktop\FolderA", files)
        dst = os.path.join(destination, files)
        shutil.move(src, dst)
I believe I found a solution, let me know what you guys think.
# copy files from folder_a to folder_b
# if the files in folder_a have been modified within the past 24 hours
# copy them to folder_b
#
import shutil
import os
from os import path
import datetime
from datetime import date, time, timedelta

def file_has_changed(fname):
    # print 'in file_has_changed with file : %s' % fname
    # print str(path.getmtime(fname))
    # get file modified time
    file_m_time = datetime.datetime.fromtimestamp(path.getmtime(fname))
    # print datetime.datetime.now()
    # print file_m_time
    # get the delta between today and file mod time
    td = datetime.datetime.now() - file_m_time
    # print td
    # print 'days : %d' % td.days
    # file can be archived if mod within last 24 hours
    if td.days == 0:
        global ready_to_archive
        ready_to_archive = ready_to_archive + 1
        return True
    else:
        return False

def main():
    global ready_to_archive
    global archived
    ready_to_archive, archived = 0, 0
    # src = "c:\users\gail\desktop\foldera"
    # dst = "c:\users\gail\desktop\folderb"
    for fname in os.listdir('c:\users\gail\Desktop\FolderA'):
        src_fname = 'c:\users\gail\Desktop\FolderA\%s' % fname
        if file_has_changed(src_fname):
            dst_fname = 'c:\users\gail\Desktop\FolderB\%s' % fname
            dst_folder = 'c:\users\gail\Desktop\FolderB'
            try:
                shutil.copy2(src_fname, dst_folder)
                global archived
                archived = archived + 1
                # print 'Copying file : %s ' % (src_fname)
                # print ' To loc : %s ' % (dst_fname)
            except IOError as e:
                print 'could not open the file: %s ' % e

if __name__ == "__main__":
    main()
    print '****** Archive Report for %s ******' % datetime.datetime.now()
    print '%d files ready for archiving ' % ready_to_archive
    print '%d files archived' % archived
    print '****** End of Archive Report ******'
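For what it's worth, here is a more compact Python 3 sketch of the same idea using pathlib (the folder names are hypothetical):

import shutil
import time
from pathlib import Path

src = Path(r'c:\users\gail\Desktop\FolderA')  # hypothetical source folder
dst = Path(r'c:\users\gail\Desktop\FolderB')  # hypothetical destination folder
cutoff = time.time() - 24 * 60 * 60           # 24 hours ago, as a Unix timestamp

for f in src.iterdir():
    # copy only regular files modified within the last 24 hours
    if f.is_file() and f.stat().st_mtime >= cutoff:
        shutil.copy2(f, dst / f.name)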
I wrote a Python script that collects file metadata (filename, creation date, creation time, last modified date, last modified time) from a file directory. However, when the directory is a path located on an external hard drive, the script doesn't work. I can't figure out why.
Here is the code:
import os
from os.path import basename
import datetime
import time

def getSize(filename):
    st = os.stat(filename)
    print st
    return st.st_size

# get last modified date
def getMTime(filename):
    fileModTime = os.path.getmtime(filename)
    return fileModTime

# get creation date
def getCTime(filename):
    fileModTime = os.path.getctime(filename)
    return fileModTime

# get data from directory
MyDirectory = "H:\0_tempfiles\150115_Portfolio\Work\Work\BarBackUp"
MyExtension = ".jpg"

# write to file
WorkingDirectory = "C:\\Users\Admin\Downloads\demo\\"
MyTxtFile = WorkingDirectory + "fileData6.txt"
delim = ";"

with open(MyTxtFile, 'wb') as f:
    f.write(delim.join(["FILENAME", "FILESIZE", "mDATE", "mTIME",
                        "cDATE", "cTIME"]) + "\n")
    for root, dirs, files in os.walk(MyDirectory):
        for file in files:
            if file.endswith(MyExtension):
                # get file name
                a = (os.path.join(root, file))
                # print a
                filename = a
                MyFileName = basename(a)
                # get file size
                MyFileSize = getSize(filename) / 1000
                print MyFileName + " >>> file size: " + str(MyFileSize) + "Kb"
                # get modification time V2
                modTimeV2 = getMTime(filename)
                modTimeV2 = time.strftime("%Y/%d/%m;%I:%M:%S %p",
                                          time.localtime(modTimeV2))
                print "time modified: " + str(modTimeV2)
                # get creation time
                creTime = getCTime(filename)
                creTime = time.strftime("%Y/%d/%m;%I:%M:%S %p",
                                        time.localtime(creTime))
                print "time created: " + str(creTime)
                # --------
                # write data to file
                entry = delim.join([str(MyFileName), str(MyFileSize),
                                    str(modTimeV2), str(creTime)]) + "\n"
                f.write(entry)

print "<<<<<<everything went fine>>>>>>"
Your code works fine for me. The problem is that your MyDirectory string contains backslash escape sequences. Try adding an r in front of the opening quotation mark:
MyDirectory = r"H:\0_tempfiles\150115_Portfolio\Work\Work\BarBackUp"
or
MyDirectory = "H:/0_tempfiles/150115_Portfolio/Work/Work/BarBackUp"
or
MyDirectory = "H:\\0_tempfiles\\150115_Portfolio\\Work\\Work\\BarBackUp"