I would like to create a python script that
appends the file created date to the end of the filename while retaining the oringinal file name (Report) for a batch of pdf documents.
directory = T:\WISAARD_Web Portal Projects\PortalLogging\WebLogExpert
filenames = Report.pdf
import os,time
root="/home"
path=os.path.join(root,"dir1")
os.chdir(path)
for files in os.listdir("."):
if files.endswith(".pdf"):
f,ext = os.path.splitext(files)
d=time.ctime(os.path.getmtime(files)).split() #here is just example. you can use strftime, strptime etc to format your date as desired
filedate = d[-1]+"-"+d[-2]+"-"+d[-3]
newname = f+filedate+ext
try:
os.rename(files,newname)
except Exception,e:
print e
else:
print "ok: renamed %s to %s " %(files,newname)
Related
I am making code which generates a new text file with today's date each time it is run. For exemple today's file name would be 2020-10-05. I would like to increment it so that if the program is run one or more times the same day it becomes 2020-10-05_1, _2 etc..
I have this code that I found from another question and i've tried tinkering with it but I'm still stuck. The problem is here they convert the file name to an int 1,2,3 and this way it works but this isn't the result I want.
def incrementfile():
todayday = datetime.datetime.today().date()
output_folder = "//10.2.30.61/c$/Qlikview_Tropal/Raport/"
highest_num = 0
for f in os.listdir(output_folder):
if os.path.isfile(os.path.join(output_folder, f)):
file_name = os.path.splitext(f)[0]
try:
file_num = int(file_name)
if file_num > highest_num:
highest_num = file_num
except ValueError:
print("The file name %s is not an integer. Skipping" % file_name)
output_file = os.path.join(output_folder, str(highest_num + 1) + f"{todayday}" + ".txt")
return output_file
How can I modify this code so that the output I get in the end is something like 2020-10-05_0, _1, _2 etc.. ?
Thanks !
I strongly recommend you to use pathlib instead of os.path.join. This is more convenient.
def incrementfile():
td = datetime.datetime.today().date()
path = pathlib.Path("/tmp") #set your output folder isntead of /tmp
inc = len(list(path.glob(f"{td}*")))+1
outfile = path/f"{td}_{inc}.txt"
return outfile
Not a direct answer to your question, but instead of using _1, _2 etc, you could use a full timestamp with date and current time, which would avoid duplication, EG:
from datetime import datetime
t = str(datetime.now()).replace(":", "-").replace(" ", "_")
print(t)
Example output:
2020-10-05_13-06-53.825870
I think this will work-
import os
import datetime
#assuming files will be .txt format
def incrementfile():
output_folder = "//10.2.30.61/c$/Qlikview_Tropal/Raport/"
files=os.listdir(output_folder)
current_name=datetime.date.today().strftime('%Y-%m-%d_0')
current_num=1
def nameChecker(name,files):
return True if name +'.txt' in files else False
while namChecker(current_name,files):
current_name+='_'+str(current_num)
current_num+=1
return current_name+'.txt'
I have a file directory structure like this:
/folder
aaa.pdf
bbb.xml
stamped.pdf
Where PDF and XML file names have no patterns to them, except that every folder has stamped.pdf in it (stamped.pdf needs to be ignored).
I want to rename the .xml file in the directory to match the .pdf file name, so I end up with:
/folder
aaa.pdf
aaa.xml
stamped.pdf
Python so far (not renaming anything yet, just trying to get the filenames at this point)
import os
pdf = ('.pdf')
xml = ('.xml')
stamped = ('stamped.pdf')
for folderName, subfolders, filenames in os.walk('folder'):
print('The current folder is ' + folderName)
for filename in filenames:
namefile = os.path.splitext(filename)[0]
if (filename.endswith(pdf) and filename != stamped):
pdfname = namefile
print('PDF File Name: ' + pdfname)
if filename.endswith(xml):
print('RENAME XML FILE NAME: ' + namefile + 'TO: ' pdfname)
else:
print('')
print('')
Right now I'm just printing values before I get into the renaming.
In the script above, pdfname is undefined in the XML conditional, because the pdfname variable isn't set/available in the XML conditional.
How can I pass the pdfname variable so that it can be used to rename the XML file in the same directory?
Thanks!
import os
for parent, _, files in os.walk('.'):
if not files:
continue
pdf_file = None
xml_file = None
for filename in files:
if filename.lower().endswith('.pdf') and filename.lower() != 'stamped.pdf':
pdf_file = filename
elif filename.lower().endswith('.xml'):
xml_file = filename
new_xml_filename = '{}/{}.xml'.format(parent, os.path.splitext(pdf_file)[0])
xml_file = '{}/{}'.format(parent, xml_file)
if os.path.exists(new_xml_filename):
print('cannot rename %s without overwriting an existing file. skipping' % xml_file)
continue
else:
os.rename(xml_file, new_xml_filename)
print('renamed {} -> {}'.format(xml_file, new_xml_filename))
I'm trying to Pull the file from s3 based on id and date of the filename:
Naming Convention:
The naming convention are as follows:
**
ID_NAME_DATE.csv : filename follow that same pattern
example : 9919USEN_File_20180216.csv
example : 9919GBEN_File_20180211.csv
**
Code:
import boto3
import re
def downloadFiletest():
#connect to s3
client = boto3.resource(u's3', aws_access_key_id=u'KEY',
aws_secret_access_key=u'TOKEN')
#used for downloading
s3 = boto3.client(u's3', aws_access_key_id=u'KEY',
aws_secret_access_key=u'TOKEN')
dateIdReg = '[0-9]{8}'
dateSuffix = re.compile(date)
print (u"= S3 Client Connected =")
# configure s3 bucket
bucket = client.Bucket(u'us-eu-Bucket')
b_folder = "/folder/example/"
c_folder = b_folder.lower() + '/'
files_not_found = True
for cList in bucket.objects.filter(Prefix=b_folder):
cFiles= cList.key
print ('file : ', cFiles)
for fileId in cFiles.lower():
files_not_found = False
f = fileId.rstrip()
print(f)
fileidreg= '[0-9]{4}[a-zA-Z]{4}'
FileID = re.compile(fileidreg)
if FileID.match(f) and dateSuffix.match(f):
print(u'cList.key.lower(): ', cList.key.lower())
old_file = cList.key
dot_index = old_file.find(u'.')
print (u'old dot file name: ', dot_index)
file_ext = old_file[dot_index:]
cfile = fileId + '_file_' + dateSuffix + file_ext
tmp_path = "/tmp/folder/" + cfile
b_path = cVal + cfile
print (u'b path : ', b_path)
s3.download_file("us-eu-Bucket", b_path, tmp_path)
print ("TEMP PATH: ", tmp_path)
if files_not_found:
print("ALERT", "No file in {0}/{1}".format(bucket, b_folder))
downloadFiletest()
Error:
It Skips over for fileId in cFiles.lower(): and closes the script.
Goal:
Pull file from S3 and Download it to tmp_path to be used as desired.
When pulling file i'd like the script to pick file based on ID and Date. For instance:
Rule: Pseudo:
If S3 has file 9919USEN_File_20180216.csv and 9919USEN_File_20180217.csv then pick 9919USEN_File_20180217.csv to download. Also IF 991USEN_File_2018.csv in S3 then don't pick file as it doesn't match rule, fileidreg = '[0-9]{4}[a-zA-Z]{4}' and dateIdReg = '[0-9]{8}'.
Rule: Visual:
9919USEN_File_20180217.csv > 9919USEN_File_20180216.csv [due to date]
9919USEN_File_20180217.csv > 991USEN_File_2018.csv [Due to Incorrect ID and Date]
Solution
The issue was the way it was structured. I've reorganized and put it in side a try, exception conditional loop. I've also used FileIDPrefix.search instead of FileIDPrefix.match since it was only looking specifically looking at the index and wasn't proper for the question in hand.
final solution.
import boto3
import re
#connect to s3
client = boto3.resource(u's3', aws_access_key_id=u'KEY',
aws_secret_access_key=u'TOKEN')
#used for downloading
s3 = boto3.client(u's3', aws_access_key_id=u'KEY',
aws_secret_access_key=u'TOKEN')
def downloadFiletest():
date = '[0-9]{8}' # fileDate regex
dateSuffix = re.compile(dates) # regex used to check the date of the file
reg = '[0-9]{4}[a-zA-Z]{4}' # filename regex
fileIDPrefix = re.compile(reg) # check fileID of the Filename.
folder = u"/folder/example/" # directory
bucket = client.Bucket(bucketname) # bucket
try:
for cuList in bucket.objects.filter(Prefix=folder): # filter to the folder
filenames= cList.key # directory of the files that we would like to use
print(cu)
# specific locations of site fileID of the file and date of the file
fileID = filenames[33:41]
fileDate = filenames[51:59]
# check the length of each values to be verified later.
lenf = len(fileID)
lenG = len(fileDate)
old_file = cList.key
dot_index = old_file.find(u'.')
file_ext = old_file[dot_index:]
# this check that the files in directory match our specified rules. if does it proceeds.
if fileIDPrefix.search(cu) and fileDateSuffix.search(cu):
filename = fileID + u'_file_' + fileDate + file_ext
tmp_path = "/tmp/mpcmt/" + filename
file_path = folder + filename
s3.download_file(bucketname, file_path, tmp_path)
return filename, tmp_path, fileID, fileDate
# this check the number of values/char in a directory to see it matches up to what is expected.
if dot_index > 59 or dot_index < 59:
print('File has wrong fileID or Wrong Date')
if lenG > 8 or lenG < 8:
print('File has wrong fileDate Format')
if lenf > 8 or lenf < 8:
print('File has wrong fileID')
except Exception as e: # this closes and displays an error if the file doesn't exist.
print("ALERT", "No file in {0}/{1}".format(bucket, folder))
# There was some issue / error / problem and that is why the program is exiting.
print >> sys.stderr, "No file in {0}/{1}".format(bucket, folder)
print >> sys.stderr, "Exception: %s" % str(e)
sys.exit(1)
downloadFiletest()
I have the following issue, I wrote a piece of code which rename file names in a directory and its sub-directories. Now instead of changing it to the current date I want it to change to the file modification date.
How can I do that?
import os, path
from datetime import datetime
import time
def walk_dir(path):
current_day = datetime.now().strftime("%Y-%m-%d")
for root, dirs, files in os.walk(path):
for filename in files:
current = root + "/" + filename
if os.path.isfile(current):
print "ORIGINAL NAME: " + current
ext = os.path.splitext(filename)[1]
target_name = os.path.join(root, '{}{}'.format(current_day, ext))
print "NEW NAME: " + target_name
os.rename(current, target_name)
walk_dir("/Users/shirin/Desktop/Artez")
import os
import datetime
for filename in directory:
modified_time = os.path.getmtime(filename) # get file modification timestamp
modified_date = datetime.date.fromtimestamp(modified_time) # convert timestamp to a date
os.rename(filename, modified_date.strftime("%Y-%m-%d"))
Note this is dangerous, since you might override files who share the same modification date.
I have a working script that will print all files in a given directory. I would like help making it do two additional things:
(1) Also be able to print the date_created or time stamp for each file.
(2) Do all of the above not only for files in the given directory, but in all subdirectories as well.
Here is the working script:
from os import listdir
from os.path import isfile, join
from sys import argv
script, filename = argv
mypath = os.getcwd()
allfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
output = open(filename, 'w')
for i in allfiles:
string = "%s" %i
output.write(string + "\n")
output.close()
print "Directory printed."
I would hope to be able to print something like (filename + ", " + timestamp + "\n"), or some substitute.
Thanks!
http://docs.python.org/2/library/os.html and http://docs.python.org/2/library/stat.html have you covered.
os.walk will give you the recursive directory walking
stat will give you file timestamps (atime,ctime,mtime)
This snippet walks through files in a directory + subdirectories and prints out created and modified timestamps.
import os
import time
def walk_files(directory_path):
# Walk through files in directory_path, including subdirectories
for root, _, filenames in os.walk(directory_path):
for filename in filenames:
file_path = root + '/' + filename
created = os.path.getctime(file_path)
modified = os.path.getmtime(file_path)
# Process stuff for the file here, for example...
print "File: %s" % file_path
print " Created: %s" % time.ctime(created)
print " Last modified: %s" % time.ctime(modified)
walk_files('/path/to/directory/')