Using Python to change the date on an xml document - python

I have an xml in this form
<project name="Hello World">
<testfile name="testfile1">
<type>TXT</type>
<size>1000</size>
<lastModified>2014-08-03 03:40:00</lastModified>
</testfile>
<testfile name="testfile2">
<type>PDF</type>
<size>500</size>
<lastModified>2015-09-23 17:40:17</lastModified>
</testfile>
</project>
This is an .xml file containing info about my project, so that I can update my testfiles should they be more than 3 months old.
Right now, I'm stuck trying to figure out how to change the element in the .xml file. This is my code so far...
import xml.etree.ElementTree as ET
import sys
from datetime import datetime
# Set every testfile's <lastModified> text to the current timestamp when the stored one is older.
def updateLastModified(self):
today = datetime.now().strftime('%Y-%m-%d %H:%M:%S') # Current date and time as a string; computed once, before the loop, rather than per testfile.
today = datetime.strptime(today, '%Y-%m-%d %H:%M:%S') # Parsed back into a datetime; the format has no fractional seconds, so microseconds are dropped.
# Iterate over all the testfiles in the metadata and update their lastModified tag with the current date.
for testfile in self.getMetadataRoot().findall('testfile'):
lastmodified = testfile.find('lastModified') # the whole lastModified element, so its text can be modified
previous_update = datetime.strptime(lastmodified.text, '%Y-%m-%d %H:%M:%S') # the previous date, converted from its str form
if previous_update < today:
lastmodified.text = str(today.strftime('%Y-%m-%d %H:%M:%S'))
# NOTE(review): getMetadataRoot() and getMetadataTree() are separate calls; confirm both hand back the same
# tree object, otherwise the edits made above are not part of the tree that is written here.
self.getMetadataTree().write(self.meta_file)
But for some reason, the meta_file is not changing... What am I doing wrong???
The problem is after the if statement, where the file is not being modified
here are the other methods i'm using in this class:
# Remember the path of the metadata file; nothing is parsed at construction time.
def __init__(self, filepath):
self.meta_file = filepath
# Parse self.meta_file and return the resulting ElementTree; a new parse is performed on every call.
def getMetadataTree(self):
return ET.parse(self.meta_file)
# Return the root element of a tree obtained from getMetadataTree().
def getMetadataRoot(self):
tree = self.getMetadataTree()
root = tree.getroot()
return root

Your definition of self.getMetadataTree() re-parses the input file every time it gets called. So in the line self.getMetadataTree().write(self.meta_file), it parses the meta file again and writes it straight back (the same info). All previous modifications to the timestamps are lost, because they were made on a different ElementTree instance.
I guess you want to do something like this:
import xml.etree.ElementTree as ET
import sys
from datetime import datetime
class TimestampUpdater(object):
    """Refresh the ``lastModified`` stamps stored in an XML metadata file.

    The file is parsed once, in the constructor, and that single tree is
    both edited and written back. Parsing again on each access would hand
    out a fresh tree every time and the edits would never reach the file.
    """

    # Layout of the text stored in each <lastModified> element.
    TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, filepath):
        """Parse *filepath* and keep the resulting tree for later edits."""
        self.meta_file = filepath
        self.tree = ET.parse(self.meta_file)

    def getMetadataTree(self):
        """Return the ElementTree parsed in the constructor."""
        return self.tree

    def getMetadataRoot(self):
        """Return the root element of the tree parsed in the constructor."""
        return self.tree.getroot()

    def updateLastModified(self):
        """Stamp every out-of-date ``testfile`` with the current time and save.

        A ``testfile`` whose ``lastModified`` element is missing or empty is
        left untouched instead of aborting the whole update.

        Raises:
            ValueError: if a non-empty ``lastModified`` text does not match
                ``TIMESTAMP_FORMAT``.
        """
        today = datetime.now()
        # Format once so every updated element carries the identical stamp.
        stamp = today.strftime(self.TIMESTAMP_FORMAT)
        for testfile in self.getMetadataRoot().findall('testfile'):
            lastmodified = testfile.find('lastModified')
            if lastmodified is None:
                continue
            stored = (lastmodified.text or '').strip()
            if not stored:
                continue
            previous_update = datetime.strptime(stored, self.TIMESTAMP_FORMAT)
            if previous_update < today:
                lastmodified.text = stamp
        # Write the very tree that was modified above.
        self.getMetadataTree().write(self.meta_file)
def print_file_content(filename):
    """Print the contents of *filename* line by line.

    Trailing whitespace, including the line terminator, is stripped from
    each line before it is printed.
    """
    with open(filename, 'r') as fh:
        for line in fh:
            # Function-call form: valid on Python 3 and, with a single
            # argument, prints the same text on Python 2.
            print(line.rstrip())
if __name__ == '__main__':
    # Demo: show the metadata file, refresh its timestamps, show it again.
    metafile = 'test.xml'
    # print() with one argument behaves the same on Python 2 and Python 3.
    print("\n====Before updating:====")
    print_file_content(metafile)
    updater = TimestampUpdater(metafile)
    updater.updateLastModified()
    print("\n====After updating:====")
    print_file_content(metafile)
Output:
====Before updating:====
<project name="Hello World">
<testfile name="testfile1">
<type>TXT</type>
<size>1000</size>
<lastModified>2016-08-07 16:58:23</lastModified>
</testfile>
<testfile name="testfile2">
<type>PDF</type>
<size>500</size>
<lastModified>2016-08-07 16:58:23</lastModified>
</testfile>
</project>
====After updating:====
<project name="Hello World">
<testfile name="testfile1">
<type>TXT</type>
<size>1000</size>
<lastModified>2016-08-07 16:58:36</lastModified>
</testfile>
<testfile name="testfile2">
<type>PDF</type>
<size>500</size>
<lastModified>2016-08-07 16:58:36</lastModified>
</testfile>
</project>

Related

Python 3.8 datetime date comparison not work between "internal generated date" and imported date

I'm trying to compare the actual date with externally generated date, always generated from datetime but in another script and saved in a txt file.
This is the code:
# Compare today's date with a date read from date.txt (stored as day-month-year).
import datetime
datin = datetime.datetime.today()
with open('date.txt', 'r') as mydate:
mdate = mydate.read()
datex = datetime.datetime.strptime(mdate, '%d-%m-%Y')
# Without parentheses, .date names the method itself; it is not called here.
if datin.date == datex.date:
print('=')
else:
print('!=')
print(datin.strftime('%d-%m-%Y'))
print(datex.strftime('%d-%m-%Y'))
this is the txt file:
03-07-2020
(the same date I'm testing the script)
should return = but return !=
What am I doing wrong?
You have a slight error in that you are accessing the method of the date objects instead of calling the method.
You can find this out by trying to print
datin.date versus datin.date()
Here is the corrected code that runs as expected:
# Corrected comparison: .date() is called on both sides, so two date values are compared.
import datetime
datin = datetime.datetime.today()
# The date string is given inline here instead of being read from date.txt.
mdate = '03-07-2020'
datex = datetime.datetime.strptime(mdate,"%d-%m-%Y")
# Show both date values that are about to be compared.
print(datin.date())
print(datex.date())
if datin.date() == datex.date():
print("=")
else:
print("!=")
print (datin.strftime("%d-%m-%Y"))
print(datex.strftime("%d-%m-%Y"))

Save a file name as "date - backup"

I am currently exporting a table from BigQuery to GCS as another form of backup. This is the code I have so far; it saves the file under the name "firebase_connectioninfo.csv".
# Export table to GCS as a CSV
data = 'dataworks-356fa'
destination = 'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv'
def export_data_to_gcs(data, Firebase_ConnectionInfo, destination):
    """Export a table of the ``FirebaseArchive`` dataset to Cloud Storage.

    Args:
        data: value handed to ``bigquery.Client`` (the project to use).
        Firebase_ConnectionInfo: name of the table to export.
        destination: ``gs://`` URI the extract is written to. Pass a
            different URI (for example one with a date in the file name)
            to change where the backup ends up.

    Blocks until the extract job is done; ``wait_for_job`` raises
    ``RuntimeError`` if the job reports an error.
    """
    bigquery_client = bigquery.Client(data)
    dataset = bigquery_client.dataset('FirebaseArchive')
    # Use the arguments rather than repeating the literals, so callers
    # actually control which table goes to which object.
    table = dataset.table(Firebase_ConnectionInfo)
    job_name = str(uuid.uuid4())
    job = bigquery_client.extract_table_to_storage(job_name, table, destination)
    # NOTE(review): extract jobs usually expose `destination_format`;
    # confirm that setting `source_format` has any effect here.
    job.source_format = 'CSV'
    job.begin()
    wait_for_job(job)
def wait_for_job(job):
    """Poll *job* once per second until its state is ``'DONE'``.

    Returns ``None`` when the job finished without an error result.

    Raises:
        RuntimeError: carrying ``job.errors`` when the finished job has a
            truthy ``error_result``.
    """
    job.reload()
    while job.state != 'DONE':
        time.sleep(1)
        job.reload()
    if job.error_result:
        raise RuntimeError(job.errors)
export_data_to_gcs(data, 'Firebase_ConnectionInfo', destination)
I want this file to be named as "thedate_firebase_connectioninfo_backup". How do I add this command in a Python script?
So this is your string:
'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv'
What I would suggest is putting it into its own variable:
filename = 'gs://firebase_results/firebase_backups1/Firebase_ConnectionInfo.csv'
Additionally, we should put in a spot for the date. We can handle formatting the string a couple different ways, but this is my preferred method:
filename = 'gs://firebase_results/firebase_backups1/{date}-Firebase_ConnectionInfo.csv'
We can then call format() on the filename with the date like this:
from datetime import datetime
# %m is the zero-padded month and %d the zero-padded day. (%M would be the
# minute, and %D is a non-portable mm/dd/yy shortcut that contains slashes.)
date = datetime.now().strftime("%m-%d-%Y")
# str.format returns a new string, so keep the result.
filename = filename.format(date=date)
Another way we could format the string would be the old string formatting style with %. I hate this method, but some people like it. I think it may be faster.
# %m is the zero-padded month and %d the zero-padded day. (%M would be the
# minute, and %D is a non-portable mm/dd/yy shortcut that contains slashes.)
date = datetime.now().strftime("%m-%d-%Y")
filename = 'gs://firebase_results/firebase_backups1/%s-Firebase_ConnectionInfo.csv' % date
Or, you could use the other guy's answer and just add the strings like
"This " + "is " + "a " + "string."
outputs: "This is a string."
Try something like this:
import datetime
# Today's date as full month name, day and year, joined by hyphens.
datestr = datetime.date.today().strftime("%B-%d-%Y")
# The date goes in front of the original object name.
destination = 'gs://firebase_results/firebase_backups1/%s_Firebase_ConnectionInfo.csv' % datestr

Incorrect Dates when Writing to XML with Python

I am new to Python and this is driving me crazy. My XML File isn't being updated. I am trying to find the day difference between today and the youngest date in an XML. Then I want to update all other dates by that amount of days. My console shows the dates are being updated, but the XML file is not being changed. I believe the error occurs in this function.
# Open File to be modified
tree = ET.parse('MAV_Case1.xml')
root = tree
datesArray = []
# Parser to convert date from ISOFormat to Date Object
# This allows us to manipulate the date range.
def getDateTimeFromISO8601String(i):
    """Return the result of ``dateutil.parser.parse`` for the date string *i*."""
    return dateutil.parser.parse(i)
#This gathers all the transDates in the XMl
def oldDate(xmlFile):
    """Collect the parsed ``transDate`` of every ``transaction`` element.

    The elements are read from the module-level ``tree``; the *xmlFile*
    argument is not consulted. Each parsed date is appended to the
    module-level ``datesArray``, and that same list is returned.
    """
    for node in tree.iter('transaction'):
        raw_text = node.find('transDate').text
        datesArray.append(getDateTimeFromISO8601String(raw_text))
    return datesArray
#This Function converts the old dates into new ones
def newDate(newArray):
    """Shift every date forward by the age, in whole days, of the newest one.

    Args:
        newArray: datetime objects, such as those returned by ``oldDate``.

    Returns:
        A list of ISO-8601 strings, one per input date and in the same
        order. An empty input gives an empty list.
    """
    # Local import: only ``datetime.datetime`` is known to be in scope here.
    from datetime import timedelta

    if not newArray:
        return []
    # The newest date and "now" are the same for every entry, so work them
    # out once. That also guarantees all entries move by the same amount
    # even when the run crosses midnight.
    youngest_date = max(newArray)
    dateDiff = abs((datetime.now() - youngest_date).days)
    shift = timedelta(days=dateDiff)
    return [(date + shift).isoformat() for date in newArray]
#Function Carries Updated Dates
def updateXML(newDateArray):
    """Write the shifted dates into the document's ``transDate`` elements.

    The elements found by ``root.iter('transDate')`` are paired, in
    document order, with the entries of *newDateArray*: the first element
    receives the first date, the second element the second date, and so
    on. Looping over the whole list for each element would leave every
    element holding the last date.

    Returns:
        The last element that was updated, or ``None`` when nothing was.
    """
    transDate = None
    for transDate, updatedDate in zip(root.iter('transDate'), newDateArray):
        transDate.text = updatedDate
    return transDate
updateXML(newDate(oldDate(tree)))
#Writing Back to File
now = datetime.now()
actual_time = str(now.strftime("%Y-%m-%d-%H-%M-%S"))
tree.write("Dag Account - " + str(actual_time) + ".xml", xml_declaration=True)

Proper way to use python's datetime's strftime method

I am using FUSE (a virtual file system) to try and implement a read call that will give me the current date/time as a string.
import os
import sys
import errno
import datetime
from fuse import FUSE, FuseOSError, Operations
class FileSys(Operations):
    """FUSE view of a directory in which every file reads as the current date.

    Directory listings and file attributes come from the backing
    directory ``root``; the content returned by ``read`` is generated.
    """

    # Layout of the generated file content.
    DATE_FORMAT = "%a %b %d %H:%M:%S %Z %Y"

    def __init__(self, root):
        """Remember the backing directory *root*."""
        self.root = root

    def _full_path(self, partial):
        """Map a path inside the mount onto the backing directory."""
        if partial.startswith("/"):
            partial = partial[1:]
        path = os.path.join(self.root, partial)
        return path

    def _date_bytes(self):
        """Return the current local date/time, formatted and encoded."""
        # A timezone-aware local time is needed for %Z to produce a zone
        # name; with a naive datetime %Z expands to an empty string.
        now = datetime.datetime.now(datetime.timezone.utc).astimezone()
        return now.strftime(self.DATE_FORMAT).encode("utf-8")

    # reports the attributes of a path
    def getattr(self, path, fh= None):
        """Return the stat fields of *path* as a dict.

        For anything that is not a directory, ``st_size`` is replaced by
        the length of the generated content. Reads are limited to the
        reported size, so reporting the backing file's size cuts the
        date string short.
        """
        full_path = self._full_path(path)
        st = os.lstat(full_path)
        attrs = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime',
            'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))
        if not os.path.isdir(full_path):
            attrs['st_size'] = len(self._date_bytes())
        return attrs

    # allows us to see files
    def readdir(self, path, fh):
        """Yield '.', '..' and, for a directory, the names it contains."""
        full_path = self._full_path(path)
        dirents = ['.', '..']
        if os.path.isdir(full_path):
            dirents.extend(os.listdir(full_path))
        for r in dirents:
            yield r

    def read(self, path, length, offset, fh= None):
        """Return up to *length* bytes of the date string, starting at *offset*."""
        data = self._date_bytes()
        # Honour the requested window so follow-up reads past the end
        # return an empty result instead of the whole string again.
        return data[offset:offset + length]
def main(root, mountpoint):
    """Mount a ``FileSys`` backed by *root* at *mountpoint*, in the foreground."""
    filesystem = FileSys(root)
    FUSE(filesystem, mountpoint, foreground=True)
if __name__ == '__main__':
main('/home/user/mydir', '/mnt/dummy')
However, my output is printing like this
Tue May 2
When I really want something like this
Tue May 27 14:43:06 CDT 2014
So, only getting up to the first digit of the day. Anyone see what I am doing wrong? I looked at the strftime documentation and I am sure all of my letters are corresponding to the correct pieces of the formatted string.

Cherrypy and Parsing XML Data from multiple files

So this is sort of a piggy-back post of another question I had. I've successfully pulled data from multiple xml files and am able to get the data to display within the terminal using the print function, but when I try to use the return function to show the data in the browser, I only get the data from the first file. Any ideas on why I only get data from the first file rather than all of them? Thanks!
from xml.dom.minidom import parse, parseString
import os, glob, re
import cherrypy
# CherryPy application object: index builds order summaries from XML files, exit stops the process.
class Root(object):
# Parse each *.xml file under `path` and build a one-line summary of the order it contains.
def index(self):
path = 'C:\Vestigo\XML'
# Number of XML files found; printed for information only.
TOTALXML = len(glob.glob(os.path.join(path, '*.xml')))
print TOTALXML
i = 0
for XMLFile in glob.glob(os.path.join(path, '*.xml')):
xmldoc = parse(XMLFile)
# Take the text of the first element with each tag name.
order_number = xmldoc.getElementsByTagName('Extrinsic')[0].firstChild.data
order_name = xmldoc.getElementsByTagName('DeliverTo')[0].firstChild.data
street1 = xmldoc.getElementsByTagName('Street1')[0].firstChild.data
state = xmldoc.getElementsByTagName('State')[0].firstChild.data
zip_code = xmldoc.getElementsByTagName('PostalCode')[0].firstChild.data
# OUTPUTi is a single ordinary variable; the counter i is not part of its name,
# so each assignment replaces the previous value.
OUTPUTi = order_number+' '+order_name+' '+street1+' '+state+' '+zip_code
i += 1
print OUTPUTi
return (OUTPUTi, """<br><br>Quit""")
index.exposed = True
# Stop the process by raising SystemExit with status 0.
def exit(self):
raise SystemExit(0)
exit.exposed = True
# Mount Root at '/', start the CherryPy engine and block until it stops.
def start():
import webbrowser
cherrypy.tree.mount(Root(), '/')
# webbrowser.open is passed as the callback, with the URL as its argument tuple.
cherrypy.engine.start_with_callback(
webbrowser.open,
('http://localhost:8080/',),
)
cherrypy.engine.block()
if __name__=='__main__':
start()
You are not collecting the data anywhere; you store everything in a variable named OUTPUTi, then only return the last iteration of that variable. Python does not magically make that variable use the i counter.
Use a list to collect the strings:
TOTALXML = len(glob.glob(os.path.join(path, '*.xml')))
print TOTALXML
OUTPUT = []
for XMLFile in glob.glob(os.path.join(path, '*.xml')):
xmldoc = parse(XMLFile)
order_number = xmldoc.getElementsByTagName('Extrinsic')[0].firstChild.data
order_name = xmldoc.getElementsByTagName('DeliverTo')[0].firstChild.data
street1 = xmldoc.getElementsByTagName('Street1')[0].firstChild.data
state = xmldoc.getElementsByTagName('State')[0].firstChild.data
zip_code = xmldoc.getElementsByTagName('PostalCode')[0].firstChild.data
OUTPUT.append(order_number+' '+order_name+' '+street1+' '+state+' '+zip_code)
print OUTPUT[-1]
OUTPUT = ''.join(OUTPUT)
return (OUTPUT, """<br><br>Quit""")

Categories

Resources