I am using FUSE (a virtual file system) to try to implement a read call that will give me the current date/time as a string.
import os
import sys
import errno
import datetime
from fuse import FUSE, FuseOSError, Operations

class FileSys(Operations):
    def __init__(self, root):
        self.root = root

    def _full_path(self, partial):
        if partial.startswith("/"):
            partial = partial[1:]
        path = os.path.join(self.root, partial)
        return path

    # allows us to set attributes
    def getattr(self, path, fh=None):
        full_path = self._full_path(path)
        st = os.lstat(full_path)
        return dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime',
                    'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))

    # allows us to see files
    def readdir(self, path, fh):
        # logging.info("Enter readdir")
        full_path = self._full_path(path)
        dirents = ['.', '..']
        if os.path.isdir(full_path):
            dirents.extend(os.listdir(full_path))
        for r in dirents:
            yield r

    def read(self, path, length, offset, fh=None):
        date = datetime.datetime.today()
        date = date.strftime("%a %b %d %H:%M:%S %Z %Y")
        return date

def main(root, mountpoint):
    FUSE(FileSys(root), mountpoint, foreground=True)

if __name__ == '__main__':
    main('/home/user/mydir', '/mnt/dummy')
However, my output is printing like this:
Tue May 2
When I really want something like this:
Tue May 27 14:43:06 CDT 2014
So I am only getting up to the first digit of the day. Does anyone see what I am doing wrong? I looked at the strftime documentation and I am sure all of my format codes correspond to the correct pieces of the formatted string.
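For what it's worth, the truncation is probably not strftime's doing: FUSE clients size their reads from the st_size that getattr() reports, which here comes via os.lstat from the real file on disk. If that file happens to be 9 bytes, readers stop after 9 characters ("Tue May 2" is exactly 9). A minimal sketch of one possible fix is to report the length of the string that read() will actually produce:

def getattr(self, path, fh=None):
    full_path = self._full_path(path)
    st = os.lstat(full_path)
    attrs = dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime',
                 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))
    # Report the size of the generated date string rather than the on-disk
    # size, so readers do not truncate the result.
    date = datetime.datetime.today().strftime("%a %b %d %H:%M:%S %Z %Y")
    attrs['st_size'] = len(date)
    return attrs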
Related
I want to call a Python module from the command line to convert a time in my timezone to UTC time like this:
$ dt-la-utc.py "2017-10-14 12:10:00"
When I execute the module shown below, the convert_la_utc function works correctly if I hard-code the date and time. However, I want to feed it the date and time as input on the command line. But the parse_args function isn't working. If I run the Python debugger and examine the "args" variable, there's nothing in it. What am I doing wrong?
#!/usr/bin/env python
import argparse
import datetime
from pdb import set_trace as debug
import pytz
import sys

def parse_args():
    """Parse arguments."""
    parser = argparse.ArgumentParser(description="Convert LA time to UTC time.")
    parser.add_argument("dt", help="LA date and time in format: YYYY-MM-DD HH:MM:SS")
    args = parser.parse_args()
    debug()
    return args

def convert_la_utc():
    """Convert time in Los Angeles to UTC time."""
    date = '2017-10-12'
    time = '20:45:00'
    date_time = date + ' ' + time
    datetime_format = '%Y-%m-%d %H:%M:%S'
    local = pytz.timezone("America/Los_Angeles")
    naive = datetime.datetime.strptime(date_time, datetime_format)
    local_dt = local.localize(naive, is_dst=None)
    utc_dt = local_dt.astimezone(pytz.utc)
    print "Datetime in Los Angeles: {0}".format(date_time)
    print "UTC equivalent datetime: {0}".format(utc_dt.strftime("%Y-%m-%d %H:%M:%S"))

def main():
    args = parse_args()
    convert_la_utc()

if __name__ == '__main__':
    sys.exit(main())
You need to retrieve your argument from the parsed result, for example:

def main():
    args = parse_args()
    dt = args.dt
What parser.parse_args() returns is an argparse.Namespace object; you can verify this by adding print type(args) in your main() function. More explanation can be found in the argparse documentation.
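To actually use the command-line value, the parsed string can be passed into the conversion function. A minimal sketch (the parameter name is illustrative, and the debug() call should be removed from parse_args once you are done stepping through):

def convert_la_utc(date_time):
    """Convert a 'YYYY-MM-DD HH:MM:SS' Los Angeles time to UTC."""
    datetime_format = '%Y-%m-%d %H:%M:%S'
    local = pytz.timezone("America/Los_Angeles")
    naive = datetime.datetime.strptime(date_time, datetime_format)
    local_dt = local.localize(naive, is_dst=None)
    utc_dt = local_dt.astimezone(pytz.utc)
    print "Datetime in Los Angeles: {0}".format(date_time)
    print "UTC equivalent datetime: {0}".format(utc_dt.strftime(datetime_format))

def main():
    args = parse_args()
    convert_la_utc(args.dt)  # pass the parsed argument through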
I'm writing a program which periodically dumps old data from a RethinkDB database into a file and removes it from the database. Currently, the data is dumped into a single file which grows without limit. I'd like to change this so that the maximum file size is, say, 250 Mb, and the program starts to write to a new output file just before this size is exceeded.
It seems like Python's RotatingFileHandler class for loggers does approximately what I want; however, I'm not sure whether logging can be applied to any JSON-dumpable object or just to strings.
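Handlers in the logging module ultimately emit strings, so the idea does carry over to JSON-dumpable objects as long as each one is serialized first. A minimal standalone sketch (the file name and size limit are illustrative):

import json
import logging
from logging.handlers import RotatingFileHandler

logger = logging.getLogger("archive")
logger.setLevel(logging.DEBUG)
# Roll over just before 250 MB is exceeded, keeping up to 10 old files.
handler = RotatingFileHandler("archive.json", maxBytes=250 * 1024 * 1024, backupCount=10)
logger.addHandler(handler)

record = {"id": 1, "timestamp": "2016-11-01T12:00:00"}  # illustrative document
logger.debug(json.dumps(record))  # the handler logs strings, not objects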
Another possible approach would be to use (a variant of) Mike Pennington's
RotatingFile class (see python: outfile to another text file if exceed certain file size).
Which of these approaches is likely to be the most fruitful?
For reference, my current program is as follows:
import os
import sys
import json
import rethinkdb as r
import pytz
from datetime import datetime, timedelta
import schedule
import time
import functools
from iclib import RethinkDB
import msgpack

''' The purpose of the Controller is to periodically archive data from the "sensor_data" table so that it does not grow without limit.'''

class Controller(RethinkDB):
    def __init__(self, db_address=(os.environ['DB_ADDR'], int(os.environ['DB_PORT'])), db_name=os.environ['DB_NAME']):
        super(Controller, self).__init__(db_address=db_address, db_name=db_name)  # Initialize the IperCronComponent with the default logger name (in this case, "Controller")
        self.db_table = RethinkDB.SENSOR_DATA_TABLE  # The table name is "sensor_data" and is stored as a class variable in RethinkDBMixIn

    def generate_archiving_query(self, retention_period=timedelta(days=3)):
        expiry_time = r.now() - retention_period.total_seconds()  # Timestamp before which data is to be archived
        if "timestamp" in r.table(self.db_table).index_list().run(self.db):  # If "timestamp" is a secondary index
            beginning_of_time = r.time(1400, 1, 1, 'Z')  # The minimum time of a ReQL time object (i.e., the year 1400 in the UTC timezone)
            data_to_archive = r.table(self.db_table).between(beginning_of_time, expiry_time, index="timestamp")  # Generate query using "between" (faster)
        else:
            data_to_archive = r.table(self.db_table).filter(r.row['timestamp'] < expiry_time)  # Generate the same query using "filter" (slower, but does not require "timestamp" to be a secondary index)
        return data_to_archive

    def archiving_job(self, data_to_archive=None, output_file="archived_sensor_data.json"):
        if data_to_archive is None:
            data_to_archive = self.generate_archiving_query()  # By default, call "generate_archiving_query" to generate the query
        old_data = data_to_archive.run(self.db, time_format="raw")  # Without time_format="raw" the output does not dump to JSON
        with open(output_file, 'a') as f:
            ids_to_delete = []
            for item in old_data:
                print item
                # msgpack.dump(item, f)
                json.dump(item, f)
                f.write('\n')  # Separate each document by a new line
                ids_to_delete.append(item['id'])
        r.table(self.db_table).get_all(r.args(ids_to_delete)).delete().run(self.db)  # Delete based on ID. It is preferred to delete the entire batch in a single operation rather than to delete the documents one by one in the for loop.

def test_job_1():
    db_name = "ipercron"
    table_name = "sensor_data"
    port_offset = 1  # To avoid interference of this testing program with the main program, all ports are initialized at an offset of 1 from the default ports using "rethinkdb --port_offset 1" at the command line.
    conn = r.connect("localhost", 28015 + port_offset)
    r.db(db_name).table(table_name).delete().run(conn)
    import rethinkdb_add_data
    controller = Controller(db_address=("localhost", 28015 + port_offset))
    archiving_job = functools.partial(controller.archiving_job, data_to_archive=controller.generate_archiving_query())
    return archiving_job

if __name__ == "__main__":
    archiving_job = test_job_1()
    schedule.every(0.1).minutes.do(archiving_job)
    while True:
        schedule.run_pending()
It is not completely 'runnable' from the part shown, but the key point is that I would like to replace the line
json.dump(item, f)
with a similar line in which f is a rotating, and not fixed, file object.
Following Stanislav Ivanov, I used json.dumps to convert each RethinkDB document to a string and wrote this to a RotatingFileHandler:
import os
import sys
import json
import rethinkdb as r
import pytz
from datetime import datetime, timedelta
import schedule
import time
import functools
from iclib import RethinkDB
import msgpack
import logging
from logging.handlers import RotatingFileHandler
from random_data_generator import RandomDataGenerator

''' The purpose of the Controller is to periodically archive data from the "sensor_data" table so that it does not grow without limit.'''

os.environ['DB_ADDR'] = 'localhost'
os.environ['DB_PORT'] = '28015'
os.environ['DB_NAME'] = 'ipercron'

class Controller(RethinkDB):
    def __init__(self, db_address=None, db_name=None):
        if db_address is None:
            db_address = (os.environ['DB_ADDR'], int(os.environ['DB_PORT']))  # The default host ("rethinkdb") and port (28015) are stored as environment variables
        if db_name is None:
            db_name = os.environ['DB_NAME']  # The default database is "ipercron" and is stored as an environment variable
        super(Controller, self).__init__(db_address=db_address, db_name=db_name)  # Initialize the instance of the RethinkDB class. IperCronComponent will be initialized with its default logger name (in this case, "Controller")
        self.db_name = db_name
        self.db_table = RethinkDB.SENSOR_DATA_TABLE  # The table name is "sensor_data" and is stored as a class variable of RethinkDBMixIn
        self.table = r.db(self.db_name).table(self.db_table)
        self.archiving_logger = logging.getLogger("archiving_logger")
        self.archiving_logger.setLevel(logging.DEBUG)
        self.archiving_handler = RotatingFileHandler("archived_sensor_data.log", maxBytes=2000, backupCount=10)
        self.archiving_logger.addHandler(self.archiving_handler)

    def generate_archiving_query(self, retention_period=timedelta(days=3)):
        expiry_time = r.now() - retention_period.total_seconds()  # Timestamp before which data is to be archived
        if "timestamp" in self.table.index_list().run(self.db):
            beginning_of_time = r.time(1400, 1, 1, 'Z')  # The minimum time of a ReQL time object (namely, the year 1400 in UTC)
            data_to_archive = self.table.between(beginning_of_time, expiry_time, index="timestamp")  # Generate query using "between" (faster, requires "timestamp" to be a secondary index)
        else:
            data_to_archive = self.table.filter(r.row['timestamp'] < expiry_time)  # Generate query using "filter" (slower, but does not require "timestamp" to be a secondary index)
        return data_to_archive

    def archiving_job(self, data_to_archive=None):
        if data_to_archive is None:
            data_to_archive = self.generate_archiving_query()  # By default, call "generate_archiving_query" to generate the query
        old_data = data_to_archive.run(self.db, time_format="raw")  # Without time_format="raw" the output does not dump to JSON or msgpack
        ids_to_delete = []
        for item in old_data:
            print item
            self.dump(item)
            ids_to_delete.append(item['id'])
        self.table.get_all(r.args(ids_to_delete)).delete().run(self.db)  # Delete based on ID. It is preferred to delete the entire batch in a single operation rather than to delete the documents one by one in the for loop.

    def dump(self, item, mode='json'):
        if mode == 'json':
            dump_string = json.dumps(item)
        elif mode == 'msgpack':
            dump_string = msgpack.packb(item)
        self.archiving_logger.debug(dump_string)

def populate_database(db_name, table_name, conn):
    if db_name not in r.db_list().run(conn):
        r.db_create(db_name).run(conn)  # Create the database if it does not yet exist
    if table_name not in r.db(db_name).table_list().run(conn):
        r.db(db_name).table_create(table_name).run(conn)  # Create the table if it does not yet exist
    r.db(db_name).table(table_name).delete().run(conn)  # Empty the table to start with a clean slate
    # Generate random data with timestamps uniformly distributed over the past 6 days
    random_data_time_interval = timedelta(days=6)
    start_random_data = datetime.utcnow().replace(tzinfo=pytz.utc) - random_data_time_interval
    random_generator = RandomDataGenerator(seed=0)
    packets = random_generator.packets(N=100, start=start_random_data)
    # print packets
    print "Adding data to the database..."
    r.db(db_name).table(table_name).insert(packets).run(conn)

if __name__ == "__main__":
    db_name = "ipercron"
    table_name = "sensor_data"
    port_offset = 1  # To avoid interference of this testing program with the main program, all ports are initialized at an offset of 1 from the default ports using "rethinkdb --port_offset 1" at the command line.
    host = "localhost"
    port = 28015 + port_offset
    conn = r.connect(host, port)  # RethinkDB connection object
    populate_database(db_name, table_name, conn)
    # import rethinkdb_add_data
    controller = Controller(db_address=(host, port))
    archiving_job = functools.partial(controller.archiving_job, data_to_archive=controller.generate_archiving_query())  # This ensures that the query is only generated once. (This is sufficient since r.now() is re-evaluated every time a connection is made.)
    schedule.every(0.1).minutes.do(archiving_job)
    while True:
        schedule.run_pending()
In this context the RethinkDB class does little other than define the class variable SENSOR_DATA_TABLE and the RethinkDB connection, self.db = r.connect(self.address[0], self.address[1]).
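For completeness, a minimal sketch of what that base class might look like (the real iclib implementation may differ; only SENSOR_DATA_TABLE and self.db are relied on above):

import rethinkdb as r

class RethinkDB(object):
    SENSOR_DATA_TABLE = "sensor_data"  # table name relied on by the Controller

    def __init__(self, db_address=None, db_name=None):
        self.address = db_address
        self.db_name = db_name
        # A single connection object; the queries above call run(self.db)
        self.db = r.connect(self.address[0], self.address[1])

This is run together with a module for generating fake data, random_data_generator.py: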
import random
import faker
from datetime import datetime, timedelta
import pytz
import rethinkdb as r

class RandomDataGenerator(object):
    def __init__(self, seed=None):
        self._seed = seed
        self._random = random.Random()
        self._random.seed(seed)
        self.fake = faker.Faker()
        self.fake.random.seed(seed)

    def __getattr__(self, x):
        return getattr(self._random, x)

    def name(self):
        return self.fake.name()

    def datetime(self, start=None, end=None):
        if start is None:
            start = datetime(2000, 1, 1, tzinfo=pytz.utc)  # Jan 1st 2000
        if end is None:
            end = datetime.utcnow().replace(tzinfo=pytz.utc)
        if isinstance(end, datetime):
            dt = end - start
        elif isinstance(end, timedelta):
            dt = end
        assert isinstance(dt, timedelta)
        random_dt = timedelta(microseconds=self._random.randrange(int(dt.total_seconds() * (10 ** 6))))
        return start + random_dt

    def packets(self, N=1, start=None, end=None):
        return [{'name': self.name(), 'timestamp': self.datetime(start=start, end=end)} for _ in range(N)]
When I run the controller it produces several rolled-over output logs, each at most 2 kB in size, as expected. (With backupCount=10, RotatingFileHandler names these archived_sensor_data.log, archived_sensor_data.log.1, and so on, up to archived_sensor_data.log.10.)
I have an XML file in this form:
<project name="Hello World">
    <testfile name="testfile1">
        <type>TXT</type>
        <size>1000</size>
        <lastModified>2014-08-03 03:40:00</lastModified>
    </testfile>
    <testfile name="testfile2">
        <type>PDF</type>
        <size>500</size>
        <lastModified>2015-09-23 17:40:17</lastModified>
    </testfile>
</project>
This is an .xml file containing info about my project, so that I can update my testfiles if they are more than 3 months old.
Right now, I'm stuck trying to figure out how to change the element in the .xml file. This is my code so far...
import xml.etree.ElementTree as ET
import sys
from datetime import datetime

def updateLastModified(self):
    today = datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # This variable stores today's date and time stamp for future reference. We shouldn't compute a new one every time.
    today = datetime.strptime(today, '%Y-%m-%d %H:%M:%S')
    # Now we need to iterate through all the testfiles in our metadata and update their lastModified tag with the current date.
    for testfile in self.getMetadataRoot().findall('testfile'):
        lastmodified = testfile.find('lastModified')  # get the whole lastModified element so we can modify it
        previous_update = datetime.strptime(lastmodified.text, '%Y-%m-%d %H:%M:%S')  # parse the previous date out of the lastModified element's text
        if previous_update < today:
            lastmodified.text = str(today.strftime('%Y-%m-%d %H:%M:%S'))
    self.getMetadataTree().write(self.meta_file)
But for some reason, the meta_file is not changing... What am I doing wrong?
The problem is after the if statement, where the file is not being modified.
Here are the other methods I'm using in this class:
def __init__(self, filepath):
    self.meta_file = filepath

def getMetadataTree(self):
    return ET.parse(self.meta_file)

def getMetadataRoot(self):
    tree = self.getMetadataTree()
    root = tree.getroot()
    return root
Your definition of getMetadataTree() re-parses the input file every time it is called. So the line self.getMetadataTree().write(self.meta_file) parses the meta file afresh and writes it straight back (the same info). All previous modifications to the timestamps are irrelevant, because they were made on a different instance of ElementTree.
I guess you want to do something like this:
import xml.etree.ElementTree as ET
import sys
from datetime import datetime

class TimestampUpdater(object):
    def __init__(self, filepath):
        self.meta_file = filepath
        self.tree = ET.parse(self.meta_file)

    def getMetadataTree(self):
        return self.tree

    def getMetadataRoot(self):
        return self.tree.getroot()

    def updateLastModified(self):
        today = datetime.now()
        for testfile in self.getMetadataRoot().findall('testfile'):
            lastmodified = testfile.find('lastModified')
            previous_update = datetime.strptime(lastmodified.text, '%Y-%m-%d %H:%M:%S')
            if previous_update < today:
                lastmodified.text = today.strftime('%Y-%m-%d %H:%M:%S')
        self.getMetadataTree().write(self.meta_file)

def print_file_content(filename):
    """Print contents of a file."""
    with open(filename, 'r') as fh:
        for line in fh:
            print line.rstrip()

if __name__ == '__main__':
    metafile = 'test.xml'
    print "\n====Before updating:===="
    print_file_content(metafile)
    updater = TimestampUpdater(metafile)
    updater.updateLastModified()
    print "\n====After updating:===="
    print_file_content(metafile)
Output:
====Before updating:====
<project name="Hello World">
    <testfile name="testfile1">
        <type>TXT</type>
        <size>1000</size>
        <lastModified>2016-08-07 16:58:23</lastModified>
    </testfile>
    <testfile name="testfile2">
        <type>PDF</type>
        <size>500</size>
        <lastModified>2016-08-07 16:58:23</lastModified>
    </testfile>
</project>

====After updating:====
<project name="Hello World">
    <testfile name="testfile1">
        <type>TXT</type>
        <size>1000</size>
        <lastModified>2016-08-07 16:58:36</lastModified>
    </testfile>
    <testfile name="testfile2">
        <type>PDF</type>
        <size>500</size>
        <lastModified>2016-08-07 16:58:36</lastModified>
    </testfile>
</project>
I have the following plugin that puts a time stamp at the top of the document on line 1, but I'd like it to insert the string on a different line, like line 6. At first I thought the insert method was 0-indexed, but that doesn't seem to be the case. How would I tell the insert method which line to insert the signature string at?
import sublime, sublime_plugin
import datetime, getpass

class SignatureCommand(sublime_plugin.TextCommand):
    def run(self, edit):
        signature = "[%s]\n" % (datetime.datetime.now().strftime("%A, %B %d %I:%M %p"))
        self.view.insert(edit, 0, signature)
Thanks for your help :)
Update: thanks to Enteleform for the wonderful answer, I added a line_num variable for added clarity :)
import sublime, sublime_plugin
import datetime, getpass

class SignatureOnSpecificLineCommand(sublime_plugin.TextCommand):
    def run(self, edit):
        line_num = 6  # line number that the signature will go on
        signature = "[%s]\n" % (datetime.datetime.now().strftime("%A, %B %d %I:%M %p"))
        line6_column0 = self.view.text_point(line_num - 1, 0)
        self.view.insert(edit, line6_column0, signature)
view.insert() takes a point as its location argument.
Points are essentially sequential character positions within a document.
For example, in the following document:
Hello
World
a caret at the end of World would be at point 11:

5 characters in Hello
1 newline character after Hello
5 characters in World
In order to calculate the point of a particular row & column, use:
view.text_point(row, column)
Example:
import sublime, sublime_plugin
import datetime, getpass

class SignatureCommand(sublime_plugin.TextCommand):
    def run(self, edit):
        signature = "[%s]\n" % (datetime.datetime.now().strftime("%A, %B %d %I:%M %p"))
        line = 6
        point = self.view.text_point(line - 1, 0)
        self.view.insert(edit, point, signature)
Note:
rows start at 0 and thus are offset from the displayed lines in SublimeText by -1, which is why I included line - 1 in view.text_point()
I'm a Python newbie. My script (below) contains a function named "fn_regex_raw_date_string" that is intended to convert a "raw" date string like this: Mon, Oct 31, 2011 at 8:15 PM into a date string like this: _2011-Oct-31_PM_8-15_

Question No. 1: When the "raw" date string contains extraneous characters, e.g. (xxxxxMon, Oct 31, 2011 at 8:15 PMyyyyyy), how should I modify my regular expression routine to exclude the extraneous characters?

I was tempted to remove my comments from the script below to make it simpler to read, but I thought it might be more helpful for me to leave them in the script.

Question No. 2: I suspect that I should code another function that will replace the "Oct" in "2011-Oct-31_PM_8-15_" with "11". But I can't help wondering if there is some way to include that functionality in my fn_regex_raw_date_string function.

Any help would be much appreciated.

Thank you,
Marceepoo
import sys
import re, pdb
#pdb.set_trace()

def fn_get_datestring_sysarg():
    this_scriptz_FULLName = sys.argv[0]
    try:
        date_string_raw = sys.argv[1]
    #except Exception, e:
    except Exception:
        date_string_raw_error = this_scriptz_FULLName + ': sys.argv[1] error: No command line argument supplied'
        print date_string_raw_error
    #returnval = this_scriptz_FULLName + '\n' + date_string_raw
    returnval = date_string_raw
    return returnval

def fn_regex_raw_date_string(date_string_raw):
    # Do re replacements
    # p:\Data\VB\Python_MarcsPrgs\Python_ItWorks\FixCodeFromLegislaturezCalifCode_MikezCode.py
    # see also (fnmatch) p:\Data\VB\Python_MarcsPrgs\Python_ItWorks\bookmarkPDFs.aab.py
    #srchstring = r"(.?+)(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(, )(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)( )([\d]{1,2})(, )([\d]{4})( at )([\d]{1,2})(\:)([\d]{1,2})( )(A|P)(M)(.?+)"
    srchstring = r"(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(, )(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)( )([\d]{1,2})(, )([\d]{4})( at )([\d]{1,2})(\:)([\d]{1,2})( )(A|P)(M)"
    srchstring = re.compile(srchstring)
    replacement = r"_\7-\3-\5_\13M_\9-\11_"
    #replacement = r"_\8-\4-\6_\14M_\10-\12_"
    regex_raw_date_string = srchstring.sub(replacement, date_string_raw)
    return regex_raw_date_string

# Mon, Oct 31, 2011 at 8:15 PM

if __name__ == '__main__':
    try:
        this_scriptz_FULLName = sys.argv[0]
        date_string_raw = fn_get_datestring_sysarg()
        date_string_mbh = fn_regex_raw_date_string(date_string_raw)
        print date_string_mbh
    except:
        print 'error occurred - fn_get_datestring_sysarg()'
You probably want to use Python's standard datetime facilities:
http://docs.python.org/library/time.html#time.strptime
http://mail.python.org/pipermail/tutor/2006-March/045729.html
This code uses a regular expression that replaces everything at the start of a string before an abbreviated weekday is matched, and then everything to the end of the string after matching either AM or PM.
Then it calls datetime.strptime(date_str, date_format) which does the hard work of parsing and gives us a datetime instance:
from datetime import datetime
import calendar
import re

# -------------------------------------
# _months = "|".join(calendar.month_abbr[1:])
_weekdays = "|".join(calendar.day_abbr)

_clean_regex = re.compile(r"""
    ^
    .*?
    (?=""" + _weekdays + """)
    |
    (?<=AM|PM)
    .*?
    $
    """, re.X)

# -------------------------------------
def parseRawDateString(raw_date_str):
    try:
        date_str = _clean_regex.sub("", raw_date_str)
        return datetime.strptime(date_str, "%a, %b %d, %Y at %I:%M %p")
    except ValueError as ex:
        print("Error parsing date from '{}'!".format(raw_date_str))
        raise ex

# -------------------------------------
if __name__ == "__main__":
    from sys import argv
    s = argv[1] if len(argv) > 1 else "xxxxxMon, Oct 31, 2011 at 8:15 PMyyyyyy"
    print("Raw date: '{}'".format(s))
    d = parseRawDateString(s)
    print("datetime object:")
    print(d)
    print("Formatted date: '{}'".format(d.strftime("%A, %d %B %Y # %I:%M %p")))