Python Logging - Closing one file across multiple logs (ResourceWarning) - python

I created a custom logging.Logger that is used by several different objects in a script I'm running like so:
class TestLogger(logging.Logger):
    """Logger that echoes to stdout and, when a log file is set, to that file.

    ``DEFAULT_FORMAT``, ``CUSTOM_FORMAT`` and ``self.DATE_FORMAT`` are
    expected to be defined elsewhere; they are not part of this snippet.
    """

    def __init__(self, name, file=None):
        super().__init__(name, level=logging.DEBUG)
        self.log_file = file
        ...

    def addLogFile(self, log_file):
        """Store *log_file* and rebuild the handlers so that it gets used."""
        self.log_file = log_file
        self.setFormat()

    def setFormat(self, default=True, end='\n'):
        """Drop the current handlers and install newly formatted ones.

        *default* picks ``DEFAULT_FORMAT`` over ``CUSTOM_FORMAT``; *end* is
        the terminator written by the console handler after each record.
        """
        # Start from a clean slate: detach stream and file handlers.
        self.removeStreamHandlers()
        self.removeFileHandlers()

        if default:
            format_str = DEFAULT_FORMAT
        else:
            format_str = CUSTOM_FORMAT

        # Console output on stdout.
        console_formatter = logging.Formatter(format_str, datefmt=self.DATE_FORMAT)
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(console_formatter)
        stdout_handler.terminator = end
        self.addHandler(stdout_handler)

        # File output, only when a log file has been configured.
        if not self.log_file:
            return
        disk_formatter = logging.Formatter(format_str, datefmt=self.DATE_FORMAT)
        disk_handler = logging.FileHandler(self.log_file)
        disk_handler.setFormatter(disk_formatter)
        self.addHandler(disk_handler)

    def removeStreamHandlers(self):
        """Detach every StreamHandler that is not a FileHandler."""
        for handler in list(self.handlers):
            if not isinstance(handler, logging.StreamHandler):
                continue
            if isinstance(handler, logging.FileHandler):
                continue
            self.removeHandler(handler)

    def removeFileHandlers(self):
        """Detach every FileHandler (the handlers are detached, not closed)."""
        # FileHandler subclasses StreamHandler, so one isinstance check suffices.
        for handler in list(self.handlers):
            if isinstance(handler, logging.FileHandler):
                self.removeHandler(handler)
class Something:
    """Example object that owns a ``TestLogger`` named 'Something'."""

    def __init__(self):
        self.log = TestLogger('Something')
        ...

    def __del__(self):
        # Detach the logger's handlers when the object goes away. The stream
        # variant is removeStreamHandlers (plural); the singular name does
        # not exist on TestLogger and would raise AttributeError here.
        self.log.removeFileHandlers()
        self.log.removeStreamHandlers()
class SomethingElse:
    """Example object that owns a ``TestLogger`` named 'SomethingElse'."""

    def __init__(self):
        self.log = TestLogger('SomethingElse')
        ...

    def __del__(self):
        # Detach the logger's handlers when the object goes away. The stream
        # variant is removeStreamHandlers (plural); the singular name does
        # not exist on TestLogger and would raise AttributeError here.
        self.log.removeFileHandlers()
        self.log.removeStreamHandlers()
All of these objects are initialized and designed to share the same log file like so:
log_file = 'test.log'
s = Something()
se = SomethingElse()
s.addLogFile(log_file)
se.addLogFile(log_file)
...
del s, se
The problem seems to be that when I try to rerun my program, it throws a ResourceWarning every time I run setFormat(). It seems like the file isn't properly being closed and I'm not sure where this could be happening.

First of all, you should reuse the handlers. If you want to change the formatter just call setFormatter on the existing handlers and keep them.
If you really do want to throw away the handler and use a new one, there is a close() method on the FileHandler that is supposed to be called to clean up when the handler is done logging. So in your case you would change your code to look like this:
for fh in file_handlers:
fh.close()
self.removeHandler(fh)

Related

Rotating file handler for JSON logs in Python

I am working on saving the json logs using python. Below is the code:
log_file = 'app_log.json'
log_json = dict()
log_json["Data"] = {}
log_json['Data']['Key1'] = "value1"
log_json['Data']['alert'] = False
log_json['Data']['Key2'] = "N/A"
log_json['Created'] = datetime.datetime.utcnow().isoformat()
with open(log_file, "a") as f:
json.dump(log_json, f)#, ensure_ascii=False)
f.write("\n")
Now above code is generating the log file. But I have noticed that the file size is increasing a lot and in future, I might face disk space issue. I was wondering if there is any pre built rotating file handler available for json in which we can mention fixed size lets say 100mb and upon reaching this size it will delete and recreate the new file.
I have previously used from logging.handlers import RotatingFileHandler to do this in case of .log files but also want to do this for .json files. Please help. Thanks
Python does not care about the log file name.
You can use the rotating handler which you used for .log file for .json file also.
See sample example below
# logging_example.py
import logging
import logging.handlers
import os
import time
logfile = os.path.join("/tmp", "demo_logging.json")
logger = logging.getLogger(__name__)
fh = logging.handlers.RotatingFileHandler(logfile, mode='a', maxBytes=1000, backupCount=5) # noqa:E501
fh.setLevel(logging.DEBUG)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
fh.setFormatter(formatter)
logger.addHandler(fh)
logger.setLevel(logging.DEBUG)
while 1:
time.sleep(1)
logger.info("Long string to increase the file size")
You can also look at logrotate if you are working in Unix environment. It is a great and simple tool with good documentation to do just exactly what you need.
You can implement structured logging with RotatingFileHandler
import json
import logging
import logging.handlers
from datetime import datetime
class StructuredMessage:
    """Message object rendered as ``<message> >>> <JSON of keyword arguments>``."""

    def __init__(self, message, /, **kwargs):
        self.message, self.kwargs = message, kwargs

    def __str__(self):
        # Serialise the keyword arguments first, then glue both halves together.
        payload = json.dumps(self.kwargs)
        return ' >>> '.join((str(self.message), payload))
_ = StructuredMessage # optional, to improve readability
log_json = {}
log_json["Data"] = {}
log_json['Data']['Key1'] = "value1"
log_json['Data']['alert'] = False
log_json['Data']['Key2'] = "N/A"
log_json['Created'] = datetime.utcnow().isoformat()
LOG_FILENAME = 'logging_rotatingfile_example.out'
# Set up a specific logger with our desired output level
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Add the log message handler to the logger
handler = logging.handlers.RotatingFileHandler(
LOG_FILENAME, maxBytes=20, backupCount=5)
bf = logging.Formatter('%(message)s')
handler.setFormatter(bf)
logger.addHandler(handler)
logger.info(_('INFO', **log_json))
Note: check here for more info about structured-logging-python
You can also use json-logging-python with RotatingFileHandler
import logging
import json
import traceback
from datetime import datetime
import copy
import json_logging
import sys
json_logging.ENABLE_JSON_LOGGING = True
def extra(**kw):
    """Wrap the keyword arguments as ``{'extra': {'props': {...}}}``."""
    nested = {'props': kw}
    return {'extra': nested}
class CustomJSONLog(logging.Formatter):
    """Formatter that renders each log record as a single JSON object."""

    def get_exc_fields(self, record):
        """Return the exception details of *record* under 'python.exc_info'."""
        if record.exc_info:
            exc_info = self.format_exception(record.exc_info)
        else:
            # No live exception tuple: use whatever text is stored on the record.
            exc_info = record.exc_text
        return {'python.exc_info': exc_info}

    @classmethod
    def format_exception(cls, exc_info):
        """Return *exc_info* formatted as a traceback string, or '' if falsy."""
        return ''.join(traceback.format_exception(*exc_info)) if exc_info else ''

    def format(self, record):
        """Serialise *record* to a JSON string.

        Top-level keys are '@timestamp', 'level', 'message' and 'caller';
        logger, module, thread and process details go under 'data', together
        with ``record.props`` (if present) and exception information.
        """
        json_log_object = {
            "@timestamp": datetime.utcnow().isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "caller": record.filename + '::' + record.funcName,
        }
        json_log_object['data'] = {
            "python.logger_name": record.name,
            "python.module": record.module,
            "python.funcName": record.funcName,
            "python.filename": record.filename,
            "python.lineno": record.lineno,
            "python.thread": record.threadName,
            "python.pid": record.process,
        }
        # Extra properties are passed by callers as extra={'props': {...}}.
        if hasattr(record, 'props'):
            json_log_object['data'].update(record.props)
        if record.exc_info or record.exc_text:
            json_log_object['data'].update(self.get_exc_fields(record))
        return json.dumps(json_log_object)
json_logging.init_non_web(custom_formatter=CustomJSONLog, enable_json=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
LOG_FILENAME = 'logging_rotating_json_example.out'
handler = logging.handlers.RotatingFileHandler(
LOG_FILENAME, maxBytes=20, backupCount=5)
logger.addHandler(handler)
log_json = {}
log_json["Data"] = {}
log_json['Data']['Key1'] = "value1"
log_json['Data']['alert'] = False
log_json['Data']['Key2'] = "N/A"
logger.info('Starting')
logger.debug('Working', extra={"props":log_json})
Note: check here for more info about json-logging-python
You can try this just before you write / append to the file. It checks whether the file has exceeded the maximum number of lines and, if so, removes one line from the beginning of the file before you append as usual to the end of the file.
# Drop the oldest (first) line of the file once it holds more than maxLines
# lines, so that the next append keeps the file from growing further.
filename = 'file.txt'
maxLines = 100
# Read the lines once, inside a context manager so the file gets closed.
with open(filename, 'r') as fin:
    data = fin.readlines()
if len(data) > maxLines:
    with open(filename, 'w') as fout:
        fout.writelines(data[1:])

Getting rid of string padding in log messages

I'm using a logging filter to print out my log messages including some custom fields that are not present in the usual logging framework.
For instance:
# Thin wrapper around the "NISAR" logger that writes formatted records to a file.
class NISARLogger(object):
# Attach a FileHandler for `filename` to the "NISAR" logger and set it to DEBUG.
def __init__(self, filename):
self.filename = filename
fid = logging.FileHandler(filename)
# The format references custom fields (pge, error_code, source, line_number,
# error_name) that are not standard LogRecord attributes; presumably they are
# supplied by the filter added in log_message -- verify against the filter classes.
# NOTE(review): the literal is continued with a backslash inside the quotes, so
# any leading whitespace on the continuation line becomes part of the format.
formatter_str = '%(asctime)s, %(levelname)s, %(pge)s, %(module)s, %(error_code)i, \
%(source)s:%(line_number)i, "%(error_name)s: %(message)s"'
formatter = logging.Formatter(formatter_str)
fid.setFormatter(formatter)
self.logger = logging.getLogger(name="NISAR")
self.logger.setLevel(logging.DEBUG)
self.logger.addHandler(fid)
# Instantiate `class_filter`, add it to the logger, and emit `message` through
# the logger method named by the filter's `level` attribute.
def log_message(self, class_filter, message):
xfilter = class_filter()
log_funct = getattr(self.logger, xfilter.level)
# NOTE(review): a new filter instance is added on every call and none is
# removed here, so filters accumulate on the logger.
self.logger.addFilter(xfilter)
log_funct(message)
# Shut down the logging system via logging.shutdown().
def close(self):
logging.shutdown()
Everything seems to be working fine except my log looks like this:
2020-08-18 14:41:07,431, INFO, QA, misc, 100000, '../verify_rslc.py':70, "N/A: Opening file L_JOINT_00213_LINE12_RUN1_FP_12122019134617.h5 with xml spec /Users/cmoroney/Desktop/working/NISAR/src/GitHub/QualityAssurance/xml/nisar_L1_SLC.xml"
2020-08-18 14:41:07,432, INFO, QA, misc, 100000, '/Users/cmoroney/Desktop/working/NISAR/src/GitHub/QualityAssurance/quality/SLCFile.py':28, "N/A: Opening file L_JOINT_00213_LINE12_RUN1_FP_12122019134617.h5"
where there's a lot of padding between the '100000' (error code parameter) and the filename (source parameter) both of which are extra parameters passed into the logger via the 'addFilter' call. I've tried experimenting with the length of the 'source' and 'error_code' fields in the formatter_str variable but no luck. Any idea where that padding is coming from?
The extra space is coming from the whitespace in the source code itself at the start of the second line.
formatter_str = '%(asctime)s, %(levelname)s, %(pge)s, %(module)s, %(error_code)i, \
%(source)s:%(line_number)i, "%(error_name)s: %(message)s"'
Try this instead:
formatter_str = ('%(asctime)s, %(levelname)s, %(pge)s, %(module)s, %(error_code)i, '
'%(source)s:%(line_number)i, "%(error_name)s: %(message)s"')

The easiest way of logging

I would like to collect info with the help of logging.
The idea is simple. I have hash_value of some data, which I want to write to log. So, I set up my logging this way:
import logging
logger.setLevel(logging.DEBUG)
logging.basicConfig(format='%(asctime)s :%(message)s', level=logging.INFO)
As you can see, now timing and some message will automatically write to log file, for example I can use it like this:
logger.info('Initial data: {}'.format(data))
But what if I want to write hash_value of my data automatically? Like it is happening with time now.
I looked through documentation and find nothing useful. There is no attribute for variable in module logging.
So I am forced to do it awry. Like this:
hash_value = hash(data)
logger.info('Initial data: {} {}'.format(hash_value, data))
I would expect from this code:
logging.basicConfig(format='%(asctime)s: %(variable)s :%(message)s', level=logging.INFO)
and
logger.info('Initial data: {}'.format(hash_value, data))
to do the job. But it does not work (and it should not basically) and I did not find the solution in documentation.
So, how to avoid this awry code:
logger.info('Initial data: {} {}'.format(hash_value, data))
which I am having now?
import logging
import sys
MY_PARAMS = ("variable1", "var2", )
class ExtraFilter(logging.Filter):
    """Filter for the second, simpler handler.

    Records that carry the custom ``variable1`` attribute are rejected here,
    so a record logged with the "extra" keyword is not emitted twice.
    """

    def filter(self, record):
        # Check your custom parameters here. Checking all of them is the safer
        # choice: if any is missing, the handler whose format needs it fails on
        # that record. For example:
        #   if all(hasattr(record, param) for param in MY_PARAMS): return False
        # This example only looks at "variable1".
        return not hasattr(record, "variable1")
# init logging
log = logging.getLogger()
# init handlers and formatters
h1 = logging.StreamHandler(sys.stdout)
f1 = logging.Formatter('%(asctime)s: %(variable1)s: %(var2)s: %(message)s')
h2 = logging.StreamHandler(sys.stdout)
f2 = logging.Formatter('%(asctime)s: %(message)s')
h1.setFormatter(f1)
h2.setFormatter(f2)
h2.addFilter(ExtraFilter())
log.addHandler(h1)
log.addHandler(h2)
# example of data:
extra = {"variable1": "test1", "var2": "test2"}
log.setLevel(logging.DEBUG)
log.debug("debug message", extra=extra)
log.info("info message")
The above code will produce following output:
2017-11-04 09:16:36,787: test1: test2: debug message
2017-11-04 09:16:36,787: info message
It is not awry code: you want to add two pieces of information, so you must either pass two parameters to format or concatenate the string more "manually".
You could go with
# str() is required: concatenating a str with the integer hash raises TypeError.
logging.info("initial data " + str(hash_value) + " " + str(data))
Or you could change the "data" object so its "str" or the repr method adds the hash by itself (preferably the repr in this case)
class Data:
    ...

    def __repr__(self):
        """Return the hash followed by the data, separated by a space."""
        # format() converts both parts to text, so a numeric hash works too.
        return "{} {}".format(self.hash(), self.data)
Which in this case will print the hash and the string version of the parameter data( or simply whatever you want to show as string) passing only one parameter in the string format.
Anyway, you could make the formatting string prettier with:
logging.info("Initial data {hash} {data}".format(hash=hash_value, data=data))
By the way, in C++ and Java you would also need to declare two "entries" for those two attributes. In Java it would be something like this:
LOGGING.info("Initial data {} {}", hash, data);

How can I log current line, and stack info with Python?

I have logging function as follows.
logging.basicConfig(
filename = fileName,
format = "%(levelname) -10s %(asctime)s %(message)s",
level = logging.DEBUG
)
def printinfo(string):
if DEBUG:
logging.info(string)
def printerror(string):
if DEBUG:
logging.error(string)
print string
I need to log the line number and stack information. For example:
1: def hello():
2: goodbye()
3:
4: def goodbye():
5: printinfo()
---> Line 5: goodbye()/hello()
How can I do this with Python?
SOLVED
def printinfo(string):
if DEBUG:
frame = inspect.currentframe()
stack_trace = traceback.format_stack(frame)
logging.debug(stack_trace[:-1])
if LOG:
logging.info(string)
gives me this info which is exactly what I need.
DEBUG 2011-02-23 10:09:13,500 [
' File "/abc.py", line 553, in <module>\n runUnitTest(COVERAGE, PROFILE)\n',
' File "/abc.py", line 411, in runUnitTest\n printinfo(string)\n']
Current function name, module and line number you can do simply by changing your format string to include them.
logging.basicConfig(
filename = fileName,
format = "%(levelname) -10s %(asctime)s %(module)s:%(lineno)s %(funcName)s %(message)s",
level = logging.DEBUG
)
Most people only want the stack when logging an exception, and the logging module does that automatically if you call logging.exception(). If you really want stack information at other times, then you will need to use the traceback module to extract the additional information you need.
import inspect
import traceback
def method():
    """Print the current call stack, including this function's own frame."""
    frame = inspect.currentframe()
    stack_trace = traceback.format_stack(frame)
    # print() with a single argument works on both Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print(''.join(stack_trace))
Use stack_trace[:-1] to avoid including method/printinfo in the stack trace.
As of Python 3.2, this can be simplified to passing the stack_info=True flag to the logging calls. However, you'll need to use one of the above answers for any earlier version.
Late answer, but oh well.
Another solution is to create your own filter and use it together with a formatter, as specified in the docs here. This is a really great feature, as you no longer have to use a helper function (and put that helper function everywhere you want the stack trace). Instead, a custom filter adds the stack trace directly to the log records themselves.
import logging
class ContextFilter(logging.Filter):
    """Filter that attaches the current call stack to each record as ``stack``."""

    def __init__(self, trim_amount):
        """Store how many innermost stack frames to drop from each trace."""
        super().__init__()
        self.trim_amount = trim_amount

    def filter(self, record):
        """Set ``record.stack`` to the formatted stack and accept the record."""
        import traceback
        frames = traceback.format_stack()
        # A slice ending at -0 would be empty, so only trim for non-zero amounts.
        if self.trim_amount:
            frames = frames[:-self.trim_amount]
        record.stack = ''.join(frames)
        return True
# Now you can create the logger and apply the filter.
logger = logging.getLogger(__name__)
logger.addFilter(ContextFilter(5))
# And then you can directly implement a stack trace in the formatter.
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s \n %(stack)s')
Note: In the above code I trim the last 5 stack frames. This is just for convenience and so that we don't show stack frames from the python logging package itself.(It also might have to be adjusted for different versions of the logging package)
Use the traceback module.
logging.error(traceback.format_exc())
Here is an example that i hope it can help you:
import inspect
import logging
logging.basicConfig(
format = "%(levelname) -10s %(asctime)s %(message)s",
level = logging.DEBUG
)
def test():
    """Log the caller's line number and the chain of calling function names."""
    current = inspect.currentframe()
    names = []
    walker = current
    # Walk outwards; the outermost frame (the one without f_back) is left out.
    while walker.f_back:
        names.append('{0}()'.format(walker.f_code.co_name))
        walker = walker.f_back
    # Present the chain outermost-first.
    names.reverse()
    caller_line = current.f_back.f_lineno
    logging.info('Line {0} : {1}'.format(caller_line, '/'.join(names)))
def foo():
test()
def bar():
foo()
bar()
Result:
INFO 2011-02-23 17:03:26,426 Line 28 : bar()/foo()/test()
Look at traceback module
>>> import traceback
>>> def test():
>>> print "/".join( str(x[2]) for x in traceback.extract_stack() )
>>> def main():
>>> test()
>>> main()
<module>/launch_new_instance/mainloop/mainloop/interact/push/runsource/runcode/<module>/main/test
This is based on @mouad's answer but made more useful (IMO) by including at each level the filename (but not its full path) and line number of the call stack, and by leaving the stack in most-recently-called-from (i.e. NOT reversed) order because that's the way I want to read it :-)
Each entry has file:line:func() which is the same sequence as the normal stacktrace, but all on the same line so much more compact.
import inspect
def callers(self=None):
    """Return the call chain as ``file:line:func()`` entries joined by ' <= '.

    Entries run from the most recent call outwards; the outermost frame (the
    one without a caller) is not included. ``self`` is unused and optional,
    so this works both as a method and as a plain function call.
    """
    import os

    entries = []
    frame = inspect.currentframe()
    while frame.f_back:
        code = frame.f_code
        # basename() strips the directory with the platform's own separator
        # rules, instead of only handling backslash-separated paths.
        entries.append('{0}:{1}:{2}()'.format(
            os.path.basename(code.co_filename), frame.f_lineno, code.co_name))
        frame = frame.f_back
    return ' <= '.join(entries)
You may need to add an extra f_back if you have any intervening calls to produce the log text.
frame = inspect.currentframe().f_back
Produces output like this:
file2.py:620:func1() <= file3.py:211:func2() <= file3.py:201:func3() <= main.py:795:func4() <= file4.py:295:run() <= main.py:881:main()
I only need this stacktrace in two key functions, so I add the output of callers into the text in the logger.debug() call, like this:
logger.debug("\nWIRE: justdoit request -----\n"+callers()+"\n\n")

Python logger dynamic filename

I want to configure my Python logger in such a way so that each instance of logger should log in a file having the same name as the name of the logger itself.
e.g.:
log_hm = logging.getLogger('healthmonitor')
log_hm.info("Testing Log") # Should log to /some/path/healthmonitor.log
log_sc = logging.getLogger('scripts')
log_sc.debug("Testing Scripts") # Should log to /some/path/scripts.log
log_cr = logging.getLogger('cron')
log_cr.info("Testing cron") # Should log to /some/path/cron.log
I want to keep it generic and dont want to hardcode all kind of logger names I can have. Is that possible?
How about simply wrap the handler code in a function:
import os
def myLogger(name):
    """Return the logger called *name*, set to DEBUG, with a new file handler.

    The handler writes to ``/some/path/<name>.log`` opened in 'w' mode. Every
    call attaches another handler to the same named logger.
    """
    log_path = os.path.join('/some/path/', name + '.log')
    named_logger = logging.getLogger(name)
    named_logger.setLevel(logging.DEBUG)
    named_logger.addHandler(logging.FileHandler(log_path, 'w'))
    return named_logger
log_hm = myLogger('healthmonitor')
log_hm.info("Testing Log") # Should log to /some/path/healthmonitor.log
To prevent creating duplicate handlers, care needs to be taken to ensure that myLogger(name) is only called once per name. Usually that means putting myLogger(name) inside
if __name__ == '__main__':
log_hm = myLogger('healthmonitor')
of the main script.
import os
import logging
class MyFileHandler(object):
    """Proxy around a file handler whose file is named after a logger."""

    def __init__(self, dir, logger, handlerFactory, **kw):
        """Create the wrapped handler.

        The handler is built by ``handlerFactory(**kw)`` with ``filename``
        set to ``<dir>/<logger.name>``.
        """
        kw['filename'] = os.path.join(dir, logger.name)
        self._handler = handlerFactory(**kw)

    def __getattr__(self, n):
        """Delegate attributes not found on the proxy to the wrapped handler."""
        # Guard '_handler' itself: if it is not set yet, looking it up again
        # would re-enter __getattr__ and recurse without end.
        if n != '_handler' and hasattr(self._handler, n):
            return getattr(self._handler, n)
        # Call form of raise; the "raise AttributeError, n" statement is
        # Python 2 only and a syntax error on Python 3.
        raise AttributeError(n)
logger = logging.getLogger('test')
logger.setLevel(logging.INFO)
handler = MyFileHandler(os.curdir, logger, logging.FileHandler)
logger.addHandler(handler)
logger.info('hello mylogger')
The approach used in the above solution is correct, but that has issue of adding duplicate handlers when called more than once. Here is the improved version.
import os
def getLogger(name):
    """Return a brand-new DEBUG logger writing to ``/some/path/<name>.log``."""
    # logging.getLogger would hand back the cached logger on repeated calls;
    # logging.Logger builds a new one each time, so handlers added here do
    # not pile up on a shared logger object.
    fresh = logging.Logger(name)
    fresh.setLevel(logging.DEBUG)
    target = os.path.join('/some/path/', name + '.log')
    fresh.addHandler(logging.FileHandler(target, 'a'))
    return fresh
def test(i):
log_hm = getLogger('healthmonitor')
log_hm.info("Testing Log %s", i) # Should log to /some/path/healthmonitor.log
test(1)
test(2)
I'm trying to implement this solution with both dynamic path and file name but nothing is written in the file.
# View set for Payment objects that also writes each created payment to a
# per-terminal, per-month log file.
class PaymentViewSet(viewsets.ModelViewSet):
serializer_class = PaymentSerializer
queryset = Payment.objects.all()
permission_classes = [IsAuthenticated]
# Write the serialized payment to logs/<terminal name>/<YYYY-MM>-payments.log.
def paymentLog(self, paymentSerializer):
# file : logs/${terminalName}/${%Y-%m}-payments.log
terminalName = TerminalSerializer(Terminal.objects.get(pk=paymentSerializer.data.get("terminal"))).data.get("name")
filePath = os.path.join(settings.LOG_PATH, terminalName)
if not os.path.exists(filePath):
os.makedirs(filePath)
fileName = filePath + "/" + datetime.now().strftime("%Y-%m") +'-payments.log'
handler = logging.FileHandler(fileName)
# NOTE(review): setFormatter is given a plain str here; it expects a
# logging.Formatter instance, e.g. logging.Formatter('...') -- confirm and fix.
handler.setFormatter('%(asctime)s [PAYMENT]- %(message)s')
logger = logging.Logger("payment")
logger.setLevel(logging.INFO)
logger.addHandler(handler)
# logger.propagate = False
# NOTE(review): this calls the module-level logging.info (root logger), not
# logger.info on the "payment" logger configured above, so the FileHandler
# attached above never receives the record -- likely why nothing is written.
# NOTE(review): a new FileHandler is opened on every call and is not closed here.
logging.info(paymentSerializer.data)
# printout()
# Validate and create the payment, log it, then return a 201 response.
def create(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
self.perform_create(serializer)
# log here
self.paymentLog(serializer)
headers = self.get_success_headers(serializer.data)
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
The path and file are created like intended but the log never writes.

Categories

Resources