I came across a strange issue with the logging library when using multiprocessing.
First example (wrong):
import multiprocessing
import logging
import time

class A:
    def __init__(self):
        self.logger = None
        self.v = "v1"

    def run(self):
        # start a new process here
        p = multiprocessing.Process(target=self.work, name="sub-process")
        p.start()

    def work(self):
        i = 0
        # print info of the logger object
        print(f"v: {self.v}; logger: {self.logger}; handler:{self.logger.handlers}")
        while True and i < 10:
            name = multiprocessing.current_process().name
            self.logger.info(f"current name: {name} + v : {self.v}")
            time.sleep(0.5)
            i += 1

class B(A):
    def __init__(self):
        super(B, self).__init__()
        self.v = "v2"
        # if the logger object is initialized inside the class,
        # then something goes wrong...
        logger = logging.getLogger()
        ch = logging.FileHandler("test.log")
        ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(funcName)s(): %(message)s'))
        ch.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.addHandler(ch)
        logger.addHandler(logging.StreamHandler())
        self.logger = logger
        self.logger.info("B init success")

if __name__ == '__main__':
    b = B()
    b.run()
and it produces incorrect output:
B init success # this line is the output of logger in __init__
v: v2; logger: <RootLogger root (WARNING)>; handler: [] # this line is the output of print function in work
We can see that the logger has turned back into a raw root logger object (with the default WARNING level and no handlers) inside the work method.
But things go well when I move the logger initialization out of the class.
import multiprocessing
import logging
import time

# if the logger object is initialized outside the class,
# then everything works
logger = logging.getLogger()
ch = logging.FileHandler("test.log")
ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(funcName)s(): %(message)s'))
ch.setLevel(logging.DEBUG)
logger.setLevel(logging.DEBUG)
logger.addHandler(ch)
logger.addHandler(logging.StreamHandler())

class A:
    def __init__(self):
        self.logger = None
        self.v = "v1"

    def run(self):
        # start a new process here
        p = multiprocessing.Process(target=self.work, name="sub-process")
        p.start()

    def work(self):
        i = 0
        # print info of the logger object
        print(f"v: {self.v}; logger: {self.logger}; handler:{self.logger.handlers}")
        while True and i < 10:
            name = multiprocessing.current_process().name
            self.logger.info(f"current name: {name} + v : {self.v}")
            time.sleep(0.5)
            i += 1

class B(A):
    def __init__(self):
        super(B, self).__init__()
        self.v = "v2"
        self.logger = logger
        self.logger.info("B init success")

if __name__ == '__main__':
    b = B()
    b.run()
And this time, the output is correct! (The logger object is exactly the one I define in the code.)
B init success
current name: sub-process + v : v2
# the following line is the output of the print function
v: v2; logger: <RootLogger root (DEBUG)>; handler:[<FileHandler /Users/zjj/test.log (DEBUG)>, <StreamHandler <stderr> (NOTSET)>]
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
current name: sub-process + v : v2
But in my project, I can't initialize the logger object until I get some parameters in the __init__ method. Is there a way to fix this problem (the wrong version)?
Thanks!
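One possible workaround, sketched below under the assumption that the child process can rebuild its own handlers: keep only the picklable configuration (the parameters you receive in __init__) on the instance and configure the logger inside work(), so the setup runs in the child process no matter which start method is used. The _setup_logger helper name is hypothetical, not from the original code.

import logging
import multiprocessing

class B:
    def __init__(self, logfile="test.log"):
        self.v = "v2"
        self.logfile = logfile      # keep only picklable config on the instance
        self.logger = None          # the real logger is built in the child

    def _setup_logger(self):        # hypothetical helper, runs in the child
        logger = logging.getLogger()
        if not logger.handlers:     # avoid adding duplicate handlers
            fh = logging.FileHandler(self.logfile)
            fh.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(funcName)s(): %(message)s'))
            logger.addHandler(fh)
            logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)
        return logger

    def work(self):
        self.logger = self._setup_logger()
        name = multiprocessing.current_process().name
        self.logger.info(f"current name: {name} + v : {self.v}")

    def run(self):
        p = multiprocessing.Process(target=self.work, name="sub-process")
        p.start()
        p.join()

if __name__ == '__main__':
    B().run()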
I have a problem with the Python logging library. I have a Flask application that uses the following class (_WrapperLog) to write logs to a file. The library correctly writes logs to today's file and correctly cleans up files older than 7 days. The problem is that when log rotation occurs, the old files remain saved but their content disappears (as in the example below): only the first 2-4 lines of each file remain, and then nothing. What could this be due to?
import logging
from logging.handlers import TimedRotatingFileHandler
import os
from datetime import datetime
from os import name as os_name
from sys import stdout as sys_stdout

class _WrapperLog(object):
    class __Log:
        logger = None
        loggerNameTimeStamp = None

        def __init__(self):
            self.logger = self.get_logger()

        def __str__(self):
            return str(self)

        def get_logger(self):
            if datetime.now().strftime("%Y-%m-%d") == self.loggerNameTimeStamp:
                return self.logger
            if self.logger:
                while len(self.logger.handlers) > 0:
                    self.logger.handlers.pop()
            self.logger = logging.getLogger(__name__)
            self.logger.setLevel(logging.INFO)
            formatter = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
            if os_name == 'nt':
                handler = logging.StreamHandler(sys_stdout)
            else:
                handler = TimedRotatingFileHandler(os.path.join('/opt/my_folder', 'my_log.log'),
                                                   when='midnight', backupCount=7)
            handler.setLevel(logging.INFO)
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.loggerNameTimeStamp = datetime.now().strftime("%Y-%m-%d")
            return self.logger

        def info(self, message: str):
            self.get_logger().info(message)

        def debug(self, message: str):
            print("DEBUG: " + message)

        def warning(self, message: str):
            self.get_logger().warning(message)

        def error(self, message: str):
            self.get_logger().error(message)

    instance = None

    def __new__(cls):
        if not _WrapperLog.instance:
            _WrapperLog.instance = _WrapperLog.__Log()
        return _WrapperLog.instance

    def __getattr__(self, name):
        return getattr(self._WrapperLog, name)

    def __setattr__(self, name):
        return setattr(self._WrapperLog, name)

Log = _WrapperLog()
An example of what remains of the files from yesterday and the day before yesterday:
my_log.log.2022-04-11:
[2022-04-12 00:00:01] INFO: <censored>
[2022-04-12 00:00:01] INFO: <censored>
my_log.log.2022-04-10:
[2022-04-11 00:00:01] INFO: <censored>
[2022-04-11 00:00:01] INFO: <censored>
[2022-04-11 00:00:01] INFO: <this line started like this and stopped after INFO>
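For comparison, here is a minimal configure-once sketch, assuming a single process writes the log file: TimedRotatingFileHandler already switches files at midnight and prunes old backups on its own, so rebuilding the handler every day inside get_logger() should not be necessary. (If several worker processes share the same file, each of them will also try to rotate it, which is a common way for already-rotated files to end up truncated.) The path and format come from the question; the module-level logger is an assumption.

import logging
import os
from logging.handlers import TimedRotatingFileHandler

# Configured once at import time; the handler itself rotates at midnight
# and keeps at most 7 dated backups, so no manual handler swapping is needed.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

if not logger.handlers:  # guard against repeated configuration on re-import
    handler = TimedRotatingFileHandler(
        os.path.join('/opt/my_folder', 'my_log.log'),
        when='midnight', backupCount=7)
    handler.setFormatter(logging.Formatter(
        '[%(asctime)s] %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'))
    logger.addHandler(handler)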
I have a module called myLog.py which is accessed by multiple other modules in a project. The myLog.py module has two handlers: a file_handler that writes logs to a file and a stream_handler that outputs logs to the console. For modules where no threading is occurring, i.e. myLog.py is only accessed by a single thread, the logs are written properly; but for modules where threading is implemented, i.e. myLog.py is accessed by multiple threads at the same time, I get multiple copies of the same line inserted into my log_file.txt.
While going through the logging documentation I found out that the logging module is thread-safe, but my implementation says otherwise. How should I write setLogger() in myLog.py so that, if it gets accessed by multiple threads at the same time, it gives the correct output?
# myLog.py
import logging

# set up the logger
def setLogger(logfile_name="log_file.txt"):
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    file_handler = logging.FileHandler(logfile_name)
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger
So suppose, for example, it is being accessed by a module called parser.py which implements threading; then the log statements get printed out in a random, duplicated fashion.
# parser.py
import sys
import threading
import myLog

logger = myLog.setLogger()

class investigate(threading.Thread):
    def __init__(self, section, file, buffer, args):
        threading.Thread.__init__(self)
        self.section = section
        self.file = file
        self.buffer = buffer
        self.args = args
        self.sig = self.pub = None
        self.exc = None

    def run(self):
        aprint("Starting section %d file %d" % (self.section, self.file))
        self.exc = None
        try:
            self.sign()
            aprint("Done section %d file %d" % (self.section, self.file))
        except:
            self.exc = sys.exc_info()

    def sign(self):
        self.sig, self.pub = sign_hsm(self.buffer, self.args)
        if self.sig is None or self.pub is None:
            raise Exception("Empty signing result")

    def store(self, bot):
        sec = filter(lambda x: x.type == self.section, bot.sections)[0]
        if self.file == 0xFF:
            signature = sec.signature
        else:
            signature = sec.files[self.file].signature
        signature.sig = self.sig
        signature.pub = self.pub

    def join(self, *args, **kwargs):
        threading.Thread.join(self, *args, **kwargs)
        if self.exc:
            msg = "Thread '%s' threw an exception: %s" % (self.getName(), self.exc[1])
            new_exc = Exception(msg)
            raise new_exc.__class__, new_exc, self.exc[2]

def PrintVersion():
    logger.info("This is output.")

print_lock = threading.RLock()

def aprint(*args, **kwargs):
    if verbosityLevel > 0:
        with print_lock:
            return logger.info(*args, **kwargs)

def multipleTimes():
    logger.info("Multiple times.")

if __name__ == "__main__":
    PrintVersion()
    for investigate in investigations:
        investigate.start()
    .......
    .......
    .......
    logger.info("This gets repeated")
    multipleTimes()
So, since multiple threads are trying to access setLogger(), I get logger.info() outputs such as:
This is output.
This is output.
This is output.
This is output.
This is output.
This gets repeated.
This gets repeated.
This gets repeated.
Multiple times.
Multiple times.
Multiple times.
Multiple times.
Multiple times.
Multiple times.
What I should be getting:
This is output.
This gets repeated.
Multiple times.
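The duplicated lines are consistent with setLogger() being called once per importing module: every call returns the same logger and attaches a fresh pair of handlers to it, so each record is emitted once per handler. A minimal sketch of a guarded version, assuming all modules are meant to share one file handler and one stream handler (the _setup_lock name is an illustration, not from the question):

# myLog.py
import logging
import threading

_setup_lock = threading.Lock()  # serialises the first concurrent calls

def setLogger(logfile_name="log_file.txt"):
    logger = logging.getLogger(__name__)
    with _setup_lock:
        if not logger.handlers:  # only attach handlers the first time
            logger.setLevel(logging.INFO)
            formatter = logging.Formatter('%(message)s')
            file_handler = logging.FileHandler(logfile_name)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
            logger.addHandler(logging.StreamHandler())
    return logger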
I have a custom logger class that worked fine in one module, but if I use it in another module of the same package, with the same file handler, it multiplies each record from 1 to n times.
I use it in 5 threads, which is why I'm trying to close the file handler each time a message is recorded.
import logging
from logging.handlers import RotatingFileHandler

class RootLogger():
    def __init__(self):
        self.logger = None

    # skeleton for creating a custom logger
    # name - name of the logger, string or class/module name
    def set_config(self, name, logfile):
        self.logger = logging.getLogger(name)
        # format of the message
        formatter = logging.Formatter('%(asctime)s - [%(name)s] [%(levelname)s] %(message)s')
        # log file rotation, time
        handler = RotatingFileHandler(filename=logfile,
                                      maxBytes=8000000,
                                      backupCount=5,
                                      encoding='utf-8')
        # handler = StreamHandler()
        handler.setFormatter(formatter)
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)
        return self.logger

    # patch for excluding 'too many opened files' error
    def __del__(self):
        if self.logger:
            for hdlr in self.logger.handlers:
                # hdlr.flush()
                hdlr.close()

    # override commands
    def info(self, msg):
        self.logger.info(msg)
        self.__del__()

    def debug(self, msg):
        self.logger.debug(msg)
        self.__del__()

    def error(self, msg):
        self.logger.error(msg)
        self.__del__()

    def warning(self, msg):
        self.logger.warning(msg)
        self.__del__()

    def critical(self, msg):
        self.logger.warning(msg)
        self.__del__()
You want to check if handlers already exist for the logger before creating more handlers.
self.logger = logging.getLogger(name)
if not self.logger.handlers:
    # create the handlers
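A sketch of how set_config() could apply that check, assuming every RootLogger instance with the same name is meant to share one handler set; the names and parameters mirror the question's code:

# inside the RootLogger class from the question
def set_config(self, name, logfile):
    self.logger = logging.getLogger(name)
    self.logger.setLevel(logging.DEBUG)
    # only attach a handler the first time this named logger is configured
    if not self.logger.handlers:
        formatter = logging.Formatter('%(asctime)s - [%(name)s] [%(levelname)s] %(message)s')
        handler = RotatingFileHandler(filename=logfile,
                                      maxBytes=8000000,
                                      backupCount=5,
                                      encoding='utf-8')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
    return self.logger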
I have a Python pipeline that calls around 10 to 12 Python classes.
I want to know and log the time consumed by each method in each class.
Could anyone suggest some ways to do this in Python?
I'm using Python's built-in logging module for logging.
import logging
import time

class myClass():
    logErrors = False
    logger = None

    def log(self, msg):
        if self.logger != None:
            self.logger.warning(msg)

    def __init__(self, log=False):
        self.logErrors = log
        if self.logErrors == True:
            self.logger = logging.getLogger("myClass")
            self.logger.setLevel(logging.INFO)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            self.ch = logging.StreamHandler()
            self.ch.setLevel(logging.INFO)
            self.ch.setFormatter(formatter)
            self.logger.addHandler(self.ch)

    def method1(self):
        # log start time of method
        self.log("method1() started")
        # code
        print("foobar")
        time.sleep(1)
        # log end time of method
        self.log("method1() ended")

my = myClass(log=True)
my.method1()
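One common way to time every method without editing each one is a decorator that wraps the call and logs the elapsed time. A minimal sketch, assuming the standard logging module; the log_time name and the "timing" logger are illustrative, not from the question:

import functools
import logging
import time

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("timing")

def log_time(func):
    # log how long each call to the decorated function or method takes
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.perf_counter() - start
            logger.info("%s took %.3f s", func.__qualname__, elapsed)
    return wrapper

class myClass():
    @log_time
    def method1(self):
        print("foobar")
        time.sleep(1)

my = myClass()
my.method1()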
I have created a new process like this:
from multiprocessing import Process
import logging.handlers
import time

def new_log(file_name, level):
    log = logging.getLogger()
    h = logging.handlers.RotatingFileHandler(file_name, maxBytes=10485760)
    h.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
    log.addHandler(h)
    log_level = logging.DEBUG
    if level == 'debug':
        log_level = logging.DEBUG
    elif level == 'info':
        log_level = logging.INFO
    elif level == 'warn':
        log_level = logging.WARN
    elif level == 'error':
        log_level = logging.ERROR
    elif level == 'critical':
        log_level = logging.CRITICAL
    log.setLevel(log_level)
    return log

def func():
    child_logger = new_log('child', 'debug')
    for i in range(0, 10):
        child_logger.debug('child process running on %d' % i)
        time.sleep(1)

def father():
    father_logger = new_log('father', 'debug')
    father_logger.debug('this is father process')
    proc = Process(target=func, args=())
    proc.start()

father()
The two different loggers should log different messages, but when child_logger logs a message, father_logger logs the same message as well. Why does this happen, and how can I disable this behavior?
getLogger() with no arguments returns the root logger. This means each time new_log() is called, it adds a new handler to the root logger and sets the root logger's level.
Get a separate logger on each call and set its level:
log = logging.getLogger(file_name)
h = logging.handlers.RotatingFileHandler(file_name, maxBytes=10485760)
h.setLevel(level)
Note: use capitalised strings for the level, as the logger accepts them, i.e. 'DEBUG' will work without converting it to logging.DEBUG.
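Putting that together, a sketch of new_log() that uses a named logger per file and leaves the root logger alone; the uppercase level handling follows the note above, and propagate = False is an extra assumption to keep records from also reaching any root-logger handlers:

import logging
import logging.handlers

def new_log(file_name, level='DEBUG'):
    log = logging.getLogger(file_name)   # a named logger, not the root logger
    if not log.handlers:                 # avoid stacking handlers on repeated calls
        h = logging.handlers.RotatingFileHandler(file_name, maxBytes=10485760)
        h.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
        log.addHandler(h)
    log.setLevel(level.upper())          # setLevel() also accepts level names as strings
    log.propagate = False                # don't pass records up to the root logger
    return log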