Thread count increases when running logger.info in a greenlet - Python

With the following code, if I uncomment log_it, threading.active_count() reaches 11, which means each logger call creates a _DummyThread daemon.
Q1. Is there any way to achieve the same without creating extra threads?
Q2. Why does the logger need to create another thread? Why can't it execute the same way as the fun function?
from gevent import monkey
monkey.patch_all()

import threading
import gc
import gevent
import logging


def print_stats():
    while True:
        gc.collect()
        print threading.active_count()
        gevent.sleep(2)


jobs = [gevent.spawn(print_stats)]

logger = logging.getLogger(__name__)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.setLevel(logging.INFO)
form = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(form)
logger.addHandler(ch)


def log_it():
    string = 'abcdefghijklmnopqrstuvwxyz'
    logger.info(string)


def fun():
    print "hello world"


def block_for_some_time():
    log_it()
    fun()
    gevent.sleep(5)
    print 'exiting thread'


for i in range(10):
    jobs.append(gevent.spawn(block_for_some_time))

gevent.joinall(jobs)

If you change the patching to this:
from gevent import monkey
monkey.patch_all(thread=False)
threading.active_count() will print 1 every time. The reason is that patching the threading module makes it report information about greenlets, not about real OS threads.
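As for the questions: the extra entries are not real OS threads but _DummyThread bookkeeping objects. logging fills in the thread name for every record by calling threading.current_thread(), and under the monkey patch that call registers a _DummyThread for each greenlet. A minimal workaround, assuming your format string does not use %(thread)s or %(threadName)s, is to turn that lookup off:

import logging

# Assumption: no formatter uses %(thread)s or %(threadName)s.
# With logThreads disabled, LogRecord skips threading.current_thread(),
# so logging from a greenlet no longer registers a _DummyThread.
logging.logThreads = False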

Related

How to set logging configuration when using python multiprocessing

I'm trying to figure out why, when I attempt to log from a process forked using multiprocessing.Process, the log message doesn't honor the logging configuration set in the constructor; however, when I fork the same process using os.fork(), it does. Following is a pared-down code sample to demonstrate:
from multiprocessing import Process
import logging
import logging.config
import os
import sys


class Test():
    def __init__(self):
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.DEBUG)
        self.log.addHandler(ch)

    def test_log(self):
        self.log.warning(f'{os.getpid()}')

    def mp_test(self):
        self.log.warning('starting multiprocessing test')
        proc = Process(target=self.test_log)
        proc.start()
        proc.join()

    def fork_test(self):
        self.log.warning('starting fork test')
        if os.fork() == 0:
            self.test_log()
        else:
            os.wait()


if __name__ == '__main__':
    test = Test()
    test.mp_test()
    test.fork_test()
Output:
❯ python3 concurrent.py
2022-09-06 19:52:20,933 - __main__ - WARNING - starting multiprocessing test
68926
2022-09-06 19:52:20,977 - __main__ - WARNING - starting fork test
2022-09-06 19:52:20,978 - __main__ - WARNING - 68927
Why is the configuration for self.log not set anymore when self.test_log() is called by multiprocessing.Process? I was under the impression that multiprocessing.Process and os.fork() basically did the same thing and that the new process gets a copy of the parent's address space. I've probably misunderstood something but I'm really keen to find out what that is. Thanks.
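The output above is consistent with the "spawn" start method (the default on macOS since Python 3.8, and the only option on Windows): the child started by multiprocessing.Process is a fresh interpreter, so the handler added in Test.__init__ is not inherited, while os.fork() copies the parent's address space, configured logger included. A quick check, sketched under that assumption:

import multiprocessing

if __name__ == '__main__':
    # Likely prints "spawn" on macOS/Windows, "fork" on Linux.
    print(multiprocessing.get_start_method())
    # Forcing "fork" (not available on Windows) makes Process behave
    # like os.fork() here, so the child inherits the configured logger.
    multiprocessing.set_start_method('fork', force=True)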

python logger prints everything twice

I am trying to use the openshift-restclient-python library. My custom logger prints everything twice after I run into this bug.
modules/logging/Logging.py
import logging


class CustomLogger:
    logger = None

    def __init__(self):
        if CustomLogger.logger is None:
            logger = logging.getLogger(__name__)
            logger.setLevel(logging.DEBUG)
            stdout_handler = logging.StreamHandler()
            stdout_handler.setLevel(logging.DEBUG)
            logger.addHandler(stdout_handler)
            CustomLogger.logger = logger

    def getLogger(self):
        return CustomLogger.logger


logger = CustomLogger().getLogger()
This is my main.py:
#!/usr/bin/env python3
import sys
from modules.logging.Logging import logger
from kubernetes import client, config
from openshift.dynamic import DynamicClient
from openshift.helper.userpassauth import OCPLoginConfiguration
import warnings
warnings.filterwarnings("ignore")
apihost = 'myhost'
username = 'myuser'
password = 'insecure'
ca_cert = '/path/to/cert'
kubeConfig = OCPLoginConfiguration(ocp_username=username, ocp_password=password)
kubeConfig.host = apihost
kubeConfig.verify_ssl = False
kubeConfig.ssl_ca_cert = ca_cert
kubeConfig.get_token()
k8s_client = client.ApiClient(kubeConfig)
logger.warning("this is printed once")
dyn_client = DynamicClient(k8s_client)
logger.warning("this is printed twice")
v1_projects = dyn_client.resources.get(api_version='project.openshift.io/v1', kind='Project')
project_list = v1_projects.get()
sys.exit(0)
Executing main.py, I get the following output:
this is printed once
ERROR:root:load cache error: ResourceList.__init__() got an unexpected keyword argument 'base_resource_lookup'
this is printed twice
WARNING:modules.logging.Logging:this is printed twice
If I do not use my custom logger but a simple configuration as below in main.py then everything is printed once.
import logging
logging.basicConfig(level=logging.DEBUG)
I have found this answer, so I also tried removing every handler, but the only handler is the one that contains my customization, so I end up with a basic logger.
What am I doing wrong?
Thanks
EDIT:
There is an easier way to reproduce the issue.
I still have my custom logger as posted above, but my main.py is now:
#!/usr/bin/env python3
import sys
from modules.logging.Logging import logger
import logging
print(logger.handlers)
print("number handlers: " +str(len(logger.handlers)))
logger.warning("this is printed once")
logging.basicConfig(level=logging.DEBUG)
logger.warning("this is printed twice")
print("number handlers: " +str(len(logger.handlers)))
for h in logger.handlers:
    logger.removeHandler(h)
print("number handlers: " +str(len(logger.handlers)))
logger.warning("still get printed")
sys.exit(0)
The output:
[<StreamHandler <stderr> (DEBUG)>]
number handlers: 1
this is printed once
this is printed twice
WARNING:modules.logging.Logging:this is printed twice
number handlers: 1
number handlers: 0
WARNING:modules.logging.Logging:still get printed
The call logging.basicConfig(level=logging.DEBUG) doesn't add another handler to my logger, but it causes everything to be logged a second time. I actually only want the customized logs printed by my StreamHandler. How can I revert what is done by logging.basicConfig(level=logging.DEBUG)?
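For what it's worth, basicConfig() attaches its handler to the root logger, not to your logger, which is why logger.handlers never changes. One way to undo it, sketched under the assumption that nothing else needs the root handler:

import logging

root = logging.getLogger()
# Remove whatever handlers basicConfig() installed on the root logger.
for h in root.handlers[:]:
    root.removeHandler(h)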
Please try removing this piece of code from the CustomLogger class:
stdout_handler = logging.StreamHandler()
stdout_handler.setLevel(logging.DEBUG)
logger.addHandler(stdout_handler)
By the way, I was struggling with the same thing. I found answers by searching this website:
https://stackoverflow.com/a/26730545/15637940
https://stackoverflow.com/a/70876179/15637940
and a lot more answered questions...
I solved it this way:
class CustomLogger:
    logger = None

    def __init__(self):
        if CustomLogger.logger is None:
            logging.basicConfig(filename='/dev/null', filemode='w', format='%(name)s - %(levelname)s - %(message)s')
            logger = logging.getLogger(__name__)
            logger.setLevel(logging.DEBUG)
            fmt = '%(asctime)s | %(message)s'
            stdout_handler = logging.StreamHandler()
            stdout_handler.setLevel(logging.DEBUG)
            stdout_handler.setFormatter(CustomFormatter(fmt))
            logger.addHandler(stdout_handler)
            CustomLogger.logger = logger

    def getLogger(self):
        return CustomLogger.logger


logger = CustomLogger().getLogger()
It seems that the library I am using logs to the root logger at some point. According to this answer, logging.basicConfig() constructs a StreamHandler and attaches it to the root logger.
If I use logger = logging.getLogger('root') instead of logger = logging.getLogger(__name__), then everything is printed once. However, in that case everything the library logs at DEBUG level is printed to the terminal.
The line logging.basicConfig(filename='/dev/null', filemode='w', format='%(name)s - %(levelname)s - %(message)s') causes everything logged by the root logger to be written to /dev/null.
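An alternative to redirecting the root logger to /dev/null, sketched on the assumption that only your own StreamHandler should print your records, is to stop them from propagating to the root logger at all:

import logging

# The logger name is taken from the output above (modules.logging.Logging).
logger = logging.getLogger('modules.logging.Logging')
logger.propagate = False  # records are handled here and never reach the root handler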

Python-threading: separate logging of individual threads

I would like an individual .log file for each thread. Unfortunately, after using logging.basicConfig, many different log files are created, but in the end all logs land in the last declared file.
What should threads do to have independent log files?
import logging
import threading
import time
from datetime import datetime


def test_printing(name):
    logging.basicConfig(
        format="%(asctime)s, %(levelname)-8s | %(filename)-23s:%(lineno)-4s | %(threadName)15s: %(message)s",  # noqa
        datefmt="%Y-%m-%d:%H:%M:%S",
        level=logging.INFO,
        force=True,
        handlers=[
            logging.FileHandler(f"{name}.log"),
            logging.StreamHandler()])
    logging.info(f"Test {name}")
    time.sleep(20)
    logging.info(f"Test {name} after 20s")


def function_thread():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    thread = threading.Thread(
        target=test_printing,
        kwargs={"name": timestamp}
    )
    thread.start()


for i in range(5):
    time.sleep(1)
    function_thread()
From https://docs.python.org/3/library/logging.html#logger-objects
Note that Loggers should NEVER be instantiated directly, but always through the module-level function logging.getLogger(name).
So you have to create and configure a new logger inside each thread:
logger = logging.getLogger(name)
logger.basicConfig(...)
more info at: https://docs.python.org/3/howto/logging.html#logging-from-multiple-modules
Edit: Use the already defined name as the logger identifier instead of __name__.
Edit:
You cannot use logging.basicConfig; instead you need to configure each thread's logger on its own.
Full code provided and tested:
import logging
import threading
import time
from datetime import datetime


def test_printing(name):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        fmt="%(asctime)s, %(levelname)-8s | %(filename)-23s:%(lineno)-4s | %(threadName)15s: %(message)s",
        datefmt="%Y-%m-%d:%H:%M:%S")
    sh = logging.StreamHandler()
    fh = logging.FileHandler(f"{name}.log")
    sh.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(sh)
    logger.addHandler(fh)
    logger.info(f"Test {name}")
    time.sleep(20)
    logger.info(f"Test {name} after 20s")


def function_thread():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    thread = threading.Thread(
        target=test_printing,
        kwargs={"name": timestamp}
    )
    thread.start()


for i in range(5):
    time.sleep(1)
    function_thread()
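One caveat, assuming test_printing could ever run twice with the same name: logging.getLogger(name) returns the same logger object, so the handlers would be added a second time and every record would be written twice. A small guard (the helper name here is illustrative) avoids that:

import logging


def get_thread_logger(name):
    logger = logging.getLogger(name)
    if not logger.handlers:
        # Only attach handlers the first time this name is seen.
        logger.setLevel(logging.INFO)
        fh = logging.FileHandler(f"{name}.log")
        fh.setFormatter(logging.Formatter("%(asctime)s | %(threadName)s | %(message)s"))
        logger.addHandler(fh)
    return logger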

Python logging across multiple modules

I'm trying to add logging (to the console rather than a file) to a piece of code I've been working on for a while. Having read around a bit, I have a pattern that I think should work, but I'm not quite sure where I'm going wrong.
I have the following three files (simplified, obviously):
controller.py
import my_module
import logging
from setup_log import configure_log


def main():
    logger = configure_log(logging.DEBUG, __name__)
    logger.info('Started logging')
    my_module.main()


if __name__ == "__main__":
    main()
setup_log.py
import logging


def configure_log(level=None, name=None):
    logger = logging.getLogger(name)
    logger.setLevel(level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    chFormatter = logging.Formatter('%(levelname)s - %(filename)s - Line: %(lineno)d - %(message)s')
    console_handler.setFormatter(chFormatter)
    logger.addHandler(console_handler)
    return logger
my_module.py
import logging


def main():
    logger = logging.getLogger(__name__)
    logger.info("Starting my_module")
    print "Something"


if __name__ == "__main__":
    main()
When I run them, only the first logging call produces output on the console: 'Started logging'. The second logging call, 'Starting my_module', is just passed over.
What have I misunderstood/mangled?
According to the documentation it looks like you might get away with an even simpler setup like so:
If your program consists of multiple modules, here’s an example of how
you could organize logging in it:
# myapp.py
import logging
import mylib


def main():
    logging.basicConfig(filename='myapp.log', level=logging.INFO)
    logging.info('Started')
    mylib.do_something()
    logging.info('Finished')


if __name__ == '__main__':
    main()
# mylib.py
import logging


def do_something():
    logging.info('Doing something')
If you run myapp.py, you should see this in myapp.log:
INFO:root:Started
INFO:root:Doing something
INFO:root:Finished
It looks like your call to logger = logging.getLogger(__name__) inside your module creates a separate logger: its level is NOTSET, and the __main__ logger you configured is not its parent, so no handler ever sees its records.
The actual bug can be seen by putting the line
print '__name__', __name__
at the beginning of both of your main functions, which yields:
$ python controller.py
__name__ __main__
INFO - controller.py - Line: 8 - Started logging
__name__ my_module
Something
So you properly configured a logger called __main__, but the logger named my_module isn't configured. The deeper problem is that you have two main functions, which is probably confusing you (it confused me).
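A common fix, sketched under the assumption that every module should share the same console handler, is to configure the root logger once in the entry point and let module-level loggers propagate to it:

# controller.py (sketch)
import logging
import my_module
from setup_log import configure_log


def main():
    # Passing name=None configures the root logger; my_module's logger
    # (level NOTSET) propagates its records up to the root handler.
    logger = configure_log(logging.DEBUG, None)
    logger.info('Started logging')
    my_module.main()


if __name__ == "__main__":
    main()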

Getting multiprocessing Errors

So I have this:
import sys
from multiprocessing import Process


def run():
    4/0
    sys.exit()


def go(self):
    p = Process(target=run, args=())
    p.start()
    p.join()
How can I get the errors from the Process and maybe store them in a file?
You can use the logging module: import logging and let each process log its errors directly to the log file.
logger = logging.getLogger('spam_application')
logger.warning("Something bad happened")
Do the following in your code. Note: this uses a rotating file handler; you can use other handlers too. [http://docs.python.org/2/library/logging.html]
from multiprocessing import Process
import logging
from logging.handlers import RotatingFileHandler
import sys

r_logger = logging.getLogger('parsexmlfiles')


def set_logger():
    FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
    # Configure the same logger that run() uses ('parsexmlfiles').
    parser_logger = logging.getLogger('parsexmlfiles')
    if isdaemon is True:  # isdaemon is assumed to be defined elsewhere
        # Log into a log file.
        rotatingFH = RotatingFileHandler("/tmp/A_Alogfile.log", mode='a',
                                         maxBytes=7340032, backupCount=4,
                                         encoding=None, delay=False)
        rotatingFH.setFormatter(logging.Formatter(
            fmt="%(asctime)s : %(levelname)s : %(message)s",
            datefmt=None))
        parser_logger.addHandler(rotatingFH)
        parser_logger.setLevel(logging.DEBUG)


def run():
    4/0
    r_logger.info("Info Message")
    sys.exit()


def go(self):
    set_logger()
    p = Process(target=run, args=())
    p.start()
    p.join()
Wrap the function body in try/except and catch exceptions broadly. Use the Python logging module to log the stack trace, and perhaps locals() as well for context.
You could also skip the logging module and just use 'print' to print the exception handling messages to the console.
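A minimal sketch of that approach (the file path and logger name here are illustrative):

import logging
from multiprocessing import Process

logging.basicConfig(filename='/tmp/worker_errors.log', level=logging.ERROR)
logger = logging.getLogger('worker')


def run():
    try:
        4 / 0
    except Exception:
        # logger.exception() records the message plus the full traceback.
        logger.exception("run() failed; locals: %r", locals())


if __name__ == '__main__':
    p = Process(target=run)
    p.start()
    p.join()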
