The code pasted below does the following:
creates an import hook
creates a context manager which sets the meta_path and cleans on exit.
dumps all the imports done by a program passed in input in imports.log
Now I was wondering if using a context manager is a good idea in this case, because actually I don't have the standard try/finally flow, but just a set up and clean up.
Another thing — with this line:
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
does cc become None? Shouldn't it be a CollectorContext object?
from __future__ import with_statement
import os
import sys
class CollectImports(object):
"""
Import hook, adds each import request to the loaded set and dumps
them to file
"""
def __init__(self):
self.loaded = set()
def __str__(self):
return str(self.loaded)
def dump_to_file(self, fname):
"""Dump the loaded set to file
"""
dumped_str = '\n'.join(x for x in self.loaded)
open(fname, 'w').write(dumped_str)
def find_module(self, module_name, package=None):
self.loaded.add(module_name)
class CollectorContext(object):
"""Sets the meta_path hook with the passed import hook when
entering and clean up when exiting
"""
def __init__(self, collector, argv, output_file):
self.collector = collector
self.argv = argv
self.output_file = output_file
def __enter__(self):
self.argv = self.argv[1:]
sys.meta_path.append(self.collector)
def __exit__(self, type, value, traceback):
# TODO: should assert that the variables are None, otherwise
# we are quitting with some exceptions
self.collector.dump_to_file(self.output_file)
sys.meta_path.remove(self.collector)
def main_context():
cl = CollectImports()
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
if __name__ == '__main__':
sys.argv = sys.argv[1:]
main_context()
I think this concept is ok. As well, I don't see any reasons against having the clean-up stuff in a finally: clause, so the context manager fits perfectly.
Your cc is None, because you told it to be so.
If you don't want that, change your __enter__ method to return something else:
The value returned by this method is bound to the identifier in the as clause of with statements using this context manager.
def __enter__(self):
self.argv = self.argv[1:]
sys.meta_path.append(self.collector)
return self
# or
return self.collector
# or
return "I don't know what to return here"
and then
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
print cc, repr(cc) # there you see what happens.
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
If you always want the cleanup to occur, you should use a context manager. I'm not sure where you use try..finally if you implement the context manager using the low-level special methods. If you use the #contextmanager decorator, you code the context manager in a "natural" way, so that's where you use try..finally instead of getting the exception as a parameter.
Also, cc will be the value you return from __enter__(). In your case, None. The way I understand the context manager design is that the return value is the "context". What the context manager does is set up and clean up contexts in which something else happens. E.g. a database connection will create transactions, and database operations happen in the scope of those transactions.
That said, the above is just there to provide maximum flexibility. There's nothing wrong with just creating a context (that manages itself) directly and returning self, or even not returning anything if you don't need to use the context value inside the with. Since you don't use cc anywhere, you could just do and not worry about the return value:
with CollectorContext(cl, sys.argv, 'imports.log'):
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
Thanks everyone now it works smoothly, I actually wanted with to return something because I wanted to encapsulate the "run" inside the context manager, so I get something as below.
Moreover, now I store the old sys.argv and restore it on exit, probably not fundamental but still a nice thing to do I think..
class CollectorContext(object):
"""Sets the meta_path hook with the passed import hook when
entering and clean up when exiting
"""
def __init__(self, collector, argv, output_file):
self.collector = collector
self.old_argv = argv[:]
self.output_file = output_file
self.progname = self.old_argv[1]
def __enter__(self):
sys.argv = self.old_argv[1:]
sys.meta_path.append(self.collector)
return self
def __exit__(self, type, value, traceback):
# TODO: should assert that the variables are None, otherwise
# we are quitting with some exceptions
self.collector.dump_to_file(self.output_file)
sys.meta_path.remove(self.collector)
sys.argv = self.old_argv[:]
def run(self):
code = compile(open(self.progname).read(), self.progname, 'exec')
exec(code)
def main_context():
cl = CollectImports()
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
cc.run()
Related
I would like to understand why using the following snippet leads me to an error:
a) I want to use the following class to create a context manager, as outlined in the link attached below: for me it is very important to keep the "class PrintStop(ExitStack)" form, so please bear in mind when trying to solve this issue, that I already know there are other ways to use ExitStack(), but I am interested in this specific way of using it:
class PrintStop(ExitStack):
def __init__(self, verbose: bool = False):
super().__init__()
self.verbose = verbose
def __enter__(self):
super().__enter__()
if not self.verbose:
sys.stdout = self.enter_context(open(os.devnull, 'w'))
b) when trying to use the class in the more appropriate way, I get the desired effect to stop all the printing within the "with" block, but when trying to print again after that block I get an error:
with PrintStop(verbose=False):
print('this shouldn't be printed') <------ok till here
print('this should be printed again as it is outside the with block) <-----ERROR
c) the error I get is "ValueError: I/O operation on closed file": the reason I guess is the fact that exit method of ExitStack() is not automatically called once we exit the 'with' block, so, how may I change the class to fix this bug?
Here is a quick reference to a similar topic,
Pythonic way to compose context managers for objects owned by a class
ExitStack.__exit__ simply ensures that each context you enter has its __exit__ method called; it does not ensure that any changes you made (like assigning to sys.stdout) inside the corresponding __enter__ is undone.
Also, the purpose of an exit stack is to make it easy to enter contexts that require information not known when the with statement is introduced, or to create a variable number of contexts without having to enumerate them statically.
If you really want to use an exit stack, you'll need something like
class PrintStop(ExitStack):
def __init__(self, verbose: bool = False):
super().__init__()
self.verbose = verbose
def __enter__(self):
rv = super().__enter__()
if not self.verbose:
sys.stdout = self.enter_context(open(os.devnull, 'w'))
return rv
def __exit__(self):
sys.stdout = sys.__stdout__ # Restore the original
return super().__exit__()
Keep in mind that contextlib already provides a context manager for temporarily replacing standard output with a different file, appropriately named redirect_stdout.
with redirect_stdout(open(os.devnull, 'w')):
...
Using this as the basis for PrintStop makes use of composition, rather than inheritance.
from contextlib import redirect_stdout, nullcontext
class PrintStop:
def __init__(self, verbose: bool = False):
super().__init__()
if verbose:
self.cm = redirect_stdout(open(os.devnull, 'w'))
else:
self.cm = nullcontext()
def __enter__(self):
return self.cm.__enter__()
def __exit__(self):
return self.cm.__exit__()
In a main method, my_object needs access to several members of passed_object, including a file that is opened (passed_file = passed_object.create_file(). An example of this:
import os
def main():
print('Start of program...')
passed_object = PassedObject()
my_object = MyObject(passed_object)
my_object.use_passed_object()
print('End of program.')
class MyObject(object):
def __init__(self, passed_object):
self.passed_object = passed_object
def use_passed_object(self):
f = self.passed_object.create_file()
print('attribute:')
print(self.passed_object.attr1)
print('contents of first file:')
print(list(f))
class PassedObject(object):
def __init__(self):
self.attr1 = 'some attribute string'
def create_file(self):
path = '/tmp'
files = [file for file in os.listdir(path)
if os.path.isfile(os.path.join(path, file))]
f = open(files[0], 'r')
return f
main()
The problem: passed_object creates a file object(s) that is needed by my_object, and by others not shown in this simple example. How can I close these file objects when my_object is done with it without breaking encapsulation?
Potential solutions I see:
don't pass passed_object: pass passed_object.create_file() and passed_object.attr1, then use a context manager in main with open.... However, I now have to pass in each attribute/created object to my_class.
write method my_object.close_file(), and call it from main. This seems to break encapsulation also, as main shouldn't need to know about this.
write a my_object.__del__() method that closes the file.
don't worry about closing it; your program terminates in a few lines.
Assuming the simplest situation (because details are missing):
PassedObject.create_file just opens a file, returns it and does not keep a reference to it
Usage of the file is limited to the scope of MyObject.use_passed_object
The solution is simple: close the file when use_passed_object finishes:
class MyObject(object):
def __init__(self, passed_object):
self.passed_object = passed_object
def use_passed_object(self):
f = self.passed_object.create_file()
try:
print('attribute:')
print(self.passed_object.attr1)
print('contents of first file:')
print(list(f))
finally:
f.close()
Alternatively, since passed_object.create_file() is just returning a file object, which supports context manager interface, you can also do this:
def use_passed_object(self):
with self.passed_object.create_file() as f:
print('attribute:')
print(self.passed_object.attr1)
print('contents of first file:')
print(list(f))
In a more complex scenario (e.g. something other than builtin file is returned), you could create you own contextmanager which encapsulates access to passed_object.create_file()...
On the other hand, if the file is used by multiple methods of MyObject during its lifetime, you need a contextmanager around the usage of a MyObject instance.
To do that, you'll have to:
remember in MyObject which file(s) it opened (you'll have to do that anyway to use it in multiple methods)
implement MyObject.close which closes all of those files
class MyObject(object):
def close(self):
for file_object in self.opened_files:
file_object.close()
Then implement a context manager and use it for this.
Option 1: use contextlib.closing
import contextlib
def main():
print('Start of program...')
passed_object = PassedObject()
with contextlib.closing(MyObject(passed_object)) as my_object:
my_object.use_passed_object()
print('End of program.')
Option 2: implement context manager interface on MyObject itself
class MyObject(object):
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
self.close()
def main():
print('Start of program...')
passed_object = PassedObject()
with MyObject(passed_object) as my_object:
my_object.use_passed_object()
print('End of program.')
Are there any built-in ways to have different threads have different destinations for print() and similar?
I'm exploring the creation of an interactive Python environment, so I can't just use print() from module spamegg. It has to be the globally available one with no arguments.
You can replace sys.stdout with an object that checks the current thread and writes to the appropriate file:
import sys, threading
class CustomOutput(object):
def __init__(self):
# the "softspace" slot is used internally by Python's print
# to keep track of whether to prepend space to the
# printed expression
self.softspace = 0
self._old_stdout = None
def activate(self):
self._old_stdout = sys.stdout
sys.stdout = self
def deactivate(self):
sys.stdout = self._old_stdout
self._old_stdout = None
def write(self, s):
# actually write to an open file obtained from an attribute
# on the current thread
threading.current_thread().open_file.write(s)
def writelines(self, seq):
for s in seq:
self.write(s)
def close(self):
pass
def flush(self):
pass
def isatty(self):
return False
It is possible to do what you're asking, although it's complicated and clunky and possibly not portable, and I don't think it's what you want to do.
Your objection to just using spamegg.print is:
I'm exploring the creation of an interactive Python environment, so I can't just use print() from module spamegg. It has to be the globally available one with no arguments.
But the solution to that is easy: Just use print from module spamegg in your code, and from spamegg import print in the interactive interpreter. That's all there is to it.
For that matter, there's no good reason this even needs to be called print in the first place. If all of your code used some other output function with a different name, you could do the same thing in the interactive interpreter.
But how does that let each thread have a different destination?
The easy way to do that is to just look up the destination in a threading.local().
But if you really want to do both parts of this the hard way, you can.
To do the global print the hard way, you can either have spamegg replace the builtin print instead of just giving you a way to shadow it, or have it replace sys.stdout, so the builtin print with default arguments will print somewhere else.
import builtins
_real_print = builtins.print
def _print(*args, **kwargs):
kwargs.setdefault('file', my_output_destination)
_real_print(*args, **kwargs)
builtins.print = _print
import io
import sys
class MyStdOut(io.TextIOBase):
# ... __init__, write, etc.
sys.stdout = MyStdOut()
That still requires having MyStdOut use a thread-local target.
Alternatively, you can compile or wrap each thread function in its own custom globals environment that replaces __builtins__ and/or sys from the default, allowing you to give a different one to each thread from the start. For example:
from functools import partial
from threading import Thread
from types import FunctionType
class MyThread(Thread):
def __init__(self, group=None, target=None, *args, **kwargs):
if target:
g = target.__globals__.copy()
g['__builtins__'] = g['__builtins__'].copy()
output = make_output_for_new_thread()
g['__builtins__']['print'] = partial(print, file=output)
target = FunctionType(thread_func.__code__, g, thread_func.__name__,
thread_func.__defaults__, thread_func.__closure__)
super().__init__(self, group, target, *args, **kwargs)
I might have solution for you, but it's quite more complicated than just print.
class ClusteredLogging(object):
'''
Class gathers all logs performed inside with statement and flush
it to mainHandler on exit at once.
Good for multithreaded applications that has to log several
lines at once.
'''
def __init__(self, mainHandler, formatter):
self.mainHandler = mainHandler
self.formatter = formatter
self.buffer = StringIO()
self.handler = logging.StreamHandler(self.buffer)
self.handler.setFormatter(formatter)
def __enter__(self):
rootLogger = logging.getLogger()
rootLogger.addHandler(self.handler)
def __exit__(self, t, value, tb):
rootLogger = logging.getLogger()
rootLogger.removeHandler(self.handler)
self.handler.flush()
self.buffer.flush()
rootLogger.addHandler(self.mainHandler)
logging.info(self.buffer.getvalue().strip())
rootLogger.removeHandler(self.mainHandler)
Using this, you can create log handler for each thread and configure them to store logs to different locations.
Keep in mind that this is developed with slightly different goal in mind (see comments) but you can adapt it by taking handler juggling feature of ClusteredLogging as a start.
And some test code:
import concurrent.futures
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
import logging
import sys
# put ClusteredLogging here
if __name__ == "__main__":
formatter = logging.Formatter('%(asctime)s %(levelname)8s\t%(message)s')
onlyMessageFormatter = logging.Formatter("%(message)s")
mainHandler = logging.StreamHandler(sys.stdout)
mainHandler.setFormatter(onlyMessageFormatter)
mainHandler.setLevel(logging.DEBUG)
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.DEBUG)
def logSomethingLong(label):
with ClusteredLogging(mainHandler, formatter):
for i in range(15):
logging.info(label + " " + str(i))
labels = ("TEST", "EXPERIMENT", "TRIAL")
executor = concurrent.futures.ProcessPoolExecutor()
futures = [executor.submit(logSomethingLong, label) for label in labels]
concurrent.futures.wait(futures)
I have this somewhat complicated command line function in Python (lets call it myFunction()), and I am working to integrate it in a graphical interface (using PySide/Qt).
The GUI is used to help select inputs, and display outputs. However, myFunction is designed to work as a stand-alone command line function, and it occasionnaly prints out the progress.
My question is: how can I intercept these print calls and display them in the GUI?
I know it would be possible to modify myFunction() to send processEvents() to the GUI, but I would then lose the ability to execute myFunction() in a terminal.
Ideally, I would like something similar to Ubuntu's graphical software updater, which has a small embeded terminal-looking widget displaying what apt-get would display were it executed in a terminal.
you could redirect stdout and restore after. for example:
import StringIO
import sys
# somewhere to store output
out = StringIO.StringIO()
# set stdout to our StringIO instance
sys.stdout = out
# print something (nothing will print)
print 'herp derp'
# restore stdout so we can really print (__stdout__ stores the original stdout)
sys.stdout = sys.__stdout__
# print the stored value from previous print
print out.getvalue()
Wrap it with a function that hijacks stdout:
def stdin2file(func, file):
def innerfunc(*args, **kwargs):
old = sys.stdout
sys.stdout = file
try:
return func(*args, **kwargs)
finally:
sys.stdout = old
return innerfunc
Then simply provide a file like object that supports write():
class GUIWriter:
def write(self, stuff):
#send stuff to GUI
MyFunction = stdin2file(MyFunction, GUIWriter())
The wrapper can be turned into a decorator too:
def redirect_stdin(file):
def stdin2file(func, file):
def innerfunc(*args, **kwargs):
old = sys.stdout
sys.stdout = file
try:
return func(*args, **kwargs)
finally:
sys.stdout = old
return innerfunc
return stdin2file
The use it when declaring MyFunction():
#redirect_stdin(GUIWriter())
def MyFunction(a, b, c, d):
# any calls to print will call the 'write' method of the GUIWriter
# do stuff
Here is a Python 3 pattern using contextmanager that both encapsulates the monkey-patch technique and also ensures that sys.stdout is restored in case of an exception.
from io import StringIO
import sys
from contextlib import contextmanager
#contextmanager
def capture_stdout():
"""
context manager encapsulating a pattern for capturing stdout writes
and restoring sys.stdout even upon exceptions
Examples:
>>> with capture_stdout() as get_value:
>>> print("here is a print")
>>> captured = get_value()
>>> print('Gotcha: ' + captured)
>>> with capture_stdout() as get_value:
>>> print("here is a print")
>>> raise Exception('oh no!')
>>> print('Does printing still work?')
"""
# Redirect sys.stdout
out = StringIO()
sys.stdout = out
# Yield a method clients can use to obtain the value
try:
yield out.getvalue
finally:
# Restore the normal stdout
sys.stdout = sys.__stdout__
All printing is done via sys.stdout, which is a ordinary file-like object: iirc, it requires a method write(str). As long as your replacement has that method, it's quite easy to drop in your hook:
import sys
class CaptureOutput:
def write(self, message):
log_message_to_textbox(message)
sys.stdout = CaptureOutput()
The actual contents of log_message_to_textbox are up to you.
I am finding that I am using plenty of context managers in Python. However, I have been testing a number of things using them, and I am often needing the following:
class MyTestCase(unittest.TestCase):
def testFirstThing(self):
with GetResource() as resource:
u = UnderTest(resource)
u.doStuff()
self.assertEqual(u.getSomething(), 'a value')
def testSecondThing(self):
with GetResource() as resource:
u = UnderTest(resource)
u.doOtherStuff()
self.assertEqual(u.getSomething(), 'a value')
When this gets to many tests, this is clearly going to get boring, so in the spirit of SPOT/DRY (single point of truth/dont repeat yourself), I'd want to refactor those bits into the test setUp() and tearDown() methods.
However, trying to do that has lead to this ugliness:
def setUp(self):
self._resource = GetSlot()
self._resource.__enter__()
def tearDown(self):
self._resource.__exit__(None, None, None)
There must be a better way to do this. Ideally, in the setUp()/tearDown() without repetitive bits for each test method (I can see how repeating a decorator on each method could do it).
Edit: Consider the undertest object to be internal, and the GetResource object to be a third party thing (which we aren't changing).
I've renamed GetSlot to GetResource here—this is more general than specific case—where context managers are the way which the object is intended to go into a locked state and out.
How about overriding unittest.TestCase.run() as illustrated below? This approach doesn't require calling any private methods or doing something to every method, which is what the questioner wanted.
from contextlib import contextmanager
import unittest
#contextmanager
def resource_manager():
yield 'foo'
class MyTest(unittest.TestCase):
def run(self, result=None):
with resource_manager() as resource:
self.resource = resource
super(MyTest, self).run(result)
def test(self):
self.assertEqual('foo', self.resource)
unittest.main()
This approach also allows passing the TestCase instance to the context manager, if you want to modify the TestCase instance there.
Manipulating context managers in situations where you don't want a with statement to clean things up if all your resource acquisitions succeed is one of the use cases that contextlib.ExitStack() is designed to handle.
For example (using addCleanup() rather than a custom tearDown() implementation):
def setUp(self):
with contextlib.ExitStack() as stack:
self._resource = stack.enter_context(GetResource())
self.addCleanup(stack.pop_all().close)
That's the most robust approach, since it correctly handles acquisition of multiple resources:
def setUp(self):
with contextlib.ExitStack() as stack:
self._resource1 = stack.enter_context(GetResource())
self._resource2 = stack.enter_context(GetOtherResource())
self.addCleanup(stack.pop_all().close)
Here, if GetOtherResource() fails, the first resource will be cleaned up immediately by the with statement, while if it succeeds, the pop_all() call will postpone the cleanup until the registered cleanup function runs.
If you know you're only ever going to have one resource to manage, you can skip the with statement:
def setUp(self):
stack = contextlib.ExitStack()
self._resource = stack.enter_context(GetResource())
self.addCleanup(stack.close)
However, that's a bit more error prone, since if you add more resources to the stack without first switching to the with statement based version, successfully allocated resources may not get cleaned up promptly if later resource acquisitions fail.
You can also write something comparable using a custom tearDown() implementation by saving a reference to the resource stack on the test case:
def setUp(self):
with contextlib.ExitStack() as stack:
self._resource1 = stack.enter_context(GetResource())
self._resource2 = stack.enter_context(GetOtherResource())
self._resource_stack = stack.pop_all()
def tearDown(self):
self._resource_stack.close()
Alternatively, you can also define a custom cleanup function that accesses the resource via a closure reference, avoiding the need to store any extra state on the test case purely for cleanup purposes:
def setUp(self):
with contextlib.ExitStack() as stack:
resource = stack.enter_context(GetResource())
def cleanup():
if necessary:
one_last_chance_to_use(resource)
stack.pop_all().close()
self.addCleanup(cleanup)
pytest fixtures are very close to your idea/style, and allow for exactly what you want:
import pytest
from code.to.test import foo
#pytest.fixture(...)
def resource():
with your_context_manager as r:
yield r
def test_foo(resource):
assert foo(resource).bar() == 42
The problem with calling __enter__ and __exit__ as you did, is not that you have done so: they can be called outside of a with statement. The problem is that your code has no provision to call the object's __exit__ method properly if an exception occurs.
So, the way to do it is to have a decorator that will wrap the call to your original method in a withstatement. A short metaclass can apply the decorator transparently to all methods named test* in the class -
# -*- coding: utf-8 -*-
from functools import wraps
import unittest
def setup_context(method):
# the 'wraps' decorator preserves the original function name
# otherwise unittest would not call it, as its name
# would not start with 'test'
#wraps(method)
def test_wrapper(self, *args, **kw):
with GetSlot() as slot:
self._slot = slot
result = method(self, *args, **kw)
delattr(self, "_slot")
return result
return test_wrapper
class MetaContext(type):
def __new__(mcs, name, bases, dct):
for key, value in dct.items():
if key.startswith("test"):
dct[key] = setup_context(value)
return type.__new__(mcs, name, bases, dct)
class GetSlot(object):
def __enter__(self):
return self
def __exit__(self, *args, **kw):
print "exiting object"
def doStuff(self):
print "doing stuff"
def doOtherStuff(self):
raise ValueError
def getSomething(self):
return "a value"
def UnderTest(*args):
return args[0]
class MyTestCase(unittest.TestCase):
__metaclass__ = MetaContext
def testFirstThing(self):
u = UnderTest(self._slot)
u.doStuff()
self.assertEqual(u.getSomething(), 'a value')
def testSecondThing(self):
u = UnderTest(self._slot)
u.doOtherStuff()
self.assertEqual(u.getSomething(), 'a value')
unittest.main()
(I also included mock implementations of "GetSlot" and the methods and functions in your example so that I myself could test the decorator and metaclass I am suggesting on this answer)
I'd argue you should separate your test of the context manager from your test of the Slot class. You could even use a mock object simulating the initialize/finalize interface of slot to test the context manager object, and then test your slot object separately.
from unittest import TestCase, main
class MockSlot(object):
initialized = False
ok_called = False
error_called = False
def initialize(self):
self.initialized = True
def finalize_ok(self):
self.ok_called = True
def finalize_error(self):
self.error_called = True
class GetSlot(object):
def __init__(self, slot_factory=MockSlot):
self.slot_factory = slot_factory
def __enter__(self):
s = self.s = self.slot_factory()
s.initialize()
return s
def __exit__(self, type, value, traceback):
if type is None:
self.s.finalize_ok()
else:
self.s.finalize_error()
class TestContextManager(TestCase):
def test_getslot_calls_initialize(self):
g = GetSlot()
with g as slot:
pass
self.assertTrue(g.s.initialized)
def test_getslot_calls_finalize_ok_if_operation_successful(self):
g = GetSlot()
with g as slot:
pass
self.assertTrue(g.s.ok_called)
def test_getslot_calls_finalize_error_if_operation_unsuccessful(self):
g = GetSlot()
try:
with g as slot:
raise ValueError
except:
pass
self.assertTrue(g.s.error_called)
if __name__ == "__main__":
main()
This makes code simpler, prevents concern mixing and allows you to reuse the context manager without having to code it in many places.