In this question, I defined a context manager that contains a context manager. What is the easiest correct way to accomplish this nesting? I ended up calling self.temporary_file.__enter__() in self.__enter__(). However, in self.__exit__, I am pretty sure I have to call self.temporary_file.__exit__(type_, value, traceback) in a finally block in case an exception is raised. Should I be setting the type_, value, and traceback parameters if something goes wrong in self.__exit__? I checked contextlib, but couldn't find any utilities to help with this.
Original code from question:
import itertools as it
import tempfile
class WriteOnChangeFile:
    """Context manager yielding a temporary file; when the block exits,
    ``filename`` is rewritten only if the buffered contents differ from
    the target file (or the target cannot be read yet)."""

    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        # Delegate to the temporary file's own context-manager protocol.
        self.temporary_file = tempfile.TemporaryFile('r+')
        self.f = self.temporary_file.__enter__()
        return self.f

    def __exit__(self, type_, value, traceback):
        try:
            if self._differs_from_target():
                self._overwrite_target()
        finally:
            # Always release the temporary file, even if compare/write failed.
            self.temporary_file.__exit__(type_, value, traceback)

    def _differs_from_target(self):
        """True when the buffered data differs from the target file, or
        when the target file cannot be opened for reading."""
        try:
            with open(self.filename, 'r') as real_f:
                self.f.seek(0)
                return any(line != real_line
                           for line, real_line in it.zip_longest(self.f, real_f))
        except IOError:
            return True

    def _overwrite_target(self):
        """Copy the buffered lines over the target file."""
        with open(self.filename, 'w') as real_f:
            self.f.seek(0)
            for line in self.f:
                real_f.write(line)
The easy way to create context managers is with contextlib.contextmanager. Something like this:
@contextlib.contextmanager
def write_on_change_file(filename):
    """Yield a temporary file; when the with-block ends, run the saving
    logic that previously lived in ``__exit__``.

    The ``yield`` is wrapped in try/finally so the saving logic runs
    even if the body of the with-statement raises.
    """
    with tempfile.TemporaryFile('r+') as temporary_file:
        try:
            yield temporary_file
        finally:
            ...  # some saving logic that you had in __exit__
Then use with write_on_change_file(...) as f:.
The body of the with statement will be executed “instead of” the yield. Wrap the yield itself in a try block if you want to catch any exceptions that happen in the body.
The temporary file will always be properly closed (when its with block ends).
contextlib.contextmanager works great for functions, but when I need a classes as context manager, I'm using the following util:
class ContextManager(metaclass=abc.ABCMeta):
    """Class which can be used as `contextmanager`.

    Subclasses implement ``contextmanager`` with the
    @contextlib.contextmanager generator syntax; this base class wires
    it into __enter__/__exit__ so no manual delegation is needed.
    """

    def __init__(self):
        self.__cm = None  # the generator-based CM currently entered

    @abc.abstractmethod
    @contextlib.contextmanager
    def contextmanager(self):
        """Override with a @contextlib.contextmanager generator."""
        raise NotImplementedError('Abstract method')

    def __enter__(self):
        # Create a fresh generator context manager and enter it.
        self.__cm = self.contextmanager()
        return self.__cm.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        return self.__cm.__exit__(exc_type, exc_value, traceback)
This allows declaring context-manager classes with the generator syntax of @contextlib.contextmanager. It makes it much more natural to nest context managers, without having to manually call __enter__ and __exit__. Example:
class MyClass(ContextManager):
    """Example ContextManager subclass using the generator syntax."""

    def __init__(self, filename):
        self._filename = filename

    @contextlib.contextmanager
    def contextmanager(self):
        # Yield the temp file to the with-block; code after the yield
        # runs when the block exits.
        with tempfile.TemporaryFile() as temp_file:
            yield temp_file
            ...  # Post-processing you previously had in __exit__
# Usage: the temporary file yielded by contextmanager() is bound to x.
with MyClass('filename') as x:
print(x)
I wish this was in the standard library...
Related
I would like to have a class that gets passed either a string or an already opened file during initialization. If it gets a string, it opens the file.
from typing import IO
class Parser:
def __init__(self, fin: str|IO[str]) -> None:
if isinstance(fin, str):
self.fin = open(fin, 'r')
else:
if not fin.readable():
raise ValueError("Input file must be readable.")
else:
self.fin = fin
My question is, what is the correct way to close the file if it gets opened. I imagined it could be closed in the __del__ method, but after reading up on it, it seems to be the consensus that using __del__ is not a great idea. Is there a better way to do this?
Provide a separate class method to deal with opening and closing the file using a context manager.
from contextlib import contextmanager
class Parser:
    """Parser over a readable text stream.

    Use ``Parser.from_filename`` when you have a path and want the
    open/close lifetime managed by a context manager.
    """

    def __init__(self, fh: IO[str]) -> None:
        if not fh.readable():
            raise ValueError("Input file must be readable.")
        self.fh = fh

    # Type-hinting is out of scope for this question, so the hints here
    # are kept deliberately simple.
    @classmethod
    @contextmanager
    def from_filename(cls, name: str) -> Iterator["Parser"]:
        """Open *name*, yield a Parser over it, and close it on exit."""
        with open(name) as fh:
            yield cls(fh)
Now you can use either
# Option 1: the caller manages the file and passes the open handle in.
with open("some_file.txt") as fh:
p = Parser(fh)
...
or
# Option 2: the classmethod manages opening and closing the path.
with Parser.from_filename("some_file.txt") as p:
...
Indeed __del__ should NOT be used for this purpose, since it is highly unpredictable when it is called.
Also, best is to avoid side-effects (such as opening files) in __init__, so I really like the solution by chepner. When possible, that's the method I would choose.
Just to add an alternative: you could also implement a close method and then use contextlib.closing like this:
# Sketch: give Parser an explicit close() method and rely on
# contextlib.closing() to call it when the with-block exits.
import contextlib
class Parser:
# your __init__ here
def close(self):
self.fin.close()
parser = Parser('test.txt')
# contextlib.closing(parser) calls parser.close() on exit.
with contextlib.closing(parser):
# do something with the object here
# the file will be automatically closed afterwards
Even better, you could make the class a context manager itself like below, which is a bit similar to the answer of chepner. (You could even combine a number of these methods, depending on your needs).
# Sketch: make Parser itself a context manager, so ``with Parser(...)``
# closes the underlying file in __exit__.
class Parser:
# your __init__ here
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.fin.close()
with Parser('test.txt') as parser:
# do some stuff here...
In a main method, my_object needs access to several members of passed_object, including a file that is opened (passed_file = passed_object.create_file()). An example of this:
import os
def main():
    """Run the example: hand a PassedObject to a MyObject and use it."""
    print('Start of program...')
    passed_object = PassedObject()
    my_object = MyObject(passed_object)
    my_object.use_passed_object()
    print('End of program.')


class MyObject(object):
    """Consumes attributes and files produced by the passed object.

    NOTE (the question's point): the file returned by create_file() is
    never closed here.
    """

    def __init__(self, passed_object):
        self.passed_object = passed_object

    def use_passed_object(self):
        f = self.passed_object.create_file()
        print('attribute:')
        print(self.passed_object.attr1)
        print('contents of first file:')
        print(list(f))


class PassedObject(object):
    """Owns an attribute and a factory that opens files on demand."""

    def __init__(self):
        self.attr1 = 'some attribute string'

    def create_file(self):
        """Open and return the first regular file found in /tmp."""
        path = '/tmp'
        entries = [entry for entry in os.listdir(path)
                   if os.path.isfile(os.path.join(path, entry))]
        # Fixed: os.listdir returns bare names, so the path must be
        # rejoined before opening (the original opened a relative name).
        f = open(os.path.join(path, entries[0]), 'r')
        return f
main()
The problem: passed_object creates a file object(s) that is needed by my_object, and by others not shown in this simple example. How can I close these file objects when my_object is done with it without breaking encapsulation?
Potential solutions I see:
don't pass passed_object: pass passed_object.create_file() and passed_object.attr1, then use a context manager in main with open.... However, I now have to pass in each attribute/created object to my_class.
write method my_object.close_file(), and call it from main. This seems to break encapsulation also, as main shouldn't need to know about this.
write a my_object.__del__() method that closes the file.
don't worry about closing it; your program terminates in a few lines.
Assuming the simplest situation (because details are missing):
PassedObject.create_file just opens a file, returns it and does not keep a reference to it
Usage of the file is limited to the scope of MyObject.use_passed_object
The solution is simple: close the file when use_passed_object finishes:
class MyObject(object):
    """Uses the passed object's file and guarantees it gets closed."""

    def __init__(self, passed_object):
        self.passed_object = passed_object

    def use_passed_object(self):
        handle = self.passed_object.create_file()
        try:
            print('attribute:')
            print(self.passed_object.attr1)
            print('contents of first file:')
            print(list(handle))
        finally:
            # Release the file even if one of the prints raised.
            handle.close()
Alternatively, since passed_object.create_file() is just returning a file object, which supports context manager interface, you can also do this:
def use_passed_object(self):
    """Read via the passed object's file, letting the file object's own
    context-manager protocol close it when the block ends."""
    with self.passed_object.create_file() as handle:
        print('attribute:')
        print(self.passed_object.attr1)
        print('contents of first file:')
        print(list(handle))
In a more complex scenario (e.g. something other than a builtin file is returned), you could create your own contextmanager which encapsulates access to passed_object.create_file()...
On the other hand, if the file is used by multiple methods of MyObject during its lifetime, you need a contextmanager around the usage of a MyObject instance.
To do that, you'll have to:
remember in MyObject which file(s) it opened (you'll have to do that anyway to use it in multiple methods)
implement MyObject.close which closes all of those files
class MyObject(object):
    """Tracks the files it opened; close() releases all of them."""

    def close(self):
        # Close every file this object opened during its lifetime.
        for handle in self.opened_files:
            handle.close()
Then implement a context manager and use it for this.
Option 1: use contextlib.closing
import contextlib
def main():
    """Run the example; contextlib.closing guarantees my_object.close()
    is called even if use_passed_object raises."""
    print('Start of program...')
    source = PassedObject()
    with contextlib.closing(MyObject(source)) as my_object:
        my_object.use_passed_object()
    print('End of program.')
Option 2: implement context manager interface on MyObject itself
class MyObject(object):
    """Context-manager variant: __exit__ simply delegates to close()."""

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()


def main():
    """Same example, using MyObject directly in a with-statement."""
    print('Start of program...')
    passed_object = PassedObject()
    with MyObject(passed_object) as my_object:
        my_object.use_passed_object()
    print('End of program.')
Say I'd like to have a class which holds multiple unmanaged resources, such as files. I'd also like the public interface of that class to allow the user to use it in an exception-safe manner, i.e. not to leak unmanaged resources/leave them at the mercy of the garbage-collector, which isn't ever guaranteed to run at any point (deterministic resource reclamation).
Take, for instance, the following case:
class Gizmo(object):
    """Holds two files.

    NOTE (the question's point): if opening the second file in __init__
    fails, the first file is leaked to the garbage collector.
    """

    def __init__(self, filepath1, filepath2):
        self._file1 = open(filepath1, 'rb')
        self._file2 = open(filepath2, 'rb')  # may fail, leaking _file1

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Fixed: __exit__ must accept the three exception arguments;
        # the original ``def __exit__(self)`` made every with-statement
        # raise TypeError on exit.
        self.close()
        return False

    def __del__(self):
        self.close()

    def frob(self):
        ...  # manipulate the files

    def close(self):
        self._file1.close()
        self._file2.close()
This is not exception-safe, because if the line opening the second file in __init__ fails, the first file is leaked in the sense that it is left at the mercy of the garbage collector (no matter whether I use the class via a with-statement or not).
My question is this: what's the cleanest way to achieve what I'd like, preferably in a manner which I'd be able to scale up to more than two unmanaged resources, and which doesn't clutter the public interface of my class too horribly (if at all)? The idea of having an initialization method separate to the __init__ method came to mind, but it sounds a bit odd.
If you're on Python 3, this looks like a job for contextlib.ExitStack. If you're on Python 2, there appears to be a backport of this feature available.
from contextlib import ExitStack
class Gizmo(object):
    """Exception-safe version: an ExitStack owns the files, so a failure
    opening the second file closes the first during stack unwinding."""

    def __init__(self, filepath1, filepath2):
        with ExitStack() as stack:
            # If opening the second file fails,
            # unwinding the stack will close the first file.
            self._file1 = stack.enter_context(open(filepath1, 'rb'))
            self._file2 = stack.enter_context(open(filepath2, 'rb'))
            # It worked! But we don't want to close the files when the
            # with statement ends. Take the files off the stack without
            # closing them (and put them on a new stack).
            self._exitstack = stack.pop_all()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        return self._exitstack.__exit__(exc_type, exc_value, exc_tb)

    def __del__(self):
        self.close()

    def frob(self):
        ...  # manipulate the files

    def close(self):
        # __del__ will try to close a Gizmo even if we couldn't set up
        # its _exitstack, so don't touch a nonexistent _exitstack.
        if hasattr(self, '_exitstack'):
            # The stack calls __exit__ on all the files, exactly as if
            # we were exiting "with open(...) as f1, open(...) as f2".
            # If closing one file fails, the others are still attempted.
            self._exitstack.close()
Suppose I want to extend the built-in file abstraction with extra operations at open and close time. In Python 2.7 this works:
# Python 2 only: ``file`` was a builtin type there; this subclassing
# approach is impossible in Python 3, where open() is a factory that
# returns one of several io classes.
class ExtFile(file):
def __init__(self, *args):
file.__init__(self, *args)
# extra stuff here
def close(self):
file.close(self)
# extra stuff here
Now I'm looking at updating the program to Python 3, in which open is a factory function that might return an instance of any of several different classes from the io module depending on how it's called. I could in principle subclass all of them, but that's tedious, and I'd have to reimplement the dispatching that open does. (In Python 3 the distinction between binary and text files matters rather more than it does in 2.x, and I need both.) These objects are going to be passed to library code that might do just about anything with them, so the idiom of making a "file-like" duck-typed class that wraps the return value of open and forwards necessary methods will be most verbose.
Can anyone suggest a 3.x approach that involves as little additional boilerplate as possible beyond the 2.x code shown?
You could just use a context manager instead. For example this one:
class SpecialFileOpener:
    """Context manager that opens a file eagerly in __init__, yields the
    raw file object from __enter__, and closes it (plus extra work) in
    __exit__."""

    def __init__(self, fileName, someOtherParameter):
        self.f = open(fileName)
        # do more stuff
        print(someOtherParameter)

    def __enter__(self):
        # Hand back the real file object, not the wrapper.
        return self.f

    def __exit__(self, exc_type, exc_value, traceback):
        self.f.close()
        # do more stuff
        print('Everything is over.')
Then you can use it like this:
>>> with SpecialFileOpener('C:\\test.txt', 'Hello world!') as f:
print(f.read())
Hello world!
foo bar
Everything is over.
Using a context block with with is preferred for file objects (and other resources) anyway.
tl;dr Use a context manager. See the bottom of this answer for important cautions about them.
Files got more complicated in Python 3. While there are some methods that can be used on normal user classes, those methods don't work with built-in classes. One way is to mix in a desired class before instantiating it, but this requires knowing what the mix-in class should be first:
# Pseudo-code: the base class (???) would have to be known before
# instantiation, which is exactly the problem described in the text.
class MyFileType(???):
def __init__(...)
# stuff here
def close(self):
# more stuff here
Because there are so many types, and more could possibly be added in the future (unlikely, but possible), and we don't know for sure which will be returned until after the call to open, this method doesn't work.
Another method is to both change our custom type to have the returned file's __bases__ and modify the returned instance's __class__ attribute to our custom type:
class MyFileType:
    """Mix-in whose __bases__ we attempt to rewrite after the fact."""

    def close(self):
        # stuff here
        ...


some_file = open(path_to_file, '...')  # ... = desired options
# Fixed typo from the original (MyFile -> MyFileType); the assignment
# still fails with the TypeError quoted in the traceback that follows.
MyFileType.__bases__ = (some_file.__class__,) + MyFileType.__bases__
but this yields
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: __bases__ assignment: '_io.TextIOWrapper' deallocator differs from 'object'
Yet another method that could work with pure user classes is to create the custom file type on the fly, directly from the returned instance's class, and then update the returned instance's class:
# Third attempt: derive from the concrete class of an already-open file
# and retarget the instance's __class__ — fails for non-heap (built-in)
# types, as the traceback that follows shows.
some_file = open(path_to_file, '...') # ... = desired options
class MyFile(some_file.__class__):
def close(self):
super().close()
print("that's all, folks!")
some_file.__class__ = MyFile
but again:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: __class__ assignment: only for heap types
So, it looks like the best method that will work at all in Python 3, and luckily will also work in Python 2 (useful if you want the same code base to work on both versions) is to have a custom context manager:
class Open(object):
    """Context manager wrapping built-in open(); lets you hook custom
    behaviour before/after opening and after closing, while the caller
    still works with the real file object."""

    def __init__(self, *args, **kwds):
        # do custom stuff here
        self.args = args
        self.kwds = kwds

    def __enter__(self):
        # or do custom stuff here :)
        self.file_obj = open(*self.args, **self.kwds)
        # Return the actual file object so we don't have to proxy it.
        return self.file_obj

    def __exit__(self, *args):
        # and still more custom stuff here
        self.file_obj.close()
        # or here
# or here
and to use it:
# Example usage of the Open context manager defined above.
with Open('some_file') as data:
# custom stuff just happened
for line in data:
print(line)
# data is now closed, and more custom stuff
# just happened
An important point to keep in mind: any unhandled exception in __init__ or __enter__ will prevent __exit__ from running, so in those two locations you still need to use the try/except and/or try/finally idioms to make sure you don't leak resources.
I had a similar problem, and a requirement of supporting both Python 2.x and 3.x. What I did was similar to the following (current full version):
class _file_obj(object):
    """Check if `f` is a file name and open the file in `mode`.
    A context manager."""

    def __init__(self, f, mode):
        if isinstance(f, str):
            self.file = open(f, mode)
        else:
            self.file = f
        # Only close what we opened ourselves.
        self.close_file = (self.file is not f)

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        if not self.close_file:
            return  # caller owns the file; do nothing
        # Prefer the file's own __exit__; fall back to close().
        cleanup = getattr(self.file, '__exit__', None)
        if cleanup is not None:
            return cleanup(*args, **kwargs)
        cleanup = getattr(self.file, 'close', None)
        if cleanup is not None:
            cleanup()

    def __getattr__(self, attr):
        # Delegate everything else to the wrapped file object.
        return getattr(self.file, attr)

    def __iter__(self):
        return iter(self.file)
It passes all calls to the underlying file objects and can be initialized from an open file or from a filename. Also works as a context manager. Inspired by this answer.
The code pasted below does the following:
creates an import hook
creates a context manager which sets the meta_path and cleans on exit.
dumps all the imports done by a program passed in input in imports.log
Now I was wondering if using a context manager is a good idea in this case, because actually I don't have the standard try/finally flow, but just a set up and clean up.
Another thing — with this line:
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
does cc become None? Shouldn't it be a CollectorContext object?
from __future__ import with_statement
import os
import sys
class CollectImports(object):
    """
    Import hook: adds each import request to the loaded set and can
    dump the collected names to a file.
    """

    def __init__(self):
        self.loaded = set()

    def __str__(self):
        return str(self.loaded)

    def dump_to_file(self, fname):
        """Dump the loaded set to *fname*, one module name per line."""
        dumped_str = '\n'.join(x for x in self.loaded)
        # Fixed: use a with-statement so the file handle is actually
        # closed (the original relied on the GC to close it).
        with open(fname, 'w') as out:
            out.write(dumped_str)

    def find_module(self, module_name, package=None):
        # Meta-path hook: record the name; returning None lets the
        # normal import machinery continue.
        self.loaded.add(module_name)
class CollectorContext(object):
"""Sets the meta_path hook with the passed import hook when
entering and clean up when exiting
"""
def __init__(self, collector, argv, output_file):
self.collector = collector
self.argv = argv
self.output_file = output_file
def __enter__(self):
# NOTE: returns None implicitly, so ``with ... as cc`` binds None —
# this is exactly the behavior the question asks about.
self.argv = self.argv[1:]
sys.meta_path.append(self.collector)
def __exit__(self, type, value, traceback):
# TODO: should assert that the variables are None, otherwise
# we are quitting with some exceptions
self.collector.dump_to_file(self.output_file)
sys.meta_path.remove(self.collector)
# Compile and exec the target program while the collector hook sits on
# sys.meta_path; the import log is written when the with-block exits.
def main_context():
cl = CollectImports()
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
if __name__ == '__main__':
# Drop our own script name so the target sees its own argv.
sys.argv = sys.argv[1:]
main_context()
I think this concept is ok. As well, I don't see any reasons against having the clean-up stuff in a finally: clause, so the context manager fits perfectly.
Your cc is None, because you told it to be so.
If you don't want that, change your __enter__ method to return something else:
The value returned by this method is bound to the identifier in the as clause of with statements using this context manager.
# Alternative __enter__ bodies, differing only in the value that
# ``with ... as cc`` will bind (pseudo-code: pick one return).
def __enter__(self):
self.argv = self.argv[1:]
sys.meta_path.append(self.collector)
return self
# or
return self.collector
# or
return "I don't know what to return here"
and then
# Python 2 usage (note the print *statement*); shows what cc is bound to.
with CollectorContext(cl, sys.argv, 'imports.log') as cc:
print cc, repr(cc) # there you see what happens.
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
If you always want the cleanup to occur, you should use a context manager. I'm not sure where you use try..finally if you implement the context manager using the low-level special methods. If you use the #contextmanager decorator, you code the context manager in a "natural" way, so that's where you use try..finally instead of getting the exception as a parameter.
Also, cc will be the value you return from __enter__(). In your case, None. The way I understand the context manager design is that the return value is the "context". What the context manager does is set up and clean up contexts in which something else happens. E.g. a database connection will create transactions, and database operations happen in the scope of those transactions.
That said, the above is just there to provide maximum flexibility. There's nothing wrong with just creating a context (that manages itself) directly and returning self, or even not returning anything if you don't need to use the context value inside the with. Since you don't use cc anywhere, you could just do and not worry about the return value:
with CollectorContext(cl, sys.argv, 'imports.log'):
progname = sys.argv[0]
code = compile(open(progname).read(), progname, 'exec')
exec(code)
Thanks everyone — now it works smoothly. I actually wanted the with statement to return something because I wanted to encapsulate the "run" inside the context manager, so I ended up with something like the code below.
Moreover, now I store the old sys.argv and restore it on exit, probably not fundamental but still a nice thing to do I think..
class CollectorContext(object):
    """Sets the meta_path hook with the passed import hook when entering
    and cleans up when exiting; also saves and restores sys.argv around
    the run."""

    def __init__(self, collector, argv, output_file):
        self.collector = collector
        self.old_argv = argv[:]          # private copy so we can restore it
        self.output_file = output_file
        self.progname = self.old_argv[1]

    def __enter__(self):
        sys.argv = self.old_argv[1:]     # target program sees its own argv
        sys.meta_path.append(self.collector)
        return self                      # so ``as cc`` binds this object

    def __exit__(self, type, value, traceback):
        # TODO: should assert that the variables are None, otherwise
        # we are quitting with some exceptions
        self.collector.dump_to_file(self.output_file)
        sys.meta_path.remove(self.collector)
        sys.argv = self.old_argv[:]

    def run(self):
        """Compile and exec the target program named in argv."""
        # Fixed: close the source file instead of leaking the handle
        # (the original used a bare open(...).read()).
        with open(self.progname) as source:
            code = compile(source.read(), self.progname, 'exec')
        exec(code)
def main_context():
    """Run the target program under an import collector; the imports it
    performs are dumped to imports.log when the with-block exits."""
    collector = CollectImports()
    with CollectorContext(collector, sys.argv, 'imports.log') as cc:
        cc.run()