subclassing file objects (to extend open and close operations) in python 3 - python

Suppose I want to extend the built-in file abstraction with extra operations at open and close time. In Python 2.7 this works:
class ExtFile(file):
    # NOTE: Python 2 only -- the 'file' builtin was removed in Python 3,
    # which is exactly why this idiom no longer works there.
    def __init__(self, *args):
        file.__init__(self, *args)
        # extra stuff here
    def close(self):
        file.close(self)
        # extra stuff here
Now I'm looking at updating the program to Python 3, in which open is a factory function that might return an instance of any of several different classes from the io module depending on how it's called. I could in principle subclass all of them, but that's tedious, and I'd have to reimplement the dispatching that open does. (In Python 3 the distinction between binary and text files matters rather more than it does in 2.x, and I need both.) These objects are going to be passed to library code that might do just about anything with them, so the idiom of making a "file-like" duck-typed class that wraps the return value of open and forwards necessary methods will be most verbose.
Can anyone suggest a 3.x approach that involves as little additional boilerplate as possible beyond the 2.x code shown?

You could just use a context manager instead. For example this one:
class SpecialFileOpener:
    """Context manager that opens *fileName* and runs extra hooks
    when the file is opened and again when the block is left."""

    def __init__(self, fileName, someOtherParameter):
        self.f = open(fileName)
        # do more stuff
        print(someOtherParameter)

    def __enter__(self):
        # Hand back the real file object so no proxying is needed.
        return self.f

    def __exit__(self, exc_type, exc_value, traceback):
        self.f.close()
        # do more stuff
        print('Everything is over.')
Then you can use it like this:
>>> with SpecialFileOpener('C:\\test.txt', 'Hello world!') as f:
print(f.read())
Hello world!
foo bar
Everything is over.
Using a context block with with is preferred for file objects (and other resources) anyway.

tl;dr Use a context manager. See the bottom of this answer for important cautions about them.
Files got more complicated in Python 3. While there are some methods that can be used on normal user classes, those methods don't work with built-in classes. One way is to mix-in a desired class before instantiating it, but this requires knowing what the mix-in class should be first:
class MyFileType(???):          # placeholder: the needed base class is unknown
    def __init__(...)           # illustrative pseudocode, not runnable
        # stuff here
    def close(self):
        # more stuff here
Because there are so many types, and more could possibly be added in the future (unlikely, but possible), and we don't know for sure which will be returned until after the call to open, this method doesn't work.
Another method is to change our custom type to have the returned file's __bases__, and to modify the returned instance's __class__ attribute to our custom type:
class MyFileType:
    def close(self):
        # stuff here
        pass

some_file = open(path_to_file, '...') # ... = desired options
# NOTE: the original snippet wrote 'MyFile.__bases__' on the right-hand
# side, but the class defined above is named MyFileType.  Either way the
# assignment below raises the TypeError shown next.
MyFileType.__bases__ = (some_file.__class__,) + MyFileType.__bases__
but this yields
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: __bases__ assignment: '_io.TextIOWrapper' deallocator differs from 'object'
Yet another method that could work with pure user classes is to create the custom file type on the fly, directly from the returned instance's class, and then update the returned instance's class:
# Build a subclass from whatever concrete class open() actually returned...
some_file = open(path_to_file, '...') # ... = desired options
class MyFile(some_file.__class__):
    def close(self):
        super().close()
        print("that's all, folks!")
# ...then try to retarget the live instance; this raises the TypeError
# shown below because the _io classes are static (non-heap) types.
some_file.__class__ = MyFile
but again:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: __class__ assignment: only for heap types
So, it looks like the best method that will work at all in Python 3, and luckily will also work in Python 2 (useful if you want the same code base to work on both versions) is to have a custom context manager:
class Open(object):
    """Context manager wrapping open(), with room for custom hooks.

    Works on both Python 2 and 3: the arguments are only stored in
    __init__; the real file object is created in __enter__ and returned
    unchanged, so no method proxying is required.
    """

    def __init__(self, *args, **kwds):
        # do custom stuff here
        self.args = args
        self.kwds = kwds
        # Set eagerly so __exit__ is safe even if open() never ran
        # (e.g. __exit__ invoked manually, or open() raised).
        self.file_obj = None

    def __enter__(self):
        # or do custom stuff here :)
        self.file_obj = open(*self.args, **self.kwds)
        # return actual file object so we don't have to worry
        # about proxying
        return self.file_obj

    def __exit__(self, *args):
        # and still more custom stuff here
        if self.file_obj is not None:
            self.file_obj.close()
        # or here
and to use it:
with Open('some_file') as data:
# custom stuff just happened
for line in data:
print(line)
# data is now closed, and more custom stuff
# just happened
An important point to keep in mind: any unhandled exception in __init__ or __enter__ will prevent __exit__ from running, so in those two locations you still need to use the try/except and/or try/finally idioms to make sure you don't leak resources.

I had a similar problem, and a requirement of supporting both Python 2.x and 3.x. What I did was similar to the following (current full version):
class _file_obj(object):
"""Check if `f` is a file name and open the file in `mode`.
A context manager."""
def __init__(self, f, mode):
if isinstance(f, str):
self.file = open(f, mode)
else:
self.file = f
self.close_file = (self.file is not f)
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
if (not self.close_file):
return # do nothing
# clean up
exit = getattr(self.file, '__exit__', None)
if exit is not None:
return exit(*args, **kwargs)
else:
exit = getattr(self.file, 'close', None)
if exit is not None:
exit()
def __getattr__(self, attr):
return getattr(self.file, attr)
def __iter__(self):
return iter(self.file)
It passes all calls to the underlying file objects and can be initialized from an open file or from a filename. Also works as a context manager. Inspired by this answer.

Related

What is the correct way to close a file opened inside object initialization?

I would like to have a class that gets passed either a string or an already opened file during initialization. If it gets a string, it opens the file.
from typing import IO
class Parser:
    """Accept either a path or an already-open, readable text file."""

    def __init__(self, fin: str | IO[str]) -> None:
        if isinstance(fin, str):
            # Given a path: we open (and therefore own) the file.
            self.fin = open(fin, 'r')
            return
        if not fin.readable():
            raise ValueError("Input file must be readable.")
        self.fin = fin
My question is, what is the correct way to close the file if it gets opened. I imagined it could be closed in the __del__ method, but after reading up on it, it seems to be the consensus that using __del__ is not a great idea. Is there a better way to do this?
Provide a separate class method to deal with opening and closing the file using a context manager.
from contextlib import contextmanager
class Parser:
    """Parser over an already-open, readable text file."""

    def __init__(self, fh: IO[str]) -> None:
        if not fh.readable():
            raise ValueError(...)
        self.fh = fh

    # The decorators must be real '@' decorators, applied in this order
    # (classmethod outermost), for Parser.from_filename(...) to work as
    # a context manager.
    @classmethod
    @contextmanager
    def from_filename(cls, name: str):
        """Alternate constructor: open *name*, yield a Parser bound to
        it, and close the file when the with-block exits."""
        with open(name) as fh:
            yield cls(fh)
Now you can use either
with open("some_file.txt") as fh:
p = Parser(fh)
...
or
with Parser.from_filename("some_file.txt") as p:
...
Indeed __del__ should NOT be used for this purpose, since it is highly unpredictable when it is called.
Also, best is to avoid side-effects (such as opening files) in __init__, so I really like the solution by chepner. When possible, that's the method I would choose.
Just to add an alternative: you could also implement a close method and then use contextlib.closing like this:
import contextlib
class Parser:
    # your __init__ here (it is expected to set self.fin to an open file)
    def close(self):
        # release the underlying file handle
        self.fin.close()

# contextlib.closing() turns any object with a close() method into a
# context manager, so close() is guaranteed to run.
parser = Parser('test.txt')
with contextlib.closing(parser):
    # do something with the object here
    # the file will be automatically closed afterwards
Even better, you could make the class a context manager itself like below, which is a bit similar to the answer of chepner. (You could even combine a number of these methods, depending on your needs).
class Parser:
    # your __init__ here (it is expected to set self.fin to an open file)
    def __enter__(self):
        # the parser itself is the resource handed to the with-block
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        # close the underlying file on exit, even after an exception
        self.fin.close()

with Parser('test.txt') as parser:
    # do some stuff here...

ExitStack within classes

I would like to understand why using the following snippet leads me to an error:
a) I want to use the following class to create a context manager, as outlined in the link attached below: for me it is very important to keep the "class PrintStop(ExitStack)" form, so please bear in mind when trying to solve this issue, that I already know there are other ways to use ExitStack(), but I am interested in this specific way of using it:
class PrintStop(ExitStack):
    """Context manager intended to silence print() unless *verbose*."""
    def __init__(self, verbose: bool = False):
        super().__init__()
        self.verbose = verbose
    def __enter__(self):
        super().__enter__()
        if not self.verbose:
            # Redirect stdout into the null device.  The ExitStack will
            # close the devnull file when the block exits, but nothing
            # ever restores sys.stdout -- the cause of the ValueError
            # this question reports.
            sys.stdout = self.enter_context(open(os.devnull, 'w'))
b) when trying to use the class in the more appropriate way, I get the desired effect to stop all the printing within the "with" block, but when trying to print again after that block I get an error:
with PrintStop(verbose=False):
    print("this shouldn't be printed") <------ok till here
print("this should be printed again as it is outside the with block") <-----ERROR
c) the error I get is "ValueError: I/O operation on closed file": the reason I guess is the fact that exit method of ExitStack() is not automatically called once we exit the 'with' block, so, how may I change the class to fix this bug?
Here is a quick reference to a similar topic,
Pythonic way to compose context managers for objects owned by a class
ExitStack.__exit__ simply ensures that each context you enter has its __exit__ method called; it does not ensure that any changes you made (like assigning to sys.stdout) inside the corresponding __enter__ is undone.
Also, the purpose of an exit stack is to make it easy to enter contexts that require information not known when the with statement is introduced, or to create a variable number of contexts without having to enumerate them statically.
If you really want to use an exit stack, you'll need something like
class PrintStop(ExitStack):
    """ExitStack subclass that silences stdout unless *verbose*."""

    def __init__(self, verbose: bool = False):
        super().__init__()
        self.verbose = verbose

    def __enter__(self):
        rv = super().__enter__()
        if not self.verbose:
            # Remember what stdout was so __exit__ can put it back;
            # the stack itself will close the devnull file for us.
            self._saved_stdout = sys.stdout
            sys.stdout = self.enter_context(open(os.devnull, 'w'))
        return rv

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore whatever stdout was before we entered (resetting to
        # sys.__stdout__ would clobber any pre-existing redirection).
        sys.stdout = getattr(self, '_saved_stdout', sys.stdout)
        # __exit__ must accept and forward the exception triple --
        # a bare 'def __exit__(self)' raises TypeError on block exit.
        return super().__exit__(exc_type, exc_value, traceback)
Keep in mind that contextlib already provides a context manager for temporarily replacing standard output with a different file, appropriately named redirect_stdout.
with redirect_stdout(open(os.devnull, 'w')):
...
Using this as the basis for PrintStop makes use of composition, rather than inheritance.
from contextlib import redirect_stdout, nullcontext
class PrintStop:
    """Silence stdout inside the with-block unless *verbose* is true.

    Composition-based variant: delegates to redirect_stdout() when
    output must be suppressed, and to nullcontext() otherwise.
    """

    def __init__(self, verbose: bool = False):
        super().__init__()
        if verbose:
            # Verbose mode: leave stdout alone.
            self.cm = nullcontext()
            self._devnull = None
        else:
            # Quiet mode: send everything to the null device.
            # (The original snippet had this condition inverted.)
            self._devnull = open(os.devnull, 'w')
            self.cm = redirect_stdout(self._devnull)

    def __enter__(self):
        return self.cm.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        # Forward the exception triple; __exit__ without these
        # parameters raises TypeError when the with-block ends.
        try:
            return self.cm.__exit__(exc_type, exc_value, traceback)
        finally:
            # Don't leak the devnull handle we opened.
            if self._devnull is not None:
                self._devnull.close()

Managing file open and close responsibilities with different objects

In a main method, my_object needs access to several members of passed_object, including a file that is opened (passed_file = passed_object.create_file()). An example of this:
import os
def main():
    """Wire a PassedObject into a MyObject and exercise it."""
    print('Start of program...')
    provider = PassedObject()
    consumer = MyObject(provider)
    consumer.use_passed_object()
    print('End of program.')
class MyObject(object):
    """Consumes an attribute and a file produced by *passed_object*."""

    def __init__(self, passed_object):
        self.passed_object = passed_object

    def use_passed_object(self):
        # NOTE: the file returned here is never closed -- that is the
        # problem this question is asking about.
        handle = self.passed_object.create_file()
        print('attribute:')
        print(self.passed_object.attr1)
        print('contents of first file:')
        print(list(handle))
class PassedObject(object):
    """Owns an attribute string and can hand out an open file."""

    def __init__(self):
        self.attr1 = 'some attribute string'

    def create_file(self, path='/tmp'):
        """Open and return the first regular file found in *path*.

        The caller becomes responsible for closing the returned file.
        *path* defaults to '/tmp', preserving the original behaviour.
        """
        files = [file for file in os.listdir(path)
                 if os.path.isfile(os.path.join(path, file))]
        # Bug fix: the original opened files[0] relative to the current
        # working directory instead of joining it with *path*.
        return open(os.path.join(path, files[0]), 'r')
main()
The problem: passed_object creates a file object(s) that is needed by my_object, and by others not shown in this simple example. How can I close these file objects when my_object is done with it without breaking encapsulation?
Potential solutions I see:
don't pass passed_object: pass passed_object.create_file() and passed_object.attr1, then use a context manager in main with open.... However, I now have to pass in each attribute/created object to my_class.
write method my_object.close_file(), and call it from main. This seems to break encapsulation also, as main shouldn't need to know about this.
write a my_object.__del__() method that closes the file.
don't worry about closing it; your program terminates in a few lines.
Assuming the simplest situation (because details are missing):
PassedObject.create_file just opens a file, returns it and does not keep a reference to it
Usage of the file is limited to the scope of MyObject.use_passed_object
The solution is simple: close the file when use_passed_object finishes:
class MyObject(object):
    """Consumer that closes the file as soon as it is done with it."""

    def __init__(self, passed_object):
        self.passed_object = passed_object

    def use_passed_object(self):
        handle = self.passed_object.create_file()
        try:
            print('attribute:')
            print(self.passed_object.attr1)
            print('contents of first file:')
            print(list(handle))
        finally:
            # Guarantee the file is released even if printing fails.
            handle.close()
Alternatively, since passed_object.create_file() is just returning a file object, which supports context manager interface, you can also do this:
def use_passed_object(self):
    """Variant: rely on the file's own context-manager support to
    guarantee it is closed when we are done."""
    with self.passed_object.create_file() as handle:
        print('attribute:')
        print(self.passed_object.attr1)
        print('contents of first file:')
        print(list(handle))
In a more complex scenario (e.g. something other than builtin file is returned), you could create your own contextmanager which encapsulates access to passed_object.create_file()...
On the other hand, if the file is used by multiple methods of MyObject during its lifetime, you need a contextmanager around the usage of a MyObject instance.
To do that, you'll have to:
remember in MyObject which file(s) it opened (you'll have to do that anyway to use it in multiple methods)
implement MyObject.close which closes all of those files
class MyObject(object):
    """Remembers every file it opened so close() can release them all."""

    def close(self):
        # self.opened_files is populated by whichever methods open
        # files during the object's lifetime.
        for handle in self.opened_files:
            handle.close()
Then implement a context manager and use it for this.
Option 1: use contextlib.closing
import contextlib
def main():
    """contextlib.closing() guarantees my_object.close() is called."""
    print('Start of program...')
    provider = PassedObject()
    with contextlib.closing(MyObject(provider)) as consumer:
        consumer.use_passed_object()
    print('End of program.')
Option 2: implement context manager interface on MyObject itself
class MyObject(object):
    """Context-manager variant: leaving the with-block calls close()."""

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()
def main():
    """Scope my_object's lifetime with a with-statement."""
    print('Start of program...')
    provider = PassedObject()
    with MyObject(provider) as consumer:
        consumer.use_passed_object()
    print('End of program.')

Python multiple unmanaged resources and exception safety

Say I'd like to have a class which holds multiple unmanaged resources, such as files. I'd also like the public interface of that class to allow the user to use it in an exception-safe manner, i.e. not to leak unmanaged resources/leave them at the mercy of the garbage-collector, which isn't ever guaranteed to run at any point (deterministic resource reclamation).
Take, for instance, the following case:
class Gizmo(object):
    """Holds two files; as written this is NOT exception-safe."""
    def __init__(self, filepath1, filepath2):
        # If this second open() raises, _file1 is never closed -- the
        # flaw this question is about.
        self._file1 = open(filepath1, 'rb')
        self._file2 = open(filepath2, 'rb')
    def __enter__(self):
        return self
    def __exit__(self):
        # NOTE(review): __exit__ must accept (exc_type, exc_value, tb);
        # as declared this raises TypeError when the with-block exits.
        self.close()
        return False
    def __del__(self):
        self.close()
    def frob(self):
        ...manipulate the files...
    def close(self):
        self._file1.close()
        self._file2.close()
This is not exception-safe, because if the line opening the second file in __init__ fails, the first file is leaked in the sense that it is left at the mercy of the garbage collector (no matter whether I use the class via a with-statement or not).
My question is this: what's the cleanest way to achieve what I'd like, preferably in a manner which I'd be able to scale up to more than two unmanaged resources, and which doesn't clutter the public interface of my class too horribly (if at all)? The idea of having an initialization method separate to the __init__ method came to mind, but it sounds a bit odd.
If you're on Python 3, this looks like a job for contextlib.ExitStack. If you're on Python 2, there appears to be a backport of this feature available.
from contextlib import ExitStack
class Gizmo(object):
    """Exception-safe Gizmo: an ExitStack owns both files."""
    def __init__(self, filepath1, filepath2):
        with ExitStack() as stack:
            # If opening the second file fails,
            # unwinding the stack will close the first file.
            self._file1 = stack.enter_context(open(filepath1, 'rb'))
            self._file2 = stack.enter_context(open(filepath2, 'rb'))
            # It worked! But we don't want to close the files when the with statement ends.
            # Take the files off the stack without closing them
            # (and put them on a new stack).
            self._exitstack = stack.pop_all()
    def __enter__(self):
        return self
    def __exit__(self, exc_type, exc_value, exc_tb):
        # Delegate to the saved stack so the files' own __exit__
        # semantics are preserved.
        return self._exitstack.__exit__(exc_type, exc_value, exc_tb)
    def __del__(self):
        self.close()
    def frob(self):
        ...manipulate the files...
    def close(self):
        # __del__ will try to close a Gizmo even if we couldn't set up its _exitstack,
        # so don't try to close a nonexistent _exitstack.
        if hasattr(self, '_exitstack'):
            # The stack calls __exit__ on all the files, exactly as if we were
            # exiting a "with open(...) as file1, open(...) as file2" block.
            # If closing one file fails, we'll still try to close the other.
            self._exitstack.close()

Construct object via __init__ and ignore constructor exception

I have a Python class whose __init__ method raises a custom exception called WrongFileSpecified.
However, when I write a unit test, I want to assign the attributes of the instance object from a test fixture. So normally what I would be doing is reading data off a file and then working with the instance object.
But with the test, I cannot use any test files, so I basically need to hard code the data in the instance object in the setUp method of the unit test. Is there any way to get an instance created without __init__ complaining about the exception?
Sample code:
class A(object):
    """Loads records from *folderPath*; __init__ itself may raise."""
    def __init__(self, folderPath):
        #check folder path using os.isdir() otherwise raise exception
        #...
        self.folderPath = folderPath
        #Call load record
        self._load_records() #uses self.folderPath and raises exceptions as well
        #Note i cannot avoid raising these exceptions, its required
class TestA(unittest.TestCase):
    """Fixture that hard-codes A's data instead of reading a file."""

    obj = None

    def setUp(self):
        # Bind to self.obj: the original assigned a bare local 'obj',
        # which the test methods could never see.
        self.obj = A('fake folder path')
        self.obj.val1 = "testparam1"
        self.obj.param2 = "testparam2"

    def test_1(self):
        ...
You can create an empty object, bypassing __init__ by using __new__.
obj = obj_type.__new__(obj_type)
Note that obj_type is the appropriate type object. This is a little hacky but it works. You are responsible for setting the object's members.
Edit: here is an example.
class Foo():
    """Small demo class: __init__ sets x and y; say_hello just prints."""

    def __init__(self):
        self.x = 1
        self.y = 2

    def say_hello(self):
        print('Hello!')
r = Foo.__new__(Foo)
r.say_hello()
print(r.x)
Console output:
Hello!
Traceback (most recent call last):
File "C:\WinPython-64bit-3.3.5.7\python-
3.3.5.amd64\Scripts\projects\luc_utils\dev\test\
unit_test_serialization.py", line 29, in <module>
print(r.x)
AttributeError: 'Foo' object has no attribute 'x'
Here are two options:
Refactor the file loading out to a class method, which is the Pythonic method of providing an alternate constructor (see below); or
Provide an additional parameter to __init__ to suppress the exceptions when necessary (e.g. def __init__(self, folderPath, suppress=False), or validate=True, whichever makes more sense for your usage).
The latter is a bit awkward, in my opinion, but would mean that you don't have to refactor existing code creating A instances. The former would look like:
class A(object):
    """Skeleton: the classmethod below is the file-loading constructor.

    Reconstructed from a garbled paste -- the decorator was mangled to
    '#classmethod' and __init__'s parameter list was elided.
    """

    def __init__(self, records):
        """Pass whatever is loaded from the file to __init__."""
        ...

    @classmethod
    def from_file(cls, folderPath):
        """Load the data from the file, or raise an exception."""
        ...
and you would replace e.g. a = A(whatever) with a = A.from_file(whatever).
There is a very useful module called mock, you can check it out later, I feel that in this case it will be too much. Instead, you should consider redesigning your class, like this, for example:
class A(object):
    """Record loader redesigned so __init__ never touches the filesystem."""

    def __init__(self, folderPath):
        self.folderPath = folderPath

    def _load_records(self):
        #check folder path using os.isdir() otherwise raise exception
        ...
        #uses self.folderPath and raises exceptions as well
        ...

    # Restored from the garbled '#classmethod' in the paste; also added
    # the colon missing from the _load_records definition above.
    @classmethod
    def load_records(cls, folderpath):
        """Alternate constructor: build an A and load its records."""
        obj = cls(folderpath)
        obj._load_records()
        return obj
# Usage
records = A.load_records('/path/to/records')
Then you can do:
class TestA(unittest.TestCase):
    """Tests A via the hard-coded fixture built in setUp()."""

    obj = None

    def setUp(self):
        self.obj = A('fake folder path')
        self.obj.val1 = "testparam1"
        self.obj.param2 = "testparam2"

    def test_1(self):
        # assertRaises takes the expected exception type FIRST, then
        # the callable -- the original had the two arguments swapped.
        self.assertRaises(HorribleFailureError, self.obj._load_records)
Also I highly recommend checking out pytest, it has wonderful facilities for test fixtures, including fixtures for files and folders.

Categories

Resources