Python testing: using a fake file with mock & io.StringIO - python

I'm trying to test some code that operates on a file, and I can't seem to get my head around how to replace using a real file with mock and io.StringIO
My code is pretty much the following:
class CheckConfig(object):
    """Hold configuration lines supplied either as a list or as a file path.

    After construction, ``config`` holds the result of ``_parse`` applied to
    the list itself, or to the lines read from the file the path points to.
    """

    def __init__(self, config):
        """Parse *config* (a list of lines or a path) and store the result."""
        self.config = self._check_input_data(config)

    def _check_input_data(self, data):
        """Return the parsed lines for *data*.

        A list is parsed directly.  Any other value is passed to
        ``os.path.isfile``; when it names an existing file, that file's lines
        are read and parsed.  Returns ``None`` when *data* is neither a list
        nor an existing file.
        """
        if isinstance(data, list):
            return self._parse(data)
        if os.path.isfile(data):
            with open(data) as f:
                return self._parse(f.readlines())
        # Neither a list nor an existing file: make the fall-through explicit.
        return None

    def _parse(self, data):
        """Return *data* unchanged."""
        return data
I have a class that can take either a list or a file, if it's a file it opens it and extracts the contents into a list, and then does what it needs to do to the resulting list.
I have a working test as follows:
def test_CheckConfig_with_file():
    """CheckConfig built from a real file path exposes the expected parsed data."""
    config = 'config.txt'
    expected = parsed_file_data
    actual = CheckConfig(config).config
    assert expected == actual
I want to replace the call to the filesystem. I have tried replacing the file with io.StringIO but I get a TypeError from os.path.isfile() as it's expecting either a string, bytes or int. I also tried mocking the isfile method like so:
@mock.patch('mymodule.os.path')
def test_CheckConfig_with_file(mock_path):
    """Attempt that patches ``os.path`` and passes an in-memory file object.

    ``isfile`` is forced to report True, but the object handed to
    ``CheckConfig`` is an ``io.StringIO`` rather than a filename.
    """
    mock_path.isfile.return_value = True
    config = io.StringIO('data')
    expected = parsed_file_data
    actual = CheckConfig(config).config
    assert expected == actual
but I still get the same TypeError as the _io.StringIO type is causing the exception before isfile gets a chance to return something.
How can I get os.path.isfile to return True when I pass it a fake file? Or is this a sign that I should change my code?

Just mock out both os.path.isfile and the open() call, and pass in a fake filename (you are not expected to pass in an open file, after all).
The mock library includes a utility for the latter: mock_open():
@mock.patch('os.path.isfile')
def test_CheckConfig_with_file(mock_isfile):
    """CheckConfig reads its data through a mocked ``open`` for a fake filename."""
    # Make any path look like an existing file.
    mock_isfile.return_value = True
    config_data = mock.mock_open(read_data='data')
    # Replace open() as seen from the module under test with the mock file.
    with mock.patch('mymodule.open', config_data) as mock_open:
        expected = parsed_file_data
        actual = CheckConfig('mocked/filename').config
    assert expected == actual
This causes the if isinstance(data, list): test to be false (because data is a string instead), followed by the elif os.path.isfile(data): returning True, and the open(data) call to use your mocked data from the mock_open() result.
You can use the mock_open variable to assert that open() was called with the right data (mock_open.assert_called_once_with('mocked/filename') for example).
Demo:
>>> import os.path
>>> from unittest import mock
>>> class CheckConfig(object):
...     def __init__(self, config):
...         self.config = self._check_input_data(config)
...     def _check_input_data(self, data):
...         if isinstance(data, list):
...             return self._parse(data)
...         elif os.path.isfile(data):
...             with open(data) as f:
...                 return self._parse(f.readlines())
...     def _parse(self, data):
...         return data
...
>>> with mock.patch('os.path.isfile') as mock_isfile:
...     mock_isfile.return_value = True
...     config_data = mock.mock_open(read_data='line1\nline2\n')
...     with mock.patch('__main__.open', config_data) as mock_open:
...         actual = CheckConfig('mocked/filename').config
...
>>> actual
['line1\n', 'line2\n']
>>> mock_open.mock_calls
[call('mocked/filename'),
 call().__enter__(),
 call().readlines(),
 call().__exit__(None, None, None)]

In case you end up here wondering how to solve this using the pytest-mock library, here is how you do it:
def test_open(mocker):
    """Patch ``builtins.open`` via the pytest-mock ``mocker`` fixture and read fake data."""
    m = mocker.patch('builtins.open', mocker.mock_open(read_data='bibble'))
    with open('foo') as h:
        result = h.read()
    # The patched open() must have been called exactly once, with the fake name.
    m.assert_called_once_with('foo')
    assert result == 'bibble'
This code example was found (but had to be adjusted) here.

Related

How to delete test files when python unittest fails

I'm using python unittest for functions that write data to JSON. I use tearDownClass to delete the output test files so they don't clutter the local repo. Ground truths are also stored as JSON files.
I do want to keep the output test files when tests fail, so it's easier to troubleshoot.
My current implementation is to use a global boolean keep_file = False. When the unittest fails the assertion, it modifies keep_file = True. tearDownClass only deletes the files when keep_file == False. I don't like the idea of modifying global variables and the try exception blocks for each assert.
import json
import os
import unittest
from src.mymodule import foo1, foo2
# These are defined outside the class on purpose so the classmethods can access them
FILE_1 = "unittest.file1.json"
EXPECTED_FILE_1 = "expected.file1.json"
FILE_2 = "unittest.file2.json"
EXPECTED_FILE_2 = "expected.file2.json"
keep_files = False
class TestRhaPostPayload(unittest.TestCase):
    """Compare the files written from ``foo1``/``foo2`` output with expectations.

    Both output files are written once for the whole class.  They are removed
    in ``tearDownClass`` unless a test set the module-level ``keep_files``
    flag, which the tests do when an assertion fails.
    """

    @classmethod
    def setUpClass(cls):
        """Write both output files before any test runs."""
        cls.get_file1()
        cls.get_file2()

    @classmethod
    def get_file1(cls):
        """Write the output of ``foo1`` to ``FILE_1``."""
        output1 = foo1()
        with open(FILE_1, "w") as f:
            f.write(output1)

    @classmethod
    def get_file2(cls):
        """Write the output of ``foo2`` to ``FILE_2``."""
        # The original called foo1() here, leaving the imported foo2 unused.
        output2 = foo2()
        with open(FILE_2, "w") as f:
            f.write(output2)

    @classmethod
    def tearDownClass(cls):
        """Delete the output files unless a failing test asked to keep them."""
        if not keep_files:
            os.remove(FILE_1)
            os.remove(FILE_2)

    def test_foo1(self):
        """Compare file 1 with its expected content; keep the files on failure."""
        global keep_files
        # code that reads in file1 and expected_file_1
        try:
            # `self.assert(...)` is a syntax error: `assert` is a keyword.
            self.assertEqual(expected_output1, output1)
        except AssertionError:
            keep_files = True
            raise

    def test_foo2(self):
        """Compare file 2 with its expected content; keep the files on failure."""
        global keep_files
        # code that reads in file2 and expected_file_2
        try:
            self.assertEqual(expected_output2, output2)
        except AssertionError:
            keep_files = True
            raise
You could simply check, if there were any errors/failures in your test case during tear-down and only delete the files, if there were none.
How to perform this check was explained in this post.
This check is done on a TestCase instance so tearDownClass won't work. But you are using different files in different tests anyway, so you might as well use normal setUp/tearDown to remove the current file.
Here is a working example:
from pathlib import Path
from typing import Optional
from unittest import TestCase
class Test(TestCase):
    """Example test case that deletes a test's file only when that test passed."""

    def all_tests_passed(self) -> bool:
        """Returns `True` if no errors/failures occurred at the time of calling."""
        # `_outcome` and `_feedErrorsToResult` are private unittest attributes,
        # which is why they are reached through getattr.
        outcome = getattr(self, "_outcome")
        if hasattr(outcome, "errors"):  # Python <=3.10
            result = self.defaultTestResult()
            getattr(self, "_feedErrorsToResult")(result, outcome.errors)
        else:  # Python >=3.11
            result = outcome.result
        # Passed means this instance appears in neither the errors nor the failures.
        return all(test != self for test, _ in result.errors + result.failures)

    def setUp(self) -> None:
        """Start each test with no file registered for cleanup."""
        super().setUp()
        self.test_file: Optional[Path] = None

    def tearDown(self) -> None:
        """Remove the registered file, but only if the test passed."""
        super().tearDown()
        if self.test_file and self.all_tests_passed():
            self.test_file.unlink()

    def test_foo(self) -> None:
        """Passing test: its file is removed in tearDown."""
        self.test_file = Path("foo.txt")
        self.test_file.touch()
        self.assertTrue(True)

    def test_bar(self) -> None:
        """Deliberately failing test: its file is left on disk."""
        self.test_file = Path("bar.txt")
        self.test_file.touch()
        self.assertTrue(False)
Running this test case leaves bar.txt in the current working directory, whereas foo.txt is gone.

Is there a way to manually validate object typing?

I was writing a test case for a function that accepts typing.BinaryIO that comes from fastapi.UploadFile.file.
def upload_binary(data: typing.BinaryIO):
    """Accept a binary file-like object; the body is elided in this example."""
    ...
I was unsure what kind of object to create so that it would pass the type check. I tried io.StringIO and io.BytesIO, and the only way to check which one would be accepted as typing.BinaryIO was to use my IDE's highlighting. It didn't accept StringIO but accepted BytesIO.
So my question: is there a way in Python to manually check whether an object will be accepted for a given type hint?
For example some function like
# Desired usage of a hypothetical `typing_check` helper; it is not defined
# anywhere in this snippet. The trailing comments show the results wanted.
file1 = StringIO("text")
file2 = BytesIO(b"text")
typing_check(file1, typing.BinaryIO) # >>> False
typing_check(file2, typing.BinaryIO) # >>> True
UPD
Looking at starlette/datastructures.py we have
class UploadFile:
    ...
    file: typing.BinaryIO
    def __init__(...):
        if self.file is None:
            self.file = tempfile.SpooledTemporaryFile(...)
And if you try to test it
# The context manager closes the temporary file once the check is done.
with tempfile.SpooledTemporaryFile() as s:
    isinstance(s, typing.BinaryIO) # >>> False
You can try building a basic type-checking decorator using annotations and the inspect module.
inspect.signature(fn) can read annotations of function parameters, and you can compare types with isinstance function.
import inspect
def static_type_checker(fn):
    """Decorate *fn* so each call checks arguments against its annotations.

    Positional arguments are paired with parameters in declaration order and
    keyword arguments are looked up by name.  Parameters without an
    annotation are not checked.  Annotations must be usable as the second
    argument of ``isinstance`` (plain classes or tuples of classes).

    Raises:
        AssertionError: if an argument is not an instance of its annotation.

    Limitation: variable-length parameters (``*args`` / ``**kwargs``) are not
    handled specially.
    """
    import functools

    params = inspect.signature(fn).parameters

    def _check(name, value):
        # Verify one argument against the annotation of the parameter *name*.
        annotation = params[name].annotation
        if annotation is inspect.Parameter.empty:
            return  # nothing declared, nothing to verify
        # Raise explicitly: a bare `assert` would be stripped under `python -O`.
        if not isinstance(value, annotation):
            raise AssertionError(
                "argument {!r} must be an instance of {!r}, got {!r}".format(
                    name, annotation, type(value)
                )
            )

    @functools.wraps(fn)
    def inner_fn(*args, **kwargs):
        for arg, name in zip(args, params):
            _check(name, arg)
        for key, value in kwargs.items():
            # Names absorbed by a **kwargs parameter have no entry of their own.
            if key in params:
                _check(key, value)
        return fn(*args, **kwargs)

    return inner_fn
@static_type_checker
def sample(a: int):
    """Print *a*; the decorator rejects arguments that are not ints."""
    print(a)


sample(1) # prints 1
sample("a") # AssertionError
Note that this is not a perfect solution. It has many limitations, for example with variable-length arguments. I'm just suggesting the basic idea.

How to check that print was called in a function?

Suppose f conditionally calls print; I'd like to know whether this happens within test_*(). How can this be accomplished?
Example:
def f(integer): # defined in and imported from separate module
    """Print a warning when *integer* is actually a ``str``."""
    if isinstance(integer, str):
        print("WARNING: integer is str")
def test_f():
    """Sketch of the wanted check: was ``print`` called by ``f``?"""
    f("5")
    assert print.called
Attempted approach:
def tracked_call(self, *args, **kwargs):
    """Mark *self* as called, then delegate to its ``__call__``.

    Returns whatever the delegated call returns, so wrapping does not hide
    the result.
    """
    self.called = True
    return self.__call__(*args, **kwargs)
print.__call__ = tracked_call
>>> AttributeError: 'builtin_function_or_method' object attribute '__call__' is read-only
Solution 1 (best): check that print was called, and that it prints specific text; doesn't use a fixture:
import builtins
import contextlib, io
from unittest.mock import Mock
def test_f():
    """Check that ``f`` calls ``print`` and that the text starts with ``WARNING:``."""
    print_original = print
    print_mock = Mock()
    print_mock.side_effect = print_original  # ensure actual print is called to capture its txt
    builtins.print = print_mock
    try:
        str_io = io.StringIO()
        with contextlib.redirect_stdout(str_io):
            f("5")
        output = str_io.getvalue()
        assert print_mock.called  # `called` is a Mock attribute
        assert output.startswith("WARNING:")
    finally:
        builtins.print = print_original  # ensure print is "unmocked"
(If print in f writes to sys.stderr instead of the default sys.stdout, use contextlib.redirect_stderr.)
Solution 2: check that print prints specific text within call; from docs:
def test_f(capsys):
    """Check the text ``f`` writes to stdout using pytest's ``capsys`` fixture."""
    f("5")
    out, err = capsys.readouterr()
    assert out.startswith("WARNING:")
This assumes the default print(file=sys.stdout); otherwise the string of interest is in err. If the specific text is of no interest, you can do assert out or err to verify that something was printed. This doesn't necessarily test whether print was called, as we can do print(end='').

python: create a test for reading invalid YAML file

I am trying to create a test in Python, I want to create a mock for opening YAML file, and mock its content to be an illegal YAML and assert the exception.
I tried mocking:
def test_illegal_yaml_file(self):
    """Load parameters while ``isfile`` and ``open`` are mocked with fake content."""
    # NOTE(review): 'wifi_password 12345678' appears to be a valid YAML plain
    # scalar, so the loader may not raise for it - confirm the fixture is
    # really malformed.
    with patch('os.path.isfile', return_value=True):
        with patch('__main__.open', mock_open(read_data='wifi_password 12345678')):
            myObj = MyClass()
            params = myObj.get_params()
            # TODO catch exception with assert here
and inside MyClass:
def get_params(self):
    """Return the parameters from ``configuration.yaml``, or ``None`` if it is not a file."""
    path = "configuration.yaml"
    params = None
    if os.path.isfile(path): # os.path.isfile is mocked here to be True
        params = self.get_parameters(path)
    return params
def get_parameters(self, path):
    """Load parameters from *path*; log the error and return ``None`` on failure."""
    try:
        params = self.load_params_from_yaml(path)
    except Exception as e:
        # Any loader failure is logged and reported to the caller as None.
        Log.error('Invalid YAML file, error: {}'.format(e))
        return None
    return params
@staticmethod
def yaml_load(camera_configuration):
    """Parse *camera_configuration* (YAML text or stream) and return the result."""
    # NOTE(review): FullLoader is not intended for untrusted input; consider
    # yaml.safe_load if this content can come from outside.
    return yaml.load(camera_configuration, Loader=yaml.FullLoader)
I also tried using IO straight to the inner function:
def test_illegal_yaml_file(self):
    """Feed an in-memory byte stream straight to ``MyClass.yaml_load``."""
    # io.BytesIO needs bytes; passing a str raises TypeError on Python 3.
    # NOTE(review): 'param 12345678' appears to be a valid YAML plain scalar,
    # so yaml_load may return that string rather than None - confirm.
    un_valid_yaml = io.BytesIO(b'param 12345678')
    params = MyClass.yaml_load(un_valid_yaml)
    self.assertIsNone(params)
and inside MyClass:
@staticmethod
def yaml_load(camera_configuration):
    """Parse *camera_configuration* (YAML text or stream) and return the result."""
    # NOTE(review): FullLoader is not intended for untrusted input; consider
    # yaml.safe_load if this content can come from outside.
    return yaml.load(camera_configuration, Loader=yaml.FullLoader)
How can I mock it correctly so an exception of invalid YAML file is thrown?

How to overload __init__ method based on argument type?

Let's say I have a class that has a member called data which is a list.
I want to be able to initialize the class with, for example, a filename (which contains data to initialize the list) or with an actual list.
What's your technique for doing this?
Do you just check the type by looking at __class__?
Is there some trick I might be missing?
I'm used to C++ where overloading by argument type is easy.
A much neater way to get 'alternate constructors' is to use classmethods. For instance:
>>> class MyData:
...     def __init__(self, data):
...         "Initialize MyData from a sequence"
...         self.data = data
...
...     @classmethod
...     def fromfilename(cls, filename):
...         "Initialize MyData from a file"
...         data = open(filename).readlines()
...         return cls(data)
...
...     @classmethod
...     def fromdict(cls, datadict):
...         "Initialize MyData from a dict's items"
...         return cls(datadict.items())
...
>>> MyData([1, 2, 3]).data
[1, 2, 3]
>>> MyData.fromfilename("/tmp/foobar").data
['foo\n', 'bar\n', 'baz\n']
>>> MyData.fromdict({"spam": "ham"}).data
[('spam', 'ham')]
The reason it's neater is that there is no doubt about what type is expected, and you aren't forced to guess at what the caller intended for you to do with the datatype it gave you. The problem with isinstance(x, basestring) is that there is no way for the caller to tell you, for instance, that even though the type is not a basestring, you should treat it as a string (and not another sequence.) And perhaps the caller would like to use the same type for different purposes, sometimes as a single item, and sometimes as a sequence of items. Being explicit takes all doubt away and leads to more robust and clearer code.
Excellent question. I've tackled this problem as well, and while I agree that "factories" (class-method constructors) are a good method, I would like to suggest another, which I've also found very useful:
Here's a sample (this is a read method and not a constructor, but the idea is the same):
def read(self, str=None, filename=None, addr=0):
    """ Read binary data and return a store object. The data
        store is also saved in the internal 'data' attribute.
        The data can either be taken from a string (str
        argument) or a file (provide a filename, which will
        be read in binary mode). If both are provided, the str
        will be used. If neither is provided, an ArgumentError
        is raised.
    """
    if str is None:
        if filename is None:
            raise ArgumentError('Please supply a string or a filename')
        # The context manager closes the file even if read() raises.
        with open(filename, 'rb') as fh:
            str = fh.read()
    ...
    ... # rest of code
The key idea here is using Python's excellent support for named arguments to implement this. Now, if I want to read the data from a file, I say:
obj.read(filename="blob.txt")
And to read it from a string, I say:
obj.read(str="\x34\x55")
This way the user has just a single method to call. Handling it inside, as you saw, is not overly complex
With Python 3, you can use the "Implementing Multiple Dispatch with Function Annotations" recipe from the Python Cookbook:
import time
class Date(metaclass=MultipleMeta):
    """Date whose constructor is chosen by the ``MultipleMeta`` metaclass.

    Two ``__init__`` definitions are given on purpose; selecting between them
    from the call arguments is left to the metaclass (defined elsewhere).
    """

    def __init__(self, year:int, month:int, day:int):
        """Build a date from explicit year, month and day."""
        self.year = year
        self.month = month
        self.day = day

    def __init__(self):
        """Build a date for today from the local time."""
        t = time.localtime()
        self.__init__(t.tm_year, t.tm_mon, t.tm_mday)
and it works like:
>>> d = Date(2012, 12, 21)
>>> d.year
2012
>>> e = Date()
>>> e.year
2018
Quick and dirty fix
class MyData:
    """Sketch of a constructor that takes either a string or a list."""

    # `self` was missing, so the instance itself was being bound to `string`.
    def __init__(self, string=None, list=None):
        """Pick the initialisation path from whichever argument was supplied."""
        if string is not None:
            pass  # do stuff
        elif list is not None:
            pass  # do other stuff
        else:
            pass  # make data empty
Then you can call it with
MyData(astring)
MyData(None, alist)
MyData()
A better way would be to use isinstance and type conversion. If I'm understanding you right, you want this:
def __init__(self, filename):
    """Accept either a filename string or something convertible to a list."""
    # `basestring` exists only on Python 2; `str` is the Python 3 equivalent.
    if isinstance(filename, str):
        # filename is a string
        pass
    else:
        # try to convert to a list
        self.path = list(filename)
You should use isinstance
isinstance(...)
isinstance(object, class-or-type-or-tuple) -> bool
Return whether an object is an instance of a class or of a subclass thereof.
With a type as second argument, return whether that is the object's type.
The form using a tuple, isinstance(x, (A, B, ...)), is a shortcut for
isinstance(x, A) or isinstance(x, B) or ... (etc.).
You probably want the isinstance builtin function:
self.data = data if isinstance(data, list) else self.parse(data)
OK, great. I just tossed together this example with a tuple, not a filename, but that's easy. Thanks all.
class MyData:
    """Store data as a list, copying the items when a tuple is supplied."""

    def __init__(self, data):
        """Keep *data* as-is, or as a new list of its items when it is a tuple."""
        if isinstance(data, tuple):
            self.myList = list(data)
        else:
            self.myList = data

    def GetData(self):
        """Print the stored list."""
        # print() call form: the Python 2 print statement is a syntax error on Python 3.
        print(self.myList)
a = [1,2]
b = (2,3)
c = MyData(a)
d = MyData(b)
c.GetData()
d.GetData()
[1, 2]
[2, 3]
My preferred solution is:
class MyClass:
    """Hold an optional sequence of items as a private list."""

    def __init__(self, data=None):
        """Copy *data* into ``_data``; missing or empty input gives an empty list."""
        # do init stuff
        # Each instance gets its own list. A class-level `_data = []` would be
        # shared by every instance created without data.
        # list() copies the list, instead of pointing to it.
        self._data = list(data) if data else []
Then invoke it with either MyClass() or MyClass([1,2,3]).
Hope that helps. Happy Coding!
Why don't you go even more pythonic?
class AutoList:
    """Load ``data`` from an open file, a filename, or keep the input itself."""

    def __init__(self, inp):
        """Try *inp* as a file object, then as a path, else store it unchanged.

        NOTE(review): an ``int`` input is treated by ``open`` as a file
        descriptor - confirm callers never pass integers.
        """
        try:  ## Assume an opened-file...
            self.data = inp.read()
        except AttributeError:
            try:  ## Assume an existent filename...
                with open(inp, 'r') as fd:
                    self.data = fd.read()
            # `Exception` rather than a bare `except:` keeps the best-effort
            # fallback without swallowing KeyboardInterrupt or SystemExit.
            except Exception:
                self.data = inp  ## Who cares what that might be?

Categories

Resources