Is it possible to mock os.scandir and its attributes?

for entry in os.scandir(document_dir):
    if os.path.isdir(entry):
        # some code goes here
        pass
    else:
        # else the file needs to be in a folder
        file_path = entry.path.replace(os.sep, '/')
I am having trouble mocking os.scandir and the path attribute within the else branch. I have not been able to make the path attribute of the mock object behave the way I want in my unit tests.
with patch("os.scandir") as mock_scandir:
    # mock_scandir.return_value = ["docs.json", ]
    # mock_scandir.side_effect = ["docs.json", ]
    # mock_scandir.return_value.path = PropertyMock(return_value="docs.json")
These are all the options I've tried. Any help is greatly appreciated.

It depends on what you really need to mock. The problem is that os.scandir returns entries of type os.DirEntry. One possibility is to use your own mock DirEntry and implement only the methods that you need (in your example, only path). For your example, you also have to mock os.path.isdir. Here is a self-contained example of how you can do this:
import os
from unittest.mock import patch


def get_paths(document_dir):
    # example function containing your code
    paths = []
    for entry in os.scandir(document_dir):
        if os.path.isdir(entry):
            pass
        else:
            # else the file needs to be in a folder
            file_path = entry.path.replace(os.sep, '/')
            paths.append(file_path)
    return paths


class DirEntry:
    def __init__(self, path):
        # path is stored as a plain attribute, which is all the code under test needs
        self.path = path


@patch("os.scandir")
@patch("os.path.isdir")
def test_sut(mock_isdir, mock_scandir):
    mock_isdir.return_value = False
    mock_scandir.return_value = [DirEntry("docs.json")]
    assert get_paths("anydir") == ["docs.json"]
Depending on your actual code, you may have to do more.
If you want to patch more file system functions, you may consider using pyfakefs instead, which patches the whole file system. This would be overkill for a single test, but can be handy for a test suite that relies on file system functions.
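For illustration, here is a minimal sketch of the same test written against pyfakefs's pytest fixture fs (the absolute paths are an assumption for a POSIX system; on Windows the os.sep replacement in get_paths would change the expected value):
def test_get_paths_with_pyfakefs(fs):
    # "fs" is the fixture provided by pyfakefs; the file below exists only
    # in the fake filesystem and is cleaned up automatically
    fs.create_file("/anydir/docs.json")
    assert get_paths("/anydir") == ["/anydir/docs.json"]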
Disclaimer: I'm a contributor to pyfakefs.

How can I redirect module imports with modern Python?

I am maintaining a python package in which I did some restructuring. Now, I want to support clients who still do from my_package.old_subpackage.foo import Foo instead of the new from my_package.new_subpackage.foo import Foo, without explicitly reintroducing many files that do the forwarding. (old_subpackage still exists, but no longer contains foo.py.)
I have learned that there are "loaders" and "finders", and my impression was that I should implement a loader for my purpose, but I only managed to implement a finder so far:
import importlib.util
import sys

RENAMED_PACKAGES = {
    'my_package.old_subpackage.foo': 'my_package.new_subpackage.foo',
}

# TODO: ideally, we would not just implement a "finder", but also a "loader"
# (using the importlib.util.module_for_loader decorator); this would enable us
# to get module contents that also pass identity checks


class RenamedFinder:
    @classmethod
    def find_spec(cls, fullname, path, target=None):
        renamed = RENAMED_PACKAGES.get(fullname)
        if renamed is not None:
            sys.stderr.write(
                f'WARNING: {fullname} was renamed to {renamed}; please adapt import accordingly!\n')
            return importlib.util.find_spec(renamed)
        return None


sys.meta_path.append(RenamedFinder())
https://docs.python.org/3.5/library/importlib.html#importlib.util.module_for_loader and related functionality, however, seem to be deprecated. I know it's not a very pythonic thing I am trying to achieve, but I would be glad to learn that it's achievable.
In your package's __init__.py, you can place whatever objects you want into sys.modules; the values you put there will be returned by import statements:
import sys

from . import new_package
from .new_package import module1, module2

sys.modules["my_lib.old_package"] = new_package
sys.modules["my_lib.old_package.module1"] = module1
sys.modules["my_lib.old_package.module2"] = module2
If someone now uses import my_lib.old_package or import my_lib.old_package.module1 they will obtain a reference to my_lib.new_package.module1. Since the import machinery already finds the keys in the sys.modules dictionary, it never even begins looking for the old files.
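As a quick illustration (hypothetical client code, Python 3.7+ so that import ... as falls back to sys.modules), both spellings resolve to the same module object, which also makes identity checks pass:
import my_lib.old_package.module1 as m_old
import my_lib.new_package.module1 as m_new

assert m_old is m_new  # the alias and the real module are the same object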
If you want to avoid importing all the submodules immediately, you can emulate a bit of lazy loading by placing a module with a __getattr__ in sys.modules:
from types import ModuleType
import importlib
import sys


class LazyModule(ModuleType):
    def __init__(self, name, mod_name):
        super().__init__(name)
        self.__mod_name = mod_name

    def __getattr__(self, attr):
        if "_lazy_module" not in self.__dict__:
            # import the real module on first attribute access
            self._lazy_module = importlib.import_module(self.__mod_name, package="my_lib")
        return getattr(self._lazy_module, attr)


sys.modules["my_lib.old_package"] = LazyModule("my_lib.old_package", "my_lib.new_package")
In the __init__ file of the old package, import from the newer modules:
Old (package.oldpkg):
foo = __import__("Path to new module")
New (package.newpkg):
class foo:
    bar = "thing"
so that package.oldpkg.foo.bar is the same object as package.newpkg.foo.bar.
Hope this helps!
I think that this is what you are looking for:
import importlib
import importlib.util
import sys

RENAMED_PACKAGES = {
    'my_package.old_subpackage.foo': 'my_package.new_subpackage.foo',
}


class RenamedFinder:
    @classmethod
    def find_spec(cls, fullname, path, target=None):
        renamed = RENAMED_PACKAGES.get(fullname)
        if renamed is not None:
            sys.stderr.write(
                f'WARNING: {fullname} was renamed to {renamed}; please adapt import accordingly!\n')
            spec = importlib.util.find_spec(renamed)
            spec.loader = cls
            return spec
        return None

    @staticmethod
    def create_module(spec):
        return importlib.import_module(spec.name)

    @staticmethod
    def exec_module(module):
        pass


sys.meta_path.append(RenamedFinder())
Still, IMO the approach that manipulates sys.modules is preferable, as it is more readable, more explicit, and gives you much more control. It becomes especially useful in future versions of your package, when my_package.new_subpackage.foo starts to diverge from my_package.old_subpackage.foo while you still need to provide the old one for backward compatibility. For that reason, you would maybe need to preserve the code of both anyway.
Consolidate all the old package names into my_package.
Old packages (old_package):
image_processing (class): will be deleted and replaced by super_image_processing
text_recognition (class): will be deleted and replaced by better_text_recognition
foo (variable): will be moved to better_text_recognition
still_there (class): will not move
New packages:
super_image_processing
better_text_recognition
Redirector (class of my_package):
class old_package:
    image_processing = super_image_processing  # Will be replaced
    text_recognition = better_text_recognition  # Will be replaced
Your main new module (my_package):
# imports here
class super_image_processing:
    def its(gets, even, better):
        pass

class better_text_recognition:
    def now(better, than, ever):
        pass

class old_package:
    # Links
    image_processing = super_image_processing
    text_recognition = better_text_recognition
    still_there = __import__("path to unchanged module")
This allows you to delete some files and keep the rest. If you want to redirect variables, you would do:
class super_image_processing:
    def its(gets, even, better):
        pass

class better_text_recognition:
    def now(better, than, ever):
        pass

class old_package:
    # Links
    image_processing = super_image_processing
    text_recognition = better_text_recognition
    foo = text_recognition.foo
    still_there = __import__("path to unchanged module")
Would this work?

Why FileNotFoundError on Path.rename while using Pyfakefs?

I wrote a test for a function that renames files from e.g. /videos/vid_youtube.mp4 to /videos/youtube/vid.mp4. The test patches the fs with Pyfakefs.
When the code actually renames the file, I get this error.
FileNotFoundError: [Errno 2] No such file or directory: '/home/user/code/project/test/DLV/videos/vid_youtube.mp4' -> '/home/user/code/project/test/DLV/videos/youtube/vid.mp4'
This is how I set up fakefs:
def setUp(self) -> None:
    self.setUpPyfakefs()
    self.fs.create_dir(Path(Dirs.VIDEOS))  # /home/user/code/project/test/DLV/videos
    self.fs.create_file(Path(Dirs.VIDEOS / "vid_youtube.mp4"))
The code under test:
class Files:
    @staticmethod
    def rename_video_files():
        all_files = Collect.video_files()
        for files_for_type in all_files:
            for file in all_files[files_for_type]:
                path = Path(file)
                platform = Files.detect_platform(path)
                platform_dir = Path(Dirs.VIDEOS, platform)
                platform_dir.mkdir(exist_ok=True)
                new_name = path.stem.replace(f'_{platform}', '')
                new_path = Dirs.VIDEOS / platform / f'{new_name}{path.suffix}'
                old_path = Dirs.VIDEOS / path
                old_path.rename(new_path)  # throws FileNotFoundError
I debugged the test and the method under test and even passed the fake fs to rename_video_files(fakefs) to inspect the files and directories. All files and directories look correct.
What is going wrong here?
The problem here is most likely the static initialization of Dirs.VIDEOS. It is initialized at load time as a pathlib.Path and won't be patched later, at the time you set up pyfakefs (the same problem would happen if you were to use unittest.mock.patch for patching).
There are two ways to fix this:
Adapt the code to not initialize the path statically.
This could be done by statically defining the path as a str and converting it to a Path at run time, or by using a method to get the path instead of an attribute (e.g. Dirs.VIDEOS() instead of Dirs.VIDEOS); see the sketch at the end of this answer.
Adapt the test to reload the tested code.
If the tested code is reloaded after pyfakefs has been initialized, it will be correctly patched. pyfakefs provides an argument in setUpPyfakefs that does exactly that:
from pathlib import Path

from pyfakefs.fake_filesystem_unittest import TestCase

from my_module import video_files
from my_module.video_files import Dirs, Files


class MyTest(TestCase):
    def setUp(self) -> None:
        self.setUpPyfakefs(modules_to_reload=[video_files])
        self.fs.create_dir(Path(Dirs.VIDEOS))  # /home/user/code/project/test/DLV/videos
        self.fs.create_file(Path(Dirs.VIDEOS / "vid_youtube.mp4"))
(under the assumption that your code under test is located in my_module/video_files.py)
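For completeness, here is a minimal sketch of the first fix; this Dirs class is hypothetical and only illustrates deferring the Path construction to call time:
from pathlib import Path

class Dirs:
    # a plain string is safe to evaluate at import time
    _VIDEOS = "/home/user/code/project/test/DLV/videos"

    @classmethod
    def VIDEOS(cls):
        # the Path object is built at call time, after pyfakefs has patched pathlib
        return Path(cls._VIDEOS)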
Disclaimer: I'm a contributor to pyfakefs.

How should a Python module be structured?

This is my first time posting on stack overflow, so I apologize if I do something wrong.
I am trying to understand the best way to structure a Python module. As an example, I made a backup module that syncs the source and destination, only copying files if there are differences between source and destination. The backup module contains only a class named Backup.
Now I was taught that OOP is the greatest thing ever, but this seems wrong. After looking through some of the standard library source, I see that most everything isn't broken out into a class. I tried to do some research to determine when I should use a class and when I should just have functions, and I got varying information. I guess my main question is: should the following code be left as a class, or should it just be a module with functions? It is very simple currently, but I may want to add more in the future.
"""Class that represents a backup event."""
import hashlib
import os
import shutil
class Backup:
def __init__(self, source, destination):
self.source = source
self.destination = destination
def sync(self):
"""Synchronizes root of source and destination paths."""
sroot = os.path.normpath(self.source)
droot = os.path.normpath(self.destination) + '/' + os.path.basename(sroot)
if os.path.isdir(sroot) and os.path.isdir(droot):
Backup.sync_helper(sroot, droot)
elif os.path.isfile(sroot) and os.path.isfile(droot):
if not Backup.compare(sroot, droot):
Backup.copy(sroot, droot)
else:
Backup.copy(sroot, droot)
def sync_helper(source, destination):
"""Synchronizes source and destination."""
slist = os.listdir(source)
dlist = os.listdir(destination)
for s in slist:
scurr = source + '/' + s
dcurr = destination + '/' + s
if os.path.isdir(scurr) and os.path.isdir(dcurr):
Backup.sync_helper(scurr, dcurr)
elif os.path.isfile(scurr) and os.path.isfile(dcurr):
if not Backup.compare(scurr, dcurr):
Backup.copy(scurr, dcurr)
else:
Backup.copy(scurr, dcurr)
for d in dlist:
if d not in slist:
Backup.remove(destination + '/' + d)
def copy(source, destination):
"""Copies source file, directory, or symlink to destination"""
if os.path.isdir(source):
shutil.copytree(source, destination, symlinks=True)
else:
shutil.copy2(source, destination)
def remove(path):
"""Removes file, directory, or symlink located at path"""
if os.path.isdir(path):
shutil.rmtree(path)
else:
os.unlink(path)
def compare(source, destination):
"""Compares the SHA512 hash of source and destination."""
blocksize = 65536
shasher = hashlib.sha512()
dhasher = hashlib.sha512()
while open(source, 'rb') as sfile:
buf = sfile.read(blocksize)
while len(buf) > 0:
shasher.update(buf)
buf = sfile.read(blocksize)
while open(destination, 'rb') as dfile:
buf = dfile.read(blocksize)
while len(buf) > 0:
dhasher.update(buf)
buf = dfile.read(blocksize)
if shasher.digest() == dhasher.digest():
return True
else:
return False
I guess it doesn't really make sense as a class, since the only method is sync. On the other hand, a backup is a real-world object. This really confuses me.
As a side question: my sync method and sync_helper function seem very similar, and it is probably possible to collapse the two somehow (I will leave that as an exercise for myself). But is this generally how it is done when using a recursive function that needs a certain initial state? That is, is it OK to do some setup in one function to reach a certain state and then call the recursive function that does the actual work? This seems messy.
Finally, I have a bunch of utility functions that aren't actually part of the object but are used by sync. Would it make more sense to break these out into a utility submodule or something, so as not to cause confusion?
Structuring my programs is the most confusing thing to me right now, any help would be greatly appreciated.
This method (and several others) is wrong:
def copy(source, destination):
    """Copies source file, directory, or symlink to destination"""
It works the way it was used (Backup.copy(scurr, dcurr)), but it does not work when used on an instance.
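To see the failure, roughly (hypothetical session):
backup = Backup('src', 'dst')
backup.copy('src', 'dst')  # TypeError: copy() takes 2 positional arguments but 3 were given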
All methods in Python should take self as the first positional argument (def copy(self, source, destination)), be turned into static methods, or be moved out of the class.
A static method is declared using the staticmethod decorator:
@staticmethod
def copy(source, destination):
    """Copies source file, directory, or symlink to destination"""
But in this case, source and destination are actually attributes of Backup instances, so probably it should be modified to use attributes:
def copy(self):
    # use self.source and self.destination
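And if you prefer the module-with-functions layout the question asks about, the reshaped code might look roughly like this (a sketch, not the only valid structure):
import os
import shutil


def copy(source, destination):
    """Copies source file, directory, or symlink to destination."""
    if os.path.isdir(source):
        shutil.copytree(source, destination, symlinks=True)
    else:
        shutil.copy2(source, destination)


class Backup:
    """Keeps only the state; path utilities live as module-level functions."""

    def __init__(self, source, destination):
        self.source = source
        self.destination = destination

    def sync(self):
        copy(self.source, self.destination)  # delegates to the module-level helper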

Python: Creating a mock or fake directory with files for unittesting

I am trying to create a unit test for the following function:
def my_function(path):
    # Search files at the given path
    for file in os.listdir(path):
        if file.endswith(".json"):
            # Search for the file I'm looking for
            if file == "file_im_looking_for.json":
                # Open file
                os.chdir(path)
                json_file = json.load(open(file))
                print json_file["name"]
However, I am having trouble successfully creating a fake directory with files in order for the function to work correctly and not throw errors.
Below is what I have so far, but it is not working for me, and I'm not sure how to incorporate "file_im_looking_for" as the file in the fake directory.
tmpfilepath = os.path.join(tempfile.gettempdir(), "tmp-testfile")

@mock.patch('my_module.os')
def test_my_function(self):
    # make the file 'exist'
    mock_path.endswith.return_value = True
    file_im_looking_for = [{
        "name": "test_json_file",
        "type": "General"
    }]
    my_module.my_function("tmpfilepath")
Any advice on where I'm going wrong, or other ideas to approach this problem, is appreciated!
First of all, you forgot to pass the mocked object to the test function. The right way to use the mock in your test is like this:
@mock.patch('my_module.os')
def test_my_function(self, mock_path):
Anyway, you shouldn't mock endswith, but listdir. The snippet below is an example that may help you.
app.py
import os

def check_files(path):
    files = []
    for _file in os.listdir(path):
        if _file.endswith('.json'):
            files.append(_file)
    return files
test_app.py
import unittest

import mock

from app import check_files


class TestCheckFile(unittest.TestCase):
    @mock.patch('app.os.listdir')
    def test_check_file_should_succeed(self, mock_listdir):
        mock_listdir.return_value = ['a.json', 'b.json', 'c.json', 'd.txt']
        files = check_files('.')
        self.assertEqual(3, len(files))

    @mock.patch('app.os.listdir')
    def test_check_file_should_fail(self, mock_listdir):
        mock_listdir.return_value = ['a.json', 'b.json', 'c.json', 'd.txt']
        files = check_files('.')
        self.assertNotEqual(2, len(files))


if __name__ == '__main__':
    unittest.main()
Edit: answering your question in the comments, you need to mock json.loads and open in your app.
@mock.patch('converter.open')
@mock.patch('converter.json.loads')
@mock.patch('converter.os.listdir')
def test_check_file_load_json_should_succeed(self, mock_listdir, mock_json_loads, mock_open):
    mock_listdir.return_value = ['a.json', 'file_im_looking_for.json', 'd.txt']
    mock_json_loads.return_value = [{"name": "test_json_file", "type": "General"}]
    files = check_files('.')
    self.assertEqual(1, len(files))
But remember! If your mock is too broad or hard to maintain, perhaps refactoring your API would be a good idea.
I would suggest using Python's tempfile library, specifically TemporaryDirectory.
The issue with your and Mauro Baraldi's solutions is that you have to patch multiple functions. This is a very error-prone approach, since with mock.patch you have to know exactly what you are doing! Otherwise, this may cause unexpected errors and eventually frustration.
Personally, I prefer pytest, since it has IMO nicer syntax and better fixtures, but since the question uses unittest, I will stick with it.
I would rewrite your test code like this:
import json
import pathlib
import tempfile
import unittest

import my_module  # the module that contains my_function

wrong_data = {
    "name": "wrong_json_file",
    "type": "Fake"
}

correct_data = {
    "name": "test_json_file",
    "type": "General"
}


class TestMyFunction(unittest.TestCase):
    def setUp(self):
        """Called before every test."""
        self._temp_dir = tempfile.TemporaryDirectory()
        temp_path = pathlib.Path(self._temp_dir.name)
        self._create_temporary_file_with_json_data(temp_path / 'wrong_json_file.json', wrong_data)
        self._create_temporary_file_with_json_data(temp_path / 'file_im_looking_for.json', correct_data)

    def tearDown(self):
        """Called after every test."""
        self._temp_dir.cleanup()

    def _create_temporary_file_with_json_data(self, file_path, json_data):
        with open(file_path, 'w') as ifile:
            ifile.write(json.dumps(json_data))

    def test_my_function(self):
        my_module.my_function(self._temp_dir.name)
You see that your actual test is compressed down to a single line! Admittedly, there is no assert, but if your function returned something, you could assert on the result as usual.
No mocking is needed, because everything actually exists and is cleaned up afterwards. And the best thing is that you can now add more tests with a lower barrier of entry.
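For instance, if my_function were changed to return the name instead of printing it (a hypothetical change), the test could assert on the result directly:
def test_my_function(self):
    result = my_module.my_function(self._temp_dir.name)
    self.assertEqual("test_json_file", result)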

How to mock open differently depending on the parameters passed to open()

My question is how to mock open in Python, such that it reacts differently depending on the argument open() is called with. These are some different scenarios that should be possible:
Open a mocked file and read preset contents (the basic scenario).
Open two mocked files and have them return different values for the read() method. The order in which the files are opened/read should not influence the results.
Furthermore, if I call open('actual_file.txt') to open an actual file, I want the actual file to be opened, and not a magic mock with mocked behavior. And if I just don't want access to a certain file mocked, but do want other files mocked, this should be possible.
I know about this question: Python mock builtin 'open' in a class using two different files.
But that answer only partially covers the second requirement: it does not address order-independent results, and it does not specify how to mock only some calls while allowing other calls to go through to the actual files (the default behavior).
A bit late, but I just recently happened upon the same need, so I'd like to share my solution, based upon this answer from the referred-to question:
import pytest
from unittest.mock import mock_open
from functools import partial
from pathlib import Path

# Capture the real open() before it gets patched, so that unmocked files
# can still go through to the actual filesystem.
real_open = open

mock_file_data = {
    "file1.txt": "some text 1",
    "file2.txt": "some text 2",
    # ... and so on ...
}

do_not_mock = {
    # If you need an exact match (see note in mocked_file()),
    # you should replace these with the correct Path() invocations
    "notmocked1.txt",
    "notmocked2.txt",
    # ... and so on ...
}


# Ref: https://stackoverflow.com/a/38618056/149900
def mocked_file(m, fn, *args, **kwargs):
    m.opened_file = Path(fn)
    fn = Path(fn).name  # If you need an exact path match, remove this line
    if fn in do_not_mock:
        return real_open(fn, *args, **kwargs)
    if fn not in mock_file_data:
        raise FileNotFoundError
    data = mock_file_data[fn]
    file_obj = mock_open(read_data=data).return_value
    file_obj.__iter__.return_value = data.splitlines(True)
    return file_obj


def assert_opened(m, fn):
    fn = Path(fn)
    assert m.opened_file == fn


@pytest.fixture()
def mocked_open(mocker):
    m = mocker.patch("builtins.open")
    m.side_effect = partial(mocked_file, m)
    m.assert_opened = partial(assert_opened, m)
    return m


def test_something(mocked_open):
    ...
    # Something that should NOT invoke open()
    mocked_open.assert_not_called()
    ...
    # Something that SHOULD invoke open()
    mocked_open.assert_called_once()
    mocked_open.assert_opened("file1.txt")
    # Depending on how the tested unit handles "naked" filenames,
    # you might have to change the arg to:
    # Path.cwd() / "file1.txt"
    # ... and so on ...
Do note that (1) I am using Python 3, and (2) I am using pytest.
This can be done by following the approach in the other question's accepted answer (Python mock builtin 'open' in a class using two different files) with a few alterations.
First off, instead of just specifying a side_effect that can be popped, we need to make sure the side_effect returns the correct mocked file depending on the parameters used in the open call.
Then, if the file we wish to open is not among the files we wish to mock, we return the result of the original open() instead of any mocked behavior.
The code below demonstrates how this can be achieved in a clean, repeatable way. I, for instance, keep this code in a file that provides utility functions to make testing easier.
from mock import MagicMock
from mock import patch
import __builtin__
import sys

# Reference to the original open function.
g__test_utils__original_open = open
g__test_utils__file_spec = None


def create_file_mock(read_data):
    # Create file_spec such as in mock.mock_open
    global g__test_utils__file_spec
    if g__test_utils__file_spec is None:
        # set on first use
        if sys.version_info[0] == 3:
            import _io
            g__test_utils__file_spec = list(set(dir(_io.TextIOWrapper)).union(set(dir(_io.BytesIO))))
        else:
            g__test_utils__file_spec = file
    file_handle = MagicMock(spec=g__test_utils__file_spec)
    file_handle.write.return_value = None
    file_handle.__enter__.return_value = file_handle
    file_handle.read.return_value = read_data
    return file_handle


def flexible_mock_open(file_map):
    def flexible_side_effect(file_name):
        if file_name in file_map:
            return file_map[file_name]
        else:
            return g__test_utils__original_open(file_name)

    return_value = MagicMock(name='open', spec=g__test_utils__original_open)
    return_value.side_effect = flexible_side_effect
    return return_value


if __name__ == "__main__":
    a_mock = create_file_mock(read_data="a mock - content")
    b_mock = create_file_mock(read_data="b mock - different content")
    mocked_files = {
        'a': a_mock,
        'b': b_mock,
    }
    with patch.object(__builtin__, 'open', flexible_mock_open(mocked_files)):
        with open('a') as file_handle:
            print file_handle.read()  # prints a mock - content
        with open('b') as file_handle:
            print file_handle.read()  # prints b mock - different content
        with open('actual_file.txt') as file_handle:
            print file_handle.read()  # prints actual file contents
This borrows some code straight from mock.py (Python 2.7) for creating the file_spec.
Side note: if anybody can help me hide these globals, that'd be very helpful.
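One way to get rid of the module-level globals (a sketch, not from the original answer) is to bind the real open as a default argument, which is evaluated at definition time, before the builtin gets patched:
def flexible_mock_open(file_map, real_open=open):
    # real_open is captured when this function is defined,
    # so it still points at the genuine builtin after patching
    def flexible_side_effect(file_name):
        if file_name in file_map:
            return file_map[file_name]
        return real_open(file_name)

    mocked = MagicMock(name='open', spec=real_open)
    mocked.side_effect = flexible_side_effect
    return mocked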
