Problems with relative imports and pytest - python

I am having an issue whereby relative imports from a .py file within a module are not recognised as such by pytest.
Consider the following directory structure:
.
├── import_pytest_issue
│ ├── __init__.py
│ ├── main.py
│ ├── user.py
│ └── utils.py
└── tests
├── __init__.py
└── test_main.py
main.py, user.py, and utils.py are as follows"
# ./import_pytest_issue/main.py
from user import User
if __name__ == "__main__":
user = User("Fred")
print(f"The user is called {user.name}, and their favourite number is {user.favourite_number}")
# ----------------------------------------------------------------------
# ./import_pytest_issue/user.py
from utils import random_number
class User:
favourite_number = random_number()
def __init__(self, name: str) -> None:
self.name = name
# ----------------------------------------------------------------------
# ./import_pytest_issue/utils.py
from random import choice
def random_number():
return choice(range(1, 11))
...and my test_main.py file is like this:
# ./tests/test_main.py
from import_pytest_issue.user import User
def test_user():
user = User("Larry")
assert user.name == "Larry"
assert user.favourite_number in range(1, 11)
main.py runs without issue:
> python import_pytest_issue/main.py
# The user is called Fred, and their favourite number is 1
...but running pytest I get a ModuleNotFoundError:
> pytest
# ======================================================================== test session starts ========================================================================
# platform linux -- Python 3.10.6, pytest-7.2.1, pluggy-1.0.0
# collected 0 items / 1 error
# ============================================================================== ERRORS ===============================================================================
# ________________________________________________________________ ERROR collecting tests/test_main.py ________________________________________________________________
# ImportError while importing test module 'import-pytest-issue/tests/test_main.py'.
# Hint: make sure your test modules/packages have valid Python names.
# Traceback:
# /usr/lib/python3.10/importlib/__init__.py:126: in import_module
# return _bootstrap._gcd_import(name[level:], package, level)
# tests/test_main.py:1: in <module>
# from import_pytest_issue.user import User
# import_pytest_issue/user.py:1: in <module>
# from utils import random_number
# E ModuleNotFoundError: No module named 'utils'
# ====================================================================== short test summary info ======================================================================
# ERROR tests/test_main.py
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ========================================================================= 1 error in 0.05s ==========================================================================
FWIW this also happens if I run `python -m pytest`.
Any ideas?

Related

Pytest not able to run test where script A importing another script B in the same folder level as A and giving me ModuleNotFoundError

I am trying to run the unit test using pytest in this project, here main_0.py is importing s3 file.
I am getting ModuleNotFoundError: no module named 's3'
Project Folder Structure
some_project
└───src
├───main
│ └───lambda_function
│ └───some
│ main_0.py
│ s3.py
│
└───test
└───unittest
└───lambda_function
└───some
test_main_0.py
test_s3.py
main_0.py
from s3 import PrintS3
def lambda_handler():
obj = PrintS3()
res = obj.print_txt()
return res
s3.py
class PrintS3:
def __init__(self) -> None:
self.txt = "Hello"
def print_txt(self):
print(self.txt)
return self.txt
test_main_0.py
import unittest
class TestSomeMain(unittest.TestCase):
def test_main_0(self):
from src.main.lambda_function.some.main_0 import lambda_handler
res = lambda_handler()
assert res == "Hello"
test_s3.py is empty.
I also tried adding an empty __init__.py file in both the dir but still the same error
Project Folder Structure after adding __init__.py file
some_project
└───src
├───main
│ └───lambda_function
│ └───some
│ main_0.py
│ s3.py
│ __init__.py
│
└───test
└───unittest
└───lambda_function
└───some
test_main_0.py
test_s3.py
__init__.py
the command I am using to run pytest:
python -m pytest ./src/test
and I am inside some_project folder and also using main_0.py instead of main.py because to not get confused with main folder
Edit 2:
I am to run the test case successfully by adding sys.path in the test_main_0.py file but it is breaking linting and hinting in the code editor (vscode) it didn't broke the linting and hinting, both import statement works but is there any better way.
new test_main_0.py:
import unittest
import os
import sys
sys.path.append(os.path.abspath("./src/main/lambda_function/some/"))
class TestSomeMain(unittest.TestCase):
def test_main_0(self):
from src.main.lambda_function.some.main_0 import lambda_handler # this works
from main_0 import lambda_handler # this also works but break linting and hinting in the code editor
res = lambda_handler()
assert res == "Hello"
could you please try
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from some.s3 import PrintS3
def lambda_handler():
obj = PrintS3()
res = obj.print_txt()
return res
I found a somewhat working solution.
added setUp() and tearDown() methods in the class for inserting and removing path in sys.path
path in sys.path is the location of the directory where the main_0.py and s3.py is located
import unittest
import os
import sys
class TestSomeMain(unittest.TestCase):
def setUp(self) -> None:
sys.path.insert(0, os.path.abspath("./src/main/lambda_function/some/"))
def tearDown(self) -> None:
sys.path.remove(os.path.abspath("./src/main/lambda_function/some/"))
def test_main_0(self):
from src.main.lambda_function.some.main_0 import lambda_handler
res = lambda_handler()
assert res == "Hello"
also update the test command in the terminal:
python -m pytest ./src/test/unittest/lambda_function/some --cov ./src/main/lambda_function/some --cov-report html

How to best deal with circular import ImportError due to import statement in __init__ file

So I'm working on a rather big Python 3 project where I'm writing unit tests for some of the files using the unittest library. In one unit test, the tested file imports a function from another python package whose __init__ file itself imports from the tested file. This leads to an ImportError during the unit test.
It is desired for the __init__.py to import from the tested file periphery\foo.py, so I would like to know if there is a possibility to make the unit test work without removing the import from __init__.py
Since the project contains a lot of files, I created a minimal example that illustrates the structure and in which the error can be reproduced. The project structure looks like this
├───core
│ bar.py
│ __init__.py
│
├───periphery
│ foo.py
│ __init__.py
│
└───tests
│ __init__.py
│
├───core
│ __init__.py
│
└───periphery
test_foo.py
__init__.py
The init file in core core/__init__.py contains the code
# --- periphery ---
from periphery.foo import Foo
while periphery/foo.py, which is the file to be tested, looks like
from core.bar import Bar
def Foo():
bar = Bar()
return bar
Finally, the unit test has the following structure
from unittest import TestCase
from periphery.foo import Foo
class Test(TestCase):
def test_foo(self):
""" Test that Foo() returns "bar" """
self.assertEqual(Foo(), "bar")
Running the unit test yields the following error:
ImportError: cannot import name 'Foo' from partially initialized module 'periphery.foo' (most likely due to a circular import)

Pytest needs to specify folder project when importing modules

I am working with python 3.7 with anaconda and visual code.
I have a project folder called providers.
Inside providers I have two folders:
├── Providers
├── __init__.py
|
├── _config
| ├── database_connection.py
| ├── __init__.py
├── src
| ├── overview.py
| ├── __init__.py
├── utils
| ├── pandas_functions.py
| ├── __init__.py
I want to import a class named DatabaseConnection that is inside the file database_connection.py in the file ovewview.py
# Overview.py
from config.database_connection import DatabaseConnection
This works as expected.
I want to run some tests, I am using pytest, and the script looks like this:
from unittest.mock import patch, Mock
import pandas as pd
from config.database_connection import DatabaseConnection
from providers.utils.pandas_functions import get_df
#patch("providers.utils.pandas_functions.pd.read_sql")
def test_get_df(read_sql: Mock):
read_sql.return_value = pd.DataFrame({"foo_id": [1, 2, 3]})
results = get_df()
read_sql.assert_called_once()
pd.testing.assert_frame_equal(results, pd.DataFrame({"bar_id": [1, 2, 3]}))
But is giving me this error:
plugins: hypothesis-5.5.4, arraydiff-0.3, astropy-header-0.1.2, doctestplus-0.5.0, mock-3.4.0, openfiles-0.4.0, remotedata-0.3.2
collected 0 items / 1 error
===================================================================================================== ERRORS =====================================================================================================
________________________________________________________________________________ ERROR collecting tests/test_pandas_functions.py _________________________________________________________________________________
ImportError while importing test module 'C:\Users\jordi_adm\Documents\GitHub\mcf-pipelines\cpke-cash-advance\providers\tests\test_pandas_functions.py'.
Hint: make sure your test modules/packages have valid Python names.
Traceback:
tests\test_pandas_functions.py:3: in <module>
from config.database_connection import DatabaseConnection
E ModuleNotFoundError: No module named 'config'
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
================================================================================================ 1 error in 0.35s ================================================================================================
It cannot find the module config, while if I run the overview.py file it can import it without problems.
If import config.database_connection import DatabaseConnection with providers at the beginning, pytest is able to call this function:
from unittest.mock import patch, Mock
import pandas as pd
from providers.config.database_connection import DatabaseConnection
from providers.utils.pandas_functions import get_df
#patch("providers.utils.pandas_functions.pd.read_sql")
def test_get_df(read_sql: Mock):
read_sql.return_value = pd.DataFrame({"foo_id": [1, 2, 3]})
results = get_df()
read_sql.assert_called_once()
pd.testing.assert_frame_equal(results, pd.DataFrame({"bar_id": [1, 2, 3]}))
plugins: hypothesis-5.5.4, arraydiff-0.3, astropy-header-0.1.2, doctestplus-0.5.0, mock-3.4.0, openfiles-0.4.0, remotedata-0.3.2
collected 1 item
tests\test_pandas_functions.py . [100%]
=============================================================================================== 1 passed in 0.29s ================================================================================================
If I try to run a script inside of this project with providers at the beginning, for example modifying overview.py:
from providers.config.database_connection import DatabaseConnection
I obtain this error:
from providers.config.database_connection import DatabaseConnection
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
c:\Users\jordi_adm\Documents\GitHub\mcf-pipelines\cpke-cash-advance\providers\run.py in
----> 2 from providers.config.database_connection import DatabaseConnection
ModuleNotFoundError: No module named 'providers'
What is the reason to pytest needs to specify providers at the begging?

Convert python script to directory with __main__.py

As much as I think I understand python's import system, I still find my self lost...
I want to change a file (which is my programs main entry point) into a directory, yet I can't get the imports to run successfully
I can't seem to understand how to get sys.path to match.
$ cat > prog.py << EOF
> import sys
> pprint(sys.path[0])
> EOF
$ python3 prog.py
/home/me/pyprogram
$ mkdir prog
$ mv prog.py prog/__main__.py
$ python3 prog
prog
$ mv prog/__main__.py prog/__init__.py
$ python3 prog/__init__.py
/home/me/pyprogram/prog
for a bit more context on what I am trying to achieve, (and I might be designing my program wrong, feedback gladly accepted)
$ tree --dirsfirst
.
├── prog
│ ├── data_process.py
│ └── __init__.py
├── destination.py
└── source.py
1 directory, 4 files
$ cat source.py
def get():
return 'raw data'
$ cat destination.py
def put(data):
print(f"{data} has ',
'/usr/lib/python37.zip',
'/usr/lib/python3.7',
'/usr/lib/python3.7/lib-dynload',
'/home/me/.local/lib/python3.7/site-packages',
'/usr/local/lib/python3.7/dist-packages',
'/usr/lib/python3/dist-packages']
been passed successfully")
$ cat prog/__init__.py
#!/usr/bin/env python
import os
class Task:
def __init__(self, func, args=None, kwargs=None):
self.func = func
self.args = args if args else []
self.kwargs = kwargs if kwargs else {}
def run(self):
self.func(*self.args, **self.kwargs)
tasks = []
def register_task(args=None, kwargs=None):
def registerer(func):
tasks.append(Task(func, args, kwargs))
return func
return registerer
for module in os.listdir(os.path.dirname(os.path.abspath(__file__))):
if module.startswith('_') or module.startswith('.'):
continue
__import__(os.path.splitext(module)[0])
del module
for task in tasks:
task.run()
$ cat prog/data_process.py
from source import get
from destination import put
from . import register_task
#register_task(kwargs={'replace_with': 'cleaned'})
def process(replace_with):
raw = get()
cleaned = raw.replace('raw', replace_with)
put(cleaned)
$ python3 prog/__init__.py
Traceback (most recent call last):
File "prog/__init__.py", line 27, in <module>
__import__(os.path.splitext(module)[0])
File "/home/me/pyprogram/prog/data_process.py", line 1, in <module>
from source import get
ModuleNotFoundError: No module named 'source'
$ mv prog/__init__.py prog/__main__.py
$ python3 prog/
Traceback (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "prog/__main__.py", line 27, in <module>
__import__(os.path.splitext(module)[0])
File "prog/data_process.py", line 1, in <module>
from source import get
ModuleNotFoundError: No module named 'source'
Project structure update
I changed the structure;
1. Placing all libraries into utils.
2. Placing all projects into projects (using __init__.py to allow for easy import of all created projects in the folder).
3. Main program script program.py in the top project directory.
Project structure:
$ tree
.
├── utils
│   ├── source.py
│   ├── remote_dest.py
│   ├── local_dest.py
│   └── __init__.py
├── projects
│   ├── process2.py
│   ├── process1.py
│   └── __init__.py
└── program.py
Contents of libraries defined in utils directory:
$ cat utils/source.py
"""
Emulates expensive resource to get,
bringing the need to cache it for all client projects.
"""
import time
class _Cache:
def __init__(self):
self.data = None
_cache = _Cache()
def get():
"""
Exposed source API for getting the data,
get from remote resource or returns from available cache.
"""
if _cache.data is None: # As well as cache expiration.
_cache.data = list(_expensive_get())
return _cache.data
def _expensive_get():
"""
Emulate an expensive `get` request,
prints to console if it was invoked.
"""
print('Invoking expensive get')
sample_data = [
'some random raw data',
'which is in some raw format',
'it is so raw that it will need cleaning',
'but now it is very raw'
]
for row in sample_data:
time.sleep(1)
yield row
$ cat utils/remote_dest.py
"""
Emulate limited remote resource.
Use thread and queue to have the data sent in the backround.
"""
import time
import threading
import queue
_q = queue.Queue()
def put(data):
"""
Exposed remote API `put` method
"""
_q.put(data)
def _send(q):
"""
Emulate remote resource,
prints to console when data is processed.
"""
while True:
time.sleep(1)
data = q.get()
print(f"Sending {data}")
threading.Thread(target=_send, args=(_q,), daemon=True).start()
$ cat utils/local_dest.py
"""
Emulate second source of data destination.
Allowing to demonstrate need from shared libraries.
"""
import datetime
import os
# Create `out` dir if it doesn't yet exist.
_out_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'out')
if not os.path.exists(_out_dir):
os.makedirs(_out_dir)
def save(data):
"""
Exposed API to store data locally.
"""
out_file = os.path.join(_out_dir, 'data.txt')
with open(out_file, 'a') as f:
f.write(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {data}\n")
Main program execution script contents:
$ cat program.py
#!/usr/bin/env python
import os
class Task:
"""
Class storing `func` along with its `args` and `kwargs` to be run with.
"""
def __init__(self, func, args=None, kwargs=None):
self.func = func
self.args = args if args else []
self.kwargs = kwargs if kwargs else {}
def run(self):
"""
Executes stored `func` with its arguments.
"""
self.func(*self.args, **self.kwargs)
def __repr__(self):
return f"<Task({self.func.__name__})>"
# List that will store the registered tasks to be executed by the main program.
tasks = []
def register_task(args=None, kwargs=None):
"""
Registers decorated function along with the passed `args` and `kwargs` in the `tasks` list
as a `Task` for maintained execution.
"""
def registerer(func):
print(f"Appending '{func.__name__}' in {__name__}")
tasks.append(Task(func, args, kwargs)) # Saves the function as a task.
print(f"> tasks in {__name__}: {tasks}")
return func # returns the function untouched.
return registerer
print(f"Before importing projects as {__name__}. tasks: {tasks}")
import projects
print(f"After importing projects as {__name__}. tasks: {tasks}")
print(f"Iterating over tasks: {tasks} in {__name__}")
while True:
for task in tasks:
task.run()
break # Only run once in the simulation
Contents of the individual projects defined in the projects directory:
$ cat projects/process1.py
"""
Sample project that uses the shared remote resource to get data
and passes it on to another remote resource after processing.
"""
from utils.source import get
from utils.remote_dest import put
from program import register_task
#register_task(kwargs={'replace_with': 'cleaned'})
def process1(replace_with):
raw = get()
for record in raw:
put(record.replace('raw', replace_with))
$ cat projects/process2.py
"""
Sample project that uses the shared remote resource to get data
and saves it locally after processing.
"""
from utils.source import get
from utils.local_dest import save
from program import register_task
#register_task()
def process2():
raw = get()
for record in raw:
save(record.replace('raw', '----'))
Content of __init__.py file in the projects directory:
$ cat projects/__init__.py
"""
use __init__ file to import all projects
that might have been registered with `program.py` using `register_task`
"""
from . import process1, process2
# TODO: Dynamically import all projects (whether file or directory (as project)) that wil be created in the `projects` directory automatically (ignoring any modules that will start with an `_`)
# Something in the sense of:
# ```
# for module in os.listdir(os.path.dirname(os.path.abspath(__file__))):
# if module.startswith('_') or module.startswith('.'):
# continue
# __import__(os.path.splitext(module)[0])
# ```
Yet when I run the program I see that;
1. program.py gets executed twice (once as __main__ and once as program).
2. The tasks are appended (in the second execution run).
Yet when iterating over the tasks, none are found.
$ python3 program.py
Before importing projects as __main__. tasks: []
Before importing projects as program. tasks: []
After importing projects as program. tasks: []
Iterating over tasks: [] in program
Appending 'process1' in program
> tasks in program: [<Task(process1)>]
Appending 'process2' in program
> tasks in program: [<Task(process1)>, <Task(process2)>]
After importing projects as __main__. tasks: []
Iterating over tasks: [] in __main__
I don't understand;
Why is the main (program.py) file being executed twice, I thought that there can't be circular imports as python caches the imported modules?
(I took the idea of the circular imports used in flask applications, i.e. app.py imports routes, models etc. which all of them import app and use it to define the functionality, and app.py imports them back so that the functionality is added (as flask only runs app.py))
Why is the tasks list empty after the processes are appended to it?
After comparing my circular import to a flask based app that does circular imports as follows
Sample flask program that uses circular imports
Flask app structure
(venv) $ echo $FLASK_APP
mgflask.py
(venv) $ tree
.
├── app
│   ├── models
│   │   ├── __init__.py
│   │   ├── post.py
│   │   └── user.py
│   ├── templates/
│   ├── forms.py
│   ├── __init__.py
│   └── routes.py
├── config.py
└── mgflask.py
(venv) $ cat mgflask.py
#!/usr/bin/env python
from app import app
# ...
(venv) $ cat app/__init__.py
from flask import Flask
from config import Config
# ... # config imports
app = Flask(__name__) # <---
# ... # config setup
from . import routes, models, errors # <---
(venv) $ cat app/routes.py
from flask import render_template, flash, redirect, url_for, request
# ... # import extensions
from . import app, db # <---
from .forms import ...
from .models import ...
#app.route('/')
def index():
return render_template('index.html', title='Home')
(venv) $ flask run
* Serving Flask app "mgflask.py" (lazy loading)
* Environment: production
WARNING: Do not use the development server in a production environment.
Use a production WSGI server instead.
* Debug mode: on
* Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
* Restarting with stat
* Debugger is active!
* Debugger PIN: ???-???-???
I restructured my app by;
I moved the Task class, tasks list, register_task decorator function into projects/__init__.py and in the bottom of the init.py file I import the projects defined in the directory
In the program.py file I just from projects import tasks and everything works as desired.
the only question that stays is what is the difference between running prog.py vs prog/ (which contains __main__.py) (first iteration of my question here...)

how to access a json file(test data like config.json) in conftest.py

Suppose in this example, how to access the respective config.json file in conftest fixtures upon executing test-suite using pytest.
$ pwd
/home/user/repo/main
$ pytest testcases/project_(1/2)/test_suite_(1/2).py
Directory structure:
├── main
│ ├── conftest.py # conftest file for my fixtures
│ ├── testcases
│ ├── project_1
│ │ (contains these files -- test_suite_1.py, config.json)
│ └── project_2
│ (contains these files -- test_suite_2.py, config.json)
├── workflows
│ └── libs
You can access the path of the currently executed module via request.node.fspath and build the path to the config.json relative to it. request is a fixture provided by pytest. Here's an example based on the directory structure you provided.
# main/conftest.py
import json
import pathlib
import pytest
#pytest.fixture(autouse=True)
def read_config(request):
file = pathlib.Path(request.node.fspath)
print('current test file:', file)
config = file.with_name('config.json')
print('current config file:', config)
with config.open() as fp:
contents = json.load(fp)
print('config contents:', contents)
If you copy the code above to your conftest.py and run the tests with -s, you should get an output similar to this:
$ pytest -sv
=============================== test session starts ===============================
platform linux -- Python 3.6.5, pytest-3.4.1, py-1.5.3, pluggy-0.6.0 -- /data/gentoo64/usr/bin/python3.6
cachedir: .pytest_cache
rootdir: /data/gentoo64/tmp/so-50329629, inifile:
collected 2 items
main/project1/test_one.py::test_spam
current file: /data/gentoo64/tmp/so-50329629/main/project1/test_one.py
current config: /data/gentoo64/tmp/so-50329629/main/project1/config.json
config contents: {'name': 'spam'}
PASSED
main/project2/test_two.py::test_eggs
current file: /data/gentoo64/tmp/so-50329629/main/project2/test_two.py
current config: /data/gentoo64/tmp/so-50329629/main/project2/config.json
config contents: {'name': 'eggs'}
PASSED
============================= 2 passed in 0.08 seconds ============================
Use parsed config values
You can access the parsed JSON data by returning it in the fixture and using the fixture as one of the test arguments. I slightly modified the fixture from above so it returns the parsed data and removed the autouse=True:
#pytest.fixture
def json_config(request):
file = pathlib.Path(request.node.fspath.strpath)
config = file.with_name('config.json')
with config.open() as fp:
return json.load(fp)
Now simply use the fixture name in the test arguments, the value will be what the fixture returns. for example:
def test_config_has_foo_set_to_bar(json_config):
assert json_config['foo'] == 'bar'

Categories

Resources