Does pytest support multiprocessing.set_start_method? - python

The documentation of multiprocessing.set_start_method notes that:
Note that this should be called at most once, and it should be protected inside the if __name__ == '__main__' clause of the main module.
However, if I put multiprocessing.set_start_method('spawn') in a pytest module-scoped fixture, I do not know whether it will work correctly.

Indeed, as stated in the documentation, you will run into trouble if you try to call multiprocessing.set_start_method() from multiple unit test functions. Moreover, it changes the start method for your whole program and may interact badly with the rest of the test suite.
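To illustrate the problem (a minimal sketch of my own, not from the original answer): the second call below raises RuntimeError because the context has already been set, which is exactly what happens when two tests both try to set the start method:
import multiprocessing

multiprocessing.set_start_method('spawn')      # first test sets the start method
try:
    multiprocessing.set_start_method('spawn')  # a second test tries to set it again
except RuntimeError as exc:
    print(exc)  # "context has already been set"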
However, there exists a workaround which is described in the documentation too:
Alternatively, you can use get_context() to obtain a context
object. Context objects have the same API as the multiprocessing
module, and allow one to use multiple start methods in the same
program.
import multiprocessing as mp

def foo(q):
    q.put('hello')

if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    q = ctx.Queue()
    p = ctx.Process(target=foo, args=(q,))
    p.start()
    print(q.get())
    p.join()
This approach can be used per test to avoid the compatibility issues discussed above. It can be combined with monkeypatching or mocking to test your class with different start methods:
# my_class.py
import multiprocessing

class MyClass:
    def __init__(self):
        self._queue = multiprocessing.Queue()

    def process(self, x):
        # Very simplified example of a method using a multiprocessing Queue
        self._queue.put(x)
        return self._queue.get()

# tests/test_my_class.py
import multiprocessing

import my_class

def test_spawn(monkeypatch):
    ctx = multiprocessing.get_context('spawn')
    monkeypatch.setattr(my_class.multiprocessing, "Queue", ctx.Queue)
    obj = my_class.MyClass()
    assert obj.process(6) == 6

def test_fork(monkeypatch):
    ctx = multiprocessing.get_context('fork')
    monkeypatch.setattr(my_class.multiprocessing, "Queue", ctx.Queue)
    obj = my_class.MyClass()
    assert obj.process(6) == 6

If you really do always want to use the same start method, you can set it in a session-scoped fixture in the file conftest.py in the root of your source tree. E.g.
# conftest.py
import multiprocessing

import pytest

@pytest.fixture(scope="session", autouse=True)
def always_spawn():
    multiprocessing.set_start_method("spawn")
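One caveat worth noting (my addition, not part of the original answer): if the start method has already been set elsewhere (for example by a plugin or an earlier import), the call above raises RuntimeError. set_start_method accepts force=True to override a previously set method, so a more defensive sketch of the same fixture might look like this:
# conftest.py (variant sketch; force=True assumes overriding the start method is acceptable for your suite)
import multiprocessing

import pytest

@pytest.fixture(scope="session", autouse=True)
def always_spawn():
    # force=True replaces an already-set start method instead of raising RuntimeError
    multiprocessing.set_start_method("spawn", force=True)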

Related

Proper way to return mocked object using pytest.fixture

I'm trying to set up the target under test in a @pytest.fixture and use it in all my tests in the module. I'm able to patch the test correctly, but after I add the @pytest.fixture to return the mock object and invoke the mocked object in other unit tests, the object starts referring back to the original function.
Following is the code I have. I was expecting the mocked_worker in the unit test to refer to the return value, but it is invoking the actual os.getcwd method instead.
Please help me correct the code:
import os

import pytest
from unittest.mock import patch

class Worker:
    def work_on(self):
        path = os.getcwd()
        print(f'Working on {path}')
        return path

@pytest.fixture()
def mocked_worker():
    with patch('test.test_module.os.getcwd', return_value="Testing"):
        result = Worker()
        return result

def test_work_on(mocked_worker):
    ans = mocked_worker.work_on()
    assert ans == "Testing"
The problem is that when the fixture returns, the scope of the "with" statement ends and the patch is undone, so the object goes back to using the real os.getcwd. The solution is to use "yield" instead:
@pytest.fixture()
def mocked_worker():
    with patch('test.test_module.os.getcwd', return_value="Testing"):
        result = Worker()
        yield result
I would recommend using pytest-mock. A full single-file (test_file.py) example using this library would be:
import os

import pytest
from unittest.mock import patch

class Worker:
    def work_on(self):
        path = os.getcwd()
        print(f'Working on {path}')
        return path

@pytest.fixture()
def mocked_worker(mocker):  # mocker is the pytest-mock fixture
    mocker.patch('test_file.os.getcwd', return_value="Testing")

def test_work_on(mocked_worker):
    worker = Worker()  # here we create an instance of Worker, not the mock itself!
    ans = worker.work_on()
    assert ans == "Testing"
Library versions used, for reference:
pytest==5.3.0
pytest-mock==1.12.1

Verify mocked object is created in Python unit test

I'm trying to test a function that optionally takes in a multiprocessing-pool-like object. If one is provided, that pool will be used; if not, the function creates a ThreadPool to use.
I'd like to test this behavior. Specifically, that ThreadPool is called when appropriate, and that it isn't otherwise.
In the minimal example below, I'm trying to verify the call status of ThreadPool creation. I use a MockThreadPool because there are some downstream things I need to verify in a test environment that can only be checked in serial operation.
Currently this fails in TempTest.test_pool_created. How can I verify that ThreadPool is called?
Other than the example below, I've tried to mock __init__ of ThreadPool without any luck.
temp.py
from multiprocessing.pool import ThreadPool

def run(execution_pool=None):
    values = [1, 2]
    if execution_pool:
        out = execution_pool.map(lambda x: x+1, values)
    else:
        with ThreadPool(2) as p:
            out = p.map(lambda x: x+1, values)
    return out

if __name__ == "__main__":
    out = run()
    print(out)
temp_test.py
import unittest
import unittest.mock as mock
from multiprocessing.pool import ThreadPool

from temp import run

# Mock ThreadPool for diverting parallel code to serial
class MockThreadPool:
    def map(self, run_simulation, all_inputs, chunksize=1):
        map(run_simulation, all_inputs)

class TempTest(unittest.TestCase):
    def test_check_runs(self):
        self.assertTrue(True)

    # Want to test:
    # - ThreadPool is created when no execution pool is passed to run()
    # - ThreadPool is not created when an execution pool is passed to run()
    @mock.patch('multiprocessing.pool.ThreadPool', return_value=MockThreadPool())
    def test_pool_created(self, fcn_pool):
        out = run(None)
        self.assertTrue(fcn_pool.called)

    @mock.patch('multiprocessing.pool.ThreadPool', return_value=MockThreadPool())
    def test_pool_not_created(self, fcn_pool):
        out = run(execution_pool=MockThreadPool())
        self.assertFalse(fcn_pool.called)
I've had the same problem before. You're patching multiprocessing.pool.ThreadPool, but the code in the temp module calls the ThreadPool name it imported directly, so you need to patch it where it is looked up. I'm pretty sure it will work if you change your patch() call to this:
@mock.patch('temp.ThreadPool', return_value=MockThreadPool())
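For completeness, a minimal sketch of the corrected test module (my addition, not from the original answer). Besides the changed patch target, it gives MockThreadPool the __enter__/__exit__ methods that the "with ThreadPool(2) as p:" block in temp.py needs, and makes map() return a list:
# temp_test.py (sketch with the corrected patch target)
import unittest
import unittest.mock as mock

from temp import run

class MockThreadPool:
    def map(self, func, iterable, chunksize=1):
        return list(map(func, iterable))
    # context-manager support so "with ThreadPool(2) as p:" works with the mock
    def __enter__(self):
        return self
    def __exit__(self, *exc_info):
        return False

class TempTest(unittest.TestCase):
    @mock.patch('temp.ThreadPool', return_value=MockThreadPool())
    def test_pool_created(self, fcn_pool):
        run(None)
        self.assertTrue(fcn_pool.called)

    @mock.patch('temp.ThreadPool', return_value=MockThreadPool())
    def test_pool_not_created(self, fcn_pool):
        run(execution_pool=MockThreadPool())
        self.assertFalse(fcn_pool.called)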

Queue and Process subclass instantiation behavior

I'm new to the Python multiprocessing API. I have a custom subclass of multiprocessing.Process, let's call it MyProcess. Many examples I see define Queues in __main__ and then pass them to the Process constructor.
In my case, I spawn N Process subclasses with 2 Queues each (pre- and post-processing). I'd prefer to put the Queue initialization inside each subclass:
import multiprocessing as mp

class MyProcess(mp.Process):
    def __init__(self, ID):
        mp.Process.__init__(self)
        self.name = ID
        self.queues = {'pre': mp.Queue(), 'post': mp.Queue()}

if __name__ == "__main__":
    my_proc = MyProcess(ID)
Rather than:
import multiprocessing as mp

class MyProcess(mp.Process):
    def __init__(self, ID, queues):
        mp.Process.__init__(self)
        self.name = ID
        self.queues = queues

if __name__ == "__main__":
    my_proc = MyProcess(ID, {'pre': mp.Queue(), 'post': mp.Queue()})
Is this possible or is there a pickle/sync/scope problem here?
After some testing, the latter appears to work just fine.
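For concreteness, here is a minimal runnable sketch of the constructor-passing form (my addition; the run() body, the ID string, and the values are invented for illustration):
import multiprocessing as mp

class MyProcess(mp.Process):
    def __init__(self, ID, queues):
        mp.Process.__init__(self)
        self.name = ID
        self.queues = queues

    def run(self):
        # read one item from the 'pre' queue, write a result to the 'post' queue
        item = self.queues['pre'].get()
        self.queues['post'].put(item * 2)

if __name__ == "__main__":
    queues = {'pre': mp.Queue(), 'post': mp.Queue()}
    my_proc = MyProcess('worker-0', queues)
    my_proc.start()
    queues['pre'].put(21)
    print(queues['post'].get())  # 42
    my_proc.join()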

Sharing data between two functions in two different modules in python 2.x

I know this is possible using thread-local storage in Python, but for some reason I am unable to find the exact syntax to achieve it. I have the following sample code to test this, but it is not working:
module1.py
import threading

def print_value():
    local = threading.local()  # what should I put here? this actually creates a new thread-local instead of returning the one created in main() of module2
    print local.name
module2.py
import threading

import module1

if __name__ == '__main__':
    local = threading.local()
    local.name = 'Shailendra'
    module1.print_value()
Edit 1: The shared data should be available only to the thread that invokes these functions, not to all the threads in the system. One example is a request id in a web application.
In module 1, define a global variable that is a threading.local
module1
import threading

shared = threading.local()

def print_value():
    print shared.name
module2
import module1

if __name__ == '__main__':
    module1.shared.name = 'Shailendra'
    module1.print_value()
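To see the per-thread isolation (a small sketch of my own, reusing module1 from above): each thread that sets module1.shared.name sees only its own value, which is what makes this pattern suitable for something like a per-request id.
# demo.py (illustrative only)
import threading

import module1

def worker(request_id):
    module1.shared.name = request_id  # visible only to the current thread
    module1.print_value()

if __name__ == '__main__':
    threads = [threading.Thread(target=worker, args=('request-%d' % i,)) for i in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()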
If it's within the same process, why not use a singleton?
import functools

def singleton(cls):
    ''' Use class as singleton. '''
    cls.__new_original__ = cls.__new__

    @functools.wraps(cls.__new__)
    def singleton_new(cls, *args, **kw):
        it = cls.__dict__.get('__it__')
        if it is not None:
            return it
        cls.__it__ = it = cls.__new_original__(cls, *args, **kw)
        it.__init_original__(*args, **kw)
        return it

    cls.__new__ = singleton_new
    cls.__init_original__ = cls.__init__
    cls.__init__ = object.__init__
    return cls

@singleton
class Bucket(object):
    pass
Now just import Bucket and bind some data to it
from mymodule import Bucket
b = Bucket()
b.name = 'bob'
b.loves_cats = True

Python threading: how to use return values of external scripts?

(I found a decent solution here for this, but unfortunately I'm using IronPython, which does not implement the multiprocessing module ...)
Driving script Threader.py will call Worker.py's single function twice, using the threading module.
Its single function just fetches a dictionary of data.
Roughly speaking:
Worker.py
def GetDict():
    :
    :
    :
    return theDict
Threader.py
import threading
from Worker import GetDict
:
:
:
def ThreadStart():
    t = threading.Thread(target=GetDict)
    t.start()
:
:
In the driver script Threader.py, I want to be able to operate on the two dictionaries outputted by the 2 instances of Worker.py.
The accepted answer here involving the Queue module seems to be what I need in terms of accessing return values, but it is written from the point of view of everything being done in a single script. How do I go about making the return values of the function called in Worker.py available to Threader.py (or any other script, for that matter)?
Many thanks
Another way to do what you want (without using a Queue) would be to use the concurrent.futures module (available from Python 3.2; for earlier versions there is a backport).
Using this, your example would work like this:
from concurrent import futures

def GetDict():
    return {'foo': 'bar'}

# imports ...
# from Worker import GetDict

def ThreadStart():
    executor = futures.ThreadPoolExecutor(max_workers=4)
    future = executor.submit(GetDict)
    print(future.result())  # blocks until GetDict finished
    # or doing more than one:
    jobs = [executor.submit(GetDict) for i in range(10)]
    for j in jobs:
        print(j.result())

if __name__ == '__main__':
    ThreadStart()
Edit:
Something similar would be to use your own thread to execute the target function and save its return value, something like this:
from threading import Thread

def GetDict():
    return {'foo': 'bar'}

# imports ...
# from Worker import GetDict

class WorkerThread(Thread):
    def __init__(self, fnc, *args, **kwargs):
        super(WorkerThread, self).__init__()
        self.fnc = fnc
        self.args = args
        self.kwargs = kwargs

    def run(self):
        self.result = self.fnc(*self.args, **self.kwargs)

def ThreadStart():
    jobs = [WorkerThread(GetDict) for i in range(10)]
    for j in jobs:
        j.start()
    for j in jobs:
        j.join()
        print(j.result)

if __name__ == '__main__':
    ThreadStart()
