Related
I'm running some Matlab code in parallel from inside a Python context (I know, but that's what's going on), and I'm hitting an import error involving matlab.double. The same code works fine in a multiprocessing.Pool, so I am having trouble figuring out what the problem is. Here's a minimal reproducing test case.
import matlab
from multiprocessing import Pool
from joblib import Parallel, delayed
# A global object that I would like to be available in the parallel subroutine
x = matlab.double([[0.0]])
def f(i):
print(i, x)
with Pool(4) as p:
p.map(f, range(10))
# This prints 1, [[0.0]]\n2, [[0.0]]\n... as expected
for _ in Parallel(4, backend='multiprocessing')(delayed(f)(i) for i in range(10)):
pass
# This also prints 1, [[0.0]]\n2, [[0.0]]\n... as expected
# Now run with default `backend='loky'`
for _ in Parallel(4)(delayed(f)(i) for i in range(10)):
pass
# ^ this crashes.
So, the only problematic one is the one using the 'loky' backend.
The full traceback is:
exception calling callback for <Future at 0x7f63b5a57358 state=finished raised BrokenProcessPool>
joblib.externals.loky.process_executor._RemoteTraceback:
'''
Traceback (most recent call last):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 391, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
File "~/miniconda3/envs/myenv/lib/python3.6/multiprocessing/queues.py", line 113, in get
return _ForkingPickler.loads(res)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/mlarray.py", line 31, in <module>
from _internal.mlarray_sequence import _MLArrayMetaClass
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/_internal/mlarray_sequence.py", line 3, in <module>
from _internal.mlarray_utils import _get_strides, _get_size, \
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/_internal/mlarray_utils.py", line 4, in <module>
import matlab
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/__init__.py", line 24, in <module>
from mlarray import double, single, uint8, int8, uint16, \
ImportError: cannot import name 'double'
'''
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
callback(self)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 309, in __call__
self.parallel.dispatch_next()
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 731, in dispatch_next
if not self.dispatch_one_batch(self._original_iterator):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 510, in apply_async
future = self._workers.submit(SafeFunction(func))
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/reusable_executor.py", line 151, in submit
fn, *args, **kwargs)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 1022, in submit
raise self._flags.broken
joblib.externals.loky.process_executor.BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.
joblib.externals.loky.process_executor._RemoteTraceback:
'''
Traceback (most recent call last):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 391, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
File "~/miniconda3/envs/myenv/lib/python3.6/multiprocessing/queues.py", line 113, in get
return _ForkingPickler.loads(res)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/mlarray.py", line 31, in <module>
from _internal.mlarray_sequence import _MLArrayMetaClass
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/_internal/mlarray_sequence.py", line 3, in <module>
from _internal.mlarray_utils import _get_strides, _get_size, \
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/_internal/mlarray_utils.py", line 4, in <module>
import matlab
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/matlab/__init__.py", line 24, in <module>
from mlarray import double, single, uint8, int8, uint16, \
ImportError: cannot import name 'double'
'''
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "test.py", line 20, in <module>
for _ in Parallel(4)(delayed(f)(i) for i in range(10)):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 934, in __call__
self.retrieve()
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 833, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 521, in wrap_future_result
return future.result(timeout=timeout)
File "~/miniconda3/envs/myenv/lib/python3.6/concurrent/futures/_base.py", line 432, in result
return self.__get_result()
File "~/miniconda3/envs/myenv/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
callback(self)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 309, in __call__
self.parallel.dispatch_next()
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 731, in dispatch_next
if not self.dispatch_one_batch(self._original_iterator):
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 510, in apply_async
future = self._workers.submit(SafeFunction(func))
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/reusable_executor.py", line 151, in submit
fn, *args, **kwargs)
File "~/miniconda3/envs/myenv/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 1022, in submit
raise self._flags.broken
joblib.externals.loky.process_executor.BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.
Looking at the traceback, it seems like the root cause is an issue importing the matlab package in the child process.
It's probably worth noting that this all runs just fine if instead I had defined x = np.array([[0.0]]) (after importing numpy as np). And of course the main process has no problem with any matlab imports, so I am not sure why the child process would.
I'm not sure if this error has anything in particular to do with the matlab package, or if it's something to do with global variables and cloudpickle or loky. In my application it would help to stick with loky, so I'd appreciate any insight!
I should also note that I'm using the official Matlab engine for Python: https://www.mathworks.com/help/matlab/matlab-engine-for-python.html. I suppose that might make it hard for others to try out the test cases, so I wish I could reproduce this error with a type other than matlab.double, but I haven't found another yet.
Digging around more, I've noticed that the process of importing the matlab package is more circular than I would expect, and I'm speculating that this could be part of the problem? The issue is that when import matlab is run by loky's _ForkingPickler, first some file matlab/mlarray.py is imported, which imports some other files, one of which contains import matlab, and this causes matlab/__init__.py to be run, which internally has from mlarray import double, single, uint8, ... which is the line that causes the crash.
Could this circularity be the issue? If so, why can I import this module in the main process but not in the loky backend?
The error is caused by incorrect loading order of global objects in the child processes. It can be seen clearly in the traceback
_ForkingPickler.loads(res) -> ... -> import matlab -> from mlarray import ...
that matlab is not yet imported when the global variable x is loaded by cloudpickle.
joblib with loky seems to treat modules as normal global objects and send them dynamically to the child processes. joblib doesn't record the order in which those objects/modules were defined. Therefore they are loaded (initialized) in a random order in the child processes.
A simple workaround is to manually pickle the matlab object and load it after importing matlab inside your function.
import matlab
import pickle
px = pickle.dumps(matlab.double([[0.0]]))
def f(i):
import matlab
x=pickle.loads(px)
print(i, x)
Of course you can also use the joblib.dumps and loads to serialize the objects.
Use initializer
Thanks to the suggestion of #Aaron, you can also use an initializer (for loky) to import Matlab before loading x.
Currently there's no simple API to specify initializer. So I wrote a simple function:
def with_initializer(self, f_init):
# Overwrite initializer hook in the Loky ProcessPoolExecutor
# https://github.com/tomMoral/loky/blob/f4739e123acb711781e46581d5ed31ed8201c7a9/loky/process_executor.py#L850
hasattr(self._backend, '_workers') or self.__enter__()
origin_init = self._backend._workers._initializer
def new_init():
origin_init()
f_init()
self._backend._workers._initializer = new_init if callable(origin_init) else f_init
return self
It is a little bit hacky but works well with the current version of joblib and loky.
Then you can use it like:
import matlab
from joblib import Parallel, delayed
x = matlab.double([[0.0]])
def f(i):
print(i, x)
def _init_matlab():
import matlab
with Parallel(4) as p:
for _ in with_initializer(p, _init_matlab)(delayed(f)(i) for i in range(10)):
pass
I hope the developers of joblib will add initializer argument to the constructor of Parallel in the future.
I am running under Python 3.7 on Linux Ubuntu 18.04 under Eclipse 4.8 and Pydev.
The declaration:
args:Dict[str: Optional[Any]] = {}
is in a module that is imported from my testing code. and it is flagged with the following error message from typing.py:
TypeError: Parameters to generic types must be types. Got slice(<class 'str'>, typing.Union[typing.Any, NoneType], None). The stack trace follows: Finding files... done. Importing test modules ... Traceback (most recent call last): File "/Data/WiseOldBird/Eclipse/pool/plugins/org.python.pydev.core_7.0.3.201811082356/pysrc/_pydev_runfiles/pydev_runfiles.py", line 468, in __get_module_from_str
mod = __import__(modname) File "/Data/WiseOldBird/Workspaces/WikimediaAccess/WikimediaLoader/Tests/wiseoldbird/loaders/TestWikimediaLoader.py", line 10, in <module>
from wiseoldbird.application_controller import main File "/Data/WiseOldBird/Workspaces/WikimediaAccess/WikimediaLoader/src/wiseoldbird/application_controller.py", line 36, in <module>
args:Dict[str: Optional[Any]] = {} File "/usr/local/lib/python3.7/typing.py", line 251, in inner
return func(*args, **kwds) File "/usr/local/lib/python3.7/typing.py", line 626, in __getitem__
params = tuple(_type_check(p, msg) for p in params) File "/usr/local/lib/python3.7/typing.py", line 626, in <genexpr>
params = tuple(_type_check(p, msg) for p in params) File "/usr/local/lib/python3.7/typing.py", line 139, in _type_check
raise TypeError(f"{msg} Got {arg!r:.100}.") TypeError: Parameters
This prevents my testing module from being imported.
What am I doing wrong?
The proper syntax for a dict's type is
Dict[str, Optional[Any]]
When you write [a: b], Python interprets this as a slice, i.e. the thing that makes taking parts of arrays work, like a[1:10]. You can see this in the error message: Got slice(<class 'str'>, typing.Union[typing.Any, NoneType], None).
I am using the Parallel Python module (pp), and want to submit a job to a worker. However, the function that I want to execute is in another module (written with Cython), and I don't know how to import the function name to the new worker. The method suggested here, i.e importing the module "walkerc" inside the function cannot work since walk itself is defined in walkerc, from the filename "walkerc.so"
import pp
from walkerc import walk
# Other stuff here
ser = pp.Server()
# Some more definitions
ser.submit(walk, (it, params))
ser.submit(walk, (1000, params), modules = ("walkerc",), globals = globals())
Both the statements above fail, I get the following error:
Traceback (most recent call last):
File "", line 1, in
ser.submit(walk, (1000, params), modules = ("walkerc",), globals = globals())
File "/usr/lib/python2.7/site-packages/pp.py", line 458, in submit
sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
File "/usr/lib/python2.7/site-packages/pp.py", line 629, in
__dumpsfunc
sources = [self.__get_source(func) for func in funcs]
File "/usr/lib/python2.7/site-packages/pp.py", line 696, in
__get_source
sourcelines = inspect.getsourcelines(func)[0]
File "/usr/lib/python2.7/inspect.py", line 690, in getsourcelines
lines, lnum = findsource(object)
File "/usr/lib/python2.7/inspect.py", line 526, in findsource
file = getfile(object)
File "/usr/lib/python2.7/inspect.py", line 420, in getfile
'function, traceback, frame, or code object'.format(object))
TypeError: '<'built-in function walk'>' is not a module, class, method,
function, traceback, frame, or code object
The function 'walk' itself is imported properly within the main program, it is the process of submitting it to a new worker that is problematic.
How can I specify the function name 'walk' properly?
I do not want to define 'walk' in the same file as which I have called it because I have modified it in Cython and want to have better performance. Is there an alternative?
Try renaming your walk function to something else, mywalk for example. As the exception text suggests, your environment seems to have a built-in function that goes by the name walk, so the inspect module gets confused.
I can successfully pass my imported walk function like this on my system, no conflict here and nothing more needed, the function gets executed using the given argument:
import pp
from walkerc import walk
pps = pp.Server()
pps.submit(walk, args=(1,))
But passing dir, which is a built-in function for sure:
pps.submit(dir)
I get the exact same error as you do:
Traceback (most recent call last):
File "parallel.py", line 9, in
pps.submit(dir)
...
File ".../lib/python2.7/inspect.py", line 420, in getfile
'function, traceback, frame, or code object'.format(object))
TypeError: is not a module, class, method, function, traceback, frame, or code object
Update after the below discussion:
So the problem here is that Python treats the members that come from C extensions as built-ins. The code above works with the regular Python module, but I was able to replicate the OP's error when importing and passing the function from a C extension.
Therefore I wrapped the C extension function call inside a normal Python function, which does the trick. Note that now the walk function import was moved to the wrapping function, so that it can construct it's own context itself when dispatched.
import pp
def walk(n):
import walkerc
return walkerc.walk(n)
def print_callback(result):
print('callback: ', result)
pps = pp.Server()
job = pps.submit(walk, args=(1,), callback=print_callback)
I have a problem with this code , but don't no why...
import inspect
inspect.getsource(min)
and the error is:
Traceback (most recent call last):
File "<pyshell#1>", line 1, in <module>
inspect.getsource(min)
File "C:\Python33\lib\inspect.py", line 726, in getsource
lines, lnum = getsourcelines(object)
File "C:\Python33\lib\inspect.py", line 715, in getsourcelines
lines, lnum = findsource(object)
File "C:\Python33\lib\inspect.py", line 551, in findsource
file = getfile(object)
File "C:\Python33\lib\inspect.py", line 435, in getfile
'function, traceback, frame, or code object'.format(object))
TypeError: <built-in function min> is not a module, class, method, function, traceback, frame,or code object
The built-in min() is implemented in C code, and inspect.getsource() can only show you Python code:
>>> min
<built-in function min>
The built-in function type is always implemented in C.
The code for this function comes from the bltinmodule.c source file; the builtin_min() function delegates to the min_max() utility function in the same source file.
I'm trying to get the name of a WMI win32 class. But the __name__ attribute is not defined for it.
>> import wmi
>> machine = wmi.WMI()
>> machine.Win32_ComputerSystem.__name__
I get the following error:
Traceback (most recent call last):
File "<pyshell#21>", line 1, in <module>
machine.Win32_ComputerSystem.__name__
File "C:\Python27\lib\site-packages\wmi.py", line 796, in __getattr__
return _wmi_object.__getattr__ (self, attribute)
File "C:\Python27\lib\site-packages\wmi.py", line 561, in __getattr__
return getattr (self.ole_object, attribute)
File "C:\Python27\lib\site-packages\win32com\client\dynamic.py", line 457, in __getattr__
raise AttributeError(attr)
AttributeError: __name__
I thought that the __name__ attribute is defined for all Python functions, so I don't know what the problem is here. How is it possible that this function doesn't have that attribute?
OK, The reason that I thought it was a method is because machine.Win32_ComputerSystem() is defined, but I guess that isn't enough for something to be a method. I realise that it isn't a method.
However, this doesn't work:
>> machine.Win32_ComputerSystem.__class__.__name__
'_wmi_class'
I want it to return 'Win32_ComputerSystem'. How can I do this?
From what I can tell looking at the documentation (specifically, based on this snippet), wmi.Win32_ComputerSystem is a class, not a method. If you want to get its name you could try:
machine.Win32_ComputerSystem.__class__.__name__
I've found a way to get the output that I want, however it doesn't satisfy me.
repr(machine.Win32_ComputerSystem).split(':')[-1][:-1]
returns: 'Win32_ComputerSystem'
There must be a more Pythonic way to do this.