Passing IPython parallel cluster object into custom class for batch execution - python

I am a novice programmer trying to use python for scientific programming. I think these posts (How to work with interactively-defined classes in IPython.parallel? and ipython parallel push custom object) touch on a similar issue but are not useful for me. I want to run my code as a script (for PBS or SGE queued schedulers) and I don't know how I would use dill.
Essentially, I am trying to use Ipython parallel cluster for splitting up computation that is defined in a custom class method.
I want to pass a cluster object into my custom class instance, then use the cluster to split up computation that operates on pieces of data defined as a member.
Having started a cluster using ipcluster (/path/to/ipcontroller-client.json),
Then, I want to run, python
Where, is
class Foo(object):
def __init__(self):
from numpy import arange = arange(10)*10
def A(self, y):
print "in A:", y[y]
def parallelA(self, z, cl):
print "in parallelA:", cl[:].map_sync(self.A, z)
def serialA(self, z):
print "in serialA:", map(self.A, z)
if __name__ == "__main__":
from IPython.parallel import Client
f = '/path/to/security/ipcontroller-client.json'
c = Client(f)
asdf = Foo()
asdf.serialA([1, 3, 5]) ## works
asdf.parallelA([1, 3, 5], c) ## doesn't work
The output is
$ ~/Projects/parcellation$ python
in serialA: in A: 1
in A: 3
in A: 5
[None, None, None]
in parallelA:
Traceback (most recent call last):
File "", line 24, in <module>
asdf.parallelA([1, 3, 5], c) ## doesn't work
File "", line 11, in parallelA
print "in parallelA:", cl[:].map_sync(self.A, z)
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 366, in map_sync
File "<string>", line 2, in map
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 66, in sync_results
ret = f(self, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 624, in map
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 271, in map
ret = self(*sequences)
File "<string>", line 2, in __call__
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 78, in sync_view_results
return f(self, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 243, in __call__
ar = view.apply(f, *args)
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 233, in apply
return self._really_apply(f, args, kwargs)
File "<string>", line 2, in _really_apply
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 66, in sync_results
ret = f(self, *args, **kwargs)
File "<string>", line 2, in _really_apply
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 51, in save_ids
ret = f(self, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 567, in _really_apply
File "/usr/local/lib/python2.7/dist-packages/IPython/parallel/client/", line 1263, in send_apply_request
File "/usr/local/lib/python2.7/dist-packages/IPython/kernel/zmq/", line 145, in pack_apply_message
arg_bufs = flatten(serialize_object(arg, buffer_threshold, item_threshold) for arg in args)
File "/usr/local/lib/python2.7/dist-packages/IPython/utils/", line 30, in flatten
return [x for subseq in seq for x in subseq]
File "/usr/local/lib/python2.7/dist-packages/IPython/kernel/zmq/", line 145, in <genexpr>
arg_bufs = flatten(serialize_object(arg, buffer_threshold, item_threshold) for arg in args)
File "/usr/local/lib/python2.7/dist-packages/IPython/kernel/zmq/", line 89, in serialize_object
buffers.insert(0, pickle.dumps(cobj, PICKLE_PROTOCOL))
cPickle.PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
Any help in understanding why this does not work, and a fix that requires minimal code change would be very helpful.
Thank you!

I figured out a solution:
class Foo(object):
def __init__(self):
from numpy import arange = arange(10)*10
def A(data, y):
print "in A:", y ## doesn't produce an output
return data[y]
def parallelA(self, z, cl):
print "in parallelA:", cl[:].map_sync(self.A, []*len(z), z)
if __name__ == "__main__":
from IPython.parallel import Client
f = '/path/to/security/ipcontroller-client.json'
c = Client(f)
asdf = Foo()
asdf.parallelA([1, 3, 5], c)
Output when above code is run:
$ python
in parallelA: [10, 30, 50]


TypeError: cannot unpack non-iterable float object - MapReduce - mrjob

I'm testing a simple example to learn about MapReduce and mrjob.
The goal is to sum up the logarithm of all the numbers and divide the count of all numbers by this summation.
The code is pretty easy and straightforward:
from mrjob.job import MRJob
from mrjob.step import MRStep
import math
class MrMedian(MRJob):
    """Count the input numbers and sum their logarithms, then divide count by sum."""

    def __init__(self, *args, **kwargs):
        super(MrMedian, self).__init__(*args, **kwargs)
        self.inCount = 0
        self.inLogSum = 0.0

    # increment the count of elements and add the
    # logarithm of the current number to the summation
    def map(self, key, val):
        inVal = float(val)
        self.inCount += 1
        self.inLogSum += math.log(inVal)

    # return the count and summation after all numbers are processed
    def map_final(self):
        yield (1, [self.inCount, self.inLogSum])

    # aggregate the count and summation values and yield the result
    def reduce(self, key, packedValues):
        # The accumulators must exist before the += updates below.
        cumN, cumLogSum = 0, 0.0
        for valArr in packedValues:
            nj = int(valArr[0])
            cumN += nj
            cumLogSum += float(valArr[1])
        median = cumN/cumLogSum
        # NOTE(review): mrjob consumes reducer output as `for k, v in ...`
        # (see the traceback), so yielding a bare float is what raises
        # "cannot unpack non-iterable float object". Yield a pair instead,
        # e.g. `yield (key, median)`.
        yield (median)

    # define mapper and reducer
    def steps(self):
        return ([
            MRStep(mapper=self.map, reducer=self.reduce, mapper_final=self.map_final)
        ])

# to run:
# python mrMedian.py < inputFile.txt
if __name__ == '__main__':
    MrMedian.run()
In the map_final method I'm yielding (1, [self.inCount, self.inLogSum]). The value 1 is the key, which is ignored, and the list [self.inCount, self.inLogSum] is the value. In the reduce method these values arrive as packedValues, which we should treat as an iterable and iterate through with a for loop.
I am getting this error:
(venv) shahriar#Lenovo:/media/shahriar/01D779182B58B9D0$ python < inputFile.txt > outFile.txt No configs found; falling back on auto-configuration No configs specified for inline runner Creating temp directory /tmp/mrMedian.shahriar.20221113.152412.029427 Running step 1 of 1... reading from STDIN
Error while reading from /tmp/mrMedian.shahriar.20221113.152412.029427/step/000/reducer/00000/input:
Traceback (most recent call last):
File "/media/shahriar/01D779182B58B9D0/assignment2/", line 43, in <module>
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 616, in run
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 687, in execute
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 636, in run_job
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 503, in run
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 161, in _run
self._run_step(step, step_num)
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 170, in _run_step
self._run_streaming_step(step, step_num)
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 187, in _run_streaming_step
self._run_reducers(step_num, num_reducer_tasks)
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 289, in _run_reducers
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 130, in _run_multiple
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 746, in _run_task
invoke_task( File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 133, in invoke_task
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 681, in execute
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 795, in run_reducer
for k, v in self.reduce_pairs(read_lines(), step_num=step_num):
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 866, in reduce_pairs
for k, v in self._combine_or_reduce_pairs(pairs, 'reducer', step_num):
File "/media/shahriar/01D779182B58B9D0/venv/lib/python3.10/site-packages/mrjob/", line 889, in _combine_or_reduce_pairs
for k, v in task(key, values) or ():
TypeError: cannot unpack non-iterable float object
The input file which is the result of map_final method is ok:
shahriar#Lenovo-:/tmp/mrMedian.shahriar.20221113.152412.029427/step/000/reducer/00000$ cat input
1 [13, 78.5753201837955]
1 [13, 77.20894832945609]
1 [12, 75.70546637672973]
1 [12, 73.97942285230064]
1 [13, 78.7642193551817]
1 [13, 74.83203774429285]
1 [13, 72.28868623927899]
1 [11, 67.51370208632588]
I commented the for loop inside the reducer method to check whether the error is because of packedValues but I was getting the error again.
Any idea is appreciated.

sharing a PyTable across multiprocesses

I create a PyTable object W_hat, which the processes should share and save their results to instead of returning them.
from multiprocessing import Lock
from multiprocessing import Pool
import tables as tb
def parallel_l21(labels, X, lam, g, W_hat):
    """Compute the result for group ``g`` and store it in ``W_hat``.

    Relies on a module-level ``lock`` (installed by ``init_child``) to
    serialise the write into ``W_hat``. Nothing is returned.
    """
    # Indices of the samples whose label equals g.
    g_indxs = np.where(labels == g)[0]
    tmp = rfs(X[g_indxs, 1:].T, X[:, :-1].T, gamma=lam, verbose=False).T
    # Zero out entries whose magnitude is at most 1e-6.
    tmp[abs(tmp) <= 1e-6] = 0
    with lock:
        W_hat[:, g_indxs] = np.array(tmp)
def init_child(lock_):
    """Pool initializer: publish ``lock_`` as the module-level ``lock``."""
    global lock
    lock = lock_
#Previous code is omitted.
n_ = X_test.shape[0]
f = tb.open_file(path_name + 'dot' + sub_num + str(lam) + '.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
W_hat = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(n_, n_), filters=filters)
W_hats = []
for i in np.unique(labels):
    # NOTE(review): the loop body was lost in the paste; presumably one
    # reference to the shared array per group -- confirm.
    W_hats.append(W_hat)
lock = Lock()
with Pool(processes=cpu_count, initializer=init_child, initargs=(lock,)) as pool:
    # NOTE(review): starmap pickles every argument tuple to send it to a
    # worker; the traceback shows the PyTables array failing at that step.
    pool.starmap(parallel_l21, zip(repeat(labels), repeat(X), repeat(lam), np.unique(labels), W_hats))
Now, when running into starmap, this error shows up:
Traceback (most recent call last):
File "/Applications/PyCharm CE", line 3, in Exec
exec(exp, global_vars, local_vars)
File "<input>", line 1, in <module>
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/", line 771, in get
raise self._value
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/", line 537, in _handle_tasks
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/", line 206, in send
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/", line 51, in dumps
cls(buf, protocol).dump(obj)
File "stringsource", line 2, in tables.hdf5extension.Array.__reduce_cython__
TypeError: self.dims,self.dims_chunk,self.maxdims cannot be converted to a Python object for pickling
Note: I thought that the code works fine on Python 3.6.8 but it turns out that it is not the case.

Can't use namedtuple with concurrent.futures? [duplicate]

>>> import concurrent.futures
>>> from collections import namedtuple
>>> #1. Initialise namedtuple here
>>> # tm = namedtuple("tm", ["pk"])
>>> class T:
... #2. Initialise named tuple here
... #tm = namedtuple("tm", ["pk"])
... def __init__(self):
... #3: Initialise named tuple here
... tm = namedtuple("tm", ["pk"])
... self.x = {'key': [tm('value')]}
... def test1(self):
... with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
... results = executor.map(self.test, ["key"])
... return results
... def test(self, s):
... print(self.x[s])
>>> t = T().test1()
This gets stuck here.
^CTraceback (most recent call last):
File "<stdin>", line 1, in <module>
Process ForkProcess-1:
File "<stdin>", line 10, in test1
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/", line 623, in __exit__
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/", line 681, in shutdown
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/", line 1044, in join
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 297, in _bootstrap
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/", line 233, in _process_worker
call_item = call_queue.get(block=True)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 94, in get
res = self._recv_bytes()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 407, in _recv_bytes
buf = self._recv(4)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/", line 379, in _recv
chunk = read(handle, remaining)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/", line 1060, in _wait_for_tstate_lock
elif lock.acquire(block, timeout):
If I initialise the named tuple outside of the class (in #1), in that case, this works fine. Could someone please let me know what is the issue if I initialise as per #2 or #3 ?
You're not changing where you initialize the namedtuple. You're changing where you create the namedtuple class.
When you create a namedtuple class named "x" in module "y" with collections.namedtuple, its __module__ is set to 'y' and its __qualname__ is set to 'x'. Pickling and unpickling relies on this class actually being available in the y.x location indicated by these attributes, but in cases 2 and 3 of your example, it's not.
Python can't pickle the namedtuple, which breaks inter-process communication with the workers. Executing self.test in a worker process relies on pickling self.test and unpickling a copy of it in the worker process, and that can't happen if self.x is an instance of a class that can't be pickled.

Multiprocessing Gremlin "OSError: [Errno 9] Bad file descriptor"

I'm trying to compute a feature for every vertex in my graph using gremlinpython. It's too slow to sequentially iterate over every single vertex. While batching could help to provide a speedup, I thought first I'd try parallelizing the query.
Broadly, 1. get the full set of vertices, 2. split them over num_cores=x, 3. iterate over each sub-vertex set in parallel.
But I'm getting the error "OSError: [Errno 9] Bad file descriptor". The below code is my latest attempt at solving this.
import multiprocessing
from gremlin_python.structure.graph import Graph
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.process.traversal import lt
def create_traversal_object():
    """Open a remote connection and return a traversal source bound to it."""
    graph = Graph()
    g = graph.traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin', 'g'))
    return g
g = create_traversal_object()
num_cores = 1
vertex_lsts = np.array_split(g.V().limit(30).id().toList(), num_cores)
class FeatureClass():
    """Run a per-vertex query over a list of vertex ids using traversal ``g``."""

    def __init__(self, g, vertex_list):
        self.g = g
        self.vertex_list = vertex_list

    def orchestrator(self):
        for vertex_id in self.vertex_list:
            # NOTE(review): the loop body was lost in the paste; presumably
            # the per-vertex query below -- confirm.
            self.get_names(vertex_id)

    def get_names(self, vertex_id):
        # Distinct 'benef_nm' values on the incoming edges of the vertex.
        return self.g.V(vertex_id).inE().values('benef_nm').dedup().toList()
class Simulation(multiprocessing.Process):
    """Process that runs ``worker.orchestrator()`` and logs its progress to stdout."""

    def __init__(self, id, worker, *args, **kwargs):
        # must call this before anything else
        multiprocessing.Process.__init__(self)
        self.id = id
        self.worker = worker
        self.args = args
        self.kwargs = kwargs
        sys.stdout.write('[%d] created\n' % (self.id))

    def run(self):
        sys.stdout.write('[%d] running ... process id: %s\n' % (self.id, os.getpid()))
        # The traceback shows run() calling into the worker's orchestrator().
        self.worker.orchestrator()
        sys.stdout.write('[%d] completed\n' % (self.id))
# NOTE(review): every connection is created here, in the parent process, and
# then used inside the child's run() -- presumably the source of the
# "Bad file descriptor" error; confirm.
list_of_objects = [FeatureClass(create_traversal_object(), vertex_lst) for vertex_lst in vertex_lsts]
list_of_sim = [Simulation(id=k, worker=obj) for k, obj in enumerate(list_of_objects)]
for sim in list_of_sim:
    sim.start()
Here's the full stack-trace, looks like it's an issue with tornado, which gremlinpython uses.
Process Simulation-1:
Traceback (most recent call last):
File "/Users/greatora/anaconda3/lib/python3.6/multiprocessing/", line 258, in _bootstrap
File "<ipython-input-4-b3177477fabe>", line 42, in run
File "<ipython-input-4-b3177477fabe>", line 23, in orchestrator
File "<ipython-input-4-b3177477fabe>", line 26, in compute_number_of_names
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/", line 88, in next
return self.__next__()
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/", line 47, in __next__
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/process/", line 512, in apply_strategies
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/", line 148, in apply
remote_traversal = self.remote_connection.submit(traversal.bytecode)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/", line 53, in submit
result_set = self._client.submit(bytecode)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/", line 108, in submit
return self.submitAsync(message, bindings=bindings).result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/", line 432, in result
return self.__get_result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/", line 384, in __get_result
raise self._exception
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/", line 63, in cb
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/", line 425, in result
return self.__get_result()
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/", line 384, in __get_result
raise self._exception
File "/Users/greatora/anaconda3/lib/python3.6/concurrent/futures/", line 56, in run
result = self.fn(*self.args, **self.kwargs)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/", line 74, in write
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/gremlin_python/driver/tornado/", line 37, in write
lambda: self._ws.write_message(message, binary=True))
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/", line 453, in run_sync
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/", line 863, in start
event_pairs = self._impl.poll(poll_timeout)
File "/Users/greatora/anaconda3/lib/python3.6/site-packages/tornado/platform/", line 66, in poll
kevents = self._kqueue.control(None, 1000, timeout)
OSError: [Errno 9] Bad file descriptor
I'm using Python 3.7, gremlinpython==3.4.6, macOS.
I'm still not entirely sure what the issue was, but this works.
import multiprocessing
from multiprocessing import Pool
import itertools
def graph_function(vertex_id_list):
    """Query every vertex id in ``vertex_id_list`` and return the list of results.

    The connection is opened inside the function, so each pool worker that
    runs it builds its own connection.
    """
    graph = Graph()
    g = graph.traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin', 'g'))
    res = []
    for vertex_id in vertex_id_list:
        # NOTE(review): the loop body was lost in the paste; reconstructed
        # from the get_names query in the question -- confirm.
        res.append(g.V(vertex_id).inE().values('benef_nm').dedup().toList())
    return res
num_cores = 4
vertex_lst = g.V().limit(30).id().toList()
# One chunk of vertex ids per worker process.
vertex_lsts = np.array_split(vertex_lst, num_cores)
with Pool(processes=num_cores) as pool:
    results = pool.map(graph_function, vertex_lsts)
# Flatten the per-chunk result lists into a single list.
results = [*itertools.chain.from_iterable(results)]

Python class and function

I defined a function in my class, but I get an error when I call this function from my main program:
class real :
    def __init__(self):
        self.nmodes = 4
        self.L_ch = 1
        self.w = 2
        # NOTE(review): func1 is indented one level too deep, so it is a local
        # function of __init__ rather than a method -- hence "real instance
        # has no attribute 'func1'".
        def func1(self,x):
            return f
And my main program is:
from dev import *
Unfortunately I got this error:
Traceback (most recent call last):
File "", line 4, in <module>
AttributeError: real instance has no attribute 'func1'
When I don't include the function in my class, my parameters are not recognized and I get this error:
Traceback (most recent call last):
File "", line 75, in <module>
File "/usr/local/lib64/python2.7/site-packages/scipy/optimize/", line 127, in fsolve
res = _root_hybr(func, x0, args, jac=fprime, **options)
File "/usr/local/lib64/python2.7/site-packages/scipy/optimize/", line 183, in _root_hybr
_check_func('fsolve', 'func', func, x0, args, n, (n,))
File "/usr/local/lib64/python2.7/site-packages/scipy/optimize/", line 14, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "/home/cfd1/ndiaye/ATACAMAC/", line 75, in func1
AttributeError: 'numpy.ndarray' object has no attribute 'nmodes'
What can I do to avoid all this? Thank you for your answers.
Your above code runs if you just fix the indentation:
class real :
    def __init__(self):
        self.nmodes = 4
        self.L_ch = 1
        self.w = 2

    # Lined up with __init__, so func1 is a method of the class.
    def func1(self,x):
        return f
You have an indentation error. The lines starting def func1 should be lined up with def __init__.

