Can't use namedtuple with concurrent.futures? [duplicate] - python

>>> import concurrent.futures
>>> from collections import namedtuple
>>> #1. Initialise namedtuple here
>>> # tm = namedtuple("tm", ["pk"])
>>> class T:
...     # 2. Initialise namedtuple here
...     # tm = namedtuple("tm", ["pk"])
...     def __init__(self):
...         # 3. Initialise namedtuple here
...         tm = namedtuple("tm", ["pk"])
...         self.x = {'key': [tm('value')]}
...     def test1(self):
...         with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
...             results = executor.map(self.test, ["key"])
...         return results
...     def test(self, s):
...         print(self.x[s])
...
>>> t = T().test1()
This gets stuck here.
^CTraceback (most recent call last):
File "<stdin>", line 1, in <module>
Process ForkProcess-1:
File "<stdin>", line 10, in test1
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py", line 623, in __exit__
self.shutdown(wait=True)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/process.py", line 681, in shutdown
self._queue_management_thread.join()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 1044, in join
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/process.py", line 233, in _process_worker
call_item = call_queue.get(block=True)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/queues.py", line 94, in get
res = self._recv_bytes()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
self._wait_for_tstate_lock()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 1060, in _wait_for_tstate_lock
elif lock.acquire(block, timeout):
KeyboardInterrupt
If I initialise the namedtuple outside the class (case #1), this works fine. Could someone please explain what the issue is when I initialise it as in #2 or #3?

You're not changing where you initialize the namedtuple. You're changing where you create the namedtuple class.
When you create a namedtuple class named "x" in module "y" with collections.namedtuple, its __module__ is set to 'y' and its __qualname__ is set to 'x'. Pickling and unpickling relies on this class actually being available in the y.x location indicated by these attributes, but in cases 2 and 3 of your example, it's not.
Python can't pickle the namedtuple, which breaks inter-process communication with the workers. Executing self.test in a worker process relies on pickling self.test and unpickling a copy of it there, and that can't happen if self.x holds instances of a class that can't be pickled.
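For reference, a minimal sketch of the working arrangement (case #1): the namedtuple class is created at module level, so pickle can find it under its recorded __module__ and __qualname__ when the call is shipped to the worker. The names mirror the question's example; the __main__ guard is added here so the snippet runs as a script.

import concurrent.futures
from collections import namedtuple

# Created at module level: pickle can locate this class by name.
tm = namedtuple("tm", ["pk"])

class T:
    def __init__(self):
        self.x = {'key': [tm('value')]}

    def test(self, s):
        print(self.x[s])

    def test1(self):
        with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
            # self.test (and with it self.x) is pickled and sent to the worker.
            return list(executor.map(self.test, ["key"]))

if __name__ == '__main__':
    T().test1()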

Related

sharing a PyTable across multiprocesses

I create a PyTables carray W_hat that the processes should share and save their results into instead of returning them.
from multiprocessing import Lock
from multiprocessing import Pool
from itertools import repeat
import numpy as np
import tables as tb

def parallel_l21(labels, X, lam, g, W_hat):
    g_indxs = np.where(labels == g)[0]
    tmp = rfs(X[g_indxs, 1:].T, X[:, :-1].T, gamma=lam, verbose=False).T
    tmp[abs(tmp) <= 1e-6] = 0
    with lock:
        W_hat[:, g_indxs] = np.array(tmp)

def init_child(lock_):
    global lock
    lock = lock_

# Previous code is omitted (rfs, X, X_test, labels, lam, path_name, sub_num
# and cpu_count are defined there).
n_ = X_test.shape[0]
tb.file._open_files.close_all()
f = tb.open_file(path_name + 'dot' + sub_num + str(lam) + '.h5', 'w')
filters = tb.Filters(complevel=5, complib='blosc')
W_hat = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(n_, n_), filters=filters)
W_hats = []
for i in np.unique(labels):
    W_hats.append(W_hat)
lock = Lock()
with Pool(processes=cpu_count, initializer=init_child, initargs=(lock,)) as pool:
    print(pool)
    pool.starmap(parallel_l21, zip(repeat(labels), repeat(X), repeat(lam), np.unique(labels), W_hats))
Now, when running into starmap, this error shows up:
Traceback (most recent call last):
File "/Applications/PyCharm CE 2.app/Contents/plugins/python-ce/helpers/pydev/_pydevd_bundle/pydevd_exec2.py", line 3, in Exec
exec(exp, global_vars, local_vars)
File "<input>", line 1, in <module>
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 537, in _handle_tasks
put(task)
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/local/Cellar/python#3.8/3.8.6_1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
File "stringsource", line 2, in tables.hdf5extension.Array.__reduce_cython__
TypeError: self.dims,self.dims_chunk,self.maxdims cannot be converted to a Python object for pickling
Note: I thought the code worked fine on Python 3.6.8, but it turns out that is not the case.
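For context, the open PyTables array handle (tables.hdf5extension.Array) is not picklable, which is why _ForkingPickler fails as soon as starmap tries to send W_hat to the workers. One way around it, sketched here purely as an illustration and not taken from the original post, is to keep the HDF5 handle in the parent only and have the workers return their slices instead of writing them (parallel_l21_result is a hypothetical variant of the function above):

# Illustrative sketch: workers compute and return; only the parent,
# which owns the open PyTables file, writes into W_hat.
def parallel_l21_result(labels, X, lam, g):
    g_indxs = np.where(labels == g)[0]
    tmp = rfs(X[g_indxs, 1:].T, X[:, :-1].T, gamma=lam, verbose=False).T
    tmp[abs(tmp) <= 1e-6] = 0
    return g_indxs, np.array(tmp)

with Pool(processes=cpu_count) as pool:
    results = pool.starmap(parallel_l21_result,
                           zip(repeat(labels), repeat(X), repeat(lam), np.unique(labels)))
for g_indxs, tmp in results:
    W_hat[:, g_indxs] = tmp   # single-process HDF5 access, nothing to pickle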

How do I put an existing object into shared memory?

I've managed to create an object that can exist in shared memory with BaseManager and NamespaceProxy, however all the examples I've seen require me to create the object with the proxy. For example:
import time
from multiprocessing.managers import BaseManager, NamespaceProxy

class Foo:
    def __init__(self):
        self._a = 1000
    def get_a(self):
        return self._a

class SharedFoo(NamespaceProxy):
    _exposed_ = ('__getattribute__', '__getattr__', '__setattr__', '__init__', 'get_a')
    def get_a(self):
        callmethod = object.__getattribute__(self, '_callmethod')
        return callmethod('get_a', ())

class FooManager(BaseManager):
    pass

def test():
    FooManager.register('Foo', Foo, SharedFoo)
    with FooManager() as manager:
        ls = []
        t = time.time()
        for i in range(100):
            ls.append(manager.Foo())
        print(time.time() - t)
which prints out:
0.44 (and some other numbers that I omitted)
Since I would likely create millions of Foo objects, this is too slow for the task. I tried to make it faster like this:
import multiprocessing as mp
from multiprocessing import Process

# Foo, SharedFoo and FooManager are as defined above; NoLock is a class from
# the asker's code that is not shown in the post.

def do_stuff(obj):
    obj.ls[4].set_a(300)

def test():
    FooManager.register('Foo', Foo, SharedFoo)
    with FooManager() as manager:
        if manager._Server != None:
            manager._Server.mutex = NoLock()
        ls = []
        t = time.time()
        for i in range(100000):
            ls.append(Foo())
        foos = manager.Foo()
        foos.ls = mp.Manager().list(ls)
        print(time.time() - t)
        processes = [Process(target=do_stuff, args=(foos,)) for _ in range(3)]
        for process in processes:
            process.start()
        for process in processes:
            process.join()
        print(foos.ls[4].get_a())
which gave me this error:
Traceback (most recent call last):
File "/path/to/lib/python3.7/multiprocessing/managers.py", line 788, in _callmethod
conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/path/to/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/path/to/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/path/to/myproject/test.py", line 121, in do_stuff
obj.ls[4].set_a(300)
File "/path/to/lib/python3.7/multiprocessing/managers.py", line 1099, in __getattr__
return callmethod('__getattribute__', (key,))
File "/path/to/lib/python3.7/multiprocessing/managers.py", line 792, in _callmethod
self._connect()
File "/path/to/lib/python3.7/multiprocessing/managers.py", line 779, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
File "/path/to/lib/python3.7/multiprocessing/connection.py", line 492, in Client
c = SocketClient(address)
File "/path/to/lib/python3.7/multiprocessing/connection.py", line 619, in SocketClient
s.connect(address)
FileNotFoundError: [Errno 2] No such file or directory
Is what I'm trying to do possible? If so, what should I use (not looking for a complete solution, just some resources on how to do it)? I'm using Python 3.7 on Linux, if that's relevant.
Thanks
Edit: is this feasible with mmap, or am I going in a completely wrong direction? It looks promising, but the documentation seems to say that it's more for files (again, not looking for a complete solution, just whether mmap would work with custom objects).
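For what it's worth, mmap only exposes a flat buffer of bytes, so custom objects have to be serialized into a fixed layout (with struct, pickle, or similar) before they can live in the mapping. A minimal sketch, assuming Linux and the default fork start method so the anonymous mapping is inherited by the child process:

import mmap
import struct
from multiprocessing import Process

def reader(mm):
    # The child inherits the anonymous, shared mapping via fork.
    value, = struct.unpack_from('d', mm, 0)
    print(value)

if __name__ == '__main__':
    mm = mmap.mmap(-1, 8)                  # 8 bytes of anonymous shared memory
    struct.pack_into('d', mm, 0, 1000.0)   # serialize a single value into it
    p = Process(target=reader, args=(mm,))
    p.start()
    p.join()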

How to return a counter dictionary from a function passed to multiprocessing?

I have a list of CSV files. I want to do a set of operations on each of them, produce a counter dict for each, and create a master list containing the individual counter dicts from all CSV files. I want to parallelize the processing of each CSV file and then return the counter dict from each file. I found a similar solution here: How can I recover the return value of a function passed to multiprocessing.Process?
I used the solution suggested by David Cullen. That solution works perfectly for strings, but when I try to return a counter dict or a normal dict, all the CSV files are processed up to send_end.send(result), then it hangs there forever and eventually throws a memory error. I am running this on a Linux server with more than sufficient memory for creating the list of counter dicts.
I used the following code:
import os
import multiprocessing
from collections import Counter

# get current working directory
cwd = os.getcwd()
# take a list of all files in cwd
files = os.listdir(cwd)

# defining the function that needs to be done on all csv files
def worker(f, send_end):
    infile = open(f)
    # read lines in csv file
    lines = infile.readlines()
    # split the lines by "," and store it in a list of lists
    master_lst = [line.strip().split(",") for line in lines]
    # extract the second field in each sublist
    counter_lst = [element[1] for element in master_lst]
    print "Total elements in the list: " + str(len(counter_lst))
    # create a dictionary of element counts
    a = Counter(counter_lst)
    # return the counter dict
    send_end.send(a)

def main():
    jobs = []
    pipe_list = []
    for f in files:
        if f.endswith('.csv'):
            recv_end, send_end = multiprocessing.Pipe(duplex=False)
            p = multiprocessing.Process(target=worker, args=(f, send_end))
            jobs.append(p)
            pipe_list.append(recv_end)
            p.start()
    for proc in jobs:
        proc.join()
    result_list = [x.recv() for x in pipe_list]
    print len(result_list)

if __name__ == '__main__':
    main()
The error that I get is the following:
Process Process-42:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
Process Process-17:
Traceback (most recent call last):
Process Process-6:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
Process Process-8:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
self.run()
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
self._target(*self._args, **self._kwargs)
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst_lst)
a = Counter(counter_lst_lst)
a = Counter(counter_lst_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
self.update(iterable, **kwds)
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
self[elem] = self_get(elem, 0) + 1
MemoryError
MemoryError
Process Process-10:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in
_bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/amm/python/collapse_multiprocessing_return.py", line 32, in
worker
a = Counter(counter_lst)
File "/usr/lib64/python2.7/collections.py", line 444, in __init__
self.update(iterable, **kwds)
File "/usr/lib64/python2.7/collections.py", line 526, in update
self[elem] = self_get(elem, 0) + 1
MemoryError
^Z
[18]+ Stopped collapse_multiprocessing_return.py
Now, if I replace "a" in send_end.send(a) with f, the filename, it prints the number of CSV files in the directory (which is what len(result_list) gives in this case). But when the counter dict "a" is returned, it gets stuck forever and throws the above error.
I would like the code to pass the counter dict to the receive end without any errors. Is there a workaround? Could someone please suggest a possible solution?
P.S.: I am new to the multiprocessing module, sorry if this question sounds naive. Also, I tried multiprocessing.Manager(), but got a similar error.
Your traceback mentions Process Process-42:, so there are at least 42 processes being created. You're creating a process for every CSV file, which is not useful and is probably causing the memory error.
Your problem can be solved much more simply using multiprocessing.Pool.map. The worker function can also be shortened greatly:
def worker(f):
    with open(f) as infile:
        return Counter(line.strip().split(",")[1]
                       for line in infile)

def main():
    pool = multiprocessing.Pool()
    result_list = pool.map(worker, [f for f in files if f.endswith('.csv')])
Passing no arguments to the pool means it'll create as many processes as you have CPU cores. Using more may or may not increase performance.
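To make the answer's snippet fully runnable, here is a sketch with the surrounding pieces filled in (the imports, the file listing and the __main__ guard are additions here, not part of the original answer):

import os
import multiprocessing
from collections import Counter

files = os.listdir(os.getcwd())

def worker(f):
    with open(f) as infile:
        return Counter(line.strip().split(",")[1] for line in infile)

def main():
    pool = multiprocessing.Pool()   # one process per CPU core by default
    result_list = pool.map(worker, [f for f in files if f.endswith('.csv')])
    print(len(result_list))

if __name__ == '__main__':
    main()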

Python Multiprocessing Process exits before operation

I have a Python object, a list of dictionaries, and I want to fill each of those dicts with key-value pairs simultaneously, using multiple processors and the multiprocessing module. For that purpose I am using a Manager to store the object. Here is the code:
from pylab import *
from numpy.random import *
import multiprocessing
import threading
import random

def tasks_start(id, global_lists):
    counter_lock = threading.Lock()
    with counter_lock:
        num = int(10*random.random())
        global_lists[num] = {'1': 'Random'}
        print("Id: ", id)
        print(global_lists[0])

if __name__ == '__main__':
    numProcessors = 6
    pool = multiprocessing.Pool(numProcessors)
    global_list = multiprocessing.Manager().list(range(100))
    for idx in range(100):
        global_list[idx] = multiprocessing.Manager().dict()
    tasks = []
    for id in range(10):
        tasks.append((id, global_list))
    pool.starmap(tasks_start, tasks)
    pool.close()
    pool.join()
So what I am doing here is creating a list of dictionaries stored as global_list and then calling the tasks_start() function 10 times using starmap() (just so that I can later extend to multiple arguments) to fill the list of dictionaries. As a simple test case, I just use the random generator to pick one dictionary from the list each time and fill it with some value. When I run the program, the following error occurs:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.4/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.4/multiprocessing/pool.py", line 47, in starmapstar
return list(itertools.starmap(args[0], args[1]))
File "/home/cysis/inhibition_soum/motif_temporal_patterns/code_versions/2016/09/09_08/parallel_test/test_error_manager.py", line 14, in tasks_start
print(global_lists[0])
File "<string>", line 2, in __getitem__
File "/usr/lib/python3.4/multiprocessing/managers.py", line 732, in _callmethod
kind, result = conn.recv()
File "/usr/lib/python3.4/multiprocessing/connection.py", line 251, in recv
return ForkingPickler.loads(buf.getbuffer())
File "/usr/lib/python3.4/multiprocessing/managers.py", line 852, in RebuildProxy
return func(token, serializer, incref=incref, **kwds)
File "/usr/lib/python3.4/multiprocessing/managers.py", line 706, in __init__
self._incref()
File "/usr/lib/python3.4/multiprocessing/managers.py", line 756, in _incref
conn = self._Client(self._token.address, authkey=self._authkey)
File "/usr/lib/python3.4/multiprocessing/connection.py", line 495, in Client
c = SocketClient(address)
File "/usr/lib/python3.4/multiprocessing/connection.py", line 624, in SocketClient
s.connect(address)
FileNotFoundError: [Errno 2] No such file or directory
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/cysis/inhibition_soum/motif_temporal_patterns/code_versions/2016/09/09_08/parallel_test/test_error_manager.py", line 29, in <module>
pool.starmap(tasks_start, tasks)
File "/usr/lib/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/lib/python3.4/multiprocessing/pool.py", line 599, in get
raise self._value
FileNotFoundError: [Errno 2] No such file or directory
In my opinion, the Manager exits before the last print(global_lists[0]) is executed and is therefore not able to find global_lists[0]. Can anybody shed some light on this?
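One likely explanation, offered as a guess rather than a confirmed answer: every multiprocessing.Manager() call inside the loop starts its own manager process, and once such a temporary manager is garbage-collected its server shuts down, so the dict proxies stored in global_list point at sockets that no longer exist, hence the FileNotFoundError when a worker tries to reconnect. A sketch that keeps a single manager alive for all of the dicts (tasks_start is the function from the question):

import multiprocessing

if __name__ == '__main__':
    manager = multiprocessing.Manager()      # one long-lived manager process
    global_list = manager.list(range(100))
    for idx in range(100):
        global_list[idx] = manager.dict()    # every proxy is backed by the same server
    with multiprocessing.Pool(6) as pool:
        pool.starmap(tasks_start, [(id, global_list) for id in range(10)])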

need help with this Kombu error

Pardon the copy and paste from the Python interpreter, but I'm trying to play with Kombu and I can't seem to create a consumer. Please help, I'm utterly in the dark here.
>>> from kombu.messaging import Consumer, Producer
>>> from kombu.entity import Exchange, Queue
>>> x = Exchange("stmt",type="topic")
>>> helloQ = Queue("hello", exchange=x, routing_key="stmt.hello")
>>>
>>> from kombu.connection import BrokerConnection
>>> conn = BrokerConnection("scheduledb.lab.compete.com", "clippy", "clippy", "clippy")
>>> channel = conn.channel()
>>> c = Consumer(channel, helloQ)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/messaging.py", line 231, in __init__
self.declare()
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/messaging.py", line 241, in declare
queue.declare()
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/entity.py", line 362, in declare
self.name and self.queue_declare(nowait, passive=False),
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/entity.py", line 380, in queue_declare
nowait=nowait)
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/syn.py", line 14, in blocking
return __sync_current(fun, *args, **kwargs)
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/syn.py", line 30, in __blocking__
return fun(*args, **kwargs)
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/channel.py", line 1294, in queue_declare
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/abstract_channel.py", line 89, in wait
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/connection.py", line 218, in _wait_method
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/abstract_channel.py", line 105, in wait
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/connection.py", line 367, in _close
amqplib.client_0_8.exceptions.AMQPConnectionException: (530, u"NOT_ALLOWED - parameters for queue 'hello' in vhost 'clippy' not equivalent", (50, 10), 'Channel.queue_declare')
>>> boundX = x(helloQ)
>>> c = Consumer(channel, helloQ)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/messaging.py", line 231, in __init__
self.declare()
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/messaging.py", line 241, in declare
queue.declare()
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/entity.py", line 361, in declare
return (self.name and self.exchange.declare(nowait),
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/entity.py", line 151, in declare
nowait=nowait)
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/syn.py", line 14, in blocking
return __sync_current(fun, *args, **kwargs)
File "/usr/lib/python2.6/site-packages/kombu-1.0.6-py2.6.egg/kombu/syn.py", line 30, in __blocking__
return fun(*args, **kwargs)
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/channel.py", line 839, in exchange_declare
File "build/bdist.cygwin-1.7.8-i686/egg/amqplib/client_0_8/abstract_channel.py", line 69, in _send_method
AttributeError: 'NoneType' object has no attribute 'method_writer'
Adding to #asksol's answer: if you are using RabbitMQ, you can use the rabbitmqctl command to list the queue details and compare those settings with the settings in your own code. Hopefully that gives you enough information to detect the conflict.
Look at the error:
amqplib.client_0_8.exceptions.AMQPConnectionException: (530, u"NOT_ALLOWED - parameters for queue 'hello' in vhost 'clippy' not equivalent", (50, 10), 'Channel.queue_declare')
This means the queue has already been declared, but with parameters other than the ones you are declaring it with now.
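As a hedged illustration of resolving such a conflict (the durable/auto_delete values below are placeholders; the real values must be read off the existing 'hello' queue, for example with rabbitmqctl as suggested above):

# On the broker host, inspect the existing queue's settings:
#   rabbitmqctl -p clippy list_queues name durable auto_delete arguments
#
# Then declare the Kombu queue with matching parameters, e.g.:
from kombu.entity import Exchange, Queue

x = Exchange("stmt", type="topic")
helloQ = Queue("hello", exchange=x, routing_key="stmt.hello",
               durable=True, auto_delete=False)   # placeholders; match the broker's values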
