I want to send tasks to the Pool from inside the shared class, based on some conditions, but I got an unexpected result, shown below.
• Why is len(self.map) 0 rather than 100?
• Do I have to restructure my code to achieve this goal?
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
pool = None
def doSomething(obj, *args):
    """Forward ``args`` to ``obj.doSomething``.

    This plain module-level function is what gets handed to
    ``pool.apply_async``.  The result of ``obj.doSomething`` is discarded,
    so this always returns ``None``.
    """
    obj.doSomething(*args)
class SharedClass:
    """Example object that fans work out to a module-level process pool."""

    def __init__(self):
        global pool
        self.map = set()
        # The pool is stored in a module global rather than on the instance.
        pool = Pool(4)

    def someCondition(self):
        """Return whether a task should be submitted (always ``True`` here)."""
        # The real condition relies on the instance; this is just an example.
        return True

    def go(self, n):
        """Submit ``n`` tasks to the pool, wait for them, then check ``self.map``."""
        global pool
        # range() instead of xrange() so the snippet runs on Python 2 and 3.
        for i in range(n):
            if self.someCondition():
                # Pass the shared class to the other process.
                # NOTE(review): apply_async arguments are pickled, so each
                # worker presumably mutates its own copy of ``self`` -- which
                # would explain the failing assertion below.  Confirm.
                pool.apply_async(doSomething, (self, i))
        pool.close()
        pool.join()
        # Got AssertionError here: why is the len of self.map 0?
        assert len(self.map) == 100

    def doSomething(self, n):
        """Record ``n`` in ``self.map``."""
        # This should change the same SharedClass instance?
        self.map.add(n)
class MyManager(BaseManager):
    """Manager subclass used to register and serve shared classes."""
MyManager.register("SharedClass", SharedClass)


def main():
    """Start the manager, create a managed ``SharedClass`` and run 100 tasks."""
    manager = MyManager()
    manager.start()
    try:
        obj = manager.SharedClass()
        obj.go(100)
    finally:
        # Stop the manager's server process even if ``go`` raises.
        manager.shutdown()


if __name__ == "__main__":
    main()
Related
I'm trying to keep a "static" (class-level) multiprocessing Queue shared across multiple Processes, but it appears that this context is not copied to the newly spawned process. Is there a way to share it without storing it on each derived process class (that is, without self.q = A.q)?
main.py
from class_b import B
if __name__ == "__main__":
    import time

    b = B()
    b.start()
    # Keep the parent alive indefinitely; sleep instead of spinning on
    # ``pass`` so the loop does not pin a CPU core.
    while True:
        time.sleep(1)
class_a.py
from multiprocessing import Process, Queue
class A(Process):
    """Process subclass carrying a class-level ("static") queue."""

    # Evaluated when the class body runs, i.e. once in every interpreter
    # that imports this module.
    q = Queue()

    def __init__(self) -> None:
        super().__init__(daemon=True)
class_b.py
from multiprocessing import Process
from class_a import A
class B(Process):
    """Daemon process that prints ``A.q`` on construction and again in ``run``."""

    def __init__(self):
        super().__init__(daemon=True)
        # Executed in the process that constructs B.
        print(A.q)

    def run(self):
        # Executed in the child process after ``start()`` is called.
        print(A.q)
console
<multiprocessing.queues.Queue object at 0x000001F77851B280>
<multiprocessing.queues.Queue object at 0x0000023C420C2580>
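When you import class_a.py to access A.q, the child process started by multiprocessing imports it as well, so the class body of A runs again there and creates a second Queue. You then have two separate queues, one per process, as the two different objects in the console output show. Instead, create the queue as a local in "main" and pass it into B.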
from class_b import B
from multiprocessing import Queue
if __name__ == "__main__":
    import time

    # Create the queue once in the parent and hand it to the child.
    q = Queue()
    b = B(q)
    b.start()
    # Keep the parent alive indefinitely; sleep instead of spinning on
    # ``pass`` so the loop does not pin a CPU core.
    while True:
        time.sleep(1)
Then make B store that reference for itself:
from multiprocessing import Process
class B(Process):
    """Daemon process that keeps its own reference to a queue it is given."""

    def __init__(self, q):
        super().__init__(daemon=True)
        print(q)
        # Stored on the instance so ``run`` uses the object passed in by
        # the creator rather than a module-level one.
        self.q = q

    def run(self):
        print(self.q)
the following code works fine -
import time
from concurrent.futures import Future, as_completed
from concurrent.futures.process import ProcessPoolExecutor
import multiprocessing as mp
from multiprocessing.managers import BaseManager
class Progress(object):
    """Track completed work against a target amount."""

    _target: int = 0
    _progress: int = 0

    def __init__(self):
        self._target = 0
        self._progress = 0

    def completed(self, n):
        """Add ``n`` units to the completed count."""
        self._progress += n

    def progress(self):
        """Return the completed share of the target as a percentage.

        Returns ``0.0`` while no target has been set, rather than raising
        ``ZeroDivisionError``.
        """
        if not self._target:
            return 0.0
        return (self._progress / self._target) * 100

    def set_target(self, n):
        """Set the amount of work that counts as 100%."""
        self._target = n
class ObjectManager(BaseManager):
    """Manager subclass on which shared object types are registered."""
def dummy_worker(progress: "Progress", cancel=None):
    """Simulate ten one-second work steps, reporting each to ``progress``.

    Args:
        progress: object with a ``completed(n)`` method.
        cancel: optional event-like object exposing ``is_set()``.  Once it
            is set, the remaining steps are skipped.  It is left
            unannotated because ``mp.Event`` is a factory function, not a
            type.

    Returns:
        Always ``1``.
    """
    print("--> Worker started")
    for _ in range(10):
        # Check before each step so a cancelled worker stops promptly.
        if cancel is not None and cancel.is_set():
            break
        time.sleep(1)
        progress.completed(1)
    return 1
if __name__ == "__main__":
    ObjectManager.register('Progress', Progress)
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        # ``progress`` is created through the manager and handed to the
        # worker process.
        progress = manager.Progress()
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            futures = [pool.submit(dummy_worker, progress)]
            for f in as_completed(futures):
                print(f.result())
        # Read the result while the manager is still running.
        print(f'Progress: {progress.progress()}')
The following, however, gives me the error "Condition objects should only be shared between processes through inheritance". I am not sure how to use Event in dummy_worker. The main goal is to pass some shared object and also have a way to cancel the worker. Sorry for the long post; I wanted to give the full code for clarity.
import time
from concurrent.futures import Future, as_completed
from concurrent.futures.process import ProcessPoolExecutor
import multiprocessing as mp
from multiprocessing.managers import BaseManager
class Progress(object):
    """Accumulate completed work and report it relative to a target."""

    _target: int = 0
    _progress: int = 0

    def __init__(self):
        self._target = 0
        self._progress = 0

    def completed(self, n):
        """Increase the completed count by ``n``."""
        self._progress += n

    def progress(self):
        """Return completed work as a percentage of the target.

        With no target set (target is ``0``) this returns ``0.0`` instead
        of raising ``ZeroDivisionError``.
        """
        if not self._target:
            return 0.0
        return (self._progress / self._target) * 100

    def set_target(self, n):
        """Define how much work corresponds to 100%."""
        self._target = n
class ObjectManager(BaseManager):
    """Manager subclass used to register and serve shared object types."""
def dummy_worker(progress: "Progress", cancel=None):
    """Run ten one-second steps, calling ``progress.completed(1)`` after each.

    Args:
        progress: object with a ``completed(n)`` method.
        cancel: optional event-like object exposing ``is_set()``.  When it
            is set the loop stops before the next step.  It is left
            unannotated because ``mp.Event`` is a factory function, not a
            type.

    Returns:
        Always ``1``.
    """
    print("--> Worker started")
    for _ in range(10):
        # Honour a cancellation request before starting another step.
        if cancel is not None and cancel.is_set():
            break
        time.sleep(1)
        progress.completed(1)
    return 1
if __name__ == "__main__":
    ObjectManager.register('Progress', Progress)
    # NOTE(review): this plain multiprocessing Event is later passed to
    # pool.submit(); that is presumably what triggers the "Condition
    # objects should only be shared between processes through
    # inheritance" error reported for this snippet.
    cancel = mp.Event()
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        progress = manager.Progress()
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            futures = [pool.submit(dummy_worker, progress, cancel)]
            for f in as_completed(futures):
                print(f.result())
        print(f'Progress: {progress.progress()}')
Assume I am using Python 3.5+.
Try the following changes:
from multiprocessing.managers import SyncManager
class ObjectManager(SyncManager):
    """Manager built on ``SyncManager`` so it can also create ``Event`` objects."""
# use an Event() created by ObjectManager instance: cancel = manager.Event()
if __name__ == "__main__":
    ObjectManager.register('Progress', Progress)
    # Do not create the event with mp.Event() here ...
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        progress = manager.Progress()
        # ... create it through the manager instead, so it can be passed
        # to the worker like ``progress``.
        cancel = manager.Event()
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            futures = [pool.submit(dummy_worker, progress, cancel)]
            for f in as_completed(futures):
                print(f.result())
        print(f'Progress: {progress.progress()}')
Prints:
Starting manager
Manager started
--> Worker started
1
Progress: 100.0
Also, get rid of/change references to mp.Event.
I have a list of objects, and I want to execute a method in each object in parallel. The method modifies the attributes of the objects. For example:
class Object:
    """Minimal mutable object holding a single counter ``a``."""

    def __init__(self, a):
        self.a = a

    def aplus(self):
        """Increment ``self.a`` by one in place (returns ``None``)."""
        self.a += 1
object_list = [Object(1), Object(2), Object(3)]
# I want to execute this in parallel
for obj in object_list:
    obj.aplus()
I tried the following:
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
executor = ProcessPoolExecutor(max_workers=3)
# NOTE(review): Executor.map expects the callable first and the iterable(s)
# after it; here the list of bound methods is passed in the callable's
# position, so presumably no aplus() call is ever submitted -- confirm.
res = executor.map([obj.aplus for obj in object_list])
Which does not work, leaving the objects unaltered. I assume it's because the objects can only be copied, and not accessed, with multiprocessing. Any idea?
Thanks a lot!
EDIT: Supposedly the objects are very big, so it would be preferable to avoid copying them to each process. The methods are also supposedly very CPU intensive, so multiple processes rather than threads should be used. Within these conditions, I believe there is no solution, as multiprocessing cannot share memory and threads cannot use multiple CPUs. I would like to be shown wrong though.
Here is a working example using Pool.map:
import multiprocessing
class Object:
    """Counter object whose string form is its current value."""

    def __init__(self, a):
        self.a = a

    def aplus(self):
        """Increment ``self.a`` by one in place."""
        self.a += 1

    def __str__(self):
        return str(self.a)
def worker(obj):
    """Call ``obj.aplus()`` and return ``obj``.

    Returning the object is what lets the caller of ``pool.map`` receive
    the modified objects back.
    """
    obj.aplus()
    return obj
if __name__ == "__main__":
    object_list = [Object(1), Object(2), Object(3)]
    try:
        processes = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a fixed size when the CPU count is unavailable.
        processes = 2
    pool = multiprocessing.Pool(processes=processes)
    try:
        modified_object_list = pool.map(worker, object_list)
    finally:
        # Release the worker processes whether or not map() succeeded.
        pool.close()
        pool.join()
    for obj in modified_object_list:
        print(obj)
Prints:
2
3
4
Here is my answer, using threading:
from threading import Thread
class Object:
    """Holder for one integer attribute ``a`` that can be bumped in place."""

    def __init__(self, a):
        self.a = a

    def aplus(self):
        """Add one to ``self.a``."""
        self.a += 1
object_list = [Object(1), Object(2), Object(3)]

# A list containing all threads we will create
threads = []

# Create a thread for every object
for obj in object_list:
    thread = Thread(target=obj.aplus)
    thread.daemon = True
    thread.start()
    threads.append(thread)

# Wait for all threads to finish before continuing
for thread in threads:
    thread.join()

# Print the results
for obj in object_list:
    print(obj.a)
I assume it's because the objects can only be copied, and not
accessed, with multiprocessing.
This is exactly right, and is half the answer. Because the processes are isolated they each have their own copy of the object_list. One solution here is to use ThreadPoolExecutor (the threads all share the same object_list).
The syntax to use it is a bit different from what you are trying to use, but this works as intended:
with ThreadPoolExecutor(max_workers=3) as executor:
    # Consume the lazy map() result so every call has finished (and any
    # exception has surfaced) before the executor is shut down.
    res = list(executor.map(Object.aplus, object_list))
If you really want to use ProcessPoolExecutor then you'll need to get the data back from the processes somehow. The easiest way is to use functions which return values:
from concurrent.futures import ProcessPoolExecutor
class Object:
    """Counter object whose ``aplus`` reports the new value."""

    def __init__(self, a):
        self.a = a

    def aplus(self):
        """Increment ``self.a`` and return the updated value."""
        self.a += 1
        return self.a
if __name__ == '__main__':
    object_list = [Object(1), Object(2), Object(3)]
    # The context manager shuts the worker processes down when done.
    with ProcessPoolExecutor(max_workers=3) as executor:
        for result in executor.map(Object.aplus, object_list):
            print("I got: " + str(result))
You can even have the function you are mapping return self, and put those returned objects back into your object_list at the end. So the full multiprocessing solution would look like:
from concurrent.futures import ProcessPoolExecutor
class Object:
    """Counter object whose ``aplus`` returns the object itself."""

    def __init__(self, a):
        self.a = a

    def aplus(self):
        """Increment ``self.a`` and return ``self``."""
        self.a += 1
        return self
if __name__ == '__main__':
    object_list = [Object(1), Object(2), Object(3)]
    # The context manager shuts the worker processes down when done.
    with ProcessPoolExecutor(max_workers=3) as executor:
        # Replace the originals with the objects returned by the workers.
        object_list = list(executor.map(Object.aplus, object_list))
How can I get the following to work? The main point is that I want to run a method (and not a function) asynchronously.
from multiprocessing import Pool
class Async:
    """Submit the bound method ``f`` to a pool as soon as it is constructed."""

    def __init__(self, pool):
        self.pool = pool
        self.run()

    def run(self):
        """Schedule ``self.f(10)`` on the pool given to the constructor."""
        # Use the injected pool rather than the module-level ``p``, which
        # only exists when this file is run as a script.
        self.pool.apply_async(self.f, (10, ))

    def f(self, x):
        """Print the square of ``x``."""
        # Parenthesised so the statement is valid on Python 2 and 3.
        print(x*x)
if __name__ == '__main__':
    p = Pool(5)
    a = Async(p)
    # No more tasks will be submitted; wait for the workers to finish.
    p.close()
    p.join()
This prints nothing.
The problem appears to be due to the fact that multiprocessing needs to pickle self.f while bound methods are not picklable. There is a discussion on how to solve the problem here.
The apply_async apparently creates an exception which is put inside the future returned. That's why nothing is printed. If a get is executed on the future, then the exception is raised.
It's definitely possible to run class methods on a thread pool in Python 2 - the following program did what I would expect.
#!/usr/bin/env python
from multiprocessing.pool import ThreadPool
class TestAsync():
    """Run ``print_int`` for 0 and 1 on a two-thread pool and print the results."""

    def __init__(self):
        pool = ThreadPool(processes=2)
        try:
            async_completions = [
                pool.apply_async(self.print_int, (a,)) for a in range(2)
            ]
            for completion in async_completions:
                res = completion.get()
                print("res = %d" % res)
        finally:
            # Release the worker threads once the results are collected.
            pool.close()
            pool.join()

    def print_int(self, value):
        """Print ``value`` and return it multiplied by ten."""
        print(value)
        return value * 10


if __name__ == "__main__":
    # Guarded so importing this module does not start a thread pool.
    a = TestAsync()
I swear I saw the following in an example somewhere, but now I can't find that example and this isn't working. The __call__ class function never gets called.
EDIT: Code updated
pool.map appears to start the QueueWriter instance and the __call__ function is reached. However, the workers never seem to start or at least no results are pulled from the queue. Is my queue set up the right way? Why do the workers not fire off?
import multiprocessing as mp
import os
import random
class QueueWriter(object):
    """Callable meant to drain a queue into a log file until told to stop."""

    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        """Open ``self.path`` for appending and process items from ``q``."""
        print(self.path)
        log = open(self.path, "a", 1)
        log.write("QueueWriter called.\n")
        while 1:
            res = q.get()
            # NOTE(review): the writes below use ``self.log``, but the file
            # is held in the local ``log``; no ``self.log`` attribute is set
            # anywhere in this class, so these lines presumably raise
            # AttributeError.
            if res == 'kill':
                self.log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                break
            else:
                self.log.write("This is where I'd write: {0} to grid file.\n".format(res))
        log.close()
        log = None
class Worker(object):
    """Callable that computes one grid value and reports it on a queue."""

    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        """Compute the value for ``idx``, put ``(idx, value)`` on the queue."""
        # NOTE(review): ``self`` is passed explicitly to a bound method, so
        # workhorse() receives one argument too many -- presumably a
        # TypeError at call time.
        res = self.workhorse(self, idx)
        self.queue.put((idx,res))
        return res

    def workhorse(self,idx):
        """Return ``grid[idx]`` raised to the power of itself."""
        # In reality a fairly complex operation.
        return self.grid[idx] ** self.grid[idx]
if __name__ == '__main__':
    # log = open(os.path.expanduser('~/minimal.log'), 'w',1)
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    # range() instead of xrange() so the snippet runs on Python 2 and 3.
    grid = [random.random() for _ in range(10000)]
    # In actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter.
    qWriter = QueueWriter(grid=grid, path=path)
    # NOTE(review): pool.map() blocks until qWriter returns, and qWriter
    # only returns after receiving 'kill', which is sent further down --
    # so the lines below are presumably never reached.
    watcher = pool.map(qWriter, (q,),1)
    wrkr = Worker(queue=q,grid=grid)
    result = pool.map(wrkr, range(10000), 1)
    # NOTE(review): pool.map() returns a list, which has no .get().
    result.get()
    q.put('kill')
    pool.close()
    pool.join()
So the log does indeed print the initialization message, but then __call__ function is never called. Is this one of those pickling issues I've seen discussed so often? I've found answers about class member functions, but what about class instances?
At the gentle and patient prodding of martineau (thanks!) I think I've ironed out the problems. I have yet to apply it to my original code, but it is working in the example above and I'll start new questions for future implementation problems.
So in addition to changing where in the code the target file (the log, in this example) gets opened, I also started the QueueWriter instance as a single multiprocessing process rather than using pool.map. As martineau pointed out the map call blocks until the qWriter.__call__() returns and this prevented the workers from being called.
There were some other bugs in the code above, but those were incidental and fixed below:
import multiprocessing as mp
import os
import random
class QueueWriter(object):
    """Callable that drains a queue into a log file until told to stop."""

    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        """Append one line per queue item to the file at ``self.path``.

        Blocks on ``q.get()`` and returns once the sentinel string
        ``'kill'`` has been received.
        """
        print(self.path)
        # Line-buffered append; ``with`` closes the file even if
        # ``q.get()`` or a write raises.
        with open(self.path, "a", 1) as log:
            log.write("QueueWriter called.\n")
            while True:
                res = q.get()
                if res == 'kill':
                    log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                    break
                log.write("This is where I'd write: {0} to grid file.\n".format(res))
class Worker(object):
    """Callable that computes one grid value and reports it on a queue."""

    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        """Compute the value for ``idx``, enqueue ``(idx, value)``, return it."""
        res = self.workhorse(idx)
        self.queue.put((idx, res))
        return res

    def workhorse(self, idx):
        """Return ``grid[idx]`` raised to the power of itself."""
        # In reality a fairly complex operation.
        return self.grid[idx] ** self.grid[idx]
if __name__ == '__main__':
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    # range() instead of xrange() so the snippet runs on Python 2 and 3.
    grid = [random.random() for _ in range(10000)]
    # In actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter.
    qWriter = QueueWriter(grid=grid, path=path)
    # Start the writer as a single process rather than through pool.map(),
    # which would block until qWriter.__call__() returned.
    p = mp.Process(target=qWriter, args=(q,))
    p.start()
    wrkr = Worker(queue=q, grid=grid)
    try:
        # pool.map() returns the finished list directly; no .get() needed.
        result = pool.map(wrkr, range(10000), 1)
    finally:
        # Always send the sentinel so the writer process terminates, even
        # if a worker raised.
        q.put('kill')
        pool.close()
        p.join()
        pool.join()