Python multiprocessing Event with process pool and custom manager - python

The following code works fine:
import time
from concurrent.futures import Future, as_completed
from concurrent.futures.process import ProcessPoolExecutor
import multiprocessing as mp
from multiprocessing.managers import BaseManager
class Progress(object):
    """Track completed work against a target amount."""

    # Class-level defaults; __init__ sets the same values per instance.
    _target: int = 0
    _progress: int = 0

    def __init__(self):
        self._target = 0
        self._progress = 0

    def completed(self, n):
        """Add ``n`` units to the amount of completed work."""
        self._progress += n

    def progress(self):
        """Return completed work as a percentage of the target.

        Returns 0.0 while the target is still zero instead of raising
        ``ZeroDivisionError``.
        """
        if not self._target:
            return 0.0
        return (self._progress / self._target) * 100

    def set_target(self, n):
        """Set the total amount of work that counts as 100%."""
        self._target = n
class ObjectManager(BaseManager):
    """Manager subclass on which the shared object types are registered."""
def dummy_worker(progress: "Progress", cancel=None):
    """Simulate ten one-second work steps, reporting each to ``progress``.

    Args:
        progress: object with a ``completed(n)`` method (a ``Progress``
            instance or a manager proxy for one).
        cancel: optional event-like object exposing ``is_set()``. When it
            is set, the worker stops before starting the next step.

    Returns:
        1 when all ten steps ran, 0 when the worker was cancelled early.
    """
    print("--> Worker started")
    for _ in range(10):
        # Check before sleeping so a cancel request costs at most one step.
        if cancel is not None and cancel.is_set():
            return 0
        time.sleep(1)
        progress.completed(1)
    return 1
if __name__ == "__main__":
    # Make Progress available as manager.Progress().
    ObjectManager.register('Progress', Progress)
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        progress = manager.Progress()
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            pending = [pool.submit(dummy_worker, progress)]
            for finished in as_completed(pending):
                print(finished.result())
        # Query the proxy while the manager context is still open.
        print(f'Progress: {progress.progress()}')
The following version, however, fails with "Condition objects should only be shared between processes through inheritance". I am not sure how to use an Event in dummy_worker. The main goal is to pass some shared object to the worker and also have a way to cancel the worker. Sorry for the long post; I wanted to give the full code for clarity.
import time
from concurrent.futures import Future, as_completed
from concurrent.futures.process import ProcessPoolExecutor
import multiprocessing as mp
from multiprocessing.managers import BaseManager
class Progress(object):
    """Track completed work against a target amount."""

    # Class-level defaults; __init__ sets the same values per instance.
    _target: int = 0
    _progress: int = 0

    def __init__(self):
        self._target = 0
        self._progress = 0

    def completed(self, n):
        """Add ``n`` units to the amount of completed work."""
        self._progress += n

    def progress(self):
        """Return completed work as a percentage of the target.

        Returns 0.0 while the target is still zero instead of raising
        ``ZeroDivisionError``.
        """
        if not self._target:
            return 0.0
        return (self._progress / self._target) * 100

    def set_target(self, n):
        """Set the total amount of work that counts as 100%."""
        self._target = n
class ObjectManager(BaseManager):
    """Manager subclass on which the shared object types are registered."""
def dummy_worker(progress: "Progress", cancel=None):
    """Simulate ten one-second work steps, reporting each to ``progress``.

    Args:
        progress: object with a ``completed(n)`` method (a ``Progress``
            instance or a manager proxy for one).
        cancel: optional event-like object exposing ``is_set()``. When it
            is set, the worker stops before starting the next step.

    Returns:
        1 when all ten steps ran, 0 when the worker was cancelled early.
    """
    print("--> Worker started")
    for _ in range(10):
        # Check before sleeping so a cancel request costs at most one step.
        if cancel is not None and cancel.is_set():
            return 0
        time.sleep(1)
        progress.completed(1)
    return 1
if __name__ == "__main__":
    ObjectManager.register('Progress', Progress)
    # NOTE(review): this Event comes straight from multiprocessing and is
    # later handed to pool.submit(); per the surrounding text this is the
    # call that fails with "Condition objects should only be shared between
    # processes through inheritance".
    cancel = mp.Event()
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        progress = manager.Progress()
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            pending = [pool.submit(dummy_worker, progress, cancel)]
            for finished in as_completed(pending):
                print(finished.result())
        # Query the proxy while the manager context is still open.
        print(f'Progress: {progress.progress()}')
Assume I am using Python 3.5+.

Try the following changes:
from multiprocessing.managers import SyncManager
class ObjectManager(SyncManager):
    """Manager derived from SyncManager, so ``manager.Event()`` is available."""
# use an Event() created by ObjectManager instance: cancel = manager.Event()
if __name__ == "__main__":
    ObjectManager.register('Progress', Progress)
    #cancel = mp.Event() # not this
    print('Starting manager')
    with ObjectManager() as manager:
        print('Manager started')
        progress = manager.Progress()
        cancel = manager.Event() # but rather this
        progress.set_target(10)
        with ProcessPoolExecutor() as pool:
            pending = [pool.submit(dummy_worker, progress, cancel)]
            for finished in as_completed(pending):
                print(finished.result())
        # Query the proxy while the manager context is still open.
        print(f'Progress: {progress.progress()}')
Prints:
Starting manager
Manager started
--> Worker started
1
Progress: 100.0
Also, get rid of (or change) the references to mp.Event, such as the type annotation on dummy_worker's cancel parameter.

Related

How can the target= and args= be obtained from variables?

I am new to both Stack Overflow and Python, so please bear with me.
When I run this test program, the threads don't seem to start the function.
How can the target= and args= be obtained from variables?
import queue
import random
import threading
import time
def start_threads(count, func, args):
    """Create ``count`` Thread objects targeting ``func`` with ``args``.

    Returns the list of created threads.
    """
    created = []
    for _ in range(count):
        worker = threading.Thread(target=func, args=args)
        # NOTE(review): this only looks up the bound method; without call
        # parentheses the thread is never actually started.
        worker.start
        created.append(worker)
    return created
def function(a, b):
    """Print a start marker, sleep a random a..b whole seconds, print a stop marker."""
    print("Start function")
    delay = random.randint(a, b)
    time.sleep(delay)
    print("Stop function")
if __name__ == "__main__":
    num_threads = 5
    # NOTE(review): this is the function's name as a string, not the
    # function object itself.
    func_name = "function"
    min_wait = 3
    max_wait = 7
    wait_range = (min_wait,max_wait)
    threads = start_threads(num_threads, func_name, wait_range)
    print(f"Active threads {threading.active_count()}")
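You cannot pass a string as the target argument when calling Thread(...); you must provide a function object. Also note that thread.start without parentheses only references the method: it has to be called as thread.start() for the thread to run.
Here is a working solution: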
import random
import time
from threading import Thread
def function(a, b):
    """Print a start marker, sleep a random a..b whole seconds, print a stop marker."""
    print("START FUNCTION")
    delay = random.randint(a, b)
    time.sleep(delay)
    print("STOP FUNCTION")
def create_threads(count, func, times):
    """Return ``count`` unstarted threads that will call ``func(times[0], times[1])``."""
    return [
        Thread(target=func, args=[times[0], times[1]])
        for _ in range(count)
    ]
def run_threads(threads):
    """Start every thread in ``threads``, in order."""
    for pending in threads:
        pending.start()
# Demo configuration.
NUM_THREADS = 5
FUNC_NAME = function  # the function object itself, not its name as a string
MIN_WAIT = 3
MAX_WAIT = 7

# Build the threads first, then start them all.
threads = create_threads(NUM_THREADS, FUNC_NAME, (MIN_WAIT, MAX_WAIT))
run_threads(threads)

Python multiprocessing, share class instance does not work

I want to send tasks to the pool from inside the shared class, based on some conditions, but I get an unexpected result, shown below.
• Why is len(self.map) 0 and not 100?
• Do I have to restructure my code to achieve this goal?
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
pool = None
def doSomething(obj, *args):
    """Module-level trampoline: forward ``args`` to ``obj.doSomething``."""
    obj.doSomething(*args)
class SharedClass:
    """Object that dispatches work on itself to the module-level pool."""

    def __init__(self):
        global pool
        self.map = set()
        # The pool lives in a module global rather than on the instance.
        pool = Pool(4)

    def someCondition(self):
        # the condition relies on the instance; this is just an example
        return True

    def go(self, n):
        """Submit ``doSomething(self, i)`` for each i below ``n``, then wait."""
        for i in xrange(n):
            if not self.someCondition():
                continue
            # pass the shared class to another process
            pool.apply_async(doSomething, (self, i))
        pool.close()
        pool.join()
        # got AssertionError here
        # why is the len of self.map 0
        assert len(self.map) == 100

    def doSomething(self, n):
        # this should change the same SharedClass instance?
        self.map.add(n)
class MyManager(BaseManager):
    """Manager subclass on which ``SharedClass`` is registered."""
MyManager.register("SharedClass", SharedClass)
def main():
    """Start the manager, create a shared ``SharedClass`` and run ``go(100)``."""
    server = MyManager()
    server.start()
    shared = server.SharedClass()
    shared.go(100)


if __name__ == "__main__":
    main()

Python, tracking using tqdm across parallel sub-tasks

To monitor the code I am working on, I have tried to create a single progress tracker across many tasks that run in different processes.
At the beginning of the run I know the number of tasks (and workers).
For demonstration (a toy example that does not work):
from multiprocessing import Pool
from tqdm import tqdm
def work(i, t):
    """Call ``t.update()`` one million times, then return ``i``."""
    steps = 10**6
    for _ in range(steps):
        t.update()
    return i
def wrapped_work(params):
    """Unpack ``params`` into ``work`` and return its result.

    Lets single-argument mappers call the two-argument ``work``. The
    original dropped the return value, so every call yielded None.
    """
    return work(*params)
def main(n=1):
    """Run ``n`` jobs on an 8-process pool under one tqdm bar; return their sum."""
    # another loop:
    with Pool(processes=8) as p, tqdm(total=n * 10**6) as t:
        # NOTE(review): each job is an (i, t) tuple, yet it is mapped with
        # work rather than wrapped_work; the text says this example does
        # not work.
        jobs = ((i, t) for i in range(1, n+1))
        return sum(p.map(work, jobs))


if __name__ == "__main__":
    main(5)
I tried to apply the approach from this topic using a pool, but without success.
I would greatly appreciate your help.
Based on this post:
from multiprocessing import Pool, Process, Value
from ctypes import c_bool, c_long
from tqdm.auto import tqdm
class TqdmMultiprocessing:
    """Run ``Pool.map`` while a separate process draws a tqdm progress bar.

    A shared ``c_long`` counter is attached to the worker function in every
    pool process by ``worker_init``. The listener process polls that counter
    and feeds the differences to the bar.
    """

    max_processes = 64

    def __init__(self, static_func, processes=64):
        # lock=False gives the raw shared value without a wrapping lock.
        self.counter = Value(c_long, lock=False)
        self.pool = Pool(
            processes=min(processes, self.max_processes),
            initializer=self.worker_init,
            initargs=(static_func, self.counter)
        )

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.pool.close()
        # Wait for the workers to exit so none outlive the context.
        self.pool.join()

    def tqdm(self, static_func, iterable, **kwargs):
        """Map ``static_func`` over ``iterable`` while showing a progress bar.

        ``kwargs`` are passed to ``tqdm`` in the listener process. Returns
        the list produced by ``Pool.map``.
        """
        done_value = Value(c_bool)
        proc = Process(target=self.listener, args=(self.counter, done_value, kwargs,))
        proc.start()
        try:
            result = self.pool.map(static_func, iterable)
        finally:
            # Always release the listener, otherwise it would poll forever
            # when map() raises.
            done_value.value = True
            proc.join()
            self.counter.value = 0
        return result

    @staticmethod
    def listener(counter: Value, is_done: Value, kwargs):
        """Poll ``counter`` and update the bar until ``is_done`` is set."""
        with tqdm(**kwargs) as tqdm_bar:
            old_counter = 0
            while not is_done.value:
                new_counter = counter.value
                tqdm_bar.update(new_counter - old_counter)
                old_counter = new_counter
            # Advance the bar to its total using the last value seen.
            tqdm_bar.update(tqdm_bar.total - old_counter)

    @staticmethod
    def worker_init(static_func, counter: Value):
        """Pool initializer: attach the shared counter to the worker function."""
        static_func.counter = counter
def work(i):
    """Increment the counter attached to this function a million times; return ``i``."""
    shared = work.counter
    for _ in range(10**6):
        shared.value += 1
    return i
def main(n=1):
    """Run the ``n``-item workload twice on a 3-process pool, with a bar each time."""
    with TqdmMultiprocessing(work, processes=3) as p:
        for _ in range(2):
            p.tqdm(work, range(n), total=n * 10 ** 6)


if __name__ == "__main__":
    main(5)

python multiprocessing process count

#coding:utf-8
import sys
import time
import os
import multiprocessing
class Worker(object):
    """Demo worker that counts to five and reports its state in a shared dict."""

    def __init__(self):
        self.progress = 0
        self.task_info = None

    def init(self):
        pass

    def status(self):
        pass

    def set_task_info(self, task_info):
        self.task_info = task_info

    def run(self, worker_status_meta_dict):
        """Advance ``progress`` to 5, sleeping two seconds per step.

        Writes 'state'/'status' into ``worker_status_meta_dict``: 0/"running"
        during each step and 1/"succeeded" once the loop is finished.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(multiprocessing.current_process())
        print("process is %d" % self.progress)
        while self.progress < 5:
            self.progress = self.progress + 1
            worker_status_meta_dict['state'] = 0
            worker_status_meta_dict['status'] = "running"
            time.sleep(2)
        worker_status_meta_dict['state'] = 1
        worker_status_meta_dict['status'] = "succeeded"
        print("bavscan worker finished...")
if __name__ == "__main__":
    worker = Worker()
    # Manager() starts a server process of its own (see the excerpt below).
    manager = multiprocessing.Manager()
    worker_process_dict = manager.dict()
    process = multiprocessing.Process(
        target=Worker.run,
        args=(worker, worker_process_dict),
    )
    process.start()
    time.sleep(60)
This is a simple demo of Python multiprocessing.
The main process invokes the Worker.run method in a subprocess using multiprocessing.Process.
When I run it on Windows 7, the main process launches two subprocesses.
I found the cause in "Python27\Lib\multiprocessing\__init__.py":
def Manager():
    '''
    Returns a manager associated with a running server process

    The managers methods such as `Lock()`, `Condition()` and `Queue()`
    can be used to create shared objects.
    '''
    from multiprocessing.managers import SyncManager
    manager = SyncManager()
    # start() is what launches the server process.
    manager.start()
    return manager
m.start() launches a subprocess to run the manager; that is the second subprocess.

Python multiprocessing IOError: [Errno 232] The pipe is being closed

I am trying to follow this tutorial on multiprocessing in Python, but when I tried it on my own task I got the following error:
Traceback (most recent call last):
>>> File "C:\Python27\lib\multiprocessing\queues.py", line 262, in _feed
send(obj)
IOError: [Errno 232] The pipe is being closed
Here is a reproducible example of what I am trying to do which gives the same error message:
from multiprocessing import Lock, Process, Queue, current_process
import time
class Testclass(object):
    """Minimal record holding a single value ``x``."""

    def __init__(self, x):
        self.x = x
def toyfunction(testclass):
    """Store ``x`` squared on the object as ``product`` and return the object."""
    value = testclass.x
    testclass.product = value * value
    return testclass
def worker(work_queue, done_queue):
    """Process items from ``work_queue`` until the 'STOP' sentinel arrives.

    Each item is passed through ``toyfunction`` and the result is put on
    ``done_queue``. Any error ends the loop after printing 'error'.
    Always returns True.
    """
    try:
        for testclass in iter(work_queue.get, 'STOP'):
            print(testclass.counter)
            newtestclass = toyfunction(testclass)
            done_queue.put(newtestclass)
    except Exception:
        # Narrower than a bare except, so KeyboardInterrupt and SystemExit
        # still propagate.
        print('error')
    return True
def main():
    """Load 1000 items onto a work queue and process them with 8 workers."""
    counter = 1
    database = []
    while counter <= 1000:
        database.append(Testclass(3))
        counter += 1
        print(counter)
    workers = 8
    work_queue = Queue()
    done_queue = Queue()
    processes = []
    # time.time() exists on every Python version; time.clock() was removed
    # in Python 3.8.
    start = time.time()
    counter = 1
    for testclass in database:
        testclass.counter = counter
        work_queue.put(testclass)
        counter += 1
        print(counter)
    print('items loaded')
    for w in range(workers):
        p = Process(target=worker, args=(work_queue, done_queue))
        p.start()
        processes.append(p)
        # One sentinel per worker so each of them terminates.
        work_queue.put('STOP')
    for p in processes:
        p.join()
    # NOTE(review): done_queue is never read here; per the text that follows,
    # consuming it is what makes the pipe error go away.
    done_queue.put('STOP')
    print(time.time()-start)
    print("Done")


if __name__ == '__main__':
    main()
I got around this by emptying the queue after using an event to exit the process gracefully:
self.event.set() #the process has a timer that checks for this to be set, then shuts itself down
while not self._q.empty(): #_q is a multiprocess.Queue object used to communicate inter-process
    try:
        self._q.get(timeout=0.001)
    except Exception:
        # Best-effort drain: a get() that times out is simply ignored.
        pass
self._q.close()
When I add code that processes the done queue I no longer get the error. Here is working code:
from multiprocessing import Lock, Process, Queue, current_process
import time
class Testclass(object):
    """Minimal record holding a single value ``x``."""

    def __init__(self, x):
        self.x = x
def toyfunction(testclass):
    """Store ``x`` squared on the object as ``product`` and return the object."""
    value = testclass.x
    testclass.product = value * value
    return testclass
def worker(work_queue, done_queue):
    """Process items from ``work_queue`` until the 'STOP' sentinel arrives.

    Each item is passed through ``toyfunction`` and the result is put on
    ``done_queue``. Any error ends the loop after printing 'error'.
    Always returns True.
    """
    try:
        for testclass in iter(work_queue.get, 'STOP'):
            print(testclass.counter)
            newtestclass = toyfunction(testclass)
            done_queue.put(newtestclass)
    except Exception:
        # Narrower than a bare except, so KeyboardInterrupt and SystemExit
        # still propagate.
        print('error')
    return True
def main():
    """Process 100 items with 8 workers and return the collected results."""
    counter = 1
    database = []
    while counter <= 100:
        database.append(Testclass(10))
        counter += 1
        print(counter)
    workers = 8
    work_queue = Queue()
    done_queue = Queue()
    processes = []
    # time.time() exists on every Python version; time.clock() was removed
    # in Python 3.8.
    start = time.time()
    counter = 1
    for testclass in database:
        testclass.counter = counter
        work_queue.put(testclass)
        counter += 1
        print(counter)
    print('items loaded')
    for w in range(workers):
        p = Process(target=worker, args=(work_queue, done_queue))
        p.start()
        processes.append(p)
        # One sentinel per worker so each of them terminates.
        work_queue.put('STOP')
    # NOTE(review): the workers are joined before done_queue is drained. The
    # multiprocessing guidelines advise removing queued items before joining
    # the producers -- confirm this holds for larger payloads.
    for p in processes:
        p.join()
    done_queue.put('STOP')
    # added: process the done queue
    newdatabase = []
    for testclass in iter(done_queue.get, 'STOP'):
        newdatabase.append(testclass)
    print(time.time()-start)
    print("Done")
    return newdatabase


if __name__ == '__main__':
    database = main()

Categories

Resources