Sharing a Queue created within a process - Python multiprocessing

I wish to have a list of Queues shared between processes. The idea is that, from a "main" process, I can pipe whatever information I want to one of the other processes, but the number of other processes isn't determined.
I cannot create the Queue in the "main" process. I am simulating a decentralised system, and creating the Queue in the main process does not fit this paradigm. As such, the Queues must be created within the other processes.
This poses a difficulty, as I can't find how to share these Queues with the main process. I have a managed list using multiprocessing.Manager, but if I append a multiprocessing.Queue to it, I get:
RuntimeError: Queue objects should only be shared between processes
through inheritance
Appending a standard data type such as an integer works just fine.
MRE below:
import multiprocessing as mp
from time import sleep

class test:
    def __init__(self, qlist):
        self.qlist = qlist
        self.q = mp.Queue()
        qlist.append(4)
        self.next = None
        self.run()

    def run(self):
        while True:
            val = self.q.get()
            if val == 1:
                p = mp.Process(target=test, args=(self.qlist, ))
                p.start()
            else:
                print(val)

if __name__ == '__main__':
    manager = mp.Manager()
    qlist = manager.list()
    p = mp.Process(target=test, args=(qlist, ))
    p.start()
    sleep(0.5)
    print(qlist)
    p.join()
The idea would be that, in the if __name__ == '__main__': block, I could look through qlist and select one of the Queues to pipe information to, e.g. qlist[2].put(1) to add a test object or qlist[3].put("Hello") to print "Hello".
The best-case scenario would be to have a list of test objects (where each test object has a self.q attribute for accessing its Queue) that I could access from the "main" process, but I'm even less sure of how to do that, hence why I'm asking about the Queues.
Any help with this would be greatly appreciated.

You can certainly create queue instances within a process; your test.__init__ method already does this with the statement self.q = mp.Queue(), which runs in the newly spawned process. The problem is that a multiprocessing queue cannot be added to a managed list. Here is your program, slightly modified so that it does not attempt to add the queues to a managed list. I have also made your test class (now renamed Test) a subclass of Process, and it now terminates:
import multiprocessing as mp

class Test(mp.Process):
    def __init__(self, value):
        mp.Process.__init__(self)
        self.value = value
        self.q = mp.Queue()
        self.q.put(value)
        self.next = None

    def run(self):
        value = self.q.get()
        print('value = ', value)
        value -= 1
        if value > 0:
            p = Test(value).start()

if __name__ == '__main__':
    p = Test(4).start()
Prints:
value = 4
value = 3
value = 2
value = 1
If you want to maintain a list of objects, then it would be better if Test is not a subclass of Process:
import multiprocessing as mp

class Test():
    def __init__(self, lst, value):
        lst.append(self)
        self.lst = lst
        self.value = value
        self.q = mp.Queue()
        self.q.put(value)
        self.next = None

    def run(self):
        value = self.q.get()
        print('value = ', value)
        value -= 1
        if value > 0:
            test = Test(self.lst, value)
            p = mp.Process(target=test.run).start()

if __name__ == '__main__':
    manager = mp.Manager()
    lst = manager.list()
    test = Test(lst, 4)
    p = mp.Process(target=test.run).start()
    import time
    time.sleep(3)
    print(lst)
Prints:
value = 4
value = 3
value = 2
value = 1
[<__mp_main__.Test object at 0x0000028E6DAD5DC0>, <__mp_main__.Test object at 0x0000028E6DAD5FA0>, <__mp_main__.Test object at 0x0000028E6DAD5E50>, <__mp_main__.Test object at 0x0000028E6DAD5D90>]
But here is a big BUT:
Each of those objects "lives" in a different address space, and the references only have meaning when accessed from the address space in which they were created. So this is pretty useless:
import multiprocessing as mp

class Test():
    def __init__(self, lst, value):
        lst.append(self)
        self.lst = lst
        self.value = value
        self.q = mp.Queue()
        self.q.put(value)
        self.next = None

    def run(self):
        value = self.q.get()
        print('value = ', value)
        value -= 1
        if value > 0:
            test = Test(self.lst, value)
            p = mp.Process(target=test.run).start()

if __name__ == '__main__':
    manager = mp.Manager()
    lst = manager.list()
    test = Test(lst, 4)
    p = mp.Process(target=test.run).start()
    import time
    time.sleep(3)
    print(test, test.__class__, test.value)
    print(lst)
    for elem in lst:
        print(type(elem))
        print(elem.value)
Prints:
value = 4
value = 3
value = 2
value = 1
<__main__.Test object at 0x0000020E52E6A640> <class '__main__.Test'> 4
[<__mp_main__.Test object at 0x0000016827704DC0>, <__mp_main__.Test object at 0x0000016827704FA0>, <__mp_main__.Test object at 0x0000016827704250>, <__mp_main__.Test object at 0x0000016827704D90>]
<class '__main__.Test'>
Traceback (most recent call last):
File "C:\Ron\test\test.py", line 31, in <module>
print(elem.value)
AttributeError: 'Test' object has no attribute 'value'
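One common workaround, not used in the answer above, is to create the queues through a Manager: manager.Queue() returns a proxy object, and proxies, unlike plain mp.Queue instances, can be pickled and therefore stored in a managed list. The trade-off is that the queues are then hosted by the manager's server process rather than owned by any single worker, which may or may not fit the decentralised model. A minimal sketch under that assumption:
import multiprocessing as mp
from time import sleep

def worker(qlist, q):
    # q is a manager-backed queue proxy; unlike mp.Queue, it can be
    # appended to the managed list without raising RuntimeError.
    qlist.append(q)
    while True:
        val = q.get()
        if val == 'stop':
            break
        print('worker got:', val)

if __name__ == '__main__':
    manager = mp.Manager()
    qlist = manager.list()
    procs = []
    for _ in range(3):
        q = manager.Queue()   # proxy object, safe to share and to store in qlist
        p = mp.Process(target=worker, args=(qlist, q))
        p.start()
        procs.append(p)
    sleep(0.5)                # give the workers time to register their queues
    qlist[1].put('Hello')     # pick one of the shared queues and pipe data to it
    for i in range(len(qlist)):
        qlist[i].put('stop')
    for p in procs:
        p.join()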

Related

Changing values of list in multiprocessing

I am new to Python multiprocessing; here is some background on the code below. I am trying to create three processes: one to add an element to the list, one to modify an element in the list, and one to print the list.
The three processes ideally use the same list, which is in shared memory, initiated using a manager.
The problem I face is that testprocess2 is not able to set the value to 0; basically, it is not able to alter the list.
from multiprocessing import Process, Manager, Lock
from time import sleep
import random

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class testprocess2(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                self.trades[idx].order_id = 0
            # lock.release()
            sleep(1)

class testprocess1(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(self.trades[idx].order_id)
            sleep(1)

class testprocess(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = testprocess(records, lock)
        p1.start()
        p2 = testprocess1(records, lock)
        p2.start()
        p3 = testprocess2(records, lock)
        p3.start()
        p1.join()
        p2.join()
        p3.join()
Strictly speaking, your managed list is not in shared memory, and it is very important to understand what is going on. The actual list holding your Trade instances resides in a process that is created when you execute the Manager() call. When you then execute records = manager.list([Trade(5)]), records is not a direct reference to that list because, as I said, we are not dealing with shared memory. It is instead a special proxy object that implements the same methods as a list, but when you, for example, invoke append on this proxy object, it takes the argument you are trying to append, serializes it, and transmits it to the manager's process via a socket or pipe, where it gets de-serialized and appended to the actual list. In short, operations on the proxy object are turned into remote method calls.
Now for your problem. You are trying to reset the order_id attribute with the following statement:
self.trades[idx].order_id = 0
Since we are dealing with a remote list via a proxy object, the above statement unfortunately becomes the equivalent of:
trade = self.trades[idx] # fetch object from the remote list
trade.order_id = 0 # reset the order_id to 0 on the local copy
What is missing is updating the list with the newly updated trade object:
self.trades[idx] = trade
So your single update statement really needs to be replaced with the above 3-statement sequence.
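To make the pitfall concrete, here is a minimal single-process sketch (my addition, not from the original post) showing that mutating an element fetched through the proxy does not touch the managed list, while writing the element back does:
from multiprocessing import Manager

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

if __name__ == '__main__':
    with Manager() as manager:
        trades = manager.list([Trade(5)])

        # This only mutates a local, de-serialized copy returned by the proxy:
        trades[0].order_id = 0
        print(trades[0].order_id)   # still 5

        # Fetch, modify, and assign back so the proxy sends the update to the manager:
        trade = trades[0]
        trade.order_id = 0
        trades[0] = trade
        print(trades[0].order_id)   # now 0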
I have also taken the liberty of modifying your code in several ways.
The PEP 8 Style Guide for Python Code recommends that class names be capitalized.
Since all of your process classes are constructed identically (i.e. they have identical __init__ methods), I have created an abstract base class, TestProcess, that they inherit from. All they have to do is provide a run method.
I have made these process classes daemon processes. That means they will terminate automatically when the main process terminates. I did this for demo purposes so that the program does not loop endlessly; the main process terminates after 15 seconds.
You do not need to pass the trades and lock arguments to the __init__ method of the Process class. If you were not deriving your classes from Process and just wanted, for example, your newly created process to run a function foo that takes arguments trades and lock, then you would specify p1 = Process(target=foo, args=(trades, lock)). That is the real purpose of the args argument, i.e. to be used together with the target argument. See the documentation for the threading.Thread class for details. I actually see very little value in deriving your classes from multiprocessing.Process (by not doing so there is better opportunity for reuse). But since you did, you are already setting the instance attributes self.trades and self.lock in your __init__ method, and these will be used when your run method is invoked implicitly by your call to start. There is nothing further you need to do. See the two additional code examples at the end.
from multiprocessing import Process, Manager, Lock
from time import sleep
import random
from abc import ABC, abstractmethod

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class TestProcess(Process, ABC):
    def __init__(self, trades, lock):
        Process.__init__(self, daemon=True)
        self.trades = trades
        self.lock = lock

    @abstractmethod
    def run(self):
        pass

class TestProcess2(TestProcess):
    def run(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                trade = self.trades[idx]
                trade.order_id = 0
                # We must tell the managed list that it has been updated!!!:
                self.trades[idx] = trade
            # lock.release()
            sleep(1)

class TestProcess1(TestProcess):
    def run(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(f'index = {idx}, order id = {self.trades[idx].order_id}')
            sleep(1)

class TestProcess(TestProcess):
    def run(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = TestProcess(records, lock)
        p1.start()
        p2 = TestProcess1(records, lock)
        p2.start()
        p3 = TestProcess2(records, lock)
        p3.start()
        sleep(15)  # run for 15 seconds
Using classes not derived from multiprocessing.Process
from multiprocessing import Process, Manager, Lock
from time import sleep
import random
from abc import ABC, abstractmethod

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class TestProcess(ABC):
    def __init__(self, trades, lock):
        self.trades = trades
        self.lock = lock

    @abstractmethod
    def process(self):
        pass

class TestProcess2(TestProcess):
    def process(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                trade = self.trades[idx]
                trade.order_id = 0
                # We must tell the managed list that it has been updated!!!:
                self.trades[idx] = trade
            # lock.release()
            sleep(1)

class TestProcess1(TestProcess):
    def process(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(f'index = {idx}, order id = {self.trades[idx].order_id}')
            sleep(1)

class TestProcess(TestProcess):
    def process(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        tp = TestProcess(records, lock)
        p1 = Process(target=tp.process, daemon=True)
        p1.start()
        tp1 = TestProcess1(records, lock)
        p2 = Process(target=tp1.process, daemon=True)
        p2.start()
        tp2 = TestProcess2(records, lock)
        p3 = Process(target=tp2.process, daemon=True)
        p3.start()
        sleep(15)  # run for 15 seconds
Using functions instead of classes derived from multiprocessing.Process
from multiprocessing import Process, Manager, Lock
from time import sleep
import random

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

def testprocess2(trades, lock):
    while True:
        # lock.acquire()
        print("Altering")
        for idx in range(len(trades)):
            trade = trades[idx]
            trade.order_id = 0
            # We must tell the managed list that it has been updated!!!:
            trades[idx] = trade
        # lock.release()
        sleep(1)

def testprocess1(trades, lock):
    while True:
        print("start")
        for idx in range(len(trades)):
            print(f'index = {idx}, order id = {trades[idx].order_id}')
        sleep(1)

def testprocess(trades, lock):
    while True:
        # lock.acquire()
        n = random.randint(0, 9)
        print("adding random {}".format(n))
        trades.append(Trade(n))
        # lock.release()
        # print(trades)
        sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = Process(target=testprocess, args=(records, lock), daemon=True)
        p1.start()
        p2 = Process(target=testprocess1, args=(records, lock), daemon=True)
        p2.start()
        p3 = Process(target=testprocess2, args=(records, lock), daemon=True)
        p3.start()
        sleep(15)  # run for 15 seconds

Communicating between processes in Python

I am trying to work out a solution where one process can tell another process that some values have changed.
import multiprocessing
import time

class Consumer(multiprocessing.Process):
    def __init__(self, share):
        super().__init__()
        self.share = share

    def run(self):
        print(self.share)
        self.share = "xxx"

share = "ssss"
A = Consumer(share)
B = Consumer(share)

if __name__ == '__main__':
    A = Consumer(share)
    A.start()
    time.sleep(5)
    B = Consumer(share)
    B.start()
I was expecting "xxx" to be printed when B runs, but got the initial value "ssss" instead.
After some research, I found that the multiprocessing Manager package can be used to achieve this. But due to concerns about speed (i.e. 100 processes accessing the shared value at high frequency), the lock would become a bottleneck.
Is there a way to lock the object only when changing the value, but not when reading it?
Use a manager to share objects across processes:
import multiprocessing
import time

class Consumer(multiprocessing.Process):
    def __init__(self, manager_namespace):
        super().__init__()
        self.share = manager_namespace

    def run(self):
        print(self.share.myString)
        self.share.myString = "xxx"

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    namespace = manager.Namespace()
    namespace.myString = 'sss'
    B = Consumer(namespace)
    A = Consumer(namespace)
    A.start()
    time.sleep(5)
    B = Consumer(namespace)
    B.start()
At least in my system it gives the required output.
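If the shared state can be reduced to a fixed-size primitive (here a boolean flag rather than the string in the question), a shared ctypes value gives finer control over locking than a manager: store the flag in a RawValue, which has no built-in lock, and take an explicit Lock only when writing, accepting that an unlocked read may briefly see a stale value. A rough sketch along those lines, not part of the answer above:
import multiprocessing
import time

class Consumer(multiprocessing.Process):
    def __init__(self, shared_flag, write_lock):
        super().__init__()
        self.shared_flag = shared_flag
        self.write_lock = write_lock

    def run(self):
        # Lock-free read: RawValue has no built-in lock, so this is cheap,
        # but it may briefly observe a stale value.
        print(self.shared_flag.value)
        # Take the explicit lock only when writing.
        with self.write_lock:
            self.shared_flag.value = True

if __name__ == '__main__':
    flag = multiprocessing.RawValue('b', False)   # 'b' = signed char, used as a boolean here
    lock = multiprocessing.Lock()
    A = Consumer(flag, lock)
    A.start()
    time.sleep(5)
    B = Consumer(flag, lock)
    B.start()
    A.join()
    B.join()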

Multiprocessing: Passing a class instance to pool.map

I swear I saw the following in an example somewhere, but now I can't find that example and this isn't working. The __call__ class function never gets called.
EDIT: Code updated
pool.map appears to start the QueueWriter instance and the __call__ function is reached. However, the workers never seem to start or at least no results are pulled from the queue. Is my queue set up the right way? Why do the workers not fire off?
import multiprocessing as mp
import os
import random

class QueueWriter(object):
    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        print self.path
        log = open(self.path, "a", 1)
        log.write("QueueWriter called.\n")
        while 1:
            res = q.get()
            if res == 'kill':
                self.log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                break
            else:
                self.log.write("This is where I'd write: {0} to grid file.\n".format(res))
        log.close()
        log = None

class Worker(object):
    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        res = self.workhorse(self, idx)
        self.queue.put((idx, res))
        return res

    def workhorse(self, idx):
        # in reality a fairly complex operation
        return self.grid[idx] ** self.grid[idx]

if __name__ == '__main__':
    # log = open(os.path.expanduser('~/minimal.log'), 'w',1)
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    grid = [random.random() for _ in xrange(10000)]
    # in actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter
    qWriter = QueueWriter(grid=grid, path=path)
    watcher = pool.map(qWriter, (q,), 1)
    wrkr = Worker(queue=q, grid=grid)
    result = pool.map(wrkr, range(10000), 1)
    result.get()
    q.put('kill')
    pool.close()
    pool.join()
So the log does indeed print the initialization message, but then the __call__ function is never called. Is this one of those pickling issues I've seen discussed so often? I've found answers about class member functions, but what about class instances?
At the gentle and patient prodding of martineau (thanks!) I think I've ironed out the problems. I have yet to apply it to my original code, but it is working in the example above and I'll start new questions for future implementation problems.
So in addition to changing where in the code the target file (the log, in this example) gets opened, I also started the QueueWriter instance as a single multiprocessing process rather than using pool.map. As martineau pointed out, the map call blocks until qWriter.__call__() returns, and this prevented the workers from being called.
There were some other bugs in the code above, but those were incidental and fixed below:
import multiprocessing as mp
import os
import random

class QueueWriter(object):
    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        print self.path
        log = open(self.path, "a", 1)
        log.write("QueueWriter called.\n")
        while 1:
            res = q.get()
            if res == 'kill':
                log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                break
            else:
                log.write("This is where I'd write: {0} to grid file.\n".format(res))
        log.close()
        log = None

class Worker(object):
    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        res = self.workhorse(idx)
        self.queue.put((idx, res))
        return res

    def workhorse(self, idx):
        # in reality a fairly complex operation
        return self.grid[idx] ** self.grid[idx]

if __name__ == '__main__':
    # log = open(os.path.expanduser('~/minimal.log'), 'w',1)
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    grid = [random.random() for _ in xrange(10000)]
    # in actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter
    qWriter = QueueWriter(grid=grid, path=path)
    # watcher = pool.map(qWriter, (q,),1)
    # Start the writer as a single process rather than a pool
    p = mp.Process(target=qWriter, args=(q,))
    p.start()
    wrkr = Worker(queue=q, grid=grid)
    result = pool.map(wrkr, range(10000), 1)
    # result.get()
    # not required for pool
    q.put('kill')
    pool.close()
    p.join()
    pool.join()
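An alternative that keeps everything inside the pool, sketched here as a variation rather than the poster's final solution: submit the writer with apply_async, which, unlike pool.map, returns immediately, so the Worker tasks are not blocked. This reuses the imports and the QueueWriter and Worker classes from the snippet above and assumes the pool has at least two workers so the writer does not starve them:
if __name__ == '__main__':
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    grid = [random.random() for _ in xrange(10000)]
    qWriter = QueueWriter(grid=grid, path=path)
    # apply_async is non-blocking; one pool worker runs the writer in the background
    watcher = pool.apply_async(qWriter, (q,))
    wrkr = Worker(queue=q, grid=grid)
    result = pool.map(wrkr, range(10000), 1)   # blocks until all Worker tasks finish
    q.put('kill')                              # tell the writer to shut down
    watcher.get()                              # wait for the writer to exit cleanly
    pool.close()
    pool.join()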

Cannot change class variables with multiprocessing.Process object in Python3

If I write a class with a class variable, generate two class objects, and change the value of the class variable by using a method of one of the two objects, the class variable value is of course also changed for the other object. Here's what I mean in code:
class DemoClass:
    ClassVariable = False

    def __init__(self):
        pass

    def do(self):
        print(DemoClass.ClassVariable)
        DemoClass.ClassVariable = True

class1 = DemoClass()
class1.do()  # False
class2 = DemoClass()
class2.do()  # True
However, if I do the same with multiprocessing.Process, it does not work. The class variable value will only change for the object that changed it:
import multiprocessing

class DemoProcess(multiprocessing.Process):
    ClassVariable = False

    def __init__(self):
        multiprocessing.Process.__init__(self)

    def run(self):
        print(DemoProcess.ClassVariable)
        DemoProcess.ClassVariable = True
        print(DemoProcess.ClassVariable)

if __name__ == '__main__':
    process_list = []
    p1 = DemoProcess()
    process_list.append(p1)
    p1.start()  # False True
    p2 = DemoProcess()
    process_list.append(p2)
    p2.start()  # False True; should be: True True
    for p in process_list:
        p.join()
The code behaves as if each process generates a new class variable. Am I doing something wrong?
With the help of the commenters of my original question I came to the conclusion that I had not yet understood how processes work.
Every DemoProcess.start() creates a new process which cannot share its class variables with other processes.
I solved the issue by using a multiprocessing.Value object, as Mike McKerns proposed in the comments. The value of this object can be shared with multiple processes.
import multiprocessing

class DemoProcess(multiprocessing.Process):
    def __init__(self, class_variable):
        multiprocessing.Process.__init__(self)
        self.class_variable = class_variable

    def run(self):
        print(self.class_variable.value)
        with self.class_variable.get_lock():
            self.class_variable.value = True
        print(self.class_variable.value)

if __name__ == '__main__':
    ClassVariable = multiprocessing.Value('b', False)
    process_list = []
    p1 = DemoProcess(ClassVariable)
    process_list.append(p1)
    p1.start()  # Output: 0 1
    p2 = DemoProcess(ClassVariable)
    process_list.append(p2)
    p2.start()  # Output: 1 1
    for p in process_list:
        p.join()

How to increment a shared counter from multiple processes?

I am having trouble with the multiprocessing module. I am using a Pool of workers with its map method to concurrently analyze lots of files. Each time a file has been processed, I would like to have a counter updated so that I can keep track of how many files remain to be processed. Here is sample code:
import os
import multiprocessing

counter = 0

def analyze(file):
    # Analyze the file.
    global counter
    counter += 1
    print counter

if __name__ == '__main__':
    files = os.listdir('/some/directory')
    pool = multiprocessing.Pool(4)
    pool.map(analyze, files)
I cannot find a solution for this.
The problem is that the counter variable is not shared between your processes: each separate process is creating its own local instance and incrementing that.
See this section of the documentation for some techniques you can employ to share state between your processes. In your case, you might want to share a Value instance between your workers.
Here's a working version of your example (with some dummy input data). Note it uses global values which I would really try to avoid in practice:
from multiprocessing import Pool, Value
from time import sleep

counter = None

def init(args):
    ''' store the counter for later use '''
    global counter
    counter = args

def analyze_data(args):
    ''' increment the global counter, do something with the input '''
    global counter
    # += operation is not atomic, so we need to get a lock:
    with counter.get_lock():
        counter.value += 1
    print counter.value
    return args * 10

if __name__ == '__main__':
    # inputs = os.listdir(some_directory)
    #
    # initialize a cross-process counter and the input lists
    #
    counter = Value('i', 0)
    inputs = [1, 2, 3, 4]
    #
    # create the pool of workers, ensuring each one receives the counter
    # as it starts.
    #
    p = Pool(initializer=init, initargs=(counter, ))
    i = p.map_async(analyze_data, inputs, chunksize=1)
    i.wait()
    print i.get()
Counter class without the race-condition bug:
class Counter(object):
    def __init__(self):
        self.val = multiprocessing.Value('i', 0)

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n

    @property
    def value(self):
        return self.val.value
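The answer gives only the class itself; here is a small usage sketch (my addition, not from the original) in which the Counter instance is handed to each Process at creation time, so the underlying Value is shared through inheritance:
import multiprocessing

def work(counter, n):
    for _ in range(n):
        counter.increment()

if __name__ == '__main__':
    counter = Counter()   # the Counter class defined above
    procs = [multiprocessing.Process(target=work, args=(counter, 1000))
             for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(counter.value)  # 4000: no increments were lost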
An extremely simple example, adapted from jkp's answer:
from multiprocessing import Pool, Value
from time import sleep

counter = Value('i', 0)

def f(x):
    global counter
    with counter.get_lock():
        counter.value += 1
    print("counter.value:", counter.value)
    sleep(1)
    return x

with Pool(4) as p:
    r = p.map(f, range(1000*1000))
A faster Counter class that avoids acquiring the built-in lock of Value twice, by using a RawValue with a separate Lock:
class Counter(object):
    def __init__(self, initval=0):
        self.val = multiprocessing.RawValue('i', initval)
        self.lock = multiprocessing.Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    @property
    def value(self):
        return self.val.value
https://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing
https://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.Value
https://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.RawValue
Here is a solution to your problem based on a different approach from that proposed in the other answers. It uses message passing with multiprocessing.Queue objects (instead of shared memory with multiprocessing.Value objects) and process-safe (atomic) built-in increment and decrement operators += and -= (instead of introducing custom increment and decrement methods) since you asked for it.
First, we define a class Subject for instantiating an object that will be local to the parent process and whose attributes are to be incremented or decremented:
import multiprocessing

class Subject:
    def __init__(self):
        self.x = 0
        self.y = 0
Next, we define a class Proxy for instantiating an object that will be the remote proxy through which the child processes will request the parent process to retrieve or update the attributes of the Subject object. The interprocess communication will use two multiprocessing.Queue attributes, one for exchanging requests and one for exchanging responses. Requests are of the form (sender, action, *args) where sender is the sender name, action is the action name ('get', 'set', 'increment', or 'decrement' the value of an attribute), and args is the argument tuple. Responses are of the form value (to 'get' requests):
class Proxy(Subject):
    def __init__(self, request_queue, response_queue):
        self.__request_queue = request_queue
        self.__response_queue = response_queue

    def _getter(self, target):
        sender = multiprocessing.current_process().name
        self.__request_queue.put((sender, 'get', target))
        return Decorator(self.__response_queue.get())

    def _setter(self, target, value):
        sender = multiprocessing.current_process().name
        action = getattr(value, 'action', 'set')
        self.__request_queue.put((sender, action, target, value))

    @property
    def x(self):
        return self._getter('x')

    @property
    def y(self):
        return self._getter('y')

    @x.setter
    def x(self, value):
        self._setter('x', value)

    @y.setter
    def y(self, value):
        self._setter('y', value)
Then, we define the class Decorator to decorate the int objects returned by the getters of a Proxy object in order to inform its setters whether the increment or decrement operators += and -= have been used by adding an action attribute, in which case the setters request an 'increment' or 'decrement' operation instead of a 'set' operation. The increment and decrement operators += and -= call the corresponding augmented assignment special methods __iadd__ and __isub__ if they are defined, and fall back on the assignment special methods __add__ and __sub__ which are always defined for int objects (e.g. proxy.x += value is equivalent to proxy.x = proxy.x.__iadd__(value) which is equivalent to proxy.x = type(proxy).x.__get__(proxy).__iadd__(value) which is equivalent to type(proxy).x.__set__(proxy, type(proxy).x.__get__(proxy).__iadd__(value))):
class Decorator(int):
    def __iadd__(self, other):
        value = Decorator(other)
        value.action = 'increment'
        return value

    def __isub__(self, other):
        value = Decorator(other)
        value.action = 'decrement'
        return value
Then, we define the function worker that will be run in the child processes and request the increment and decrement operations:
def worker(proxy):
    proxy.x += 1
    proxy.y -= 1
Finally, we define a single request queue to send requests to the parent process, and multiple response queues to send responses to the child processes:
if __name__ == '__main__':
    subject = Subject()
    request_queue = multiprocessing.Queue()
    response_queues = {}
    processes = []
    for index in range(4):
        sender = 'child {}'.format(index)
        response_queues[sender] = multiprocessing.Queue()
        proxy = Proxy(request_queue, response_queues[sender])
        process = multiprocessing.Process(
            target=worker, args=(proxy,), name=sender)
        processes.append(process)
    running = len(processes)
    for process in processes:
        process.start()
    while subject.x != 4 or subject.y != -4:
        sender, action, *args = request_queue.get()
        print(sender, 'requested', action, *args)
        if action == 'get':
            response_queues[sender].put(getattr(subject, args[0]))
        elif action == 'set':
            setattr(subject, args[0], args[1])
        elif action == 'increment':
            setattr(subject, args[0], getattr(subject, args[0]) + args[1])
        elif action == 'decrement':
            setattr(subject, args[0], getattr(subject, args[0]) - args[1])
    for process in processes:
        process.join()
The program is guaranteed to terminate when += and -= are process-safe. If you remove process-safety by commenting the corresponding __iadd__ or __isub__ of Decorator then the program will only terminate by chance (e.g. proxy.x += value is equivalent to proxy.x = proxy.x.__iadd__(value) but falls back to proxy.x = proxy.x.__add__(value) if __iadd__ is not defined, which is equivalent to proxy.x = proxy.x + value which is equivalent to proxy.x = type(proxy).x.__get__(proxy) + value which is equivalent to type(proxy).x.__set__(proxy, type(proxy).x.__get__(proxy) + value), so the action attribute is not added and the setter requests a 'set' operation instead of an 'increment' operation).
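To see the tagging mechanism in isolation, here is a tiny sketch (my addition, assuming the Decorator class above is in scope): += on a Decorator goes through __iadd__ and returns a new Decorator carrying only the increment amount plus an action tag, whereas += on a plain int falls back to __add__ and produces an untagged result:
d = Decorator(3)
d += 1
print(type(d).__name__, int(d), getattr(d, 'action', None))
# Decorator 1 increment   (the result carries the amount to add, not the sum)

i = 3
i += 1
print(type(i).__name__, i, getattr(i, 'action', None))
# int 4 None              (no tag, so the Proxy setter would issue a plain 'set')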
Example process-safe session (atomic += and -=):
child 0 requested get x
child 0 requested increment x 1
child 0 requested get y
child 0 requested decrement y 1
child 3 requested get x
child 3 requested increment x 1
child 3 requested get y
child 2 requested get x
child 3 requested decrement y 1
child 1 requested get x
child 2 requested increment x 1
child 2 requested get y
child 2 requested decrement y 1
child 1 requested increment x 1
child 1 requested get y
child 1 requested decrement y 1
Example process-unsafe session (non-atomic += and -=):
child 2 requested get x
child 1 requested get x
child 0 requested get x
child 2 requested set x 1
child 2 requested get y
child 1 requested set x 1
child 1 requested get y
child 2 requested set y -1
child 1 requested set y -1
child 0 requested set x 1
child 0 requested get y
child 0 requested set y -2
child 3 requested get x
child 3 requested set x 2
child 3 requested get y
child 3 requested set y -3 # the program stalls here
A more sophisticated solution, based on lock-free atomic operations, as shown in the example from the atomics library README:
from multiprocessing import Process, shared_memory
import atomics

def fn(shmem_name: str, width: int, n: int) -> None:
    shmem = shared_memory.SharedMemory(name=shmem_name)
    buf = shmem.buf[:width]
    with atomics.atomicview(buffer=buf, atype=atomics.INT) as a:
        for _ in range(n):
            a.inc()
    del buf
    shmem.close()

if __name__ == "__main__":
    # setup
    width = 4
    shmem = shared_memory.SharedMemory(create=True, size=width)
    buf = shmem.buf[:width]
    total = 10_000
    # run processes to completion
    p1 = Process(target=fn, args=(shmem.name, width, total // 2))
    p2 = Process(target=fn, args=(shmem.name, width, total // 2))
    p1.start(), p2.start()
    p1.join(), p2.join()
    # print results and cleanup
    with atomics.atomicview(buffer=buf, atype=atomics.INT) as a:
        print(f"a[{a.load()}] == total[{total}]")
    del buf
    shmem.close()
    shmem.unlink()
(atomics can be installed via pip install atomics on most major platforms.)
This is a different solution, and the simplest to my taste.
The reasoning is that you create an empty list and append to it each time your function executes, then print len(list) to check progress.
Here is an example based on your code:
import os
import multiprocessing

counter = []

def analyze(file):
    # Analyze the file.
    counter.append(' ')
    print len(counter)

if __name__ == '__main__':
    files = os.listdir('/some/directory')
    pool = multiprocessing.Pool(4)
    pool.map(analyze, files)
For future visitors, the hack to add a counter to multiprocessing is as follows:
from multiprocessing.pool import ThreadPool

counter = []

def your_function(item):
    # function/process
    counter.append(' ')  # you can append anything
    return len(counter)

pool = ThreadPool()
result = pool.map(your_function, urls)  # urls: your iterable of work items
Hope this will help.
I'm working on a progress bar in PyQt5, so I use a thread and a pool together:
import threading
import multiprocessing as mp
from queue import Queue

def multi(x):
    return x*x

def pooler(q):
    with mp.Pool() as pool:
        count = 0
        for i in pool.imap_unordered(multi, range(100)):
            print(count, i)
            count += 1
            q.put(count)

def main():
    q = Queue()
    t = threading.Thread(target=pooler, args=(q,))
    t.start()
    print('start')
    process = 0
    while process < 100:
        process = q.get()
        print('p', process)

if __name__ == '__main__':
    main()
I put this in a QThread worker and it works with acceptable latency.
