Communicating between processes in Python

I am trying to work out how one process can tell another process that some values have changed.
import multiprocessing
import time

class Consumer(multiprocessing.Process):
    def __init__(self, share):
        super().__init__()
        self.share = share

    def run(self):
        print(self.share)
        self.share = "xxx"

share = "ssss"
A = Consumer(share)
B = Consumer(share)

if __name__ == '__main__':
    A = Consumer(share)
    A.start()
    time.sleep(5)
    B = Consumer(share)
    B.start()
I expected "xxx" to be printed when B runs, but got the initial value "ssss".
After some research, I found that the multiprocessing Manager can be used to achieve this. But due to speed concerns (e.g. 100 processes accessing the shared value at high frequency), the lock would become a bottleneck.
Is there a way to lock the object when changing the value, but not when reading it?

Use a manager to share objects across processes:
import multiprocessing
import time

class Consumer(multiprocessing.Process):
    def __init__(self, manager_namespace):
        super().__init__()
        self.share = manager_namespace

    def run(self):
        print(self.share.myString)
        self.share.myString = "xxx"

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    namespace = manager.Namespace()
    namespace.myString = 'sss'

    A = Consumer(namespace)
    A.start()
    time.sleep(5)
    B = Consumer(namespace)
    B.start()
At least on my system this gives the required output.
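If the manager's proxy round-trips are a concern (as in the question's 100-process scenario), a lighter-weight option for a single primitive value is multiprocessing.Value, which lives in shared memory. The following is a minimal sketch of my own (not from the answer above, and not benchmarked): writes take the value's lock, while reads of the single int skip it, at the cost of possibly reading a slightly stale value.

import multiprocessing
import time

def writer(counter):
    for i in range(5):
        with counter.get_lock():  # lock held only while writing
            counter.value = i
        time.sleep(1)

def reader(counter):
    for _ in range(5):
        print(counter.value)      # plain read, no lock taken
        time.sleep(1)

if __name__ == '__main__':
    counter = multiprocessing.Value('i', 0)  # 'i' = C int, in shared memory
    procs = [multiprocessing.Process(target=writer, args=(counter,)),
             multiprocessing.Process(target=reader, args=(counter,))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()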

Related

Python multithreading: memory leak when using a shared object (.so)

I have a Python program that leaks memory when using a third-party shared object (.so).
I simplified my code like this:
import time
import sys
import threading
import codecs
import ctypes

sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

class TestThirdPartySo(object):
    def __init__(self):
        # this so uses thread-specific data
        self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
        self.handle = self.call_stat_so._handle

    def test_fun(self):
        self.call_stat_so.fun_xxx()

def thread_fun():
    TestThirdPartySo().test_fun()

def test_main(num):
    count = 0
    while True:
        # create 3 * num threads
        thread_num = 3
        thread_list = []
        for _ in range(thread_num):
            thread_list.append(threading.Thread(target=thread_fun))
        for thread in thread_list:
            thread.start()
        for thread in thread_list:
            thread.join()
        count += thread_num
        time.sleep(0.01)
        if count % 100 == 0:
            print("finished %s" % count)
        if count > num:
            break
    print("end !!!!")

if __name__ == '__main__':
    num = sys.argv[1]
    test_main(int(num))
Now, I know this shared object uses thread-specific data. I have tried to close the .so after calling it, like this:
class TestThirdPartySo(object):
    def __init__(self):
        # this so uses thread-specific data
        self.call_stat_so = ctypes.CDLL("./third_party_fun.so")
        self.handle = self.call_stat_so._handle

    def test_fun(self):
        self.call_stat_so.fun_xxx()

    def __del__(self):
        dlclose_func(self.handle)

def dlclose_func(_handle):
    dlclose_func_tmp = ctypes.cdll.LoadLibrary('libdl.so').dlclose
    dlclose_func_tmp.argtypes = [ctypes.c_void_p]
    dlclose_func_tmp(_handle)
But I failed to close the .so, and I'm also not sure whether the leaked memory would be freed after closing it.
If the program does not use multiple threads, or creates a fixed number of threads (a thread pool), it works fine.
For various reasons, I need to create threads continuously in my program. What can I do to prevent this memory leak?
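No answer is recorded for this question, but one workaround consistent with the question's own observation that a fixed thread pool works: route the constantly created tasks through a pool of reusable worker threads, so the library's thread-specific data is allocated only once per worker. A minimal sketch, reusing thread_fun from the code above (the name test_main_pooled is my own):

from concurrent.futures import ThreadPoolExecutor

def test_main_pooled(num):
    count = 0
    # 3 worker threads are created once and reused for every batch of tasks
    with ThreadPoolExecutor(max_workers=3) as executor:
        while count <= num:
            futures = [executor.submit(thread_fun) for _ in range(3)]
            for future in futures:
                future.result()  # wait and propagate any exception
            count += 3
    print("end !!!!")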

Changing values of list in multiprocessing

I am new to Python multiprocessing; some background on the code below: I am trying to create three processes, one to add an element to a list, one to modify elements in the list, and one to print the list.
The three processes ideally use the same list, which is in shared memory, initialized using a manager.
The problem I face is that testprocess2 is not able to set the values to 0; basically, it is not able to alter the list.
import random
from time import sleep
from multiprocessing import Process, Manager, Lock

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class testprocess2(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                self.trades[idx].order_id = 0
            # lock.release()
            sleep(1)

class testprocess1(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(self.trades[idx].order_id)
            sleep(1)

class testprocess(Process):
    def __init__(self, trades, lock):
        super().__init__(args=(trades, lock))
        self.trades = trades
        self.lock = lock

    def run(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = testprocess(records, lock)
        p1.start()
        p2 = testprocess1(records, lock)
        p2.start()
        p3 = testprocess2(records, lock)
        p3.start()
        p1.join()
        p2.join()
        p3.join()
Strictly speaking, your managed list is not in shared memory, and it is very important to understand what is going on. The actual list holding your Trade instances resides in a process that is created when you execute the Manager() call.

When you then execute records = manager.list([Trade(5)]), records is not a direct reference to that list because, as I said, we are not dealing with shared memory. It is instead a special proxy object that implements the same methods as a list. When you, for example, invoke append on this proxy object, it takes the argument you are trying to append, serializes it, and transmits it to the manager's process via either a socket or pipe, where it gets de-serialized and appended to the actual list. In short, operations on the proxy object are turned into remote method calls.
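To see this behavior in isolation, here is a minimal standalone sketch (my illustration, not your code):

from multiprocessing import Manager

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

if __name__ == '__main__':
    with Manager() as manager:
        trades = manager.list([Trade(5)])
        trades[0].order_id = 0     # mutates a local copy fetched via the proxy
        print(trades[0].order_id)  # still 5: the manager's list never saw the change
        trade = trades[0]          # fetch a copy
        trade.order_id = 0         # modify the copy
        trades[0] = trade          # store it back so the manager sees it
        print(trades[0].order_id)  # now 0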
Now for your problem. You are trying to reset the order_id attribute with the following statement:
self.trades[idx].order_id = 0
Since we are dealing with a remote list via a proxy object, the above statement unfortunately becomes the equivalent of:
trade = self.trades[idx] # fetch object from the remote list
trade.order_id = 0 # reset the order_id to 0 on the local copy
What is missing is updating the list with the newly updated trade object:
self.trades[idx] = trade
So your single update statement really needs to be replaced with the above 3-statement sequence.
I have also taken the liberty to modify your code in several ways.
The PEP8 Style Guide for Python Code recommends that class names be capitalized.
Since all of your process classes are constructed identically (i.e. they have identical __init__ methods), I have created an abstract base class, TestProcessBase, from which these classes inherit. All they have to do is provide a run method.
I have made these process classes daemon processes. That means they will terminate automatically when the main process terminates. I did this for demo purposes, so that the program does not loop endlessly; the main process will terminate after 15 seconds.
You do not need to pass the trades and lock arguments to the __init__ method of the Process class. The real purpose of the args argument is to be used together with the target argument: if you were not deriving your classes from Process and just wanted your newly created process to run a function foo taking arguments trades and lock, you would specify p1 = Process(target=foo, args=(trades, lock)). See the documentation for the threading.Thread class for details. I actually see very little value in deriving your classes from multiprocessing.Process (by not doing so there is better opportunity for reuse), but since you did, your __init__ method already sets the instance attributes self.trades and self.lock, which your run method will use when it is invoked implicitly by your calling the start method. There is nothing further you need to do. See the two additional code examples at the end.
from multiprocessing import Process, Manager, Lock
from time import sleep
import random
from abc import ABC, abstractmethod

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class TestProcessBase(Process, ABC):
    def __init__(self, trades, lock):
        Process.__init__(self, daemon=True)
        self.trades = trades
        self.lock = lock

    @abstractmethod
    def run(self):
        pass

class TestProcess2(TestProcessBase):
    def run(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                trade = self.trades[idx]
                trade.order_id = 0
                # We must tell the managed list that it has been updated!!!:
                self.trades[idx] = trade
            # lock.release()
            sleep(1)

class TestProcess1(TestProcessBase):
    def run(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(f'index = {idx}, order id = {self.trades[idx].order_id}')
            sleep(1)

class TestProcess(TestProcessBase):
    def run(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = TestProcess(records, lock)
        p1.start()
        p2 = TestProcess1(records, lock)
        p2.start()
        p3 = TestProcess2(records, lock)
        p3.start()
        sleep(15)  # run for 15 seconds
Using classes not derived from multiprocessing.Process
from multiprocessing import Process, Manager, Lock
from time import sleep
import random
from abc import ABC, abstractmethod

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

class TestProcessBase(ABC):
    def __init__(self, trades, lock):
        self.trades = trades
        self.lock = lock

    @abstractmethod
    def process(self):
        pass

class TestProcess2(TestProcessBase):
    def process(self):
        while True:
            # lock.acquire()
            print("Altering")
            for idx in range(len(self.trades)):
                trade = self.trades[idx]
                trade.order_id = 0
                # We must tell the managed list that it has been updated!!!:
                self.trades[idx] = trade
            # lock.release()
            sleep(1)

class TestProcess1(TestProcessBase):
    def process(self):
        while True:
            print("start")
            for idx in range(len(self.trades)):
                print(f'index = {idx}, order id = {self.trades[idx].order_id}')
            sleep(1)

class TestProcess(TestProcessBase):
    def process(self):
        while True:
            # lock.acquire()
            n = random.randint(0, 9)
            print("adding random {}".format(n))
            self.trades.append(Trade(n))
            # lock.release()
            # print(trades)
            sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        tp = TestProcess(records, lock)
        p1 = Process(target=tp.process, daemon=True)
        p1.start()
        tp1 = TestProcess1(records, lock)
        p2 = Process(target=tp1.process, daemon=True)
        p2.start()
        tp2 = TestProcess2(records, lock)
        p3 = Process(target=tp2.process, daemon=True)
        p3.start()
        sleep(15)  # run for 15 seconds
Using functions instead of classes derived from multiprocessing.Process
from multiprocessing import Process, Manager, Lock
from time import sleep
import random

class Trade:
    def __init__(self, id):
        self.exchange = None
        self.order_id = id

def testprocess2(trades, lock):
    while True:
        # lock.acquire()
        print("Altering")
        for idx in range(len(trades)):
            trade = trades[idx]
            trade.order_id = 0
            # We must tell the managed list that it has been updated!!!:
            trades[idx] = trade
        # lock.release()
        sleep(1)

def testprocess1(trades, lock):
    while True:
        print("start")
        for idx in range(len(trades)):
            print(f'index = {idx}, order id = {trades[idx].order_id}')
        sleep(1)

def testprocess(trades, lock):
    while True:
        # lock.acquire()
        n = random.randint(0, 9)
        print("adding random {}".format(n))
        trades.append(Trade(n))
        # lock.release()
        # print(trades)
        sleep(5)

if __name__ == "__main__":
    with Manager() as manager:
        records = manager.list([Trade(5)])
        lock = Lock()
        p1 = Process(target=testprocess, args=(records, lock), daemon=True)
        p1.start()
        p2 = Process(target=testprocess1, args=(records, lock), daemon=True)
        p2.start()
        p3 = Process(target=testprocess2, args=(records, lock), daemon=True)
        p3.start()
        sleep(15)  # run for 15 seconds

In a parent process, how to see child variables that are managed by child processes?

I defined a class Node, which runs a listener service that constantly communicates with other nodes and updates local variables. The listener is started using multiprocessing. The class looks like:
# Pseudo-code
import multiprocessing

class Node(object):
    def __init__(self, x):
        self.variable = x

    def listener(self):
        while True:
            COMMUNICATE WITH OTHERS  # Pseudo-code
            UPDATE self.variable     # Pseudo-code
            print(self.variable)     # local printer

    def run(self):
        p = multiprocessing.Process(target=self.listener)
        p.start()
In the main process, I created two nodes, a = Node(x1) and b = Node(x2), and let them run:
if __name__ == "__main__":
    x1 = 1     # for example
    x2 = 1000  # for example
    a = Node(x1)
    b = Node(x2)
    a.run()
    b.run()
    while True:
        print(a.variable)  # global printer
        print(b.variable)  # global printer
In this way, Node a communicates with Node b and updates its variables, and so does Node b.
Now I run into a problem: the local printers output the updated variable values correctly, but the global printers do not. In fact, the global printers always output the unchanged values (x1 and x2, same as the initial values).
What's wrong with the code? Or, how can the parent see the child processes' variables?
You won't be able to do that unless you use some mechanism to communicate with the parent: each child process works on its own copy of the object's memory, so updates made to self.variable in the child are invisible to the parent. I recommend using a Manager dict.
import random
import time
import multiprocessing as mp

class Child(mp.Process):
    def __init__(self, shared_variables):
        super(Child, self).__init__()
        self.shared_variables = shared_variables

    def run(self):
        for _ in range(5):  # Change shared variable values 5 times
            self.shared_variables['var'] = random.randint(0, 10)
            self.shared_variables['var1'] = random.randint(0, 10)
            time.sleep(3)

if __name__ == "__main__":
    shared_variables = mp.Manager().dict()
    child = Child(shared_variables)
    child.start()
    while True:
        print('From parent')
        for k, v in shared_variables.items():
            print(f'-> {k}: {v}')
        print('***********')
        time.sleep(3)
And the output would look like this:
From parent
-> var: 8
-> var1: 6
***********
From parent
-> var: 7
-> var1: 7
***********
....

How can I use COM and multiprocessing at the same time in Python?

I'm trying to create two processes and make them communicate with each other. One of them gets values through a COM library using win32com, and the other just gets the values from the first process via a queue and prints them out. I think the code below has no problem, but it doesn't work (the p2 process doesn't show values at all). If I just make the first process print the queue values in the same process, with
item = self.q.get()
print(item)
then it shows the values in the queue. So I think putting values into the queue is not the problem; therefore, there could be some problem in exchanging values via the queue when using win32com:
import win32com.client
import os
import multiprocessing as mp
from PyQt4.QtGui import QApplication
from datetime import datetime, timedelta

global q
q = mp.Queue()  # A queue is used to send values from p1 to p2

class RealTrEventHandler(object):
    def __init__(self):
        self.q = q

    def OnReceiveRealData(self, szTrCode):
        date = datetime.utcnow() + timedelta(hours=3)
        type = self.GetFieldData("OutBlock", "cgubun")
        appending_line = str(date) + ', ' + type  # str() needed: date is a datetime
        self.q.put(appending_line)
        # item = self.q.get()  # it prints values out if these are uncommented
        # print(item)

def ticker():
    loop = QApplication([])
    global instXASession, instXAReal
    print('TICKER: ', os.getpid())
    # When an event occurs, the RealTrEventHandler class is called
    instXAReal = win32com.client.DispatchWithEvents("XA_DataSet.XAReal", RealTrEventHandler)
    instXAReal.LoadFromResFile("C:\\eBEST\\xingAPI\\Res\\OVC.res")
    instXAReal.SetFieldData("InBlock", "symbol", "CLX17")
    loop.exec_()

class listener(mp.Process):  # listener only gets values via the queue and prints them out
    def __init__(self):
        mp.Process.__init__(self)
        self.q = q

    def run(self):
        print('CSM PID: ', os.getpid())
        while True:
            item = self.q.get()
            print(item)

if __name__ == '__main__':
    loop = QApplication([])
    print('MAIN: ', os.getpid())
    p1 = mp.Process(target=ticker, args=())
    p1.start()
    p2 = listener()
    p2.start()
    mp.freeze_support()
    loop.exec_()
Could anyone give me some advice?
Have you tried explicitly declaring q as global in your listener and RealTrEventHandler classes? E.g.:
class listener(mp.Process):
    global q

    def __init__(self):
        mp.Process.__init__(self)
        self.q = q
Another way of passing variables between (at least) threads is the builtins module, but I'm not sure whether multiprocessing is so much different.
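Another approach worth trying (a sketch of my own, with the COM details elided): pass the queue explicitly to each process instead of relying on a module-level global. Under Windows' spawn start method, module-level objects are re-created when child processes re-import the module, so each process may otherwise end up with its own separate queue:

import multiprocessing as mp

def ticker(q):
    # ... set up the COM object here and hand it q (elided) ...
    q.put('some value')
    q.put(None)                 # sentinel: tell the listener to stop

class Listener(mp.Process):
    def __init__(self, q):
        super().__init__()
        self.q = q              # the queue travels with the process object

    def run(self):
        while True:
            item = self.q.get()
            if item is None:    # sentinel received
                break
            print(item)

if __name__ == '__main__':
    q = mp.Queue()
    p1 = mp.Process(target=ticker, args=(q,))  # queue passed explicitly
    p2 = Listener(q)
    p1.start()
    p2.start()
    p1.join()
    p2.join()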

Multiprocessing: Passing a class instance to pool.map

I swear I saw the following in an example somewhere, but now I can't find that example, and this isn't working. The __call__ class method never gets called.
EDIT: Code updated
pool.map appears to start the QueueWriter instance, and the __call__ function is reached. However, the workers never seem to start, or at least no results are pulled from the queue. Is my queue set up the right way? Why do the workers not fire off?
import multiprocessing as mp
import os
import random

class QueueWriter(object):
    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        print self.path
        log = open(self.path, "a", 1)
        log.write("QueueWriter called.\n")
        while 1:
            res = q.get()
            if res == 'kill':
                self.log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                break
            else:
                self.log.write("This is where I'd write: {0} to grid file.\n".format(res))
        log.close()
        log = None

class Worker(object):
    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        res = self.workhorse(self, idx)
        self.queue.put((idx, res))
        return res

    def workhorse(self, idx):
        # in reality a fairly complex operation
        return self.grid[idx] ** self.grid[idx]

if __name__ == '__main__':
    # log = open(os.path.expanduser('~/minimal.log'), 'w', 1)
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    grid = [random.random() for _ in xrange(10000)]
    # in actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter
    qWriter = QueueWriter(grid=grid, path=path)
    watcher = pool.map(qWriter, (q,), 1)
    wrkr = Worker(queue=q, grid=grid)
    result = pool.map(wrkr, range(10000), 1)
    result.get()
    q.put('kill')
    pool.close()
    pool.join()
So the log does indeed print the initialization message, but then the __call__ function is never called. Is this one of those pickling issues I've seen discussed so often? I've found answers about class member functions, but what about class instances?
At the gentle and patient prodding of martineau (thanks!) I think I've ironed out the problems. I have yet to apply it to my original code, but it is working in the example above and I'll start new questions for future implementation problems.
So, in addition to changing where in the code the target file (the log, in this example) gets opened, I also started the QueueWriter instance as a single multiprocessing Process rather than using pool.map. As martineau pointed out, the map call blocks until qWriter.__call__() returns, and this prevented the workers from being called.
There were some other bugs in the code above, but those were incidental and fixed below:
import multiprocessing as mp
import os
import random

class QueueWriter(object):
    def __init__(self, **kwargs):
        self.grid = kwargs.get("grid")
        self.path = kwargs.get("path")

    def __call__(self, q):
        print self.path
        log = open(self.path, "a", 1)
        log.write("QueueWriter called.\n")
        while 1:
            res = q.get()
            if res == 'kill':
                log.write("QueueWriter received 'kill' message. Closing Writer.\n")
                break
            else:
                log.write("This is where I'd write: {0} to grid file.\n".format(res))
        log.close()
        log = None

class Worker(object):
    def __init__(self, **kwargs):
        self.queue = kwargs.get("queue")
        self.grid = kwargs.get("grid")

    def __call__(self, idx):
        res = self.workhorse(idx)
        self.queue.put((idx, res))
        return res

    def workhorse(self, idx):
        # in reality a fairly complex operation
        return self.grid[idx] ** self.grid[idx]

if __name__ == '__main__':
    # log = open(os.path.expanduser('~/minimal.log'), 'w', 1)
    path = os.path.expanduser('~/minimal.log')
    pool = mp.Pool(mp.cpu_count())
    manager = mp.Manager()
    q = manager.Queue()
    grid = [random.random() for _ in xrange(10000)]
    # in actuality grid is a shared resource, read by Workers and written
    # to by QueueWriter
    qWriter = QueueWriter(grid=grid, path=path)
    # watcher = pool.map(qWriter, (q,), 1)
    # Start the writer as a single process rather than a pool
    p = mp.Process(target=qWriter, args=(q,))
    p.start()
    wrkr = Worker(queue=q, grid=grid)
    result = pool.map(wrkr, range(10000), 1)
    # result.get()
    # not required for pool
    q.put('kill')
    pool.close()
    p.join()
    pool.join()
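A further variant (a sketch under the assumption that the pool is given a spare worker slot; it is not part of the fix above): keep the writer in the pool but submit it with apply_async, which, unlike pool.map, returns immediately instead of blocking. Reusing qWriter, Worker, q, grid, and mp from the listing above, the main block would become:

pool = mp.Pool(mp.cpu_count() + 1)         # one extra slot for the writer
watcher = pool.apply_async(qWriter, (q,))  # non-blocking, unlike pool.map
wrkr = Worker(queue=q, grid=grid)
result = pool.map(wrkr, range(10000), 1)
q.put('kill')
pool.close()
pool.join()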
