Fastest way to share big object between different Process of python - python

Supose I have 3 different Process that do different logic in a forever loop. I want to run all of them in parallel and while each Process can access a shared_object, which is a heavy object of a class. So I tried using multiprocessing with a manger to archive it like this:
import multiprocessing
import inspect
from multiprocessing.managers import BaseManager, NamespaceProxy
import time
import random
class SharedObject():
def __init__(self):
self.a = 1
def show_a(self):
print(self.a)
class ProcessManager(BaseManager):
pass
class ProxyBase(NamespaceProxy):
_exposed_ = ('__getattribute__', '__setattr__', '__delattr__')
class ManagerProxy(ProxyBase):
pass
def register_proxy(name, cls, proxy):
for attr in dir(cls):
if callable(getattr(cls, attr)) and not attr.startswith("__"):
proxy._exposed_ += (attr,)
setattr(proxy, attr,
lambda s: object.__getattribute__(s, '_callmethod')(attr))
ProcessManager.register(name, cls, proxy)
register_proxy('shared_object', SharedObject, ManagerProxy)
process_manager = ProcessManager()
process_manager.start()
shared_object = process_manager.shared_object()
def process_1():
while True:
print('Process 1 see {}'.format(shared_object.a))
shared_object.a = 1
time.sleep(1)
def process_2():
while True:
print('Process 2 see {}'.format(shared_object.a))
shared_object.a = 2
time.sleep(1)
def process_3():
while True:
print('Process 3 see {}'.format(shared_object.a))
shared_object.a = 3
if random.randint(0,1) == 1:
shared_object.show_a()
time.sleep(1)
first_process = multiprocessing.Process(name="First process", target=process_1)
first_process.start()
second_process = multiprocessing.Process(name="Second process", target=process_2)
second_process.start()
third_process = multiprocessing.Process(name="Third process", target=process_3)
third_process.start()
shared_object.show_a()
while True:
time.sleep(10)
It works but too slow for me since I have to pass around big numpy array. Are there any other ways to make this faster (real-time speed)? Thanks a lot

It looks like it's the problem solved by multiprocessing.shared_memory, but a) it looks like it's only python 3.8+ and b) the code would need to be restructured, at the very least:
assigning the right size
passing the name of the shared object to the processes
and remembering to close it at the end
EDIT:
Since I couldn't get it to work with python 3.7, I decided to use it with the shared memory primitives in 3.5+, Array (and Value, it could be what you need). The following code runs happily:
import time
import random
from multiprocessing import Process, Array
s1 = Array('i', [1])
def process_1():
while True:
print('Process 1 see {}'.format(s1[0]))
s1[0] = 1
time.sleep(1)
def process_2():
while True:
print('Process 2 see {}'.format(s1[0]))
s1[0] = 2
time.sleep(1)
def process_3():
while True:
print('Process 3 see {}'.format(s1[0]))
s1[0] = 3
if random.randint(0,1) == 1:
print(s1[0])
time.sleep(1)
first_process = Process(name="First process", target=process_1)
first_process.start()
second_process = Process(name="Second process", target=process_2)
second_process.start()
third_process = Process(name="Third process", target=process_3)
third_process.start()
while True:
time.sleep(10)
Getting
Process 1 see 1
Process 2 see 1
Process 3 see 1
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
Process 1 see 3
Process 2 see 1
Process 3 see 2
3
[...]
I would still pass the array to the processes, something like:
def process_1(shared):
...
and then
Process(name="First process", args=(s1), target=process_1)
to make it clearer what each process is working on, though.
Also, since I've not tried it with BIG objects, I am not really sure how it would fare...

Related

Python 3 Limit count of active threads (finished threads do not quit)

I want to limit the number of active threads. What i have seen is, that a finished thread stays alive and does not exit itself, so the number of active threads keep growing until an error occours.
The following code starts only 8 threads at a time but they stay alive even when they finished. So the number keeps growing:
class ThreadEx(threading.Thread):
__thread_limiter = None
__max_threads = 2
#classmethod
def max_threads(cls, thread_max):
ThreadEx.__max_threads = thread_max
ThreadEx.__thread_limiter = threading.BoundedSemaphore(value=ThreadEx.__max_threads)
def __init__(self, target=None, args:tuple=()):
super().__init__(target=target, args=args)
if not ThreadEx.__thread_limiter:
ThreadEx.__thread_limiter = threading.BoundedSemaphore(value=ThreadEx.__max_threads)
def run(self):
ThreadEx.__thread_limiter.acquire()
try:
#success = self._target(*self._args)
#if success: return True
super().run()
except:
pass
finally:
ThreadEx.__thread_limiter.release()
def call_me(test1, test2):
print(test1 + test2)
time.sleep(1)
ThreadEx.max_threads(8)
for i in range(0, 99):
t = ThreadEx(target=call_me, args=("Thread count: ", str(threading.active_count())))
t.start()
Due to the for loop, the number of threads keep growing to 99.
I know that a thread has done its work because call_me has been executed and threading.active_count() was printed.
Does somebody know how i make sure, a finished thread does not stay alive?
This may be a silly answer but to me it looks you are trying to reinvent ThreadPool.
from multiprocessing.pool import ThreadPool
from time import sleep
p = ThreadPool(8)
def call_me(test1):
print(test1)
sleep(1)
for i in range(0, 99):
p.apply_async(call_me, args=(i,))
p.close()
p.join()
This will ensure only 8 concurrent threads are running your function at any point of time. And if you want a bit more performance, you can import Pool from multiprocessing and use that. The interface is exactly the same but your pool will now be subprocesses instead of threads, which usually gives a performance boost as GIL does not come in the way.
I have changed the class according to the help of Hannu.
I post it for reference, maybe it's useful for others that come across this post:
import threading
from multiprocessing.pool import ThreadPool
import time
class MultiThread():
__thread_pool = None
#classmethod
def begin(cls, max_threads):
MultiThread.__thread_pool = ThreadPool(max_threads)
#classmethod
def end(cls):
MultiThread.__thread_pool.close()
MultiThread.__thread_pool.join()
def __init__(self, target=None, args:tuple=()):
self.__target = target
self.__args = args
def run(self):
try:
result = MultiThread.__thread_pool.apply_async(self.__target, args=self.__args)
return result.get()
except:
pass
def call_me(test1, test2):
print(test1 + test2)
time.sleep(1)
return 0
MultiThread.begin(8)
for i in range(0, 99):
t = MultiThread(target=call_me, args=("Thread count: ", str(threading.active_count())))
t.run()
MultiThread.end()
The maximum of threads is 8 at any given time determined by the method begin.
And also the method run returns the result of your passed function if it returns something.
Hope that helps.

Is there any replacement for empty while loops?

I'm using empty while loops a lot, for example:
I have a thread running in the background that will change a value called "a" in 5 seconds. however, I'm using a different function at the same time, and I want to let the second function know that the value has changed, so what I always did was:
import threading, time
class example:
def __init__(self):
self.a = 0
def valchange(self):
time.sleep(5)
self.a += 1
time.sleep(1)
print("im changing the a value to " + str(self.a))
print("those print commands needs to run after notifier stopped his while and started printing")
def notifier(exam :example, num :int):
while(exam.a != num):
pass
print("it changed to " + str(num))
exa = example()
i = 1
while(i <= 16):
temp= threading.Thread(target=notifier, args=(exa, i, ))
temp.start()
i += 3
i = 1
while(i <= 16):
exa.valchange()
i += 1
It's important to mention, that example could not use wait and set to an event, because there is no indication to when you need to run set, and how much threads are running in the background, and even what numbers will have a thread waiting for them to change.
And also you can't use join because changing 'a' is not a sign to print, only the condition is the sign.
Async and select can't help me as well because of the last reason.
Is there any way to create something, that will stop the program fromrunning until the condition will become true? you can provide your solution with any programming language you want, but mainly I'm using python 3.
EDIT: please remember that I need it to work with every condition. And my code example- is only an example, so if something works there, it doesn't necessarily will work with a different condition.
Thank you very much in advance :)
Idea:
wait(a == 5) // will do nothing until a == 5
You need to use select or epoll system calls if you're waiting for some system operation to finish. In case you're waiting for a certain IO event, then you can use asyncio (provided your Python version > 3.3), otherwise you could consider twisted.
If you're doing some CPU bound operations you need to consider multiple processes or threads, only then you can do any such monitoring effectively. Having a while loop running infinitely without any interruption is a disaster waiting to happen.
If your thread only changes a's value once, at the end of its life, then you can use .join() to wait for the thread to terminate.
import threading
import time
class example:
def __init__(self):
self.a = 0
self.temp = threading.Thread(target=self.valchange)
self.temp.start()
self.notifier()
def valchange(self):
time.sleep(5)
self.a = 1
def notifier(self):
self.temp.join()
print("the value of a has changed")
example()
If the thread might change a's value at any point in its lifetime, then you can use one of the threading module's more generalized control flow objects to coordinate execution. For instance, the Event object.
import threading
import time
class example:
def __init__(self):
self.a = 0
self.event = threading.Event()
temp = threading.Thread(target=self.valchange)
temp.start()
self.notifier()
def valchange(self):
time.sleep(5)
self.a = 1
self.event.set()
def notifier(self):
self.event.wait()
print("the value of a has changed")
example()
One drawback to this Event approach is that the thread target has to explicitly call set() whenever it changes the value of a, which can be irritating if you change a several times in your code. You could automate this away using a property:
import threading
import time
class example(object):
def __init__(self):
self._a = 0
self._a_event = threading.Event()
temp = threading.Thread(target=self.valchange)
temp.start()
self.notifier()
#property
def a(self):
return self._a
#a.setter
def a(self, value):
self._a = value
self._a_event.set()
def valchange(self):
time.sleep(5)
self.a = 1
def notifier(self):
self._a_event.wait()
print("the value of a has changed")
example()
Now valchange doesn't have to do anything special after setting a's value.
What you are describing is a spin lock, and might be fine, depending on your use case.
The alternative approach is to have the code you are waiting on call you back when it reaches a certain condition. This would require an async framework such as https://docs.python.org/3/library/asyncio-task.html
There are some nice simple examples in those docs so I won't insult your intelligence by pasting them here.

python - update thread variable

how do I update a variable inside a running thread, which is an infinite loop based on such variable?
a simplified version of what I tried is what follows, to no results of course, and I can't find any pointer.
import some_module as mod
import threading
class thr (threading.Thread):
NUM = 5 # set a default value to start the script
def run (self):
mod.NUM = NUM
mod.main_loop()
try:
thr().start()
time.sleep(1)
thr().NUM = 2
time.sleep(1)
thr().NUM = 6
time.sleep(1)
thr().NUM = 8
The problem is that you're creating a new thread each time you "call" (i.e. instantiate) thr. Change your code to
t = thr()
t.start()
time.sleep(1)
t.NUM = 2
time.sleep(1)
t.NUM = 6
time.sleep(1)
t.NUM = 8
time.sleep(1)
Maybe try use queue for change NUM variable.
https://docs.python.org/2/library/queue.html
Check examples here :
https://pymotw.com/2/Queue/
Generally speakinig the queue allows You send data between threads. Use get() for getting data from queue and put() for put data to queue.

class method process is not changing objects' attributes

It's my second day in Python, I found it's a really cool language and I want to try different things in it.
Is it possible to call an object and create a daemon of that object's method which would change the objects attributes?
from multiprocessing import Process
import time
class Foo(object):
def __init__(self):
self.number = 1
# this attribute...
def loop(self):
while 1:
print self.number
# ...is changed here
self.number += 1
time.sleep(1)
if __name__ == '__main__':
f = Foo()
p = Process(target=f.loop)
p.deamon = True # this makes it work in the background
p.start()
# proceed with the main loop...
while 1:
time.sleep(1)
print f.number * 10
The result:
1
10
2
10
3
10
4
10
...
Why doesn't f.loop() change the self.number of f? They are both part of the same class Foo().
What can I change to receive this output:
1
10
2
20
3
30
4
40
...
/edit 1:
I tried this, with the same result (why?):
class Foo(Process):
def __init__(self):
super(Foo, self).__init__()
self.daemon = True # is daemon
self.number = 1
self._target = self.loop # on start() it will run loop()
def loop(self):
while 1:
print self.number
self.number += 1
time.sleep(1)
if __name__ == '__main__':
f = Foo() # is now Process
f.start() # runs f.loop()
while 1:
time.sleep(1)
print f.number * 10
Same output as before.
You're using multiprocessing. The short (and somewhat simplified) answer is that processes to do not share memory by default. Try using threading instead.
If you're hell bent on experimenting with shared memory and processes then look at sharing state in the documentation on multiprocessing.
Also daemon doesn't do what you think it does. If a process creates children then it will attempt to kill all it's daemonic children when it exits. All Processes will work in the background, you just need to start them.

Python - start two processes to run indefinitely

I have a simple example script constructed that defines three separate processes using multiprocessing in python. My objective is to have one parent thread that spawns two smaller threads that will collect and process data.
Currently, my implementation looks like this:
from Queue import Queue,Empty
from multiprocessing import Process
import time
import hashlib
class FillQueue(Process):
def __init__(self,q):
Process.__init__(self)
self.q = q
def run(self):
i = 0
while i is not 5:
print 'putting'
self.q.put('foo')
i+=1
self.q.put('|STOP|')
class ConsumeQueue(Process):
def __init__(self,q):
Process.__init__(self)
self.q = q
def run(self):
print 'Consume'
while True:
try:
value = self.q.get(False)
print value
if value == '|STOP|':
print 'done'
break;
except Empty:
print 'Nothing to process atm'
class Ripper(Process):
q = Queue()
def __init__(self):
self.fq = FillQueue(self.q)
self.cq = ConsumeQueue(self.q)
self.fq.daemon = True
self.cq.daemon = True
def run(self):
try:
self.fq.start()
self.cq.start()
except KeyboardInterrupt:
print 'exit'
if __name__ == '__main__':
r = Ripper()
r.start()
As it runs presently, the output from the script on CLI looks like this:
putting
putting
putting
putting
putting
Consume
foo
foo
foo
foo
foo
|STOP|
done
Obviously, the way I am starting my two threads is blocking, since the consumer doesn't even begin to process the items in the queue until the filler finishes adding items.
How should I rewrite this to make both threads begin immediately and not block, so the consumer will simply pass to the Empty except block while there is no work to process, but will exit completely when it receives the stop message?
EDIT: typo, had the start and run methods mixed up
You seem to be starting multiple processes using multiprocessing.Process.
However, you are using Queue.Queue which is only threadsafe, and not designed to be used by multiple processes.
shevek's answer is valid as well, but as a start, you should replace Queue.Queue with multiprocessing.Queue.
try this:
from Queue import Empty
from multiprocessing import Process, Queue
import time
import hashlib
class FillQueue(object):
def __init__(self, q):
self.q = q
def run(self):
i = 0
while i < 5:
print 'putting'
self.q.put('foo %d' % i )
i+=1
time.sleep(.5)
self.q.put('|STOP|')
class ConsumeQueue(object):
def __init__(self, q):
self.q = q
def run(self):
while True:
try:
value = self.q.get(False)
print value
if value == '|STOP|':
print 'done'
break;
except Empty:
print 'Nothing to process atm'
time.sleep(.2)
if __name__ == '__main__':
q = Queue()
f = FillQueue(q)
c = ConsumeQueue(q)
p1 = Process(target=f.run)
p1.start()
p2 = Process(target=c.run)
p2.start()
p1.join()
p2.join()
I think your program works fine. The CPU processes only one thing at a time, for a short time. However, the time required to put all your stuff in the queue is very short. So there is no reason that the filler cannot do this in one time slice.
If you add some delays in the filler, I think you should see that it actually works as you expect.

Categories

Resources