Keep static class members in Python multiprocessing

I'm trying to keep a "static" class-level multiprocessing Queue across multiple Processes, but it appears that this attribute is not copied into the newly spawned process. Is there a way to keep it without storing it on the derived process instances (i.e. without self.q = A.q)?
main.py
from class_b import B
if __name__ == "__main__":
    b = B()
    b.start()
    while True:
        pass
class_a.py
from multiprocessing import Process, Queue
class A(Process):
    q = Queue()

    def __init__(self) -> None:
        super().__init__(daemon=True)
class_b.py
from multiprocessing import Process
from class_a import A
class B(Process):
    def __init__(self):
        super().__init__(daemon=True)
        print(A.q)

    def run(self):
        print(A.q)
console
<multiprocessing.queues.Queue object at 0x000001F77851B280>
<multiprocessing.queues.Queue object at 0x0000023C420C2580>

When you import class_a.py in the parent to access A.q, the child spawned by multiprocessing imports it again in its own process, so you end up with two separate Queue objects. Instead, create the queue as a local in "main" and pass it into B.
from class_b import B
from multiprocessing import Queue
if __name__ == "__main__":
    q = Queue()
    b = B(q)
    b.start()
    while True:
        pass
Then make B store that reference for itself:
from multiprocessing import Process
class B(Process):
    def __init__(self, q):
        super().__init__(daemon=True)
        print(q)
        self.q = q

    def run(self):
        print(self.q)
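To convince yourself that both processes now share one queue, you can pass data through it. Here is a minimal round-trip sketch (my own illustration, not part of the original question), with B extended to put a message in run():

# main.py -- hypothetical round-trip check
from multiprocessing import Process, Queue

class B(Process):
    def __init__(self, q):
        super().__init__(daemon=True)
        self.q = q

    def run(self):
        self.q.put("hello from the child")  # travels through the one shared queue

if __name__ == "__main__":
    q = Queue()
    b = B(q)
    b.start()
    print(q.get())  # prints "hello from the child"
    b.join()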

Related

Change class object in Python multiprocessing

I feel like I am missing something very simple, but I still cannot figure out how to achieve the result after reading the docs of the multiprocessing package. All I want is to set a class attribute (a property) in a separate process and get it back in the main process. What I tried:
from multiprocessing import Process, Queue
class B:
    def __init__(self):
        self.attr = 'hello'

def worker(queue):
    b = B()
    setattr(b.__class__, 'prop', property(lambda b: b.attr))
    assert b.prop
    queue.put(b)
queue = Queue()
p = Process(target=worker, args=(queue,))
p.start()
res = queue.get()
p.join()
assert hasattr(res, 'prop')
So the property "prop" just disappears. What is the proper way to return it? I am using Windows 10.
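Why this happens: queue.put(b) pickles the instance, and unpickling it in the main process rebuilds it from the main process's own copy of the class B, which never had prop patched onto it; class-level changes made inside the worker do not travel with the pickled instance. One possible workaround, sketched below under that assumption, is to define the property in the class body at module level so every process sees it:

from multiprocessing import Process, Queue

class B:
    def __init__(self):
        self.attr = 'hello'

    # defined in the class body, so the main process's B has it too
    prop = property(lambda self: self.attr)

def worker(queue):
    b = B()
    assert b.prop
    queue.put(b)

if __name__ == '__main__':
    queue = Queue()
    p = Process(target=worker, args=(queue,))
    p.start()
    res = queue.get()
    p.join()
    assert hasattr(res, 'prop')  # now holds in the main process as well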

Python multiprocessing, share class instance does not work

I want to send tasks to the pool from inside the shared class based on some conditions, but I got some unexpected results, which are shown below.
• Why is len(self.map) 0 and not 100?
• Do I have to restructure my code to achieve this goal?
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
pool = None
def doSomething(obj, *args):
    obj.doSomething(*args)

class SharedClass:
    def __init__(self):
        global pool
        self.map = set()
        pool = Pool(4)

    def someCondition(self):
        # the condition relies on the instance; this is just an example
        return True

    def go(self, n):
        global pool
        for i in xrange(n):
            if self.someCondition():
                # pass the shared class to another process
                pool.apply_async(doSomething, (self, i))
        pool.close()
        pool.join()
        # got AssertionError here
        # why is the len of self.map 0?
        assert len(self.map) == 100

    def doSomething(self, n):
        # this should change the same SharedClass instance?
        self.map.add(n)

class MyManager(BaseManager):
    pass

MyManager.register("SharedClass", SharedClass)

def main():
    manager = MyManager()
    manager.start()
    obj = manager.SharedClass()
    obj.go(100)

if __name__ == "__main__":
    main()
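A likely reason for the AssertionError: pool.apply_async pickles self, so every pool worker mutates its own private copy of the instance, and the map held by the instance inside the manager process is never touched. The same copy semantics can be seen without the manager at all; a minimal Python 3 illustration with made-up names (Holder, add_item):

from multiprocessing import Pool

class Holder:
    def __init__(self):
        self.items = set()

def add_item(holder, n):
    holder.items.add(n)        # mutates a pickled copy inside the worker
    return len(holder.items)   # the change is only visible in that worker

if __name__ == "__main__":
    h = Holder()
    with Pool(2) as pool:
        pool.starmap(add_item, [(h, i) for i in range(4)])
    print(len(h.items))        # 0 -- the parent's instance was never modified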

Python refactoring with thread and queue

I was trying to restructure my code; the first version is here.
What I want is to run two objects concurrently.
from queue import Queue
from threading import Thread
from html.parser import HTMLParser
import urllib.request
NUMBER_OF_THREADS = 3
HOSTS = ["http://yahoo.com", "http://google.com", "http://ibm.com"]
class MyHTMLParser(HTMLParser):
    def handle_starttag(self, tag, attrs):
        print("Start tag:", tag)
        for attr in attrs:
            print("\tattr:", attr)

class ProducerThread(Thread):
    def __init__(self, queue):
        super(ProducerThread, self).__init__()
        self.queue = queue

        def run(self):
            while True:
                for host in HOSTS:
                    url = urllib.request.urlopen(host)
                    content = str(url.read(4096))
                    queue.put(content)

class ConsumerThread(Thread):
    def __init__(self, queue):
        super(ConsumerThread, self).__init__()
        self.queue = queue

        def run(self):
            while True:
                item = queue.get()
                parser = MyHTMLParser()
                new_con = parser.feed(item)
                print(new_con)
                queue.task_done()

if __name__ == '__main__':
    queue = Queue()
    p = ProducerThread(queue)
    c = ConsumerThread(queue)
    p.start()
    c.start()
When I run the code from the terminal there is no output. What should I change?
Unindent the run methods so that they are not inside the __init__ methods; as written, the classes never override Thread.run, so start() has nothing to do.
Note however that you almost certainly don't want those loops to run forever; remove the while True. A corrected sketch is shown below.
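For reference, here is one way the corrected code could look (a sketch, not the poster's final version: the producer walks HOSTS once, while the consumer is kept as a daemon thread that blocks on queue.get(), and queue.join() waits for every item to be parsed):

from queue import Queue
from threading import Thread
from html.parser import HTMLParser
import urllib.request

HOSTS = ["http://yahoo.com", "http://google.com", "http://ibm.com"]

class MyHTMLParser(HTMLParser):
    def handle_starttag(self, tag, attrs):
        print("Start tag:", tag)
        for attr in attrs:
            print("\tattr:", attr)

class ProducerThread(Thread):
    def __init__(self, queue):
        super(ProducerThread, self).__init__()
        self.queue = queue

    def run(self):                      # a method of the class, not nested in __init__
        for host in HOSTS:              # no endless while True here
            url = urllib.request.urlopen(host)
            content = str(url.read(4096))
            self.queue.put(content)

class ConsumerThread(Thread):
    def __init__(self, queue):
        super(ConsumerThread, self).__init__(daemon=True)
        self.queue = queue

    def run(self):
        while True:                     # daemon thread: blocks on get() and dies with the program
            item = self.queue.get()
            parser = MyHTMLParser()
            parser.feed(item)           # feed() returns None; the parser itself prints the tags
            self.queue.task_done()

if __name__ == '__main__':
    queue = Queue()
    p = ProducerThread(queue)
    c = ConsumerThread(queue)
    p.start()
    c.start()
    p.join()
    queue.join()                        # wait until every downloaded page has been parsed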

How to control the maximum concurrently running processes?

There are 5 files: main.py, worker.py, cat.py, dog.py and rabbit.py. cat, dog and rabbit inherit from Worker and implement worker_run().
In main.py I prepare 3 processes to execute, but I don't know how to limit the number of processes running concurrently at the same time (e.g. 2 processes).
I have tried using multiprocessing.Pool, but it seems to only support functions defined outside a class (?).
main.py:
from multiprocessing import Process
from cat import *
from dog import *
from rabbit import *
p1 = cat()
p2 = dog()
p3 = rabbit()
p1.start()
p2.start()
p3.start()

p1.join()
p2.join()
p3.join()
worker.py:
import multiprocessing
class Worker(multiprocessing.Process):
    def __init__(self):
        multiprocessing.Process.__init__(self)
        print "Init"
        self.value = None

    def run(self):
        print "Running"
        self.worker_run()

    # abc.abstractmethod
    def worker_run(self):
        """ implement """
        return
cat.py:
from worker import *
class cat(Worker):
    def worker_run(self):
        for i in range(10000):
            print "cat run"
dog.py:
from worker import *
class dog(Worker):
    def worker_run(self):
        for i in range(10000):
            print "dog run"
rabbit.py:
from worker import *
class rabbit(Worker):
    def worker_run(self):
        for i in range(10000):
            print "rabbit run"
If you want to let at most two methods run concurrently and block the third one until one of the others has stopped, you have to use a Semaphore.
You must pass the semaphore to the object methods so that they can acquire it.
In your main file you create the semaphore and pass it to the objects:
from multiprocessing import Process, Semaphore
from cat import *
from dog import *
from rabbit import *
semaphore = Semaphore(2) # at most 2 processes running concurrently
p1 = cat(semaphore)
p2 = dog(semaphore)
p3 = rabbit(semaphore)
p1.start()
p2.start()
p3.start()
p1.join()
p2.join()
p3.join()
you can then modify the Worker class to acquire the semaphore before running worker_run:
class Worker(multiprocessing.Process):
    def __init__(self, semaphore):
        multiprocessing.Process.__init__(self)
        print "Init"
        self.value = None
        self.semaphore = semaphore   # store the semaphore on the instance

    def run(self):
        with self.semaphore:         # held while worker_run executes
            print "Running"
            self.worker_run()

    # abc.abstractmethod
    def worker_run(self):
        """ implement """
        return
This should ensure that at most 2 worker_run methods are running concurrently.
In fact, I believe you are making things more complex than they ought to be. You do not have to subclass Process; you can achieve exactly the same functionality using the target argument:
from multiprocessing import Process, Semaphore
from cat import Cat
from dog import Dog
from rabbit import Rabbit
semaphore = Semaphore(2)
cat = Cat()
dog = Dog()
rabbit = Rabbit()
def run(animal, sema):
    with sema:
        animal.worker_run()
cat_proc = Process(target=run, args=(cat, semaphore))
dog_proc = Process(target=run, args=(dog, semaphore))
rabbit_proc = Process(target=run, args=(rabbit, semaphore))
cat_proc.start()
dog_proc.start()
rabbit_proc.start()
cat_proc.join()
dog_proc.join()
rabbit_proc.join()
In fact, with a little change you can get rid of the Semaphore and simply use a Pool object:
from multiprocessing import Pool
from cat import Cat
from dog import Dog
from rabbit import Rabbit
cat = Cat()
dog = Dog()
rabbit = Rabbit()
def run(animal):
    animal.worker_run()
pool = Pool(2)
pool.map(run, [cat, dog, rabbit])
The problem you had is that you cannot pass a method as the target argument, or as the callable to Pool.map, because methods cannot be pickled (see What can be pickled and unpickled?). The multiprocessing module uses the pickle protocol to communicate between processes, so everything it handles must be picklable.
In particular, the standard workaround for unpicklable methods is to use a module-level function to which you explicitly pass the instance as the first argument, as I did above. This is exactly what happens with method calls anyway, but there it is done automatically by the interpreter; here you have to handle it explicitly.
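To make that last point concrete, here is a tiny illustration with a made-up Greeter class (not from the original question):

class Greeter:
    def greet(self):
        return "hello"

g = Greeter()

# A normal bound-method call: the interpreter passes g implicitly...
print(g.greet())

# ...which is equivalent to calling the function on the class and passing
# the instance explicitly, just as the module-level run(animal) wrapper
# above does for worker_run.
print(Greeter.greet(g))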

Multiprocessing using imported modules

I was wondering if multiprocessing can be confined to separate Python modules. For example, if I have a Python module that uses multiprocessing like so:
# example.py
from multiprocessing import Process, Queue  # needed for Process and Queue below

def f(q, a, m='No'):
    print m
    q.put(a)

if __name__ == '__main__':
    a = '123'
    m = 'Yes'
    q = Queue()
    p = Process(target=f, args=(q, a, m))
    p.start()
    print q.get()
    p.join()
Is there any way of using this in another script as a module via import, whilst still retaining the multiprocessing:
#Call from another script
import example
example.f(q, a)
>>> 'Yes' #Confirmation that multiprocessing was used
Yes, you can accomplish this by either creating a class or a function. You can import either into another script.
Here is an example with a class:
# example.py
from multiprocessing import Process
class Example(object):
    def __init__(self, queue):
        """
        #type queue: multiprocessing.Queue
        """
        self.q = queue

    def run(self, a, m=None):
        p = Process(target=self.f, args=(a, m))
        p.start()
        print self.q.get()
        p.join()

    def f(self, a, m='No'):
        print m
        self.q.put(a)
Then import from your example:
>>> from multiprocessing import Queue
>>> from example import Example
>>> q = Queue()
>>> e = Example(q)
>>> e.run('123', m='Yes')
Yes
123
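The answer mentions that a plain function works just as well as a class. A function-based sketch along the same lines (Python 3 style, with a wrapper name run_f of my own choosing) might look like this:

# example.py (function-based variant)
from multiprocessing import Process, Queue

def f(q, a, m='No'):
    print(m)
    q.put(a)

def run_f(a, m=None):
    q = Queue()
    p = Process(target=f, args=(q, a, m))
    p.start()
    print(q.get())
    p.join()

Importing it and calling example.run_f('123', m='Yes') would then print Yes from the child process followed by 123 from the parent, confirming that the work happened in a separate process.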
