AttributeError: Can't get attribute 'journalerReader' on <module '__mp_main__'> - python

I tried to implement the LMAX Disruptor pattern in Python, handling the data in several processes:
import disruptor
import multiprocessing
import random

if __name__ == '__main__':
    cb = disruptor.CircularBuffer(5)

    def receiveWriter():
        while(True):
            n = random.randint(5,20)
            cb.receive(n)
    def ReplicatorReader():
        while(True):
            cb.replicator()
    def journalerReader():
        while(True):
            cb.journaler()
    def unmarshallerReader():
        while(True):
            cb.unmarshaller()
    def consumeReader():
        while(True):
            print(cb.consume())

    p1 = multiprocessing.Process(name="p1",target=ReplicatorReader)
    p1.start()
    p0 = multiprocessing.Process(name="p0",target=receiveWriter)
    p0.start()
    p1 = multiprocessing.Process(name="p1",target=ReplicatorReader)
    p1.start()
    p2 = multiprocessing.Process(name="p2",target=journalerReader)
    p2.start()
    p3 = multiprocessing.Process(name="p3",target=unmarshallerReader)
    p3.start()
    p4 = multiprocessing.Process(name="p4",target=consumeReader)
    p4.start()
but I get this error:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "<string>", line 1, in <module>
File "C:\Program Files\Python39\lib\multiprocessing\spawn.py", line 116, in spawn_main
File "C:\Program Files\Python39\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
exitcode = _main(fd, parent_sentinel)
File "C:\Program Files\Python39\lib\multiprocessing\spawn.py", line 126, in _main
File "C:\Program Files\Python39\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'unmarshallerReader' on <module '__mp_main__' from 'd:\\python\\RunDisruptor.py'>
AttributeError: Can't get attribute 'consumeReader' on <module '__mp_main__' from 'd:\\python\\RunDisruptor.py'>

Your first problem is that the target of a Process call cannot be within the if __name__ == '__main__': block: under Windows the spawn start method re-imports your script in each child process, and names defined only inside that block do not exist in the re-imported module, which is exactly the AttributeError you are seeing. But:
As I mentioned in an earlier post of yours, the only way I see that you can share an instance of CircularBuffer across multiple processes is to implement a managed class, which surprisingly is not all that difficult to do. But when you create a managed class and then create an instance of that class, what you get is actually a proxy reference to the object. This has two implications:
1. Each method call is more like a remote procedure call to a special server process created by the manager you start up, and therefore has more overhead than a local method call.
2. If you print the reference, the class's __str__ method will not be called; you will just be printing a representation of the proxy pointer. You should probably rename __str__ to something like dump and call that explicitly whenever you want a representation of the instance.
You should also explicitly wait for the completion of the processes you are creating so that the manager service does not shut down prematurely, which means that each process should be assigned to a unique variable and have a unique name.
import disruptor
import multiprocessing
from multiprocessing.managers import BaseManager
import random

class CircularBufferManager(BaseManager):
    pass

def receiveWriter(cb):
    while(True):
        n = random.randint(5,20)
        cb.receive(n)

def ReplicatorReader(cb):
    while(True):
        cb.replicator()

def journalerReader(cb):
    while(True):
        cb.journaler()

def unmarshallerReader(cb):
    while(True):
        cb.unmarshaller()

def consumeReader(cb):
    while(True):
        print(cb.consume())

if __name__ == '__main__':
    # Create managed class
    CircularBufferManager.register('CircularBuffer', disruptor.CircularBuffer)
    # Create and start the manager:
    with CircularBufferManager() as manager:
        cb = manager.CircularBuffer(5)
        p1 = multiprocessing.Process(name="p1", target=ReplicatorReader, args=(cb,))
        p1.start()
        p0 = multiprocessing.Process(name="p0", target=receiveWriter, args=(cb,))
        p0.start()
        p1a = multiprocessing.Process(name="p1a", target=ReplicatorReader, args=(cb,))
        p1a.start()
        p2 = multiprocessing.Process(name="p2", target=journalerReader, args=(cb,))
        p2.start()
        p3 = multiprocessing.Process(name="p3", target=unmarshallerReader, args=(cb,))
        p3.start()
        p4 = multiprocessing.Process(name="p4", target=consumeReader, args=(cb,))
        p4.start()
        p1.join()
        p0.join()
        p1a.join()
        p2.join()
        p3.join()
        p4.join()
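For illustration only: since disruptor.CircularBuffer is the asker's own class and its code is not shown, the dump method mentioned above might look something like this hypothetical sketch (the attribute names are assumptions, not the real implementation):
# Hypothetical sketch: attribute names are illustrative assumptions.
class CircularBuffer:
    def __init__(self, size):
        self.size = size
        self.buffer = []

    def dump(self):
        # Return a readable snapshot; call cb.dump() through the proxy
        # instead of print(cb), which would only print the proxy object.
        return f"CircularBuffer(size={self.size}, contents={self.buffer})"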

Related

How do I nest multiprocessing in multiprocessing, with common variables (python)?

I have a function which is running twice in two parallel processes. Let's call it parentFunction().
Each process ends with a dictionary which is added to a common list, giving a list of two dictionaries. I solved this by using a preset list created with a manager.
Now, inside parentFunction() I would like to run two parallel processes, each contributing one variable to the dictionary. I tried to do this with a preset dictionary created with a manager.
At the end I'm converting the list of dictionaries to a pandas data frame.
def I(D, a):
    D["a"] = a

def II(D, b):
    D["a"] = b

def task(L, x):
    x = 0
    a = 1
    b = 2
    manager = Manager()
    D = manager.dict()  # <-- can be shared between processes.
    pI = Process(target=I, args=(D, 0))
    pII = Process(target=II, args=(D, 0))
    pI.start()
    pII.start()
    pI.join()
    pII.join()
    L.append(D)

if __name__ == "__main__":
    with Manager() as manager:
        L = manager.list()  # <-- can be shared between processes.
        p1 = Process(target=task, args=(L, 0))  # Passing the list
        p2 = Process(target=task, args=(L, 0))  # Passing the list
        p1.start()
        p2.start()
        p1.join()
        p2.join()
        print(L)
This returns the error:
TypeError: task() missing 1 required positional argument: 'L'
Traceback (most recent call last):
File "C:\Users\user\AppData\Roaming\JetBrains\PyCharmCE2021.2\scratches\scratch_8.py", line 88, in <module>
print(list(L))
File "<string>", line 2, in __getitem__
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 810, in _callmethod
kind, result = conn.recv()
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\connection.py", line 256, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 934, in RebuildProxy
return func(token, serializer, incref=incref, **kwds)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 784, in __init__
self._incref()
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 838, in _incref
conn = self._Client(self._token.address, authkey=self._authkey)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\connection.py", line 505, in Client
c = PipeClient(address)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\multiprocessing\connection.py", line 707, in PipeClient
_winapi.WaitNamedPipe(address, 1000)
FileNotFoundError: [WinError 2] The system cannot find the file specified
The source you posted does not seem to match your stack trace. You would only get a FileNotFoundError when the main process tries to enumerate the objects within list L with a statement such as print(list(L)), which I see in the stack trace but not in your code. It helps when you post the actual code causing the exception. But here is the cause of your problem:
When you create a new manager with the call manager = Manager(), a new process is created, and any objects created via the manager "live" in the address space of that manager process. You are creating two manager processes, once in the main process and once in the child process running task. It is in the latter that the dictionary D is created. When that child process terminates, its manager process terminates too, along with any objects created by that manager. So when the main process attempts to print the list L, the proxy object within it, D, no longer points to an existing object. The solution is to have the main process create the dictionary D and pass it to the task child process:
from multiprocessing import Process, Manager

def I(D, a):
    D["a"] = a

def II(D, b):
    D["a"] = b

def task(L, D, x):
    x = 0
    a = 1
    b = 2
    pI = Process(target=I, args=(D, 0))
    pII = Process(target=II, args=(D, 0))
    pI.start()
    pII.start()
    pI.join()
    pII.join()
    L.append(D)

if __name__ == "__main__":
    with Manager() as manager:
        L = manager.list()  # <-- can be shared between processes.
        D = manager.dict()  # <-- can be shared between processes.
        p = Process(target=task, args=(L, D, 0))  # Passing the list and the dict
        p.start()
        p.join()
        print(L[0])
Prints:
{'a': 0}
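As a side note (not part of the original answer): if the nested Manager() inside task were kept, another option would be to append a plain-dict snapshot instead of the proxy, so the stored value survives the child manager's shutdown. A one-line sketch of that change:
# Inside task(), replacing L.append(D):
L.append(dict(D))  # store a plain-dict copy rather than a proxy to the child's manager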

Cycle an iterator using multiprocessing in Python

I have an iterator that will retrieve a varying number of lines from a very large (>20GB) file, depending on some features. The iterator works fine, but I can only use one thread to process the result. I would like to feed the value from each iteration to multiple threads / processes.
I'm using a text file with 9 lines to mimic my data; here is my code. I've been struggling with how to create the feedback so that when one process finishes, it goes and retrieves the next iteration:
from multiprocessing import Process, Manager
import time

# Iterator
class read_file(object):
    def __init__(self, filePath):
        self.file = open(filePath, 'r')

    def __iter__(self):
        return self

    def __next__(self):
        line = self.file.readline()
        if line:
            return line
        else:
            raise StopIteration

# worker for one process
def print_worker(a, n, stat):
    print(a)
    stat[n] = True  # Set the finished status as True
    return None

# main
def main():
    file_path = 'tst_mp.txt'  # the txt file with 9 lines
    n_worker = 2
    file_handle = read_file(file_path)
    workers = []
    # Create shared list for storing the status of each worker
    manager = Manager()
    status = manager.list([False] * 2)  # one status flag per worker
    # Initiate the workers
    for i in range(n_worker):
        workers.append(Process(target=print_worker, args=(file_handle.__next__(), i, status,)))
    for worker in workers:
        worker.start()

    block = file_handle.__next__()  # The next block (line)
    while block:  # continue while there are still blocks left
        print(status)
        time.sleep(1)  # check every second
        for i in range(2):
            if status[i]:  # Worker i finished
                workers[i].join()
                # workers[i].close()
                workers[i] = Process(target=print_worker, args=(block, i, status,))
                status[i] = False  # Set worker i as busy (False)
                workers[i].start()  # Start worker i
                try:  # try to get the next item in the iterator
                    block = file_handle.__next__()
                except StopIteration:
                    block = False

if __name__ == '__main__':
    main()
The code is clumsy, but it did print out the sequence, although it also produced some errors when I ran the code twice:
1
2
3
4
5
6
7
8
9
Process Process-10:
Traceback (most recent call last):
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/managers.py", line 802, in _callmethod
conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/zewei/share/paf_depth/test_multiprocess.py", line 31, in print_worker
stat[n] = True # Set the finished status as True
File "<string>", line 2, in __setitem__
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/managers.py", line 806, in _callmethod
self._connect()
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/managers.py", line 794, in _connect
dispatch(conn, None, 'accept_connection', (name,))
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/managers.py", line 90, in dispatch
kind, result = c.recv()
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/connection.py", line 255, in recv
buf = self._recv_bytes()
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/connection.py", line 419, in _recv_bytes
buf = self._recv(4)
File "/home/zewei/mambaforge/lib/python3.9/multiprocessing/connection.py", line 384, in _recv
chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer
This is where I'm stuck. I was wondering if there is any fix or a more elegant way to do this?
Thanks!
Here's a better way to do what you are doing, using a multiprocessing Pool:
from multiprocessing import Pool
import time

# ... (the read_file iterator class from the question goes here) ...

# worker for one process
def print_worker(a):
    print(a)
    return None

def main():
    file_path = r''  # the txt file with 9 lines
    n_worker = 2
    file_handle = read_file(file_path)
    results = []
    with Pool(n_worker) as pool:
        for result in pool.imap(print_worker, file_handle):
            results.append(result)
    print(results)

if __name__ == '__main__':
    main()
Here, the imap function iterates over the iterator lazily, so the whole file is never read into memory at once. Pool automatically spreads the tasks across the number of processes you started (n_worker), so you don't have to manage the workers yourself.
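For illustration (this is a sketch, not the original answer's code, and it assumes the same 9-line text file), a plain generator works just as well as the read_file class as the input to imap, and a chunksize argument can be passed to cut down inter-process overhead when each item is cheap to process:
from multiprocessing import Pool

def process_line(line):
    # Placeholder per-line work; the real worker would do the actual processing.
    return line.strip()

def lines(path):
    # Generator that yields one line at a time, so the file is never fully in memory.
    with open(path) as fh:
        for line in fh:
            yield line

if __name__ == '__main__':
    with Pool(2) as pool:
        # chunksize > 1 sends lines to the workers in batches, reducing IPC overhead
        # when each line is cheap to process.
        for result in pool.imap(process_line, lines('tst_mp.txt'), chunksize=4):
            print(result)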

Python multiprocessing basic - Can't pickle local object and Ran out of input

Could anyone help me understand what this error means, what is going wrong, and how to resolve it?
I am quite new to Python and wanted to implement some multiprocessing in my code, so I started with the basic approach.
> AttributeError: Can't pickle local object
> 'computation.abc.<locals>.s1'
> Traceback (most recent call last):
> File "<string>", line 1, in <module>
> File "C:\Python\lib\multiprocessing\spawn.py", line 116, in spawn_main
> exitcode = _main(fd, parent_sentinel)
> File "C:\Python\lib\multiprocessing\spawn.py", line 126, in _main
> self = reduction.pickle.load(from_parent)
> EOFError: Ran out of input
My code is below:
import multiprocessing

class computation:
    def abc(self):
        try:
            """Some
            logic
            here"""
        except Exception as E:
            print('Error : ', E)

        def func1(sfunc1):
            """some
            logic
            here"""

        def func2(sfunc2):
            """some
            logic
            here"""

        def check(stk):
            p1 = multiprocessing.Process(target=func1, args=s1)  # s1 and s2 is a List
            p2 = multiprocessing.Process(target=func2, args=s2)
            p1.start()
            p2.start()
            p1.join()
            p2.join()

        check(Symbols)
Multiprocessing uses pickle to serialize and transfer data between sub-processes, and pickle cannot serialize local (inner) functions. Try setting your target to a function that is visible from the file's (module-level) namespace.
For example:
import multiprocessing

def abc(self):
    try:
        """Some
        logic
        here"""
    except Exception as E:
        print('Error : ', E)

def func1(sfunc1):
    """some
    logic
    here"""

def func2(sfunc2):
    """some
    logic
    here"""

def check(stk):
    p1 = multiprocessing.Process(target=func1, args=s1)  # s1 and s2 is a List
    p2 = multiprocessing.Process(target=func2, args=s2)
    p1.start()
    p2.start()
    p1.join()
    p2.join()

if __name__ == "__main__":  # Make sure only 1 process runs this.
    check(Symbols)
Make sure s1, s2, and Symbols are defined.
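To illustrate the point about local functions (a sketch added here, not part of the original answer): pickle accepts module-level functions because they can be looked up by name on re-import, but refuses objects defined inside another function:
import pickle

def outer():
    def inner():  # defined inside another function, i.e. a local object
        pass
    return inner

def top_level():
    pass

pickle.dumps(top_level)  # works: the function is referenced by its module-level name
pickle.dumps(outer())    # AttributeError: Can't pickle local object 'outer.<locals>.inner'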

func must be a callable or a textual reference to one

I am trying to run a function every 2 minutes, and I use apscheduler for this. However, when I run this I get the following error:
Traceback (most recent call last):
File "main_forecast.py", line 7, in <module>
scheduler.add_job(get_warnings(), 'interval', seconds = 120)
File "/home/anastasispap/.local/lib/python3.6/site-packages/apscheduler/schedulers/base.py", line 434, in add_job
job = Job(self, **job_kwargs)
File "/home/anastasispap/.local/lib/python3.6/site-packages/apscheduler/job.py", line 49, in __init__
self._modify(id=id or uuid4().hex, **kwargs)
File "/home/anastasispap/.local/lib/python3.6/site-packages/apscheduler/job.py", line 170, in _modify
raise TypeError('func must be a callable or a textual reference to one')
TypeError: func must be a callable or a textual reference to one
And here's the code:
from apscheduler.schedulers.background import BackgroundScheduler
from enemies_info import get_warnings
import time
scheduler = BackgroundScheduler()
scheduler.add_job(get_warnings(), 'interval', seconds = 120)
scheduler.start()
while True:
time.sleep(120)
The function I want to run every 2 minutes is get_warnings.
def get_warnings():
    print('get_warning has been run')
    names = []
    types = []
    number_of_threats = 0
    forecast_weather()
    for i in range(0, number_of_enemies):
        enemies = info["enemies"][i]
        name = enemies["name"]
        type = enemies["type"]
        temperature = enemies["temperature"]
        temperature = temperature.split("-")
        min_temp = temperature[0]
        max_temp = temperature[1]
        for i in range(len(temperatures)):
            if avg_temps[i] <= str(max_temp):
                names.append(name)
                types.append(type)
                number_of_threats += 1
                break
    os.chdir('..')
    write_data(number_of_threats, names, types)
    move_to_github()
You are calling the function get_warnings (note the parentheses) instead of providing it as a callable. Try:
scheduler.add_job(get_warnings, 'interval', seconds = 120)
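A minimal, self-contained sketch of the corrected scheduling loop (the real get_warnings body is replaced with a print here purely for illustration):
from apscheduler.schedulers.background import BackgroundScheduler
import time

def get_warnings():
    print('get_warnings has been run')

scheduler = BackgroundScheduler()
# Pass the function object itself; the scheduler calls it every 120 seconds.
scheduler.add_job(get_warnings, 'interval', seconds=120)
scheduler.start()

try:
    while True:
        time.sleep(1)
except (KeyboardInterrupt, SystemExit):
    scheduler.shutdown()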

I have an issue using the multiprocessing module under python 2.7 on Windows

I was using a huge scientific code on Linux, using the multiprocessing module to accelerate some computations. Somewhere in a library that I am using, multiprocessing is called this way:
manager = multiprocessing.Manager()
return_dict = manager.dict()
n = 0
while n < n_samples:
    if n + n_procs < n_samples:
        n_subs = n_procs
    else:
        n_subs = n_samples - n
    jobs = []
    for i in range(n_subs):
        index = n + i
        x_in = samples[index]
        p = multiprocessing.Process(target=self.__worker, args=(index, x_in, return_dict))
        jobs.append(p)
        p.start()
I wrapped the top-level call of this code in my main Python script as follows:
if __name__ == '__main__':
    freeze_support()
    fitting.optimize()
since the fitting.optimize() line is what calls the parallel code.
Still, when launching the code I get an error and I do not know why:
File "C:\Users\XXX\workspace\git-DLLM\Simulation\CFD_Data_Analysis\Fitting3DPanels\CoeffFitting3DPanels.py", line 48, in <module>
fitting.optimize()
....
File "C:\Users\XXX\workspace\git-MDOTools\modules\MDOTools\ValidGrad\FDGradient.py", line 97, in grad_f
manager = multiprocessing.Manager()
File "C:\Users\XXX\AppData\Local\Continuum\Anaconda2\lib\multiprocessing\__init__.py", line 99, in Manager
m.start()
File "C:\Users\XXX\AppData\Local\Continuum\Anaconda2\lib\multiprocessing\managers.py", line 528, in start
self._address = reader.recv()
EOFError
