Can't get attribute 'abc' on <module '__main__' from 'abc_d.py'> - python

I am defining a function in Python. The program file itself is named abc_d.py. I don't understand whether I can import the same file into itself again.
import numpy as np
import matplotlib.pyplot as plt
import sys
import multiprocessing

num_processor=4
pool = multiprocessing.Pool(num_processor)

def abc(data):
    w=np.dot(data.reshape(25,1),data.reshape(1,25))
    return w

data_final=np.array(range(100))
n=100
error=[]
k_list=[50,100,500,1000,2000]
for k in k_list:
    dict_data={}
    for d_set in range(num_processor):
        dict_data[d_set]=data_final[int(d_set*n/4):int((d_set+1)*n/4)]
        if(d_set==num_processor-1):
            dict_data[d_set]=data_final[int(d_set*n/4):]
    tasks = dict_data
    results_w=[pool.apply_async(abc,dict_data[t]) for t in range(num_processor)]

    w_f=[]
    for result in results_w:
        w_s=result.get()
        w_f.append(w_s.tolist())
    w_f=np.array(w_f)
    print (w_f)
Here tasks is a dictionary of arrays.
Can anybody explain this error? I am still not very familiar with Python.
Error:
Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Process ForkPoolWorker-3:
Process ForkPoolWorker-4:
Traceback (most recent call last):
  File "/home/anaconda3/lib/python3.5/multiprocessing/process.py", line 254, in _bootstrap
    self.run()
  File "/home/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/anaconda3/lib/python3.5/multiprocessing/queues.py", line 345, in get
    return ForkingPickler.loads(res)
AttributeError: Can't get attribute 'abc' on <module '__main__' from 'abc_d.py'>

If you declare the pool before declaring the function you are trying to run in parallel, it will throw this error. Reverse the order and it will no longer throw the error. Also, there is a bug in your code: you are passing dict_data[t] directly as the argument tuple for abc, so the array gets unpacked into separate positional arguments. It needs to be wrapped in a list so the whole array arrives as abc's single argument. I changed that line too, and it returns some results.
import numpy as np
import matplotlib.pyplot as plt
import sys
import multiprocessing

num_processor=4

def abc(data):
    w=np.dot(data.reshape(25,1),data.reshape(1,25))
    return w

pool = multiprocessing.Pool(num_processor)

data_final=np.array(range(100))
n=100
error=[]
k_list=[50,100,500,1000,2000]
for k in k_list:
    dict_data={}
    for d_set in range(num_processor):
        dict_data[d_set]=data_final[int(d_set*n/4):int((d_set+1)*n/4)]
        if(d_set==num_processor-1):
            dict_data[d_set]=data_final[int(d_set*n/4):]
    tasks = dict_data
    results_w=[pool.apply_async(abc, [dict_data[t]]) for t in range(num_processor)]

    w_f=[]
    for result in results_w:
        w_s=result.get()
        w_f.append(w_s.tolist())
    w_f=np.array(w_f)
    print (w_f)
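To make the argument fix concrete, here is a small illustration of my own (not part of the answer): apply_async expects a sequence of positional arguments, so passing a bare NumPy array unpacks it element by element.

import numpy as np
import multiprocessing

def abc(data):
    # expects ONE argument: the whole 25-element chunk
    return np.dot(data.reshape(25, 1), data.reshape(1, 25))

if __name__ == '__main__':
    pool = multiprocessing.Pool(2)
    chunk = np.arange(25)

    # Wrong: the array itself becomes the argument list, so abc would be
    # called as abc(0, 1, ..., 24) and raise a TypeError on .get().
    # pool.apply_async(abc, chunk)

    # Right: wrap the array so it arrives as a single argument.
    result = pool.apply_async(abc, [chunk])
    print(result.get().shape)   # (25, 25)
    pool.close()
    pool.join()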

I ran into the same problem and was able to fix it.
You have to move the function definition out of the main block, because otherwise Windows can't find the function when it spawns the workers.
Put your script code inside an if __name__ == '__main__': guard and define the function outside of it.
import numpy as np
import matplotlib.pyplot as plt
import sys
import multiprocessing

def abc(data):
    w=np.dot(data.reshape(25,1),data.reshape(1,25))
    return w

if __name__ == '__main__':
    num_processor=4
    pool = multiprocessing.Pool(num_processor)

    data_final=np.array(range(100))
    n=100
    error=[]
    k_list=[50,100,500,1000,2000]
    for k in k_list:
        dict_data={}
        for d_set in range(num_processor):
            dict_data[d_set]=data_final[int(d_set*n/4):int((d_set+1)*n/4)]
            if(d_set==num_processor-1):
                dict_data[d_set]=data_final[int(d_set*n/4):]
        tasks = dict_data
        results_w=[pool.apply_async(abc, [dict_data[t]]) for t in range(num_processor)]

        w_f=[]
        for result in results_w:
            w_s=result.get()
            w_f.append(w_s.tolist())
        w_f=np.array(w_f)
        print (w_f)

I also faced the same issue. Declaring the pool after the function definition solved it:
pool = multiprocessing.Pool(num_processor)
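A minimal sketch of that ordering, with hypothetical names (not the question's code): the worker function has to exist at module level before the Pool is created, so every worker process can look it up on __main__.

import multiprocessing

# 1. Define the worker first, at module level.
def square(x):
    return x * x

if __name__ == '__main__':
    # 2. Only then create the pool that will call it.
    pool = multiprocessing.Pool(4)
    results = [pool.apply_async(square, [i]) for i in range(8)]
    print([r.get() for r in results])   # [0, 1, 4, 9, 16, 25, 36, 49]
    pool.close()
    pool.join()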

A likely answer, which I am pursuing myself, is that the function will not pickle, as discovered here by the author of a multiprocessing/multithreading handler:
https://github.com/joblib/joblib/issues/166#issuecomment-55529781
For those who use global variables in their worker function, refer to this question:
Globals variables and Python multiprocessing
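To illustrate the pickling point (example names are mine, not from the linked issue): a Pool task is sent to the workers by pickling the function's qualified name, so only functions defined at module level work; lambdas and nested functions fail to pickle.

import multiprocessing

# Module-level functions pickle by reference, so they are valid Pool tasks.
def add_one(x):
    return x + 1

def main():
    with multiprocessing.Pool(2) as pool:
        print(pool.map(add_one, [1, 2, 3]))   # [2, 3, 4]
        # A lambda or a function defined inside main() would fail here,
        # because it cannot be pickled by name:
        # pool.map(lambda x: x + 1, [1, 2, 3])

if __name__ == '__main__':
    main()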

You can try to pass the Pool as a parameter! - Alex
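If that suggestion means handing the pool object around inside the parent process, a minimal sketch could look like this (my interpretation and names, since the comment gives no code; note that a Pool itself cannot be pickled and sent to worker functions):

import multiprocessing

def abc_worker(x):
    return x * x

def run_jobs(pool, items):
    # the pool stays in the parent process; only the work items travel to the workers
    results = [pool.apply_async(abc_worker, [i]) for i in items]
    return [r.get() for r in results]

if __name__ == '__main__':
    with multiprocessing.Pool(4) as pool:
        print(run_jobs(pool, range(8)))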

Related

Multithreading / Multiprocessing solution using concurrent.futures

Hi, I'm referencing the following question because it's similar to what I'm trying to achieve; however, I'm getting an error that I can't figure out, so I'm looking for some help:
Combining multithreading and multiprocessing with concurrent.futures
Here's my test code:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import numpy as np
from os import cpu_count
from functools import partial

num_list = range(0,1000)

def test(x):
    x**2

def multithread(f, lst):
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        thread_executor.map(f, lst)

def multiprocesser(lst, f, n_processors=cpu_count()//2):
    chunks = np.array_split(lst, n_processors)
    with ProcessPoolExecutor(max_workers=n_processors) as mp_executor:
        mp_executor.map(partial(multithread, f), chunks)

if __name__ == '__main__':
    multiprocesser(num_list, test)
And here's the error I'm getting:
Process SpawnProcess-31:
Traceback (most recent call last):
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 315, in _bootstrap
    self.run()
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\concurrent\futures\process.py", line 237, in _process_worker
    call_item = call_queue.get(block=True)
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'multithread' on <module '__main__' (built-in)>
Process SpawnProcess-32:
Traceback (most recent call last):
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 315, in _bootstrap
    self.run()
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\concurrent\futures\process.py", line 237, in _process_worker
    call_item = call_queue.get(block=True)
  File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'multithread' on <module '__main__' (built-in)>
I didn't specify the number of threads (I don't see a reason to for the ThreadPoolExecutor). I'm having trouble understanding what the error actually means and how I can fix it. Any help would be appreciated.
The error probably stems from the fact that multithread() is being called incorrectly.
Try this:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import numpy as np
from os import cpu_count
from functools import partial

num_list = range(0,1000)

def test(x):
    x**2

def multithread(f, lst):
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        thread_executor.map(f, lst)

def multiprocesser(lst, f, n_processors=cpu_count()//2):
    chunks = np.array_split(lst, n_processors)
    with ProcessPoolExecutor(max_workers=n_processors) as mp_executor:
        mp_executor.map(partial(multithread, f), chunks)

if __name__ == '__main__':
    multiprocesser(num_list, test)
Missing if __name__ == '__main__'
if __name__ == '__main__':
    multiprocesser(num_list, test)
Unintended recursion
When you don't guard the call to multiprocesser() this way, the subprocess re-runs the whole Python script when it imports it, and you get recursion.
Safe importing of main module
The following is an example of the same type of problem, taken from the multiprocessing docs:
https://docs.python.org/3/library/multiprocessing.html?highlight=multiprocess#the-spawn-and-forkserver-start-methods
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such as starting a new process).
For example, using the spawn or forkserver start method, running the following module would fail with a RuntimeError:
from multiprocessing import Process

def foo():
    print('hello')

p = Process(target=foo)
p.start()
Instead one should protect the “entry point” of the program by using if __name__ == '__main__': as follows:
from multiprocessing import Process, freeze_support, set_start_method

def foo():
    print('hello')

if __name__ == '__main__':
    freeze_support()
    set_start_method('spawn')
    p = Process(target=foo)
    p.start()

How can you use easyocr with multiprocessing?

I am trying to read text on images with easyocr in Python, and I want to run it in a separate process so it doesn't hold back other parts of the code. But when I call the function inside a multiprocessing process, I get a NotImplementedError. Here is an example of the code:
import multiprocessing as mp
import easyocr
import cv2

def ocr_test(q, reader):
    while not q.empty():
        q.get()
        img = cv2.imread('unknown.png')
        result = reader.readtext(img)

if __name__ == '__main__':
    q = mp.Queue()
    reader = easyocr.Reader(['en'])
    p = mp.Process(target=ocr_test, args=(q,reader))
    p.start()
    q.put('start')
    p.join()
And this is the error I get:
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Program Files\Python310\lib\multiprocessing\spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
  File "C:\Python\venv\lib\site-packages\torch\multiprocessing\reductions.py", line 90, in rebuild_tensor
    t = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
  File "C:\Python\venv\lib\site-packages\torch\_utils.py", line 134, in _rebuild_tensor
    t = torch.tensor([], dtype=storage.dtype, device=storage._untyped().device)
NotImplementedError: Could not run 'aten::empty.memory_format' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::empty.memory_format' is only available for these backends: [CPU, Meta, MkldnnCPU, SparseCPU, SparseCsrCPU, BackendSelect, Python, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradLazy, AutogradXPU, AutogradMLC, AutogradHPU, AutogradNestedTensor, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, Tracer, AutocastCPU, Autocast, Batched, VmapMode, Functionalize].
Is there a way to solve this problem?
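This question has no answer in the thread. As a hedged sketch of my own only: the traceback shows the failure while unpickling a torch tensor that was sent to the child process, so one workaround worth trying is to construct the easyocr.Reader inside the worker instead of passing it as an argument, keeping the model out of the pickle stream entirely.

import multiprocessing as mp
import easyocr
import cv2

def ocr_test(q):
    # build the Reader here, in the child process, so the torch-backed
    # model never has to cross the process boundary via pickle
    reader = easyocr.Reader(['en'])
    while not q.empty():
        q.get()
        img = cv2.imread('unknown.png')   # same test image as in the question
        print(reader.readtext(img))

if __name__ == '__main__':
    q = mp.Queue()
    q.put('start')                        # enqueue before starting, so the worker sees it
    p = mp.Process(target=ocr_test, args=(q,))
    p.start()
    p.join()

The trade-off is that the model is loaded once per process, so this suits a long-lived worker process better than spawning a new process per image.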

OSError (Errno 9) when using multiprocessing.Array in Python

I'm trying to use a multiprocessing.Array in two separate processes in Python 3.7.4 (macOS 10.14.6). I start off by creating a new process using the spawn context, passing as an argument to it an Array object:
import multiprocessing, time, ctypes

def fn(c):
    time.sleep(1)
    print("value:", c.value)

def main():
    ctx = multiprocessing.get_context("spawn")
    arr = multiprocessing.Array(ctypes.c_char, 32)
    p = ctx.Process(target=fn, args=(arr,))
    p.start()
    arr.value = b"hello"
    p.join()

if __name__ == "__main__":
    main()
However, when I try to read it, I get the following error:
Process SpawnProcess-1:
Traceback (most recent call last):
  File "/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/federico/Workspace/test/test.py", line 6, in fn
    print("value:", c.value)
  File "<string>", line 3, in getvalue
OSError: [Errno 9] Bad file descriptor
The expected output, however, is value: hello. Anyone know what could be going wrong here? Thanks.
The array should also be created from the same context that you use for multiprocessing. Creating it via plain multiprocessing.Array() ties it to the default context, which doesn't mix with the spawn-context Process and leads to the bad file descriptor; creating it with ctx.Array keeps everything in one context, like so:
import multiprocessing, time
import ctypes
from multiprocessing import Process

def fn(arr):
    time.sleep(1)
    print("value:", arr.value)

def main():
    ctx = multiprocessing.get_context("spawn")
    arr = ctx.Array(ctypes.c_char, 32)
    p = ctx.Process(target=fn, args=(arr,))
    p.start()
    arr.value = b'hello'
    p.join()

if __name__ == "__main__":
    main()

Failed to import multiprocessing Queue object in python3.6

I was using the multiprocessing library in Python 3.6. Whenever I try to create a multiprocessing.Queue() object, I get an error.
My code looks like:
import multiprocessing

def square(arr,q):
    for i in arr:
        q.put(i*i)

arr=[1,2,3,4,5,6]
q=multiprocessing.Queue()
p1=multiprocessing.Process(target=square,args=(arr,q,))
p1.start()
p1.join()

result=[]
while q.empty() is False:
    result.append(q.get())
print(result)
and error is :
Traceback (most recent call last):
  File "qu.py", line 9, in <module>
    q=multiprocessing.Queue()
  File "/usr/lib/python3.6/multiprocessing/context.py", line 101, in Queue
    from .queues import Queue
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 20, in <module>
    from queue import Empty, Full
  File "/home/vivek/Desktop/code/par/queue.py", line 11, in <module>
    q=Queue()
  File "/usr/lib/python3.6/multiprocessing/context.py", line 101, in Queue
    from .queues import Queue
ImportError: cannot import name 'Queue'
As you can see in the import chain listed in the error traceback, Python is trying to import the Queue definition from:
/home/vivek/Desktop/code/par/queue.py
Your own queue.py is shadowing the standard-library queue module: the directory containing your script sits at the front of sys.path, so it takes precedence over /usr/lib. The same thing happens if you add that directory via a custom PYTHONPATH environment variable or by modifying sys.path.
The quick fix is to rename your file from queue.py to something else.
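A quick way to confirm this kind of shadowing (a diagnostic sketch of my own, not part of the answer) is to ask Python which file a module name actually resolves to, without importing it:

# run this from the directory that contains your script
import importlib.util
print(importlib.util.find_spec('queue').origin)
# If this prints a path inside your project rather than the standard
# library, that local file is shadowing the stdlib queue module; rename it
# and remove any stale __pycache__ / queue.pyc next to it.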

python tempfile and multiprocessing pool error

I'm experimenting with Python's multiprocessing. I struggled with a bug in my code and managed to narrow it down; however, I still don't know why this happens. What I'm posting is just sample code. If I import the tempfile module and change tempdir, the code crashes at pool creation. I'm using Python 2.7.5.
Here's the code
from multiprocessing import Pool
import tempfile

tempfile.tempdir = "R:/"  # REMOVING THIS LINE FIXES THE ERROR

def f(x):
    return x*x

if __name__ == '__main__':
    pool = Pool(processes=4)            # start 4 worker processes
    result = pool.apply_async(f, [10])  # evaluate "f(10)" asynchronously
    print result.get(timeout=1)         # prints "100" unless your computer is *very* slow
    print pool.map(f, range(10))        # prints "[0, 1, 4,..., 81]"
Here's the error:
R:\>mp_pool_test.py
Traceback (most recent call last):
  File "R:\mp_pool_test.py", line 11, in <module>
    pool = Pool(processes=4) # start 4 worker processes
  File "C:\Python27\lib\multiprocessing\__init__.py", line 232, in Pool
    return Pool(processes, initializer, initargs, maxtasksperchild)
  File "C:\Python27\lib\multiprocessing\pool.py", line 138, in __init__
    self._setup_queues()
  File "C:\Python27\lib\multiprocessing\pool.py", line 233, in _setup_queues
    self._inqueue = SimpleQueue()
  File "C:\Python27\lib\multiprocessing\queues.py", line 351, in __init__
    self._reader, self._writer = Pipe(duplex=False)
  File "C:\Python27\lib\multiprocessing\__init__.py", line 107, in Pipe
    return Pipe(duplex)
  File "C:\Python27\lib\multiprocessing\connection.py", line 223, in Pipe
    1, obsize, ibsize, win32.NMPWAIT_WAIT_FOREVER, win32.NULL
WindowsError: [Error 123] The filename, directory name, or volume label syntax is incorrect
This code works fine.
from multiprocessing import Pool
import tempfile as TF

TF.tempdir = "R:/"

def f(x):
    return x*x

if __name__ == '__main__':
    print("test")
The bizarre thing is that in both cases I don't actually do anything with TF.tempdir, but the version that creates the Pool doesn't work for some reason.
Interesting: it looks like you have a name collision, from what I can see in
"C:\Program Files\PYTHON\Lib\multiprocessing\connection.py"
It seems that multiprocessing is using tempfile as well. That behavior should not happen, but it looks to me like the problem is on line 66 of connection.py:
elif family == 'AF_PIPE':
    return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' %
                           (os.getpid(), _mmap_counter.next()))
I am still poking at this. I looked at globals after importing tempfile and then tempfile as TF; different names exist, but now I am wondering about references, so I am trying to figure out whether they point to the same thing.
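A hedged workaround sketch of my own (not from the thread): the mktemp call above consults the module-wide tempfile.tempdir when building the \\.\pipe\... name, so setting that global to "R:/" produces an invalid pipe path. One option is to leave the global alone and pass dir= to the individual tempfile calls instead:

from multiprocessing import Pool
import tempfile

def f(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=4)
    print(pool.map(f, range(10)))
    pool.close()
    pool.join()

    # request the custom location per call instead of changing the
    # process-wide default that multiprocessing's pipe naming relies on
    tmp = tempfile.NamedTemporaryFile(dir="R:/", delete=False)
    tmp.write(b"some data")
    tmp.close()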
