asyncio + multiprocessing + unix - python

I have a pet project with the following logic:
import asyncio, multiprocessing

async def sub_main():
    print('Hello from subprocess')

def sub_loop():
    asyncio.get_event_loop().run_until_complete(sub_main())

def start():
    multiprocessing.Process(target=sub_loop).start()

start()
If you run it, you'll see:
Hello from subprocess
That is good. But what I need to do is make start() a coroutine instead:
async def start():
    multiprocessing.Process(target=sub_loop).start()
To run it, I have to do something like this:
asyncio.get_event_loop().run_until_complete(start())
Here is the issue: when the subprocess is created, it gets the whole Python environment cloned, so the event loop is already running there:
Process Process-1:
Traceback (most recent call last):
File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "test.py", line 7, in sub_loop
asyncio.get_event_loop().run_until_complete(sub_main())
File "/usr/lib/python3.5/asyncio/base_events.py", line 361, in run_until_complete
self.run_forever()
File "/usr/lib/python3.5/asyncio/base_events.py", line 326, in run_forever
raise RuntimeError('Event loop is running.')
RuntimeError: Event loop is running.
I tried to destroy it on the subprocess side with no luck, but I think the correct approach is to prevent it from being shared with the subprocess in the first place. Is that possible somehow?
UPDATE:
Here is the full failing code:
import asyncio, multiprocessing
import asyncio.unix_events

async def sub_main():
    print('Hello from subprocess')

def sub_loop():
    asyncio.get_event_loop().run_until_complete(sub_main())

async def start():
    multiprocessing.Process(target=sub_loop).start()

asyncio.get_event_loop().run_until_complete(start())

First, you should consider using loop.run_in_executor with a ProcessPoolExecutor if you plan to run python subprocesses from within the loop. As for your problem, you can use the event loop policy functions to set a new loop:
import asyncio
from concurrent.futures import ProcessPoolExecutor

async def sub_main():
    print('Hello from subprocess')

def sub_loop():
    # Create and install a fresh event loop for this worker process
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(sub_main())

async def start(executor):
    await asyncio.get_event_loop().run_in_executor(executor, sub_loop)

if __name__ == '__main__':
    executor = ProcessPoolExecutor()
    asyncio.get_event_loop().run_until_complete(start(executor))

You should always add a check to see how you're running the code (the if __name__ == '__main__': part). Your subprocess is running everything in the module a 2nd time, giving you grief (couldn't resist).
import asyncio, multiprocessing

async def sub_main():
    print('Hello from subprocess')

def sub_loop():
    asyncio.get_event_loop().run_until_complete(sub_main())

async def start():
    multiprocessing.Process(target=sub_loop).start()

if __name__ == '__main__':
    asyncio.get_event_loop().run_until_complete(start())

Multithreading / Multiprocessing solution using concurrent.futures

Hi, I'm referencing the following question because it's similar to what I'm trying to achieve; however, I'm getting an error that I can't seem to figure out, so I'm looking for some help.
Combining multithreading and multiprocessing with concurrent.futures
Here's my test code:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import numpy as np
from os import cpu_count
from functools import partial

num_list = range(0, 1000)

def test(x):
    x**2

def multithread(f, lst):
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        thread_executor.map(f, lst)

def multiprocesser(lst, f, n_processors=cpu_count()//2):
    chunks = np.array_split(lst, n_processors)
    with ProcessPoolExecutor(max_workers=n_processors) as mp_executor:
        mp_executor.map(partial(multithread, f), chunks)

if __name__ == '__main__':
    multiprocesser(num_list, test)
Process SpawnProcess-31:
Traceback (most recent call last):
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 315, in _bootstrap
self.run()
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\concurrent\futures\process.py", line 237, in _process_worker
call_item = call_queue.get(block=True)
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\queues.py", line 122, in get
return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'multithread' on <module '__main__' (built-in)>
Process SpawnProcess-32:
Traceback (most recent call last):
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 315, in _bootstrap
self.run()
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\concurrent\futures\process.py", line 237, in _process_worker
call_item = call_queue.get(block=True)
File "C:\Users\Test_user\Anaconda3\envs\test_env\lib\multiprocessing\queues.py", line 122, in get
return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'multithread' on <module '__main__' (built-in)>
So I didn't specify the number of threads (I don't see a reason to for the ThreadPoolExecutor). I'm having trouble understanding what the error actually means and how I can fix it. Any help would be appreciated.
The error probably stems from the fact that multithread() is being called incorrectly.
Try this:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import numpy as np
from os import cpu_count
from functools import partial

num_list = range(0, 1000)

def test(x):
    x**2

def multithread(f, lst):
    print('Thread running')
    with ThreadPoolExecutor() as thread_executor:
        thread_executor.map(f, lst)

def multiprocesser(lst, f, n_processors=cpu_count()//2):
    chunks = np.array_split(lst, n_processors)
    with ProcessPoolExecutor(max_workers=n_processors) as mp_executor:
        mp_executor.map(partial(multithread, f), chunks)

if __name__ == '__main__':
    multiprocesser(num_list, test)
Missing if __name__ == '__main__'
if __name__ == '__main__':
    multiprocesser(num_list, test)
Unintended recursion
When you don't guard the call to multiprocesser(), you get unintended recursion when the subprocess re-runs the Python script.
Safe importing of main module
The following is an example of the same type of problem from the multiprocessing docs: https://docs.python.org/3/library/multiprocessing.html?highlight=multiprocess#the-spawn-and-forkserver-start-methods
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such as starting a new process).
For example, using the spawn or forkserver start method running the
following module would fail with a RuntimeError:
from multiprocessing import Process

def foo():
    print('hello')

p = Process(target=foo)
p.start()
Instead one should protect the
“entry point” of the program by using if __name__ == '__main__': as
follows:
from multiprocessing import Process, freeze_support, set_start_method

def foo():
    print('hello')

if __name__ == '__main__':
    freeze_support()
    set_start_method('spawn')
    p = Process(target=foo)
    p.start()

Cannot use ThreadPoolExecutor within threading.Thread (Python 3.9)

Due to production needs, we updated our project to Python 3.9. But the project stopped working because of RuntimeError: can't register atexit after shutdown, which did not occur with Python 3.7. Our project has many threads and each thread might spawn sub-threads. We used threading.Thread for the higher levels and concurrent.futures.ThreadPoolExecutor at the bottom level. For example, the following code would work on 3.7 but not 3.9:
from threading import Thread
import concurrent.futures

def func1():
    print("func1 start")

def func2():
    print("func2 start")

def func3():
    with concurrent.futures.ThreadPoolExecutor() as executor:
        print("func3 start")
        future1 = executor.submit(func1)
        future2 = executor.submit(func2)
        concurrent.futures.wait([future1, future2])
        print("func3 end")

thread1 = Thread(target=func1)
thread3 = Thread(target=func3)
thread1.start()
thread3.start()
thread1.join()
thread3.join()
with the following error in 3.9:
func1 start
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\my_project\lib\threading.py", line 973, in _bootstrap_inner
self.run()
File "C:\my_project\lib\threading.py", line 910, in run
self._target(*self._args, **self._kwargs)
File "C:\my_projectr\tests\thread_test.py", line 13, in func2
with concurrent.futures.ThreadPoolExecutor() as executor:
File "C:\my_project\lib\concurrent\futures\__init__.py", line 49, in __getattr__
from .thread import ThreadPoolExecutor as te
File "C:\my_project\lib\concurrent\futures\thread.py", line 37, in <module>
threading._register_atexit(_python_exit)
File "C:\my_project\lib\threading.py", line 1407, in _register_atexit
raise RuntimeError("can't register atexit after shutdown")
RuntimeError: can't register atexit after shutdown
After some experimenting, I realized that in Python 3.9 a ThreadPoolExecutor cannot be used under a Thread, while a Thread can still be used under a ThreadPoolExecutor.
My questions are:
Is this behaviour (change) intended? Why?
What would be a proper way to use multi-level threading in Python 3.9?
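For reference, here is a minimal sketch of the nesting order that still works for me on 3.9 (the names inner and outer are just placeholders), i.e. plain threads started from inside an executor-managed thread:
from threading import Thread
import concurrent.futures

def inner():
    print("inner thread running")

def outer():
    # Starting a plain Thread from inside an executor-managed thread is fine
    t = Thread(target=inner)
    t.start()
    t.join()

if __name__ == '__main__':
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.submit(outer).result()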

OSError (Errno 9) when using multiprocessing.Array in Python

I'm trying to use a multiprocessing.Array in two separate processes in Python 3.7.4 (macOS 10.14.6). I start off by creating a new process using the spawn context, passing as an argument to it an Array object:
import multiprocessing, time, ctypes

def fn(c):
    time.sleep(1)
    print("value:", c.value)

def main():
    ctx = multiprocessing.get_context("spawn")
    arr = multiprocessing.Array(ctypes.c_char, 32)
    p = ctx.Process(target=fn, args=(arr,))
    p.start()
    arr.value = b"hello"
    p.join()

if __name__ == "__main__":
    main()
However, when I try to read it, I get the following error:
Process SpawnProcess-1:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/Users/federico/Workspace/test/test.py", line 6, in fn
print("value:", c.value)
File "<string>", line 3, in getvalue
OSError: [Errno 9] Bad file descriptor
The expected output, however, is value: hello. Anyone know what could be going wrong here? Thanks.
The Array should also be created from the same context that you use for the process, like so:
import multiprocessing, time
import ctypes

def fn(arr):
    time.sleep(1)
    print("value:", arr.value)

def main():
    ctx = multiprocessing.get_context("spawn")
    arr = ctx.Array(ctypes.c_char, 32)  # create the Array from the spawn context
    p = ctx.Process(target=fn, args=(arr,))
    p.start()
    arr.value = b'hello'
    p.join()

if __name__ == "__main__":
    main()

Exception " There is no current event loop in thread 'MainThread' " while running over new loop

This is the simple test code and the result.
import asyncio

async def test():
    await asyncio.sleep(1)

if __name__ == '__main__':
    asyncio.set_event_loop(None)     # Clear the main loop.
    loop = asyncio.new_event_loop()  # Create a new loop.
    loop.run_until_complete(test())  # Run coroutine over the new loop
Traceback (most recent call last):
File "test_test.py", line 11, in <module>
loop.run_until_complete(test())
File "/usr/lib/python3.5/asyncio/base_events.py", line 387, in run_until_complete
return future.result()
File "/usr/lib/python3.5/asyncio/futures.py", line 274, in result
raise self._exception
File "/usr/lib/python3.5/asyncio/tasks.py", line 239, in _step
result = coro.send(None)
File "test_test.py", line 5, in test
await asyncio.sleep(1)
File "/usr/lib/python3.5/asyncio/tasks.py", line 510, in sleep
loop = events.get_event_loop()
File "/usr/lib/python3.5/asyncio/events.py", line 632, in get_event_loop
return get_event_loop_policy().get_event_loop()
File "/usr/lib/python3.5/asyncio/events.py", line 578, in get_event_loop
% threading.current_thread().name)
RuntimeError: There is no current event loop in thread 'MainThread'.
I run async def test() on the new loop and expected that asyncio.sleep(1), which is awaited inside test(), would also use the new loop.
Instead, sleep() still seems to access the main loop, which I set to None.
I know I can set the main loop to the new loop with asyncio.set_event_loop(loop) before calling run_until_complete(), and then it works with no exception.
However, I want to know whether it is normal for asyncio that the main loop must be set and is used by coroutines regardless of the loop on which the coroutine is run.
I want to know whether it is normal for asyncio that the main loop must be set and is used by coroutines regardless of the loop on which the coroutine is run.
This used to be required prior to Python 3.6. The reason is that functions like asyncio.sleep() need an event loop to be able to use loop.call_later() to schedule a wake-up call to complete the future.
As of Python 3.6 (or 3.5.3, which included a fix for the issue), when get_event_loop() is invoked from a coroutine driven by an event loop, it always returns the event loop that drives it. As a result, your code works correctly.
The new behavior is not mentioned in the online documentation, but is in the docstring:
When called from a coroutine or a callback (e.g. scheduled with call_soon or similar API), this function will always return the running event loop.
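As a quick illustration (a minimal sketch, assuming Python 3.5.3+/3.6+), the same pattern as in the question runs without the RuntimeError, because get_event_loop() called from inside the running coroutine resolves to the loop that drives it:
import asyncio

async def test():
    # Inside a running coroutine, get_event_loop() returns the driving loop,
    # so asyncio.sleep() can schedule its wake-up call on it.
    assert asyncio.get_event_loop() is loop
    await asyncio.sleep(1)

if __name__ == '__main__':
    asyncio.set_event_loop(None)     # no "current" loop for the main thread
    loop = asyncio.new_event_loop()
    loop.run_until_complete(test())  # completes without RuntimeError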

python tempfile and multiprocessing pool error

I'm experimenting with Python's multiprocessing. I struggled with a bug in my code and managed to narrow it down. However, I still don't know why this happens. What I'm posting is just sample code. If I import the tempfile module and change tempdir, the code crashes at pool creation. I'm using Python 2.7.5.
Here's the code
from multiprocessing import Pool
import tempfile

tempfile.tempdir = "R:/"  # REMOVING THIS LINE FIXES THE ERROR

def f(x):
    return x*x

if __name__ == '__main__':
    pool = Pool(processes=4)           # start 4 worker processes
    result = pool.apply_async(f, [10]) # evaluate "f(10)" asynchronously
    print result.get(timeout=1)        # prints "100" unless your computer is *very* slow
    print pool.map(f, range(10))       # prints "[0, 1, 4,..., 81]"
Here's the error:
R:\>mp_pool_test.py
Traceback (most recent call last):
File "R:\mp_pool_test.py", line 11, in <module>
pool = Pool(processes=4) # start 4 worker processes
File "C:\Python27\lib\multiprocessing\__init__.py", line 232, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild)
File "C:\Python27\lib\multiprocessing\pool.py", line 138, in __init__
self._setup_queues()
File "C:\Python27\lib\multiprocessing\pool.py", line 233, in _setup_queues
self._inqueue = SimpleQueue()
File "C:\Python27\lib\multiprocessing\queues.py", line 351, in __init__
self._reader, self._writer = Pipe(duplex=False)
File "C:\Python27\lib\multiprocessing\__init__.py", line 107, in Pipe
return Pipe(duplex)
File "C:\Python27\lib\multiprocessing\connection.py", line 223, in Pipe
1, obsize, ibsize, win32.NMPWAIT_WAIT_FOREVER, win32.NULL
WindowsError: [Error 123] The filename, directory name, or volume label syntax is incorrect
This code works fine.
from multiprocessing import Pool
import tempfile as TF

TF.tempdir = "R:/"

def f(x):
    return x*x

if __name__ == '__main__':
    print("test")
The bizarre thing is that in both cases I never actually use the tempdir I set, yet the version with the Pool doesn't work for some reason.
Interesting one: it looks like you have a name collision, from what I can see in
"C:\Program Files\PYTHON\Lib\multiprocessing\connection.py"
It seems that multiprocessing is using tempfile as well.
That behavior should not happen, but it looks to me like the problem is in line 66 of connection.py:
elif family == 'AF_PIPE':
    return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' %
                           (os.getpid(), _mmap_counter.next()))
I am still poking at this. I looked at globals after importing tempfile and then tempfile as TF; different names exist, but now I am wondering about references, so I am trying to figure out whether they point to the same thing.
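A quick way to see the collision (a hypothetical reproduction of what that mktemp() call ends up producing once tempfile.tempdir is overridden; the random suffix will differ):
import os
import tempfile

tempfile.tempdir = "R:/"  # same override as in the question

# mktemp() joins its prefix onto tempfile.gettempdir(), so the pipe address
# multiprocessing builds is no longer a valid \\.\pipe\... name:
addr = tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' % (os.getpid(), 0))
print(addr)  # e.g. R:/\\.\pipe\pyc-1234-0-xxxxxx, which the named-pipe creation rejects with error 123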
