Asyncio in coroutine RuntimeError: no running event loop - python

I'm writing multi-process code, which runs perfectly in Python 3.7. I want one of the parallel processes to execute an I/O operation that takes forever using asyncio in order to get better performance, but I have not been able to get it to run.
Ubuntu 18.04, Python 3.7, AsyncIO, pipenv (all pip libraries installed)
The particular method runs as expected using multithreading, which is what I want to replace with asyncio.
I have googled and tried creating the loop in the main() function and now only in the intended coroutine, have looked at examples and read about this new async way of getting things done, with no results so far.
The following is the app.py code, which is executed with: python app.py
import sys
import traceback
import logging
import pdb
import asyncio

from config import DEBUG
from config import log_config
from <some-module> import <some-class>

if DEBUG:
    logging.config.dictConfig(log_config())
else:
    logging.basicConfig(
        level=logging.DEBUG, format='%(relativeCreated)6d %(threadName)s %(message)s')

logger = logging.getLogger(__name__)

def main():
    try:
        <some> = <some-class>([
            'some-data1.csv',
            'some-data2.csv'
        ])
        <some>.run()
    except:
        traceback.print_exc()
        pdb.post_mortem()
    sys.exit(0)

if __name__ == '__main__':
    asyncio.run(main())
Here is the code where the class in question is defined (method bodies omitted):
class <some-class>:
    _sql_client = SQLServer()
    _blob_client = BlockBlobStore()
    _keys = KeyVault()
    _data_source = _keys.fetch('some-data')

    # Multiprocessing
    _manager = mp.Manager()
    _ns = _manager.Namespace()

    def __init__(self, list_of_collateral_files: list) -> None:

    @timeit
    def _get_filter_collateral(self, ns: mp.managers.NamespaceProxy) -> None:

    @timeit
    def _get_hours(self, ns: mp.managers.NamespaceProxy) -> None:

    @timeit
    def _load_original_bids(self, ns: mp.managers.NamespaceProxy) -> None:

    @timeit
    def _merge_bids_with_hours(self, ns: mp.managers.NamespaceProxy) -> None:

    @timeit
    def _get_collaterial_per_month(self, ns: mp.managers.NamespaceProxy) -> None:

    @timeit
    def _calc_bid_per_path(self) -> None:

    @timeit
    def run(self) -> None:
The method containing the async code is here:
def _get_filter_collateral(self, ns: mp.managers.NamespaceProxy) -> None:
    all_files = self._blob_client.download_blobs(self._list_of_blob_files)
    _all_dfs = pd.DataFrame()

    async def read_task(file_: str) -> None:
        nonlocal _all_dfs
        df = pd.read_csv(StringIO(file_.content))
        _all_dfs = _all_dfs.append(df, sort=False)

    tasks = []
    loop = asyncio.new_event_loop()
    for file_ in all_files:
        tasks.append(asyncio.create_task(read_task(file_)))
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()

    _all_dfs['TOU'] = _all_dfs['TOU'].map(lambda x: 'OFFPEAK' if x == 'OFF' else 'ONPEAK')
    ns.dfs = _all_dfs
And the method that calls this sequence, including the async method, is:
def run(self) -> None:
    extract = []
    extract.append(mp.Process(target=self._get_filter_collateral, args=(self._ns, )))
    extract.append(mp.Process(target=self._get_hours, args=(self._ns, )))
    extract.append(mp.Process(target=self._load_original_bids, args=(self._ns, )))

    # Start the parallel processes
    for process in extract:
        process.start()

    # Wait for the database processes to end
    extract[1].join()
    extract[2].join()

    # Merge both database results
    self._merge_bids_with_hours(self._ns)
    extract[0].join()

    self._get_collaterial_per_month(self._ns)
    self._calc_bid_per_path()
    self._save_reports()
    self._upload_data()
These are the errors I get:
Process Process-2:
Traceback (most recent call last):
File "<some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "<some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "<some-path>/src/azure/application/utils/lib.py", line 10, in timed
result = method(*args, **kwargs)
File "<some-path>/src/azure/application/caiso/main.py", line 104, in _get_filter_collateral
tasks.append(asyncio.create_task(read_task(file_)))
File "<some-path>/.pyenv/versions/3.7.4/lib/python3.7/asyncio/tasks.py", line 350, in create_task
loop = events.get_running_loop()
RuntimeError: no running event loop
<some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/process.py:313: RuntimeWarning: coroutine '<some-class>._get_filter_collateral.<locals>.read_task' was never awaited
traceback.print_exc()
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
DEBUG Calculating monthly collateral...
Traceback (most recent call last):
File "app.py", line 25, in main
caiso.run()
File "<some-path>/src/azure/application/utils/lib.py", line 10, in timed
result = method(*args, **kwargs)
File "<some-path>/src/azure/application/caiso/main.py", line 425, in run
self._get_collaterial_per_month(self._ns)
File "<some-path>/src/azure/application/utils/lib.py", line 10, in timed
result = method(*args, **kwargs)
File "<some-path>/src/azure/application/caiso/main.py", line 196, in _get_collaterial_per_month
credit_margin = ns.dfs
File "<some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/managers.py", line 1122, in __getattr__
return callmethod('__getattribute__', (key,))
File "<some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/managers.py", line 834, in _callmethod
raise convert_to_error(kind, result)
AttributeError: 'Namespace' object has no attribute 'dfs'
> <some-path>/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/managers.py(834)_callmethod()
-> raise convert_to_error(kind, result)
(Pdb)

As it seems from the traceback log, you are trying to add tasks to an event loop that is not running:
/.pyenv/versions/3.7.4/lib/python3.7/multiprocessing/process.py:313:
RuntimeWarning: coroutine
'<some-class>._get_filter_collateral.<locals>.read_task' was never
awaited
The loop was just created and it is not running yet, therefore asyncio is unable to attach tasks to it.
The following example reproduces the same error: it adds tasks and then tries to wait for all of them to finish:
import asyncio

async def func(num):
    print('My name is func {0}...'.format(num))

loop = asyncio.get_event_loop()
tasks = list()
for i in range(5):
    tasks.append(asyncio.create_task(func(i)))
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
Results with:
Traceback (most recent call last):
File "C:/tmp/stack_overflow.py", line 42, in <module>
tasks.append(asyncio.create_task(func(i)))
File "C:\Users\Amiram\AppData\Local\Programs\Python\Python37-32\lib\asyncio\tasks.py", line 324, in create_task
loop = events.get_running_loop()
RuntimeError: no running event loop
sys:1: RuntimeWarning: coroutine 'func' was never awaited
Nonetheless the solution is pretty simple: you just need to add the tasks to the created loop, instead of asking asyncio to do it.
The only change needed is in the following line:
tasks.append(asyncio.create_task(func(i)))
Change the creation of the task from asyncio to the newly created loop; you are able to do this because it is your own loop, unlike asyncio, which searches for a running one.
So the new line should look like this:
tasks.append(loop.create_task(func(i)))
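Putting that together, the first example with this single change runs as expected:
import asyncio

async def func(num):
    print('My name is func {0}...'.format(num))

loop = asyncio.get_event_loop()
tasks = list()
for i in range(5):
    # Create the task on the explicit loop instead of looking for a running one
    tasks.append(loop.create_task(func(i)))
loop.run_until_complete(asyncio.wait(tasks))
loop.close()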
Another solution could be to run an async function and create the tasks there (because the loop is already running at that point, asyncio is able to attach tasks to it):
import asyncio

async def func(num):
    print('Starting func {0}...'.format(num))
    await asyncio.sleep(0.1)
    print('Ending func {0}...'.format(num))

loop = asyncio.get_event_loop()

async def create_tasks_func():
    tasks = list()
    for i in range(5):
        tasks.append(asyncio.create_task(func(i)))
    await asyncio.wait(tasks)

loop.run_until_complete(create_tasks_func())
loop.close()
This simple change results in:
Starting func 0...
Starting func 1...
Starting func 2...
Starting func 3...
Starting func 4...
Ending func 0...
Ending func 2...
Ending func 4...
Ending func 1...
Ending func 3...
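Applied to the method from the question, a hedged sketch of the same idea (assuming Python 3.7 and keeping the pandas logic untouched; the read_all helper is only an illustrative wrapper) could look like this:
def _get_filter_collateral(self, ns: mp.managers.NamespaceProxy) -> None:
    all_files = self._blob_client.download_blobs(self._list_of_blob_files)
    _all_dfs = pd.DataFrame()

    async def read_task(file_: str) -> None:
        nonlocal _all_dfs
        df = pd.read_csv(StringIO(file_.content))
        _all_dfs = _all_dfs.append(df, sort=False)

    async def read_all() -> None:
        # The loop is running while this coroutine executes,
        # so asyncio.create_task can find it
        await asyncio.wait([asyncio.create_task(read_task(f)) for f in all_files])

    loop = asyncio.new_event_loop()
    loop.run_until_complete(read_all())
    loop.close()

    _all_dfs['TOU'] = _all_dfs['TOU'].map(lambda x: 'OFFPEAK' if x == 'OFF' else 'ONPEAK')
    ns.dfs = _all_dfs
Note that pd.read_csv is a blocking call, so the tasks above still effectively run one after another; this removes the RuntimeError but does not by itself make the reads concurrent.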

Use asyncio.ensure_future instead. See https://docs.python.org/3/library/asyncio-future.html#asyncio.ensure_future
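A minimal sketch of that, assuming Python 3.7 (newer versions deprecate and eventually remove the explicit loop argument):
import asyncio

async def func(num):
    print('My name is func {0}...'.format(num))

loop = asyncio.new_event_loop()
# ensure_future accepts an explicit loop here, so no running loop is required
tasks = [asyncio.ensure_future(func(i), loop=loop) for i in range(5)]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()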

Related

gcp pubsub-lite subscription python "AuthMetadataPluginCallback \"<google.auth.transport.grpc.AuthMetadataPlugin object at " raised exception!" error

I am using the Python pubsublite client (async version) for subscribing from Pub/Sub Lite.
I get the error below intermittently:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/grpc/_plugin_wrapping.py", line 89, in __call__
self._metadata_plugin(
File "/usr/local/lib/python3.10/site-packages/google/auth/transport/grpc.py", line 101, in __call__
callback(self._get_authorization_headers(context), None)
File "/usr/local/lib/python3.10/site-packages/google/auth/transport/grpc.py", line 87, in _get_authorization_headers
self._credentials.before_request(
File "/usr/local/lib/python3.10/site-packages/google/auth/credentials.py", line 134, in before_request
self.apply(headers)
File "/usr/local/lib/python3.10/site-packages/google/auth/credentials.py", line 110, in apply
_helpers.from_bytes(token or self.token)
File "/usr/local/lib/python3.10/site-packages/google/auth/_helpers.py", line 130, in from_bytes
raise ValueError("{0!r} could not be converted to unicode".format(value))
ValueError: None could not be converted to unicode
I don't use the GOOGLE_APPLICATION_CREDENTIALS env variable to specify credentials; instead I do as below (I don't want to write credentials to a file on the AWS host).
import asyncio

from google.cloud.pubsublite.cloudpubsub import AsyncSubscriberClient
from google.cloud.pubsublite.types import (
    CloudRegion,
    CloudZone,
    FlowControlSettings,
    SubscriptionPath,
)
from google.oauth2 import service_account


class AsyncTimedIterable:
    def __init__(self, iterable, poll_timeout=90):
        class AsyncTimedIterator:
            def __init__(self):
                self._iterator = iterable.__aiter__()

            async def __anext__(self):
                try:
                    result = await asyncio.wait_for(
                        self._iterator.__anext__(), int(poll_timeout)
                    )
                    if not result:
                        raise StopAsyncIteration
                    return result
                except asyncio.TimeoutError as e:
                    raise e

        self._factory = AsyncTimedIterator

    def __aiter__(self):
        return self._factory()


# TODO add project info below
location = CloudZone(CloudRegion("region"), "zone")
subscription_path = SubscriptionPath("project_number", location, "subscription_id")

# TODO add service account details
gcp_creds = {}


async def async_receive_from_subscription(per_partition_count=100):
    # Configure when to pause the message stream for more incoming messages based on the
    # maximum size or number of messages that a single-partition subscriber has received,
    # whichever condition is met first.
    per_partition_flow_control_settings = FlowControlSettings(
        # 1,000 outstanding messages. Must be >0.
        messages_outstanding=per_partition_count,
        # 10 MiB. Must be greater than the allowed size of the largest message (1 MiB).
        bytes_outstanding=10 * 1024 * 1024,
    )

    async with AsyncSubscriberClient(
        credentials=service_account.Credentials.from_service_account_info(gcp_creds)
    ) as async_subscriber_client:
        message_iterator = await async_subscriber_client.subscribe(
            subscription_path,
            per_partition_flow_control_settings=per_partition_flow_control_settings,
        )
        timed_iter = AsyncTimedIterable(message_iterator, 90)
        async for message in timed_iter:
            yield message


async def main():
    async for message in async_receive_from_subscription(per_partition_count=100_000):
        print(message.data)


if __name__ == "__main__":
    asyncio.run(main())
When I went through the files in the stack trace, I saw a code comment like the one below in one of the files:
# The plugin may be invoked on a thread created by Core, which will not
# have the context propagated. This context is stored and installed in
# the thread invoking the plugin.
Is it because the credentials I set are not being sent to another thread when it is created?

Join timeout in multiprocessing

I have a dummy example to which I want to apply multiprocessing. Consider a scenario where you have a stream of numbers (which I call frames) coming in one by one, and I want to assign each one to whichever single process is currently available. So I am creating 4 processes that run a while loop, checking whether there is any element in the queue and then applying a function to it.
The problem is that when I join, it gets stuck in the while loop, even though I end the loop's condition before joining. Somehow it still gets stuck inside it.
Code:
# step 1, 4 processes
import multiprocessing as mp
import os
import time

class MpListOperations:
    def __init__(self):
        self.results_queue = mp.Manager().Queue()
        self.frames_queue = mp.Manager().Queue()
        self.flag = mp.Manager().Value(typecode='b', value=True)
        self.list_nums = list(range(0, 5000))

    def process_list(self):
        print(f"Process id {os.getpid()} started")
        while self.flag.value:
            # print(self.flag.value)
            if self.frames_queue.qsize():
                self.results_queue.put(self.frames_queue.get()**2)

    def create_processes(self, no_of_processes=mp.cpu_count()):
        print("Creating Processes")
        self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]

    def start_processes(self):
        print(f"starting processes")
        for process in self.processes:
            process.start()

    def join_process(self):
        print("Joining Processes")
        while True:
            if not self.frames_queue.qsize():
                self.flag.value = False
                print("JOININNG HERE")
                for process in self.processes:
                    exit_code = process.join()
                    print(exit_code)
                print("BREAKING DONE")
                break

    def stream_frames(self):
        print("Streaming Frames")
        for frame in self.list_nums:
            self.frames_queue.put(frame)

if __name__ == "__main__":
    start = time.time()
    mp_ops = MpListOperations()
    mp_ops.create_processes()
    mp_ops.start_processes()
    mp_ops.stream_frames()
    mp_ops.join_process()
    print(time.time() - start)
Now if I add a timeout parameter to join, even 0, i.e. exit_code = process.join(0), it works. I want to understand, if this code is correct, what the value of the timeout should be in this scenario. Why does it work with a timeout and not without one? What is the proper way to implement multiprocessing here?
If you look at the documentation for a managed queue you will see that the qsize method only returns an approximate size. I would therefore not use it for testing when all the items have been taken off the frames queue. Presumably you want to let the processes run until all frames have been processed.
The simplest way I know would be to put N sentinel items on the frames queue after the actual frames have been put, where N is the number of processes getting from the queue. A sentinel item is a special value that cannot be mistaken for an actual frame and signals to the process that there are no more items for it to get from the queue (i.e. a quasi end-of-file item). In this case we can use None as the sentinel item. Each process then just continues to do get operations on the queue until it sees a sentinel item and then terminates. There is therefore no need for the self.flag attribute.
Here is the updated and simplified code. I have made some other minor changes that have been commented:
import multiprocessing as mp
import os
import time

class MpListOperations:
    def __init__(self):
        # Only create one manager process:
        manager = mp.Manager()
        self.results_queue = manager.Queue()
        self.frames_queue = manager.Queue()
        # No need to convert range to a list:
        self.list_nums = range(0, 5000)

    def process_list(self):
        print(f"Process id {os.getpid()} started")
        while True:
            frame = self.frames_queue.get()
            if frame is None: # Sentinel?
                # Yes, we are done:
                break
            self.results_queue.put(frame ** 2)

    def create_processes(self, no_of_processes=mp.cpu_count()):
        print("Creating Processes")
        self.no_of_processes = no_of_processes
        self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]

    def start_processes(self):
        print("Starting Processes")
        for process in self.processes:
            process.start()

    def join_processes(self):
        print("Joining Processes")
        for process in self.processes:
            # join returns None:
            process.join()

    def stream_frames(self):
        print("Streaming Frames")
        for frame in self.list_nums:
            self.frames_queue.put(frame)
        # Put sentinels:
        for _ in range(self.no_of_processes):
            self.frames_queue.put(None)

if __name__ == "__main__":
    start = time.time()
    mp_ops = MpListOperations()
    mp_ops.create_processes()
    mp_ops.start_processes()
    mp_ops.stream_frames()
    mp_ops.join_processes()
    print(time.time() - start)
Prints:
Creating Processes
Starting Processes
Process id 28 started
Process id 29 started
Streaming Frames
Process id 33 started
Process id 31 started
Process id 38 started
Process id 44 started
Process id 42 started
Process id 45 started
Joining Processes
2.3660173416137695
Note for Windows
I have modified method start_processes to temporarily set attribute self.processes to None:
def start_processes(self):
    print("Starting Processes")
    processes = self.processes
    # Don't try to pickle list of processes:
    self.processes = None
    for process in processes:
        process.start()
    # Restore attribute:
    self.processes = processes
Otherwise under Windows we get a pickle error trying to serialize/deserialize a list of processes containing two or more multiprocessing.Process instances. The error is "TypeError: cannot pickle 'weakref' object." This can be demonstrated with the following code where we first try to pickle a list of 1 process and then a list of 2 processes:
import multiprocessing as mp
import os

class Foo:
    def __init__(self, number_of_processes):
        self.processes = [mp.Process(target=self.worker) for _ in range(number_of_processes)]
        self.start_processes()
        self.join_processes()

    def start_processes(self):
        processes = self.processes
        for process in self.processes:
            process.start()

    def join_processes(self):
        for process in self.processes:
            process.join()

    def worker(self):
        print(f"Process id {os.getpid()} started")
        print(f"Process id {os.getpid()} ended")

if __name__ == "__main__":
    foo = Foo(1)
    foo = Foo(2)
Prints:
Process id 7540 started
Process id 7540 ended
Traceback (most recent call last):
File "C:\Booboo\test\test.py", line 26, in <module>
foo = Foo(2)
File "C:\Booboo\test\test.py", line 7, in __init__
self.start_processes()
File "C:\Booboo\test\test.py", line 13, in start_processes
process.start()
File "C:\Program Files\Python38\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Program Files\Python38\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python38\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Process id 18152 started
Process id 18152 ended
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
The worker processes are stuck in the queue's get() call inside their loop. This is because multiple processes could see that the queue wasn't empty, but only one of them was able to get the last item. The remaining processes are waiting for the next item to become available from the queue.
You might need to add a Lock around reading the size of the Queue object and getting an item from it (a sketch of that is at the end of this answer).
Alternatively, you can avoid reading the size of the queue entirely by using queue.get() with a timeout, which lets you check the flag regularly:
import queue

TIMEOUT = 1 # seconds

class MpListOperations:
    #[...]
    def process_list(self):
        print(f"Process id {os.getpid()} started")
        previous = self.flag.value
        while self.flag.value:
            try:
                got = self.frames_queue.get(timeout=TIMEOUT)
            except queue.Empty:
                pass
            else:
                print(f"Gotten {got}")
                self.results_queue.put(got**2)
            _next = self.flag.value
            if previous != _next:
                print(f"Flag change: {_next}")
$ python ./test_mp.py
Creating Processes
starting processes
Process id 36566 started
Streaming Frames
Process id 36565 started
Process id 36564 started
Process id 36570 started
Process id 36567 started
Gotten 0
Process id 36572 started
Gotten 1
Gotten 2
Gotten 3
Process id 36579 started
Gotten 4
Gotten 5
Gotten 6
Process id 36583 started
Gotten 7
# [...]
Gotten 4997
Joining Processes
Gotten 4998
Gotten 4999
JOININNG HERE
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Flag change: False
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
Exit code : None
BREAKING DONE
1.4375360012054443
Alternatively, using a multiprocessing.Pool object:
def my_func(arg):
    time.sleep(0.002)
    return arg**2

def get_input():
    for i in range(5000):
        yield i
        time.sleep(0.001)

if __name__ == "__main__":
    start = time.time()
    mp_pool = mp.Pool()
    result = mp_pool.map(my_func, get_input())
    mp_pool.close()
    mp_pool.join()
    print(len(result))
    print(f"Duration: {time.time()-start}")
Giving:
$ python ./test_mp.py
5000
Duration: 6.847279787063599
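As for the earlier suggestion of a Lock around the size check and the get, here is a minimal sketch of what that could look like (purely illustrative; it assumes an extra self.lock = mp.Manager().Lock() is added in __init__ of the original MpListOperations):
def process_list(self):
    print(f"Process id {os.getpid()} started")
    while self.flag.value:
        frame = None
        # Hold the lock across the size check and the get so that two
        # processes cannot both see one remaining item and then race for it
        with self.lock:
            if self.frames_queue.qsize():
                frame = self.frames_queue.get()
        if frame is not None:
            self.results_queue.put(frame ** 2)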

Python signals: ValueError: signal only works in main thread

I have this python code:
from pytchat import LiveChat
import time
import requests
import threading

video_id = None
liveChat = None

def runChat():
    global liveChat
    liveChat = LiveChat(video_id)
    while liveChat.is_alive():
        try:
            chatdata = liveChat.get()
            for c in chatdata.items:
                print(f"{c.datetime} [{c.author.channelId}]- {c.message}")
            chatdata.tick()
        except KeyboardInterrupt:
            liveChat.terminate()
            break

def checkId():
    threading.Timer(1.0, checkId).start()
    global video_id
    req = requests.get("http://localhost/getId")
    if req.status_code == 200:
        vidId = req.text
        if vidId != "null" and vidId != video_id:
            video_id = vidId
            print(f"got id: {video_id}")
            if liveChat is not None:
                liveChat.terminate()
            runChat()

def main():
    checkId()

if __name__ == '__main__':
    main()
That utilizes the pytchat library (https://github.com/taizan-hokuto/pytchat).
As you can see, I want to dynamically change the videoId that the chat is bound to, but with the code above this is the result:
root@vps:~# python3 liveChat.py
got id: a10TAdVZmOq # First id, the library works, we can capture chat messages.
got id: NMre6IAPoLI # After second id appears, there is this exception:
Exception in thread Thread-52:
Traceback (most recent call last):
File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.6/threading.py", line 1182, in run
self.function(*self.args, **self.kwargs)
File "liveChat.py", line 36, in checkId
runChat()
File "liveChat.py", line 11, in runChat
liveChat = LiveChat(video_id)
File "/usr/local/lib/python3.6/site-packages/pytchat/core_multithread/livechat.py", line 110, in __init__
signal.signal(signal.SIGINT, lambda a, b: self.terminate())
File "/usr/local/lib/python3.6/signal.py", line 47, in signal
handler = _signal.signal(_enum_to_int(signalnum), _enum_to_int(handler))
ValueError: signal only works in main thread
How can this be solved since signals only work in main thread?
Adding the parameter interruptable=False to the LiveChat object solved the issue.
def runChat():
    global liveChat
    liveChat = LiveChat(video_id, interruptable=False) # <---THIS
    while liveChat.is_alive():
        # ....
https://github.com/taizan-hokuto/pytchat/issues/8
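For context, the underlying limitation is easy to reproduce without pytchat: CPython only allows signal handlers to be installed from the main thread, so calling signal.signal() from a worker thread raises exactly this error. A minimal sketch:
import signal
import threading

def install_handler():
    # Raises ValueError: signal only works in main thread,
    # because handlers may only be installed from the main thread
    signal.signal(signal.SIGINT, lambda signum, frame: None)

t = threading.Thread(target=install_handler)
t.start()
t.join()
LiveChat fails as soon as it is constructed from the threading.Timer callback because its __init__ calls signal.signal(); interruptable=False works presumably because it skips that call.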

python multiprocessing process pool fails to find asynced function

Pretty simple multiprocessing example. Goals:
Create a pool of process workers using mp.Pool
Do some sort of transformation (here a simple string operation on line)
Push the transformed line to mp.Queue
Further process data from that mp.Queue in the main program afterwards
So let's do this:
import multiprocessing as mp
Init async processes with an mp.Queue:
def process_pool_init_per_process(q):
    global mp_queue
    mp_queue = q
Really init the mp_pool:
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))
This gets called for every line to be processed asynchronously:
def process_async_main(line):
    print(line)
    q.put(line + '_asynced')
And now let's start it using apply_async:
line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line))
mp_resp = handler.get()
And read from the queue:
while not q.empty():
    print(q.get()) # This should be the initial line
Fails with:
python3 mp_process_example.py
Process ForkPoolWorker-1:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
task = get()
File "/usr/lib/python3.6/multiprocessing/queues.py", line 337, in get
return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'process_async_main' on <module '__main__' from 'mp_process_example.py'>
The question is: Why is multiprocessing not finding the main class?
Complete code to reproduce:
import multiprocessing as mp

##### Init async processes
def process_pool_init_per_process(q):
    global mp_queue
    mp_queue = q

# Really init the mp_pool
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))

# This gets called for every line to be processed async
def process_async_main(line):
    print(line)
    q.put(line + '_asynced')

line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line))
mp_resp = handler.get()

while not q.empty():
    print(q.get()) # This should be the initial line
Ok... I've got it. The pool workers are created (forked) when mp.Pool(...) is called, which happens before process_async_main is defined, so the workers' copy of __main__ does not contain that function yet, and looking it up in the worker fails. Keeping the function to be run asynchronously in a separate module, imported before the pool is created, avoids this.
Writing the code like this:
asynced.py
##### Init async processes
def process_pool_init_per_process(q):
    global mp_queue
    mp_queue = q

##### Function to be asynced
def process_async_main(line):
    print(line)
    mp_queue.put(line + '_asynced')
And then mp_process_example.py:
import multiprocessing as mp
from asynced import process_async_main, process_pool_init_per_process

# Really init the mp_pool
no_of_processes = 4
q = mp.Queue()
mp_pool = mp.Pool(no_of_processes, process_pool_init_per_process, (q,))

line = "Hi, this is a test to test mp_queues with mp process pools"
handler = mp_pool.apply_async(process_async_main, (line,))
mp_resp = handler.get()

while not q.empty():
    print(q.get()) # This should be the initial line + "_asynced"
Works as expected:
$ python3 mp_process_example.py
Hi, this is a test to test mp_queues with mp process pools
Hi, this is a test to test mp_queues with mp process pools_asynced
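Alternatively, a hedged sketch (assuming the default fork start method on Linux, as in the traceback above): keeping everything in one file but defining the worker function before the pool is created should also work, since the forked workers then inherit a __main__ that already contains it:
import multiprocessing as mp

def process_pool_init_per_process(q):
    global mp_queue
    mp_queue = q

# Define the async-executed function BEFORE creating the pool,
# so the forked workers inherit a __main__ that already contains it
def process_async_main(line):
    print(line)
    mp_queue.put(line + '_asynced')

if __name__ == '__main__':
    q = mp.Queue()
    mp_pool = mp.Pool(4, process_pool_init_per_process, (q,))
    line = "Hi, this is a test to test mp_queues with mp process pools"
    handler = mp_pool.apply_async(process_async_main, (line,))
    handler.get()
    while not q.empty():
        print(q.get())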

django celery and asyncio - loop argument must agree with Future approx every 3 mins

I'm using django celery and celery beat to run periodic tasks. I run a task every minute to get some data via SNMP.
My function uses asyncio, as per the below. I have put a check in the code to see whether the loop is closed and to create a new one if so.
What seems to be happening is that every few tasks I get a failure, and in the Django task-results DB I have the traceback below. There seems to be a failure around every 3 minutes, but the runs in between succeed every minute without failures.
Error:
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/celery/app/trace.py", line 374, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/celery/app/trace.py", line 629, in __protected_call__
return self.run(*args, **kwargs)
File "/itapp/itapp/monitoring/tasks.py", line 32, in link_data
return get_link_data()
File "/itapp/itapp/monitoring/jobs/link_monitoring.py", line 209, in get_link_data
done, pending = loop.run_until_complete(asyncio.wait(tasks))
File "/usr/local/lib/python3.6/asyncio/base_events.py", line 468, in run_until_complete
return future.result()
File "/usr/local/lib/python3.6/asyncio/tasks.py", line 311, in wait
fs = {ensure_future(f, loop=loop) for f in set(fs)}
File "/usr/local/lib/python3.6/asyncio/tasks.py", line 311, in <setcomp>
fs = {ensure_future(f, loop=loop) for f in set(fs)}
File "/usr/local/lib/python3.6/asyncio/tasks.py", line 514, in ensure_future
raise ValueError('loop argument must agree with Future')
ValueError: loop argument must agree with Future
Function:
async def retrieve_data(link):
    poll_interval = 60
    results = []

    # credentials:
    link_mgmt_ip = link.mgmt_ip
    link_index = link.interface_index
    snmp_user = link.device_circuit_subnet.device.snmp_data.name
    snmp_auth = link.device_circuit_subnet.device.snmp_data.auth
    snmp_priv = link.device_circuit_subnet.device.snmp_data.priv
    hostname = link.device_circuit_subnet.device.hostname

    print('polling data for {} on {}'.format(hostname, link_mgmt_ip))

    # first poll for speeds
    download_speed_data_poll1 = snmp_get(link_mgmt_ip, down_speed_oid % link_index, snmp_user, snmp_auth, snmp_priv)

    # check we were able to poll
    if 'timeout' in str(get_snmp_value(download_speed_data_poll1)).lower():
        return 'timeout trying to poll {} - {}'.format(hostname, link_mgmt_ip)

    upload_speed_data_poll1 = snmp_get(link_mgmt_ip, up_speed_oid % link_index, snmp_user, snmp_auth, snmp_priv)

    # wait for poll interval
    await asyncio.sleep(poll_interval)

    # second poll for speeds
    download_speed_data_poll2 = snmp_get(link_mgmt_ip, down_speed_oid % link_index, snmp_user, snmp_auth, snmp_priv)
    upload_speed_data_poll2 = snmp_get(link_mgmt_ip, up_speed_oid % link_index, snmp_user, snmp_auth, snmp_priv)

    # create deltas for speed
    down_delta = int(get_snmp_value(download_speed_data_poll2)) - int(get_snmp_value(download_speed_data_poll1))
    up_delta = int(get_snmp_value(upload_speed_data_poll2)) - int(get_snmp_value(upload_speed_data_poll1))

    # set speed results
    download_speed = round((down_delta * 8 / poll_interval) / 1048576)
    upload_speed = round((up_delta * 8 / poll_interval) / 1048576)

    # get description and interface state
    int_desc = snmp_get(link_mgmt_ip, int_desc_oid % link_index, snmp_user, snmp_auth, snmp_priv)
    int_state = snmp_get(link_mgmt_ip, int_state_oid % link_index, snmp_user, snmp_auth, snmp_priv)

    ...
    return results
def get_link_data():
    mgmt_ip = Subquery(
        DeviceCircuitSubnets.objects.filter(device_id=OuterRef('device_circuit_subnet__device_id'), subnet__subnet_type__poll=True).values('subnet__subnet')[:1])

    link_data = LinkTargets.objects.all() \
        .select_related('device_circuit_subnet') \
        .select_related('device_circuit_subnet__device') \
        .select_related('device_circuit_subnet__device__snmp_data') \
        .select_related('device_circuit_subnet__subnet') \
        .select_related('device_circuit_subnet__circuit') \
        .annotate(mgmt_ip=mgmt_ip)

    tasks = []
    loop = asyncio.get_event_loop()
    if asyncio.get_event_loop().is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(asyncio.new_event_loop())

    for link in link_data:
        tasks.append(asyncio.ensure_future(retrieve_data(link)))

    if tasks:
        start = time.time()
        done, pending = loop.run_until_complete(asyncio.wait(tasks))
        loop.close()
        results = []
        for completed_task in done:
            results.append(completed_task.result()[0])
        end = time.time()
        print("Poll time: {}".format(end - start))
        return 'Link data updated for {}'.format(' \n '.join(results))
    else:
        return 'no tasks defined'
From these URLs suggested by user4815162342:
https://medium.freecodecamp.org/a-guide-to-asynchronous-programming-in-python-with-asyncio-232e2afa44f6
When to use and when not to use Python 3.5 `await` ?
When running async functions, any input/output operation needs to be async compatible, except functions that run purely in memory (in my example, a regex query).
I.e. any function that needs to gather data from another source (a Django query in my example) and is not async compatible must be run in an executor.
I think I have now fixed my issues by running all Django DB calls in executors; I have not had an issue running the script ad hoc since.
However, I still have compatibility issues between Celery and asyncio (Celery is not yet compatible with asyncio, which throws up some errors, but not the errors I was previously seeing).
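For reference, here is a minimal sketch of the executor pattern described above (blocking_django_query is a hypothetical stand-in for a blocking ORM call, not part of the original code):
import asyncio

def blocking_django_query(link_id):
    # Hypothetical stand-in for a blocking Django ORM call
    return {'link': link_id}

async def retrieve_data(link_id):
    loop = asyncio.get_event_loop()
    # Run the blocking call in the default ThreadPoolExecutor
    # so the event loop is not blocked while it runs
    result = await loop.run_in_executor(None, blocking_django_query, link_id)
    return result

if __name__ == '__main__':
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    print(loop.run_until_complete(retrieve_data(1)))
    loop.close()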
