aiohttp ClientSession.get() async context manager behaves unexpectedly - Python

import asyncio
from aiohttp import ClientSession

async def download(session: ClientSession, link: str):
    print("init")
    async with session.get(link) as resp:
        print("hello")
        data = await resp.content.read()
        print("bye")
        return

async def amain():
    link = "https://www.google.com/"
    async with ClientSession() as session:
        tasks = []
        for _ in range(100_000):
            tasks.append(asyncio.create_task(download(session, link)))
            await asyncio.sleep(0)  # <------------ Label 1
        await asyncio.gather(*tasks)

if __name__ == '__main__':
    asyncio.run(amain())
If you comment out the "Label 1" line, you get unexpected output like:
init
init
...
...
init
and only then 'hello' and 'bye' mixed in, as expected.
But if that line is uncommented, you get 'init', 'hello', and 'bye' mixed together, as you would expect from async tasks.
Can anybody explain to me why the async with session.get(link) as resp: block holds every task back until all tasks have been created, when they are created without await asyncio.sleep(0)?
↓↓↓ behaves unexpectedly too:
async def amain():
    link = "https://www.google.com/"
    async with ClientSession() as session:
        await asyncio.gather(*(download(session, link) for _ in range(100_000)))
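For context, here is a minimal sketch (not from the original post) showing that tasks created with asyncio.create_task do not start running until the creating coroutine yields control to the event loop, and that each task then runs only up to its first await before the next one gets a turn:

import asyncio

async def worker(i: int):
    print(f"worker {i} started")   # runs the first time the event loop schedules the task
    await asyncio.sleep(0)         # first suspension point: hands control back to the loop
    print(f"worker {i} resumed")

async def main():
    tasks = [asyncio.create_task(worker(i)) for i in range(3)]
    print("all tasks created, none started yet")
    await asyncio.gather(*tasks)   # main() yields here, so the workers finally run

asyncio.run(main())

The output is "all tasks created, none started yet", then all three "started" lines, and only then the "resumed" lines, which mirrors seeing every "init" before any "hello" when the creating loop never awaits.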

Related

Aggregation of 2 RabbitMQ messages does not work properly (messages hanging unacked)

I need to listen for tasks on 2 queues, so I wrote the code below, but it has a problem. Currently it behaves like this: if the code is started when both queues are full, it works great. But if the queues were empty, or one of them was, the code reads messages but does not process them (does not send an ack, does not run the logic). The messages stay unacked until I stop the code. I see no reason for them to be unacked and unprocessed.
I can't understand what is wrong with the code. Maybe there is another way to aggregate 2 or more queues like this?
# task_processor.py
from aio_pika import IncomingMessage

class TaskProcessor:
    MAX_TASKS_PER_INSTANCE = 1

    def __init__(self):
        self._tasks = []

    def can_accept_new_task(self) -> bool:
        return len(self._tasks) < self.MAX_TASKS_PER_INSTANCE

    async def process(self, message: IncomingMessage):
        self._tasks.append(message)
        print(message.body)
        await message.ack()
        self._tasks.pop()

# main.py
import asyncio
from asyncio import QueueEmpty
from typing import Callable

import aio_pika
from aio_pika import RobustQueue
from dotenv import load_dotenv

load_dotenv()

from core.logger.logger import logger
from core.services.rabbitmq.task_processor.task_processor import TaskProcessor

async def get_single_task(queue: RobustQueue):
    while True:
        try:
            msg = await queue.get(timeout=3600)
            return msg
        except QueueEmpty:
            await asyncio.sleep(3)
        except asyncio.exceptions.TimeoutError:
            logger.warning('queue timeout error')
        except Exception as ex:
            logger.error(f"{queue} errored", exc_info=ex)

async def task_aggregator(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    while True:
        if should_take_new_task_cb():
            queue2, queue1 = queue1, queue2
            gen1 = get_single_task(queue1)
            gen2 = get_single_task(queue2)
            done, _ = await asyncio.wait([gen1, gen2], return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                result = item.result()
                yield result
        else:
            await asyncio.sleep(1)

async def tasks(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    async for task in task_aggregator(queue1, queue2, should_take_new_task_cb):
        yield task

async def main():
    connection = await aio_pika.connect_robust(
        f"amqp://user:password@host:port/vhost?heartbeat={180}"
    )
    channel1 = connection.channel()
    channel2 = connection.channel()
    await channel1.initialize()
    await channel2.initialize()
    queue1 = await channel1.get_queue('queue1')
    queue2 = await channel2.get_queue('queue2')
    task_processor = TaskProcessor()
    task_generator = tasks(queue1, queue2, task_processor.can_accept_new_task)
    while True:
        if task_processor.can_accept_new_task():
            task = await anext(task_generator)
            await task_processor.process(task)
        else:
            await asyncio.sleep(1)

if __name__ == '__main__':
    asyncio.run(main())
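One likely culprit is the asyncio.wait([gen1, gen2], return_when=asyncio.FIRST_COMPLETED) call: the bare coroutines get wrapped into tasks, and the task that is still pending is simply dropped on every iteration, so it can keep fetching messages in the background that nothing ever processes or acks. A minimal, generic sketch of an aggregator that never discards a pending fetch might look like this (plain asyncio, with stand-in zero-argument fetchers rather than the aio_pika objects from the post):

import asyncio
from typing import Any, AsyncIterator, Callable, Coroutine, TypeVar

T = TypeVar("T")

async def merge(*sources: Callable[[], Coroutine[Any, Any, T]]) -> AsyncIterator[T]:
    # Map each in-flight fetch back to the source it was started from.
    in_flight = {asyncio.create_task(src()): src for src in sources}
    while in_flight:
        done, _ = await asyncio.wait(set(in_flight), return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            src = in_flight.pop(task)
            yield task.result()
            # Immediately start the next fetch from the same source;
            # the other source's pending fetch is left running, not dropped.
            in_flight[asyncio.create_task(src())] = src

# Usage sketch (names from the question):
#   async for msg in merge(lambda: get_single_task(queue1), lambda: get_single_task(queue2)):
#       await task_processor.process(msg)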

How to execute async request with up to K workers without code duplication?

I have a project with many async functions (they do HTTP calls to a remote server).
The server can handle up to k simultaneous requests (it depends on which request I do).
I want to be able to easily implement "do these N requests with up to K workers". I could not find a way without either duplicating code or losing type hints.
How can I do it?
I tried to use a Semaphore but failed, mainly because I could not find a way to move this logic to the caller's scope.
Implementing the logic at the caller's scope (not working):
import asyncio
import time

async def sleep(s: int):
    await asyncio.sleep(s)
    print(f'task done in {s}s')
    return s

sem = asyncio.Semaphore(2)

# Expect to end in 8 seconds, ends in 6 (not the correct behavior)
start = time.time()
async with sem:
    res = await asyncio.gather(*[
        sleep(6),
        sleep(2),
        sleep(2),
        sleep(2),
        sleep(2),
        sleep(2),
    ])
end = time.time()
print(f"ended in {end-start}s")
Refactoring async functions (code duplication)
import asyncio
import time
from typing import Optional

async def sleep(s: int):
    await asyncio.sleep(s)

async def request_0(arg0: int, sem: Optional[asyncio.Semaphore] = None):
    if sem is not None:
        async with sem:
            await sleep(2)  # Do http call 0
    else:
        await sleep(2)  # Do http call 0

async def request_1(arg1: str, sem: Optional[asyncio.Semaphore] = None):
    if sem is not None:
        async with sem:
            await sleep(1)  # Do http call 1
    else:
        await sleep(1)  # Do http call 1

async def request_2(arg2: float, arg3: str, sem: Optional[asyncio.Semaphore] = None):
    if sem is not None:
        async with sem:
            await sleep(1)  # Do http call 2
    else:
        await sleep(1)  # Do http call 2

start = time.time()
async with asyncio.Semaphore(2) as sem:
    res = await asyncio.gather(*[
        request_0(arg0=0),
        request_1(arg1='0'),
        request_2(arg2=0, arg3='0'),
    ])
end = time.time()
print(f"ended in {end-start}s")
Enhancing functions with a decorator (losing type hints):
# type: ignore
import asyncio
import time
from typing import Callable, Coroutine, Optional, Any

async def sleep(s: int):
    await asyncio.sleep(s)

def semaphoreUseable(func):
    async def wrapper(*args, sem: Optional[asyncio.Semaphore] = None, **kwargs):
        if sem is not None:
            async with sem:
                return await func(*args, **kwargs)
        else:
            return await func(*args, **kwargs)
    return wrapper

@semaphoreUseable
async def request_0(arg0: int):
    await sleep(2)  # http call 0

@semaphoreUseable
async def request_1(arg1: str):
    await sleep(1)  # http call 1

@semaphoreUseable
async def request_2(arg2: float, arg3: str):
    await sleep(1)  # http call 2

async def main():
    start = time.time()
    async with asyncio.Semaphore(2) as sem:
        res = await asyncio.gather(*[
            request_0(arg0=0, sem=sem),
            request_1(arg1='0', sem=sem),
            request_2(arg2=0, arg3='0', sem=sem),
        ])
    end = time.time()
    print(f"ended in {end-start}s")

if __name__ == "__main__":
    asyncio.run(main())
Related question: How to type a function with Callable without losing keyword arguments? (I could not find a way to type the decorator solution.)
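For what it's worth, here is a minimal sketch of a decorator-style wrapper that keeps the type hints, assuming Python 3.10+ for typing.ParamSpec (this is my own sketch, not code from the question; request is a hypothetical stand-in for an HTTP call):

import asyncio
import functools
import time
from typing import Awaitable, Callable, ParamSpec, TypeVar

P = ParamSpec("P")
T = TypeVar("T")

def limited(sem: asyncio.Semaphore, func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
    """Wrap an async callable so each call holds the semaphore; the signature is preserved."""
    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        async with sem:
            return await func(*args, **kwargs)
    return wrapper

async def request(s: int) -> int:   # stand-in for an HTTP call
    await asyncio.sleep(s)
    return s

async def main() -> None:
    sem = asyncio.Semaphore(2)      # at most 2 calls in flight
    start = time.time()
    await asyncio.gather(*(limited(sem, request)(2) for _ in range(4)))
    print(f"ended in {time.time() - start:.1f}s")  # ~4s: 4 calls, 2 workers

if __name__ == "__main__":
    asyncio.run(main())

Because ParamSpec carries the wrapped function's parameters through, callers keep completion and type checking on the original arguments.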

How to correctly lock async generators?

I am trying to use an async generator as a wrapper for a shared connection
async def mygen():
    await init()
    connection = await open_connection()
    while True:
        data = yield
        await connection.send(data)

shared_gen = None

async def send_data(data):
    global shared_gen
    if not shared_gen:
        shared_gen = mygen()
        await shared_gen.asend(None)
    await shared_gen.asend(data)
Is the above code safe from race conditions? Is it possible for two asends to execute concurrently, or will the second one block implicitly until the generator is back at the yield step? Assume connection.send is not concurrency-safe.
Update:
I wrote a wrapper to help use it safely.
import asyncio
import contextlib

class Locked:
    def __init__(self, resource):
        self._resource = resource
        self._lock = asyncio.Lock()

    @contextlib.asynccontextmanager
    async def lock(self):
        async with self._lock:
            yield self._resource

async def send_data(locked_gen, data):
    async with locked_gen.lock() as gen:
        await gen.asend(data)

async def main():
    gen = mygen()
    await gen.asend(None)
    locked_gen = Locked(gen)
    ...
Is it possible for two asends to execute concurrently or the second one will block implicitly until the generator is ready in the yield step?
It is not possible for asend to be called concurrently, but trying to do so doesn't result in blocking. Instead, the second one will raise a RuntimeError, as demonstrated by the following example:
import asyncio

async def gen():
    while True:
        yield
        await asyncio.sleep(1)

async def main():
    ait = gen()
    await ait.asend(None)  # start the generator

    async def send():
        print('sending')
        await ait.asend(42)

    await asyncio.gather(send(), send())

asyncio.run(main())
To make the send block until the previous one finishes, you need an explicit lock around the await of asend:
async def main():
    ait = gen()
    await ait.asend(None)
    lock = asyncio.Lock()

    async def send():
        async with lock:
            print('sending')
            await ait.asend(42)

    await asyncio.gather(send(), send())

Asyncio python - TypeError: A Future, a coroutine or an awaitable is required

One of the async functions returns an async generator object. I added loop.run_until_complete(func()), but it still throws the error "TypeError: A Future, a coroutine or an awaitable is required". Below is the code. I'm trying to fetch records from Neo4j asynchronously. I got the async Neo4j class from GitHub. I'm new to this async concept.
from concurrent import futures
import neo4j
from neo4j import GraphDatabase, basic_auth
import time
import traceback
import asyncio

RETRY_WAITS = [0, 1, 4]  # How long to wait after each successive failure.

class Neo4j:
    """Neo4j database API."""

    def __init__(self, config, loop):
        self.config = config
        self.loop = loop
        self.executor = futures.ThreadPoolExecutor(max_workers=30)
        for retry_wait in RETRY_WAITS:
            try:
                self.init_driver()
                break
            except:
                if retry_wait == RETRY_WAITS[-1]:
                    raise
                else:
                    print('WARNING: retrying to Init DB; err:')
                    traceback.print_exc()
                    time.sleep(retry_wait)  # wait for 0, 1, 3... seconds.

    def init_driver(self):
        auth = basic_auth(self.config['user'], self.config['pass'])
        self.driver = GraphDatabase.driver(self.config['url'], auth=auth)

    async def afetch_start(self, query):
        session = self.driver.session(access_mode=neo4j.READ_ACCESS)

        def run():
            return session.run(query).records()

        return session, await self.loop.run_in_executor(self.executor, run)

    async def afetch_iterate(self, session, iter):
        def iterate():
            try:
                return next(iter)
            except StopIteration:
                return None

        while True:
            res = await self.loop.run_in_executor(self.executor, iterate)
            if res is None:
                return
            else:
                yield dict(res)

    async def afetch(self, query):
        for retry_wait in RETRY_WAITS:
            try:
                session, iter = await self.afetch_start(query)
                break
            except (BrokenPipeError, neo4j.exceptions.ServiceUnavailable) as e:
                if retry_wait == RETRY_WAITS[-1]:
                    raise
                else:
                    await asyncio.sleep(retry_wait)
                    await self.loop.run_in_executor(self.executor, self.init_driver)
        async for x in self.afetch_iterate(session, iter):
            yield x
        await self.loop.run_in_executor(self.executor, session.close)

    async def afetch_one(self, query):
        async for i in self.afetch(query):
            return i
        return None

    async def aexec(self, query):
        async for i in self.afetch(query):
            pass
        return

config = {'url': "bolt://localhost", 'user': 'neo4j', 'pass': 'pwd'}
loop = asyncio.get_event_loop()
n = Neo4j(config, loop)
loop.run_until_complete(n.afetch("MATCH(p:Person)-[:Acted_in]->(mv:Movies) RETURN p.name as actors"))
loop.close()
--EDIT
I have modified the code to work properly. The query returns 218K rows, and it takes 5 minutes to extract the complete list, while the same async operation in C# completes in just 2 seconds. It looks like the above code still doesn't run asynchronously.
It's very hard to tell what exactly happens without a reproducible example, but I'll take a guess. You probably pass an async generator object to the loop; you shouldn't do that. The way to work with async generators is to use async for. Here's an example:
import asyncio

async def func():  # async generator
    yield 1
    yield 2
    yield 3

async def main():
    async for i in func():  # get values from async generator
        print(i)

asyncio.run(main())  # can be used instead of loop.run_until_complete(main())
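Applied to the class from the question, that means iterating the async generator inside a coroutine and passing that coroutine, not the generator, to the event loop. A rough sketch of how the tail of the question's script might be rewritten (same config, loop, and n as above; an illustration only, not tested against Neo4j):

async def consume(db):
    results = []
    # Iterate the async generator inside a coroutine...
    async for record in db.afetch("MATCH(p:Person)-[:Acted_in]->(mv:Movies) RETURN p.name as actors"):
        results.append(record)
    return results

# ...and pass the coroutine, not the generator, to run_until_complete.
rows = loop.run_until_complete(consume(n))
loop.close()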

How to call an async function contained in a class?

Based on this answer I want to build an async websocket client in a class which would be imported from another file:
#!/usr/bin/env python3
import sys, json
import asyncio
from websockets import connect

class EchoWebsocket:
    def __await__(self):
        # see: https://stackoverflow.com/a/33420721/1113207
        return self._async_init().__await__()

    async def _async_init(self):
        self._conn = connect('wss://ws.binaryws.com/websockets/v3')
        self.websocket = await self._conn.__aenter__()
        return self

    async def close(self):
        await self._conn.__aexit__(*sys.exc_info())

    async def send(self, message):
        await self.websocket.send(message)

    async def receive(self):
        return await self.websocket.recv()

class mtest:
    async def start(self):
        try:
            self.wws = await EchoWebsocket()
        finally:
            await self.wws.close()

    async def get_ticks(self):
        await self.wws.send(json.dumps({'ticks_history': 'R_50', 'end': 'latest', 'count': 1}))
        return await self.wws.receive()

if __name__ == '__main__':
    a = mtest()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(a.start())
And I import it in main.py, where I have the following:
from testws import *
a = mtest()
print (a.get_ticks())
print ("this will be printed after the ticks")
But it gives me the following error:
root@ubupc1:/home/dinocob# python3 test.py
<coroutine object hello.get_ticks at 0x7f13190a9200>
test.py:42: RuntimeWarning: coroutine 'mtest.get_ticks' was never awaited
print (a.get_ticks())
this will be printed after the ticks
What is going on here? Why am I not able to access mtest.get_ticks if it has the async keyword at the beginning of its def?
I finally found the right way to do it (special thanks to @dirn):
#!/usr/bin/env python3
import sys, json
import asyncio
from websockets import connect

class EchoWebsocket:
    async def __aenter__(self):
        self._conn = connect('wss://ws.binaryws.com/websockets/v3')
        self.websocket = await self._conn.__aenter__()
        return self

    async def __aexit__(self, *args, **kwargs):
        await self._conn.__aexit__(*args, **kwargs)

    async def send(self, message):
        await self.websocket.send(message)

    async def receive(self):
        return await self.websocket.recv()

class mtest:
    def __init__(self):
        self.wws = EchoWebsocket()
        self.loop = asyncio.get_event_loop()

    def get_ticks(self):
        return self.loop.run_until_complete(self.__async__get_ticks())

    async def __async__get_ticks(self):
        async with self.wws as echo:
            await echo.send(json.dumps({'ticks_history': 'R_50', 'end': 'latest', 'count': 1}))
            return await echo.receive()
And this in main.py:
from testws import *
a = mtest()
foo = a.get_ticks()
print (foo)
print ("async works like a charm!")
foo = a.get_ticks()
print (foo)
This is the output:
root@ubupc1:/home/dinocob# python3 test.py
{"count": 1, "end": "latest", "ticks_history": "R_50"}
async works like a charm!
{"count": 1, "end": "latest", "ticks_history": "R_50"}
Any tips to improve it are welcome! ;)
Your question and answer are great! They helped me a lot.
Based on your code I was able to create the following class, which better matches my needs:
import asyncio
from websockets import connect

class TestClient:
    def __init__(self, URL):
        self.URL = URL
        self.conn = None
        self.loop = asyncio.get_event_loop()

    async def send(self, message):
        if self.conn is None:
            self.conn = await connect(self.URL)
        await self.conn.send(message)

    async def receive(self):
        return await self.conn.recv()

    def ping(self):
        return self.loop.run_until_complete(self._ping())

    async def _ping(self):
        await self.send("Hello World")
        return await self.receive()

test = TestClient("wss://echo.websocket.org")
print(test.ping())
