How to correctly lock async generators? - python

I am trying to use an async generator as a wrapper for a shared connection
async def mygen():
    """Forward every value sent into this generator over one connection.

    The first ``asend(None)`` runs ``init()`` and opens the connection; each
    later ``asend(data)`` resumes at the ``yield`` and awaits
    ``connection.send(data)`` before the generator suspends again.
    """
    await init()
    connection = await open_connection()
    while True:
        data = yield
        await connection.send(data)
# Lazily created generator shared by every send_data() call.
shared_gen = None


async def send_data(data):
    """Send *data* through the shared generator, creating it on first use.

    NOTE: calls must not overlap. While the generator is still running
    (priming, or awaiting ``connection.send``), a second ``asend`` raises
    ``RuntimeError`` instead of waiting for its turn.
    """
    global shared_gen
    if not shared_gen:
        shared_gen = mygen()
        # Prime the generator so it is parked at its first `yield`.
        await shared_gen.asend(None)
    await shared_gen.asend(data)
Is the above code safe from race conditions? Is it possible for two asends to execute concurrently, or will the second one block implicitly until the generator is ready at the yield step? Assume connection.send is not concurrency-safe.
Update:
I wrote a wrapper to help use the generator safely.
class Locked:
    """Pair a resource with an ``asyncio.Lock`` that guards access to it."""

    def __init__(self, resource):
        self._resource = resource
        self._lock = asyncio.Lock()

    # The decorator is required: without it `lock()` returns a bare async
    # generator, which cannot be used in `async with`.
    @contextlib.asynccontextmanager
    async def lock(self):
        """Hold the lock for the ``async with`` body and yield the resource."""
        async with self._lock:
            yield self._resource
async def send_data(locked_gen, data):
    """Send *data* into the generator held by *locked_gen*.

    The ``asend`` runs inside ``locked_gen.lock()``, so the next caller only
    gets the generator once this send has finished.
    """
    async with locked_gen.lock() as gen:
        await gen.asend(data)
async def main():
    """Create and prime the generator, then wrap it for locked access."""
    gen = mygen()
    await gen.asend(None)  # advance to the first `yield`
    locked_gen = Locked(gen)
    ...

Is it possible for two asends to execute concurrently, or will the second one block implicitly until the generator is ready at the yield step?
It is not possible for asend to be called concurrently, but trying to do so doesn't result in blocking. Instead, the second one will raise a RuntimeError, as demonstrated by the following example:
import asyncio


async def gen():
    """Yield forever, staying busy for one second after every resume."""
    while True:
        yield
        await asyncio.sleep(1)


async def main():
    """Send into one async generator from two tasks at the same time.

    The second ``asend`` arrives while the generator is still sleeping, so it
    raises ``RuntimeError`` instead of waiting for the first to finish.
    """
    ait = gen()
    await ait.asend(None)  # start the generator

    async def send():
        print('sending')
        await ait.asend(42)

    await asyncio.gather(send(), send())


# Guarded so that importing the module does not trigger the failing demo.
if __name__ == "__main__":
    asyncio.run(main())
To make the send block until the previous one finishes, you need an explicit lock around the await of asend:
async def main():
    """Send into one async generator from two tasks, serialized by a lock."""
    ait = gen()
    await ait.asend(None)
    lock = asyncio.Lock()

    async def send():
        # The lock is held across the whole asend, so the second sender
        # waits until the generator is back at its `yield`.
        async with lock:
            print('sending')
            await ait.asend(42)

    await asyncio.gather(send(), send())

Related

How to execute async request with up to K workers without code duplication?

I have a project with many async functions (they make HTTP calls to a remote server).
The server can handle up to K simultaneous requests (K depends on which request I make).
I want to be able to easily implement "do these N requests with up to K workers". I could not find a way to do it without either duplicating code or losing type hints.
How can I do it?
I tried to use a Semaphore but failed, mainly because I could not find a way to move this logic to the caller's scope.
implement logic on caller scope (not working)
import asyncio
import time
async def sleep(s: int) -> int:
    """Sleep for *s* seconds, report completion, and return *s*."""
    await asyncio.sleep(s)
    print(f'task done in {s}s')
    return s
async def main():
    """Time six sleeps gathered while a two-permit semaphore is held.

    The semaphore is acquired once, around the whole ``gather``, so it does
    not throttle the individual ``sleep`` calls: all six start together.
    """
    sem = asyncio.Semaphore(2)

    # Expected to end in 8 seconds; ends in 6 (not the intended behavior)
    start = time.time()
    async with sem:
        res = await asyncio.gather(
            sleep(6),
            sleep(2),
            sleep(2),
            sleep(2),
            sleep(2),
            sleep(2),
        )
    end = time.time()
    print(f"ended in {end-start}s")
    return res


# `async with` / `await` are only valid inside a coroutine in a module.
if __name__ == "__main__":
    asyncio.run(main())
Refactoring async functions (code duplication)
import asyncio
import time
from typing import Optional
async def sleep(s: int) -> None:
    """Suspend the calling coroutine for *s* seconds."""
    await asyncio.sleep(s)
async def request_0(arg0: int, sem: Optional[asyncio.Semaphore]=None):
    """Perform call 0, holding *sem* for its duration when one is given."""
    if sem is not None:
        async with sem:
            await sleep(2)  # Do http call 0
    else:
        await sleep(2)  # Do http call 0
async def request_1(arg1: str, sem: Optional[asyncio.Semaphore]=None):
    """Perform call 1, holding *sem* for its duration when one is given."""
    if sem is not None:
        async with sem:
            await sleep(1)  # Do http call 1
    else:
        await sleep(1)  # Do http call 1
async def request_2(arg2: float, arg3: str, sem: Optional[asyncio.Semaphore]=None):
    """Perform call 2, holding *sem* for its duration when one is given."""
    if sem is not None:
        async with sem:
            await sleep(1)  # Do http call 2
    else:
        await sleep(1)  # Do http call 2
async def main():
    """Run the three requests with at most two of them in flight."""
    start = time.time()
    # Create the semaphore directly: `async with Semaphore(...) as sem` binds
    # `sem` to None, and each request has to receive the real object.
    sem = asyncio.Semaphore(2)
    res = await asyncio.gather(
        request_0(arg0=0, sem=sem),
        request_1(arg1='0', sem=sem),
        request_2(arg2=0, arg3='0', sem=sem),
    )
    end = time.time()
    print(f"ended in {end-start}s")
    return res


# `await` is only valid inside a coroutine in a module.
if __name__ == "__main__":
    asyncio.run(main())
Enhance functions with annotation (losing type hints)
# type: ignore
import asyncio
import time
from typing import Callable, Coroutine, Optional, Any
async def sleep(s: int) -> None:
    """Suspend the calling coroutine for *s* seconds."""
    await asyncio.sleep(s)
def semaphoreUseable(func):
    """Let callers of coroutine function *func* pass an optional ``sem``.

    The returned wrapper accepts a keyword-only ``sem``. When it is an
    ``asyncio.Semaphore`` the call to *func* runs while holding it; when it
    is omitted or ``None`` the call runs unguarded. ``sem`` itself is never
    forwarded to *func*.
    """
    import functools  # local import: not part of this snippet's import block

    @functools.wraps(func)
    async def wrapper(*args, sem: Optional[asyncio.Semaphore] = None, **kwargs):
        # `sem` defaults to None; the annotation alone carries the type.
        if sem is None:
            return await func(*args, **kwargs)
        async with sem:
            return await func(*args, **kwargs)

    return wrapper
@semaphoreUseable
async def request_0(arg0: int):
    """Perform call 0; the decorator adds the optional ``sem`` keyword."""
    await sleep(2)  # http call 0
@semaphoreUseable
async def request_1(arg1: str):
    """Perform call 1; the decorator adds the optional ``sem`` keyword."""
    await sleep(1)  # http call 1
@semaphoreUseable
async def request_2(arg2: float, arg3: str):
    """Perform call 2; the decorator adds the optional ``sem`` keyword."""
    await sleep(1)  # http call 2
async def main():
    """Run the three decorated requests with at most two in flight."""
    start = time.time()
    # Create the semaphore directly: `async with Semaphore(...) as sem` binds
    # `sem` to None, which would disable the throttling in every request.
    sem = asyncio.Semaphore(2)
    res = await asyncio.gather(
        request_0(arg0=0, sem=sem),
        request_1(arg1='0', sem=sem),
        request_2(arg2=0, arg3='0', sem=sem),
    )
    end = time.time()
    print(f"ended in {end-start}s")
    return res


if __name__ == "__main__":
    asyncio.run(main())
related question: How to type a function with Callable without losing keyword argument? (could not find a way to type the annotation solution)

Asynchronous, Multiple HTTP requests in a While Loop

The code below is intended to send multiple HTTP requests asynchronously in a while loop, and depending on the response from each request(request "X" always returns "XXX", "Y" always returns "YYY" and so on), do something and sleep for interval seconds specified for each request.
However, it throws an error...
RuntimeError: cannot reuse already awaited coroutine
Could anyone help me fix the code so that it behaves as intended?
class Client:
    """Await a request over and over, pausing after each response."""

    def __init__(self):
        pass

    async def run_forever(self, coro, interval):
        """Await *coro* in an endless loop and handle each result.

        NOTE: *coro* is one coroutine object, so the second iteration raises
        ``RuntimeError: cannot reuse already awaited coroutine``.
        """
        while True:
            res = await coro
            await self._onresponse(res, interval)

    async def _onresponse(self, res, interval):
        """Sleep *interval* seconds when *res* is one of the known replies."""
        if res == "XXX":
            # ... do something with the response ...
            await asyncio.sleep(interval)
        if res == "YYY":
            # ... do something with the response ...
            await asyncio.sleep(interval)
        if res == "ZZZ":
            # ... do something with the response ...
            await asyncio.sleep(interval)
async def request(something):
    """Issue the HTTP request for *something* and return its response.

    NOTE(review): the aiohttp call that binds ``response`` is elided here.
    """
    # ... HTTP request using aiohttp library ...
    return response
async def main():
    """Start the polling loops for the "X", "Y" and "Z" requests.

    NOTE: each ``run_forever`` is awaited in turn and loops forever, so the
    calls after the first are never reached.
    """
    c = Client()
    await c.run_forever(request("X"), interval=1)
    await c.run_forever(request("Y"), interval=2)
    await c.run_forever(request("Z"), interval=3)
    # ... and more
As the error says, you can't await a coroutine more than once. Instead of passing a coroutine into run_forever and then awaiting it in a loop, pass the coroutine's argument(s) and await a new coroutine on each iteration of the loop.
class Client:
    async def run_forever(self, value, interval):
        """Request *value* repeatedly, handling each response in turn.

        A new ``request(value)`` coroutine is created on every iteration, so
        no coroutine object is ever awaited twice.
        """
        while True:
            res = await request(value)
            await self._onresponse(res, interval)
You also need to change how you await run_forever. await suspends the current coroutine until the awaited one finishes, so when you await something that contains an infinite loop, you'll never reach the next line. Instead, you want to gather multiple coroutines at once.
async def main():
    """Run the three polling loops concurrently on one client."""
    c = Client()
    # gather() schedules all loops together instead of one after another.
    await asyncio.gather(
        c.run_forever("X", interval=1),
        c.run_forever("Y", interval=2),
        c.run_forever("Z", interval=3),
    )

yield object inside asyncio function

I'm trying to work with asyncio. I tried this code, where I process a list of elements and print the state of each element (this works).
Problem: how can I yield the element (object)? When I try, I get this error: object async_generator can't be used in 'await' expression
import asyncio, random
async def process_element(element):
    """Announce *element*, wait a random fraction of a second, announce again."""
    print('starting', element)
    await asyncio.sleep(random.random())  # simulate IO-bound processing
    print('done', element)
async def do_stuff(q):
    """Worker: take items from *q* and process them until it is empty."""
    while not q.empty():
        value = await q.get()
        await process_element(element=value)
        q.task_done()  # lets q.join() account for the finished item
async def main():
    """Queue 20 jobs and drain them with five concurrent workers."""
    jobs = asyncio.Queue()
    for i in range(20):
        await jobs.put(i)
    # Keep references to the tasks: the event loop holds them only weakly,
    # so an unreferenced task may be garbage-collected before it finishes.
    workers = [asyncio.create_task(do_stuff(jobs)) for _ in range(5)]
    await jobs.join()
    # Collect the workers so an exception raised in one is not lost.
    await asyncio.gather(*workers)


if __name__ == "__main__":
    asyncio.run(main())
It is just a matter of receiving whatever you want to yield using an async for instead of a plain await:
import asyncio, random
async def process_element(element):
    """Async generator that yields *element* once after a random short wait.

    'done' is printed only when the consumer resumes the generator after
    receiving the yielded value.
    """
    print('starting', element)
    await asyncio.sleep(random.random())  # simulate IO-bound processing
    yield element
    print('done', element)
async def do_stuff(q):
    """Worker: take items from *q* and print what processing each yields."""
    while not q.empty():
        value = await q.get()
        # process_element is an async generator, so it is consumed with
        # `async for` rather than awaited.
        async for response in process_element(value):
            print(f"process yield element: {response}")
        q.task_done()
async def main():
    """Queue 20 jobs and drain them with five concurrent workers."""
    jobs = asyncio.Queue()
    for i in range(20):
        await jobs.put(i)
    # Keep references to the tasks: the event loop holds them only weakly,
    # so an unreferenced task may be garbage-collected before it finishes.
    workers = [asyncio.create_task(do_stuff(jobs)) for _ in range(5)]
    await jobs.join()
    # Collect the workers so an exception raised in one is not lost.
    await asyncio.gather(*workers)


if __name__ == "__main__":
    asyncio.run(main())
If for some reason you don't want to use "async for", you can call
the methods __anext__ and asend on the async generator object
(which is what is returned by calling process_element once it contains a yield keyword).
Both .__anext__ and .asend have to be awaited, and will throw StopAsyncIteration when the generator is exhausted (in contrast with StopIteration for non-async generators).

how to understand await in coroutines?

The following example shows that we can run phase1 and then run phase2. But what we want from coroutines is to do two things concurrently instead of one after another. I know that I can achieve this with asyncio.get_event_loop().create_task, but then why use await? I think there is no difference between using await and just calling a plain function.
import asyncio
async def outer():
    """Run phase1, feed its result to phase2, and return both results."""
    print('in outer')
    print('waiting for result1')
    result1 = await phase1()
    print('waiting for result2')
    result2 = await phase2(result1)  # starts only after phase1 has finished
    return (result1, result2)
async def phase1():
    """Announce the phase and return its fixed result string."""
    print('in phase1')
    return 'result1'
async def phase2(arg):
    """Announce the phase and return a result string derived from *arg*."""
    print('in phase2')
    return f'result2 derived from {arg}'
if __name__ == "__main__":
    # asyncio.run creates the event loop, runs outer() to completion and
    # closes the loop afterwards, even when outer() raises.
    return_value = asyncio.run(outer())
    print('return value: {!r}'.format(return_value))

Asyncio python - TypeError: A Future, a coroutine or an awaitable is required

One of the async functions returns an async generator object. I added loop.run_until_complete(func()), but it still throws the error "TypeError: A Future, a coroutine or an awaitable is required". Below is the code. I'm trying to fetch records from Neo4j asynchronously. I got the async "Neo4j" class from GitHub. I'm new to this async concept.
from concurrent import futures
import neo4j
from neo4j import GraphDatabase, basic_auth
import time
import traceback
import asyncio
RETRY_WAITS = [0, 1, 4] # How long to wait after each successive failure.
class Neo4j:
    """Neo4j database API.

    Driver calls are handed to a thread pool through
    ``loop.run_in_executor`` and their results are awaited.
    """

    def __init__(self, config, loop):
        """Store *config* and *loop*, then create the driver with retries.

        One attempt is made per entry of ``RETRY_WAITS``; the failure of the
        last attempt is re-raised.
        """
        self.config = config
        self.loop = loop
        self.executor = futures.ThreadPoolExecutor(max_workers=30)
        for retry_wait in RETRY_WAITS:
            try:
                self.init_driver()
                break
            except Exception:  # not bare: Ctrl-C / SystemExit must not be retried
                if retry_wait == RETRY_WAITS[-1]:
                    raise
                print('WARNING: retrying to Init DB; err:')
                traceback.print_exc()
                time.sleep(retry_wait)  # back off by this attempt's RETRY_WAITS entry

    def init_driver(self):
        """(Re)create ``self.driver`` from the url/user/pass in the config."""
        auth = basic_auth(self.config['user'], self.config['pass'])
        self.driver = GraphDatabase.driver(self.config['url'], auth=auth)

    async def afetch_start(self, query):
        """Open a read session, run *query* in the pool, return ``(session, records)``."""
        session = self.driver.session(access_mode=neo4j.READ_ACCESS)

        def run():
            return session.run(query).records()

        return session, await self.loop.run_in_executor(self.executor, run)

    async def afetch_iterate(self, session, iter):
        """Yield each record of *iter* as a dict, advancing it in the pool.

        The parameter name ``iter`` shadows the builtin; it is kept because
        callers may pass it by keyword.
        """
        def iterate():
            # None marks exhaustion; StopIteration must not leave the worker.
            return next(iter, None)

        while True:
            res = await self.loop.run_in_executor(self.executor, iterate)
            if res is None:
                return
            yield dict(res)

    async def afetch(self, query):
        """Async generator over the rows of *query*, each as a dict.

        Starting the query is retried per ``RETRY_WAITS`` on
        ``BrokenPipeError`` / ``ServiceUnavailable``, recreating the driver
        between attempts. The session is closed when iteration ends, whether
        it finished, failed, or the consumer stopped early.
        """
        for retry_wait in RETRY_WAITS:
            try:
                session, records = await self.afetch_start(query)
                break
            except (BrokenPipeError, neo4j.exceptions.ServiceUnavailable):
                if retry_wait == RETRY_WAITS[-1]:
                    raise
                await asyncio.sleep(retry_wait)
                await self.loop.run_in_executor(self.executor, self.init_driver)
        try:
            async for x in self.afetch_iterate(session, records):
                yield x
        finally:
            await self.loop.run_in_executor(self.executor, session.close)

    async def afetch_one(self, query):
        """Return the first row of *query*, or ``None`` when there is none."""
        rows = self.afetch(query)
        try:
            async for i in rows:
                return i
            return None
        finally:
            # Close the generator explicitly so its cleanup runs now rather
            # than whenever the abandoned generator is finalized.
            await rows.aclose()

    async def aexec(self, query):
        """Run *query* and discard every row it produces."""
        async for i in self.afetch(query):
            pass
        return
# Connection settings; Neo4j.init_driver reads the 'url', 'user' and 'pass' keys.
config={'url':"bolt://localhost",'user':'neo4j','pass':'pwd'}
loop=asyncio.get_event_loop()
n=Neo4j(config,loop)
# NOTE(review): afetch contains `yield`, so calling it returns an async
# generator object, not a coroutine. Passing that to run_until_complete is
# what raises "TypeError: A Future, a coroutine or an awaitable is required";
# an async generator has to be consumed with `async for` inside a coroutine.
loop.run_until_complete(n.afetch("MATCH(p:Person)-[:Acted_in]->(mv:Movies) RETURN p.name as actors"))
loop.close()
--EDIT
I have modified the code to work properly. The query returns 218K rows and it takes 5 minutes to extract the complete list, while the same async operation in C# completes in just 2 seconds. It looks like the above code still doesn't run asynchronously.
It's very hard to tell exactly what happens without a reproducible example, but I'll take a guess. You are probably passing an async generator object to the event loop, which you shouldn't do. The way to work with async generators is to use async for. Here's an example:
import asyncio
async def func():  # async generator
    """Yield 1, 2 and 3 in order."""
    yield 1
    yield 2
    yield 3
async def main():
    """Print every value produced by the async generator ``func``."""
    async for i in func():  # get values from async generator
        print(i)


if __name__ == "__main__":
    asyncio.run(main())  # can be used instead of loop.run_until_complete(main())

Categories

Resources