Correctly Implementing (concurrent) Asyncio for multiple functions - python

I have a couple of functions whose execution does not depend on each other. What I am trying to do is execute them concurrently instead of sequentially (synchronously). I have added an event loop as well, but I am not able to figure out whether it is working correctly or not.
This is the implementation:
File 1:
import file2

def funcA():
    a, b = 1, 2
    file2.main(a, b)
File 2:
import asyncio

def main(a, b):
    asyncio.get_event_loop().run_until_complete(_main(a, b))

async def _main(a, b):
    out1 = await funcA(a, b)
    out2 = await funcB(a, b)
    out3 = await funcC(a, b)

async def funcA(a, b):
    result = 1  # some processing done here
    return result

async def funcB(a, b):
    result = 1  # some processing done here
    return result

async def funcC(a, b):
    result = 1  # some processing done here
    return result
I am not able to figure out whether these are running concurrently or not. If I add time.sleep(10) in any function, execution stops there. I don't want them to run in the background, as I need the output from those functions. Please help, guys.

One way to do what you want is to use asyncio.run() in main and then gather in the async version of main. To simulate long processing, use asyncio.sleep(). See the following code:
import asyncio

def main(a, b):
    res = asyncio.run(async_main(a, b))
    print(f"in main, result is {res}")

async def funcA(a, b):
    print('funcA - start')
    await asyncio.sleep(3)
    result = (a + b)  # some processing done here
    print('funcA - end')
    return result

async def funcB(a, b):
    print('funcB - start')
    await asyncio.sleep(3)
    result = (a + b) * 2  # some processing done here
    print('funcB - end')
    return result

async def funcC(a, b):
    print('funcC - start')
    await asyncio.sleep(3)
    result = (a + b) * 3  # some processing done here
    print('funcC - end')
    return result

async def async_main(a, b):
    print("in async_main")
    res = await asyncio.gather(funcA(a, b), funcB(a, b), funcC(a, b))
    print(f"in async_main, result is {res}")
    return res

if __name__ == "__main__":
    main(1, 2)
The result is:
in async_main
funcA - start
funcB - start
funcC - start
funcA - end
funcB - end
funcC - end
in async_main, result is [3, 6, 9]
in main, result is [3, 6, 9]
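Note that this only works because asyncio.sleep() is awaitable. If the processing in funcA/funcB/funcC is genuinely blocking (time.sleep() or CPU-bound code), gather alone will not help, because a blocking call freezes the single event-loop thread. A minimal sketch using asyncio.to_thread() (Python 3.9+) to push blocking work onto worker threads; the blocking_work function here is a hypothetical stand-in:
import asyncio
import time

def blocking_work(a, b, factor):
    # Stand-in for any blocking call the event loop must not run directly.
    time.sleep(3)
    return (a + b) * factor

async def async_main(a, b):
    # Each to_thread() call runs in a separate worker thread, so the
    # three blocking calls overlap instead of running back to back.
    return await asyncio.gather(
        asyncio.to_thread(blocking_work, a, b, 1),
        asyncio.to_thread(blocking_work, a, b, 2),
        asyncio.to_thread(blocking_work, a, b, 3),
    )

if __name__ == "__main__":
    print(asyncio.run(async_main(1, 2)))  # [3, 6, 9] after ~3s, not ~9s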

Related

Beginner async/await question for api requests

I want to speed up some API requests... to do that, I tried to figure out how and copied some code which runs, but when I try my own code it's no longer asynchronous. Maybe someone can find the mistake?
Copied code (found on Stack Overflow, I believe):
#!/usr/bin/env python3
import asyncio

@asyncio.coroutine
def func_normal():
    print('A')
    yield from asyncio.sleep(5)
    print('B')
    return 'saad'

@asyncio.coroutine
def func_infinite():
    for i in range(10):
        print("--%d" % i)
    return 'saad2'

loop = asyncio.get_event_loop()
tasks = func_normal(), func_infinite()
a, b = loop.run_until_complete(asyncio.gather(*tasks))
print("func_normal()={a}, func_infinite()={b}".format(**vars()))
loop.close()
My "own" code (I need at the end a list returned and merge the results of all functions):
import asyncio
import time

@asyncio.coroutine
def say_after(start, count, say, yep=True):
    retl = []
    if yep:
        time.sleep(5)
    for x in range(start, count):
        retl.append(x)
    print(say)
    return retl

def main():
    print(f"started at {time.strftime('%X')}")
    loop = asyncio.get_event_loop()
    tasks = say_after(10, 20, "a"), say_after(20, 30, "b", False)
    a, b = loop.run_until_complete(asyncio.gather(*tasks))
    print("func_normal()={a}, func_infinite()={b}".format(**vars()))
    loop.close()
    c = a + b
    #print(c)
    print(f"finished at {time.strftime('%X')}")

main()
Or am I completely wrong and should I solve this with multithreading? What would be the best way to make API requests that return lists I need to merge?
I added a comment to each section that needs improvement, and removed some parts to simplify the code.
In fact, I didn't find any performance uplift from wrapping range() in a coroutine with async def; it might be worth it for heavier operations.
import asyncio
import time

# @asyncio.coroutine IS DEPRECATED since python 3.8
@asyncio.coroutine
def say_after(wait=True):
    result = []
    if wait:
        print("I'm sleeping!")
        # This BLOCKs the thread, but releases the GIL so other threads can run.
        # But asyncio runs in ONE thread, so this still harms concurrency.
        time.sleep(5)
        print("'morning!")
    # A normal for loop is a BLOCKING operation.
    for i in range(5):
        result.append(i)
        print(i, end='')
    print()
    return result

def main():
    start = time.time()
    # The loop argument will be DEPRECATED from python 3.10.
    # Make main() a coroutine, then use asyncio.run(main()).
    # It will run in the asyncio event loop, without explicitly passing the loop.
    loop = asyncio.get_event_loop()
    tasks = say_after(), say_after(False)
    # As we will use asyncio.run(main()) from now on, this should be await-ed.
    a, b = loop.run_until_complete(asyncio.gather(*tasks))
    print(f"Took {time.time() - start:5f}")
    loop.close()

main()
Better way:
import asyncio
import time

async def say_after(wait=True):
    result = []
    if wait:
        print("I'm sleeping!")
        await asyncio.sleep(2)  # 'await' a coroutine version of it instead.
        print("'morning!")

    # wrap the iterator in an async generator - or coroutine
    async def asynchronous_range(end):
        for _i in range(end):
            yield _i

    # use it with async for
    async for i in asynchronous_range(5):
        result.append(i)
        print(i, end='')
    print()
    return result

async def main():
    start = time.time()
    tasks = say_after(), say_after(False)
    a, b = await asyncio.gather(*tasks)
    print(f"Took {time.time() - start:5f}")

asyncio.run(main())
Result
Your code:
DeprecationWarning: "@coroutine" decorator is deprecated since Python 3.8, use "async def" instead
  def say_after(wait=True):
I'm sleeping!
'morning!
01234
01234
Took 5.003802
Better async code:
I'm sleeping!
01234
'morning!
01234
Took 2.013863
Note that the fixed code now finishes its job while the other task is sleeping.
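For actual API requests, the long part is network I/O, which should itself be awaited. A minimal sketch using the third-party aiohttp library (assuming it is installed; the URLs are placeholders), fetching several endpoints concurrently and merging the resulting lists:
import asyncio
import aiohttp

async def fetch_json(session, url):
    # One awaited request; the event loop runs other tasks while waiting.
    async with session.get(url) as resp:
        return await resp.json()

async def main():
    urls = ["https://example.com/api/a", "https://example.com/api/b"]
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(*(fetch_json(session, u) for u in urls))
    # Merge the per-request lists into one.
    merged = [item for sublist in results for item in sublist]
    print(merged)

asyncio.run(main())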

how to understand await in coroutines?

The following example shows that we can run phase1 and then run phase2. But what we want with coroutines is to do two things concurrently instead of one after another. I know that asyncio.get_event_loop().create_task() can achieve what I want, but then why use await? I think there is no difference between using await and just calling a plain function.
import asyncio

async def outer():
    print('in outer')
    print('waiting for result1')
    result1 = await phase1()
    print('waiting for result2')
    result2 = await phase2(result1)
    return (result1, result2)

async def phase1():
    print('in phase1')
    return 'result1'

async def phase2(arg):
    print('in phase2')
    return 'result2 derived from {}'.format(arg)

event_loop = asyncio.get_event_loop()
try:
    return_value = event_loop.run_until_complete(outer())
    print('return value: {!r}'.format(return_value))
finally:
    event_loop.close()
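Note that phase1 and phase2 above contain no await inside them, so awaiting them behaves just like calling plain functions in sequence; the payoff of await is that the event loop can run other tasks while a coroutine is suspended at a real suspension point. A minimal sketch contrasting sequential await with concurrent tasks (the phase names and delays here are illustrative):
import asyncio

async def phase(name, delay):
    print(f'in {name}')
    await asyncio.sleep(delay)  # a real suspension point
    return f'result from {name}'

async def outer():
    # Sequential: phase2 starts only after phase1 finishes (~2s total).
    r1 = await phase('phase1', 1)
    r2 = await phase('phase2', 1)
    # Concurrent: both tasks start immediately and overlap (~1s more, not ~2s).
    t3 = asyncio.create_task(phase('phase3', 1))
    t4 = asyncio.create_task(phase('phase4', 1))
    return r1, r2, await t3, await t4

print(asyncio.run(outer()))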

What's the equivalent in asyncio of gevent.sleep(0)

If I want some code to run later, I can call gevent.sleep(0) to yield from the current greenlet. How do I do that in asyncio?
e.g.
import gevent

def spawn(f):
    def wrapper(*args, **kwargs):
        return gevent.spawn(f, *args, **kwargs)
    return wrapper

@spawn
def f(a, b):
    gevent.sleep(0)  # gevent.idle()
    print(a + b)

@spawn
def g():
    print("hello")

f(1, 3)
f(4, 5)
g()
gevent.sleep(3)
"""
Expected:
hello
4
9
"""
In this case g will run ahead of f. Is there something similar in asyncio?
The equivalent of gevent.sleep(time) in asyncio is await asyncio.sleep(time). Calling await asyncio.sleep(time) suspends the calling task, and if there are other tasks available they will run; after the declared time passes, the calling task becomes available for execution again. In particular, await asyncio.sleep(0) yields control to the event loop without any actual delay, just like gevent.sleep(0).
Example:
import asyncio

async def f():
    await asyncio.sleep(2)
    print('This is function f!')

async def g():
    print("This is function g!")

async def main():
    loop = asyncio.get_event_loop()
    loop.create_task(f())
    loop.create_task(g())
    await asyncio.sleep(10)

asyncio.run(main())
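For the exact gevent.sleep(0) use case (yield control without any delay), a minimal sketch reproducing the expected output from the question:
import asyncio

async def f(a, b):
    await asyncio.sleep(0)  # yield to the event loop, like gevent.sleep(0)
    print(a + b)

async def g():
    print("hello")

async def main():
    await asyncio.gather(f(1, 3), f(4, 5), g())

asyncio.run(main())  # prints: hello, 4, 9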

How to yield from an async for loop using asyncio?

I'm trying to write a simple asynchronous data batch generator, but I'm having trouble understanding how to yield from an async for loop. Here I've written a simple class illustrating my idea:
import asyncio
from typing import List

class AsyncSimpleIterator:

    def __init__(self, data: List[str], batch_size=None):
        self.data = data
        self.batch_size = batch_size
        self.doc2index = self.get_doc_ids()

    def get_doc_ids(self):
        return list(range(len(self.data)))

    async def get_batch_data(self, doc_ids):
        print("get_batch_data() running")
        page = [self.data[j] for j in doc_ids]
        return page

    async def get_docs(self, batch_size):
        print("get_docs() running")
        _batch_size = self.batch_size or batch_size
        batches = [self.doc2index[i:i + _batch_size] for i in
                   range(0, len(self.doc2index), _batch_size)]
        for _, doc_ids in enumerate(batches):
            docs = await self.get_batch_data(doc_ids)
            yield docs, doc_ids

    async def main(self):
        print("main() running")
        async for res in self.get_docs(batch_size=2):
            print(res)  # how to yield instead of print?

    def gen_batches(self):
        # how to get results of self.main() here?
        loop = asyncio.get_event_loop()
        loop.run_until_complete(self.main())
        loop.close()

DATA = ["Hello, world!"] * 4
iterator = AsyncSimpleIterator(DATA)
iterator.gen_batches()
So, my question is: how do I yield a result from main() so I can gather it inside gen_batches()?
When I print the result inside main(), I get the following output:
main() running
get_docs() running
get_batch_data() running
(['Hello, world!', 'Hello, world!'], [0, 1])
get_batch_data() running
(['Hello, world!', 'Hello, world!'], [2, 3])
I'm trying to write a simple asynchronous data batch generator, but I'm having trouble understanding how to yield from an async for loop
Yielding from an async for works like a regular yield, except that it also has to be collected by an async for or equivalent. For example, the yield in get_docs makes it an async generator. If you replace print(res) with yield res in main(), it will make main() an async generator as well.
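A minimal sketch of that replacement, making main() an async generator (assuming the class from the question):
    async def main(self):
        print("main() running")
        async for res in self.get_docs(batch_size=2):
            yield res  # main() is now an async generator itself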
the generator in main() should exhaust in gen_batches(), so I can gather all results in gen_batches()
To collect the values produced by an async generator (such as main() with print(res) replaced with yield res), you can use a helper coroutine:
def gen_batches(self):
    loop = asyncio.get_event_loop()
    async def collect():
        return [item async for item in self.main()]
    items = loop.run_until_complete(collect())
    loop.close()
    return items
The collect() helper makes use of a PEP 530 asynchronous comprehension, which can be thought of as syntactic sugar for the more explicit:
async def collect():
    l = []
    async for item in self.main():
        l.append(item)
    return l
A working solution based on @user4815162342's answer to the original question:
import asyncio
from typing import List

class AsyncSimpleIterator:

    def __init__(self, data: List[str], batch_size=None):
        self.data = data
        self.batch_size = batch_size
        self.doc2index = self.get_doc_ids()

    def get_doc_ids(self):
        return list(range(len(self.data)))

    async def get_batch_data(self, doc_ids):
        print("get_batch_data() running")
        page = [self.data[j] for j in doc_ids]
        return page

    async def get_docs(self, batch_size):
        print("get_docs() running")
        _batch_size = self.batch_size or batch_size
        batches = [self.doc2index[i:i + _batch_size] for i in
                   range(0, len(self.doc2index), _batch_size)]
        for _, doc_ids in enumerate(batches):
            docs = await self.get_batch_data(doc_ids)
            yield docs, doc_ids

    def gen_batches(self):
        loop = asyncio.get_event_loop()
        async def collect():
            return [j async for j in self.get_docs(batch_size=2)]
        items = loop.run_until_complete(collect())
        loop.close()
        return items

DATA = ["Hello, world!"] * 4
iterator = AsyncSimpleIterator(DATA)
result = iterator.gen_batches()
print(result)
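On Python 3.7+, gen_batches() can be simplified by letting asyncio.run() manage the loop lifecycle instead of creating and closing it manually; a minimal sketch:
    def gen_batches(self):
        async def collect():
            return [j async for j in self.get_docs(batch_size=2)]
        return asyncio.run(collect())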

Parallelize nested functions in Python

As a Python beginner, I'm trying to parallelize some sections of a function that serves as an input to an optimization routine. This function f returns the log-likelihood, the gradient, and the Hessian for a given vector b. In this function there are three independent loop functions: loop_1, loop_2, and loop_3.
What is the most efficient implementation: parallelizing the three loop functions in three concurrent processes, or parallelizing one loop at a time? And how can this be implemented? When I use the multiprocessing package, I get a 'pickle' error, as my nested loop functions are not in the global namespace.
def f(b):
    # Do something computationally intensive on b

    def calc(i, j):
        return u, v, w

    def loop_1():
        for i in range(1, 1000):
            c, d, e = calc(i, 0)
            for j in range(1, 200):
                f, g, h = calc(i, j)
        return x, y, z

    def loop_2():
        # similar to loop_1

    def loop_3():
        # similar to loop_1

    # Aggregate results from the three loops
    return u, v, w
There are several ways to avoid the pickling error you received.
One option could be asyncio, if it makes sense to do so: sometimes it makes things faster, sometimes slower.
In that case it would look something like the code below; I use it as a template when I forget things:
import asyncio

def f():
    async def factorial(n):
        f.p = 2
        await asyncio.sleep(0.2)
        return 1 if n < 2 else n * await factorial(n - 1)

    async def multiply(n, k):
        await asyncio.sleep(0.2)
        return sum(n for _ in range(k))

    async def power(n, k):
        await asyncio.sleep(0.2)
        return await multiply(n, await power(n, k - 1)) if k != 0 else 1

    loop = asyncio.get_event_loop()
    tasks = [asyncio.ensure_future(power(2, 5)),
             asyncio.ensure_future(factorial(5))]
    f.p = 0
    ans = tuple(loop.run_until_complete(asyncio.gather(*tasks)))
    print(f.p)
    return ans

if __name__ == '__main__':
    print(f())
async and await are built-in keywords, like def, for, and in, since Python 3.5.
Another workaround for functions defined inside functions is to use threads instead.
from concurrent.futures import ThreadPoolExecutor
import time

def f():
    def factorial(n):
        f.p = 2
        time.sleep(0.2)
        return 1 if n < 2 else n * factorial(n - 1)

    def multiply(n, k):
        time.sleep(0.2)
        return sum(n for _ in range(k))

    def power(n, k):
        time.sleep(0.2)
        return multiply(n, power(n, k - 1)) if k != 0 else 1

    def calculate(func, args):
        return func(*args)

    def calculate_star(args):
        return calculate(*args)

    pool = ThreadPoolExecutor()
    tasks = [(power, (2, 5)), (factorial, (5,))]
    f.p = 0
    result = list(pool.map(calculate_star, tasks))
    print(f.p)
    return result

if __name__ == '__main__':
    print(f())
You should start your functions in a pool of processes.
import multiprocessing

pool = multiprocessing.Pool()
for i in range(3):
    if i == 0:
        pool.apply_async(loop_1)
    elif i == 1:
        pool.apply_async(loop_2)
    elif i == 2:
        pool.apply_async(loop_3)
pool.close()
pool.join()  # wait for the submitted work to finish before exiting
If loop_1, loop_2, and loop_3 are the same function with the same operations, you can simply call it three times.
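Note that multiprocessing must pickle the functions it sends to worker processes, so nested functions will raise the same pickle error; the usual fix is to define them at module level. A minimal sketch with ProcessPoolExecutor, where the loop bodies are illustrative placeholders:
from concurrent.futures import ProcessPoolExecutor

# Defined at module level so multiprocessing can pickle them.
def loop_1(b):
    return sum(i * b for i in range(1000))  # placeholder computation

def loop_2(b):
    return sum(i + b for i in range(1000))  # placeholder computation

def loop_3(b):
    return sum(i - b for i in range(1000))  # placeholder computation

def f(b):
    # Submit the three independent loops to worker processes
    # and collect their results in order.
    with ProcessPoolExecutor() as pool:
        futures = [pool.submit(loop, b) for loop in (loop_1, loop_2, loop_3)]
        x, y, z = (fut.result() for fut in futures)
    return x, y, z

if __name__ == '__main__':
    print(f(2))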
