Using Python 3.6 and asyncio and aiohttp I wrote a simple async program:
from aiohttp import ClientSession
import asyncio, ssl, time
base_url = 'https://my-base-url.com/api'
async def fetch(session, id):
    """Request the rating range for ``id`` and print it.

    Sends a GET to ``base_url`` with ``id`` in the query string. On a 200
    response, prints the ``minRating`` and ``maxRating`` fields of the JSON
    body. Returns ``None``.
    """
    params = {'qp1': 'v1', 'qp2': 'v2', 'id': id}
    async with session.get(base_url, params=params, ssl=ssl.SSLContext()) as response:
        payload = await response.json()
        if response.status != 200:
            return
        # time.sleep() is a blocking call: the event loop cannot run any
        # other coroutine until it returns.
        time.sleep(2)
        lowest = payload.get('minRating')
        highest = payload.get('maxRating')
        print("id = %s, min = %s, max = %s" % (id, lowest, highest))
async def run(ids):
    """Fetch every id in ``ids`` over one shared client session.

    One task is scheduled per id; the gathered results are returned in the
    same order as ``ids``.
    """
    async with ClientSession() as session:
        pending = [asyncio.ensure_future(fetch(session, item)) for item in ids]
        return await asyncio.gather(*pending)
if __name__ == '__main__':
    ids = [123, 456, 789]
    event_loop = asyncio.get_event_loop()
    # run_until_complete() wraps the coroutine in a future itself.
    event_loop.run_until_complete(run(ids))
    print("\n\ndone")
The time.sleep(2) inside fetch(session, id) makes it seem like this program is not asynchronous because it gets one response, sleeps, gets another, sleeps, so on and so forth. When I remove the sleep call, it does seem to be async/concurrent because the responses come back in a random order. What is sleep doing in this case? Is it locking all threads? Why does it appear to be sequential instead of parallel?
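time.sleep(2) is a synchronous (blocking) call, so it blocks the entire event loop: no other coroutine can run until it returns. Use await asyncio.sleep(2) instead, which suspends only the current coroutine and hands control back to the event loop so the other tasks can proceed.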
Related
I have one DB connection and many coroutines that request data.
I made a minimal proof of concept and need help understanding the correct way to implement it.
import asyncio
# Shared queue: exec_in_db() puts queries on it and db_pipe() takes them off.
db_queeu = asyncio.Queue()
async def db_pipe():
    """Take queries off ``db_queeu`` forever and print each one."""
    while True:
        print("DB got", await db_queeu.get())
        # TODO: process the data and return the result to the requesting
        # exec_in_db() call
async def exec_in_db(query, timeout):
    """Wait ``timeout`` seconds, then put ``query`` on ``db_queeu``."""
    await asyncio.sleep(timeout)
    await db_queeu.put(query)
    # TODO: receive the result for this query back from db_pipe()
async def main():
    """Start the queue consumer, run two sample queries, then report."""
    # Keep a reference to the task: the event loop only holds weak references
    # to tasks, so an unreferenced one may be garbage-collected while running.
    pipe_task = asyncio.create_task(db_pipe())
    await asyncio.gather(
        exec_in_db("Loong query", 4),
        exec_in_db("Fast query", 1),
    )
    print("Listener starts")
# Script entry point: run main() on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
I need to listen for tasks on 2 queues, so I wrote the code below, but it has a problem. Currently it behaves like this: if the code is started when both queues are full, it works great. But if the queues (or one of them) were empty at startup, the code reads messages but does not process them (it does not send an ack and does not run the logic). The messages stay unacked until I stop the code. I do not see any reason for them to be unacked and unprocessed.
I can't understand what is wrong with the code. Maybe there is another way to aggregate 2 or more queues like this?
# task_processor.py
from aio_pika import IncomingMessage
class TaskProcessor:
    """Tracks in-flight messages and caps how many are handled at once."""

    # Maximum number of messages one instance processes simultaneously.
    MAX_TASKS_PER_INSTANCE = 1

    def __init__(self):
        # Messages currently inside process().
        self._tasks = []

    def can_accept_new_task(self) -> bool:
        """Return True while fewer than MAX_TASKS_PER_INSTANCE messages are in flight."""
        return len(self._tasks) < self.MAX_TASKS_PER_INSTANCE

    async def process(self, message: "IncomingMessage"):
        """Print the message body and acknowledge the message.

        The message is registered as in-flight for the duration of the call
        and is always unregistered afterwards, even if printing or ``ack()``
        raises, so a failure cannot leave the processor permanently "full".
        Any exception is propagated to the caller.
        """
        self._tasks.append(message)
        try:
            print(message.body)
            await message.ack()
        finally:
            # Remove this specific message rather than whatever is last.
            self._tasks.remove(message)
# main.py
import asyncio
from asyncio import QueueEmpty
from typing import Callable
import aio_pika
from aio_pika import RobustQueue
from dotenv import load_dotenv
load_dotenv()
from core.logger.logger import logger
from core.services.rabbitmq.task_processor.task_processor import TaskProcessor
async def get_single_task(queue: RobustQueue):
    """Poll ``queue`` until a message is obtained and return it.

    An empty queue is retried after a 3-second pause. A timeout is logged as
    a warning and any other exception as an error; in both cases polling
    continues.
    """
    while True:
        try:
            return await queue.get(timeout=3600)
        except QueueEmpty:
            await asyncio.sleep(3)
        except asyncio.exceptions.TimeoutError:
            logger.warning('queue timeout error')
        except Exception as ex:
            logger.error(f"{queue} errored", exc_info=ex)
async def task_aggregator(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    """Yield messages from two queues, whichever delivers first.

    Each round, while ``should_take_new_task_cb()`` returns true, both queues
    are polled concurrently and every poll that has completed when
    ``asyncio.wait`` returns is yielded. Otherwise the generator sleeps for
    one second and checks again.
    """
    while True:
        if not should_take_new_task_cb():
            await asyncio.sleep(1)
            continue
        # Swap the two queues so they alternate positions between rounds.
        queue2, queue1 = queue1, queue2
        # asyncio.wait() rejects bare coroutines on Python 3.11+ (TypeError),
        # so wrap each poll in a task explicitly.
        polls = [
            asyncio.ensure_future(get_single_task(queue1)),
            asyncio.ensure_future(get_single_task(queue2)),
        ]
        done, _ = await asyncio.wait(polls, return_when=asyncio.FIRST_COMPLETED)
        # NOTE(review): the poll that did not finish first stays pending and
        # its eventual result is never yielded -- confirm this is intended.
        for finished in done:
            yield finished.result()
async def tasks(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    """Re-yield every message produced by ``task_aggregator`` for the two queues."""
    aggregated = task_aggregator(queue1, queue2, should_take_new_task_cb)
    async for message in aggregated:
        yield message
async def main():
    """Connect to the broker, open one channel per queue and process messages forever."""
    connection = await aio_pika.connect_robust(
        f"amqp://user:password#host:port/vhost?heartbeat={180}"
    )

    # One channel per queue; each is initialised before it is used.
    channel1 = connection.channel()
    channel2 = connection.channel()
    await channel1.initialize()
    await channel2.initialize()
    queue1 = await channel1.get_queue('queue1')
    queue2 = await channel2.get_queue('queue2')

    task_processor = TaskProcessor()
    task_generator = tasks(queue1, queue2, task_processor.can_accept_new_task)

    while True:
        if not task_processor.can_accept_new_task():
            # Processor is at capacity; check again in a second.
            await asyncio.sleep(1)
            continue
        task = await anext(task_generator)
        await task_processor.process(task)
# Script entry point: run main() on a fresh event loop.
if __name__ == '__main__':
    asyncio.run(main())
The code below is intended to send multiple HTTP requests asynchronously in a while loop, and depending on the response from each request(request "X" always returns "XXX", "Y" always returns "YYY" and so on), do something and sleep for interval seconds specified for each request.
However, it throws an error...
RuntimeError: cannot reuse already awaited coroutine
Could anyone help me how I could fix the code to realise the intended behaviour?
class Client:
    """Polls a request repeatedly and reacts to each response."""

    def __init__(self):
        pass

    async def run_forever(self, coro, interval):
        """Await ``coro`` in an endless loop, handing each result to ``_onresponse``.

        NOTE: ``coro`` is a single coroutine object, and a coroutine object
        can only be awaited once. The second loop iteration therefore raises
        ``RuntimeError: cannot reuse already awaited coroutine``.
        """
        while True:
            res = await coro
            await self._onresponse(res, interval)

    async def _onresponse(self, res, interval):
        """React to response ``res``, sleeping ``interval`` seconds for known values."""
        if res == "XXX":
            # ... do something with the response ...
            await asyncio.sleep(interval)
        if res == "YYY":
            # ... do something with the response ...
            await asyncio.sleep(interval)
        if res == "ZZZ":
            # ... do something with the response ...
            await asyncio.sleep(interval)
async def request(something):
    """Perform the HTTP request for ``something`` and return its response.

    Placeholder: the actual aiohttp call is elided.
    """
    # ... HTTP request using aiohttp library ...
    return response
async def main():
    """Start the polling loop for each request, one await after another."""
    c = Client()
    # Each await only returns when its run_forever() call ends, so the
    # entries are started strictly in sequence.
    for name, seconds in (("X", 1), ("Y", 2), ("Z", 3)):
        await c.run_forever(request(name), interval=seconds)
    # ... and more
As the error says, you can't await a coroutine more than once. Instead of passing a coroutine into run_forever and then awaiting it in a loop, pass the coroutine's argument(s) and await a new coroutine on each iteration of the loop.
class Client:
    async def run_forever(self, value, interval):
        """Request ``value`` in an endless loop, handing each result to ``_onresponse``."""
        while True:
            # Create a fresh coroutine every iteration; a coroutine object
            # can only be awaited once.
            res = await request(value)
            await self._onresponse(res, interval)
You also need to change how you await run_forever. await suspends the calling coroutine until the awaited one finishes, so when you await something with an infinite loop, you'll never reach the next line. Instead, you want to gather multiple coroutines at once.
async def main():
    """Run the three polling loops concurrently."""
    client = Client()
    pollers = [
        client.run_forever("X", interval=1),
        client.run_forever("Y", interval=2),
        client.run_forever("Z", interval=3),
    ]
    await asyncio.gather(*pollers)
I'm trying to learn how to run tasks concurrently using Python's asyncio module. In the following code, I've got a mock "web crawler" for an example. Basically, I am trying to make it where there are a max of two active fetch() requests happening at any given time, and I want process() to be called during the sleep() period.
import asyncio
class Crawler():
    """Mock web crawler: "fetches" URLs by sleeping and queues them for processing."""

    # URLs still to be fetched (class attribute, shared by all instances).
    urlq = ['http://www.google.com', 'http://www.yahoo.com',
            'http://www.cnn.com', 'http://www.gamespot.com',
            'http://www.facebook.com', 'http://www.evergreen.edu']
    # Fetched pages waiting for process().
    htmlq = []
    MAX_ACTIVE_FETCHES = 2
    active_fetches = 0

    def __init__(self):
        pass

    async def fetch(self, url):
        """Simulate a 2-second download of ``url`` and append it to ``htmlq``."""
        self.active_fetches += 1
        print("Fetching URL: " + url)
        await asyncio.sleep(2)
        self.active_fetches -= 1
        self.htmlq.append(url)

    async def crawl(self):
        """Fetch URLs from ``urlq`` while fewer than MAX_ACTIVE_FETCHES are active."""
        while self.active_fetches < self.MAX_ACTIVE_FETCHES:
            if self.urlq:
                url = self.urlq.pop()
                task = asyncio.create_task(self.fetch(url))
                # Awaiting the task right away means the next fetch is not
                # started until this one has finished.
                await task
            else:
                print("URL queue empty")
                break

    def process(self, page):
        """Print a confirmation that ``page`` was processed."""
        print("processed page: " + page)
# main loop: crawl until no URLs are left, then process what was fetched
c = Crawler()
while c.urlq:
    asyncio.run(c.crawl())
while c.htmlq:
    c.process(c.htmlq.pop())
However, the code above downloads the URLs one by one (not two at a time concurrently) and doesn't do any "processing" until after all URLs have been fetched. How can I make the fetch() tasks run concurrently, and make it so that process() is called in between during sleep()?
Your crawl method is waiting after each individual task; you should change it to this:
async def crawl(self):
    """Fetch one batch of up to MAX_ACTIVE_FETCHES URLs concurrently.

    URLs are popped from the end of ``self.urlq``; the call returns once
    every fetch in the batch has finished. With an empty ``urlq`` it returns
    immediately.
    """
    tasks = []
    # Bound the batch by its own size and stop when the queue is empty.
    # ``active_fetches`` cannot be used as the loop condition: it only
    # changes once the tasks start running, which is after the first await.
    while self.urlq and len(tasks) < self.MAX_ACTIVE_FETCHES:
        url = self.urlq.pop()
        tasks.append(asyncio.create_task(self.fetch(url)))
    await asyncio.gather(*tasks)
EDIT: Here's a cleaner version with comments that fetches and processes all at the same time, while preserving the basic ability to put a cap on the maximum number of fetchers.
import asyncio
class Crawler:
def __init__(self, urls, max_workers=2):
self.urls = urls
# create a queue that only allows a maximum of two items
self.fetching = asyncio.Queue()
self.max_workers = max_workers
async def crawl(self):
# DON'T await here; start consuming things out of the queue, and
# meanwhile execution of this function continues. We'll start two
# coroutines for fetching and two coroutines for processing.
all_the_coros = asyncio.gather(
*[self._worker(i) for i in range(self.max_workers)])
# place all URLs on the queue
for url in self.urls:
await self.fetching.put(url)
# now put a bunch of `None`'s in the queue as signals to the workers
# that there are no more items in the queue.
for _ in range(self.max_workers):
await self.fetching.put(None)
# now make sure everything is done
await all_the_coros
async def _worker(self, i):
while True:
url = await self.fetching.get()
if url is None:
# this coroutine is done; simply return to exit
return
print(f'Fetch worker {i} is fetching a URL: {url}')
page = await self.fetch(url)
self.process(page)
async def fetch(self, url):
print("Fetching URL: " + url);
await asyncio.sleep(2)
return f"the contents of {url}"
def process(self, page):
print("processed page: " + page)
# main loop: crawl a fixed list of sites
c = Crawler([
    'http://www.google.com',
    'http://www.yahoo.com',
    'http://www.cnn.com',
    'http://www.gamespot.com',
    'http://www.facebook.com',
    'http://www.evergreen.edu',
])
asyncio.run(c.crawl())
You can make htmlq an asyncio.Queue(), and change htmlq.append(url) to htmlq.put_nowait(url) (asyncio.Queue has no push method). Then your main can be async, like this:
async def main():
    """Run the crawl in the background and process pages as they arrive.

    Stops when a ``None`` sentinel is taken from ``htmlq``.
    """
    c = Crawler()
    asyncio.create_task(c.crawl())
    while (page := await c.htmlq.get()) is not None:
        c.process(page)
Your top-level code boils down to a call to asyncio.run(main()).
Once you are done with crawling, crawl() can enqueue None to notify the main coroutine that the work is done.
I have been trying all kinds of things to be able to use an asyncio loop inside another asyncio loop. Most of the time my test just end in errors, such as:
RuntimeError: This event loop is already running
My example code below is just the base test I started with, so you can see the basics of what I am trying to do. I tried so many things after this test, it was just too confusing, so I figured I should keep it simple when asking for help. If anyone can point me in the right direction, that would be great. Thank you for your time!
import asyncio
async def fetch(data):
    """Build the greeting and image list for ``data``.

    Returns a dict with keys ``'message'`` and ``'images'``.
    """
    other_data = ['image_a.com', 'image_b.com', 'image_c.com']
    # sub_run() drives an event loop synchronously from inside this coroutine.
    return {
        'message': 'Hey {}!'.format(data),
        'images': sub_run(other_data),
    }
async def bound(sem, data):
    """Run ``fetch(data)`` while holding the semaphore ``sem``."""
    async with sem:
        return await fetch(data)
async def build(dataset):
    """Fetch every item of ``dataset`` concurrently, at most 400 at a time.

    Returns the results in the order of ``dataset``.
    """
    limiter = asyncio.Semaphore(400)
    pending = [asyncio.ensure_future(bound(limiter, item)) for item in dataset]
    return await asyncio.gather(*pending)
def run(dataset):
    """Synchronously run ``build(dataset)`` on the event loop and return its results.

    The loop is closed afterwards.
    """
    loop = asyncio.get_event_loop()
    # run_until_complete() wraps the coroutine in a future itself.
    results = loop.run_until_complete(build(dataset))
    loop.close()
    return results
async def sub_fetch(data):
    """Return the ``https://`` URL for the host name ``data``."""
    return f'https://{data}'
async def sub_bound(sem, data):
    """Run ``sub_fetch(data)`` while holding the semaphore ``sem``."""
    async with sem:
        return await sub_fetch(data)
async def sub_build(dataset):
    """Resolve every item of ``dataset`` concurrently, at most 400 at a time.

    Returns the results in the order of ``dataset``.
    """
    limiter = asyncio.Semaphore(400)
    pending = [asyncio.ensure_future(sub_bound(limiter, item)) for item in dataset]
    return await asyncio.gather(*pending)
def sub_run(dataset):
    """Synchronously run ``sub_build(dataset)`` on the event loop and return its results.

    The loop is closed afterwards.
    """
    loop = asyncio.get_event_loop()
    # run_until_complete() wraps the coroutine in a future itself.
    results = loop.run_until_complete(sub_build(dataset))
    loop.close()
    return results
if __name__ == '__main__':
    dataset = ['Joe', 'Bob', 'Zoe', 'Howard']
    print(run(dataset))
Running loop.run_until_complete inside a running event loop would block the outer loop, thus defeating the purpose of using asyncio. Because of that, asyncio event loops aren't recursive, and one shouldn't need to run them recursively. Instead of creating an inner event loop, await a task on the existing one.
In your case, remove sub_run and simply replace its usage:
images = sub_run(other_data)
with:
images = await sub_build(other_data)
And it will work just fine, running the sub-coroutines and not continuing with the outer coroutine until the inner one is complete, as you likely intended from the sync code.