Aiokafka library does not consume messages asynchronously - python

I'm trying to use the Python aiokafka async library, and for some reason I can't process the messages asynchronously.
I created an async consumer and producer, and use the asyncio library.
Environment:
python 3.7.2
aiokafka==0.5.1
kafka-python==1.4.3
Consumer:
from aiokafka import AIOKafkaConsumer
import asyncio
import json
import ast

loop = asyncio.get_event_loop()

async def consume():
    consumer = AIOKafkaConsumer(
        "test_topic", loop=loop, bootstrap_servers='localhost:9092')
    # Get cluster layout and topic/partition allocation
    await consumer.start()
    try:
        async for msg in consumer:
            sleep_time = ast.literal_eval(json.loads(msg.value))
            print('before sleep %s' % sleep_time)
            await asyncio.sleep(sleep_time)
            print('after sleep %s' % sleep_time)
    finally:
        await consumer.stop()

loop.run_until_complete(consume())
Producer:
import json
import uuid
from kafka import KafkaProducer, KafkaConsumer

class KafkaClient(object):
    def __init__(self, topic_name=None, consume=True):
        """
        Initialize consumer and producer for Kafka
        :param topic_name: consumer topic name
        """
        self.topic_name = topic_name
        if topic_name is not None:
            self.kafka_connect(topic_name, source='SOURCE')
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092',
                                      key_serializer=str.encode,
                                      value_serializer=lambda m: json.dumps(m).encode('utf-8'))

    def publish_message(self, topic_name, message, extra_data=None):
        try:
            msg_uid = str(uuid.uuid1())
            self.producer.send(topic_name, value=json.dumps(message))
            self.producer.flush()
            print('Message published [msg_uid]: %s' % msg_uid)
            return True
        except Exception as err:
            print(err)
            return False

k = KafkaClient()
for i in range(0, 1):
    k.publish_message('test_topic', 5)
    k.publish_message('test_topic', 3)
    k.publish_message('test_topic', 1)
Expected result: the process will print
before sleep 5
before sleep 3
before sleep 1
after sleep 1
after sleep 3
after sleep 5
Actual result: the process prints
before sleep 5
after sleep 5
before sleep 3
after sleep 3
before sleep 1
after sleep 1

On its own, async for doesn't process a sequence in parallel - it just allows a coroutine to suspend while waiting for the next item to be produced by the async iterable. You can think of it as a series of awaits on the __anext__ special method, analogous to ordinary for being a series of calls to __next__.
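To make the analogy concrete: inside a coroutine, the loop in the question behaves roughly like this manual expansion of the async-iteration protocol (a sketch, eliding some details):

it = consumer.__aiter__()
while True:
    try:
        msg = await it.__anext__()  # suspends until the next message is ready
    except StopAsyncIteration:
        break
    # the loop body runs to completion before __anext__ is awaited again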
But it's easy enough to spawn tasks that process the messages as they arrive. For example:
async def process(msg):
    sleep_time = ast.literal_eval(json.loads(msg.value))
    print('before sleep %s' % sleep_time)
    await asyncio.sleep(sleep_time)
    print('after sleep %s' % sleep_time)

async def consume():
    consumer = AIOKafkaConsumer(
        "test_topic", loop=loop, bootstrap_servers='localhost:9092')
    await consumer.start()
    tasks = []
    try:
        async for msg in consumer:
            # spawn a task per message instead of awaiting each one inline
            tasks.append(asyncio.create_task(process(msg)))
    finally:
        await consumer.stop()
        # wait for all in-flight handlers to finish
        await asyncio.gather(*tasks)
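One caveat with this pattern: it spawns a task per message with no upper bound, so a busy topic can pile up an arbitrary amount of in-flight work. If that is a concern, a semaphore caps the concurrency. A minimal sketch, where the limit of 10 and the name process_bounded are my own choices:

sem = asyncio.Semaphore(10)  # assumed cap; tune to the workload

async def process_bounded(msg):
    # wait for a free slot before running the real handler
    async with sem:
        await process(msg)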

Related

Aggregation of 2 RabbitMQ messages does not work properly (messages hanging unacked)

I need to listen for tasks on 2 queues, so I wrote the code below, but it has a problem. Currently it behaves like this: if the code is started while both queues have messages, it works great. But if the queues were empty (or one of them was), the code reads messages but does not process them (does not send an ack, does not run the logic). The messages remain unacked until I stop the code. I see no reason for them to stay unacked and unprocessed.
I can't understand what is wrong with the code. Maybe there is another way to aggregate 2 or more queues like this?
# task_processor.py
from aio_pika import IncomingMessage

class TaskProcessor:
    MAX_TASKS_PER_INSTANCE = 1

    def __init__(self):
        self._tasks = []

    def can_accept_new_task(self) -> bool:
        return len(self._tasks) < self.MAX_TASKS_PER_INSTANCE

    async def process(self, message: IncomingMessage):
        self._tasks.append(message)
        print(message.body)
        await message.ack()
        self._tasks.pop()

# main.py
import asyncio
from asyncio import QueueEmpty
from typing import Callable
import aio_pika
from aio_pika import RobustQueue
from dotenv import load_dotenv
load_dotenv()
from core.logger.logger import logger
from core.services.rabbitmq.task_processor.task_processor import TaskProcessor

async def get_single_task(queue: RobustQueue):
    while True:
        try:
            msg = await queue.get(timeout=3600)
            return msg
        except QueueEmpty:
            await asyncio.sleep(3)
        except asyncio.exceptions.TimeoutError:
            logger.warning('queue timeout error')
            pass
        except Exception as ex:
            logger.error(f"{queue} errored", exc_info=ex)

async def task_aggregator(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    while True:
        if should_take_new_task_cb():
            queue2, queue1 = queue1, queue2
            gen1 = get_single_task(queue1)
            gen2 = get_single_task(queue2)
            done, _ = await asyncio.wait([gen1, gen2], return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                result = item.result()
                yield result
        else:
            await asyncio.sleep(1)

async def tasks(queue1: RobustQueue, queue2: RobustQueue, should_take_new_task_cb: Callable):
    async for task in task_aggregator(queue1, queue2, should_take_new_task_cb):
        yield task

async def main():
    connection = await aio_pika.connect_robust(
        f"amqp://user:password@host:port/vhost?heartbeat={180}"
    )
    channel1 = connection.channel()
    channel2 = connection.channel()
    await channel1.initialize()
    await channel2.initialize()
    queue1 = await channel1.get_queue('queue1')
    queue2 = await channel2.get_queue('queue2')
    task_processor = TaskProcessor()
    task_generator = tasks(queue1, queue2, task_processor.can_accept_new_task)
    while True:
        if task_processor.can_accept_new_task():
            task = await anext(task_generator)
            await task_processor.process(task)
        else:
            await asyncio.sleep(1)

if __name__ == '__main__':
    asyncio.run(main())
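No answer is recorded here, but one plausible cause, stated as an assumption rather than a diagnosis: asyncio.wait() wraps the two bare coroutines in Tasks, and on FIRST_COMPLETED the losing Task is simply dropped while it keeps running in the background; on some later iteration it fetches a message whose result nobody ever reads, so that message is never acked. A sketch of an aggregator that keeps the pending getter alive instead of abandoning it (the structure and names are mine):

async def task_aggregator(queue1, queue2):
    # Map each in-flight getter Task to its queue so only the finished one is re-armed.
    getters = {asyncio.create_task(get_single_task(q)): q for q in (queue1, queue2)}
    while True:
        done, _ = await asyncio.wait(set(getters), return_when=asyncio.FIRST_COMPLETED)
        for finished in done:
            q = getters.pop(finished)
            yield finished.result()
            # Start a new getter only for the queue that just produced a message.
            getters[asyncio.create_task(get_single_task(q))] = q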

Python Asyncio blocking when using queues

I'm trying to use asyncio with both a sync block (which would be the rest of the Python program) and an async block, and have the sync block send data through asyncio queues.
Without the queueing everything works fine, but when I send data into the queue it seems to block.
I've tried different approaches with get_nowait, etc., but with no success so far.
import asyncio
import time

queue = asyncio.Queue()

async def processor() -> None:
    print("Started proc")
    while True:
        print("waiting for queue")
        msg = await queue.get()
        print(f"Got command from queue: {msg}")
        # do something
        await asyncio.sleep(5)

def run_sync(url: str) -> int:
    while 1:
        print("Sending HTTP request")
        input("enter to send message to queue\n")
        queue.put_nowait(url)
        # do other work
        time.sleep(10)

async def run_sync_threaded(url: str) -> int:
    return await asyncio.to_thread(run_sync, url)

async def main() -> None:
    await asyncio.gather(
        processor(),
        run_sync_threaded("https://www.example.com"),
    )

asyncio.run(main())
EDIT:
I got this working, but it looks like a workaround rather than a proper solution, and it doesn't feel very stable.
import asyncio
import time

queue = asyncio.Queue()

async def processor() -> None:
    print("Started proc")
    while True:
        print("waiting for queue")
        msg = await queue.get()
        print(f"Got command from queue: {msg}")
        # do something
        await asyncio.sleep(5)

async def async_send(url):
    print(f'Adding {url} to queue')
    queue.put_nowait(url)

def send(url, loop):
    asyncio.run_coroutine_threadsafe(async_send(url), loop)

def run_sync(url: str, loop) -> int:
    while 1:
        input("enter to send message to queue\n")
        send(url, loop)
        # do other work
        time.sleep(3)

async def run_sync_threaded(url: str, loop) -> int:
    return await asyncio.to_thread(run_sync, url, loop)

async def main() -> None:
    loop = asyncio.get_event_loop()
    t = asyncio.create_task(processor())
    t2 = asyncio.create_task(run_sync_threaded("https://www.example.com", loop))
    asyncio.gather(
        await t,
        await t2
    )
    # This does not work
    # asyncio.gather(
    #     await processor(),
    #     await run_sync_threaded("https://www.example.com", loop)
    # )

asyncio.run(main())
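For what it's worth, the likely reason the first version blocks: asyncio.Queue is not thread-safe, so put_nowait() called from the to_thread worker mutates the queue without waking the getter waiting on the event-loop thread. run_coroutine_threadsafe works because it hands the put back to the loop. A slightly leaner variant of the same idea (a sketch) skips the helper coroutine entirely:

def send(url, loop):
    # put_nowait never blocks, so it can be scheduled directly onto the loop thread
    loop.call_soon_threadsafe(queue.put_nowait, url)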

Elapsed time of coroutines/futures using asyncio.gather()

I have a list of async queries which I'm gathering with asyncio.gather() and waiting on with loop.run_until_complete(). Something like:
queries = [
    async_query_a(),
    async_query_b()
]
loop = asyncio.get_event_loop()
tasks = asyncio.gather(*queries)
results = loop.run_until_complete(tasks)
I would like to know the "waiting time" of each of the queries. Something like a @log_performance wrapper which logs the elapsed time of the future/coroutine's completion.
Here is an example implementation of timecoro, timing a coroutine function.
import asyncio
import functools
import logging
import random
import time

def timecoro(corofn):
    @functools.wraps(corofn)
    async def wrapper(*args, **kwargs):
        start = time.time()
        try:
            result = await corofn(*args, **kwargs)
        except Exception:
            finish = time.time() - start
            logging.info('%s failed in %.2f', corofn, finish)
            raise
        else:
            finish = time.time() - start
            logging.info('%s succeeded in %.2f', corofn, finish)
            return result
    return wrapper

@timecoro
async def async_query_a():
    await asyncio.sleep(random.randint(0, 4))

@timecoro
async def async_query_b():
    await asyncio.sleep(random.randint(0, 4))
    raise RuntimeError

async def main():
    queries = [
        async_query_a(),
        async_query_b(),
    ]
    await asyncio.gather(*queries)

if __name__ == '__main__':
    logging.basicConfig(level='INFO')
    asyncio.run(main())
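Two small notes on this recipe: because gather() starts the queries concurrently, the logged durations overlap in wall-clock time rather than summing, and the failing async_query_b is still timed because the except branch logs before re-raising. For elapsed-time measurement, time.perf_counter() is generally preferable to time.time() since it is monotonic. A hedged variant that swaps it in (and collapses the two branches into one finally, at the cost of the succeeded/failed distinction):

def timecoro(corofn):
    @functools.wraps(corofn)
    async def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return await corofn(*args, **kwargs)
        finally:
            # logs for both success and failure; exceptions still propagate
            logging.info('%s finished in %.2f', corofn.__name__, time.perf_counter() - start)
    return wrapper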

asyncio : How to queue object when an exception occurs

Hi, I have to process several objects, queued 5 at a time.
I have a queue of 5 items.
Sometimes the process fails and an exception occurs:

async def worker(nam):
    while True:
        queue_item = await queue.get()

The worker starts the process loop and tries to process the items:

        try:
            loop = asyncio.get_event_loop()
            task = loop.create_task(download(queue_item, path))
            download_result = await asyncio.wait_for(task, timeout=timeout)
        except asyncio.TimeoutError:

Unfortunately the process sometimes times out. Can I add something like this?

        except asyncio.TimeoutError:
            await queue.put(queue_item)

I want to process that item again on the next round.
Thanks
Yes, you can re-queue an object at the end of the queue for processing. A simple example based on your code:
import asyncio
from random import randrange

async def download(item):
    print("Process item", item)
    if randrange(4) == 1:  # simulate occasional event
        await asyncio.sleep(100)  # trigger timeout error

async def worker(queue):
    while True:
        queue_item = await queue.get()
        try:
            result = await asyncio.wait_for(download(queue_item), timeout=1)
        except asyncio.TimeoutError:
            print("Timeout for ", queue_item)
            await queue.put(queue_item)
        queue.task_done()

async def main():
    q = asyncio.Queue()
    asyncio.create_task(worker(q))
    for i in range(5):  # put 5 items to process
        await q.put(i)
    await q.join()

asyncio.run(main())
Process item 0
Timeout for 0
Process item 1
Process item 2
Process item 3
Timeout for 3
Process item 4
Process item 0
Process item 3
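Note the ordering in the worker: the timed-out item is put back before task_done() is called, so the queue's unfinished-task counter never reaches zero while a retry is outstanding, and q.join() in main() only returns once every item has actually been downloaded. That is why items 0 and 3 appear a second time in the sample output above.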

Why is asyncio queue await get() blocking?

Why is await queue.get() blocking?
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

async def main():
    queue = asyncio.Queue()
    await consumer(queue)
    await producer(queue, 1)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
If I call producer() before consumer(), it works fine.
That is to say, the following works fine:
async def main():
    queue = asyncio.Queue()
    await producer(queue, 1)
    await consumer(queue)
Why isn't await queue.get() yielding control back to the event loop, so that the producer coroutine can run, which would populate the queue so that queue.get() could return?
You need to start the consumer and the producer in parallel, e.g. defining main like this:
async def main():
    queue = asyncio.Queue()
    await asyncio.gather(consumer(queue), producer(queue, 1))
If for some reason you can't use gather, then you can do (the equivalent of) this:
async def main():
    queue = asyncio.Queue()
    asyncio.create_task(consumer(queue))
    asyncio.create_task(producer(queue, 1))
    await asyncio.sleep(100)  # what your program actually does
Why isn't await queue.get() yielding control back to the event loop, so that the producer coroutine can run, which would populate the queue so that queue.get() could return?
await queue.get() is yielding control back to the event loop. But await means wait, so when your main coroutine says await consumer(queue), that means "resume me once consumer(queue) has completed." Since consumer(queue) is itself waiting for someone to produce something, you have a classic case of deadlock.
Reversing the order works only because your producer is one-shot, so it immediately returns to the caller. If your producer happened to await an external source (such as a socket), you would have a deadlock there as well. Starting them in parallel avoids the deadlock regardless of how producer and consumer are written.
It's because you call await consumer(queue), which means the next line (the producer) will not be called until consumer returns, which of course it never does, because nobody has produced anything yet.
Check out the example in the docs and see how they use it there: https://docs.python.org/3/library/asyncio-queue.html#examples
Another simple example:
import asyncio
import random

async def produce(queue, n):
    for x in range(1, n + 1):
        # produce an item
        print('producing {}/{}'.format(x, n))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())
        item = str(x)
        # put the item in the queue
        await queue.put(item)
    # indicate the producer is done
    await queue.put(None)

async def consume(queue):
    while True:
        # wait for an item from the producer
        item = await queue.get()
        if item is None:
            # the producer emits None to indicate that it is done
            break
        # process the item
        print('consuming item {}...'.format(item))
        # simulate i/o operation using sleep
        await asyncio.sleep(random.random())

loop = asyncio.get_event_loop()
queue = asyncio.Queue(loop=loop)
producer_coro = produce(queue, 10)
consumer_coro = consume(queue)
loop.run_until_complete(asyncio.gather(producer_coro, consumer_coro))
loop.close()
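A compatibility note on the example above: the loop argument to asyncio.Queue was deprecated in Python 3.8 and removed in Python 3.10, and get_event_loop() is discouraged in modern code. On current Python, create the queue with plain asyncio.Queue(), wrap the gather in an async main(), and run it with asyncio.run(main()).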
You should use .run_until_complete() with .gather()
Here is your updated code:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.run_until_complete(
    asyncio.gather(consumer(queue), producer(queue, 1))
)
loop.close()
Out:
val = 1
You could also use .run_forever() with .create_task(). Your code snippet would then be:
import asyncio

async def producer(queue, item):
    await queue.put(item)

async def consumer(queue):
    val = await queue.get()
    print("val = %d" % val)

queue = asyncio.Queue()
loop = asyncio.get_event_loop()
loop.create_task(consumer(queue))
loop.create_task(producer(queue, 1))
try:
    loop.run_forever()
except KeyboardInterrupt:
    loop.close()
Out:
val = 1
