Using threading.Timer with asyncio - Python

I'm new to Python's asyncio feature and I have a server that processes websocket requests from a browser. Here's a simplified version of how it works:
@asyncio.coroutine
def web_client_connected(self, websocket):
    self.web_client_socket = websocket
    while True:
        request = yield from self.web_client_socket.recv()
        json_val = process_request(request)
        yield from self.socket_queue.put(json_val)

@asyncio.coroutine
def push_from_web_client_json_queue(self):
    while True:
        json_val = yield from self.socket_queue.get()
        yield from self.web_client_socket.send(json_val)
One loop looks for websocket requests coming in from the client. When it gets one, it processes it and puts the value onto a queue. Another loop looks for values on that queue, and when it finds one it sends the processed value back out on the websocket. Pretty straightforward, and it works.
What I want to do now is introduce a timer. When a request comes in and is done processing, instead of putting the response on the queue immediately, I want to start a timer for 1 minute. When the timer finishes, I want to put the response on the queue.
I've tried something like:
@asyncio.coroutine
def web_client_connected(self, websocket):
    self.web_client_socket = websocket
    while True:
        request = yield from self.web_client_socket.recv()
        json_val = process_request(request)
        t = threading.Timer(60, self.timer_done, json_val)
        t.start()

@asyncio.coroutine
def timer_done(self, args):
    yield from self.socket_queue.put(args)
It doesn't work though. The timer_done method is never called. If I remove the @asyncio.coroutine decorator and the yield from, then timer_done does get called, but then the call to self.socket_queue.put(args) doesn't work.
I think I'm misunderstanding something fundamental here. How do you do this?

Instead of a timer, use asyncio.ensure_future() and asyncio.sleep():
@asyncio.coroutine
def web_client_connected(self, websocket):
    self.web_client_socket = websocket
    while True:
        request = yield from self.web_client_socket.recv()
        json_val = process_request(request)
        asyncio.ensure_future(self.web_client_timer(json_val))
        yield

@asyncio.coroutine
def web_client_timer(self, json_val):
    yield from asyncio.sleep(60)
    yield from self.socket_queue.put(json_val)
Working example:
import asyncio

@asyncio.coroutine
def foo():
    print("enter foo")
    timers = []
    for i in range(10):
        print("Start foo", i)
        yield from asyncio.sleep(0.5)
        print("Got foo", i)
        timers.append(asyncio.ensure_future(timer(i)))
        yield
    print("foo waiting")
    # wait for all timers to finish
    yield from asyncio.wait(timers)
    print("exit foo")

@asyncio.coroutine
def timer(i):
    print("Setting timer", i)
    yield from asyncio.sleep(2)
    print("**** Timer", i)

loop = asyncio.get_event_loop()
resp = loop.run_until_complete(foo())
loop.close()
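On newer Python versions (3.7+, where asyncio.run is available and the generator-based @asyncio.coroutine style is deprecated), a rough async/await equivalent of the same example might look like this:
import asyncio

async def timer(i):
    # simulated 2-second timer
    print("Setting timer", i)
    await asyncio.sleep(2)
    print("**** Timer", i)

async def foo():
    timers = []
    for i in range(10):
        await asyncio.sleep(0.5)
        # schedule the timer without waiting for it to finish
        timers.append(asyncio.ensure_future(timer(i)))
    # wait for all timers to finish
    await asyncio.wait(timers)

asyncio.run(foo())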

How to run tasks concurrently in asyncio?

I'm trying to learn how to run tasks concurrently using Python's asyncio module. In the following code, I've got a mock "web crawler" as an example. Basically, I am trying to make it so that there are at most two active fetch() requests happening at any given time, and I want process() to be called during the sleep() period.
import asyncio

class Crawler():
    urlq = ['http://www.google.com', 'http://www.yahoo.com',
            'http://www.cnn.com', 'http://www.gamespot.com',
            'http://www.facebook.com', 'http://www.evergreen.edu']
    htmlq = []
    MAX_ACTIVE_FETCHES = 2
    active_fetches = 0

    def __init__(self):
        pass

    async def fetch(self, url):
        self.active_fetches += 1
        print("Fetching URL: " + url)
        await asyncio.sleep(2)
        self.active_fetches -= 1
        self.htmlq.append(url)

    async def crawl(self):
        while self.active_fetches < self.MAX_ACTIVE_FETCHES:
            if self.urlq:
                url = self.urlq.pop()
                task = asyncio.create_task(self.fetch(url))
                await task
            else:
                print("URL queue empty")
                break

    def process(self, page):
        print("processed page: " + page)

# main loop
c = Crawler()
while c.urlq:
    asyncio.run(c.crawl())
while c.htmlq:
    page = c.htmlq.pop()
    c.process(page)
However, the code above downloads the URLs one by one (not two at a time concurrently) and doesn't do any "processing" until after all URLs have been fetched. How can I make the fetch() tasks run concurrently, and make it so that process() is called in the meantime, during the sleep() periods?
Your crawl method is waiting after each individual task; you should change it to this:
async def crawl(self):
    tasks = []
    while self.active_fetches < self.MAX_ACTIVE_FETCHES:
        if self.urlq:
            url = self.urlq.pop()
            tasks.append(asyncio.create_task(self.fetch(url)))
        else:
            break
    await asyncio.gather(*tasks)
EDIT: Here's a cleaner version with comments that fetches and processes all at the same time, while preserving the basic ability to put a cap on the maximum number of fetchers.
import asyncio

class Crawler:
    def __init__(self, urls, max_workers=2):
        self.urls = urls
        # queue of URLs waiting to be fetched, shared by the workers
        self.fetching = asyncio.Queue()
        self.max_workers = max_workers

    async def crawl(self):
        # DON'T await here; start consuming things out of the queue, and
        # meanwhile execution of this function continues. We'll start
        # max_workers worker coroutines that each fetch and then process pages.
        all_the_coros = asyncio.gather(
            *[self._worker(i) for i in range(self.max_workers)])

        # place all URLs on the queue
        for url in self.urls:
            await self.fetching.put(url)

        # now put a bunch of `None`'s in the queue as signals to the workers
        # that there are no more items in the queue.
        for _ in range(self.max_workers):
            await self.fetching.put(None)

        # now make sure everything is done
        await all_the_coros

    async def _worker(self, i):
        while True:
            url = await self.fetching.get()
            if url is None:
                # this coroutine is done; simply return to exit
                return
            print(f'Fetch worker {i} is fetching a URL: {url}')
            page = await self.fetch(url)
            self.process(page)

    async def fetch(self, url):
        print("Fetching URL: " + url)
        await asyncio.sleep(2)
        return f"the contents of {url}"

    def process(self, page):
        print("processed page: " + page)

# main loop
c = Crawler(['http://www.google.com', 'http://www.yahoo.com',
             'http://www.cnn.com', 'http://www.gamespot.com',
             'http://www.facebook.com', 'http://www.evergreen.edu'])
asyncio.run(c.crawl())
You can make htmlq an asyncio.Queue() and change the htmlq.append call to await htmlq.put(...). Then your main can be async, like this:
async def main():
    c = Crawler()
    asyncio.create_task(c.crawl())
    while True:
        page = await c.htmlq.get()
        if page is None:
            break
        c.process(page)
Your top-level code boils down to a call to asyncio.run(main()).
Once you are done with crawling, crawl() can enqueue None to notify the main coroutine that the work is done.
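For concreteness, here is a minimal sketch of that variant, assuming the worker-based Crawler from the edit above with an added htmlq attribute (the two-URL list is just a placeholder):
import asyncio

class Crawler:
    def __init__(self, urls, max_workers=2):
        self.urls = urls
        self.fetching = asyncio.Queue()  # URLs waiting to be fetched
        self.htmlq = asyncio.Queue()     # fetched pages waiting to be processed
        self.max_workers = max_workers

    async def fetch(self, url):
        await asyncio.sleep(2)           # stand-in for a real HTTP request
        return f"the contents of {url}"

    async def _worker(self, i):
        while True:
            url = await self.fetching.get()
            if url is None:
                return
            page = await self.fetch(url)
            await self.htmlq.put(page)   # hand the page to the consumer

    async def crawl(self):
        workers = asyncio.gather(*[self._worker(i) for i in range(self.max_workers)])
        for url in self.urls:
            await self.fetching.put(url)
        for _ in range(self.max_workers):
            await self.fetching.put(None)  # tell each worker to stop
        await workers
        await self.htmlq.put(None)         # tell main() that crawling is done

async def main():
    c = Crawler(['http://www.google.com', 'http://www.yahoo.com'])
    asyncio.create_task(c.crawl())
    while True:
        page = await c.htmlq.get()
        if page is None:
            break
        print("processed page: " + page)

asyncio.run(main())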

Tornado async http client blocks

import tornado.gen
import tornado.httpclient
import tornado.ioloop
import tornado.queues

theQueue = tornado.queues.Queue()
theQueue.put_nowait('http://www.baidu.com')
theQueue.put_nowait('http://www.google.com')
theQueue.put_nowait('http://cn.bing.com/')

@tornado.gen.coroutine
def Test1():
    def cb(response):
        print str(response)
    while True:
        item = yield theQueue.get()
        print item
        tmp = tornado.httpclient.AsyncHTTPClient(force_instance=True)
        tmp.fetch(item, callback=cb)

@tornado.gen.coroutine
def Test2():
    while True:
        item = yield theQueue.get()
        print item
        tmp = tornado.httpclient.AsyncHTTPClient(force_instance=True)
        response = yield tmp.fetch(item)
        print str(response)

# Test1()
Test2()
tornado.ioloop.IOLoop.instance().start()
Python 2.6 and Tornado 4.2.
In the function Test1, it first prints out the 3 items, then prints the 3 responses.
But in Test2, it prints each item and its response one by one.
I was confused: why isn't Test2 asynchronous?
Test2() is asynchronous, but in a different way: the coroutine way.
A Tornado coroutine suspends when it reaches a yield expression, waiting for the async operation (in your case, requesting a web page via the HTTP client) to complete. Tornado switches to other available coroutines while the current coroutine is suspended.
With coroutines, your code looks synchronous, and it also "runs synchronously" if there is only one coroutine.
You can easily test the asynchronous behavior of Tornado's coroutines by running two or more coroutines:
@tornado.gen.coroutine
def Test2():
    while True:
        item = yield theQueue.get()
        print 'Test2:', item
        tmp = tornado.httpclient.AsyncHTTPClient(force_instance=True)
        response = yield tmp.fetch(item)
        print 'Test2:', str(response)

# Write another test function called `Test3` that does exactly the same thing as Test2.
@tornado.gen.coroutine
def Test3():
    while True:
        item = yield theQueue.get()
        print 'Test3:', item
        tmp = tornado.httpclient.AsyncHTTPClient(force_instance=True)
        response = yield tmp.fetch(item)
        print 'Test3:', str(response)

Test2()
Test3()
tornado.ioloop.IOLoop.instance().start()
You will see Test2 and Test3 run "simultaneously" (though not truly in parallel) in this example.
This ability to switch between different coroutines in order to perform concurrent operations is what "asynchronous" means for coroutines.
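If you also want concurrency within a single coroutine, Tornado lets you yield a list of futures so that several fetches run in parallel; a small sketch along those lines (reusing theQueue from above, which holds the three URLs) might be:
@tornado.gen.coroutine
def Test4():
    # start all fetches first, then wait for the whole list at once
    client = tornado.httpclient.AsyncHTTPClient(force_instance=True)
    futures = []
    while theQueue.qsize() > 0:
        item = yield theQueue.get()
        futures.append(client.fetch(item))
    responses = yield futures  # yielding a list waits for all futures in parallel
    for response in responses:
        print str(response)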

Gradually create async tasks and wait for all of them to complete

I am trying to write a program that makes a lot of websocket connections to the server I've created:
class WebSocketClient():
    @asyncio.coroutine
    def run(self):
        print(self.client_id, 'Connecting')
        ws = yield from aiohttp.ws_connect(self.url)
        print(self.client_id, 'Connected')
        print(self.client_id, 'Sending the message')
        ws.send_str(self.make_new_message())
        while not ws.closed:
            msg = yield from ws.receive()
            if msg.tp == aiohttp.MsgType.text:
                print(self.client_id, 'Received the echo')
                yield from ws.close()
                break
        print(self.client_id, 'Closed')

@asyncio.coroutine
def make_clients():
    for client_id in range(args.clients):
        yield from WebSocketClient(client_id, WS_CHANNEL_URL.format(client_id=client_id)).run()

event_loop.run_until_complete(make_clients())
The problem is that all the clients do their jobs one after another:
0 Connecting
0 Connected
0 Sending the message
0 Received the echo
0 Closed
1 Connecting
1 Connected
1 Sending the message
1 Received the echo
1 Closed
...
I've tried to use asyncio.wait, but then all the clients start together. I want the clients to be created gradually, with each one connecting to the server as soon as it is created, while new clients keep being created in the meantime.
What approach should I use to accomplish this?
Using asyncio.wait is a good approach. You can combine it with asyncio.ensure_future and asyncio.sleep to create tasks gradually:
@asyncio.coroutine
def make_clients(nb_clients, delay):
    futures = []
    for client_id in range(nb_clients):
        url = WS_CHANNEL_URL.format(client_id=client_id)
        coro = WebSocketClient(client_id, url).run()
        futures.append(asyncio.ensure_future(coro))
        yield from asyncio.sleep(delay)
    yield from asyncio.wait(futures)
EDIT: I implemented a FutureSet class that should do what you want. The set can be filled with futures, which are removed automatically when they're done. It is also possible to wait for all the futures to complete.
class FutureSet:

    def __init__(self, maxsize, *, loop=None):
        self._set = set()
        self._loop = loop
        self._maxsize = maxsize
        self._waiters = []

    @asyncio.coroutine
    def add(self, item):
        if not asyncio.iscoroutine(item) and \
           not isinstance(item, asyncio.Future):
            raise ValueError('Expecting a coroutine or a Future')
        if item in self._set:
            return
        while len(self._set) >= self._maxsize:
            waiter = asyncio.Future(loop=self._loop)
            self._waiters.append(waiter)
            yield from waiter
        # asyncio.async() was renamed to asyncio.ensure_future() in Python 3.4.4
        item = asyncio.async(item, loop=self._loop)
        self._set.add(item)
        item.add_done_callback(self._remove)

    def _remove(self, item):
        if not item.done():
            raise ValueError('Cannot remove a pending Future')
        self._set.remove(item)
        if self._waiters:
            waiter = self._waiters.pop(0)
            waiter.set_result(None)

    @asyncio.coroutine
    def wait(self):
        return asyncio.wait(self._set)
Example:
@asyncio.coroutine
def make_clients(nb_clients, limit=0):
    futures = FutureSet(maxsize=limit)
    for client_id in range(nb_clients):
        url = WS_CHANNEL_URL.format(client_id=client_id)
        client = WebSocketClient(client_id, url)
        yield from futures.add(client.run())
    yield from futures.wait()
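On newer Python versions, a similar cap on the number of clients running at once can be expressed with asyncio.Semaphore instead of a custom FutureSet. A minimal sketch (reusing the WebSocketClient and WS_CHANNEL_URL names from the question) creates all the task objects up front but only lets `limit` clients connect at a time:
import asyncio

async def run_limited(client, semaphore):
    # each client waits for a free slot before connecting
    async with semaphore:
        await client.run()

async def make_clients(nb_clients, limit):
    semaphore = asyncio.Semaphore(limit)
    tasks = []
    for client_id in range(nb_clients):
        url = WS_CHANNEL_URL.format(client_id=client_id)
        client = WebSocketClient(client_id, url)
        tasks.append(asyncio.ensure_future(run_limited(client, semaphore)))
    await asyncio.wait(tasks)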

Asynchronous RabbitMQ consumer with aioamqp

I'm trying to write an asynchronous consumer using asyncio/aioamqp. My problem is that the callback coroutine (below) blocks. I set the channel to do a basic_consume() and assign callback() as its callback. The callback has a "yield from asyncio.sleep" statement (to simulate "work"), which takes an integer from the publisher and sleeps for that amount of time before printing the message.
If I publish two messages, one with a time of "10" immediately followed by one with a time of "1", I expect the second message to print first, since it has a shorter sleep time. Instead, the callback blocks for 10 seconds, prints the first message, and then prints the second.
It appears that either basic_consume or the callback is blocking somewhere. Is there another way this could be handled?
import asyncio
import random

import aioamqp

@asyncio.coroutine
def callback(body, envelope, properties):
    yield from asyncio.sleep(int(body))
    print("consumer {} recved {} ({})".format(envelope.consumer_tag, body, envelope.delivery_tag))

@asyncio.coroutine
def receive_log():
    try:
        transport, protocol = yield from aioamqp.connect('localhost', 5672, login="login", password="password")
    except:
        print("closed connections")
        return

    channel = yield from protocol.channel()
    exchange_name = 'cloudstack-events'
    exchange_name = 'test-async-exchange'
    queue_name = 'async-queue-%s' % random.randint(0, 10000)

    yield from channel.exchange(exchange_name, 'topic', auto_delete=True, passive=False, durable=False)
    yield from asyncio.wait_for(channel.queue(queue_name, durable=False, auto_delete=True), timeout=10)

    binding_keys = ['mykey']
    for binding_key in binding_keys:
        print("binding", binding_key)
        yield from asyncio.wait_for(channel.queue_bind(exchange_name=exchange_name,
                                                       queue_name=queue_name,
                                                       routing_key=binding_key), timeout=10)

    print(' [*] Waiting for logs. To exit press CTRL+C')
    yield from channel.basic_consume(queue_name, callback=callback)

loop = asyncio.get_event_loop()
loop.create_task(receive_log())
loop.run_forever()
For those interested, I figured out a way to do this. I'm not sure if it's best practice, but it accomplishes what I need.
Rather than doing the "work" (in this case, asyncio.sleep) inside the callback, I create a new task on the loop, scheduling a separate coroutine, do_work(), to run. Presumably this works because it frees callback() up to return immediately.
I loaded up a few hundred events in Rabbit with different sleep timers, and they were interleaved when printed by the code below. So it seems to be working. Hope this helps someone!
import asyncio
import random

import aioamqp

@asyncio.coroutine
def do_work(envelope, body):
    yield from asyncio.sleep(int(body))
    print("consumer {} recved {} ({})".format(envelope.consumer_tag, body, envelope.delivery_tag))

@asyncio.coroutine
def callback(body, envelope, properties):
    loop = asyncio.get_event_loop()
    loop.create_task(do_work(envelope, body))

@asyncio.coroutine
def receive_log():
    try:
        transport, protocol = yield from aioamqp.connect('localhost', 5672, login="login", password="password")
    except:
        print("closed connections")
        return

    channel = yield from protocol.channel()
    exchange_name = 'cloudstack-events'
    exchange_name = 'test-async-exchange'
    queue_name = 'async-queue-%s' % random.randint(0, 10000)

    yield from channel.exchange(exchange_name, 'topic', auto_delete=True, passive=False, durable=False)
    yield from asyncio.wait_for(channel.queue(queue_name, durable=False, auto_delete=True), timeout=10)

    binding_keys = ['mykey']
    for binding_key in binding_keys:
        print("binding", binding_key)
        yield from asyncio.wait_for(channel.queue_bind(exchange_name=exchange_name,
                                                       queue_name=queue_name,
                                                       routing_key=binding_key), timeout=10)

    print(' [*] Waiting for logs. To exit press CTRL+C')
    yield from channel.basic_consume(queue_name, callback=callback)

loop = asyncio.get_event_loop()
loop.create_task(receive_log())
loop.run_forever()

Calling a coroutine from asyncio.Protocol.data_received

This is similar to Calling coroutines in asyncio.Protocol.data_received but I think it warrants a new question.
I have a simple server set up like this:
loop.create_unix_server(lambda: protocol, path=serverSocket)
It works fine. If I do this
def data_received(self, data):
    data = b'data reply'
    self.send(data)
my client gets the reply. But I can't get it to work with any sort of asyncio call. I tried all of the following and none of them worked.
@asyncio.coroutine
def go(self):
    yield from asyncio.sleep(1, result=b'data reply')

def data_received(self, data):
    print('Data Received', flush=True)
    task = asyncio.get_event_loop().create_task(self.go())
    data = yield from asyncio.wait_for(task, 10)
    self.send(data)
That one hung and printed nothing (and if I decorate data_received with @asyncio.coroutine, I get a warning that the coroutine was never yielded from). OK, I get that using yield in data_received isn't right.
If I try a new event loop, as below, that hangs in run_until_complete
loop = asyncio.new_event_loop()
task = loop.create_task(self.go())
loop.run_until_complete(task)
data = task.result()
self.send(data)
If I use a Future, that also hangs in run_until_complete
@asyncio.coroutine
def go(self, future):
    yield from asyncio.sleep(1)
    future.set_result(b'data reply')

def data_received(self, data):
    print('Data Received', flush=True)
    loop = asyncio.new_event_loop()
    future = asyncio.Future(loop=loop)
    asyncio.async(self.go(future))
    loop.run_until_complete(future)
    data = future.result()
    self.send(data)
The following gets close, but it returns immediately and the result is of type asyncio.coroutines.CoroWrapper, implying that the wait_for line returned immediately with the unfinished task?
@asyncio.coroutine
def go(self):
    return (yield from asyncio.sleep(3, result=b'data reply'))

@asyncio.coroutine
def go2(self):
    task = asyncio.get_event_loop().create_task(self.go())
    res = yield from asyncio.wait_for(task, 10)
    return res

def data_received(self, data):
    print('Data Received', flush=True)
    data = self.go2()
    self.send(data)
I'm a bit stuck really, and would appreciate some pointers about what to look at.
You need to add your coroutine to the event loop, and then use Future.add_done_callback to handle the result when the coroutine completes:
@asyncio.coroutine
def go(self):
    return (yield from asyncio.sleep(3, result=b'data reply'))

def data_received(self, data):
    print('Data Received', flush=True)
    task = asyncio.async(self.go())  # or asyncio.get_event_loop().create_task(self.go())
    task.add_done_callback(self.handle_go_result)

def handle_go_result(self, task):
    data = task.result()
    self.send(data)
Calling a coroutine directly in data_received simply isn't possible, since the caller isn't going to try to yield from it, and creating/running a new event loop inside data_received will always end up blocking the main event loop until the inner event loop finishes its work.
You just want to schedule some work with your main event loop (asyncio.async/loop.create_task()) and schedule a callback to run when the work is done (add_done_callback).
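On Python 3.5+ the same pattern can be written with async def and ensure_future; a minimal sketch of a protocol doing this (the b'data reply' payload and 3-second delay are just placeholders) might look like:
import asyncio

class ReplyProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport

    async def go(self):
        # placeholder for real asynchronous work
        return await asyncio.sleep(3, result=b'data reply')

    def data_received(self, data):
        # schedule the coroutine on the running loop and handle its result later
        task = asyncio.ensure_future(self.go())
        task.add_done_callback(self.handle_go_result)

    def handle_go_result(self, task):
        self.transport.write(task.result())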
