Here is the handler for my login page, which I intend to use via AJAX POST requests.
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError


class AdminLoginHandler(RequestHandler):
    async def post(self):
        username = self.get_argument("username")
        password = self.get_argument("password")
        db_hash = await self.settings['db'].users.find_one({"username": username}, {"password": 1})
        if not db_hash:
            await self.settings['hasher'].verify("", "")
            self.write("wrong")
            return
        try:
            print(db_hash)
            pass_correct = await self.settings['hasher'].verify(db_hash['password'], password)
        except VerifyMismatchError:
            pass_correct = False
        if pass_correct:
            self.set_secure_cookie("user", username)
            self.write("set?")
        else:
            self.write("wrong")
The application settings include this argument: hasher=PasswordHasher().
I'm getting the following error: TypeError: object bool can't be used in 'await' expression. I'm aware this is because the function I'm calling doesn't return a future object but a boolean.
My question is: how do I use the hashing library asynchronously without Tornado blocking for the full duration of the hashing process, which I know by design takes a long time?
You can use a ThreadPoolExecutor or a ProcessPoolExecutor to run the time-consuming code in separate threads/processes:
import math
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

import tornado.ioloop
import tornado.web


def blocking_task(number):
    return len(str(math.factorial(number)))


class MainHandler(tornado.web.RequestHandler):
    executor = ProcessPoolExecutor(max_workers=4)
    # executor = ThreadPoolExecutor(max_workers=4)

    async def get(self):
        number = 54545  # factorial calculation takes about one second on my machine
        # result = blocking_task(number)  # use this line for a classic (non-pool) function call
        result = await tornado.ioloop.IOLoop.current().run_in_executor(self.executor, blocking_task, number)
        self.write("result has %d digits" % result)


def make_app():
    return tornado.web.Application([
        (r"/", MainHandler),
    ])


if __name__ == "__main__":
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
I used a simple factorial calculation here to simulate a CPU-intensive task and tested the above using wrk:
wrk -t2 -c4 -d30s http://127.0.0.1:8888/
Running 30s test @ http://127.0.0.1:8888/
  2 threads and 4 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency     1.25s    34.16ms   1.37s    72.04%
    Req/Sec     2.54      3.40     10.00    83.75%
  93 requests in 30.04s, 19.89KB read
Requests/sec:      3.10
Transfer/sec:     678.00B
Without the executor I would get around 1 request/sec; of course, you need to tune the max_workers setting according to your setup.
If you're going to test using a browser, be aware of possible limitations.
Edit
I modified the code to easily allow for a process executor instead of a thread executor, but I doubt it will make much difference in your case, mainly because calls to argon2 should release the GIL. You should test it nonetheless.
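For your specific handler, a minimal sketch of the same idea might look like this (it assumes the same settings keys from your question and keeps your dummy verify for the missing-user branch; note that PasswordHasher.verify signals a wrong password by raising VerifyMismatchError rather than returning False):

from concurrent.futures import ThreadPoolExecutor

import tornado.ioloop
from argon2.exceptions import VerifyMismatchError


class AdminLoginHandler(RequestHandler):
    # A small pool dedicated to password hashing.
    executor = ThreadPoolExecutor(max_workers=4)

    async def post(self):
        username = self.get_argument("username")
        password = self.get_argument("password")
        db_hash = await self.settings['db'].users.find_one({"username": username}, {"password": 1})
        hasher = self.settings['hasher']
        loop = tornado.ioloop.IOLoop.current()
        if not db_hash:
            try:
                # Dummy verify as in your code (timing mitigation), run off the
                # IOLoop thread; it raises on invalid input, so swallow the error.
                await loop.run_in_executor(self.executor, hasher.verify, "", "")
            except Exception:
                pass
            self.write("wrong")
            return
        try:
            # hasher.verify blocks for the whole hash check, so run it on the pool.
            pass_correct = await loop.run_in_executor(
                self.executor, hasher.verify, db_hash['password'], password)
        except VerifyMismatchError:
            pass_correct = False
        if pass_correct:
            self.set_secure_cookie("user", username)
            self.write("set?")
        else:
            self.write("wrong")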
Related
I am trying to understand how to handle a gRPC API with bidirectional streaming (using the Python API).
Say I have the following simple server definition:
syntax = "proto3";

package simple;

service TestService {
  rpc Translate(stream Msg) returns (stream Msg) {}
}

message Msg {
  string msg = 1;
}
Say that the messages sent from the client come asynchronously (as a consequence of the user selecting some UI elements).
The generated Python stub for the client will contain a method Translate that accepts a generator function and returns an iterator.
What is not clear to me is how I would write the generator function that returns messages as they are created by the user. Sleeping on the thread while waiting for messages doesn't sound like the best solution.
This is a bit clunky right now, but you can accomplish your use case as follows:
#!/usr/bin/env python
from __future__ import print_function

import time
import random
import collections
import threading

from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
import grpc
from translate_pb2 import Msg
from translate_pb2_grpc import TestServiceStub
from translate_pb2_grpc import TestServiceServicer
from translate_pb2_grpc import add_TestServiceServicer_to_server


def translate_next(msg):
    return ''.join(reversed(msg))


class Translator(TestServiceServicer):
    def Translate(self, request_iterator, context):
        for req in request_iterator:
            print("Translating message: {}".format(req.msg))
            yield Msg(msg=translate_next(req.msg))


class TranslatorClient(object):
    def __init__(self):
        self._stop_event = threading.Event()
        self._request_condition = threading.Condition()
        self._response_condition = threading.Condition()
        self._requests = collections.deque()
        self._last_request = None
        self._expected_responses = collections.deque()
        self._responses = {}

    def _next(self):
        with self._request_condition:
            while not self._requests and not self._stop_event.is_set():
                self._request_condition.wait()
            if len(self._requests) > 0:
                return self._requests.popleft()
            else:
                raise StopIteration()

    def next(self):
        return self._next()

    def __next__(self):
        return self._next()

    def add_response(self, response):
        with self._response_condition:
            request = self._expected_responses.popleft()
            self._responses[request] = response
            self._response_condition.notify_all()

    def add_request(self, request):
        with self._request_condition:
            self._requests.append(request)
            with self._response_condition:
                self._expected_responses.append(request.msg)
            self._request_condition.notify()

    def close(self):
        self._stop_event.set()
        with self._request_condition:
            self._request_condition.notify()

    def translate(self, to_translate):
        self.add_request(to_translate)
        with self._response_condition:
            while True:
                self._response_condition.wait()
                if to_translate.msg in self._responses:
                    return self._responses[to_translate.msg]


def _run_client(address, translator_client):
    with grpc.insecure_channel(address) as channel:
        stub = TestServiceStub(channel)
        responses = stub.Translate(translator_client)
        for resp in responses:
            translator_client.add_response(resp)


def main():
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_TestServiceServicer_to_server(Translator(), server)
    server.add_insecure_port('[::]:50054')
    server.start()
    translator_client = TranslatorClient()
    client_thread = threading.Thread(
        target=_run_client, args=('localhost:50054', translator_client))
    client_thread.start()

    def _translate(to_translate):
        return translator_client.translate(Msg(msg=to_translate)).msg

    translator_pool = futures.ThreadPoolExecutor(max_workers=4)
    to_translate = ("hello", "goodbye", "I", "don't", "know", "why",)
    translations = translator_pool.map(_translate, to_translate)
    # list() so the pairs print under Python 3 as well as Python 2.
    print("Translations: {}".format(list(zip(to_translate, translations))))
    translator_client.close()
    client_thread.join()
    server.stop(None)


if __name__ == "__main__":
    main()
The basic idea is to have an object called TranslatorClient running on a separate thread, correlating requests and responses. It expects that responses will return in the order that requests were sent out. It also implements the iterator interface so that you can pass it directly to an invocation of the Translate method on your stub.
We spin up a thread running _run_client, which pulls requests out of TranslatorClient (via its iterator interface) and feeds the responses back in at the other end with add_response.
The main function I included here is really just a strawman since I don't have the particulars of your UI code. I'm running _translate in a ThreadPoolExecutor to demonstrate that, even though translator_client.translate is synchronous, it yields, allowing you to have multiple in-flight requests at once.
We recognize that this is a lot of code to write for such a simple use case. Ultimately, the answer will be asyncio support. We have plans for this in the not-too-distant future. But for the moment, this sort of solution should keep you going whether you're running Python 2 or Python 3.
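For reference, newer grpcio releases now ship that asyncio support as grpc.aio, so a rough sketch of the same client without the helper thread (assuming the same generated stubs and a server already listening on localhost:50054) could look like:

import asyncio

import grpc
from translate_pb2 import Msg
from translate_pb2_grpc import TestServiceStub


async def translate_all(messages):
    # grpc.aio calls integrate directly with the asyncio event loop,
    # so no helper thread or hand-rolled request iterator is needed.
    async with grpc.aio.insecure_channel('localhost:50054') as channel:
        stub = TestServiceStub(channel)
        call = stub.Translate()  # bidirectional stream: explicit write/read
        for text in messages:
            await call.write(Msg(msg=text))
            response = await call.read()
            print("Translated:", response.msg)
        await call.done_writing()


asyncio.run(translate_all(("hello", "goodbye")))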
I am trying to play with this piece of code to understand @tornado.web.asynchronous. The code as intended should handle asynchronous web requests, but it doesn't seem to work as intended. There are two endpoints:
1) http://localhost:5000/A (this is the time-consuming request and takes a few seconds)
2) http://localhost:5000/B (this is the fast request and takes no time to return)
However, when I point the browser at http://localhost:5000/A and then, while that is running, go to http://localhost:5000/B, the second request is queued and runs only after A has finished.
In other words, one task is time-consuming but it blocks the other, faster task. What am I doing wrong?
import tornado.web
from tornado.ioloop import IOLoop
import sys, random, signal


class TestHandler(tornado.web.RequestHandler):
    """
    In the function below goes your time-consuming task.
    """
    def background_task(self):
        sm = 0
        for i in range(10 ** 8):
            sm = sm + 1
        return str(sm + random.randint(0, sm)) + "\n"

    @tornado.web.asynchronous
    def get(self):
        """ Request that asynchronously calls background task. """
        res = self.background_task()
        self.write(str(res))
        self.finish()


class TestHandler2(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        self.write('Response from server: ' + str(random.randint(0, 100000)) + "\n")
        self.finish()


def sigterm_handler(signal, frame):
    # save the state here or do whatever you want
    print('SIGTERM: got kill, exiting')
    sys.exit(0)


def main(argv):
    signal.signal(signal.SIGTERM, sigterm_handler)
    try:
        if argv:
            print(":argv:", argv)
        application = tornado.web.Application([
            (r"/A", TestHandler),
            (r"/B", TestHandler2),
        ])
        application.listen(5000)
        IOLoop.instance().start()
    except KeyboardInterrupt:
        print("Caught interrupt")
    except Exception as e:
        print(e)
    finally:
        print("App: exited")


if __name__ == '__main__':
    sys.exit(main(sys.argv))
According to the documentation:
To minimize the cost of concurrent connections, Tornado uses a
single-threaded event loop. This means that all application code
should aim to be asynchronous and non-blocking because only one
operation can be active at a time.
To achieve this goal you need to prepare the RequestHandler properly. Simply adding the @tornado.web.asynchronous decorator to any of the functions (get, post, etc.) is not enough if the function performs only synchronous actions.
What does the @tornado.web.asynchronous decorator do?
Let's look at the get function. The statements are executed one after another in a synchronous manner. Once the work is done and the function returns, the request is closed; a call to self.finish() is made under the hood. However, when we use the @tornado.web.asynchronous decorator, the request is not closed after the function returns, so self.finish() must be called by the user to finish the HTTP request. Without this decorator the request is automatically finished when the get() method returns.
Look at "Example 21" from this page - tornado.web.asynchronous:
@web.asynchronous
def get(self):
    http = httpclient.AsyncHTTPClient()
    http.fetch("http://example.com/", self._on_download)

def _on_download(self, response):
    self.finish()
The get() function performs an asynchronous call to the http://example.com/ page. Let's assume this call is a long action. So the http.fetch() function is called, and a moment later the get() function returns (http.fetch() is still running in the background). Tornado's IOLoop can move forward to serve the next request while the data from http://example.com/ is being fetched. Once the http.fetch() call is finished, the callback function - self._on_download - is called. Then self.finish() is called and the request is finally closed. This is the moment when the user can see the result in the browser.
It's possible due to the httpclient.AsyncHTTPClient(). If you use the synchronous httpclient.HTTPClient() instead, you will need to wait for the call to http://example.com/ to finish. Then the get() function will return and the next request will be processed.
To sum up: you use the @tornado.web.asynchronous decorator when you use asynchronous code inside the RequestHandler, which is advised. Otherwise it doesn't make much difference to performance.
EDIT: To solve your problem you can run your time-consuming function in a separate thread. Here's a simple example of your TestHandler class:
class TestHandler(tornado.web.RequestHandler):

    def on_finish(self, response):
        self.write(response)
        self.finish()

    def async_function(base_function):
        @functools.wraps(base_function)
        def run_in_a_thread(*args, **kwargs):
            func_t = threading.Thread(target=base_function, args=args, kwargs=kwargs)
            func_t.start()
        return run_in_a_thread

    @async_function
    def background_task(self, callback):
        sm = 0
        for i in range(10 ** 8):
            sm = sm + 1
        callback(str(sm + random.randint(0, sm)))

    @tornado.web.asynchronous
    def get(self):
        self.background_task(self.on_finish)
You also need to add these imports to your code:
import threading
import functools
async_function is a decorator function. If you're not familiar with the topic, I suggest reading up on decorators and trying it on your own. In general, our decorator allows the function to return immediately (so the main program execution can go forward) while the processing takes place at the same time in a separate thread. Once the function in the thread is finished, we call a callback function which writes out the results to the end user and closes the connection.
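For comparison, on Tornado 5+ you can get the same effect without the hand-rolled thread decorator by making get a native coroutine and pushing the loop onto an executor; a sketch of that variant:

import random
from concurrent.futures import ThreadPoolExecutor

import tornado.ioloop
import tornado.web


class TestHandler(tornado.web.RequestHandler):
    executor = ThreadPoolExecutor(max_workers=4)

    def background_task(self):
        sm = 0
        for i in range(10 ** 8):
            sm = sm + 1
        return str(sm + random.randint(0, sm)) + "\n"

    async def get(self):
        loop = tornado.ioloop.IOLoop.current()
        # The blocking loop runs on the pool; the IOLoop stays free to serve /B.
        res = await loop.run_in_executor(self.executor, self.background_task)
        self.write(res)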
I am a newbie with Tornado and Python. A couple of days ago I started to write a non-blocking REST API, but I haven't accomplished the mission yet. When I send two requests to the endpoint "localhost:8080/async" at the same time, the second request gets its response after 20 seconds! That tells me I am doing something wrong.
MAX_WORKERS = 4


class ASYNCHandler(tornado.web.RequestHandler):

    executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
    counter = 0

    def pow_task(self, x, y):
        time.sleep(10)
        return pow(x, y)

    async def background_task(self):
        future = ASYNCHandler.executor.submit(self.pow_task, 2, 3)
        return future

    @gen.coroutine
    def get(self, *args, **kwargs):
        future = yield from self.background_task()
        response = dumps({"result": future.result()}, default=json_util.default)
        print(response)


application = tornado.web.Application([
    ('/async', ASYNCHandler),
    ('/sync', SYNCHandler),
], db=db, debug=True)

application.listen(8888)
tornado.ioloop.IOLoop.current().start()
Never use time.sleep in Tornado code! Use IOLoop.add_timeout to schedule a callback later, or, in a coroutine, yield gen.sleep(n).
http://www.tornadoweb.org/en/latest/faq.html#why-isn-t-this-example-with-time-sleep-running-in-parallel
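For illustration, the non-blocking equivalent of a ten-second pause inside a coroutine looks like this (handler name is just for the example):

from tornado import gen, web


class SleepHandler(web.RequestHandler):
    @gen.coroutine
    def get(self):
        # Yields control back to the IOLoop instead of freezing the process.
        yield gen.sleep(10)
        self.write("done")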
It's strange that returning the ThreadPoolExecutor future essentially blocks Tornado's event loop. If anyone from the Tornado team reads this and knows why that is, could they please give an explanation? I had planned to do some stuff with threads in Tornado, but after dealing with this question I see that it's not going to be as simple as I originally anticipated. In any case, here is the code which does what you expect (I've trimmed your original example down a bit so that anyone can run it quickly):
from concurrent.futures import ThreadPoolExecutor
from json import dumps
import time

from tornado.platform.asyncio import to_tornado_future
from tornado.ioloop import IOLoop
from tornado import gen, web

MAX_WORKERS = 4


class ASYNCHandler(web.RequestHandler):

    executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
    counter = 0

    def pow_task(self, x, y):
        time.sleep(5)
        return pow(x, y)

    async def background_task(self):
        future = self.executor.submit(self.pow_task, 2, 3)
        result = await to_tornado_future(future)  # convert to a tornado future
        return result

    @gen.coroutine
    def get(self, *args, **kwargs):
        result = yield from self.background_task()
        response = dumps({"result": result})
        self.write(response)


application = web.Application([
    ('/async', ASYNCHandler),
], debug=True)

application.listen(8888)
IOLoop.current().start()
The main differences are in the background_task() method. I convert the asyncio future to a Tornado future, wait for the result, then return the result. The code you provided in the question blocked for some reason when yielding from background_task(), and you were unable to await the result because the future wasn't a Tornado future.
On a slightly different note, this simple example can easily be implemented using a single-threaded/async design, and chances are your code can also be done without threads. Threads are easy to implement but equally easy to get wrong, and they can lead to very sticky situations. When attempting to write threaded code, please remember this photo :)
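To illustrate that single-threaded alternative: if the only blocking part is the sleep, the whole handler can stay on the event loop. A sketch (not your real workload):

from json import dumps

from tornado import gen, web


class ASYNCHandler(web.RequestHandler):
    async def get(self, *args, **kwargs):
        # Non-blocking pause; other requests are served in the meantime.
        await gen.sleep(5)
        self.write(dumps({"result": pow(2, 3)}))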
How can I create a wrapper that makes Celery tasks look like asyncio.Task? Or is there a better way to integrate Celery with asyncio?
@asksol, the creator of Celery, said this:
It's quite common to use Celery as a distributed layer on top of async I/O frameworks (top tip: routing CPU-bound tasks to a prefork worker means they will not block your event loop).
But I could not find any code examples specifically for asyncio framework.
EDIT 01/12/2021: the previous answer (find it at the bottom) didn't age well, so I added a combination of possible solutions that may satisfy those who are still looking at how to co-use asyncio and Celery.
Let's quickly break up the use cases first (more in-depth analysis here: asyncio and coroutines vs task queues):
If the task is I/O bound then it tends to be better to use coroutines and asyncio.
If the task is CPU bound then it tends to be better to use Celery or other similar task management systems.
So it makes sense, in the spirit of "do one thing and do it well", not to try to mix asyncio and Celery together.
BUT what happens in cases where we want to be able to run a method both asynchronously and as an async task? Then we have some options to consider:
The best example that I was able to find is the following: https://johnfraney.ca/posts/2018/12/20/writing-unit-tests-celery-tasks-async-functions/ (and I just found out that it is @Franey's response):
Define your async method.
Use asgiref's async_to_sync utility to wrap the async method and run it synchronously inside a Celery task:
# tasks.py
import asyncio

from asgiref.sync import async_to_sync
from celery import Celery

app = Celery('async_test', broker='a_broker_url_goes_here')


async def return_hello():
    await asyncio.sleep(1)
    return 'hello'


@app.task(name="sync_task")
def sync_task():
    async_to_sync(return_hello)()
A use case that I came upon in a FastAPI application was the reverse of the previous example:
An intense CPU-bound process is hogging up the async endpoints.
The solution is to refactor the async CPU-bound process into a Celery task and pass a task instance for execution to the Celery queue.
A minimal example for visualization of that case:
import asyncio

import uvicorn
from celery import Celery
from fastapi import FastAPI

app = FastAPI(title='Example')
worker = Celery('worker', broker='a_broker_url_goes_here')


@worker.task(name='cpu_boun')
def cpu_bound_task():
    # Does stuff but let's simplify it
    print([n for n in range(1000)])


@app.get('/calculate')
async def calculate():
    cpu_bound_task.delay()


if __name__ == "__main__":
    uvicorn.run('main:app', host='0.0.0.0', port=8000)
Another solution seems to be what @juanra and @danius are proposing in their answers, but we have to keep in mind that performance tends to take a hit when we intermix sync and async executions, so those answers need monitoring before we can decide to use them in a production environment.
Finally, there are some ready-made solutions, that I cannot recommend (because I have not used them myself) but I will list them here:
Celery Pool AsyncIO, which seems to solve exactly what Celery 5.0 didn't; keep in mind that it seems a bit experimental (version 0.2.0 as of 01/12/2021)
aiotasks claims to be "a Celery like task manager that distributes Asyncio coroutines" but seems a bit stale (latest commit around 2 years ago)
Well, that didn't age so well, did it? Version 5.0 of Celery didn't implement asyncio compatibility, so we cannot know when (and if) this will ever be implemented... Leaving this here for answer legacy reasons (as it was the answer at the time) and for comment continuation.
That will be possible from Celery version 5.0, as stated on the official site:
http://docs.celeryproject.org/en/4.0/whatsnew-4.0.html#preface
The next major version of Celery will support Python 3.5 only, where we are planning to take advantage of the new asyncio library.
Dropping support for Python 2 will enable us to remove massive amounts of compatibility code, and going with Python 3.5 allows us to take advantage of typing, async/await, asyncio, and similar concepts there’s no alternative for in older versions.
The above was quoted from the previous link.
So the best thing to do is wait for version 5.0 to be distributed!
In the meantime, happy coding :)
This simple way worked fine for me:
import asyncio

from celery import Celery

app = Celery('tasks')


async def async_function(param1, param2):
    # more async stuff...
    pass


@app.task(name='tasks.task_name', queue='queue_name')
def task_name(param1, param2):
    asyncio.run(async_function(param1, param2))
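Calling it then looks like any other Celery task; a hypothetical invocation (the example coroutine above returns nothing, so get() yields None here):

# Enqueue the task; the worker runs the coroutine via asyncio.run.
async_result = task_name.delay('value1', 'value2')
print(async_result.get(timeout=30))  # block until the worker finishes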
You can wrap any blocking call into a Task using run_in_executor as described in the documentation; I also added a custom timeout in the example:
import asyncio
import functools
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor()


def run_async_task(target, *args, timeout=60, **keywords):
    loop = asyncio.get_event_loop()
    return asyncio.wait_for(
        loop.run_in_executor(
            executor,
            functools.partial(target, *args, **keywords)
        ),
        timeout=timeout,
    )


# run_until_complete takes an awaitable, so call run_async_task(...) directly.
loop = asyncio.get_event_loop()
async_result = loop.run_until_complete(
    run_async_task(your_task.delay, some_arg, some_karg="")
)
result = loop.run_until_complete(
    run_async_task(async_result.get)
)
Here is a simple helper that you can use to make a Celery task awaitable:
import asyncio

from asgiref.sync import sync_to_async


# Converts a Celery task to an async function
def task_to_async(task):
    async def wrapper(*args, **kwargs):
        delay = 0.1
        async_result = await sync_to_async(task.delay)(*args, **kwargs)
        while not async_result.ready():
            await asyncio.sleep(delay)
            delay = min(delay * 1.5, 2)  # exponential backoff, max 2 seconds
        return async_result.get()
    return wrapper
Like sync_to_async, it can be used as a direct wrapper:
@shared_task
def get_answer():
    sleep(10)  # simulate long computation
    return 42

result = await task_to_async(get_answer)()
...and as a decorator:
@task_to_async
@shared_task
def get_answer():
    sleep(10)  # simulate long computation
    return 42

result = await get_answer()
Of course, this is not a perfect solution since it relies on polling.
However, it should be a good workaround to call Celery tasks from Django async views until Celery officially provides a better solution.
EDIT 2021/03/02: added the call to sync_to_async to support eager mode.
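For instance, inside a Django async view the wrapper might be used like this (view name and URL wiring are hypothetical):

# views.py (hypothetical)
from django.http import JsonResponse

async def answer_view(request):
    # task_to_async polls the AsyncResult without blocking the event loop.
    result = await task_to_async(get_answer)()
    return JsonResponse({"answer": result})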
The cleanest way I've found to do this is to wrap the async function in asgiref's async_to_sync:
import asyncio

from asgiref.sync import async_to_sync
from celery.task import periodic_task


async def return_hello():
    await asyncio.sleep(1)
    return 'hello'


@periodic_task(
    run_every=2,
    name='return_hello',
)
def task_return_hello():
    async_to_sync(return_hello)()
I pulled this example from a blog post I wrote.
I solved the problem by combining Celery and asyncio in the celery-pool-asyncio library.
Here's my implementation of Celery handling async coroutines when necessary:
Wrap the Celery class to extend its functionality:
from celery import Celery
from inspect import isawaitable
import asyncio


class AsyncCelery(Celery):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.patch_task()
        if 'app' in kwargs:
            self.init_app(kwargs['app'])

    def patch_task(self):
        TaskBase = self.Task

        class ContextTask(TaskBase):
            abstract = True

            async def _run(self, *args, **kwargs):
                result = TaskBase.__call__(self, *args, **kwargs)
                if isawaitable(result):
                    return await result
                return result

            def __call__(self, *args, **kwargs):
                return asyncio.run(self._run(*args, **kwargs))

        self.Task = ContextTask

    def init_app(self, app):
        self.app = app
        conf = {}
        for key in app.config.keys():
            if key[0:7] == 'CELERY_':
                conf[key[7:].lower()] = app.config[key]
        if 'broker_transport_options' not in conf and conf.get('broker_url', '')[0:4] == 'sqs:':
            conf['broker_transport_options'] = {'region': 'eu-west-1'}
        self.config_from_object(conf)


celery = AsyncCelery()
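A minimal usage sketch on top of this wrapper (re-creating the app with a broker for illustration; the broker URL is a placeholder):

celery = AsyncCelery('tasks', broker='redis://localhost:6379/0')


@celery.task
async def add(x, y):
    # Runs inside asyncio.run via the patched ContextTask.__call__.
    await asyncio.sleep(1)
    return x + y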
For anyone who stumbles on this looking for help specifically with async SQLAlchemy (i.e., using the asyncio extension) and Celery tasks, explicitly disposing of the engine will fix the issue. This particular example worked with asyncpg.
Example:
import contextlib
from typing import AsyncGenerator

from sqlalchemy.ext.asyncio import (
    AsyncSession,
    create_async_engine,
)
from sqlalchemy.orm import sessionmaker
from asgiref.sync import async_to_sync

engine = create_async_engine("some_uri", future=True)
async_session_factory = sessionmaker(engine, expire_on_commit=False, class_=AsyncSession)


@celery_app.task(name="task-name")
def sync_func() -> None:
    async_to_sync(some_func)()


async def some_func() -> None:
    async with get_db_session() as session:
        result = await some_db_query(session)
    # engine.dispose will be called on exit


@contextlib.asynccontextmanager
async def get_db_session() -> AsyncGenerator:
    try:
        db = async_session_factory()
        yield db
    finally:
        await db.close()
        await engine.dispose()
A nice way to implement Celery with asyncio:
import asyncio

from celery import Celery

app = Celery()


async def async_function(param):
    print('do something')


@app.task()
def celery_task(param):
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(async_function(param))
(Note: The background for this problem is pretty verbose, but there's an SSCCE at the bottom that can be skipped to)
Background
I'm trying to develop a Python-based CLI to interact with a web service. In my codebase I have a CommunicationService class that handles all direct communication with the web service. It exposes a received_response property that returns an Observable (from RxPY) that other objects can subscribe to in order to be notified when responses are received back from the web service.
I've based my CLI logic on the click library, where one of my subcommands is implemented as below:
async def enabled(self, request: str, response_handler: Callable[[str], Tuple[bool, str]]) -> None:
    self._generate_request(request)
    if response_handler is None:
        return None
    while True:
        response = await self.on_response
        success, value = response_handler(response)
        print(success, value)
        if success:
            return value
What's happening here (in the case that response_handler is not None) is that the subcommand is behaving as a coroutine that awaits responses from the web service (self.on_response == CommunicationService.received_response) and returns some processed value from the first response it can handle.
I'm trying to test the behaviour of my CLI by creating test cases in which CommunicationService is completely mocked; a fake Subject is created (which can act as an Observable) and CommunicationService.received_response is mocked to return it. As part of the test, the subject's on_next method is invoked to pass mock web service responses back to the production code:
@when('the communications service receives a response from TestCube Web Service')
def step_impl(context):
    context.mock_received_response_subject.on_next(context.text)
I use a click 'result callback' function that gets invoked at the end of the CLI invocation and blocks until the coroutine (the subcommand) is done:
@cli.resultcallback()
def _handle_command_task(task: Coroutine, **_) -> None:
    if task:
        loop = asyncio.get_event_loop()
        result = loop.run_until_complete(task)
        loop.close()
        print('RESULT:', result)
Problem
At the start of the test, I run CliRunner.invoke to fire off the whole shebang. The problem is that this call blocks the thread until the CLI has finished and returned a result, which isn't helpful if I need my test thread to carry on so it can produce mock web service responses concurrently.
What I guess I need to do is run CliRunner.invoke on a new thread using ThreadPoolExecutor. This allows the test logic to continue on the original thread and execute the #when step posted above. However, notifications published with mock_received_response_subject.on_next do not seem to trigger execution to continue within the subcommand.
I believe the solution would involve making use of RxPY's AsyncIOScheduler, but I'm finding the documentation on this a little sparse and unhelpful.
SSCCE
The snippet below captures what I hope is the essence of the problem. If it can be modified to work, I should be able to apply the same solution to my actual code to get it to behave as I want.
import asyncio
import logging
import sys
import time

import click
from click.testing import CliRunner
from rx.subjects import Subject

web_response_subject = Subject()
web_response_observable = web_response_subject.as_observable()

thread_loop = asyncio.new_event_loop()


@click.group()
def cli():
    asyncio.set_event_loop(thread_loop)


@cli.resultcallback()
def result_handler(task, **_):
    loop = asyncio.get_event_loop()
    result = loop.run_until_complete(task)  # Should block until subject publishes value
    loop.close()
    print(result)


@cli.command()
async def get_web_response():
    return await web_response_observable


def test():
    runner = CliRunner()
    future = thread_loop.run_in_executor(None, runner.invoke, cli, ['get_web_response'])
    time.sleep(1)
    web_response_subject.on_next('foo')  # Simulate reception of web response.
    time.sleep(1)
    result = future.result()
    print(result.output)


logging.basicConfig(
    level=logging.DEBUG,
    format='%(threadName)10s %(name)18s: %(message)s',
    stream=sys.stderr,
)

test()
Current Behaviour
The program hangs when run, blocking at result = loop.run_until_complete(task).
Acceptance Criteria
The program terminates and prints foo on stdout.
Update 1
Based on Vincent's help I've made some changes to my code.
Relay.enabled (the subcommand that awaits responses from the web service in order to process them) is now implemented like this:
async def enabled(self, request: str, response_handler: Callable[[str], Tuple[bool, str]]) -> None:
    self._generate_request(request)
    if response_handler is None:
        return None
    return await self.on_response \
        .select(response_handler) \
        .where(lambda result, i: result[0]) \
        .select(lambda result, index: result[1]) \
        .first()
I wasn't quite sure how await would behave with RxPY observables - would it return execution to the caller on each element generated, or only when the observable has completed (or errored)? I now know it's the latter, which honestly feels like the more natural choice and has allowed me to make the implementation of this function feel a lot more elegant and reactive.
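A tiny standalone check of that behaviour, using the same rx.subjects.Subject as elsewhere in this question (the timers just make sure the subscription happens before anything is emitted):

import asyncio

from rx.subjects import Subject

subject = Subject()


async def consume():
    # Resolves only once on_completed fires, with the LAST emitted item.
    return await subject.as_observable()


loop = asyncio.get_event_loop()
loop.call_later(0.1, subject.on_next, 'first')
loop.call_later(0.2, subject.on_next, 'last')
loop.call_later(0.3, subject.on_completed)
print(loop.run_until_complete(consume()))  # prints 'last'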
I've also modified the test step that generates mock web service responses:
@when('the communications service receives a response from TestCube Web Service')
def step_impl(context):
    loop = asyncio.get_event_loop()
    loop.call_soon_threadsafe(context.mock_received_response_subject.on_next, context.text)
Unfortunately, this will not work as it stands, since the CLI is being invoked in its own thread...
@when('the CLI is run with "{arguments}"')
def step_impl(context, arguments):
    loop = asyncio.get_event_loop()
    if 'async.cli' in context.tags:
        context.async_result = loop.run_in_executor(None, context.cli_runner.invoke, testcube.cli, arguments.split())
    else:
        ...
And the CLI creates its own thread-private event loop when invoked...
def cli(context, hostname, port):
    _initialize_logging(context.meta['click_log.core.logger']['level'])
    # Create a new event loop for processing commands asynchronously on.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    ...
What I think I need is a way to allow my test steps to invoke the CLI on a new thread and then fetch the event loop it's using:
@when('the communications service receives a response from TestCube Web Service')
def step_impl(context):
    loop = _get_cli_event_loop()  # Needs to be implemented.
    loop.call_soon_threadsafe(context.mock_received_response_subject.on_next, context.text)
Update 2
There doesn't seem to be an easy way to get the event loop that a particular thread creates and uses for itself, so instead I took Victor's advice and mocked asyncio.new_event_loop to return an event loop that my test code creates and stores:
def _apply_mock_event_loop_patch(context):
    # Close any already-existing exit stacks.
    if hasattr(context, 'mock_event_loop_exit_stack'):
        context.mock_event_loop_exit_stack.close()
    context.test_loop = asyncio.new_event_loop()
    print(context.test_loop)
    context.mock_event_loop_exit_stack = ExitStack()
    context.mock_event_loop_exit_stack.enter_context(
        patch.object(asyncio, 'new_event_loop', spec=True, return_value=context.test_loop))
I change my 'mock web response received' test step to do the following:
@when('the communications service receives a response from TestCube Web Service')
def step_impl(context):
    loop = context.test_loop
    loop.call_soon_threadsafe(context.mock_received_response_subject.on_next, context.text)
The great news is that I'm actually getting the Relay.enabled coroutine to trigger when this step gets executed!
The only problem now is the final test step in which I await the future I got from executing the CLI in its own thread and validate that the CLI is sending this on stdout:
@then('the CLI should print "{output}"')
def step_impl(context, output):
    if 'async.cli' in context.tags:
        loop = asyncio.get_event_loop()  # main loop, not test loop
        result = loop.run_until_complete(context.async_result)
    else:
        result = context.result
    assert_that(result.output, equal_to(output))
I've tried playing around with this but I can't seem to get context.async_result (which stores the future from loop.run_in_executor) to transition nicely to done and return the result. With the current implementation, I get an error for the first test (1.1) and indefinite hanging for the second (1.2):
@mock.comms @async.cli @wip
Scenario Outline: Querying relay enable state -- #1.1 # testcube/tests/features/relay.feature:45
When the user queries the enable state of relay 0 # testcube/tests/features/steps/relay.py:17 0.003s
Then the CLI should query the web service about the enable state of relay 0 # testcube/tests/features/steps/relay.py:48 0.000s
When the communications service receives a response from TestCube Web Service # testcube/tests/features/steps/core.py:58 0.000s
"""
{'module':'relays','path':'relays[0].enabled','data':[True]}'
"""
Then the CLI should print "True" # testcube/tests/features/steps/core.py:94 0.003s
Traceback (most recent call last):
  File "/Users/davidfallah/testcube_env/lib/python3.5/site-packages/behave/model.py", line 1456, in run
    match.run(runner.context)
  File "/Users/davidfallah/testcube_env/lib/python3.5/site-packages/behave/model.py", line 1903, in run
    self.func(context, *args, **kwargs)
  File "testcube/tests/features/steps/core.py", line 99, in step_impl
    result = loop.run_until_complete(context.async_result)
  File "/usr/local/Cellar/python3/3.5.2_1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/asyncio/base_events.py", line 387, in run_until_complete
    return future.result()
  File "/usr/local/Cellar/python3/3.5.2_1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/asyncio/futures.py", line 274, in result
    raise self._exception
  File "/usr/local/Cellar/python3/3.5.2_1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/concurrent/futures/thread.py", line 55, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/Users/davidfallah/testcube_env/lib/python3.5/site-packages/click/testing.py", line 299, in invoke
    output = out.getvalue()
ValueError: I/O operation on closed file.
Captured stdout:
RECEIVED WEB RESPONSE: {'module':'relays','path':'relays[0].enabled','data':[True]}'
<Future pending cb=[_chain_future.<locals>._call_check_cancel() at /usr/local/Cellar/python3/3.5.2_1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/asyncio/futures.py:431]>
@mock.comms @async.cli @wip
Scenario Outline: Querying relay enable state -- #1.2 # testcube/tests/features/relay.feature:46
When the user queries the enable state of relay 1 # testcube/tests/features/steps/relay.py:17 0.005s
Then the CLI should query the web service about the enable state of relay 1 # testcube/tests/features/steps/relay.py:48 0.001s
When the communications service receives a response from TestCube Web Service # testcube/tests/features/steps/core.py:58 0.000s
"""
{'module':'relays','path':'relays[1].enabled','data':[False]}'
"""
RECEIVED WEB RESPONSE: {'module':'relays','path':'relays[1].enabled','data':[False]}'
Then the CLI should print "False" # testcube/tests/features/steps/core.py:94
Chapter 3: Finale
Screw all this asynchronous multi-threaded stuff, I'm too dumb for it.
First off, instead of describing the scenario like this...
When the user queries the enable state of relay <relay_id>
Then the CLI should query the web service about the enable state of relay <relay_id>
When the communications service receives a response from TestCube Web Service:
"""
{"module":"relays","path":"relays[<relay_id>].enabled","data":[<relay_enabled>]}
"""
Then the CLI should print "<relay_enabled>"
We describe it like this:
Given the communications service will respond to requests:
"""
{"module":"relays","path":"relays[<relay_id>].enabled","data":[<relay_enabled>]}
"""
When the user queries the enable state of relay <relay_id>
Then the CLI should query the web service about the enable state of relay <relay_id>
And the CLI should print "<relay_enabled>"
Implement the new given step:
@given('the communications service will respond to requests')
def step_impl(context):
    response = context.text

    def publish_mock_response(_):
        loop = context.test_loop
        loop.call_soon_threadsafe(context.mock_received_response_subject.on_next, response)

    # Configure the mock comms service to publish a mock response when a request is made.
    instance = context.mock_comms.return_value
    instance.send_request.on_next.side_effect = publish_mock_response
BOOM
2 features passed, 0 failed, 0 skipped
22 scenarios passed, 0 failed, 0 skipped
58 steps passed, 0 failed, 0 skipped, 0 undefined
Took 0m0.111s
I can see two problems with your code:
asyncio is not thread-safe unless you use call_soon_threadsafe or run_coroutine_threadsafe. RxPy doesn't use either of those in Observable.to_future, so you have to access RxPy objects in the same thread that runs the asyncio event loop.
RxPy sets the result of the future when on_completed is called, so that awaiting for an observable returns the last object emitted. This means you have to call both on_next and on_completed to get await to return.
Here is a working example:
import asyncio

import click
from click.testing import CliRunner
from rx.subjects import Subject

web_response_subject = Subject()
web_response_observable = web_response_subject.as_observable()

main_loop = asyncio.get_event_loop()


@click.group()
def cli():
    pass


@cli.resultcallback()
def result_handler(task, **_):
    future = asyncio.run_coroutine_threadsafe(task, main_loop)
    print(future.result())


@cli.command()
async def get_web_response():
    return await web_response_observable


def test():
    runner = CliRunner()
    future = main_loop.run_in_executor(
        None, runner.invoke, cli, ['get_web_response'])
    main_loop.call_later(1, web_response_subject.on_next, 'foo')
    main_loop.call_later(2, web_response_subject.on_completed)
    result = main_loop.run_until_complete(future)
    print(result.output, end='')


if __name__ == '__main__':
    test()
if __name__ == '__main__':
test()