Python Tornado not supporting streaming huge data - python

I have a requirement to stream a huge Oracle record set from a Python REST API. I am running Flask on a Tornado server. When I use Tornado, streaming doesn't work, whereas on Flask's native server (Werkzeug) it works perfectly. Can anyone tell me whether Tornado supports streaming or not?
Here is a small sample of code just trying to stream by using yield.
import tornado.web
from tornado import gen, httpclient
import asyncio, json, time
class basicReuqestHandler(tornado.web.RequestHandler):
    """Answer GET requests with a fixed greeting."""

    def get(self):
        self.write("Helow World!")
class staticReuqestHandler(tornado.web.RequestHandler):
    """Answer GET requests by rendering the ``index.html`` template."""

    def get(self):
        self.render("index.html")
class StreamingHandler(tornado.web.RequestHandler):
    """Handler from the question: attempts to stream via a nested generator."""

    @gen.coroutine
    def get(self):
        self.write("starting ....")

        def stream():
            a = 1
            for i in range(100):
                a = a + i
                print(i)
                print(json.dumps(i))
                yield json.dumps(i)

        # The generator object itself is handed to write(); this is the call
        # the question reports as not streaming.
        self.write(stream())
        self.write("closing...")
        self.finish()
if __name__ == '__main__':
    # Imported explicitly because IOLoop is referenced below while the
    # snippet's import list only names tornado.web.
    import tornado.ioloop

    app = tornado.web.Application([
        (r"/", basicReuqestHandler),
        (r"/myPage", staticReuqestHandler),
        (r"/StreamTest", StreamingHandler),
    ])
    app.listen(7000)
    tornado.ioloop.IOLoop.current().start()

I found my mistake, so I am answering my own question here to help anyone with a similar issue.
Here is the code:
import tornado.web
from tornado import gen, httpclient
import asyncio, json, time
class basicReuqestHandler(tornado.web.RequestHandler):
    """Answer GET requests with a fixed greeting."""

    def get(self):
        self.write("Helow World!")
class staticReuqestHandler(tornado.web.RequestHandler):
    """Answer GET requests by rendering the ``index.html`` template."""

    def get(self):
        self.render("index.html")
class StreamingHandler(tornado.web.RequestHandler):
    """Stream 100 JSON-encoded integers, flushing after each one."""

    @gen.coroutine
    def get(self):
        self.write("starting ....")
        # The loop runs directly in the coroutine; wrapping it in a nested
        # generator that is never called would skip the writes entirely.
        for i in range(100):
            print(i)
            print(json.dumps(i))
            self.write(json.dumps(i))
            # Yield the flush so this chunk is sent before the next is built.
            yield self.flush()
        self.write("closing...")
        self.finish()
if __name__ == '__main__':
    # Imported explicitly because IOLoop is referenced below while the
    # snippet's import list only names tornado.web.
    import tornado.ioloop

    app = tornado.web.Application([
        (r"/", basicReuqestHandler),
        (r"/myPage", staticReuqestHandler),
        (r"/StreamTest", StreamingHandler),
    ])
    app.listen(7000)
    tornado.ioloop.IOLoop.current().start()

Related

Parametrizing Tornado RequestHandler

Let's say I have a very simple web app in python Tornado framework with a single endpoint. All I'm interested in is returning a value calculated before starting the server. Slightly modified example from https://www.tornadoweb.org/en/stable/index.html will do just fine.
handler.py
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Placeholder handler: writes a fixed string instead of the real value."""

    def get(self):
        self.write('I want to return var `expensive_value`')
main.py
import tornado.ioloop
import tornado.web
def make_app():
    """Build the application with the single root route."""
    # main.py did not import the handler it routes to.
    from handler import MainHandler

    return tornado.web.Application([
        (r"/", MainHandler),
    ])


if __name__ == "__main__":
    # calculate some var here before starting the server
    expensive_value = 'value from long_calculation()'
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
When running python main.py and sending a request to the endpoint it returns only a string of course. But I'd like to return the actual value of expensive_value. Currently I'm aware of two solutions to the problem.
1. Using global variable in handler
handler.py
import tornado.web
global_variable = None


def setter(val):
    """Store ``val`` in the module-level ``global_variable``."""
    global global_variable
    global_variable = val
class MainHandler(tornado.web.RequestHandler):
    """Write the current value of the module-level ``global_variable``."""

    def get(self):
        self.write(global_variable)
main.py
import tornado.ioloop
import tornado.web
from handler import MainHandler, setter
def make_app():
    """Build the application with the single root route."""
    return tornado.web.Application([
        (r"/", MainHandler),
    ])


if __name__ == "__main__":
    expensive_value = 'value from long_calculation()'
    # Push the value into the handler module before the server starts.
    setter(expensive_value)
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
Having a global var and setting its value from some other module sounds like an antipattern to me.
2. Using initialize method in handler
handler.py
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Write the ``expensive_value`` received through ``initialize``."""

    def initialize(self, expensive_value):
        self.expensive_value = expensive_value

    def get(self):
        self.write(self.expensive_value)
main.py
import tornado.ioloop
import tornado.web
from handler import MainHandler
def make_app(parameter):
    """Build the application, passing ``parameter`` to the handler's initialize."""
    return tornado.web.Application([
        (r"/", MainHandler, {'expensive_value': parameter}),
    ])


if __name__ == "__main__":
    expensive_value = 'value from long_calculation()'
    app = make_app(expensive_value)
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
This solution is better. But initialize method is called for every request. I realize the overhead for that would be rather small but I think it might be misleading for potential reader of the code since expensive_value never changes.
Summary
Both of these solutions work. But I don't like either of them, and it seems like I'm missing some Tornado functionality. What would be a Pythonic way to solve this?
For example I believe Flask has app.config dictionary that is accessible in handlers and it seems to be a nice solution to this as expensive_value is indeed a configuration to the app. But I'm not aware of anything similar in Tornado.
Handlers have access to self.application.settings which is a dictionary containing additional arguments passed to the Application constructor.
So you can pass expensive_value directly to the Application class like this:
def make_app(parameter):
    """Build the application, passing ``parameter`` as an Application keyword."""
    return tornado.web.Application(
        [
            (r"/", MainHandler),
        ],
        expensive_value=parameter,
    )
And access this value in any handler like this:
def initialize(self):
    """Copy ``expensive_value`` from the application settings onto the handler."""
    self.expensive_value = self.application.settings.get('expensive_value')

AssertionError "assert not IOLoop.initialized()" on testing Tornado app with AsyncIOMainLoop

I have some trouble with writing tests with AsyncHTTPTestCase for existing Tornado application that uses asyncio event loop.
Here I prepare short model where I can reproduce issue:
app.py
from tornado.platform.asyncio import AsyncIOMainLoop
import asyncio
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Root handler that writes a fixed body for GET and for POST."""

    async def get(self, *args, **kwargs):
        self.write("200 OK")

    async def post(self, *args, **kwargs):
        self.write("201 OK")
def make_app():
    """Install the asyncio main loop and build the application."""
    # This is how the asyncio loop is installed in the existing app; it is the
    # call the question's traceback points at.
    AsyncIOMainLoop().install()
    return tornado.web.Application([
        (r"/", MainHandler),
    ], debug=True)
def start_app():
    """Build the app, listen on port 8888 and run the asyncio loop forever."""
    app = make_app()
    app.listen(8888)
    loop = asyncio.get_event_loop()
    loop.set_debug(True)
    loop.run_forever()
start.py
#!/usr/bin/env python3
import app
if __name__ == "__main__":
    app.start_app()
test_app.py
import json
from tornado.testing import AsyncHTTPTestCase
import app
class TestHelloApp(AsyncHTTPTestCase):
    """HTTP tests for the root handler's GET and POST responses."""

    def get_app(self):
        return app.make_app()

    def test_get(self):
        response = self.fetch('/')
        self.assertEqual(response.code, 200)
        self.assertEqual(response.body.decode(), '200 OK')

    def test_post(self):
        response = self.fetch('/', method="POST",
                              body=json.dumps({"key": "value"}))
        self.assertEqual(response.code, 200)
        self.assertEqual(response.body.decode(), '201 OK')
With that approach to installing the asyncio loop, the application works fine (I can make requests and get responses), but tests like these fail with the following error:
======================================================================
FAIL: test_post (test_app.TestHelloApp)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/biceps/work/torn/.venv/lib/python3.6/site-packages/tornado/testing.py", line 380, in setUp
self._app = self.get_app()
File "/home/biceps/work/torn/test_app.py", line 8, in get_app
return app.make_app()
File "/home/biceps/work/torn/app.py", line 14, in make_app
tornado.platform.asyncio.AsyncIOMainLoop().install()
File "/home/biceps/work/torn/.venv/lib/python3.6/site-packages/tornado/ioloop.py", line 181, in install
assert not IOLoop.initialized()
AssertionError
----------------------------------------------------------------------
Ran 2 tests in 0.006s
FAILED (failures=1)
It seems that the loop installed by AsyncIOMainLoop().install() is not cleared between tests: the first test passes, but the second always fails.
When I moved AsyncIOMainLoop().install() into start_app(), the tests passed, but I am worried that the tests then use one event loop while the real running app uses the asyncio loop.
So, against that code tests are passed OK:
from tornado.platform.asyncio import AsyncIOMainLoop
import asyncio
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Root handler that writes a fixed body for GET and for POST."""

    async def get(self, *args, **kwargs):
        self.write("200 OK")

    async def post(self, *args, **kwargs):
        self.write("201 OK")
def make_app():
    """Build the application; no event loop is installed here."""
    return tornado.web.Application([
        (r"/", MainHandler),
    ], debug=True)
def start_app():
    """Install the asyncio main loop, then build and serve the app forever."""
    AsyncIOMainLoop().install()
    app = make_app()
    app.listen(8888)
    loop = asyncio.get_event_loop()
    loop.set_debug(True)
    loop.run_forever()
Q: How do I write tests correctly for this use case? How do I write tests with AsyncHTTPTestCase when the Tornado app uses AsyncIOMainLoop?
Am I right to move AsyncIOMainLoop().install() into start_app() rather than make_app()?
P.S. I've added self.io_loop.clear_instance() to tearDown(). It probably looks dirty, but it works for the case when AsyncIOMainLoop().install() is called from make_app().
def tearDown(self):
    """Clear the IOLoop instance, then run the base-class teardown."""
    self.io_loop.clear_instance()
    super().tearDown()
According to the documentation, I need to install AsyncIOMainLoop before starting the application, not while building the app.
documentation
from tornado.platform.asyncio import AsyncIOMainLoop
import asyncio
AsyncIOMainLoop().install()
asyncio.get_event_loop().run_forever()
So now I'm sure that proper way is using AsyncIOMainLoop installation into start_app() code.
So now my pattern code looks like:
web1.py
# asyncio and tornado.web are used at module level by the handler below, so
# they are imported here rather than only inside start_app().
import asyncio

import tornado.web


class MainHandler(tornado.web.RequestHandler):
    """Root handler whose GET and POST each sleep one second, then write "OK"."""

    async def get(self, *args, **kwargs):
        await asyncio.sleep(1)
        return self.write("OK")

    async def post(self, *args, **kwargs):
        await asyncio.sleep(1)
        return self.write("OK")


def make_app():
    """Build the application; no event loop is installed here."""
    return tornado.web.Application([(r"/", MainHandler)], debug=False)


def start_app():
    """Install the asyncio main loop, then build and serve the app forever."""
    from tornado.platform.asyncio import AsyncIOMainLoop

    AsyncIOMainLoop().install()
    app = make_app()
    app.listen(8888)
    asyncio.get_event_loop().run_forever()


if __name__ == "__main__":
    start_app()
test_app.py
from tornado.testing import AsyncHTTPTestCase
import web1
class TestTornadoAppBase(AsyncHTTPTestCase):
    """Base test case that serves the web1 app on an asyncio-backed IOLoop."""

    def get_app(self):
        return web1.make_app()

    def get_new_ioloop(self):
        """
        Needed to make sure that I can also run asyncio based callbacks in my tests
        """
        # Imported here because the test module's import list only names
        # AsyncHTTPTestCase and web1.
        import asyncio
        import tornado.platform.asyncio

        io_loop = tornado.platform.asyncio.AsyncIOLoop()
        asyncio.set_event_loop(io_loop.asyncio_loop)
        return io_loop
class TestGET(TestTornadoAppBase):
    """Check that GET and POST on the root URL both return 200 and "OK"."""

    def test_root_get_method(self):
        response = self.fetch("/")
        self.assertEqual(response.code, 200)
        self.assertEqual(response.body.decode(), 'OK')

    def test_root_post_method(self):
        response = self.fetch("/", method="POST", body="{}")
        self.assertEqual(response.code, 200)
        self.assertEqual(response.body.decode(), 'OK')
This pattern works as well, and an asyncio-backed IOLoop is used during tests, so I can use libraries that rely on the asyncio loop. In my example, asyncio.sleep() is one such call.

Tornado yield [ <list of futures> ] performs only 10 requests simultaneously

I want to perform 100 requests at the same time in the easy way, with yield [ list_of_futures ].
But this method performs only 10 requests at a time!
I prepared a short example which demonstrates it: just run it and you'll see the requests being performed in batches of 10.
Tested with debian stretch and ubuntu 16.04 with the same results.
Python 3.6.1,
tornado==4.5.1
from datetime import datetime
import tornado.ioloop
import tornado.gen
import tornado.web
from tornado.httpclient import AsyncHTTPClient
# the same for tornado and curl clients
# AsyncHTTPClient.configure('tornado.curl_httpclient.CurlAsyncHTTPClient')
http_client = AsyncHTTPClient()
class MainHandler(tornado.web.RequestHandler):
    """Slow test page: writes a header, waits five seconds, then finishes."""

    @tornado.gen.coroutine
    def get(self, **kwargs):
        yield self.write('<html><pre>')
        yield tornado.gen.sleep(5)
        yield self.finish('long page test</pre></html>')
def make_app():
    """Build the application with the single ``/test`` route."""
    return tornado.web.Application([
        tornado.web.url('^/test', MainHandler),
    ])
@tornado.gen.coroutine
def long_request(n):
    """Fetch the local test page, wait five seconds, and print the body size."""
    print('long_request {n} start'.format(n=n))
    response = yield http_client.fetch('http://127.0.0.1:8000/test')
    yield tornado.gen.sleep(5)
    print('{date} long_request {n} finish, size {size}'.format(
        date=datetime.now(), n=n, size=len(response.body))
    )
@tornado.gen.coroutine
def requests_handler():
    """Start 100 ``long_request`` coroutines and wait for all of them."""
    print('Requests handler started')
    # Yielding a list of futures waits for every one of them in parallel.
    yield [long_request(n) for n in range(100)]
    print('Requests handler finished')
# Serve the test page on 127.0.0.1:8000, schedule requests_handler as an
# IOLoop callback, then start the loop.
app = make_app()
app.listen(8000, '127.0.0.1')
tornado.ioloop.IOLoop.current().add_callback(callback=requests_handler)
tornado.ioloop.IOLoop.current().start()
Oops, I just found the cause.
Each client can perform at most max_clients requests simultaneously (10 by default).
# Raise max_clients to 100 for the curl-based client implementation.
AsyncHTTPClient.configure(
'tornado.curl_httpclient.CurlAsyncHTTPClient',
max_clients=100
)
# Same setting for the simple (pure-Python) client implementation.
AsyncHTTPClient.configure(
'tornado.simple_httpclient.SimpleAsyncHTTPClient',
max_clients=100
)
But I think this is not obvious behaviour, so I am leaving it here for future googlers.

How to send immediate GET response in tornado?

I have the following code to send result to browser based on the api call.
import tornado.ioloop
import tornado.web
from tornado import gen
from datetime import date
class GetGameByIdHandler(tornado.web.RequestHandler):
    """Write a small game record, then run a long busy loop."""

    @gen.coroutine
    def get(self, id):
        response = {'id': int(id),
                    'name': 'Crazy Game',
                    'release_date': date.today().isoformat()}
        self.set_header('Content-Type', 'text/json')
        self.write(response)
        # Busy loop from the question: it stands in for slow work that runs
        # after write() and delays the response.
        for i in range(10000000):
            for j in range(10):
                pass
            # NOTE(review): nesting level of this print is reconstructed from
            # a flattened listing - confirm against the original post.
            print(i)
application = tornado.web.Application([
    (r"/getgamebyid/([0-9]+)", GetGameByIdHandler),
], debug=True)

if __name__ == "__main__":
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
I want the API to return the result as soon as self.write is encountered; the for loop should run after that. How can I get this done? Basically, I want the result to be returned immediately, without waiting for the loop.
NOTE: The loop here has no real purpose except to demonstrate the sending of result is delayed just because of this extra thing in the get function.
A less abstract example:
import tornado.ioloop
import tornado.web
from tornado import gen
from datetime import date
class GetGameByIdHandler(tornado.web.RequestHandler):
    """Write the DB result for ``id``, then act on every missing value."""

    @gen.coroutine
    def get(self, id):
        result_dict = GetResultsFromDB(id)
        response = result_dict
        self.set_header('Content-Type', 'text/json')
        self.write(response)
        # Basically i want to doSomething basedon results
        # Generated from DB
        for key in result_dict:
            if result_dict[key] is None:
                DoSomething()
application = tornado.web.Application([
    (r"/getgamebyid/([0-9]+)", GetGameByIdHandler),
], debug=True)

if __name__ == "__main__":
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
If you need to run some code after all data has been written to the socket, you can use tornado.web.RequestHandler.flush:
self.write(response)
self.flush(callback=lambda: DoSomethingWrapper(response))

Object Oriented Python with Flask Server?

I'm using Flask to expose some data-crunching code as a web service.
I'd like to have some class variables that my Flask functions can access.
Let me walk you through where I'm stuck:
from flask import Flask
app = Flask(__name__)
class MyServer:
    """Class from the question: route and data are not tied to an instance."""

    def __init__(self):
        # Stored in a local, so it is not reachable from getSomeData(); this
        # is the situation the question asks about.
        globalData = json.load(filename)

    @app.route('/getSomeData')
    def getSomeData():
        return random.choice(globalData) #select some random data to return
if __name__ == "__main__":
    app.run(host='0.0.0.0')
When I run getSomeData() outside of Flask, it works fine. But, when I run this with Flask, I get 500 internal server error. There's no magic here, and Flask has no idea that it's supposed to initialize a MyServer object. How can I feed an instance of MyServer to the app.run() command?
I could admit defeat and put globalData into a database instead. But, is there an other way?
You can create an instance of MyServer just outside the scope of your endpoints and access its attributes. This worked for me:
class MyServer:
    """Hold the data that the Flask endpoints read."""

    def __init__(self):
        self.globalData = "hello"
from flask import Flask
app = Flask(__name__)
my_server = MyServer()
@app.route("/getSomeData")
def getSomeData():
    """Return the data held by the module-level ``my_server`` instance."""
    return my_server.globalData
if __name__ == "__main__":
    app.run(host="0.0.0.0")
I know this is a late reply, but I came across this question while facing a similar issue. I found flask-classful really good.
You inherit your class from FlaskView and register the Flask app with your MyServer class
http://flask-classful.teracy.org/#
In this case, with flask-classful, your code would look like this:
from flask import Flask
from flask_classful import FlaskView, route
app = Flask(__name__)
class MyServer(FlaskView):
    """Class-based view that serves a random element of the loaded JSON data."""

    def __init__(self):
        # json.load() needs an open file object, not a path, and the data
        # must live on the instance so the view method can reach it.
        # NOTE(review): `filename` is a placeholder carried over from the
        # question - define it before use.
        with open(filename) as fh:
            self.globalData = json.load(fh)

    @route('/getSomeData')
    def getSomeData(self):
        return random.choice(self.globalData) #select some random data to return
# FlaskView.register() takes the base path as `route_base`.
MyServer.register(app, route_base="/")

if __name__ == "__main__":
    app.run(host='0.0.0.0')
The least-coupled solution is to apply the routes at runtime (instead of at load time):
def init_app(flask_app, database_interface, filesystem_interface):
    """Create a MyServer from the injected dependencies and bind its route.

    The route is attached at call time, so tests can pass fakes for
    ``database_interface`` and ``filesystem_interface``.
    """
    server = MyServer(database_interface, filesystem_interface)
    # Flask URL rules must start with a leading slash.
    flask_app.route('/get_data', methods=['GET'])(server.get_data)
This is very testable--just invoke init_app() in your test code with the mocked/faked dependencies (database_interface and filesystem_interface) and a flask app that has been configured for testing (app.config["TESTING"]=True or something like that) and you're all-set to write tests that cover your entire application (including the flask routing).
The only downside is this isn't very "Flasky" (or so I've been told); the Flask idiom is to use @app.route(), which is applied at load time and is necessarily tightly coupled because dependencies are hard-coded into the implementation instead of injected into some constructor or factory method (and thus complicated to test).
The following code is a simple solution for OOP with Flask:
from flask import Flask, request
class Server:
    """Object-oriented wrapper that owns a Flask app and its routes."""

    def __init__(self, name):
        """Create the Flask app and bind each URL to the matching method."""
        self.app = Flask(name)

        # The route functions are closures so that they can delegate to the
        # bound methods of this instance.
        @self.app.route('/')
        def __index():
            return self.index()

        @self.app.route('/hello')
        def __hello():
            return self.hello()

        @self.app.route('/user_agent')
        def __user_agent():
            return self.user_agent()

        @self.app.route('/factorial/<n>', methods=['GET'])
        def __factorial(n):
            return self.factorial(n)

    def index(self):
        return 'Index Page'

    def hello(self):
        return 'Hello, World'

    def user_agent(self):
        return request.headers.get('User-Agent')

    def factorial(self, n):
        """Return n! as a string; ``n`` is converted with ``int()`` first."""
        n = int(n)
        fact = 1
        for num in range(2, n + 1):
            fact = fact * num
        return str(fact)

    def run(self, host, port):
        self.app.run(host=host, port=port)
def main():
    """Create the server and listen on all interfaces, port 5000."""
    server = Server(__name__)
    server.run(host='0.0.0.0', port=5000)


if __name__ == '__main__':
    main()
To test the code, browse the following urls:
http://localhost:5000/
http://localhost:5000/hello
http://localhost:5000/user_agent
http://localhost:5000/factorial/10
A bit late, but here's a quick implementation that I use to register routes at init time:
from flask import Flask,request,render_template
from functools import partial
registered_routes = {}


def register_route(route=None):
    """Return a decorator that records the function under ``route``.

    The decorated function is stored in ``registered_routes`` and returned
    unchanged.
    """
    # simple decorator for class based views
    def inner(fn):
        registered_routes[route] = fn
        return fn
    return inner
class MyServer(Flask):
    """Flask app that binds the routes collected by ``register_route``."""

    def __init__(self, *args, **kwargs):
        # Default the import name only when it was not given positionally.
        if not args:
            kwargs.setdefault('import_name', __name__)
        Flask.__init__(self, *args, **kwargs)
        # register the routes from the decorator
        for route, fn in registered_routes.items():
            # Bind this instance as `self`, and copy the name onto the
            # partial object, which has no __name__ of its own.
            partial_fn = partial(fn, self)
            partial_fn.__name__ = fn.__name__
            self.route(route)(partial_fn)

    @register_route("/")
    def index(self):
        return render_template("my_template.html")
if __name__ == "__main__":
    # os is needed for the template folder and is not imported by the snippet.
    import os

    MyServer(template_folder=os.path.dirname(__file__)).run(debug=True)
If you wish to treat the MyServer class as a resource,
I believe that flask_restful can help you:
from flask import Flask
from flask_restful import Resource, Api
import json
import numpy as np
app = Flask(__name__)
api = Api(app)
class MyServer(Resource):
    """REST resource whose GET returns a random element of the loaded data."""

    def __init__(self):
        # json.load() needs an open file object, not a path.
        # NOTE(review): `filename` is a placeholder carried over from the
        # question - define it before use.
        with open(filename) as fh:
            self.globalData = json.load(fh)

    def get(self):
        return np.random.choice(self.globalData)


api.add_resource(MyServer, '/')
if __name__ == '__main__':
    app.run()

Categories

Resources