How would I implement "tail" over HTTP with Python Tornado? - python

I'm trying to implement something like "tail -f" over HTTP with Python. Currently, I'm trying to use Tornado, but it is only handling one connection at a time, even when I make asynchronous requests.
import socket
import subprocess
import tornado.gen as gen
import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.options
import tornado.web
from tornado.options import define, options
# Command-line options understood by this server.
define(
    "port",
    default=8888,
    type=int,
    help="run on the given port",
)
define(
    "inputfile",
    default="test.txt",
    type=str,
    help="the path to the file which we will 'tail'",
)
class MainHandler(tornado.web.RequestHandler):
    """Stream the output of a per-request ``nettail.py`` helper to the client."""

    @tornado.web.asynchronous
    @gen.engine
    def get(self):
        """Spawn ``nettail.py`` on the input file and relay its lines.

        The helper prints the TCP port it listens on as the first line of its
        stdout. The handler connects to that port and forwards every
        newline-terminated chunk to the HTTP response, which is never finished.
        """
        print("GOT REQUEST")
        # The child receives the file as its stdin, so our own handle can be
        # closed as soon as the process has been started.
        with open(options.inputfile) as inputfile:
            p = subprocess.Popen(
                "./nettail.py",
                stdin=inputfile,
                stdout=subprocess.PIPE)
        # NOTE: this is a blocking read inside the handler; it waits for the
        # helper to report its port.
        port_number = int(p.stdout.readline().strip())
        self.write("<pre>")
        self.write("Hello, world\n")
        self.flush()
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = tornado.iostream.IOStream(s)
        yield gen.Task(stream.connect, ("127.0.0.1", port_number))
        while True:
            data = yield gen.Task(stream.read_until, "\n")
            self.write(data)
            self.flush()
def main():
    """Parse the command line and serve MainHandler at "/" until stopped."""
    tornado.options.parse_command_line()
    routes = [(r"/", MainHandler)]
    server = tornado.httpserver.HTTPServer(tornado.web.Application(routes))
    server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()
The process I am starting is a simple "tail" which outputs to a socket.
import random
import socket
import sys
import time
# Create an INET, STREAMing socket.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    # Try a handful of random ports until one of them can be bound.
    for attempt_number in range(5):
        port_number = random.randint(9000, 65000)
        try:
            s.bind(("localhost", port_number))
        except socket.error:
            continue
        # We successfully bound! Report the port on stdout so the parent
        # process knows where to connect.
        sys.stdout.write("{0}\n".format(port_number))
        sys.stdout.flush()
        break
    else:
        # Every attempt failed: stop here instead of listening on a socket
        # whose port was never reported to the parent.
        sys.exit("nettail: could not bind to a free port")

    # Become a server socket.
    s.listen(5)

    # Accept a connection. This happens outside the inner try so that the
    # cleanup below never refers to an unassigned clientsocket.
    clientsocket, address = s.accept()
    try:
        while True:
            line = sys.stdin.readline()
            if not line:
                # End of input for now: wait before trying to read again.
                time.sleep(1)
                continue
            clientsocket.sendall(line)
    finally:
        clientsocket.close()
finally:
    s.close()
./nettail.py works as I expect, but the Tornado HTTP server is only handling one request at a time.
I would like to use long-running, persistent HTTP connections to do this, as it is compatible with older browsers. I understand that Web Sockets is how it would be done in modern browsers.
Edit:
I'm running this on Linux and Solaris, not Windows. That means I could use tornado.iostream on the file, rather than through a sockets program. Still, that is not a documented feature, so I launch a sockets program for each connection.

After doing some more debugging, it turns out that this tail server was not blocking, after all.
I was trying to test concurrent connections with two windows of Firefox open, but Firefox would not start fetching the second window until the first window was manually stopped. I guess Firefox does not like to have two concurrent HTTP connections to fetch the same resource.
Opening a Firefox window and a Chromium window, I can see the "tail" output pushed to both tabs.
Thank you for all your help. @abarnert's comments were especially helpful.
Edit:
In the to-be-released 2.4.2 version of Tornado, a "Pipe" IOStream is implemented. Using this and the regular "tail" simplified the code a lot.
import subprocess
import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.options
import tornado.web
from tornado.options import define, options
# Command-line options: the listening port and the file to follow.
define(
    "port",
    default=8888,
    type=int,
    help="run on the given port",
)
define(
    "inputfile",
    default="test.txt",
    type=str,
    help="the path to the file which we will 'tail'",
)
class MainHandler(tornado.web.RequestHandler):
    """Stream ``tail -f`` output of the input file over a long-lived response."""

    # Without this decorator the request is finished as soon as get()
    # returns, before any line from the pipe has been relayed.
    @tornado.web.asynchronous
    def get(self):
        """Start ``tail`` and begin relaying its stdout line by line."""
        print("GOT REQUEST")
        self.p = subprocess.Popen(
            ["tail", "-f", options.inputfile, "-n+1"],
            stdout=subprocess.PIPE)
        self.write("<pre>")
        self.write("Hello, world\n")
        self.flush()
        self.stream = tornado.iostream.PipeIOStream(self.p.stdout.fileno())
        self.stream.read_until("\n", self.line_from_nettail)

    def on_connection_close(self, *args, **kwargs):
        """Clean up the tail process when the connection is closed."""
        print("CONNECTION CLOSED!!!!")
        self.p.terminate()
        tornado.web.RequestHandler.on_connection_close(self, *args, **kwargs)

    def line_from_nettail(self, data):
        """Forward one line to the client and wait for the next one."""
        self.write(data)
        self.flush()
        self.stream.read_until("\n", self.line_from_nettail)
def main():
    """Parse the command line and run the tail server until stopped."""
    tornado.options.parse_command_line()
    routes = [(r"/", MainHandler)]
    server = tornado.httpserver.HTTPServer(tornado.web.Application(routes))
    server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()

I created this recently as an experiment. It works for me with multiple connections; is it of any use?
class TailHandler(BaseHandler):
    """Poll a file once per second and push newly read data to the client."""

    @asynchronous
    def get(self):
        """Open the file and start the one-second polling loop."""
        self.file = open('data/to_read.txt', 'r')
        self.pos = self.file.tell()
        self._closed = False

        def _read_file():
            # Stop polling once the client has gone away.
            if self._closed:
                return
            line = self.file.read()
            last_pos = self.file.tell()
            if not line:
                # Nothing new: reopen the file and resume from the same
                # offset (presumably so appended data becomes visible).
                self.file.close()
                self.file = open('data/to_read.txt', 'r')
                self.file.seek(last_pos)
            else:
                self.write(line)
                self.flush()
            IOLoop.instance().add_timeout(time.time() + 1, _read_file)

        _read_file()

    def on_connection_close(self):
        """Stop the polling loop and release the file when the client leaves."""
        self._closed = True
        self.file.close()
        super(TailHandler, self).on_connection_close()

You shouldn't have blocking calls like this in the handler.
port_number = int(p.stdout.readline().strip())
You'll need to use select or a similar mechanism to avoid the blocking call.
Edit: ok I went and checked the docs. You should use their iostream to read from p

Related

Serve proxy app through sockets Tornado 4.4

I have a little question.
I'm new to the whole socket theory and the Tornado framework. Lately I've written a proxy server using the Tornado framework. My app sits in the middle of the connection client (browser) <===> remote address. So the connection looks like this:
client(my web browser) <=== PROXY(my app) ===> Remote address (stackoverflow.com)
Everything works fine if I use the standard "main" function. But I want to go a little more low - level, I mean I want to create sockets and offer my proxy app through that connection.
My proxy app urls:
# coding: utf-8
"""URL's for proxy app."""
from settings import settings
from tornado.web import (
StaticFileHandler,
url,
)
from handlers import (
mainHandlers,
myProxy,
)
# Routing table for the proxy application: admin pages first, then static
# files, and finally a catch-all that proxies every other request.
urls = [
    url(r"/admin/$", mainHandlers.MainHandler),
    url(r"/admin/delete_filter/", mainHandlers.DataDeleteHandler),
    url(r"/admin/filters/$", mainHandlers.DataGetter),
    url(r"/admin/new_filter/$", mainHandlers.FormHandler),
    url(r"/admin/stats/$", mainHandlers.StatsTableHandler),
    url(r"/admin/stats/query/$", mainHandlers.AjaxStatsGetHandler),
    # StaticFileHandler receives the requested file path from a capture
    # group, so the pattern must contain one.
    url(r"/static/(.*)", StaticFileHandler, dict(path=settings['static_path'])),
    # Catch-all: keep it last so the routes above are tried first.
    url(r'.*', myProxy.ProxyHandler),
]
My ProxyHandler:
class ProxyHandler(tornado.web.RequestHandler):
    """Forward GET/POST requests to the remote address and filter the reply."""

    SUPPORTED_METHODS = ['GET', 'POST']

    def data_received(self, chunk):
        """No-op hook; incoming data chunks are ignored."""
        pass

    def compute_etag(self):
        return None  # disable tornado Etag

    def _strip_filtered_tags(self, body):
        """Return *body* with every tag listed as a type-1 filter removed."""
        c.execute('SELECT filter_name FROM filters WHERE filter_type=1')
        tags = c.fetchall()
        soup = BeautifulSoup(body, 'html.parser')
        for row in tags:
            catched_tags = soup.find_all(str(row[0]))
            if catched_tags:
                print('catched: %s of <%s> tags' % (len(catched_tags), str(row[0])))
                for tag in catched_tags:
                    tag.extract()
        return str(soup)

    def handle_response(self, response):
        """Relay the upstream *response* to the client and finish the request.

        Non-HTTP errors become a 500. Otherwise the status and most headers
        are copied. The body is passed through untouched for URIs matching
        ``secure_pages`` and tag-filtered for everything else.
        """
        if response.error and not isinstance(response.error, tornado.httpclient.HTTPError):
            self.set_status(500)
            self.write('Internal server error:\n' + str(response.error))
        else:
            self.set_status(response.code, response.reason)
            self._headers = tornado.httputil.HTTPHeaders()  # clear tornado default header
            for header, v in response.headers.get_all():
                if header not in ('Content-Length', 'Transfer-Encoding', 'Content-Encoding', 'Connection'):
                    self.add_header(header, v)  # some header appear multiple times, eg 'Set-Cookie'
            secured_page = any(page in self.request.uri for page in secure_pages)
            # A response may carry no body at all; only measure and write
            # the body when there is one.
            if response.body:
                if secured_page:
                    body = response.body
                else:
                    body = self._strip_filtered_tags(response.body)
                self.set_header('Content-Length', len(body))
                self.write(body)
        self.finish()

    @tornado.web.asynchronous
    def get(self):
        """Reject blocked URLs with 403, otherwise fetch the remote resource."""
        logger.debug('Handle %s request to %s', self.request.method, self.request.uri)
        body = self.request.body
        if not body:
            body = None
        try:
            if 'Proxy-Connection' in self.request.headers:
                del self.request.headers['Proxy-Connection']
            c.execute('SELECT filter_name FROM filters WHERE filter_type=2')
            blocked = c.fetchall()
            for row in blocked:
                if row[0] in self.request.path:
                    self.set_status(403)
                    self.finish()
                    return
            fetch_request(self.request.uri, self.handle_response,
                          method=self.request.method, body=body,
                          headers=self.request.headers, follow_redirects=False,
                          allow_nonstandard_methods=True)
        except tornado.httpclient.HTTPError as e:
            if hasattr(e, 'response') and e.response:
                self.handle_response(e.response)
            else:
                self.set_status(500)
                self.write('Internal server error:\n' + str(e))
                self.finish()

    @tornado.web.asynchronous
    def post(self):
        """Handle POST exactly like GET."""
        return self.get()
The easy main function:
# coding: utf-8
import sys
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
from proxy.urls import proxy_urls
def make_app():
    """Build the application object from the ``proxy_urls`` routing table."""
    application = MyApplication(proxy_urls)
    return application
if __name__ == "__main__":
    # Main loop: listen on the configured port, or on the port given as the
    # first command-line argument, and run the IOLoop.
    app = make_app()
    port = options.port
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
    app.listen(port)
    print('tornado working on port %s' % port)
    tornado.ioloop.IOLoop.current().start()
So I want to change the easy way to the low-level way based on the docs:
import errno
import functools
import tornado.ioloop
import socket
def connection_ready(sock, fd, events):
    """Accept every pending connection on *sock* and hand each one off.

    Returns once ``accept()`` reports that no more connections are waiting
    (EWOULDBLOCK / EAGAIN); any other socket error is re-raised.
    """
    no_more_pending = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            conn, peer = sock.accept()
        except socket.error as exc:
            if exc.args[0] in no_more_pending:
                return
            raise
        conn.setblocking(0)
        handle_connection(conn, peer)
if __name__ == '__main__':
    # Create a non-blocking listening socket.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(0)
    sock.bind(("", port))
    sock.listen(128)

    # Have the IOLoop call connection_ready whenever the socket is readable.
    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    io_loop.start()
To accomplish this I read a little about the whole network programming using sockets (https://www.tutorialspoint.com/python/python_networking.htm).
The example in the tutorial works well so I tried to connect the tutorial with the example in the Tornado docs:
# coding: utf-8
import errno
import functools
import socket
import sys
import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
def make_app():
    u"""Create the application object with its own settings and URLs."""
    application = MyApplication()
    return application
def connection_ready(sock, fd, events):
    u"""Handle incoming connections on the listening socket *sock*.

    A new application and HTTPServer are built on every call; each accepted
    connection is then passed to ``send()`` and closed.
    """
    server = tornado.httpserver.HTTPServer(make_app())
    no_more_pending = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as exc:
            if exc.args[0] in no_more_pending:
                return
            raise
        print('Got connection from %s' % (address,))
        # connection.setblocking(False)
        # NOTE(review): send() needs a buffer; passing the HTTPServer object
        # raises "TypeError: send() argument 1 must be convertible to a
        # buffer, not HTTPServer".
        connection.send(server)
        connection.close()
if __name__ == "__main__":
    # Main loop: listen on a non-blocking socket and let the IOLoop call
    # connection_ready whenever the socket becomes readable.
    port = options.port
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(('', port))
    sock.listen(5)

    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.add_handler(
        sock.fileno(), functools.partial(connection_ready, sock), io_loop.READ)
    print('Tornado Proxy working on port: %s' % port)
    io_loop.start()
But when I try to connect to my proxy (for example to add filters: http://127.0.0.1:8000/admin/filters/ - I have a handler written to handle this URL)
I get specific errors:
ERROR:tornado.application:Exception in callback (3, )
Traceback (most recent call last):
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "manage.py", line 35, in connection_ready
connection.send(server)
TypeError: send() argument 1 must be convertible to a buffer, not HTTPServer
I understand that I cannot send an HTTPServer through the connection (from one end to the other); it must be a buffer.
My first idea was to send the buffer from the handler that handles a URL (for example class ProxyHandler(tornado.web.RequestHandler)),
but how can I handle different URLs and different handlers?
After a few approaches I changed my Proxy app written as a Tornado app to a pure Python code, that handles responses from remote addresses and does the filter stuff. I think this is the best and fastest thing that I could do.

how to bind multiple port with multiprocessing when using tornado

My python version is 3.4, my tornado version is 4.3.
I have 2 servers, and they have to share some data during runtime. My code is like this:
from tornado.web import gen, asynchronous, RequestHandler, Application
from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop
class HelloHandler(RequestHandler):
    """Respond to GET with the literal text ``hello``."""

    @asynchronous
    @gen.engine
    def get(self):
        self.write('hello')
        # The handler is marked asynchronous, so the response is finished
        # explicitly.
        self.finish()
class MainHandler(RequestHandler):
    """Respond to GET with the literal text ``main``."""

    @asynchronous
    @gen.engine
    def get(self):
        self.write('main')
        # The handler is marked asynchronous, so the response is finished
        # explicitly.
        self.finish()
# One application per server: /hello and /main are served separately.
helloApp = Application([(r'/hello', HelloHandler)])
mainApp = Application([(r'/main', MainHandler)])

if __name__ == "__main__":
    # Each application gets its own HTTPServer on its own port; both are
    # driven by the same IOLoop.
    hello_server = HTTPServer(helloApp)
    hello_server.bind(8881)
    hello_server.start()
    # hello_server.start(0)

    main_server = HTTPServer(mainApp)
    main_server.bind(8882)
    main_server.start()
    # main_server.start(0)

    IOLoop.current().start()
It works, but when I tried to support multiple processes by using server.start(0), I received an error: 'OSError: [Errno 98] Address already in use', even though I already use different ports (8881, 8882). How does this happen?
And how to fix it?
start(n) only works with a single server. To use more than one, you must use bind_sockets, fork_processes, and add_sockets separately (example adapted from http://www.tornadoweb.org/en/stable/tcpserver.html):
from tornado.netutil import bind_sockets
from tornado.process import fork_processes

# Bind both listening sockets once, before forking, so every child process
# uses the same sockets instead of trying to bind the ports again.
hello_sockets = bind_sockets(8881)
main_sockets = bind_sockets(8882)
# Imported by name: the surrounding code never imports the bare `tornado`
# package, so `tornado.process.fork_processes` would raise NameError.
fork_processes(0)
# From here on the code runs in each child process.
hello_server = HTTPServer(helloApp)
hello_server.add_sockets(hello_sockets)
main_server = HTTPServer(mainApp)
main_server.add_sockets(main_sockets)
IOLoop.current().start()

echo http server tornado

How can I create an HTTP echo server from Tornado?
#!/usr/bin/env python
import signal
from tornado.ioloop import IOLoop
from tornado.tcpserver import TCPServer
import tornado.web
def handle_signal(sig, frame):
    """Signal handler: schedule the IOLoop to stop via one of its callbacks."""
    loop = IOLoop.instance()
    loop.add_callback(loop.stop)
class EchoServer(TCPServer):
    """TCP server that reads newline-terminated lines and writes them back."""

    def handle_stream(self, stream, address):
        """Remember the newly connected stream and start reading from it."""
        # NOTE(review): the stream is stored on the server object, yet one
        # TCPServer may handle many connections at once.
        self._stream = stream
        self._read_line()

    def _read_line(self):
        """Request the next line; _handle_read is called when it arrives."""
        self._stream.read_until('\n', self._handle_read)

    def _handle_read(self, data):
        """Write the received line back and wait for the next one."""
        # NOTE(review): the second argument of stream.write is a callback,
        # so '\n' here is not sent as data.
        self._stream.write(data, '\n')
        self._read_line()
if __name__ == '__main__':
    # Stop the loop cleanly on Ctrl-C or a termination request.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, handle_signal)

    server = EchoServer()
    server.bind(8001)
    server.start(25)

    IOLoop.instance().start()
    IOLoop.instance().close()
How do I make an HTTP echo server out of this?
What's wrong with it? I don't know much yet; I am a newbie.
Thanks!
Your question would be more clear if you explained what happened when you run this code, and what you expected instead.
One TCPServer object may handle many connections, so instead of assigning to self.stream in handle_stream, you should make a new object to handle this stream.
The second argument to stream.write is a callback; it looks like you meant self._stream.write(data + '\n').

Twisted SSE server subscribed to Redis via pubsub

I'm trying to build a server in Twisted which would let clients connect using Server Sent Events. I would like this server also to listen to Redis and if a message comes then push it to the connected SSE clients.
I have the SSE server working. I know how to subscribe to Redis. I can't figure out how to have both pieces running without blocking each other.
I'm aware of https://github.com/leporo/tornado-redis and https://github.com/fiorix/txredisapi, which were recommended in related questions. No idea how this helps :/
How to solve this? Could you help with both: conceptual tips and code snippets?
My Twisted SSE server code:
# coding: utf-8
from twisted.web import server, resource
from twisted.internet import reactor
class Subscribe(resource.Resource):
    """Server-Sent Events resource that keeps every client request open."""

    isLeaf = True
    # Requests of all currently connected SSE clients (shared class-wide).
    sse_conns = set()

    def render_GET(self, request):
        """Register the request as an SSE client and keep it open."""
        request.setHeader('Content-Type', 'text/event-stream; charset=utf-8')
        request.write("")
        self.add_conn(request)
        return server.NOT_DONE_YET

    def add_conn(self, conn):
        """Track *conn* and arrange for its removal once it finishes."""
        self.sse_conns.add(conn)
        finished = conn.notifyFinish()
        # The Deferred fires with None or a Failure, never with the request
        # itself, so bind the request explicitly.
        finished.addBoth(lambda _result: self.rm_conn(conn))

    def rm_conn(self, conn):
        """Forget *conn*; a no-op if it is not being tracked."""
        self.sse_conns.discard(conn)

    def broadcast(self, event):
        """Send *event* as one SSE ``data:`` message to every client."""
        for conn in self.sse_conns:
            # A data line followed by a blank line.
            event_line = "data: {}\r\n".format(event)
            conn.write(event_line + '\r\n')
if __name__ == "__main__":
    # Serve the SSE resource on TCP port 9000 and run the reactor.
    sub = Subscribe()
    site = server.Site(sub)
    reactor.listenTCP(9000, site)
    reactor.run()
My Redis subscribe code:
import redis
# NOTE: this rebinds the name `redis` from the imported module to a client
# instance.
redis = redis.StrictRedis.from_url('redis://localhost:6379')


class RedisSub(object):
    """Subscribe to ``foobar-channel`` and print everything received."""

    def __init__(self):
        channel = redis.pubsub()
        channel.subscribe('foobar-channel')
        self.pubsub = channel

    def listen(self):
        """Print each item yielded by the pub/sub listener."""
        for item in self.pubsub.listen():
            print(str(item))
This is what works for me.
I've ended up using txredis lib with a slight change to the RedisClient (added minimal subscribe capabilities).
# coding: utf-8
import os
import sys
import weakref
from txredis.client import RedisClient
from twisted.web import server, resource
from twisted.internet import reactor, protocol, defer
from twisted.python import log
from utils import cors, redis_conf_from_url
# Send Twisted's log output to stdout.
log.startLogging(sys.stdout)

# Listening port; taken from $PORT when set, 9000 otherwise.
PORT = int(os.environ.get('PORT', 9000))
# Redis connection settings, parsed from $REDISCLOUD_URL (local default).
REDIS_CONF = redis_conf_from_url(
    os.environ.get('REDISCLOUD_URL', 'redis://localhost:6379'))
# Pub/sub channel the broadcaster subscribes to.
REDIS_SUB_CHANNEL = 'votes'
class RedisBroadcaster(RedisClient):
    """Redis client that forwards pub/sub messages to the SSE resource.

    ``sse_connector`` must be assigned on the class before messages arrive.
    """

    def subscribe(self, *channels):
        """Send a SUBSCRIBE command for the given channels."""
        self._send('SUBSCRIBE', *channels)

    def handleCompleteMultiBulkData(self, reply):
        """Broadcast "message" replies; delegate everything else upward."""
        if reply[0] == u"message":
            # The third element is what gets broadcast.
            message = reply[2]
            self.sse_connector.broadcast(message)
        else:
            # Name this class in super() so the lookup starts at the direct
            # parent and does not skip RedisClient.
            super(RedisBroadcaster, self).handleCompleteMultiBulkData(reply)
@defer.inlineCallbacks
def redis_sub():
    """Connect to Redis and subscribe to REDIS_SUB_CHANNEL.

    The decorator is required: without it, calling this function only
    creates a generator and never runs the connection code.
    """
    clientCreator = protocol.ClientCreator(
        reactor, RedisBroadcaster, password=REDIS_CONF.get('password'))
    redis = yield clientCreator.connectTCP(REDIS_CONF['host'], REDIS_CONF['port'])
    redis.subscribe(REDIS_SUB_CHANNEL)
class Subscribe(resource.Resource):
    """Server-Sent Events resource that keeps every client request open."""

    isLeaf = True
    # Weak references: a request drops out of the set on its own once
    # nothing else refers to it.
    sse_conns = weakref.WeakSet()

    @cors
    def render_GET(self, request):
        """Register the request as an SSE client and keep it open."""
        request.setHeader('Content-Type', 'text/event-stream; charset=utf-8')
        request.write("")
        self.sse_conns.add(request)
        return server.NOT_DONE_YET

    def broadcast(self, event):
        """Send *event* as one SSE ``data:`` message to every client."""
        for conn in self.sse_conns:
            # A data line followed by a blank line.
            event_line = "data: {}\r\n".format(event)
            conn.write(event_line + '\r\n')
if __name__ == "__main__":
    # Wire the SSE resource into the Redis broadcaster, then serve it.
    sub = Subscribe()
    RedisBroadcaster.sse_connector = sub
    reactor.listenTCP(PORT, server.Site(sub))
    # Start the Redis subscription as soon as the reactor is running.
    reactor.callLater(0, redis_sub)
    reactor.run()

tornado.websocket and [Errno 24] Too many open files

I got an error ([Errno 24] Too many open files) while testing tornado.websocket on a local machine.
server.py
import tornado.ioloop
import tornado.web
import tornado.websocket
import tornado.options
class ChatSocketHandler(tornado.websocket.WebSocketHandler):
    """WebSocket handler that relays every received message to all clients."""

    # All currently open connections, shared by every handler instance.
    waiters = set()

    def open(self):
        """Register the new connection and print the client count."""
        ChatSocketHandler.waiters.add(self)
        # The two spaces reproduce the output of the original
        # `print "Clients: ", n` statement.
        print("Clients:  %d" % len(ChatSocketHandler.waiters))

    def on_close(self):
        """Unregister the connection."""
        ChatSocketHandler.waiters.remove(self)

    @classmethod
    def send_updates(cls, chat):
        """Send *chat* to every registered connection, logging failures."""
        # Imported here because the module does not import logging itself.
        import logging
        for waiter in cls.waiters:
            try:
                waiter.write_message(chat)
            except Exception:
                logging.error("Error sending message", exc_info=True)

    def on_message(self, message):
        """Broadcast an incoming message to all clients."""
        ChatSocketHandler.send_updates(message)
# Single route: the chat WebSocket endpoint.
app = tornado.web.Application([(r"/ws", ChatSocketHandler)])
def main():
    """Parse the command line, listen on port 8888 and run the IOLoop."""
    tornado.options.parse_command_line()
    app.listen(8888)
    loop = tornado.ioloop.IOLoop.instance()
    loop.start()


if __name__ == "__main__":
    main()
clients.py (using websocket-client)
from multiprocessing import Pool, Process
from websocket import create_connection
def go():
    """Open one WebSocket connection and exchange messages until interrupted."""
    ws = create_connection("ws://127.0.0.1:8888/ws")
    try:
        while True:
            ws.send("Message ...")
            result = ws.recv()
            print("Received '%s'" % result)
    except KeyboardInterrupt:
        # Ctrl-C simply ends the loop.
        pass
    finally:
        # Close the socket on every exit path, not only on Ctrl-C.
        ws.close()
# Guarded so that importing this module (as multiprocessing may do in child
# processes on some platforms) does not start another 1000 processes.
if __name__ == "__main__":
    for i in range(1000):
        Process(target=go).start()
The server dies after ~800 connections ;/
Additional question: is it ok to set up a Nginx proxy to a tornado server instance? Do I get some benefits?
Your process likely runs out of file descriptors. Here is a recipe for network tuning on Linux including how to increase max. FDs. (This is for Crossbar.io, but will work for Tornado also).
As to your question "does it make sense to put Nginx in front of Tornado": yes, definitely. Tornado's native TLS support is limited. Have a look at Hynek Schlawack: The Sorry State of SSL - PyCon 2014
Note: the latter does not apply to Twisted (or Crossbar.io, which is based on Twisted) - since Twisted uses pyOpenSSL and can be made to have high-quality TLS. So there is no need for Nginx with these (at least not for TLS reasons).

Categories

Resources