I went down a journey of "How much performance can I squeeze out of a Python web server?" This led me to AIOHTTP and uvloop. Still, I could see that AIOHTTP wasn't using my CPU to its full potential, so I set out to use multiprocessing with AIOHTTP. I learned that there's a Linux kernel feature (SO_REUSEPORT) that allows multiple processes to share the same TCP port. This led me to develop the following code, which works wonderfully:
import asyncio
import os
import socket
import time

from aiohttp import web
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

CPU_COUNT = cpu_count()
print("CPU Count:", CPU_COUNT)


def mk_socket(host="127.0.0.1", port=8000, reuseport=False):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    if reuseport:
        SO_REUSEPORT = 15
        sock.setsockopt(socket.SOL_SOCKET, SO_REUSEPORT, 1)
    sock.bind((host, port))
    return sock


async def handle(request):
    name = request.match_info.get('name', "Anonymous")
    pid = os.getpid()
    text = "{:.2f}: Hello {}! Process {} is treating you\n".format(
        time.time(), name, pid)
    # time.sleep(5)  # intentionally blocking sleep to simulate CPU load
    return web.Response(text=text)


def start_server():
    host = "127.0.0.1"
    port = 8000
    reuseport = True
    app = web.Application()
    sock = mk_socket(host, port, reuseport=reuseport)
    app.add_routes([web.get('/', handle),
                    web.get('/{name}', handle)])
    loop = asyncio.get_event_loop()
    coro = loop.create_server(
        protocol_factory=app.make_handler(),
        sock=sock,
    )
    srv = loop.run_until_complete(coro)
    loop.run_forever()


if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        for i in range(0, CPU_COUNT):
            executor.submit(start_server)
wrk benchmark of my site before applying this code:
Running 30s test @ http://127.0.0.1:8000/
12 threads and 400 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 54.33ms 6.54ms 273.24ms 89.95%
Req/Sec 608.68 115.97 2.27k 83.63%
218325 requests in 30.10s, 41.23MB read
Non-2xx or 3xx responses: 218325
Requests/sec: 7254.17
Transfer/sec: 1.37MB
wrk benchmark after:
Running 30s test @ http://127.0.0.1:8000/
12 threads and 400 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 15.96ms 7.27ms 97.29ms 84.78%
Req/Sec 2.11k 208.30 4.45k 75.50%
759290 requests in 30.08s, 153.51MB read
Requests/sec: 25242.39
Transfer/sec: 5.10MB
Wow! But there's a problem:
DeprecationWarning: Application.make_handler(...) is deprecated, use AppRunner API instead
protocol_factory=app.make_handler()
So I tried this:
import asyncio
import os
import socket
import time

from aiohttp import web
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

CPU_COUNT = cpu_count()
print("CPU Count:", CPU_COUNT)


def mk_socket(host="127.0.0.1", port=8000, reuseport=False):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    if reuseport:
        SO_REUSEPORT = 15
        sock.setsockopt(socket.SOL_SOCKET, SO_REUSEPORT, 1)
    sock.bind((host, port))
    return sock


async def handle(request):
    name = request.match_info.get('name', "Anonymous")
    pid = os.getpid()
    text = "{:.2f}: Hello {}! Process {} is treating you\n".format(
        time.time(), name, pid)
    # time.sleep(5)  # intentionally blocking sleep to simulate CPU load
    return web.Response(text=text)


async def start_server():
    host = "127.0.0.1"
    port = 8000
    reuseport = True
    app = web.Application()
    sock = mk_socket(host, port, reuseport=reuseport)
    app.add_routes([web.get('/', handle),
                    web.get('/{name}', handle)])
    coro = loop.create_server(  # leftover from the first example; 'loop' isn't even defined here
        protocol_factory=app.make_handler(),
        sock=sock,
    )
    runner = web.AppRunner(app)
    await runner.setup()
    srv = web.TCPSite(runner, 'localhost', 8000)
    await srv.start()
    print('Server started at http://127.0.0.1:8000')
    return coro, app, runner

async def finalize(srv, app, runner):
    sock = srv.sockets[0]
    app.loop.remove_reader(sock.fileno())
    sock.close()
    # await handler.finish_connections(1.0)
    await runner.cleanup()
    srv.close()
    await srv.wait_closed()
    await app.finish()

def init():
    loop = asyncio.get_event_loop()
    srv, app, runner = loop.run_until_complete(start_server())
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        loop.run_until_complete(finalize(srv, app, runner))


if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        for i in range(0, CPU_COUNT):
            executor.submit(init)
This is obviously incomplete because coro isn't being used, and I'm not sure where to integrate the socket with AppRunner. An answer should show the original example modified to use the AppRunner API.
As it's my first time using coroutines and aiohttp, I may be wrong, but it seems to work with a SockSite:
#!/usr/bin/env python
import asyncio
import os
import socket
import time
import traceback

from aiohttp import web
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

CPU_COUNT = cpu_count()
print("CPU Count:", CPU_COUNT)


def mk_socket(host="127.0.0.1", port=9090, reuseport=False):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    if reuseport:
        SO_REUSEPORT = 15
        sock.setsockopt(socket.SOL_SOCKET, SO_REUSEPORT, 1)
    sock.bind((host, port))
    return sock


async def handle(request):
    name = request.match_info.get('name', "Anonymous")
    pid = os.getpid()
    text = "{:.2f}: Hello {}! Process {} is treating you\n".format(
        time.time(), name, pid)
    # time.sleep(5)  # intentionally blocking sleep to simulate CPU load
    return web.Response(text=text)


async def start_server():
    try:
        host = "127.0.0.1"
        port = 9090
        reuseport = True
        app = web.Application()
        app.add_routes([web.get('/', handle),
                        web.get('/{name}', handle)])
        runner = web.AppRunner(app)
        await runner.setup()
        sock = mk_socket(host, port, reuseport=reuseport)
        srv = web.SockSite(runner, sock)
        await srv.start()
        print('Server started at http://127.0.0.1:9090')
        return srv, app, runner
    except Exception:
        traceback.print_exc()
        raise

async def finalize(srv, app, runner):
    # A SockSite has no .sockets or .close(); stopping the site and
    # cleaning up the runner closes the listening socket and shuts
    # the application down cleanly
    await srv.stop()
    await runner.cleanup()

def init():
    loop = asyncio.get_event_loop()
    srv, app, runner = loop.run_until_complete(start_server())
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        loop.run_until_complete(finalize(srv, app, runner))


if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        for i in range(0, CPU_COUNT):
            executor.submit(init)
Eventually:
>curl http://127.0.0.1:9090
1580741746.47: Hello Anonymous! Process 54623 is treating you
>curl http://127.0.0.1:9090
1580741747.05: Hello Anonymous! Process 54620 is treating you
>curl http://127.0.0.1:9090
1580741747.77: Hello Anonymous! Process 54619 is treating you
>curl http://127.0.0.1:9090
1580741748.36: Hello Anonymous! Process 54621 is treating you
I also added a log line in the finalize routine, and it seems to be triggered correctly.
Edit: Out of curiosity, I gave it a try on an older kernel, and I can confirm it doesn't work there when the reuseport option is enabled (it does work with reuseport=False); SO_REUSEPORT requires Linux 3.9 or later.
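As an aside, rather than hardcoding SO_REUSEPORT = 15, a more portable variant of mk_socket (a sketch along the same lines, not from the original code) can ask the socket module for the constant, which Python only exposes on platforms that support it (Linux 3.9+):

import socket

def mk_socket(host="127.0.0.1", port=9090, reuseport=False):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    if reuseport:
        # socket.SO_REUSEPORT only exists where the kernel supports it
        if not hasattr(socket, "SO_REUSEPORT"):
            raise RuntimeError("SO_REUSEPORT is not supported on this platform")
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
    sock.bind((host, port))
    return sock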
Related
I followed the tutorial in https://docs.python.org/3/library/asyncio-stream.html to read data from a sensor (TCP protocol, PC as the server):
import datetime
import asyncio
import socket


async def connect_to_sq():
    _ip = "0.0.0.0"
    _port = 45454
    # Create a TCP server (socket type: SOCK_STREAM)
    server = await asyncio.start_server(handle_server, _ip, _port,
                                        family=socket.AF_INET, reuse_address=True)
    async with server:
        await server.serve_forever()


async def handle_server(reader, writer):
    # read data
    print("start reading")
    init_time = datetime.datetime.now()
    end_time = init_time + datetime.timedelta(seconds=1)
    while datetime.datetime.now() < end_time:
        raw_data_stream = await reader.readexactly(200)
        print("time:", datetime.datetime.now() - init_time)
    # close connection
    writer.close()
    await writer.wait_closed()
    print("Connection closed")


if __name__ == "__main__":
    asyncio.run(connect_to_sq())
The program is supposed to finish after 1 second of data transmission. However, the output is:
start reading
time: 0:00:00.495863
time: 0:00:00.594812
time: 0:00:00.695760
time: 0:00:00.794883
time: 0:00:00.895336
time: 0:00:00.995024
time: 0:00:01.095308
Connection closed
start reading
time: 0:00:00.647908
time: 0:00:00.750355
time: 0:00:00.848436
......
It repeats automatically and indefinitely. What is the reason for this, and how can I solve it?
Problem
serve_forever() listens on the port indefinitely. So, even though each individual connection closes after 1 second, the server keeps accepting new connections. In this case, your sensor (client) seems to create a new connection after the old one is closed, and since the server still accepts them, handle_server runs again from the top.
Solution
Maybe not the best way™, but one possible solution is to use a Future so that handle_server can signal the main code upon connection completion. The main code can then stop the server, avoiding new connections. This is how it can be done:
import datetime
import asyncio
import socket
from functools import partial


async def connect_to_sq():
    _ip = "0.0.0.0"
    _port = 45454
    # Create a TCP server (socket type: SOCK_STREAM)
    first_connection_completion = asyncio.Future()
    server = await asyncio.start_server(
        partial(handle_server, first_connection_completion=first_connection_completion),
        _ip,
        _port,
        family=socket.AF_INET,
        reuse_address=True)
    async with server:
        server_task = asyncio.create_task(server.serve_forever())
        await first_connection_completion
        server_task.cancel()


async def handle_server(reader, writer, first_connection_completion=None):
    # read data
    print("start reading")
    init_time = datetime.datetime.now()
    end_time = init_time + datetime.timedelta(seconds=1)
    while datetime.datetime.now() < end_time:
        raw_data_stream = await reader.readexactly(200)
        print("time:", datetime.datetime.now() - init_time)
    # close connection
    writer.close()
    await writer.wait_closed()
    print("Connection closed")
    first_connection_completion.set_result(None)


if __name__ == "__main__":
    asyncio.run(connect_to_sq())
A couple of notes:
handle_server now takes one extra argument, first_connection_completion, which is the future used to send a signal from the function to the main code. The argument has been bound to the function using functools.partial. Within the function, set_result is used to mark the future as completed.
serve_forever() is now wrapped in a create_task call. This is because serve_forever() never returns, so awaiting it directly would block forever; scheduling it as a task lets us await the future instead and cancel the task afterwards.
The new code looks messy, but you can always refactor. So a cleaner version would be:
import datetime
import asyncio
import socket


async def listen_once(handler, *server_args, **server_kwargs):
    first_connection_completion = asyncio.Future()

    async def wrapped_handler(*args):
        await handler(*args)
        first_connection_completion.set_result(None)

    server = await asyncio.start_server(wrapped_handler, *server_args, **server_kwargs)
    async with server:
        server_task = asyncio.create_task(server.serve_forever())
        await first_connection_completion
        server_task.cancel()


async def connect_to_sq():
    _ip = "0.0.0.0"
    _port = 45454
    # Create a TCP server (socket type: SOCK_STREAM)
    await listen_once(handle_server, _ip, _port, family=socket.AF_INET, reuse_address=True)


async def handle_server(reader, writer):
    # read data
    print("start reading")
    init_time = datetime.datetime.now()
    end_time = init_time + datetime.timedelta(seconds=1)
    while datetime.datetime.now() < end_time:
        raw_data_stream = await reader.readexactly(200)
        print("time:", datetime.datetime.now() - init_time)
    # close connection
    writer.close()
    await writer.wait_closed()
    print("Connection closed")


if __name__ == "__main__":
    asyncio.run(connect_to_sq())
Code:
#!/usr/bin/env python
import asyncio
import os
import socket
import time
import traceback

from aiohttp import web
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

CPU_COUNT = cpu_count()
print("CPU Count:", CPU_COUNT)


def mk_socket(host="127.0.0.1", port=9090, reuseport=False):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    if reuseport:
        SO_REUSEPORT = 15
        sock.setsockopt(socket.SOL_SOCKET, SO_REUSEPORT, 1)
    sock.bind((host, port))
    return sock


async def index(request):
    icecast_index_path = os.path.abspath("../test/icecast/icecast_index.html")
    print(icecast_index_path)
    try:
        content = open(icecast_index_path, encoding="utf8").read()
        return web.Response(content_type="text/html", text=content)
    except Exception as e:
        return web.Response(content_type="text/html",
                            text="<!doctype html><body><h1>Error: " + str(e) + "</h1></body></html>")


async def start_server():
    try:
        host = "127.0.0.1"
        port = 8080
        reuseport = True
        app = web.Application()
        app.add_routes([web.get('/', index)])
        runner = web.AppRunner(app)
        await runner.setup()
        sock = mk_socket(host, port, reuseport=reuseport)
        srv = web.SockSite(runner, sock)
        await srv.start()
        print('Server started at http://127.0.0.1:8080')
        return srv, app, runner
    except Exception:
        traceback.print_exc()
        raise

async def finalize(srv, app, runner):
    # Stop the SockSite and clean up the runner; this closes the
    # listening socket and shuts the application down cleanly
    await srv.stop()
    await runner.cleanup()

def init():
    loop = asyncio.get_event_loop()
    srv, app, runner = loop.run_until_complete(start_server())
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        loop.run_until_complete(finalize(srv, app, runner))


if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        for i in range(0, int(CPU_COUNT / 2)):
            executor.submit(init)
    # after the aiohttp servers start I want to execute more code,
    # for example:
    print("Hello world.")
    # in the actual program the ProcessPoolExecutor is called
    # inside a PyQt5 app, so I don't want the PyQt5 app to freeze.
The problem is that with this code I can't execute anything after the ProcessPoolExecutor calls.
How can I fix that?
I tried to remove this part:
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        loop.run_until_complete(finalize(srv, app, runner))
from the init() method, but after that the aiohttp server closes instantly.
Edit: If I use a thread instead of ProcessPoolExecutor, then there are aiohttp + asyncio related errors that say:
RuntimeError: set_wakeup_fd only works in main thread
RuntimeError: There is no current event loop in thread
Maybe I need to use some kind of decorator on the def declarations (I suppose).
Using an Executor as a context manager (with) will cause your process to block until the jobs in the executor are completed; since they're running infinite event loops, they will never complete and your Executor will never unblock.
Instead, just use the executor to kick off the jobs, and run your other code afterwards. When you're finally done, call .shutdown() to wait for the processes to exit:
executor = ProcessPoolExecutor()
for i in range(0, int(CPU_COUNT / 2)):
    executor.submit(init)

# other code…

executor.shutdown()
I'm using the pyserial-asyncio lib in my Python script. I noticed that it takes around 1 s from writing to reading, which in my opinion is far too long; it should only take a few ms. Has anybody else seen this, or have any idea?
import asyncio
import time

import serial_asyncio


class Output(asyncio.Protocol):
    def connection_made(self, transport):
        global start
        self.transport = transport
        print('port opened', transport)
        cmd = b'config A'
        print("write cmd: " + str(cmd))
        start = time.time()
        transport.write(cmd)  # Write serial data via transport

    def data_received(self, data):
        print('data received', repr(data))
        end = time.time()
        print("write-receive: {0:0.3f}".format(end - start))
        if b'\n' in data:
            self.transport.close()

    def connection_lost(self, exc):
        print('port closed')
        self.transport.loop.stop()


if __name__ == '__main__':
    port = '/dev/ttyACM0'
    loop = asyncio.get_event_loop()
    coro = serial_asyncio.create_serial_connection(loop, Output, port, baudrate=921600)
    loop.run_until_complete(coro)
    loop.run_forever()
    loop.close()
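For what it's worth, one way to narrow this down is to time the identical exchange with plain blocking pyserial; if that also takes about a second, the latency comes from the device rather than from the asyncio layer. A minimal diagnostic sketch (assuming the same port, command, and a newline-terminated reply):

import time

import serial  # plain blocking pyserial

# Diagnostic: time the same write/read exchange without asyncio
ser = serial.Serial('/dev/ttyACM0', baudrate=921600, timeout=2)
start = time.time()
ser.write(b'config A')
reply = ser.readline()  # blocks until b'\n' or the 2 s timeout
print("write-receive: {0:0.3f}".format(time.time() - start))
ser.close()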
I have a simple Python program that I want to do three things:
Serve an HTTP document
Serve Websockets
Interact with the Websocket data
I am trying to use / grok asyncio. The issue is that I can't figure out how to access data acquired from a function in the main event loop.
For example, in my code below I spawn two threads:
one is the HTTP server thread, one is the Websocket server thread, and then there is the main thread.
What I want to do is to print data captured in the websocket receiving thread in the main thread.
The only way I know how to do this is to use Queues to pass data between threads at which point I do not even know what the advantage of using asyncio is.
Similarly, it feels weird to pass the event loop to the serve_websocket function.
Can anyone please explain how to architect this to get data from the Websocket function into the main function?
Ideally I want a way to do this without using the threading library at all, which seems possible. In an async project I would want to react to websocket events in a different function than where they are called.
NOTE: I know there are other libraries for websockets and HTTP serving with asyncio, but this is an example to help me understand how to structure projects using this paradigm.
Thanks
#!/usr/bin/env python
import json
import socketserver
import threading
import http.server
import asyncio
import time

import websockets

SERVER_ADDRESS = '127.0.0.1'
HTTP_PORT = 8087
WEBSOCKET_PORT = 5678


def serve_http():
    http_handler = http.server.SimpleHTTPRequestHandler
    with socketserver.TCPServer(("", HTTP_PORT), http_handler) as httpd:
        print(f'HTTP server listening on port {HTTP_PORT}')
        httpd.serve_forever()


def serve_websocket(server, event_loop):
    print(f'Websocket server listening on port {WEBSOCKET_PORT}')
    event_loop.run_until_complete(server)
    event_loop.run_forever()


async def ws_callback(websocket, path):
    while True:
        data = await websocket.recv()
        # How do I access parsed_data in the main function below?
        parsed_data = json.loads(data)
        await websocket.send(data)


def main():
    event_loop = asyncio.get_event_loop()
    ws_server = websockets.serve(ws_callback, SERVER_ADDRESS, WEBSOCKET_PORT)
    threading.Thread(target=serve_http, daemon=True).start()
    threading.Thread(target=serve_websocket, args=(ws_server, event_loop), daemon=True).start()
    try:
        while True:
            # Keep alive - this is where I want to access the data from ws_callback
            # i.e.
            # print(data.values)
            time.sleep(.01)
    except KeyboardInterrupt:
        print('Exit called')


if __name__ == '__main__':
    main()
I believe you should not mix asyncio and multithreading without special need; in your case, asyncio tools alone are enough.
Then you have no problem sharing data between coroutines, because they all run on the same thread using cooperative multitasking.
Your code can be rewritten as:
#!/usr/bin/env python
import json
import asyncio

import websockets

SERVER_ADDRESS = '127.0.0.1'
HTTP_PORT = 8087
WEBSOCKET_PORT = 5678

parsed_data = {}


async def handle_http(reader, writer):
    # A minimal TCP echo placeholder standing in for the HTTP server
    data = await reader.read(100)
    message = data.decode()
    writer.write(data)
    await writer.drain()
    writer.close()


async def ws_callback(websocket, path):
    global parsed_data
    while True:
        data = await websocket.recv()
        # parsed_data is shared with main() via the module-level global
        parsed_data = json.loads(data)
        await websocket.send(data)


async def main():
    ws_server = await websockets.serve(ws_callback, SERVER_ADDRESS, WEBSOCKET_PORT)
    print(f'Websocket server listening on port {WEBSOCKET_PORT}')
    http_server = await asyncio.start_server(
        handle_http, SERVER_ADDRESS, HTTP_PORT)
    print(f'HTTP server listening on port {HTTP_PORT}')
    try:
        while True:
            if parsed_data:
                print(parsed_data.values())
            await asyncio.sleep(0.1)
    except KeyboardInterrupt:
        print('Exit called')


if __name__ == '__main__':
    asyncio.run(main())
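If you would rather avoid the module-level global, one variation on the same idea (a sketch of my own, not part of the answer above) is to pass messages between coroutines with an asyncio.Queue, so the consumer blocks cooperatively until data actually arrives; ws_callback and consumer here would replace the handler and the while loop in main():

import asyncio
import json

data_queue = asyncio.Queue()

async def ws_callback(websocket, path):
    # Push each parsed message onto the queue, then echo it back
    async for raw in websocket:
        await data_queue.put(json.loads(raw))
        await websocket.send(raw)

async def consumer():
    # Waits cooperatively until ws_callback puts something in the queue
    while True:
        parsed = await data_queue.get()
        print(parsed.values())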
I need to use asyncio with the os.fork() method to share a socket between subprocesses.
There is a heavy_jobs() function in the data_received() callback that occupies a lot of CPU time.
import asyncio


class EchoClientProtocol(asyncio.Protocol):
    def __init__(self, message, loop):
        self.message = message
        self.loop = loop

    def data_received(self, data):
        heavy_jobs()  # CPU-bound work, defined elsewhere


loop = asyncio.get_event_loop()
message = 'Hello World!'
coro = loop.create_connection(lambda: EchoClientProtocol(message, loop),
                              '127.0.0.1', 8000)
loop.run_until_complete(coro)
loop.run_forever()
loop.close()
With the traditional method, we could use fork() to share a socket between the parent and child processes:
bind(...);
listen(...);
pid = fork();
So, how could I do the same thing in asyncio?
Currently asyncio does not support fork while the event loop is running (https://bugs.python.org/issue21998). You must fork first and then create a loop in each process. A simple EchoClient with two processes:
import asyncio
import os
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('127.0.0.1', 7777))
pid = os.fork()


class EchoClientProtocol(asyncio.Protocol):
    def __init__(self, message, loop):
        self.message = message
        self.loop = loop

    def data_received(self, data):
        print('Received in %s' % pid)


loop = asyncio.get_event_loop()
message = 'Hello World!'
coro = loop.create_connection(lambda: EchoClientProtocol(message, loop), sock=sock)
loop.run_until_complete(coro)
loop.run_forever()
loop.close()
And a simple test: run nc -k -l 7777, then start the client (code above).
If you also want to write a server, just replace connect with socket.bind and socket.listen, and use loop.create_server instead of create_connection.
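For illustration, a minimal sketch of that server variant (my adaptation of the client above, under the same two-process layout): bind and listen before forking, then let each process drive its own loop around the shared listening socket.

import asyncio
import os
import socket

# Bind and listen before forking so both processes share the listening socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(('127.0.0.1', 7777))
sock.listen(128)
pid = os.fork()


class EchoServerProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport

    def data_received(self, data):
        print('Handled by process %s' % os.getpid())
        self.transport.write(data)  # echo the data back


# Each process creates its own loop after the fork
loop = asyncio.get_event_loop()
server = loop.run_until_complete(
    loop.create_server(EchoServerProtocol, sock=sock))
loop.run_forever()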