NATS WEBSOCKET Python WebSocket Connection - python

I want to do the following thing:
I want to make a websocket, which prints me all events about esports and I want to use https://sofascore.com
I've inspected the network requests as usual, and it seems that I need to send an auth WebSocket message first, then one subscribing to the right sport, and then I will receive the events I need.
I've written the following code:
import websockets
import asyncio
from websockets.extensions import permessage_deflate
async def esports():
    """Connect to the SofaScore websocket and print every frame received.

    Sends ``CONNECT``, ``PING`` and ``SUB`` in that order, printing the
    server's replies, then prints incoming frames forever.
    """
    async with websockets.connect('wss://ws.sofascore.com:9222/', compression='deflate') as websocket:
        # The server speaks first; read its opening frame before sending.
        msg = await websocket.recv()
        print(f"From Server: {msg}")
        await websocket.send(
            'CONNECT {"no_responders":true,"protocol":1,"verbose":false,"pedantic":false,"user":"none","pass":"none","lang":"nats.ws","version":"1.8.1","headers":true}')
        await websocket.send("PING")
        pong = await websocket.recv()
        print(f"From Server: {pong}")
        await websocket.send(
            'SUB sport.esports 6')
        while True:
            msg = await websocket.recv()
            print(f"From Server: {msg}")
asyncio.get_event_loop().run_until_complete(esports())
I know that the websocket is compressed with permessage_deflate, as I saw in the request headers of the websocket.
But I still get an error:
Traceback (most recent call last):
File "C:\Users\Coding\Desktop\websockett.py", line 23, in <module>
asyncio.get_event_loop().run_until_complete(esports())
File "C:\Users\Coding\AppData\Local\Programs\Python\Python39-32\lib\asyncio\base_events.py", line 642, in run_until_complete
return future.result()
File "C:\Users\Coding\Desktop\websockett.py", line 15, in esports
await websocket.send(
File "C:\Users\Coding\AppData\Roaming\Python\Python39\site-packages\websockets\legacy\protocol.py", line 620, in send
await self.ensure_open()
File "C:\Users\Coding\AppData\Roaming\Python\Python39\site-packages\websockets\legacy\protocol.py", line 921, in ensure_open
raise self.connection_closed_exc()
websockets.exceptions.ConnectionClosedError: received 1008 (policy violation) Authentication Timeout; then sent 1008 (policy violation) Authentication Timeout
Process finished with exit code 1
EDIT:
I have now found out that the whole thing works with the NATS messaging system. Is there any way to use NATS with a library that also supports WebSockets?
I haven't found one on GitHub or PyPI, unfortunately.

Ideally you would be able to use the nats-py library:
import asyncio
import nats
async def handler(msg):
    """Print a message delivered to the subscription."""
    print(f"From server: {msg}")
async def main():
    """Connect to the server and subscribe ``handler`` to ``sport.esports``."""
    nc = await nats.connect("wss://ws.sofascore.com:9222")
    await nc.subscribe("sport.esports", cb=handler)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    # main() returns right after subscribing; keep the loop running so the
    # subscription callback can keep being invoked.
    try:
        loop.run_forever()
    finally:
        loop.close()
However, this library does not currently support connecting with WebSockets, so
the above doesn't work (yet - it looks like it's being worked on right
now).
For your code, the only reason it fails is that the messages you're sending
don't end with \r\n, which the NATS protocol requires. The code works as
expected with this change:
import asyncio
import websockets
async def esports():
    """Subscribe to ``sport.esports`` over the websocket and print each frame.

    Every command sent here is terminated with CRLF.
    """
    async with websockets.connect('wss://ws.sofascore.com:9222') as websocket:
        # The server speaks first; read its opening frame before sending.
        msg = await websocket.recv()
        print(f"From Server: {msg}")
        await websocket.send(
            'CONNECT {"no_responders":true,"protocol":1,"verbose":false,"pedantic":false,"user":"none","pass":"none","lang":"nats.ws","version":"1.8.1","headers":true}\r\n'
        )
        await websocket.send("SUB sport.esports 1\r\n")
        async for msg in websocket:
            print(f"From Server: {msg}")
asyncio.run(esports())
Of course this will eventually get disconnected because it doesn't respond to
PING messages. Here's a little more fleshed out script which implements enough
of the NATS protocol to log the sport.esports messages:
import asyncio
import json
import textwrap
from dataclasses import dataclass
import websockets
class SofaError(Exception):
    """Raised when a received message is malformed or the server reports an error."""
def message_string(message, data=None, pretty=False):
    """Return ``message`` with ``data`` appended as JSON.

    Args:
        message: Text placed in front of the JSON.
        data: JSON-serialisable value; when ``None`` nothing is appended.
        pretty: Use a 2-space indented layout instead of the compact one.

    Returns:
        The combined string.
    """
    if data is None:
        return message
    if pretty:
        return message + json.dumps(data, indent=2)
    # Compact separators: no spaces after "," and ":".
    return message + json.dumps(data, separators=(",", ":"))
def log(pre, message, data=None):
    """Print the pretty-printed message with ``pre`` prefixed to its lines."""
    print(textwrap.indent(message_string(message, data, True), pre))
def recv_log(message, data=None):
    """Log a received message, marked with a ``< `` prefix."""
    log("< ", message, data)
async def send(websocket, message, data=None):
    """Log an outgoing command, then send it CRLF-terminated as bytes."""
    log("> ", message, data)
    # Compact JSON on the wire; the pretty form is only used for logging.
    payload = (message_string(message, data, False) + "\r\n").encode()
    await websocket.send(payload)
async def connect_and_subscribe(websocket):
    """Send the ``CONNECT`` command followed by a ``sport.esports`` subscription."""
    connect_options = {
        "no_responders": True,
        "protocol": 1,
        "verbose": False,
        "pedantic": False,
        "user": "none",
        "pass": "none",
        "lang": "nats.ws",
        "version": "1.8.1",
        "headers": True,
    }
    await send(websocket, "CONNECT ", connect_options)
    await send(websocket, "SUB sport.esports 1")
@dataclass
class NatsMsg:
    """One parsed ``MSG`` frame: header fields plus the raw payload."""

    subject: str
    sid: str
    reply_to: str  # None when the MSG line has no reply-to field
    size: int
    payload: bytes
def parse_msg(info_line, pending_data):
    """Parse the arguments of a ``MSG`` line and slice off its payload.

    Args:
        info_line: Bytes that followed ``MSG `` on the control line, either
            ``subject sid size`` or ``subject sid reply_to size``.
        pending_data: Bytes received after the control line.

    Returns:
        ``(NatsMsg, remaining)`` where ``remaining`` is ``pending_data``
        with the first ``size`` bytes removed.

    Raises:
        SofaError: If the line is empty, has an unexpected field count, the
            size is not a non-negative integer, or fewer than ``size`` bytes
            are available.
    """
    if not info_line:
        raise SofaError("No payload information received")
    info = [field.decode(errors="replace") for field in info_line.split(b" ")]
    if len(info) == 3:
        subject, sid, size = info
        reply_to = None
    elif len(info) == 4:
        subject, sid, reply_to, size = info
    else:
        raise SofaError("Unrecognized info format")
    try:
        size = int(size)
    except ValueError as err:
        raise SofaError("Bad payload size") from err
    if size < 0:
        # A negative size would slice from the end of the buffer.
        raise SofaError("Bad payload size")
    if len(pending_data) < size:
        raise SofaError("Incomplete payload")
    payload = pending_data[:size]
    remaining = pending_data[size:]
    return NatsMsg(subject, sid, reply_to, size, payload), remaining
async def handler(websocket, ws_message, connected):
    """Process every protocol line contained in one websocket message.

    Args:
        websocket: Connection used to send replies.
        ws_message: Raw bytes of the websocket message.
        connected: Whether CONNECT/SUB have already been sent.

    Returns:
        The updated ``connected`` flag.

    Raises:
        SofaError: If the server sent an ``-ERR`` line.
    """
    while ws_message:
        nats_message, _, ws_message = ws_message.partition(b"\r\n")
        if not nats_message:
            # Empty segment, e.g. the CRLF that trails a MSG payload.
            continue
        op, _, rest = nats_message.partition(b" ")
        if op == b"-ERR":
            recv_log(nats_message.decode(errors="replace"))
            err = rest.strip(b"'").decode(errors="replace") if rest else "(No message received)"
            raise SofaError(f"Server error: {err}")
        elif op == b"INFO":
            info_options = json.loads(rest) if rest else None
            recv_log("INFO ", info_options)
            if not connected:
                await connect_and_subscribe(websocket)
                connected = True
        elif op == b"PING":
            recv_log("PING")
            await send(websocket, "PONG")
        elif op == b"MSG":
            try:
                # The payload is in the bytes that follow the control line.
                msg, ws_message = parse_msg(rest, ws_message)
            except SofaError as e:
                recv_log(f"MSG (Error: {e}) {rest}")
                continue
            msg_info = (
                f"MSG subject={msg.subject} sid={msg.sid} "
                f"reply-to={msg.reply_to} nbytes={msg.size}:\n"
            )
            try:
                decoded = msg.payload.decode()
                data = json.loads(decoded)
            except UnicodeError:
                # Not text: log the raw bytes.
                recv_log(f"{msg_info}{msg.payload}")
            except json.JSONDecodeError:
                # Text but not JSON: log the decoded string.
                recv_log(f"{msg_info}{decoded}")
            else:
                recv_log(msg_info, data)
        else:
            recv_log(f"(Unhandled op) {nats_message.decode(errors='replace')}")
    return connected
async def main():
    """Open the websocket and pass each incoming message to ``handler``."""
    async with websockets.connect("wss://ws.sofascore.com:9222") as websocket:
        connected = False
        async for message in websocket:
            connected = await handler(websocket, message, connected)


if __name__ == "__main__":
    asyncio.run(main())

Related

Discord.py async-await won't close connection with Airflow DAG/Task

I'm trying to post a dataframe to a Discord channel. However, I am having issue getting Discord.py to close the connection and move on the next task. I've tried using the event loop as suggested in this thread (How to run async function in Airflow?) as well as asyncio.run() function. Not really familiar with the async and hoping to get some pointers here. Below is my code in Python that I've tried importing in DAG and Task without success. Thanks in advance!
Airflow: 2.5.1
Python: 3.7
import discord
from tabulate import tabulate
import asyncio
import pandas as pd
async def post_to_discord(df, channel_id, bot_token, as_message=True, num_rows=5):
    """Post a dataframe to a Discord channel as messages or as a CSV file.

    Args:
        df: Dataframe to post.
        channel_id: Id passed to ``client.get_channel``.
        bot_token: Token passed to ``client.start``.
        as_message: Post as tabulated messages when true, else as a CSV file.
        num_rows: Number of rows per message.
    """
    intents = discord.Intents.default()
    intents.members = True
    client = discord.Client(intents=intents)
    try:
        @client.event
        async def on_ready():
            channel = client.get_channel(channel_id)
            if as_message:
                # Post the dataframe as a message, num_rows rows at a time
                for i in range(0, len(df), num_rows):
                    message = tabulate(df.iloc[i:i+num_rows,:], headers='keys', tablefmt='pipe', showindex=False)
                    await channel.send(message)
            else:
                # Send the dataframe as a CSV file
                df.to_csv("dataframe.csv", index=False)
                with open("dataframe.csv", "rb") as f:
                    await channel.send(file=discord.File(f))
        # client.run(bot_token)
        await client.start(bot_token)
        await client.wait_until_ready()
    finally:
        await client.close()
async def main(df, channel_id, bot_token, as_message=True, num_rows=5):
    """Run ``post_to_discord`` and return its result."""
    # loop = asyncio.get_event_loop()
    # result = loop.run_until_complete(post_to_discord(df, channel_id, bot_token, as_message, num_rows))
    # NOTE(review): asyncio.run() raises RuntimeError when called from a
    # running event loop, and its return value is not awaitable.
    result = asyncio.run(post_to_discord(df, channel_id, bot_token, as_message, num_rows))
    await result
    return result


if __name__ =='__main__':
    # NOTE(review): main is a coroutine function with required arguments;
    # this call passes none and never awaits the result.
    main()
It seems like your script works but the server is blocking the open socket (and kudos - the Discord server is good at that). So we will work around it by creating a ping function (adapted from another answer).
def ping(ip, port):
    """Attempt a TCP connection to ``ip:port`` with a 2 second timeout.

    Returns:
        True if the connection succeeds or the attempt times out, False on
        any other error.
    """
    s = None  # stays None if socket creation itself fails
    try:
        s = socket.socket()  # TCP - standard values are `socket.AF_INET, socket.SOCK_STREAM` so you don't have to write them
        s.settimeout(2)
        print('[DEBUG] connect')
        s.connect((ip, int(port)))
        return True
    except socket.timeout:
        print('[DEBUG] timeout')
        return True
    except Exception as ex:
        print('[Exception]', ex)
        return False
    finally:
        print('[DEBUG] close')
        if s is not None:
            s.close()
Feel free to test your ID
id = ...channel number...
print(id, type(id))
and you should see
<built-in function id> <class 'builtin_function_or_method'>
Then let us move on to improving your code:
import discord
import asyncio
import time # you are not using this module
import socket
import os
from tabulate import tabulate
import pandas as pd # as pd is not required
def ping(ip, port):
    """Attempt a TCP connection to ``ip:port`` with a 2 second timeout.

    Returns:
        True if the connection succeeds or the attempt times out, False on
        any other error.
    """
    s = None  # stays None if socket creation itself fails
    try:
        s = socket.socket()  # TCP - standard values are `socket.AF_INET, socket.SOCK_STREAM` so you don't have to write them
        s.settimeout(2)
        print('[DEBUG] connect')
        s.connect((ip, int(port)))
        return True
    except socket.timeout:
        print('[DEBUG] timeout')
        return True
    except Exception as ex:
        print('[Exception]', ex)
        return False
    finally:
        print('[DEBUG] close')
        if s is not None:
            s.close()
TOKEN = os.getenv('DISCORD_TOKEN')
client = discord.Client()
async def post_to_discord(df, channel_id, bot_token, as_message=True, num_rows=5):
    """Post a dataframe to a Discord channel, then poll a server with ``ping``.

    Args:
        df: Dataframe to post.
        channel_id: Id passed to ``client.get_channel``.
        bot_token: Token passed to ``client.start``.
        as_message: Post as tabulated messages when true, else as a CSV file.
        num_rows: Number of rows per message.
    """
    intents = discord.Intents.default()
    intents.members = True
    client = discord.Client(intents=intents)
    try:
        @client.event
        async def on_ready():
            channel = client.get_channel(channel_id)
            if as_message:
                # Post the dataframe as a message, num_rows rows at a time
                for i in range(0, len(df), num_rows):
                    message = tabulate(df.iloc[i:i+num_rows,:], headers='keys', tablefmt='pipe', showindex=False)
                    await channel.send(message)
            else:
                # Send the dataframe as a CSV file
                df.to_csv("dataframe.csv", index=False)
                with open("dataframe.csv", "rb") as f:
                    await channel.send(file=discord.File(f))
        # client.run(bot_token)
        await client.start(bot_token)
        await client.wait_until_ready()
        while True:
            online = ping("26.51.174.109", "25565") #modify it as you see fit
            #online = ping("192.168.1.101", "8081") #same as above
            if online:
                print("server online")
                #await channel.edit(name="Server Status - Online")
            else:
                print("server offline")
                #await channel.edit(name="Server Status - Offline")
            await asyncio.sleep(5)
        # optional - client.run(TOKEN)
    finally:
        await client.close()
async def main(df, channel_id, bot_token, as_message=True, num_rows=5):
    """Run ``post_to_discord`` and return its result."""
    # loop = asyncio.get_event_loop()
    # result = loop.run_until_complete(post_to_discord(df, channel_id, bot_token, as_message, num_rows))
    # NOTE(review): asyncio.run() raises RuntimeError when called from a
    # running event loop, and its return value is not awaitable.
    result = asyncio.run(post_to_discord(df, channel_id, bot_token, as_message, num_rows))
    await result
    return result


if __name__ =='__main__':
    # NOTE(review): main is a coroutine function with required arguments;
    # this call passes none and never awaits the result.
    main()

How to connect via websocket?

I have such part of code(Python):
async def func_login_params():
    """Log in and send the params over a single websocket connection."""
    async with websockets.connect(url) as ws:
        # login
        timestamp = str(get_local_timestamp())
        login_str = login_params(timestamp, api_key, passphrase, secret_key)
        await ws.send(login_str)
        res = await ws.recv()
        # params
        sub_str = json.dumps(params)
        await ws.send(sub_str)
The code above works fine. But I need two functions: the first to log in, the second to send the params. I thought I could do this:
async def func_login():
    """Open a new websocket connection and log in on it."""
    async with websockets.connect(url) as ws:
        # login
        timestamp = str(get_local_timestamp())
        login_str = login_params(timestamp, api_key, passphrase, secret_key)
        await ws.send(login_str)
        res = await ws.recv()
async def func_params():
    """Open a new websocket connection and send the params on it."""
    async with websockets.connect(url) as ws:
        # params
        sub_str = json.dumps(params)
        await ws.send(sub_str)
But in this situation the params don't arrive. Maybe it doesn't connect correctly?
Each call to websockets.connect creates a new connection.
So, in that case, you should take a websocket connection as a function argument:
async def func_login(ws):
    """Send the login message on ``ws`` and wait for the reply."""
    timestamp = str(get_local_timestamp())
    login_str = login_params(timestamp, api_key, passphrase, secret_key)
    await ws.send(login_str)
    res = await ws.recv()
async def func_params(ws):
    """Send the JSON-encoded params on ``ws``."""
    sub_str = json.dumps(params)
    await ws.send(sub_str)
async def main():
    """Open one connection and run login and params on it, in that order."""
    async with websockets.connect(url) as ws:
        await func_login(ws)
        await func_params(ws)

Migrating a Quart project with websockets from asyncio to trio

I'm trying to convert my asyncio project to trio.
I understand that I have to use memory channels instead of Queues but for some reason I don't have the result I'm expecting.
My main problem is that when I run two clients, the first one does not get notified if the second one leaves (broadcasting the 'part' message from the server raises an error).
Another problem is that sometimes the client exits immediately when opening the websocket.
When I use asyncio, everything works fine.
Here is the stack trace I get when the second client is disconnecting:
[2021-07-30 18:39:51,899] ERROR in app: Exception on websocket /ws
Traceback (most recent call last):
File "/tmp/debug/venv/lib/python3.9/site-packages/quart_trio/app.py", line 175, in handle_websocket
return await self.full_dispatch_websocket(websocket_context)
File "/tmp/debug/venv/lib/python3.9/site-packages/quart_trio/app.py", line 197, in full_dispatch_websocket
result = await self.handle_user_exception(error)
File "/tmp/debug/venv/lib/python3.9/site-packages/quart_trio/app.py", line 166, in handle_user_exception
raise error
File "/tmp/debug/venv/lib/python3.9/site-packages/quart_trio/app.py", line 195, in full_dispatch_websocket
result = await self.dispatch_websocket(websocket_context)
File "/tmp/debug/venv/lib/python3.9/site-packages/quart/app.py", line 1651, in dispatch_websocket
return await self.ensure_async(handler)(**websocket_.view_args)
File "/tmp/debug/server.py", line 103, in wsocket
nursery.start_soon(receiving, u)
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 815, in __aexit__
raise combined_error_from_nursery
trio.MultiError: Cancelled(), Cancelled(), Cancelled()
Details of embedded exception 1:
Traceback (most recent call last):
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 1172, in raise_cancel
raise Cancelled._create()
trio.Cancelled: Cancelled
Details of embedded exception 2:
Traceback (most recent call last):
File "/tmp/debug/server.py", line 68, in receiving
data = await websocket.receive_json()
File "/tmp/debug/venv/lib/python3.9/site-packages/quart/wrappers/websocket.py", line 68, in receive_json
data = await self.receive()
File "/tmp/debug/venv/lib/python3.9/site-packages/quart/wrappers/websocket.py", line 57, in receive
return await self._receive()
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_channel.py", line 314, in receive
return await trio.lowlevel.wait_task_rescheduled(abort_fn)
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_traps.py", line 166, in wait_task_rescheduled
return (await _async_yield(WaitTaskRescheduled(abort_func))).unwrap()
File "/tmp/debug/venv/lib/python3.9/site-packages/outcome/_impl.py", line 138, in unwrap
raise captured_error
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 1172, in raise_cancel
raise Cancelled._create()
trio.Cancelled: Cancelled
Details of embedded exception 3:
Traceback (most recent call last):
File "/tmp/debug/server.py", line 54, in sending
data = await u.queue_recv.receive()
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_channel.py", line 314, in receive
return await trio.lowlevel.wait_task_rescheduled(abort_fn)
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_traps.py", line 166, in wait_task_rescheduled
return (await _async_yield(WaitTaskRescheduled(abort_func))).unwrap()
File "/tmp/debug/venv/lib/python3.9/site-packages/outcome/_impl.py", line 138, in unwrap
raise captured_error
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 1172, in raise_cancel
raise Cancelled._create()
trio.Cancelled: Cancelled
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/debug/server.py", line 63, in sending
await broadcast({'type': 'part', 'data': u.name})
File "/tmp/debug/server.py", line 75, in broadcast
await user.queue_send.send(message)
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_channel.py", line 159, in send
await trio.lowlevel.checkpoint_if_cancelled()
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 2361, in checkpoint_if_cancelled
await _core.checkpoint()
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 2339, in checkpoint
await _core.wait_task_rescheduled(lambda _: _core.Abort.SUCCEEDED)
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_traps.py", line 166, in wait_task_rescheduled
return (await _async_yield(WaitTaskRescheduled(abort_func))).unwrap()
File "/tmp/debug/venv/lib/python3.9/site-packages/outcome/_impl.py", line 138, in unwrap
raise captured_error
File "/tmp/debug/venv/lib/python3.9/site-packages/trio/_core/_run.py", line 1172, in raise_cancel
raise Cancelled._create()
trio.Cancelled: Cancelled
Here is the code (set TRIO to False to use asyncio):
server.py
#!/usr/bin/env python
from quart import Quart, websocket, request, jsonify, json
from quart_trio import QuartTrio
from functools import wraps
import uuid
import trio
import asyncio
from quart_auth import AuthUser, AuthManager, login_user, _AuthSerializer
TRIO = True
# Choose the application class according to the TRIO switch.
if TRIO:
    app = QuartTrio(__name__)
else:
    app = Quart(__name__)
app.secret_key = '**changeme**'
authorized_users = set()
class User(AuthUser):
    """An authenticated chat user plus its queues and open websockets."""

    @staticmethod
    def current():
        """Return the authorized user matching the websocket's auth cookie.

        Returns:
            The matching entry of ``authorized_users``, or ``None``.
        """
        token = websocket.cookies['QUART_AUTH']
        serializer = _AuthSerializer('**changeme**', 'quart auth salt')
        user_id = serializer.loads(token)
        for u in authorized_users:
            if u.auth_id == user_id:
                return u
        return None

    def __init__(self, auth_id):
        super().__init__(auth_id)
        self.name = None
        self.queue = None  # asyncio
        self.queue_send = None  # trio
        self.queue_recv = None  # trio
        self.connected = False
        self.websockets = set()

    def to_dict(self):
        """Return the user's id and name as a dict."""
        return {
            'id': self.auth_id,
            'name': self.name
        }
auth_manager = AuthManager()
auth_manager.user_class = User
async def sending(u: User):
    """Announce the join, then forward queued messages to the user's sockets.

    On exit the current websocket is removed; once none remain the user is
    marked disconnected and a 'part' message is broadcast.
    """
    await broadcast({'type': 'join', 'data': u.name})
    try:
        while True:
            if TRIO:
                data = await u.queue_recv.receive()
            else:
                data = await u.queue.get()
            for s in u.websockets:
                await s.send_json(data)
    finally:
        u.websockets.remove(websocket._get_current_object())
        if not u.websockets:
            u.connected = False
            await broadcast({'type': 'part', 'data': u.name})
async def receiving(u: User):
    """Read JSON messages from the websocket and broadcast the 'msg' ones."""
    while True:
        data = await websocket.receive_json()
        if data['type'] == 'msg':
            await broadcast({'type': 'msg', 'user': u.name, 'data': data['data']})
async def broadcast(message):
    """Queue ``message`` for every connected authorized user."""
    # Snapshot the recipients first, as the original list comprehension did.
    for user in [u for u in authorized_users if u.connected]:
        if TRIO:
            await user.queue_send.send(message)
        else:
            await user.queue.put(message)
@app.route('/api/v1/auth', methods=['POST'])
async def auth_login():
    """Create a user from the posted login, log it in and return its id/name."""
    data = await request.json
    user_id = str(uuid.uuid4())[:8]
    u = User(user_id)
    u.name = data['login'] or 'Anonymous'+user_id
    if TRIO:
        u.queue_send, u.queue_recv = trio.open_memory_channel(float('inf'))
    else:
        u.queue = asyncio.Queue()
    login_user(u, True)
    authorized_users.add(u)
    return jsonify({'id': user_id, 'name': u.name}), 200
@app.websocket('/ws')
async def wsocket():
    """Register the websocket on its user and run the send/receive tasks."""
    u = User.current()
    if u is None:
        return
    u.websockets.add(websocket._get_current_object())
    u.connected = True
    if TRIO:
        async with trio.open_nursery() as nursery:
            nursery.start_soon(sending, u)
            nursery.start_soon(receiving, u)
    else:
        producer = asyncio.create_task(sending(u))
        consumer = asyncio.create_task(receiving(u))
        await asyncio.gather(producer, consumer)
auth_manager.init_app(app)
if __name__ == "__main__":
    app.run(host='localhost', port=8080)
client.py
#!/usr/bin/env python
import asks
import trio
import trio_websocket
import json
asks.init(trio)
class User:
    """Holds a user's display name."""

    def __init__(self, name: str="") -> None:
        self.name = name
class Client(User):
    """Chat client: logs in over HTTP, then talks to the server over a websocket."""

    def __init__(self) -> None:
        super(Client, self).__init__()
        self.web_url = 'http://localhost:8080/api/v1'
        self.ws_url = 'ws://localhost:8080/ws'
        self.ws = None
        self.nursery = None
        self.cookiejar = {}

    async def send(self, msg: dict) -> None:
        """Send ``msg`` as JSON; does nothing until the websocket is set."""
        if self.ws is not None:
            await self.ws.send_message(json.dumps(msg))

    async def reader(self, websocket) -> None:
        """Print incoming messages until the connection is closed."""
        while True:
            try:
                message_raw = await websocket.get_message()
                msg = json.loads(message_raw)
                if msg['type'] == 'msg':
                    print(f"<{msg['user']}> {msg['data']}")
                elif msg['type'] == 'join':
                    print(f"* {msg['data']} joined")
                elif msg['type'] == 'part':
                    print(f"* {msg['data']} left")
            except trio_websocket.ConnectionClosed:
                break

    async def login(self) -> None:
        """Post the login and keep the ``QUART_AUTH`` cookie from the response."""
        rlogin = await asks.post(self.web_url + '/auth', json={'login': self.name, 'password': 'password'})
        for c in rlogin.cookies:
            if c.name == 'QUART_AUTH':
                self.cookiejar = {'QUART_AUTH': c.value}

    async def connect(self) -> None:
        """Log in, open the websocket with the auth cookie, greet, then read."""
        await self.login()
        async with trio_websocket.open_websocket_url(self.ws_url, extra_headers=[('Cookie', 'QUART_AUTH'+'='+self.cookiejar['QUART_AUTH'])]) as websocket:
            self.ws = websocket
            await self.send({'type': 'msg', 'data': 'hello'})
            async with trio.open_nursery() as nursery:
                self.nursery = nursery
                nursery.start_soon(self.reader, websocket)

    def run(self) -> None:
        """Run ``connect`` under ``trio.run``."""
        trio.run(self.connect)
c = Client()
c.name = 'clientA'
c.run()
Edit: I tested using anyio and while anyio+trio acts the same, anyio+asyncio reproduces the problem (without any exception). So I guess it comes from the Queue replacement.
Ok, @tibs, I think I've found the issue. The problem is with the way that Trio handles cancellation. For full details, have a read of this doc:
https://trio.readthedocs.io/en/stable/reference-core.html#cancellation-and-timeouts
To explain what's going on here: when a user disconnects, Quart-Trio raises a Cancelled exception in every coroutine that's running/waiting under that websocket. For a websocket user, there are two spots that will currently be waiting:
In async def sending(u: User):
async def sending(u: User):
    """Announce the join, then forward queued messages to the user's sockets."""
    await broadcast({'type': 'join', 'data': u.name})
    try:
        while True:
            if TRIO:
                data = await u.queue_recv.receive()  # <--- Code is waiting here, Cancelled is raised here
            else:
                data = await u.queue.get()
            for s in u.websockets:
                await s.send_json(data)
    finally:
        u.websockets.remove(websocket._get_current_object())
        if not u.websockets:
            u.connected = False
            await broadcast({'type': 'part', 'data': u.name})
In async def receiving(u: User):
async def receiving(u: User):
    """Read JSON messages from the websocket and broadcast the 'msg' ones."""
    while True:
        data = await websocket.receive_json()  # <--- Code is waiting here, Cancelled is raised here
        if data['type'] == 'msg':
            await broadcast({'type': 'msg', 'user': u.name, 'data': data['data']})
Okay, so what happens from here? Well, in the sending() function we move down to the finally block, which begins executing, but then we call another awaitable function:
finally:
u.websockets.remove(websocket._get_current_object())
if len(u.websockets) == 0:
u.connected = False
await broadcast({'type': 'part', 'data': u.name}) <--- we call an awaitable here
From the Trio docs:
Cancellations in Trio are “level triggered”, meaning that once a block has been cancelled, all cancellable operations in that block will keep raising Cancelled.
So when await broadcast(...) is called, it is immediately cancelled; asyncio behaves differently here. This explains why your "part" message is never sent. So with Trio, if you want to do some cleanup work while you are being cancelled, you should open a new cancel scope and shield it from being cancelled, like this:
async def sending(u: User):
    """Forward queued messages; broadcast 'part' from a shielded cancel scope."""
    await broadcast({'type': 'join', 'data': u.name})
    try:
        while True:
            if TRIO:
                data = await u.queue_recv.receive()  # <--- Code is waiting here, Cancelled is raised here
            else:
                data = await u.queue.get()
            for s in u.websockets:
                await s.send_json(data)
    finally:
        u.websockets.remove(websocket._get_current_object())
        if not u.websockets:
            u.connected = False
            with trio.move_on_after(5) as leaving_cancel_scope:
                # Shield from the cancellation for 5s to run the broadcast of leaving
                leaving_cancel_scope.shield = True
                await broadcast({'type': 'part', 'data': u.name})
Alternatively, you could start the broadcast coroutine on the app nursery. Be aware that if broadcast(...) crashes you will crash the whole running app, unless you put a try/except in the broadcast(...) function:
async def sending(u: User):
    """Forward queued messages; hand the 'part' broadcast to the app nursery."""
    await broadcast({'type': 'join', 'data': u.name})
    try:
        while True:
            if TRIO:
                data = await u.queue_recv.receive()
            else:
                data = await u.queue.get()
            for s in u.websockets:
                await s.send_json(data)
    finally:
        u.websockets.remove(websocket._get_current_object())
        if not u.websockets:
            u.connected = False
            # Started on the app nursery instead of being awaited here.
            app.nursery.start_soon(broadcast, {'type': 'part', 'data': u.name})
After this you still get the Cancelled exceptions flowing through to your websocket function, so you may want to catch them there. Be aware that you will need to catch BaseException to catch them (trio.Cancelled derives from BaseException, not Exception), something like:
@app.websocket('/ws')
async def wsocket():
    """Run the send/receive tasks and report any exception that escapes them."""
    u = User.current()
    if u is None:
        return
    u.websockets.add(websocket._get_current_object())
    u.connected = True
    if TRIO:
        try:
            async with trio.open_nursery() as nursery:
                nursery.start_soon(sending, u)
                nursery.start_soon(receiving, u)
        except BaseException as e:
            # BaseException, not Exception, so that it also catches the
            # Cancelled exceptions coming out of the nursery.
            print(f'websocket funcs crashed with exception: {e}')
In particular this is because trio doesn't allow you to silently drop exceptions, you need to either catch them or crash. I hope this is enough to get you started on fixing the issues you are seeing.

Combining Python3.7 asyncio and multiprocess for tcp server

I am trying to create a TCP server that handles a small number of connections (about 100, but it could grow), where each TCP connection is persistent and streams GBs of data, which I will upload to an Elasticsearch database via its REST API.
I have tried two independent ways (asyncio and multiprocessing), and I see MP has the data uploaded quicker, but at the cost of a ton of processes, and asyncio only uses 1 process, but the data takes about 5 mins to get into the DB. I was wondering if there is a hybrid way such that have a batching mechanism to back 5 connections into a child process and run an asyncio loop in there to handle the connections.
Kinda what I am thinking:
async def dial_out_server(args):
    """Start the TCP server and serve connections until cancelled.

    Args:
        args: Parsed arguments providing ``elastic_server``, ``host`` and
            ``port``.
    """
    conn_handler = ClientConnection(args.elastic_server)
    server = await asyncio.start_server(
        conn_handler.handle_connection, args.host, args.port)
    addr = server.sockets[0].getsockname()
    print(f'Serving on {addr}')
    async with server:
        #batch the connections and spawn a new process with a loop here.
        #Not sure what is best way or if that is possible
        await server.serve_forever()
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("-a", "--host", dest="host", help="host", required=True)
    parser.add_argument("-r", "--port", dest="port", help="port", required=True)
    parser.add_argument("-e", "--elastic_server", dest="elastic_server", help="Elastic Server", required=True)
    args = parser.parse_args()
    loop = uvloop.new_event_loop()
    asyncio.set_event_loop(loop)
    # Run on the uvloop loop created above; asyncio.run() would create its
    # own new event loop and leave this one unused.
    try:
        loop.run_until_complete(dial_out_server(args))
    finally:
        loop.close()
Thanks
Full Code:
from utils import process_cisco_encoding
from telemetry_pb2 import Telemetry
from argparse import ArgumentParser
from struct import Struct, unpack
from aiohttp import ClientSession
from logging.handlers import RotatingFileHandler, QueueHandler
import grpc
import logging
import asyncio
import json
import logging
import traceback
import uvloop
class Error(Exception):
    """Base class for the exceptions defined here."""


class GetIndexListError(Error):
    """Fetching the index list failed."""

    def __init__(self, traceback, response_json, message, e):
        # Pass the message up so str(exc) is not empty.
        super().__init__(message)
        self.traceback = traceback
        self.response = response_json
        self.message = message
        self.exception = e


class PostDataError(Error):
    """Posting data failed; ``data`` holds what was being posted."""

    def __init__(self, traceback, response_json, data, message, e):
        super().__init__(message)
        self.traceback = traceback
        self.response = response_json
        self.data = data
        self.message = message
        self.exception = e


class PutIndexError(Error):
    """Creating an index failed."""

    def __init__(self, traceback, response_json, message, e):
        super().__init__(message)
        self.traceback = traceback
        self.response = response_json
        self.message = message
        self.exception = e


class ElasticSearchError(Error):
    """Carries a response body together with an error message."""

    def __init__(self, response_json, message):
        super().__init__(message)
        self.response = response_json
        self.message = message
class ClientConnection(object):
    """Read framed messages from TCP clients and forward them to Elasticsearch."""

    def __init__(self, elastic_server):
        self.elastic_server = elastic_server
        self.lock = asyncio.Lock()  # serialises index creation across connections
        self.log = None  # created on the first connection

    async def get_index_list(self, url):
        """Return the keys of the JSON object at ``url`` that do not start with '.'.

        Raises:
            GetIndexListError: If the request or the decoding fails.
        """
        response = None  # stays None if the request itself fails
        try:
            async with ClientSession() as session:
                async with session.get(url) as http_response:
                    response = json.loads((await http_response.read()).decode())
            return [key for key in response if not key.startswith('.')]
        except Exception as e:
            raise GetIndexListError(traceback.format_exc(), response, "Got Exception while trying to get index list", e) from e

    async def post_data(self, data_to_post):
        """POST ``data_to_post`` to the ``_bulk`` endpoint and return the response.

        Raises:
            PostDataError: If the request fails.
        """
        headers = {'Content-Type': "application/x-ndjson"}
        url = f"http://{self.elastic_server}:9200/_bulk"
        try:
            async with ClientSession() as session:
                response = await session.post(url, data=data_to_post, headers=headers)
                return response
        except Exception as e:
            # No response is available on this path, hence the None.
            raise PostDataError(traceback.format_exc(), None, data_to_post, "Got Exception while trying to post data", e) from e

    async def put_index(self, index):
        """PUT ``index`` with a date mapping for ``@timestamp`` and return the response.

        Raises:
            PutIndexError: If the request fails.
        """
        url = f"http://{self.elastic_server}:9200/{index}"
        headers = {'Content-Type': "application/json"}
        mapping = {"mappings": {"properties": {"@timestamp": {"type": "date"}}}}
        response = None  # stays None if the request itself fails
        try:
            async with ClientSession() as session:
                response = await session.put(url, json=mapping, headers=headers)
                return response
        except Exception as e:
            raise PutIndexError(traceback.format_exc(), response, f"Got Exception while trying to put index {index}", e) from e

    async def init_logger(self, address):
        """Create the ``dial-out.log`` logger with file and stream handlers."""
        log_name = "dial-out.log"
        log = logging.getLogger(log_name)
        log.setLevel(logging.INFO)
        file_handler = RotatingFileHandler(log_name, maxBytes=536870912, backupCount=2)
        screen_handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s %(processName)-10s %(name)s %(levelname)-8s %(message)s')
        file_handler.setFormatter(formatter)
        screen_handler.setFormatter(formatter)
        log.addHandler(file_handler)
        log.addHandler(screen_handler)
        return log

    async def _close_after_error(self, writer, address, *details):
        """Log each detail, then drain and close the connection."""
        for detail in details:
            self.log.error(detail)
        try:
            await writer.drain()
        finally:
            # Close even if draining fails on a broken connection.
            self.log.error(f"Closing connection from {address[0]}")
            writer.close()

    async def handle_connection(self, reader, writer):
        """Serve one client: read framed messages and post them per index.

        Each message starts with a 12-byte header (``>hhhhi``) whose last
        field is the body length. Any error is logged and the connection
        is closed.
        """
        HEADER_SIZE = 12
        unpack_header = Struct('>hhhhi').unpack
        # Bound before the try block so the handlers below can always use them.
        address = writer.get_extra_info('peername')
        if self.log is None:
            self.log = await self.init_logger(address)
        try:
            self.log.info(f"Got Connection from {address[0]}:{address[1]}")
            while True:
                # readexactly raises at EOF; read() returns b'' there.
                header_data = await reader.readexactly(HEADER_SIZE)
                msg_type, encode_type, msg_version, flags, msg_length = unpack_header(header_data)
                if encode_type not in (1, 2):  # 1: gpb, 2: json
                    raise KeyError(encode_type)
                msg_data = b''
                if encode_type == 1:
                    msg_data = await reader.readexactly(msg_length)
                # NOTE(review): original indentation was lost; decoding is
                # assumed to run for every message, not only gpb - confirm.
                sorted_by_index = {}
                for segment in process_cisco_encoding([msg_data]):
                    sorted_by_index.setdefault(segment["_index"], []).append(segment)
                index_list = await self.get_index_list(f"http://{self.elastic_server}:9200/*")
                for index in sorted_by_index:
                    if index not in index_list:
                        async with self.lock:
                            # Re-check under the lock before creating the index.
                            index_list = await self.get_index_list(f"http://{self.elastic_server}:9200/*")
                            if index not in index_list:
                                self.log.info("Acciqured lock to put index in elasticsearch")
                                response = await self.put_index(index)
                                if response.status != 200:
                                    raise ElasticSearchError(await response.json(), "Unable to put index into Elasticsearch")
                                index_list.append(index)
                    else:
                        # NOTE(review): this else is assumed to pair with the
                        # outer membership test, so data for an index created
                        # just above is not posted this round - confirm.
                        elastic_index = {'index': {'_index': f'{index}'}}
                        payload_list = []
                        for segment in sorted_by_index[index]:
                            segment.pop('_index', None)
                            # Bulk format: an action line before each document.
                            payload_list.append(elastic_index)
                            payload_list.append(segment)
                        data_to_post = '\n'.join(json.dumps(d) for d in payload_list)
                        data_to_post += '\n'
                        response = await self.post_data(data_to_post)
                        if response.status != 200:
                            raise ElasticSearchError(await response.json(), "Unable to put data into Elasticsearch")
        except GetIndexListError as e:
            await self._close_after_error(writer, address, e.message, e.traceback, e.response, e.exception)
        except PostDataError as e:
            await self._close_after_error(writer, address, e.message, e.traceback, e.response, e.exception, e.data)
        except PutIndexError as e:
            await self._close_after_error(writer, address, e.message, e.traceback, e.response)
        except ElasticSearchError as e:
            await self._close_after_error(writer, address, e.message, e.response)
        except Exception as e:
            await self._close_after_error(writer, address, e, traceback.format_exc())
async def dial_out_server(args):
    """Listen on ``args.host``/``args.port`` and serve connections forever.

    One ``ClientConnection`` built from ``args.elastic_server`` handles
    every accepted connection through its ``handle_connection`` method.
    The address of the first listening socket is printed before serving.

    Args:
        args: Object exposing ``elastic_server``, ``host`` and ``port``.
    """
    connection = ClientConnection(args.elastic_server)
    listener = await asyncio.start_server(
        connection.handle_connection,
        args.host,
        args.port,
    )

    first_socket = listener.sockets[0]
    addr = first_socket.getsockname()
    print(f'Serving on {addr}')

    async with listener:
        await listener.serve_forever()
if __name__ == '__main__':
    # Command-line entry point: read the listen address and the
    # Elasticsearch host, then run the dial-out server on a uvloop loop.
    parser = ArgumentParser()
    parser.add_argument("-a", "--host", dest="host", help="host", required=True)
    parser.add_argument("-r", "--port", dest="port", help="port", required=True)
    parser.add_argument("-e", "--elastic_server", dest="elastic_server", help="Elastic Server", required=True)
    args = parser.parse_args()

    # asyncio.run() creates a fresh event loop of its own, so the uvloop
    # loop created here would never run the server. Drive the server on
    # this loop directly and close it when the server stops.
    loop = uvloop.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(dial_out_server(args))
    finally:
        loop.close()

How to implement a WebSocket-aware reverse proxy with aiohttp (Python 3.6)

I am trying to implement an application specific reverse-proxy for jupyter notebooks using aiohttp. It works fine for http requests, but the websocket forwarding does not work. Requests from the browser arrive and get forwarded, but there are no responses from jupyter forthcoming. I assume my websocket client code somehow does not react to incoming messages from jupyter.
The only indication on the jupyter side that something is amiss are messages like this:
WebSocket ping timeout after 90009 ms.
Here is my attempt at writing the proxy:
from aiohttp import web
from aiohttp import client
import aiohttp
import logging
import pprint
# Log at INFO so the logger.info() calls in the handler are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Base URL of the upstream server that requests are forwarded to.
baseUrl = 'http://0.0.0.0:8888'
# Path prefix under which the proxy route is registered.
mountPoint = '/fakeUuid'
# Proxy one incoming request: WebSocket upgrade requests are bridged to the
# upstream server at baseUrl, everything else is forwarded as a plain HTTP
# request and the upstream response is mirrored back.
# NOTE(review): the indentation of this snippet was lost, so the nesting
# described in the comments below is inferred from the statement order.
async def handler(req):
proxyPath = req.match_info.get('proxyPath','no proxyPath placeholder defined')
reqH = req.headers.copy()
# NOTE(review): the subscript lookups presumably raise KeyError when a request
# carries no Connection or Upgrade header -- confirm against the headers type.
if reqH['connection'] == 'Upgrade' and reqH['upgrade'] == 'websocket' and req.method == 'GET':
# Browser-facing end of the bridge.
ws_server = web.WebSocketResponse()
await ws_server.prepare(req)
logger.info('##### WS_SERVER %s' % pprint.pformat(ws_server))
# Upstream-facing end; only the request's cookie header is passed along.
# NOTE(review): no close() of this session is visible in the snippet.
client_session = aiohttp.ClientSession()
async with client_session.ws_connect(baseUrl+req.path_qs,
headers = { 'cookie': reqH['cookie'] },
) as ws_client:
logger.info('##### WS_CLIENT %s' % pprint.pformat(ws_client))
# Browser -> upstream: text frames are sent as text, everything else as bytes.
async for server_msg in ws_server:
logger.info('>>> msg from browser: %s',pprint.pformat(server_msg))
if server_msg.type == aiohttp.WSMsgType.TEXT:
await ws_client.send_str(server_msg.data)
else:
await ws_client.send_bytes(server_msg.data)
# Upstream -> browser.
# NOTE(review): presumably this loop follows the one above rather than running
# alongside it, so it would not start until the browser-side loop has ended --
# consistent with the missing responses described in the surrounding text.
async for client_msg in ws_client:
logger.info('>>> msg from jupyter: %s',pprint.pformat(client_msg))
# NOTE(review): reads `.tp` here while the loop above reads `.type` -- confirm
# which attribute the installed aiohttp version provides.
if client_msg.tp == aiohttp.WSMsgType.TEXT:
await ws_server.send_str(client_msg.data)
else:
await ws_server.send_bytes(client_msg.data)
return ws_server
else:
# Plain HTTP: replay method, headers and body against the upstream URL without
# following redirects, then return the upstream status, headers and body.
async with client.request(
req.method,baseUrl+mountPoint+proxyPath,
headers = reqH,
allow_redirects=False,
data = await req.read()
) as res:
headers = res.headers.copy()
body = await res.read()
return web.Response(
headers = headers,
status = res.status,
body = body
)
# NOTE(review): appears unreachable because both branches above return; also,
# ws_server is only assigned in the WebSocket branch.
return ws_server
# Register `handler` for every HTTP method on any path below mountPoint; the
# remainder of the path is captured as the `proxyPath` match-info value.
app = web.Application()
app.router.add_route('*',mountPoint + '{proxyPath:.*}', handler)
# Serve the application on port 3984.
web.run_app(app,port=3984)
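Lesson learned: the two `async for` loops run one after the other within the handler, so the second loop never starts while the first is still waiting for messages. Running each direction as its own coroutine and waiting on both with `asyncio.wait` lets them run at the same time. The resulting program looks like this: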
from aiohttp import web
from aiohttp import client
import aiohttp
import asyncio
import logging
import pprint
# Log at INFO so the logger.info() calls in the handler are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Base URL of the upstream server that requests are forwarded to.
baseUrl = 'http://0.0.0.0:8888'
# Path prefix under which the proxy route is registered.
mountPoint = '/fakeUuid'
async def handler(req):
    """Reverse-proxy one request to the upstream server at ``baseUrl``.

    WebSocket upgrade requests (``GET`` with ``Connection: ... Upgrade``
    and ``Upgrade: websocket``) are bridged: a server-side WebSocket is
    prepared for the browser, a client WebSocket is opened to
    ``baseUrl + req.path_qs`` with the request's cookies, and messages are
    forwarded in both directions until either side stops. Any other
    request is replayed against ``baseUrl + mountPoint + proxyPath``
    without following redirects, and the upstream status, headers and body
    are returned.

    Args:
        req: The incoming aiohttp request.

    Returns:
        The prepared ``web.WebSocketResponse`` for WebSocket requests,
        otherwise a ``web.Response`` mirroring the upstream response.
    """
    proxyPath = req.match_info.get('proxyPath', 'no proxyPath placeholder defined')
    reqH = req.headers.copy()

    # Look the headers up with .get() so plain HTTP requests lacking them do
    # not fail, and compare case-insensitively because browsers may send
    # values such as "keep-alive, Upgrade".
    is_websocket = (
        req.method == 'GET'
        and 'upgrade' in reqH.get('connection', '').lower()
        and reqH.get('upgrade', '').lower() == 'websocket'
    )

    if not is_websocket:
        async with client.request(
            req.method, baseUrl + mountPoint + proxyPath,
            headers=reqH,
            allow_redirects=False,
            data=await req.read(),
        ) as res:
            headers = res.headers.copy()
            body = await res.read()
            return web.Response(
                headers=headers,
                status=res.status,
                body=body,
            )

    ws_server = web.WebSocketResponse()
    await ws_server.prepare(req)
    logger.info('##### WS_SERVER %s' % pprint.pformat(ws_server))

    async def wsforward(ws_from, ws_to):
        # Copy every message read from ws_from onto ws_to.
        async for msg in ws_from:
            logger.info('>>> msg: %s', pprint.pformat(msg))
            mt = msg.type
            md = msg.data
            if mt == aiohttp.WSMsgType.TEXT:
                await ws_to.send_str(md)
            elif mt == aiohttp.WSMsgType.BINARY:
                await ws_to.send_bytes(md)
            elif mt == aiohttp.WSMsgType.PING:
                await ws_to.ping()
            elif mt == aiohttp.WSMsgType.PONG:
                await ws_to.pong()
            elif ws_to.closed:
                await ws_to.close(code=ws_to.close_code, message=msg.extra)
            else:
                raise ValueError('unexpecte message type: %s' % pprint.pformat(msg))

    # The session is used as a context manager so it is closed when the
    # bridge ends instead of being leaked.
    async with aiohttp.ClientSession(cookies=req.cookies) as client_session:
        async with client_session.ws_connect(baseUrl + req.path_qs) as ws_client:
            logger.info('##### WS_CLIENT %s' % pprint.pformat(ws_client))
            # asyncio.wait() needs tasks/futures; passing bare coroutines is
            # rejected by newer Python versions.
            forwarders = [
                asyncio.ensure_future(wsforward(ws_server, ws_client)),
                asyncio.ensure_future(wsforward(ws_client, ws_server)),
            ]
            finished, unfinished = await asyncio.wait(
                forwarders, return_when=asyncio.FIRST_COMPLETED)
            # Once one direction has ended the bridge is over: stop the
            # other direction rather than leaving it running in the background.
            for task in unfinished:
                task.cancel()
            await asyncio.gather(*unfinished, return_exceptions=True)
            for task in finished:
                if not task.cancelled() and task.exception() is not None:
                    logger.error('websocket forwarding failed: %r', task.exception())
    return ws_server
# Register `handler` for every HTTP method on any path below mountPoint; the
# remainder of the path is captured as the `proxyPath` match-info value.
app = web.Application()
app.router.add_route('*',mountPoint + '{proxyPath:.*}', handler)
# Serve the application on port 3984.
web.run_app(app,port=3984)

Categories

Resources