How can I broadcast an asyncio StreamReader to several consumers?

I'm trying to use aiohttp to build a sort of advanced reverse proxy.
I want to take the content of an HTTP request and pass it on to a new HTTP request without pulling it into memory. As long as there is only one upstream, the task is fairly easy: the aiohttp server exposes the request content as a StreamReader, and the aiohttp client accepts a StreamReader as a request body.
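For illustration, the single-upstream case looks roughly like this (a sketch; the upstream URL and catch-all route are placeholders, and error handling is omitted):

from aiohttp import web, ClientSession

async def proxy_handler(request: web.Request) -> web.Response:
    async with ClientSession() as session:
        # request.content is a StreamReader; passing it as `data`
        # forwards the body to the upstream chunk by chunk
        async with session.post('http://upstream.example/', data=request.content) as resp:
            return web.Response(status=resp.status, body=await resp.read())

app = web.Application()
app.add_routes([web.post('/{tail:.*}', proxy_handler)])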
The problem is that I want to send the original request to several upstreams or, for example, simultaneously send the content to an upstream and write it to disk.
Are there any tools to broadcast the content of a StreamReader?
I've tried to write a naive broadcaster, but it fails on large objects. What am I doing wrong?
import asyncio
from asyncio import StreamReader, Task
from typing import Iterable

class StreamBroadcast:
    def __init__(self, source: StreamReader, sinks_count: int, n: int = -1):
        self.__source = source
        self.__n = n
        self.__sinks = [StreamReader() for i in range(sinks_count)]
        self.__task = asyncio.create_task(self.__do_broadcast())

    async def __do_broadcast(self):
        while True:
            chunk = await self.__source.read(self.__n)
            if not chunk:
                break
            # feed every chunk to all sinks
            for output in self.__sinks:
                output.feed_data(chunk)
        for output in self.__sinks:
            output.feed_eof()

    @property
    def sinks(self) -> Iterable[StreamReader]:
        return self.__sinks

    @property
    def ready(self) -> Task:
        return self.__task

Well, I've looked through the asyncio sources and discovered that I should use a Transport to pump data over a stream. Here is my solution.
import asyncio
from asyncio import StreamReader, ReadTransport, StreamReaderProtocol
from typing import Iterable

class _BroadcastReadTransport(ReadTransport):
    """
    Internal class, not meant to be instantiated manually.
    """
    def __init__(self, source: StreamReader, sinks: Iterable[StreamReader]):
        super().__init__()
        self.__source = source
        self.__sinks = tuple(StreamReaderProtocol(s) for s in sinks)
        for sink in sinks:
            sink.set_transport(self)
        self.__waiting_for_data = len(self.__sinks)
        asyncio.create_task(self.__broadcast_next_chunk(), name='initial-chunk-broadcast')

    def is_reading(self):
        return self.__waiting_for_data == len(self.__sinks)

    def pause_reading(self):
        self.__waiting_for_data -= 1

    async def __broadcast_next_chunk(self):
        data = await self.__source.read()
        if data:
            for sink in self.__sinks:
                sink.data_received(data)
            if self.is_reading():
                asyncio.create_task(self.__broadcast_next_chunk())
        else:
            for sink in self.__sinks:
                sink.eof_received()

    def resume_reading(self):
        self.__waiting_for_data += 1
        if self.__waiting_for_data == len(self.__sinks):
            asyncio.create_task(self.__broadcast_next_chunk(), name='chunk-broadcast')

    @property
    def is_completed(self):
        return self.__source.at_eof()

class StreamBroadcast:
    def __init__(self, source: StreamReader, sinks_count: int):
        self.__source = source
        self.__sinks = tuple(StreamReader() for _ in range(sinks_count))
        self.__transport = _BroadcastReadTransport(self.__source, self.__sinks)

    @property
    def sinks(self) -> Iterable[StreamReader]:
        return self.__sinks

    @property
    def is_completed(self):
        return self.__transport.is_completed
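A minimal usage sketch (here the source is fed by hand; in the real proxy it would be the request body stream):

async def demo():
    source = StreamReader()
    source.feed_data(b'hello world')
    source.feed_eof()

    broadcast = StreamBroadcast(source, 2)
    first, second = broadcast.sinks
    print(await first.read())   # b'hello world'
    print(await second.read())  # b'hello world'

asyncio.run(demo())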
I hope to package it as a pip module one day.

Related

Django: asynchronous service development through task list sharing

I am creating a web application with Django 3.1.0 and Python 3.8. This application requires the execution of a very long back-office operation, and I am interested in knowing its percentage of progress. To do this, the server exposes two services:
www.contoso.com/process_start: returns a string to indicate the id assigned to the task;
www.contoso.com/process_status/taskid: returns a number from 0 to 1 to indicate the progress (as a taskid I use the id returned by the process_start call);
To do this I have created the following Python class:
from threading import Thread

TASKS = dict()  # tasks in progress, keyed by id

class Task():
    def __init__(self):
        id = get_uid()  # generate a random string
        TASKS[id] = self  # add to the tasks in progress
        self.id = id
        self.percentage = 0.0
        self.thread = Thread(name=self.id, target=self.execute)
        self.thread.start()

    @staticmethod
    def get_status(id: str) -> float:
        task = TASKS.get(id, None)
        return 1.0 if task is None else task.percentage

    def execute(self) -> None:
        try:
            ...
        finally:
            TASKS.pop(self.id, None)  # remove from the tasks in progress
The first service creates the task like this:
def post(self, request: Request) -> Response:
    task = TaskReslice()
    return Response(status=status.HTTP_200_OK, data=task.id)
The second service returns the status of the task in this way:
def get(self, request: Request, *args, **kwargs) -> Response:
    task_id: str = kwargs["id"]
    return Response(status=status.HTTP_200_OK, data=Task.get_status(task_id))
The problem is that the second service can't find any running task. What am I doing wrong?

Cannot trigger an async function from another threaded function in Python

I am making a Discord bot that grabs a JSON using requests from time to time and then sends the relevant information to a specific channel.
I have the following classes:
Helper, which is the Discord bot itself; it runs async from the start, inside an asyncio.gather;
tasker, which controls the interval at which the requesting class is called. It runs in a different thread so it doesn't block the async Helper while it waits;
getInfo, which does the requests, stores the info, and should talk with Helper.
I am having two problems right now:
While tasker is on a different thread, every time I try to talk with Helper via getInfo I get the errors RuntimeError: no running event loop and RuntimeWarning: coroutine 'getInfo.discordmsg' was never awaited.
If I don't run it on a different thread, however, it does work with TestStatus: 1, but it makes Helper get stuck and stop running with TestStatus: 2.
Anyway, here is the code:
import requests
import asyncio
import discord
from discord.ext import commands, tasks
from datetime import datetime, timedelta
import threading
import dateutil.parser  # used by converteHora for non-current timestamps

class Helper(discord.Client):
    async def on_ready(self):
        global discordbot, taskervar
        servername = 'ServerName'
        discordbot = self
        self.servidores = dict()
        self.canais = dict()
        for i in range(len(self.guilds)):
            self.servidores[self.guilds[i].name] = {}
            self.servidores[self.guilds[i].name]['guild'] = self.guilds[i]
            servidor = self.guilds[i]
            for k in range(len(servidor.channels)):
                canal = servidor.channels[k]
                self.canais[str(canal.name)] = canal
            if 'bottalk' not in self.canais.keys():
                newchan = await self.servidores[self.guilds[i].name]['guild'].create_text_channel('bottalk')
                self.canais[str(newchan.name)] = newchan
            self.servidores[self.guilds[i].name]['canais'] = self.canais
        self.bottalk = self.get_channel(self.servidores[servername]['canais']['bottalk'].id)
        await self.msg("Bot online: " + converteHora(datetime.now(), True))
        print(f'{self.user} has connected to Discord!')
        taskervar.startprocess()

    async def msg(self, msg):
        await self.bottalk.send(msg)

    async def on_message(self, message):
        if message.author == self.user:
            return
        else:
            print(message)

class tasker:
    def __init__(self):
        global discordbot, taskervar
        print('Tasker start')
        taskervar = self
        self.waiter = threading.Event()
        self.lastupdate = datetime.now()
        self.nextupdate = datetime.now()
        self.thread = threading.Thread(target=self.requests)

    def startprocess(self):
        if not self.thread.is_alive():
            self.waiter = threading.Event()
            self.interval = 60 * 5
            self.thread = threading.Thread(target=self.requests)
            self.thread.start()

    def requests(self):
        while not self.waiter.is_set():
            getInfo()
            self.lastupdate = datetime.now()
            self.nextupdate = datetime.now() + timedelta(seconds=self.interval)
            self.waiter.wait(self.interval)

    def stopprocess(self):
        self.waiter.set()

class getInfo:
    def __init__(self):
        global discordbot, taskervar
        self.requests()

    async def discordmsg(self, msg):
        await discordbot.msg(msg)

    def requests(self):
        jsondata = {"TestStatus": 1}
        if jsondata['TestStatus'] == 1:
            print('here')
            asyncio.create_task(self.discordmsg("SOMETHING WENT WRONG"))
            taskervar.stopprocess()
            return
        elif jsondata['TestStatus'] == 2:
            print('test')
            hora = converteHora(datetime.now(), True)
            asyncio.create_task(self.discordmsg(str("Everything is fine but not now: " + hora)))
            print('test2')

def converteHora(dateUTC, current=False):
    if current:
        response = dateUTC.strftime("%d/%m/%Y, %H:%M:%S")
    else:
        response = (dateutil.parser.isoparse(dateUTC) - timedelta(hours=3)).strftime("%d/%m/%Y, %H:%M:%S")
    return response

async def main():
    TOKEN = 'TOKEN GOES HERE'
    tasker()
    await asyncio.gather(
        Helper().start(TOKEN)  # pass the coroutine to gather; don't await it here
    )

if __name__ == '__main__':
    asyncio.run(main())
Your primary problem is that you don't give your secondary thread access to the asyncio event loop. You can't just await and/or create_task a coroutine on a global object (one of many reasons to avoid global objects in the first place). Here is how you could modify your code to accomplish that:
class tasker:
    def __init__(self):
        # ...
        self.loop = asyncio.get_running_loop()
        # ...

class getInfo:
    # ...
    def requests(self):
        # replace the asyncio.create_task(...) calls with this; note that
        # run_coroutine_threadsafe takes a coroutine object, not the method itself
        asyncio.run_coroutine_threadsafe(self.discordmsg("SOMETHING WENT WRONG"), taskervar.loop)
This uses your global variables because I don't want to rewrite your entire program, but I still strongly recommend avoiding them and considering a re-write yourself.
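For reference, here is a minimal, self-contained sketch of the same pattern, independent of discord.py (all names here are illustrative):

import asyncio
import threading

async def notify(msg):
    print(msg)

def worker(loop):
    # called from a plain thread: schedule the coroutine on the given loop
    # and wait for its result via the returned concurrent.futures.Future
    future = asyncio.run_coroutine_threadsafe(notify("from worker thread"), loop)
    future.result(timeout=5)

async def main():
    loop = asyncio.get_running_loop()
    thread = threading.Thread(target=worker, args=(loop,))
    thread.start()
    await asyncio.sleep(1)  # keep the loop running while the worker schedules its coroutine
    thread.join()

asyncio.run(main())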
All that being said, I suspect you will still have this bug:
If I don't run it on a different thread, however, it does work with TestStatus: 1, but it makes Helper get stuck and stop running with TestStatus: 2.
I can't tell what would cause this issue, and I'm running into trouble reproducing it on my machine. Your code is pretty hard to read and is missing some details needed for reproducibility. I would imagine that is part of the reason why you didn't get an answer in the first place. I'm sure you're aware of this article, but it might be worth a re-visit for better practices in sharing code: https://stackoverflow.com/help/minimal-reproducible-example

Python equivalent of Java synchronized

In Java, you can make a variable thread safe by just adding the synchronized keyword. Is there anything that can achieve the same results in Python?
You can use a lock as a context manager, i.e. with self.lock:, and put your code inside that block. See http://theorangeduck.com/page/synchronized-python for more information.
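A minimal illustration of the pattern (a sketch; the class and attribute names are just examples):

import threading

class SharedCounter:
    def __init__(self):
        self.lock = threading.Lock()
        self.value = 0

    def increment(self):
        with self.lock:  # roughly what Java's synchronized gives you per object
            self.value += 1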
Working code using with self.lock, which also takes care of exceptions if they occur.
Inside Manager we make the methods thread safe:
from threading import RLock

class Manager:
    def __init__(self):
        self.lock = RLock()
        self.hash: dict[str, int] = dict()

    def containsToken(self, key) -> bool:
        with self.lock:
            # the `with` statement already acquires (and releases) the lock
            return key in self.hash

    def addToken(self, token: str):
        with self.lock:
            token = token.strip()
            if token in self.hash:
                self.hash[token] = self.hash[token] + 1
            else:
                self.hash[token] = 1

    def removeToken(self, token):
        with self.lock:
            if token not in self.hash:
                raise KeyError(f"token : {token} doesn't exist")
            self.hash[token] = self.hash[token] - 1
            if self.hash[token] == 0:
                self.hash.pop(token)

if __name__ == "__main__":
    sync = Manager()
    sync.addToken("a")
    sync.addToken("a")
    sync.addToken("a")
    sync.addToken("a")
    sync.addToken("B")
    sync.addToken("B")
    sync.addToken("B")
    sync.addToken("B")
    sync.removeToken("a")
    sync.removeToken("a")
    sync.removeToken("a")
    sync.removeToken("B")
    print(sync.hash)
Output:
{'a': 1, 'B': 3}
You can write your own @synchronized decorator.
The example uses a mutex Lock:
from functools import wraps
from multiprocessing import Lock

def synchronized(member):
    """
    @synchronized decorator.

    Lock a method for synchronized access only. The lock is stored to
    the function or class instance, depending on what is available.
    """
    @wraps(member)
    def wrapper(*args, **kwargs):
        lock = vars(member).get("_synchronized_lock", None)
        result = ""
        try:
            if lock is None:
                lock = vars(member).setdefault("_synchronized_lock", Lock())
            lock.acquire()
            result = member(*args, **kwargs)
            lock.release()
        except Exception as e:
            lock.release()
            raise e
        return result
    return wrapper
Now you are able to decorate a method like this:
class MyClass:
    ...

    @synchronized
    def hello_world(self):
        print("synced hello world")
And there is also an excellent Blog post about the missing synchronized decorator.

Python: Asyncio NATS.io blocking

I'm having trouble making Python asyncio NATS.io run sequentially. I have two classes: Account and Bridge.
Account holds the application logic, and it communicates through Bridge with an external service via NATS.io.
Main file:
loop = asyncio.get_event_loop()
account = Account(loop, options)
asyncio.async(account.start())  # asyncio.async() is the old spelling of ensure_future()
loop.run_forever()
Account class:
class Account:
    bridge = Bridge()

    def connect(self):
        result = self.bridge.connect(self.id)
        return result
Bridge class:
def connect(self, account_id):
    data = None
    try:
        response = yield from self.nc.timed_request("bank.account.connect",
                                                    BankRequest(
                                                        method="connect",
                                                        data={...}
                                                    ), 10)
        data = json.loads(response.data.decode())
    except ErrTimeout:
        status = Messages.REQUEST_TIMED_OUT
    return Result(data=data)
I need to call account.connect() from anywhere inside the Account class and get the result of the connection (sequentially). Right now I'm getting a generator object.
Your connect() methods should probably be coroutines:
class Account:
    bridge = Bridge()  # you probably want to put this in `def __init__(self)`!

    @asyncio.coroutine
    def connect(self):
        result = yield from self.bridge.connect(self.id)
        return result

class Bridge:
    @asyncio.coroutine
    def connect(self, account_id):
        data = None
        try:
            response = yield from self.nc.timed_request("bank.account.connect",
                                                        BankRequest(
                                                            method="connect",
                                                            data={...}
                                                        ), 10)
            data = json.loads(response.data.decode())
        except ErrTimeout:
            status = Messages.REQUEST_TIMED_OUT
        return Result(data=data)
and:
resp = yield from account.connect()
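On Python 3.5+ the same thing is written with async/await; @asyncio.coroutine and yield from are deprecated (and removed in Python 3.11). A sketch of the equivalent, assuming the same Bridge as above:

class Account:
    def __init__(self):
        self.bridge = Bridge()
        self.id = ...  # assumed to be set somewhere, as in the question

    async def connect(self):
        return await self.bridge.connect(self.id)

# at the call site, inside another coroutine:
# resp = await account.connect()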

Python equivalent of Perl's HTTP::Async->next_response

I'm looking for a way to do the equivalent of the next_response method of Perl's HTTP::Async module.
The HTTP::Async module doesn't spawn any background threads, nor does it use any callbacks. Instead, every time anyone (in my case, the main thread) calls next_response on the object, all the data that has been received by the OS so far is read (blocking, but instantaneous, since it only processes data that's already been received). If this is the end of the response, then next_response returns an HTTP::Response object; otherwise it returns undef.
Usage of this module looks something like (pseudocode):
request = HTTP::Async(url)
do:
    response = request->next_response()
    if not response:
        sleep 5  # or process events or whatever
while not response
# Do things with response
As far as I can see, Python's urllib or http.client don't support this style. As for why I want to do it in this style:
This is for an embedded Python environment where I can't spawn threads, nor have Python spawn any.
I'm restricted to a single thread that is actually the embedding application's thread. This means I cannot have any delayed callbacks either - the application decides when to let my Python code run. All I can do is request the embedding application to invoke a callback of my choosing every 50 milliseconds, say.
Is there a way to do this in Python?
For reference, this is an example of the Perl code I have right now and that I'm looking to port to Python:
my $httpAsync = HTTP::Async->new();

sub httpRequestAsync {
    my ($url, $callback) = @_;  # $callback will be called with the response text
    $httpAsync->add(new HTTP::Request(GET => $url));

    # create_timer causes the embedding application to call the supplied callback every 50ms
    application::create_timer(50, sub {
        my $timer_result = application::keep_timer;
        my $response = $httpAsync->next_response;
        if ($response) {
            my $responseText = $response->decoded_content;
            if ($responseText) {
                $callback->($responseText);
            }
            $timer_result = application::remove_timer;
        }
        # Returning application::keep_timer will preserve the timer to be called again.
        # Returning application::remove_timer will remove the timer.
        return $timer_result;
    });
}

httpRequestAsync('http://www.example.com/', sub {
    my $responseText = $_[0];
    application::display($responseText);
});
Edit: Given that this is for an embedded Python instance, I'll take all the alternatives I can get (part of the standard library or otherwise) as I'll have to evaluate all of them to make sure they can run under my particular constraints.
Note: if you're interested in retrieving data only when YOU call for it, simply add a flag and check it in the sleep block inside handle_read, thus giving you data only when you call your function.
#!/usr/bin/python
# -*- coding: iso-8859-15 -*-
# (Python 2 / asyncore example)
import asyncore, errno
from socket import AF_INET, SOCK_STREAM
from time import sleep
from threading import Thread

class sender():
    def __init__(self, sock_send):
        self.s = sock_send
        self.bufferpos = 0
        self.buffer = {}
        self.alive = 1

    def send(self, what):
        self.buffer[len(self.buffer)] = what

    def writable(self):
        return (len(self.buffer) > self.bufferpos)

    def run(self):
        while self.alive:
            if self.writable():
                logout = str([self.buffer[self.bufferpos]])
                self.s(self.buffer[self.bufferpos])
                self.bufferpos += 1
            sleep(0.01)

class SOCK(asyncore.dispatcher, Thread):  # Thread mixin: this was lifted from a threaded project
    def __init__(self, _s=None, config=None):
        self.conf = config
        Thread.__init__(self)
        self._s = _s
        self.inbuffer = ''
        #self.buffer = ''
        self.lockedbuffer = False
        self.is_writable = False
        self.autounlockAccounts = {}
        if _s:
            asyncore.dispatcher.__init__(self, _s)
            self.sender = sender(self.send)
        else:
            asyncore.dispatcher.__init__(self)
            self.create_socket(AF_INET, SOCK_STREAM)
            #if self.allow_reuse_address:
            #    self.set_reuse_addr()
            self.bind((self.conf['SERVER'], self.conf['PORT']))
            self.listen(5)
            self.sender = None
        self.start()

    def parse(self):
        self.lockedbuffer = True
        ## Parse here
        print self.inbuffer
        self.inbuffer = ''
        self.lockedbuffer = False

    def readable(self):
        return True

    def handle_connect(self):
        pass

    def handle_accept(self):
        (conn_sock, client_address) = self.accept()
        if self.verify_request(conn_sock, client_address):
            self.process_request(conn_sock, client_address)

    def process_request(self, sock, addr):
        x = SOCK(sock, config={'PARSER': self.conf['PARSER'], 'ADDR': addr[0],
                               'NAME': 'CORE_SUB_SOCK_(' + str(addr[0]) + ')'})

    def verify_request(self, conn_sock, client_address):
        return True

    def handle_close(self):
        self.close()
        if self.sender:
            self.sender.alive = False

    def handle_read(self):
        data = self.recv(8192)
        while self.lockedbuffer:
            sleep(0.01)
        self.inbuffer += data

    def writable(self):
        return True

    def handle_write(self):
        pass

    def run(self):
        if not self._s:
            asyncore.loop()

imap = SOCK(config={'SERVER': '', 'PORT': 6668})
# the asyncore loop is already running in the thread started from __init__
while 1:
    sleep(1)
Something along the lines of this?
An asyncore socket that always appends to the inbuffer when there's data to receive.
You can modify it however you want to; I just pasted a piece of code from another project that happens to be threaded :)
Last attempt:
class EchoHandler(asyncore.dispatcher_with_send):
    def handle_read(self):
        data = self.recv(8192)
        if data:
            self.send(data)
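If only the standard library is available, the next_response polling style can also be approximated with a plain non-blocking socket. A minimal sketch (HTTP/1.0 over a raw socket, so no TLS, redirects, or chunked encoding; host and path are placeholders):

import errno
import socket

class AsyncHTTPRequest(object):
    # Tiny next_response()-style poller: the request is sent up front,
    # then each next_response() call drains whatever the OS has buffered.
    def __init__(self, host, path='/', port=80):
        self.buf = b''
        self.done = False
        self.sock = socket.create_connection((host, port))
        self.sock.sendall(('GET %s HTTP/1.0\r\nHost: %s\r\n\r\n' % (path, host)).encode())
        self.sock.setblocking(False)  # after this point, reads never block

    def next_response(self):
        # Returns the raw response bytes once the server closes the
        # connection (HTTP/1.0 semantics), or None if it isn't done yet.
        if self.done:
            return self.buf
        while True:
            try:
                chunk = self.sock.recv(8192)
            except socket.error as e:
                if e.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                    return None  # nothing buffered right now; poll again later
                raise
            if not chunk:  # EOF: the response is complete
                self.done = True
                self.sock.close()
                return self.buf
            self.buf += chunk

# from the 50ms timer callback:
# req = AsyncHTTPRequest('www.example.com')
# resp = req.next_response()  # None until the whole response has arrived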
