I have a program with one main thread, from which I spawn a second thread that uses asyncio. Are there any tools provided to synchronize these two threads? If everything were asyncio, I could do it with its synchronization primitives, e.g.:
import asyncio
async def taskA(lst, evt):
print(f'Appending 1')
lst.append(1)
evt.set()
async def taskB(lst, evt):
await evt.wait()
print('Retrieved:', lst.pop())
lst = []
evt = asyncio.Event()
asyncio.get_event_loop().run_until_complete(asyncio.gather(
taskA(lst, evt),
taskB(lst, evt),
))
However, this does not work with multiple threads. If I just use a threading.Event then it will block the asyncio thread. I figured out I could defer the wait to an executor:
import asyncio
import threading
def taskA(lst, evt):
print(f'Appending 1')
lst.append(1)
evt.set()
async def taskB(lst, evt):
    await asyncio.get_event_loop().run_in_executor(None, evt.wait)
print('Retrieved:', lst.pop())
def targetA(lst, evt):
taskA(lst, evt)
def targetB(lst, evt):
asyncio.set_event_loop(asyncio.new_event_loop())
asyncio.get_event_loop().run_until_complete(taskB(lst, evt))
lst = []
evt = threading.Event()
threadA = threading.Thread(target=targetA, args=(lst, evt))
threadB = threading.Thread(target=targetB, args=(lst, evt))
threadA.start()
threadB.start()
threadA.join()
threadB.join()
However, having an executor thread only to wait for a mutex seems unnatural. Is this the way this is supposed to be done? Or is there any other way to wait for synchronization between OS threads asynchronously?
A simple way to synchronize an asyncio coroutine with an event coming from another thread is to await an asyncio.Event in taskB, and set it from taskA using loop.call_soon_threadsafe.
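For illustration, here is a minimal sketch of that approach, reusing the taskA/taskB names from the question (the main() wiring and the extra loop argument are mine, not part of the original code):

import asyncio
import threading

def taskA(lst, evt, loop):
    # Runs in a plain OS thread; signal the asyncio thread without blocking it.
    print('Appending 1')
    lst.append(1)
    loop.call_soon_threadsafe(evt.set)

async def taskB(lst, evt):
    await evt.wait()  # suspends only this coroutine, not the whole thread
    print('Retrieved:', lst.pop())

async def main():
    lst = []
    evt = asyncio.Event()
    loop = asyncio.get_running_loop()
    threading.Thread(target=taskA, args=(lst, evt, loop)).start()
    await taskB(lst, evt)

asyncio.run(main())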
To be able to pass values and exceptions between the two, you can use futures; however then you are inventing much of run_in_executor. If the only job of taskA is to take tasks off a queue, you might as well make a single-worker "pool" and use it as your worker thread. Then you can use run_in_executor as intended:
worker = concurrent.futures.ThreadPoolExecutor(max_workers=1)
async def taskB(lst):
loop = asyncio.get_event_loop()
# or result = await ..., if taskA has a useful return value
# This will also propagate exceptions raised by taskA
await loop.run_in_executor(worker, taskA, lst)
print('Retrieved:', lst.pop())
The semantics are the same as in your version - the hand-off queue is still there, it's just hidden inside the ThreadPoolExecutor.
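For completeness, here is one way the snippet above could be driven end-to-end, assuming taskA is an ordinary blocking function (this wiring is illustrative, not part of the original answer):

import asyncio
import concurrent.futures

worker = concurrent.futures.ThreadPoolExecutor(max_workers=1)

def taskA(lst):
    # Ordinary blocking code; it always runs in the single worker thread.
    print('Appending 1')
    lst.append(1)

async def taskB(lst):
    loop = asyncio.get_event_loop()
    # Suspends taskB until the worker thread has finished running taskA.
    await loop.run_in_executor(worker, taskA, lst)
    print('Retrieved:', lst.pop())

asyncio.run(taskB([]))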
Related
I want to start a new Process (Pricefeed) from my Executor class and then have the Executor class keep running in its own event loop (the shoot method). In my current attempt, the asyncio loop gets blocked on the line p.join(). However, without that line, my code just exits. How do I do this properly?
Note: fh.run() blocks as well.
import asyncio
from multiprocessing import Process, Queue
from cryptofeed import FeedHandler
from cryptofeed.defines import L2_BOOK
from cryptofeed.exchanges.ftx import FTX
class Pricefeed(Process):
def __init__(self, queue: Queue):
super().__init__()
self.coin_symbol = 'SOL-USD'
self.fut_symbol = 'SOL-USD-PERP'
self.queue = queue
async def _book_update(self, feed, symbol, book, timestamp, receipt_timestamp):
self.queue.put(book)
def run(self):
fh = FeedHandler()
fh.add_feed(FTX(symbols=[self.fut_symbol, self.coin_symbol], channels=[L2_BOOK],
callbacks={L2_BOOK: self._book_update}))
fh.run()
class Executor:
def __init__(self):
self.q = Queue()
async def shoot(self):
print('in shoot')
for i in range(5):
msg = self.q.get()
print(msg)
await asyncio.sleep(1) # do some stuff
async def run(self):
asyncio.create_task(self.shoot())
p = Pricefeed(self.q)
p.start()
p.join()
async def main():
g = Executor()
await g.run()
if __name__ == '__main__':
asyncio.run(main())
Since you're using a queue to communicate this is a somewhat tricky problem. To answer your first question as to why removing join makes the program work, join blocks until the process finishes. In asyncio you can't do anything blocking in a function marked async or it will freeze the event loop. To do this properly you'll need to run your process with the asyncio event loop's run_in_executor method which will run things in a process pool and return an awaitable that is compatible with the asyncio event loop.
Secondly, you'll need to use a multiprocessing Manager which creates shared state that can be used by multiple processes to properly share your queue. Managers directly support creation of a shared queue. Using these two bits of knowledge you can adapt your code to something like the following which works:
import asyncio
import functools
import time
from multiprocessing import Manager
from concurrent.futures import ProcessPoolExecutor
def run_pricefeed(queue):
i = 0
while True: #simulate putting an item on the queue every 250ms
queue.put(f'test-{i}')
i += 1
time.sleep(.25)
class Executor:
async def shoot(self, queue):
print('in shoot')
for i in range(5):
while not queue.empty():
msg = queue.get(block=False)
print(msg)
await asyncio.sleep(1) # do some stuff
async def run(self):
with ProcessPoolExecutor() as pool:
with Manager() as manager:
queue = manager.Queue()
asyncio.create_task(self.shoot(queue))
await asyncio.get_running_loop().run_in_executor(pool, functools.partial(run_pricefeed, queue))
async def main():
g = Executor()
await g.run()
if __name__ == '__main__':
asyncio.run(main())
This code has a drawback in that you need to empty the queue in a non-blocking fashion from your asyncio task and wait for a while for new items to come in before emptying it again, effectively implementing a polling mechanism. If you don't wait after emptying, you'll wind up with blocking code and you will freeze the event loop again. This isn't as good as just waiting for the queue to have an item in it by blocking, but it may suit your needs. If possible, I would avoid asyncio here and use multiprocessing entirely, for example, by implementing queue processing as a separate process.
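If the polling becomes a problem, one alternative (a sketch of mine, not part of the answer above) is to hand the blocking queue.get off to a thread via run_in_executor, so the coroutine can await each item without freezing the loop. A drop-in replacement for Executor.shoot in the code above could look roughly like this:

    async def shoot(self, queue):
        print('in shoot')
        loop = asyncio.get_running_loop()
        for i in range(5):
            # queue.get() blocks in a helper thread from the default executor,
            # so the event loop itself is never frozen while waiting for an item.
            msg = await loop.run_in_executor(None, queue.get)
            print(msg)
            await asyncio.sleep(1)  # do some stuff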
I have an asyncio event loop to which I add a signal handler, using loop.add_signal_handler(). The signals I want to catch are SIGINT, SIGHUP and SIGTERM to gracefully shutdown my event loop.
From this event loop I want to fork processes, using multiprocessing.Process(). This process p I want to be able to terminate from the event loop using p.terminate(). However, the signal handler will catch the SIGTERM signal issued by p.terminate(), prompting the execution of my shutdown code, leaving p running.
I have not found any solutions to this. Most posts say you should refrain from using termination signals and instead use a multiprocessing.Queue(), passing e.g. None and handling it in the child process. While I see the usefulness and cleanness of this approach, in my case using a multiprocessing.Queue() will not be feasible. Am I trying something impossible, or have I missed something?
I have created a minimum example:
import asyncio
import multiprocessing as mp
import signal
import time
class Test():
def __init__(self):
self.event_loop = asyncio.get_event_loop()
def create_mp(self):
# Wrapper for creating process
self.p = mp.Process(target=worker)
self.p.start()
async def shutdown_mp(self):
# Shutdown after 5 sec
await asyncio.sleep(5)
self.p.terminate()
async def async_print(self):
# Simple coroutine printing
while True:
await asyncio.sleep(1)
print("Async_test")
async def shutdown_event_loop(self, signal):
# Graceful shutdown, inspiration from roguelynn (Lynn Root)
print("Received exit signal {}".format(signal.name))
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
# Cancel all tasks
[task.cancel() for task in tasks]
print("Cancelling {} outstanding tasks".format(len(tasks)))
await asyncio.gather(*tasks, return_exceptions=True)
self.event_loop.stop()
def run(self):
# Add signals
signals = (signal.SIGINT, signal.SIGHUP, signal.SIGTERM)
for s in signals:
self.event_loop.add_signal_handler(
s, lambda s=s: self.event_loop.create_task(self.shutdown_event_loop(s)))
# Schedule async task
self.event_loop.create_task(self.async_print())
# Start processes
self.create_mp()
# Schedule process to be terminated
self.event_loop.create_task(self.shutdown_mp())
self.event_loop.run_forever()
def worker():
# Simple process
while True:
print("Test")
time.sleep(1)
if __name__ == "__main__":
test = Test()
test.run()
I have not been able to cancel the processes with a simple KeyboardInterrupt. Instead I use pgrep python3 and kill -9 PID.
I would like to combine asyncio and multiprocessing, as I have a task where one part is IO-bound and another is CPU-bound. I first tried to use loop.run_in_executor(), but I couldn't get it to work properly. Instead I went with creating two processes, where one uses asyncio and the other doesn't.
The code is such that I have a class with some non-blocking functions and one blocking. I have an asyncio.Queue to pass information between the non-blocking parts and a multiprocessing.Queue to pass information between the non-blocking and the blocking functions.
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
import asyncio
import time
class TestClass:
def __init__(self):
m = mp.Manager()
self.blocking_queue = m.Queue()
async def run(self):
loop = asyncio.get_event_loop()
self.non_blocking_queue = asyncio.Queue() # asyncio Queue must be declared within event loop
task1 = loop.create_task(self.non_blocking1())
task2 = loop.create_task(self.non_blocking2())
task3 = loop.create_task(self.print_msgs())
await asyncio.gather(task1, task2)
task3.cancel()
def blocking(self):
i = 0
while i < 5:
time.sleep(0.6)
i += 1
print("Blocking ", i)
line = self.blocking_queue.get()
print("Blocking: ", line)
print("blocking done")
async def non_blocking1(self):
for i in range(5):
await self.non_blocking_queue.put("Hello")
await asyncio.sleep(0.4)
async def non_blocking2(self):
for i in range(5):
await self.non_blocking_queue.put("World")
await asyncio.sleep(0.5)
async def print_msgs(self):
while True:
line = await self.non_blocking_queue.get()
self.blocking_queue.put(line)
print(line)
test_class = TestClass()
with ProcessPoolExecutor() as pool:
pool.submit(test_class.blocking)
pool.submit(asyncio.run(test_class.run()))
print("done")
About half the times I run this, it works fine and prints out the text from both the blocking and the non-blocking queues. The other half, it only prints out the results of the non-blocking queue; it looks like the blocking process isn't started at all. It is not a consistent every-other-time pattern either: it might work five times in a row and then fail five times in a row.
What might cause such a problem? Which better way can I do this, using both multiprocessing and asyncio?
running the async task "inside" the other process works for me, e.g.:
def runfn(fn):
return asyncio.run(fn())
with ProcessPoolExecutor() as pool:
pool.submit(test_class.blocking)
pool.submit(runfn, test_class.run)
presumably there's some state inside asyncio/the task that needs to be consistent or gets broken when running in another process. Note also that the original pool.submit(asyncio.run(test_class.run())) evaluates asyncio.run(...) in the parent process first and submits only its return value, so the coroutine never actually runs inside the worker process.
I'm trying to combine multiprocessing with asyncio. The program has two main components - one which streams/generates content, and another that consumes it.
What I want to do is to create multiple processes in order to exploit multiple CPU cores - one for the stream listener/generator, another for the consumer, and a simple one to shut down everything when the consumer has stopped.
My approach so far has been to create the processes, and start them. Each such process creates an async task. Once all processes have started, I run the asyncio tasks. What I have so far (stripped down) is:
def consume_task(loop, consumer):
loop.create_task(consume_queue(consumer))
def stream_task(loop, listener, consumer):
loop.create_task(create_stream(listener, consumer))
def shutdown_task(loop, listener):
loop.create_task(shutdown(consumer))
async def shutdown(consumer):
print("Shutdown task created")
while not consumer.is_stopped():
print("No activity")
await asyncio.sleep(5)
print("Shutdown initiated")
loop.stop()
async def create_stream(listener, consumer):
stream = Stream(auth, listener)
print("Stream created")
stream.filter(track=KEYWORDS, is_async=True)
await asyncio.sleep(EVENT_DURATION)
print("Stream finished")
consumer.stop()
async def consume_queue(consumer):
await consumer.run()
loop = asyncio.get_event_loop()
p_stream = Process(target=stream_task, args=(loop, listener, consumer, ))
p_consumer = Process(target=consume_task, args=(loop, consumer, ))
p_shutdown = Process(target=shutdown_task, args=(loop, consumer, ))
p_stream.start()
p_consumer.start()
p_shutdown.start()
loop.run_forever()
loop.close()
The problem is that everything hangs (or does it block?) - no tasks are actually running. My solution was to change the first three functions to:
def consume_task(loop, consumer):
loop.create_task(consume_queue(consumer))
loop.run_forever()
def stream_task(loop, listener, consumer):
loop.create_task(create_stream(listener, consumer))
loop.run_forever()
def shutdown_task(loop, listener):
loop.create_task(shutdown(consumer))
loop.run_forever()
This does actually run. However, the consumer and the listener objects are not able to communicate. As a simple example, when the create_stream function calls consumer.stop(), the consumer does not stop. Even when I change a consumer class variable, the changes are not made - case in point, the shared queue remains empty. This is how I am creating the instances:
queue = Queue()
consumer = PrintConsumer(queue)
listener = QueuedListener(queue, max_time=EVENT_DURATION)
Please note that if I do not use processes, but only asyncio tasks, everything works as expected, so I do not think it's a reference issue:
loop = asyncio.get_event_loop()
stream_task(loop, listener, consumer)
consume_task(loop, consumer)
shutdown_task(loop, listener)
loop.run_forever()
loop.close()
Is it because they are running on different processes? How should I go about fixing this issue please?
Found the problem! Multiprocessing creates copies of instances in each child process. The solution is to create a Manager, which shares the instances themselves between processes.
EDIT [11/2/2020]:
import asyncio
from multiprocessing import Process, Manager
"""
These two functions will be created as separate processes.
"""
def task1(loop, shared_list):
output = loop.run_until_complete(asyncio.gather(async1(shared_list)))
def task2(loop, shared_list):
output = loop.run_until_complete(asyncio.gather(async2(shared_list)))
"""
These two functions will be called (in different processes) asynchronously.
"""
async def async1(shared_list):
pass
async def async2(shared_list):
pass
"""
Create the manager (Manager() returns an already-started manager).
From this manager, also create a list that is shared by functions in different processes.
"""
manager = Manager()  # already started; calling manager.start() again would raise
shared_list = manager.list()
loop = asyncio.get_event_loop() # the event loop
"""
Create two processes.
"""
process1 = Process(target=task1, args=(loop, shared_list, ))
process2 = Process(target=task2, args=(loop, shared_list, ))
"""
Start the two processes and wait for them to finish.
"""
process1.start()
process2.start()
process1.join()
process2.join()
"""
Clean up
"""
loop.close()
manager.shutdown()
I have a Python multi-threaded application. I want to run an asyncio loop in a thread and post callbacks and coroutines to it from another thread. Should be easy, but I cannot get my head around the asyncio stuff.
I came up with the following solution, which does half of what I want; feel free to comment on anything:
import asyncio
import functools
import time
from threading import Thread
class B(Thread):
def __init__(self):
Thread.__init__(self)
self.loop = None
def run(self):
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop) #why do I need that??
self.loop.run_forever()
def stop(self):
self.loop.call_soon_threadsafe(self.loop.stop)
def add_task(self, coro):
"""this method should return a task object, that I
can cancel, not a handle"""
f = functools.partial(self.loop.create_task, coro)
return self.loop.call_soon_threadsafe(f)
    def cancel_task(self, xx):
        pass  # no idea
@asyncio.coroutine
def test():
while True:
print("running")
yield from asyncio.sleep(1)
b = B()
b.start()
time.sleep(1) #need to wait for loop to start
t = b.add_task(test())
time.sleep(10)
#here the program runs fine but how can I cancel the task?
b.stop()
So starting and stopping the loop works fine. I thought about creating the task using create_task, but that method is not threadsafe, so I wrapped it in call_soon_threadsafe. But I would like to be able to get the task object in order to be able to cancel the task. I could do something complicated using Future and Condition, but there must be a simpler way, isn't there?
I think you may need to make your add_task method aware of whether or not it's being called from a thread other than the event loop's. That way, if it's being called from the same thread, you can just call asyncio.async directly; otherwise, it can do some extra work to pass the task from the loop's thread to the calling thread. Here's an example:
import time
import asyncio
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future
class B(Thread):
def __init__(self, start_event):
Thread.__init__(self)
self.loop = None
self.tid = None
self.event = start_event
def run(self):
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
self.tid = current_thread()
self.loop.call_soon(self.event.set)
self.loop.run_forever()
def stop(self):
self.loop.call_soon_threadsafe(self.loop.stop)
def add_task(self, coro):
"""this method should return a task object, that I
can cancel, not a handle"""
def _async_add(func, fut):
try:
ret = func()
fut.set_result(ret)
except Exception as e:
fut.set_exception(e)
f = functools.partial(asyncio.async, coro, loop=self.loop)
if current_thread() == self.tid:
return f() # We can call directly if we're not going between threads.
else:
# We're in a non-event loop thread so we use a Future
# to get the task from the event loop thread once
# it's ready.
fut = Future()
self.loop.call_soon_threadsafe(_async_add, f, fut)
return fut.result()
def cancel_task(self, task):
self.loop.call_soon_threadsafe(task.cancel)
@asyncio.coroutine
def test():
while True:
print("running")
yield from asyncio.sleep(1)
event = Event()
b = B(event)
b.start()
event.wait() # Let the loop's thread signal us, rather than sleeping
t = b.add_task(test()) # This is a real task
time.sleep(10)
b.stop()
First, we save the thread of the event loop (self.tid) in the run method, so we can figure out later whether calls to add_task are coming from other threads. If add_task is called from a non-event-loop thread, we use call_soon_threadsafe to call a function that schedules the coroutine and then uses a concurrent.futures.Future to pass the task back to the calling thread, which waits on the result of the Future.
A note on cancelling a task: when you call cancel on a Task, a CancelledError will be raised in the coroutine the next time the event loop runs. This means that the coroutine the Task is wrapping will be aborted due to the exception the next time it hits a yield point - unless the coroutine catches the CancelledError and prevents itself from aborting. Also note that this only works if the function being wrapped is actually an interruptible coroutine; an asyncio.Future returned by BaseEventLoop.run_in_executor, for example, can't really be cancelled, because it's actually wrapped around a concurrent.futures.Future, and those can't be cancelled once their underlying function actually starts executing. In those cases, the asyncio.Future will say it's cancelled, but the function actually running in the executor will continue to run.
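To make the last point concrete, here is a small sketch of mine (exact behaviour can differ slightly between Python versions) showing that "cancelling" a run_in_executor future does not stop a function that is already running in the executor:

import asyncio
import time

def blocking_work():
    time.sleep(2)
    print("blocking_work finished anyway")  # still printed after the "cancel"

async def main():
    loop = asyncio.get_running_loop()
    fut = loop.run_in_executor(None, blocking_work)
    await asyncio.sleep(0.1)  # give the executor time to start the job
    fut.cancel()              # too late: the underlying function is already running
    print("future says cancelled:", fut.cancelled())
    await asyncio.sleep(2.5)  # blocking_work keeps running and completes anyway

asyncio.run(main())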
Edit: Updated the first example to use concurrent.futures.Future, instead of a queue.Queue, per Andrew Svetlov's suggestion.
Note: asyncio.async is deprecated since version 3.4.4 (and a syntax error on Python 3.7+, where async became a keyword); use asyncio.ensure_future instead.
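That is, the scheduling line in the snippet above would become something like:

f = functools.partial(asyncio.ensure_future, coro, loop=self.loop)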
You're doing everything right.
For stopping a task, add a method:
class B(Thread):
# ...
def cancel(self, task):
self.loop.call_soon_threadsafe(task.cancel)
BTW, you have to set up an event loop for the created thread explicitly with
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
because asyncio creates an implicit event loop only for the main thread.
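A tiny illustration of that point (a sketch of mine; the exact error message varies a little between Python versions):

import asyncio
import threading

def no_implicit_loop():
    try:
        asyncio.get_event_loop()  # no event loop has been set for this thread
    except RuntimeError as exc:
        print("worker thread:", exc)  # e.g. "There is no current event loop in thread 'Thread-1'."

t = threading.Thread(target=no_implicit_loop)
t.start()
t.join()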
Just for reference, here is the code I finally implemented based on the help I got on this site. It is simpler since I did not need all the features. Thanks again!
import asyncio
from threading import Thread
from concurrent.futures import Future
import functools
class B(Thread):
def __init__(self):
Thread.__init__(self)
self.loop = None
def run(self):
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
self.loop.run_forever()
def stop(self):
self.loop.call_soon_threadsafe(self.loop.stop)
def _add_task(self, future, coro):
task = self.loop.create_task(coro)
future.set_result(task)
def add_task(self, coro):
future = Future()
p = functools.partial(self._add_task, future, coro)
self.loop.call_soon_threadsafe(p)
return future.result() #block until result is available
def cancel(self, task):
self.loop.call_soon_threadsafe(task.cancel)
Since version 3.4.4 asyncio provides a function called run_coroutine_threadsafe to submit a coroutine object from a thread to an event loop. It returns a concurrent.futures.Future to access the result or cancel the task.
Using your example:
@asyncio.coroutine
def test(loop):
try:
while True:
print("Running")
yield from asyncio.sleep(1, loop=loop)
except asyncio.CancelledError:
print("Cancelled")
loop.stop()
raise
loop = asyncio.new_event_loop()
thread = threading.Thread(target=loop.run_forever)
future = asyncio.run_coroutine_threadsafe(test(loop), loop)
thread.start()
time.sleep(5)
future.cancel()
thread.join()