I'm working with Faust and would like to leverage its concurrency feature.
The example listed in the docs doesn't quite demonstrate the use of concurrency.
What I would like to do is read messages from a Kafka topic and unnest the JSON.
The shipments are then sent to a process that calculates billing, etc. I want to send 10 shipments at a time to the function that does the calculation, so I'm using concurrency to let 10 shipments be processed concurrently.
import faust
import time
import json
from typing import List
import asyncio


class Items(faust.Record):
    name: str
    billing_unit: str
    billing_qty: int


class Shipments(faust.Record, serializer="json"):
    shipments: List[Items]
    ship_type: str
    shipping_service: str
    shipped_at: str


app = faust.App('ships_app', broker='kafka://localhost:9092')
ship_topic = app.topic('test_shipments', value_type=Shipments)


@app.agent(value_type=str, concurrency=10)
async def mytask(records):
    # task that does some other activity
    async for record in records:
        print(f'received....{record}')
        time.sleep(5)


@app.agent(ship_topic)
async def process_shipments(shipments):
    # async for ships in stream.take(100, within=10):
    async for ships in shipments:
        data = ships.items
        uid = faust.uuid()
        for item in data:
            item_uuid = faust.uuid()
            print(f'{uid}, {item_uuid}, {ships.ship_type}, {ships.shipping_service}, {ships.shipped_at}, {item.name}, {item.billing_unit}, {item.billing_qty}')
            await mytask.send(value=("{} -- {}".format(uid, item_uuid)))
            # time.sleep(2)
        # time.sleep(10)


if __name__ == '__main__':
    app.main()
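For the batching part of the question, a minimal sketch of pulling up to 10 shipments at a time with stream.take, reusing the ship_topic and Shipments record defined above (the 5-second window is an arbitrary choice):

@app.agent(ship_topic)
async def process_shipment_batches(stream):
    # Collect up to 10 shipments, or whatever has arrived within 5 seconds,
    # and hand the whole batch to the billing calculation at once.
    async for batch in stream.take(10, within=5):
        for ships in batch:
            print(f'batched: {ships.ship_type}, {ships.shipping_service}, {ships.shipped_at}')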
Ok I figured out how it works. The problem with the example you gave was actually with the time.sleep bit, not the concurrency bit. Below are two silly examples that show how an agent would work with and without concurrency.
import faust
import asyncio

app = faust.App(
    'example_app',
    broker="kafka://localhost:9092",
    value_serializer='raw',
)

t = app.topic('topic_1')


# @app.agent(t, concurrency=1)
# async def my_task(tasks):
#     async for my_task in tasks:
#         val = my_task.decode('utf-8')
#         if val == "Meher":
#             # This will print out second because there is only one
#             # concurrent instance of the agent. It'll take 5ish seconds
#             # and print out right after Waldo.
#             print("Meher's a jerk.")
#         else:
#             await asyncio.sleep(5)
#             # Since there's only one instance running, this effectively
#             # blocks the agent.
#             print(f"Where did {val} go?")


@app.agent(t, concurrency=2)
async def my_task2(tasks):
    async for my_task in tasks:
        val = my_task.decode('utf-8')
        if val == "Meher":
            # This will print out first even though the Meher message is
            # received second.
            print("Meher's a jerk.")
        else:
            await asyncio.sleep(5)
            # This instance sleeps, but the second instance is free to
            # handle the next message in the meantime.
            print(f"Where did {val} go?")


# ===============================
# In another process run
from kafka import KafkaProducer

p = KafkaProducer()
p.send('topic_1', b'Waldo'); p.send('topic_1', b'Meher')
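The underlying point about time.sleep: it blocks the entire event loop, so no agent instance can make progress while it sleeps, whereas await asyncio.sleep suspends only the current coroutine. A minimal stand-alone illustration (plain asyncio, no Faust or Kafka needed):

import asyncio
import time

async def worker(n: int, blocking: bool):
    if blocking:
        time.sleep(1)            # blocks the whole event loop for one second
    else:
        await asyncio.sleep(1)   # suspends only this coroutine
    print(f'worker {n} done')

async def main():
    # With blocking=False the three workers finish in roughly 1 second overall;
    # with blocking=True they effectively run one after another (about 3 seconds).
    await asyncio.gather(*(worker(n, blocking=False) for n in range(3)))

asyncio.run(main())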
I am trying to run two loops simultaneously. The second loop depends on the first loop's output and fetches its input from the ids list, so it doesn't need to wait for the first loop to finish completely. I tried several libraries and approaches but failed to find the right structure for this.
import time
import pandas as pd
import requests
import json
from matplotlib import pyplot
import seaborn as sns
import numpy as np

API_KEY = ''
df = pd.read_csv('lat_long file')

# get name and information of each place
id = df['id']
lat = df['latitude']
lon = df['longitude']

ids = []
loc = []
unit = []

print('First API now running')

def get_details(lat, lon):
    try:
        url = "https://maps.googleapis.com/maps/api/geocode/json?latlng=" + str(lat) + ',' + str(lon) + '&key=' + API_KEY
        response = requests.get(url)
        data = json.loads(response.text)
        ids.append(data['results'][0]['place_id'])
    except Exception as e:
        print('This code is NOT running because of', e)
    return data

def get_deta(ids):
    url1 = "https://maps.googleapis.com/maps/api/place/details/json?language=en-US&placeid=" + str(ids) + "&key=" + API_KEY
    responsedata = requests.get(url1)
    data2 = json.loads(responsedata.text)
    if 'business_status' in data2['result'].keys():
        loc.append(data2['result']['business_status'])
    else:
        loc.append('0')
    flag = False
    if data2['result']:
        for level in data2['result']['address_components']:
            # if len(level['types']) > 1:
            if level['types'][0] == 'premise':
                flag = True
                unit.append(level['long_name'][4:])
    else:
        print(data2)
    if not flag:
        unit.append('0')
    return data2

def loop1():
    for i in range(len(id)):
        get_details(lat[i], lon[i])
    return

print('Second API now running')

def loop2():
    # printing and appending addresses to use them with the next API
    for i in range(50):
        get_deta(ids[i])
    return

loop1()
loop2()
It is not very clear what you are trying to achieve here. How exactly does the second API depend on the first?
To achieve concurrency you could use the asyncio library, which is designed to perform concurrent network requests efficiently. However, the requests library you are using is synchronous; you would have to switch to an asynchronous one such as aiohttp.
Given that, you can communicate between two concurrent tasks using asyncio.Queue. Here is a draft of what your program could look like:
import asyncio
import aiohttp
import json

# API_KEY and the (lat, lon) coordinates come from the original script


async def get_details(lat, lon, session: aiohttp.ClientSession, id_queue: asyncio.Queue):
    url: str = f"https://maps.googleapis.com/maps/api/geocode/json?latlng={lat},{lon}&key={API_KEY}"
    async with session.get(url) as response:
        data = json.loads(await response.text())
        await id_queue.put(data['results'][0]['place_id'])


async def get_data(id, session: aiohttp.ClientSession, loc_queue: asyncio.Queue):
    # Network request and JSON decoding
    ...
    await loc_queue.put(data['result']['business_status'])


async def loop_1(coords, session: aiohttp.ClientSession, id_queue: asyncio.Queue):
    await asyncio.gather(
        *[get_details(lat, lon, session, id_queue) for lat, lon in coords]
    )


async def loop_2(session: aiohttp.ClientSession, id_queue: asyncio.Queue, loc_queue: asyncio.Queue):
    while True:
        id = await id_queue.get()
        await get_data(id, session, loc_queue)


async def main():
    id_queue = asyncio.Queue(maxsize=100)
    loc_queue = asyncio.Queue(maxsize=100)

    coords = ...  # list of (lat, lon) tuples built from the CSV file

    async with aiohttp.ClientSession() as session:
        await asyncio.gather(
            loop_1(coords, session, id_queue),
            loop_2(session, id_queue, loc_queue)
        )


if __name__ == "__main__":
    asyncio.run(main())
I simplified your code for the purposes of the example. If you take a look at the main() function, the two loops are executed concurrently with asyncio.gather(). The first loop gets the details of all places concurrently (again with asyncio.gather) and feeds a shared queue, id_queue. The second loop waits for new ids to come up in the queue and processes them with the second API as soon as they are available. It then enqueues the results in a final queue, loc_queue.
You could extend this program by plugging a third API into this last queue and continuing the processing.
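For instance, a minimal sketch of such a third stage consuming loc_queue (any real third API call is left as a placeholder):

async def loop_3(loc_queue: asyncio.Queue):
    while True:
        # Wake up as soon as the second stage has produced a result
        business_status = await loc_queue.get()
        # ... hypothetical third API call or further processing goes here ...
        print(business_status)

To run it, add loop_3(loc_queue) to the asyncio.gather(...) call in main().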
I've inherited some code that uses Python Bleak to scan for advertisements emitted from a certain device. Whenever an advertisement from the Bluetooth MAC address and service ID we're looking for is detected, and a certain condition on the extracted payload is true, we want to terminate and return. In the attached code, I've masked the Bluetooth address and service IDs.
Not being too familiar with the event loop: is there a way to exit before the timer runs out? I suppose there's probably a better way to approach this problem.
Sample code:
import asyncio
import struct

from bleak import BleakScanner

timeout_seconds = 10
address_to_look_for = 'masked'
service_id_to_look_for = 'masked'


def detection_callback(device, advertisement_data):
    if device.address == address_to_look_for:
        byte_data = advertisement_data.service_data.get(service_id_to_look_for)
        num_to_test = struct.unpack_from('<I', byte_data, 0)
        if num_to_test == 1:
            print('here we want to terminate')


async def run():
    scanner = BleakScanner()
    scanner.register_detection_callback(detection_callback)
    await scanner.start()
    await asyncio.sleep(timeout_seconds)
    await scanner.stop()


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())
I'm sure there are many ways this can be done. A small modification to your code would be, rather than having asyncio.sleep run for the full period before you stop the scan, to have a while loop that ends either when the time has elapsed or when the device-found event fires.
For example:
import asyncio
import struct

from bleak import BleakScanner

timeout_seconds = 20
address_to_look_for = 'F1:D9:3B:39:4D:A2'
service_id_to_look_for = '0000feaa-0000-1000-8000-00805f9b34fb'


class MyScanner:
    def __init__(self):
        self._scanner = BleakScanner()
        self._scanner.register_detection_callback(self.detection_callback)
        self.scanning = asyncio.Event()

    def detection_callback(self, device, advertisement_data):
        # Looking for:
        # AdvertisementData(service_data={
        #     '0000feaa-0000-1000-8000-00805f9b34fb': b'\x00\xf6\x00\x00\x00Jupiter\x00\x00\x00\x00\x00\x0b'},
        #     service_uuids=['0000feaa-0000-1000-8000-00805f9b34fb'])
        if device.address == address_to_look_for:
            byte_data = advertisement_data.service_data.get(service_id_to_look_for)
            num_to_test, = struct.unpack_from('<I', byte_data, 0)
            if num_to_test == 62976:
                print('\t\tDevice found so we terminate')
                self.scanning.clear()

    async def run(self):
        await self._scanner.start()
        self.scanning.set()
        end_time = loop.time() + timeout_seconds
        while self.scanning.is_set():
            if loop.time() > end_time:
                self.scanning.clear()
                print('\t\tScan has timed out so we terminate')
            await asyncio.sleep(0.1)
        await self._scanner.stop()


if __name__ == '__main__':
    my_scanner = MyScanner()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(my_scanner.run())
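An alternative to polling in a while loop is to wait on the event directly with asyncio.wait_for, which returns as soon as the callback sets the event and raises asyncio.TimeoutError when the timeout expires. A minimal sketch, reusing the constants above but flipping the logic so the event is set (rather than cleared) when the device is found, with the payload check omitted for brevity:

class MyScannerWaitFor:
    def __init__(self):
        self._scanner = BleakScanner()
        self._scanner.register_detection_callback(self.detection_callback)
        self.found = asyncio.Event()

    def detection_callback(self, device, advertisement_data):
        if device.address == address_to_look_for:
            self.found.set()  # wakes up the waiter in run() immediately

    async def run(self):
        await self._scanner.start()
        try:
            # Returns as soon as the event is set, or raises after timeout_seconds
            await asyncio.wait_for(self.found.wait(), timeout=timeout_seconds)
            print('Device found so we terminate')
        except asyncio.TimeoutError:
            print('Scan has timed out so we terminate')
        finally:
            await self._scanner.stop()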
I'm trying to use an asynchronous script for parsing with asyncio. I found a similar question and took its answer as a pattern for my task. I added latency between requests (first part of the linked answer) and also tried to add a counter of active requests (second part). But this code launches just 5 requests and then simply waits.
I couldn't find a good explanation of how asyncio.Event works, so I would like to ask you to help me improve my code. Thank you in advance.
import json
from bs4 import BeautifulSoup
import asyncio
import aiohttp

active_calls = 0
MAX_CALLS = 5


def write_to_txt_file(text, name):
    f = open(f'{PATH}{name}.txt', 'w')
    f.write(text)
    f.close()


async def getData(item, session, next_delay, event):
    global active_calls
    await event.wait()
    if active_calls > MAX_CALLS - 1:
        event.clear()
        next_delay = 0.1
    print('start', active_calls)
    active_calls += 1
    next_delay += DELAY
    await asyncio.sleep(next_delay)
    try:
        async with session.get(url=item['Link']) as response:
            soup = BeautifulSoup(await response.text(), 'html.parser')
            name = str(item["ID"]) + '. ' + item["Title"][:100]
            text = soup.find(id="begin").get_text()
            write_to_txt_file(text, name)
    finally:
        active_calls -= 1
        if active_calls == 0:
            event.set()


async def parseFromJson():
    with open('./data2/csvjson.json') as data_file:  # take links from JSON
        data = json.load(data_file)

    async with aiohttp.ClientSession() as session:
        tasks = []
        event = asyncio.Event()
        event.set()
        next_delay = 0.1
        DELAY = 0.3
        for item in data:
            task = asyncio.create_task(getData(item, session, next_delay, event))
            next_delay += DELAY
            tasks.append(task)
        await asyncio.gather(*tasks)


def main():
    asyncio.run(parseFromJson())


if __name__ == "__main__":
    main()
UPD: As far as I can tell, this code can only stop requests but never starts them again?
UPD: I have changed my code and now it works like this:
1. All getData() functions are launched at one time.
2. The script starts 5 requests and the rest of them stop at the 'await event.wait()' line.
3. When all 5 requests are finished, 'event.set()' is called.
4. After this, all the remaining functions continue and the rest of the requests start (without the limitation of 5 requests).
How to fix it?
So I just added a loop so that every function re-checks the condition each time it wakes up. It doesn't seem quite correct, but it helped me:
    while active_calls > MAX_CALLS:
        print(1)
        await event.wait()
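For what it's worth, capping the number of in-flight requests at MAX_CALLS is usually expressed with asyncio.Semaphore rather than an Event plus a manual counter. A minimal sketch along the lines of the code above (keeping item['Link'] and the MAX_CALLS limit, dropping the delay logic):

import asyncio
import aiohttp

MAX_CALLS = 5

async def getData(item, session, semaphore):
    # At most MAX_CALLS coroutines can be inside this block at the same time;
    # the others wait here until a slot is released.
    async with semaphore:
        async with session.get(url=item['Link']) as response:
            return await response.text()

async def parseFromJson(data):
    semaphore = asyncio.Semaphore(MAX_CALLS)
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(getData(item, session, semaphore)) for item in data]
        await asyncio.gather(*tasks)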
I am writing some code where I have 3 processes (spawned from the main process). The first one uses asyncio to create 3 coroutines and switch between them. The other two processes run independently and generate two outputs that are used in one of the coroutines of the first process.
Communication is managed with multiprocessing.Queue(): the main process puts the input data into queue_source_position_hrir_calculator and queue_source_position_cutoff_calculator, then these two queues are emptied by p2_hrir_computation_process and p3_cutoff_computation_process. These two processes output their computation results into two output queues, queue_computed_hrirs and queue_computed_cutoff.
Finally, these two queues are consumed by the asyncio process, specifically inside the input_parameters_coroutine function.
The full code is the following (I will highlight the key parts in following snippets):
import asyncio
import multiprocessing
import numpy as np
import time

from classes.HRIR_interpreter_min_phase_linear_interpolation import HRIR_interpreter_min_phase_linear_interpolation
from classes.object_renderer import ObjectRenderer

# Useful resources: https://bbc.github.io/cloudfit-public-docs/asyncio/asyncio-part-2
# https://realpython.com/async-io-python/

Fs = 44100


# region Async_IO functions
async def audio_input_coroutine(overlay):
    for i in range(0, 100):
        print('Executing audio input coroutine')
        print(overlay)
        await asyncio.sleep(1 / (Fs * 4))


async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        # print(overlay)
        current_hrir = queue_computed_hrirs.get()
        print('got current hrir')
        current_cutoff = queue_computed_cutoff.get()
        print('got current cutoff')
        await asyncio.sleep(0.5)


async def audio_output_coroutine(overlay):
    for i in range(0, 10):
        print('Executing audio_output coroutine')
        # print(overlay)
        await asyncio.sleep(0.5)


async def main_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    await asyncio.gather(audio_input_coroutine(overlay), input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff), audio_output_coroutine(overlay))


def async_IO_main_process(queue_computed_hrirs, queue_computed_cutoff):
    overlay = 10
    asyncio.run(main_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff))
# endregion


# region HRIR_computation_process
def compute_hrir(queue_source_position, queue_computed_hrirs):
    print('computing hrir')
    SOFA_filename = '../HRTF_data/HUTUBS_min_phase.sofa'
    # loading the simulated dataset using the support class HRIRInterpreter
    HRIRInterpreter = HRIR_interpreter_min_phase_linear_interpolation(SOFA_filename=SOFA_filename)
    # variable to check if I have other positions in my input queue
    eof_source_position = False
    # Un-comment following line to return when no more messages
    while not eof_source_position:
    # while True:
        # print('inside while loop')
        time.sleep(1)
        # print('state of the queue', queue_source_position.empty())
        if not eof_source_position:
            position = queue_source_position.get()
            if position is None:
                eof_source_position = True  # end of messages indicator
            else:
                required_IR = HRIRInterpreter.get_interpolated_IR(position[0], position[1], 1)
                queue_computed_hrirs.put(required_IR)
                # print('printing computed HRIR:', required_IR)
    print('completed hrir computation, adding none to queue')
    queue_computed_hrirs.put(None)  # end of messages indicator
    print('completed hrir process')
# endregion


# region cutoff_computation_process
def compute_cutoff(queue_source_position, queue_computed_cutoff):
    print('computing cutoff')
    cutoff = 20000
    object_renderer = ObjectRenderer()
    object_positions = np.array([(20, 0), (40, 0), (100, 0), (225, 0)])
    eof_source_position = False
    # Un-comment following line to return when no more messages
    while not eof_source_position:
    # while True:
        time.sleep(1)
        object_renderer.update_object_position(object_positions)
        if not eof_source_position:
            print('inside source position update')
            source_position = queue_source_position.get()
            if source_position is None:  # end of messages indicator
                eof_source_position = True
            else:
                cutoff = object_renderer.get_cutoff(azimuth=source_position[0], elevation=source_position[1])
                queue_computed_cutoff.put(cutoff)
    queue_computed_cutoff.put(None)  # end of messages indicator
# endregion


if __name__ == "__main__":
    import time

    queue_source_position_hrir_calculator = multiprocessing.Queue()
    queue_source_position_cutoff_calculator = multiprocessing.Queue()
    queue_computed_hrirs = multiprocessing.Queue()
    queue_computed_cutoff = multiprocessing.Queue()

    i = 0.0
    # Basically here I am writing a sequence of positions into the queue,
    # then I add a None value to detect when I am done with the simulation so the process can end
    for _ in range(10):
        # print('into main while-> source_position:', source_position[0])
        source_position = np.array([i, 0.0])
        queue_source_position_hrir_calculator.put(source_position)
        queue_source_position_cutoff_calculator.put(source_position)
        i += 10

    queue_source_position_hrir_calculator.put(None)  # "end of messages" indicator
    queue_source_position_cutoff_calculator.put(None)  # "end of messages" indicator

    p1_async_IO_process = multiprocessing.Process(target=async_IO_main_process, args=(queue_computed_hrirs, queue_computed_cutoff))  # process that manages the ASYNC_IO coroutines between DMAs
    p2_hrir_computation_process = multiprocessing.Process(target=compute_hrir, args=(queue_source_position_hrir_calculator, queue_computed_hrirs))
    p3_cutoff_computation_process = multiprocessing.Process(target=compute_cutoff, args=(queue_source_position_cutoff_calculator, queue_computed_cutoff))

    p1_async_IO_process.start()
    p2_hrir_computation_process.start()
    p3_cutoff_computation_process.start()

    # temp cycle to join processes
    # for _ in range(2):
    #     current_hrir = queue_computed_hrirs.get()
    #     current_cutoff = queue_computed_cutoff.get()

    print('joining async_IO process')
    p1_async_IO_process.join()
    print('joined async_IO process')

    # NB: to join a process, its queues must be empty. So before calling join on p2,
    # I should get the values from the queue_computed_hrirs queue
    print('joining hrir computation process')
    p2_hrir_computation_process.join()
    print('joined hrir computation process')

    print('joining cutoff computation process')
    p3_cutoff_computation_process.join()
    print('joined cutoff computation process')

    print("completed main")
The important part of the code is:
async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        # print(overlay)
        current_hrir = queue_computed_hrirs.get()
        print('got current hrir')
        current_cutoff = queue_computed_cutoff.get()
        print('got current cutoff')
        await asyncio.sleep(0.5)
This coroutine receives 3 inputs: overlay (a dummy variable I am using for future developments) and the two multiprocessing.Queue() objects, queue_computed_hrirs and queue_computed_cutoff.
At the moment my input_parameters_coroutine gets "stuck" while executing current_hrir = queue_computed_hrirs.get() and current_cutoff = queue_computed_cutoff.get(). I say "stuck" because the code works fine and completes its execution; the problem is that those two calls are blocking, so my coroutine stops until it has something to get from the queue.
What I would like to achieve is: try to execute current_hrir = queue_computed_hrirs.get(); if it is not possible at that moment, switch to another coroutine and let it do its job, then come back and check whether current_hrir = queue_computed_hrirs.get() can now be executed; if yes, go on, if not, switch to another coroutine again.
I saw that there are some problems in making asyncio and multiprocessing communicate (What kind of problems (if any) would there be combining asyncio with multiprocessing?, Can I somehow share an asynchronous queue with a subprocess?) but I wasn't able to find a smart solution to my problem.
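One way to get that behaviour, as a minimal sketch assuming Python 3.9+ (for asyncio.to_thread), is to hand the blocking Queue.get() calls to a worker thread so that only this coroutine is suspended while waiting and the event loop keeps switching to the other coroutines:

async def input_parameters_coroutine(overlay, queue_computed_hrirs, queue_computed_cutoff):
    for i in range(0, 10):
        print('Executing audio input_parameters coroutine')
        # The blocking .get() runs in a worker thread; awaiting it suspends
        # only this coroutine, not the whole event loop.
        current_hrir = await asyncio.to_thread(queue_computed_hrirs.get)
        print('got current hrir')
        current_cutoff = await asyncio.to_thread(queue_computed_cutoff.get)
        print('got current cutoff')
        await asyncio.sleep(0.5)

On older Python versions, loop.run_in_executor(None, queue_computed_hrirs.get) achieves the same effect.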
I want to use the bleak library in Python to receive data from a Bluetooth Low Energy device. This part is working. My problem now is that I don't know how to run this code in the background or in parallel.
Eventually, I want to build a tiny Python app that processes the data from the Bluetooth device. So bleak loops all the time, fetching data from the Bluetooth device and sending it to the main process, where it is processed and displayed.
For some reason, bleak does not run in a thread. Is it possible to use asyncio for this (since it is already used by bleak, it is maybe a good way to go)?
I checked out threads and multiprocessing, but somehow I only found examples without processes that loop infinitely and send data. I'm totally new to the topic of parallelization and/or asynchronous processes. Maybe one of you can give a hint on where to look for a proper solution for this case.
Below is my code so far (for now I just loop and print data).
from bleak import BleakClient
import asyncio
import json
import time

current_index = 0
time_array = [0] * 20


def TicTocGenerator():
    # Generator that returns time differences
    ti = 0            # initial time
    tf = time.time()  # final time
    while True:
        ti = tf
        tf = time.time()
        yield tf - ti  # returns the time difference


TicToc = TicTocGenerator()  # create an instance of the TicTocGen generator


# This will be the main function through which we define both tic() and toc()
def toc(tempBool=True):
    # Prints the time difference yielded by generator instance TicToc
    tempTimeInterval = next(TicToc)
    global current_index
    if tempBool:
        # print("Elapsed time: %f seconds.\n" % tempTimeInterval)
        time_array[current_index] = tempTimeInterval
        if current_index == 19:
            current_index = 0
        else:
            current_index += 1


def tic():
    # Records a time in TicToc, marks the beginning of a time interval
    toc(False)


def Average(lst):
    return sum(lst) / len(lst)


# address = "30:ae:a4:5d:bc:ba"
address = "CCA9907B-10EA-411E-9816-A5E247DCA0C7"
MODEL_NBR_UUID = "beb5483e-36e1-4688-b7f5-ea07361b26a8"


async def run(address, loop):
    async with BleakClient(address, loop=loop) as client:
        while True:
            tic()
            model_number = await client.read_gatt_char(MODEL_NBR_UUID)
            toc()
            json_payload = json.loads(model_number)
            print()
            print(json_payload)
            print("Temp [°C]: " + "{:.2f}".format(json_payload["Temp"]))
            print("Volt [V]: " + "{:.2f}".format(json_payload["Volt"]))
            print("AngX: " + str(json_payload["AngX"]))
            print("AngY: " + str(json_payload["AngY"]))
            print("AngZ: " + str(json_payload["AngZ"]))
            # print("Millis: {0}".format("".join(map(chr, model_number))))
            print("Average [ms]: {:.1f}".format(Average(time_array) * 1000))


loop = asyncio.get_event_loop()
loop.run_until_complete(run(address, loop))
I had to make a GUI for an app that automates FUOTA on multiple BLE devices, so my solution was to put the bleak loop in a separate thread in order to be able to use the tkinter mainloop in the main thread. You need to use asyncio.run_coroutine_threadsafe to schedule a new task from the main thread.
import asyncio
from threading import Thread

import tkinter as tk
from bleak import BleakScanner


async def scan():
    devices = await BleakScanner.discover()
    for device in devices:
        print(device)


def startScan():
    # call startScan() from main thread
    asyncio.run_coroutine_threadsafe(scan(), loop)


if __name__ == "__main__":
    window = tk.Tk()
    # ...

    loop = asyncio.get_event_loop()

    def bleak_thread(loop):
        asyncio.set_event_loop(loop)
        loop.run_forever()

    t = Thread(target=bleak_thread, args=(loop,))
    t.start()

    window.mainloop()
    loop.call_soon_threadsafe(loop.stop)
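As a usage note, asyncio.run_coroutine_threadsafe returns a concurrent.futures.Future, so the main (tkinter) thread can also wait for, or collect results from, work running on the bleak thread. A small sketch (the button wiring is only an illustration):

def scan_and_wait():
    # Schedule scan() on the bleak loop and block (up to 10 seconds) until it finishes.
    # future.result() blocks the GUI thread, so in a real app you would rather
    # poll the future periodically, e.g. from window.after().
    future = asyncio.run_coroutine_threadsafe(scan(), loop)
    future.result(timeout=10)

# Hypothetical button wiring inside the __main__ block:
# tk.Button(window, text="Scan", command=startScan).pack()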