Using boto3 to await a synchronous lambda invocation - python

I want to invoke a lambda function synchronously (request/response) but use Python async/await to await the response.
response = await client.invoke('my-func', InvocationType='RequestResponse', Payload='...')
I found a kind of solution here but it is cumbersome and from 2016.
Is there a better approach today?

I found a way of doing it by manually running the invoke function on the asyncio event loop:
import asyncio
import concurrent.futures
import json

import boto3
import botocore.config

class LambdaClient():
    def __init__(self, concurrency: int = 20):
        # one worker thread per concurrent invocation, with a matching connection pool
        self.executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=concurrency,
        )
        client_config = botocore.config.Config(
            max_pool_connections=concurrency
        )
        self.client = boto3.client('lambda', config=client_config)

    async def invoke_async(self, snapshot):
        # run the blocking boto3 call in the thread pool so it can be awaited
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(self.executor, lambda: self.invoke(snapshot))
        return result

    def invoke(self, snapshot):
        payload = {
            'path': '/calculate/value',
            'body': json.dumps(snapshot)
        }
        b = bytes(json.dumps(payload), encoding='utf8')
        response = self.client.invoke(
            FunctionName='function-name',
            InvocationType='RequestResponse',
            LogType='None',
            Payload=b)
        if 'StatusCode' not in response or response['StatusCode'] != 200:
            raise ValueError(f'Lambda invocation failed with response {response}')
        output = response["Payload"].read()
        return output
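For completeness, a small usage sketch of the class above could look like this (the snapshot payloads and the function name are just hypothetical placeholders):

async def main():
    client = LambdaClient(concurrency=20)
    snapshots = [{'id': 1}, {'id': 2}, {'id': 3}]  # hypothetical payloads
    # each invoke_async call occupies a worker thread, but the coroutines
    # themselves can be awaited concurrently
    results = await asyncio.gather(*(client.invoke_async(s) for s in snapshots))
    print(results)

asyncio.run(main())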

Related

How to search by list of keywords using asyncio?

I'm trying to make my search method work with a list of search keywords.
Is there a way to make await/asyncio work through a list of searches?
async def _request(query: dict):
    async with httpx.AsyncClient() as client:
        r = await client.post('https://nmmgzjq6qi-2.algolianet.com/1/indexes/public_prod_inventory_track_index/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.12.0)%3B%20Browser', headers=headers, json=query)
        return r.json()

async def to_search(query: str, tags: list[str] = [], page=0, hitsPerPage=100):
    data = {
        "query": query,
        "page": page,
        "hitsPerPage": hitsPerPage,
        "facets": [
            "*"
        ],
        "analytics": True,
        "clickAnalytics": True,
        "tagFilters": [],
        "facetFilters": [
            make_tags_filter(tags)
        ],
        "maxValuesPerFacet": hitsPerPage,
        "enableABTest": False,
        "userToken": userToken,
        "filters": "",
        "ruleContexts": []
    }
    return await _request(data)

import asyncio

search = ['coffee', 'banana', 'apple']
#search = input()

for x in search:
    r = await asyncio.gather(*[to_search(x, page=i) for i in range(10)])
Also, is there a way to make search = input() accept a list of keywords (splitting them on commas)?
You can make several requests with any parameters the way shown in the code snippet below. Note that this is the simplest way to do it; if you have many tasks, you should implement a producer-consumer pattern using asyncio.Queue.
import asyncio
from typing import List
import httpx

async def to_search(url, client: httpx.AsyncClient):
    res = await client.get(url)
    await asyncio.sleep(3)
    return res.status_code

async def main_wrapper(urls: List[str]):
    # you need only one AsyncClient for an asyncio app
    async with httpx.AsyncClient() as client:
        results = await asyncio.gather(*[to_search(i, client) for i in urls])
        print(results)

if __name__ == '__main__':
    urls = ["http://google.com"] * 20
    asyncio.run(main_wrapper(urls=urls))
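As for the follow-up about search = input(): a small sketch (the prompt text is just an example) that splits a comma-separated input into the keyword list used above:

raw = input("Enter keywords, separated by commas: ")  # e.g. "coffee, banana, apple"
search = [kw.strip() for kw in raw.split(",") if kw.strip()]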

Python async API requests in batches

I'm trying to make async API calls this way:
func to send request:
async def get_data(client, postdata):
    res = await client.post(url=_url, headers=_headers, data=postdata)
    return res
func to parse JSON:
async def parse_res(client, postdata):
    res = await get_data(client, postdata)
    if bool(json.loads(res.text)['suggestions']):
        _oks = <...grab some JSON fields...>
    else:
        _oks = {}
    return _oks
I wrap these two funcs in main():
async def main(_jobs):
    async with httpx.AsyncClient() as client:
        batch = []
        calls = []
        for job in _jobs:
            _postdata = '{ "query": "' + job + '" }'
            calls.append(asyncio.create_task(parse_res(client, _postdata)))
        batch = await asyncio.gather(*calls)
        return batch
and then just run main().
But the API can only handle about 30-50 nearly simultaneous requests before it throws an HTTP 429 error.
So I need to send the calls in batches of 30 and process the 10,000 requests in chunks.
How do I process 10,000 (ten thousand) API calls in batches of 30?
One library that comes in handy here is funcy. It offers various helpers for working with sequences. One of them is chunks, which lets you split a sequence into chunks of equal size (or a smaller final chunk if the total size does not divide evenly).
from funcy import chunks

result = []
for job_chunk in chunks(30, _jobs):
    calls = [parse_res(client, '{ "query": "' + job + '" }') for job in job_chunk]
    batch = await asyncio.gather(*calls)
    result.extend(batch)
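Embedded in the question's main(), this could look roughly like the following (a sketch, assuming _jobs is the list of query strings and parse_res is defined as in the question):

from funcy import chunks
import asyncio
import httpx

async def main(_jobs):
    result = []
    async with httpx.AsyncClient() as client:
        # each chunk of 30 is awaited before the next one is sent,
        # which keeps the request rate below the API's limit
        for job_chunk in chunks(30, _jobs):
            calls = [parse_res(client, '{ "query": "' + job + '" }') for job in job_chunk]
            result.extend(await asyncio.gather(*calls))
    return result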
You could use Simon Hawe's answer; however, here is a different approach that does not use external libraries.
Use asyncio.Semaphore to limit the number of calls made concurrently; when the semaphore is released, it lets another task run.
import asyncio

sem = asyncio.Semaphore(30)  # no. of simultaneous requests

async def get_data(client, postdata):
    # at most 30 coroutines can be inside this block at the same time
    async with sem:
        res = await client.post(url=_url, headers=_headers, data=postdata)
        return res

async def parse_res(client, postdata):
    res = await get_data(client, postdata)
    if bool(json.loads(res.text)['suggestions']):
        _oks = <...grab some JSON fields...>
    else:
        _oks = {}
    return _oks

async def main(_jobs):
    async with httpx.AsyncClient() as client:
        calls = [
            asyncio.create_task(parse_res(client, '{ "query": "' + job + '" }'))
            for job in _jobs
        ]
        return await asyncio.gather(*calls)
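A sketch of how this could be driven (the query strings here are hypothetical placeholders; in practice _jobs would be the full list of 10,000 jobs):

if __name__ == '__main__':
    jobs = ['foo', 'bar', 'baz']  # hypothetical query strings
    results = asyncio.run(main(jobs))
    print(results)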

Run two concurrent task groups asynchronously with asyncio

I am trying to write a program using asyncio and was oriented towards this blog post. What I am trying to do is fetch some JSON data concurrently. For one input data frame, however, I would like to process the requested data further as soon as it becomes available.
So basically there are two groups of tasks:
process data in df1 concurrently and do some calc once JSON returned
process data in df2 concurrently
They are more or less independent of each other, but I want to run the group of tasks concurrently as well. Once both task groups are finished I want to further process them.
My question is whether my implementation is properly designed in terms of asyncio patterns, where I just used two gather statements, or whether this is the wrong concept. Here is a sketch:
import asyncio
import json
import logging
from typing import Dict

import aiohttp
from aiohttp import ClientSession

logger = logging.getLogger(__name__)

async def fetch_json(url: str, session: ClientSession, data: json.dumps) -> Dict:
    resp = await session.get(url=url, headers={"content-type": "application/json"}, data=data)
    resp.raise_for_status()
    logger.info("Got response [%s] for URL: %s", resp.status, url)
    json = await resp.json()
    return json

async def some_calc(url: str, session: ClientSession, data: json.dumps):
    res = await fetch_json(url=url, session=session, data=data)
    return [float(x) for x in res]

async def process_data(df: Dict, url: str, session: ClientSession):
    async with session:
        tasks = []
        for data in df:
            try:
                if df1:
                    task = some_calc(url=url, session=session, data=data)
                else:
                    task = fetch_json(url=url, session=session, data=data)
            except Exception as e:
                # ...
                pass
            tasks.append(
                task
            )
        res = await asyncio.gather(*tasks)
        return res

async def bulk_execute(df1, df2):
    url = "http://some.url/"
    async with ClientSession() as session:
        res = await asyncio.gather(process_data(df1, url, session), process_data(df2, url, session))
        return res

if __name__ == "__main__":
    res = asyncio.run(bulk_execute(df1, df2))
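To illustrate the pattern in isolation, a stripped-down, self-contained sketch of the two-level gather described above might look like this (dummy coroutines stand in for the real fetch and calculation):

import asyncio

async def work(group: str, n: int) -> str:
    await asyncio.sleep(0.1)  # stands in for an HTTP call
    return f"{group}-{n}"

async def process_group(group: str, n_items: int) -> list:
    # one gather per task group: all items in the group run concurrently
    return await asyncio.gather(*(work(group, i) for i in range(n_items)))

async def bulk():
    # the two groups themselves also run concurrently
    return await asyncio.gather(process_group("df1", 3), process_group("df2", 3))

print(asyncio.run(bulk()))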

How to add requests parameters when request is done in async def loop executor?

How do I add parameters like verify and proxies to the below requests.get?
In a non-async setting I would just do requests.get(url, proxies='some_proxy', verify=False), but I don't know how to put that in the below.
import asyncio
import concurrent.futures
import requests

ids = [2048854772, 2042055933, 2036234693, 2007740886, 2006259847, 2003100744]
token = '111111'
max_workers = len(ids)

async def main():
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        loop = asyncio.get_event_loop()
        futures = [
            loop.run_in_executor(
                executor,
                requests.get,
                'https://www.strava.com/api/v3/activities/{id}?include_all_efforts=true&access_token={token}'.format(id=id, token=token)
            )
            for id in ids
        ]
        for response in await asyncio.gather(*futures):
            print(response.text)
            pass

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
You can use a partial:
from functools import partial

def sum(a, b):
    return a + b

sum_with_two = partial(sum, 2)
sum_with_two(5)
>>> 7

sum_two_and_four = partial(sum, 2, 4)
sum_two_and_four()
>>> 6
In your case:
my_request = partial(requests.get, proxies='...', verify=False)

loop.run_in_executor(
    executor,
    my_request,  # arguments of the partial will be used
    '...url...'
)
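Plugged into the question's executor loop, this might look roughly as follows (the proxies dict is a placeholder; requests expects a mapping of scheme to proxy URL):

from functools import partial

my_request = partial(requests.get,
                     proxies={'https': 'http://my.proxy:8080'},  # placeholder proxy
                     verify=False)

futures = [
    loop.run_in_executor(
        executor,
        my_request,
        'https://www.strava.com/api/v3/activities/{id}?include_all_efforts=true&access_token={token}'.format(id=id, token=token)
    )
    for id in ids
]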

Asyncio exception handling, possible to not gather exceptions?

I have some code which makes some API calls with asyncio and aiohttp. For some URLs, asyncio will raise an exception, so I allow it to return it (with asyncio.gather(return_exceptions=True)) so it doesn't break the event loop. Is it possible to not gather the returned exceptions, so that only the results that worked are returned? Or do I need to clean up the list manually afterwards?
This is the code:
import asyncio
import aiohttp
import ssl
import datetime as dt

limit = 30
start_epoch = int(dt.datetime(2018,7,1).timestamp())
end_epoch = int(dt.datetime.now().timestamp())
epoch_step = 40000

url_list = []
while True:
    url = "https://api.pushshift.io/reddit/search/comment/?q=" + "Nestle" + "&size=" + str(limit) + "&after=" + str(start_epoch) + "&before=" + str(start_epoch + epoch_step)
    url_list.append(url)
    start_epoch += epoch_step
    if start_epoch > end_epoch:
        break

async def fetch(session, url):
    async with session.get(url, ssl=ssl.SSLContext()) as response:
        return await response.json()

async def fetch_all(urls, loop):
    async with aiohttp.ClientSession(loop=loop) as session:
        results = await asyncio.gather(*[fetch(session, url) for url in urls], return_exceptions=True)
        return results

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    urls = url_list
    htmls = loop.run_until_complete(fetch_all(urls, loop))
    print(htmls)
and it returns a list which looks something like this:
[ContentTypeError("0, message='Attempt to decode JSON with unexpected mimetype: text/html'",), {'data': [{'author':...]
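If the exceptions are kept with return_exceptions=True, one straightforward option is to filter them out of the returned list afterwards, for example (a small sketch based on the code above):

htmls = loop.run_until_complete(fetch_all(urls, loop))
successes = [r for r in htmls if not isinstance(r, Exception)]  # keep only real results
errors = [r for r in htmls if isinstance(r, Exception)]         # inspect or log these separately
print(f"{len(successes)} ok, {len(errors)} failed")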
