I'm attempting to write an AWS Lambda which will loop over all Cloudwatch log groups, creating a metric filter for a search term on each log group.
Unfortunately I am finding that although all of my calls to put_metric_filter receive HTTP 200 responses, most of the calls result in nothing getting created (4/15 calls resulting in the creation of a filter).
I have an AWS Lambda with this handler file 'handler.py':
from __future__ import print_function
from basicExample import ManageMetricsAndAlarms
import json, logging
log = logging.getLogger()
log.setLevel(logging.INFO)
def handler(event, context):
log.info("Received event {}".format(json.dumps(event)))
mc = ManageMetricsAndAlarms(event, context)
response = mc.main()
return json.dumps(response)
Which calls the ManageMetricsAndAlarms class from 'basicExample.py' which maps over an array of log group names, creating a metric for each which filters on the term 'ERROR':
from __future__ import print_function
import boto3, os, sys, json, botocore, logging
log = logging.getLogger()
log.setLevel(logging.INFO)
class ManageMetricsAndAlarms:
# -------------------------------------------------
def __init__(self,event,context):
self.event = event
# -------------------------------------------------
def main(self):
cloudwatch = boto3.resource('cloudwatch')
metricsNamespace = 'ExampleMetrics'
errorFilter = '{ $.levelname = "ERROR" }'
# Supposing that I have log groups for 10 imaginatively named lambdas
logGroupNames = [
'/aws/lambda/Lambda-1', '/aws/lambda/Lambda-2',
'/aws/lambda/Lambda-3', '/aws/lambda/Lambda-4',
'/aws/lambda/Lambda-5', '/aws/lambda/Lambda-6',
'/aws/lambda/Lambda-7', '/aws/lambda/Lambda-8',
'/aws/lambda/Lambda-9', '/aws/lambda/Lambda-10'
]
# map over the log groups adding a metric filter for 'ERROR' to each
responses = map(lambda lg: self.createErrorFilter(metricsNamespace, errorFilter, lg), logGroupNames)
return responses
# -------------------------------------------------
def createErrorFilter(self, metricsNamespace, filterPattern, logGroup):
metricName = logGroup + '_ErrorCount'
logs_client = boto3.client('logs')
log.info('Put metric filter ' + metricName + ' with filter $.levelname-ERROR on logGroup: ' + logGroup)
errorFilter = logs_client.put_metric_filter(
logGroupName = logGroup,
filterName ='ERROR-filter',
filterPattern = filterPattern,
metricTransformations = [
{
'metricNamespace': metricsNamespace,
'metricValue': '1',
'metricName': metricName,
}
]
)
log.info('errorFilter response: ' + json.dumps(errorFilter))
return errorFilter
# -------------------------------------------------
I'm quite new to python so I expect I've missed something basic but any help would be much appreciated!
Few things to consider:
Why would you put this on a lambda? are you going to put the same filter every minute/hour on the same lambdas? In general you should execute your script only once (or just after deploying new lambdas.
map is a lazy evaluator, so you will need something like
list(map(function x: print(x), iterable))
if you want to execute the function
Here is an example
import boto3
def createErrorFilter(metricsNamespace, filterPattern, logGroup):
metricName = logGroup + '_example'
logs_client = boto3.client('logs')
errorFilter = logs_client.put_metric_filter(
logGroupName = logGroup,
filterName ='ERROR-filter',
filterPattern = filterPattern,
metricTransformations = [
{
'metricNamespace': metricsNamespace,
'metricValue': '1',
'metricName': metricName,
}
]
)
print('ok')
return
cloudwatch = boto3.resource('cloudwatch')
metricsNamespace = 'ExampleMetrics-2'
errorFilter = 'ERROR'
logGroupNames = [
'/aws/lambda/lambda1', '/aws/lambda/lambda2'
]
# map over the log groups adding a metric filter for 'ERROR' to each
responses = list(map(lambda lg: createErrorFilter(metricsNamespace, errorFilter, lg), logGroupNames))
Related
I'm using aws lambda for a slack app, and I'm handling an interactive response(so I need to send a response in 3 seconds)
I invoke another lambda in my code with the Event type, and returning a return {"statusCode": 200} but I can't find in cw logs the returned value, the lambda execute with no issues but there is no returned value.
this is my code:
import logging
from urllib.parse import parse_qs
import utils.slack.client as slack
from functions.flows.update_zendesk_ticket import pass_to_pso
from lambda_warmer.lambda_warmer import lambda_warmup
from utils.common import invoke_lambda, PSO_NOC_ALERTS
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
#lambda_warmup()
def lambda_handler(event, context):
logger.info(f'this is the event: {event}')
logger.info(f'this is the context: {context}')
params = dict(parse_qs(event['body'], keep_blank_values=False))
if "payload" in params:
payload = eval(params["payload"][0].replace('false', 'False').replace('null', 'None').replace('true', 'True'))
if payload["type"] == "message_action":
logger.info(f'{payload["user"]["username"]} clicked on {payload["view"]["callback_id"]}')
elif payload["type"] == "view_submission":
logger.debug(payload)
logger.info(f'{payload["user"]["username"]} submitted {payload["view"]["callback_id"]}')
submitted_data = payload["view"]["state"]["values"]
logger.info(submitted_data)
if payload["view"]["callback_id"] == "pass_to_pso":
result = pass_to_pso_handler(submitted_data)
return result
return {"statusCode": 200}
def pass_to_pso_handler(submitted_data):
pso_slack_id = submitted_data["pso"]["pso_select-action"]["selected_user"]
slack_client = slack.SlackClient()
pso_email = slack_client.get_email_from_slack(pso_slack_id)
zd_ticket_id = submitted_data["ticket_id"]["ticket_id-action"]["value"]
thread_link = submitted_data["thread_link"]["thread_link-action"]["value"]
reply_language = submitted_data["reply_language"]["reply_language-action"]["selected_option"][
"value"]
reply_type = submitted_data["reply_type"]["reply_type-action"]["selected_option"]["value"]
pass_to_pso(pso_email=pso_email, ticket_id=zd_ticket_id, thread_link=thread_link,
reply_language=reply_language, reply_type=reply_type)
pso_name = pso_email.split('#')[0]
invoke_lambda({
"pso": pso_name,
"ticket id": zd_ticket_id,
"channel_id": PSO_NOC_ALERTS
}, "Event")
return {"statusCode": 200}
the invoke function:
def invoke_lambda(payload, invocation_type):
client = boto3.client('lambda', 'us-east-1')
response = client.invoke(
FunctionName=SLACK_MESSAGE_LAMBDA,
InvocationType=invocation_type,
Payload=bytes(json.dumps(payload), encoding='utf8'))
and this is the last rows of my cw logs
I think the only way to log your return to CloudWatch Logs by printing it. Or else, it is only visible to your function's integrations such as API Gateway.
import json
status = {"statusCode": 200}
print(json.dumps(status))
return status
I'm trying to validate WebApp data but the result is not what I wanted.
Telegram documentation:
data_check_string = ...
secret_key = HMAC_SHA256(<bot_token>, "WebAppData")
if (hex(HMAC_SHA256(data_check_string, secret_key)) == hash) {
// data is from Telegram
}
MyCode:
BOT_TOKEN = '5139539316:AAGVhDje2A3mB9yA_7l8-TV8xikC7KcudNk'
data_check_string = 'query_id=AAGcqlFKAAAAAJyqUUp6-Y62&user=%7B%22id%22%3A1246866076%2C%22first_name%22%3A%22Dante%22%2C%22last_name%22%3A%22%22%2C%22username%22%3A%22S_User%22%2C%22language_code%22%3A%22en%22%7D&auth_date=1651689536&hash=de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb'
data_check_arr = data_check_string.split('&')
needle = 'hash='
hash_item = ''
telegram_hash = ''
for item in data_check_arr:
if item[0:len(needle)] == needle:
telegram_hash = item[len(needle):]
hash_item = item
data_check_arr.remove(hash_item)
data_check_arr.sort()
data_check_string = "\n".join(data_check_arr)
secret_key = hmac.new("WebAppData".encode(), BOT_TOKEN.encode(), hashlib.sha256).digest()
calculated_hash = hmac.new(data_check_string.encode(), secret_key, hashlib.sha256).hexdigest()
print(calculated_hash == telegram_hash) # print False
I'm trying to validate webapp data in python, but my code didn't give the intended result.
the hash which my code gives me is different from the telegram's one.
UPDATE: valid data added, and bot-token has been changed.
You need to unquote data_check_string
from urllib.parse import unquote
data_check_string = unquote('query_id=AAGcqlFKAAAAAJyqUUp6-Y62&user=%7B%22id%22%3A1246866076%2C%22first_name%22%3A%22Dante%22%2C%22last_name%22%3A%22%22%2C%22username%22%3A%22S_User%22%2C%22language_code%22%3A%22en%22%7D&auth_date=1651689536&hash=de7f6b26aadbd667a36d76d91969ecf6ffec70ffaa40b3e98d20555e2406bfbb')
And swap the arguments
calculated_hash = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()
You can replace the for-loops with a couple of lines (already incorporates kurdyukovpv's suggestion to unquote the query string):
data_check_string = sorted([ chunk.split("=") for chunk in unquote(data_check_string).split("&")
if chunk[:len("hash=")]!="hash="],
key=lambda x: x[0])
data_check_string = "\n".join([f"{rec[0]}={rec[1]}" for rec in data_check_string])
EDIT: Figured I might as well just post the entire working function I got out of this thread ) :
import hmac
import hashlib
from urllib.parse import unquote
def validate(hash_str, init_data, token, c_str="WebAppData"):
"""
Validates the data received from the Telegram web app, using the
method documented here:
https://core.telegram.org/bots/webapps#validating-data-received-via-the-web-app
hash_str - the has string passed by the webapp
init_data - the query string passed by the webapp
token - Telegram bot's token
c_str - constant string (default = "WebAppData")
"""
init_data = sorted([ chunk.split("=")
for chunk in unquote(init_data).split("&")
if chunk[:len("hash=")]!="hash="],
key=lambda x: x[0])
init_data = "\n".join([f"{rec[0]}={rec[1]}" for rec in init_data])
secret_key = hmac.new(c_str.encode(), token.encode(),
hashlib.sha256 ).digest()
data_check = hmac.new( secret_key, init_data.encode(),
hashlib.sha256)
return data_check.hexdigest() == hash_str
I am using boto3 api to get all the log events in cloud watch.
The following is my code
import boto3
client = boto3.client("logs")
LOG_GROUP_NAME = "/foo/bar/foo-jobs/foo"
instance_id= "i-somefooid"
log_events = []
response = client.get_log_events(logGroupName=LOG_GROUP_NAME, logStreamName=instance_id, startFromHead=True)
log_events.extend(response["events"])
next_token = response["nextForwardToken"]
while True:
response = client.get_log_events(logGroupName=LOG_GROUP_NAME, logStreamName=instance_id, nextToken=next_token)
log_events.extend(response["events"])
if next_token == response["nextForwardToken"]:
break
next_token = response["nextForwardToken"]
print(log_events)
Using this I am able to print all the log events for a specified instance id but i am not happy that i have to call .get_log_events twice. The reason is because when i make the first call i don't have a nextToken. I only have it after the initial call. Is there a way to simplify this so that i only make the get_log_events call once inside the while True loop.
I would love to hear some suggestions.
import boto3
log_client = boto3.client('logs')
params = {
'logGroupName': "/foo/bar/foo-jobs/foo",
'logStreamName': "i-somefooid"
}
log_events = []
while params.get('nextToken') != '':
response = log_client.get_log_events(**params)
log_events.extend(response['events'])
next_token = response.get('nextToken')
params['nextToken'] = next_token if next_token else ''
SSM — Boto 3 Docs 1.9.64 documentation
get_parameters doesn't list all parameters?
For those who wants to just copy-paste the code:
import boto3
ssm = boto3.client('ssm')
parameters = ssm.describe_parameters()['Parameters']
Beware of the limit of max 50 parameters!
This code will get all parameters, by recursively fetching until there are no more (50 max is returned per call):
import boto3
def get_resources_from(ssm_details):
results = ssm_details['Parameters']
resources = [result for result in results]
next_token = ssm_details.get('NextToken', None)
return resources, next_token
def main()
config = boto3.client('ssm', region_name='us-east-1')
next_token = ' '
resources = []
while next_token is not None:
ssm_details = config.describe_parameters(MaxResults=50, NextToken=next_token)
current_batch, next_token = get_resources_from(ssm_details)
resources += current_batch
print(resources)
print('done')
You can use get_paginator api. find below example, In my use case i had to get all the values of SSM parameter store and wanted to compare it with a string.
import boto3
import sys
LBURL = sys.argv[1].strip()
client = boto3.client('ssm')
p = client.get_paginator('describe_parameters')
paginator = p.paginate().build_full_result()
for page in paginator['Parameters']:
response = client.get_parameter(Name=page['Name'])
value = response['Parameter']['Value']
if LBURL in value:
print("Name is: " + page['Name'] + " and Value is: " + value)
One of the responses from above/below(?) (by Val Lapidas) inspired me to expand it to this (as his solution doesn't get the SSM parameter value, and some other, additional details).
The downside here is that the AWS function client.get_parameters() only allows 10 names per call.
There's one referenced function call in this code (to_pdatetime(...)) that I have omitted - it just takes the datetime value and makes sure it is a "naive" datetime. This is because I am ultimately dumping this data to an Excel file using pandas, which doesn't deal well with timezones.
from typing import List, Tuple
from boto3 import session
from mypy_boto3_ssm import SSMClient
def ssm_params(aws_session: session.Session = None) -> List[dict]:
"""
Return a detailed list of all the SSM parameters.
"""
# -------------------------------------------------------------
#
#
# -------------------------------------------------------------
def get_parameter_values(ssm_client: SSMClient, ssm_details: dict) -> Tuple[list, str]:
"""
Retrieve additional attributes for the SSM parameters contained in the 'ssm_details'
dictionary passed in.
"""
# Get the details
ssm_param_details = ssm_details['Parameters']
# Just the names, ma'am
param_names = [result['Name'] for result in ssm_param_details]
# Get the parames, including the values
ssm_params_with_values = ssm_client.get_parameters(Names=param_names,
WithDecryption=True)
resources = []
result: dict
for result in ssm_params_with_values['Parameters']:
# Get the matching parameter from the `ssm_details` dict since this has some of the fields
# that aren't in the `ssm_params_with_values` returned from "get_arameters".
param_details = next((zz for zz in ssm_param_details if zz.get('Name', None) == result['Name']), {})
param_policy = param_details.get('Policies', None)
if len(param_policy) == 0:
param_policy = None
resources.append({
'Name': result['Name'],
'LastModifiedDate': to_pdatetime(result['LastModifiedDate']),
'LastModifiedUser': param_details.get('LastModifiedUser', None),
'Version': result['Version'],
'Tier': param_details.get('Tier', None),
'Policies': param_policy,
'ARN': result['ARN'],
'DataType': result.get('DataType', None),
'Type': result.get('Type', None),
'Value': result.get('Value', None)
})
next_token = ssm_details.get('NextToken', None)
return resources, next_token
# -------------------------------------------------------------
#
#
# -------------------------------------------------------------
if aws_session is None:
raise ValueError('No session.')
# Create SSM client
aws_ssm_client = aws_session.client('ssm')
next_token = ' '
ssm_resources = []
while next_token is not None:
# The "describe_parameters" call gets a whole lot of info on the defined SSM params,
# except their actual values. Due to this limitation let's call the nested function
# to get the values, and a few other details.
ssm_descriptions = aws_ssm_client.describe_parameters(MaxResults=10,
NextToken=next_token)
# This will get additional details for the params, including values.
current_batch, next_token = get_parameter_values(ssm_client=aws_ssm_client,
ssm_details=ssm_descriptions)
ssm_resources += current_batch
print(f'SSM Parameters: {len(ssm_resources)}')
return ssm_resources
pythonawsboto3amazon-web-services
There's no ListParameters only DescribeParameter, which lists all the paremeters, or you can set filters.
Boto3 Docs Link:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm.html#SSM.Client.describe_parameters
AWS API Documentation Link:
https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_DescribeParameters.html
You can use get_parameters() and get_parameters_by_path().
Use paginators.
paginator = client.get_paginator('describe_parameters')
More information here.
import celery
def temptask(n):
header=list(tempsubtask.si(i) for i in range(n))
callback=templink.si('printed at last?')
r = celery.chord(celery.group(header))(callback)
return r
#task()
def tempsubtask(i):
print i
for x in range(i):
time.sleep(2)
current_task.update_state(
state='PROGRESS', meta={'completed': x, 'total': i })
#task()
def templink(x):
print 'this should be run at last %s'%x
#executing temptask
r = temptask(100)
I want acccess to the progress status updated by tempsubtask. How can I go about achieving it?
I've had a similar question. Most examples on the net are outdated, the docs didn't help much, but the docs have links to sources, reading which did help me.
My objective was to organize parallel tasks in groups. The groups would have to be executed sequentially in order.
So I decided to generate the task ids before starting any tasks separately and only assigning them. I'm using Celery 4.3.0
Here's a brief example.
Firstly I needed a dummy task to make execution sequential and to be able to check the state of a certain group. As this is used a callback, it will complete only after all other tasks in the group.
#celery.task(bind=True, name="app.tasks.dummy_task")
def dummy_task( self, results=None, *args, **kwargs ):
return results
My comments here explain how I assign ids.
from celery.utils import uuid
from celery import group, chord, chain
# Generating task ids,
# which can be saved to a db, sent to the client and so on
#
# This is done before executing any tasks
task_id_1 = uuid()
task_id_2 = uuid()
chord_callback_id_1 = uuid()
chord_callback_id_2 = uuid()
workflow_id = None
# Generating goups, using signatures
# the group may contain any number of tasks
group_1 = group(
[
celery.signature(
'app.tasks.real_task',
args=(),
kwargs = { 'email': some_email, 'data':some_data },
options = ( {'task_id': task_id_1 } )
)
]
)
group_2 = group(
[
celery.signature(
'app.tasks.real_task',
args=(),
kwargs = { 'email': some_email, 'data':some_data },
options = ( {'task_id': task_id_2 } )
)
]
)
# Creating callback task which will simply rely the result
# Using the task id, which has been generated before
#
# The dummy task start after all tasks in this group are completed
# This way we know that the group is completed
chord_callback = celery.signature(
'app.tasks.dummy_task',
options=( {'task_id': chord_callback_id_1 } )
)
chord_callback_2 = celery.signature(
'app.tasks.dummy_task',
options=( {'task_id': chord_callback_id_2 } )
)
# we can monitor each step status
# by its chord callback id
# the id of the chord callback
step1 = chord( group_1, body=chord_callback )
# the id of the chord callback
step2 = chord( group_2, body=chord_callback_2 )
# start the workflow execution
# the steps will execute sequentially
workflow = chain( step1, step2 )()
# the id of the last cord callback
workflow_id = workflow.id
# return any ids you need
print( workflow_id )
That's how I can check the status of any task in my app.
# This is a simplified example
# some code is omitted
from celery.result import AsyncResult
def task_status( task_id=None ):
# PENDING
# RECEIVED
# STARTED
# SUCCESS
# FAILURE
# REVOKED
# RETRY
task = AsyncResult(task_id)
response = {
'state': task.state,
}
return jsonify(response), 200
After hours of googling I stumbled upon http://www.manasupo.com/2012/03/chord-progress-in-celery.html . Though the solution there didn't work for me out of the box, it did inspire me to try something similar.
from celery.utils import uuid
from celery import chord
class ProgressChord(chord):
def __call__(self, body=None, **kwargs):
_chord = self.type
body = (body or self.kwargs['body']).clone()
kwargs = dict(self.kwargs, body=body, **kwargs)
if _chord.app.conf.CELERY_ALWAYS_EAGER:
return self.apply((), kwargs)
callback_id = body.options.setdefault('task_id', uuid())
r= _chord(**kwargs)
return _chord.AsyncResult(callback_id), r
and instead of executing celery.chord I use ProgressChord as follows:
def temptask(n):
header=list(tempsubtask.si(i) for i in range(n))
callback=templink.si('printed at last?')
r = celery.Progresschord(celery.group(header))(callback)
return r
returned value of r contained a tuple having both, callback's asyncresult and a group result. So success looked something like this:
In [3]: r
Out[3]:
(<AsyncResult: bf87507c-14cb-4ac4-8070-d32e4ff326a6>,
<GroupResult: af69e131-5a93-492d-b985-267484651d95 [4672cbbb-8ec3-4a9e-971a-275807124fae, a236e55f-b312-485c-a816-499d39d7de41, e825a072-b23c-43f2-b920-350413fd5c9e, e3f8378d-fd02-4a34-934b-39a5a735871d, c4f7093b-9f1a-4e5e-b90d-66f83b9c97c4, d5c7dc2c-4e10-4e71-ba2b-055a33e15f02, 07b1c6f7-fe95-4c1f-b0ba-6bc82bceaa4e, 00966cb8-41c2-4e95-b5e7-d8604c000927, e039c78e-6647-4c8d-b59b-e9baf73171a0, 6cfdef0a-25a2-4905-a40e-fea9c7940044]>)
I inherited and overrode [celery.chord][1] instead of [celery.task.chords.Chord][2] because I couldn't find it's source anywhere.
Old problem and I wasted a several days to find a better and modern solution. In my current project I must to track group progress separately and release lock in final callback.
And current solution is much more simple (but harder to guess), subject lines commented at the end:
#celery_app.task(name="_scheduler", track_started=True, ignore_result=False)
def _scheduler():
lock = cache.lock("test_lock")
if not lock.acquire(blocking=False):
return {"Error": "Job already in progress"}
lock_code = lock.local.token.decode("utf-8")
tasks = []
for x in range(100):
tasks.append(calculator.s())
_group = group(*tasks)
_chord = chord(_group)(_get_results.s(token=lock_code))
group_results = _chord.parent # This is actual group inside chord
group_results.save() # I am saving it to usual results backend, and can track progress inside.
return _chord # can return anything, I need only chord.
I am working in Celery 5.1