I am using boto3 api to get all the log events in cloud watch.
The following is my code
import boto3

# Fetch every event from one CloudWatch Logs stream, following pagination.
client = boto3.client("logs")

LOG_GROUP_NAME = "/foo/bar/foo-jobs/foo"
instance_id = "i-somefooid"

log_events = []
# Build the call arguments once; nextToken is added after the first call,
# so a single get_log_events call site drives the whole loop.
kwargs = {
    "logGroupName": LOG_GROUP_NAME,
    "logStreamName": instance_id,
    "startFromHead": True,
}
while True:
    response = client.get_log_events(**kwargs)
    log_events.extend(response["events"])
    # CloudWatch signals end-of-stream by returning the same forward token
    # that was passed in.
    if kwargs.get("nextToken") == response["nextForwardToken"]:
        break
    kwargs["nextToken"] = response["nextForwardToken"]
print(log_events)
Using this I am able to print all the log events for a specified instance id, but I am not happy that I have to call .get_log_events twice. The reason is that when I make the first call I don't have a nextToken; I only have it after the initial call. Is there a way to simplify this so that I make the get_log_events call only once, inside the while True loop?
I would love to hear some suggestions.
import boto3

log_client = boto3.client('logs')

# Call arguments for get_log_events; nextToken is added after the first
# iteration so a single call site drives the pagination loop.
params = {
    'logGroupName': "/foo/bar/foo-jobs/foo",
    'logStreamName': "i-somefooid",
    'startFromHead': True,
}

log_events = []
while True:
    response = log_client.get_log_events(**params)
    log_events.extend(response['events'])
    # BUG FIX: get_log_events returns 'nextForwardToken' (there is no
    # 'nextToken' key), and that token is always present, so the original
    # sentinel-'' loop never terminated. The documented end-of-stream
    # signal is the token repeating the one passed in.
    next_token = response['nextForwardToken']
    if params.get('nextToken') == next_token:
        break
    params['nextToken'] = next_token
Related
Target: Get all S3 buckets tagged with owner=dotslashshawn
Firstly, I'd like to say any help will be greatly appreciated.
I have working code allowing me to do this but there doesn't seem to be a way, unlike with EC2 and RDS resources, that will allow me to pull only buckets where that tag exists. I have to pull all buckets and then loop through each to get their tags, then make the comparison unless I've missed something.
It takes 12 seconds to do this operation and I'm thinking, there must be a faster way. I'm keeping in mind that it'll only get slower the more buckets that are found.
Question: Is this something I could speed up using parallel processing?
I have cross account permissions set up because I'm looking in 5 separate accounts for matches.
Example Code:
# Scan every bucket in 5 accounts and collect those whose 'owner' (or
# 'owner2') tag matches; results accumulate in s3_data.
accounts = [
'123', # Account 1
'456', # Account 2
'789', # Account 3
'987', # Account 4
'654', # Account 5
]
s3_data = []
owner = 'dotslashshawn'
# Loop through roles
# NOTE(review): `sts` is not defined in this snippet -- presumably
# boto3.client('sts') created earlier; confirm before running.
for account in accounts:
# Assume each role
assumed_role = sts.assume_role(
RoleArn= f'arn:aws:iam::{account}:role/custom-role',
RoleSessionName="DotSlashShawnSession"
)
# Assign credentials
assumed_role_credentials = assumed_role['Credentials']
# Per-account S3 client built from the temporary STS credentials.
client = boto3.client('s3',
aws_access_key_id=assumed_role_credentials['AccessKeyId'],
aws_secret_access_key=assumed_role_credentials['SecretAccessKey'],
aws_session_token=assumed_role_credentials['SessionToken'],
region_name = 'us-east-2',
)
# get s3 buckets and filter by tag value
response = client.list_buckets()
# # Loop through resources and add to list
for bucket in response['Buckets']:
bucket_name = bucket['Name']
try:
tagging_response = client.get_bucket_tagging(Bucket=bucket_name)
# Flatten TagSet ([{'Key':..,'Value':..}, ...]) into top-level keys
# on the response dict itself.
for tag in tagging_response['TagSet']:
tagging_response.update({f'{tag["Key"]}':f'{tag["Value"]}'})
# Remove TagSet after we extract the tags into the main object.
# NOTE(review): indentation was lost in this paste -- if this pop runs
# inside the tag loop it raises KeyError on the second tag (silently
# swallowed below); the comment above implies it belongs after the loop.
tagging_response.pop('TagSet')
if (tagging_response.get('owner') == f'{owner}') or (tagging_response.get('owner2') == f'{owner}'):
s3_data.append(tagging_response)
except Exception as e:
# Presumably buckets without any tags raise here and are skipped
# intentionally -- confirm the expected exception type.
continue
print(s3_data)
Yes you can use parallel processing to execute tasks by using ThreadPoolExecutor.
In your code there are several actions that can run in parallel, such as getting credentials and getting the tags of the buckets.
Here is what I would do:
init_s3_client is run in parallel in order to speed up retrieving the credential information. This will create a unique S3 client with the proper permissions.
Then you can have all your logic of listing buckets and findings tags in another method, execute_logic that can also run in parallel.
import boto3
import json
import asyncio
import concurrent.futures
from timeit import default_timer as timer
import logging
# Root logger used for the final result dump.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Account IDs whose buckets are scanned (placeholder values).
accounts = [
'123', # Account 1
'456', # Account 2
'789', # Account 3
'987', # Account 4
'654', # Account 5
]
# Buckets matching the owner tag are collected here by execute_logic().
s3_data = []
owner = 'dotslashshawn'
# Populated by init_s3_client() with dicts of {'account': ..., 'client': ...}.
s3_client = []
def sts_get_credentials(account):
    """Assume the custom role in *account* and return its temporary STS credentials."""
    sts = boto3.client('sts')
    assumed = sts.assume_role(
        RoleArn='arn:aws:iam::{}:role/{}'.format(account, 'custom-role'),
        RoleSessionName='DotSlashShawnSession'
    )
    return assumed['Credentials']
def init_s3_client(credentials):
# Append account/client pairs to the module-level s3_client list.
# NOTE(review): this loops over *all* accounts but reuses the single
# `credentials` dict passed in, so every appended client authenticates as
# the same account, and each call appends len(accounts) entries.
# Presumably it should create exactly one client for the account the
# credentials belong to -- confirm against the caller in non_blocking().
for account in accounts:
s3_client.append(
{
'account': account,
'client': boto3.client(
's3',
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken'],
region_name = 'us-east-2'
)
}
)
def execute_logic(client):
# List every bucket visible to `client` and append those whose 'owner'
# (or 'owner2') tag matches the module-level `owner` to the shared
# s3_data list.
# get s3 buckets and filter by tag value
response = client.list_buckets()
# # Loop through resources and add to list
for bucket in response['Buckets']:
bucket_name = bucket['Name']
try:
tagging_response = client.get_bucket_tagging(Bucket=bucket_name)
# Flatten TagSet ([{'Key':..,'Value':..}, ...]) into top-level keys.
for tag in tagging_response['TagSet']:
tagging_response.update({f'{tag["Key"]}':f'{tag["Value"]}'})
# Remove TagSet after we extract the tags into the main object.
# NOTE(review): indentation was lost in this paste -- if this pop runs
# inside the tag loop it raises KeyError on the second tag; the comment
# above implies it belongs after the loop.
tagging_response.pop('TagSet')
if (tagging_response.get('owner') == f'{owner}') or (tagging_response.get('owner2') == f'{owner}'):
s3_data.append(tagging_response)
except Exception as e:
# Presumably untagged buckets raise here and are skipped on purpose.
continue
async def non_blocking(executor):
# Fan the per-account work out onto `executor` threads in two waves:
# first client initialisation, then bucket scanning.
loop = asyncio.get_event_loop()
execute_logic_tasks = []
s3_init_tasks = []
results = []
results_execute_logic_tasks = []
# Executing get credentials in parallel
# NOTE(review): sts_get_credentials(account) is evaluated *before*
# run_in_executor is invoked, so the STS calls actually run sequentially
# on this thread; only init_s3_client runs on the executor.
for account in accounts:
s3_init_tasks.append(loop.run_in_executor(executor, init_s3_client, sts_get_credentials(account)))
if len(s3_init_tasks) > 0:
completed, pending = await asyncio.wait(s3_init_tasks)
# init_s3_client returns None, so these results are all None.
results_s3_init_tasks = [t.result() for t in completed]
else:
results_s3_init_tasks = []
results.append(results_s3_init_tasks)
# Executing get buckets in parallel
# NOTE(review): s3_client is a list and `account` is a string like '123',
# so s3_client[account] raises TypeError -- this needs to index by
# position or look the entry up by its 'account' key.
for account in accounts:
execute_logic_tasks.append(loop.run_in_executor(executor, execute_logic, s3_client[account]['client']))
if len(execute_logic_tasks) > 0:
completed, pending = await asyncio.wait(execute_logic_tasks)
results_execute_logic_tasks = [t.result() for t in completed]
else:
results_execute_logic_tasks = []
results.append(results_execute_logic_tasks)
return results
if __name__ == "__main__":
# Thread pool for the I/O-bound AWS calls across the 5 accounts.
executor = concurrent.futures.ThreadPoolExecutor(
max_workers=10,
)
# NOTE(review): asyncio.get_event_loop() outside a running loop is
# deprecated in modern Python; asyncio.run(non_blocking(executor)) is
# the current form.
event_loop = asyncio.get_event_loop()
# async next
start = timer()
non_blocking_results = event_loop.run_until_complete(non_blocking(executor))
elapsed = (timer() - start)
logger.debug("Non-blocking took: {}".format(elapsed))
logger.info(s3_data)
Background: I have seen lots of examples of integrating a progress bar into a for loop, however nothing for my use case, and as such am looking for some advice.
For my use case, I am calling an API and testing if meta is in the response (meta = data I need). If meta is not in the API response, then the API returns a key pair value named percent_complete, which indicates the data I am trying to return is still aggregating, and provides a value on the progress of data aggregation.
Current code:
def api_call():
    """Poll the endpoint until the response contains 'meta', printing the
    aggregation progress once a minute while waiting.

    Returns:
        dict: the parsed JSON response once 'meta' is present.
    """
    key, secret, url = ini_reader()
    endpoint_url = endpoint_initializer()
    while True:
        response = requests.get(url = endpoint_url, auth = HTTPBasicAuth(key, secret), headers = {"vendor-firm": "111"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            # Collect the request id from whichever item carries one.
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            # BUG FIX: the original fallback was '' and '' * 100 is still '',
            # which printed an empty percentage; default to 0 instead.
            res2 = api_response["data"]["attributes"].get("percent_complete", 0)*100
            print(f' Your data request for: {id_value} is {res2}% complete!')
            time.sleep(60)  # poll once a minute while aggregation runs
        else:
            return api_response
What I am trying to achieve: {res2} *100 gives the percentage, which I would like to use the measure of progress in a progress bar.
Can anyone suggest an appropriate dependency to use?
You can use the Enlighten library. You can keep your print statements and have multiple progress bars at the same time without making any other changes. Below is an example of how you might implement it.
Based on your example it looks like id_value changes, so I wrote the example like that. If it doesn't change you can just use it in the description. And if you have multiples, you'd probably want to create a progress bar for each. If you want to remove your progress bars after they complete, just add leave=False to manager.Counter().
The library is very customizable and the documentation has a lot of examples.
import enlighten
# Custom bar format: {id_value} is a user-defined field supplied through
# pbar.update(id_value=...); the rest are standard Enlighten fields.
BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'
manager = enlighten.get_manager()
def api_call():
# One bar counting 0-100 percent for this request.
pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
...
while True:
...
if "meta" not in api_response:
...
# Jump the bar straight to the reported percentage; incr=0 means
# "repaint without adding", keeping the count set just above.
pbar.count = res2
pbar.update(incr=0, id_value=id_value)
else:
...
# Aggregation finished: pin the bar to 100% and close it.
pbar.count = 100
pbar.update(incr=0, id_value=id_value)
pbar.close()
return api_response
Thanks to Aviso, and for everyone's benefit, here is the completed function -
def api_call():
    """Poll the endpoint until 'meta' appears, showing progress in an
    Enlighten bar driven by the API's percent_complete field.

    Returns:
        dict: the parsed JSON response once 'meta' is present.
    """
    endpoint_url = endpoint_initializer()
    key, secret, url = ini_reader()
    BAR_FORMAT = u'{id_value} {percentage:3.0f}%|{bar}| ' u'[{elapsed}<{eta}, {rate:.2f} %/s]'
    manager = enlighten.get_manager()
    date = dt.datetime.today().strftime("%Y-%m-%d")
    print("------------------------------------\n","API URL constructed for:", date, "\n------------------------------------")
    print("-------------------------------------------------------------\n","Endpoint:", endpoint_url, "\n-------------------------------------------------------------")
    pbar = manager.counter(total=100, bar_format=BAR_FORMAT)
    # BUG FIX: id_value must exist before the loop -- if the very first
    # response already contains 'meta', the completion branch referenced an
    # unbound name and raised NameError.
    id_value = ""
    while True:
        response = requests.get(url = endpoint_url, auth = HTTPBasicAuth(key, secret), headers = {"vendor-firm": "381"})
        api_response = json.loads(response.text)
        if "meta" not in api_response:
            # Collect the request id from whichever item carries one.
            id_value = "id"
            res1 = [val[id_value] for key, val in api_response.items() if id_value in val]
            id_value = "".join(res1)
            # BUG FIX: default 0 (not '') so the *100 can't yield ''.
            res2 = api_response["data"]["attributes"].get("percent_complete", 0)*100
            pbar.count = res2
            pbar.update(incr=0, id_value=id_value)
            time.sleep(60)  # poll once a minute while aggregation runs
        else:
            pbar.count = 100
            pbar.update(incr=0, id_value=id_value)
            pbar.close()
            return api_response
For about the past week I've been trying to wrap my head around the concept of a signed HMAC sha256 request.
In this example I'm just trying to get a list of all current orders.
I thought I'd figured it out but for some reason this still won't work.
The API keys are new...I've tried both Read and Write versions, and my IP is whitelisted.
I'm getting {'code': -1022, 'msg': 'Signature for this request is not valid.'}
My code...
import hmac
import hashlib
import json
import requests
import time
import Credentials
class Private:
    """Signed (HMAC-SHA256) endpoints of the Binance REST API."""

    @staticmethod
    def GetAllOrders(pair, orderid='', start='', finish='', limit='', window=''):
        """Return all orders for *pair* from /api/v3/allOrders.

        Optional filters (orderid, start/finish epoch seconds, limit,
        recvWindow) are omitted from the query when passed as ''.
        """
        # Credentials #
        ApiKey = Credentials.Binance.ReadAPIKey
        SecretKey = Credentials.Binance.ReadSecretKey
        # Base #
        BaseURL = 'https://api.binance.com'
        EndPoint = '/api/v3/allOrders'
        # Required #
        # BUG FIX: the leading '?' must NOT be part of the signed payload --
        # Binance signs the raw query string only -- so build 'symbol=...'
        # here and add the '?' when the URL is assembled below.
        Pair = 'symbol='+str(pair)
        # Millisecond timestamp required on all signed endpoints.
        # (Restored '&timestamp=' -- the pasted source had it mangled to '×tamp='.)
        Time = '&timestamp='+str(int(time.time()*1000))
        # Optional parameters: empty string means "leave out of the query".
        OrderID = '&orderId='+str(orderid) if orderid != '' else ''
        Start = '&startTime='+str(start*1000) if start != '' else ''
        Finish = '&endTime='+str(finish*1000) if finish != '' else ''
        Limit = '&limit='+str(limit) if limit != '' else ''
        Window = '&recvWindow='+str(window) if window != '' else ''
        # HMAC signature over the exact query string that will be sent.
        Query = Pair+OrderID+Start+Finish+Limit+Window+Time
        HMAC = hmac.new(bytes(SecretKey.encode('utf-8')),
                        Query.encode('utf-8'),
                        hashlib.sha256).hexdigest()
        # Signature #
        Signature = '&signature='+str(HMAC)
        # Headers #
        Headers = {'X-MBX-APIKEY': ApiKey}
        # Request #
        JSON = requests.get(BaseURL+EndPoint+'?'+Query+Signature,headers=Headers).json()
        return JSON
# Example: fetch every BTCUSDT order (performs a live, signed API request).
print(Private.GetAllOrders(pair='BTCUSDT'))
Any help would be appreciated...
I figured it out...
The HMAC does not recognize the '?' as being the start of the parameters, whereas the URL (API) does.
The following lines should look like this...
# Corrected lines: the '?' is excluded from the signed payload and added
# only when the request URL is assembled.
# Required #
Pair = 'symbol='+str(pair)
# Request #
JSON = requests.get(BaseURL+EndPoint+'?'+Pair+OrderID+Start+Finish+Limit+Window+Time+Signature,headers=Headers).json()
I try to use Google Calendar API
# List up to 100 past events of one calendar, recurrences expanded and
# ordered by start time. NOTE(review): per the surrounding discussion this
# raises when the caller lacks access to calendarId -- handle that case.
events_result = service.events().list(calendarId=calendarId,
timeMax=now,
alwaysIncludeEmail=True,
maxResults=100, singleEvents=True,
orderBy='startTime').execute()
Everything is OK when I have permission to access the calendarId, but errors occur when I don't have permission for a given calendarId.
I built an autoload.py function with the schedule Python package to load events every 10 minutes; this function stops whenever an error occurs, and I have to use an SSH terminal to restart autoload.py manually.
So i want to know:
How can I get status_code, example, if it is 404, python will PASS
Answer:
You can use a try/except block within a loop to go through all your calendars, and skip over accesses which throw an error.
Code Example:
To get the error code, make sure to import json:
import json
and then you can get the error code out of the Exception:
# Iterate several calendars, skipping any the caller cannot access.
calendarIds = ["calendar ID 1", "calendar ID 2", "calendar Id 3", "etc"]
for i in calendarIds:
try:
events_result = service.events().list(calendarId=i,
timeMax=now,
alwaysIncludeEmail=True,
maxResults=100, singleEvents=True,
orderBy='startTime').execute()
except Exception as e:
# NOTE(review): e.content assumes a googleapiclient HttpError; a plain
# Exception has no .content attribute -- confirm, or catch HttpError
# specifically.
print(json.loads(e.content)['error']['code'])
continue
Further Reading:
Python Try Except - w3schools
Python For Loops - w3schools
Thanks to #Rafa Guillermo, I uploaded the full code to the autoload.py program, but I also wanted to know, how to get response json or status_code for request Google API.
The solution:
try:
code here
except Exception as e:
continue
import schedule
import time
from datetime import datetime
import dir
import sqlite3
from project.function import cmsCalendar as cal
# Path to the admin SQLite database, anchored at the project directory.
db_file = str(dir.dir) + '/admin.sqlite'
def get_list_shop_from_db(db_file):
    """Return every row of the Shop table as a list of {column: value} dicts.

    Args:
        db_file: path to the SQLite database file.
    """
    conn = sqlite3.connect(db_file)
    try:
        cur = conn.execute('SELECT * FROM Shop')
        # cursor.description yields one 7-tuple per column; [0] is the name.
        colname = [d[0] for d in cur.description]
        return [dict(zip(colname, row)) for row in cur.fetchall()]
    finally:
        # BUG FIX: the original leaked the connection if the query raised;
        # always close it.
        conn.close()
def auto_load_google_database(list_shop, calendarError=False):
    """Push booking availability for every shop in *list_shop* to Google Calendar.

    Shops that fail for any reason are skipped so one bad row cannot stop
    the scheduled run.
    """
    # BUG FIX: the original pre-incremented an index and then read
    # list_shop[shopId], which paired shop 1's data with the first
    # iteration, skipped shop 0's data entirely, and raised IndexError
    # (silently swallowed) on the last shop. Iterate the row itself.
    for shopId, shop in enumerate(list_shop, start=1):
        try:
            print("dang ghi vao shop", shopId)
            service = cal.service_build()
            shop_step_time_db = shop['shop_step_time']
            shop_duration_db = shop['shop_duration']
            slot_available = int(shop['shop_slots'])
            workers = int(shop['shop_workers'])
            calendarId = shop['shop_calendarId']
            # Capacity per slot is bounded by both availability and staffing.
            a = min(slot_available, workers)
            # Fall back to sensible defaults when the DB row has NULLs.
            if shop_duration_db is None:
                shop_duration_db = '30'
            if shop_step_time_db is None:
                shop_step_time_db = '15'
            shop_duration = int(shop_duration_db)
            shop_step_time = int(shop_step_time_db)
            shop_start_time = datetime.strptime(shop['shop_start_time'], "%H:%M:%S.%f").time()
            shop_end_time = datetime.strptime(shop['shop_end_time'], "%H:%M:%S.%f").time()
            # Per-slot capacity comes from the WorkShop.js JSON file.
            booking_status = cal.auto_load_listtimes(service, shopId, calendarId, shop_step_time, shop_duration, a,
                                                     shop_start_time,
                                                     shop_end_time)
        except Exception as e:
            # Best-effort: skip failing shops so the scheduler keeps running.
            # NOTE(review): consider logging `e` instead of dropping it.
            continue
def main():
    """Load the shop list from the admin DB and sync it to Google Calendar."""
    shops = get_list_shop_from_db(db_file)
    auto_load_google_database(shops)
if __name__ == '__main__':
main()
# NOTE(review): indentation was lost in this paste -- the scheduler setup
# below presumably belongs inside the __main__ guard. main() runs once
# immediately, then every 5 minutes via the scheduler.
schedule.every(5).minutes.do(main)
while True:
# Checks whether a scheduled task
# is pending to run or not
schedule.run_pending()
time.sleep(1)
SSM — Boto 3 Docs 1.9.64 documentation
get_parameters doesn't list all parameters?
For those who wants to just copy-paste the code:
import boto3
ssm = boto3.client('ssm')
# Single page only: describe_parameters returns at most 50 parameters per
# call (see the caveat below); paginate with NextToken to fetch them all.
parameters = ssm.describe_parameters()['Parameters']
Beware of the limit of max 50 parameters!
This code will get all parameters, by recursively fetching until there are no more (50 max is returned per call):
import boto3
def get_resources_from(ssm_details):
    """Split one describe_parameters page into (parameter list, next token).

    The token is None on the final page, which terminates the caller's loop.
    """
    page = list(ssm_details['Parameters'])
    return page, ssm_details.get('NextToken', None)
def main():
    """Print every SSM parameter description, following NextToken pagination."""
    # BUG FIX: the original `def main()` was missing its colon (SyntaxError).
    config = boto3.client('ssm', region_name='us-east-1')
    next_token = ' '  # non-None sentinel so the loop body runs at least once
    resources = []
    while next_token is not None:
        ssm_details = config.describe_parameters(MaxResults=50, NextToken=next_token)
        current_batch, next_token = get_resources_from(ssm_details)
        resources += current_batch
    print(resources)
    print('done')
You can use get_paginator api. find below example, In my use case i had to get all the values of SSM parameter store and wanted to compare it with a string.
import boto3
import sys

# URL fragment to search for among all SSM parameter values.
LBURL = sys.argv[1].strip()

client = boto3.client('ssm')
# build_full_result() walks every page of describe_parameters for us,
# so no manual NextToken handling is needed.
all_params = client.get_paginator('describe_parameters').paginate().build_full_result()
for param in all_params['Parameters']:
    detail = client.get_parameter(Name=param['Name'])
    value = detail['Parameter']['Value']
    if LBURL in value:
        print("Name is: " + param['Name'] + " and Value is: " + value)
One of the responses from above/below(?) (by Val Lapidas) inspired me to expand it to this (as his solution doesn't get the SSM parameter value, and some other, additional details).
The downside here is that the AWS function client.get_parameters() only allows 10 names per call.
There's one referenced function call in this code (to_pdatetime(...)) that I have omitted - it just takes the datetime value and makes sure it is a "naive" datetime. This is because I am ultimately dumping this data to an Excel file using pandas, which doesn't deal well with timezones.
from typing import List, Tuple
from boto3 import session
from mypy_boto3_ssm import SSMClient
def ssm_params(aws_session: session.Session = None) -> List[dict]:
    """
    Return a detailed list of all the SSM parameters, including values.

    Pages through describe_parameters ten at a time (matching the
    10-name limit of get_parameters) and merges the decrypted values
    into each parameter's description.

    Raises:
        ValueError: if no boto3 session is supplied.
    """
    # -------------------------------------------------------------
    #
    #
    # -------------------------------------------------------------
    def get_parameter_values(ssm_client: SSMClient, ssm_details: dict) -> Tuple[list, str]:
        """
        Retrieve additional attributes (including values) for the SSM
        parameters contained in the 'ssm_details' dictionary passed in.
        """
        # Get the details
        ssm_param_details = ssm_details['Parameters']
        # Just the names, ma'am
        param_names = [result['Name'] for result in ssm_param_details]
        # Get the params, including the values. get_parameters accepts at
        # most 10 names per call -- hence MaxResults=10 in the outer loop.
        ssm_params_with_values = ssm_client.get_parameters(Names=param_names,
                                                           WithDecryption=True)
        resources = []
        result: dict
        for result in ssm_params_with_values['Parameters']:
            # Get the matching parameter from the `ssm_details` dict since this has some of the fields
            # that aren't in the `ssm_params_with_values` returned from "get_parameters".
            param_details = next((zz for zz in ssm_param_details if zz.get('Name', None) == result['Name']), {})
            # BUG FIX: 'Policies' may be absent, and len(None) raised a
            # TypeError; treat a missing or empty policy list as None.
            param_policy = param_details.get('Policies', None) or None
            resources.append({
                'Name': result['Name'],
                # NOTE(review): to_pdatetime (defined elsewhere) strips the
                # timezone so pandas/Excel can handle the value.
                'LastModifiedDate': to_pdatetime(result['LastModifiedDate']),
                'LastModifiedUser': param_details.get('LastModifiedUser', None),
                'Version': result['Version'],
                'Tier': param_details.get('Tier', None),
                'Policies': param_policy,
                'ARN': result['ARN'],
                'DataType': result.get('DataType', None),
                'Type': result.get('Type', None),
                'Value': result.get('Value', None)
            })
        next_token = ssm_details.get('NextToken', None)
        return resources, next_token

    # -------------------------------------------------------------
    #
    #
    # -------------------------------------------------------------
    if aws_session is None:
        raise ValueError('No session.')
    # Create SSM client
    aws_ssm_client = aws_session.client('ssm')
    next_token = ' '  # non-None sentinel so the loop body runs at least once
    ssm_resources = []
    while next_token is not None:
        # The "describe_parameters" call gets a whole lot of info on the defined SSM params,
        # except their actual values. Due to this limitation let's call the nested function
        # to get the values, and a few other details.
        ssm_descriptions = aws_ssm_client.describe_parameters(MaxResults=10,
                                                              NextToken=next_token)
        # This will get additional details for the params, including values.
        current_batch, next_token = get_parameter_values(ssm_client=aws_ssm_client,
                                                         ssm_details=ssm_descriptions)
        ssm_resources += current_batch
    print(f'SSM Parameters: {len(ssm_resources)}')
    return ssm_resources
Tags: python, aws, boto3, amazon-web-services
There's no ListParameters, only DescribeParameters, which lists all the parameters; alternatively, you can set filters.
Boto3 Docs Link:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ssm.html#SSM.Client.describe_parameters
AWS API Documentation Link:
https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_DescribeParameters.html
You can use get_parameters() and get_parameters_by_path().
Use paginators.
# Create a paginator; iterating paginator.paginate() walks every page
# without manual NextToken bookkeeping.
paginator = client.get_paginator('describe_parameters')
More information here.