How to calculate EC2 instance price using Python script

Now that AWS have a Pricing API, how could one use Boto3 to fetch the current hourly price for a given on-demand EC2 instance type (e.g. t2.micro), region (e.g. eu-west-1) and operating system (e.g. Linux)? I only want the price returned. Based on my understanding, having those four pieces of information should be enough to filter down to a singular result.
However, all the examples I've seen fetch huge lists of data from the API that would have to be post-processed in order to get what I want. I would like to filter the data on the API side, before it is returned.

Here is the solution I ended up with: it uses Boto3's own Pricing API with a filter for the instance type, region and operating system. The API still returns a lot of information, so I needed to do a bit of post-processing.
import boto3
import json
from pkg_resources import resource_filename
# Template for the product search filter given to the Pricing API's
# get_products call, so that less data comes back from the service.
# The {o}, {t} and {r} placeholders are filled in later with str.format().
_FILTER_TERMS = (
    ("tenancy", "shared"),
    ("operatingSystem", "{o}"),
    ("preInstalledSw", "NA"),
    ("instanceType", "{t}"),
    ("location", "{r}"),
    ("capacitystatus", "Used"),
)
# The doubled braces survive %-formatting and become literal braces once
# str.format() is applied to FLT.
FLT = "[" + ",".join(
    '{{"Field": "%s", "Value": "%s", "Type": "TERM_MATCH"}}' % term
    for term in _FILTER_TERMS
) + "]"
# Get current AWS price for an on-demand instance
def get_price(region, instance, os):
    """Return the on-demand USD price for one EC2 instance type.

    Args:
        region: Value for the "location" filter, i.e. a region *name* such as
            the one produced by get_region_name(), not a region code.
        instance: Instance type for the "instanceType" filter, e.g. 't3.micro'.
        os: Value for the "operatingSystem" filter, e.g. 'Linux'.

    Returns:
        The 'USD' entry of ``pricePerUnit`` taken from the first price
        dimension of the first on-demand term of the first matching product,
        exactly as found in the API response.

    Raises:
        IndexError: If the filter matched no product.
    """
    f = FLT.format(r=region, t=instance, o=os)
    data = client.get_products(ServiceCode='AmazonEC2', Filters=json.loads(f))
    price_list = data['PriceList']
    if not price_list:
        # Same exception type as plain indexing would raise, but with a
        # message that says which lookup came back empty.
        raise IndexError(
            'no EC2 product matched location=%r, instanceType=%r, '
            'operatingSystem=%r' % (region, instance, os)
        )
    # Each PriceList entry is a JSON document encoded as a string.
    od = json.loads(price_list[0])['terms']['OnDemand']
    # The term and price-dimension keys are opaque identifiers, so simply
    # take the first entry of each mapping.
    first_term = list(od.values())[0]
    first_dimension = list(first_term['priceDimensions'].values())[0]
    return first_dimension['pricePerUnit']['USD']
# Translate region code to region name. Even though the API data contains
# regionCode field, it will not return accurate data. However using the location
# field will, but then we need to translate the region code into a region name.
# You could skip this by using the region names in your code directly, but most
# other APIs are using the region code.
def get_region_name(region_code):
    """Translate a region code into the region name used by the Pricing API.

    The name is read from the ``data/endpoints.json`` file shipped with
    botocore, using the first partition listed in that file.

    Args:
        region_code: Region code such as 'eu-west-1'.

    Returns:
        The region description with 'Europe' replaced by 'EU', or
        'US East (N. Virginia)' when the endpoints file cannot be read.

    Raises:
        KeyError: If the region code is not listed in the first partition.
    """
    default_region = 'US East (N. Virginia)'
    endpoint_file = resource_filename('botocore', 'data/endpoints.json')
    try:
        with open(endpoint_file, 'r') as f:
            data = json.load(f)
        # Botocore is using Europe while Pricing API using EU...sigh...
        return data['partitions'][0]['regions'][region_code]['description'].replace('Europe', 'EU')
    except IOError:
        # Only an unreadable endpoints file falls back to the default.
        return default_region
# Use the AWS Pricing API through Boto3.
# The Pricing API only has us-east-1 and ap-south-1 as valid endpoints; the
# endpoint chosen here has no impact on the region selected for your instance.
client = boto3.client('pricing', region_name='us-east-1')
# Get the current price for a given instance type, region and OS. The region
# code is first translated into the region name that get_price() filters on.
price = get_price(get_region_name('eu-west-1'), 't3.micro', 'Linux')
This example outputs 0.0114000000 (hourly price in USD) fairly quickly. (This number was verified to match the current value listed here at the date of this writing)

If you don't like the native function, then look at Lyft's awspricing library for Python. Here's an example:
import awspricing
ec2_offer = awspricing.offer('AmazonEC2')
p = ec2_offer.ondemand_hourly(
print(p) # 0.0126
I'd recommend enabling caching (see AWSPRICING_USE_CACHE) otherwise it will be slow.

I have updated toringe's solution a bit to handle different key errors
def price_information(self, instance_type, os, region):
    """Return an on-demand USD price for an instance type, or 0 on failure.

    Queries ``self.pricing_client.get_products`` with filters on operating
    system and instance type, then walks the returned price list and stops at
    the first entry whose USD price is greater than zero.

    Args:
        instance_type: Value for the "instanceType" filter, e.g. 't3.micro'.
        os: Value for the "operatingSystem" filter, e.g. 'Linux'.
        region: Accepted but not used; no region filter is applied, so the
            result is not restricted to a particular region.

    Returns:
        The USD price exactly as it appears in the API response for the first
        entry with a positive price. If no entry has a positive price, the
        last price that could be read is returned. 0 is returned when the
        request fails or no price could be read at all.
    """
    # Search product filter
    filters = [
        {"Field": "operatingSystem", "Value": os, "Type": "TERM_MATCH"},
        {"Field": "instanceType", "Value": instance_type, "Type": "TERM_MATCH"},
    ]
    try:
        data = self.pricing_client.get_products(ServiceCode='AmazonEC2', Filters=filters)
        price_list = data['PriceList']
    except Exception:
        # Deliberate best effort: any failure of the lookup is reported to
        # the caller as a price of 0.
        return 0

    instance_price = 0
    for price in price_list:
        try:
            # Each entry is a JSON document encoded as a string; parse it with
            # json.loads rather than eval() so payload text is never executed.
            on_demand = json.loads(price)['terms']['OnDemand']
            price_data = list(on_demand.values())[0]
            dimension = list(price_data['priceDimensions'].values())[0]
            candidate = dimension['pricePerUnit']['USD']
            is_positive = float(candidate) > 0
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            # Entry without the expected structure: try the next one.
            continue
        instance_price = candidate
        if is_positive:
            break
    return instance_price

Based on other answers, here's some code that returns the On Demand prices for all instance types (or for a given instance type, if you add the search filter), gets some relevant attributes for each instance type, and pretty-prints the data.
It assumes pricing is the AWS Pricing client.
import json
def ec2_get_ondemand_prices(Filters):
    """Fetch on-demand EC2 prices matching *Filters* and pretty-print them.

    Pages through ``pricing.get_products`` (``pricing`` must be the AWS
    Pricing client, available as a module-level name), keeps one entry per
    instance type with a non-zero price, and prints the entries sorted by
    hourly price, cheapest first.

    Args:
        Filters: List of filter dicts passed unchanged to ``get_products``.

    Returns:
        None. The result is only printed.
    """
    request = {'ServiceCode': 'AmazonEC2', 'Filters': Filters, 'MaxResults': 100}
    reply = pricing.get_products(**request)
    # Each PriceList entry is a JSON document encoded as a string.
    data = [json.loads(r) for r in reply['PriceList']]
    while 'NextToken' in reply:
        reply = pricing.get_products(NextToken=reply['NextToken'], **request)
        data.extend(json.loads(r) for r in reply['PriceList'])
        # Progress line: size of this page and running total.
        print(f"\x1b[33mGET \x1b[0m{len(reply['PriceList']):3} \x1b[94m{len(data):4}\x1b[0m")

    instances = {}
    for d in data:
        attr = d['product']['attributes']
        instance_type = attr['instanceType']
        # Keep only the first priced entry seen for each instance type.
        if instance_type in instances:
            continue
        market = attr.get('marketoption', '')
        ram = attr.get('memory', '')
        vcpu = attr.get('vcpu', '')

        terms = d['terms']
        ondemand = terms['OnDemand']
        # Term and price-dimension keys are opaque ids; take the first of each.
        term = ondemand[next(iter(ondemand))]
        pricedim = term['priceDimensions']
        price = pricedim[next(iter(pricedim))]
        desc = price['description']
        p = float(price['pricePerUnit']['USD'])
        unit = price['unit'].lower()

        if 'GiB' not in ram:
            print('\x1b[31mWARN\x1b[0m')
        if 'hrs' != unit:
            print('\x1b[31mWARN\x1b[0m')
        if p == 0.:
            continue
        try:
            # Also tolerate a thousands separator in the memory figure.
            ram_gib = float(ram.replace('GiB', '').replace(',', ''))
        except ValueError:
            # Memory value is not a number: skip the entry instead of
            # aborting the whole listing.
            continue

        instances[instance_type] = {
            'type': instance_type,
            'market': market,
            'vcpu': vcpu,
            'ram': ram_gib,
            'ondm': p,
            'unit': unit,
            'terms': list(terms.keys()),
            'desc': desc,
        }

    # 720 = hours in a 30-day month, used for the monthly and yearly estimates.
    for ins in sorted(instances.values(), key=lambda e: e['ondm']):
        p = ins['ondm']
        print(f"{ins['type']:32} {ins['market'].lower()}\x1b[91m: \x1b[0m{ins['vcpu']:3} vcores\x1b[91m, \x1b[0m{ins['ram']:7.1f} GB, \x1b[0m{p:7.4f} \x1b[95m$/h\x1b[0m, \x1b[0m\x1b[0m{p*720:8,.1f} \x1b[95m$/m\x1b[0m, \x1b[0m\x1b[0m{p*720*12:7,.0f} \x1b[95m$/y\x1b[0m, \x1b[0m{ins['unit']}\x1b[91m, \x1b[0m{ins['terms']}\x1b[0m")
flt = [
# {'Field': 'instanceType', 'Value': 't4g.nano', 'Type': 'TERM_MATCH'}, # enable this filter to select only 1 instance type
{'Field': 'regionCode', 'Value': 'us-east-2', 'Type': 'TERM_MATCH'}, # alternative notation?: {'Field': 'location', 'Value': 'US East (Ohio)', 'Type': 'TERM_MATCH'},
{'Field': 'operatingSystem', 'Value': 'Linux', 'Type': 'TERM_MATCH'},
{'Field': 'tenancy', 'Value': 'shared', 'Type': 'TERM_MATCH'},
{'Field': 'capacitystatus', 'Value': 'Used', 'Type': 'TERM_MATCH'},


Find coordinates in wikipedia pages iterating over a list

This is probably a simple question, but my experience with for loops is very limited.
I was trying to adapt the solution on this page to some simple examples that I have, but the result is not what I expected.
For example:
I have this simple data frame:
df= pd.DataFrame({'City':['Sesimbra','Ciudad Juárez','31100 Treviso','Ramada Portugal','Olhão'],
I created a list based on cities:
lista_cidades = list(df['City'])
and i would like to iterate over this list to get the coordinates (decimal, preferably)
So far i tried this approach:
import requests
lng_dict = {}
lat_dict = {}
S = requests.Session()
URL = ""
"action": "query",
"format": "json",
"titles": [lista_cidades],
"prop": "coordinates"
R = S.get(url=URL, params=PARAMS)
DATA = R.json()
PAGES = DATA['query']['pages']
for i in range(len(lista_cidades)):
for k, v in PAGES.items():
lat_dict[lista_cidades[i]] = str(v['coordinates'][0]['lat'])
lng_dict[lista_cidades[i]] = str(v['coordinates'][0]['lon'])
but it looks like the code doesn't iterate over the list and always returns the same coordinate
For example, when i call the dictionary with latitude coordinates, this is what i get
{'Sesimbra': '-7.84166667',
'Ciudad Juárez': '-7.84166667',
'31100 Treviso': '-7.84166667',
'Ramada Portugal': '-7.84166667',
'Olhão': '-7.84166667'}
What should i do to solve this?
Thanks in advance
I think the query returns only one result: it takes only the last city from your list (in your case, the "Olhão" coordinates).
You can check it by logging the DATA content.
I do not know the Wikipedia API well, but either your call lacks a parameter (the documentation should tell you which one) or you have to call the API once for each city, like this:
import pandas as pd
import requests
df = pd.DataFrame({'City': ['Sesimbra', 'Ciudad Juárez', '31100 Treviso', 'Ramada Portugal', 'Olhão'],
'Country': ['Portugal', 'México', 'Itália', 'Portugal', 'Portugal']})
lista_cidades = list(df['City'])
lng_dict = {}
lat_dict = {}
S = requests.Session()
URL = ""
for city in lista_cidades:
"action": "query",
"format": "json",
"titles": city,
"prop": "coordinates"
R = S.get(url=URL, params=PARAMS)
DATA = R.json()
PAGES = DATA['query']['pages']
for k, v in PAGES.items():
lat_dict[city] = str(v['coordinates'][0]['lat'])
lng_dict[city] = str(v['coordinates'][0]['lon'])

Parse server payload with few keys absent

I have a rather basic bit of code. Basically what it does is sends an API request to a locally hosted Server and returns a JSON string. I'm taking that string and cracking it apart. Then I take what I need from it, make a Dictionary, and export it as an XML file with an nfo extension.
The issue is that sometimes parts of the source data are missing. Season, for example, is missing fairly frequently, and that breaks the data mapping. I need a way to handle that: for some things I may want to exclude the data, and for others I need a sane default value.
#!/bin/env python
import os
import requests
import re
import json
import dicttoxml
import xml.dom.minidom
from xml.dom.minidom import parseString
# Grab Shoko Auth Key
apiheaders = {
'Content-Type': 'application/json',
'Accept': 'application/json',
apidata = '{"user": "Default", "pass": "", "device": "CLI"}'
r ='',
headers=apiheaders, data=apidata)
key = json.loads(r.text)['apikey']
# Grabbing Episode Data
EpisodeHeaders = {
'accept': 'text/plain',
'apikey': key
EpisodeParams = (
('pic', '1'),
fileinfo = requests.get(
'', headers=EpisodeHeaders, params=EpisodeParams)
# Mapping Data from Shoko to Jellyfin NFO
string = json.loads(fileinfo.text)
eplot = json.loads(fileinfo.text)['summary']
etitle = json.loads(fileinfo.text)['name']
eyear = json.loads(fileinfo.text)['year']
episode = json.loads(fileinfo.text)['epnumber']
season = json.loads(fileinfo.text)['season']
aid = json.loads(fileinfo.text)['aid']
seasonnum = season.split('x')
# Create Dictionary From Mapped Data
show = {
"plot": eplot,
"title": etitle,
"year": eyear,
"episode": episode,
"season": seasonnum[0],
Here is some example output when the code crashes
{'type': 'ep', 'eptype': 'Credits', 'epnumber': 1, 'aid': 10713, 'eid': 167848,
'id': 95272, 'name': 'Opening', 'summary': 'Episode Overview not Available',
'year': '2014', 'air': '2014-11-23', 'rating': '10.00', 'votes': '1',
'art': {'fanart': [{'url': '/api/v2/image/support/plex_404.png'}],
'thumb': [{'url': '/api/v2/image/support/plex_404.png'}]}}
Traceback (most recent call last):
File "/home/fletcher/Documents/Shoko-Jellyfin-NFO/", line 48, in <module>
season = json.loads(fileinfo.text)['season']
KeyError: 'season'
The solution based on what Mahori suggested. Worked perfectly.
# Decode the response body once, then read each field with .get() so that a
# key the server left out yields a default instead of raising KeyError.
episode_info = json.loads(fileinfo.text)
eplot = episode_info.get('summary', None)
etitle = episode_info.get('name', None)
eyear = episode_info.get('year', None)
episode = episode_info.get('epnumber', None)
# '1x1' keeps the later season.split('x') working when no season is sent.
season = episode_info.get('season', '1x1')
aid = episode_info.get('aid', None)
This is a fairly common scenario in web development, where you cannot always assume the other party will send all keys.
The standard way to get around this is to use get instead of fetching the key by name.
season = json.loads(fileinfo.text).get('season', None)
#you can change None to any default value here

Compare images and save in dynamodb aws

Hi, I want to write a Lambda function that works as follows. I have two folders in an S3 bucket: the first contains the "owner" pictures and the second contains random pictures. I want to compare all of the random pictures with the owner pictures and then save the results in DynamoDB, with the owner name recorded against every picture. At the moment I am lost in the face detection API and am doing something like this:
BUCKET = "ais-django"
KEY = "20180530105812.jpeg"
FEATURES_BLACKLIST = ("Landmarks", "Emotions", "Pose", "Quality", "BoundingBox", "Confidence")
def detect_faces(bucket, key, attributes=['ALL'], region="eu-west-1"):
rekognition = boto3.client("rekognition", region)
response = rekognition.detect_faces(
"S3Object": {
"Bucket": bucket,
"Name": key,
return response['FaceDetails']
for face in detect_faces(BUCKET, KEY):
"Face ({Confidence}%)".format(**face)
# emotions
for emotion in face['Emotions']:
" {Type} : {Confidence}%".format(**emotion)
# quality
for quality, value in face['Quality'].iteritems():
" {quality} : {value}".format(quality=quality, value=value)
# facial features
for feature, data in face.iteritems():
if feature not in FEATURES_BLACKLIST:
" {feature}({data[Value]}) : {data[Confidence]}%".format(feature=feature, data=data)
You can use compare_faces operation of Rekognition client. Here is a pseudocode reflecting the operation (Note: this code is not tested and only to show the concepts). You can adjust the similarity threshold as per your need.
client = boto3.client('rekognition', region_name='eu-west-1')
for key in keyNamesInsideRandomFolder:
response = client.detect_faces(
'S3Object': {
'Bucket': "bucketname",
'Name': "randomfolder/"+key
faceDetails = response['FaceDetails']
hasFace = len(faceDetails) > 0
if hasFace:
response = client.compare_faces(
'S3Object': {
'Bucket': "bucketname",
'Name': "ownerfolder/ownerimage.jpg"
'S3Object': {
'Bucket': "bucketname",
'Name': "randomfolder/"+key
faceMatch= response['FaceMatches']
similarity = faceMatch['Similarity']
if similarity>90:
#write to dynamodb
Edited: To get a list of objects from a folder with prefix/folder 'random', use list_objects operation of s3 client.
response = client.list_objects(
for x in range(1, numberofobjects):
Note: response['Contents'][x]['Key'] returns the key name of the object including the prefix. For example, if you have an image with the filename img.jpg inside the random folder, it returns "random/img.jpg". Notice that I started the for loop from 1; that is because the first element returned in the response is just the key name of the folder itself, i.e. "random/" in this case.

writing json-ish list to csv, line by line, in python for bitcoin addresses

I'm querying the onename api in an effort to get the bitcoin addresses of all the users.
At the moment I'm getting all the user information as a json-esque list, and then piping the output to a file, it looks like this:
[{'0': {'owner_address': '1Q2Tv6f9vXbdoxRmGwNrHbjrrK4Hv6jCsz', 'zone_file': '{"avatar": {"url": ""}, "bitcoin": {"address": "1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh"}, "cover": {"url": ""}, "facebook": {"proof": {"url": ""}, "username": "jasondrake1978"}, "graph": {"url": ""}, "location": {"formatted": "Mechanicsville, Va"}, "name": {"formatted": "Jason Drake"}, "twitter": {"username": "000001"}, "v": "0.2", "website": ""}', 'verifications': [{'proof_url': '', 'service': 'facebook', 'valid': False, 'identifier': 'jasondrake1978'}], 'profile': {'website': '', 'cover': {'url': ''}, 'facebook': {'proof': {'url': ''}, 'username': 'jasondrake1978'}, 'twitter': {'username': '000001'}, 'bitcoin': {'address': '1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh'}, 'name': {'formatted': 'Jason Drake'}, 'graph': {'url': ''}, 'location': {'formatted': 'Mechanicsville, Va'}, 'avatar': {'url': ''}, 'v': '0.2'}}}]
what I'm really interested in is the field {"address": "1NmLvYVEZqPGeQNcgFS3DdghpoqaH4r5Xh"}, the rest of the stuff I don't need, I just want the addresses of every user.
Is there a way that I can just write only the addresses to a file using python?
I'm trying to write it as something like:
and so on.
I've tried a number of different ways using dump, dumps, etc. but I haven't yet been able to pin it down.
My code looks like this:
import os
import json
import requests
#import py2neo
import csv
# set up authentication parameters
#py2neo.authenticate("", "neo4j", "uni-bonn")
# Connect to graph and add constraints.
neo4jUrl = os.environ.get('NEO4J_URL',"")
#graph = py2neo.Graph(neo4jUrl)
# Add uniqueness constraints."CREATE CONSTRAINT ON (q:Person) ASSERT IS UNIQUE;")
# Build URL.
apiUrl = ""
# apiUrl = ""
# Send GET request.
Allusersjson = requests.get(apiUrl, headers = {"accept":"application/json"}).json()
for username in Allusersjson['usernames']:
usernamex= username[:-3]
userinfo=requests.get(apiUrl2, headers = {"accept":"application/json"}).json()
# try:
# if('bitcoin' not in userinfo[usernamex]['profile']):
# continue
# else:
# UsersDetails.append(userinfo)
# except:
# continue
address = userinfo[usernamex]["profile"]["bitcoin"]["address"]
except KeyError:
pass # no address
out = "\n".join(UsersDetails)
open("out.csv", "w").write(out)
# f = csv.writer(open("test.csv", "wb+"))
# Build query.
query = """
RETURN {json}
# Send Cypher query.
# py2neo.CypherQuery(graph, query).run(json=json)
anyway, in such a situation, what's the best way to write out those addresses as csv :/
I ran it, and at first it worked, but then I got the following error:
Instead of adding all the information to the UsersDetails list
you can add just the relevant part (address)
address = userinfo[usernamex]["profile"]["bitcoin"]["address"]
except KeyError:
pass # no address
except TypeError:
pass # illformed data
To print the values to the screen:
out = "\n".join(UsersDetails)
(replace "\n" with "," for comma separated output, instead of one per line)
To save to a file:
# Use a context manager so the file is flushed and closed deterministically.
with open("out.csv", "w") as out_file:
    out_file.write(out)
You need to reformat the list, either through map() or a list comprehension, to get it down to just the information you want. For example, if the top-level key used in the response from the API is always 0, you can do something like this
UsersAddresses = [user['0']['profile']['bitcoin']['address'] for user in UsersDetails]

What is the data format returned by the AdWords API TargetingIdeaPage service?

When I query the AdWords API to get search volume data and trends through their TargetingIdeaSelector using the Python client library the returned data looks like this:
totalNumEntries = 1
entries[] =
data[] =
value =
Attribute.Type = "StringAttribute"
value = "keyword phrase"
value =
Attribute.Type = "MonthlySearchVolumeAttribute"
value[] =
year = 2016
month = 2
count = 2900
year = 2015
month = 3
count = 2900
This isn't JSON and appears to just be a messy Python list. What's the easiest way to flatten the monthly data into a Pandas dataframe with a structure like this?
Keyword | Year | Month | Count
keyword phrase 2016 2 10
The output is a sudsobject. I found that this code does the trick:
import suds.sudsobject as sudsobject
import pandas as pd
a = [sudsobject.asdict(x) for x in output]
df = pd.DataFrame(a)
Addendum: This was once correct but new versions of the API (I tested
201802) now return a zeep.objects. However, zeep.helpers.serialize_object should do the same trick.
Here's the complete code that I used to query the TargetingIdeaSelector, with requestType STATS, and the method I used to parse the data to a useable dataframe; note the section starting "Parse results to pandas dataframe" as this takes the output given in the question above and converts it to a dataframe. Probably not the fastest or best, but it works! Tested with Python 2.7.
"""This code pulls trends for a set of keywords, and parses into a dataframe.
The LoadFromStorage method is pulling credentials and properties from a
"googleads.yaml" file. By default, it looks for this file in your home
directory. For more information, see the "Caching authentication information"
section of our README.
from googleads import adwords
import pandas as pd
adwords_client = adwords.AdWordsClient.LoadFromStorage()
# Initialize appropriate service.
targeting_idea_service = adwords_client.GetService(
'TargetingIdeaService', version='v201601')
# Construct selector object and retrieve related keywords.
offset = 0
stats_selector = {
'searchParameters': [
'xsi_type': 'RelatedToQuerySearchParameter',
'queries': ['donald trump', 'bernie sanders']
# Language setting (optional).
# The ID can be found in the documentation:
'xsi_type': 'LanguageSearchParameter',
'languages': [{'id': '1000'}],
# Location setting
'xsi_type': 'LocationSearchParameter',
'locations': [{'id': '1027363'}] # Burlington,Vermont
'ideaType': 'KEYWORD',
'requestType': 'STATS',
'requestedAttributeTypes': ['KEYWORD_TEXT', 'TARGETED_MONTHLY_SEARCHES'],
'paging': {
'startIndex': str(offset),
'numberResults': str(PAGE_SIZE)
stats_page = targeting_idea_service.get(stats_selector)
# Parse results to pandas dataframe
stats_pd = pd.DataFrame()
if 'entries' in stats_page:
for stats_result in stats_page['entries']:
stats_attributes = {}
for stats_attribute in stats_result['data']:
#print (stats_attribute)
if stats_attribute['key'] == 'KEYWORD_TEXT':
kt = stats_attribute['value']['value']
for i, val in enumerate(stats_attribute['value'][1]):
data = {'keyword': kt,
'year': val['year'],
'month': val['month'],
'count': val['count']}
data = pd.DataFrame(data, index = [i])
stats_pd = stats_pd.append(data, ignore_index=True)

