Created
May 31, 2018 23:01
-
-
Save sughodke/19bd5c726197deaffd8857940cc2c889 to your computer and use it in GitHub Desktop.
Scrape current RDS pricing data from the Amazon Web Services price catalog (adapted from the original EC2 scraper in powdahound/ec2instances.info)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import requests | |
import json | |
from json import encoder | |
import sys | |
def add_pretty_names(instances):
    """Attach a human-readable ``pretty_name`` to every instance record.

    The name is built from the instance family (looked up in a table of
    known families, falling back to the upper-cased family code) followed by
    the spelled-out size, e.g. ``db.r4.2xlarge`` becomes
    ``R4 Memory Optimized Double Extra Large``.

    Args:
        instances: dict mapping an arbitrary key to an instance dict that has
            an ``instance_type`` entry. Each instance dict is modified in
            place.

    Returns:
        None. The result is stored under ``pretty_name`` on each record.
    """
    family_names = {
        't2': 'T2 General Purpose',
        'r3': 'R3 Memory Optimized',
        'r4': 'R4 Memory Optimized',
        'c3': 'C3 High-CPU',
        'c4': 'C4 High-CPU',
        'm3': 'M3 General Purpose',
        'i3': 'I3 High I/O',
        'cg1': 'Cluster GPU',
        'cc2': 'Cluster Compute',
        'cr1': 'High Memory Cluster',
        'hs1': 'High Storage',
        'c1': 'C1 High-CPU',
        'hi1': 'HI1. High I/O',
        'm2': 'M2 High Memory',
        'm1': 'M1 General Purpose',
        'm4': 'M4 General Purpose',
    }
    # (size prefix, spelled-out multiplier), tested in this order. A bare
    # "x" (as in "xlarge") has no multiplier word, only "Extra Large".
    size_multipliers = (
        ('8x', 'Eight'),
        ('4x', 'Quadruple'),
        ('2x', 'Double'),
        ('10x', 'Deca'),
        ('x', ''),
    )

    for instance in instances.values():
        instance_type = instance['instance_type']
        # instance type format looks like "db.r4.large"; dropping the "db" prefix
        pieces = instance_type.split('.')
        if len(pieces) >= 3:
            family, short = pieces[1], pieces[2]
        elif len(pieces) == 2:
            # No "db" prefix present: treat the value as "family.size".
            family, short = pieces
        else:
            # Nothing to split into family and size; keep the raw type
            # rather than failing the whole run on one odd record.
            instance['pretty_name'] = instance_type
            continue

        bits = [family_names.get(family, family.upper())]
        for marker, multiplier in size_multipliers:
            if short.startswith(marker):
                bits.extend([multiplier, 'Extra'])
                short = 'Large'
                break
        bits.append(short.capitalize())

        # Empty pieces (the bare "x" multiplier) are dropped before joining.
        instance['pretty_name'] = ' '.join(b for b in bits if b)
def _load_price_index(input_file=None):
    """Return the parsed RDS offer index from a local file or from AWS."""
    # if an argument is given, use that as the path for the json file
    if input_file:
        with open(input_file) as json_data:
            return json.load(json_data)
    price_index = 'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonRDS/current/index.json'
    response = requests.get(price_index, timeout=60)
    # Fail on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()


def _collect_instances(products):
    """Index 'Database Instance' products by SKU and by instance type.

    Returns a ``(rds_instances, instances)`` pair. ``rds_instances`` maps each
    SKU to its attribute dict; ``instances`` maps each instance type to the
    attribute dict of the first SKU seen for that type (the same dict object,
    with its ``pricing`` entry reset to an empty dict).
    """
    # region mapping, someone thought it was handy not to include the region id's :(
    regions = {
        "AWS GovCloud (US)": 'us-gov-west-1',
        "Asia Pacific (Mumbai)": 'ap-south-1',
        "Asia Pacific (Seoul)": 'ap-northeast-2',
        "Asia Pacific (Singapore)": 'ap-southeast-1',
        "Asia Pacific (Sydney)": 'ap-southeast-2',
        "Asia Pacific (Tokyo)": 'ap-northeast-1',
        "Asia Pacific (Osaka-Local)": 'ap-northeast-3',
        "Canada (Central)": 'ca-central-1',
        "EU (Frankfurt)": 'eu-central-1',
        "EU (Ireland)": 'eu-west-1',
        "EU (London)": 'eu-west-2',
        "EU (Paris)": 'eu-west-3',
        "South America (Sao Paulo)": 'sa-east-1',
        "US East (N. Virginia)": 'us-east-1',
        "US East (Ohio)": 'us-east-2',
        "US West (N. California)": 'us-west-1',
        "US West (Oregon)": 'us-west-2',
    }
    rds_instances = {}
    instances = {}

    # loop through products, and only fetch available instances for now
    for sku, product in products.items():
        if product.get('productFamily') != 'Database Instance':
            continue

        attributes = product['attributes']

        # map the region
        location = attributes['location']
        if location in regions:
            region = regions[location]
        elif location == 'Any':
            region = 'us-east-1'
        else:
            # Unknown locations stay fatal so a missing mapping is noticed.
            raise KeyError(location)

        # set the attributes in line with the ec2 index
        attributes['region'] = region
        attributes['memory'] = attributes['memory'].split(' ')[0]
        attributes['network_performance'] = attributes['networkPerformance']
        attributes['family'] = attributes['instanceFamily']
        attributes['instance_type'] = attributes['instanceType']
        attributes['database_engine'] = attributes['databaseEngine']
        attributes['arch'] = attributes['processorArchitecture']
        attributes['pricing'] = {region: {}}

        rds_instances[sku] = attributes

        instance_type = attributes['instance_type']
        if instance_type not in instances:
            instances[instance_type] = attributes
            instances[instance_type]['pricing'] = {}

    return rds_instances, instances


def _add_ondemand_pricing(on_demand_terms, rds_instances, instances):
    """Store the on-demand hourly price per instance type, region and engine."""
    # skip these for now
    skipped_words = ('transfer', 'global', 'storage', 'iops', 'requests', 'multi-az')

    for sku, offers in on_demand_terms.items():
        for offer in offers.values():
            for dimension in offer['priceDimensions'].values():
                description = dimension['description'].lower()
                if any(word in description for word in skipped_words):
                    continue

                instance = rds_instances.get(sku)
                if not instance:
                    print("ERROR: Instance type not found for sku={}".format(sku))
                    continue

                type_pricing = instances[instance['instance_type']]['pricing']
                regional = type_pricing.setdefault(instance['region'], {})
                regional[instance['database_engine']] = {
                    'ondemand': float(dimension['pricePerUnit']['USD'])
                }


def _add_reserved_pricing(reserved_terms, rds_instances, instances):
    """Store raw reserved price dimensions under ``<term>-<unit>`` keys.

    The values are placeholders that ``_finalize_reserved_pricing`` later
    folds into one effective hourly price per term.
    """
    reserved_mapping = {
        '3yr Partial Upfront': 'yrTerm3.partialUpfront',
        '1yr Partial Upfront': 'yrTerm1.partialUpfront',
        '3yr All Upfront': 'yrTerm3.allUpfront',
        '1yr All Upfront': 'yrTerm1.allUpfront',
        '1yr No Upfront': 'yrTerm1.noUpfront'
    }

    for sku, offers in reserved_terms.items():
        instance = rds_instances.get(sku)
        if not instance:
            # Same handling as the on-demand pass: report and move on
            # instead of aborting the whole scrape with a KeyError.
            print("ERROR: Instance type not found for sku={}".format(sku))
            continue

        # skip multi-az
        if instance.get('deploymentOption') != 'Single-AZ':
            continue

        region = instance['region']
        engine = instance['database_engine']
        type_pricing = instances[instance['instance_type']]['pricing']

        for offer in offers.values():
            term_attributes = offer['termAttributes']
            reserved_type = "%s %s" % (term_attributes['LeaseContractLength'], term_attributes['PurchaseOption'])
            term_key = reserved_mapping.get(reserved_type)
            if term_key is None:
                # Only the terms listed above appear in the output, so an
                # unlisted contract length / purchase option is ignored.
                continue

            for dimension in offer['priceDimensions'].values():
                # Create the region / engine / reserved levels on demand: an
                # engine may have reserved offers without any on-demand
                # entry having been recorded for it.
                reserved = (type_pricing
                            .setdefault(region, {})
                            .setdefault(engine, {})
                            .setdefault('reserved', {}))
                # store the pricing in placeholder field
                placeholder = '%s-%s' % (term_key, dimension['unit'].lower())
                reserved[placeholder] = float(dimension['pricePerUnit']['USD'])


def _finalize_reserved_pricing(instances):
    """Replace raw reserved dimensions with effective hourly prices.

    Effective price = upfront fee spread over the term's hours + hourly fee.
    If any expected dimension is missing for an engine, an error is printed
    and that engine's raw placeholder values are left untouched.
    """
    for instance_type, instance in instances.items():
        for region, pricing in instance['pricing'].items():
            for engine, prices in pricing.items():
                if 'reserved' not in prices:
                    continue
                raw = prices['reserved']
                try:
                    # no multi-az here
                    reserved_prices = {
                        'yrTerm3.partialUpfront': (raw['yrTerm3.partialUpfront-quantity'] / (365 * 3) / 24) + raw['yrTerm3.partialUpfront-hrs'],
                        'yrTerm1.partialUpfront': (raw['yrTerm1.partialUpfront-quantity'] / 365 / 24) + raw['yrTerm1.partialUpfront-hrs'],
                        'yrTerm3.allUpfront': (raw['yrTerm3.allUpfront-quantity'] / (365 * 3) / 24) + raw['yrTerm3.allUpfront-hrs'],
                        'yrTerm1.allUpfront': (raw['yrTerm1.allUpfront-quantity'] / 365 / 24) + raw['yrTerm1.allUpfront-hrs'],
                        'yrTerm1.noUpfront': raw['yrTerm1.noUpfront-hrs'],
                    }
                except KeyError as e:
                    print("ERROR: Trouble generating RDS reserved price for {}: {!r}".format(instance_type, e))
                else:
                    prices['reserved'] = reserved_prices


def scrape(output_file, input_file=None):
    """Build a JSON list of RDS instance types with their pricing.

    Args:
        output_file: path of the JSON file to write; it receives a list with
            one entry per instance type.
        input_file: optional path to a local copy of the RDS offer index.
            When omitted, the index is downloaded from the AWS pricing
            endpoint.

    Raises:
        KeyError: if a product names a location that has no region mapping,
            or the index lacks an expected key.
        requests.HTTPError: if the download returns an HTTP error status.
    """
    data = _load_price_index(input_file)

    rds_instances, instances = _collect_instances(data['products'])

    # Parse ondemand pricing
    _add_ondemand_pricing(data['terms']['OnDemand'], rds_instances, instances)

    # Parse reserved pricing
    _add_reserved_pricing(data['terms']['Reserved'], rds_instances, instances)

    # Calculate all reserved effective pricings (upfront hourly + hourly price)
    _finalize_reserved_pricing(instances)

    add_pretty_names(instances)

    # write output to file
    # NOTE(review): recent Python 3 releases appear not to consult
    # json.encoder.FLOAT_REPR, so this may have no effect on the output --
    # confirm before relying on 5-decimal formatting.
    encoder.FLOAT_REPR = lambda o: format(o, '.5f')
    with open(output_file, 'w') as outfile:
        json.dump(list(instances.values()), outfile, indent=4)
if __name__ == '__main__':
    # An optional first command-line argument is passed to scrape() as the
    # path of a local price index; the result always goes to instances.json.
    source_path = sys.argv[1] if len(sys.argv) > 1 else None
    scrape('instances.json', source_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment