@WalBeh
Last active November 13, 2024 15:58
Exports quota and usage metrics for selected VPC, EC2, ELB, and EBS resources as Prometheus gauges.
import boto3
from botocore.exceptions import ClientError
import logging
import time
import os
from prometheus_client import start_http_server, Gauge

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Define Prometheus metrics with an additional 'account' label
aws_resource_usage = Gauge(
    'aws_resource_usage',
    'AWS Resource Usage Counts',
    ['account', 'region', 'resource']
)
aws_resource_quota = Gauge(
    'aws_resource_quota',
    'AWS Resource Quotas',
    ['account', 'region', 'resource']
)
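
# Illustrative exposition output once these gauges are populated (the label
# values below are made up for the example; real values come from the scan
# in the functions that follow):
#   aws_resource_usage{account="default",region="eu-west-1",resource="VPCs"} 3.0
#   aws_resource_quota{account="default",region="eu-west-1",resource="VPCs"} 5.0
# Because both gauges share the same label set, a saturation ratio can be
# derived in Prometheus with a query such as:
#   aws_resource_usage / aws_resource_quota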


def get_resource_quotas(session, region, account):
    """
    Retrieves configured quotas for specific AWS resources in a region.

    Args:
        session (boto3.Session): Boto3 session for the AWS account.
        region (str): AWS region.
        account (str): AWS account/profile name.

    Returns:
        dict: Configured quotas for AWS resources.
    """
    try:
        service_quota_client = session.client('service-quotas', region_name=region)
        quotas = {}

        # VPCs quota
        quotas['VPCs'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-F678F1CE'
        )['Quota']['Value']

        # Subnets quota
        quotas['Subnets'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-407747CB'
        )['Quota']['Value']

        # Load Balancers quota
        quotas['Classic Load Balancers'] = service_quota_client.get_service_quota(
            ServiceCode='elasticloadbalancing',
            QuotaCode='L-E9E9831D'  # QuotaCode for Classic ELBs
        )['Quota']['Value']
        quotas['Network Load Balancers'] = service_quota_client.get_service_quota(
            ServiceCode='elasticloadbalancing',
            QuotaCode='L-69A177A2'  # QuotaCode for NLBs
        )['Quota']['Value']

        quotas['Security Group Rules'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-0EA8095F'  # QuotaCode for Security Group Rules
        )['Quota']['Value']
        quotas['Network ACLs per VPC'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-B4A6D682'  # QuotaCode for Network ACLs
        )['Quota']['Value']
        quotas['Rules per Network ACL'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-2AEEBF1A'  # QuotaCode for Network ACL Entries
        )['Quota']['Value']
        quotas['VPC Peering Connections'] = service_quota_client.get_service_quota(
            ServiceCode='vpc',
            QuotaCode='L-7E9ECCDB'  # QuotaCode for VPC Peering Connections
        )['Quota']['Value']
        quotas['EC2 m6a Dedicated Instances'] = service_quota_client.get_service_quota(
            ServiceCode='ec2',
            QuotaCode='L-80F2B67F'  # QuotaCode for EC2 Instances m6a
        )['Quota']['Value']
        quotas['All Standard Spot Instance Requests'] = service_quota_client.get_service_quota(
            ServiceCode='ec2',
            QuotaCode='L-34B43A08'  # QuotaCode for EC2 Spot Instances
        )['Quota']['Value']
        quotas['Storage for General Purpose SSD (gp3) volumes, in TiB'] = service_quota_client.get_service_quota(
            ServiceCode='ebs',
            QuotaCode='L-7A658B76'
        )['Quota']['Value']

        return quotas
    except ClientError as e:
        logger.error(f"[{account}][{region}] Error fetching quotas: {e}")
        return {}


def get_resource_counts(session, region, account):
    """
    Retrieves counts for specific AWS resources in a region.

    Args:
        session (boto3.Session): Boto3 session for the AWS account.
        region (str): AWS region.
        account (str): AWS account/profile name.

    Returns:
        dict: Counts of various AWS resources.
    """
    counts = {}
    try:
        ec2 = session.client('ec2', region_name=region)
        elb = session.client('elbv2', region_name=region)
        classic_elb = session.client('elb', region_name=region)

        # VPCs count
        vpc_response = ec2.describe_vpcs()
        counts['VPCs'] = len(vpc_response.get('Vpcs', []))

        # Subnets count
        subnet_response = ec2.describe_subnets()
        counts['Subnets'] = len(subnet_response.get('Subnets', []))

        # Load Balancers (ALB/NLB)
        lb_response = elb.describe_load_balancers()
        lbs = lb_response.get('LoadBalancers', [])
        # Count Network Load Balancers
        counts['Network Load Balancers'] = len([lb for lb in lbs if lb['Type'] == 'network'])

        # Classic ELB
        classic_response = classic_elb.describe_load_balancers()
        counts['Classic Load Balancers'] = len(classic_response.get('LoadBalancerDescriptions', []))

        # Security Group Rules (inbound + outbound)
        sg_response = ec2.describe_security_groups()
        counts['Security Group Rules'] = sum(
            len(sg.get('IpPermissions', [])) + len(sg.get('IpPermissionsEgress', []))
            for sg in sg_response.get('SecurityGroups', [])
        )

        # Network ACLs count
        nacl_response = ec2.describe_network_acls()
        nacls = nacl_response.get('NetworkAcls', [])
        counts['Network ACLs per VPC'] = len(nacls)

        # Network ACL Entries
        counts['Rules per Network ACL'] = sum(
            len(nacl.get('Entries', []))
            for nacl in nacls
        )

        # VPC Peering Connections
        peering_response = ec2.describe_vpc_peering_connections()
        counts['VPC Peering Connections'] = len(peering_response.get('VpcPeeringConnections', []))

        # EC2 Instances
        ec2_response = ec2.describe_instances()
        counts['EC2 Instances'] = sum(
            len(reservations.get('Instances', []))
            for reservations in ec2_response.get('Reservations', [])
        )

        # Storage for General Purpose SSD (gp3) volumes, in TiB
        gp3_volumes_response = ec2.describe_volumes(
            Filters=[
                {'Name': 'volume-type', 'Values': ['gp3']}
            ]
        )
        gp3_total_gib = sum(volume['Size'] for volume in gp3_volumes_response.get('Volumes', []))
        gp3_total_tib = gp3_total_gib / 1024  # Convert GiB to TiB
        counts['Storage for General Purpose SSD (gp3) volumes, in TiB'] = gp3_total_tib

        # # Storage Modifications for General Purpose SSD (gp3) volumes, in TiB
        # storage_modifications_response = ec2.describe_volume_modifications()
        # modifications = storage_modifications_response.get('VolumesModifications', [])
        # gp3_storage_modifications_gib = sum(
        #     (mod.get('Modification', {}).get('TargetSize', 0) - mod.get('VolumeSize', 0))
        #     for mod in modifications
        #     if mod.get('VolumeType') == 'gp3' and 'Modification' in mod
        # )
        # # Ensure that we don't have negative modifications
        # gp3_storage_modifications_gib = max(gp3_storage_modifications_gib, 0)
        # gp3_storage_modifications_tib = gp3_storage_modifications_gib / 1024  # Convert GiB to TiB
        # counts['Storage modifications for General Purpose SSD (gp3) volumes, in TiB'] = gp3_storage_modifications_tib

        # Calculate region total
        region_total = sum(counts.values())
        counts['REGION TOTAL'] = region_total

        return counts
    except ClientError as e:
        logger.error(f"[{account}][{region}] Error fetching counts: {e}")
        return {}


def update_metrics(session, region, account):
    """
    Fetches resource counts and quotas, updates Prometheus metrics.

    Args:
        session (boto3.Session): Boto3 session for the AWS account.
        region (str): AWS region.
        account (str): AWS account/profile name.
    """
    counts = get_resource_counts(session, region, account)
    quotas = get_resource_quotas(session, region, account)

    # Update Usage Metrics
    for resource, count in counts.items():
        if resource != 'REGION TOTAL':
            aws_resource_usage.labels(account=account, region=region, resource=resource).set(count)

    # Update Quota Metrics
    for resource, quota in quotas.items():
        aws_resource_quota.labels(account=account, region=region, resource=resource).set(quota)


def main():
    # Start Prometheus HTTP server to expose metrics
    start_http_server(8000, addr='0.0.0.0')
    logger.info("Prometheus metrics server started on port 8000")

    # Define AWS profiles and regions
    if not os.environ.get('AWS_PROFILE'):
        logger.warning("AWS_PROFILE environment variable not set; falling back to built-in profile list")
        profiles = ['default', 'cratedb-cloud-dev', 'cratedb-cloud-prod']  # Add your AWS profiles here
    else:
        profiles = os.environ.get('AWS_PROFILE').split(',')
    regions = ['us-east-1', 'us-west-2', 'eu-west-1']  # Add or modify regions as needed

    logger.info(f"Checking resources for profiles: {profiles} in regions: {regions}")

    while True:
        for profile in profiles:
            session = boto3.Session(profile_name=profile)
            for region in regions:
                logger.info(f"[{profile}][{region}] Updating metrics")
                update_metrics(session, region, profile)
        logger.info("Metrics updated")
        time.sleep(120)  # Sleep for 2 minutes


if __name__ == "__main__":
    main()
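
To verify that the exporter is serving data, the exposition endpoint can be read directly. The snippet below is a minimal sketch separate from the gist itself: it assumes the script above is already running locally on port 8000 and uses only the Python standard library.

import urllib.request

# Fetch the Prometheus exposition text from the running exporter and print
# only the gauges defined above.
with urllib.request.urlopen("http://localhost:8000/metrics") as response:
    for line in response.read().decode("utf-8").splitlines():
        if line.startswith(("aws_resource_usage", "aws_resource_quota")):
            print(line)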