├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── report-example.xlsx ├── requirements.txt ├── run.py ├── src ├── get_bill_diff.py ├── get_ec2_costs.sh ├── get_ec2_data.py ├── get_ec2_instance_history.py ├── get_ec2_metadata.py ├── get_ec2_recommendations.py ├── get_last_month_ebs_cost.py ├── get_last_month_ec2_cost.py ├── get_s3_cost.py ├── make_sheet.py ├── make_xlsx.py ├── mytypes.py ├── ressources │ └── introduction.png ├── sheets.py └── utils.py └── util ├── awsdumpenv └── awsenv /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | *.pyc 3 | /src/__pycache__ 4 | /in 5 | /out 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | RUN apt-get update && apt-get upgrade -y 4 | RUN apt-get install jq python3-pip curl -y 5 | 6 | COPY . /root/aws-cost-report 7 | WORKDIR /root/aws-cost-report 8 | RUN pip3 install -r requirements.txt 9 | 10 | ENV PYTHONUNBUFFERED=0 11 | 12 | ENTRYPOINT ["/root/aws-cost-report/run.py"] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 MSolution.IO 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/introduction.png) 2 | # Cost report generator 3 | 4 | ## Requirements 5 | 6 | - Install [jq](https://stedolan.github.io/jq/download/) 7 | - Install the python3 dependencies: `sudo pip3 install -r requirements.txt` 8 | 9 | ## Google Sheets API access 10 | 11 | Follow the instructions at 12 | https://developers.google.com/sheets/api/quickstart/python to setup credentials 13 | and API access. 14 | 15 | ## How to run the tool on your machine 16 | 17 | ``` 18 | # Print help and usage information 19 | $> ./run.py --help 20 | 21 | # Run with one billing bucket and one EC2 profile 22 | $> ./run.py --billing profile_name billing-bucket-name prefix --ec2 profile_name --xlsx-name filename 23 | 24 | # Run with multiple billing buckets and EC2 profiles 25 | $> ./run.py --billing profile_name billing-bucket-name prefix --billing profile_name2 billing-bucket-name2 prefix2 --ec2 profile_name --ec2 profile_name2 --xlsx-name filename 26 | ``` 27 | 28 | The tool is built to use AWS credentials stored in `~/.aws/credentials`. 29 | If you set the profile to `env`, the tool will use environment variables you must supply instead. 
30 | 31 | ## How to run the tool with docker 32 | 33 | The docker container does not export any data to google sheets. 34 | However it generates CSVs in the `out` directory, and a local spreadsheet generation will be added soon. 35 | 36 | ### Use our prebuilt image 37 | 38 | ``` 39 | # Pull the msolution/aws-cost-report image 40 | $> docker pull msolution/aws-cost-report 41 | ``` 42 | 43 | ### Build your own image 44 | 45 | ``` 46 | # Build your own msolution/aws-cost-report image 47 | $> docker build -t msolution/aws-cost-report . 48 | ``` 49 | 50 | ### Use the docker container 51 | 52 | ``` 53 | # Run with one billing bucket and one EC2 profile, using env credentials 54 | $> docker run -v /local/path/out:/root/aws-cost-report/out -e AWS_ACCESS_KEY_ID=accesskeyid -e AWS_SECRET_ACCESS_KEY=secretaccesskey -e AWS_DEFAULT_REGION=default-region -e AWS_SESSION_TOKEN=sessiontoken(optional) msolution/aws-cost-report --no-generate-sheet --billing env billing-bucket-name prefix --ec2 env --xlsx-name filename 55 | 56 | # Run with multiple billing buckets and EC2 profiles, using your local aws credentials 57 | $> docker run -v /path/to/credentials:/root/.aws:ro -v /local/path/out:/root/aws-cost-report/out msolution/aws-cost-report --no-generate-sheet --billing profile_name billing-bucket-name prefix --billing profile_name2 billing-bucket-name2 prefix2 --ec2 profile_name --ec2 profile_name2 --xlsx-name filename 58 | ``` 59 | 60 | ## Screenshots 61 | 62 | Download the report example [here](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/report-example.xlsx). 
63 | 64 | ### Cost and variations tab 65 | 66 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/cost-variations.png) 67 | 68 | 69 | ### Reserved instance summary 70 | 71 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/reserved-instance-summary.png) 72 | 73 | 74 | ### Instance size recommendations 75 | 76 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/instance-size-recommendations.png) 77 | 78 | 79 | ### EC2 instances last month 80 | 81 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/ec2-instances-last-month.png) 82 | -------------------------------------------------------------------------------- /report-example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trackit/aws-cost-report/d04b24330fd1d4fcad463e5e53c5ca5955a226ee/report-example.xlsx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client==1.6.5 2 | boto3==1.6.3 3 | awscli==1.14.59 4 | xlsxwriter==1.0.2 5 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import argparse 5 | import sys 6 | import os 7 | import hashlib 8 | import json 9 | import threading 10 | import zipfile 11 | import gzip 12 | import time 13 | import shutil 14 | import itertools 15 | import dateutil.relativedelta 16 | from datetime import datetime 17 | 18 | class Parser(argparse.ArgumentParser): 19 | def print_help(self, file=sys.stdout): 20 | super(Parser, self).print_help(file) 21 | print( 22 | """ 23 | BILLING PREFIX: 24 | This tool uses AWS's new Cost And Usage Report format for billing data. 
The 25 | following structure is expected in S3: 26 | 27 | PREFIX 28 | `- arbitraryReportName 29 | |- 20171001-20171101 30 | | |- arbitraryReportName-Manifest.json 31 | | |- bbe82960-6a1a-47fd-ae59-1e666e2f674a 32 | | | |- arbitraryReportName-Manifest.json 33 | | | |- arbitraryReportName-1.csv.gz 34 | | | `- ... 35 | | `- ... 36 | `- ... 37 | 38 | You can get more information about this at 39 | https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage.html""", 40 | file=file, 41 | ) 42 | 43 | def error(self, message): 44 | print(message) 45 | self.print_help() 46 | sys.exit(2) 47 | 48 | 49 | def parse_args(): 50 | parser = Parser() 51 | parser.add_argument( 52 | "--no-clear-before", 53 | help="Do not clear all data before doing anything. Useful when a previous invocation failed or when you add data incrementally before generating the sheet.", 54 | dest="clear_before", 55 | action="store_false", 56 | default=True, 57 | ) 58 | parser.add_argument( 59 | "--no-generate-xlsx", 60 | help="Do not generate a XLSX file after all data was retrieved.", 61 | dest="generate_xslx", 62 | action="store_false", 63 | default=True, 64 | ) 65 | now = datetime.now() 66 | parser.add_argument( 67 | "--xlsx-name", 68 | help="Name of the XLSX file.", 69 | dest="xlsx_name", 70 | default=now.strftime("trackit_aws_cost_report_%Y_%m_%d"), 71 | ) 72 | parser.add_argument( 73 | "--generate-gsheet", 74 | help="Generate a Google Sheet after all data was retrieved.", 75 | dest="generate_gsheet", 76 | action="store_true", 77 | default=False, 78 | ) 79 | parser.add_argument( 80 | "--billing", 81 | help="Get billing data from s3:/BUCKET/PREFIX using PROFILE.", 82 | action="append", 83 | nargs=3, 84 | metavar=("PROFILE", "BUCKET", "PREFIX"), 85 | default=[], 86 | ) 87 | parser.add_argument( 88 | "--ec2", 89 | help="Get EC2 data for PROFILE.", 90 | action="append", 91 | nargs=1, 92 | metavar="PROFILE", 93 | default=[], 94 | ) 95 | return parser.parse_args(), parser 96 | 97 | 
98 | def try_mkdir(path): 99 | try: 100 | os.mkdir(path) 101 | except FileExistsError: 102 | pass 103 | 104 | 105 | try_mkdir("in") 106 | try_mkdir("in/usagecost") 107 | try_mkdir("out") 108 | try_mkdir("out/reservation-usage") 109 | try_mkdir("out/instance-reservation-usage") 110 | try_mkdir("out/instance-size-recommendation") 111 | try_mkdir("out/instance-metadata") 112 | try_mkdir("out/last-month") 113 | try_mkdir("out/s3") 114 | 115 | default_region = "us-east-1" 116 | 117 | def awsenv(profile, region): 118 | return "util/awsenv --profile {} --region {}".format(profile, region) 119 | 120 | 121 | def build_billing_diff(): 122 | os.system("src/get_bill_diff.py") 123 | 124 | 125 | def build_instance_history(): 126 | os.system("src/get_ec2_instance_history.py") 127 | 128 | def build_ec2_last_month_usage(): 129 | os.system("src/get_last_month_ec2_cost.py") 130 | 131 | def build_ebs_last_month_usage(): 132 | os.system("src/get_last_month_ebs_cost.py") 133 | 134 | def build_s3_cost(): 135 | os.system("src/get_s3_cost.py") 136 | 137 | def build_gsheet(): 138 | os.system("src/make_gsheet.py") 139 | 140 | 141 | def build_xlsx(name): 142 | os.system("src/make_xlsx.py {}".format(name)) 143 | 144 | def get_session(profile): 145 | if profile != 'env': 146 | session = boto3.Session(profile_name=profile) 147 | else: 148 | session = boto3.Session() 149 | return session 150 | 151 | def do_get_billing_data(profile, bucket, prefix): 152 | 153 | nonce = hashlib.sha1("{}{}".format(bucket, prefix).encode()).hexdigest()[:12] 154 | it = 1 155 | concurrent_available = 4 156 | concurrent_available_mutex = threading.Lock() 157 | thread = [] 158 | 159 | def change_concurrent_available(value): 160 | nonlocal concurrent_available 161 | nonlocal concurrent_available_mutex 162 | 163 | concurrent_available_mutex.acquire() 164 | concurrent_available += value 165 | concurrent_available_mutex.release() 166 | 167 | def save_to_file(s3_client, bucket, file_name, report_key): 168 | try: 169 | 
s3_client.download_file(Bucket=bucket, Key=report_key, Filename=file_name) 170 | except Exception as e: 171 | print(e) 172 | finally: 173 | change_concurrent_available(1) 174 | 175 | def analyze_report(s3_client, bucket, report_keys): 176 | nonlocal it 177 | nonlocal thread 178 | nonlocal concurrent_available 179 | for report_key in report_keys: 180 | if concurrent_available <= 0: 181 | print(" Waiting to download {}...".format(report_key)) 182 | while concurrent_available <= 0: 183 | time.sleep(0.1) 184 | file_name = "in/usagecost/{}.{}.csv.{}".format(nonce, it, report_key.split(".")[-1]) 185 | t = threading.Thread(name=report_key, target=save_to_file, args=(s3_client, bucket, file_name, report_key)) 186 | print(" Downloading {}...".format(report_key)) 187 | t.start() 188 | change_concurrent_available(-1) 189 | thread.append(t) 190 | it += 1 191 | 192 | def analyze_obj(s3_client, objs): 193 | total = len(objs) 194 | current = 1 195 | for obj in objs: 196 | print(" Getting bill files from {} ({}/{})...".format(obj["Key"], current, total)) 197 | content = s3_client.get_object(Bucket=bucket, Key=obj["Key"])["Body"].read().decode("utf-8") 198 | content_json = json.loads(content) 199 | if "bucket" in content_json: 200 | analyze_report(s3_client, content_json["bucket"], content_json["reportKeys"]) 201 | current += 1 202 | for t in thread: 203 | t.join() 204 | 205 | def unzip_obj(): 206 | for file_name in os.listdir("in/usagecost"): 207 | try: 208 | print("Extracting {}...".format(file_name)) 209 | if file_name.startswith(nonce) and file_name.endswith(".zip"): 210 | with zipfile.ZipFile(os.path.join("in/usagecost", file_name), "r") as z: 211 | z.extractall("in/usagecost") 212 | elif file_name.startswith(nonce) and file_name.endswith(".gz"): 213 | with gzip.GzipFile(os.path.join("in/usagecost", file_name), "r") as z: 214 | with open(os.path.join("in/usagecost", file_name[:-3]), "wb+") as f: 215 | shutil.copyfileobj(z, f) 216 | except Exception as e: 217 | print("Failed to 
extract {}: {}".format(file_name, e)) 218 | finally: 219 | os.remove(os.path.join("in/usagecost", file_name)) 220 | 221 | try: 222 | session = get_session(profile) 223 | s3_client = session.client("s3") 224 | page = s3_client.get_paginator("list_objects").paginate(Bucket=bucket, Prefix=prefix) 225 | min_date = (datetime.now() + dateutil.relativedelta.relativedelta(months=-6)).replace(day=1).strftime('%Y%m%d') 226 | objs = [ 227 | obj 228 | for p in page 229 | for obj in p["Contents"] 230 | if obj["Key"].endswith(".json") and 231 | len(obj["Key"].split('/')) == 4 and 232 | obj["Key"].split('/')[-2] >= min_date 233 | ] 234 | except Exception as e: 235 | exit(e) 236 | analyze_obj(s3_client, objs) 237 | unzip_obj() 238 | 239 | 240 | def do_get_instance_data(profile, region): 241 | threads = [] 242 | for cmd in ( 243 | "{} src/get_ec2_recommendations.py".format(awsenv(profile, region)), 244 | "{} src/get_ec2_metadata.py".format(awsenv(profile, region)), 245 | ): 246 | threads.append(threading.Thread(target=os.system, args=[cmd])) 247 | threads[-1].start() 248 | for t in threads: 249 | t.join() 250 | 251 | 252 | def recursively_remove_file(path): 253 | if os.path.isdir(path): 254 | for f in os.listdir(path): 255 | recursively_remove_file(os.path.join(path, f)) 256 | else: 257 | os.remove(path) 258 | 259 | 260 | def clear_data(): 261 | for f in os.listdir("out"): 262 | recursively_remove_file(os.path.join("out", f)) 263 | for f in os.listdir("in"): 264 | f = os.path.join("in", f) 265 | if not os.path.isdir(f) or (os.path.isdir(f) and f != "in/persistent"): 266 | recursively_remove_file(f) 267 | 268 | def get_regions(session): 269 | client_region = session.region_name or default_region 270 | client = session.client('ec2', region_name=client_region) 271 | regions = client.describe_regions() 272 | return [ 273 | region['RegionName'] 274 | for region in regions['Regions'] 275 | ] 276 | 277 | def main(): 278 | args, parser = parse_args() 279 | args.ec2 = [a[0] for a in 
args.ec2] if len(args.ec2) else [] 280 | # if len(args.billing) == 0 and len(args.ec2) == 0: 281 | # return parser.print_help() 282 | if args.clear_before: 283 | clear_data() 284 | if not os.path.isfile("in/ondemandcosts.json"): 285 | os.system("src/get_ec2_costs.sh") 286 | for bill in args.billing: 287 | print("Download billings for {}...".format(bill[0])) 288 | do_get_billing_data(*bill) 289 | if len(args.ec2): 290 | session = get_session(args.ec2[0]) 291 | regions = get_regions(session) 292 | threads = [] 293 | for region in regions: 294 | print("Fetching ec2 data for all accounts in {}...".format(region)) 295 | cmd = "src/get_ec2_data.py --region {} --profile {}".format(region, ' '.join(args.ec2)) 296 | threads.append((region, threading.Thread(target=os.system, args=[cmd]))) 297 | threads[-1][1].start() 298 | for t in threads: 299 | t[1].join() 300 | print("Fetched ec2 data for all accounts in {}".format(t[0])) 301 | for ec in args.ec2: 302 | threads = [] 303 | for region in regions: 304 | print("Fetching ec2 metadata for {} in {}...".format(ec, region)) 305 | threads.append((region, threading.Thread(target=do_get_instance_data, args=(ec, region)))) 306 | threads[-1][1].start() 307 | for t in threads: 308 | t[1].join() 309 | print("Fetched ec2 metadata for {} in {}".format(ec, t[0])) 310 | if args.generate_gsheet or args.generate_xslx: 311 | fcts = [ 312 | ("billing diff", build_billing_diff), 313 | ("instance history", build_instance_history), 314 | ("ec2 last month", build_ec2_last_month_usage), 315 | ("ebs last month", build_ebs_last_month_usage), 316 | ("s3 cost", build_s3_cost) 317 | ] 318 | for i, fct in zip(itertools.count(1), fcts): 319 | print("Processing billing data ({}/{} - {})...".format(i, len(fcts), fct[0])) 320 | fct[1]() 321 | if args.generate_gsheet: 322 | build_gsheet() 323 | if args.generate_xslx: 324 | build_xlsx(args.xlsx_name) 325 | 326 | 327 | if __name__ == "__main__": 328 | main() 329 | 
-------------------------------------------------------------------------------- /src/get_bill_diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import sys 5 | import collections 6 | import datetime 7 | import itertools 8 | import os 9 | 10 | DIR_USAGECOST='in/usagecost' 11 | OUT_MONTHS='out/months.csv' 12 | OUT_ABSOLUTE='out/absolute.csv' 13 | 14 | def rows_folder(dirpath): 15 | for filename in os.listdir(dirpath): 16 | filepath = os.path.join(dirpath, filename) 17 | with open(filepath) as f: 18 | for row in rows(f): 19 | yield row 20 | 21 | def parseIsoDatetime(isodatetime): 22 | return datetime.datetime.strptime(isodatetime.replace('Z', '+0000'), '%Y-%m-%dT%H:%M:%S%z') 23 | 24 | def rows(csvfile): 25 | reader = csv.DictReader(csvfile) 26 | for row in reader: 27 | yield row 28 | 29 | def window(l, n=2): 30 | for i in range(len(l) - n + 1): 31 | yield l[i:i+n] 32 | 33 | def variations(costs): 34 | return [ 35 | "" if old <= .0 else (new / old) - 1 36 | for [old, new] in window([0, *costs]) 37 | ] 38 | 39 | def yearmonth_to_string(yearmonth): 40 | year, month = divmod(yearmonth, 12) 41 | return '{:04d}-{:02d}'.format(year, month + 1) 42 | 43 | first_month = -1 44 | last_month = -1 45 | breakdown = collections.defaultdict(float) 46 | 47 | for row in rows_folder(DIR_USAGECOST): 48 | usage_start_date = parseIsoDatetime(row['lineItem/UsageStartDate']) 49 | month = usage_start_date.year * 12 + usage_start_date.month - 1 50 | first_month = month if first_month == -1 else min(month, first_month) 51 | last_month = month if last_month == -1 else max(month, last_month) 52 | usagetype = row['lineItem/UsageType'] 53 | # do not process if it is a line item for AWS support 54 | if row['lineItem/ProductCode'] == 'AWSSupportBusiness': 55 | continue 56 | try: 57 | breakdown[(month, usagetype)] += float(row['lineItem/UnblendedCost'])# if row['lineItem/UnblendedCost'] else 0.0 58 | except: 59 | 
print(row, file=sys.stderr) 60 | print(month, file=sys.stderr) 61 | print(usagetype, file=sys.stderr) 62 | 63 | all_months = sorted(set(k[0] for k in breakdown.keys())) 64 | preserved_months = all_months[-12:] 65 | first_month = preserved_months[0] 66 | last_month = preserved_months[-1] 67 | breakdown = { 68 | (month, usagetype): value 69 | for (month, usagetype), value in breakdown.items() 70 | if month in preserved_months 71 | } 72 | 73 | with open(OUT_MONTHS, 'w') as monthsfile: 74 | writer = csv.writer(monthsfile) 75 | writer.writerow(['month', 'usage', 'cost']) 76 | for key, value in breakdown.items(): 77 | writer.writerow([*key, value]) 78 | 79 | #with open('months.csv') as monthsfile: 80 | # for row in rows(monthsfile): 81 | # breakdown[(int(row['month']), row['usage'])] = float(row['cost']) 82 | # first_month = int(row['month']) if first_month == -1 else min(int(row['month']), first_month) 83 | # last_month = int(row['month']) if last_month == -1 else max(int(row['month']), last_month) 84 | 85 | breakdown_by_date = collections.defaultdict(lambda: list([.0] * (last_month - first_month + 1))) 86 | for (month, product), cost in breakdown.items(): 87 | breakdown_by_date[product][month - first_month] += cost 88 | 89 | with open(OUT_ABSOLUTE, 'w') as f: 90 | writer = csv.writer(f) 91 | writer.writerow(['usage'] + [yearmonth_to_string(ym) for ym in range(first_month, last_month + 1)]) 92 | for product, month_cost in breakdown_by_date.items(): 93 | writer.writerow([product, *month_cost]) 94 | 95 | breakdown_variation = {} 96 | for (product, monthly_costs) in breakdown_by_date.items(): 97 | breakdown_variation[product] = variations(monthly_costs) 98 | -------------------------------------------------------------------------------- /src/get_ec2_costs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | curl 'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json' | \ 4 | jq ' 5 | [ 6 
| .terms.OnDemand as $terms 7 | | .products 8 | | .[] 9 | | select(.productFamily == "Compute Instance") 10 | | select($terms[.sku]) 11 | | ( 12 | . + 13 | { cost: [ 14 | $terms[.sku] 15 | | .[]][0] 16 | | [.priceDimensions | .[]][0] 17 | | .pricePerUnit.USD | 18 | tonumber 19 | } 20 | ) 21 | ]' > in/ondemandcosts.json 22 | -------------------------------------------------------------------------------- /src/get_ec2_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import collections 4 | import csv 5 | import itertools 6 | import json 7 | import re 8 | import argparse 9 | import multiprocessing.pool 10 | from pprint import pprint 11 | import boto3 12 | import botocore 13 | 14 | from mytypes import * 15 | 16 | compute_sheet_region = { 17 | 'us-east-2': 'US East (Ohio)', 18 | 'us-east-1': 'US East (N. Virginia)', 19 | 'us-west-1': 'US West (N. California)', 20 | 'us-west-2': 'US West (Oregon)', 21 | 'ap-northeast-1': 'Asia Pacific (Tokyo)', 22 | 'ap-northeast-2': 'Asia Pacific (Seoul)', 23 | 'ap-south-1': 'Asia Pacific (Mumbai)', 24 | 'ap-southeast-1': 'Asia Pacific (Singapore)', 25 | 'ap-southeast-2': 'Asia Pacific (Sydney)', 26 | 'ca-central-1': 'Canada (Central)', 27 | 'cn-north-1': 'China (Beijing)', 28 | 'cn-northwest-1': 'China (Ningxia)', 29 | 'eu-central-1': 'EU (Frankfurt)', 30 | 'eu-west-1': 'EU (Ireland)', 31 | 'eu-west-2': 'EU (London)', 32 | 'eu-west-3': 'EU (Paris)', 33 | 'sa-east-1': 'South America (Sao Paulo)', 34 | } 35 | 36 | compute_sheet_tenancy = { 37 | 'dedicated': 'Dedicated', 38 | 'host': 'Host', 39 | 'default': 'Shared', 40 | } 41 | 42 | compute_sheet_platform = { 43 | 'Linux/UNIX': 'Linux', 44 | 'Linux/UNIX (Amazon VPC)': 'Linux', 45 | 'SUSE Linux': 'SUSE', 46 | 'SUSE Linux (Amazon VPC)': 'SUSE', 47 | 'Red Hat Enterprise Linux': 'RHEL', 48 | 'Red Hat Enterprise Linux (Amazon VPC)': 'RHEL', 49 | 'Windows': 'Windows', 50 | 'windows': 'Windows', 51 | 'Windows (Amazon 
VPC)': 'Windows', 52 | 'windows (Amazon VPC)': 'Windows', 53 | 'Windows with SQL Server Standard': 'Windows', 54 | 'windows with SQL Server Standard': 'Windows', 55 | 'Windows with SQL Server Standard (Amazon VPC)': 'Windows', 56 | 'windows with SQL Server Standard (Amazon VPC)': 'Windows', 57 | 'Windows with SQL Server Web': 'Windows', 58 | 'windows with SQL Server Web': 'Windows', 59 | 'Windows with SQL Server Web (Amazon VPC)': 'Windows', 60 | 'windows with SQL Server Web (Amazon VPC)': 'Windows', 61 | 'Windows with SQL Server Enterprise': 'Windows', 62 | 'windows with SQL Server Enterprise': 'Windows', 63 | 'Windows with SQL Server Enterprise (Amazon VPC)': 'Windows', 64 | 'windows with SQL Server Enterprise (Amazon VPC)': 'Windows', 65 | } 66 | 67 | DIR_BILLS = 'in/usagecost' 68 | DIR_INSTANCE_RESERVATION_USAGE = 'out/instance-reservation-usage' 69 | DIR_RESERVATION_USAGE = 'out/reservation-usage' 70 | FIL_ONDEMAND_COSTS = 'in/ondemandcosts.json' 71 | 72 | with open(FIL_ONDEMAND_COSTS) as f: 73 | compute_instance_costs = json.load(f) 74 | 75 | _az_to_region_re = re.compile(r'^(.+?)[a-z]?$') 76 | 77 | 78 | def az_to_region(az): 79 | return _az_to_region_re.match(az).group(1) 80 | 81 | 82 | def identity(x): 83 | return x 84 | 85 | 86 | boto_sessions = {} 87 | 88 | 89 | def boto_session_getter(profile, region): 90 | global boto_sessions 91 | if (profile, region) in boto_sessions: 92 | return boto_sessions[(profile, region)] 93 | session = boto3.Session(profile_name=profile, region_name=region) 94 | ec2 = session.client('ec2') 95 | boto_sessions[(profile, region)] = ec2 96 | return ec2 97 | 98 | 99 | def reserved_instance_offering_cost_per_hour(offering): 100 | return offering['FixedPrice'] / (offering['Duration'] / 3600) + ( 101 | offering['RecurringCharges'][0]['Amount'] if len( 102 | offering['RecurringCharges']) > 0 else 0.0) 103 | 104 | 105 | def get_reserved_instances(ec2, region): 106 | reserved_instances_data = ec2.describe_reserved_instances( 107 | 
Filters=[ 108 | { 109 | 'Name': 'state', 110 | 'Values': [ 111 | 'active', 112 | ] 113 | }, 114 | ], 115 | ) 116 | return [ 117 | InstanceReservationCount( 118 | instance_reservation=InstanceReservation( 119 | type=InstanceType( 120 | size=ri['InstanceType'], 121 | availability_zone=ri['AvailabilityZone'] if ri[ 122 | 'Scope'] == 'Availability Zone' else region, 123 | tenancy=ri['InstanceTenancy'], 124 | product=ri['ProductDescription'], 125 | vpc=ri['ProductDescription'].endswith("(Amazon VPC)"), 126 | ), 127 | cost_hourly=sum(rc['Amount'] for rc in ri['RecurringCharges']), 128 | cost_upfront=ri['FixedPrice'], 129 | ), 130 | count=ri['InstanceCount'], 131 | count_used=0, 132 | ) 133 | for ri in reserved_instances_data['ReservedInstances'] 134 | ] 135 | 136 | 137 | def get_ondemand_instance_types(ec2, profile): 138 | def get_instance_type(instance_type): 139 | if instance_type == "windows": 140 | return "Windows" 141 | return instance_type 142 | 143 | instance_paginator = ec2.get_paginator('describe_instances') 144 | pages = instance_paginator.paginate( 145 | Filters=[ 146 | { 147 | 'Name': 'instance-state-name', 148 | 'Values': [ 149 | 'pending', 150 | 'running', 151 | ], 152 | }, 153 | { 154 | 'Name': 'tenancy', 155 | 'Values': [ 156 | 'dedicated', 157 | 'default', 158 | ], 159 | } 160 | ] 161 | ) 162 | reservations = itertools.chain.from_iterable( 163 | p['Reservations'] for p in pages) 164 | instances = itertools.chain.from_iterable( 165 | r['Instances'] for r in reservations) 166 | return [ 167 | InstanceTypeWithProfile( 168 | profile=profile, 169 | instance_type=InstanceType( 170 | size=i['InstanceType'], 171 | availability_zone=i['Placement']['AvailabilityZone'], 172 | tenancy=i['Placement']['Tenancy'], 173 | product=get_instance_type(i.get('Platform', 'Linux/UNIX')), 174 | vpc=i.get('VpcId', '') != '', 175 | ) 176 | ) 177 | for i in instances 178 | if i.get('InstanceLifecycle', 'ondemand') == 'ondemand' 179 | ] 180 | 181 | 182 | def 
get_ec2_type_offerings(ec2, instance_type): 183 | offerings = itertools.chain.from_iterable( 184 | page['ReservedInstancesOfferings'] 185 | for page in 186 | ec2.get_paginator('describe_reserved_instances_offerings').paginate( 187 | IncludeMarketplace=False, 188 | InstanceTenancy=instance_type.tenancy, 189 | ProductDescription=instance_type.product, 190 | Filters=[ 191 | { 192 | 'Name': 'instance-type', 193 | 'Values': [instance_type.size], 194 | }, 195 | ], 196 | ) 197 | ) 198 | try: 199 | offerings = sorted(offerings, 200 | key=reserved_instance_offering_cost_per_hour) 201 | except botocore.exceptions.ClientError: 202 | # Handling api limits 203 | return get_ec2_type_offerings(ec2, instance_type) 204 | try: 205 | offering_best = offerings[0] 206 | offering_worst = offerings[-1] 207 | except IndexError: 208 | return None 209 | ondemand = next( 210 | c 211 | for c in compute_instance_costs 212 | if ( 213 | c['attributes']['instanceType'] == instance_type.size 214 | and c['attributes']['location'] == compute_sheet_region.get( 215 | az_to_region(instance_type.availability_zone), 216 | az_to_region(instance_type.availability_zone)) 217 | and c['attributes']['tenancy'] == compute_sheet_tenancy[ 218 | instance_type.tenancy] 219 | and c['attributes']['operatingSystem'] == 220 | compute_sheet_platform[instance_type.product] 221 | ) 222 | )['cost'] 223 | res = InstanceOffering( 224 | type=instance_type, 225 | cost_reserved_worst=reserved_instance_offering_cost_per_hour( 226 | offering_worst), 227 | cost_reserved_best=reserved_instance_offering_cost_per_hour( 228 | offering_best), 229 | cost_ondemand=ondemand, 230 | ) 231 | return res 232 | 233 | 234 | def instance_type_matches(pattern, example): 235 | def get_generic_type(instancetype): 236 | if instancetype.lower().startswith( 237 | 'windows') or instancetype.lower().startswith('suse'): 238 | return instancetype 239 | return 'Linux/UNIX' 240 | 241 | tmpPattern = pattern.type._replace( 242 | 
product=get_generic_type(pattern.type.product)) 243 | tmpExample = example._replace( 244 | product=get_generic_type(pattern.type.product)) 245 | if example.vpc == True: 246 | return (tmpPattern == example or tmpPattern == example._replace( 247 | vpc=False) or 248 | tmpPattern == tmpExample._replace(vpc=False, 249 | availability_zone=az_to_region( 250 | example.availability_zone)) or 251 | tmpPattern == tmpExample._replace( 252 | availability_zone=az_to_region(example.availability_zone))) 253 | else: 254 | return (tmpPattern == example or tmpPattern == example._replace( 255 | availability_zone=az_to_region(example.availability_zone)) or 256 | tmpPattern == tmpExample._replace(vpc=True) or 257 | tmpPattern == tmpExample._replace(vpc=True, 258 | availability_zone=az_to_region( 259 | example.availability_zone))) 260 | 261 | 262 | def get_instance_matchings(offerings, reservations): 263 | instance_offerings_counted = [ 264 | InstanceOfferingCount( 265 | instance_offering=instance_offering, 266 | count=count, 267 | count_reserved=0, 268 | ) 269 | for instance_offering, count in offerings.items() 270 | ] 271 | remaining_reserved_instances = [ 272 | [ri, count] 273 | for ri, count in reservations.items() 274 | ] 275 | matched_instances = [] 276 | for oi in sorted(instance_offerings_counted, reverse=True, 277 | key=lambda x: x.instance_offering.type.availability_zone[ 278 | ::-1]): 279 | matching_reserved = ( 280 | rri 281 | for rri in sorted(remaining_reserved_instances, reverse=True, 282 | key=lambda i: i[0].type.availability_zone[::-1]) 283 | if rri[1] > 0 and instance_type_matches(rri[0], oi.instance_offering.type) 284 | ) 285 | reserved = 0 286 | while reserved < oi.count: 287 | try: 288 | ri = next(matching_reserved) 289 | except StopIteration: 290 | break 291 | use = min(ri[1], oi.count - reserved) 292 | ri[1] -= use 293 | reserved += use 294 | matched_instances.append(oi._replace(count_reserved=reserved)) 295 | reservation_usage = [ 296 | InstanceReservationCount( 
297 | instance_reservation=ri, 298 | count=reservations[ri], 299 | count_used=reservations[ri] - remaining, 300 | ) 301 | for [ri, remaining] in remaining_reserved_instances 302 | ] 303 | return matched_instances, reservation_usage 304 | 305 | 306 | def get_ec2_reservations(profiles, region): 307 | reservations = collections.defaultdict(int) 308 | for profile in profiles: 309 | print('[{} - {}] Getting reserved instances...'.format(profile, region)) 310 | ec2 = boto_session_getter(profile, region) 311 | reserved_instances = get_reserved_instances(ec2, region) 312 | for ri in reserved_instances: 313 | reservations[ri.instance_reservation] += ri.count 314 | return reservations 315 | 316 | 317 | def get_ec2_instances(profiles, region): 318 | instances = collections.defaultdict(int) 319 | for profile in profiles: 320 | print('[{} - {}] Getting on-demand instances...'.format(profile, region)) 321 | ec2 = boto_session_getter(profile, region) 322 | instance_types = get_ondemand_instance_types(ec2, profile) 323 | for it in instance_types: 324 | instances[it] += 1 325 | return instances 326 | 327 | 328 | def get_ec2_offerings(instances, region, profiles): 329 | with multiprocessing.pool.ThreadPool(processes=4) as pool: 330 | offerings = collections.defaultdict(int) 331 | tasks = [] 332 | print('[global - {}] Getting offerings for all instances...'.format(region)) 333 | for instance, count in instances.items(): 334 | ec2 = boto_session_getter(instance.profile, region) 335 | tasks.append({ 336 | 'profile': [instance.profile], 337 | 'remaining_profiles': [p for p in profiles if p != instance.profile], 338 | 'instance_type': instance.instance_type, 339 | 'instance_count': count, 340 | 'task': pool.apply_async(get_ec2_type_offerings, 341 | [ec2, instance.instance_type]), 342 | }) 343 | for i, task in zip(itertools.count(1), tasks): 344 | if len(task['profile']) == 1: 345 | print('[{} - {}] Getting offerings for instance {}/{}...'.format( 346 | task['profile'][0], region, i, 
len(instances))) 347 | offering = task['task'].get() 348 | if offering: 349 | offerings[offering] += task['instance_count'] 350 | elif len(task['remaining_profiles']): 351 | ec2 = boto_session_getter(task['remaining_profiles'][0], region) 352 | new_task = task.copy() 353 | new_task['task'] = pool.apply_async(get_ec2_type_offerings, [ec2, new_task['instance_type']]) 354 | new_task['profile'].append(new_task['remaining_profiles'][0]) 355 | new_task['remaining_profiles'].pop(0) 356 | tasks.append(new_task) 357 | return offerings 358 | 359 | 360 | def get_ec2_data(profiles, region): 361 | reservations = get_ec2_reservations(profiles, region) 362 | instances = get_ec2_instances(profiles, region) 363 | offerings = get_ec2_offerings(instances, region, profiles) 364 | print('[global - {}] Matching on-demand instances with reserved instances...'.format(region)) 365 | matched_instances, reservation_usage = get_instance_matchings(offerings, 366 | reservations) 367 | print('[global - {}] Done!'.format(region)) 368 | return matched_instances, reservation_usage 369 | 370 | 371 | def write_matched_instances(f, matched_instances, header=True): 372 | writer = csv.DictWriter(f, fieldnames=[ 373 | 'instance_type', 374 | 'availability_zone', 375 | 'tenancy', 376 | 'product', 377 | 'cost_ondemand', 378 | 'cost_reserved_worst', 379 | 'cost_reserved_best', 380 | 'count', 381 | 'count_reserved', 382 | ]) 383 | if header: 384 | writer.writeheader() 385 | for mi in matched_instances: 386 | writer.writerow({ 387 | 'instance_type': mi.instance_offering.type.size, 388 | 'availability_zone': mi.instance_offering.type.availability_zone, 389 | 'tenancy': mi.instance_offering.type.tenancy, 390 | 'product': mi.instance_offering.type.product, 391 | 'cost_ondemand': mi.instance_offering.cost_ondemand, 392 | 'cost_reserved_worst': mi.instance_offering.cost_reserved_worst, 393 | 'cost_reserved_best': mi.instance_offering.cost_reserved_best, 394 | 'count': mi.count, 395 | 'count_reserved': 
mi.count_reserved, 396 | }) 397 | 398 | 399 | def write_reservation_usage(f, reservation_usage, header=True): 400 | writer = csv.DictWriter(f, fieldnames=[ 401 | 'instance_type', 402 | 'availability_zone', 403 | 'tenancy', 404 | 'product', 405 | 'cost_hourly', 406 | 'cost_upfront', 407 | 'count', 408 | 'count_used', 409 | ]) 410 | if header: 411 | writer.writeheader() 412 | for ru in reservation_usage: 413 | writer.writerow({ 414 | 'instance_type': ru.instance_reservation.type.size, 415 | 'availability_zone': ru.instance_reservation.type.availability_zone, 416 | 'tenancy': ru.instance_reservation.type.tenancy, 417 | 'product': ru.instance_reservation.type.product, 418 | 'cost_hourly': ru.instance_reservation.cost_hourly, 419 | 'cost_upfront': ru.instance_reservation.cost_upfront, 420 | 'count': ru.count, 421 | 'count_used': ru.count_used, 422 | }) 423 | 424 | 425 | if __name__ == '__main__': 426 | parser = argparse.ArgumentParser() 427 | parser.add_argument('--region', help='aws region', required=True) 428 | parser.add_argument('--profile', help='aws profile', required=True, nargs='+') 429 | args = parser.parse_args() 430 | matched_instances, reservation_usage = get_ec2_data(args.profile, 431 | args.region) 432 | with open('{}/{}.csv'.format(DIR_INSTANCE_RESERVATION_USAGE, args.region), 433 | 'w') as f: 434 | write_matched_instances(f, matched_instances) 435 | with open('{}/{}.csv'.format(DIR_RESERVATION_USAGE, args.region), 'w') as f: 436 | write_reservation_usage(f, reservation_usage) 437 | -------------------------------------------------------------------------------- /src/get_ec2_instance_history.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import collections 4 | import csv 5 | import functools 6 | import itertools 7 | import json 8 | 9 | import utils 10 | 11 | USAGECOST_DIR='in/usagecost' 12 | OUT_PATH = 'out/instance-history.csv' 13 | USAGE = 'BoxUsage' 14 | 15 | def updated(base, 
addend): 16 | base = base.copy() 17 | base.update(addend) 18 | return base 19 | 20 | with utils.csv_folder(USAGECOST_DIR) as records: 21 | box_usage_records = ( 22 | record 23 | for record in records 24 | if USAGE in record['lineItem/UsageType'] 25 | ) 26 | simplified_lineitems = ( 27 | ( 28 | record['product/instanceType'], 29 | round(float(record['lineItem/UsageAmount'])) if record['lineItem/UsageAmount'] else 0, 30 | record['lineItem/UsageStartDate']) 31 | for record in box_usage_records 32 | ) 33 | 34 | histogram = { 35 | date: updated( 36 | collections.defaultdict(int), 37 | { 38 | instancetype: sum( 39 | lineitem[1] for lineitem in instancetype_lineitems 40 | ) 41 | for instancetype, instancetype_lineitems in itertools.groupby( 42 | sorted( 43 | date_lineitems, 44 | key=lambda x: x[0], 45 | ), 46 | key=lambda x: x[0], 47 | ) 48 | } 49 | ) 50 | for date, date_lineitems in itertools.groupby( 51 | sorted( 52 | simplified_lineitems, 53 | key=lambda x: x[2], 54 | ), 55 | key=lambda x: x[2], 56 | ) 57 | } 58 | 59 | instance_types = sorted( 60 | functools.reduce( 61 | lambda x, y: x.union(y), 62 | ( 63 | date.keys() 64 | for date in histogram.values() 65 | ), 66 | frozenset(), 67 | ) 68 | ) 69 | 70 | with open(OUT_PATH, 'w') as outfile: 71 | writer = csv.DictWriter(outfile, fieldnames=['date', *instance_types]) 72 | writer.writeheader() 73 | for date in sorted(histogram.keys()): 74 | writer.writerow(updated( 75 | collections.defaultdict(int), 76 | { 'date': date, **histogram[date] } 77 | )) 78 | -------------------------------------------------------------------------------- /src/get_ec2_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import itertools 5 | import collections 6 | import csv 7 | import sys 8 | import datetime 9 | import json 10 | import collections 11 | import re 12 | from dateutil.tz import tzutc 13 | 14 | DIR_INSTANCE_METADATA = 'out/instance-metadata' 
REGION = boto3._get_default_session().region_name
ACCOUNT = boto3.client('sts').get_caller_identity()['Account']


def safe_list_get(l, idx, default):
    """Return l[idx], or `default` when the index is out of range."""
    try:
        return l[idx]
    except IndexError:
        return default


def get_ec2_metadata(ec2, region):
    """Collect id, Name tag and attached EBS volume ids for every instance.

    Returns a list of dicts with keys 'instance_id', 'name', 'ebs'.
    """
    print("[{} - {}] Getting instances metadata...".format(ACCOUNT, REGION))
    instances_pag = ec2.get_paginator('describe_instances')
    metadata = [
        {
            'instance_id': i.get('InstanceId', ''),
            # First 'Name' tag value, or '' when the instance is unnamed.
            'name': safe_list_get([v['Value'] for v in i.get('Tags', []) if v['Key'] == 'Name'], 0, ''),
            'ebs': ','.join([e.get('Ebs', {}).get('VolumeId', '') for e in i.get('BlockDeviceMappings', [])]),
        }
        for p in instances_pag.paginate()
        for r in p['Reservations']
        for i in r['Instances']
    ]
    print('[{} - {}] Done!'.format(ACCOUNT, REGION))
    return metadata


def write_instances_metadata(f, reservation_usage):
    """Write instance metadata rows (as produced by get_ec2_metadata) to `f` as CSV.

    BUG FIX: the loop previously iterated the module-level `metadata` (which
    only exists when run as a script) instead of the argument, so the function
    silently ignored what the caller passed in.
    """
    writer = csv.DictWriter(f, fieldnames=[
        'instance_id',
        'name',
        'ebs',
    ])
    writer.writeheader()
    for m in reservation_usage:
        writer.writerow(m)


if __name__ == '__main__':
    ec2 = boto3.client('ec2')
    metadata = get_ec2_metadata(
        ec2, REGION)
    with open('{}/{}.{}.csv'.format(DIR_INSTANCE_METADATA, ACCOUNT, REGION), 'w') as f:
        write_instances_metadata(f, metadata)
# -------------------------------------------------------------------------- /src/get_ec2_recommendations.py:
#!/usr/bin/env python3

import collections
import datetime
import re
import csv

import boto3

# Normalization factors can be found at
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ri-modifying.html#ri-modification-instancemove
# Authorized family can be found at
# https://aws.amazon.com/ec2/pricing/on-demand
# size -> [normalization factor, families offering this size]
INSTANCE_META = collections.OrderedDict([
    ('nano', [1, ["t2"]]),
    ('micro', [2, ["t2", "t1"]]),
    ('small', [1 * 4, ["t2", "m1"]]),
    ('medium', [2 * 4, ["t2", "m1", "m3", "c1"]]),
    ('large', [4 * 4, ["t2", "m5", "m4", "c5", "c4", "r4", "i3", "m1", "m3", "c3", "r3"]]),
    ('xlarge', [8 * 4, ["t2", "m5", "m4", "c5", "c4", "p2", "x1e", "r4", "i3", "d2", "m1", "m3", "c1", "c3", "m2", "r3", "i2"]]),
    ('2xlarge', [16 * 4, ["t2", "m5", "m4", "c5", "c4", "p3", "x1e", "r4", "i3", "h1", "d2", "m3", "c3", "g2", "m2", "r3", "i2"]]),
    ('4xlarge', [32 * 4, ["m5", "m4", "c5", "c4", "g3", "x1e", "r4", "i3", "h1", "d2", "c3", "m2", "r3", "i2"]]),
    ('8xlarge', [64 * 4, ["c4", "p2", "p3", "g3", "x1e", "r4", "i3", "h1", "d2", "cc2", "c3", "g2", "cr1", "r3", "i2", "hs1"]]),
    ('9xlarge', [72 * 4, ["c5"]]),
    ('10xlarge', [80 * 4, ["m4"]]),
    ('12xlarge', [96 * 4, ["m5"]]),
    ('16xlarge', [128 * 4, ["m4", "p2", "p3", "g3", "x1", "x1e", "r4", "i3", "h1"]]),
    ('18xlarge', [144 * 4, ["c5"]]),
    ('24xlarge', [192 * 4, ["m5"]]),
    ('32xlarge', [256 * 4, ["x1", "x1e"]]),
])

TARGET_CPU_USAGE = 0.80
CPU_USAGE_INTERVAL = datetime.timedelta(hours=24)
CPU_USAGE_INTERVAL_SECOND = CPU_USAGE_INTERVAL.days * 24 * 3600 + CPU_USAGE_INTERVAL.seconds
DIR_RECOMMENDATION = 'out/instance-size-recommendation'

REGION = boto3._get_default_session().region_name
ACCOUNT = boto3.client('sts').get_caller_identity()['Account']

InstanceSize = collections.namedtuple('InstanceSize', ['family', 'size'])
InstanceRecommendation = collections.namedtuple('InstanceRecommendation', [
    'account',
    'id',
    'name',
    'size',
    'lifecycle',
    'cpu_usage',
    'recommendation',
    'saving',
    'reason',
])


def next_or(it, default):
    """Return the next item of iterator `it`, or `default` when exhausted."""
    try:
        return next(it)
    except StopIteration:
        return default


def next_or_none(it):
    return next_or(it, None)


_str_to_instance_size_re = re.compile(r'([a-z]+[0-9])\.(nano|micro|small|medium|(?:[0-9]*x?large))')


def str_to_instance_size(s):
    """Parse 'family.size' (e.g. 'm4.xlarge') into an InstanceSize, or None."""
    m = _str_to_instance_size_re.match(s)
    if m:
        return InstanceSize(
            family=m.group(1),
            size=m.group(2),
        )


def instance_size_to_str(instance_size):
    return '{}.{}'.format(*instance_size)


def recommended_size(instance_type, cpu_usage):
    """Return the smallest size in the same family reaching TARGET_CPU_USAGE.

    NOTE(review): raises StopIteration when no size in the family has a large
    enough normalization factor — TODO confirm that cannot happen in practice.
    """
    current_norm_factor = INSTANCE_META[instance_type.size][0]
    cpu_delta = cpu_usage / TARGET_CPU_USAGE
    target_norm_factor = cpu_delta * current_norm_factor
    matching_norm_factor = next(size for size, meta in INSTANCE_META.items() if meta[0] >= target_norm_factor and instance_type.family in meta[1])
    return matching_norm_factor


def get_reason(cpu_usage, current_size, recommendation):
    """Human-readable justification for a recommendation."""
    if cpu_usage is None:
        return 'insufficient_data'
    elif cpu_usage > TARGET_CPU_USAGE:  # was a literal 0.80 duplicating the constant
        return 'High CPU usage average: {0:.3f}%'.format(cpu_usage * 100)
    elif current_size == recommendation:
        return 'Optimal CPU usage average'
    return 'Low CPU usage average: {0:.3f}%'.format(cpu_usage * 100)


def get_saving(cpu_usage, current_size, recommendation):
    """Estimated saving (as 'NN.N%') from moving to the recommended size."""
    current_norm_factor = INSTANCE_META[current_size][0]
    recommended_norm_factor = INSTANCE_META.get(recommendation, [0])[0]
    if cpu_usage is None or current_norm_factor == 0 or recommended_norm_factor == 0:
        return '0%'
    else:
        return '{0:.1f}%'.format(100 - ((recommended_norm_factor * 100) / current_norm_factor))


def get_cpu_usage(cloudwatch, now, instance_id):
    """Average CPU utilization (0..1) over the last 24h, or None if no data."""
    usage_statistics = cloudwatch.get_metric_statistics(
        Namespace='AWS/EC2',
        MetricName='CPUUtilization',
        Dimensions=[
            {'Name': 'InstanceId', 'Value': instance_id},
        ],
        StartTime=now - CPU_USAGE_INTERVAL,
        EndTime=now,
        Period=CPU_USAGE_INTERVAL_SECOND,
        Statistics=['Average']
    )
    try:
        return usage_statistics['Datapoints'][0]['Average'] / 100
    except IndexError:
        return None


def get_recommendation(instance):
    """Build an InstanceRecommendation for one describe_instances record.

    NOTE(review): relies on module-level `cloudwatch` and `now` being set by
    the __main__ block.
    """
    instance_type_str = instance['InstanceType']
    instance_type = str_to_instance_size(instance_type_str)
    instance_id = instance['InstanceId']
    instance_name = next_or((tag['Value'] for tag in instance.get('Tags', []) if tag['Key'] == 'Name'), '')
    instance_lifecycle = instance.get('InstanceLifecycle', 'ondemand')
    cpu_usage = get_cpu_usage(cloudwatch, now, instance_id)
    recommendation = recommended_size(instance_type, cpu_usage) if cpu_usage is not None else 'insufficient_data'
    reason = get_reason(cpu_usage, instance_type.size, recommendation)
    saving = get_saving(cpu_usage, instance_type.size, recommendation)
    return InstanceRecommendation(
        id=instance_id,
        name=instance_name,
        size=instance_type_str,
        lifecycle=instance_lifecycle,
        # BUG FIX: `cpu_usage or ""` also blanked a legitimate 0.0 reading;
        # only a missing datapoint (None) should be reported as "".
        cpu_usage=cpu_usage if cpu_usage is not None else "",
        recommendation=recommendation,
        reason=reason,
        saving=saving,
        account=ACCOUNT,
    )


def main(ec2, cloudwatch, now):
    """Write one CSV of size recommendations for all instances in the region."""
    instances = (
        instance
        for page in ec2.get_paginator('describe_instances').paginate()
        for reservation in page['Reservations']
        for instance in reservation['Instances']
    )
    recommendations = (
        get_recommendation(instance)
        for instance in instances
    )
    recommendations = sorted(recommendations, key=lambda r: (r.name, r.size))
    with open('{}/{}.{}.csv'.format(DIR_RECOMMENDATION, ACCOUNT, REGION), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(InstanceRecommendation._fields)
        for recommendation in recommendations:
            writer.writerow(recommendation)


if __name__ == '__main__':
    ec2 = boto3.client('ec2')
    cloudwatch = boto3.client('cloudwatch')
    now = datetime.datetime.now()
    main(ec2, cloudwatch, now)
# -------------------------------------------------------------------------- /src/get_last_month_ebs_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta
import json
import re

import utils

USAGECOST_DIR = 'in/usagecost'
METADATA_DIR = 'out/instance-metadata'
OUT_PATH_EBS = 'out/last-month/ebs.csv'
OUT_PATH_SNAPSHOTS = 'out/last-month/snapshots.csv'

# Closed interval [first instant .. last instant] of the previous month.
BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
END_LAST_MONTH = (BEGIN_LAST_MONTH + dateutil.relativedelta.relativedelta(months=1, days=-1)).replace(hour=23, minute=59, second=59, microsecond=999999)

# Map each EBS volume id to the (instance_id, name) it is attached to.
with utils.csv_folder(METADATA_DIR) as records:
    ebs_match = re.compile(r"((vol-[0-9a-fA-F]*),?)*")
    ebs_links = defaultdict(lambda: ("", ""))
    for record in records:
        for ebs in ebs_match.match(record['ebs']).group(0).split(','):
            ebs_links[ebs] = (record['instance_id'], record['name'])

with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    ebs_usage_records = defaultdict(float)
    snapshot_usage_records = defaultdict(float)
    for record in records:
        if 'lineItem/ResourceId' not in record:
            # Warn once, then skip all records lacking a resource id.
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if 'EBS' in record['lineItem/UsageType'] and 'EBSOptimized' not in record['lineItem/UsageType']:
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if BEGIN_LAST_MONTH <= usage_start_date <= END_LAST_MONTH:
                if 'Snapshot' not in record['lineItem/UsageType']:
                    ebs_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'], record['product/region'])] += float(record['lineItem/UnblendedCost'])
                else:
                    snapshot_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'])] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_EBS, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Region', 'Cost', 'InstanceId', 'InstanceName'])
    # Most expensive volumes first.
    for ebs in sorted(ebs_usage_records, key=lambda tup: ebs_usage_records[tup], reverse=True):
        writer.writerow([
            ebs[0],
            ebs[1],
            ebs[2],
            repr(ebs_usage_records[ebs]),
            ebs_links[ebs[1]][0],
            ebs_links[ebs[1]][1],
        ])

with open(OUT_PATH_SNAPSHOTS, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Cost'])
    for rid in sorted(snapshot_usage_records, key=lambda rid: snapshot_usage_records[rid], reverse=True):
        writer.writerow([
            rid[0],
            rid[1],
            repr(snapshot_usage_records[rid]),
        ])
# -------------------------------------------------------------------------- /src/get_last_month_ec2_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta

import utils

USAGECOST_DIR = 'in/usagecost'
METADATA_DIR = 'out/instance-metadata'
OUT_PATH_INSTANCES = 'out/last-month/ec2_instances.csv'
OUT_PATH_BANDWIDTH = 'out/last-month/ec2_bandwidth.csv'

BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
END_LAST_MONTH = (BEGIN_LAST_MONTH + dateutil.relativedelta.relativedelta(months=1, days=-1)).replace(hour=23, minute=59, second=59, microsecond=999999)

with utils.csv_folder(METADATA_DIR) as records:
    instance_name = defaultdict(str)
    for record in records:
        instance_name[record['instance_id']] = record['name']

with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    instance_usage_records = defaultdict(float)
    bandwidth_usage_records = defaultdict(float)
    for record in records:
        if 'lineItem/ResourceId' not in record:
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if record['lineItem/ProductCode'] == 'AmazonEC2':
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if BEGIN_LAST_MONTH <= usage_start_date <= END_LAST_MONTH:
                if 'BoxUsage' in record['lineItem/UsageType']:
                    instance_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'], record['lineItem/AvailabilityZone'], record['pricing/term'], record['product/instanceType'])] += float(record['lineItem/UnblendedCost'])
                elif 'DataTransfer' in record['lineItem/UsageType']:
                    bandwidth_usage_records[record['lineItem/ResourceId']] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_INSTANCES, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Name', 'AvailabilityZone', 'Term', 'Type', 'Cost'])
    for instance in sorted(instance_usage_records, key=lambda tup: instance_usage_records[tup], reverse=True):
        writer.writerow([
            instance[0],
            instance[1],
            instance_name[instance[1]],
            instance[2],
            instance[3],
            instance[4],
            repr(instance_usage_records[instance]),
        ])

with open(OUT_PATH_BANDWIDTH, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['ResourceId', 'Bandwidth'])
    for instance in sorted(bandwidth_usage_records, key=lambda instance: bandwidth_usage_records[instance], reverse=True):
        writer.writerow([
            instance,
            repr(bandwidth_usage_records[instance]),
        ])
# -------------------------------------------------------------------------- /src/get_s3_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta

import utils

USAGECOST_DIR = 'in/usagecost'
OUT_PATH_S3 = 'out/s3/current_usage.csv'

BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
BEGIN_CURRENT_MONTH = datetime.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)


def get_simplified_cost_name(record):
    """Classify a billing record as storage/bandwidth/requests cost, else None."""
    if 'TimedStorage' in record.get('lineItem/UsageType', ''):
        return 'storage_cost'
    elif record.get('product/servicecode', '') == 'AWSDataTransfer':
        return 'bandwidth_cost'
    elif 'Requests' in record.get('lineItem/UsageType', ''):
        return 'requests_cost'
    return None


with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    s3_usage = defaultdict(lambda: dict(usage_gb_month=0.0, storage_cost=0.0, bandwidth_cost=0.0, requests_cost=0.0, last_month_cost=0.0))
    for record in records:
        if 'lineItem/ResourceId' not in record:
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if record['lineItem/ProductCode'] == 'AmazonS3':
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if usage_start_date >= BEGIN_LAST_MONTH:
                simplified_cost_name = get_simplified_cost_name(record)
                if simplified_cost_name is not None:
                    # Current month accrues detailed buckets; the previous
                    # month only contributes to last_month_cost.
                    if usage_start_date >= BEGIN_CURRENT_MONTH:
                        s3_usage[record.get('lineItem/ResourceId', '')][simplified_cost_name] += float(record['lineItem/UnblendedCost'])
                        if simplified_cost_name == 'storage_cost':
                            s3_usage[record.get('lineItem/ResourceId', '')]['usage_gb_month'] += float(record['lineItem/UsageAmount'])
                    else:
                        s3_usage[record.get('lineItem/ResourceId', '')]['last_month_cost'] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_S3, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Bucket', 'Usage-GB-Month', 'StorageCost', 'BandwidthCost', 'RequestsCost', 'CurrentTotal', 'LastMonthTotal'])
    for bucket in sorted(s3_usage, key=lambda resid: s3_usage[resid]['last_month_cost'], reverse=True):
        writer.writerow([
            bucket,
            s3_usage[bucket]['usage_gb_month'],
            s3_usage[bucket]['storage_cost'],
            s3_usage[bucket]['bandwidth_cost'],
            s3_usage[bucket]['requests_cost'],
            s3_usage[bucket]['storage_cost'] + s3_usage[bucket]['bandwidth_cost'] + s3_usage[bucket]['requests_cost'],
            s3_usage[bucket]['last_month_cost'],
        ])
# -------------------------------------------------------------------------- /src/make_sheet.py:
#!/usr/bin/env python3

import collections
import csv
import itertools
import json
import os
import pprint

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import httplib2

from sheets import *
import utils

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at
~/.credentials/sheets.googleapis.com-python-quickstart.json 27 | SCOPES = ' '.join([ 28 | 'https://www.googleapis.com/auth/spreadsheets', 29 | 'https://www.googleapis.com/auth/drive', 30 | 'https://www.googleapis.com/auth/drive.file', 31 | ]) 32 | CLIENT_SECRET_FILE = 'client_secret.json' 33 | APPLICATION_NAME = 'Google Sheets API Python Quickstart' 34 | 35 | SHEET_RESERVATIONS_SUMMARY = 1 36 | 37 | PRETTY_FIELD_NAMES = { 38 | 'instance_type' : 'Instance type', 39 | 'availability_zone' : 'Availability zone', 40 | 'tenancy' : 'Tenancy', 41 | 'product' : 'Product', 42 | 'count' : 'Count', 43 | 'count_reserved' : 'Count (reserved)', 44 | 'cost_ondemand' : 'Cost (on demand)', 45 | 'cost_reserved_worst' : 'Cost (worst reserved)', 46 | 'cost_reserved_best' : 'Cost (best reserved)', 47 | } 48 | 49 | PRETTY_FIELD_GROUPS = { 50 | 'reservation': 'Reservation', 51 | 'hourly_cost_per_instance': 'Hourly cost per instance', 52 | } 53 | 54 | NUMFORMAT_CURRENCY = '#,##0.000 [$USD]' 55 | NUMFORMAT_PERCENT = '0.00%' 56 | NUMFORMAT_PERCENT_VAR = '\+0.00%;\-0.00%' 57 | 58 | IN_INSTANCE_RESERVATION_USAGE_DIR = 'out/instance-reservation-usage' 59 | IN_RESERVATION_USAGE_DIR = 'out/reservation-usage' 60 | IN_ABSOLUTE_COST_PER_MONTH = 'out/absolute.csv' 61 | IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR = 'out/instance-size-recommendation' 62 | IN_INSTANCE_HISTORY = 'out/instance-history.csv' 63 | 64 | COLOR_RED_BG = { 'red': 0xFF/float(0xFF), 'green': 0xCC/float(0xFF), 'blue': 0xCC/float(0xFF) } 65 | COLOR_RED_FG = { 'red': 0xCC/float(0xFF), 'green': 0x00/float(0xFF), 'blue': 0x00/float(0xFF) } 66 | COLOR_GREEN_BG = { 'red': 0xCC/float(0xFF), 'green': 0xFF/float(0xFF), 'blue': 0xCC/float(0xFF) } 67 | COLOR_GREEN_FG = { 'red': 0x00/float(0xFF), 'green': 0x66/float(0xFF), 'blue': 0x00/float(0xFF) } 68 | 69 | def _with_trailing(it, trail): 70 | return itertools.chain(it, itertools.repeat(trail)) 71 | 72 | def get_credentials(): 73 | """Gets valid user credentials from storage. 
74 | 75 | If nothing has been stored, or if the stored credentials are invalid, 76 | the OAuth2 flow is completed to obtain the new credentials. 77 | 78 | Returns: 79 | Credentials, the obtained credential. 80 | """ 81 | home_dir = os.path.expanduser('~') 82 | credential_dir = os.path.join(home_dir, '.credentials') 83 | if not os.path.exists(credential_dir): 84 | os.makedirs(credential_dir) 85 | credential_path = os.path.join(credential_dir, 86 | 'sheets.googleapis.com-python-quickstart.json') 87 | 88 | store = Storage(credential_path) 89 | credentials = store.get() 90 | if not credentials or credentials.invalid: 91 | flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES) 92 | flow.user_agent = APPLICATION_NAME 93 | if flags: 94 | credentials = tools.run_flow(flow, store, flags) 95 | else: # Needed only for compatibility with Python 2.6 96 | credentials = tools.run(flow, store) 97 | print('Storing credentials to ' + credential_path) 98 | return credentials 99 | 100 | def reserved_summary(): 101 | def cost_monthly(sheet, row, column, field): 102 | base_hourly = sheet.field_index('cost_ondemand') 103 | base_monthly = sheet.field_index('cost_monthly_ondemand') 104 | return Formula('={}*{}*720'.format( 105 | sheet.field_address('count', row, 2), 106 | sheet.address(base_hourly + (column - base_monthly), row), 107 | )) 108 | def savings_monthly(sheet, row, column, field): 109 | base_ondemand = sheet.field_index('cost_ondemand') 110 | base_reserved = sheet.field_index('cost_reserved_worst') 111 | base_savings = sheet.field_index('savings_reserved_worst') 112 | return Formula('=1-{}/{}'.format( 113 | sheet.address(base_reserved + (column - base_savings), row), 114 | sheet.field_address('cost_ondemand', row, 2), 115 | )) 116 | fields = ( 117 | FieldGroup('Reservation', ( 118 | Field('instance_type' , 'instance_type' , str , 'Instance type' , None) , 119 | Field('availability_zone' , 'availability_zone' , str , 'Availability zone' , None) , 120 | Field('tenancy' 
, 'tenancy' , str , 'Tenancy' , None) , 121 | Field('product' , 'product' , str , 'Product' , None) , 122 | )), 123 | Field( 'count' , 'count' , int , 'Count' , '0') , 124 | Field( 'count_reserved' , 'count_reserved' , int , 'Count (reserved)' , '0') , 125 | FieldGroup('Hourly cost per instance', ( 126 | Field('cost_ondemand' , 'cost_ondemand' , float , 'On demand' , NUMFORMAT_CURRENCY) , 127 | Field('cost_reserved_worst' , 'cost_reserved_worst' , float , 'Worst reserved' , NUMFORMAT_CURRENCY) , 128 | Field('cost_reserved_best' , 'cost_reserved_best' , float , 'Best reserved' , NUMFORMAT_CURRENCY) , 129 | )), 130 | FieldGroup('Monthly cost total', ( 131 | Field('cost_monthly_ondemand' , 'cost_monthly_ondemand' , cost_monthly , 'On demand' , NUMFORMAT_CURRENCY) , 132 | Field('cost_monthly_reserved_worst' , 'cost_monthly_reserved_worst' , cost_monthly , 'Worst reserved' , NUMFORMAT_CURRENCY) , 133 | Field('cost_monthly_reserved_best' , 'cost_monthly_reserved_best' , cost_monthly , 'Best reserved' , NUMFORMAT_CURRENCY) , 134 | )), 135 | FieldGroup('Savings over on demand', ( 136 | Field('savings_reserved_worst' , 'savings_reserved_worst' , savings_monthly , 'Worst reserved' , NUMFORMAT_PERCENT) , 137 | Field('savings_reserved_best' , 'savings_reserved_best' , savings_monthly , 'Best reserved' , NUMFORMAT_PERCENT) , 138 | )) 139 | ) 140 | conditional_format = ( 141 | ConditionalFormat('CUSTOM_FORMULA', '=(INDIRECT(ADDRESS(ROW(), COLUMN() - 1)) = INDIRECT(ADDRESS(ROW(), COLUMN())))', { 142 | 'backgroundColor': COLOR_GREEN_BG, 143 | 'textFormat': { 144 | 'foregroundColor': COLOR_GREEN_FG, 145 | }, 146 | }), 147 | ) 148 | with utils.csv_folder(IN_INSTANCE_RESERVATION_USAGE_DIR) as records: 149 | sheet = Sheet( 150 | source=records, 151 | fields=fields, 152 | sheet_id=1, 153 | fields_conditional_formats=tuple( 154 | ColumnConditionalFormat(column, conditional_format) 155 | for column in field_flatten(FieldRoot(fields)) if column.name == 'count_reserved' 156 | ) 157 | ) 158 
| sheet.properties['title'] = 'Reserved instance summary' 159 | return sheet.to_dict() 160 | 161 | def _returns(value): 162 | def f(*args, **kwargs): 163 | return value 164 | return f 165 | 166 | def reservation_usage_summary(): 167 | def effective_cost(sheet, row, column, field): 168 | return Formula('={}/720+{}'.format( 169 | sheet.field_address('cost_upfront', row, 2), 170 | sheet.field_address('cost_hourly', row, 2), 171 | )) 172 | def monthly_losses(sheet, row, column, field): 173 | return Formula('({reserved}-{used})*{effective}*720'.format( 174 | reserved =sheet.field_address('count_reserved', row, 2), 175 | used =sheet.field_address('count_used', row, 2), 176 | effective=sheet.field_address('effective_cost', row, 2), 177 | )) 178 | fields = ( 179 | FieldGroup('Reservation', ( 180 | Field('instance_type' , 'instance_type' , str , 'Instance type' , None) , 181 | Field('availability_zone' , 'availability_zone' , str , 'Availability zone' , None) , 182 | Field('tenancy' , 'tenancy' , str , 'Tenancy' , None) , 183 | Field('product' , 'product' , str , 'Product' , None) , 184 | )), 185 | FieldGroup('Count', ( 186 | Field('count_reserved' , 'count' , int , 'Reserved' , None) , 187 | Field('count_used' , 'count_used' , int , 'Used' , None) , 188 | )), 189 | FieldGroup('Cost per instance', ( 190 | Field('cost_upfront' , 'cost_upfront' , float , 'Upfront' , NUMFORMAT_CURRENCY) , 191 | Field('cost_hourly' , 'cost_hourly' , float , 'Hourly' , NUMFORMAT_CURRENCY) , 192 | Field('effective_cost' , 'effective_cost' , effective_cost , 'Effective', NUMFORMAT_CURRENCY), 193 | )), 194 | Field( 'monthly_losses' , 'monthly_losses' , monthly_losses , 'Monthly losses', NUMFORMAT_CURRENCY), 195 | ) 196 | with utils.csv_folder(IN_RESERVATION_USAGE_DIR) as records: 197 | sheet = Sheet( 198 | source=records, 199 | fields=fields, 200 | sheet_id=3, 201 | ) 202 | sheet.properties['title'] = 'Reservation usage summary' 203 | return sheet.to_dict() 204 | 205 | def weekly_variations(): 206 
| def variation(sheet, row, column, field): 207 | prev_address = sheet.address(column - 1, row) 208 | next_address = sheet.address(column + 1, row) 209 | return Formula('=IF({0}=0,"",{1}/{0}-1)'.format( 210 | prev_address, 211 | next_address, 212 | )) 213 | def total(sheet, row, column, field): 214 | cost_fields = [ 215 | f 216 | for f in sheet.fields_flat() if '_cost' in f.name 217 | ] 218 | return Formula('=SUM({})'.format( 219 | ','.join( 220 | sheet.field_address(f, row) 221 | for f in cost_fields 222 | ) 223 | )) 224 | with open(IN_ABSOLUTE_COST_PER_MONTH) as f: 225 | reader = csv.DictReader(f) 226 | fields = ( 227 | Field( 'usage' , 'usage' , str , 'Usage type' , None), 228 | FieldGroup('Monthly cost', tuple( 229 | FieldGroup(isoweek, 230 | ( 231 | ( 232 | Field(isoweek+'_var', isoweek, variation, 'Variation', NUMFORMAT_PERCENT_VAR), 233 | ) if not is_first_week else () 234 | ) + ( 235 | Field(isoweek+'_cost', isoweek, float , 'Cost' , NUMFORMAT_CURRENCY), 236 | ) 237 | ) 238 | for isoweek, is_first_week in zip(reader.fieldnames[1:], _with_trailing((True,), False)) 239 | )), 240 | Field('total', 'total', total, 'Total', NUMFORMAT_CURRENCY), 241 | ) 242 | variation_conditional_format = ( 243 | ConditionalFormat('NUMBER_GREATER', '0', { 244 | 'backgroundColor': COLOR_RED_BG, 245 | 'textFormat': { 246 | 'foregroundColor': COLOR_RED_FG, 247 | }, 248 | }), 249 | ConditionalFormat('NUMBER_LESS_THAN_EQ', '0', { 250 | 'backgroundColor': COLOR_GREEN_BG, 251 | 'textFormat': { 252 | 'foregroundColor': COLOR_GREEN_FG, 253 | }, 254 | }) 255 | ) 256 | variation_columns = ( 257 | f 258 | for f in field_flatten(FieldRoot(fields)) if '_var' in f.name 259 | ) 260 | source = sorted( 261 | reader, 262 | key=(lambda row: sum(float(v) for k, v in row.items() if k != 'usage')), 263 | reverse=True, 264 | ) 265 | sheet = Sheet( 266 | source=source, 267 | fields=fields, 268 | fields_conditional_formats=tuple( 269 | ColumnConditionalFormat(column, variation_conditional_format) 270 | 
# Known EC2 size suffixes, ordered from smallest to largest.
INSTANCE_SIZES = [
    'nano',
    'micro',
    'small',
    'medium',
    'large',
    'xlarge',
    '2xlarge',
    '4xlarge',
    '8xlarge',
    '9xlarge',
    '10xlarge',
    '12xlarge',
    '16xlarge',
    '18xlarge',
    '24xlarge',
    '32xlarge',
]

def instance_history():
    """Build the 'Instance count history' sheet: per-type instance counts over time."""
    with open(IN_INSTANCE_HISTORY) as f:
        reader = csv.DictReader(f)
        # First CSV column is the date; every remaining column is an instance type.
        count_columns = tuple(
            Field(type_name, type_name, int, type_name, None)
            for type_name in reader.fieldnames[1:]
        )
        fields = (
            Field('date', 'date', str, 'Date', None),
            FieldGroup('Instance count', count_columns),
        )
        sheet = Sheet(
            source=reader,
            fields=fields,
            sheet_id=5,
        )
        sheet.properties['title'] = 'Instance count history'
        return sheet.to_dict()

def instance_size_recommendations():
    """Build the 'Instance size recommendations' sheet from the recommendation CSVs."""
    fields = (
        FieldGroup('Instance', (
            Field('account', 'account', str, 'Account', None),
            Field('id', 'id', str, 'ID', None),
            Field('name', 'name', str, 'Name', None),
            Field('size', 'size', str, 'Type', None),
            Field('lifecycle', 'lifecycle', str, 'Lifecycle', None),
        )),
        Field('recommendation', 'recommendation', str, 'Recommended', None),
    )
    with utils.csv_folder(IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR) as source:
        sheet = Sheet(
            source=source,
            fields=fields,
            sheet_id=4,
        )
        sheet.properties['title'] = 'Instance size recommendations'
        return sheet.to_dict()
def main():
    """Generate the cost report spreadsheet through the Google Sheets API.

    Builds every sheet description, then creates a new spreadsheet containing
    them via the Sheets v4 ``spreadsheets.create`` endpoint and prints the
    API response.
    """
    # NOTE: the old docstring was leftover Google quickstart boilerplate
    # describing an unrelated sample; replaced with what this actually does.
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    discoveryUrl = ('https://sheets.googleapis.com/$discovery/rest?'
                    'version=v4')
    service = discovery.build('sheets', 'v4', http=http,
                              discoveryServiceUrl=discoveryUrl)

    reserved_summary_data = reserved_summary()
    weekly_variations_data = weekly_variations()
    reservation_usage_summary_data = reservation_usage_summary()
    instance_size_recommendations_data = instance_size_recommendations()
    instance_history_data = instance_history()

    body = {
        'properties': {
            'title': 'my generated spreadsheet',
        },
        'sheets': [
            weekly_variations_data,
            reserved_summary_data,
            reservation_usage_summary_data,
            instance_size_recommendations_data,
            instance_history_data,
        ],
    }

    # The request object is lazy: nothing is sent until execute() runs.
    spreadsheet = service.spreadsheets().create(body=body)
    print(spreadsheet.execute())

if __name__ == '__main__':
    main()
'instance_type': 'Instance type', 23 | 'availability_zone': 'Availability zone', 24 | 'tenancy': 'Tenancy', 25 | 'product': 'Product', 26 | 'count': 'Count', 27 | 'count_reserved': 'Count (reserved)', 28 | 'cost_ondemand': 'Cost (on demand)', 29 | 'cost_reserved_worst': 'Cost (worst reserved)', 30 | 'cost_reserved_best': 'Cost (best reserved)', 31 | } 32 | 33 | PRETTY_FIELD_GROUPS = { 34 | 'reservation': 'Reservation', 35 | 'hourly_cost_per_instance': 'Hourly cost per instance', 36 | } 37 | 38 | NUMFORMAT_CURRENCY = '#,##0.000 [$USD]' 39 | NUMFORMAT_PERCENT = '0.00%' 40 | NUMFORMAT_PERCENT_VAR = '\+0.00%;\-0.00%' 41 | 42 | IN_INSTANCE_RESERVATION_USAGE_DIR = 'out/instance-reservation-usage' 43 | IN_RESERVATION_USAGE_DIR = 'out/reservation-usage' 44 | IN_ABSOLUTE_COST_PER_MONTH = 'out/absolute.csv' 45 | IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR = 'out/instance-size-recommendation' 46 | IN_INSTANCE_HISTORY = 'out/instance-history.csv' 47 | IN_INSTANCE_USAGE_LAST_MONTH = 'out/last-month/ec2_instances.csv' 48 | IN_EC2_BANDWIDTH_USAGE_LAST_MONTH = 'out/last-month/ec2_bandwidth.csv' 49 | IN_EBS_USAGE_LAST_MONTH = 'out/last-month/ebs.csv' 50 | IN_SNAPSHOT_USAGE_LAST_MONTH = 'out/last-month/snapshots.csv' 51 | IN_S3_COST = 'out/s3/current_usage.csv' 52 | 53 | COLOR_RED_BG = "#ffcccc" 54 | COLOR_RED_FG = "#cc0000" 55 | COLOR_GREEN_BG = "#ccffcc" 56 | COLOR_GREEN_FG = "#006600" 57 | 58 | 59 | def _with_trailing(it, trail): 60 | return itertools.chain(it, itertools.repeat(trail)) 61 | 62 | 63 | def gen_reserved_summary(workbook, header_format, val_format): 64 | with utils.csv_folder(IN_INSTANCE_RESERVATION_USAGE_DIR) as records: 65 | worksheet = workbook.add_worksheet("Reserved instance summary") 66 | 67 | worksheet.freeze_panes(2, 0) 68 | worksheet.set_column("A:O", 15) 69 | worksheet.merge_range("A1:D1", "Reservation", header_format) 70 | worksheet.merge_range("E1:F1", "Count", header_format) 71 | worksheet.merge_range("G1:I1", "Cost per instance", header_format) 72 | 
worksheet.merge_range("J1:L1", "Total monthly cost", header_format) 73 | worksheet.merge_range("M1:N1", "Savings over on demand", header_format) 74 | 75 | green_format = workbook.add_format() 76 | green_format.set_color(COLOR_GREEN_FG) 77 | green_format.set_bg_color(COLOR_GREEN_BG) 78 | 79 | cur_format = workbook.add_format() 80 | cur_format.set_align("center") 81 | cur_format.set_align("vcenter") 82 | cur_format.set_border() 83 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 84 | 85 | per_format = workbook.add_format() 86 | per_format.set_align("center") 87 | per_format.set_align("vcenter") 88 | per_format.set_border() 89 | per_format.set_num_format(NUMFORMAT_PERCENT) 90 | 91 | refs = { 92 | "instance_type": [0, "Instance type", str, val_format], 93 | "availability_zone": [1, "Availability zone", str, val_format], 94 | "tenancy": [2, "Tenancy", str, val_format], 95 | "product": [3, "Product", str, val_format], 96 | "count": [4, "Running", int, val_format], 97 | "count_reserved": [5, "Reserved", int, val_format], 98 | "cost_ondemand": [6, "On demand", float, cur_format], 99 | "cost_reserved_worst": [7, "Worst reserved", float, cur_format], 100 | "cost_reserved_best": [8, "Best reserved", float, cur_format], 101 | "cost_monthly_ondemand": [9, "On demand", float, cur_format], 102 | "cost_monthly_reserved_worst": [10, "Worst reserved", float, cur_format], 103 | "cost_monthly_reserved_best": [11, "Best reserved", float, cur_format], 104 | "savings_reserved_worst": [12, "Worst reserved", float, per_format], 105 | "savings_reserved_best": [13, "Best reserved", float, per_format], 106 | } 107 | for v in refs.values(): 108 | worksheet.write(1, v[0], v[1], header_format) 109 | for i, line in zip(itertools.count(2), records): 110 | for h, v in line.items(): 111 | worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3]) 112 | for h in ("cost_monthly_ondemand", "cost_monthly_reserved_worst", "cost_monthly_reserved_best"): 113 | res = float(line["count"]) * \ 114 | 
float(line["cost_" + h[13:]]) * 720 115 | worksheet.write_formula( 116 | i, refs[h][0], 117 | "=E{}*{}{}*720".format(i+1, chr(ord('A') + 118 | refs[h][0] - 3), i+1), refs[h][3], 119 | res, 120 | ) 121 | for h in ("savings_reserved_worst", "savings_reserved_best"): 122 | res = 1 - float(line[h.replace("savings", "cost")] 123 | ) / float(line["cost_ondemand"]) 124 | worksheet.write_formula( 125 | i, refs[h][0], 126 | "=1-{}{}/G{}".format(chr(ord('A') + 127 | refs[h][0] - 5), i+1, i+1), refs[h][3], 128 | res, 129 | ) 130 | worksheet.conditional_format("F{}".format(i+1), { 131 | "type": "cell", 132 | "criteria": "equal to", 133 | "value": "E{}".format(i+1), 134 | "format": green_format, 135 | }) 136 | 137 | 138 | def gen_reservation_usage_summary(workbook, header_format, val_format): 139 | with utils.csv_folder(IN_RESERVATION_USAGE_DIR) as records: 140 | worksheet = workbook.add_worksheet("Reservation usage summary") 141 | 142 | worksheet.freeze_panes(2, 0) 143 | worksheet.set_column("A:J", 18) 144 | worksheet.merge_range("A1:D1", "Reservation", header_format) 145 | worksheet.merge_range("E1:F1", "Count", header_format) 146 | worksheet.merge_range("G1:I1", "Cost per instance", header_format) 147 | worksheet.merge_range("J1:J2", "Monthly losses", header_format) 148 | 149 | green_format = workbook.add_format() 150 | green_format.set_color(COLOR_GREEN_FG) 151 | green_format.set_bg_color(COLOR_GREEN_BG) 152 | 153 | cur_format = workbook.add_format() 154 | cur_format.set_align("center") 155 | cur_format.set_align("vcenter") 156 | cur_format.set_border() 157 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 158 | 159 | refs = { 160 | "instance_type": [0, "Instance type", str, val_format], 161 | "availability_zone": [1, "Availability zone", str, val_format], 162 | "tenancy": [2, "Tenancy", str, val_format], 163 | "product": [3, "Product", str, val_format], 164 | "count": [4, "Reserved", int, val_format], 165 | "count_used": [5, "Used", int, val_format], 166 | "cost_upfront": 
[6, "Upfront", float, cur_format], 167 | "cost_hourly": [7, "Hourly", float, cur_format], 168 | "effective_cost": [8, "Effective", float, cur_format], 169 | "monthly_losses": [9, "Monthly losses", float, cur_format], 170 | } 171 | for v in refs.values(): 172 | worksheet.write(1, v[0], v[1], header_format) 173 | for i, line in zip(itertools.count(2), records): 174 | for h, v in line.items(): 175 | worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3]) 176 | effective_cost = float( 177 | line["cost_upfront"]) / 720 + float(line["cost_hourly"]) 178 | worksheet.write_formula( 179 | i, refs["effective_cost"][0], 180 | "=G{}/720+H{}".format(*[i+1]*2), refs["effective_cost"][3], 181 | effective_cost, 182 | ) 183 | worksheet.conditional_format("F{}".format(i + 1), { 184 | "type": "cell", 185 | "criteria": "equal to", 186 | "value": "E{}".format(i + 1), 187 | "format": green_format, 188 | }) 189 | worksheet.write( 190 | i, refs["monthly_losses"][0], 191 | "=(E{}-F{})*I{}*720".format(*[i+1] 192 | * 3), refs["monthly_losses"][3], 193 | (float(line["count"]) - float(line["count_used"]) 194 | ) * effective_cost * 720, 195 | ) 196 | 197 | 198 | def gen_weekly_variations(workbook, header_format, val_format): 199 | def to_alpha(x): return chr(ord('A') + x) 200 | 201 | with open(IN_ABSOLUTE_COST_PER_MONTH) as f: 202 | reader = csv.DictReader(f) 203 | source = sorted( 204 | reader, 205 | key=(lambda row: sum(float(v) 206 | for k, v in row.items() if k != 'usage')), 207 | reverse=True, 208 | ) 209 | worksheet = workbook.add_worksheet("Cost variations") 210 | 211 | worksheet.freeze_panes(3, 1) 212 | worksheet.set_column("A:A", 30) 213 | worksheet.set_column("B:M", 14) 214 | worksheet.merge_range("A1:A3", "Usage type", header_format) 215 | worksheet.merge_range("B1:L1", "Monthly cost", header_format) 216 | worksheet.merge_range("M1:M3", "Total", header_format) 217 | 218 | green_format = workbook.add_format() 219 | green_format.set_color(COLOR_GREEN_FG) 220 | 
def gen_weekly_variations(workbook, header_format, val_format):
    """Write the 'Cost variations' worksheet: weekly costs and deltas per usage type."""
    def to_alpha(x):
        return chr(ord('A') + x)

    with open(IN_ABSOLUTE_COST_PER_MONTH) as f:
        reader = csv.DictReader(f)
        # Most expensive usage types first.
        source = sorted(
            reader,
            key=lambda row: sum(float(v) for k, v in row.items() if k != 'usage'),
            reverse=True,
        )
        worksheet = workbook.add_worksheet("Cost variations")

        worksheet.freeze_panes(3, 1)
        worksheet.set_column("A:A", 30)
        worksheet.set_column("B:M", 14)
        worksheet.merge_range("A1:A3", "Usage type", header_format)
        worksheet.merge_range("B1:L1", "Monthly cost", header_format)
        worksheet.merge_range("M1:M3", "Total", header_format)

        green_format = workbook.add_format()
        green_format.set_color(COLOR_GREEN_FG)
        green_format.set_bg_color(COLOR_GREEN_BG)

        red_format = workbook.add_format()
        red_format.set_color(COLOR_RED_FG)
        red_format.set_bg_color(COLOR_RED_BG)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        per_format = workbook.add_format()
        per_format.set_align("center")
        per_format.set_align("vcenter")
        per_format.set_border()
        per_format.set_num_format(NUMFORMAT_PERCENT)

        # Keep only the most recent weeks when there are too many to display.
        # NOTE(review): the slice keeps 5 fields although the B1:L1 header has
        # room for 6 — looks off by one; confirm intent before changing.
        date_fieldnames = reader.fieldnames[1:-1]
        if len(date_fieldnames) > 6:
            date_fieldnames = date_fieldnames[-5:]
        # column index, has a variation column to its left, value parser
        refs = {
            header: [i, True, float]
            for i, header in zip(itertools.count(3, 2), date_fieldnames[1:])
        }
        refs[date_fieldnames[0]] = [1, False, float]
        refs["usage"] = [0, False, str]
        for name, meta in refs.items():
            if meta[1]:
                worksheet.merge_range(1, meta[0]-1, 1, meta[0], name, header_format)
                worksheet.write(2, meta[0]-1, "Variation", header_format)
                worksheet.write(2, meta[0], "Cost", header_format)
            else:
                worksheet.write(1, meta[0], name, header_format)
                worksheet.write(2, meta[0], "Cost", header_format)
        for row, line in zip(itertools.count(3), source):
            for name, meta in refs.items():
                val = line[name]
                worksheet.write(row, meta[0], meta[2](val), cur_format)
                if meta[1]:
                    # Cached value for the variation formula; blank when the
                    # previous week's cost is zero.
                    before = float(line[date_fieldnames[int(meta[0]/2-1)]])
                    worksheet.write_formula(
                        row, meta[0]-1,
                        "=IF({}{}=0,\"\",{}{}/{}{}-1)".format(
                            to_alpha(meta[0] - 2),
                            row+1,
                            to_alpha(meta[0]),
                            row+1,
                            to_alpha(meta[0] - 2),
                            row+1,
                        ), per_format,
                        " " if before == 0.0 else meta[2](val) / before - 1,
                    )
                    var_cell = "{}{}".format(to_alpha(meta[0]-1), row+1)
                    worksheet.conditional_format(var_cell, {
                        "type": "cell",
                        "criteria": "greater than",
                        "value": "0",
                        "format": red_format,
                    })
                    worksheet.conditional_format(var_cell, {
                        "type": "cell",
                        "criteria": "less than or equal to",
                        "value": "0",
                        "format": green_format,
                    })
            # Row total over every week in the file, including any weeks not
            # displayed because of the truncation above.
            worksheet.write("M{}".format(row+1),
                            sum(float(line[o]) for o in reader.fieldnames[1:]),
                            cur_format)
def gen_weekly_variations_chart(workbook, header_format, val_format):
    """Write the 'Cost variations chart' worksheet: a line chart of the five
    most expensive usage types across the recorded weeks."""
    with open(IN_ABSOLUTE_COST_PER_MONTH) as f:
        reader = csv.DictReader(f)
        source = sorted(
            reader,
            key=(lambda row: sum(float(v) for k, v in row.items() if k != 'usage')),
            reverse=True,
        )[:5]

        header = ['usage'] + sorted([s for s in source[0] if s != 'usage'])
        data = [
            [float(s[h]) if h != 'usage' else s[h] for h in header]
            for s in source
        ]
        chart = workbook.add_chart({
            "type": "line"
        })
        chartsheet = workbook.add_worksheet("Cost variations chart")
        # The table occupies rows 1..len(data)+1 (header row + one row per
        # usage type) and columns 1..len(header) (usage name + one per week).
        # FIX: both bounds were previously one short (treated as exclusive),
        # dropping the last week column and the last usage-type series.
        last_row = len(data) + 1
        last_col = len(header)
        chartsheet.add_table(1, 1, last_row, last_col,
                             {'data': data, 'columns': [{'header': h} for h in header]})
        for i in range(2, last_row + 1):
            chart.add_series({
                "values": ["Cost variations chart", i, 2, i, last_col],
                "categories": ["Cost variations chart", 1, 2, 1, last_col],
                "name": ["Cost variations chart", i, 1],
            })
        chartsheet.insert_chart('A1', chart, {'x_scale': 3, 'y_scale': 2})
def gen_instance_count_history_chart(workbook, header_format, val_format):
    """Write the 'Instance count history chart' chartsheet: one line series
    per instance type plus the running total.

    Reads the same CSV that gen_instance_count_history renders, so the series
    ranges must match that worksheet's layout (headers on rows 0-1, data
    starting at row 2).
    """
    with open(IN_INSTANCE_HISTORY) as f:
        reader = csv.DictReader(f)

        chart = workbook.add_chart({
            "type": "line"
        })
        row_count = len(list(reader))  # number of data rows in the CSV
        # The history worksheet writes data on rows 2 .. row_count+1.
        # FIX: the series previously ended at row_count-1, silently dropping
        # the last two data points of every series.
        last_row = row_count + 1
        for i, fieldname in zip(itertools.count(1), reader.fieldnames[1:] + ["Total"]):
            chart.add_series({
                "values": ["Instance count history", 2, i, last_row, i],
                "categories": ["Instance count history", 2, 0, last_row, 0],
                "name": fieldname,
            })
        chartsheet = workbook.add_chartsheet("Instance count history chart")
        chartsheet.set_chart(chart)
saving", header_format) 388 | worksheet.merge_range("I1:I2", "Reason", header_format) 389 | 390 | worksheet.freeze_panes(2, 0) 391 | 392 | refs = { 393 | "account": [0, "Account"], 394 | "id": [1, "ID"], 395 | "name": [2, "Name"], 396 | "size": [3, "Type"], 397 | "lifecycle": [4, "Lifecycle"], 398 | "cpu_usage": [5, "CPU Utilization (Avg.)"], 399 | "recommendation": [6, "Recommendation"], 400 | "saving": [7, "Saving"], 401 | "reason": [8, "Reason"] 402 | } 403 | for i in refs.values(): 404 | worksheet.write(1, i[0], i[1], header_format) 405 | for i, line in zip(itertools.count(2), source): 406 | for h, v in line.items(): 407 | worksheet.write(i, refs[h][0], transform(h, v), val_format) 408 | 409 | def instance_summary(workbook, header_format, val_format): 410 | bandwidth_usage = {} 411 | ebs_usage = defaultdict(int) 412 | def transform(x): 413 | try: 414 | if x == "": return 0.0 415 | else: return float(x) 416 | except ValueError: 417 | return x 418 | with open(IN_EC2_BANDWIDTH_USAGE_LAST_MONTH) as f: 419 | reader = csv.reader(f) 420 | for i, line in itertools.islice(zip(itertools.count(2), reader), 1, None): 421 | bandwidth_usage[line[0]] = transform(line[1]) 422 | with open(IN_EBS_USAGE_LAST_MONTH) as f: 423 | reader = csv.reader(f) 424 | for line in itertools.islice(reader, 1, None): 425 | ebs_usage[line[4]] += transform(line[3]) 426 | with open(IN_INSTANCE_USAGE_LAST_MONTH) as f: 427 | reader = csv.DictReader(f) 428 | worksheet = workbook.add_worksheet("EC2 instances last month") 429 | 430 | last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1) 431 | worksheet.merge_range("A1:I1", "Instances for {}-{:02d}".format(last_month.year, last_month.month), header_format) 432 | worksheet.merge_range("J1:J2", "Total", header_format) 433 | 434 | cur_format = workbook.add_format() 435 | cur_format.set_align("center") 436 | cur_format.set_align("vcenter") 437 | cur_format.set_border() 438 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 439 | 440 | 
def ebs_summary(workbook, header_format, val_format):
    """Write the 'EBS last month' worksheet: last month's cost per EBS volume,
    with the instance each volume is attached to."""
    def parse_value(raw):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        if raw == "":
            return 0.0
        try:
            return float(raw)
        except ValueError:
            return raw
    with open(IN_EBS_USAGE_LAST_MONTH) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("EBS last month")

        last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1)
        worksheet.merge_range("A1:F1", "EBS for {}-{:02d}".format(last_month.year, last_month.month), header_format)
        worksheet.merge_range("A2:A3", "Account", header_format)
        worksheet.merge_range("B2:B3", "Resource ID", header_format)
        worksheet.merge_range("C2:C3", "Region", header_format)
        worksheet.merge_range("D2:D3", "Cost", header_format)
        worksheet.merge_range("E2:F2", "Instance Linked", header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(3, 0)
        worksheet.set_column(0, len(reader.fieldnames)-1, 25)

        # column index, header label, value parser, cell format
        refs = {
            "Account": [0, "Account", str, val_format],
            "ResourceId": [1, "Resource Id", str, val_format],
            "Region": [2, "Region", str, val_format],
            "Cost": [3, "Cost", parse_value, cur_format],
            "InstanceId": [4, "ID", str, val_format],
            "InstanceName": [5, "Name", str, val_format],
        }
        for spec in refs.values():
            worksheet.write(2, spec[0], spec[1], header_format)
        for row, record in zip(itertools.count(3), reader):
            for key, raw in record.items():
                spec = refs[key]
                worksheet.write(row, spec[0], spec[2](raw), spec[3])
def snapshots_summary(workbook, header_format, val_format):
    """Write the 'Snapshots last month' worksheet: last month's cost per snapshot."""
    def parse_value(raw):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        if raw == "":
            return 0.0
        try:
            return float(raw)
        except ValueError:
            return raw
    with open(IN_SNAPSHOT_USAGE_LAST_MONTH) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("Snapshots last month")

        last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1)
        worksheet.merge_range("A1:C1", "Snapshots for {}-{:02d}".format(last_month.year, last_month.month), header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(2, 0)
        worksheet.set_column(0, 0, 25)
        worksheet.set_column(1, 1, 80)
        worksheet.set_column(2, 2, 25)

        # column index, header label, value parser, cell format
        refs = {
            "Account": [0, "Account", str, val_format],
            "ResourceId": [1, "Resource Id", str, val_format],
            "Cost": [2, "Cost", parse_value, cur_format],
        }
        for spec in refs.values():
            worksheet.write(1, spec[0], spec[1], header_format)
        for row, record in zip(itertools.count(2), reader):
            for key, raw in record.items():
                spec = refs[key]
                worksheet.write(row, spec[0], spec[2](raw), spec[3])
def gen_s3_cost(workbook, header_format, val_format):
    """Write the 'S3 cost' worksheet: current-month cost breakdown per bucket."""
    def transform(x):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        try:
            if x == "": return 0.0
            else: return float(x)
        except ValueError:
            return x
    with open(IN_S3_COST) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("S3 cost")

        worksheet.merge_range("A1:F1", "S3 cost for current month", header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(2, 0)
        worksheet.set_column(0, 0, 45)
        worksheet.set_column(1, 6, 18)

        # column index, header label, value parser, cell format
        refs = {
            "Bucket": [0, "Bucket", str, val_format],
            # FIX: was bare float, which raised ValueError on an empty cell;
            # every other numeric column already uses the lenient transform.
            "Usage-GB-Month": [1, "Usage (GB-Month)", transform, val_format],
            "StorageCost": [2, "Storage cost", transform, cur_format],
            "BandwidthCost": [3, "Bandwidth cost", transform, cur_format],
            "RequestsCost": [4, "Requests cost", transform, cur_format],
            "CurrentTotal": [5, "Current cost", transform, cur_format],
            "LastMonthTotal": [6, "Last month cost", transform, cur_format],
        }
        for v in refs.values():
            worksheet.write(1, v[0], v[1], header_format)
        for i, line in zip(itertools.count(2), reader):
            for h, v in line.items():
                worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3])
def gen_introduction(workbook, header_format, val_format):
    """Write the 'Introduction' worksheet: just the cover image."""
    worksheet = workbook.add_worksheet("Introduction")

    worksheet.insert_image("A1", "src/ressources/introduction.png")


def main(name):
    """Generate ./out/<name>.xlsx containing every report worksheet."""
    workbook = xlsxwriter.Workbook('./out/{}.xlsx'.format(name))

    header_format = workbook.add_format()
    header_format.set_bold()
    header_format.set_align("center")
    header_format.set_align("vcenter")
    header_format.set_border()

    val_format = workbook.add_format()
    val_format.set_align("center")
    val_format.set_align("vcenter")
    val_format.set_border()

    # Generator order determines worksheet order in the workbook.
    generators = (
        gen_introduction,
        gen_weekly_variations,
        gen_weekly_variations_chart,
        gen_reserved_summary,
        gen_reservation_usage_summary,
        gen_instance_size_recommendations,
        gen_instance_count_history_chart,
        gen_instance_count_history,
        instance_summary,
        ebs_summary,
        snapshots_summary,
        gen_s3_cost,
    )
    for generate in generators:
        generate(workbook, header_format, val_format)

    workbook.close()


if __name__ == '__main__':
    name = sys.argv[1] if len(sys.argv) > 1 else "sheet"
    print("Generating xlsx file...")
    main(name)
    print("{}.xlsx generated!".format(name))
"""Named-tuple record types shared across the report-generation scripts."""
import collections

# Identity of a running EC2 instance: what it is and where it runs.
InstanceType = collections.namedtuple(
    'InstanceType',
    'size availability_zone tenancy product vpc',
)

# An InstanceType tagged with the AWS profile it was observed under.
InstanceTypeWithProfile = collections.namedtuple(
    'InstanceTypeWithProfile',
    'profile instance_type',
)

# A purchased reservation: its type plus hourly and upfront pricing.
InstanceReservation = collections.namedtuple(
    'InstanceReservation',
    'type cost_hourly cost_upfront',
)

# How many of a given reservation exist and how many are consumed.
InstanceReservationCount = collections.namedtuple(
    'InstanceReservationCount',
    'instance_reservation count count_used',
)

# Pricing for an instance type: on-demand vs best/worst reserved rates.
InstanceOffering = collections.namedtuple(
    'InstanceOffering',
    'type cost_ondemand cost_reserved_worst cost_reserved_best',
)

# How many instances of an offering run, and how many are covered by
# reservations.
InstanceOfferingCount = collections.namedtuple(
    'InstanceOfferingCount',
    'instance_offering count count_reserved',
)
# A single conditional-format rule (Sheets API booleanRule pieces) and the
# binding of a list of such rules to one column's field.
ConditionalFormat = collections.namedtuple('ConditionalFormat', ['type', 'value', 'format'])
ColumnConditionalFormat = collections.namedtuple('ColumnConditionalFormat', ['field', 'formats'])


@singledispatch
def _field_depth(arg):
    """Number of header rows needed to render this field tree."""
    raise ValueError("arg must be Field or FieldGroup, is {}".format(type(arg)))


@_field_depth.register(Field)
def _(_):
    # A plain column occupies exactly one header row.
    return 1


@_field_depth.register(FieldGroup)
def _(group):
    # A group adds one header row above its deepest child.
    return 1 + max(map(_field_depth, group.children))


@_field_depth.register(FieldRoot)
def _(root):
    # The root itself is invisible: depth is just the deepest subtree.
    return max(map(_field_depth, root.children))


@singledispatch
def _field_width(arg):
    """Number of leaf columns spanned by this node."""
    raise ValueError("arg must be Field or FieldGroup")


@_field_width.register(Field)
def _(_):
    return 1


@_field_width.register(FieldGroup)
@_field_width.register(FieldRoot)
def _(node):
    return sum(map(_field_width, node.children))


@singledispatch
def _field_slice(arg, row_num):
    """Nodes rendered on header row `row_num` of this subtree."""
    raise ValueError("arg must be Field or FieldGroup, is {}".format(type(arg)))


@_field_slice.register(Field)
def _(leaf, _):
    # A leaf column reappears on every header row at or below its own.
    return [leaf]


@_field_slice.register(FieldGroup)
def _(group, row_num):
    if row_num == 0:
        return [group]
    sliced = []
    for child in group.children:
        sliced.extend(_field_slice(child, row_num - 1))
    return sliced


@_field_slice.register(FieldRoot)
def _(root, row_num):
    # The root occupies no row of its own; slice children at the same depth.
    sliced = []
    for child in root.children:
        sliced.extend(_field_slice(child, row_num))
    return sliced


@singledispatch
def field_flatten(arg):
    """Depth-first iterable of all leaf Fields under `arg`."""
    raise ValueError("arg is {}".format(type(arg)))


@field_flatten.register(FieldRoot)
@field_flatten.register(FieldGroup)
def _(node):
    return itertools.chain.from_iterable(map(field_flatten, node.children))


@field_flatten.register(Field)
def _(leaf):
    return (leaf,)
[]) 76 | 77 | @singledispatch 78 | def _field_find(arg, f): 79 | raise ValueError("arg must be Field or FieldGroup") 80 | 81 | @_field_find.register(Field) 82 | def _(field, f): 83 | if f == field or isinstance(f, str) and f == field.name: 84 | return field 85 | else: 86 | return None 87 | 88 | @_field_find.register(FieldGroup) 89 | @_field_find.register(FieldRoot) 90 | def _(field_group, f): 91 | return reduce( 92 | lambda a, b: a or b, 93 | (_field_find(e) for e in field_group.children), 94 | ) 95 | 96 | @singledispatch 97 | def _field_index(arg, f, o): 98 | raise ValueError("arg must be Field or FieldGroup") 99 | 100 | @_field_index.register(Field) 101 | def _(field, f, o): 102 | if f == field or isinstance(f, str) and f == field.name: 103 | return o 104 | else: 105 | return None 106 | 107 | @_field_index.register(FieldGroup) 108 | @_field_index.register(FieldRoot) 109 | def _(field_group, f, o): 110 | for c in field_group.children: 111 | r = _field_index(c, f, o) 112 | if r == None: 113 | o += _field_width(c) 114 | else: 115 | return r 116 | return None 117 | 118 | @singledispatch 119 | def _field_flatten(arg, f, o): 120 | raise ValueError("arg must be Field or FieldGroup") 121 | 122 | @_field_flatten.register(Field) 123 | def _(field): 124 | return (field,) 125 | 126 | @_field_flatten.register(FieldGroup) 127 | @_field_flatten.register(FieldRoot) 128 | def _(field_group): 129 | return itertools.chain.from_iterable( 130 | _field_flatten(f) for f in field_group.children 131 | ) 132 | 133 | def _add_dict(a, b): 134 | a = a.copy() 135 | a.update(b) 136 | return a 137 | 138 | class Sheet: 139 | 140 | _CELL_TYPES = collections.defaultdict(lambda: 'stringValue', { 141 | int: 'numberValue', 142 | float: 'numberValue', 143 | bool: 'boolValue', 144 | Formula: 'formulaValue', 145 | }) 146 | 147 | def __init__(self, source, fields, fields_conditional_formats=(), sheet_id=None): 148 | self._source = source 149 | self._fields = FieldRoot(fields) 150 | self._sheet_id = 
sheet_id or random.randint(0, 2**32) 151 | self.properties = {} 152 | self._fields_conditional_formats = fields_conditional_formats 153 | self._row_count = None 154 | self._HEADER_ROW = 0 155 | self._HEADER_COL = 0 156 | self._BODY_ROW = _field_depth(self._fields) 157 | self._BODY_COL = 0 158 | 159 | def to_dict(self): 160 | return { 161 | 'properties': self._to_dict_properties(), 162 | 'data': self._to_dict_data(), 163 | 'merges': self._to_dict_merges(), 164 | 'conditionalFormats': self._to_dict_conditional_formats(), 165 | } 166 | 167 | def field_index(self, field, row_num=None): 168 | col_num = _field_index(self._fields, field, 0) 169 | if row_num is None: 170 | return col_num 171 | else: 172 | return col_num, row_num 173 | 174 | def field_address(self, field, row_num, absolute=0): 175 | col_num, row_num = self.field_index(field, row_num) 176 | return self.address(col_num, row_num, absolute) 177 | 178 | def address(self, col_num, row_num, absolute=0): 179 | return ''.join([ 180 | '$' if absolute & 2 else '', 181 | self.col_address(col_num), 182 | '$' if absolute & 1 else '', 183 | self.row_address(row_num), 184 | ]) 185 | 186 | def row_address(self, row_number): 187 | return str(row_number + 1 + self._BODY_ROW) 188 | 189 | def col_address(self, col_number): 190 | res = [] 191 | n = col_number + self._BODY_COL 192 | while n >= 0: 193 | nc = n % 26 + 65 194 | res.append(nc) 195 | n = n // 26 - 1 196 | return ''.join(map(chr, res)) 197 | 198 | def fields_flat(self): 199 | return _field_flatten(self._fields) 200 | 201 | def _cell_contents(self, row, field, row_num, col_num): 202 | if field.type in (str, int, float): 203 | if row[field.index] == '': 204 | return Sheet._CELL_TYPES[field.type], field.type() 205 | else: 206 | return Sheet._CELL_TYPES[field.type], field.type(row[field.index]) 207 | elif callable(field.type): 208 | value = field.type(self, row_num, col_num, field) 209 | cell_type = Sheet._CELL_TYPES[type(value)] 210 | if type(value) == Formula: 211 | 
value = value.value if value.value.startswith('=') else '={}'.format(value.value) 212 | return cell_type, value 213 | 214 | def _column_range(self, field): 215 | col_start = self.field_index(field) + self._BODY_COL 216 | row_start = self._BODY_ROW 217 | col_end = col_start + 1 218 | row_end = row_start + self._row_count 219 | return { 220 | 'startColumnIndex': col_start, 221 | 'endColumnIndex': col_end, 222 | 'startRowIndex': row_start, 223 | 'endRowIndex': row_end, 224 | 'sheetId': self._sheet_id, 225 | } 226 | 227 | def _to_dict_conditional_formats(self): 228 | return [ 229 | { 230 | 'ranges': self._column_range(column_format.field), 231 | 'booleanRule': { 232 | 'condition': { 233 | 'type': format.type, 234 | 'values': [ 235 | { 236 | 'userEnteredValue': format.value, 237 | }, 238 | ], 239 | }, 240 | 'format': format.format 241 | } 242 | } 243 | for column_format in self._fields_conditional_formats 244 | for format in column_format.formats 245 | ] 246 | 247 | def _to_dict_properties(self): 248 | res = self.properties.copy() 249 | res.update({ 250 | 'sheetId': self._sheet_id, 251 | }) 252 | return res 253 | 254 | def _to_dict_data(self): 255 | return [ 256 | self._to_dict_data_header(), 257 | self._to_dict_data_body(), 258 | ] 259 | 260 | def _to_dict_data_header(self): 261 | header_height = _field_depth(self._fields) 262 | row_data = [ 263 | { 264 | 'values': sum(( 265 | [ 266 | { 267 | 'userEnteredValue': { 268 | 'stringValue': f.pretty 269 | }, 270 | 'userEnteredFormat': { 271 | 'horizontalAlignment': 'center', 272 | 'verticalAlignment': 'middle', 273 | 'textFormat': { 274 | 'bold': True, 275 | 'fontSize': 10 if header_row == 0 else 8, 276 | }, 277 | }, 278 | } 279 | ] * _field_width(f) 280 | for f in header_slice 281 | ), [] 282 | ) 283 | } 284 | for header_slice, header_row in ((_field_slice(self._fields, h), h) for h in range(header_height)) 285 | ] 286 | return { 287 | 'startRow': self._HEADER_ROW, 288 | 'startColumn': self._HEADER_COL, 289 | 'rowData': 
    def _to_dict_data_body(self):
        """Render every source row into Sheets API RowData anchored at the
        body's start cell, and cache the row count for column ranges."""
        row_data = [
            {
                'values': [
                    # Merge the cell value with its number-format metadata.
                    _add_dict({
                        'userEnteredValue': {
                            cell_type: cell_value,
                        },
                    }, {
                        'userEnteredFormat': {
                            'numberFormat': {
                                'type': 'NUMBER',
                                'pattern': field.format,
                            },
                        },
                    })
                    # `+ (field,)` appends the field to the (type, value) pair
                    # returned by _cell_contents so all three unpack at once.
                    for cell_type, cell_value, field in (
                        self._cell_contents(row, field, row_num, col_num) + (field,)
                        for field, col_num in zip(_field_flatten(self._fields), itertools.count())
                    )
                ],
            }
            for row, row_num in zip(self._source, itertools.count())
        ]
        # Cached for _column_range (conditional formats need the body height).
        self._row_count = len(row_data)
        return {
            'startRow': self._BODY_ROW,
            'startColumn': self._BODY_COL,
            'rowData': row_data,
        }

    def _to_dict_merges(self):
        """Header merge ranges: groups merge horizontally across their leaf
        columns; leaf fields merge vertically down to the body row."""
        merges = []
        seen_fields = set()
        for header_row in range(_field_depth(self._fields)):
            header_slice = _field_slice(self._fields, header_row)
            col_start = 0
            for header in header_slice:
                if type(header) == FieldGroup:
                    # A group spans all of its leaf columns on its own row.
                    col_width = _field_width(header)
                    merges.append({
                        'startColumnIndex': col_start + self._HEADER_COL,
                        'endColumnIndex': col_start + col_width + self._HEADER_COL,
                        'startRowIndex': header_row + self._HEADER_ROW,
                        'endRowIndex': header_row + 1 + self._HEADER_ROW,
                        'sheetId': self._sheet_id,
                    })
                    col_start += col_width
                elif type(header) == Field and header not in seen_fields:
                    # First appearance of a leaf: merge from this header row
                    # down to the body. _field_slice repeats leaves on every
                    # deeper row, so seen_fields stops duplicate merges.
                    col_width = 1
                    merges.append({
                        'startColumnIndex': col_start + self._HEADER_COL,
                        'endColumnIndex': col_start + col_width + self._HEADER_COL,
                        'startRowIndex': header_row + self._HEADER_ROW,
                        'endRowIndex': self._BODY_ROW,
                        'sheetId': self._sheet_id,
                    })
                    col_start += col_width
                    seen_fields.add(header)
                else:
                    # Already-merged leaf on a deeper row: just advance.
                    col_start += 1
        return merges
def _int_field(name, pretty):
    """Shorthand for a plain, unformatted int column reading index 0."""
    return Field(name, 0, int, pretty, None)


# Hand-rolled layout used to exercise header slicing/merging.
test_fields = FieldRoot((
    _int_field('first', 'First'),
    _int_field('second', 'Second'),
    FieldGroup('Grouped', (
        _int_field('third', 'Third'),
        _int_field('fourth', 'Fourth'),
        FieldGroup('Nested', (
            _int_field('fifth', 'Fifth'),
            _int_field('sixth', 'Sixth'),
        )),
    )),
    _int_field('seventh', 'Seventh'),
    _int_field('eighth', 'Eigth'),  # 'Eigth' [sic]: original label kept byte-for-byte
))


def _month_cost_group(month, with_variation):
    """Per-month column group: a Cost column, plus a Variation column for
    every month after the first."""
    children = [
        Field('cost' + month, month, float, 'Cost', '#,##0.000 [$USD]'),
    ]
    if with_variation:
        children.append(Field('var' + month, month, float, 'Variation', '0.00%'))
    return FieldGroup(month, tuple(children))


# Monthly cost layout: the first month has no previous month to vary from.
var_fields = FieldRoot((
    Field(name='usage', index='usage', type=str, pretty='Usage type', format=None),
    FieldGroup(pretty='Monthly cost', children=tuple(
        _month_cost_group(month, month != '2017-10')
        for month in ('2017-10', '2017-11', '2017-12', '2018-01', '2018-02')
    )),
))
def rows_folder(dirpath):
    """Yield every CSV record from every file directly inside `dirpath`.

    Unlike csv_folder, this does not filter on the .csv extension.
    """
    for filename in os.listdir(dirpath):
        filepath = os.path.join(dirpath, filename)
        with open(filepath) as f:
            yield from rows(f)


def rows(csvfile):
    """Yield each record of an open CSV file as a dict keyed by the header."""
    yield from csv.DictReader(csvfile)


class csv_folder:
    """csv_folder is to be used in conjunction with the 'with' statement. It is
    an iterator over all the CSV records of all files within a folder."""

    def __init__(self, dirpath, readerclass=csv.DictReader):
        self._dirpath = dirpath
        # Lazy generator of *.csv paths; files are opened one at a time.
        self._filepaths = (
            os.path.join(self._dirpath, filename)
            for filename in os.listdir(self._dirpath)
            if filename.endswith('.csv')
        )
        self._reader = None
        self._handle = None
        self._readerclass = readerclass

    def __enter__(self):
        return self

    def __iter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the currently open file, even on error.
        self._close()

    def __next__(self):
        # BUG FIX: the original recursed into __next__ after each exhausted
        # file, so a long run of empty files could overflow the stack.
        # Iterate instead; StopIteration from _open_next ends the iteration.
        while True:
            if self._reader is None:
                self._open_next()
            try:
                return next(self._reader)
            except StopIteration:
                self._close()

    def _open_next(self):
        """Open the next CSV file; raises StopIteration when none remain."""
        filepath = next(self._filepaths)
        self._handle = open(filepath, 'rt')
        self._reader = self._readerclass(self._handle)

    def _close(self):
        """Close and forget the current file handle and reader, if any."""
        if self._handle is not None:
            self._handle.close()
        self._handle = None
        self._reader = None
def main(profile_name, cmd, region):
    """Exec `cmd` with AWS credentials for `profile_name` in its environment.

    The special profile name 'env' means: trust whatever AWS_* variables are
    already set and inject nothing.
    """
    env = dict(os.environ)
    if profile_name != 'env':
        session = boto3.session.Session(profile_name=profile_name)
        creds = session.get_credentials().get_frozen_credentials()
        env['AWS_ACCESS_KEY_ID'] = creds.access_key
        env['AWS_SECRET_ACCESS_KEY'] = creds.secret_key
        if creds.token:
            env['AWS_SESSION_TOKEN'] = creds.token
    if region:
        env['AWS_DEFAULT_REGION'] = region
    # Replace this process with the target command; this call never returns.
    os.execvpe(cmd[0], cmd, env)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run command with AWS credentials in environment.')
    parser.add_argument('--profile', type=str, default='default',
                        help='select the profile credentials will be obtained for')
    parser.add_argument('--region', type=str,
                        help='the default AWS region')
    parser.add_argument('cmd', type=str, metavar='cmd',
                        help='program to be run with AWS credentials')
    parser.add_argument('args', type=str, metavar='arg', nargs='*',
                        help='arguments to the program')
    args = parser.parse_args()
    main(args.profile, [args.cmd, *args.args], args.region)

# vim: set ts=4 sts=4 noet: