├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── report-example.xlsx ├── requirements.txt ├── run.py ├── src ├── get_bill_diff.py ├── get_ec2_costs.sh ├── get_ec2_data.py ├── get_ec2_instance_history.py ├── get_ec2_metadata.py ├── get_ec2_recommendations.py ├── get_last_month_ebs_cost.py ├── get_last_month_ec2_cost.py ├── get_s3_cost.py ├── make_sheet.py ├── make_xlsx.py ├── mytypes.py ├── ressources │ └── introduction.png ├── sheets.py └── utils.py └── util ├── awsdumpenv └── awsenv /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | *.pyc 3 | /src/__pycache__ 4 | /in 5 | /out 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | RUN apt-get update && apt-get upgrade -y 4 | RUN apt-get install jq python3-pip curl -y 5 | 6 | COPY . /root/aws-cost-report 7 | WORKDIR /root/aws-cost-report 8 | RUN pip3 install -r requirements.txt 9 | 10 | ENV PYTHONUNBUFFERED=0 11 | 12 | ENTRYPOINT ["/root/aws-cost-report/run.py"] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 MSolution.IO 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/introduction.png) 2 | # Cost report generator 3 | 4 | ## Requirements 5 | 6 | - Install [jq](https://stedolan.github.io/jq/download/) 7 | - Install the python3 dependencies: `sudo pip3 install -r requirements.txt` 8 | 9 | ## Google Sheets API access 10 | 11 | Follow the instructions at 12 | https://developers.google.com/sheets/api/quickstart/python to setup credentials 13 | and API access. 14 | 15 | ## How to run the tool on your machine 16 | 17 | ``` 18 | # Print help and usage information 19 | $> ./run.py --help 20 | 21 | # Run with one billing bucket and one EC2 profile 22 | $> ./run.py --billing profile_name billing-bucket-name prefix --ec2 profile_name --xlsx-name filename 23 | 24 | # Run with multiple billing buckets and EC2 profiles 25 | $> ./run.py --billing profile_name billing-bucket-name prefix --billing profile_name2 billing-bucket-name2 prefix2 --ec2 profile_name --ec2 profile_name2 --xlsx-name filename 26 | ``` 27 | 28 | The tool is built to use AWS credentials stored in `~/.aws/credentials`. 29 | If you set the profile to `env`, the tool will use environment variables you must supply instead. 
30 | 31 | ## How to run the tool with docker 32 | 33 | The docker container does not export any data to google sheets. 34 | However it generates CSVs in the `out` directory, and a local spreadsheet generation will be added soon. 35 | 36 | ### Use our prebuilt image 37 | 38 | ``` 39 | # Pull the msolution/aws-cost-report image 40 | $> docker pull msolution/aws-cost-report 41 | ``` 42 | 43 | ### Build your own image 44 | 45 | ``` 46 | # Build your own msolution/aws-cost-report image 47 | $> docker build -t msolution/aws-cost-report . 48 | ``` 49 | 50 | ### Use the docker container 51 | 52 | ``` 53 | # Run with one billing bucket and one EC2 profile, using env credentials 54 | $> docker run -v /local/path/out:/root/aws-cost-report/out -e AWS_ACCESS_KEY_ID=accesskeyid -e AWS_SECRET_ACCESS_KEY=secretaccesskey -e AWS_DEFAULT_REGION=default-region -e AWS_SESSION_TOKEN=sessiontoken(optional) msolution/aws-cost-report --no-generate-sheet --billing env billing-bucket-name prefix --ec2 env --xlsx-name filename 55 | 56 | # Run with multiple billing buckets and EC2 profiles, using your local aws credentials 57 | $> docker run -v /path/to/credentials:/root/.aws:ro -v /local/path/out:/root/aws-cost-report/out msolution/aws-cost-report --no-generate-sheet --billing profile_name billing-bucket-name prefix --billing profile_name2 billing-bucket-name2 prefix2 --ec2 profile_name --ec2 profile_name2 --xlsx-name filename 58 | ``` 59 | 60 | ## Screenshots 61 | 62 | Download the report example [here](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/report-example.xlsx). 
63 | 64 | ### Cost and variations tab 65 | 66 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/cost-variations.png) 67 | 68 | 69 | ### Reserved instance summary 70 | 71 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/reserved-instance-summary.png) 72 | 73 | 74 | ### Instance size recommendations 75 | 76 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/instance-size-recommendations.png) 77 | 78 | 79 | ### EC2 instances last month 80 | 81 | ![](https://s3-us-west-2.amazonaws.com/trackit-public-artifacts/aws-cost-report/ec2-instances-last-month.png) 82 | -------------------------------------------------------------------------------- /report-example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trackit/aws-cost-report/d04b24330fd1d4fcad463e5e53c5ca5955a226ee/report-example.xlsx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client==1.6.5 2 | boto3==1.6.3 3 | awscli==1.14.59 4 | xlsxwriter==1.0.2 5 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import argparse 5 | import sys 6 | import os 7 | import hashlib 8 | import json 9 | import threading 10 | import zipfile 11 | import gzip 12 | import time 13 | import shutil 14 | import itertools 15 | import dateutil.relativedelta 16 | from datetime import datetime 17 | 18 | class Parser(argparse.ArgumentParser): 19 | def print_help(self, file=sys.stdout): 20 | super(Parser, self).print_help(file) 21 | print( 22 | """ 23 | BILLING PREFIX: 24 | This tool uses AWS's new Cost And Usage Report format for billing data. 
The 25 | following structure is expected in S3: 26 | 27 | PREFIX 28 | `- arbitraryReportName 29 | |- 20171001-20171101 30 | | |- arbitraryReportName-Manifest.json 31 | | |- bbe82960-6a1a-47fd-ae59-1e666e2f674a 32 | | | |- arbitraryReportName-Manifest.json 33 | | | |- arbitraryReportName-1.csv.gz 34 | | | `- ... 35 | | `- ... 36 | `- ... 37 | 38 | You can get more information about this at 39 | https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage.html""", 40 | file=file, 41 | ) 42 | 43 | def error(self, message): 44 | print(message) 45 | self.print_help() 46 | sys.exit(2) 47 | 48 | 49 | def parse_args(): 50 | parser = Parser() 51 | parser.add_argument( 52 | "--no-clear-before", 53 | help="Do not clear all data before doing anything. Useful when a previous invocation failed or when you add data incrementally before generating the sheet.", 54 | dest="clear_before", 55 | action="store_false", 56 | default=True, 57 | ) 58 | parser.add_argument( 59 | "--no-generate-xlsx", 60 | help="Do not generate a XLSX file after all data was retrieved.", 61 | dest="generate_xslx", 62 | action="store_false", 63 | default=True, 64 | ) 65 | now = datetime.now() 66 | parser.add_argument( 67 | "--xlsx-name", 68 | help="Name of the XLSX file.", 69 | dest="xlsx_name", 70 | default=now.strftime("trackit_aws_cost_report_%Y_%m_%d"), 71 | ) 72 | parser.add_argument( 73 | "--generate-gsheet", 74 | help="Generate a Google Sheet after all data was retrieved.", 75 | dest="generate_gsheet", 76 | action="store_true", 77 | default=False, 78 | ) 79 | parser.add_argument( 80 | "--billing", 81 | help="Get billing data from s3:/BUCKET/PREFIX using PROFILE.", 82 | action="append", 83 | nargs=3, 84 | metavar=("PROFILE", "BUCKET", "PREFIX"), 85 | default=[], 86 | ) 87 | parser.add_argument( 88 | "--ec2", 89 | help="Get EC2 data for PROFILE.", 90 | action="append", 91 | nargs=1, 92 | metavar="PROFILE", 93 | default=[], 94 | ) 95 | return parser.parse_args(), parser 96 | 97 | 
98 | def try_mkdir(path): 99 | try: 100 | os.mkdir(path) 101 | except FileExistsError: 102 | pass 103 | 104 | 105 | try_mkdir("in") 106 | try_mkdir("in/usagecost") 107 | try_mkdir("out") 108 | try_mkdir("out/reservation-usage") 109 | try_mkdir("out/instance-reservation-usage") 110 | try_mkdir("out/instance-size-recommendation") 111 | try_mkdir("out/instance-metadata") 112 | try_mkdir("out/last-month") 113 | try_mkdir("out/s3") 114 | 115 | default_region = "us-east-1" 116 | 117 | def awsenv(profile, region): 118 | return "util/awsenv --profile {} --region {}".format(profile, region) 119 | 120 | 121 | def build_billing_diff(): 122 | os.system("src/get_bill_diff.py") 123 | 124 | 125 | def build_instance_history(): 126 | os.system("src/get_ec2_instance_history.py") 127 | 128 | def build_ec2_last_month_usage(): 129 | os.system("src/get_last_month_ec2_cost.py") 130 | 131 | def build_ebs_last_month_usage(): 132 | os.system("src/get_last_month_ebs_cost.py") 133 | 134 | def build_s3_cost(): 135 | os.system("src/get_s3_cost.py") 136 | 137 | def build_gsheet(): 138 | os.system("src/make_gsheet.py") 139 | 140 | 141 | def build_xlsx(name): 142 | os.system("src/make_xlsx.py {}".format(name)) 143 | 144 | def get_session(profile): 145 | if profile != 'env': 146 | session = boto3.Session(profile_name=profile) 147 | else: 148 | session = boto3.Session() 149 | return session 150 | 151 | def do_get_billing_data(profile, bucket, prefix): 152 | 153 | nonce = hashlib.sha1("{}{}".format(bucket, prefix).encode()).hexdigest()[:12] 154 | it = 1 155 | concurrent_available = 4 156 | concurrent_available_mutex = threading.Lock() 157 | thread = [] 158 | 159 | def change_concurrent_available(value): 160 | nonlocal concurrent_available 161 | nonlocal concurrent_available_mutex 162 | 163 | concurrent_available_mutex.acquire() 164 | concurrent_available += value 165 | concurrent_available_mutex.release() 166 | 167 | def save_to_file(s3_client, bucket, file_name, report_key): 168 | try: 169 | 
s3_client.download_file(Bucket=bucket, Key=report_key, Filename=file_name) 170 | except Exception as e: 171 | print(e) 172 | finally: 173 | change_concurrent_available(1) 174 | 175 | def analyze_report(s3_client, bucket, report_keys): 176 | nonlocal it 177 | nonlocal thread 178 | nonlocal concurrent_available 179 | for report_key in report_keys: 180 | if concurrent_available <= 0: 181 | print(" Waiting to download {}...".format(report_key)) 182 | while concurrent_available <= 0: 183 | time.sleep(0.1) 184 | file_name = "in/usagecost/{}.{}.csv.{}".format(nonce, it, report_key.split(".")[-1]) 185 | t = threading.Thread(name=report_key, target=save_to_file, args=(s3_client, bucket, file_name, report_key)) 186 | print(" Downloading {}...".format(report_key)) 187 | t.start() 188 | change_concurrent_available(-1) 189 | thread.append(t) 190 | it += 1 191 | 192 | def analyze_obj(s3_client, objs): 193 | total = len(objs) 194 | current = 1 195 | for obj in objs: 196 | print(" Getting bill files from {} ({}/{})...".format(obj["Key"], current, total)) 197 | content = s3_client.get_object(Bucket=bucket, Key=obj["Key"])["Body"].read().decode("utf-8") 198 | content_json = json.loads(content) 199 | if "bucket" in content_json: 200 | analyze_report(s3_client, content_json["bucket"], content_json["reportKeys"]) 201 | current += 1 202 | for t in thread: 203 | t.join() 204 | 205 | def unzip_obj(): 206 | for file_name in os.listdir("in/usagecost"): 207 | try: 208 | print("Extracting {}...".format(file_name)) 209 | if file_name.startswith(nonce) and file_name.endswith(".zip"): 210 | with zipfile.ZipFile(os.path.join("in/usagecost", file_name), "r") as z: 211 | z.extractall("in/usagecost") 212 | elif file_name.startswith(nonce) and file_name.endswith(".gz"): 213 | with gzip.GzipFile(os.path.join("in/usagecost", file_name), "r") as z: 214 | with open(os.path.join("in/usagecost", file_name[:-3]), "wb+") as f: 215 | shutil.copyfileobj(z, f) 216 | except Exception as e: 217 | print("Failed to 
extract {}: {}".format(file_name, e)) 218 | finally: 219 | os.remove(os.path.join("in/usagecost", file_name)) 220 | 221 | try: 222 | session = get_session(profile) 223 | s3_client = session.client("s3") 224 | page = s3_client.get_paginator("list_objects").paginate(Bucket=bucket, Prefix=prefix) 225 | min_date = (datetime.now() + dateutil.relativedelta.relativedelta(months=-6)).replace(day=1).strftime('%Y%m%d') 226 | objs = [ 227 | obj 228 | for p in page 229 | for obj in p["Contents"] 230 | if obj["Key"].endswith(".json") and 231 | len(obj["Key"].split('/')) == 4 and 232 | obj["Key"].split('/')[-2] >= min_date 233 | ] 234 | except Exception as e: 235 | exit(e) 236 | analyze_obj(s3_client, objs) 237 | unzip_obj() 238 | 239 | 240 | def do_get_instance_data(profile, region): 241 | threads = [] 242 | for cmd in ( 243 | "{} src/get_ec2_recommendations.py".format(awsenv(profile, region)), 244 | "{} src/get_ec2_metadata.py".format(awsenv(profile, region)), 245 | ): 246 | threads.append(threading.Thread(target=os.system, args=[cmd])) 247 | threads[-1].start() 248 | for t in threads: 249 | t.join() 250 | 251 | 252 | def recursively_remove_file(path): 253 | if os.path.isdir(path): 254 | for f in os.listdir(path): 255 | recursively_remove_file(os.path.join(path, f)) 256 | else: 257 | os.remove(path) 258 | 259 | 260 | def clear_data(): 261 | for f in os.listdir("out"): 262 | recursively_remove_file(os.path.join("out", f)) 263 | for f in os.listdir("in"): 264 | f = os.path.join("in", f) 265 | if not os.path.isdir(f) or (os.path.isdir(f) and f != "in/persistent"): 266 | recursively_remove_file(f) 267 | 268 | def get_regions(session): 269 | client_region = session.region_name or default_region 270 | client = session.client('ec2', region_name=client_region) 271 | regions = client.describe_regions() 272 | return [ 273 | region['RegionName'] 274 | for region in regions['Regions'] 275 | ] 276 | 277 | def main(): 278 | args, parser = parse_args() 279 | args.ec2 = [a[0] for a in 
args.ec2] if len(args.ec2) else [] 280 | # if len(args.billing) == 0 and len(args.ec2) == 0: 281 | # return parser.print_help() 282 | if args.clear_before: 283 | clear_data() 284 | if not os.path.isfile("in/ondemandcosts.json"): 285 | os.system("src/get_ec2_costs.sh") 286 | for bill in args.billing: 287 | print("Download billings for {}...".format(bill[0])) 288 | do_get_billing_data(*bill) 289 | if len(args.ec2): 290 | session = get_session(args.ec2[0]) 291 | regions = get_regions(session) 292 | threads = [] 293 | for region in regions: 294 | print("Fetching ec2 data for all accounts in {}...".format(region)) 295 | cmd = "src/get_ec2_data.py --region {} --profile {}".format(region, ' '.join(args.ec2)) 296 | threads.append((region, threading.Thread(target=os.system, args=[cmd]))) 297 | threads[-1][1].start() 298 | for t in threads: 299 | t[1].join() 300 | print("Fetched ec2 data for all accounts in {}".format(t[0])) 301 | for ec in args.ec2: 302 | threads = [] 303 | for region in regions: 304 | print("Fetching ec2 metadata for {} in {}...".format(ec, region)) 305 | threads.append((region, threading.Thread(target=do_get_instance_data, args=(ec, region)))) 306 | threads[-1][1].start() 307 | for t in threads: 308 | t[1].join() 309 | print("Fetched ec2 metadata for {} in {}".format(ec, t[0])) 310 | if args.generate_gsheet or args.generate_xslx: 311 | fcts = [ 312 | ("billing diff", build_billing_diff), 313 | ("instance history", build_instance_history), 314 | ("ec2 last month", build_ec2_last_month_usage), 315 | ("ebs last month", build_ebs_last_month_usage), 316 | ("s3 cost", build_s3_cost) 317 | ] 318 | for i, fct in zip(itertools.count(1), fcts): 319 | print("Processing billing data ({}/{} - {})...".format(i, len(fcts), fct[0])) 320 | fct[1]() 321 | if args.generate_gsheet: 322 | build_gsheet() 323 | if args.generate_xslx: 324 | build_xlsx(args.xlsx_name) 325 | 326 | 327 | if __name__ == "__main__": 328 | main() 329 | 
-------------------------------------------------------------------------------- /src/get_bill_diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import sys 5 | import collections 6 | import datetime 7 | import itertools 8 | import os 9 | 10 | DIR_USAGECOST='in/usagecost' 11 | OUT_MONTHS='out/months.csv' 12 | OUT_ABSOLUTE='out/absolute.csv' 13 | 14 | def rows_folder(dirpath): 15 | for filename in os.listdir(dirpath): 16 | filepath = os.path.join(dirpath, filename) 17 | with open(filepath) as f: 18 | for row in rows(f): 19 | yield row 20 | 21 | def parseIsoDatetime(isodatetime): 22 | return datetime.datetime.strptime(isodatetime.replace('Z', '+0000'), '%Y-%m-%dT%H:%M:%S%z') 23 | 24 | def rows(csvfile): 25 | reader = csv.DictReader(csvfile) 26 | for row in reader: 27 | yield row 28 | 29 | def window(l, n=2): 30 | for i in range(len(l) - n + 1): 31 | yield l[i:i+n] 32 | 33 | def variations(costs): 34 | return [ 35 | "" if old <= .0 else (new / old) - 1 36 | for [old, new] in window([0, *costs]) 37 | ] 38 | 39 | def yearmonth_to_string(yearmonth): 40 | year, month = divmod(yearmonth, 12) 41 | return '{:04d}-{:02d}'.format(year, month + 1) 42 | 43 | first_month = -1 44 | last_month = -1 45 | breakdown = collections.defaultdict(float) 46 | 47 | for row in rows_folder(DIR_USAGECOST): 48 | usage_start_date = parseIsoDatetime(row['lineItem/UsageStartDate']) 49 | month = usage_start_date.year * 12 + usage_start_date.month - 1 50 | first_month = month if first_month == -1 else min(month, first_month) 51 | last_month = month if last_month == -1 else max(month, last_month) 52 | usagetype = row['lineItem/UsageType'] 53 | # do not process if it is a line item for AWS support 54 | if row['lineItem/ProductCode'] == 'AWSSupportBusiness': 55 | continue 56 | try: 57 | breakdown[(month, usagetype)] += float(row['lineItem/UnblendedCost'])# if row['lineItem/UnblendedCost'] else 0.0 58 | except: 59 | 
print(row, file=sys.stderr) 60 | print(month, file=sys.stderr) 61 | print(usagetype, file=sys.stderr) 62 | 63 | all_months = sorted(set(k[0] for k in breakdown.keys())) 64 | preserved_months = all_months[-12:] 65 | first_month = preserved_months[0] 66 | last_month = preserved_months[-1] 67 | breakdown = { 68 | (month, usagetype): value 69 | for (month, usagetype), value in breakdown.items() 70 | if month in preserved_months 71 | } 72 | 73 | with open(OUT_MONTHS, 'w') as monthsfile: 74 | writer = csv.writer(monthsfile) 75 | writer.writerow(['month', 'usage', 'cost']) 76 | for key, value in breakdown.items(): 77 | writer.writerow([*key, value]) 78 | 79 | #with open('months.csv') as monthsfile: 80 | # for row in rows(monthsfile): 81 | # breakdown[(int(row['month']), row['usage'])] = float(row['cost']) 82 | # first_month = int(row['month']) if first_month == -1 else min(int(row['month']), first_month) 83 | # last_month = int(row['month']) if last_month == -1 else max(int(row['month']), last_month) 84 | 85 | breakdown_by_date = collections.defaultdict(lambda: list([.0] * (last_month - first_month + 1))) 86 | for (month, product), cost in breakdown.items(): 87 | breakdown_by_date[product][month - first_month] += cost 88 | 89 | with open(OUT_ABSOLUTE, 'w') as f: 90 | writer = csv.writer(f) 91 | writer.writerow(['usage'] + [yearmonth_to_string(ym) for ym in range(first_month, last_month + 1)]) 92 | for product, month_cost in breakdown_by_date.items(): 93 | writer.writerow([product, *month_cost]) 94 | 95 | breakdown_variation = {} 96 | for (product, monthly_costs) in breakdown_by_date.items(): 97 | breakdown_variation[product] = variations(monthly_costs) 98 | -------------------------------------------------------------------------------- /src/get_ec2_costs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | curl 'https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json' | \ 4 | jq ' 5 | [ 6 
| .terms.OnDemand as $terms 7 | | .products 8 | | .[] 9 | | select(.productFamily == "Compute Instance") 10 | | select($terms[.sku]) 11 | | ( 12 | . + 13 | { cost: [ 14 | $terms[.sku] 15 | | .[]][0] 16 | | [.priceDimensions | .[]][0] 17 | | .pricePerUnit.USD | 18 | tonumber 19 | } 20 | ) 21 | ]' > in/ondemandcosts.json 22 | -------------------------------------------------------------------------------- /src/get_ec2_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import collections 4 | import csv 5 | import itertools 6 | import json 7 | import re 8 | import argparse 9 | import multiprocessing.pool 10 | from pprint import pprint 11 | import boto3 12 | import botocore 13 | 14 | from mytypes import * 15 | 16 | compute_sheet_region = { 17 | 'us-east-2': 'US East (Ohio)', 18 | 'us-east-1': 'US East (N. Virginia)', 19 | 'us-west-1': 'US West (N. California)', 20 | 'us-west-2': 'US West (Oregon)', 21 | 'ap-northeast-1': 'Asia Pacific (Tokyo)', 22 | 'ap-northeast-2': 'Asia Pacific (Seoul)', 23 | 'ap-south-1': 'Asia Pacific (Mumbai)', 24 | 'ap-southeast-1': 'Asia Pacific (Singapore)', 25 | 'ap-southeast-2': 'Asia Pacific (Sydney)', 26 | 'ca-central-1': 'Canada (Central)', 27 | 'cn-north-1': 'China (Beijing)', 28 | 'cn-northwest-1': 'China (Ningxia)', 29 | 'eu-central-1': 'EU (Frankfurt)', 30 | 'eu-west-1': 'EU (Ireland)', 31 | 'eu-west-2': 'EU (London)', 32 | 'eu-west-3': 'EU (Paris)', 33 | 'sa-east-1': 'South America (Sao Paulo)', 34 | } 35 | 36 | compute_sheet_tenancy = { 37 | 'dedicated': 'Dedicated', 38 | 'host': 'Host', 39 | 'default': 'Shared', 40 | } 41 | 42 | compute_sheet_platform = { 43 | 'Linux/UNIX': 'Linux', 44 | 'Linux/UNIX (Amazon VPC)': 'Linux', 45 | 'SUSE Linux': 'SUSE', 46 | 'SUSE Linux (Amazon VPC)': 'SUSE', 47 | 'Red Hat Enterprise Linux': 'RHEL', 48 | 'Red Hat Enterprise Linux (Amazon VPC)': 'RHEL', 49 | 'Windows': 'Windows', 50 | 'windows': 'Windows', 51 | 'Windows (Amazon 
VPC)': 'Windows', 52 | 'windows (Amazon VPC)': 'Windows', 53 | 'Windows with SQL Server Standard': 'Windows', 54 | 'windows with SQL Server Standard': 'Windows', 55 | 'Windows with SQL Server Standard (Amazon VPC)': 'Windows', 56 | 'windows with SQL Server Standard (Amazon VPC)': 'Windows', 57 | 'Windows with SQL Server Web': 'Windows', 58 | 'windows with SQL Server Web': 'Windows', 59 | 'Windows with SQL Server Web (Amazon VPC)': 'Windows', 60 | 'windows with SQL Server Web (Amazon VPC)': 'Windows', 61 | 'Windows with SQL Server Enterprise': 'Windows', 62 | 'windows with SQL Server Enterprise': 'Windows', 63 | 'Windows with SQL Server Enterprise (Amazon VPC)': 'Windows', 64 | 'windows with SQL Server Enterprise (Amazon VPC)': 'Windows', 65 | } 66 | 67 | DIR_BILLS = 'in/usagecost' 68 | DIR_INSTANCE_RESERVATION_USAGE = 'out/instance-reservation-usage' 69 | DIR_RESERVATION_USAGE = 'out/reservation-usage' 70 | FIL_ONDEMAND_COSTS = 'in/ondemandcosts.json' 71 | 72 | with open(FIL_ONDEMAND_COSTS) as f: 73 | compute_instance_costs = json.load(f) 74 | 75 | _az_to_region_re = re.compile(r'^(.+?)[a-z]?$') 76 | 77 | 78 | def az_to_region(az): 79 | return _az_to_region_re.match(az).group(1) 80 | 81 | 82 | def identity(x): 83 | return x 84 | 85 | 86 | boto_sessions = {} 87 | 88 | 89 | def boto_session_getter(profile, region): 90 | global boto_sessions 91 | if (profile, region) in boto_sessions: 92 | return boto_sessions[(profile, region)] 93 | session = boto3.Session(profile_name=profile, region_name=region) 94 | ec2 = session.client('ec2') 95 | boto_sessions[(profile, region)] = ec2 96 | return ec2 97 | 98 | 99 | def reserved_instance_offering_cost_per_hour(offering): 100 | return offering['FixedPrice'] / (offering['Duration'] / 3600) + ( 101 | offering['RecurringCharges'][0]['Amount'] if len( 102 | offering['RecurringCharges']) > 0 else 0.0) 103 | 104 | 105 | def get_reserved_instances(ec2, region): 106 | reserved_instances_data = ec2.describe_reserved_instances( 107 | 
Filters=[ 108 | { 109 | 'Name': 'state', 110 | 'Values': [ 111 | 'active', 112 | ] 113 | }, 114 | ], 115 | ) 116 | return [ 117 | InstanceReservationCount( 118 | instance_reservation=InstanceReservation( 119 | type=InstanceType( 120 | size=ri['InstanceType'], 121 | availability_zone=ri['AvailabilityZone'] if ri[ 122 | 'Scope'] == 'Availability Zone' else region, 123 | tenancy=ri['InstanceTenancy'], 124 | product=ri['ProductDescription'], 125 | vpc=ri['ProductDescription'].endswith("(Amazon VPC)"), 126 | ), 127 | cost_hourly=sum(rc['Amount'] for rc in ri['RecurringCharges']), 128 | cost_upfront=ri['FixedPrice'], 129 | ), 130 | count=ri['InstanceCount'], 131 | count_used=0, 132 | ) 133 | for ri in reserved_instances_data['ReservedInstances'] 134 | ] 135 | 136 | 137 | def get_ondemand_instance_types(ec2, profile): 138 | def get_instance_type(instance_type): 139 | if instance_type == "windows": 140 | return "Windows" 141 | return instance_type 142 | 143 | instance_paginator = ec2.get_paginator('describe_instances') 144 | pages = instance_paginator.paginate( 145 | Filters=[ 146 | { 147 | 'Name': 'instance-state-name', 148 | 'Values': [ 149 | 'pending', 150 | 'running', 151 | ], 152 | }, 153 | { 154 | 'Name': 'tenancy', 155 | 'Values': [ 156 | 'dedicated', 157 | 'default', 158 | ], 159 | } 160 | ] 161 | ) 162 | reservations = itertools.chain.from_iterable( 163 | p['Reservations'] for p in pages) 164 | instances = itertools.chain.from_iterable( 165 | r['Instances'] for r in reservations) 166 | return [ 167 | InstanceTypeWithProfile( 168 | profile=profile, 169 | instance_type=InstanceType( 170 | size=i['InstanceType'], 171 | availability_zone=i['Placement']['AvailabilityZone'], 172 | tenancy=i['Placement']['Tenancy'], 173 | product=get_instance_type(i.get('Platform', 'Linux/UNIX')), 174 | vpc=i.get('VpcId', '') != '', 175 | ) 176 | ) 177 | for i in instances 178 | if i.get('InstanceLifecycle', 'ondemand') == 'ondemand' 179 | ] 180 | 181 | 182 | def 
get_ec2_type_offerings(ec2, instance_type): 183 | offerings = itertools.chain.from_iterable( 184 | page['ReservedInstancesOfferings'] 185 | for page in 186 | ec2.get_paginator('describe_reserved_instances_offerings').paginate( 187 | IncludeMarketplace=False, 188 | InstanceTenancy=instance_type.tenancy, 189 | ProductDescription=instance_type.product, 190 | Filters=[ 191 | { 192 | 'Name': 'instance-type', 193 | 'Values': [instance_type.size], 194 | }, 195 | ], 196 | ) 197 | ) 198 | try: 199 | offerings = sorted(offerings, 200 | key=reserved_instance_offering_cost_per_hour) 201 | except botocore.exceptions.ClientError: 202 | # Handling api limits 203 | return get_ec2_type_offerings(ec2, instance_type) 204 | try: 205 | offering_best = offerings[0] 206 | offering_worst = offerings[-1] 207 | except IndexError: 208 | return None 209 | ondemand = next( 210 | c 211 | for c in compute_instance_costs 212 | if ( 213 | c['attributes']['instanceType'] == instance_type.size 214 | and c['attributes']['location'] == compute_sheet_region.get( 215 | az_to_region(instance_type.availability_zone), 216 | az_to_region(instance_type.availability_zone)) 217 | and c['attributes']['tenancy'] == compute_sheet_tenancy[ 218 | instance_type.tenancy] 219 | and c['attributes']['operatingSystem'] == 220 | compute_sheet_platform[instance_type.product] 221 | ) 222 | )['cost'] 223 | res = InstanceOffering( 224 | type=instance_type, 225 | cost_reserved_worst=reserved_instance_offering_cost_per_hour( 226 | offering_worst), 227 | cost_reserved_best=reserved_instance_offering_cost_per_hour( 228 | offering_best), 229 | cost_ondemand=ondemand, 230 | ) 231 | return res 232 | 233 | 234 | def instance_type_matches(pattern, example): 235 | def get_generic_type(instancetype): 236 | if instancetype.lower().startswith( 237 | 'windows') or instancetype.lower().startswith('suse'): 238 | return instancetype 239 | return 'Linux/UNIX' 240 | 241 | tmpPattern = pattern.type._replace( 242 | 
product=get_generic_type(pattern.type.product)) 243 | tmpExample = example._replace( 244 | product=get_generic_type(pattern.type.product)) 245 | if example.vpc == True: 246 | return (tmpPattern == example or tmpPattern == example._replace( 247 | vpc=False) or 248 | tmpPattern == tmpExample._replace(vpc=False, 249 | availability_zone=az_to_region( 250 | example.availability_zone)) or 251 | tmpPattern == tmpExample._replace( 252 | availability_zone=az_to_region(example.availability_zone))) 253 | else: 254 | return (tmpPattern == example or tmpPattern == example._replace( 255 | availability_zone=az_to_region(example.availability_zone)) or 256 | tmpPattern == tmpExample._replace(vpc=True) or 257 | tmpPattern == tmpExample._replace(vpc=True, 258 | availability_zone=az_to_region( 259 | example.availability_zone))) 260 | 261 | 262 | def get_instance_matchings(offerings, reservations): 263 | instance_offerings_counted = [ 264 | InstanceOfferingCount( 265 | instance_offering=instance_offering, 266 | count=count, 267 | count_reserved=0, 268 | ) 269 | for instance_offering, count in offerings.items() 270 | ] 271 | remaining_reserved_instances = [ 272 | [ri, count] 273 | for ri, count in reservations.items() 274 | ] 275 | matched_instances = [] 276 | for oi in sorted(instance_offerings_counted, reverse=True, 277 | key=lambda x: x.instance_offering.type.availability_zone[ 278 | ::-1]): 279 | matching_reserved = ( 280 | rri 281 | for rri in sorted(remaining_reserved_instances, reverse=True, 282 | key=lambda i: i[0].type.availability_zone[::-1]) 283 | if rri[1] > 0 and instance_type_matches(rri[0], oi.instance_offering.type) 284 | ) 285 | reserved = 0 286 | while reserved < oi.count: 287 | try: 288 | ri = next(matching_reserved) 289 | except StopIteration: 290 | break 291 | use = min(ri[1], oi.count - reserved) 292 | ri[1] -= use 293 | reserved += use 294 | matched_instances.append(oi._replace(count_reserved=reserved)) 295 | reservation_usage = [ 296 | InstanceReservationCount( 
297 | instance_reservation=ri, 298 | count=reservations[ri], 299 | count_used=reservations[ri] - remaining, 300 | ) 301 | for [ri, remaining] in remaining_reserved_instances 302 | ] 303 | return matched_instances, reservation_usage 304 | 305 | 306 | def get_ec2_reservations(profiles, region): 307 | reservations = collections.defaultdict(int) 308 | for profile in profiles: 309 | print('[{} - {}] Getting reserved instances...'.format(profile, region)) 310 | ec2 = boto_session_getter(profile, region) 311 | reserved_instances = get_reserved_instances(ec2, region) 312 | for ri in reserved_instances: 313 | reservations[ri.instance_reservation] += ri.count 314 | return reservations 315 | 316 | 317 | def get_ec2_instances(profiles, region): 318 | instances = collections.defaultdict(int) 319 | for profile in profiles: 320 | print('[{} - {}] Getting on-demand instances...'.format(profile, region)) 321 | ec2 = boto_session_getter(profile, region) 322 | instance_types = get_ondemand_instance_types(ec2, profile) 323 | for it in instance_types: 324 | instances[it] += 1 325 | return instances 326 | 327 | 328 | def get_ec2_offerings(instances, region, profiles): 329 | with multiprocessing.pool.ThreadPool(processes=4) as pool: 330 | offerings = collections.defaultdict(int) 331 | tasks = [] 332 | print('[global - {}] Getting offerings for all instances...'.format(region)) 333 | for instance, count in instances.items(): 334 | ec2 = boto_session_getter(instance.profile, region) 335 | tasks.append({ 336 | 'profile': [instance.profile], 337 | 'remaining_profiles': [p for p in profiles if p != instance.profile], 338 | 'instance_type': instance.instance_type, 339 | 'instance_count': count, 340 | 'task': pool.apply_async(get_ec2_type_offerings, 341 | [ec2, instance.instance_type]), 342 | }) 343 | for i, task in zip(itertools.count(1), tasks): 344 | if len(task['profile']) == 1: 345 | print('[{} - {}] Getting offerings for instance {}/{}...'.format( 346 | task['profile'][0], region, i, 
len(instances))) 347 | offering = task['task'].get() 348 | if offering: 349 | offerings[offering] += task['instance_count'] 350 | elif len(task['remaining_profiles']): 351 | ec2 = boto_session_getter(task['remaining_profiles'][0], region) 352 | new_task = task.copy() 353 | new_task['task'] = pool.apply_async(get_ec2_type_offerings, [ec2, new_task['instance_type']]) 354 | new_task['profile'].append(new_task['remaining_profiles'][0]) 355 | new_task['remaining_profiles'].pop(0) 356 | tasks.append(new_task) 357 | return offerings 358 | 359 | 360 | def get_ec2_data(profiles, region): 361 | reservations = get_ec2_reservations(profiles, region) 362 | instances = get_ec2_instances(profiles, region) 363 | offerings = get_ec2_offerings(instances, region, profiles) 364 | print('[global - {}] Matching on-demand instances with reserved instances...'.format(region)) 365 | matched_instances, reservation_usage = get_instance_matchings(offerings, 366 | reservations) 367 | print('[global - {}] Done!'.format(region)) 368 | return matched_instances, reservation_usage 369 | 370 | 371 | def write_matched_instances(f, matched_instances, header=True): 372 | writer = csv.DictWriter(f, fieldnames=[ 373 | 'instance_type', 374 | 'availability_zone', 375 | 'tenancy', 376 | 'product', 377 | 'cost_ondemand', 378 | 'cost_reserved_worst', 379 | 'cost_reserved_best', 380 | 'count', 381 | 'count_reserved', 382 | ]) 383 | if header: 384 | writer.writeheader() 385 | for mi in matched_instances: 386 | writer.writerow({ 387 | 'instance_type': mi.instance_offering.type.size, 388 | 'availability_zone': mi.instance_offering.type.availability_zone, 389 | 'tenancy': mi.instance_offering.type.tenancy, 390 | 'product': mi.instance_offering.type.product, 391 | 'cost_ondemand': mi.instance_offering.cost_ondemand, 392 | 'cost_reserved_worst': mi.instance_offering.cost_reserved_worst, 393 | 'cost_reserved_best': mi.instance_offering.cost_reserved_best, 394 | 'count': mi.count, 395 | 'count_reserved': 
mi.count_reserved, 396 | }) 397 | 398 | 399 | def write_reservation_usage(f, reservation_usage, header=True): 400 | writer = csv.DictWriter(f, fieldnames=[ 401 | 'instance_type', 402 | 'availability_zone', 403 | 'tenancy', 404 | 'product', 405 | 'cost_hourly', 406 | 'cost_upfront', 407 | 'count', 408 | 'count_used', 409 | ]) 410 | if header: 411 | writer.writeheader() 412 | for ru in reservation_usage: 413 | writer.writerow({ 414 | 'instance_type': ru.instance_reservation.type.size, 415 | 'availability_zone': ru.instance_reservation.type.availability_zone, 416 | 'tenancy': ru.instance_reservation.type.tenancy, 417 | 'product': ru.instance_reservation.type.product, 418 | 'cost_hourly': ru.instance_reservation.cost_hourly, 419 | 'cost_upfront': ru.instance_reservation.cost_upfront, 420 | 'count': ru.count, 421 | 'count_used': ru.count_used, 422 | }) 423 | 424 | 425 | if __name__ == '__main__': 426 | parser = argparse.ArgumentParser() 427 | parser.add_argument('--region', help='aws region', required=True) 428 | parser.add_argument('--profile', help='aws profile', required=True, nargs='+') 429 | args = parser.parse_args() 430 | matched_instances, reservation_usage = get_ec2_data(args.profile, 431 | args.region) 432 | with open('{}/{}.csv'.format(DIR_INSTANCE_RESERVATION_USAGE, args.region), 433 | 'w') as f: 434 | write_matched_instances(f, matched_instances) 435 | with open('{}/{}.csv'.format(DIR_RESERVATION_USAGE, args.region), 'w') as f: 436 | write_reservation_usage(f, reservation_usage) 437 | -------------------------------------------------------------------------------- /src/get_ec2_instance_history.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import collections 4 | import csv 5 | import functools 6 | import itertools 7 | import json 8 | 9 | import utils 10 | 11 | USAGECOST_DIR='in/usagecost' 12 | OUT_PATH = 'out/instance-history.csv' 13 | USAGE = 'BoxUsage' 14 | 15 | def updated(base, 
addend): 16 | base = base.copy() 17 | base.update(addend) 18 | return base 19 | 20 | with utils.csv_folder(USAGECOST_DIR) as records: 21 | box_usage_records = ( 22 | record 23 | for record in records 24 | if USAGE in record['lineItem/UsageType'] 25 | ) 26 | simplified_lineitems = ( 27 | ( 28 | record['product/instanceType'], 29 | round(float(record['lineItem/UsageAmount'])) if record['lineItem/UsageAmount'] else 0, 30 | record['lineItem/UsageStartDate']) 31 | for record in box_usage_records 32 | ) 33 | 34 | histogram = { 35 | date: updated( 36 | collections.defaultdict(int), 37 | { 38 | instancetype: sum( 39 | lineitem[1] for lineitem in instancetype_lineitems 40 | ) 41 | for instancetype, instancetype_lineitems in itertools.groupby( 42 | sorted( 43 | date_lineitems, 44 | key=lambda x: x[0], 45 | ), 46 | key=lambda x: x[0], 47 | ) 48 | } 49 | ) 50 | for date, date_lineitems in itertools.groupby( 51 | sorted( 52 | simplified_lineitems, 53 | key=lambda x: x[2], 54 | ), 55 | key=lambda x: x[2], 56 | ) 57 | } 58 | 59 | instance_types = sorted( 60 | functools.reduce( 61 | lambda x, y: x.union(y), 62 | ( 63 | date.keys() 64 | for date in histogram.values() 65 | ), 66 | frozenset(), 67 | ) 68 | ) 69 | 70 | with open(OUT_PATH, 'w') as outfile: 71 | writer = csv.DictWriter(outfile, fieldnames=['date', *instance_types]) 72 | writer.writeheader() 73 | for date in sorted(histogram.keys()): 74 | writer.writerow(updated( 75 | collections.defaultdict(int), 76 | { 'date': date, **histogram[date] } 77 | )) 78 | -------------------------------------------------------------------------------- /src/get_ec2_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import itertools 5 | import collections 6 | import csv 7 | import sys 8 | import datetime 9 | import json 10 | import collections 11 | import re 12 | from dateutil.tz import tzutc 13 | 14 | DIR_INSTANCE_METADATA = 'out/instance-metadata' 
REGION = boto3._get_default_session().region_name
ACCOUNT = boto3.client('sts').get_caller_identity()['Account']


def safe_list_get(l, idx, default):
    """Return l[idx], or `default` when the index is out of range."""
    try:
        return l[idx]
    except IndexError:
        return default


def get_ec2_metadata(ec2, region):
    """Collect id, Name tag and attached EBS volume ids for every instance.

    Returns a list of dicts with keys 'instance_id', 'name', 'ebs'.
    """
    print("[{} - {}] Getting instances metadata...".format(ACCOUNT, REGION))
    instances_pag = ec2.get_paginator('describe_instances')
    metadata = [
        {
            'instance_id': i.get('InstanceId', ''),
            # First 'Name' tag value, or '' when the instance is unnamed.
            'name': safe_list_get([v['Value'] for v in i.get('Tags', []) if v['Key'] == 'Name'], 0, ''),
            'ebs': ','.join([e.get('Ebs', {}).get('VolumeId', '') for e in i.get('BlockDeviceMappings', [])]),
        }
        for p in instances_pag.paginate()
        for r in p['Reservations']
        for i in r['Instances']
    ]
    print('[{} - {}] Done!'.format(ACCOUNT, REGION))
    return metadata


def write_instances_metadata(f, reservation_usage):
    """Write instance metadata rows (as produced by get_ec2_metadata) to `f` as CSV.

    BUG FIX: the loop previously iterated the module-level `metadata` (which
    only exists when run as a script) instead of the argument, so the function
    silently ignored what the caller passed in.
    """
    writer = csv.DictWriter(f, fieldnames=[
        'instance_id',
        'name',
        'ebs',
    ])
    writer.writeheader()
    for m in reservation_usage:
        writer.writerow(m)


if __name__ == '__main__':
    ec2 = boto3.client('ec2')
    metadata = get_ec2_metadata(
        ec2, REGION)
    with open('{}/{}.{}.csv'.format(DIR_INSTANCE_METADATA, ACCOUNT, REGION), 'w') as f:
        write_instances_metadata(f, metadata)
# -------------------------------------------------------------------------- /src/get_ec2_recommendations.py:
#!/usr/bin/env python3

import collections
import datetime
import re
import csv

import boto3

# Normalization factors can be found at
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ri-modifying.html#ri-modification-instancemove
# Authorized family can be found at
# https://aws.amazon.com/ec2/pricing/on-demand
# size -> [normalization factor, families offering this size]
INSTANCE_META = collections.OrderedDict([
    ('nano', [1, ["t2"]]),
    ('micro', [2, ["t2", "t1"]]),
    ('small', [1 * 4, ["t2", "m1"]]),
    ('medium', [2 * 4, ["t2", "m1", "m3", "c1"]]),
    ('large', [4 * 4, ["t2", "m5", "m4", "c5", "c4", "r4", "i3", "m1", "m3", "c3", "r3"]]),
    ('xlarge', [8 * 4, ["t2", "m5", "m4", "c5", "c4", "p2", "x1e", "r4", "i3", "d2", "m1", "m3", "c1", "c3", "m2", "r3", "i2"]]),
    ('2xlarge', [16 * 4, ["t2", "m5", "m4", "c5", "c4", "p3", "x1e", "r4", "i3", "h1", "d2", "m3", "c3", "g2", "m2", "r3", "i2"]]),
    ('4xlarge', [32 * 4, ["m5", "m4", "c5", "c4", "g3", "x1e", "r4", "i3", "h1", "d2", "c3", "m2", "r3", "i2"]]),
    ('8xlarge', [64 * 4, ["c4", "p2", "p3", "g3", "x1e", "r4", "i3", "h1", "d2", "cc2", "c3", "g2", "cr1", "r3", "i2", "hs1"]]),
    ('9xlarge', [72 * 4, ["c5"]]),
    ('10xlarge', [80 * 4, ["m4"]]),
    ('12xlarge', [96 * 4, ["m5"]]),
    ('16xlarge', [128 * 4, ["m4", "p2", "p3", "g3", "x1", "x1e", "r4", "i3", "h1"]]),
    ('18xlarge', [144 * 4, ["c5"]]),
    ('24xlarge', [192 * 4, ["m5"]]),
    ('32xlarge', [256 * 4, ["x1", "x1e"]]),
])

TARGET_CPU_USAGE = 0.80
CPU_USAGE_INTERVAL = datetime.timedelta(hours=24)
CPU_USAGE_INTERVAL_SECOND = CPU_USAGE_INTERVAL.days * 24 * 3600 + CPU_USAGE_INTERVAL.seconds
DIR_RECOMMENDATION = 'out/instance-size-recommendation'

REGION = boto3._get_default_session().region_name
ACCOUNT = boto3.client('sts').get_caller_identity()['Account']

InstanceSize = collections.namedtuple('InstanceSize', ['family', 'size'])
InstanceRecommendation = collections.namedtuple('InstanceRecommendation', [
    'account',
    'id',
    'name',
    'size',
    'lifecycle',
    'cpu_usage',
    'recommendation',
    'saving',
    'reason',
])


def next_or(it, default):
    """Return the next item of iterator `it`, or `default` when exhausted."""
    try:
        return next(it)
    except StopIteration:
        return default


def next_or_none(it):
    return next_or(it, None)


_str_to_instance_size_re = re.compile(r'([a-z]+[0-9])\.(nano|micro|small|medium|(?:[0-9]*x?large))')


def str_to_instance_size(s):
    """Parse 'family.size' (e.g. 'm4.xlarge') into an InstanceSize, or None."""
    m = _str_to_instance_size_re.match(s)
    if m:
        return InstanceSize(
            family=m.group(1),
            size=m.group(2),
        )


def instance_size_to_str(instance_size):
    return '{}.{}'.format(*instance_size)


def recommended_size(instance_type, cpu_usage):
    """Return the smallest size in the same family reaching TARGET_CPU_USAGE.

    NOTE(review): raises StopIteration when no size in the family has a large
    enough normalization factor — TODO confirm that cannot happen in practice.
    """
    current_norm_factor = INSTANCE_META[instance_type.size][0]
    cpu_delta = cpu_usage / TARGET_CPU_USAGE
    target_norm_factor = cpu_delta * current_norm_factor
    matching_norm_factor = next(size for size, meta in INSTANCE_META.items() if meta[0] >= target_norm_factor and instance_type.family in meta[1])
    return matching_norm_factor


def get_reason(cpu_usage, current_size, recommendation):
    """Human-readable justification for a recommendation."""
    if cpu_usage is None:
        return 'insufficient_data'
    elif cpu_usage > TARGET_CPU_USAGE:  # was a literal 0.80 duplicating the constant
        return 'High CPU usage average: {0:.3f}%'.format(cpu_usage * 100)
    elif current_size == recommendation:
        return 'Optimal CPU usage average'
    return 'Low CPU usage average: {0:.3f}%'.format(cpu_usage * 100)


def get_saving(cpu_usage, current_size, recommendation):
    """Estimated saving (as 'NN.N%') from moving to the recommended size."""
    current_norm_factor = INSTANCE_META[current_size][0]
    recommended_norm_factor = INSTANCE_META.get(recommendation, [0])[0]
    if cpu_usage is None or current_norm_factor == 0 or recommended_norm_factor == 0:
        return '0%'
    else:
        return '{0:.1f}%'.format(100 - ((recommended_norm_factor * 100) / current_norm_factor))


def get_cpu_usage(cloudwatch, now, instance_id):
    """Average CPU utilization (0..1) over the last 24h, or None if no data."""
    usage_statistics = cloudwatch.get_metric_statistics(
        Namespace='AWS/EC2',
        MetricName='CPUUtilization',
        Dimensions=[
            {'Name': 'InstanceId', 'Value': instance_id},
        ],
        StartTime=now - CPU_USAGE_INTERVAL,
        EndTime=now,
        Period=CPU_USAGE_INTERVAL_SECOND,
        Statistics=['Average']
    )
    try:
        return usage_statistics['Datapoints'][0]['Average'] / 100
    except IndexError:
        return None


def get_recommendation(instance):
    """Build an InstanceRecommendation for one describe_instances record.

    NOTE(review): relies on module-level `cloudwatch` and `now` being set by
    the __main__ block.
    """
    instance_type_str = instance['InstanceType']
    instance_type = str_to_instance_size(instance_type_str)
    instance_id = instance['InstanceId']
    instance_name = next_or((tag['Value'] for tag in instance.get('Tags', []) if tag['Key'] == 'Name'), '')
    instance_lifecycle = instance.get('InstanceLifecycle', 'ondemand')
    cpu_usage = get_cpu_usage(cloudwatch, now, instance_id)
    recommendation = recommended_size(instance_type, cpu_usage) if cpu_usage is not None else 'insufficient_data'
    reason = get_reason(cpu_usage, instance_type.size, recommendation)
    saving = get_saving(cpu_usage, instance_type.size, recommendation)
    return InstanceRecommendation(
        id=instance_id,
        name=instance_name,
        size=instance_type_str,
        lifecycle=instance_lifecycle,
        # BUG FIX: `cpu_usage or ""` also blanked a legitimate 0.0 reading;
        # only a missing datapoint (None) should be reported as "".
        cpu_usage=cpu_usage if cpu_usage is not None else "",
        recommendation=recommendation,
        reason=reason,
        saving=saving,
        account=ACCOUNT,
    )


def main(ec2, cloudwatch, now):
    """Write one CSV of size recommendations for all instances in the region."""
    instances = (
        instance
        for page in ec2.get_paginator('describe_instances').paginate()
        for reservation in page['Reservations']
        for instance in reservation['Instances']
    )
    recommendations = (
        get_recommendation(instance)
        for instance in instances
    )
    recommendations = sorted(recommendations, key=lambda r: (r.name, r.size))
    with open('{}/{}.{}.csv'.format(DIR_RECOMMENDATION, ACCOUNT, REGION), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(InstanceRecommendation._fields)
        for recommendation in recommendations:
            writer.writerow(recommendation)


if __name__ == '__main__':
    ec2 = boto3.client('ec2')
    cloudwatch = boto3.client('cloudwatch')
    now = datetime.datetime.now()
    main(ec2, cloudwatch, now)
# -------------------------------------------------------------------------- /src/get_last_month_ebs_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta
import json
import re

import utils

USAGECOST_DIR = 'in/usagecost'
METADATA_DIR = 'out/instance-metadata'
OUT_PATH_EBS = 'out/last-month/ebs.csv'
OUT_PATH_SNAPSHOTS = 'out/last-month/snapshots.csv'

# Closed interval [first instant .. last instant] of the previous month.
BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
END_LAST_MONTH = (BEGIN_LAST_MONTH + dateutil.relativedelta.relativedelta(months=1, days=-1)).replace(hour=23, minute=59, second=59, microsecond=999999)

# Map each EBS volume id to the (instance_id, name) it is attached to.
with utils.csv_folder(METADATA_DIR) as records:
    ebs_match = re.compile(r"((vol-[0-9a-fA-F]*),?)*")
    ebs_links = defaultdict(lambda: ("", ""))
    for record in records:
        for ebs in ebs_match.match(record['ebs']).group(0).split(','):
            ebs_links[ebs] = (record['instance_id'], record['name'])

with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    ebs_usage_records = defaultdict(float)
    snapshot_usage_records = defaultdict(float)
    for record in records:
        if 'lineItem/ResourceId' not in record:
            # Warn once, then skip all records lacking a resource id.
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if 'EBS' in record['lineItem/UsageType'] and 'EBSOptimized' not in record['lineItem/UsageType']:
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if BEGIN_LAST_MONTH <= usage_start_date <= END_LAST_MONTH:
                if 'Snapshot' not in record['lineItem/UsageType']:
                    ebs_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'], record['product/region'])] += float(record['lineItem/UnblendedCost'])
                else:
                    snapshot_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'])] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_EBS, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Region', 'Cost', 'InstanceId', 'InstanceName'])
    # Most expensive volumes first.
    for ebs in sorted(ebs_usage_records, key=lambda tup: ebs_usage_records[tup], reverse=True):
        writer.writerow([
            ebs[0],
            ebs[1],
            ebs[2],
            repr(ebs_usage_records[ebs]),
            ebs_links[ebs[1]][0],
            ebs_links[ebs[1]][1],
        ])

with open(OUT_PATH_SNAPSHOTS, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Cost'])
    for rid in sorted(snapshot_usage_records, key=lambda rid: snapshot_usage_records[rid], reverse=True):
        writer.writerow([
            rid[0],
            rid[1],
            repr(snapshot_usage_records[rid]),
        ])
# -------------------------------------------------------------------------- /src/get_last_month_ec2_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta

import utils

USAGECOST_DIR = 'in/usagecost'
METADATA_DIR = 'out/instance-metadata'
OUT_PATH_INSTANCES = 'out/last-month/ec2_instances.csv'
OUT_PATH_BANDWIDTH = 'out/last-month/ec2_bandwidth.csv'

BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
END_LAST_MONTH = (BEGIN_LAST_MONTH + dateutil.relativedelta.relativedelta(months=1, days=-1)).replace(hour=23, minute=59, second=59, microsecond=999999)

with utils.csv_folder(METADATA_DIR) as records:
    instance_name = defaultdict(str)
    for record in records:
        instance_name[record['instance_id']] = record['name']

with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    instance_usage_records = defaultdict(float)
    bandwidth_usage_records = defaultdict(float)
    for record in records:
        if 'lineItem/ResourceId' not in record:
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if record['lineItem/ProductCode'] == 'AmazonEC2':
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if BEGIN_LAST_MONTH <= usage_start_date <= END_LAST_MONTH:
                if 'BoxUsage' in record['lineItem/UsageType']:
                    instance_usage_records[(record['lineItem/UsageAccountId'], record['lineItem/ResourceId'], record['lineItem/AvailabilityZone'], record['pricing/term'], record['product/instanceType'])] += float(record['lineItem/UnblendedCost'])
                elif 'DataTransfer' in record['lineItem/UsageType']:
                    bandwidth_usage_records[record['lineItem/ResourceId']] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_INSTANCES, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Account', 'ResourceId', 'Name', 'AvailabilityZone', 'Term', 'Type', 'Cost'])
    for instance in sorted(instance_usage_records, key=lambda tup: instance_usage_records[tup], reverse=True):
        writer.writerow([
            instance[0],
            instance[1],
            instance_name[instance[1]],
            instance[2],
            instance[3],
            instance[4],
            repr(instance_usage_records[instance]),
        ])

with open(OUT_PATH_BANDWIDTH, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['ResourceId', 'Bandwidth'])
    for instance in sorted(bandwidth_usage_records, key=lambda instance: bandwidth_usage_records[instance], reverse=True):
        writer.writerow([
            instance,
            repr(bandwidth_usage_records[instance]),
        ])
# -------------------------------------------------------------------------- /src/get_s3_cost.py:
#!/usr/bin/env python3

import csv
from datetime import datetime
from collections import defaultdict
import dateutil.relativedelta

import utils

USAGECOST_DIR = 'in/usagecost'
OUT_PATH_S3 = 'out/s3/current_usage.csv'

BEGIN_LAST_MONTH = (datetime.now() + dateutil.relativedelta.relativedelta(months=-1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
BEGIN_CURRENT_MONTH = datetime.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)


def get_simplified_cost_name(record):
    """Classify a billing record as storage/bandwidth/requests cost, else None."""
    if 'TimedStorage' in record.get('lineItem/UsageType', ''):
        return 'storage_cost'
    elif record.get('product/servicecode', '') == 'AWSDataTransfer':
        return 'bandwidth_cost'
    elif 'Requests' in record.get('lineItem/UsageType', ''):
        return 'requests_cost'
    return None


with utils.csv_folder(USAGECOST_DIR) as records:
    resource_id_missing = False
    s3_usage = defaultdict(lambda: dict(usage_gb_month=0.0, storage_cost=0.0, bandwidth_cost=0.0, requests_cost=0.0, last_month_cost=0.0))
    for record in records:
        if 'lineItem/ResourceId' not in record:
            if not resource_id_missing:
                print("Error: the billing report does not export the ResourceId")
                resource_id_missing = True
            continue
        if record['lineItem/ProductCode'] == 'AmazonS3':
            usage_start_date = datetime.strptime(record['lineItem/UsageStartDate'], '%Y-%m-%dT%H:%M:%SZ')
            if usage_start_date >= BEGIN_LAST_MONTH:
                simplified_cost_name = get_simplified_cost_name(record)
                if simplified_cost_name is not None:
                    # Current month accrues detailed buckets; the previous
                    # month only contributes to last_month_cost.
                    if usage_start_date >= BEGIN_CURRENT_MONTH:
                        s3_usage[record.get('lineItem/ResourceId', '')][simplified_cost_name] += float(record['lineItem/UnblendedCost'])
                        if simplified_cost_name == 'storage_cost':
                            s3_usage[record.get('lineItem/ResourceId', '')]['usage_gb_month'] += float(record['lineItem/UsageAmount'])
                    else:
                        s3_usage[record.get('lineItem/ResourceId', '')]['last_month_cost'] += float(record['lineItem/UnblendedCost'])

with open(OUT_PATH_S3, 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['Bucket', 'Usage-GB-Month', 'StorageCost', 'BandwidthCost', 'RequestsCost', 'CurrentTotal', 'LastMonthTotal'])
    for bucket in sorted(s3_usage, key=lambda resid: s3_usage[resid]['last_month_cost'], reverse=True):
        writer.writerow([
            bucket,
            s3_usage[bucket]['usage_gb_month'],
            s3_usage[bucket]['storage_cost'],
            s3_usage[bucket]['bandwidth_cost'],
            s3_usage[bucket]['requests_cost'],
            s3_usage[bucket]['storage_cost'] + s3_usage[bucket]['bandwidth_cost'] + s3_usage[bucket]['requests_cost'],
            s3_usage[bucket]['last_month_cost'],
        ])
# -------------------------------------------------------------------------- /src/make_sheet.py:
#!/usr/bin/env python3

import collections
import csv
import itertools
import json
import os
import pprint

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import httplib2

from sheets import *
import utils

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at
~/.credentials/sheets.googleapis.com-python-quickstart.json 27 | SCOPES = ' '.join([ 28 | 'https://www.googleapis.com/auth/spreadsheets', 29 | 'https://www.googleapis.com/auth/drive', 30 | 'https://www.googleapis.com/auth/drive.file', 31 | ]) 32 | CLIENT_SECRET_FILE = 'client_secret.json' 33 | APPLICATION_NAME = 'Google Sheets API Python Quickstart' 34 | 35 | SHEET_RESERVATIONS_SUMMARY = 1 36 | 37 | PRETTY_FIELD_NAMES = { 38 | 'instance_type' : 'Instance type', 39 | 'availability_zone' : 'Availability zone', 40 | 'tenancy' : 'Tenancy', 41 | 'product' : 'Product', 42 | 'count' : 'Count', 43 | 'count_reserved' : 'Count (reserved)', 44 | 'cost_ondemand' : 'Cost (on demand)', 45 | 'cost_reserved_worst' : 'Cost (worst reserved)', 46 | 'cost_reserved_best' : 'Cost (best reserved)', 47 | } 48 | 49 | PRETTY_FIELD_GROUPS = { 50 | 'reservation': 'Reservation', 51 | 'hourly_cost_per_instance': 'Hourly cost per instance', 52 | } 53 | 54 | NUMFORMAT_CURRENCY = '#,##0.000 [$USD]' 55 | NUMFORMAT_PERCENT = '0.00%' 56 | NUMFORMAT_PERCENT_VAR = '\+0.00%;\-0.00%' 57 | 58 | IN_INSTANCE_RESERVATION_USAGE_DIR = 'out/instance-reservation-usage' 59 | IN_RESERVATION_USAGE_DIR = 'out/reservation-usage' 60 | IN_ABSOLUTE_COST_PER_MONTH = 'out/absolute.csv' 61 | IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR = 'out/instance-size-recommendation' 62 | IN_INSTANCE_HISTORY = 'out/instance-history.csv' 63 | 64 | COLOR_RED_BG = { 'red': 0xFF/float(0xFF), 'green': 0xCC/float(0xFF), 'blue': 0xCC/float(0xFF) } 65 | COLOR_RED_FG = { 'red': 0xCC/float(0xFF), 'green': 0x00/float(0xFF), 'blue': 0x00/float(0xFF) } 66 | COLOR_GREEN_BG = { 'red': 0xCC/float(0xFF), 'green': 0xFF/float(0xFF), 'blue': 0xCC/float(0xFF) } 67 | COLOR_GREEN_FG = { 'red': 0x00/float(0xFF), 'green': 0x66/float(0xFF), 'blue': 0x00/float(0xFF) } 68 | 69 | def _with_trailing(it, trail): 70 | return itertools.chain(it, itertools.repeat(trail)) 71 | 72 | def get_credentials(): 73 | """Gets valid user credentials from storage. 
74 | 75 | If nothing has been stored, or if the stored credentials are invalid, 76 | the OAuth2 flow is completed to obtain the new credentials. 77 | 78 | Returns: 79 | Credentials, the obtained credential. 80 | """ 81 | home_dir = os.path.expanduser('~') 82 | credential_dir = os.path.join(home_dir, '.credentials') 83 | if not os.path.exists(credential_dir): 84 | os.makedirs(credential_dir) 85 | credential_path = os.path.join(credential_dir, 86 | 'sheets.googleapis.com-python-quickstart.json') 87 | 88 | store = Storage(credential_path) 89 | credentials = store.get() 90 | if not credentials or credentials.invalid: 91 | flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES) 92 | flow.user_agent = APPLICATION_NAME 93 | if flags: 94 | credentials = tools.run_flow(flow, store, flags) 95 | else: # Needed only for compatibility with Python 2.6 96 | credentials = tools.run(flow, store) 97 | print('Storing credentials to ' + credential_path) 98 | return credentials 99 | 100 | def reserved_summary(): 101 | def cost_monthly(sheet, row, column, field): 102 | base_hourly = sheet.field_index('cost_ondemand') 103 | base_monthly = sheet.field_index('cost_monthly_ondemand') 104 | return Formula('={}*{}*720'.format( 105 | sheet.field_address('count', row, 2), 106 | sheet.address(base_hourly + (column - base_monthly), row), 107 | )) 108 | def savings_monthly(sheet, row, column, field): 109 | base_ondemand = sheet.field_index('cost_ondemand') 110 | base_reserved = sheet.field_index('cost_reserved_worst') 111 | base_savings = sheet.field_index('savings_reserved_worst') 112 | return Formula('=1-{}/{}'.format( 113 | sheet.address(base_reserved + (column - base_savings), row), 114 | sheet.field_address('cost_ondemand', row, 2), 115 | )) 116 | fields = ( 117 | FieldGroup('Reservation', ( 118 | Field('instance_type' , 'instance_type' , str , 'Instance type' , None) , 119 | Field('availability_zone' , 'availability_zone' , str , 'Availability zone' , None) , 120 | Field('tenancy' 
, 'tenancy' , str , 'Tenancy' , None) , 121 | Field('product' , 'product' , str , 'Product' , None) , 122 | )), 123 | Field( 'count' , 'count' , int , 'Count' , '0') , 124 | Field( 'count_reserved' , 'count_reserved' , int , 'Count (reserved)' , '0') , 125 | FieldGroup('Hourly cost per instance', ( 126 | Field('cost_ondemand' , 'cost_ondemand' , float , 'On demand' , NUMFORMAT_CURRENCY) , 127 | Field('cost_reserved_worst' , 'cost_reserved_worst' , float , 'Worst reserved' , NUMFORMAT_CURRENCY) , 128 | Field('cost_reserved_best' , 'cost_reserved_best' , float , 'Best reserved' , NUMFORMAT_CURRENCY) , 129 | )), 130 | FieldGroup('Monthly cost total', ( 131 | Field('cost_monthly_ondemand' , 'cost_monthly_ondemand' , cost_monthly , 'On demand' , NUMFORMAT_CURRENCY) , 132 | Field('cost_monthly_reserved_worst' , 'cost_monthly_reserved_worst' , cost_monthly , 'Worst reserved' , NUMFORMAT_CURRENCY) , 133 | Field('cost_monthly_reserved_best' , 'cost_monthly_reserved_best' , cost_monthly , 'Best reserved' , NUMFORMAT_CURRENCY) , 134 | )), 135 | FieldGroup('Savings over on demand', ( 136 | Field('savings_reserved_worst' , 'savings_reserved_worst' , savings_monthly , 'Worst reserved' , NUMFORMAT_PERCENT) , 137 | Field('savings_reserved_best' , 'savings_reserved_best' , savings_monthly , 'Best reserved' , NUMFORMAT_PERCENT) , 138 | )) 139 | ) 140 | conditional_format = ( 141 | ConditionalFormat('CUSTOM_FORMULA', '=(INDIRECT(ADDRESS(ROW(), COLUMN() - 1)) = INDIRECT(ADDRESS(ROW(), COLUMN())))', { 142 | 'backgroundColor': COLOR_GREEN_BG, 143 | 'textFormat': { 144 | 'foregroundColor': COLOR_GREEN_FG, 145 | }, 146 | }), 147 | ) 148 | with utils.csv_folder(IN_INSTANCE_RESERVATION_USAGE_DIR) as records: 149 | sheet = Sheet( 150 | source=records, 151 | fields=fields, 152 | sheet_id=1, 153 | fields_conditional_formats=tuple( 154 | ColumnConditionalFormat(column, conditional_format) 155 | for column in field_flatten(FieldRoot(fields)) if column.name == 'count_reserved' 156 | ) 157 | ) 158 
| sheet.properties['title'] = 'Reserved instance summary' 159 | return sheet.to_dict() 160 | 161 | def _returns(value): 162 | def f(*args, **kwargs): 163 | return value 164 | return f 165 | 166 | def reservation_usage_summary(): 167 | def effective_cost(sheet, row, column, field): 168 | return Formula('={}/720+{}'.format( 169 | sheet.field_address('cost_upfront', row, 2), 170 | sheet.field_address('cost_hourly', row, 2), 171 | )) 172 | def monthly_losses(sheet, row, column, field): 173 | return Formula('({reserved}-{used})*{effective}*720'.format( 174 | reserved =sheet.field_address('count_reserved', row, 2), 175 | used =sheet.field_address('count_used', row, 2), 176 | effective=sheet.field_address('effective_cost', row, 2), 177 | )) 178 | fields = ( 179 | FieldGroup('Reservation', ( 180 | Field('instance_type' , 'instance_type' , str , 'Instance type' , None) , 181 | Field('availability_zone' , 'availability_zone' , str , 'Availability zone' , None) , 182 | Field('tenancy' , 'tenancy' , str , 'Tenancy' , None) , 183 | Field('product' , 'product' , str , 'Product' , None) , 184 | )), 185 | FieldGroup('Count', ( 186 | Field('count_reserved' , 'count' , int , 'Reserved' , None) , 187 | Field('count_used' , 'count_used' , int , 'Used' , None) , 188 | )), 189 | FieldGroup('Cost per instance', ( 190 | Field('cost_upfront' , 'cost_upfront' , float , 'Upfront' , NUMFORMAT_CURRENCY) , 191 | Field('cost_hourly' , 'cost_hourly' , float , 'Hourly' , NUMFORMAT_CURRENCY) , 192 | Field('effective_cost' , 'effective_cost' , effective_cost , 'Effective', NUMFORMAT_CURRENCY), 193 | )), 194 | Field( 'monthly_losses' , 'monthly_losses' , monthly_losses , 'Monthly losses', NUMFORMAT_CURRENCY), 195 | ) 196 | with utils.csv_folder(IN_RESERVATION_USAGE_DIR) as records: 197 | sheet = Sheet( 198 | source=records, 199 | fields=fields, 200 | sheet_id=3, 201 | ) 202 | sheet.properties['title'] = 'Reservation usage summary' 203 | return sheet.to_dict() 204 | 205 | def weekly_variations(): 206 
| def variation(sheet, row, column, field): 207 | prev_address = sheet.address(column - 1, row) 208 | next_address = sheet.address(column + 1, row) 209 | return Formula('=IF({0}=0,"",{1}/{0}-1)'.format( 210 | prev_address, 211 | next_address, 212 | )) 213 | def total(sheet, row, column, field): 214 | cost_fields = [ 215 | f 216 | for f in sheet.fields_flat() if '_cost' in f.name 217 | ] 218 | return Formula('=SUM({})'.format( 219 | ','.join( 220 | sheet.field_address(f, row) 221 | for f in cost_fields 222 | ) 223 | )) 224 | with open(IN_ABSOLUTE_COST_PER_MONTH) as f: 225 | reader = csv.DictReader(f) 226 | fields = ( 227 | Field( 'usage' , 'usage' , str , 'Usage type' , None), 228 | FieldGroup('Monthly cost', tuple( 229 | FieldGroup(isoweek, 230 | ( 231 | ( 232 | Field(isoweek+'_var', isoweek, variation, 'Variation', NUMFORMAT_PERCENT_VAR), 233 | ) if not is_first_week else () 234 | ) + ( 235 | Field(isoweek+'_cost', isoweek, float , 'Cost' , NUMFORMAT_CURRENCY), 236 | ) 237 | ) 238 | for isoweek, is_first_week in zip(reader.fieldnames[1:], _with_trailing((True,), False)) 239 | )), 240 | Field('total', 'total', total, 'Total', NUMFORMAT_CURRENCY), 241 | ) 242 | variation_conditional_format = ( 243 | ConditionalFormat('NUMBER_GREATER', '0', { 244 | 'backgroundColor': COLOR_RED_BG, 245 | 'textFormat': { 246 | 'foregroundColor': COLOR_RED_FG, 247 | }, 248 | }), 249 | ConditionalFormat('NUMBER_LESS_THAN_EQ', '0', { 250 | 'backgroundColor': COLOR_GREEN_BG, 251 | 'textFormat': { 252 | 'foregroundColor': COLOR_GREEN_FG, 253 | }, 254 | }) 255 | ) 256 | variation_columns = ( 257 | f 258 | for f in field_flatten(FieldRoot(fields)) if '_var' in f.name 259 | ) 260 | source = sorted( 261 | reader, 262 | key=(lambda row: sum(float(v) for k, v in row.items() if k != 'usage')), 263 | reverse=True, 264 | ) 265 | sheet = Sheet( 266 | source=source, 267 | fields=fields, 268 | fields_conditional_formats=tuple( 269 | ColumnConditionalFormat(column, variation_conditional_format) 270 | 
# Known EC2 size suffixes, ordered from smallest to largest.
INSTANCE_SIZES = [
    'nano',
    'micro',
    'small',
    'medium',
    'large',
    'xlarge',
    '2xlarge',
    '4xlarge',
    '8xlarge',
    '9xlarge',
    '10xlarge',
    '12xlarge',
    '16xlarge',
    '18xlarge',
    '24xlarge',
    '32xlarge',
]

def instance_history():
    """Build the 'Instance count history' sheet: per-type instance counts over time."""
    with open(IN_INSTANCE_HISTORY) as f:
        reader = csv.DictReader(f)
        # First CSV column is the date; every remaining column is an instance type.
        count_columns = tuple(
            Field(type_name, type_name, int, type_name, None)
            for type_name in reader.fieldnames[1:]
        )
        fields = (
            Field('date', 'date', str, 'Date', None),
            FieldGroup('Instance count', count_columns),
        )
        sheet = Sheet(
            source=reader,
            fields=fields,
            sheet_id=5,
        )
        sheet.properties['title'] = 'Instance count history'
        return sheet.to_dict()

def instance_size_recommendations():
    """Build the 'Instance size recommendations' sheet from the recommendation CSVs."""
    fields = (
        FieldGroup('Instance', (
            Field('account', 'account', str, 'Account', None),
            Field('id', 'id', str, 'ID', None),
            Field('name', 'name', str, 'Name', None),
            Field('size', 'size', str, 'Type', None),
            Field('lifecycle', 'lifecycle', str, 'Lifecycle', None),
        )),
        Field('recommendation', 'recommendation', str, 'Recommended', None),
    )
    with utils.csv_folder(IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR) as source:
        sheet = Sheet(
            source=source,
            fields=fields,
            sheet_id=4,
        )
        sheet.properties['title'] = 'Instance size recommendations'
        return sheet.to_dict()
def main():
    """Generate the cost report spreadsheet through the Google Sheets API.

    Builds every sheet description, then creates a new spreadsheet containing
    them via the Sheets v4 ``spreadsheets.create`` endpoint and prints the
    API response.
    """
    # NOTE: the old docstring was leftover Google quickstart boilerplate
    # describing an unrelated sample; replaced with what this actually does.
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    discoveryUrl = ('https://sheets.googleapis.com/$discovery/rest?'
                    'version=v4')
    service = discovery.build('sheets', 'v4', http=http,
                              discoveryServiceUrl=discoveryUrl)

    reserved_summary_data = reserved_summary()
    weekly_variations_data = weekly_variations()
    reservation_usage_summary_data = reservation_usage_summary()
    instance_size_recommendations_data = instance_size_recommendations()
    instance_history_data = instance_history()

    body = {
        'properties': {
            'title': 'my generated spreadsheet',
        },
        'sheets': [
            weekly_variations_data,
            reserved_summary_data,
            reservation_usage_summary_data,
            instance_size_recommendations_data,
            instance_history_data,
        ],
    }

    # The request object is lazy: nothing is sent until execute() runs.
    spreadsheet = service.spreadsheets().create(body=body)
    print(spreadsheet.execute())

if __name__ == '__main__':
    main()
'instance_type': 'Instance type', 23 | 'availability_zone': 'Availability zone', 24 | 'tenancy': 'Tenancy', 25 | 'product': 'Product', 26 | 'count': 'Count', 27 | 'count_reserved': 'Count (reserved)', 28 | 'cost_ondemand': 'Cost (on demand)', 29 | 'cost_reserved_worst': 'Cost (worst reserved)', 30 | 'cost_reserved_best': 'Cost (best reserved)', 31 | } 32 | 33 | PRETTY_FIELD_GROUPS = { 34 | 'reservation': 'Reservation', 35 | 'hourly_cost_per_instance': 'Hourly cost per instance', 36 | } 37 | 38 | NUMFORMAT_CURRENCY = '#,##0.000 [$USD]' 39 | NUMFORMAT_PERCENT = '0.00%' 40 | NUMFORMAT_PERCENT_VAR = '\+0.00%;\-0.00%' 41 | 42 | IN_INSTANCE_RESERVATION_USAGE_DIR = 'out/instance-reservation-usage' 43 | IN_RESERVATION_USAGE_DIR = 'out/reservation-usage' 44 | IN_ABSOLUTE_COST_PER_MONTH = 'out/absolute.csv' 45 | IN_INSTANCE_SIZE_RECOMMENDATIONS_DIR = 'out/instance-size-recommendation' 46 | IN_INSTANCE_HISTORY = 'out/instance-history.csv' 47 | IN_INSTANCE_USAGE_LAST_MONTH = 'out/last-month/ec2_instances.csv' 48 | IN_EC2_BANDWIDTH_USAGE_LAST_MONTH = 'out/last-month/ec2_bandwidth.csv' 49 | IN_EBS_USAGE_LAST_MONTH = 'out/last-month/ebs.csv' 50 | IN_SNAPSHOT_USAGE_LAST_MONTH = 'out/last-month/snapshots.csv' 51 | IN_S3_COST = 'out/s3/current_usage.csv' 52 | 53 | COLOR_RED_BG = "#ffcccc" 54 | COLOR_RED_FG = "#cc0000" 55 | COLOR_GREEN_BG = "#ccffcc" 56 | COLOR_GREEN_FG = "#006600" 57 | 58 | 59 | def _with_trailing(it, trail): 60 | return itertools.chain(it, itertools.repeat(trail)) 61 | 62 | 63 | def gen_reserved_summary(workbook, header_format, val_format): 64 | with utils.csv_folder(IN_INSTANCE_RESERVATION_USAGE_DIR) as records: 65 | worksheet = workbook.add_worksheet("Reserved instance summary") 66 | 67 | worksheet.freeze_panes(2, 0) 68 | worksheet.set_column("A:O", 15) 69 | worksheet.merge_range("A1:D1", "Reservation", header_format) 70 | worksheet.merge_range("E1:F1", "Count", header_format) 71 | worksheet.merge_range("G1:I1", "Cost per instance", header_format) 72 | 
worksheet.merge_range("J1:L1", "Total monthly cost", header_format) 73 | worksheet.merge_range("M1:N1", "Savings over on demand", header_format) 74 | 75 | green_format = workbook.add_format() 76 | green_format.set_color(COLOR_GREEN_FG) 77 | green_format.set_bg_color(COLOR_GREEN_BG) 78 | 79 | cur_format = workbook.add_format() 80 | cur_format.set_align("center") 81 | cur_format.set_align("vcenter") 82 | cur_format.set_border() 83 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 84 | 85 | per_format = workbook.add_format() 86 | per_format.set_align("center") 87 | per_format.set_align("vcenter") 88 | per_format.set_border() 89 | per_format.set_num_format(NUMFORMAT_PERCENT) 90 | 91 | refs = { 92 | "instance_type": [0, "Instance type", str, val_format], 93 | "availability_zone": [1, "Availability zone", str, val_format], 94 | "tenancy": [2, "Tenancy", str, val_format], 95 | "product": [3, "Product", str, val_format], 96 | "count": [4, "Running", int, val_format], 97 | "count_reserved": [5, "Reserved", int, val_format], 98 | "cost_ondemand": [6, "On demand", float, cur_format], 99 | "cost_reserved_worst": [7, "Worst reserved", float, cur_format], 100 | "cost_reserved_best": [8, "Best reserved", float, cur_format], 101 | "cost_monthly_ondemand": [9, "On demand", float, cur_format], 102 | "cost_monthly_reserved_worst": [10, "Worst reserved", float, cur_format], 103 | "cost_monthly_reserved_best": [11, "Best reserved", float, cur_format], 104 | "savings_reserved_worst": [12, "Worst reserved", float, per_format], 105 | "savings_reserved_best": [13, "Best reserved", float, per_format], 106 | } 107 | for v in refs.values(): 108 | worksheet.write(1, v[0], v[1], header_format) 109 | for i, line in zip(itertools.count(2), records): 110 | for h, v in line.items(): 111 | worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3]) 112 | for h in ("cost_monthly_ondemand", "cost_monthly_reserved_worst", "cost_monthly_reserved_best"): 113 | res = float(line["count"]) * \ 114 | 
float(line["cost_" + h[13:]]) * 720 115 | worksheet.write_formula( 116 | i, refs[h][0], 117 | "=E{}*{}{}*720".format(i+1, chr(ord('A') + 118 | refs[h][0] - 3), i+1), refs[h][3], 119 | res, 120 | ) 121 | for h in ("savings_reserved_worst", "savings_reserved_best"): 122 | res = 1 - float(line[h.replace("savings", "cost")] 123 | ) / float(line["cost_ondemand"]) 124 | worksheet.write_formula( 125 | i, refs[h][0], 126 | "=1-{}{}/G{}".format(chr(ord('A') + 127 | refs[h][0] - 5), i+1, i+1), refs[h][3], 128 | res, 129 | ) 130 | worksheet.conditional_format("F{}".format(i+1), { 131 | "type": "cell", 132 | "criteria": "equal to", 133 | "value": "E{}".format(i+1), 134 | "format": green_format, 135 | }) 136 | 137 | 138 | def gen_reservation_usage_summary(workbook, header_format, val_format): 139 | with utils.csv_folder(IN_RESERVATION_USAGE_DIR) as records: 140 | worksheet = workbook.add_worksheet("Reservation usage summary") 141 | 142 | worksheet.freeze_panes(2, 0) 143 | worksheet.set_column("A:J", 18) 144 | worksheet.merge_range("A1:D1", "Reservation", header_format) 145 | worksheet.merge_range("E1:F1", "Count", header_format) 146 | worksheet.merge_range("G1:I1", "Cost per instance", header_format) 147 | worksheet.merge_range("J1:J2", "Monthly losses", header_format) 148 | 149 | green_format = workbook.add_format() 150 | green_format.set_color(COLOR_GREEN_FG) 151 | green_format.set_bg_color(COLOR_GREEN_BG) 152 | 153 | cur_format = workbook.add_format() 154 | cur_format.set_align("center") 155 | cur_format.set_align("vcenter") 156 | cur_format.set_border() 157 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 158 | 159 | refs = { 160 | "instance_type": [0, "Instance type", str, val_format], 161 | "availability_zone": [1, "Availability zone", str, val_format], 162 | "tenancy": [2, "Tenancy", str, val_format], 163 | "product": [3, "Product", str, val_format], 164 | "count": [4, "Reserved", int, val_format], 165 | "count_used": [5, "Used", int, val_format], 166 | "cost_upfront": 
[6, "Upfront", float, cur_format], 167 | "cost_hourly": [7, "Hourly", float, cur_format], 168 | "effective_cost": [8, "Effective", float, cur_format], 169 | "monthly_losses": [9, "Monthly losses", float, cur_format], 170 | } 171 | for v in refs.values(): 172 | worksheet.write(1, v[0], v[1], header_format) 173 | for i, line in zip(itertools.count(2), records): 174 | for h, v in line.items(): 175 | worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3]) 176 | effective_cost = float( 177 | line["cost_upfront"]) / 720 + float(line["cost_hourly"]) 178 | worksheet.write_formula( 179 | i, refs["effective_cost"][0], 180 | "=G{}/720+H{}".format(*[i+1]*2), refs["effective_cost"][3], 181 | effective_cost, 182 | ) 183 | worksheet.conditional_format("F{}".format(i + 1), { 184 | "type": "cell", 185 | "criteria": "equal to", 186 | "value": "E{}".format(i + 1), 187 | "format": green_format, 188 | }) 189 | worksheet.write( 190 | i, refs["monthly_losses"][0], 191 | "=(E{}-F{})*I{}*720".format(*[i+1] 192 | * 3), refs["monthly_losses"][3], 193 | (float(line["count"]) - float(line["count_used"]) 194 | ) * effective_cost * 720, 195 | ) 196 | 197 | 198 | def gen_weekly_variations(workbook, header_format, val_format): 199 | def to_alpha(x): return chr(ord('A') + x) 200 | 201 | with open(IN_ABSOLUTE_COST_PER_MONTH) as f: 202 | reader = csv.DictReader(f) 203 | source = sorted( 204 | reader, 205 | key=(lambda row: sum(float(v) 206 | for k, v in row.items() if k != 'usage')), 207 | reverse=True, 208 | ) 209 | worksheet = workbook.add_worksheet("Cost variations") 210 | 211 | worksheet.freeze_panes(3, 1) 212 | worksheet.set_column("A:A", 30) 213 | worksheet.set_column("B:M", 14) 214 | worksheet.merge_range("A1:A3", "Usage type", header_format) 215 | worksheet.merge_range("B1:L1", "Monthly cost", header_format) 216 | worksheet.merge_range("M1:M3", "Total", header_format) 217 | 218 | green_format = workbook.add_format() 219 | green_format.set_color(COLOR_GREEN_FG) 220 | 
def gen_weekly_variations(workbook, header_format, val_format):
    """Write the 'Cost variations' worksheet: weekly costs and deltas per usage type."""
    def to_alpha(x):
        return chr(ord('A') + x)

    with open(IN_ABSOLUTE_COST_PER_MONTH) as f:
        reader = csv.DictReader(f)
        # Most expensive usage types first.
        source = sorted(
            reader,
            key=lambda row: sum(float(v) for k, v in row.items() if k != 'usage'),
            reverse=True,
        )
        worksheet = workbook.add_worksheet("Cost variations")

        worksheet.freeze_panes(3, 1)
        worksheet.set_column("A:A", 30)
        worksheet.set_column("B:M", 14)
        worksheet.merge_range("A1:A3", "Usage type", header_format)
        worksheet.merge_range("B1:L1", "Monthly cost", header_format)
        worksheet.merge_range("M1:M3", "Total", header_format)

        green_format = workbook.add_format()
        green_format.set_color(COLOR_GREEN_FG)
        green_format.set_bg_color(COLOR_GREEN_BG)

        red_format = workbook.add_format()
        red_format.set_color(COLOR_RED_FG)
        red_format.set_bg_color(COLOR_RED_BG)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        per_format = workbook.add_format()
        per_format.set_align("center")
        per_format.set_align("vcenter")
        per_format.set_border()
        per_format.set_num_format(NUMFORMAT_PERCENT)

        # Keep only the most recent weeks when there are too many to display.
        # NOTE(review): the slice keeps 5 fields although the B1:L1 header has
        # room for 6 — looks off by one; confirm intent before changing.
        date_fieldnames = reader.fieldnames[1:-1]
        if len(date_fieldnames) > 6:
            date_fieldnames = date_fieldnames[-5:]
        # column index, has a variation column to its left, value parser
        refs = {
            header: [i, True, float]
            for i, header in zip(itertools.count(3, 2), date_fieldnames[1:])
        }
        refs[date_fieldnames[0]] = [1, False, float]
        refs["usage"] = [0, False, str]
        for name, meta in refs.items():
            if meta[1]:
                worksheet.merge_range(1, meta[0]-1, 1, meta[0], name, header_format)
                worksheet.write(2, meta[0]-1, "Variation", header_format)
                worksheet.write(2, meta[0], "Cost", header_format)
            else:
                worksheet.write(1, meta[0], name, header_format)
                worksheet.write(2, meta[0], "Cost", header_format)
        for row, line in zip(itertools.count(3), source):
            for name, meta in refs.items():
                val = line[name]
                worksheet.write(row, meta[0], meta[2](val), cur_format)
                if meta[1]:
                    # Cached value for the variation formula; blank when the
                    # previous week's cost is zero.
                    before = float(line[date_fieldnames[int(meta[0]/2-1)]])
                    worksheet.write_formula(
                        row, meta[0]-1,
                        "=IF({}{}=0,\"\",{}{}/{}{}-1)".format(
                            to_alpha(meta[0] - 2),
                            row+1,
                            to_alpha(meta[0]),
                            row+1,
                            to_alpha(meta[0] - 2),
                            row+1,
                        ), per_format,
                        " " if before == 0.0 else meta[2](val) / before - 1,
                    )
                    var_cell = "{}{}".format(to_alpha(meta[0]-1), row+1)
                    worksheet.conditional_format(var_cell, {
                        "type": "cell",
                        "criteria": "greater than",
                        "value": "0",
                        "format": red_format,
                    })
                    worksheet.conditional_format(var_cell, {
                        "type": "cell",
                        "criteria": "less than or equal to",
                        "value": "0",
                        "format": green_format,
                    })
            # Row total over every week in the file, including any weeks not
            # displayed because of the truncation above.
            worksheet.write("M{}".format(row+1),
                            sum(float(line[o]) for o in reader.fieldnames[1:]),
                            cur_format)
def gen_weekly_variations_chart(workbook, header_format, val_format):
    """Write the 'Cost variations chart' worksheet: a line chart of the five
    most expensive usage types across the recorded weeks."""
    with open(IN_ABSOLUTE_COST_PER_MONTH) as f:
        reader = csv.DictReader(f)
        source = sorted(
            reader,
            key=(lambda row: sum(float(v) for k, v in row.items() if k != 'usage')),
            reverse=True,
        )[:5]

        header = ['usage'] + sorted([s for s in source[0] if s != 'usage'])
        data = [
            [float(s[h]) if h != 'usage' else s[h] for h in header]
            for s in source
        ]
        chart = workbook.add_chart({
            "type": "line"
        })
        chartsheet = workbook.add_worksheet("Cost variations chart")
        # The table occupies rows 1..len(data)+1 (header row + one row per
        # usage type) and columns 1..len(header) (usage name + one per week).
        # FIX: both bounds were previously one short (treated as exclusive),
        # dropping the last week column and the last usage-type series.
        last_row = len(data) + 1
        last_col = len(header)
        chartsheet.add_table(1, 1, last_row, last_col,
                             {'data': data, 'columns': [{'header': h} for h in header]})
        for i in range(2, last_row + 1):
            chart.add_series({
                "values": ["Cost variations chart", i, 2, i, last_col],
                "categories": ["Cost variations chart", 1, 2, 1, last_col],
                "name": ["Cost variations chart", i, 1],
            })
        chartsheet.insert_chart('A1', chart, {'x_scale': 3, 'y_scale': 2})
def gen_instance_count_history_chart(workbook, header_format, val_format):
    """Write the 'Instance count history chart' chartsheet: one line series
    per instance type plus the running total.

    Reads the same CSV that gen_instance_count_history renders, so the series
    ranges must match that worksheet's layout (headers on rows 0-1, data
    starting at row 2).
    """
    with open(IN_INSTANCE_HISTORY) as f:
        reader = csv.DictReader(f)

        chart = workbook.add_chart({
            "type": "line"
        })
        row_count = len(list(reader))  # number of data rows in the CSV
        # The history worksheet writes data on rows 2 .. row_count+1.
        # FIX: the series previously ended at row_count-1, silently dropping
        # the last two data points of every series.
        last_row = row_count + 1
        for i, fieldname in zip(itertools.count(1), reader.fieldnames[1:] + ["Total"]):
            chart.add_series({
                "values": ["Instance count history", 2, i, last_row, i],
                "categories": ["Instance count history", 2, 0, last_row, 0],
                "name": fieldname,
            })
        chartsheet = workbook.add_chartsheet("Instance count history chart")
        chartsheet.set_chart(chart)
saving", header_format) 388 | worksheet.merge_range("I1:I2", "Reason", header_format) 389 | 390 | worksheet.freeze_panes(2, 0) 391 | 392 | refs = { 393 | "account": [0, "Account"], 394 | "id": [1, "ID"], 395 | "name": [2, "Name"], 396 | "size": [3, "Type"], 397 | "lifecycle": [4, "Lifecycle"], 398 | "cpu_usage": [5, "CPU Utilization (Avg.)"], 399 | "recommendation": [6, "Recommendation"], 400 | "saving": [7, "Saving"], 401 | "reason": [8, "Reason"] 402 | } 403 | for i in refs.values(): 404 | worksheet.write(1, i[0], i[1], header_format) 405 | for i, line in zip(itertools.count(2), source): 406 | for h, v in line.items(): 407 | worksheet.write(i, refs[h][0], transform(h, v), val_format) 408 | 409 | def instance_summary(workbook, header_format, val_format): 410 | bandwidth_usage = {} 411 | ebs_usage = defaultdict(int) 412 | def transform(x): 413 | try: 414 | if x == "": return 0.0 415 | else: return float(x) 416 | except ValueError: 417 | return x 418 | with open(IN_EC2_BANDWIDTH_USAGE_LAST_MONTH) as f: 419 | reader = csv.reader(f) 420 | for i, line in itertools.islice(zip(itertools.count(2), reader), 1, None): 421 | bandwidth_usage[line[0]] = transform(line[1]) 422 | with open(IN_EBS_USAGE_LAST_MONTH) as f: 423 | reader = csv.reader(f) 424 | for line in itertools.islice(reader, 1, None): 425 | ebs_usage[line[4]] += transform(line[3]) 426 | with open(IN_INSTANCE_USAGE_LAST_MONTH) as f: 427 | reader = csv.DictReader(f) 428 | worksheet = workbook.add_worksheet("EC2 instances last month") 429 | 430 | last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1) 431 | worksheet.merge_range("A1:I1", "Instances for {}-{:02d}".format(last_month.year, last_month.month), header_format) 432 | worksheet.merge_range("J1:J2", "Total", header_format) 433 | 434 | cur_format = workbook.add_format() 435 | cur_format.set_align("center") 436 | cur_format.set_align("vcenter") 437 | cur_format.set_border() 438 | cur_format.set_num_format(NUMFORMAT_CURRENCY) 439 | 440 | 
def ebs_summary(workbook, header_format, val_format):
    """Write the 'EBS last month' worksheet: last month's cost per EBS volume,
    with the instance each volume is attached to."""
    def parse_value(raw):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        if raw == "":
            return 0.0
        try:
            return float(raw)
        except ValueError:
            return raw
    with open(IN_EBS_USAGE_LAST_MONTH) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("EBS last month")

        last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1)
        worksheet.merge_range("A1:F1", "EBS for {}-{:02d}".format(last_month.year, last_month.month), header_format)
        worksheet.merge_range("A2:A3", "Account", header_format)
        worksheet.merge_range("B2:B3", "Resource ID", header_format)
        worksheet.merge_range("C2:C3", "Region", header_format)
        worksheet.merge_range("D2:D3", "Cost", header_format)
        worksheet.merge_range("E2:F2", "Instance Linked", header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(3, 0)
        worksheet.set_column(0, len(reader.fieldnames)-1, 25)

        # column index, header label, value parser, cell format
        refs = {
            "Account": [0, "Account", str, val_format],
            "ResourceId": [1, "Resource Id", str, val_format],
            "Region": [2, "Region", str, val_format],
            "Cost": [3, "Cost", parse_value, cur_format],
            "InstanceId": [4, "ID", str, val_format],
            "InstanceName": [5, "Name", str, val_format],
        }
        for spec in refs.values():
            worksheet.write(2, spec[0], spec[1], header_format)
        for row, record in zip(itertools.count(3), reader):
            for key, raw in record.items():
                spec = refs[key]
                worksheet.write(row, spec[0], spec[2](raw), spec[3])
def snapshots_summary(workbook, header_format, val_format):
    """Write the 'Snapshots last month' worksheet: last month's cost per snapshot."""
    def parse_value(raw):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        if raw == "":
            return 0.0
        try:
            return float(raw)
        except ValueError:
            return raw
    with open(IN_SNAPSHOT_USAGE_LAST_MONTH) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("Snapshots last month")

        last_month = datetime.now() + dateutil.relativedelta.relativedelta(months=-1)
        worksheet.merge_range("A1:C1", "Snapshots for {}-{:02d}".format(last_month.year, last_month.month), header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(2, 0)
        worksheet.set_column(0, 0, 25)
        worksheet.set_column(1, 1, 80)
        worksheet.set_column(2, 2, 25)

        # column index, header label, value parser, cell format
        refs = {
            "Account": [0, "Account", str, val_format],
            "ResourceId": [1, "Resource Id", str, val_format],
            "Cost": [2, "Cost", parse_value, cur_format],
        }
        for spec in refs.values():
            worksheet.write(1, spec[0], spec[1], header_format)
        for row, record in zip(itertools.count(2), reader):
            for key, raw in record.items():
                spec = refs[key]
                worksheet.write(row, spec[0], spec[2](raw), spec[3])
def gen_s3_cost(workbook, header_format, val_format):
    """Write the 'S3 cost' worksheet: current-month cost breakdown per bucket."""
    def transform(x):
        # Numeric when possible; empty cells count as 0.0, other text is kept.
        try:
            if x == "": return 0.0
            else: return float(x)
        except ValueError:
            return x
    with open(IN_S3_COST) as f:
        reader = csv.DictReader(f)
        worksheet = workbook.add_worksheet("S3 cost")

        worksheet.merge_range("A1:F1", "S3 cost for current month", header_format)

        cur_format = workbook.add_format()
        cur_format.set_align("center")
        cur_format.set_align("vcenter")
        cur_format.set_border()
        cur_format.set_num_format(NUMFORMAT_CURRENCY)

        worksheet.freeze_panes(2, 0)
        worksheet.set_column(0, 0, 45)
        worksheet.set_column(1, 6, 18)

        # column index, header label, value parser, cell format
        refs = {
            "Bucket": [0, "Bucket", str, val_format],
            # FIX: was bare float, which raised ValueError on an empty cell;
            # every other numeric column already uses the lenient transform.
            "Usage-GB-Month": [1, "Usage (GB-Month)", transform, val_format],
            "StorageCost": [2, "Storage cost", transform, cur_format],
            "BandwidthCost": [3, "Bandwidth cost", transform, cur_format],
            "RequestsCost": [4, "Requests cost", transform, cur_format],
            "CurrentTotal": [5, "Current cost", transform, cur_format],
            "LastMonthTotal": [6, "Last month cost", transform, cur_format],
        }
        for v in refs.values():
            worksheet.write(1, v[0], v[1], header_format)
        for i, line in zip(itertools.count(2), reader):
            for h, v in line.items():
                worksheet.write(i, refs[h][0], refs[h][2](v), refs[h][3])
def gen_introduction(workbook, header_format, val_format):
    """Write the 'Introduction' worksheet: just the cover image."""
    worksheet = workbook.add_worksheet("Introduction")

    worksheet.insert_image("A1", "src/ressources/introduction.png")


def main(name):
    """Generate ./out/<name>.xlsx containing every report worksheet."""
    workbook = xlsxwriter.Workbook('./out/{}.xlsx'.format(name))

    header_format = workbook.add_format()
    header_format.set_bold()
    header_format.set_align("center")
    header_format.set_align("vcenter")
    header_format.set_border()

    val_format = workbook.add_format()
    val_format.set_align("center")
    val_format.set_align("vcenter")
    val_format.set_border()

    # Generator order determines worksheet order in the workbook.
    generators = (
        gen_introduction,
        gen_weekly_variations,
        gen_weekly_variations_chart,
        gen_reserved_summary,
        gen_reservation_usage_summary,
        gen_instance_size_recommendations,
        gen_instance_count_history_chart,
        gen_instance_count_history,
        instance_summary,
        ebs_summary,
        snapshots_summary,
        gen_s3_cost,
    )
    for generate in generators:
        generate(workbook, header_format, val_format)

    workbook.close()


if __name__ == '__main__':
    name = sys.argv[1] if len(sys.argv) > 1 else "sheet"
    print("Generating xlsx file...")
    main(name)
    print("{}.xlsx generated!".format(name))
"""Named-tuple record types shared across the report-generation scripts."""
import collections

# Identity of a running EC2 instance: what it is and where it runs.
InstanceType = collections.namedtuple(
    'InstanceType',
    'size availability_zone tenancy product vpc',
)

# An InstanceType tagged with the AWS profile it was observed under.
InstanceTypeWithProfile = collections.namedtuple(
    'InstanceTypeWithProfile',
    'profile instance_type',
)

# A purchased reservation: its type plus hourly and upfront pricing.
InstanceReservation = collections.namedtuple(
    'InstanceReservation',
    'type cost_hourly cost_upfront',
)

# How many of a given reservation exist and how many are consumed.
InstanceReservationCount = collections.namedtuple(
    'InstanceReservationCount',
    'instance_reservation count count_used',
)

# Pricing for an instance type: on-demand vs best/worst reserved rates.
InstanceOffering = collections.namedtuple(
    'InstanceOffering',
    'type cost_ondemand cost_reserved_worst cost_reserved_best',
)

# How many instances of an offering run, and how many are covered by
# reservations.
InstanceOfferingCount = collections.namedtuple(
    'InstanceOfferingCount',
    'instance_offering count count_reserved',
)
# A single conditional-format rule (Sheets API booleanRule pieces) and the
# binding of a list of such rules to one column's field.
ConditionalFormat = collections.namedtuple('ConditionalFormat', ['type', 'value', 'format'])
ColumnConditionalFormat = collections.namedtuple('ColumnConditionalFormat', ['field', 'formats'])


@singledispatch
def _field_depth(arg):
    """Number of header rows needed to render this field tree."""
    raise ValueError("arg must be Field or FieldGroup, is {}".format(type(arg)))


@_field_depth.register(Field)
def _(_):
    # A plain column occupies exactly one header row.
    return 1


@_field_depth.register(FieldGroup)
def _(group):
    # A group adds one header row above its deepest child.
    return 1 + max(map(_field_depth, group.children))


@_field_depth.register(FieldRoot)
def _(root):
    # The root itself is invisible: depth is just the deepest subtree.
    return max(map(_field_depth, root.children))


@singledispatch
def _field_width(arg):
    """Number of leaf columns spanned by this node."""
    raise ValueError("arg must be Field or FieldGroup")


@_field_width.register(Field)
def _(_):
    return 1


@_field_width.register(FieldGroup)
@_field_width.register(FieldRoot)
def _(node):
    return sum(map(_field_width, node.children))


@singledispatch
def _field_slice(arg, row_num):
    """Nodes rendered on header row `row_num` of this subtree."""
    raise ValueError("arg must be Field or FieldGroup, is {}".format(type(arg)))


@_field_slice.register(Field)
def _(leaf, _):
    # A leaf column reappears on every header row at or below its own.
    return [leaf]


@_field_slice.register(FieldGroup)
def _(group, row_num):
    if row_num == 0:
        return [group]
    sliced = []
    for child in group.children:
        sliced.extend(_field_slice(child, row_num - 1))
    return sliced


@_field_slice.register(FieldRoot)
def _(root, row_num):
    # The root occupies no row of its own; slice children at the same depth.
    sliced = []
    for child in root.children:
        sliced.extend(_field_slice(child, row_num))
    return sliced


@singledispatch
def field_flatten(arg):
    """Depth-first iterable of all leaf Fields under `arg`."""
    raise ValueError("arg is {}".format(type(arg)))


@field_flatten.register(FieldRoot)
@field_flatten.register(FieldGroup)
def _(node):
    return itertools.chain.from_iterable(map(field_flatten, node.children))


@field_flatten.register(Field)
def _(leaf):
    return (leaf,)
[]) 76 | 77 | @singledispatch 78 | def _field_find(arg, f): 79 | raise ValueError("arg must be Field or FieldGroup") 80 | 81 | @_field_find.register(Field) 82 | def _(field, f): 83 | if f == field or isinstance(f, str) and f == field.name: 84 | return field 85 | else: 86 | return None 87 | 88 | @_field_find.register(FieldGroup) 89 | @_field_find.register(FieldRoot) 90 | def _(field_group, f): 91 | return reduce( 92 | lambda a, b: a or b, 93 | (_field_find(e) for e in field_group.children), 94 | ) 95 | 96 | @singledispatch 97 | def _field_index(arg, f, o): 98 | raise ValueError("arg must be Field or FieldGroup") 99 | 100 | @_field_index.register(Field) 101 | def _(field, f, o): 102 | if f == field or isinstance(f, str) and f == field.name: 103 | return o 104 | else: 105 | return None 106 | 107 | @_field_index.register(FieldGroup) 108 | @_field_index.register(FieldRoot) 109 | def _(field_group, f, o): 110 | for c in field_group.children: 111 | r = _field_index(c, f, o) 112 | if r == None: 113 | o += _field_width(c) 114 | else: 115 | return r 116 | return None 117 | 118 | @singledispatch 119 | def _field_flatten(arg, f, o): 120 | raise ValueError("arg must be Field or FieldGroup") 121 | 122 | @_field_flatten.register(Field) 123 | def _(field): 124 | return (field,) 125 | 126 | @_field_flatten.register(FieldGroup) 127 | @_field_flatten.register(FieldRoot) 128 | def _(field_group): 129 | return itertools.chain.from_iterable( 130 | _field_flatten(f) for f in field_group.children 131 | ) 132 | 133 | def _add_dict(a, b): 134 | a = a.copy() 135 | a.update(b) 136 | return a 137 | 138 | class Sheet: 139 | 140 | _CELL_TYPES = collections.defaultdict(lambda: 'stringValue', { 141 | int: 'numberValue', 142 | float: 'numberValue', 143 | bool: 'boolValue', 144 | Formula: 'formulaValue', 145 | }) 146 | 147 | def __init__(self, source, fields, fields_conditional_formats=(), sheet_id=None): 148 | self._source = source 149 | self._fields = FieldRoot(fields) 150 | self._sheet_id = 
sheet_id or random.randint(0, 2**32) 151 | self.properties = {} 152 | self._fields_conditional_formats = fields_conditional_formats 153 | self._row_count = None 154 | self._HEADER_ROW = 0 155 | self._HEADER_COL = 0 156 | self._BODY_ROW = _field_depth(self._fields) 157 | self._BODY_COL = 0 158 | 159 | def to_dict(self): 160 | return { 161 | 'properties': self._to_dict_properties(), 162 | 'data': self._to_dict_data(), 163 | 'merges': self._to_dict_merges(), 164 | 'conditionalFormats': self._to_dict_conditional_formats(), 165 | } 166 | 167 | def field_index(self, field, row_num=None): 168 | col_num = _field_index(self._fields, field, 0) 169 | if row_num is None: 170 | return col_num 171 | else: 172 | return col_num, row_num 173 | 174 | def field_address(self, field, row_num, absolute=0): 175 | col_num, row_num = self.field_index(field, row_num) 176 | return self.address(col_num, row_num, absolute) 177 | 178 | def address(self, col_num, row_num, absolute=0): 179 | return ''.join([ 180 | '$' if absolute & 2 else '', 181 | self.col_address(col_num), 182 | '$' if absolute & 1 else '', 183 | self.row_address(row_num), 184 | ]) 185 | 186 | def row_address(self, row_number): 187 | return str(row_number + 1 + self._BODY_ROW) 188 | 189 | def col_address(self, col_number): 190 | res = [] 191 | n = col_number + self._BODY_COL 192 | while n >= 0: 193 | nc = n % 26 + 65 194 | res.append(nc) 195 | n = n // 26 - 1 196 | return ''.join(map(chr, res)) 197 | 198 | def fields_flat(self): 199 | return _field_flatten(self._fields) 200 | 201 | def _cell_contents(self, row, field, row_num, col_num): 202 | if field.type in (str, int, float): 203 | if row[field.index] == '': 204 | return Sheet._CELL_TYPES[field.type], field.type() 205 | else: 206 | return Sheet._CELL_TYPES[field.type], field.type(row[field.index]) 207 | elif callable(field.type): 208 | value = field.type(self, row_num, col_num, field) 209 | cell_type = Sheet._CELL_TYPES[type(value)] 210 | if type(value) == Formula: 211 | 
value = value.value if value.value.startswith('=') else '={}'.format(value.value) 212 | return cell_type, value 213 | 214 | def _column_range(self, field): 215 | col_start = self.field_index(field) + self._BODY_COL 216 | row_start = self._BODY_ROW 217 | col_end = col_start + 1 218 | row_end = row_start + self._row_count 219 | return { 220 | 'startColumnIndex': col_start, 221 | 'endColumnIndex': col_end, 222 | 'startRowIndex': row_start, 223 | 'endRowIndex': row_end, 224 | 'sheetId': self._sheet_id, 225 | } 226 | 227 | def _to_dict_conditional_formats(self): 228 | return [ 229 | { 230 | 'ranges': self._column_range(column_format.field), 231 | 'booleanRule': { 232 | 'condition': { 233 | 'type': format.type, 234 | 'values': [ 235 | { 236 | 'userEnteredValue': format.value, 237 | }, 238 | ], 239 | }, 240 | 'format': format.format 241 | } 242 | } 243 | for column_format in self._fields_conditional_formats 244 | for format in column_format.formats 245 | ] 246 | 247 | def _to_dict_properties(self): 248 | res = self.properties.copy() 249 | res.update({ 250 | 'sheetId': self._sheet_id, 251 | }) 252 | return res 253 | 254 | def _to_dict_data(self): 255 | return [ 256 | self._to_dict_data_header(), 257 | self._to_dict_data_body(), 258 | ] 259 | 260 | def _to_dict_data_header(self): 261 | header_height = _field_depth(self._fields) 262 | row_data = [ 263 | { 264 | 'values': sum(( 265 | [ 266 | { 267 | 'userEnteredValue': { 268 | 'stringValue': f.pretty 269 | }, 270 | 'userEnteredFormat': { 271 | 'horizontalAlignment': 'center', 272 | 'verticalAlignment': 'middle', 273 | 'textFormat': { 274 | 'bold': True, 275 | 'fontSize': 10 if header_row == 0 else 8, 276 | }, 277 | }, 278 | } 279 | ] * _field_width(f) 280 | for f in header_slice 281 | ), [] 282 | ) 283 | } 284 | for header_slice, header_row in ((_field_slice(self._fields, h), h) for h in range(header_height)) 285 | ] 286 | return { 287 | 'startRow': self._HEADER_ROW, 288 | 'startColumn': self._HEADER_COL, 289 | 'rowData': 
    def _to_dict_data_body(self):
        """Render every source row into Sheets API RowData anchored at the
        body's start cell, and cache the row count for column ranges."""
        row_data = [
            {
                'values': [
                    # Merge the cell value with its number-format metadata.
                    _add_dict({
                        'userEnteredValue': {
                            cell_type: cell_value,
                        },
                    }, {
                        'userEnteredFormat': {
                            'numberFormat': {
                                'type': 'NUMBER',
                                'pattern': field.format,
                            },
                        },
                    })
                    # `+ (field,)` appends the field to the (type, value) pair
                    # returned by _cell_contents so all three unpack at once.
                    for cell_type, cell_value, field in (
                        self._cell_contents(row, field, row_num, col_num) + (field,)
                        for field, col_num in zip(_field_flatten(self._fields), itertools.count())
                    )
                ],
            }
            for row, row_num in zip(self._source, itertools.count())
        ]
        # Cached for _column_range (conditional formats need the body height).
        self._row_count = len(row_data)
        return {
            'startRow': self._BODY_ROW,
            'startColumn': self._BODY_COL,
            'rowData': row_data,
        }

    def _to_dict_merges(self):
        """Header merge ranges: groups merge horizontally across their leaf
        columns; leaf fields merge vertically down to the body row."""
        merges = []
        seen_fields = set()
        for header_row in range(_field_depth(self._fields)):
            header_slice = _field_slice(self._fields, header_row)
            col_start = 0
            for header in header_slice:
                if type(header) == FieldGroup:
                    # A group spans all of its leaf columns on its own row.
                    col_width = _field_width(header)
                    merges.append({
                        'startColumnIndex': col_start + self._HEADER_COL,
                        'endColumnIndex': col_start + col_width + self._HEADER_COL,
                        'startRowIndex': header_row + self._HEADER_ROW,
                        'endRowIndex': header_row + 1 + self._HEADER_ROW,
                        'sheetId': self._sheet_id,
                    })
                    col_start += col_width
                elif type(header) == Field and header not in seen_fields:
                    # First appearance of a leaf: merge from this header row
                    # down to the body. _field_slice repeats leaves on every
                    # deeper row, so seen_fields stops duplicate merges.
                    col_width = 1
                    merges.append({
                        'startColumnIndex': col_start + self._HEADER_COL,
                        'endColumnIndex': col_start + col_width + self._HEADER_COL,
                        'startRowIndex': header_row + self._HEADER_ROW,
                        'endRowIndex': self._BODY_ROW,
                        'sheetId': self._sheet_id,
                    })
                    col_start += col_width
                    seen_fields.add(header)
                else:
                    # Already-merged leaf on a deeper row: just advance.
                    col_start += 1
        return merges
def _int_field(name, pretty):
    """Shorthand for a plain, unformatted int column reading index 0."""
    return Field(name, 0, int, pretty, None)


# Hand-rolled layout used to exercise header slicing/merging.
test_fields = FieldRoot((
    _int_field('first', 'First'),
    _int_field('second', 'Second'),
    FieldGroup('Grouped', (
        _int_field('third', 'Third'),
        _int_field('fourth', 'Fourth'),
        FieldGroup('Nested', (
            _int_field('fifth', 'Fifth'),
            _int_field('sixth', 'Sixth'),
        )),
    )),
    _int_field('seventh', 'Seventh'),
    _int_field('eighth', 'Eigth'),  # 'Eigth' [sic]: original label kept byte-for-byte
))


def _month_cost_group(month, with_variation):
    """Per-month column group: a Cost column, plus a Variation column for
    every month after the first."""
    children = [
        Field('cost' + month, month, float, 'Cost', '#,##0.000 [$USD]'),
    ]
    if with_variation:
        children.append(Field('var' + month, month, float, 'Variation', '0.00%'))
    return FieldGroup(month, tuple(children))


# Monthly cost layout: the first month has no previous month to vary from.
var_fields = FieldRoot((
    Field(name='usage', index='usage', type=str, pretty='Usage type', format=None),
    FieldGroup(pretty='Monthly cost', children=tuple(
        _month_cost_group(month, month != '2017-10')
        for month in ('2017-10', '2017-11', '2017-12', '2018-01', '2018-02')
    )),
))
def rows_folder(dirpath):
    """Yield every CSV record from every file directly inside `dirpath`.

    Unlike csv_folder, this does not filter on the .csv extension.
    """
    for filename in os.listdir(dirpath):
        filepath = os.path.join(dirpath, filename)
        with open(filepath) as f:
            yield from rows(f)


def rows(csvfile):
    """Yield each record of an open CSV file as a dict keyed by the header."""
    yield from csv.DictReader(csvfile)


class csv_folder:
    """csv_folder is to be used in conjunction with the 'with' statement. It is
    an iterator over all the CSV records of all files within a folder."""

    def __init__(self, dirpath, readerclass=csv.DictReader):
        self._dirpath = dirpath
        # Lazy generator of *.csv paths; files are opened one at a time.
        self._filepaths = (
            os.path.join(self._dirpath, filename)
            for filename in os.listdir(self._dirpath)
            if filename.endswith('.csv')
        )
        self._reader = None
        self._handle = None
        self._readerclass = readerclass

    def __enter__(self):
        return self

    def __iter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the currently open file, even on error.
        self._close()

    def __next__(self):
        # BUG FIX: the original recursed into __next__ after each exhausted
        # file, so a long run of empty files could overflow the stack.
        # Iterate instead; StopIteration from _open_next ends the iteration.
        while True:
            if self._reader is None:
                self._open_next()
            try:
                return next(self._reader)
            except StopIteration:
                self._close()

    def _open_next(self):
        """Open the next CSV file; raises StopIteration when none remain."""
        filepath = next(self._filepaths)
        self._handle = open(filepath, 'rt')
        self._reader = self._readerclass(self._handle)

    def _close(self):
        """Close and forget the current file handle and reader, if any."""
        if self._handle is not None:
            self._handle.close()
        self._handle = None
        self._reader = None
def main(profile_name, cmd, region):
    """Exec `cmd` with AWS credentials for `profile_name` in its environment.

    The special profile name 'env' means: trust whatever AWS_* variables are
    already set and inject nothing.
    """
    env = dict(os.environ)
    if profile_name != 'env':
        session = boto3.session.Session(profile_name=profile_name)
        creds = session.get_credentials().get_frozen_credentials()
        env['AWS_ACCESS_KEY_ID'] = creds.access_key
        env['AWS_SECRET_ACCESS_KEY'] = creds.secret_key
        if creds.token:
            env['AWS_SESSION_TOKEN'] = creds.token
    if region:
        env['AWS_DEFAULT_REGION'] = region
    # Replace this process with the target command; this call never returns.
    os.execvpe(cmd[0], cmd, env)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run command with AWS credentials in environment.')
    parser.add_argument('--profile', type=str, default='default',
                        help='select the profile credentials will be obtained for')
    parser.add_argument('--region', type=str,
                        help='the default AWS region')
    parser.add_argument('cmd', type=str, metavar='cmd',
                        help='program to be run with AWS credentials')
    parser.add_argument('args', type=str, metavar='arg', nargs='*',
                        help='arguments to the program')
    args = parser.parse_args()
    main(args.profile, [args.cmd, *args.args], args.region)

# vim: set ts=4 sts=4 noet: