├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── diagram.png
├── handlers
│   └── line-item-publisher.py
├── package.json
├── requirements.txt
├── sam-template.yml
└── serverless.yml

/.gitignore:
--------------------------------------------------------------------------------
1 | # Distribution / packaging
2 | .Python
3 | env/
4 | build/
5 | develop-eggs/
6 | dist/
7 | downloads/
8 | eggs/
9 | .eggs/
10 | lib/
11 | lib64/
12 | parts/
13 | sdist/
14 | var/
15 | wheels/
16 | *.egg-info/
17 | .installed.cfg
18 | *.egg
19 | **/.mypy_cache/
20 | 
21 | # Serverless directories
22 | .serverless
23 | 
24 | # Serverless plugins / NPM.
25 | package-lock.json
26 | node_modules/
27 | 
28 | # Installer logs
29 | pip-log.txt
30 | pip-delete-this-directory.txt
31 | 
32 | # Unit test / coverage reports
33 | htmlcov/
34 | .tox/
35 | .coverage
36 | .coverage.*
37 | .cache
38 | nosetests.xml
39 | coverage.xml
40 | *.cover
41 | .hypothesis/
42 | 
43 | # dotenv
44 | .env
45 | 
46 | # virtualenv
47 | .venv
48 | venv/
49 | ENV/
50 | 
51 | # pyenv
52 | .python-version
53 | 
54 | # Byte-compiled / optimized / DLL files
55 | __pycache__/
56 | *.py[cod]
57 | *$py.class
58 | 

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | env:
2 |   global:
3 |     - DIST_DIR="${PWD}/dist"
4 |     - PROJECT_NAME=$(basename ${PWD})
5 |     - ARTIFACT_ID="${TRAVIS_COMMIT}-${TRAVIS_BUILD_NUMBER}"
6 |     - S3_BUCKET="serverlessops-opensource-deploy-prod"
7 |     - S3_BUILD_PREFIX="${PROJECT_NAME}/${ARTIFACT_ID}/build"
8 |     - S3_DEPLOY_PREFIX="${PROJECT_NAME}/${ARTIFACT_ID}/deploy"
9 |     - S3_DEPLOY_ARTIFACT="${S3_DEPLOY_PREFIX}/${PROJECT_NAME}.zip"
10 |     - AWS_SAM_TMPL_NAME="sam-template.yml"
11 |     - AWS_CFN_TMPL_NAME="cfn-template.yml"
12 |     - AWS_DEFAULT_REGION="us-east-1"
13 |     # AWS_ACCESS_KEY_ID
14 |     - secure: lXdZRC7LdCR9TOliONr8+AR9Tn6iYk6Sgv0nTOcTT8V/ogtDGgVpmSQkgCYFn3Q0u6tngxr57IvhDOJUU1pT908us8NQt+n9kUr/04DP7Pp4tyNdbJiaXQuX+GQF5Rc1LSAt61ahtNK10uWn7aD7KkXBnY9yqbGHvCHXkxAPj3D6Y+HfI7c2ej8YVkaY7K+9gMEsQfHs7UXiihSV3mFXbgjZ/xmHTtjX8UYn6l2yEl3B9HWi2Y9c0CcfhhNhDeV8vuwoUT53D7M0gnec88EHgCKe4vEALmKerb18OjFxRwZDdKFVTxWZu8LJjVPN+1QuvRyTDjCHlS6MHYVUzRsKh8N7FscvBjUYSfkUSIJ4YLwjYlsEs4r9LUlgnVFrd+5x+8IN/3ItFE+XILPEVOLfpCj/9zx+60drLDWcboX+MGalU0aZO4iPQoXES1/b4n/rP/o8qLOexo76VdJyZuS7oGXtrrE1TOxboKdAN2CIXGtepwG8EFso99s95NIctwpqX9wxV+hDVVmeNEvL2c5IYQ4cci0vRgbobcCcIv4qli4Z/ce/bGI9RWT+SBBMAxsKl6DHLDn8sC8neOUFTEVuK033tWbKYJb0CAA3c0uVGnNY/60zRk417pLNbHmSzmzVp52weASgVXw9uuFMyNDWHqSPmrU+hfqJK814U8roT58=
15 |     # AWS_SECRET_ACCESS_KEY
16 |     - secure: NFwZpizCRVR5hb+g7bAqQPV1XC6d379Z1SxgPNW22e/N3uCy8sawbLIB3KeVlj8dlcWMBWqrczHYbGY6abl5JbU7OsGh6dRbAkEU4KxomzvVfDXXih3FsR3e+5qlIZPxjXru0xBhLMlB3eVgqfOLElOft7yxq0buSQVekFtKiHBHMCeOYJZGi0ScCMP+KCecW90JP9bFY8u+rfpJRkq6HRP4f9j2eRPLOHLEM4SS8iV/ywP3BfOgjr8BHlZ8Le1h9bT9iB+XsMxRZC1rpC4bDidpqERyZSSelF6Z/b79FCdqWvmXPtk/jMw4z66lo1h3iLXar4u7JSDd7s4bN2yFmetIWTtuqmi8mMkpkbQXyS4hss6xppMnvZHtyWX4deR4O1cvcvYLhVctEg9ITAWjzZv56f5fJShHl8v3txwRu8kbH9OOE4Gxfyxb2z4iyjCeG73yxs3smi7w/KxIXWKYMK8+oFlFew/OuCXZOuHgVTCkZjEwT6yNZW7p+P/+jQC+Rj3jnnWrJZIPZ92Nu6vug/0dfSPb6nBETm1AO9Sn4ln40MR5fedTMY4GDV+5Fe6FLO3ATFankAsR1QxwvEXJSHq2nlGdksUdXVvxAqnRPeK7Y7/2GCbJ53fBZ9ssKI6M5I8pUOGlDOBwnvyjMBNAhn4YeUVtfdJqM0/FxfCUOnc=
17 | 
18 | stages:
19 |   - build
20 |   - artifact
21 |   - name: promote
22 |     if: branch = master
23 | 
24 | jobs:
25 |   include:
26 |     - stage: build
27 |       sudo: required
28 |       services:
29 |         - docker
30 |       language: python
31 |       python: '3.6'
32 |       install:
33 |         - npm install -g serverless
34 |         - npm install
35 |       script:
36 |         - sls package -v -s dev -r us-east-1 -p $DIST_DIR
37 |         - cp serverless.yml $DIST_DIR
38 |         # Remove the python-requirements directory created during packaging.
39 |         - rm -rf ${DIST_DIR}/requirements
40 |       deploy:
41 |         - provider: s3
42 |           skip_cleanup: true
43 |           bucket: "$S3_BUCKET"
44 |           upload_dir: "${PROJECT_NAME}/${ARTIFACT_ID}/build"
45 |           local_dir: "$DIST_DIR"
46 |           acl: private
47 |           on:
48 |             repo: "$TRAVIS_REPO_SLUG"
49 |             all_branches: true
50 |           access_key_id: "$AWS_ACCESS_KEY_ID"
51 |           secret_access_key: "$AWS_SECRET_ACCESS_KEY"
52 | 
53 |     - stage: artifact
54 |       language: python
55 |       python: '3.6'
56 |       install:
57 |         - pip install awscli
58 |       script:
59 |         # Reuse the artifacts from the build stage, copying them into the local dist directory.
60 |         - mkdir $DIST_DIR
61 |         - aws s3 cp s3://${S3_BUCKET}/${S3_BUILD_PREFIX}/${PROJECT_NAME}.zip ${DIST_DIR}/
62 |         - cp ${AWS_SAM_TMPL_NAME} ${DIST_DIR}
63 |         - sed -i'' -e "s,%%S3_BUCKET%%,${S3_BUCKET}," -e "s,%%S3_DEPLOY_ARTIFACT%%,${S3_DEPLOY_ARTIFACT}," ${DIST_DIR}/${AWS_SAM_TMPL_NAME}
64 |         - aws cloudformation package --template-file ${DIST_DIR}/${AWS_SAM_TMPL_NAME} --output-template-file ${DIST_DIR}/${AWS_CFN_TMPL_NAME} --s3-bucket ${S3_BUCKET} --s3-prefix ${S3_DEPLOY_PREFIX}
65 |       deploy:
66 |         - provider: s3
67 |           skip_cleanup: true
68 |           bucket: "$S3_BUCKET"
69 |           upload_dir: "${PROJECT_NAME}/${ARTIFACT_ID}/deploy"
70 |           local_dir: "$DIST_DIR"
71 |           acl: public_read
72 |           on:
73 |             repo: "$TRAVIS_REPO_SLUG"
74 |           access_key_id: "$AWS_ACCESS_KEY_ID"
75 |           secret_access_key: "$AWS_SECRET_ACCESS_KEY"
76 | 
77 |     - stage: promote
78 |       language: python
79 |       python: '3.6'
80 |       install:
81 |         - pip install awscli
82 |       script: skip
83 | 
84 |       # FIXME: There's still an issue with the browser caching an old link. May
85 |       # need to look at rewriting the contents of the file.
86 |       deploy:
87 |         - provider: script
88 |           script: aws s3api put-object --acl public-read --bucket $S3_BUCKET --key ${PROJECT_NAME}/CFN-DEPLOY-LATEST --website-redirect-location "https://console.aws.amazon.com/cloudformation/home?region=${AWS_DEFAULT_REGION}#/stacks/new?stackName=${PROJECT_NAME}&templateURL=https://${S3_BUCKET}.s3.amazonaws.com/${S3_DEPLOY_PREFIX}/${AWS_CFN_TMPL_NAME}" --cache-control "max-age=60"
89 |           on:
90 |             repo: "$TRAVIS_REPO_SLUG"
91 |         - provider: script
92 |           script: aws s3api put-object --acl public-read --bucket $S3_BUCKET --key ${PROJECT_NAME}/SAM-TEMPLATE-LATEST --website-redirect-location "https://${S3_BUCKET}.s3.amazonaws.com/${S3_DEPLOY_PREFIX}/${AWS_SAM_TMPL_NAME}" --cache-control "max-age=60"
93 |           on:
94 |             repo: "$TRAVIS_REPO_SLUG"
95 | 
96 | 
97 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 | 
3 | Copyright (c) 2018, ServerlessOps
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AWS Application Cost Monitoring
2 | [![serverless](http://public.serverless.com/badges/v3.svg)](http://www.serverless.com)
3 | [![Build Status](https://travis-ci.org/ServerlessOpsIO/ApplicationCostMonitoring.svg?branch=master)](https://travis-ci.org/ServerlessOpsIO/ApplicationCostMonitoring)
4 | [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause)
5 | 
6 | Application Cost Monitoring provides granular AWS spend tracking.
7 | 
8 | This system parses the AWS Cost and Usage report and feeds the line items to a destination where you can perform analysis. This service handles only billing report ingestion, parsing, and publishing to SNS. Additional publisher services need to be deployed, e.g.:
9 | 
10 | * [ACM-S3-Publisher](https://github.com/ServerlessOpsIO/ACM-S3-Publisher)
11 | * [ACM-DynamoDB-Publisher](https://github.com/ServerlessOpsIO/ACM-DynamoDB-Publisher)
12 | 
13 | ![System Architecture](/diagram.png?raw=true "System Architecture")
14 | 
15 | ## Background
16 | Read why we released Application Cost Monitoring as a set of independent nanoservices rather than a single application, along with our thoughts on using nanoservices to build serverless applications:
17 | 
18 | * [Rise Of The Nanoservice](https://www.serverlessops.io/blog/rise-of-the-nanoservice)
19 | 
20 | ## Deployment
21 | Read through the entire documentation first. There is information on setting up the billing report that may influence your deployment.
22 | 
23 | You will perform the following actions:
24 | * Deploy the application
25 | * Create the billing report
26 | * Set up the bucket policy (AWS Serverless Application Repository only)
27 | 
28 | ### Application Deployment
29 | This service supports both [Serverless Framework](https://serverless.com/) and the [AWS Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo/).
30 | 
31 | #### Serverless Framework
32 | Clone this repository and deploy using Serverless Framework.
33 | 
34 | ```
35 | $ npm install -g serverless
36 | $ npm install
37 | $ serverless deploy -v
38 | ```
39 | 
40 | #### AWS Serverless Application Repository
41 | This application is available in the AWS Serverless Application Repository. Follow the directions there if you wish to deploy from AppRepo.
42 | 
43 | * https://serverlessrepo.aws.amazon.com/#/applications/arn:aws:serverlessrepo:us-east-1:641494176294:applications~ApplicationCostMonitoring
44 | 
45 | ### Outputs
46 | * _aws-adm-${stage}-BillingReportS3BucketName_: Name of the S3 bucket where billing reports will be delivered.
47 | * _aws-adm-${stage}-BillingRecordsSnsTopicArn_: ARN of the SNS topic that data publishers are to connect to.
48 | * _aws-adm-${stage}-AdmStateS3BucketName_: Name of the S3 bucket where state is stored during and between runs.
49 | 
50 | ### Billing Report Setup
51 | Set up the [AWS Billing Cost and Usage report](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage.html). This will deliver billing reports to the configured S3 bucket up to three times a day. This service will create the S3 bucket for you when it is deployed. Get the _aws-adm-${stage}-BillingReportS3BucketName_ stack export after deploying and use it as the report delivery bucket.
52 | 
53 | Follow the AWS instructions for [turning on the Cost and Usage Report](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-gettingstarted-turnonreports.html). Select hourly or daily report data depending on the granularity of data you need (and can afford, considering the potential size).
54 | 
55 | Additional cost insight can be gained by using cost allocation tags. [Enable cost allocation tags](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/activate-built-in-tags.html) in the AWS console if desired and activate any appropriate [user defined tags](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/custom-tags.html).
56 | 
57 | To see what data is in the report, refer to the AWS documentation for [Cost and Usage Report details](https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage-details.html). Decide which tags to track before you deploy Application Cost Monitoring. Changing the tags tracked in billing reports will cause some line items' IDs to change. Depending on how you are performing your analysis this may not be an issue. If you are using AWS Athena to query the data, however, the resulting schema change will break querying. You will also have to deal with duplicate line item data in the dataset unless you purge all previous data. See the `SCHEMA_CHANGE_HANDLING` variable for more information.
58 | 
59 | ### Configuration
60 | *SCHEMA_CHANGE_HANDLING*: Sets the behavior when a change in the billing report schema is detected. If using CloudFormation or AWS SAM, set this parameter to one of the values below. If using Serverless Framework, set the value as an environment variable. Choose the option that fits your analysis pipeline after reading the notes below. The default value is `CONTINUE`.
61 | 
62 | Changing tags on billing reports will alter the report schema, which can:
63 | - break downstream analysis systems dependent on the schema.
64 | - result in item duplication, as AWS will generate a new line item ID and this system reprocesses first-of-month line items on every run.
65 | - make tracking an item across the month difficult due to the change in ID.
66 | 
67 | Options:
68 | - ERROR: Error out the line item writer. You must remove the schema state file or remove the tags to continue processing.
69 | - CONTINUE: Just continue processing.
70 | - RECONCILE: Reprocess the entire report.
71 | 
72 | ## Usage
73 | This service only handles billing report ingestion and parsing. It requires an additional publisher service to be useful; a minimal custom publisher sketch is shown below, followed by the ready-made publishers.
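
A publisher is any service that subscribes to the exported billing records SNS topic (_aws-adm-${stage}-BillingRecordsSnsTopicArn_) and stores or forwards the line items. As a rough sketch (not part of this repository; the field access below assumes the standard Cost and Usage Report columns), a minimal Lambda publisher could look like:

```python
'''Hypothetical minimal publisher: subscribe this Lambda to the billing records SNS topic.'''
import json

def handler(event, context):
    for record in event['Records']:
        # Each SNS message body is one JSON-encoded billing line item, with
        # the report's 'parent/child' columns expanded into nested dicts.
        line_item = json.loads(record['Sns']['Message'])
        line_item_id = line_item['identity']['LineItemId']
        cost = line_item['lineItem']['UnblendedCost']
        print('{}: {}'.format(line_item_id, cost))
```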
Available ones are:
74 | 
75 | * [ACM-S3-Publisher](https://github.com/ServerlessOpsIO/ACM-S3-Publisher)
76 | * [ACM-DynamoDB-Publisher](https://github.com/ServerlessOpsIO/ACM-DynamoDB-Publisher)
77 | 

--------------------------------------------------------------------------------
/diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ServerlessOpsIO/ApplicationCostMonitoring/8d8e7fdf424a3f09598f2813024eea7d80f1e937/diagram.png

--------------------------------------------------------------------------------
/handlers/line-item-publisher.py:
--------------------------------------------------------------------------------
1 | '''Fetch object from S3 and publish items to SNS'''
2 | 
3 | import boto3
4 | import csv
5 | import gzip
6 | import io
7 | import iso8601
8 | import json
9 | import logging
10 | import os
11 | import zipfile
12 | 
13 | AWS_SNS_TOPIC = os.environ.get('AWS_SNS_TOPIC')
14 | SCHEMA_CHANGE_HANDLING = os.environ.get('SCHEMA_CHANGE_HANDLING')
15 | 
16 | SCHEMA_CHANGE_ERROR = 'ERROR'
17 | SCHEMA_CHANGE_CONTINUE = 'CONTINUE'
18 | SCHEMA_CHANGE_RECONCILE = 'RECONCILE'
19 | SCHEMA_CHANGE_OPTIONS = [
20 |     SCHEMA_CHANGE_ERROR,
21 |     SCHEMA_CHANGE_CONTINUE,
22 |     SCHEMA_CHANGE_RECONCILE
23 | ]
24 | 
25 | X_RECORD_OFFSET = 'x-record-offset'
26 | X_RECORD_LATEST_DATE = 'x-record-latest-date'
27 | 
28 | LAST_ADM_RUN_TIME_STATE = 'LAST_ADM_RUN_TIME_STATE'
29 | LAST_ADM_RUN_SCHEMA_STATE = 'LAST_ADM_RUN_SCHEMA_STATE'
30 | 
31 | log_level = os.environ.get('LOG_LEVEL', 'INFO')
32 | logging.root.setLevel(logging.getLevelName(log_level))  # type: ignore
33 | _logger = logging.getLogger(__name__)
34 | 
35 | s3_client = boto3.client('s3')
36 | sns_client = boto3.client('sns')
37 | lambda_client = boto3.client('lambda')
38 | 
39 | 
40 | class LineItemPublisherError(Exception):
41 |     '''Lambda base exception'''
42 |     pass
43 | 
44 | 
45 | class BillingReportSchemaChangeError(LineItemPublisherError):
46 |     '''Billing report schema change'''
47 |     def __init__(self):
48 |         self.msg = 'Detected billing report schema change'
49 |         super(LineItemPublisherError, self).__init__(self.msg)
50 | 
51 | 
52 | class InvalidSchemaChangeOptionError(LineItemPublisherError):
53 |     '''Invalid schema change option'''
54 |     def __init__(self, option):
55 |         self.msg = '{} not in {}'.format(option, SCHEMA_CHANGE_OPTIONS)
56 |         super(LineItemPublisherError, self).__init__(self.msg)
57 | 
58 | 
59 | def _check_report_schema_change(line_item_headers, old_line_item_headers):
60 |     '''Return True if the two header lists contain the same columns (no schema change).'''
61 |     # Columns are not assumed to always be in the same position, so compare
62 |     # sorted copies. Sorting copies rather than the originals keeps the
63 |     # caller's header list aligned with the line item values.
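    # e.g. ['a', 'b'] and ['b', 'a'] compare equal after sorting, so a pure
    # column reorder is not treated as a schema change.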
64 |     tmp_line_item_headers = line_item_headers[:]
65 |     tmp_old_line_item_headers = old_line_item_headers[:]
66 | 
67 |     tmp_line_item_headers.sort()
68 |     tmp_old_line_item_headers.sort()
69 | 
70 |     return tmp_line_item_headers == tmp_old_line_item_headers
71 | 
72 | 
73 | def _check_s3_object_exists(s3_bucket, s3_key):
74 |     '''Check if an object exists in S3'''
75 |     resp = s3_client.list_objects(
76 |         Bucket=s3_bucket,
77 |         Prefix=s3_key
78 |     )
79 | 
80 |     exists = False
81 |     if 'Contents' in resp:
82 |         for k in resp.get('Contents'):
83 |             if k.get('Key') == s3_key:
84 |                 exists = True
85 |                 break
86 | 
87 |     return exists
88 | 
89 | 
90 | def _convert_empty_value_to_none(item):
91 |     '''Turn empty string values into None.'''
92 | 
93 |     # DynamoDB can't have empty strings but csv.DictReader earlier in the
94 |     # pipeline uses '' for empty fields.
95 |     for key, value in item.items():
96 |         if value == '':
97 |             item[key] = None
98 | 
99 |     return item
100 | 
101 | 
102 | def _create_line_item_message(headers, line_item):
103 |     '''Return a formatted line item message.'''
104 |     split_line_item = next(csv.reader([line_item]))
105 |     item_dict = dict(zip(headers, split_line_item))
106 |     sanitized_item_dict = _convert_empty_value_to_none(item_dict)
107 | 
108 |     final_dict = _format_line_item_dict(sanitized_item_dict)
109 | 
110 |     return final_dict
111 | 
112 | 
113 | def _decompress_s3_object_body(s3_body, s3_key):
114 |     '''Return the decompressed data of an S3 object.'''
115 |     if s3_key.endswith('.gz'):
116 |         gzip_file = gzip.GzipFile(fileobj=io.BytesIO(s3_body))
117 |         decompressed_s3_body = gzip_file.read().decode()
118 |     else:
119 |         decompress_s3_key = '.'.join(s3_key.split('.')[:-1])
120 |         zip_file = zipfile.ZipFile(io.BytesIO(s3_body))
121 |         decompressed_s3_body = zip_file.read(decompress_s3_key).decode()
122 | 
123 |     return decompressed_s3_body
124 | 
125 | 
126 | def _get_last_run_datetime_from_s3(s3_bucket, schema_change_handling):
127 |     '''Return datetime of the last run'''
128 |     if schema_change_handling == SCHEMA_CHANGE_RECONCILE:
129 |         last_run_record_latest_date = '1970-01-01T00:00:00Z'
130 |         last_run_record_latest_datetime = iso8601.parse_date(last_run_record_latest_date)
131 |     else:
132 |         if not _check_s3_object_exists(s3_bucket, LAST_ADM_RUN_TIME_STATE):
133 |             last_run_record_latest_date = '1970-01-01T00:00:00Z'
134 |             _put_s3_object(s3_bucket, LAST_ADM_RUN_TIME_STATE, last_run_record_latest_date)
135 |         else:
136 |             last_run_record_latest_date = _get_s3_object_body(s3_bucket, LAST_ADM_RUN_TIME_STATE).strip()
137 |         last_run_record_latest_datetime = iso8601.parse_date(last_run_record_latest_date)
138 | 
139 |     return last_run_record_latest_datetime
140 | 
141 | 
142 | def _get_line_items_from_s3(s3_bucket, s3_key):
143 |     '''Return the line items from the S3 bucket.'''
144 |     s3_object_body = _get_s3_object_body(s3_bucket, s3_key, decode_bytes=False)
145 |     s3_object_body_decompressed = _decompress_s3_object_body(s3_object_body, s3_key)
146 |     s3_body_file = io.StringIO(s3_object_body_decompressed)
147 | 
148 |     line_item_headers = s3_body_file.readline().strip().split(',')
149 |     line_items = s3_body_file.read().splitlines()
150 | 
151 |     return (line_item_headers, line_items)
152 | 
153 | 
154 | def _get_line_item_time_interval(line_item):
155 |     '''Get the time interval of the line item'''
156 |     return line_item.get('identity').get('TimeInterval').split('/')
157 | 
158 | 
159 | def _delete_s3_object(s3_bucket, s3_key):
160 |     '''Delete an object from S3.'''
161 |     resp = s3_client.delete_object(
162 |         Bucket=s3_bucket,
163 |         Key=s3_key
164 |     )
165 | 
166 |     return resp
167 | 
168 | 
169 | def _get_s3_object_body(s3_bucket, s3_key, decode_bytes=True):
170 |     '''Get object body from S3.'''
171 |     s3_object = s3_client.get_object(
172 |         Bucket=s3_bucket,
173 |         Key=s3_key
174 |     )
175 | 
176 |     s3_object_body = s3_object.get('Body').read()
177 |     if decode_bytes is True:
178 |         s3_object_body = s3_object_body.decode()
179 | 
180 |     return s3_object_body
181 | 
182 | 
183 | def _format_line_item_dict(line_item_dict):
184 |     '''Convert multi-level keys into parent/child dict values.'''
185 |     formatted_line_item_dict = {}
186 | 
187 |     for k, v in line_item_dict.items():
188 |         key_list = k.split('/')
189 |         if len(key_list) > 1:
190 |             parent, child = key_list
191 |             if parent not in formatted_line_item_dict.keys():
192 |                 formatted_line_item_dict[parent] = {}
193 |             formatted_line_item_dict[parent][child] = v
194 |         else:
195 |             formatted_line_item_dict[key_list[0]] = v
196 | 
197 |     return formatted_line_item_dict
198 | 
199 | 
200 | def _process_additional_items(arn, event, line_item_offset, this_latest_datetime):
201 |     '''Process additional records.'''
202 |     event.get('Records')[0][X_RECORD_OFFSET] = line_item_offset
203 |     event.get('Records')[0][X_RECORD_LATEST_DATE] = str(this_latest_datetime)
204 | 
205 |     resp = lambda_client.invoke(
206 |         FunctionName=arn,
207 |         Payload=json.dumps(event),
208 |         InvocationType='Event'
209 |     )
210 |     # Pop the Payload value; its StreamingBody is not JSON-serializable.
211 |     resp.pop('Payload')
212 |     return resp
213 | 
214 | 
215 | def _publish_sns_message(topic_arn, line_item):
216 |     '''Publish message to SNS'''
217 |     resp = sns_client.publish(
218 |         TopicArn=topic_arn,
219 |         Message=json.dumps(line_item)
220 |     )
221 | 
222 |     return resp
223 | 
224 | 
225 | def _put_s3_object(s3_bucket, s3_key, body):
226 |     '''Write item to S3'''
227 |     resp = s3_client.put_object(
228 |         Bucket=s3_bucket,
229 |         Key=s3_key,
230 |         Body=body
231 |     )
232 | 
233 |     return resp
234 | 
235 | 
236 | def handler(event, context):
237 |     _logger.info('S3 event received: {}'.format(json.dumps(event)))
238 | 
239 |     # Raise an error if we don't know how to handle schema changes
240 |     if SCHEMA_CHANGE_HANDLING not in SCHEMA_CHANGE_OPTIONS:
241 |         raise InvalidSchemaChangeOptionError(SCHEMA_CHANGE_HANDLING)
242 | 
243 |     s3_bucket = event.get('Records')[0].get('s3').get('bucket').get('name')
244 |     s3_key = event.get('Records')[0].get('s3').get('object').get('key')
245 | 
246 |     line_item_offset = event.get('Records')[0].get(X_RECORD_OFFSET)
247 |     this_run_record_latest_date = event.get('Records')[0].get(X_RECORD_LATEST_DATE, '1970-01-01T00:00:00Z')
248 |     this_run_record_latest_datetime = iso8601.parse_date(this_run_record_latest_date)
249 | 
250 |     line_item_headers, line_items = _get_line_items_from_s3(s3_bucket, s3_key)
251 |     total_line_items = len(line_items)
252 |     _logger.info('Total items: {}'.format(total_line_items))
253 | 
254 |     # This is an initial processing run of a given report.
255 |     if line_item_offset is None:
256 |         # Write schema if none exists.
257 |         if not _check_s3_object_exists(s3_bucket, LAST_ADM_RUN_SCHEMA_STATE):
258 |             _put_s3_object(s3_bucket, LAST_ADM_RUN_SCHEMA_STATE, ','.join(line_item_headers))
259 |         # Otherwise compare against the stored schema and error out if configured to.
260 |         else:
261 |             old_line_item_headers = _get_s3_object_body(s3_bucket, LAST_ADM_RUN_SCHEMA_STATE).split(',')
262 |             if not _check_report_schema_change(line_item_headers, old_line_item_headers):
263 |                 if SCHEMA_CHANGE_HANDLING == SCHEMA_CHANGE_ERROR:
264 |                     raise BillingReportSchemaChangeError
265 | 
266 |     # Get the latest record time seen by the last run.
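    # The checkpoint is an ISO 8601 timestamp stored in the state bucket;
    # RECONCILE mode resets it to the epoch so the entire report is reprocessed.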
267 |     last_run_record_latest_datetime = _get_last_run_datetime_from_s3(
268 |         s3_bucket,
269 |         SCHEMA_CHANGE_HANDLING
270 |     )
271 |     _logger.info('Processing line items since: {}'.format(last_run_record_latest_datetime))
272 | 
273 |     if line_item_offset is None:
274 |         line_item_offset = 0
275 | 
276 |     line_items = line_items[line_item_offset:]
277 | 
278 |     # NOTE: We might decide to batch send multiple records at a time. It's
279 |     # worth a look after we have decent metrics to understand the tradeoffs.
280 |     published_line_items = 0
281 |     for line_item in line_items:
282 |         _logger.debug('line_item: {}'.format(line_item))
283 | 
284 |         line_item_msg = _create_line_item_message(line_item_headers, line_item)
285 |         _logger.debug('message: {}'.format(json.dumps(line_item_msg)))
286 | 
287 |         line_item_start, line_item_end = _get_line_item_time_interval(line_item_msg)
288 |         line_item_start_datetime = iso8601.parse_date(line_item_start)
289 | 
290 |         # XXX: AWS does not guarantee that data will not change across
291 |         # billing reports. There are two ways to easily observe this fact:
292 |         #
293 |         # - AmazonSNS and AmazonS3 BlendedRate will change across runs for the
294 |         #   same line item
295 |         # - Additional line items will be added to the first of the month
296 |         #   throughout the month.
297 |         #
298 |         # We should probably do an end of month reconciliation run. Reports
299 |         # are generated up to three times a day. If we tried to do this
300 |         # automatically this could be problematic depending on the size of the
301 |         # report and the downstream publishers.
302 | 
303 |         # First of month line items get appended through the month.
304 |         is_first_of_month_line_item = line_item_start_datetime.day == 1
305 | 
306 |         # XXX: Appears so far that each report always starts with a new
307 |         # time period.
308 |         is_newer_than_last_run = line_item_start_datetime > last_run_record_latest_datetime
309 | 
310 |         if is_newer_than_last_run or is_first_of_month_line_item:
311 |             _logger.info('Publishing line_item: {}/{}'.format(line_item_offset + 1, total_line_items))
312 |             resp = _publish_sns_message(AWS_SNS_TOPIC, line_item_msg)
313 |             _logger.debug(
314 |                 'Publish response for line_item {}: {}'.format(
315 |                     line_item_offset, json.dumps(resp)
316 |                 )
317 |             )
318 |             published_line_items += 1
319 |             if line_item_start_datetime > this_run_record_latest_datetime:
320 |                 this_run_record_latest_datetime = line_item_start_datetime
321 | 
322 |         line_item_offset += 1
323 | 
324 |         if context.get_remaining_time_in_millis() <= 2000:
325 |             break
326 | 
327 |     # Check whether the whole report was processed or another invocation is needed.
328 |     # FIXME: Need a better way to check for processing same report than using
329 |     # -1 as the offset.
330 |     if line_item_offset < total_line_items:
331 |         _logger.info('Invoking additional execution at record offset: {}'.format(line_item_offset))
332 |         lambda_resp = _process_additional_items(
333 |             context.invoked_function_arn,
334 |             event,
335 |             line_item_offset,
336 |             this_run_record_latest_datetime,
337 |         )
338 |         _logger.info('Invoked additional Lambda response: {}'.format(json.dumps(lambda_resp)))
339 |     else:
340 |         _logger.info('No additional records to process')
341 | 
342 |     # Since we always process the 1st of the month, only write if this report
343 |     # is later than the last run datetime.
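    # Writing an older timestamp would rewind the checkpoint and cause
    # already-published line items to be re-sent on the next run.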
344 |     if this_run_record_latest_datetime > last_run_record_latest_datetime:
345 |         _put_s3_object(s3_bucket, LAST_ADM_RUN_TIME_STATE, str(this_run_record_latest_datetime))
346 | 
347 |     resp = {
348 |         'records_published': published_line_items,
349 |         'line_item_offset': line_item_offset,
350 |         'total_records': total_line_items
351 |     }
352 | 
353 |     _logger.info('Run summary: {}'.format(json.dumps(resp)))
354 |     return resp
355 | 
356 | 

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "aws-adm",
3 |   "description": "",
4 |   "version": "0.1.0",
5 |   "devDependencies": {
6 |     "serverless-iam-roles-per-function": "^0.1.2",
7 |     "serverless-pseudo-parameters": "^1.3.2",
8 |     "serverless-python-requirements": "^3.3.0",
9 |     "serverless-sam": "0.0.4",
10 |     "serverless-plugin-existing-s3": "^2.0.3"
11 |   }
12 | }
13 | 

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | iso8601
2 | 

--------------------------------------------------------------------------------
/sam-template.yml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: '2010-09-09'
2 | Transform: 'AWS::Serverless-2016-10-31'
3 | Description: 'SAM template for Serverless framework service: Application Cost Monitoring'
4 | Resources:
5 |   BillingRecordsSnsTopic:
6 |     Type: 'AWS::SNS::Topic'
7 |   Publish:
8 |     Type: 'AWS::Serverless::Function'
9 |     Properties:
10 |       # NOTE: Necessary to handle LambdaInvokePolicy for self.
11 |       FunctionName: 'aws-serverless-repository-ApplicationCostMonitoring-Publish'
12 |       Runtime: python3.6
13 |       Handler: handlers/line-item-publisher.handler
14 |       CodeUri: s3://%%S3_BUCKET%%/%%S3_DEPLOY_ARTIFACT%%
15 |       Description: Ingest an S3 object and publish line items.
16 |       MemorySize: 512
17 |       Timeout: 300
18 |       Policies:
19 |         - S3CrudPolicy:
20 |             # NOTE: Need to ref the parameter to avoid a circular dependency.
21 |             BucketName:
22 |               Ref: BillingReportBucketName
23 |         - SNSPublishMessagePolicy:
24 |             TopicName:
25 |               Fn::GetAtt:
26 |                 - BillingRecordsSnsTopic
27 |                 - TopicName
28 |         - LambdaInvokePolicy:
29 |             # NOTE: We can't Ref Stack name when publishing to AppRepo
30 |             FunctionName: 'aws-serverless-repository-ApplicationCostMonitoring-Publish'
31 |       Environment:
32 |         Variables:
33 |           LOG_LEVEL: INFO
34 |           AWS_SNS_TOPIC:
35 |             Ref: BillingRecordsSnsTopic
36 |           SCHEMA_CHANGE_HANDLING:
37 |             Ref: SchemaChangeHandling
38 |       Events:
39 |         Event1:
40 |           Type: S3
41 |           Properties:
42 |             Bucket:
43 |               Ref: BillingReportS3Bucket
44 |             Events: 's3:ObjectCreated:Put'
45 |             Filter:
46 |               S3Key:
47 |                 Rules:
48 |                   - Name: suffix
49 |                     Value: .gz
50 |         Event2:
51 |           Type: S3
52 |           Properties:
53 |             Bucket:
54 |               Ref: BillingReportS3Bucket
55 |             Events: 's3:ObjectCreated:Put'
56 |             Filter:
57 |               S3Key:
58 |                 Rules:
59 |                   - Name: suffix
60 |                     Value: .zip
61 |   BillingReportS3Bucket:
62 |     Type: 'AWS::S3::Bucket'
63 |     Properties:
64 |       BucketName:
65 |         Ref: BillingReportBucketName
66 | Outputs:
67 |   BillingReportS3BucketName:
68 |     Description: Name of S3 Bucket for billing reports.
69 |     Value:
70 |       Ref: BillingReportS3Bucket
71 |     Export:
72 |       Name: aws-serverless-repository-ApplicationCostMonitoring-BillingReportS3BucketName
73 |   BillingRecordsSnsTopicArn:
74 |     Description: SNS topic ARN where billing records are published to.
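    # Downstream publisher services subscribe to the topic via this exported ARN.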
75 |     Value:
76 |       Ref: BillingRecordsSnsTopic
77 |     Export:
78 |       Name: aws-serverless-repository-ApplicationCostMonitoring-BillingRecordsSnsTopicArn
79 | Parameters:
80 |   SchemaChangeHandling:
81 |     Type: String
82 |     Description: Behavior by Publish function on report schema change.
83 |     Default: CONTINUE
84 |   BillingReportBucketName:
85 |     Type: String
86 |     Description: Name of S3 bucket AWS Cost and Usage reports are delivered to.
87 | 

--------------------------------------------------------------------------------
/serverless.yml:
--------------------------------------------------------------------------------
1 | service: ApplicationCostMonitoring
2 | 
3 | plugins:
4 |   - serverless-python-requirements
5 |   - serverless-iam-roles-per-function
6 |   - serverless-pseudo-parameters
7 |   - serverless-sam
8 | 
9 | custom:
10 |   stage: "${opt:stage, env:SLS_STAGE, 'dev'}"
11 |   profile: "${opt:aws-profile, env:AWS_PROFILE, env:AWS_DEFAULT_PROFILE, 'default'}"
12 |   log_level: "${env:LOG_LEVEL, 'INFO'}"
13 |   billing_bucket_name: 'applicationcostmonitoring-${self:provider.stage}-billing-reports-#{AWS::AccountId}'
14 | 
15 |   cf-parameters:
16 |     # Changing tags on billing reports will alter the report schema which can:
17 |     # - break downstream analysis systems dependent on the schema.
18 |     # - result in item duplication as AWS will generate a new line item ID and
19 |     #   this system has to continually reprocess 1st of month on every run.
20 |     # - make tracking an item across the month difficult due to change in ID.
21 |     #
22 |     # Options:
23 |     # - ERROR: Error out line item writer. Must remove schema state file or
24 |     #   remove tags to continue processing.
25 |     # - CONTINUE: Just continue processing.
26 |     # - RECONCILE: Reprocess the entire report.
27 |     schema_change_handling: "${env:SCHEMA_CHANGE_HANDLING, 'CONTINUE'}"
28 | 
29 | provider:
30 |   name: aws
31 |   profile: ${self:custom.profile}
32 |   stage: ${self:custom.stage}
33 |   stackTags:
34 |     x-service: ${self:service}
35 |     x-stack: ${self:service}-${self:provider.stage}
36 |     x-app: ApplicationCostMonitoring
37 | 
38 | functions:
39 |   Publish:
40 |     handler: handlers/line-item-publisher.handler
41 |     description: "Ingest an S3 object and publish line items."
42 |     runtime: python3.6
43 |     memorySize: 512
44 |     timeout: 300
45 |     environment:
46 |       LOG_LEVEL: ${self:custom.log_level}
47 |       AWS_SNS_TOPIC:
48 |         Ref: BillingRecordsSnsTopic
49 |       SCHEMA_CHANGE_HANDLING:
50 |         Ref: SchemaChangeHandling
51 |     iamRoleStatements:
52 |       - Effect: "Allow"
53 |         Action:
54 |           - "s3:ListBucket"
55 |         Resource: "arn:aws:s3:::${self:custom.billing_bucket_name}"
56 |       - Effect: "Allow"
57 |         Action:
58 |           - "S3:GetObject"
59 |           - "S3:PutObject"
60 |         Resource: "arn:aws:s3:::${self:custom.billing_bucket_name}/*"
61 |       - Effect: "Allow"
62 |         Action:
63 |           - "SNS:Publish"
64 |         Resource:
65 |           - Ref: BillingRecordsSnsTopic
66 |       - Effect: "Allow"
67 |         Action:
68 |           - "Lambda:InvokeFunction"
69 |         Resource: "arn:aws:lambda:${self:provider.region}:#{AWS::AccountId}:function:${self:service}-${self:provider.stage}-Publish"
70 |     events:
71 |       - s3:
72 |           # NOTE: Need to use string here.
73 |           bucket: '${self:custom.billing_bucket_name}'
74 |           event: "s3:ObjectCreated:Put"
75 |           rules:
76 |             - suffix: ".gz"
77 |       - s3:
78 |           # NOTE: Need to use string here.
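          # Same bucket as the .gz event above; only the suffix filter differs.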
79 |           bucket: '${self:custom.billing_bucket_name}'
80 |           event: "s3:ObjectCreated:Put"
81 |           rules:
82 |             - suffix: ".zip"
83 | 
84 | 
85 | resources:
86 |   Parameters:
87 |     SchemaChangeHandling:
88 |       Type: String
89 |       Description: 'Behavior by Publish function on report schema change.'
90 |       Default: '${self:custom.cf-parameters.schema_change_handling}'
91 | 
92 |   Resources:
93 |     BillingRecordsSnsTopic:
94 |       Type: "AWS::SNS::Topic"
95 | 
96 |     SnsTopicPolicy:
97 |       Type: "AWS::SNS::TopicPolicy"
98 |       Properties:
99 |         PolicyDocument: {
100 |           "Version": "2008-10-17",
101 |           "Statement": [
102 |             {
103 |               "Effect": "Allow",
104 |               "Principal": {
105 |                 "AWS": "*"
106 |               },
107 |               "Action": [
108 |                 "SNS:GetTopicAttributes",
109 |                 "SNS:SetTopicAttributes",
110 |                 "SNS:AddPermission",
111 |                 "SNS:RemovePermission",
112 |                 "SNS:DeleteTopic",
113 |                 "SNS:Subscribe",
114 |                 "SNS:ListSubscriptionsByTopic",
115 |                 "SNS:Publish",
116 |                 "SNS:Receive"
117 |               ],
118 |               "Resource": {
119 |                 "Fn::Join": [
120 |                   ":",
121 |                   [
122 |                     "arn:aws:sns",
123 |                     Ref: "AWS::Region",
124 |                     Ref: "AWS::AccountId",
125 |                     Fn::GetAtt: ["BillingRecordsSnsTopic", "TopicName"]
126 |                   ]
127 |                 ]
128 |               },
129 |               "Condition": {
130 |                 "StringEquals": {
131 |                   "AWS:SourceOwner": {
132 |                     "Ref": "AWS::AccountId"
133 |                   }
134 |                 }
135 |               }
136 |             }
137 |           ]
138 |         }
139 |         Topics:
140 |           - Ref: BillingRecordsSnsTopic
141 | 
142 |     BillingS3BucketPolicy:
143 |       Type: "AWS::S3::BucketPolicy"
144 |       Properties:
145 |         Bucket:
146 |           Ref: "S3BucketApplicationcostmonitoring${self:provider.stage}billingreportsAWSAccountId"
147 |         PolicyDocument: {
148 |           "Version": "2008-10-17",
149 |           "Id": "Policy1335892530063",
150 |           "Statement": [
151 |             {
152 |               "Sid": "Stmt1335892150622",
153 |               "Effect": "Allow",
154 |               "Principal": {
155 |                 "AWS": "arn:aws:iam::386209384616:root"
156 |               },
157 |               "Action": [
158 |                 "s3:GetBucketAcl",
159 |                 "s3:GetBucketPolicy"
160 |               ],
161 |               "Resource": { "Fn::GetAtt": ["S3BucketApplicationcostmonitoring${self:provider.stage}billingreportsAWSAccountId", "Arn"] }
162 |             },
163 |             {
164 |               "Sid": "Stmt1335892526596",
165 |               "Effect": "Allow",
166 |               "Principal": {
167 |                 "AWS": "arn:aws:iam::386209384616:root"
168 |               },
169 |               "Action": [
170 |                 "s3:PutObject"
171 |               ],
172 |               "Resource": { "Fn::Join": ["/", [{ "Fn::GetAtt": ["S3BucketApplicationcostmonitoring${self:provider.stage}billingreportsAWSAccountId", "Arn"] }, "*"]] }
173 |             }
174 |           ]
175 |         }
176 | 
177 | 
178 |   Outputs:
179 |     BillingReportS3BucketName:
180 |       Description: "S3 bucket where billing reports are delivered to."
181 |       Value:
182 |         Ref: "S3BucketApplicationcostmonitoring${self:provider.stage}billingreportsAWSAccountId"
183 |       Export:
184 |         Name: "${self:service}-${self:provider.stage}-BillingReportS3BucketName"
185 |     BillingRecordsSnsTopicArn:
186 |       Description: "SNS topic ARN where billing records are published to."
187 |       Value:
188 |         Ref: BillingRecordsSnsTopic
189 |       Export:
190 |         Name: "${self:service}-${self:provider.stage}-BillingRecordsSnsTopicArn"
191 | 
192 | 

--------------------------------------------------------------------------------