├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── linter.yml │ └── release.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .terraform-docs.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── assets └── overview.png ├── data.tf ├── examples └── complete │ ├── main.tf │ ├── outputs.tf │ └── variables.tf ├── glue.tf ├── main.tf ├── notifications.tf ├── outputs.tf ├── providers.tf ├── src └── index.js └── variables.tf /.gitattributes: -------------------------------------------------------------------------------- 1 | *.png filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: terraform 5 | directory: / 6 | schedule: 7 | interval: daily 8 | -------------------------------------------------------------------------------- /.github/workflows/linter.yml: -------------------------------------------------------------------------------- 1 | name: PR validation 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | linter: 10 | runs-on: ubuntu-latest 11 | 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | terraform: ['~0.13.0', '~0.14.0', '~0.15.0', '~1.0.0'] 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v2 20 | with: 21 | lfs: true 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | 24 | - name: Install Terraform 25 | uses: hashicorp/setup-terraform@v1 26 | with: 27 | terraform_version: ${{ matrix.terraform }} 28 | terraform_wrapper: false 29 | 30 | - name: Install TFLint 31 | uses: terraform-linters/setup-tflint@v1 32 | with: 33 | tflint_version: v0.26.0 34 | 35 | - name: Install Python 36 | uses: actions/setup-python@v2 37 | 38 | - name: Install pre-commit & deps 39 | run: | 40 | pip install pre-commit 41 | curl -L "$(curl -Ls https://api.github.com/repos/tfsec/tfsec/releases/latest | grep 
-o -E "https://.+?tfsec-linux-amd64" | head -n1)" > tfsec && chmod +x tfsec && sudo mv tfsec /usr/bin/ 42 | 43 | - name: Run pre-commit 44 | run: pre-commit run --color=always --show-diff-on-failure --all-files 45 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | with: 15 | token: ${{ secrets.ROBOT_GITHUB_TOKEN }} 16 | lfs: true 17 | ref: master 18 | 19 | - name: Update module docs 20 | uses: terraform-docs/gh-actions@main 21 | with: 22 | working-dir: . 23 | output-file: README.md 24 | output-method: inject 25 | git-push: "true" 26 | git-push-user-name: cle-robot 27 | git-push-user-email: 77749875+cle-robot@users.noreply.github.com 28 | git-commit-message: "chore: update module docs [skip ci]" 29 | 30 | - name: Create release 31 | uses: GoogleCloudPlatform/release-please-action@v2 32 | with: 33 | token: ${{ secrets.ROBOT_GITHUB_TOKEN }} 34 | release-type: terraform-module 35 | changelog-types: '[{"type":"feat","section":"Features","hidden":false},{"type":"fix","section":"Fixes","hidden":false},{"type":"improvement","section":"Improvements","hidden":false}]' 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | 8 | # Crash log files 9 | crash.log 10 | 11 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 12 | # .tfvars files are managed as part of configuration and so should be included in 13 | # version control. 
14 | # 15 | # example.tfvars 16 | 17 | # Ignore override files as they are usually used to override resources locally and so 18 | # are not checked in 19 | override.tf 20 | override.tf.json 21 | *_override.tf 22 | *_override.tf.json 23 | 24 | *.swp 25 | 26 | # Terraform 27 | *.lock.hcl 28 | 29 | # Terragrunt 30 | **/.terragrunt-cache/* 31 | 32 | # Terragrunt-generated files 33 | **/*_terragrunt_generated.tf 34 | 35 | # VS Code 36 | **/.vscode/* 37 | 38 | # Compressed Lambda code 39 | lambda.zip 40 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/antonbabenko/pre-commit-terraform 3 | rev: v1.50.0 4 | hooks: 5 | - id: terraform_fmt 6 | - id: terraform_validate 7 | - id: terraform_tflint 8 | args: 9 | - '--args=--only=terraform_deprecated_interpolation' 10 | - '--args=--only=terraform_deprecated_index' 11 | - '--args=--only=terraform_unused_required_providers' 12 | - '--args=--only=terraform_unused_declarations' 13 | - '--args=--only=terraform_comment_syntax' 14 | - '--args=--only=terraform_documented_outputs' 15 | - '--args=--only=terraform_documented_variables' 16 | - '--args=--only=terraform_typed_variables' 17 | - '--args=--only=terraform_module_pinned_source' 18 | - '--args=--only=terraform_naming_convention' 19 | - '--args=--only=terraform_required_version' 20 | - '--args=--only=terraform_required_providers' 21 | - '--args=--only=terraform_standard_module_structure' 22 | - '--args=--only=terraform_workspace_remote' 23 | - repo: https://github.com/pre-commit/pre-commit-hooks 24 | rev: v4.0.1 25 | hooks: 26 | - id: detect-private-key 27 | - id: detect-aws-credentials 28 | args: 29 | - --allow-missing-credentials 30 | -------------------------------------------------------------------------------- /.terraform-docs.yml: 
-------------------------------------------------------------------------------- 1 | sections: 2 | show: 3 | - header 4 | - requirements 5 | - providers 6 | - inputs 7 | - outputs 8 | 9 | sort: 10 | enabled: false 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ### [0.1.3](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.2...v0.1.3) (2022-01-27) 4 | 5 | 6 | ### Improvements 7 | 8 | * tighten IAM for Crawler and Lambda ([c9e7b61](https://www.github.com/nuuday/terraform-aws-cur/commit/c9e7b61273e4f8e68cd3a606e198f4d80f9b5f73)) 9 | 10 | 11 | ### Fixes 12 | 13 | * allow Glue Crawler to create new Athena partitions ([#31](https://www.github.com/nuuday/terraform-aws-cur/issues/31)) ([724f0e1](https://www.github.com/nuuday/terraform-aws-cur/commit/724f0e115c866c29cc4074ee6c7868f80b35fe1e)) 14 | * prevent KMS data source from being read during apply ([#27](https://www.github.com/nuuday/terraform-aws-cur/issues/27)) ([b3ffcc0](https://www.github.com/nuuday/terraform-aws-cur/commit/b3ffcc055589c2b2aaf084722c76ff6859782e7c)) 15 | * really make `cur_role_session_name` variable optional ([#32](https://www.github.com/nuuday/terraform-aws-cur/issues/32)) ([f4135d6](https://www.github.com/nuuday/terraform-aws-cur/commit/f4135d6d2248a7ae781b7c27984fd7f574f49bf2)) 16 | 17 | ### [0.1.2](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.1...v0.1.2) (2021-06-08) 18 | 19 | 20 | ### Improvements 21 | 22 | * support Terraform 1.0 ([#25](https://www.github.com/nuuday/terraform-aws-cur/issues/25)) ([f389ffd](https://www.github.com/nuuday/terraform-aws-cur/commit/f389ffd3b72b85838c788447d7517fa33d554d31)) 23 | 24 | ### [0.1.1](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.0...v0.1.1) (2021-06-02) 25 | 26 | 27 | ### Fixes 28 | 29 | * prevent conflicting operations on creating S3 resources
([#22](https://www.github.com/nuuday/terraform-aws-cur/issues/22)) ([1a99651](https://www.github.com/nuuday/terraform-aws-cur/commit/1a99651a4259451cdc830633159e3f9cb12b3be1)) 30 | 31 | 32 | ### Improvements 33 | 34 | * optionally use KMS CMK for S3 SSE ([#24](https://www.github.com/nuuday/terraform-aws-cur/issues/24)) ([6cdc902](https://www.github.com/nuuday/terraform-aws-cur/commit/6cdc902dc19836665239f958883347f657d157e7)) 35 | 36 | ## 0.1.0 (2021-05-31) 37 | 38 | 39 | ### Features 40 | 41 | * optionally provision S3 bucket for CUR data ([#3](https://www.github.com/nuuday/terraform-aws-cur/issues/3)) ([96f16ee](https://www.github.com/nuuday/terraform-aws-cur/commit/96f16ee42f238454bab82bef2a985d32275a92c5)) 42 | * populate Athena Table with CUR data ([#6](https://www.github.com/nuuday/terraform-aws-cur/issues/6)) ([ffec446](https://www.github.com/nuuday/terraform-aws-cur/commit/ffec44651e3d51ce067d8e856b86fb30585987c8)) 43 | 44 | 45 | ### Fixes 46 | 47 | * actually create report definition ([#9](https://www.github.com/nuuday/terraform-aws-cur/issues/9)) ([a89251a](https://www.github.com/nuuday/terraform-aws-cur/commit/a89251a177eae79b7ca1e86b8d38994dada34079)) 48 | * prevent Lambda from erroring out if Crawler is already running ([#11](https://www.github.com/nuuday/terraform-aws-cur/issues/11)) ([a914ae3](https://www.github.com/nuuday/terraform-aws-cur/commit/a914ae38fb67a03542e0fe51d685ad76c3d29c55)) 49 | 50 | 51 | ### Improvements 52 | 53 | * output useful values from provisioned resources ([#19](https://www.github.com/nuuday/terraform-aws-cur/issues/19)) ([7904c5e](https://www.github.com/nuuday/terraform-aws-cur/commit/7904c5efa974cdcc1458b551a8f7d1fd882fe845)) 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Nuuday A/S 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS Cost & Usage Reports 2 | 3 | This Terraform module stands up a Cost and Usage Report, together with necessary services making the CUR data queryable in Athena. 4 | 5 | ## Overview 6 | 7 | The overall architecture looks like the illustration below 8 | ![AWS Cost and Usage Reports overview](./assets/overview.png) 9 | 10 | 1. AWS delivers Cost and Usage Reports data to the S3 bucket continuously 11 | 2. Whenever new CUR data is delivered, a Glue Crawler makes sure the newly available CUR data is processed and made available in the Data Catalog 12 | 3. Athena provides an SQL interface to the CUR data, using the Data Catalog as its data source 13 | 4. 
QuickSight visualizes the data returned from querying Athena 14 | 15 | 16 | ## Requirements 17 | 18 | | Name | Version | 19 | |------|---------| 20 | | [terraform](#requirement\_terraform) | >= 0.13, < 2.0 | 21 | | [archive](#requirement\_archive) | ~> 2.0 | 22 | | [aws](#requirement\_aws) | ~> 3.29 | 23 | 24 | ## Providers 25 | 26 | | Name | Version | 27 | |------|---------| 28 | | [aws.cur](#provider\_aws.cur) | ~> 3.29 | 29 | | [archive](#provider\_archive) | ~> 2.0 | 30 | | [aws](#provider\_aws) | ~> 3.29 | 31 | 32 | ## Inputs 33 | 34 | | Name | Description | Type | Default | Required | 35 | |------|-------------|------|---------|:--------:| 36 | | [use\_existing\_s3\_bucket](#input\_use\_existing\_s3\_bucket) | Whether to use an existing S3 bucket or create a new one. Regardless, `s3_bucket_name` must contain the name of the bucket. | `bool` | n/a | yes | 37 | | [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket into which CUR will put the cost data. | `string` | n/a | yes | 38 | | [s3\_use\_existing\_kms\_key](#input\_s3\_use\_existing\_kms\_key) | Whether to use an existing KMS CMK for S3 SSE. | `bool` | n/a | yes | 39 | | [s3\_kms\_key\_alias](#input\_s3\_kms\_key\_alias) | Alias for the KMS CMK, existing or otherwise. | `string` | `""` | no | 40 | | [report\_name](#input\_report\_name) | Name of the Cost and Usage Report which will be created. | `string` | n/a | yes | 41 | | [report\_frequency](#input\_report\_frequency) | How often the Cost and Usage Report will be generated. HOURLY or DAILY. | `string` | n/a | yes | 42 | | [report\_versioning](#input\_report\_versioning) | Whether reports should be overwritten or new ones should be created. | `string` | n/a | yes | 43 | | [report\_format](#input\_report\_format) | Format for report. Valid values are: textORcsv, Parquet. If Parquet is used, then Compression must also be Parquet. 
| `string` | n/a | yes | 44 | [report\_compression](#input\_report\_compression) | Compression format for report. Valid values are: GZIP, ZIP, Parquet. If Parquet is used, then format must also be Parquet. | `string` | n/a | yes | 45 | [report\_additional\_artifacts](#input\_report\_additional\_artifacts) | A list of additional artifacts. Valid values are: REDSHIFT, QUICKSIGHT, ATHENA. When ATHENA exists within additional\_artifacts, no other artifact type can be declared and report\_versioning must be OVERWRITE\_REPORT. | `set(string)` | n/a | yes | 46 | [s3\_bucket\_prefix](#input\_s3\_bucket\_prefix) | Prefix in the S3 bucket to put reports. | `string` | `""` | no | 47 | [cur\_role\_arn](#input\_cur\_role\_arn) | ARN of the role to assume in order to provision the Cost and Usage Reports S3 bucket in us-east-1. | `string` | `""` | no | 48 | [cur\_role\_session\_name](#input\_cur\_role\_session\_name) | Session name to use when assuming `cur_role_arn`. | `string` | `null` | no | 49 | [lambda\_log\_group\_retention\_days](#input\_lambda\_log\_group\_retention\_days) | Number of days to retain logs from the Lambda function, which ensures Glue Crawler runs when new CUR data is available. | `number` | `14` | no | 50 | [glue\_crawler\_create\_log\_group](#input\_glue\_crawler\_create\_log\_group) | Whether to create a CloudWatch Log Group for the Glue Crawler. Crawlers share Log Group, and this gives the option of managing the Log Group with retention through this module. | `bool` | `true` | no | 51 | [glue\_crawler\_log\_group\_retention\_days](#input\_glue\_crawler\_log\_group\_retention\_days) | Number of days to retain logs from the Glue Crawler, which populates the Athena table whenever new CUR data is available. | `number` | `14` | no | 52 | [tags](#input\_tags) | Tags which will be applied to provisioned resources.
| `map(string)` | `{}` | no | 53 | 54 | ## Outputs 55 | 56 | | Name | Description | 57 | |------|-------------| 58 | | [s3\_bucket\_name](#output\_s3\_bucket\_name) | Name of S3 bucket used for storing CUR data. This may be provisioned by this module or not. | 59 | | [s3\_bucket\_prefix](#output\_s3\_bucket\_prefix) | Prefix used for storing CUR data inside the S3 bucket. | 60 | | [s3\_bucket\_arn](#output\_s3\_bucket\_arn) | ARN of S3 bucket used for storing CUR data. This may be provisioned by this module or not. | 61 | | [s3\_bucket\_region](#output\_s3\_bucket\_region) | Region where the S3 bucket used for storing CUR data is provisioned. This may be provisioned by this module or not. | 62 | | [report\_name](#output\_report\_name) | Name of the provisioned Cost and Usage Report. | 63 | | [lambda\_crawler\_trigger\_arn](#output\_lambda\_crawler\_trigger\_arn) | ARN of the Lambda function responsible for triggering the Glue Crawler when new CUR data is uploaded into the S3 bucket. | 64 | | [lambda\_crawler\_trigger\_role\_arn](#output\_lambda\_crawler\_trigger\_role\_arn) | ARN of the IAM role used by the Lambda function responsible for starting the Glue Crawler. | 65 | | [crawler\_arn](#output\_crawler\_arn) | ARN of the Glue Crawler responsible for populating the Catalog Database with new CUR data. | 66 | | [crawler\_role\_arn](#output\_crawler\_role\_arn) | ARN of the IAM role used by the Glue Crawler responsible for populating the Catalog Database with new CUR data. | 67 | | [glue\_catalog\_database\_name](#output\_glue\_catalog\_database\_name) | Name of the Glue Catalog Database which is populated with CUR data. | 68 | 69 | 70 | 71 | ## References 72 | 73 | It is based on [AWS: Query and Visualize AWS Cost and Usage](https://aws.amazon.com/blogs/big-data/query-and-visualize-aws-cost-and-usage-data-using-amazon-athena-and-amazon-quicksight/). 74 | Check out the blog post and the linked resources for an explanation of the concepts. 
75 | 76 | For more information about Cost & Usage Reports in general, see [AWS: What are Cost and Usage Reports?](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html) 77 | 78 | 79 | ## Requirements 80 | 81 | | Name | Version | 82 | |------|---------| 83 | | [terraform](#requirement\_terraform) | >= 0.13, < 2.0 | 84 | | [archive](#requirement\_archive) | ~> 2.0 | 85 | | [aws](#requirement\_aws) | ~> 3.29 | 86 | 87 | ## Providers 88 | 89 | | Name | Version | 90 | |------|---------| 91 | | [aws.cur](#provider\_aws.cur) | ~> 3.29 | 92 | | [aws](#provider\_aws) | ~> 3.29 | 93 | | [archive](#provider\_archive) | ~> 2.0 | 94 | 95 | ## Inputs 96 | 97 | | Name | Description | Type | Default | Required | 98 | |------|-------------|------|---------|:--------:| 99 | | [use\_existing\_s3\_bucket](#input\_use\_existing\_s3\_bucket) | Whether to use an existing S3 bucket or create a new one. Regardless, `s3_bucket_name` must contain the name of the bucket. | `bool` | n/a | yes | 100 | | [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket into which CUR will put the cost data. | `string` | n/a | yes | 101 | | [s3\_use\_existing\_kms\_key](#input\_s3\_use\_existing\_kms\_key) | Whether to use an existing KMS CMK for S3 SSE. | `bool` | n/a | yes | 102 | | [s3\_kms\_key\_alias](#input\_s3\_kms\_key\_alias) | Alias for the KMS CMK, existing or otherwise. | `string` | `""` | no | 103 | | [report\_name](#input\_report\_name) | Name of the Cost and Usage Report which will be created. | `string` | n/a | yes | 104 | | [report\_frequency](#input\_report\_frequency) | How often the Cost and Usage Report will be generated. HOURLY or DAILY. | `string` | n/a | yes | 105 | | [report\_versioning](#input\_report\_versioning) | Whether reports should be overwritten or new ones should be created. | `string` | n/a | yes | 106 | | [report\_format](#input\_report\_format) | Format for report. Valid values are: textORcsv, Parquet. 
If Parquet is used, then Compression must also be Parquet. | `string` | n/a | yes | 107 | | [report\_compression](#input\_report\_compression) | Compression format for report. Valid values are: GZIP, ZIP, Parquet. If Parquet is used, then format must also be Parquet. | `string` | n/a | yes | 108 | | [report\_additional\_artifacts](#input\_report\_additional\_artifacts) | A list of additional artifacts. Valid values are: REDSHIFT, QUICKSIGHT, ATHENA. When ATHENA exists within additional\_artifacts, no other artifact type can be declared and report\_versioning must be OVERWRITE\_REPORT. | `set(string)` | n/a | yes | 109 | | [s3\_bucket\_prefix](#input\_s3\_bucket\_prefix) | Prefix in the S3 bucket to put reports. | `string` | `""` | no | 110 | | [cur\_role\_arn](#input\_cur\_role\_arn) | ARN of the role to assume in order to provision the Cost and Usage Reports S3 bucket in us-east-1. | `string` | `""` | no | 111 | | [cur\_role\_session\_name](#input\_cur\_role\_session\_name) | Session name to use when assuming `cur_role_arn`. | `string` | `null` | no | 112 | | [lambda\_log\_group\_retention\_days](#input\_lambda\_log\_group\_retention\_days) | Number of days to retain logs from the Lambda function, which ensures Glue Crawler runs when new CUR data is available. | `number` | `14` | no | 113 | | [glue\_crawler\_create\_log\_group](#input\_glue\_crawler\_create\_log\_group) | Whether to create a CloudWatch Log Group for the Glue Crawler. Crawlers share Log Group, and this gives the option of managing the Log Group with retention through this module. | `bool` | `true` | no | 114 | | [glue\_crawler\_log\_group\_retention\_days](#input\_glue\_crawler\_log\_group\_retention\_days) | Number of days to retain logs from the Glue Crawler, which populates the Athena table whenever new CUR data is available. | `number` | `14` | no | 115 | | [tags](#input\_tags) | Tags which will be applied to provisioned resources. 
| `map(string)` | `{}` | no | 116 | 117 | ## Outputs 118 | 119 | | Name | Description | 120 | |------|-------------| 121 | | [s3\_bucket\_name](#output\_s3\_bucket\_name) | Name of S3 bucket used for storing CUR data. This may be provisioned by this module or not. | 122 | | [s3\_bucket\_prefix](#output\_s3\_bucket\_prefix) | Prefix used for storing CUR data inside the S3 bucket. | 123 | | [s3\_bucket\_arn](#output\_s3\_bucket\_arn) | ARN of S3 bucket used for storing CUR data. This may be provisioned by this module or not. | 124 | | [s3\_bucket\_region](#output\_s3\_bucket\_region) | Region where the S3 bucket used for storing CUR data is provisioned. This may be provisioned by this module or not. | 125 | | [report\_name](#output\_report\_name) | Name of the provisioned Cost and Usage Report. | 126 | | [lambda\_crawler\_trigger\_arn](#output\_lambda\_crawler\_trigger\_arn) | ARN of the Lambda function responsible for triggering the Glue Crawler when new CUR data is uploaded into the S3 bucket. | 127 | | [lambda\_crawler\_trigger\_role\_arn](#output\_lambda\_crawler\_trigger\_role\_arn) | ARN of the IAM role used by the Lambda function responsible for starting the Glue Crawler. | 128 | | [crawler\_arn](#output\_crawler\_arn) | ARN of the Glue Crawler responsible for populating the Catalog Database with new CUR data. | 129 | | [crawler\_role\_arn](#output\_crawler\_role\_arn) | ARN of the IAM role used by the Glue Crawler responsible for populating the Catalog Database with new CUR data. | 130 | | [glue\_catalog\_database\_name](#output\_glue\_catalog\_database\_name) | Name of the Glue Catalog Database which is populated with CUR data. 
| 131 | -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f994c94c8a5c5aa3a24c7488432d5e9fdb8bc466bc79614e621eac80df839e63 3 | size 14200 4 | -------------------------------------------------------------------------------- /data.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "current" {} 2 | 3 | data "aws_region" "current" {} 4 | 5 | data "aws_partition" "current" {} 6 | -------------------------------------------------------------------------------- /examples/complete/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 0.13, < 2.0" 3 | } 4 | 5 | module "this" { 6 | source = "../../" 7 | 8 | 9 | 10 | use_existing_s3_bucket = true 11 | s3_bucket_name = "nuuday-cost-usage-reports" 12 | s3_bucket_prefix = "reports" 13 | s3_use_existing_kms_key = true 14 | s3_kms_key_alias = "aws/s3" 15 | 16 | report_name = "example" 17 | report_frequency = "HOURLY" 18 | report_additional_artifacts = [ 19 | "ATHENA", 20 | ] 21 | 22 | report_format = "Parquet" 23 | report_compression = "Parquet" 24 | report_versioning = "OVERWRITE_REPORT" 25 | } 26 | -------------------------------------------------------------------------------- /examples/complete/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nuuday/terraform-aws-cur/24f83285975e2ad10ca2e25cff31e1486bd14e51/examples/complete/outputs.tf -------------------------------------------------------------------------------- /examples/complete/variables.tf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nuuday/terraform-aws-cur/24f83285975e2ad10ca2e25cff31e1486bd14e51/examples/complete/variables.tf -------------------------------------------------------------------------------- /glue.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | # This is defined by AWS. 3 | glue_log_group_default_name = "/aws-glue/crawlers" 4 | } 5 | 6 | # Provisions Glue Crawler and Catalog Database. 7 | # Crawler will, when run, populate the Catalog Database with a table representing the CUR data in S3. 8 | 9 | resource "aws_glue_crawler" "this" { 10 | name = "cur-crawler" 11 | database_name = aws_glue_catalog_database.cur.name 12 | role = aws_iam_role.crawler.name 13 | 14 | s3_target { 15 | path = "s3://${var.s3_bucket_name}/${var.s3_bucket_prefix}/${var.report_name}/${var.report_name}" 16 | } 17 | 18 | tags = var.tags 19 | 20 | depends_on = [aws_s3_bucket.cur] 21 | } 22 | 23 | resource "aws_glue_catalog_database" "cur" { 24 | name = "${var.report_name}-db" 25 | description = "Contains CUR data based on contents from the S3 bucket '${var.s3_bucket_name}'" 26 | } 27 | 28 | # Crawler role 29 | resource "aws_iam_role" "crawler" { 30 | name_prefix = "cur-crawler" 31 | assume_role_policy = data.aws_iam_policy_document.crawler_assume.json 32 | 33 | tags = var.tags 34 | } 35 | 36 | resource "aws_iam_role_policy" "crawler" { 37 | role = aws_iam_role.crawler.name 38 | policy = data.aws_iam_policy_document.crawler.json 39 | } 40 | 41 | data "aws_iam_policy_document" "crawler_assume" { 42 | statement { 43 | effect = "Allow" 44 | 45 | principals { 46 | type = "Service" 47 | identifiers = ["glue.amazonaws.com"] 48 | } 49 | 50 | actions = ["sts:AssumeRole"] 51 | } 52 | } 53 | 54 | data "aws_iam_policy_document" "crawler" { 55 | statement { 56 | sid = "S3Decrypt" 57 | 58 | effect = "Allow" 59 | 60 | actions = [ 61 | "kms:GenerateDataKey", 62 | "kms:Decrypt", 63 | "kms:Encrypt", 64 | ] 65 | 66 | resources = 
[var.s3_use_existing_kms_key ? data.aws_kms_key.s3[0].arn : aws_kms_key.s3[0].arn] 67 | } 68 | 69 | statement { 70 | sid = "Glue" 71 | 72 | effect = "Allow" 73 | 74 | actions = [ 75 | "glue:ImportCatalogToGlue", 76 | "glue:GetDatabase", 77 | "glue:UpdateDatabase", 78 | "glue:GetTable", 79 | "glue:CreateTable", 80 | "glue:UpdateTable", 81 | "glue:BatchGetPartition", 82 | "glue:UpdatePartition", 83 | "glue:BatchCreatePartition", 84 | ] 85 | 86 | resources = [ 87 | aws_glue_catalog_database.cur.arn, 88 | "arn:${data.aws_partition.current.partition}:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:catalog", 89 | "arn:${data.aws_partition.current.partition}:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:table/${aws_glue_catalog_database.cur.name}/*", 90 | ] 91 | } 92 | 93 | statement { 94 | sid = "CloudWatch" 95 | 96 | effect = "Allow" 97 | 98 | actions = [ 99 | "logs:CreateLogStream", 100 | "logs:CreateLogGroup", 101 | "logs:PutLogEvents", 102 | ] 103 | 104 | resources = [ 105 | "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.glue_log_group_default_name}", 106 | "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.glue_log_group_default_name}:log-stream:*", 107 | ] 108 | } 109 | 110 | statement { 111 | sid = "S3" 112 | 113 | effect = "Allow" 114 | 115 | actions = [ 116 | "s3:GetObject", 117 | "s3:ListBucket", 118 | ] 119 | 120 | resources = [ 121 | "${var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn}", 122 | "${var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn}/*", 123 | ] 124 | } 125 | } 126 | 127 | # Optionally pre-create log group for Glue Crawlers. 128 | # Crawlers share Log Group for whatever reason I do not know. 
129 | #
130 | # Anyway, Crawlers will automatically create this Log Group
131 | # with infinite retention, which is not desirable.
132 | # This gives module consumers the option of letting this module create it/manage it.
133 | #
134 | # Accept default encryption. Crawler logs are not sensitive.
135 | # #tfsec:ignore:AWS089
136 | resource "aws_cloudwatch_log_group" "crawler" {
137 |   count = var.glue_crawler_create_log_group ? 1 : 0
138 | 
139 |   name              = local.glue_log_group_default_name
140 |   retention_in_days = var.glue_crawler_log_group_retention_days
141 | }
142 | 
--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
1 | resource "aws_cur_report_definition" "this" {
2 |   report_name                = var.report_name
3 |   time_unit                  = var.report_frequency
4 |   format                     = var.report_format
5 |   compression                = var.report_compression
6 |   report_versioning          = var.report_versioning
7 |   additional_artifacts       = var.report_additional_artifacts
8 |   additional_schema_elements = ["RESOURCES"]
9 |   # Destination: the existing bucket (data source) or the bucket created by this module.
10 |   s3_bucket = var.s3_bucket_name
11 |   s3_region = var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].region : aws_s3_bucket.cur[0].region
12 |   s3_prefix = var.s3_bucket_prefix
13 |   # When this module manages the bucket, wait for its bucket policy before creating the report.
14 |   depends_on = [
15 |     aws_s3_bucket_policy.cur,
16 |   ]
17 |   # Provisioned through the aliased "cur" provider rather than the default one.
18 |   provider = aws.cur
19 | }
20 | # Looked up only when var.use_existing_s3_bucket is true.
21 | data "aws_s3_bucket" "cur" {
22 |   count = var.use_existing_s3_bucket ? 1 : 0
23 | 
24 |   bucket = var.s3_bucket_name
25 | }
26 | # Looked up only when var.s3_use_existing_kms_key is true; trimprefix makes the "alias/" prefix optional in the variable.
27 | data "aws_kms_key" "s3" {
28 |   count = var.s3_use_existing_kms_key ? 1 : 0
29 | 
30 |   key_id = "alias/${trimprefix(var.s3_kms_key_alias, "alias/")}"
31 | }
32 | 
33 | # tfsec:ignore:AWS019 (disable auto-rotation for now)
34 | resource "aws_kms_key" "s3" {
35 |   count = var.s3_use_existing_kms_key ? 0 : 1
36 | 
37 |   description = "For server-side encryption in the '${var.s3_bucket_name}' S3 bucket."
38 | 
39 |   tags = var.tags
40 | }
41 | # Alias for the module-managed key; same count condition as aws_kms_key.s3.
42 | resource "aws_kms_alias" "s3" {
43 |   count = var.s3_use_existing_kms_key ? 0 : 1
44 | 
45 |   name          = "alias/${trimprefix(var.s3_kms_key_alias, "alias/")}"
46 |   target_key_id = aws_kms_key.s3[0].key_id
47 | }
48 | 
49 | # Versioning and logging disabled.
50 | # tfsec:ignore:AWS077 tfsec:ignore:AWS002
51 | resource "aws_s3_bucket" "cur" {
52 |   count = var.use_existing_s3_bucket ? 0 : 1
53 | 
54 |   bucket = var.s3_bucket_name
55 |   acl    = "private"
56 | 
57 |   versioning {
58 |     enabled = false
59 |   }
60 |   # Default encryption uses either the existing KMS key or the key created above.
61 |   server_side_encryption_configuration {
62 |     rule {
63 |       apply_server_side_encryption_by_default {
64 |         kms_master_key_id = var.s3_use_existing_kms_key ? data.aws_kms_key.s3[0].arn : aws_kms_key.s3[0].arn
65 |         sse_algorithm     = "aws:kms"
66 |       }
67 |     }
68 |   }
69 | 
70 |   tags = var.tags
71 | }
72 | # Enables all four public-access-block settings on the module-managed bucket.
73 | resource "aws_s3_bucket_public_access_block" "cur" {
74 |   count = var.use_existing_s3_bucket ? 0 : 1
75 | 
76 |   bucket = aws_s3_bucket.cur[0].id
77 | 
78 |   block_public_acls       = true
79 |   block_public_policy     = true
80 |   ignore_public_acls      = true
81 |   restrict_public_buckets = true
82 | }
83 | # Attached only to the module-managed bucket, after its public access block (see depends_on).
84 | resource "aws_s3_bucket_policy" "cur" {
85 |   count = var.use_existing_s3_bucket ? 0 : 1
86 | 
87 |   bucket = aws_s3_bucket.cur[0].id
88 |   policy = data.aws_iam_policy_document.s3_cur[0].json
89 | 
90 |   depends_on = [aws_s3_bucket_public_access_block.cur]
91 | }
92 | # Lets the billingreports.amazonaws.com service read the bucket ACL/policy and put objects into the bucket.
93 | data "aws_iam_policy_document" "s3_cur" {
94 |   count = var.use_existing_s3_bucket ? 0 : 1
95 | 
96 |   statement {
97 |     principals {
98 |       type        = "Service"
99 |       identifiers = ["billingreports.amazonaws.com"]
100 |     }
101 | 
102 |     actions = [
103 |       "s3:GetBucketAcl",
104 |       "s3:GetBucketPolicy",
105 |     ]
106 | 
107 |     resources = [aws_s3_bucket.cur[0].arn]
108 |   }
109 | 
110 |   statement {
111 |     principals {
112 |       type        = "Service"
113 |       identifiers = ["billingreports.amazonaws.com"]
114 |     }
115 | 
116 |     actions = ["s3:PutObject"]
117 | 
118 |     resources = ["${aws_s3_bucket.cur[0].arn}/*"]
119 |   }
120 | }
121 | 
--------------------------------------------------------------------------------
/notifications.tf:
--------------------------------------------------------------------------------
1 | locals {
2 |   lambda_function_name = "${var.report_name}-crawler-trigger"
3 | }
4 | # Invokes the crawler-trigger Lambda for every object created under the CUR prefix whose key ends in ".parquet".
5 | resource "aws_s3_bucket_notification" "cur" {
6 |   bucket = var.s3_bucket_name
7 | 
8 |   lambda_function {
9 |     lambda_function_arn = aws_lambda_function.run_crawler.arn
10 |     events              = ["s3:ObjectCreated:*"]
11 |     filter_prefix       = "${var.s3_bucket_prefix}/" # NOTE(review): evaluates to "/" when var.s3_bucket_prefix is empty (its default) - confirm object keys still match.
12 |     filter_suffix       = ".parquet"
13 |   }
14 |   # Wait for the bucket, the Lambda invoke permission and the bucket policy before configuring notifications.
15 |   depends_on = [
16 |     aws_s3_bucket.cur,
17 |     aws_lambda_permission.allow_bucket,
18 |     aws_s3_bucket_policy.cur,
19 |   ]
20 | }
21 | # Function packaged from src/index.js (see data.archive_file.lambda); it receives the crawler name via CRAWLER_NAME.
22 | resource "aws_lambda_function" "run_crawler" {
23 |   function_name = local.lambda_function_name
24 | 
25 |   role = aws_iam_role.lambda.arn
26 | 
27 |   runtime          = "nodejs12.x" # NOTE(review): old Lambda runtime - confirm it is still supported before deploying.
28 |   handler          = "index.handler"
29 |   filename         = data.archive_file.lambda.output_path
30 |   source_code_hash = data.archive_file.lambda.output_base64sha256
31 |   timeout          = 30
32 | 
33 |   environment {
34 |     variables = {
35 |       CRAWLER_NAME = aws_glue_crawler.this.name
36 |     }
37 |   }
38 |   # The role policy and the pre-created log group must exist before the function.
39 |   depends_on = [
40 |     aws_iam_role_policy.lambda,
41 |     aws_cloudwatch_log_group.lambda,
42 |   ]
43 | }
44 | # Zips the single handler file into lambda.zip inside the module directory.
45 | data "archive_file" "lambda" {
46 |   type        = "zip"
47 |   source_file = "${path.module}/src/index.js"
48 |   output_path = "${path.module}/lambda.zip"
49 | }
50 | # Allows S3 to invoke the function, restricted to the CUR bucket and the current account.
51 | resource "aws_lambda_permission" "allow_bucket" {
52 |   statement_id   = "AllowExecutionFromS3Bucket"
53 |   action         = "lambda:InvokeFunction"
54 |   function_name  = aws_lambda_function.run_crawler.arn
55 |   source_account = data.aws_caller_identity.current.account_id
56 |   principal      = "s3.amazonaws.com"
57 |   source_arn     = var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn
58 | }
59 | # Execution role for the function; assumable by lambda.amazonaws.com (see crawler_trigger_assume).
60 | resource "aws_iam_role" "lambda" {
61 |   name               = "${var.report_name}-crawler-trigger"
62 |   assume_role_policy = data.aws_iam_policy_document.crawler_trigger_assume.json
63 | }
64 | 
65 | resource "aws_iam_role_policy" "lambda" {
66 |   role   = aws_iam_role.lambda.name
67 |   policy = data.aws_iam_policy_document.crawler_trigger.json
68 | }
69 | 
70 | data "aws_iam_policy_document" "crawler_trigger_assume" {
71 |   statement {
72 |     effect = "Allow"
73 | 
74 |     principals {
75 |       type        = "Service"
76 |       identifiers = ["lambda.amazonaws.com"]
77 |     }
78 | 
79 |     actions = ["sts:AssumeRole"]
80 |   }
81 | }
82 | # Permissions for the function: write to its own log group and start the Glue Crawler.
83 | data "aws_iam_policy_document" "crawler_trigger" {
84 |   statement {
85 |     sid = "CloudWatch"
86 | 
87 |     effect = "Allow"
88 | 
89 |     actions = [
90 |       "logs:CreateLogStream",
91 |       "logs:PutLogEvents",
92 |     ]
93 | 
94 |     resources = ["${aws_cloudwatch_log_group.lambda.arn}:*"]
95 |   }
96 | 
97 |   statement {
98 |     sid = "Glue"
99 | 
100 |     effect = "Allow"
101 | 
102 |     actions = [
103 |       "glue:StartCrawler",
104 |     ]
105 | 
106 |     resources = [aws_glue_crawler.this.arn]
107 |   }
108 | }
109 | 
110 | # Pre-create log group for the Lambda function.
111 | # Otherwise it will be created by Lambda itself with infinite retention.
112 | #
113 | # Accept default encryption. This Lambda does not produce sensitive logs.
114 | # #tfsec:ignore:AWS089
115 | resource "aws_cloudwatch_log_group" "lambda" {
116 |   name              = "/aws/lambda/${local.lambda_function_name}"
117 |   retention_in_days = var.lambda_log_group_retention_days
118 | }
119 | 
--------------------------------------------------------------------------------
/outputs.tf:
--------------------------------------------------------------------------------
1 | output "s3_bucket_name" {
2 |   description = "Name of S3 bucket used for storing CUR data. This may be provisioned by this module or not."
3 |   value       = aws_cur_report_definition.this.s3_bucket
4 | }
5 | # Exposes the report definition's prefix, not the bucket name.
6 | output "s3_bucket_prefix" {
7 |   description = "Prefix used for storing CUR data inside the S3 bucket."
8 |   value       = aws_cur_report_definition.this.s3_prefix
9 | }
10 | # Exactly one of the two lists is non-empty, depending on var.use_existing_s3_bucket.
11 | output "s3_bucket_arn" {
12 |   description = "ARN of S3 bucket used for storing CUR data. This may be provisioned by this module or not."
13 |   value       = coalescelist(aws_s3_bucket.cur.*.arn, data.aws_s3_bucket.cur.*.arn)[0]
14 | }
15 | 
16 | output "s3_bucket_region" {
17 |   description = "Region where the S3 bucket used for storing CUR data is provisioned. This may be provisioned by this module or not."
18 |   value       = aws_cur_report_definition.this.s3_region
19 | }
20 | 
21 | output "report_name" {
22 |   description = "Name of the provisioned Cost and Usage Report."
23 |   value       = aws_cur_report_definition.this.report_name
24 | }
25 | 
26 | output "lambda_crawler_trigger_arn" {
27 |   description = "ARN of the Lambda function responsible for triggering the Glue Crawler when new CUR data is uploaded into the S3 bucket."
28 |   value       = aws_lambda_function.run_crawler.arn
29 | }
30 | 
31 | output "lambda_crawler_trigger_role_arn" {
32 |   description = "ARN of the IAM role used by the Lambda function responsible for starting the Glue Crawler."
33 |   value       = aws_iam_role.lambda.arn
34 | }
35 | # ARN of the Glue Crawler itself; the trigger Lambda's ARN is exposed by lambda_crawler_trigger_arn.
36 | output "crawler_arn" {
37 |   description = "ARN of the Glue Crawler responsible for populating the Catalog Database with new CUR data."
38 |   value       = aws_glue_crawler.this.arn
39 | }
40 | 
41 | output "crawler_role_arn" {
42 |   description = "ARN of the IAM role used by the Glue Crawler responsible for populating the Catalog Database with new CUR data."
43 |   value       = aws_iam_role.crawler.arn
44 | }
45 | 
46 | output "glue_catalog_database_name" {
47 |   description = "Name of the Glue Catalog Database which is populated with CUR data."
48 |   value       = aws_glue_catalog_database.cur.name
49 | }
50 | 
--------------------------------------------------------------------------------
/providers.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 |   required_version = ">= 0.13, < 2.0"
3 | 
4 |   required_providers {
5 |     aws = {
6 |       source  = "hashicorp/aws"
7 |       version = "~> 3.29"
8 |     }
9 | 
10 |     archive = {
11 |       source  = "hashicorp/archive"
12 |       version = "~> 2.0"
13 |     }
14 |   }
15 | }
16 | 
17 | provider "aws" {
18 |   # CUR is only available in us-east-1.
19 |   # aws_cur_report_definition.this is the only resource using this provider.
20 | alias = "cur" 21 | 22 | region = "us-east-1" 23 | 24 | assume_role { 25 | role_arn = var.cur_role_arn 26 | session_name = var.cur_role_session_name 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | const AWS = require('aws-sdk'); 2 | 3 | exports.handler = function(event, context, callback) { 4 | const glue = new AWS.Glue(); 5 | 6 | glue.startCrawler({ Name: process.env.CRAWLER_NAME }, function(err, data) { 7 | if (err) { 8 | // Check if Crawler is already running 9 | const response = JSON.parse(this.httpResponse.body); 10 | if (response['__type'] == 'CrawlerRunningException') { 11 | console.log('Crawler already running; ignoring trigger.'); 12 | 13 | callback(null, response.Message); 14 | } 15 | } 16 | else { 17 | console.log("Successfully triggered crawler"); 18 | } 19 | }); 20 | } 21 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | variable "use_existing_s3_bucket" { 2 | description = "Whether to use an existing S3 bucket or create a new one. Regardless, `s3_bucket_name` must contain the name of the bucket." 3 | type = bool 4 | } 5 | 6 | variable "s3_bucket_name" { 7 | description = "Name of the S3 bucket into which CUR will put the cost data." 8 | type = string 9 | } 10 | 11 | variable "s3_use_existing_kms_key" { 12 | description = "Whether to use an existing KMS CMK for S3 SSE." 13 | type = bool 14 | } 15 | 16 | variable "s3_kms_key_alias" { 17 | description = "Alias for the KMS CMK, existing or otherwise." 18 | type = string 19 | default = "" 20 | } 21 | 22 | variable "report_name" { 23 | description = "Name of the Cost and Usage Report which will be created." 
24 |   type        = string
25 | }
26 | 
27 | variable "report_frequency" {
28 |   description = "How often the Cost and Usage Report will be generated. HOURLY or DAILY."
29 |   type        = string
30 | }
31 | 
32 | variable "report_versioning" {
33 |   description = "Whether reports should be overwritten or new ones should be created."
34 |   type        = string
35 | }
36 | 
37 | variable "report_format" {
38 |   description = "Format for report. Valid values are: textORcsv, Parquet. If Parquet is used, then Compression must also be Parquet."
39 |   type        = string
40 | }
41 | 
42 | variable "report_compression" {
43 |   description = "Compression format for report. Valid values are: GZIP, ZIP, Parquet. If Parquet is used, then format must also be Parquet."
44 |   type        = string
45 | }
46 | 
47 | variable "report_additional_artifacts" {
48 |   description = "A list of additional artifacts. Valid values are: REDSHIFT, QUICKSIGHT, ATHENA. When ATHENA exists within additional_artifacts, no other artifact type can be declared and report_versioning must be OVERWRITE_REPORT."
49 |   type        = set(string)
50 | }
51 | 
52 | variable "s3_bucket_prefix" {
53 |   description = "Prefix in the S3 bucket to put reports."
54 |   type        = string
55 |   default     = ""
56 | }
57 | # Role for the aliased us-east-1 provider, which manages only aws_cur_report_definition.this (the bucket uses the default provider).
58 | variable "cur_role_arn" {
59 |   description = "ARN of the role to assume in order to provision the Cost and Usage Report definition in us-east-1."
60 |   type        = string
61 |   default     = ""
62 | }
63 | 
64 | variable "cur_role_session_name" {
65 |   description = "Session name to use when assuming `cur_role_arn`."
66 |   type        = string
67 |   default     = null
68 | }
69 | 
70 | variable "lambda_log_group_retention_days" {
71 |   description = "Number of days to retain logs from the Lambda function, which ensures Glue Crawler runs when new CUR data is available."
72 |   type        = number
73 |   default     = 14
74 | }
75 | 
76 | variable "glue_crawler_create_log_group" {
77 |   description = "Whether to create a CloudWatch Log Group for the Glue Crawler. Crawlers share Log Group, and this gives the option of managing the Log Group with retention through this module."
78 |   type        = bool
79 |   default     = true
80 | }
81 | 
82 | variable "glue_crawler_log_group_retention_days" {
83 |   description = "Number of days to retain logs from the Glue Crawler, which populates the Athena table whenever new CUR data is available."
84 |   type        = number
85 |   default     = 14
86 | }
87 | 
88 | variable "tags" {
89 |   description = "Tags which will be applied to provisioned resources."
90 |   type        = map(string)
91 |   default     = {}
92 | }
93 | 
--------------------------------------------------------------------------------