├── .gitattributes
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── linter.yml
│       └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .terraform-docs.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── assets
│   └── overview.png
├── data.tf
├── examples
│   └── complete
│       ├── main.tf
│       ├── outputs.tf
│       └── variables.tf
├── glue.tf
├── main.tf
├── notifications.tf
├── outputs.tf
├── providers.tf
├── src
│   └── index.js
└── variables.tf
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.png filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | updates:
4 | - package-ecosystem: terraform
5 | directory: /
6 | schedule:
7 | interval: daily
8 |
--------------------------------------------------------------------------------
/.github/workflows/linter.yml:
--------------------------------------------------------------------------------
1 | name: PR validation
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | linter:
10 | runs-on: ubuntu-latest
11 |
12 | strategy:
13 | fail-fast: false
14 | matrix:
15 | terraform: ['~0.13.0', '~0.14.0', '~0.15.0', '~1.0.0']
16 |
17 | steps:
18 | - name: Checkout
19 | uses: actions/checkout@v2
20 | with:
21 | lfs: true
22 | token: ${{ secrets.GITHUB_TOKEN }}
23 |
24 | - name: Install Terraform
25 | uses: hashicorp/setup-terraform@v1
26 | with:
27 | terraform_version: ${{ matrix.terraform }}
28 | terraform_wrapper: false
29 |
30 | - name: Install TFLint
31 | uses: terraform-linters/setup-tflint@v1
32 | with:
33 | tflint_version: v0.26.0
34 |
35 | - name: Install Python
36 | uses: actions/setup-python@v2
37 |
38 | - name: Install pre-commit & deps
39 | run: |
40 | pip install pre-commit
41 | curl -L "$(curl -Ls https://api.github.com/repos/tfsec/tfsec/releases/latest | grep -o -E "https://.+?tfsec-linux-amd64" | head -n1)" > tfsec && chmod +x tfsec && sudo mv tfsec /usr/bin/
42 |
43 | - name: Run pre-commit
44 | run: pre-commit run --color=always --show-diff-on-failure --all-files
45 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | docs:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout
13 | uses: actions/checkout@v2
14 | with:
15 | token: ${{ secrets.ROBOT_GITHUB_TOKEN }}
16 | lfs: true
17 | ref: master
18 |
19 | - name: Update module docs
20 | uses: terraform-docs/gh-actions@main
21 | with:
22 | working-dir: .
23 | output-file: README.md
24 | output-method: inject
25 | git-push: "true"
26 | git-push-user-name: cle-robot
27 | git-push-user-email: 77749875+cle-robot@users.noreply.github.com
28 | git-commit-message: "chore: update module docs [skip ci]"
29 |
30 | - name: Create release
31 | uses: GoogleCloudPlatform/release-please-action@v2
32 | with:
33 | token: ${{ secrets.ROBOT_GITHUB_TOKEN }}
34 | release-type: terraform-module
35 | changelog-types: '[{"type":"feat","section":"Features","hidden":false},{"type":"fix","section":"Fixes","hidden":false},{"type":"improvement","section":"Improvements","hidden":false}]'
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Local .terraform directories
2 | **/.terraform/*
3 |
4 | # .tfstate files
5 | *.tfstate
6 | *.tfstate.*
7 |
8 | # Crash log files
9 | crash.log
10 |
11 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most
12 | # .tfvars files are managed as part of configuration and so should be included in
13 | # version control.
14 | #
15 | # example.tfvars
16 |
17 | # Ignore override files as they are usually used to override resources locally and so
18 | # are not checked in
19 | override.tf
20 | override.tf.json
21 | *_override.tf
22 | *_override.tf.json
23 |
24 | *.swp
25 |
26 | # Terraform
27 | *.lock.hcl
28 |
29 | # Terragrunt
30 | **/.terragrunt-cache/*
31 |
32 | # Terragrunt-generated files
33 | **/*_terragrunt_generated.tf
34 |
35 | # VS Code
36 | **/.vscode/*
37 |
38 | # Compressed Lambda code
39 | lambda.zip
40 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/antonbabenko/pre-commit-terraform
3 | rev: v1.50.0
4 | hooks:
5 | - id: terraform_fmt
6 | - id: terraform_validate
7 | - id: terraform_tflint
8 | args:
9 | - '--args=--only=terraform_deprecated_interpolation'
10 | - '--args=--only=terraform_deprecated_index'
11 | - '--args=--only=terraform_unused_required_providers'
12 | - '--args=--only=terraform_unused_declarations'
13 | - '--args=--only=terraform_comment_syntax'
14 | - '--args=--only=terraform_documented_outputs'
15 | - '--args=--only=terraform_documented_variables'
16 | - '--args=--only=terraform_typed_variables'
17 | - '--args=--only=terraform_module_pinned_source'
18 | - '--args=--only=terraform_naming_convention'
19 | - '--args=--only=terraform_required_version'
20 | - '--args=--only=terraform_required_providers'
21 | - '--args=--only=terraform_standard_module_structure'
22 | - '--args=--only=terraform_workspace_remote'
23 | - repo: https://github.com/pre-commit/pre-commit-hooks
24 | rev: v4.0.1
25 | hooks:
26 | - id: detect-private-key
27 | - id: detect-aws-credentials
28 | args:
29 | - --allow-missing-credentials
30 |
--------------------------------------------------------------------------------
/.terraform-docs.yml:
--------------------------------------------------------------------------------
1 | sections:
2 | show:
3 | - header
4 | - requirements
5 | - providers
6 | - inputs
7 | - outputs
8 |
9 | sort:
10 | enabled: false
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ### [0.1.3](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.2...v0.1.3) (2022-01-27)
4 |
5 |
6 | ### Improvements
7 |
8 | * tighten IAM for Crawler and Lambda ([c9e7b61](https://www.github.com/nuuday/terraform-aws-cur/commit/c9e7b61273e4f8e68cd3a606e198f4d80f9b5f73))
9 |
10 |
11 | ### Fixes
12 |
13 | * allow Glue Crawler to create new Athena partitions ([#31](https://www.github.com/nuuday/terraform-aws-cur/issues/31)) ([724f0e1](https://www.github.com/nuuday/terraform-aws-cur/commit/724f0e115c866c29cc4074ee6c7868f80b35fe1e))
14 | * prevent KMS data source from being read during apply ([#27](https://www.github.com/nuuday/terraform-aws-cur/issues/27)) ([b3ffcc0](https://www.github.com/nuuday/terraform-aws-cur/commit/b3ffcc055589c2b2aaf084722c76ff6859782e7c))
15 | * really make `cur_role_session_name` variable optional ([#32](https://www.github.com/nuuday/terraform-aws-cur/issues/32)) ([f4135d6](https://www.github.com/nuuday/terraform-aws-cur/commit/f4135d6d2248a7ae781b7c27984fd7f574f49bf2))
16 |
17 | ### [0.1.2](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.1...v0.1.2) (2021-06-08)
18 |
19 |
20 | ### Improvements
21 |
22 | * support Terraform 1.0 ([#25](https://www.github.com/nuuday/terraform-aws-cur/issues/25)) ([f389ffd](https://www.github.com/nuuday/terraform-aws-cur/commit/f389ffd3b72b85838c788447d7517fa33d554d31))
23 |
24 | ### [0.1.1](https://www.github.com/nuuday/terraform-aws-cur/compare/v0.1.0...v0.1.1) (2021-06-02)
25 |
26 |
27 | ### Fixes
28 |
29 | * prevent conflicting operations on creating S3 resources ([#22](https://www.github.com/nuuday/terraform-aws-cur/issues/22)) ([1a99651](https://www.github.com/nuuday/terraform-aws-cur/commit/1a99651a4259451cdc830633159e3f9cb12b3be1))
30 |
31 |
32 | ### Improvements
33 |
34 | * optionally use KMS CMK for S3 SSE ([#24](https://www.github.com/nuuday/terraform-aws-cur/issues/24)) ([6cdc902](https://www.github.com/nuuday/terraform-aws-cur/commit/6cdc902dc19836665239f958883347f657d157e7))
35 |
36 | ## 0.1.0 (2021-05-31)
37 |
38 |
39 | ### Features
40 |
41 | * optionally provision S3 bucket for CUR data ([#3](https://www.github.com/nuuday/terraform-aws-cur/issues/3)) ([96f16ee](https://www.github.com/nuuday/terraform-aws-cur/commit/96f16ee42f238454bab82bef2a985d32275a92c5))
42 | * populate Athena Table with CUR data ([#6](https://www.github.com/nuuday/terraform-aws-cur/issues/6)) ([ffec446](https://www.github.com/nuuday/terraform-aws-cur/commit/ffec44651e3d51ce067d8e856b86fb30585987c8))
43 |
44 |
45 | ### Fixes
46 |
47 | * actually create report definition ([#9](https://www.github.com/nuuday/terraform-aws-cur/issues/9)) ([a89251a](https://www.github.com/nuuday/terraform-aws-cur/commit/a89251a177eae79b7ca1e86b8d38994dada34079))
48 | * prevent Lambda from erroring out if Crawler is already running ([#11](https://www.github.com/nuuday/terraform-aws-cur/issues/11)) ([a914ae3](https://www.github.com/nuuday/terraform-aws-cur/commit/a914ae38fb67a03542e0fe51d685ad76c3d29c55))
49 |
50 |
51 | ### Improvements
52 |
53 | * output useful values from provisioned resources ([#19](https://www.github.com/nuuday/terraform-aws-cur/issues/19)) ([7904c5e](https://www.github.com/nuuday/terraform-aws-cur/commit/7904c5efa974cdcc1458b551a8f7d1fd882fe845))
54 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Nuuday A/S
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AWS Cost & Usage Reports
2 |
3 | This Terraform module stands up a Cost and Usage Report, together with the services needed to make the CUR data queryable in Athena.
4 |
5 | ## Overview
6 |
7 | The overall architecture is illustrated below; a minimal usage example can be found in the [Usage](#usage) section at the end of this README.
8 | ![Architecture overview](assets/overview.png)
9 |
10 | 1. AWS delivers Cost and Usage Reports data to the S3 bucket continuously
11 | 2. Whenever new CUR data is delivered, a Glue Crawler processes it and makes it available in the Data Catalog
12 | 3. Athena provides an SQL interface to the CUR data, using the Data Catalog as its data source
13 | 4. QuickSight visualizes the data returned from querying Athena
14 |
15 |
16 | ## Requirements
17 |
18 | | Name | Version |
19 | |------|---------|
20 | | [terraform](#requirement\_terraform) | >= 0.13, < 2.0 |
21 | | [archive](#requirement\_archive) | ~> 2.0 |
22 | | [aws](#requirement\_aws) | ~> 3.29 |
23 |
24 | ## Providers
25 |
26 | | Name | Version |
27 | |------|---------|
28 | | [aws.cur](#provider\_aws.cur) | ~> 3.29 |
29 | | [archive](#provider\_archive) | ~> 2.0 |
30 | | [aws](#provider\_aws) | ~> 3.29 |
31 |
32 | ## Inputs
33 |
34 | | Name | Description | Type | Default | Required |
35 | |------|-------------|------|---------|:--------:|
36 | | [use\_existing\_s3\_bucket](#input\_use\_existing\_s3\_bucket) | Whether to use an existing S3 bucket or create a new one. Regardless, `s3_bucket_name` must contain the name of the bucket. | `bool` | n/a | yes |
37 | | [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket into which CUR will put the cost data. | `string` | n/a | yes |
38 | | [s3\_use\_existing\_kms\_key](#input\_s3\_use\_existing\_kms\_key) | Whether to use an existing KMS CMK for S3 SSE. | `bool` | n/a | yes |
39 | | [s3\_kms\_key\_alias](#input\_s3\_kms\_key\_alias) | Alias for the KMS CMK, existing or otherwise. | `string` | `""` | no |
40 | | [report\_name](#input\_report\_name) | Name of the Cost and Usage Report which will be created. | `string` | n/a | yes |
41 | | [report\_frequency](#input\_report\_frequency) | How often the Cost and Usage Report will be generated. HOURLY or DAILY. | `string` | n/a | yes |
42 | | [report\_versioning](#input\_report\_versioning) | Whether reports should be overwritten or new ones should be created. | `string` | n/a | yes |
43 | | [report\_format](#input\_report\_format) | Format for report. Valid values are: textORcsv, Parquet. If Parquet is used, then Compression must also be Parquet. | `string` | n/a | yes |
44 | | [report\_compression](#input\_report\_compression) | Compression format for report. Valid values are: GZIP, ZIP, Parquet. If Parquet is used, then format must also be Parquet. | `string` | n/a | yes |
45 | | [report\_additional\_artifacts](#input\_report\_additional\_artifacts) | A list of additional artifacts. Valid values are: REDSHIFT, QUICKSIGHT, ATHENA. When ATHENA exists within additional\_artifacts, no other artifact type can be declared and report\_versioning must be OVERWRITE\_REPORT. | `set(string)` | n/a | yes |
46 | | [s3\_bucket\_prefix](#input\_s3\_bucket\_prefix) | Prefix in the S3 bucket to put reports. | `string` | `""` | no |
47 | | [cur\_role\_arn](#input\_cur\_role\_arn) | ARN of the role to assume in order to provision the Cost and Usage Reports S3 bucket in us-east-1. | `string` | `""` | no |
48 | | [cur\_role\_session\_name](#input\_cur\_role\_session\_name) | Session name to use when assuming `cur_role_arn`. | `string` | `null` | no |
49 | | [lambda\_log\_group\_retention\_days](#input\_lambda\_log\_group\_retention\_days) | Number of days to retain logs from the Lambda function, which ensures Glue Crawler runs when new CUR data is available. | `number` | `14` | no |
50 | | [glue\_crawler\_create\_log\_group](#input\_glue\_crawler\_create\_log\_group) | Whether to create a CloudWatch Log Group for the Glue Crawler. Crawlers share Log Group, and this gives the option of managing the Log Group with retention through this module. | `bool` | `true` | no |
51 | | [glue\_crawler\_log\_group\_retention\_days](#input\_glue\_crawler\_log\_group\_retention\_days) | Number of days to retain logs from the Glue Crawler, which populates the Athena table whenever new CUR data is available. | `number` | `14` | no |
52 | | [tags](#input\_tags) | Tags which will be applied to provisioned resources. | `map(string)` | `{}` | no |
53 |
54 | ## Outputs
55 |
56 | | Name | Description |
57 | |------|-------------|
58 | | [s3\_bucket\_name](#output\_s3\_bucket\_name) | Name of S3 bucket used for storing CUR data. This may be provisioned by this module or not. |
59 | | [s3\_bucket\_prefix](#output\_s3\_bucket\_prefix) | Prefix used for storing CUR data inside the S3 bucket. |
60 | | [s3\_bucket\_arn](#output\_s3\_bucket\_arn) | ARN of S3 bucket used for storing CUR data. This may be provisioned by this module or not. |
61 | | [s3\_bucket\_region](#output\_s3\_bucket\_region) | Region where the S3 bucket used for storing CUR data is provisioned. This may be provisioned by this module or not. |
62 | | [report\_name](#output\_report\_name) | Name of the provisioned Cost and Usage Report. |
63 | | [lambda\_crawler\_trigger\_arn](#output\_lambda\_crawler\_trigger\_arn) | ARN of the Lambda function responsible for triggering the Glue Crawler when new CUR data is uploaded into the S3 bucket. |
64 | | [lambda\_crawler\_trigger\_role\_arn](#output\_lambda\_crawler\_trigger\_role\_arn) | ARN of the IAM role used by the Lambda function responsible for starting the Glue Crawler. |
65 | | [crawler\_arn](#output\_crawler\_arn) | ARN of the Glue Crawler responsible for populating the Catalog Database with new CUR data. |
66 | | [crawler\_role\_arn](#output\_crawler\_role\_arn) | ARN of the IAM role used by the Glue Crawler responsible for populating the Catalog Database with new CUR data. |
67 | | [glue\_catalog\_database\_name](#output\_glue\_catalog\_database\_name) | Name of the Glue Catalog Database which is populated with CUR data. |
68 |
69 |
70 |
71 | ## References
72 |
73 | It is based on [AWS: Query and Visualize AWS Cost and Usage](https://aws.amazon.com/blogs/big-data/query-and-visualize-aws-cost-and-usage-data-using-amazon-athena-and-amazon-quicksight/).
74 | Check out the blog post and the linked resources for an explanation of the concepts.
75 |
76 | For more information about Cost & Usage Reports in general, see [AWS: What are Cost and Usage Reports?](https://docs.aws.amazon.com/cur/latest/userguide/what-is-cur.html)
77 |
78 |
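79 | ## Usage
80 | 
81 | Below is a minimal sketch of how the module can be instantiated, closely mirroring `examples/complete`.
82 | The bucket name, role ARN, tags and the pinned `source` ref are placeholders; adapt them to your environment.
83 | 
84 | ```hcl
85 | module "cur" {
86 |   source = "github.com/nuuday/terraform-aws-cur?ref=v0.1.3"
87 | 
88 |   # Reuse an existing bucket encrypted with the AWS-managed `aws/s3` key.
89 |   # Set both booleans to false to have the module create the bucket and a KMS CMK (named via `s3_kms_key_alias`) instead.
90 |   use_existing_s3_bucket  = true
91 |   s3_bucket_name          = "my-cost-usage-reports"
92 |   s3_bucket_prefix        = "reports"
93 |   s3_use_existing_kms_key = true
94 |   s3_kms_key_alias        = "aws/s3"
95 | 
96 |   # When ATHENA is requested, the report must be Parquet and use OVERWRITE_REPORT versioning.
97 |   report_name                 = "example"
98 |   report_frequency            = "HOURLY"
99 |   report_additional_artifacts = ["ATHENA"]
100 |   report_format               = "Parquet"
101 |   report_compression          = "Parquet"
102 |   report_versioning           = "OVERWRITE_REPORT"
103 | 
104 |   # Optional: assume a role when creating the report definition in us-east-1.
105 |   # cur_role_arn = "arn:aws:iam::123456789012:role/cur-admin"
106 | 
107 |   tags = {
108 |     Team = "finops"
109 |   }
110 | }
111 | ```
112 | 
113 | The module outputs can then be fed into other resources or re-exported, for example:
114 | 
115 | ```hcl
116 | output "cur_glue_database" {
117 |   description = "Glue Catalog Database holding the crawled CUR tables."
118 |   value       = module.cur.glue_catalog_database_name
119 | }
120 | ```
121 | 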
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f994c94c8a5c5aa3a24c7488432d5e9fdb8bc466bc79614e621eac80df839e63
3 | size 14200
4 |
--------------------------------------------------------------------------------
/data.tf:
--------------------------------------------------------------------------------
1 | data "aws_caller_identity" "current" {}
2 |
3 | data "aws_region" "current" {}
4 |
5 | data "aws_partition" "current" {}
6 |
--------------------------------------------------------------------------------
/examples/complete/main.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 0.13, < 2.0"
3 | }
4 |
5 | module "this" {
6 | source = "../../"
7 |
8 | 
9 | # Reuse an existing bucket and the AWS-managed "aws/s3" key; the module only needs their names.
10 | use_existing_s3_bucket = true
11 | s3_bucket_name = "nuuday-cost-usage-reports"
12 | s3_bucket_prefix = "reports"
13 | s3_use_existing_kms_key = true
14 | s3_kms_key_alias = "aws/s3"
15 |
16 | report_name = "example"
17 | report_frequency = "HOURLY"
18 | report_additional_artifacts = [
19 | "ATHENA",
20 | ]
21 |
22 | report_format = "Parquet"
23 | report_compression = "Parquet"
24 | report_versioning = "OVERWRITE_REPORT"
25 | }
26 |
--------------------------------------------------------------------------------
/examples/complete/outputs.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuuday/terraform-aws-cur/24f83285975e2ad10ca2e25cff31e1486bd14e51/examples/complete/outputs.tf
--------------------------------------------------------------------------------
/examples/complete/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuuday/terraform-aws-cur/24f83285975e2ad10ca2e25cff31e1486bd14e51/examples/complete/variables.tf
--------------------------------------------------------------------------------
/glue.tf:
--------------------------------------------------------------------------------
1 | locals {
2 | # This is defined by AWS.
3 | glue_log_group_default_name = "/aws-glue/crawlers"
4 | }
5 |
6 | # Provisions Glue Crawler and Catalog Database.
7 | # Crawler will, when run, populate the Catalog Database with a table representing the CUR data in S3.
8 |
9 | resource "aws_glue_crawler" "this" {
10 | name = "cur-crawler"
11 | database_name = aws_glue_catalog_database.cur.name
12 | role = aws_iam_role.crawler.name
13 |
14 | s3_target {
15 | path = "s3://${var.s3_bucket_name}/${var.s3_bucket_prefix}/${var.report_name}/${var.report_name}"
16 | }
17 |
18 | tags = var.tags
19 |
20 | depends_on = [aws_s3_bucket.cur]
21 | }
22 |
23 | resource "aws_glue_catalog_database" "cur" {
24 | name = "${var.report_name}-db"
25 | description = "Contains CUR data based on contents from the S3 bucket '${var.s3_bucket_name}'"
26 | }
27 |
28 | # Crawler role
29 | resource "aws_iam_role" "crawler" {
30 | name_prefix = "cur-crawler"
31 | assume_role_policy = data.aws_iam_policy_document.crawler_assume.json
32 |
33 | tags = var.tags
34 | }
35 |
36 | resource "aws_iam_role_policy" "crawler" {
37 | role = aws_iam_role.crawler.name
38 | policy = data.aws_iam_policy_document.crawler.json
39 | }
40 |
41 | data "aws_iam_policy_document" "crawler_assume" {
42 | statement {
43 | effect = "Allow"
44 |
45 | principals {
46 | type = "Service"
47 | identifiers = ["glue.amazonaws.com"]
48 | }
49 |
50 | actions = ["sts:AssumeRole"]
51 | }
52 | }
53 |
54 | data "aws_iam_policy_document" "crawler" {
55 | statement {
56 | sid = "S3Decrypt"
57 |
58 | effect = "Allow"
59 |
60 | actions = [
61 | "kms:GenerateDataKey",
62 | "kms:Decrypt",
63 | "kms:Encrypt",
64 | ]
65 |
66 | resources = [var.s3_use_existing_kms_key ? data.aws_kms_key.s3[0].arn : aws_kms_key.s3[0].arn]
67 | }
68 |
69 | statement {
70 | sid = "Glue"
71 |
72 | effect = "Allow"
73 |
74 | actions = [
75 | "glue:ImportCatalogToGlue",
76 | "glue:GetDatabase",
77 | "glue:UpdateDatabase",
78 | "glue:GetTable",
79 | "glue:CreateTable",
80 | "glue:UpdateTable",
81 | "glue:BatchGetPartition",
82 | "glue:UpdatePartition",
83 | "glue:BatchCreatePartition",
84 | ]
85 |
86 | resources = [
87 | aws_glue_catalog_database.cur.arn,
88 | "arn:${data.aws_partition.current.partition}:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:catalog",
89 | "arn:${data.aws_partition.current.partition}:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:table/${aws_glue_catalog_database.cur.name}/*",
90 | ]
91 | }
92 |
93 | statement {
94 | sid = "CloudWatch"
95 |
96 | effect = "Allow"
97 |
98 | actions = [
99 | "logs:CreateLogStream",
100 | "logs:CreateLogGroup",
101 | "logs:PutLogEvents",
102 | ]
103 |
104 | resources = [
105 | "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.glue_log_group_default_name}",
106 | "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:${local.glue_log_group_default_name}:log-stream:*",
107 | ]
108 | }
109 |
110 | statement {
111 | sid = "S3"
112 |
113 | effect = "Allow"
114 |
115 | actions = [
116 | "s3:GetObject",
117 | "s3:ListBucket",
118 | ]
119 |
120 | resources = [
121 | "${var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn}",
122 | "${var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn}/*",
123 | ]
124 | }
125 | }
126 |
127 | # Optionally pre-create the log group for Glue Crawlers.
128 | # All Crawlers in an account write to this single, shared Log Group.
129 | #
130 | # If it does not already exist, Crawlers will create it automatically
131 | # with infinite retention, which is not desirable.
132 | # This gives module consumers the option of letting this module create and manage it.
133 | #
134 | # Accept default encryption. Crawler logs are not sensitive.
135 | # #tfsec:ignore:AWS089
136 | resource "aws_cloudwatch_log_group" "crawler" {
137 | count = var.glue_crawler_create_log_group ? 1 : 0
138 |
139 | name = local.glue_log_group_default_name
140 | retention_in_days = var.glue_crawler_log_group_retention_days
141 | }
142 |
--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
1 | resource "aws_cur_report_definition" "this" {
2 | report_name = var.report_name
3 | time_unit = var.report_frequency
4 | format = var.report_format
5 | compression = var.report_compression
6 | report_versioning = var.report_versioning
7 | additional_artifacts = var.report_additional_artifacts
8 | additional_schema_elements = ["RESOURCES"]
9 |
10 | s3_bucket = var.s3_bucket_name
11 | s3_region = var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].region : aws_s3_bucket.cur[0].region
12 | s3_prefix = var.s3_bucket_prefix
13 |
14 | depends_on = [
15 | aws_s3_bucket_policy.cur,
16 | ]
17 |
18 | provider = aws.cur
19 | }
20 |
21 | data "aws_s3_bucket" "cur" {
22 | count = var.use_existing_s3_bucket ? 1 : 0
23 |
24 | bucket = var.s3_bucket_name
25 | }
26 |
27 | data "aws_kms_key" "s3" {
28 | count = var.s3_use_existing_kms_key ? 1 : 0
29 |
30 | key_id = "alias/${trimprefix(var.s3_kms_key_alias, "alias/")}"
31 | }
32 |
33 | # tfsec:ignore:AWS019 (disable auto-rotation for now)
34 | resource "aws_kms_key" "s3" {
35 | count = var.s3_use_existing_kms_key ? 0 : 1
36 |
37 | description = "For server-side encryption in the '${var.s3_bucket_name}' S3 bucket."
38 |
39 | tags = var.tags
40 | }
41 |
42 | resource "aws_kms_alias" "s3" {
43 | count = var.s3_use_existing_kms_key ? 0 : 1
44 |
45 | name = "alias/${trimprefix(var.s3_kms_key_alias, "alias/")}"
46 | target_key_id = aws_kms_key.s3[0].key_id
47 | }
48 |
49 | # Versioning and logging disabled.
50 | # tfsec:ignore:AWS077 tfsec:ignore:AWS002
51 | resource "aws_s3_bucket" "cur" {
52 | count = var.use_existing_s3_bucket ? 0 : 1
53 |
54 | bucket = var.s3_bucket_name
55 | acl = "private"
56 |
57 | versioning {
58 | enabled = false
59 | }
60 |
61 | server_side_encryption_configuration {
62 | rule {
63 | apply_server_side_encryption_by_default {
64 | kms_master_key_id = var.s3_use_existing_kms_key ? data.aws_kms_key.s3[0].arn : aws_kms_key.s3[0].arn
65 | sse_algorithm = "aws:kms"
66 | }
67 | }
68 | }
69 |
70 | tags = var.tags
71 | }
72 |
73 | resource "aws_s3_bucket_public_access_block" "cur" {
74 | count = var.use_existing_s3_bucket ? 0 : 1
75 |
76 | bucket = aws_s3_bucket.cur[0].id
77 |
78 | block_public_acls = true
79 | block_public_policy = true
80 | ignore_public_acls = true
81 | restrict_public_buckets = true
82 | }
83 |
84 | resource "aws_s3_bucket_policy" "cur" {
85 | count = var.use_existing_s3_bucket ? 0 : 1
86 |
87 | bucket = aws_s3_bucket.cur[0].id
88 | policy = data.aws_iam_policy_document.s3_cur[0].json
89 |
90 | depends_on = [aws_s3_bucket_public_access_block.cur]
91 | }
92 |
93 | data "aws_iam_policy_document" "s3_cur" {
94 | count = var.use_existing_s3_bucket ? 0 : 1
95 |
96 | statement {
97 | principals {
98 | type = "Service"
99 | identifiers = ["billingreports.amazonaws.com"]
100 | }
101 |
102 | actions = [
103 | "s3:GetBucketAcl",
104 | "s3:GetBucketPolicy",
105 | ]
106 |
107 | resources = [aws_s3_bucket.cur[0].arn]
108 | }
109 |
110 | statement {
111 | principals {
112 | type = "Service"
113 | identifiers = ["billingreports.amazonaws.com"]
114 | }
115 |
116 | actions = ["s3:PutObject"]
117 |
118 | resources = ["${aws_s3_bucket.cur[0].arn}/*"]
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/notifications.tf:
--------------------------------------------------------------------------------
1 | locals {
2 | lambda_function_name = "${var.report_name}-crawler-trigger"
3 | }
4 |
5 | resource "aws_s3_bucket_notification" "cur" {
6 | bucket = var.s3_bucket_name
7 |
8 | lambda_function {
9 | lambda_function_arn = aws_lambda_function.run_crawler.arn
10 | events = ["s3:ObjectCreated:*"]
11 | filter_prefix = "${var.s3_bucket_prefix}/"
12 | filter_suffix = ".parquet"
13 | }
14 |
15 | depends_on = [
16 | aws_s3_bucket.cur,
17 | aws_lambda_permission.allow_bucket,
18 | aws_s3_bucket_policy.cur,
19 | ]
20 | }
21 |
22 | resource "aws_lambda_function" "run_crawler" {
23 | function_name = local.lambda_function_name
24 |
25 | role = aws_iam_role.lambda.arn
26 |
27 | runtime = "nodejs12.x"
28 | handler = "index.handler"
29 | filename = data.archive_file.lambda.output_path
30 | source_code_hash = data.archive_file.lambda.output_base64sha256
31 | timeout = 30
32 |
33 | environment {
34 | variables = {
35 | CRAWLER_NAME = aws_glue_crawler.this.name
36 | }
37 | }
38 |
39 | depends_on = [
40 | aws_iam_role_policy.lambda,
41 | aws_cloudwatch_log_group.lambda,
42 | ]
43 | }
44 |
45 | data "archive_file" "lambda" {
46 | type = "zip"
47 | source_file = "${path.module}/src/index.js"
48 | output_path = "${path.module}/lambda.zip"
49 | }
50 |
51 | resource "aws_lambda_permission" "allow_bucket" {
52 | statement_id = "AllowExecutionFromS3Bucket"
53 | action = "lambda:InvokeFunction"
54 | function_name = aws_lambda_function.run_crawler.arn
55 | source_account = data.aws_caller_identity.current.account_id
56 | principal = "s3.amazonaws.com"
57 | source_arn = var.use_existing_s3_bucket ? data.aws_s3_bucket.cur[0].arn : aws_s3_bucket.cur[0].arn
58 | }
59 |
60 | resource "aws_iam_role" "lambda" {
61 | name = "${var.report_name}-crawler-trigger"
62 | assume_role_policy = data.aws_iam_policy_document.crawler_trigger_assume.json
63 | }
64 |
65 | resource "aws_iam_role_policy" "lambda" {
66 | role = aws_iam_role.lambda.name
67 | policy = data.aws_iam_policy_document.crawler_trigger.json
68 | }
69 |
70 | data "aws_iam_policy_document" "crawler_trigger_assume" {
71 | statement {
72 | effect = "Allow"
73 |
74 | principals {
75 | type = "Service"
76 | identifiers = ["lambda.amazonaws.com"]
77 | }
78 |
79 | actions = ["sts:AssumeRole"]
80 | }
81 | }
82 |
83 | data "aws_iam_policy_document" "crawler_trigger" {
84 | statement {
85 | sid = "CloudWatch"
86 |
87 | effect = "Allow"
88 |
89 | actions = [
90 | "logs:CreateLogStream",
91 | "logs:PutLogEvents",
92 | ]
93 |
94 | resources = ["${aws_cloudwatch_log_group.lambda.arn}:*"]
95 | }
96 |
97 | statement {
98 | sid = "Glue"
99 |
100 | effect = "Allow"
101 |
102 | actions = [
103 | "glue:StartCrawler",
104 | ]
105 |
106 | resources = [aws_glue_crawler.this.arn]
107 | }
108 | }
109 |
110 | # Pre-create log group for the Lambda function.
111 | # Otherwise it will be created by Lambda itself with infinite retention.
112 | #
113 | # Accept default encryption. This Lambda does not produce sensitive logs.
114 | # #tfsec:ignore:AWS089
115 | resource "aws_cloudwatch_log_group" "lambda" {
116 | name = "/aws/lambda/${local.lambda_function_name}"
117 | retention_in_days = var.lambda_log_group_retention_days
118 | }
119 |
--------------------------------------------------------------------------------
/outputs.tf:
--------------------------------------------------------------------------------
1 | output "s3_bucket_name" {
2 | description = "Name of S3 bucket used for storing CUR data. This may be provisioned by this module or not."
3 | value = aws_cur_report_definition.this.s3_bucket
4 | }
5 |
6 | output "s3_bucket_prefix" {
7 | description = "Prefix used for storing CUR data inside the S3 bucket."
8 | value = aws_cur_report_definition.this.s3_prefix
9 | }
10 |
11 | output "s3_bucket_arn" {
12 | description = "ARN of S3 bucket used for storing CUR data. This may be provisioned by this module or not."
13 | value = coalescelist(aws_s3_bucket.cur.*.arn, data.aws_s3_bucket.cur.*.arn)[0]
14 | }
15 |
16 | output "s3_bucket_region" {
17 | description = "Region where the S3 bucket used for storing CUR data is provisioned. This may be provisioned by this module or not."
18 | value = aws_cur_report_definition.this.s3_region
19 | }
20 |
21 | output "report_name" {
22 | description = "Name of the provisioned Cost and Usage Report."
23 | value = aws_cur_report_definition.this.report_name
24 | }
25 |
26 | output "lambda_crawler_trigger_arn" {
27 | description = "ARN of the Lambda function responsible for triggering the Glue Crawler when new CUR data is uploaded into the S3 bucket."
28 | value = aws_lambda_function.run_crawler.arn
29 | }
30 |
31 | output "lambda_crawler_trigger_role_arn" {
32 | description = "ARN of the IAM role used by the Lambda function responsible for starting the Glue Crawler."
33 | value = aws_iam_role.lambda.arn
34 | }
35 |
36 | output "crawler_arn" {
37 | description = "ARN of the Glue Crawler responsible for populating the Catalog Database with new CUR data."
38 | value = aws_glue_crawler.this.arn
39 | }
40 |
41 | output "crawler_role_arn" {
42 | description = "ARN of the IAM role used by the Glue Crawler responsible for populating the Catalog Database with new CUR data."
43 | value = aws_iam_role.crawler.arn
44 | }
45 |
46 | output "glue_catalog_database_name" {
47 | description = "Name of the Glue Catalog Database which is populated with CUR data."
48 | value = aws_glue_catalog_database.cur.name
49 | }
50 |
--------------------------------------------------------------------------------
/providers.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 0.13, < 2.0"
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = "~> 3.29"
8 | }
9 |
10 | archive = {
11 | source = "hashicorp/archive"
12 | version = "~> 2.0"
13 | }
14 | }
15 | }
16 |
17 | provider "aws" {
18 | # CUR is only available in us-east-1.
19 | # aws_cur_report_definition.this is the only resource using this provider.
20 | alias = "cur"
21 |
22 | region = "us-east-1"
23 |
24 | assume_role {
25 | role_arn = var.cur_role_arn
26 | session_name = var.cur_role_session_name
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | const AWS = require('aws-sdk');
2 |
3 | exports.handler = function(event, context, callback) {
4 |   const glue = new AWS.Glue();
5 | 
6 |   glue.startCrawler({ Name: process.env.CRAWLER_NAME }, function(err, data) {
7 |     if (err) {
8 |       // A Crawler that is already running is expected; treat it as a successful trigger.
9 |       const response = JSON.parse(this.httpResponse.body);
10 |       if (response['__type'] === 'CrawlerRunningException') {
11 |         console.log('Crawler already running; ignoring trigger.');
12 | 
13 |         callback(null, response.Message);
14 |       } else {
15 |         // Propagate any other error so the invocation is marked as failed.
16 |         callback(err);
17 |       }
18 |     } else {
19 |       console.log('Successfully triggered crawler');
20 |       callback(null, data);
21 |     }
22 |   });
23 | };
24 | 
--------------------------------------------------------------------------------
/variables.tf:
--------------------------------------------------------------------------------
1 | variable "use_existing_s3_bucket" {
2 | description = "Whether to use an existing S3 bucket or create a new one. Regardless, `s3_bucket_name` must contain the name of the bucket."
3 | type = bool
4 | }
5 |
6 | variable "s3_bucket_name" {
7 | description = "Name of the S3 bucket into which CUR will put the cost data."
8 | type = string
9 | }
10 |
11 | variable "s3_use_existing_kms_key" {
12 | description = "Whether to use an existing KMS CMK for S3 SSE."
13 | type = bool
14 | }
15 |
16 | variable "s3_kms_key_alias" {
17 | description = "Alias for the KMS CMK, existing or otherwise."
18 | type = string
19 | default = ""
20 | }
21 |
22 | variable "report_name" {
23 | description = "Name of the Cost and Usage Report which will be created."
24 | type = string
25 | }
26 |
27 | variable "report_frequency" {
28 | description = "How often the Cost and Usage Report will be generated. HOURLY or DAILY."
29 | type = string
30 | }
31 |
32 | variable "report_versioning" {
33 | description = "Whether reports should be overwritten or new ones should be created."
34 | type = string
35 | }
36 |
37 | variable "report_format" {
38 | description = "Format for report. Valid values are: textORcsv, Parquet. If Parquet is used, then Compression must also be Parquet."
39 | type = string
40 | }
41 |
42 | variable "report_compression" {
43 | description = "Compression format for report. Valid values are: GZIP, ZIP, Parquet. If Parquet is used, then format must also be Parquet."
44 | type = string
45 | }
46 |
47 | variable "report_additional_artifacts" {
48 | description = "A list of additional artifacts. Valid values are: REDSHIFT, QUICKSIGHT, ATHENA. When ATHENA exists within additional_artifacts, no other artifact type can be declared and report_versioning must be OVERWRITE_REPORT."
49 | type = set(string)
50 | }
51 |
52 | variable "s3_bucket_prefix" {
53 | description = "Prefix in the S3 bucket to put reports."
54 | type = string
55 | default = ""
56 | }
57 |
58 | variable "cur_role_arn" {
59 | description = "ARN of the role to assume in order to provision the Cost and Usage Reports S3 bucket in us-east-1."
60 | type = string
61 | default = ""
62 | }
63 |
64 | variable "cur_role_session_name" {
65 | description = "Session name to use when assuming `cur_role_arn`."
66 | type = string
67 | default = null
68 | }
69 |
70 | variable "lambda_log_group_retention_days" {
71 | description = "Number of days to retain logs from the Lambda function, which ensures Glue Crawler runs when new CUR data is available."
72 | type = number
73 | default = 14
74 | }
75 |
76 | variable "glue_crawler_create_log_group" {
77 | description = "Whether to create a CloudWatch Log Group for the Glue Crawler. Crawlers share Log Group, and this gives the option of managing the Log Group with retention through this module."
78 | type = bool
79 | default = true
80 | }
81 |
82 | variable "glue_crawler_log_group_retention_days" {
83 | description = "Number of days to retain logs from the Glue Crawler, which populates the Athena table whenever new CUR data is available."
84 | type = number
85 | default = 14
86 | }
87 |
88 | variable "tags" {
89 | description = "Tags which will be applied to provisioned resources."
90 | type = map(string)
91 | default = {}
92 | }
93 |
--------------------------------------------------------------------------------