├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── bug_report.yml │ ├── config.yml │ ├── feature_request.md │ ├── feature_request.yml │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── banner.png ├── mergify.yml ├── renovate.json ├── settings.yml └── workflows │ ├── branch.yml │ ├── chatops.yml │ ├── release.yml │ └── scheduled.yml ├── .gitignore ├── LICENSE ├── README.md ├── README.yaml ├── atmos.yaml ├── context.tf ├── examples └── complete │ ├── context.tf │ ├── fixtures.us-east-2.tfvars │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── main.tf ├── outputs.tf ├── test ├── .gitignore ├── Makefile ├── Makefile.alpine └── src │ ├── .gitignore │ ├── Makefile │ ├── examples_complete_test.go │ ├── go.mod │ ├── go.sum │ └── utils.go ├── variables.tf └── versions.tf /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Use this file to define individuals or teams that are responsible for code in a repository. 2 | # Read more: 3 | # 4 | # Order is important: the last matching pattern has the highest precedence 5 | 6 | # These owners will be the default owners for everything 7 | * @cloudposse/engineering @cloudposse/contributors 8 | 9 | # Cloud Posse must review any changes to Makefiles 10 | **/Makefile @cloudposse/engineering 11 | **/Makefile.* @cloudposse/engineering 12 | 13 | # Cloud Posse must review any changes to GitHub actions 14 | .github/* @cloudposse/engineering 15 | 16 | # Cloud Posse must review any changes to standard context definition, 17 | # but some changes can be rubber-stamped. 
18 | **/*.tf @cloudposse/engineering @cloudposse/contributors @cloudposse/approvers 19 | README.yaml @cloudposse/engineering @cloudposse/contributors @cloudposse/approvers 20 | README.md @cloudposse/engineering @cloudposse/contributors @cloudposse/approvers 21 | docs/*.md @cloudposse/engineering @cloudposse/contributors @cloudposse/approvers 22 | 23 | # Cloud Posse Admins must review all changes to CODEOWNERS or the mergify configuration 24 | .github/mergify.yml @cloudposse/admins 25 | .github/CODEOWNERS @cloudposse/admins 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Found a bug? Maybe our [Slack Community](https://slack.cloudposse.com) can help. 11 | 12 | [![Slack Community](https://slack.cloudposse.com/badge.svg)](https://slack.cloudposse.com) 13 | 14 | ## Describe the Bug 15 | A clear and concise description of what the bug is. 16 | 17 | ## Expected Behavior 18 | A clear and concise description of what you expected to happen. 19 | 20 | ## Steps to Reproduce 21 | Steps to reproduce the behavior: 22 | 1. Go to '...' 23 | 2. Run '....' 24 | 3. Enter '....' 25 | 4. See error 26 | 27 | ## Screenshots 28 | If applicable, add screenshots or logs to help explain your problem. 29 | 30 | ## Environment (please complete the following information): 31 | 32 | Anything that will help us triage the bug will help. Here are some ideas: 33 | - OS: [e.g. Linux, OSX, WSL, etc] 34 | - Version [e.g. 10.15] 35 | 36 | ## Additional Context 37 | Add any other context about the problem here. 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | description: Create a report to help us improve 4 | labels: ["bug"] 5 | assignees: [""] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Found a bug? 11 | 12 | Please checkout our [Slack Community](https://slack.cloudposse.com) 13 | or visit our [Slack Archive](https://archive.sweetops.com/). 14 | 15 | [![Slack Community](https://slack.cloudposse.com/badge.svg)](https://slack.cloudposse.com) 16 | 17 | - type: textarea 18 | id: concise-description 19 | attributes: 20 | label: Describe the Bug 21 | description: A clear and concise description of what the bug is. 22 | placeholder: What is the bug about? 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | id: expected 28 | attributes: 29 | label: Expected Behavior 30 | description: A clear and concise description of what you expected. 31 | placeholder: What happened? 32 | validations: 33 | required: true 34 | 35 | - type: textarea 36 | id: reproduction-steps 37 | attributes: 38 | label: Steps to Reproduce 39 | description: Steps to reproduce the behavior. 40 | placeholder: How do we reproduce it? 41 | validations: 42 | required: true 43 | 44 | - type: textarea 45 | id: screenshots 46 | attributes: 47 | label: Screenshots 48 | description: If applicable, add screenshots or logs to help explain. 49 | validations: 50 | required: false 51 | 52 | - type: textarea 53 | id: environment 54 | attributes: 55 | label: Environment 56 | description: Anything that will help us triage the bug. 57 | placeholder: | 58 | - OS: [e.g. Linux, OSX, WSL, etc] 59 | - Version [e.g. 
10.15] 60 | - Module version 61 | - Terraform version 62 | validations: 63 | required: false 64 | 65 | - type: textarea 66 | id: additional 67 | attributes: 68 | label: Additional Context 69 | description: | 70 | Add any other context about the problem here. 71 | validations: 72 | required: false 73 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | contact_links: 4 | 5 | - name: Community Slack Team 6 | url: https://cloudposse.com/slack/ 7 | about: |- 8 | Please ask and answer questions here. 9 | 10 | - name: Office Hours 11 | url: https://cloudposse.com/office-hours/ 12 | about: |- 13 | Join us every Wednesday for FREE Office Hours (lunch & learn). 14 | 15 | - name: DevOps Accelerator Program 16 | url: https://cloudposse.com/accelerate/ 17 | about: |- 18 | Own your infrastructure in record time. We build it. You drive it. 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'feature request' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Have a question? Please check out our [Slack Community](https://slack.cloudposse.com) or visit our [Slack Archive](https://archive.sweetops.com/). 11 | 12 | [![Slack Community](https://slack.cloudposse.com/badge.svg)](https://slack.cloudposse.com) 13 | 14 | ## Describe the Feature 15 | 16 | A clear and concise description of what the feature is. 17 | 18 | ## Expected Behavior 19 | 20 | A clear and concise description of what you expected to happen. 21 | 22 | ## Use Case 23 | 24 | Is your feature request related to a problem/challenge you are trying to solve? 
Please provide some additional context of why this feature or capability will be valuable. 25 | 26 | ## Describe Ideal Solution 27 | 28 | A clear and concise description of what you want to happen. If you don't know, that's okay. 29 | 30 | ## Alternatives Considered 31 | 32 | Explain what alternative solutions or features you've considered. 33 | 34 | ## Additional Context 35 | 36 | Add any other context or screenshots about the feature request here. 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | description: Suggest an idea for this project 4 | labels: ["feature request"] 5 | assignees: [""] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Have a question? 11 | 12 | Please checkout our [Slack Community](https://slack.cloudposse.com) 13 | or visit our [Slack Archive](https://archive.sweetops.com/). 14 | 15 | [![Slack Community](https://slack.cloudposse.com/badge.svg)](https://slack.cloudposse.com) 16 | 17 | - type: textarea 18 | id: concise-description 19 | attributes: 20 | label: Describe the Feature 21 | description: A clear and concise description of what the feature is. 22 | placeholder: What is the feature about? 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | id: expected 28 | attributes: 29 | label: Expected Behavior 30 | description: A clear and concise description of what you expected. 31 | placeholder: What happened? 32 | validations: 33 | required: true 34 | 35 | - type: textarea 36 | id: use-case 37 | attributes: 38 | label: Use Case 39 | description: | 40 | Is your feature request related to a problem/challenge you are trying 41 | to solve? 42 | 43 | Please provide some additional context of why this feature or 44 | capability will be valuable. 
45 | validations: 46 | required: true 47 | 48 | - type: textarea 49 | id: ideal-solution 50 | attributes: 51 | label: Describe Ideal Solution 52 | description: A clear and concise description of what you want to happen. 53 | validations: 54 | required: true 55 | 56 | - type: textarea 57 | id: alternatives-considered 58 | attributes: 59 | label: Alternatives Considered 60 | description: Explain alternative solutions or features considered. 61 | validations: 62 | required: false 63 | 64 | - type: textarea 65 | id: additional 66 | attributes: 67 | label: Additional Context 68 | description: | 69 | Add any other context about the feature request here. 70 | validations: 71 | required: false 72 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudposse/terraform-aws-emr-cluster/ba8fab0c627d01c61f18393ea13d13d04a195615/.github/ISSUE_TEMPLATE/question.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## what 2 | 3 | 7 | 8 | ## why 9 | 10 | 15 | 16 | ## references 17 | 18 | 22 | -------------------------------------------------------------------------------- /.github/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudposse/terraform-aws-emr-cluster/ba8fab0c627d01c61f18393ea13d13d04a195615/.github/banner.png -------------------------------------------------------------------------------- /.github/mergify.yml: -------------------------------------------------------------------------------- 1 | extends: .github 2 | -------------------------------------------------------------------------------- /.github/renovate.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base", 4 | ":preserveSemverRanges" 5 | ], 6 | "baseBranches": ["main", "master", "/^release\\/v\\d{1,2}$/"], 7 | "labels": ["auto-update"], 8 | "dependencyDashboardAutoclose": true, 9 | "enabledManagers": ["terraform"], 10 | "terraform": { 11 | "ignorePaths": ["**/context.tf", "examples/**"] 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /.github/settings.yml: -------------------------------------------------------------------------------- 1 | # Upstream changes from _extends are only recognized when modifications are made to this file in the default branch. 2 | _extends: .github 3 | repository: 4 | name: terraform-aws-emr-cluster 5 | description: Terraform module to provision an Elastic MapReduce (EMR) cluster on AWS 6 | homepage: https://cloudposse.com/accelerate 7 | topics: hcl2, emr, emr-cluster, emrfs, emr-notebooks, terraform, terraform-modules, terraform-module, terraform-aws, hadoop, hive, presto, spark 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/workflows/branch.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Branch 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - release/** 8 | types: [opened, synchronize, reopened, labeled, unlabeled] 9 | push: 10 | branches: 11 | - main 12 | - release/v* 13 | paths-ignore: 14 | - '.github/**' 15 | - 'docs/**' 16 | - 'examples/**' 17 | - 'test/**' 18 | - 'README.md' 19 | 20 | permissions: {} 21 | 22 | jobs: 23 | terraform-module: 24 | uses: cloudposse/.github/.github/workflows/shared-terraform-module.yml@main 25 | secrets: inherit 26 | -------------------------------------------------------------------------------- /.github/workflows/chatops.yml: -------------------------------------------------------------------------------- 1 | 
--- 2 | name: chatops 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | permissions: 8 | pull-requests: write 9 | id-token: write 10 | contents: write 11 | statuses: write 12 | 13 | jobs: 14 | test: 15 | uses: cloudposse/.github/.github/workflows/shared-terraform-chatops.yml@main 16 | if: ${{ github.event.issue.pull_request && contains(github.event.comment.body, '/terratest') }} 17 | secrets: inherit 18 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: release 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | permissions: 9 | id-token: write 10 | contents: write 11 | pull-requests: write 12 | 13 | jobs: 14 | terraform-module: 15 | uses: cloudposse/.github/.github/workflows/shared-release-branches.yml@main 16 | secrets: inherit 17 | -------------------------------------------------------------------------------- /.github/workflows/scheduled.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: scheduled 3 | on: 4 | workflow_dispatch: { } # Allows manually triggering this workflow 5 | schedule: 6 | - cron: "0 3 * * *" 7 | 8 | permissions: 9 | pull-requests: write 10 | id-token: write 11 | contents: write 12 | 13 | jobs: 14 | scheduled: 15 | uses: cloudposse/.github/.github/workflows/shared-terraform-scheduled.yml@main 16 | secrets: inherit 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.tfstate 3 | *.tfstate.backup 4 | 5 | # Module directory 6 | .terraform 7 | .idea 8 | *.iml 9 | 10 | .build-harness 11 | build-harness 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 
1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018-2022 Cloud Posse, LLC 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Project Banner
5 | 6 | 7 |

Latest Release · Last Updated · Slack Community · Get Support 8 | 9 | 

10 | 11 | 12 | 32 | 33 | Terraform module to provision an Elastic MapReduce (EMR) cluster on AWS. 34 | 35 | 36 | > [!TIP] 37 | > #### 👽 Use Atmos with Terraform 38 | > Cloud Posse uses [`atmos`](https://atmos.tools) to easily orchestrate multiple environments using Terraform.
39 | > Works with [GitHub Actions](https://atmos.tools/integrations/github-actions/), [Atlantis](https://atmos.tools/integrations/atlantis), or [Spacelift](https://atmos.tools/integrations/spacelift). 40 | > 41 | > 
42 | > Watch demo of using Atmos with Terraform 43 | >
44 | > Example of running atmos to manage infrastructure from our Quick Start tutorial. 45 | > 46 | 47 | 48 | 49 | 50 | 51 | ## Usage 52 | 53 | 54 | For a complete example, see [examples/complete](examples/complete) 55 | 56 | For automated tests of the complete example using [bats](https://github.com/bats-core/bats-core) and [Terratest](https://github.com/gruntwork-io/terratest) (which tests and deploys the example on AWS), see [test](test). 57 | 58 | ```hcl 59 | provider "aws" { 60 | region = "us-east-2" 61 | } 62 | 63 | module "vpc" { 64 | source = "cloudposse/vpc/aws" 65 | # Cloud Posse recommends pinning every module to a specific version 66 | # version = "x.x.x" 67 | 68 | ipv4_primary_cidr_block = "172.19.0.0/16" 69 | 70 | context = module.this.context 71 | } 72 | 73 | module "subnets" { 74 | source = "cloudposse/dynamic-subnets/aws" 75 | # Cloud Posse recommends pinning every module to a specific version 76 | # version = "x.x.x" 77 | 78 | availability_zones = var.availability_zones 79 | vpc_id = module.vpc.vpc_id 80 | igw_id = [module.vpc.igw_id] 81 | ipv4_cidr_block = [module.vpc.vpc_cidr_block] 82 | nat_gateway_enabled = false 83 | nat_instance_enabled = false 84 | 85 | context = module.this.context 86 | } 87 | 88 | module "s3_log_storage" { 89 | source = "cloudposse/s3-log-storage/aws" 90 | # Cloud Posse recommends pinning every module to a specific version 91 | # version = "x.x.x" 92 | 93 | region = var.region 94 | namespace = var.namespace 95 | stage = var.stage 96 | name = var.name 97 | attributes = ["logs"] 98 | force_destroy = true 99 | } 100 | 101 | module "aws_key_pair" { 102 | source = "cloudposse/key-pair/aws" 103 | # Cloud Posse recommends pinning every module to a specific version 104 | # version = "x.x.x" 105 | namespace = var.namespace 106 | stage = var.stage 107 | name = var.name 108 | attributes = ["ssh", "key"] 109 | ssh_public_key_path = var.ssh_public_key_path 110 | generate_ssh_key = var.generate_ssh_key 111 | } 112 | 113 | module 
"emr_cluster" { 114 | source = "cloudposse/emr-cluster/aws" 115 | # Cloud Posse recommends pinning every module to a specific version 116 | # version = "x.x.x" 117 | 118 | namespace = var.namespace 119 | stage = var.stage 120 | name = var.name 121 | master_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 122 | slave_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 123 | region = var.region 124 | vpc_id = module.vpc.vpc_id 125 | subnet_id = module.subnets.private_subnet_ids[0] 126 | route_table_id = module.subnets.private_route_table_ids[0] 127 | subnet_type = "private" 128 | ebs_root_volume_size = var.ebs_root_volume_size 129 | visible_to_all_users = var.visible_to_all_users 130 | release_label = var.release_label 131 | applications = var.applications 132 | configurations_json = var.configurations_json 133 | core_instance_group_instance_type = var.core_instance_group_instance_type 134 | core_instance_group_instance_count = var.core_instance_group_instance_count 135 | core_instance_group_ebs_size = var.core_instance_group_ebs_size 136 | core_instance_group_ebs_type = var.core_instance_group_ebs_type 137 | core_instance_group_ebs_volumes_per_instance = var.core_instance_group_ebs_volumes_per_instance 138 | master_instance_group_instance_type = var.master_instance_group_instance_type 139 | master_instance_group_instance_count = var.master_instance_group_instance_count 140 | master_instance_group_ebs_size = var.master_instance_group_ebs_size 141 | master_instance_group_ebs_type = var.master_instance_group_ebs_type 142 | master_instance_group_ebs_volumes_per_instance = var.master_instance_group_ebs_volumes_per_instance 143 | create_task_instance_group = var.create_task_instance_group 144 | log_uri = format("s3n://%s/", module.s3_log_storage.bucket_id) 145 | key_name = module.aws_key_pair.key_name 146 | } 147 | ``` 148 | 149 | > [!IMPORTANT] 150 | > In Cloud Posse's examples, we avoid pinning modules to specific versions to prevent 
discrepancies between the documentation 151 | > and the latest released versions. However, for your own projects, we strongly advise pinning each module to the exact version 152 | > you're using. This practice ensures the stability of your infrastructure. Additionally, we recommend implementing a systematic 153 | > approach for updating versions to avoid unexpected changes. 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | ## Requirements 164 | 165 | | Name | Version | 166 | |------|---------| 167 | | [terraform](#requirement\_terraform) | >= 0.14.0 | 168 | | [aws](#requirement\_aws) | >= 3.5.0 | 169 | 170 | ## Providers 171 | 172 | | Name | Version | 173 | |------|---------| 174 | | [aws](#provider\_aws) | >= 3.5.0 | 175 | 176 | ## Modules 177 | 178 | | Name | Source | Version | 179 | |------|--------|---------| 180 | | [dns\_master](#module\_dns\_master) | cloudposse/route53-cluster-hostname/aws | 0.12.2 | 181 | | [label\_core](#module\_label\_core) | cloudposse/label/null | 0.25.0 | 182 | | [label\_ec2](#module\_label\_ec2) | cloudposse/label/null | 0.25.0 | 183 | | [label\_ec2\_autoscaling](#module\_label\_ec2\_autoscaling) | cloudposse/label/null | 0.25.0 | 184 | | [label\_emr](#module\_label\_emr) | cloudposse/label/null | 0.25.0 | 185 | | [label\_master](#module\_label\_master) | cloudposse/label/null | 0.25.0 | 186 | | [label\_master\_managed](#module\_label\_master\_managed) | cloudposse/label/null | 0.25.0 | 187 | | [label\_service\_managed](#module\_label\_service\_managed) | cloudposse/label/null | 0.25.0 | 188 | | [label\_slave](#module\_label\_slave) | cloudposse/label/null | 0.25.0 | 189 | | [label\_slave\_managed](#module\_label\_slave\_managed) | cloudposse/label/null | 0.25.0 | 190 | | [label\_task](#module\_label\_task) | cloudposse/label/null | 0.25.0 | 191 | | [this](#module\_this) | cloudposse/label/null | 0.25.0 | 192 | 193 | ## Resources 194 | 195 | | Name | Type | 196 | |------|------| 197 | | 
[aws_emr_cluster.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emr_cluster) | resource | 198 | | [aws_emr_instance_group.task](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emr_instance_group) | resource | 199 | | [aws_iam_instance_profile.ec2](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | 200 | | [aws_iam_role.ec2](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | 201 | | [aws_iam_role.ec2_autoscaling](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | 202 | | [aws_iam_role.emr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | 203 | | [aws_iam_role_policy_attachment.ec2](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | 204 | | [aws_iam_role_policy_attachment.ec2_autoscaling](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | 205 | | [aws_iam_role_policy_attachment.emr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | 206 | | [aws_iam_role_policy_attachment.emr_ssm_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | 207 | | [aws_security_group.managed_master](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | 208 | | [aws_security_group.managed_service_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | 209 | | [aws_security_group.managed_slave](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | 210 | | 
[aws_security_group.master](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | 211 | | [aws_security_group.slave](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | 212 | | [aws_security_group_rule.managed_master_egress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 213 | | [aws_security_group_rule.managed_master_service_access_ingress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 214 | | [aws_security_group_rule.managed_service_access_egress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 215 | | [aws_security_group_rule.managed_slave_egress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 216 | | [aws_security_group_rule.master_egress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 217 | | [aws_security_group_rule.master_ingress_cidr_blocks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 218 | | [aws_security_group_rule.master_ingress_security_groups](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 219 | | [aws_security_group_rule.slave_egress](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 220 | | [aws_security_group_rule.slave_ingress_cidr_blocks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 221 | | [aws_security_group_rule.slave_ingress_security_groups](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | 222 | | 
[aws_vpc_endpoint.vpc_endpoint_s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/vpc_endpoint) | resource | 223 | | [aws_iam_policy_document.assume_role_ec2](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 224 | | [aws_iam_policy_document.assume_role_emr](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 225 | | [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source | 226 | 227 | ## Inputs 228 | 229 | | Name | Description | Type | Default | Required | 230 | |------|-------------|------|---------|:--------:| 231 | | [additional\_info](#input\_additional\_info) | A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore Terraform cannot detect drift from the actual EMR cluster if its value is changed outside Terraform | `string` | `null` | no | 232 | | [additional\_master\_security\_group](#input\_additional\_master\_security\_group) | The id of the existing additional security group that will be used for EMR master node. If empty, a new security group will be created | `string` | `""` | no | 233 | | [additional\_slave\_security\_group](#input\_additional\_slave\_security\_group) | The id of the existing additional security group that will be used for EMR core & task nodes. If empty, a new security group will be created | `string` | `""` | no | 234 | | [additional\_tag\_map](#input\_additional\_tag\_map) | Additional key-value pairs to add to each map in `tags_as_list_of_maps`. Not added to `tags` or `id`.
This is for some rare cases where resources want additional configuration of tags
and therefore take a list of maps with tag key, value, and additional configuration. | `map(string)` | `{}` | no | 235 | | [applications](#input\_applications) | A list of applications for the cluster. Valid values are: Flink, Ganglia, Hadoop, HBase, HCatalog, Hive, Hue, JupyterHub, Livy, Mahout, MXNet, Oozie, Phoenix, Pig, Presto, Spark, Sqoop, TensorFlow, Tez, Zeppelin, and ZooKeeper (as of EMR 5.25.0). Case insensitive | `list(string)` | n/a | yes | 236 | | [attributes](#input\_attributes) | ID element. Additional attributes (e.g. `workers` or `cluster`) to add to `id`,
in the order they appear in the list. New attributes are appended to the
end of the list. The elements of the list are joined by the `delimiter`
and treated as a single ID element. | `list(string)` | `[]` | no | 237 | | [auto\_termination\_idle\_timeout](#input\_auto\_termination\_idle\_timeout) | Auto termination policy idle timeout in seconds (60 - 604800 supported) | `string` | `null` | no | 238 | | [bootstrap\_action](#input\_bootstrap\_action) | List of bootstrap actions that will be run before Hadoop is started on the cluster nodes |
list(object({
path = string
name = string
args = list(string)
}))
| `[]` | no | 239 | | [configurations\_json](#input\_configurations\_json) | A JSON string for supplying list of configurations for the EMR cluster. See https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html for more details | `string` | `""` | no | 240 | | [context](#input\_context) | Single object for setting entire context at once.
See description of individual variables for details.
Leave string and numeric variables as `null` to use default value.
Individual variable settings (non-null) override settings in context object,
except for attributes, tags, and additional\_tag\_map, which are merged. | `any` |
{
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"descriptor_formats": {},
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_key_case": null,
"label_order": [],
"label_value_case": null,
"labels_as_tags": [
"unset"
],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {},
"tenant": null
}
| no | 241 | | [core\_instance\_group\_autoscaling\_policy](#input\_core\_instance\_group\_autoscaling\_policy) | String containing the EMR Auto Scaling Policy JSON for the Core instance group | `string` | `null` | no | 242 | | [core\_instance\_group\_bid\_price](#input\_core\_instance\_group\_bid\_price) | Bid price for each EC2 instance in the Core instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances | `string` | `null` | no | 243 | | [core\_instance\_group\_ebs\_iops](#input\_core\_instance\_group\_ebs\_iops) | The number of I/O operations per second (IOPS) that the Core volume supports | `number` | `null` | no | 244 | | [core\_instance\_group\_ebs\_size](#input\_core\_instance\_group\_ebs\_size) | Core instances volume size, in gibibytes (GiB) | `number` | n/a | yes | 245 | | [core\_instance\_group\_ebs\_type](#input\_core\_instance\_group\_ebs\_type) | Core instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1` | `string` | `"gp2"` | no | 246 | | [core\_instance\_group\_ebs\_volumes\_per\_instance](#input\_core\_instance\_group\_ebs\_volumes\_per\_instance) | The number of EBS volumes with this configuration to attach to each EC2 instance in the Core instance group | `number` | `1` | no | 247 | | [core\_instance\_group\_instance\_count](#input\_core\_instance\_group\_instance\_count) | Target number of instances for the Core instance group. Must be at least 1 | `number` | `1` | no | 248 | | [core\_instance\_group\_instance\_type](#input\_core\_instance\_group\_instance\_type) | EC2 instance type for all instances in the Core instance group | `string` | n/a | yes | 249 | | [create\_task\_instance\_group](#input\_create\_task\_instance\_group) | Whether to create an instance group for Task nodes. 
For more info: https://www.terraform.io/docs/providers/aws/r/emr_instance_group.html, https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html | `bool` | `false` | no | 250 | | [create\_vpc\_endpoint\_s3](#input\_create\_vpc\_endpoint\_s3) | Set to false to prevent the module from creating VPC S3 Endpoint | `bool` | `true` | no | 251 | | [custom\_ami\_id](#input\_custom\_ami\_id) | A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI). Available in Amazon EMR version 5.7.0 and later | `string` | `null` | no | 252 | | [delimiter](#input\_delimiter) | Delimiter to be used between ID elements.
Defaults to `-` (hyphen). Set to `""` to use no delimiter at all. | `string` | `null` | no | 253 | | [descriptor\_formats](#input\_descriptor\_formats) | Describe additional descriptors to be output in the `descriptors` output map.
Map of maps. Keys are names of descriptors. Values are maps of the form
`{
format = string
labels = list(string)
}`
(Type is `any` so the map values can later be enhanced to provide additional options.)
`format` is a Terraform format string to be passed to the `format()` function.
`labels` is a list of labels, in order, to pass to `format()` function.
Label values will be normalized before being passed to `format()` so they will be
identical to how they appear in `id`.
Default is `{}` (`descriptors` output will be empty). | `any` | `{}` | no | 254 | | [ebs\_root\_volume\_size](#input\_ebs\_root\_volume\_size) | Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later | `number` | `10` | no | 255 | | [ec2\_autoscaling\_role\_enabled](#input\_ec2\_autoscaling\_role\_enabled) | If set to `false`, will use `existing_ec2_autoscaling_role_arn` for an existing EC2 autoscaling IAM role that was created outside of this module | `bool` | `true` | no | 256 | | [ec2\_autoscaling\_role\_permissions\_boundary](#input\_ec2\_autoscaling\_role\_permissions\_boundary) | The Permissions Boundary ARN to apply to the EC2 Autoscaling Role. | `string` | `""` | no | 257 | | [ec2\_role\_enabled](#input\_ec2\_role\_enabled) | If set to `false`, will use `existing_ec2_instance_profile_arn` for an existing EC2 IAM role that was created outside of this module | `bool` | `true` | no | 258 | | [ec2\_role\_permissions\_boundary](#input\_ec2\_role\_permissions\_boundary) | The Permissions Boundary ARN to apply to the EC2 Role. | `string` | `""` | no | 259 | | [emr\_role\_permissions\_boundary](#input\_emr\_role\_permissions\_boundary) | The Permissions Boundary ARN to apply to the EMR Role. | `string` | `""` | no | 260 | | [enable\_ssm\_access](#input\_enable\_ssm\_access) | If set to `true`, attach the existing `AmazonSSMManagedInstanceCore` IAM policy to the EMR EC2 instance profile role | `bool` | `false` | no | 261 | | [enabled](#input\_enabled) | Set to false to prevent the module from creating any resources | `bool` | `null` | no | 262 | | [environment](#input\_environment) | ID element. Usually used for region e.g. 
'uw2', 'us-west-2', OR role 'prod', 'staging', 'dev', 'UAT' | `string` | `null` | no | 263 | | [existing\_ec2\_autoscaling\_role\_arn](#input\_existing\_ec2\_autoscaling\_role\_arn) | ARN of an existing EC2 autoscaling role to attach to the cluster | `string` | `""` | no | 264 | | [existing\_ec2\_instance\_profile\_arn](#input\_existing\_ec2\_instance\_profile\_arn) | ARN of an existing EC2 instance profile | `string` | `""` | no | 265 | | [existing\_service\_role\_arn](#input\_existing\_service\_role\_arn) | ARN of an existing EMR service role to attach to the cluster | `string` | `""` | no | 266 | | [id\_length\_limit](#input\_id\_length\_limit) | Limit `id` to this many characters (minimum 6).
Set to `0` for unlimited length.
Set to `null` to keep the existing setting, which defaults to `0`.
Does not affect `id_full`. | `number` | `null` | no | 267 | | [keep\_job\_flow\_alive\_when\_no\_steps](#input\_keep\_job\_flow\_alive\_when\_no\_steps) | Switch on/off run cluster with no steps or when all steps are complete | `bool` | `true` | no | 268 | | [kerberos\_ad\_domain\_join\_password](#input\_kerberos\_ad\_domain\_join\_password) | The Active Directory password for ad\_domain\_join\_user. Terraform cannot perform drift detection of this configuration. | `string` | `null` | no | 269 | | [kerberos\_ad\_domain\_join\_user](#input\_kerberos\_ad\_domain\_join\_user) | Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration. | `string` | `null` | no | 270 | | [kerberos\_cross\_realm\_trust\_principal\_password](#input\_kerberos\_cross\_realm\_trust\_principal\_password) | Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration. | `string` | `null` | no | 271 | | [kerberos\_enabled](#input\_kerberos\_enabled) | Set to true if EMR cluster will use kerberos\_attributes | `bool` | `false` | no | 272 | | [kerberos\_kdc\_admin\_password](#input\_kerberos\_kdc\_admin\_password) | The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration. | `string` | `null` | no | 273 | | [kerberos\_realm](#input\_kerberos\_realm) | The name of the Kerberos realm to which all nodes in a cluster belong. 
For example, EC2.INTERNAL | `string` | `"EC2.INTERNAL"` | no | 274 | | [key\_name](#input\_key\_name) | Amazon EC2 key pair that can be used to ssh to the master node as the user called `hadoop` | `string` | `null` | no | 275 | | [label\_key\_case](#input\_label\_key\_case) | Controls the letter case of the `tags` keys (label names) for tags generated by this module.
Does not affect keys of tags passed in via the `tags` input.
Possible values: `lower`, `title`, `upper`.
Default value: `title`. | `string` | `null` | no | 276 | | [label\_order](#input\_label\_order) | The order in which the labels (ID elements) appear in the `id`.
Defaults to ["namespace", "environment", "stage", "name", "attributes"].
You can omit any of the 6 labels ("tenant" is the 6th), but at least one must be present. | `list(string)` | `null` | no | 277 | | [label\_value\_case](#input\_label\_value\_case) | Controls the letter case of ID elements (labels) as included in `id`,
set as tag values, and output by this module individually.
Does not affect values of tags passed in via the `tags` input.
Possible values: `lower`, `title`, `upper` and `none` (no transformation).
Set this to `title` and set `delimiter` to `""` to yield Pascal Case IDs.
Default value: `lower`. | `string` | `null` | no | 278 | | [labels\_as\_tags](#input\_labels\_as\_tags) | Set of labels (ID elements) to include as tags in the `tags` output.
Default is to include all labels.
Tags with empty values will not be included in the `tags` output.
Set to `[]` to suppress all generated tags.
**Notes:**
The value of the `name` tag, if included, will be the `id`, not the `name`.
Unlike other `null-label` inputs, the initial setting of `labels_as_tags` cannot be
changed in later chained modules. Attempts to change it will be silently ignored. | `set(string)` |
[
"default"
]
| no | 279 | | [log\_uri](#input\_log\_uri) | The path to the Amazon S3 location where logs for this cluster are stored | `string` | `null` | no | 280 | | [managed\_master\_security\_group](#input\_managed\_master\_security\_group) | The id of the existing managed security group that will be used for EMR master node. If empty, a new security group will be created | `string` | `""` | no | 281 | | [managed\_slave\_security\_group](#input\_managed\_slave\_security\_group) | The id of the existing managed security group that will be used for EMR core & task nodes. If empty, a new security group will be created | `string` | `""` | no | 282 | | [master\_allowed\_cidr\_blocks](#input\_master\_allowed\_cidr\_blocks) | List of CIDR blocks to be allowed to access the master instances | `list(string)` | `[]` | no | 283 | | [master\_allowed\_security\_groups](#input\_master\_allowed\_security\_groups) | List of security group ids to be allowed to connect to the master instances | `list(string)` | `[]` | no | 284 | | [master\_dns\_name](#input\_master\_dns\_name) | Name of the cluster CNAME record to create in the parent DNS zone specified by `zone_id`. If left empty, the name will be auto-assigned using the format `emr-master-var.name` | `string` | `null` | no | 285 | | [master\_instance\_group\_bid\_price](#input\_master\_instance\_group\_bid\_price) | Bid price for each EC2 instance in the Master instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. 
Leave this blank to use On-Demand Instances | `string` | `null` | no | 286 | | [master\_instance\_group\_ebs\_iops](#input\_master\_instance\_group\_ebs\_iops) | The number of I/O operations per second (IOPS) that the Master volume supports | `number` | `null` | no | 287 | | [master\_instance\_group\_ebs\_size](#input\_master\_instance\_group\_ebs\_size) | Master instances volume size, in gibibytes (GiB) | `number` | n/a | yes | 288 | | [master\_instance\_group\_ebs\_type](#input\_master\_instance\_group\_ebs\_type) | Master instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1` | `string` | `"gp2"` | no | 289 | | [master\_instance\_group\_ebs\_volumes\_per\_instance](#input\_master\_instance\_group\_ebs\_volumes\_per\_instance) | The number of EBS volumes with this configuration to attach to each EC2 instance in the Master instance group | `number` | `1` | no | 290 | | [master\_instance\_group\_instance\_count](#input\_master\_instance\_group\_instance\_count) | Target number of instances for the Master instance group. Must be at least 1 | `number` | `1` | no | 291 | | [master\_instance\_group\_instance\_type](#input\_master\_instance\_group\_instance\_type) | EC2 instance type for all instances in the Master instance group | `string` | n/a | yes | 292 | | [name](#input\_name) | ID element. Usually the component or solution name, e.g. 'app' or 'jenkins'.
This is the only ID element not also included as a `tag`.
The "name" tag is set to the full `id` string. There is no tag with the value of the `name` input. | `string` | `null` | no | 293 | | [namespace](#input\_namespace) | ID element. Usually an abbreviation of your organization name, e.g. 'eg' or 'cp', to help ensure generated IDs are globally unique | `string` | `null` | no | 294 | | [regex\_replace\_chars](#input\_regex\_replace\_chars) | Terraform regular expression (regex) string.
Characters matching the regex will be removed from the ID elements.
If not set, `"/[^a-zA-Z0-9-]/"` is used to remove all characters other than hyphens, letters and digits. | `string` | `null` | no | 295 | | [region](#input\_region) | AWS region | `string` | n/a | yes | 296 | | [release\_label](#input\_release\_label) | The release label for the Amazon EMR release. https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-5x.html | `string` | `"emr-5.25.0"` | no | 297 | | [route\_table\_id](#input\_route\_table\_id) | Route table ID for the VPC S3 Endpoint when launching the EMR cluster in a private subnet. Required when `subnet_type` is `private` | `string` | `""` | no | 298 | | [scale\_down\_behavior](#input\_scale\_down\_behavior) | The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized | `string` | `null` | no | 299 | | [security\_configuration](#input\_security\_configuration) | The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with `release_label` 4.8.0 or greater. See https://www.terraform.io/docs/providers/aws/r/emr_security_configuration.html for more info | `string` | `null` | no | 300 | | [service\_access\_security\_group](#input\_service\_access\_security\_group) | The id of the existing service access security group that will be used for the EMR cluster. 
If empty, a new security group will be created | `string` | `""` | no | 301 | | [service\_role\_enabled](#input\_service\_role\_enabled) | If set to `false`, will use `existing_service_role_arn` for an existing IAM role that was created outside of this module | `bool` | `true` | no | 302 | | [slave\_allowed\_cidr\_blocks](#input\_slave\_allowed\_cidr\_blocks) | List of CIDR blocks to be allowed to access the slave instances | `list(string)` | `[]` | no | 303 | | [slave\_allowed\_security\_groups](#input\_slave\_allowed\_security\_groups) | List of security group ids to be allowed to connect to the slave instances | `list(string)` | `[]` | no | 304 | | [stage](#input\_stage) | ID element. Usually used to indicate role, e.g. 'prod', 'staging', 'source', 'build', 'test', 'deploy', 'release' | `string` | `null` | no | 305 | | [step\_concurrency\_level](#input\_step\_concurrency\_level) | The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release\_label 5.28.0 or greater. | `number` | `null` | no | 306 | | [steps](#input\_steps) | List of steps to run when creating the cluster. |
list(object({
name = string
action_on_failure = string
hadoop_jar_step = object({
args = list(string)
jar = string
main_class = string
properties = map(string)
})
}))
| `[]` | no | 307 | | [subnet\_id](#input\_subnet\_id) | VPC subnet ID where you want the job flow to launch. Cannot specify the `cc1.4xlarge` instance type for nodes of a job flow launched in an Amazon VPC | `string` | n/a | yes | 308 | | [subnet\_type](#input\_subnet\_type) | Type of VPC subnet ID where you want the job flow to launch. Supported values are `private` or `public` | `string` | `"private"` | no | 309 | | [tags](#input\_tags) | Additional tags (e.g. `{'BusinessUnit': 'XYZ'}`).
Neither the tag keys nor the tag values will be modified by this module. | `map(string)` | `{}` | no | 310 | | [task\_instance\_group\_autoscaling\_policy](#input\_task\_instance\_group\_autoscaling\_policy) | String containing the EMR Auto Scaling Policy JSON for the Task instance group | `string` | `null` | no | 311 | | [task\_instance\_group\_bid\_price](#input\_task\_instance\_group\_bid\_price) | Bid price for each EC2 instance in the Task instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances | `string` | `null` | no | 312 | | [task\_instance\_group\_ebs\_iops](#input\_task\_instance\_group\_ebs\_iops) | The number of I/O operations per second (IOPS) that the Task volume supports | `number` | `null` | no | 313 | | [task\_instance\_group\_ebs\_optimized](#input\_task\_instance\_group\_ebs\_optimized) | Indicates whether an Amazon EBS volume in the Task instance group is EBS-optimized. Changing this forces a new resource to be created | `bool` | `false` | no | 314 | | [task\_instance\_group\_ebs\_size](#input\_task\_instance\_group\_ebs\_size) | Task instances volume size, in gibibytes (GiB) | `number` | `10` | no | 315 | | [task\_instance\_group\_ebs\_type](#input\_task\_instance\_group\_ebs\_type) | Task instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1` | `string` | `"gp2"` | no | 316 | | [task\_instance\_group\_ebs\_volumes\_per\_instance](#input\_task\_instance\_group\_ebs\_volumes\_per\_instance) | The number of EBS volumes with this configuration to attach to each EC2 instance in the Task instance group | `number` | `1` | no | 317 | | [task\_instance\_group\_instance\_count](#input\_task\_instance\_group\_instance\_count) | Target number of instances for the Task instance group. 
Must be at least 1 | `number` | `1` | no | 318 | | [task\_instance\_group\_instance\_type](#input\_task\_instance\_group\_instance\_type) | EC2 instance type for all instances in the Task instance group | `string` | `null` | no | 319 | | [tenant](#input\_tenant) | ID element \_(Rarely used, not included by default)\_. A customer identifier, indicating who this instance of a resource is for | `string` | `null` | no | 320 | | [termination\_protection](#input\_termination\_protection) | Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false | `bool` | `false` | no | 321 | | [use\_existing\_additional\_master\_security\_group](#input\_use\_existing\_additional\_master\_security\_group) | If set to `true`, will use variable `additional_master_security_group` using an existing security group that was created outside of this module | `bool` | `false` | no | 322 | | [use\_existing\_additional\_slave\_security\_group](#input\_use\_existing\_additional\_slave\_security\_group) | If set to `true`, will use variable `additional_slave_security_group` using an existing security group that was created outside of this module | `bool` | `false` | no | 323 | | [use\_existing\_managed\_master\_security\_group](#input\_use\_existing\_managed\_master\_security\_group) | If set to `true`, will use variable `managed_master_security_group` using an existing security group that was created outside of this module | `bool` | `false` | no | 324 | | [use\_existing\_managed\_slave\_security\_group](#input\_use\_existing\_managed\_slave\_security\_group) | If set to `true`, will use variable `managed_slave_security_group` using an existing security group that was created outside of this module | `bool` | `false` | no | 325 | | 
[use\_existing\_service\_access\_security\_group](#input\_use\_existing\_service\_access\_security\_group) | If set to `true`, will use variable `service_access_security_group` using an existing security group that was created outside of this module | `bool` | `false` | no | 326 | | [visible\_to\_all\_users](#input\_visible\_to\_all\_users) | Whether the job flow is visible to all IAM users of the AWS account associated with the job flow | `bool` | `true` | no | 327 | | [vpc\_id](#input\_vpc\_id) | VPC ID to create the cluster in (e.g. `vpc-a22222ee`) | `string` | n/a | yes | 328 | | [zone\_id](#input\_zone\_id) | Route53 parent zone ID. If provided (not empty), the module will create sub-domain DNS records for the masters and slaves | `string` | `null` | no | 329 | 330 | ## Outputs 331 | 332 | | Name | Description | 333 | |------|-------------| 334 | | [cluster\_id](#output\_cluster\_id) | EMR cluster ID | 335 | | [cluster\_name](#output\_cluster\_name) | EMR cluster name | 336 | | [ec2\_role](#output\_ec2\_role) | Role name of EMR EC2 instances so users can attach more policies | 337 | | [master\_host](#output\_master\_host) | Name of the cluster CNAME record for the master nodes in the parent DNS zone | 338 | | [master\_public\_dns](#output\_master\_public\_dns) | Master public DNS | 339 | | [master\_security\_group\_id](#output\_master\_security\_group\_id) | Master security group ID | 340 | | [slave\_security\_group\_id](#output\_slave\_security\_group\_id) | Slave security group ID | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | ## Related Projects 350 | 351 | Check out these related projects. 
352 | 353 | - [terraform-aws-rds-cluster](https://github.com/cloudposse/terraform-aws-rds-cluster) - Terraform module to provision an RDS Aurora cluster for MySQL or Postgres 354 | - [terraform-aws-rds](https://github.com/cloudposse/terraform-aws-rds) - Terraform module to provision AWS RDS instances 355 | - [terraform-aws-rds-cloudwatch-sns-alarms](https://github.com/cloudposse/terraform-aws-rds-cloudwatch-sns-alarms) - Terraform module that configures important RDS alerts using CloudWatch and sends them to an SNS topic 356 | 357 | 358 | > [!TIP] 359 | > #### Use Terraform Reference Architectures for AWS 360 | > 361 | > Use Cloud Posse's ready-to-go [terraform architecture blueprints](https://cloudposse.com/reference-architecture/) for AWS to get up and running quickly. 362 | > 363 | > ✅ We build it together with your team.
364 | > ✅ Your team owns everything.
365 | > ✅ 100% Open Source and backed by fanatical support.
366 | > 367 | > Request Quote 368 | >
📚 Learn More 369 | > 370 | >
371 | > 372 | > Cloud Posse is the leading [**DevOps Accelerator**](https://cpco.io/commercial-support?utm_source=github&utm_medium=readme&utm_campaign=cloudposse/terraform-aws-emr-cluster&utm_content=commercial_support) for funded startups and enterprises. 373 | > 374 | > *Your team can operate like a pro today.* 375 | > 376 | > Ensure that your team succeeds by using Cloud Posse's proven process and turnkey blueprints. Plus, we stick around until you succeed. 377 | > #### Day-0: Your Foundation for Success 378 | > - **Reference Architecture.** You'll get everything you need from the ground up built using 100% infrastructure as code. 379 | > - **Deployment Strategy.** Adopt a proven deployment strategy with GitHub Actions, enabling automated, repeatable, and reliable software releases. 380 | > - **Site Reliability Engineering.** Gain total visibility into your applications and services with Datadog, ensuring high availability and performance. 381 | > - **Security Baseline.** Establish a secure environment from the start, with built-in governance, accountability, and comprehensive audit logs, safeguarding your operations. 382 | > - **GitOps.** Empower your team to manage infrastructure changes confidently and efficiently through Pull Requests, leveraging the full power of GitHub Actions. 383 | > 384 | > Request Quote 385 | > 386 | > #### Day-2: Your Operational Mastery 387 | > - **Training.** Equip your team with the knowledge and skills to confidently manage the infrastructure, ensuring long-term success and self-sufficiency. 388 | > - **Support.** Benefit from seamless communication over Slack with our experts, ensuring you have the support you need, whenever you need it. 389 | > - **Troubleshooting.** Access expert assistance to quickly resolve any operational challenges, minimizing downtime and maintaining business continuity. 
390 | > - **Code Reviews.** Enhance your team’s code quality with our expert feedback, fostering continuous improvement and collaboration. 391 | > - **Bug Fixes.** Rely on our team to troubleshoot and resolve any issues, ensuring your systems run smoothly. 392 | > - **Migration Assistance.** Accelerate your migration process with our dedicated support, minimizing disruption and speeding up time-to-value. 393 | > - **Customer Workshops.** Engage with our team in weekly workshops, gaining insights and strategies to continuously improve and innovate. 394 | > 395 | > Request Quote 396 | >
397 | 398 | ## ✨ Contributing 399 | 400 | This project is under active development, and we encourage contributions from our community. 401 | 402 | 403 | 404 | Many thanks to our outstanding contributors: 405 | 406 | 407 | 408 | 409 | 410 | For 🐛 bug reports & feature requests, please use the [issue tracker](https://github.com/cloudposse/terraform-aws-emr-cluster/issues). 411 | 412 | In general, PRs are welcome. We follow the typical "fork-and-pull" Git workflow. 413 | 1. Review our [Code of Conduct](https://github.com/cloudposse/terraform-aws-emr-cluster/?tab=coc-ov-file#code-of-conduct) and [Contributor Guidelines](https://github.com/cloudposse/.github/blob/main/CONTRIBUTING.md). 414 | 2. **Fork** the repo on GitHub 415 | 3. **Clone** the project to your own machine 416 | 4. **Commit** changes to your own branch 417 | 5. **Push** your work back up to your fork 418 | 6. Submit a **Pull Request** so that we can review your changes 419 | 420 | **NOTE:** Be sure to merge the latest changes from "upstream" before making a pull request!

## Running Terraform Tests 421 | 422 | We use [Atmos](https://atmos.tools) to streamline how Terraform tests are run. It centralizes configuration and wraps common test workflows with easy-to-use commands. 423 | 424 | All tests are located in the [`test/`](test) folder. 425 | 426 | Under the hood, tests are powered by Terratest together with our internal [Test Helpers](https://github.com/cloudposse/test-helpers) library, providing robust infrastructure validation. 
427 | 428 | Setup dependencies: 429 | - Install Atmos ([installation guide](https://atmos.tools/install/)) 430 | - Install Go [1.24 or newer](https://go.dev/doc/install) 431 | - Install Terraform or OpenTofu 432 | 433 | To run tests: 434 | 435 | - Run all tests: 436 | ```sh 437 | atmos test run 438 | ``` 439 | - Clean up test artifacts: 440 | ```sh 441 | atmos test clean 442 | ``` 443 | - Explore additional test options: 444 | ```sh 445 | atmos test --help 446 | ``` 447 | The configuration for test commands is centrally managed. To review what's being imported, see the [`atmos.yaml`](https://raw.githubusercontent.com/cloudposse/.github/refs/heads/main/.github/atmos/terraform-module.yaml) file. 448 | 449 | Learn more about our [automated testing in our documentation](https://docs.cloudposse.com/community/contribute/automated-testing/) or implementing [custom commands](https://atmos.tools/core-concepts/custom-commands/) with Atmos. 450 | 451 | ### 🌎 Slack Community 452 | 453 | Join our [Open Source Community](https://cpco.io/slack?utm_source=github&utm_medium=readme&utm_campaign=cloudposse/terraform-aws-emr-cluster&utm_content=slack) on Slack. It's **FREE** for everyone! Our "SweetOps" community is where you get to talk with others who share a similar vision for how to roll out and manage infrastructure. This is the best place to talk shop, ask questions, solicit feedback, and work together as a community to build totally *sweet* infrastructure. 454 | 455 | ### 📰 Newsletter 456 | 457 | Sign up for [our newsletter](https://cpco.io/newsletter?utm_source=github&utm_medium=readme&utm_campaign=cloudposse/terraform-aws-emr-cluster&utm_content=newsletter) and join 3,000+ DevOps engineers, CTOs, and founders who get insider access to the latest DevOps trends, so you can always stay in the know. 458 | Dropped straight into your Inbox every week — and usually a 5-minute read. 
459 | 460 | ### 📆 Office Hours 461 | 462 | [Join us every Wednesday via Zoom](https://cloudposse.com/office-hours?utm_source=github&utm_medium=readme&utm_campaign=cloudposse/terraform-aws-emr-cluster&utm_content=office_hours) for your weekly dose of insider DevOps trends, AWS news and Terraform insights, all sourced from our SweetOps community, plus a _live Q&A_ that you can’t find anywhere else. 463 | It's **FREE** for everyone! 464 | ## License 465 | 466 | License 467 | 468 |
469 | Preamble to the Apache License, Version 2.0 470 |
471 |
472 | 473 | Complete license is available in the [`LICENSE`](LICENSE) file. 474 | 475 | ```text 476 | Licensed to the Apache Software Foundation (ASF) under one 477 | or more contributor license agreements. See the NOTICE file 478 | distributed with this work for additional information 479 | regarding copyright ownership. The ASF licenses this file 480 | to you under the Apache License, Version 2.0 (the 481 | "License"); you may not use this file except in compliance 482 | with the License. You may obtain a copy of the License at 483 | 484 | https://www.apache.org/licenses/LICENSE-2.0 485 | 486 | Unless required by applicable law or agreed to in writing, 487 | software distributed under the License is distributed on an 488 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 489 | KIND, either express or implied. See the License for the 490 | specific language governing permissions and limitations 491 | under the License. 492 | ``` 493 |
494 | 495 | ## Trademarks 496 | 497 | All other trademarks referenced herein are the property of their respective owners. 498 | 499 | 500 | --- 501 | Copyright © 2017-2025 [Cloud Posse, LLC](https://cpco.io/copyright) 502 | 503 | 504 | README footer 505 | 506 | Beacon 507 | -------------------------------------------------------------------------------- /README.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # This is the canonical configuration for the `README.md` 3 | # Run `make readme` to rebuild the `README.md` 4 | # 5 | 6 | # Name of this project 7 | name: terraform-aws-emr-cluster 8 | # Tags of this project 9 | tags: 10 | - aws 11 | - terraform 12 | - terraform-modules 13 | - databases 14 | - emr 15 | - emr-cluster 16 | - cluster 17 | - map-reduce 18 | 19 | # Categories of this project 20 | categories: 21 | - terraform-modules/databases 22 | 23 | # Logo for this project 24 | #logo: docs/logo.png 25 | 26 | # License of this project 27 | license: "APACHE2" 28 | 29 | # Canonical GitHub repo 30 | github_repo: cloudposse/terraform-aws-emr-cluster 31 | 32 | # Badges to display 33 | badges: 34 | - name: Latest Release 35 | image: https://img.shields.io/github/release/cloudposse/terraform-aws-emr-cluster.svg?style=for-the-badge 36 | url: https://github.com/cloudposse/terraform-aws-emr-cluster/releases/latest 37 | - name: Last Updated 38 | image: https://img.shields.io/github/last-commit/cloudposse/terraform-aws-emr-cluster.svg?style=for-the-badge 39 | url: https://github.com/cloudposse/terraform-aws-emr-cluster/commits 40 | - name: Slack Community 41 | image: https://slack.cloudposse.com/for-the-badge.svg 42 | url: https://cloudposse.com/slack 43 | 44 | # List any related terraform modules that this module may be used with or that this module depends on. 
45 | related: 46 | - name: "terraform-aws-rds-cluster" 47 | description: "Terraform module to provision an RDS Aurora cluster for MySQL or Postgres" 48 | url: "https://github.com/cloudposse/terraform-aws-rds-cluster" 49 | - name: "terraform-aws-rds" 50 | description: "Terraform module to provision AWS RDS instances" 51 | url: "https://github.com/cloudposse/terraform-aws-rds" 52 | - name: "terraform-aws-rds-cloudwatch-sns-alarms" 53 | description: "Terraform module that configures important RDS alerts using CloudWatch and sends them to an SNS topic" 54 | url: "https://github.com/cloudposse/terraform-aws-rds-cloudwatch-sns-alarms" 55 | 56 | # Short description of this project 57 | description: |- 58 | Terraform module to provision an Elastic MapReduce (EMR) cluster on AWS. 59 | 60 | # How to use this project 61 | usage: |2- 62 | 63 | For a complete example, see [examples/complete](examples/complete) 64 | 65 | For automated tests of the complete example using [bats](https://github.com/bats-core/bats-core) and [Terratest](https://github.com/gruntwork-io/terratest) (which tests and deploys the example on AWS), see [test](test). 
66 | 67 | ```hcl 68 | provider "aws" { 69 | region = "us-east-2" 70 | } 71 | 72 | module "vpc" { 73 | source = "cloudposse/vpc/aws" 74 | # Cloud Posse recommends pinning every module to a specific version 75 | # version = "x.x.x" 76 | 77 | ipv4_primary_cidr_block = "172.19.0.0/16" 78 | 79 | context = module.this.context 80 | } 81 | 82 | module "subnets" { 83 | source = "cloudposse/dynamic-subnets/aws" 84 | # Cloud Posse recommends pinning every module to a specific version 85 | # version = "x.x.x" 86 | 87 | availability_zones = var.availability_zones 88 | vpc_id = module.vpc.vpc_id 89 | igw_id = [module.vpc.igw_id] 90 | ipv4_cidr_block = [module.vpc.vpc_cidr_block] 91 | nat_gateway_enabled = false 92 | nat_instance_enabled = false 93 | 94 | context = module.this.context 95 | } 96 | 97 | module "s3_log_storage" { 98 | source = "cloudposse/s3-log-storage/aws" 99 | # Cloud Posse recommends pinning every module to a specific version 100 | # version = "x.x.x" 101 | 102 | region = var.region 103 | namespace = var.namespace 104 | stage = var.stage 105 | name = var.name 106 | attributes = ["logs"] 107 | force_destroy = true 108 | } 109 | 110 | module "aws_key_pair" { 111 | source = "cloudposse/key-pair/aws" 112 | # Cloud Posse recommends pinning every module to a specific version 113 | # version = "x.x.x" 114 | namespace = var.namespace 115 | stage = var.stage 116 | name = var.name 117 | attributes = ["ssh", "key"] 118 | ssh_public_key_path = var.ssh_public_key_path 119 | generate_ssh_key = var.generate_ssh_key 120 | } 121 | 122 | module "emr_cluster" { 123 | source = "cloudposse/emr-cluster/aws" 124 | # Cloud Posse recommends pinning every module to a specific version 125 | # version = "x.x.x" 126 | 127 | namespace = var.namespace 128 | stage = var.stage 129 | name = var.name 130 | master_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 131 | slave_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 132 | region = var.region 133 | 
vpc_id = module.vpc.vpc_id 134 | subnet_id = module.subnets.private_subnet_ids[0] 135 | route_table_id = module.subnets.private_route_table_ids[0] 136 | subnet_type = "private" 137 | ebs_root_volume_size = var.ebs_root_volume_size 138 | visible_to_all_users = var.visible_to_all_users 139 | release_label = var.release_label 140 | applications = var.applications 141 | configurations_json = var.configurations_json 142 | core_instance_group_instance_type = var.core_instance_group_instance_type 143 | core_instance_group_instance_count = var.core_instance_group_instance_count 144 | core_instance_group_ebs_size = var.core_instance_group_ebs_size 145 | core_instance_group_ebs_type = var.core_instance_group_ebs_type 146 | core_instance_group_ebs_volumes_per_instance = var.core_instance_group_ebs_volumes_per_instance 147 | master_instance_group_instance_type = var.master_instance_group_instance_type 148 | master_instance_group_instance_count = var.master_instance_group_instance_count 149 | master_instance_group_ebs_size = var.master_instance_group_ebs_size 150 | master_instance_group_ebs_type = var.master_instance_group_ebs_type 151 | master_instance_group_ebs_volumes_per_instance = var.master_instance_group_ebs_volumes_per_instance 152 | create_task_instance_group = var.create_task_instance_group 153 | log_uri = format("s3n://%s/", module.s3_log_storage.bucket_id) 154 | key_name = module.aws_key_pair.key_name 155 | } 156 | ``` 157 | 158 | # Example usage 159 | #examples: |- 160 | # Example goes here... 161 | 162 | # How to get started quickly 163 | #quickstart: |- 164 | # Here's how to get started... 
165 | 166 | # Other files to include in this README from the project folder 167 | include: [] 168 | contributors: [] 169 | -------------------------------------------------------------------------------- /atmos.yaml: -------------------------------------------------------------------------------- 1 | # Atmos Configuration — powered by https://atmos.tools 2 | # 3 | # This configuration enables centralized, DRY, and consistent project scaffolding using Atmos. 4 | # 5 | # Included features: 6 | # - Organizational custom commands: https://atmos.tools/core-concepts/custom-commands 7 | # - Automated README generation: https://atmos.tools/cli/commands/docs/generate 8 | # 9 | 10 | # Import shared configuration used by all modules 11 | import: 12 | - https://raw.githubusercontent.com/cloudposse/.github/refs/heads/main/.github/atmos/terraform-module.yaml 13 | -------------------------------------------------------------------------------- /context.tf: -------------------------------------------------------------------------------- 1 | # 2 | # ONLY EDIT THIS FILE IN github.com/cloudposse/terraform-null-label 3 | # All other instances of this file should be a copy of that one 4 | # 5 | # 6 | # Copy this file from https://github.com/cloudposse/terraform-null-label/blob/master/exports/context.tf 7 | # and then place it in your Terraform module to automatically get 8 | # Cloud Posse's standard configuration inputs suitable for passing 9 | # to Cloud Posse modules. 10 | # 11 | # curl -sL https://raw.githubusercontent.com/cloudposse/terraform-null-label/master/exports/context.tf -o context.tf 12 | # 13 | # Modules should access the whole context as `module.this.context` 14 | # to get the input variables with nulls for defaults, 15 | # for example `context = module.this.context`, 16 | # and access individual variables as `module.this.`, 17 | # with final values filled in. 
18 | # 19 | # For example, when using defaults, `module.this.context.delimiter` 20 | # will be null, and `module.this.delimiter` will be `-` (hyphen). 21 | # 22 | 23 | module "this" { 24 | source = "cloudposse/label/null" 25 | version = "0.25.0" # requires Terraform >= 0.13.0 26 | 27 | enabled = var.enabled 28 | namespace = var.namespace 29 | tenant = var.tenant 30 | environment = var.environment 31 | stage = var.stage 32 | name = var.name 33 | delimiter = var.delimiter 34 | attributes = var.attributes 35 | tags = var.tags 36 | additional_tag_map = var.additional_tag_map 37 | label_order = var.label_order 38 | regex_replace_chars = var.regex_replace_chars 39 | id_length_limit = var.id_length_limit 40 | label_key_case = var.label_key_case 41 | label_value_case = var.label_value_case 42 | descriptor_formats = var.descriptor_formats 43 | labels_as_tags = var.labels_as_tags 44 | 45 | context = var.context 46 | } 47 | 48 | # Copy contents of cloudposse/terraform-null-label/variables.tf here 49 | 50 | variable "context" { 51 | type = any 52 | default = { 53 | enabled = true 54 | namespace = null 55 | tenant = null 56 | environment = null 57 | stage = null 58 | name = null 59 | delimiter = null 60 | attributes = [] 61 | tags = {} 62 | additional_tag_map = {} 63 | regex_replace_chars = null 64 | label_order = [] 65 | id_length_limit = null 66 | label_key_case = null 67 | label_value_case = null 68 | descriptor_formats = {} 69 | # Note: we have to use [] instead of null for unset lists due to 70 | # https://github.com/hashicorp/terraform/issues/28137 71 | # which was not fixed until Terraform 1.0.0, 72 | # but we want the default to be all the labels in `label_order` 73 | # and we want users to be able to prevent all tag generation 74 | # by setting `labels_as_tags` to `[]`, so we need 75 | # a different sentinel to indicate "default" 76 | labels_as_tags = ["unset"] 77 | } 78 | description = <<-EOT 79 | Single object for setting entire context at once. 
80 | See description of individual variables for details. 81 | Leave string and numeric variables as `null` to use default value. 82 | Individual variable settings (non-null) override settings in context object, 83 | except for attributes, tags, and additional_tag_map, which are merged. 84 | EOT 85 | 86 | validation { 87 | condition = lookup(var.context, "label_key_case", null) == null ? true : contains(["lower", "title", "upper"], var.context["label_key_case"]) 88 | error_message = "Allowed values: `lower`, `title`, `upper`." 89 | } 90 | 91 | validation { 92 | condition = lookup(var.context, "label_value_case", null) == null ? true : contains(["lower", "title", "upper", "none"], var.context["label_value_case"]) 93 | error_message = "Allowed values: `lower`, `title`, `upper`, `none`." 94 | } 95 | } 96 | 97 | variable "enabled" { 98 | type = bool 99 | default = null 100 | description = "Set to false to prevent the module from creating any resources" 101 | } 102 | 103 | variable "namespace" { 104 | type = string 105 | default = null 106 | description = "ID element. Usually an abbreviation of your organization name, e.g. 'eg' or 'cp', to help ensure generated IDs are globally unique" 107 | } 108 | 109 | variable "tenant" { 110 | type = string 111 | default = null 112 | description = "ID element _(Rarely used, not included by default)_. A customer identifier, indicating who this instance of a resource is for" 113 | } 114 | 115 | variable "environment" { 116 | type = string 117 | default = null 118 | description = "ID element. Usually used for region e.g. 'uw2', 'us-west-2', OR role 'prod', 'staging', 'dev', 'UAT'" 119 | } 120 | 121 | variable "stage" { 122 | type = string 123 | default = null 124 | description = "ID element. Usually used to indicate role, e.g. 'prod', 'staging', 'source', 'build', 'test', 'deploy', 'release'" 125 | } 126 | 127 | variable "name" { 128 | type = string 129 | default = null 130 | description = <<-EOT 131 | ID element. 
Usually the component or solution name, e.g. 'app' or 'jenkins'. 132 | This is the only ID element not also included as a `tag`. 133 | The "name" tag is set to the full `id` string. There is no tag with the value of the `name` input. 134 | EOT 135 | } 136 | 137 | variable "delimiter" { 138 | type = string 139 | default = null 140 | description = <<-EOT 141 | Delimiter to be used between ID elements. 142 | Defaults to `-` (hyphen). Set to `""` to use no delimiter at all. 143 | EOT 144 | } 145 | 146 | variable "attributes" { 147 | type = list(string) 148 | default = [] 149 | description = <<-EOT 150 | ID element. Additional attributes (e.g. `workers` or `cluster`) to add to `id`, 151 | in the order they appear in the list. New attributes are appended to the 152 | end of the list. The elements of the list are joined by the `delimiter` 153 | and treated as a single ID element. 154 | EOT 155 | } 156 | 157 | variable "labels_as_tags" { 158 | type = set(string) 159 | default = ["default"] 160 | description = <<-EOT 161 | Set of labels (ID elements) to include as tags in the `tags` output. 162 | Default is to include all labels. 163 | Tags with empty values will not be included in the `tags` output. 164 | Set to `[]` to suppress all generated tags. 165 | **Notes:** 166 | The value of the `name` tag, if included, will be the `id`, not the `name`. 167 | Unlike other `null-label` inputs, the initial setting of `labels_as_tags` cannot be 168 | changed in later chained modules. Attempts to change it will be silently ignored. 169 | EOT 170 | } 171 | 172 | variable "tags" { 173 | type = map(string) 174 | default = {} 175 | description = <<-EOT 176 | Additional tags (e.g. `{'BusinessUnit': 'XYZ'}`). 177 | Neither the tag keys nor the tag values will be modified by this module. 
178 | EOT 179 | } 180 | 181 | variable "additional_tag_map" { 182 | type = map(string) 183 | default = {} 184 | description = <<-EOT 185 | Additional key-value pairs to add to each map in `tags_as_list_of_maps`. Not added to `tags` or `id`. 186 | This is for some rare cases where resources want additional configuration of tags 187 | and therefore take a list of maps with tag key, value, and additional configuration. 188 | EOT 189 | } 190 | 191 | variable "label_order" { 192 | type = list(string) 193 | default = null 194 | description = <<-EOT 195 | The order in which the labels (ID elements) appear in the `id`. 196 | Defaults to ["namespace", "environment", "stage", "name", "attributes"]. 197 | You can omit any of the 6 labels ("tenant" is the 6th), but at least one must be present. 198 | EOT 199 | } 200 | 201 | variable "regex_replace_chars" { 202 | type = string 203 | default = null 204 | description = <<-EOT 205 | Terraform regular expression (regex) string. 206 | Characters matching the regex will be removed from the ID elements. 207 | If not set, `"/[^a-zA-Z0-9-]/"` is used to remove all characters other than hyphens, letters and digits. 208 | EOT 209 | } 210 | 211 | variable "id_length_limit" { 212 | type = number 213 | default = null 214 | description = <<-EOT 215 | Limit `id` to this many characters (minimum 6). 216 | Set to `0` for unlimited length. 217 | Set to `null` for keep the existing setting, which defaults to `0`. 218 | Does not affect `id_full`. 219 | EOT 220 | validation { 221 | condition = var.id_length_limit == null ? true : var.id_length_limit >= 6 || var.id_length_limit == 0 222 | error_message = "The id_length_limit must be >= 6 if supplied (not null), or 0 for unlimited length." 223 | } 224 | } 225 | 226 | variable "label_key_case" { 227 | type = string 228 | default = null 229 | description = <<-EOT 230 | Controls the letter case of the `tags` keys (label names) for tags generated by this module. 
231 | Does not affect keys of tags passed in via the `tags` input. 232 | Possible values: `lower`, `title`, `upper`. 233 | Default value: `title`. 234 | EOT 235 | 236 | validation { 237 | condition = var.label_key_case == null ? true : contains(["lower", "title", "upper"], var.label_key_case) 238 | error_message = "Allowed values: `lower`, `title`, `upper`." 239 | } 240 | } 241 | 242 | variable "label_value_case" { 243 | type = string 244 | default = null 245 | description = <<-EOT 246 | Controls the letter case of ID elements (labels) as included in `id`, 247 | set as tag values, and output by this module individually. 248 | Does not affect values of tags passed in via the `tags` input. 249 | Possible values: `lower`, `title`, `upper` and `none` (no transformation). 250 | Set this to `title` and set `delimiter` to `""` to yield Pascal Case IDs. 251 | Default value: `lower`. 252 | EOT 253 | 254 | validation { 255 | condition = var.label_value_case == null ? true : contains(["lower", "title", "upper", "none"], var.label_value_case) 256 | error_message = "Allowed values: `lower`, `title`, `upper`, `none`." 257 | } 258 | } 259 | 260 | variable "descriptor_formats" { 261 | type = any 262 | default = {} 263 | description = <<-EOT 264 | Describe additional descriptors to be output in the `descriptors` output map. 265 | Map of maps. Keys are names of descriptors. Values are maps of the form 266 | `{ 267 | format = string 268 | labels = list(string) 269 | }` 270 | (Type is `any` so the map values can later be enhanced to provide additional options.) 271 | `format` is a Terraform format string to be passed to the `format()` function. 272 | `labels` is a list of labels, in order, to pass to `format()` function. 273 | Label values will be normalized before being passed to `format()` so they will be 274 | identical to how they appear in `id`. 275 | Default is `{}` (`descriptors` output will be empty). 
276 | EOT 277 | } 278 | 279 | #### End of copy of cloudposse/terraform-null-label/variables.tf 280 | -------------------------------------------------------------------------------- /examples/complete/context.tf: -------------------------------------------------------------------------------- 1 | # 2 | # ONLY EDIT THIS FILE IN github.com/cloudposse/terraform-null-label 3 | # All other instances of this file should be a copy of that one 4 | # 5 | # 6 | # Copy this file from https://github.com/cloudposse/terraform-null-label/blob/master/exports/context.tf 7 | # and then place it in your Terraform module to automatically get 8 | # Cloud Posse's standard configuration inputs suitable for passing 9 | # to Cloud Posse modules. 10 | # 11 | # curl -sL https://raw.githubusercontent.com/cloudposse/terraform-null-label/master/exports/context.tf -o context.tf 12 | # 13 | # Modules should access the whole context as `module.this.context` 14 | # to get the input variables with nulls for defaults, 15 | # for example `context = module.this.context`, 16 | # and access individual variables as `module.this.`, 17 | # with final values filled in. 18 | # 19 | # For example, when using defaults, `module.this.context.delimiter` 20 | # will be null, and `module.this.delimiter` will be `-` (hyphen). 
21 | # 22 | 23 | module "this" { 24 | source = "cloudposse/label/null" 25 | version = "0.25.0" # requires Terraform >= 0.13.0 26 | 27 | enabled = var.enabled 28 | namespace = var.namespace 29 | tenant = var.tenant 30 | environment = var.environment 31 | stage = var.stage 32 | name = var.name 33 | delimiter = var.delimiter 34 | attributes = var.attributes 35 | tags = var.tags 36 | additional_tag_map = var.additional_tag_map 37 | label_order = var.label_order 38 | regex_replace_chars = var.regex_replace_chars 39 | id_length_limit = var.id_length_limit 40 | label_key_case = var.label_key_case 41 | label_value_case = var.label_value_case 42 | descriptor_formats = var.descriptor_formats 43 | labels_as_tags = var.labels_as_tags 44 | 45 | context = var.context 46 | } 47 | 48 | # Copy contents of cloudposse/terraform-null-label/variables.tf here 49 | 50 | variable "context" { 51 | type = any 52 | default = { 53 | enabled = true 54 | namespace = null 55 | tenant = null 56 | environment = null 57 | stage = null 58 | name = null 59 | delimiter = null 60 | attributes = [] 61 | tags = {} 62 | additional_tag_map = {} 63 | regex_replace_chars = null 64 | label_order = [] 65 | id_length_limit = null 66 | label_key_case = null 67 | label_value_case = null 68 | descriptor_formats = {} 69 | # Note: we have to use [] instead of null for unset lists due to 70 | # https://github.com/hashicorp/terraform/issues/28137 71 | # which was not fixed until Terraform 1.0.0, 72 | # but we want the default to be all the labels in `label_order` 73 | # and we want users to be able to prevent all tag generation 74 | # by setting `labels_as_tags` to `[]`, so we need 75 | # a different sentinel to indicate "default" 76 | labels_as_tags = ["unset"] 77 | } 78 | description = <<-EOT 79 | Single object for setting entire context at once. 80 | See description of individual variables for details. 81 | Leave string and numeric variables as `null` to use default value. 
82 | Individual variable settings (non-null) override settings in context object, 83 | except for attributes, tags, and additional_tag_map, which are merged. 84 | EOT 85 | 86 | validation { 87 | condition = lookup(var.context, "label_key_case", null) == null ? true : contains(["lower", "title", "upper"], var.context["label_key_case"]) 88 | error_message = "Allowed values: `lower`, `title`, `upper`." 89 | } 90 | 91 | validation { 92 | condition = lookup(var.context, "label_value_case", null) == null ? true : contains(["lower", "title", "upper", "none"], var.context["label_value_case"]) 93 | error_message = "Allowed values: `lower`, `title`, `upper`, `none`." 94 | } 95 | } 96 | 97 | variable "enabled" { 98 | type = bool 99 | default = null 100 | description = "Set to false to prevent the module from creating any resources" 101 | } 102 | 103 | variable "namespace" { 104 | type = string 105 | default = null 106 | description = "ID element. Usually an abbreviation of your organization name, e.g. 'eg' or 'cp', to help ensure generated IDs are globally unique" 107 | } 108 | 109 | variable "tenant" { 110 | type = string 111 | default = null 112 | description = "ID element _(Rarely used, not included by default)_. A customer identifier, indicating who this instance of a resource is for" 113 | } 114 | 115 | variable "environment" { 116 | type = string 117 | default = null 118 | description = "ID element. Usually used for region e.g. 'uw2', 'us-west-2', OR role 'prod', 'staging', 'dev', 'UAT'" 119 | } 120 | 121 | variable "stage" { 122 | type = string 123 | default = null 124 | description = "ID element. Usually used to indicate role, e.g. 'prod', 'staging', 'source', 'build', 'test', 'deploy', 'release'" 125 | } 126 | 127 | variable "name" { 128 | type = string 129 | default = null 130 | description = <<-EOT 131 | ID element. Usually the component or solution name, e.g. 'app' or 'jenkins'. 132 | This is the only ID element not also included as a `tag`. 
133 | The "name" tag is set to the full `id` string. There is no tag with the value of the `name` input. 134 | EOT 135 | } 136 | 137 | variable "delimiter" { 138 | type = string 139 | default = null 140 | description = <<-EOT 141 | Delimiter to be used between ID elements. 142 | Defaults to `-` (hyphen). Set to `""` to use no delimiter at all. 143 | EOT 144 | } 145 | 146 | variable "attributes" { 147 | type = list(string) 148 | default = [] 149 | description = <<-EOT 150 | ID element. Additional attributes (e.g. `workers` or `cluster`) to add to `id`, 151 | in the order they appear in the list. New attributes are appended to the 152 | end of the list. The elements of the list are joined by the `delimiter` 153 | and treated as a single ID element. 154 | EOT 155 | } 156 | 157 | variable "labels_as_tags" { 158 | type = set(string) 159 | default = ["default"] 160 | description = <<-EOT 161 | Set of labels (ID elements) to include as tags in the `tags` output. 162 | Default is to include all labels. 163 | Tags with empty values will not be included in the `tags` output. 164 | Set to `[]` to suppress all generated tags. 165 | **Notes:** 166 | The value of the `name` tag, if included, will be the `id`, not the `name`. 167 | Unlike other `null-label` inputs, the initial setting of `labels_as_tags` cannot be 168 | changed in later chained modules. Attempts to change it will be silently ignored. 169 | EOT 170 | } 171 | 172 | variable "tags" { 173 | type = map(string) 174 | default = {} 175 | description = <<-EOT 176 | Additional tags (e.g. `{'BusinessUnit': 'XYZ'}`). 177 | Neither the tag keys nor the tag values will be modified by this module. 178 | EOT 179 | } 180 | 181 | variable "additional_tag_map" { 182 | type = map(string) 183 | default = {} 184 | description = <<-EOT 185 | Additional key-value pairs to add to each map in `tags_as_list_of_maps`. Not added to `tags` or `id`. 
186 | This is for some rare cases where resources want additional configuration of tags 187 | and therefore take a list of maps with tag key, value, and additional configuration. 188 | EOT 189 | } 190 | 191 | variable "label_order" { 192 | type = list(string) 193 | default = null 194 | description = <<-EOT 195 | The order in which the labels (ID elements) appear in the `id`. 196 | Defaults to ["namespace", "environment", "stage", "name", "attributes"]. 197 | You can omit any of the 6 labels ("tenant" is the 6th), but at least one must be present. 198 | EOT 199 | } 200 | 201 | variable "regex_replace_chars" { 202 | type = string 203 | default = null 204 | description = <<-EOT 205 | Terraform regular expression (regex) string. 206 | Characters matching the regex will be removed from the ID elements. 207 | If not set, `"/[^a-zA-Z0-9-]/"` is used to remove all characters other than hyphens, letters and digits. 208 | EOT 209 | } 210 | 211 | variable "id_length_limit" { 212 | type = number 213 | default = null 214 | description = <<-EOT 215 | Limit `id` to this many characters (minimum 6). 216 | Set to `0` for unlimited length. 217 | Set to `null` for keep the existing setting, which defaults to `0`. 218 | Does not affect `id_full`. 219 | EOT 220 | validation { 221 | condition = var.id_length_limit == null ? true : var.id_length_limit >= 6 || var.id_length_limit == 0 222 | error_message = "The id_length_limit must be >= 6 if supplied (not null), or 0 for unlimited length." 223 | } 224 | } 225 | 226 | variable "label_key_case" { 227 | type = string 228 | default = null 229 | description = <<-EOT 230 | Controls the letter case of the `tags` keys (label names) for tags generated by this module. 231 | Does not affect keys of tags passed in via the `tags` input. 232 | Possible values: `lower`, `title`, `upper`. 233 | Default value: `title`. 234 | EOT 235 | 236 | validation { 237 | condition = var.label_key_case == null ? 
true : contains(["lower", "title", "upper"], var.label_key_case) 238 | error_message = "Allowed values: `lower`, `title`, `upper`." 239 | } 240 | } 241 | 242 | variable "label_value_case" { 243 | type = string 244 | default = null 245 | description = <<-EOT 246 | Controls the letter case of ID elements (labels) as included in `id`, 247 | set as tag values, and output by this module individually. 248 | Does not affect values of tags passed in via the `tags` input. 249 | Possible values: `lower`, `title`, `upper` and `none` (no transformation). 250 | Set this to `title` and set `delimiter` to `""` to yield Pascal Case IDs. 251 | Default value: `lower`. 252 | EOT 253 | 254 | validation { 255 | condition = var.label_value_case == null ? true : contains(["lower", "title", "upper", "none"], var.label_value_case) 256 | error_message = "Allowed values: `lower`, `title`, `upper`, `none`." 257 | } 258 | } 259 | 260 | variable "descriptor_formats" { 261 | type = any 262 | default = {} 263 | description = <<-EOT 264 | Describe additional descriptors to be output in the `descriptors` output map. 265 | Map of maps. Keys are names of descriptors. Values are maps of the form 266 | `{ 267 | format = string 268 | labels = list(string) 269 | }` 270 | (Type is `any` so the map values can later be enhanced to provide additional options.) 271 | `format` is a Terraform format string to be passed to the `format()` function. 272 | `labels` is a list of labels, in order, to pass to `format()` function. 273 | Label values will be normalized before being passed to `format()` so they will be 274 | identical to how they appear in `id`. 275 | Default is `{}` (`descriptors` output will be empty). 
276 | EOT 277 | } 278 | 279 | #### End of copy of cloudposse/terraform-null-label/variables.tf 280 | -------------------------------------------------------------------------------- /examples/complete/fixtures.us-east-2.tfvars: -------------------------------------------------------------------------------- 1 | region = "us-east-2" 2 | 3 | availability_zones = ["us-east-2a"] 4 | 5 | namespace = "eg" 6 | 7 | stage = "test" 8 | 9 | name = "emr" 10 | 11 | ebs_root_volume_size = 10 12 | 13 | visible_to_all_users = true 14 | 15 | # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-components.html 16 | release_label = "emr-6.7.0" 17 | 18 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-ha-applications.html 19 | # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-670-release.html 20 | # https://github.com/hashicorp/terraform-provider-aws/issues/23226 21 | applications = ["Hive", "Presto"] 22 | 23 | core_instance_group_instance_type = "m4.large" 24 | 25 | core_instance_group_instance_count = 1 26 | 27 | core_instance_group_ebs_size = 10 28 | 29 | core_instance_group_ebs_type = "gp2" 30 | 31 | core_instance_group_ebs_volumes_per_instance = 1 32 | 33 | master_instance_group_instance_type = "m4.large" 34 | 35 | master_instance_group_instance_count = 1 36 | 37 | master_instance_group_ebs_size = 10 38 | 39 | master_instance_group_ebs_type = "gp2" 40 | 41 | master_instance_group_ebs_volumes_per_instance = 1 42 | 43 | create_task_instance_group = false 44 | 45 | ssh_public_key_path = "/secrets" 46 | 47 | generate_ssh_key = true 48 | -------------------------------------------------------------------------------- /examples/complete/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.region 3 | } 4 | 5 | module "vpc" { 6 | source = "cloudposse/vpc/aws" 7 | version = "1.1.0" 8 | 9 | ipv4_primary_cidr_block = "172.19.0.0/16" 10 | 11 | context = module.this.context 12 | } 
13 | 14 | module "subnets" { 15 | source = "cloudposse/dynamic-subnets/aws" 16 | version = "2.0.2" 17 | 18 | availability_zones = var.availability_zones 19 | vpc_id = module.vpc.vpc_id 20 | igw_id = [module.vpc.igw_id] 21 | ipv4_cidr_block = [module.vpc.vpc_cidr_block] 22 | nat_gateway_enabled = false 23 | nat_instance_enabled = false 24 | 25 | context = module.this.context 26 | } 27 | 28 | module "s3_log_storage" { 29 | source = "cloudposse/s3-log-storage/aws" 30 | version = "0.26.0" 31 | 32 | force_destroy = true 33 | attributes = ["logs"] 34 | 35 | context = module.this.context 36 | } 37 | 38 | module "aws_key_pair" { 39 | source = "cloudposse/key-pair/aws" 40 | version = "0.18.3" 41 | 42 | ssh_public_key_path = var.ssh_public_key_path 43 | generate_ssh_key = var.generate_ssh_key 44 | attributes = ["ssh", "key"] 45 | 46 | context = module.this.context 47 | } 48 | 49 | module "emr_cluster" { 50 | source = "../../" 51 | 52 | master_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 53 | slave_allowed_security_groups = [module.vpc.vpc_default_security_group_id] 54 | region = var.region 55 | vpc_id = module.vpc.vpc_id 56 | subnet_id = module.this.enabled ? module.subnets.private_subnet_ids[0] : null 57 | route_table_id = module.this.enabled ? 
module.subnets.private_route_table_ids[0] : null 58 | subnet_type = "private" 59 | ebs_root_volume_size = var.ebs_root_volume_size 60 | visible_to_all_users = var.visible_to_all_users 61 | release_label = var.release_label 62 | applications = var.applications 63 | configurations_json = var.configurations_json 64 | core_instance_group_instance_type = var.core_instance_group_instance_type 65 | core_instance_group_instance_count = var.core_instance_group_instance_count 66 | core_instance_group_ebs_size = var.core_instance_group_ebs_size 67 | core_instance_group_ebs_type = var.core_instance_group_ebs_type 68 | core_instance_group_ebs_volumes_per_instance = var.core_instance_group_ebs_volumes_per_instance 69 | master_instance_group_instance_type = var.master_instance_group_instance_type 70 | master_instance_group_instance_count = var.master_instance_group_instance_count 71 | master_instance_group_ebs_size = var.master_instance_group_ebs_size 72 | master_instance_group_ebs_type = var.master_instance_group_ebs_type 73 | master_instance_group_ebs_volumes_per_instance = var.master_instance_group_ebs_volumes_per_instance 74 | create_task_instance_group = var.create_task_instance_group 75 | log_uri = format("s3://%s/", module.s3_log_storage.bucket_id) 76 | key_name = module.aws_key_pair.key_name 77 | 78 | context = module.this.context 79 | } 80 | -------------------------------------------------------------------------------- /examples/complete/outputs.tf: -------------------------------------------------------------------------------- 1 | output "public_subnet_cidrs" { 2 | value = module.subnets.public_subnet_cidrs 3 | description = "Public subnet CIDRs" 4 | } 5 | 6 | output "private_subnet_cidrs" { 7 | value = module.subnets.private_subnet_cidrs 8 | description = "Private subnet CIDRs" 9 | } 10 | 11 | output "vpc_cidr" { 12 | value = module.vpc.vpc_cidr_block 13 | description = "VPC CIDR block" 14 | } 15 | 16 | output "aws_key_pair_key_name" { 17 | value = 
module.aws_key_pair.key_name 18 | description = "Name of SSH key" 19 | } 20 | 21 | output "aws_key_pair_public_key" { 22 | value = module.aws_key_pair.public_key 23 | description = "Content of the generated public key" 24 | } 25 | 26 | output "aws_key_pair_public_key_filename" { 27 | description = "Public Key Filename" 28 | value = module.aws_key_pair.public_key_filename 29 | } 30 | 31 | output "aws_key_pair_private_key_filename" { 32 | description = "Private Key Filename" 33 | value = module.aws_key_pair.private_key_filename 34 | } 35 | 36 | output "s3_log_storage_bucket_domain_name" { 37 | value = module.s3_log_storage.bucket_domain_name 38 | description = "FQDN of bucket" 39 | } 40 | 41 | output "s3_log_storage_bucket_id" { 42 | value = module.s3_log_storage.bucket_id 43 | description = "Bucket Name (aka ID)" 44 | } 45 | 46 | output "s3_log_storage_bucket_arn" { 47 | value = module.s3_log_storage.bucket_arn 48 | description = "Bucket ARN" 49 | } 50 | 51 | output "cluster_id" { 52 | value = module.emr_cluster.cluster_id 53 | description = "EMR cluster ID" 54 | } 55 | 56 | output "cluster_name" { 57 | value = module.emr_cluster.cluster_name 58 | description = "EMR cluster name" 59 | } 60 | 61 | output "cluster_master_public_dns" { 62 | value = module.emr_cluster.master_public_dns 63 | description = "Master public DNS" 64 | } 65 | 66 | output "cluster_master_security_group_id" { 67 | value = module.emr_cluster.master_security_group_id 68 | description = "Master security group ID" 69 | } 70 | 71 | output "cluster_slave_security_group_id" { 72 | value = module.emr_cluster.slave_security_group_id 73 | description = "Slave security group ID" 74 | } 75 | 76 | output "cluster_master_host" { 77 | value = module.emr_cluster.master_host 78 | description = "Name of the cluster CNAME record for the master nodes in the parent DNS zone" 79 | } 80 | -------------------------------------------------------------------------------- /examples/complete/variables.tf: 
-------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | description = "AWS region" 4 | } 5 | 6 | variable "availability_zones" { 7 | type = list(string) 8 | description = "List of availability zones" 9 | } 10 | 11 | variable "ebs_root_volume_size" { 12 | type = number 13 | description = "Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later" 14 | } 15 | 16 | variable "visible_to_all_users" { 17 | type = bool 18 | description = "Whether the job flow is visible to all IAM users of the AWS account associated with the job flow" 19 | } 20 | 21 | variable "release_label" { 22 | type = string 23 | description = "The release label for the Amazon EMR release. https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-5x.html" 24 | } 25 | 26 | variable "applications" { 27 | type = list(string) 28 | description = "A list of applications for the cluster. Valid values are: Flink, Ganglia, Hadoop, HBase, HCatalog, Hive, Hue, JupyterHub, Livy, Mahout, MXNet, Oozie, Phoenix, Pig, Presto, Spark, Sqoop, TensorFlow, Tez, Zeppelin, and ZooKeeper (as of EMR 5.25.0). Case insensitive" 29 | } 30 | 31 | variable "configurations_json" { 32 | type = string 33 | description = "A JSON string for supplying list of configurations for the EMR cluster" 34 | default = "" 35 | } 36 | 37 | variable "core_instance_group_instance_type" { 38 | type = string 39 | description = "EC2 instance type for all instances in the Core instance group" 40 | } 41 | 42 | variable "core_instance_group_instance_count" { 43 | type = number 44 | description = "Target number of instances for the Core instance group. 
Must be at least 1" 45 | } 46 | 47 | variable "core_instance_group_ebs_size" { 48 | type = number 49 | description = "Core instances volume size, in gibibytes (GiB)" 50 | } 51 | 52 | variable "core_instance_group_ebs_type" { 53 | type = string 54 | description = "Core instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1`" 55 | } 56 | 57 | variable "core_instance_group_ebs_volumes_per_instance" { 58 | type = number 59 | description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Core instance group" 60 | } 61 | 62 | variable "master_instance_group_instance_type" { 63 | type = string 64 | description = "EC2 instance type for all instances in the Master instance group" 65 | } 66 | 67 | variable "master_instance_group_instance_count" { 68 | type = number 69 | description = "Target number of instances for the Master instance group. Must be at least 1" 70 | } 71 | 72 | variable "master_instance_group_ebs_size" { 73 | type = number 74 | description = "Master instances volume size, in gibibytes (GiB)" 75 | } 76 | 77 | variable "master_instance_group_ebs_type" { 78 | type = string 79 | description = "Master instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1`" 80 | } 81 | 82 | variable "master_instance_group_ebs_volumes_per_instance" { 83 | type = number 84 | description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Master instance group" 85 | } 86 | 87 | variable "create_task_instance_group" { 88 | type = bool 89 | description = "Whether to create an instance group for Task nodes. For more info: https://www.terraform.io/docs/providers/aws/r/emr_instance_group.html, https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html" 90 | } 91 | 92 | variable "ssh_public_key_path" { 93 | type = string 94 | description = "Path to SSH public key directory (e.g. 
`/secrets`)" 95 | } 96 | 97 | variable "generate_ssh_key" { 98 | type = bool 99 | description = "If set to `true`, new SSH key pair will be created" 100 | } 101 | -------------------------------------------------------------------------------- /examples/complete/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 3.0" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /main.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | enabled = module.this.enabled 3 | 4 | aws_partition = join("", data.aws_partition.current.*.partition) 5 | 6 | # This dummy bootstrap action works around a Terraform AWS provider bug: https://github.com/terraform-providers/terraform-provider-aws/issues/12683 7 | # When javax.jdo.option.ConnectionPassword is used in `configurations_json`, every plan forces recreation of the EMR cluster. 8 | # To mitigate this, a dummy `echo` bootstrap action is added whose only argument is an MD5 hash of `configurations_json`. 9 | # Together with lifecycle `ignore_changes` for `configurations_json`, EMR recreation is triggered only when that hash changes. 
10 | bootstrap_action = concat( 11 | [{ 12 | path = "file:/bin/echo", 13 | name = "Dummy bootstrap action to prevent EMR cluster recreation when configuration_json has parameter javax.jdo.option.ConnectionPassword", 14 | args = [md5(jsonencode(var.configurations_json))] 15 | }], 16 | var.bootstrap_action 17 | ) 18 | 19 | kerberos_attributes = { 20 | ad_domain_join_password = var.kerberos_ad_domain_join_password 21 | ad_domain_join_user = var.kerberos_ad_domain_join_user 22 | cross_realm_trust_principal_password = var.kerberos_cross_realm_trust_principal_password 23 | kdc_admin_password = var.kerberos_kdc_admin_password 24 | realm = var.kerberos_realm 25 | } 26 | } 27 | 28 | data "aws_partition" "current" { 29 | count = local.enabled ? 1 : 0 30 | } 31 | 32 | module "label_emr" { 33 | source = "cloudposse/label/null" 34 | version = "0.25.0" 35 | 36 | attributes = compact(concat(module.this.attributes, tolist(["emr"]))) 37 | context = module.this.context 38 | } 39 | 40 | module "label_ec2" { 41 | source = "cloudposse/label/null" 42 | version = "0.25.0" 43 | 44 | attributes = compact(concat(module.this.attributes, tolist(["ec2"]))) 45 | context = module.this.context 46 | } 47 | 48 | module "label_ec2_autoscaling" { 49 | source = "cloudposse/label/null" 50 | version = "0.25.0" 51 | 52 | attributes = compact(concat(module.this.attributes, tolist(["ec2", "autoscaling"]))) 53 | context = module.this.context 54 | } 55 | 56 | module "label_master" { 57 | source = "cloudposse/label/null" 58 | version = "0.25.0" 59 | 60 | attributes = compact(concat(module.this.attributes, tolist(["master"]))) 61 | context = module.this.context 62 | } 63 | 64 | module "label_slave" { 65 | source = "cloudposse/label/null" 66 | version = "0.25.0" 67 | 68 | attributes = compact(concat(module.this.attributes, tolist(["slave"]))) 69 | context = module.this.context 70 | } 71 | 72 | module "label_core" { 73 | source = "cloudposse/label/null" 74 | version = "0.25.0" 75 | 76 | attributes = 
compact(concat(module.this.attributes, tolist(["core"]))) 77 | context = module.this.context 78 | } 79 | 80 | module "label_task" { 81 | source = "cloudposse/label/null" 82 | version = "0.25.0" 83 | 84 | enabled = local.enabled && var.create_task_instance_group 85 | 86 | attributes = compact(concat(module.this.attributes, tolist(["task"]))) 87 | context = module.this.context 88 | } 89 | 90 | module "label_master_managed" { 91 | source = "cloudposse/label/null" 92 | version = "0.25.0" 93 | 94 | attributes = compact(concat(module.this.attributes, tolist(["master", "managed"]))) 95 | context = module.this.context 96 | } 97 | 98 | module "label_slave_managed" { 99 | source = "cloudposse/label/null" 100 | version = "0.25.0" 101 | 102 | attributes = compact(concat(module.this.attributes, tolist(["slave", "managed"]))) 103 | context = module.this.context 104 | } 105 | 106 | module "label_service_managed" { 107 | source = "cloudposse/label/null" 108 | version = "0.25.0" 109 | 110 | attributes = compact(concat(module.this.attributes, tolist(["service", "managed"]))) 111 | context = module.this.context 112 | } 113 | 114 | /* 115 | NOTE on EMR-Managed security groups: These security groups will have any missing inbound or outbound access rules added and maintained by AWS, 116 | to ensure proper communication between instances in a cluster. The EMR service will maintain these rules for groups provided 117 | in emr_managed_master_security_group and emr_managed_slave_security_group; 118 | attempts to remove the required rules may succeed, only for the EMR service to re-add them in a matter of minutes. 119 | This may cause Terraform to fail to destroy an environment that contains an EMR cluster, because the EMR service does not revoke rules added on deletion, 120 | leaving a cyclic dependency between the security groups that prevents their deletion. 
121 | To avoid this, use the revoke_rules_on_delete optional attribute for any Security Group used in 122 | emr_managed_master_security_group and emr_managed_slave_security_group. 123 | */ 124 | 125 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-sg-specify.html 126 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html 127 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-clusters-in-a-vpc.html 128 | 129 | resource "aws_security_group" "managed_master" { 130 | count = local.enabled && var.use_existing_managed_master_security_group == false ? 1 : 0 131 | 132 | revoke_rules_on_delete = true 133 | vpc_id = var.vpc_id 134 | name = module.label_master_managed.id 135 | description = "EmrManagedMasterSecurityGroup" 136 | tags = module.label_master_managed.tags 137 | 138 | # EMR will update "ingress" and "egress" so we ignore the changes here 139 | lifecycle { 140 | ignore_changes = [ingress, egress] 141 | } 142 | } 143 | 144 | resource "aws_security_group_rule" "managed_master_egress" { 145 | count = local.enabled && var.use_existing_managed_master_security_group == false ? 1 : 0 146 | 147 | description = "Allow all egress traffic" 148 | type = "egress" 149 | from_port = 0 150 | to_port = 0 151 | protocol = "-1" 152 | cidr_blocks = ["0.0.0.0/0"] 153 | ipv6_cidr_blocks = ["::/0"] 154 | security_group_id = join("", aws_security_group.managed_master.*.id) 155 | } 156 | 157 | resource "aws_security_group" "managed_slave" { 158 | count = local.enabled && var.use_existing_managed_slave_security_group == false ? 
1 : 0 159 | 160 | revoke_rules_on_delete = true 161 | vpc_id = var.vpc_id 162 | name = module.label_slave_managed.id 163 | description = "EmrManagedSlaveSecurityGroup" 164 | tags = module.label_slave_managed.tags 165 | 166 | # EMR will update "ingress" and "egress" so we ignore the changes here 167 | lifecycle { 168 | ignore_changes = [ingress, egress] 169 | } 170 | } 171 | 172 | resource "aws_security_group_rule" "managed_slave_egress" { 173 | count = local.enabled && var.use_existing_managed_slave_security_group == false ? 1 : 0 174 | 175 | description = "Allow all egress traffic" 176 | type = "egress" 177 | from_port = 0 178 | to_port = 0 179 | protocol = "-1" 180 | cidr_blocks = ["0.0.0.0/0"] 181 | ipv6_cidr_blocks = ["::/0"] 182 | security_group_id = join("", aws_security_group.managed_slave.*.id) 183 | } 184 | 185 | resource "aws_security_group" "managed_service_access" { 186 | count = local.enabled && var.subnet_type == "private" && var.use_existing_service_access_security_group == false ? 1 : 0 187 | 188 | revoke_rules_on_delete = true 189 | vpc_id = var.vpc_id 190 | name = module.label_service_managed.id 191 | description = "EmrManagedServiceAccessSecurityGroup" 192 | tags = module.label_service_managed.tags 193 | 194 | # EMR will update "ingress" and "egress" so we ignore the changes here 195 | lifecycle { 196 | ignore_changes = [ingress, egress] 197 | } 198 | } 199 | 200 | resource "aws_security_group_rule" "managed_master_service_access_ingress" { 201 | count = local.enabled && var.subnet_type == "private" && var.use_existing_service_access_security_group == false ? 
1 : 0 202 | 203 | description = "Allow ingress traffic from EmrManagedMasterSecurityGroup" 204 | type = "ingress" 205 | from_port = 9443 206 | to_port = 9443 207 | protocol = "tcp" 208 | source_security_group_id = join("", aws_security_group.managed_master.*.id) 209 | security_group_id = join("", aws_security_group.managed_service_access.*.id) 210 | } 211 | 212 | resource "aws_security_group_rule" "managed_service_access_egress" { 213 | count = local.enabled && var.subnet_type == "private" && var.use_existing_service_access_security_group == false ? 1 : 0 214 | 215 | description = "Allow all egress traffic" 216 | type = "egress" 217 | from_port = 0 218 | to_port = 0 219 | protocol = "-1" 220 | cidr_blocks = ["0.0.0.0/0"] 221 | ipv6_cidr_blocks = ["::/0"] 222 | security_group_id = join("", aws_security_group.managed_service_access.*.id) 223 | } 224 | 225 | # Specify additional master and slave security groups 226 | resource "aws_security_group" "master" { 227 | count = local.enabled && var.use_existing_additional_master_security_group == false ? 1 : 0 228 | 229 | revoke_rules_on_delete = true 230 | vpc_id = var.vpc_id 231 | name = module.label_master.id 232 | description = "Allow inbound traffic from Security Groups and CIDRs for masters. Allow all outbound traffic" 233 | tags = module.label_master.tags 234 | } 235 | 236 | resource "aws_security_group_rule" "master_ingress_security_groups" { 237 | count = local.enabled && var.use_existing_additional_master_security_group == false ? 
length(var.master_allowed_security_groups) : 0 238 | 239 | description = "Allow inbound traffic from Security Groups" 240 | type = "ingress" 241 | from_port = 0 242 | to_port = 65535 243 | protocol = "tcp" 244 | source_security_group_id = var.master_allowed_security_groups[count.index] 245 | security_group_id = join("", aws_security_group.master.*.id) 246 | } 247 | 248 | resource "aws_security_group_rule" "master_ingress_cidr_blocks" { 249 | count = local.enabled && length(var.master_allowed_cidr_blocks) > 0 && var.use_existing_additional_master_security_group == false ? 1 : 0 250 | 251 | description = "Allow inbound traffic from CIDR blocks" 252 | type = "ingress" 253 | from_port = 0 254 | to_port = 65535 255 | protocol = "tcp" 256 | cidr_blocks = var.master_allowed_cidr_blocks 257 | security_group_id = join("", aws_security_group.master.*.id) 258 | } 259 | 260 | resource "aws_security_group_rule" "master_egress" { 261 | count = local.enabled && var.use_existing_additional_master_security_group == false ? 1 : 0 262 | 263 | description = "Allow all egress traffic" 264 | type = "egress" 265 | from_port = 0 266 | to_port = 65535 267 | protocol = "tcp" 268 | cidr_blocks = ["0.0.0.0/0"] 269 | security_group_id = join("", aws_security_group.master.*.id) 270 | } 271 | 272 | resource "aws_security_group" "slave" { 273 | count = local.enabled && var.use_existing_additional_slave_security_group == false ? 1 : 0 274 | 275 | revoke_rules_on_delete = true 276 | vpc_id = var.vpc_id 277 | name = module.label_slave.id 278 | description = "Allow inbound traffic from Security Groups and CIDRs for slaves. Allow all outbound traffic" 279 | tags = module.label_slave.tags 280 | } 281 | 282 | resource "aws_security_group_rule" "slave_ingress_security_groups" { 283 | count = local.enabled && var.use_existing_additional_slave_security_group == false ? 
length(var.slave_allowed_security_groups) : 0 284 | 285 | description = "Allow inbound traffic from Security Groups" 286 | type = "ingress" 287 | from_port = 0 288 | to_port = 65535 289 | protocol = "tcp" 290 | source_security_group_id = var.slave_allowed_security_groups[count.index] 291 | security_group_id = join("", aws_security_group.slave.*.id) 292 | } 293 | 294 | resource "aws_security_group_rule" "slave_ingress_cidr_blocks" { 295 | count = local.enabled && length(var.slave_allowed_cidr_blocks) > 0 && var.use_existing_additional_slave_security_group == false ? 1 : 0 296 | 297 | description = "Allow inbound traffic from CIDR blocks" 298 | type = "ingress" 299 | from_port = 0 300 | to_port = 65535 301 | protocol = "tcp" 302 | cidr_blocks = var.slave_allowed_cidr_blocks 303 | security_group_id = join("", aws_security_group.slave.*.id) 304 | } 305 | 306 | resource "aws_security_group_rule" "slave_egress" { 307 | count = local.enabled && var.use_existing_additional_slave_security_group == false ? 1 : 0 308 | 309 | description = "Allow all egress traffic" 310 | type = "egress" 311 | from_port = 0 312 | to_port = 65535 313 | protocol = "tcp" 314 | cidr_blocks = ["0.0.0.0/0"] 315 | security_group_id = join("", aws_security_group.slave.*.id) 316 | } 317 | 318 | /* 319 | Allows Amazon EMR to call other AWS services on your behalf when provisioning resources and performing service-level actions. 320 | This role is required for all clusters. The same trust policy is reused by the EMR autoscaling role (`aws_iam_role.ec2_autoscaling`), so it is rendered when either role is created by this module. 321 | https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 322 | */ 323 | data "aws_iam_policy_document" "assume_role_emr" { 324 | count = local.enabled && (var.service_role_enabled || var.ec2_autoscaling_role_enabled) ? 
1 : 0 325 | 326 | statement { 327 | effect = "Allow" 328 | 329 | principals { 330 | type = "Service" 331 | identifiers = ["elasticmapreduce.amazonaws.com", "application-autoscaling.amazonaws.com"] 332 | } 333 | 334 | actions = ["sts:AssumeRole"] 335 | } 336 | } 337 | 338 | resource "aws_iam_role" "emr" { 339 | count = local.enabled && var.service_role_enabled ? 1 : 0 340 | 341 | name = module.label_emr.id 342 | assume_role_policy = join("", data.aws_iam_policy_document.assume_role_emr.*.json) 343 | permissions_boundary = var.emr_role_permissions_boundary 344 | 345 | tags = module.this.tags 346 | } 347 | 348 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 349 | resource "aws_iam_role_policy_attachment" "emr" { 350 | count = local.enabled && var.service_role_enabled ? 1 : 0 351 | 352 | role = join("", aws_iam_role.emr.*.name) 353 | policy_arn = "arn:${local.aws_partition}:iam::aws:policy/service-role/AmazonElasticMapReduceRole" 354 | } 355 | 356 | /* 357 | Application processes that run on top of the Hadoop ecosystem on cluster instances use this role when they call other AWS services. 358 | For accessing data in Amazon S3 using EMRFS, you can specify different roles to be assumed based on the user or group making the request, 359 | or on the location of data in Amazon S3. 360 | This role is required for all clusters. 361 | https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 362 | */ 363 | data "aws_iam_policy_document" "assume_role_ec2" { 364 | count = local.enabled && var.ec2_role_enabled ? 1 : 0 365 | 366 | statement { 367 | effect = "Allow" 368 | 369 | principals { 370 | type = "Service" 371 | identifiers = ["ec2.amazonaws.com"] 372 | } 373 | 374 | actions = ["sts:AssumeRole"] 375 | } 376 | } 377 | 378 | resource "aws_iam_role" "ec2" { 379 | count = local.enabled && var.ec2_role_enabled ? 
1 : 0 380 | 381 | name = module.label_ec2.id 382 | assume_role_policy = join("", data.aws_iam_policy_document.assume_role_ec2.*.json) 383 | permissions_boundary = var.ec2_role_permissions_boundary 384 | 385 | tags = module.this.tags 386 | } 387 | 388 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 389 | resource "aws_iam_role_policy_attachment" "ec2" { 390 | count = local.enabled && var.ec2_role_enabled ? 1 : 0 391 | 392 | role = join("", aws_iam_role.ec2.*.name) 393 | policy_arn = "arn:${local.aws_partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role" 394 | } 395 | 396 | /* 397 | Allows SSH logins to EMR instances via SSM agent. 398 | https://aws.amazon.com/blogs/big-data/securing-access-to-emr-clusters-using-aws-systems-manager/ 399 | */ 400 | resource "aws_iam_role_policy_attachment" "emr_ssm_access" { 401 | count = local.enabled && var.ec2_role_enabled && var.enable_ssm_access ? 1 : 0 402 | 403 | role = join("", aws_iam_role.ec2.*.name) 404 | policy_arn = "arn:${local.aws_partition}:iam::aws:policy/AmazonSSMManagedInstanceCore" 405 | } 406 | 407 | resource "aws_iam_instance_profile" "ec2" { 408 | count = local.enabled && var.ec2_role_enabled ? 1 : 0 409 | 410 | name = join("", aws_iam_role.ec2.*.name) 411 | role = join("", aws_iam_role.ec2.*.name) 412 | tags = module.this.tags 413 | } 414 | 415 | /* 416 | Allows additional actions for dynamically scaling environments. Required only for clusters that use automatic scaling in Amazon EMR. 417 | This role is optional; it is created only when `var.ec2_autoscaling_role_enabled` is true. 418 | https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 419 | */ 420 | resource "aws_iam_role" "ec2_autoscaling" { 421 | count = local.enabled && var.ec2_autoscaling_role_enabled ? 
1 : 0 422 | 423 | name = module.label_ec2_autoscaling.id 424 | assume_role_policy = join("", data.aws_iam_policy_document.assume_role_emr.*.json) 425 | permissions_boundary = var.ec2_autoscaling_role_permissions_boundary 426 | 427 | tags = module.this.tags 428 | } 429 | 430 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html 431 | resource "aws_iam_role_policy_attachment" "ec2_autoscaling" { 432 | count = local.enabled && var.ec2_autoscaling_role_enabled ? 1 : 0 433 | 434 | role = join("", aws_iam_role.ec2_autoscaling.*.name) 435 | policy_arn = "arn:${local.aws_partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole" 436 | } 437 | 438 | resource "aws_emr_cluster" "default" { 439 | count = local.enabled ? 1 : 0 440 | 441 | name = module.this.id 442 | release_label = var.release_label 443 | 444 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-sg-specify.html 445 | ec2_attributes { 446 | key_name = var.key_name 447 | subnet_id = var.subnet_id 448 | emr_managed_master_security_group = var.use_existing_managed_master_security_group == false ? join("", aws_security_group.managed_master.*.id) : var.managed_master_security_group 449 | emr_managed_slave_security_group = var.use_existing_managed_slave_security_group == false ? join("", aws_security_group.managed_slave.*.id) : var.managed_slave_security_group 450 | service_access_security_group = var.use_existing_service_access_security_group == false && var.subnet_type == "private" ? join("", aws_security_group.managed_service_access.*.id) : var.service_access_security_group 451 | instance_profile = var.ec2_role_enabled ? join("", aws_iam_instance_profile.ec2.*.arn) : var.existing_ec2_instance_profile_arn 452 | additional_master_security_groups = var.use_existing_additional_master_security_group == false ? 
join("", aws_security_group.master.*.id) : var.additional_master_security_group 453 | additional_slave_security_groups = var.use_existing_additional_slave_security_group == false ? join("", aws_security_group.slave.*.id) : var.additional_slave_security_group 454 | } 455 | 456 | termination_protection = var.termination_protection 457 | keep_job_flow_alive_when_no_steps = var.keep_job_flow_alive_when_no_steps 458 | step_concurrency_level = var.step_concurrency_level 459 | ebs_root_volume_size = var.ebs_root_volume_size 460 | custom_ami_id = var.custom_ami_id 461 | visible_to_all_users = var.visible_to_all_users 462 | 463 | applications = var.applications 464 | 465 | core_instance_group { 466 | name = module.label_core.id 467 | instance_type = var.core_instance_group_instance_type 468 | instance_count = var.core_instance_group_instance_count 469 | 470 | ebs_config { 471 | size = var.core_instance_group_ebs_size 472 | type = var.core_instance_group_ebs_type 473 | iops = var.core_instance_group_ebs_iops 474 | volumes_per_instance = var.core_instance_group_ebs_volumes_per_instance 475 | } 476 | 477 | bid_price = var.core_instance_group_bid_price 478 | autoscaling_policy = var.core_instance_group_autoscaling_policy 479 | } 480 | 481 | master_instance_group { 482 | name = module.label_master.id 483 | instance_type = var.master_instance_group_instance_type 484 | instance_count = var.master_instance_group_instance_count 485 | bid_price = var.master_instance_group_bid_price 486 | 487 | ebs_config { 488 | size = var.master_instance_group_ebs_size 489 | type = var.master_instance_group_ebs_type 490 | iops = var.master_instance_group_ebs_iops 491 | volumes_per_instance = var.master_instance_group_ebs_volumes_per_instance 492 | } 493 | } 494 | 495 | scale_down_behavior = var.scale_down_behavior 496 | additional_info = var.additional_info 497 | security_configuration = var.security_configuration 498 | 499 | dynamic "bootstrap_action" { 500 | for_each = local.bootstrap_action 501 | 
content { 502 | path = bootstrap_action.value.path 503 | name = bootstrap_action.value.name 504 | args = bootstrap_action.value.args 505 | } 506 | } 507 | 508 | dynamic "kerberos_attributes" { 509 | for_each = var.kerberos_enabled ? [local.kerberos_attributes] : [] 510 | 511 | content { 512 | ad_domain_join_password = kerberos_attributes.value.ad_domain_join_password 513 | ad_domain_join_user = kerberos_attributes.value.ad_domain_join_user 514 | cross_realm_trust_principal_password = kerberos_attributes.value.cross_realm_trust_principal_password 515 | kdc_admin_password = kerberos_attributes.value.kdc_admin_password 516 | realm = kerberos_attributes.value.realm 517 | } 518 | } 519 | 520 | dynamic "step" { 521 | for_each = var.steps 522 | content { 523 | name = step.value.name 524 | action_on_failure = step.value.action_on_failure 525 | hadoop_jar_step { 526 | jar = step.value.hadoop_jar_step["jar"] 527 | main_class = lookup(step.value.hadoop_jar_step, "main_class", null) 528 | properties = lookup(step.value.hadoop_jar_step, "properties", null) 529 | args = lookup(step.value.hadoop_jar_step, "args", null) 530 | } 531 | } 532 | } 533 | 534 | dynamic "auto_termination_policy" { 535 | for_each = var.auto_termination_idle_timeout != null ? [var.auto_termination_idle_timeout] : [] 536 | content { 537 | idle_timeout = var.auto_termination_idle_timeout 538 | } 539 | } 540 | 541 | configurations_json = var.configurations_json 542 | 543 | log_uri = var.log_uri 544 | 545 | service_role = var.service_role_enabled ? join("", aws_iam_role.emr.*.arn) : var.existing_service_role_arn 546 | autoscaling_role = var.ec2_autoscaling_role_enabled ? join("", aws_iam_role.ec2_autoscaling.*.arn) : var.existing_ec2_autoscaling_role_arn 547 | 548 | # configurations_json changes are ignored because of terraform bug. Configuration changes are applied via local.bootstrap_action. 
549 | lifecycle { 550 | ignore_changes = [kerberos_attributes, step, configurations_json] 551 | } 552 | 553 | tags = module.this.tags 554 | } 555 | 556 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html 557 | # https://www.terraform.io/docs/providers/aws/r/emr_instance_group.html 558 | resource "aws_emr_instance_group" "task" { 559 | count = local.enabled && var.create_task_instance_group ? 1 : 0 560 | 561 | name = module.label_task.id 562 | cluster_id = join("", aws_emr_cluster.default.*.id) 563 | 564 | instance_type = var.task_instance_group_instance_type 565 | instance_count = var.task_instance_group_instance_count 566 | 567 | ebs_config { 568 | size = var.task_instance_group_ebs_size 569 | type = var.task_instance_group_ebs_type 570 | iops = var.task_instance_group_ebs_iops 571 | volumes_per_instance = var.task_instance_group_ebs_volumes_per_instance 572 | } 573 | 574 | bid_price = var.task_instance_group_bid_price 575 | ebs_optimized = var.task_instance_group_ebs_optimized 576 | autoscaling_policy = var.task_instance_group_autoscaling_policy 577 | } 578 | 579 | module "dns_master" { 580 | source = "cloudposse/route53-cluster-hostname/aws" 581 | version = "0.12.2" 582 | 583 | enabled = local.enabled && var.zone_id != null && var.zone_id != "" ? true : false 584 | 585 | dns_name = var.master_dns_name != null && var.master_dns_name != "" ? var.master_dns_name : "emr-master-${module.this.name}" 586 | zone_id = var.zone_id 587 | records = coalescelist(aws_emr_cluster.default.*.master_public_dns, [""]) 588 | 589 | context = module.this.context 590 | } 591 | 592 | # https://www.terraform.io/docs/providers/aws/r/vpc_endpoint.html 593 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-clusters-in-a-vpc.html 594 | resource "aws_vpc_endpoint" "vpc_endpoint_s3" { 595 | count = local.enabled && var.subnet_type == "private" && var.create_vpc_endpoint_s3 ? 
1 : 0 596 | 597 | vpc_id = var.vpc_id 598 | service_name = format("com.amazonaws.%s.s3", var.region) 599 | auto_accept = true 600 | route_table_ids = [var.route_table_id] 601 | tags = module.this.tags 602 | } 603 | -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | output "cluster_id" { 2 | value = join("", aws_emr_cluster.default.*.id) 3 | description = "EMR cluster ID" 4 | } 5 | 6 | output "cluster_name" { 7 | value = join("", aws_emr_cluster.default.*.name) 8 | description = "EMR cluster name" 9 | } 10 | 11 | output "master_public_dns" { 12 | value = join("", aws_emr_cluster.default.*.master_public_dns) 13 | description = "Master public DNS" 14 | } 15 | 16 | output "master_security_group_id" { 17 | value = join("", aws_security_group.master.*.id) 18 | description = "Master security group ID" 19 | } 20 | 21 | output "slave_security_group_id" { 22 | value = join("", aws_security_group.slave.*.id) 23 | description = "Slave security group ID" 24 | } 25 | 26 | output "master_host" { 27 | value = module.dns_master.hostname 28 | description = "Name of the cluster CNAME record for the master nodes in the parent DNS zone" 29 | } 30 | 31 | output "ec2_role" { 32 | value = var.ec2_role_enabled ? 
join("", aws_iam_role.ec2.*.name) : null 33 | description = "Role name of EMR EC2 instances so users can attach more policies" 34 | } 35 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | .test-harness 2 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | TEST_HARNESS ?= https://github.com/cloudposse/test-harness.git 2 | TEST_HARNESS_BRANCH ?= master 3 | TEST_HARNESS_PATH = $(realpath .test-harness) 4 | BATS_ARGS ?= --tap 5 | BATS_LOG ?= test.log 6 | 7 | # Define a macro to run the tests 8 | define RUN_TESTS 9 | @echo "Running tests in $(1)" 10 | @cd $(1) && bats $(BATS_ARGS) $(addsuffix .bats,$(addprefix $(TEST_HARNESS_PATH)/test/terraform/,$(TESTS))) 11 | endef 12 | 13 | default: all 14 | 15 | -include Makefile.* 16 | 17 | ## Provision the test-harness 18 | .test-harness: 19 | [ -d $@ ] || git clone --depth=1 -b $(TEST_HARNESS_BRANCH) $(TEST_HARNESS) $@ 20 | 21 | ## Initialize the tests 22 | init: .test-harness 23 | 24 | ## Install all dependencies (OS specific) 25 | deps:: 26 | @exit 0 27 | 28 | ## Clean up the test harness 29 | clean: 30 | [ "$(TEST_HARNESS_PATH)" = "/" ] || rm -rf $(TEST_HARNESS_PATH) 31 | 32 | ## Run all tests 33 | all: module examples/complete 34 | 35 | ## Run basic sanity checks against the module itself 36 | module: export TESTS ?= installed lint get-modules module-pinning get-plugins provider-pinning validate terraform-docs input-descriptions output-descriptions 37 | module: deps 38 | $(call RUN_TESTS, ../) 39 | 40 | ## Run tests against example 41 | examples/complete: export TESTS ?= installed lint get-modules get-plugins validate 42 | examples/complete: deps 43 | $(call RUN_TESTS, ../$@) 44 | --------------------------------------------------------------------------------
/test/Makefile.alpine: -------------------------------------------------------------------------------- 1 | ifneq (,$(wildcard /sbin/apk)) 2 | ## Install all dependencies for alpine 3 | deps:: init 4 | @apk add --update terraform-docs@cloudposse json2hcl@cloudposse 5 | endif 6 | -------------------------------------------------------------------------------- /test/src/.gitignore: -------------------------------------------------------------------------------- 1 | .gopath 2 | vendor/ 3 | -------------------------------------------------------------------------------- /test/src/Makefile: -------------------------------------------------------------------------------- 1 | export TF_CLI_ARGS_init ?= -get-plugins=true 2 | export TERRAFORM_VERSION ?= $(shell curl -s https://checkpoint-api.hashicorp.com/v1/check/terraform | jq -r -M '.current_version' | cut -d. -f1-2) 3 | 4 | .DEFAULT_GOAL := all 5 | .PHONY: all 6 | 7 | ## Default target 8 | all: test 9 | 10 | .PHONY : init 11 | ## Initialize tests 12 | init: 13 | @exit 0 14 | 15 | .PHONY : test 16 | ## Run tests 17 | test: init 18 | go mod download 19 | go test -v -timeout 60m 20 | 21 | ## Run tests in docker container 22 | docker/test: 23 | docker run --name terratest --rm -it -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_SESSION_TOKEN -e GITHUB_TOKEN \ 24 | -e PATH="/usr/local/terraform/$(TERRAFORM_VERSION)/bin:/go/bin:/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \ 25 | -v $(CURDIR)/../../:/module/ cloudposse/test-harness:latest -C /module/test/src test 26 | 27 | .PHONY : clean 28 | ## Clean up files 29 | clean: 30 | rm -rf ../../examples/complete/*.tfstate* 31 | -------------------------------------------------------------------------------- /test/src/examples_complete_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gruntwork-io/terratest/modules/random" 6 |
"github.com/gruntwork-io/terratest/modules/terraform" 7 | testStructure "github.com/gruntwork-io/terratest/modules/test-structure" 8 | "github.com/stretchr/testify/assert" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | // Test the Terraform module in examples/complete using Terratest. 14 | func TestExamplesComplete(t *testing.T) { 15 | t.Parallel() 16 | randID := strings.ToLower(random.UniqueId()) 17 | attributes := []string{randID} 18 | testNamePrefix := "eg-test-emr-" + randID 19 | 20 | rootFolder := "../../" 21 | terraformFolderRelativeToRoot := "examples/complete" 22 | varFiles := []string{"fixtures.us-east-2.tfvars"} 23 | 24 | tempTestFolder := testStructure.CopyTerraformFolderToTemp(t, rootFolder, terraformFolderRelativeToRoot) 25 | 26 | terraformOptions := &terraform.Options{ 27 | // The path to where our Terraform code is located 28 | TerraformDir: tempTestFolder, 29 | Upgrade: true, 30 | // Variables to pass to our Terraform code using -var-file options 31 | VarFiles: varFiles, 32 | Vars: map[string]interface{}{ 33 | "attributes": attributes, 34 | "enabled": true, 35 | }, 36 | } 37 | 38 | // At the end of the test, run `terraform destroy` to clean up any resources that were created 39 | defer cleanup(t, terraformOptions, tempTestFolder) 40 | 41 | // This will run `terraform init` and `terraform apply` and fail the test if there are any errors 42 | terraform.InitAndApply(t, terraformOptions) 43 | 44 | // Run `terraform output` to get the value of an output variable 45 | vpcCidr := terraform.Output(t, terraformOptions, "vpc_cidr") 46 | // Verify we're getting back the outputs we expect 47 | assert.Equal(t, "172.19.0.0/16", vpcCidr) 48 | 49 | // Run `terraform output` to get the value of an output variable 50 | privateSubnetCidrs := terraform.OutputList(t, terraformOptions, "private_subnet_cidrs") 51 | // Verify we're getting back the outputs we expect 52 | assert.Equal(t, []string{"172.19.0.0/19"}, privateSubnetCidrs) 53 | 54 | // Run `terraform output` to 
get the value of an output variable 55 | publicSubnetCidrs := terraform.OutputList(t, terraformOptions, "public_subnet_cidrs") 56 | // Verify we're getting back the outputs we expect 57 | assert.Equal(t, []string{"172.19.96.0/19"}, publicSubnetCidrs) 58 | 59 | // Run `terraform output` to get the value of an output variable 60 | s3LogStorageBucketId := terraform.Output(t, terraformOptions, "s3_log_storage_bucket_id") 61 | // Verify we're getting back the outputs we expect 62 | assert.Equal(t, fmt.Sprintf("%s-logs", testNamePrefix), s3LogStorageBucketId) 63 | 64 | // Run `terraform output` to get the value of an output variable 65 | awsKeyPairKeyName := terraform.Output(t, terraformOptions, "aws_key_pair_key_name") 66 | // Verify we're getting back the outputs we expect 67 | assert.Equal(t, fmt.Sprintf("%s-ssh-key", testNamePrefix), awsKeyPairKeyName) 68 | 69 | // Run `terraform output` to get the value of an output variable 70 | clusterName := terraform.Output(t, terraformOptions, "cluster_name") 71 | // Verify we're getting back the outputs we expect 72 | assert.Equal(t, testNamePrefix, clusterName) 73 | } 74 | 75 | func TestExamplesCompleteDisabled(t *testing.T) { 76 | t.Parallel() 77 | randID := strings.ToLower(random.UniqueId()) 78 | attributes := []string{randID} 79 | 80 | rootFolder := "../../" 81 | terraformFolderRelativeToRoot := "examples/complete" 82 | varFiles := []string{"fixtures.us-east-2.tfvars"} 83 | 84 | tempTestFolder := testStructure.CopyTerraformFolderToTemp(t, rootFolder, terraformFolderRelativeToRoot) 85 | 86 | terraformOptions := &terraform.Options{ 87 | // The path to where our Terraform code is located 88 | TerraformDir: tempTestFolder, 89 | Upgrade: true, 90 | // Variables to pass to our Terraform code using -var-file options 91 | VarFiles: varFiles, 92 | Vars: map[string]interface{}{ 93 | "attributes": attributes, 94 | "enabled": false, 95 | }, 96 | } 97 | 98 | // At the end of the test, run `terraform destroy` to clean up any resources 
that were created 99 | defer cleanup(t, terraformOptions, tempTestFolder) 100 | 101 | // This will run `terraform init` and `terraform apply` and fail the test if there are any errors 102 | results := terraform.InitAndApply(t, terraformOptions) 103 | 104 | // Should complete successfully without creating or changing any resources 105 | assert.Contains(t, results, "Resources: 0 added, 0 changed, 0 destroyed.") 106 | } 107 | -------------------------------------------------------------------------------- /test/src/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/cloudposse/terraform-aws-emr-cluster 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/gruntwork-io/terratest v0.40.19 7 | github.com/stretchr/testify v1.8.0 8 | ) 9 | 10 | require ( 11 | cloud.google.com/go v0.83.0 // indirect 12 | cloud.google.com/go/storage v1.10.0 // indirect 13 | github.com/agext/levenshtein v1.2.3 // indirect 14 | github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect 15 | github.com/aws/aws-sdk-go v1.40.56 // indirect 16 | github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect 17 | github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect 18 | github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect 19 | github.com/davecgh/go-spew v1.1.1 // indirect 20 | github.com/docker/spdystream v0.0.0-20181023171402-6480d4af844c // indirect 21 | github.com/go-errors/errors v1.0.2-0.20180813162953-d98b870cc4e0 // indirect 22 | github.com/go-logr/logr v0.2.0 // indirect 23 | github.com/go-sql-driver/mysql v1.4.1 // indirect 24 | github.com/gogo/protobuf v1.3.2 // indirect 25 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect 26 | github.com/golang/protobuf v1.5.2 // indirect 27 | github.com/golang/snappy v0.0.3 // indirect 28 | github.com/google/gofuzz v1.1.0 // indirect 29 | github.com/google/uuid v1.2.0 // indirect 30 | github.com/googleapis/gax-go/v2 v2.0.5 // indirect 
31 | github.com/googleapis/gnostic v0.4.1 // indirect 32 | github.com/gruntwork-io/go-commons v0.8.0 // indirect 33 | github.com/hashicorp/errwrap v1.0.0 // indirect 34 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 35 | github.com/hashicorp/go-getter v1.6.1 // indirect 36 | github.com/hashicorp/go-multierror v1.1.0 // indirect 37 | github.com/hashicorp/go-safetemp v1.0.0 // indirect 38 | github.com/hashicorp/go-version v1.3.0 // indirect 39 | github.com/hashicorp/hcl/v2 v2.9.1 // indirect 40 | github.com/hashicorp/terraform-json v0.13.0 // indirect 41 | github.com/imdario/mergo v0.3.11 // indirect 42 | github.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a // indirect 43 | github.com/jmespath/go-jmespath v0.4.0 // indirect 44 | github.com/json-iterator/go v1.1.11 // indirect 45 | github.com/jstemmer/go-junit-report v0.9.1 // indirect 46 | github.com/klauspost/compress v1.13.0 // indirect 47 | github.com/mattn/go-zglob v0.0.2-0.20190814121620-e3c945676326 // indirect 48 | github.com/mitchellh/go-homedir v1.1.0 // indirect 49 | github.com/mitchellh/go-testing-interface v1.0.0 // indirect 50 | github.com/mitchellh/go-wordwrap v1.0.1 // indirect 51 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 52 | github.com/modern-go/reflect2 v1.0.1 // indirect 53 | github.com/pmezard/go-difflib v1.0.0 // indirect 54 | github.com/pquerna/otp v1.2.0 // indirect 55 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 56 | github.com/spf13/pflag v1.0.5 // indirect 57 | github.com/tmccombs/hcl2json v0.3.3 // indirect 58 | github.com/ulikunitz/xz v0.5.8 // indirect 59 | github.com/urfave/cli v1.22.2 // indirect 60 | github.com/zclconf/go-cty v1.9.1 // indirect 61 | go.opencensus.io v0.23.0 // indirect 62 | golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a // indirect 63 | golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect 64 | golang.org/x/mod v0.4.2 // indirect 65 | golang.org/x/net v0.0.0-20210614182718-04defd469f4e 
// indirect 66 | golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c // indirect 67 | golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e // indirect 68 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 // indirect 69 | golang.org/x/text v0.3.6 // indirect 70 | golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e // indirect 71 | golang.org/x/tools v0.1.2 // indirect 72 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 73 | google.golang.org/api v0.47.0 // indirect 74 | google.golang.org/appengine v1.6.7 // indirect 75 | google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect 76 | google.golang.org/grpc v1.38.0 // indirect 77 | google.golang.org/protobuf v1.26.0 // indirect 78 | gopkg.in/inf.v0 v0.9.1 // indirect 79 | gopkg.in/yaml.v2 v2.4.0 // indirect 80 | gopkg.in/yaml.v3 v3.0.1 // indirect 81 | k8s.io/api v0.20.6 // indirect 82 | k8s.io/apimachinery v0.20.6 // indirect 83 | k8s.io/client-go v0.20.6 // indirect 84 | k8s.io/klog/v2 v2.4.0 // indirect 85 | k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect 86 | sigs.k8s.io/structured-merge-diff/v4 v4.0.3 // indirect 87 | sigs.k8s.io/yaml v1.2.0 // indirect 88 | ) 89 | -------------------------------------------------------------------------------- /test/src/utils.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "github.com/gruntwork-io/terratest/modules/terraform" 5 | "github.com/stretchr/testify/assert" 6 | "os" 7 | "testing" 8 | ) 9 | 10 | func cleanup(t *testing.T, terraformOptions *terraform.Options, tempTestFolder string) { 11 | terraform.Destroy(t, terraformOptions) 12 | err := os.RemoveAll(tempTestFolder) 13 | assert.NoError(t, err) 14 | } 15 | -------------------------------------------------------------------------------- /variables.tf: -------------------------------------------------------------------------------- 1 | variable "zone_id" { 2 | type = string 3 | description = 
"Route53 parent zone ID. If provided (not empty), the module will create sub-domain DNS records for the masters and slaves" 4 | default = null 5 | } 6 | 7 | variable "use_existing_managed_master_security_group" { 8 | type = bool 9 | description = "If set to `true`, will use variable `managed_master_security_group` using an existing security group that was created outside of this module" 10 | default = false 11 | } 12 | 13 | variable "use_existing_managed_slave_security_group" { 14 | type = bool 15 | description = "If set to `true`, will use variable `managed_slave_security_group` using an existing security group that was created outside of this module" 16 | default = false 17 | } 18 | 19 | variable "use_existing_additional_master_security_group" { 20 | type = bool 21 | description = "If set to `true`, will use variable `additional_master_security_group` using an existing security group that was created outside of this module" 22 | default = false 23 | } 24 | 25 | variable "use_existing_additional_slave_security_group" { 26 | type = bool 27 | description = "If set to `true`, will use variable `additional_slave_security_group` using an existing security group that was created outside of this module" 28 | default = false 29 | } 30 | 31 | variable "use_existing_service_access_security_group" { 32 | type = bool 33 | description = "If set to `true`, will use variable `service_access_security_group` using an existing security group that was created outside of this module" 34 | default = false 35 | } 36 | 37 | variable "managed_master_security_group" { 38 | type = string 39 | default = "" 40 | description = "The id of the existing managed security group that will be used for EMR master node. If empty, a new security group will be created" 41 | } 42 | 43 | variable "managed_slave_security_group" { 44 | type = string 45 | default = "" 46 | description = "The id of the existing managed security group that will be used for EMR core & task nodes. 
If empty, a new security group will be created" 47 | } 48 | 49 | variable "additional_master_security_group" { 50 | type = string 51 | default = "" 52 | description = "The id of the existing additional security group that will be used for EMR master node. If empty, a new security group will be created" 53 | } 54 | 55 | variable "additional_slave_security_group" { 56 | type = string 57 | default = "" 58 | description = "The id of the existing additional security group that will be used for EMR core & task nodes. If empty, a new security group will be created" 59 | } 60 | 61 | variable "service_access_security_group" { 62 | type = string 63 | default = "" 64 | description = "The id of the existing service access security group that will be used by the Amazon EMR service to access clusters in private subnets. If empty, a new security group will be created" 65 | } 66 | 67 | 68 | variable "master_allowed_security_groups" { 69 | type = list(string) 70 | default = [] 71 | description = "List of security group ids to be allowed to connect to the master instances" 72 | } 73 | 74 | variable "slave_allowed_security_groups" { 75 | type = list(string) 76 | default = [] 77 | description = "List of security group ids to be allowed to connect to the slave instances" 78 | } 79 | 80 | variable "master_allowed_cidr_blocks" { 81 | type = list(string) 82 | default = [] 83 | description = "List of CIDR blocks to be allowed to access the master instances" 84 | } 85 | 86 | variable "slave_allowed_cidr_blocks" { 87 | type = list(string) 88 | default = [] 89 | description = "List of CIDR blocks to be allowed to access the slave instances" 90 | } 91 | 92 | variable "vpc_id" { 93 | type = string 94 | description = "VPC ID to create the cluster in (e.g. `vpc-a22222ee`)" 95 | } 96 | 97 | variable "master_dns_name" { 98 | type = string 99 | description = "Name of the cluster CNAME record to create in the parent DNS zone specified by `zone_id`.
If left empty, the name will be auto-assigned using the format `emr-master-var.name`" 100 | default = null 101 | } 102 | 103 | variable "termination_protection" { 104 | type = bool 105 | description = "Switch on/off termination protection (default is false, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to false" 106 | default = false 107 | } 108 | 109 | variable "keep_job_flow_alive_when_no_steps" { 110 | type = bool 111 | description = "Switch on/off run cluster with no steps or when all steps are complete" 112 | default = true 113 | } 114 | 115 | variable "step_concurrency_level" { 116 | type = number 117 | description = "The number of steps that can be executed concurrently. You can specify a maximum of 256 steps. Only valid for EMR clusters with release_label 5.28.0 or greater." 118 | default = null 119 | } 120 | 121 | variable "ebs_root_volume_size" { 122 | type = number 123 | description = "Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later" 124 | default = 10 125 | } 126 | 127 | variable "custom_ami_id" { 128 | type = string 129 | description = "A custom Amazon Linux AMI for the cluster (instead of an EMR-owned AMI).
Available in Amazon EMR version 5.7.0 and later" 130 | default = null 131 | } 132 | 133 | variable "ec2_role_enabled" { 134 | type = bool 135 | description = "If set to `false`, will use `existing_ec2_instance_profile_arn` for an existing EC2 IAM role that was created outside of this module" 136 | default = true 137 | } 138 | 139 | variable "ec2_autoscaling_role_enabled" { 140 | type = bool 141 | description = "If set to `false`, will use `existing_ec2_autoscaling_role_arn` for an existing EC2 autoscaling IAM role that was created outside of this module" 142 | default = true 143 | } 144 | 145 | variable "service_role_enabled" { 146 | type = bool 147 | description = "If set to `false`, will use `existing_service_role_arn` for an existing IAM role that was created outside of this module" 148 | default = true 149 | } 150 | 151 | variable "enable_ssm_access" { 152 | type = bool 153 | description = "If set to `true`, attach the existing `AmazonSSMManagedInstanceCore` IAM policy to the EMR EC2 instance profile role" 154 | default = false 155 | } 156 | 157 | variable "existing_ec2_instance_profile_arn" { 158 | type = string 159 | description = "ARN of an existing EC2 instance profile" 160 | default = "" 161 | } 162 | 163 | variable "existing_ec2_autoscaling_role_arn" { 164 | type = string 165 | description = "ARN of an existing EC2 autoscaling role to attach to the cluster" 166 | default = "" 167 | } 168 | 169 | variable "existing_service_role_arn" { 170 | type = string 171 | description = "ARN of an existing EMR service role to attach to the cluster" 172 | default = "" 173 | } 174 | 175 | variable "visible_to_all_users" { 176 | type = bool 177 | description = "Whether the job flow is visible to all IAM users of the AWS account associated with the job flow" 178 | default = true 179 | } 180 | 181 | variable "release_label" { 182 | type = string 183 | description = "The release label for the Amazon EMR release. 
https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-5x.html" 184 | default = "emr-5.25.0" 185 | } 186 | 187 | variable "applications" { 188 | type = list(string) 189 | description = "A list of applications for the cluster. Valid values are: Flink, Ganglia, Hadoop, HBase, HCatalog, Hive, Hue, JupyterHub, Livy, Mahout, MXNet, Oozie, Phoenix, Pig, Presto, Spark, Sqoop, TensorFlow, Tez, Zeppelin, and ZooKeeper (as of EMR 5.25.0). Case insensitive" 190 | } 191 | 192 | # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html 193 | variable "configurations_json" { 194 | type = string 195 | description = "A JSON string for supplying list of configurations for the EMR cluster. See https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html for more details" 196 | default = "" 197 | } 198 | 199 | variable "key_name" { 200 | type = string 201 | description = "Amazon EC2 key pair that can be used to ssh to the master node as the user called `hadoop`" 202 | default = null 203 | } 204 | 205 | variable "region" { 206 | type = string 207 | description = "AWS region" 208 | } 209 | 210 | variable "subnet_id" { 211 | type = string 212 | description = "VPC subnet ID where you want the job flow to launch. Cannot specify the `cc1.4xlarge` instance type for nodes of a job flow launched in a Amazon VPC" 213 | } 214 | 215 | variable "subnet_type" { 216 | type = string 217 | description = "Type of VPC subnet ID where you want the job flow to launch. Supported values are `private` or `public`" 218 | default = "private" 219 | } 220 | 221 | variable "route_table_id" { 222 | type = string 223 | description = "Route table ID for the VPC S3 Endpoint when launching the EMR cluster in a private subnet. 
Required when `subnet_type` is `private`" 224 | default = "" 225 | } 226 | 227 | variable "log_uri" { 228 | type = string 229 | description = "The path to the Amazon S3 location where logs for this cluster are stored" 230 | default = null 231 | } 232 | 233 | variable "core_instance_group_instance_type" { 234 | type = string 235 | description = "EC2 instance type for all instances in the Core instance group" 236 | } 237 | 238 | variable "core_instance_group_instance_count" { 239 | type = number 240 | description = "Target number of instances for the Core instance group. Must be at least 1" 241 | default = 1 242 | } 243 | 244 | variable "core_instance_group_ebs_size" { 245 | type = number 246 | description = "Core instances volume size, in gibibytes (GiB)" 247 | } 248 | 249 | variable "core_instance_group_ebs_type" { 250 | type = string 251 | description = "Core instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1`" 252 | default = "gp2" 253 | } 254 | 255 | variable "core_instance_group_ebs_iops" { 256 | type = number 257 | description = "The number of I/O operations per second (IOPS) that the Core volume supports" 258 | default = null 259 | } 260 | 261 | variable "core_instance_group_ebs_volumes_per_instance" { 262 | type = number 263 | description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Core instance group" 264 | default = 1 265 | } 266 | 267 | variable "core_instance_group_bid_price" { 268 | type = string 269 | description = "Bid price for each EC2 instance in the Core instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. 
Leave this blank to use On-Demand Instances"
  default     = null
}

variable "core_instance_group_autoscaling_policy" {
  type        = string
  description = "String containing the EMR Auto Scaling Policy JSON for the Core instance group"
  default     = null
}

# ------------------------------------------------------------------------------
# Master instance group
# ------------------------------------------------------------------------------

# Required: no default is declared.
variable "master_instance_group_instance_type" {
  type        = string
  description = "EC2 instance type for all instances in the Master instance group"
}

variable "master_instance_group_instance_count" {
  type        = number
  description = "Target number of instances for the Master instance group. Must be at least 1"
  default     = 1
}

# Required: no default is declared.
variable "master_instance_group_ebs_size" {
  type        = number
  description = "Master instances volume size, in gibibytes (GiB)"
}

variable "master_instance_group_ebs_type" {
  type        = string
  description = "Master instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1`"
  default     = "gp2"
}

variable "master_instance_group_ebs_iops" {
  type        = number
  description = "The number of I/O operations per second (IOPS) that the Master volume supports"
  default     = null
}

variable "master_instance_group_ebs_volumes_per_instance" {
  type        = number
  description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Master instance group"
  default     = 1
}

variable "master_instance_group_bid_price" {
  type        = string
  description = "Bid price for each EC2 instance in the Master instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances"
  default     = null
}

# ------------------------------------------------------------------------------
# Cluster-wide settings
# ------------------------------------------------------------------------------

variable "scale_down_behavior" {
  type        = string
  description = "The way that individual Amazon EC2 instances terminate when an automatic scale-in activity occurs or an instance group is resized"
  default     = null
}

variable "additional_info" {
  type        = string
  description = "A JSON string for selecting additional features such as adding proxy information. Note: Currently there is no API to retrieve the value of this argument after EMR cluster creation from provider, therefore Terraform cannot detect drift from the actual EMR cluster if its value is changed outside Terraform"
  default     = null
}

variable "security_configuration" {
  type        = string
  description = "The security configuration name to attach to the EMR cluster. Only valid for EMR clusters with `release_label` 4.8.0 or greater. See https://www.terraform.io/docs/providers/aws/r/emr_security_configuration.html for more info"
  default     = null
}

# ------------------------------------------------------------------------------
# Task instance group (optional; gated by `create_task_instance_group`)
# ------------------------------------------------------------------------------

variable "create_task_instance_group" {
  type        = bool
  description = "Whether to create an instance group for Task nodes. For more info: https://www.terraform.io/docs/providers/aws/r/emr_instance_group.html, https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html"
  default     = false
}

variable "task_instance_group_instance_type" {
  type        = string
  description = "EC2 instance type for all instances in the Task instance group"
  default     = null
}

variable "task_instance_group_instance_count" {
  type        = number
  description = "Target number of instances for the Task instance group. Must be at least 1"
  default     = 1
}

variable "task_instance_group_ebs_size" {
  type        = number
  description = "Task instances volume size, in gibibytes (GiB)"
  default     = 10
}

variable "task_instance_group_ebs_optimized" {
  type        = bool
  description = "Indicates whether an Amazon EBS volume in the Task instance group is EBS-optimized. Changing this forces a new resource to be created"
  default     = false
}

variable "task_instance_group_ebs_type" {
  type        = string
  description = "Task instances volume type. Valid options are `gp2`, `io1`, `standard` and `st1`"
  default     = "gp2"
}

variable "task_instance_group_ebs_iops" {
  type        = number
  description = "The number of I/O operations per second (IOPS) that the Task volume supports"
  default     = null
}

variable "task_instance_group_ebs_volumes_per_instance" {
  type        = number
  description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Task instance group"
  default     = 1
}

variable "task_instance_group_bid_price" {
  type        = string
  description = "Bid price for each EC2 instance in the Task instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances"
  default     = null
}

variable "task_instance_group_autoscaling_policy" {
  type        = string
  description = "String containing the EMR Auto Scaling Policy JSON for the Task instance group"
  default     = null
}

# ------------------------------------------------------------------------------
# Bootstrap actions and networking
# ------------------------------------------------------------------------------

# Every attribute of each object must be supplied by the caller: the object
# type below declares no optional attributes.
variable "bootstrap_action" {
  type = list(object({
    path = string
    name = string
    args = list(string)
  }))
  description = "List of bootstrap actions that will be run before Hadoop is started on the cluster nodes"
  default     = []
}

variable "create_vpc_endpoint_s3" {
  type        = bool
  description = "Set to false to prevent the module from creating VPC S3 Endpoint"
  default     = true
}

# ------------------------------------------------------------------------------
# Kerberos
# ------------------------------------------------------------------------------

variable "kerberos_enabled" {
  type        = bool
  description = "Set to true if EMR cluster will use kerberos_attributes"
  default     = false
}

variable "kerberos_ad_domain_join_password" {
  type        = string
  description = "The Active Directory password for ad_domain_join_user. Terraform cannot perform drift detection of this configuration."
  default     = null
  # Credential: keep the value out of plan/apply output.
  # `sensitive` on variables needs Terraform 0.14+, which versions.tf already requires.
  sensitive = true
}

variable "kerberos_ad_domain_join_user" {
  type        = string
  description = "Required only when establishing a cross-realm trust with an Active Directory domain. A user with sufficient privileges to join resources to the domain. Terraform cannot perform drift detection of this configuration."
  default     = null
}

variable "kerberos_cross_realm_trust_principal_password" {
  type        = string
  description = "Required only when establishing a cross-realm trust with a KDC in a different realm. The cross-realm principal password, which must be identical across realms. Terraform cannot perform drift detection of this configuration."
  default     = null
  # Credential: keep the value out of plan/apply output.
  sensitive = true
}

variable "kerberos_kdc_admin_password" {
  type        = string
  description = "The password used within the cluster for the kadmin service on the cluster-dedicated KDC, which maintains Kerberos principals, password policies, and keytabs for the cluster. Terraform cannot perform drift detection of this configuration."
  default     = null
  # Credential: keep the value out of plan/apply output.
  sensitive = true
}

variable "kerberos_realm" {
  type        = string
  description = "The name of the Kerberos realm to which all nodes in a cluster belong. For example, EC2.INTERNAL"
  default     = "EC2.INTERNAL"
}

# ------------------------------------------------------------------------------
# Steps
# ------------------------------------------------------------------------------

# Every attribute, including the nested `hadoop_jar_step` fields, must be
# supplied by the caller: the object type below declares no optional attributes.
variable "steps" {
  type = list(object({
    name              = string
    action_on_failure = string
    hadoop_jar_step = object({
      args       = list(string)
      jar        = string
      main_class = string
      properties = map(string)
    })
  }))
  description = "List of steps to run when creating the cluster."
  default     = []
}

# ------------------------------------------------------------------------------
# IAM permissions boundaries (default is the empty string, not null)
# ------------------------------------------------------------------------------

variable "emr_role_permissions_boundary" {
  type        = string
  description = "The Permissions Boundary ARN to apply to the EMR Role."
  default     = ""
}

variable "ec2_role_permissions_boundary" {
  type        = string
  description = "The Permissions Boundary ARN to apply to the EC2 Role."
  default     = ""
}

variable "ec2_autoscaling_role_permissions_boundary" {
  type        = string
  description = "The Permissions Boundary ARN to apply to the EC2 Autoscaling Role."
  default     = ""
}

# ------------------------------------------------------------------------------
# Auto termination
# ------------------------------------------------------------------------------

# Declared as a number because the value is a count of seconds. Terraform
# converts numeric strings such as "3600" automatically, so callers that pass
# a string keep working.
variable "auto_termination_idle_timeout" {
  type        = number
  description = "Auto termination policy idle timeout in seconds (60 - 604800 supported)"
  default     = null

  validation {
    # null means "not set"; the range is only checked when a value is given.
    # The conditional keeps the comparison from being applied to null.
    condition = (
      var.auto_termination_idle_timeout == null
      ? true
      : (var.auto_termination_idle_timeout >= 60 && var.auto_termination_idle_timeout <= 604800)
    )
    error_message = "The auto_termination_idle_timeout value must be between 60 and 604800 seconds."
  }
}

--------------------------------------------------------------------------------
/versions.tf:
--------------------------------------------------------------------------------
# Minimum Terraform core and provider versions for this module.
terraform {
  required_version = ">= 0.14.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 3.5.0"
    }
  }
}

--------------------------------------------------------------------------------