├── .editorconfig
├── .github
├── .gitkeep
└── workflows
│ ├── lock.yml
│ ├── pr-title.yml
│ ├── pre-commit.yml
│ ├── release.yml
│ └── stale-actions.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .releaserc.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── examples
├── README.md
├── private-cluster
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── public-cluster
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── serverless-cluster
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── studio
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
└── virtual-cluster
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── main.tf
├── modules
├── serverless
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── studio
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
└── virtual-cluster
│ ├── README.md
│ ├── main.tf
│ ├── outputs.tf
│ ├── variables.tf
│ └── versions.tf
├── outputs.tf
├── variables.tf
└── versions.tf
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: http://EditorConfig.org
2 | # Uses editorconfig to maintain consistent coding styles
3 |
4 | # top-most EditorConfig file
5 | root = true
6 |
7 | # Unix-style newlines with a newline ending every file
8 | [*]
9 | charset = utf-8
10 | end_of_line = lf
11 | indent_size = 2
12 | indent_style = space
13 | insert_final_newline = true
14 | max_line_length = 80
15 | trim_trailing_whitespace = true
16 |
17 | [*.{tf,tfvars}]
18 | indent_size = 2
19 | indent_style = space
20 |
21 | [*.md]
22 | max_line_length = 0
23 | trim_trailing_whitespace = false
24 |
25 | [Makefile]
26 | tab_width = 2
27 | indent_style = tab
28 |
29 | [COMMIT_EDITMSG]
30 | max_line_length = 0
31 |
--------------------------------------------------------------------------------
/.github/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/.github/.gitkeep
--------------------------------------------------------------------------------
/.github/workflows/lock.yml:
--------------------------------------------------------------------------------
1 | name: 'Lock Threads'
2 |
3 | on:
4 | schedule:
5 | - cron: '50 1 * * *'
6 |
7 | jobs:
8 | lock:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: dessant/lock-threads@v5
12 | with:
13 | github-token: ${{ secrets.GITHUB_TOKEN }}
14 | issue-comment: >
15 | I'm going to lock this issue because it has been closed for _30 days_ ⏳. This helps our maintainers find and focus on the active issues.
16 | If you have found a problem that seems similar to this, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.
17 | issue-inactive-days: '30'
18 | pr-comment: >
19 | I'm going to lock this pull request because it has been closed for _30 days_ ⏳. This helps our maintainers find and focus on the active issues.
20 | If you have found a problem that seems related to this change, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.
21 | pr-inactive-days: '30'
22 |
--------------------------------------------------------------------------------
/.github/workflows/pr-title.yml:
--------------------------------------------------------------------------------
1 | name: 'Validate PR title'
2 |
3 | on:
4 | pull_request_target:
5 | types:
6 | - opened
7 | - edited
8 | - synchronize
9 |
10 | jobs:
11 | main:
12 | name: Validate PR title
13 | runs-on: ubuntu-latest
14 | steps:
15 | # Please look up the latest version from
16 | # https://github.com/amannn/action-semantic-pull-request/releases
17 | - uses: amannn/action-semantic-pull-request@v5.5.3
18 | env:
19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
20 | with:
21 | # Configure which types are allowed.
22 | # Default: https://github.com/commitizen/conventional-commit-types
23 | types: |
24 | fix
25 | feat
26 | docs
27 | ci
28 | chore
29 | # Configure that a scope must always be provided.
30 | requireScope: false
31 | # Configure additional validation for the subject based on a regex.
32 | # This example ensures the subject starts with an uppercase character.
33 | subjectPattern: ^[A-Z].+$
34 | # If `subjectPattern` is configured, you can use this property to override
35 | # the default error message that is shown when the pattern doesn't match.
36 | # The variables `subject` and `title` can be used within the message.
37 | subjectPatternError: |
38 | The subject "{subject}" found in the pull request title "{title}"
39 | didn't match the configured pattern. Please ensure that the subject
40 | starts with an uppercase character.
41 | # For work-in-progress PRs you can typically use draft pull requests
42 | # from GitHub. However, private repositories on the free plan don't have
43 | # this option and therefore this action allows you to opt-in to using the
44 | # special "[WIP]" prefix to indicate this state. This will avoid the
45 | # validation of the PR title and the pull request checks remain pending.
46 | # Note that a second check will be reported if this is enabled.
47 | wip: true
48 | # When using "Squash and merge" on a PR with only one commit, GitHub
49 | # will suggest using that commit message instead of the PR title for the
50 | # merge commit, and it's easy to commit this by mistake. Enable this option
51 | # to also validate the commit message for one commit PRs.
52 | validateSingleCommit: false
53 |
--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
1 | name: Pre-Commit
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | - master
8 |
9 | env:
10 | TERRAFORM_DOCS_VERSION: v0.19.0
11 | TFLINT_VERSION: v0.53.0
12 |
13 | jobs:
14 | collectInputs:
15 | name: Collect workflow inputs
16 | runs-on: ubuntu-latest
17 | outputs:
18 | directories: ${{ steps.dirs.outputs.directories }}
19 | steps:
20 | - name: Checkout
21 | uses: actions/checkout@v4
22 |
23 | - name: Get root directories
24 | id: dirs
25 | uses: clowdhaus/terraform-composite-actions/directories@v1.9.0
26 |
27 | preCommitMinVersions:
28 | name: Min TF pre-commit
29 | needs: collectInputs
30 | runs-on: ubuntu-latest
31 | strategy:
32 | matrix:
33 | directory: ${{ fromJson(needs.collectInputs.outputs.directories) }}
34 | steps:
35 | # https://github.com/orgs/community/discussions/25678#discussioncomment-5242449
36 | - name: Delete huge unnecessary tools folder
37 | run: |
38 | rm -rf /opt/hostedtoolcache/CodeQL
39 | rm -rf /opt/hostedtoolcache/Java_Temurin-Hotspot_jdk
40 | rm -rf /opt/hostedtoolcache/Ruby
41 | rm -rf /opt/hostedtoolcache/go
42 |
43 | - name: Checkout
44 | uses: actions/checkout@v4
45 |
46 | - name: Terraform min/max versions
47 | id: minMax
48 | uses: clowdhaus/terraform-min-max@v1.3.1
49 | with:
50 | directory: ${{ matrix.directory }}
51 |
52 | - name: Pre-commit Terraform ${{ steps.minMax.outputs.minVersion }}
53 | # Run only validate pre-commit check on min version supported
54 | if: ${{ matrix.directory != '.' }}
55 | uses: clowdhaus/terraform-composite-actions/pre-commit@v1.11.1
56 | with:
57 | terraform-version: ${{ steps.minMax.outputs.minVersion }}
58 | tflint-version: ${{ env.TFLINT_VERSION }}
59 | args: 'terraform_validate --color=always --show-diff-on-failure --files ${{ matrix.directory }}/*'
60 |
61 | - name: Pre-commit Terraform ${{ steps.minMax.outputs.minVersion }}
62 | # Run only validate pre-commit check on min version supported
63 | if: ${{ matrix.directory == '.' }}
64 | uses: clowdhaus/terraform-composite-actions/pre-commit@v1.11.1
65 | with:
66 | terraform-version: ${{ steps.minMax.outputs.minVersion }}
67 | tflint-version: ${{ env.TFLINT_VERSION }}
68 | args: 'terraform_validate --color=always --show-diff-on-failure --files $(ls *.tf)'
69 |
70 | preCommitMaxVersion:
71 | name: Max TF pre-commit
72 | runs-on: ubuntu-latest
73 | needs: collectInputs
74 | steps:
75 | # https://github.com/orgs/community/discussions/25678#discussioncomment-5242449
76 | - name: Delete huge unnecessary tools folder
77 | run: |
78 | rm -rf /opt/hostedtoolcache/CodeQL
79 | rm -rf /opt/hostedtoolcache/Java_Temurin-Hotspot_jdk
80 | rm -rf /opt/hostedtoolcache/Ruby
81 | rm -rf /opt/hostedtoolcache/go
82 |
83 | - name: Checkout
84 | uses: actions/checkout@v4
85 | with:
86 | ref: ${{ github.event.pull_request.head.ref }}
87 | repository: ${{github.event.pull_request.head.repo.full_name}}
88 |
89 | - name: Terraform min/max versions
90 | id: minMax
91 | uses: clowdhaus/terraform-min-max@v1.3.1
92 |
93 | - name: Pre-commit Terraform ${{ steps.minMax.outputs.maxVersion }}
94 | uses: clowdhaus/terraform-composite-actions/pre-commit@v1.11.1
95 | with:
96 | terraform-version: ${{ steps.minMax.outputs.maxVersion }}
97 | tflint-version: ${{ env.TFLINT_VERSION }}
98 | terraform-docs-version: ${{ env.TERRAFORM_DOCS_VERSION }}
99 | install-hcledit: true
100 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | - master
9 | paths:
10 | - '**/*.tpl'
11 | - '**/*.py'
12 | - '**/*.tf'
13 | - '.github/workflows/release.yml'
14 |
15 | jobs:
16 | release:
17 | name: Release
18 | runs-on: ubuntu-latest
19 | # Skip running release workflow on forks
20 | if: github.repository_owner == 'terraform-aws-modules'
21 | steps:
22 | - name: Checkout
23 | uses: actions/checkout@v4
24 | with:
25 | persist-credentials: false
26 | fetch-depth: 0
27 |
28 | - name: Release
29 | uses: cycjimmy/semantic-release-action@v4
30 | with:
31 | semantic_version: 23.0.2
32 | extra_plugins: |
33 | @semantic-release/changelog@6.0.3
34 | @semantic-release/git@10.0.1
35 | conventional-changelog-conventionalcommits@7.0.2
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
38 |
--------------------------------------------------------------------------------
/.github/workflows/stale-actions.yaml:
--------------------------------------------------------------------------------
1 | name: 'Mark or close stale issues and PRs'
2 | on:
3 | schedule:
4 | - cron: '0 0 * * *'
5 |
6 | jobs:
7 | stale:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - uses: actions/stale@v9
11 | with:
12 | repo-token: ${{ secrets.GITHUB_TOKEN }}
13 | # Staling issues and PRs
14 | days-before-stale: 30
15 | stale-issue-label: stale
16 | stale-pr-label: stale
17 | stale-issue-message: |
18 | This issue has been automatically marked as stale because it has been open 30 days
19 | with no activity. Remove stale label or comment or this issue will be closed in 10 days
20 | stale-pr-message: |
21 | This PR has been automatically marked as stale because it has been open 30 days
22 | with no activity. Remove stale label or comment or this PR will be closed in 10 days
23 | # Not stale if it has one of these labels or is part of a milestone
24 | exempt-issue-labels: bug,wip,on-hold
25 | exempt-pr-labels: bug,wip,on-hold
26 | exempt-all-milestones: true
27 | # Close issue operations
28 | # Label will be automatically removed if the issues are no longer closed nor locked.
29 | days-before-close: 10
30 | delete-branch: true
31 | close-issue-message: This issue was automatically closed because of stale in 10 days
32 | close-pr-message: This PR was automatically closed because of stale in 10 days
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Local .terraform directories
2 | **/.terraform/*
3 |
4 | # .tfstate files
5 | *.tfstate
6 | *.tfstate.*
7 |
8 | # terraform lockfile
9 | .terraform.lock.hcl
10 |
11 | # Crash log files
12 | crash.log
13 |
14 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as
15 | # passwords, private keys, and other secrets. These should not be part of version
16 | # control as they are data points which are potentially sensitive and subject
17 | # to change depending on the environment.
18 | #
19 | *.tfvars
20 |
21 | # Ignore override files as they are usually used to override resources locally and so
22 | # are not checked in
23 | override.tf
24 | override.tf.json
25 | *_override.tf
26 | *_override.tf.json
27 |
28 | # Include override files you do wish to add to version control using negated pattern
29 | #
30 | # !example_override.tf
31 |
32 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
33 | # example: *tfplan*
34 |
35 | # Ignore CLI configuration files
36 | .terraformrc
37 | terraform.rc
38 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/antonbabenko/pre-commit-terraform
3 | rev: v1.99.1
4 | hooks:
5 | - id: terraform_fmt
6 | - id: terraform_docs
7 | args:
8 | - '--args=--lockfile=false'
9 | - id: terraform_tflint
10 | args:
11 | - '--args=--only=terraform_deprecated_interpolation'
12 | - '--args=--only=terraform_deprecated_index'
13 | - '--args=--only=terraform_unused_declarations'
14 | - '--args=--only=terraform_comment_syntax'
15 | - '--args=--only=terraform_documented_outputs'
16 | - '--args=--only=terraform_documented_variables'
17 | - '--args=--only=terraform_typed_variables'
18 | - '--args=--only=terraform_module_pinned_source'
19 | - '--args=--only=terraform_naming_convention'
20 | - '--args=--only=terraform_required_version'
21 | - '--args=--only=terraform_required_providers'
22 | - '--args=--only=terraform_standard_module_structure'
23 | - '--args=--only=terraform_workspace_remote'
24 | - id: terraform_validate
25 | - repo: https://github.com/pre-commit/pre-commit-hooks
26 | rev: v5.0.0
27 | hooks:
28 | - id: check-merge-conflict
29 | - id: end-of-file-fixer
30 | - id: trailing-whitespace
31 |
--------------------------------------------------------------------------------
/.releaserc.json:
--------------------------------------------------------------------------------
1 | {
2 | "branches": [
3 | "main",
4 | "master"
5 | ],
6 | "ci": false,
7 | "plugins": [
8 | [
9 | "@semantic-release/commit-analyzer",
10 | {
11 | "preset": "conventionalcommits"
12 | }
13 | ],
14 | [
15 | "@semantic-release/release-notes-generator",
16 | {
17 | "preset": "conventionalcommits"
18 | }
19 | ],
20 | [
21 | "@semantic-release/github",
22 | {
23 | "successComment": "This ${issue.pull_request ? 'PR is included' : 'issue has been resolved'} in version ${nextRelease.version} :tada:",
24 | "labels": false,
25 | "releasedLabels": false
26 | }
27 | ],
28 | [
29 | "@semantic-release/changelog",
30 | {
31 | "changelogFile": "CHANGELOG.md",
32 | "changelogTitle": "# Changelog\n\nAll notable changes to this project will be documented in this file."
33 | }
34 | ],
35 | [
36 | "@semantic-release/git",
37 | {
38 | "assets": [
39 | "CHANGELOG.md"
40 | ],
41 | "message": "chore(release): version ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}"
42 | }
43 | ]
44 | ]
45 | }
46 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | ## [2.4.2](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.4.1...v2.4.2) (2025-05-30)
6 |
7 |
8 | ### Bug Fixes
9 |
10 | * Align EMR EKS Job Execution role with AWS docs ([#38](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/38)) ([0c7fec0](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/0c7fec0f78534e20c64fb14120d49af8efc335bb))
11 |
12 | ## [2.4.1](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.4.0...v2.4.1) (2025-03-30)
13 |
14 |
15 | ### Bug Fixes
16 |
17 | * Add dependency on service security group rules ([#37](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/37)) ([150d89c](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/150d89c2b471376190e59adac10b99b0cdfa212d))
18 |
19 | ## [2.4.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.3.0...v2.4.0) (2025-01-15)
20 |
21 |
22 | ### Features
23 |
24 | * Support studio `encryption_key_arn` ([#35](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/35)) ([8122444](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/81224444712633533d40dc951e888357a46ffe57))
25 |
26 |
27 | ### Bug Fixes
28 |
29 | * Update CI workflow versions to latest ([#31](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/31)) ([ad34d3d](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/ad34d3d55581d51dff978d936d8ebc261f39e646))
30 |
31 | ## [2.3.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.2.0...v2.3.0) (2024-09-21)
32 |
33 |
34 | ### Features
35 |
36 | * Allow passing in custom instance profile role ([#30](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/30)) ([0712293](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/0712293bce835f099d5bc43e45320bc23eb5eacd))
37 |
38 | ## [2.2.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.1.0...v2.2.0) (2024-08-03)
39 |
40 |
41 | ### Features
42 |
43 | * Support interactive configuration block for EMR serverless application ([#27](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/27)) ([2e7045e](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/2e7045e99ee36bb93be4036388f01bbf4fcdbcdd))
44 |
45 | ## [2.1.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v2.0.0...v2.1.0) (2024-05-04)
46 |
47 |
48 | ### Features
49 |
50 | * Reset default value of s3_bucket_arns to empty ([#23](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/23)) ([d8d79df](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/d8d79df4dfe1c590c369ebb939a9e262de6cd42a))
51 |
52 | ## [2.0.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.2.2...v2.0.0) (2024-04-07)
53 |
54 |
55 | ### ⚠ BREAKING CHANGES
56 |
57 | * Add support for placement group config and unhealthy node replacement; raise AWS provider MSV to v5.44 (#21)
58 |
59 | ### Features
60 |
61 | * Add support for placement group config and unhealthy node replacement; raise AWS provider MSV to v5.44 ([#21](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/21)) ([eff2018](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/eff2018e7aeffdd260c21b9251275fa8342c34de))
62 |
63 | ## [1.2.2](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.2.1...v1.2.2) (2024-03-07)
64 |
65 |
66 | ### Bug Fixes
67 |
68 | * Update CI workflow versions to remove deprecated runtime warnings ([#18](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/18)) ([faf4d0b](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/faf4d0bfc218bc70d2124bed5e52780bb0856c2d))
69 |
70 | ### [1.2.1](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.2.0...v1.2.1) (2024-02-08)
71 |
72 |
73 | ### Bug Fixes
74 |
75 | * Add `"deletecollection"` verb to `"persistentvolumeclaims"` Kubernetes RBAC permission ([#17](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/17)) ([668f09b](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/668f09bcb2eb3dbac1be59648f00a4a7acbf832f))
76 |
77 | ## [1.2.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.1.3...v1.2.0) (2023-07-21)
78 |
79 |
80 | ### Features
81 |
82 | * Allowing Custom CloudWatch Log Group Name or Prefix ([#13](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/13)) ([1be0b5e](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/1be0b5e325f6ac458773c7eddc469397b57795a5))
83 |
84 | ### [1.1.3](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.1.2...v1.1.3) (2023-07-18)
85 |
86 |
87 | ### Bug Fixes
88 |
89 | * Updating Kubernetes Role for EMR Virtual Cluster ([#12](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/12)) ([05bc754](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/05bc754beddd0156271f05ccfd8702b9a6ba07b2))
90 |
91 | ### [1.1.2](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.1.1...v1.1.2) (2023-06-12)
92 |
93 |
94 | ### Bug Fixes
95 |
96 | * Remove wrapping list brackets from S3 bucket ARNs variable ([#9](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/9)) ([2317c56](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/2317c56f9b6715224af6eba4e7fe54ec0f0d4217))
97 |
98 | ### [1.1.1](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.1.0...v1.1.1) (2023-06-10)
99 |
100 |
101 | ### Bug Fixes
102 |
103 | * Correct S3 bucket access permission to try user provided S3 bucket ARNs first before falling back to default ([#8](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/8)) ([ae366ed](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/ae366ed81939a06a00c843edbf01097edee2353a))
104 |
105 | ## [1.1.0](https://github.com/terraform-aws-modules/terraform-aws-emr/compare/v1.0.0...v1.1.0) (2023-05-18)
106 |
107 |
108 | ### Features
109 |
110 | * Add support for image_configuration block in serverless module ([#2](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/2)) ([4d29ee5](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/4d29ee518322bffe48a3bc6fb096b3fe929b4eb0))
111 |
112 |
113 | ### Bug Fixes
114 |
115 | * Correct auto-release configuration file ([#6](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/6)) ([74847b1](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/74847b1dce3058d43e0a50affcf03fefee06a236))
116 | * Update EMR studio service role policy to RequestTags on Create* ([#5](https://github.com/terraform-aws-modules/terraform-aws-emr/issues/5)) ([274efc3](https://github.com/terraform-aws-modules/terraform-aws-emr/commit/274efc33cb7b251778019a66e9eed62b58722c8b))
117 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | Please note - the examples provided serve two primary purposes:
4 |
5 | 1. Show users working examples of the various ways in which the module can be configured and the features it supports
6 | 2. Provide a means of testing/validating module changes
7 |
8 | Please do not mistake the examples provided as "best practices". It is up to users to consult the AWS service documentation for best practices, usage recommendations, etc.
9 |
--------------------------------------------------------------------------------
/examples/private-cluster/main.tf:
--------------------------------------------------------------------------------
1 | provider "aws" {
2 | region = local.region
3 | }
4 |
5 | data "aws_availability_zones" "available" {}
6 |
7 | data "aws_partition" "current" {}
8 |
9 | data "aws_caller_identity" "current" {}
10 |
11 | data "aws_region" "current" {}
12 |
13 | locals {
14 | name = replace(basename(path.cwd), "-cluster", "") # working-directory name with "-cluster" removed
15 | region = "eu-west-1"
16 |
17 | vpc_cidr = "10.0.0.0/16"
18 | azs = slice(data.aws_availability_zones.available.names, 0, 3) # first three AZ names returned by the data source
19 |
20 | tags = {
21 | GithubOrg = "terraform-aws-modules"
22 | GithubRepo = "terraform-aws-emr"
23 | Example = local.name
24 | }
25 | }
26 |
27 | ################################################################################
28 | # EMR Module
29 | ################################################################################
30 |
31 | module "emr_instance_fleet" {
32 | source = "../.."
33 |
34 | # create = false
35 | name = "${local.name}-instance-fleet"
36 |
37 | release_label_filters = {
38 | emr6 = {
39 | prefix = "emr-6"
40 | }
41 | }
42 | applications = ["spark", "trino"]
43 | auto_termination_policy = {
44 | idle_timeout = 3600
45 | }
46 |
47 | bootstrap_action = {
48 | example = {
49 | path = "file:/bin/echo",
50 | name = "Just an example",
51 | args = ["Hello World!"]
52 | }
53 | }
54 |
55 | configurations_json = jsonencode([
56 | {
57 | "Classification" : "spark-env",
58 | "Configurations" : [
59 | {
60 | "Classification" : "export",
61 | "Properties" : {
62 | "JAVA_HOME" : "/usr/lib/jvm/java-1.8.0"
63 | }
64 | }
65 | ],
66 | "Properties" : {}
67 | }
68 | ])
69 |
70 | master_instance_fleet = {
71 | name = "master-fleet"
72 | target_on_demand_capacity = 1
73 | instance_type_configs = [
74 | {
75 | instance_type = "m5.xlarge"
76 | }
77 | ]
78 | }
79 |
80 | core_instance_fleet = {
81 | name = "core-fleet"
82 | target_on_demand_capacity = 2
83 | target_spot_capacity = 2
84 | instance_type_configs = [
85 | {
86 | instance_type = "c4.large"
87 | weighted_capacity = 1
88 | },
89 | {
90 | bid_price_as_percentage_of_on_demand_price = 100
91 | ebs_config = [{
92 | size = 256
93 | type = "gp3"
94 | volumes_per_instance = 1
95 | }]
96 | instance_type = "c5.xlarge"
97 | weighted_capacity = 2
98 | },
99 | {
100 | bid_price_as_percentage_of_on_demand_price = 100
101 | instance_type = "c6i.xlarge"
102 | weighted_capacity = 2
103 | }
104 | ]
105 | launch_specifications = {
106 | spot_specification = {
107 | allocation_strategy = "capacity-optimized"
108 | block_duration_minutes = 0
109 | timeout_action = "SWITCH_TO_ON_DEMAND"
110 | timeout_duration_minutes = 5
111 | }
112 | }
113 | }
114 |
115 | task_instance_fleet = {
116 | name = "task-fleet"
117 | target_on_demand_capacity = 1
118 | target_spot_capacity = 2
119 | instance_type_configs = [
120 | {
121 | instance_type = "c4.large"
122 | weighted_capacity = 1
123 | },
124 | {
125 | bid_price_as_percentage_of_on_demand_price = 100
126 | ebs_config = [{
127 | size = 256
128 | type = "gp3"
129 | volumes_per_instance = 1
130 | }]
131 | instance_type = "c5.xlarge"
132 | weighted_capacity = 2
133 | }
134 | ]
135 | launch_specifications = {
136 | spot_specification = {
137 | allocation_strategy = "capacity-optimized"
138 | block_duration_minutes = 0
139 | timeout_action = "SWITCH_TO_ON_DEMAND"
140 | timeout_duration_minutes = 5
141 | }
142 | }
143 | }
144 |
145 | ebs_root_volume_size = 64
146 | ec2_attributes = {
147 | subnet_ids = module.vpc.private_subnets
148 | }
149 | vpc_id = module.vpc.vpc_id
150 |
151 | keep_job_flow_alive_when_no_steps = true
152 | list_steps_states = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]
153 | log_uri = "s3://${module.s3_bucket.s3_bucket_id}/"
154 |
155 | scale_down_behavior = "TERMINATE_AT_TASK_COMPLETION"
156 | step_concurrency_level = 3
157 | termination_protection = false
158 | unhealthy_node_replacement = true
159 | visible_to_all_users = true
160 |
161 | tags = local.tags
162 | }
163 |
164 | module "emr_instance_group" {
165 | source = "../.."
166 |
167 | name = "${local.name}-instance-group"
168 | create_iam_instance_profile = false
169 | create_autoscaling_iam_role = false
170 |
171 | release_label_filters = {
172 | emr6 = {
173 | prefix = "emr-6"
174 | }
175 | }
176 | applications = ["spark", "trino"]
177 | auto_termination_policy = {
178 | idle_timeout = 3600
179 | }
180 |
181 | bootstrap_action = {
182 | example = {
183 | name = "Just an example",
184 | path = "file:/bin/echo",
185 | args = ["Hello World!"]
186 | }
187 | }
188 |
189 | configurations_json = jsonencode([
190 | {
191 | "Classification" : "spark-env",
192 | "Configurations" : [
193 | {
194 | "Classification" : "export",
195 | "Properties" : {
196 | "JAVA_HOME" : "/usr/lib/jvm/java-1.8.0"
197 | }
198 | }
199 | ],
200 | "Properties" : {}
201 | }
202 | ])
203 |
204 | # Example placement group config for multiple primary node clusters
205 | # placement_group_config = [
206 | # {
207 | # instance_role = "MASTER"
208 | # placement_strategy = "SPREAD"
209 | # }
210 | # ]
211 |
212 | master_instance_group = {
213 | name = "master-group"
214 | instance_count = 1
215 | instance_type = "m5.xlarge"
216 | }
217 |
218 | core_instance_group = {
219 | name = "core-group"
220 | instance_count = 2
221 | instance_type = "c4.large"
222 | }
223 |
224 | task_instance_group = {
225 | name = "task-group"
226 | instance_count = 2
227 | instance_type = "c5.xlarge"
228 | bid_price = "0.1"
229 |
230 | ebs_config = [{
231 | size = 256
232 | type = "gp3"
233 | volumes_per_instance = 1
234 | }]
235 | ebs_optimized = true
236 | }
237 |
238 | ebs_root_volume_size = 64
239 | ec2_attributes = {
240 | # Instance groups only support one Subnet/AZ
241 | subnet_id = element(module.vpc.private_subnets, 0)
242 | instance_profile = aws_iam_instance_profile.custom_instance_profile.arn
243 | }
244 | iam_instance_profile_role_arn = aws_iam_role.custom_instance_profile.arn
245 | autoscaling_iam_role_arn = aws_iam_role.autoscaling.arn
246 |
247 | vpc_id = module.vpc.vpc_id
248 |
249 | keep_job_flow_alive_when_no_steps = true
250 | list_steps_states = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]
251 | log_uri = "s3://${module.s3_bucket.s3_bucket_id}/"
252 |
253 | scale_down_behavior = "TERMINATE_AT_TASK_COMPLETION"
254 | step_concurrency_level = 3
255 | termination_protection = false
256 | visible_to_all_users = true
257 |
258 | tags = local.tags
259 | }
260 |
261 | module "emr_disabled" {
262 | source = "../.."
263 |
264 | create = false
265 | }
266 |
267 | ################################################################################
268 | # Supporting Resources
269 | ################################################################################
270 |
271 | module "vpc" {
272 | source = "terraform-aws-modules/vpc/aws"
273 | version = "~> 5.0"
274 |
275 | name = local.name
276 | cidr = local.vpc_cidr
277 |
278 | azs = local.azs
279 | public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k)]
280 | private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 10)]
281 |
282 | enable_nat_gateway = true
283 | single_nat_gateway = true
284 |
285 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-iam-policies.html#manually-tagged-resources
286 | # Tag if you want EMR to create the security groups for you
287 | # vpc_tags = { "for-use-with-amazon-emr-managed-policies" = true }
288 | # Tag if you are using public subnets
289 | # public_subnet_tags = { "for-use-with-amazon-emr-managed-policies" = true }
290 | private_subnet_tags = { "for-use-with-amazon-emr-managed-policies" = true }
291 |
292 | tags = local.tags
293 | }
294 |
295 | module "vpc_endpoints" {
296 | source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"
297 | version = "~> 5.0"
298 |
299 | security_group_ids = [module.vpc_endpoints_sg.security_group_id]
300 | vpc_id = module.vpc.vpc_id
301 |
302 | endpoints = merge({
303 | s3 = {
304 | service_type = "Gateway"
305 | service = "s3"
306 | route_table_ids = module.vpc.private_route_table_ids # attached to the private route tables
307 | tags = {
308 | Name = "${local.name}-s3"
309 | }
310 | }
311 | },
312 | { for svc in toset(["elasticmapreduce", "sts"]) : # one endpoint entry per listed service
313 | replace(svc, ".", "_") => # map key is the service name with any "." turned into "_"
314 | {
315 | service = svc
316 | private_dns_enabled = true
317 | subnet_ids = module.vpc.private_subnets
318 | tags = { Name = "${local.name}-${svc}" }
319 | }
320 | })
321 |
322 | tags = local.tags
323 | }
324 |
325 | module "vpc_endpoints_sg" {
326 | source = "terraform-aws-modules/security-group/aws"
327 | version = "~> 5.0"
328 |
329 | name = "${local.name}-vpc-endpoints"
330 | description = "Security group for VPC endpoint access"
331 | vpc_id = module.vpc.vpc_id
332 |
333 | ingress_with_cidr_blocks = [
334 | {
335 | rule = "https-443-tcp"
336 | description = "VPC CIDR HTTPS"
337 | cidr_blocks = join(",", module.vpc.private_subnets_cidr_blocks)
338 | },
339 | ]
340 |
341 | tags = local.tags
342 | }
343 |
344 | module "s3_bucket" {
345 | source = "terraform-aws-modules/s3-bucket/aws"
346 | version = "~> 4.0"
347 |
348 | bucket_prefix = "${local.name}-"
349 |
350 | # Allow deletion of non-empty bucket
351 | # Example usage only - not recommended for production
352 | force_destroy = true
353 |
354 | attach_deny_insecure_transport_policy = true
355 | attach_require_latest_tls_policy = true
356 |
357 | block_public_acls = true
358 | block_public_policy = true
359 | ignore_public_acls = true
360 | restrict_public_buckets = true
361 |
362 | server_side_encryption_configuration = {
363 | rule = {
364 | apply_server_side_encryption_by_default = {
365 | sse_algorithm = "AES256"
366 | }
367 | }
368 | }
369 |
370 | tags = local.tags
371 | }
372 |
373 | resource "aws_iam_role" "custom_instance_profile" {
374 | assume_role_policy = data.aws_iam_policy_document.assume.json # trust policy rendered from the "assume" policy document
375 | name_prefix = "custom-instance-profile"
376 | }
377 |
378 | data "aws_iam_policy_document" "assume" {
379 | statement {
380 | actions = ["sts:AssumeRole"] # trust statement: the principal below may assume the role
381 | principals {
382 | identifiers = ["ec2.${data.aws_partition.current.dns_suffix}"] # partition-aware DNS suffix instead of a hard-coded amazonaws.com
383 | type = "Service"
384 | }
385 | }
386 | }
387 |
388 | resource "aws_iam_role_policy_attachment" "emr_for_ec2" {
389 | role = aws_iam_role.custom_instance_profile.name
390 | policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role" # partition taken from the data source rather than hard-coding "aws"
391 | }
392 |
393 | resource "aws_iam_instance_profile" "custom_instance_profile" {
394 | role = aws_iam_role.custom_instance_profile.name
395 |
396 | name_prefix = "custom-instance-profile" # generated unique name, like the IAM roles in this file, so repeated applies do not collide
397 |
398 | depends_on = [
399 | aws_iam_role_policy_attachment.emr_for_ec2, # explicit ordering: the policy attachment is created before the profile
400 | ]
401 | }
402 |
403 | resource "aws_iam_role" "autoscaling" {
404 | assume_role_policy = data.aws_iam_policy_document.autoscaling.json # trust policy rendered from the "autoscaling" policy document
405 | name_prefix = "custom-autoscaling-role"
406 | }
407 |
408 | data "aws_iam_policy_document" "autoscaling" {
409 | statement {
410 | sid = "EMRAssumeRole"
411 | actions = ["sts:AssumeRole"]
412 |
413 | principals {
414 | type = "Service"
415 | identifiers = [
416 | "elasticmapreduce.${data.aws_partition.current.dns_suffix}",
417 | "application-autoscaling.${data.aws_partition.current.dns_suffix}"
418 | ]
419 | }
420 |
421 | condition {
422 | test = "StringEquals"
423 | variable = "aws:SourceAccount"
424 | values = [data.aws_caller_identity.current.account_id] # only requests originating from the calling account
425 | }
426 |
427 | condition {
428 | test = "ArnLike"
429 | variable = "aws:SourceArn"
430 | values = ["arn:${data.aws_partition.current.partition}:elasticmapreduce:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*"] # partition-aware, consistent with the principals above
431 | }
432 | }
433 | }
434 |
435 | resource "aws_iam_role_policy_attachment" "autoscaling" {
436 | policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole" # ARN built for the current partition
437 | role = aws_iam_role.autoscaling.name
438 | }
439 |
--------------------------------------------------------------------------------
/examples/private-cluster/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Instance Fleet
3 | ################################################################################
4 |
5 | output "fleet_cluster_arn" {
6 | description = "The ARN of the cluster"
7 | value = module.emr_instance_fleet.cluster_arn
8 | }
9 |
10 | output "fleet_cluster_id" {
11 | description = "The ID of the cluster"
12 | value = module.emr_instance_fleet.cluster_id
13 | }
14 |
15 | output "fleet_cluster_core_instance_group_id" {
16 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
17 | value = module.emr_instance_fleet.cluster_core_instance_group_id
18 | }
19 |
20 | output "fleet_cluster_master_instance_group_id" {
21 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
22 | value = module.emr_instance_fleet.cluster_master_instance_group_id
23 | }
24 |
25 | output "fleet_cluster_master_public_dns" {
26 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
27 | value = module.emr_instance_fleet.cluster_master_public_dns
28 | }
29 |
30 | output "fleet_security_configuration_id" {
31 | description = "The ID of the security configuration"
32 | value = module.emr_instance_fleet.security_configuration_id
33 | }
34 |
35 | output "fleet_security_configuration_name" {
36 | description = "The name of the security configuration"
37 | value = module.emr_instance_fleet.security_configuration_name
38 | }
39 |
40 | output "fleet_service_iam_role_name" {
41 | description = "Service IAM role name"
42 | value = module.emr_instance_fleet.service_iam_role_name
43 | }
44 |
45 | output "fleet_service_iam_role_arn" {
46 | description = "Service IAM role ARN"
47 | value = module.emr_instance_fleet.service_iam_role_arn
48 | }
49 |
50 | output "fleet_service_iam_role_unique_id" {
51 | description = "Stable and unique string identifying the service IAM role"
52 | value = module.emr_instance_fleet.service_iam_role_unique_id
53 | }
54 |
55 | output "fleet_autoscaling_iam_role_name" {
56 | description = "Autoscaling IAM role name"
57 | value = module.emr_instance_fleet.autoscaling_iam_role_name
58 | }
59 |
60 | output "fleet_autoscaling_iam_role_arn" {
61 | description = "Autoscaling IAM role ARN"
62 | value = module.emr_instance_fleet.autoscaling_iam_role_arn
63 | }
64 |
65 | output "fleet_autoscaling_iam_role_unique_id" {
66 | description = "Stable and unique string identifying the autoscaling IAM role"
67 | value = module.emr_instance_fleet.autoscaling_iam_role_unique_id
68 | }
69 |
70 | output "fleet_iam_instance_profile_iam_role_name" {
71 | description = "Instance profile IAM role name"
72 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_name
73 | }
74 |
75 | output "fleet_iam_instance_profile_iam_role_arn" {
76 | description = "Instance profile IAM role ARN"
77 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_arn
78 | }
79 |
80 | output "fleet_iam_instance_profile_iam_role_unique_id" {
81 | description = "Stable and unique string identifying the instance profile IAM role"
82 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_unique_id
83 | }
84 |
85 | output "fleet_iam_instance_profile_arn" {
86 | description = "ARN assigned by AWS to the instance profile"
87 | value = module.emr_instance_fleet.iam_instance_profile_arn
88 | }
89 |
90 | output "fleet_iam_instance_profile_id" {
91 | description = "Instance profile's ID"
92 | value = module.emr_instance_fleet.iam_instance_profile_id
93 | }
94 |
95 | output "fleet_iam_instance_profile_unique" {
96 | description = "Stable and unique string identifying the IAM instance profile"
97 | value = module.emr_instance_fleet.iam_instance_profile_unique
98 | }
99 |
100 | output "fleet_managed_master_security_group_arn" {
101 | description = "Amazon Resource Name (ARN) of the managed master security group"
102 | value = module.emr_instance_fleet.managed_master_security_group_arn
103 | }
104 |
105 | output "fleet_managed_master_security_group_id" {
106 | description = "ID of the managed master security group"
107 | value = module.emr_instance_fleet.managed_master_security_group_id
108 | }
109 |
110 | output "fleet_managed_slave_security_group_arn" {
111 | description = "Amazon Resource Name (ARN) of the managed slave security group"
112 | value = module.emr_instance_fleet.managed_slave_security_group_arn
113 | }
114 |
115 | output "fleet_managed_slave_security_group_id" {
116 | description = "ID of the managed slave security group"
117 | value = module.emr_instance_fleet.managed_slave_security_group_id
118 | }
119 |
120 | output "fleet_managed_service_access_security_group_arn" {
121 | description = "Amazon Resource Name (ARN) of the managed service access security group"
122 | value = module.emr_instance_fleet.managed_service_access_security_group_arn
123 | }
124 |
125 | output "fleet_managed_service_access_security_group_id" {
126 | description = "ID of the managed service access security group"
127 | value = module.emr_instance_fleet.managed_service_access_security_group_id
128 | }
129 |
130 | ################################################################################
131 | # Instance Group
132 | ################################################################################
133 |
134 | output "group_cluster_arn" {
135 | description = "The ARN of the cluster"
136 | value = module.emr_instance_group.cluster_arn
137 | }
138 |
139 | output "group_cluster_id" {
140 | description = "The ID of the cluster"
141 | value = module.emr_instance_group.cluster_id
142 | }
143 |
144 | output "group_cluster_core_instance_group_id" {
145 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
146 | value = module.emr_instance_group.cluster_core_instance_group_id
147 | }
148 |
149 | output "group_cluster_master_instance_group_id" {
150 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
151 | value = module.emr_instance_group.cluster_master_instance_group_id
152 | }
153 |
154 | output "group_cluster_master_public_dns" {
155 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
156 | value = module.emr_instance_group.cluster_master_public_dns
157 | }
158 |
159 | output "group_security_configuration_id" {
160 | description = "The ID of the security configuration"
161 | value = module.emr_instance_group.security_configuration_id
162 | }
163 |
164 | output "group_security_configuration_name" {
165 | description = "The name of the security configuration"
166 | value = module.emr_instance_group.security_configuration_name
167 | }
168 |
169 | output "group_service_iam_role_name" {
170 | description = "Service IAM role name"
171 | value = module.emr_instance_group.service_iam_role_name
172 | }
173 |
174 | output "group_service_iam_role_arn" {
175 | description = "Service IAM role ARN"
176 | value = module.emr_instance_group.service_iam_role_arn
177 | }
178 |
179 | output "group_service_iam_role_unique_id" {
180 | description = "Stable and unique string identifying the service IAM role"
181 | value = module.emr_instance_group.service_iam_role_unique_id
182 | }
183 |
184 | output "group_autoscaling_iam_role_name" {
185 | description = "Autoscaling IAM role name"
186 | value = module.emr_instance_group.autoscaling_iam_role_name
187 | }
188 |
189 | output "group_autoscaling_iam_role_arn" {
190 | description = "Autoscaling IAM role ARN"
191 | value = module.emr_instance_group.autoscaling_iam_role_arn
192 | }
193 |
194 | output "group_autoscaling_iam_role_unique_id" {
195 | description = "Stable and unique string identifying the autoscaling IAM role"
196 | value = module.emr_instance_group.autoscaling_iam_role_unique_id
197 | }
198 |
199 | output "group_iam_instance_profile_iam_role_name" {
200 | description = "Instance profile IAM role name"
201 | value = module.emr_instance_group.iam_instance_profile_iam_role_name
202 | }
203 |
204 | output "group_iam_instance_profile_iam_role_arn" {
205 | description = "Instance profile IAM role ARN"
206 | value = module.emr_instance_group.iam_instance_profile_iam_role_arn
207 | }
208 |
209 | output "group_iam_instance_profile_iam_role_unique_id" {
210 | description = "Stable and unique string identifying the instance profile IAM role"
211 | value = module.emr_instance_group.iam_instance_profile_iam_role_unique_id
212 | }
213 |
214 | output "group_iam_instance_profile_arn" {
215 | description = "ARN assigned by AWS to the instance profile"
216 | value = module.emr_instance_group.iam_instance_profile_arn
217 | }
218 |
219 | output "group_iam_instance_profile_id" {
220 | description = "Instance profile's ID"
221 | value = module.emr_instance_group.iam_instance_profile_id
222 | }
223 |
224 | output "group_iam_instance_profile_unique" {
225 | description = "Stable and unique string identifying the IAM instance profile"
226 | value = module.emr_instance_group.iam_instance_profile_unique
227 | }
228 |
229 | output "group_managed_master_security_group_arn" {
230 | description = "Amazon Resource Name (ARN) of the managed master security group"
231 | value = module.emr_instance_group.managed_master_security_group_arn
232 | }
233 |
234 | output "group_managed_master_security_group_id" {
235 | description = "ID of the managed master security group"
236 | value = module.emr_instance_group.managed_master_security_group_id
237 | }
238 |
239 | output "group_managed_slave_security_group_arn" {
240 | description = "Amazon Resource Name (ARN) of the managed slave security group"
241 | value = module.emr_instance_group.managed_slave_security_group_arn
242 | }
243 |
244 | output "group_managed_slave_security_group_id" {
245 | description = "ID of the managed slave security group"
246 | value = module.emr_instance_group.managed_slave_security_group_id
247 | }
248 |
249 | output "group_managed_service_access_security_group_arn" {
250 | description = "Amazon Resource Name (ARN) of the managed service access security group"
251 | value = module.emr_instance_group.managed_service_access_security_group_arn
252 | }
253 |
254 | output "group_managed_service_access_security_group_id" {
255 | description = "ID of the managed service access security group"
256 | value = module.emr_instance_group.managed_service_access_security_group_id
257 | }
258 |
259 | ################################################################################
260 | # Disabled
261 | ################################################################################
262 |
263 | output "disabled_cluster_arn" {
264 | description = "The ARN of the cluster"
265 | value = module.emr_disabled.cluster_arn
266 | }
267 |
268 | output "disabled_cluster_id" {
269 | description = "The ID of the cluster"
270 | value = module.emr_disabled.cluster_id
271 | }
272 |
273 | output "disabled_cluster_core_instance_group_id" {
274 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
275 | value = module.emr_disabled.cluster_core_instance_group_id
276 | }
277 |
278 | output "disabled_cluster_master_instance_group_id" {
279 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
280 | value = module.emr_disabled.cluster_master_instance_group_id
281 | }
282 |
283 | output "disabled_cluster_master_public_dns" {
284 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
285 | value = module.emr_disabled.cluster_master_public_dns
286 | }
287 |
288 | output "disabled_security_configuration_id" {
289 | description = "The ID of the security configuration"
290 | value = module.emr_disabled.security_configuration_id
291 | }
292 |
293 | output "disabled_security_configuration_name" {
294 | description = "The name of the security configuration"
295 | value = module.emr_disabled.security_configuration_name
296 | }
297 |
298 | output "disabled_service_iam_role_name" {
299 | description = "Service IAM role name"
300 | value = module.emr_disabled.service_iam_role_name
301 | }
302 |
303 | output "disabled_service_iam_role_arn" {
304 | description = "Service IAM role ARN"
305 | value = module.emr_disabled.service_iam_role_arn
306 | }
307 |
308 | output "disabled_service_iam_role_unique_id" {
309 | description = "Stable and unique string identifying the service IAM role"
310 | value = module.emr_disabled.service_iam_role_unique_id
311 | }
312 |
313 | output "disabled_autoscaling_iam_role_name" {
314 | description = "Autoscaling IAM role name"
315 | value = module.emr_disabled.autoscaling_iam_role_name
316 | }
317 |
318 | output "disabled_autoscaling_iam_role_arn" {
319 | description = "Autoscaling IAM role ARN"
320 | value = module.emr_disabled.autoscaling_iam_role_arn
321 | }
322 |
323 | output "disabled_autoscaling_iam_role_unique_id" {
324 | description = "Stable and unique string identifying the autoscaling IAM role"
325 | value = module.emr_disabled.autoscaling_iam_role_unique_id
326 | }
327 |
328 | output "disabled_iam_instance_profile_iam_role_name" {
329 | description = "Instance profile IAM role name"
330 | value = module.emr_disabled.iam_instance_profile_iam_role_name
331 | }
332 |
333 | output "disabled_iam_instance_profile_iam_role_arn" {
334 | description = "Instance profile IAM role ARN"
335 | value = module.emr_disabled.iam_instance_profile_iam_role_arn
336 | }
337 |
338 | output "disabled_iam_instance_profile_iam_role_unique_id" {
339 | description = "Stable and unique string identifying the instance profile IAM role"
340 | value = module.emr_disabled.iam_instance_profile_iam_role_unique_id
341 | }
342 |
343 | output "disabled_iam_instance_profile_arn" {
344 | description = "ARN assigned by AWS to the instance profile"
345 | value = module.emr_disabled.iam_instance_profile_arn
346 | }
347 |
348 | output "disabled_iam_instance_profile_id" {
349 | description = "Instance profile's ID"
350 | value = module.emr_disabled.iam_instance_profile_id
351 | }
352 |
353 | output "disabled_iam_instance_profile_unique" {
354 | description = "Stable and unique string identifying the IAM instance profile"
355 | value = module.emr_disabled.iam_instance_profile_unique
356 | }
357 |
358 | output "disabled_managed_master_security_group_arn" {
359 | description = "Amazon Resource Name (ARN) of the managed master security group"
360 | value = module.emr_disabled.managed_master_security_group_arn
361 | }
362 |
363 | output "disabled_managed_master_security_group_id" {
364 | description = "ID of the managed master security group"
365 | value = module.emr_disabled.managed_master_security_group_id
366 | }
367 |
368 | output "disabled_managed_slave_security_group_arn" {
369 | description = "Amazon Resource Name (ARN) of the managed slave security group"
370 | value = module.emr_disabled.managed_slave_security_group_arn
371 | }
372 |
373 | output "disabled_managed_slave_security_group_id" {
374 | description = "ID of the managed slave security group"
375 | value = module.emr_disabled.managed_slave_security_group_id
376 | }
377 |
378 | output "disabled_managed_service_access_security_group_arn" {
379 | description = "Amazon Resource Name (ARN) of the managed service access security group"
380 | value = module.emr_disabled.managed_service_access_security_group_arn
381 | }
382 |
383 | output "disabled_managed_service_access_security_group_id" {
384 | description = "ID of the managed service access security group"
385 | value = module.emr_disabled.managed_service_access_security_group_id
386 | }
387 |
--------------------------------------------------------------------------------
/examples/private-cluster/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/examples/private-cluster/variables.tf
--------------------------------------------------------------------------------
/examples/private-cluster/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0"
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83"
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/examples/public-cluster/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Public Cluster Example
2 |
3 | Configuration in this directory creates:
4 |
5 | - EMR cluster using instance fleets (`master`, `core`, `task`) deployed into public subnets
6 | - EMR cluster using instance groups (`master`, `core`, `task`) deployed into public subnets
7 | - S3 bucket for EMR logs
8 |
9 | Note: The public subnets will need to be tagged with `{ "for-use-with-amazon-emr-managed-policies" = true }` ([Reference](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-iam-policies.html#manually-tagged-resources))
10 |
11 | ## Usage
12 |
13 | To run this example you need to execute:
14 |
15 | ```bash
16 | $ terraform init
17 | $ terraform plan
18 | $ terraform apply
19 | ```
20 |
21 | Note that this example may create resources which will incur monetary charges on your AWS bill. Run `terraform destroy` when you no longer need these resources.
22 |
23 |
24 | ## Requirements
25 |
26 | | Name | Version |
27 | |------|---------|
28 | | [terraform](#requirement\_terraform) | >= 1.0 |
29 | | [aws](#requirement\_aws) | >= 5.83 |
30 |
31 | ## Providers
32 |
33 | | Name | Version |
34 | |------|---------|
35 | | [aws](#provider\_aws) | >= 5.83 |
36 |
37 | ## Modules
38 |
39 | | Name | Source | Version |
40 | |------|--------|---------|
41 | | [emr\_instance\_fleet](#module\_emr\_instance\_fleet) | ../.. | n/a |
42 | | [emr\_instance\_group](#module\_emr\_instance\_group) | ../.. | n/a |
43 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 4.0 |
44 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
45 |
46 | ## Resources
47 |
48 | | Name | Type |
49 | |------|------|
50 | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
51 |
52 | ## Inputs
53 |
54 | No inputs.
55 |
56 | ## Outputs
57 |
58 | | Name | Description |
59 | |------|-------------|
60 | | [fleet\_autoscaling\_iam\_role\_arn](#output\_fleet\_autoscaling\_iam\_role\_arn) | Autoscaling IAM role ARN |
61 | | [fleet\_autoscaling\_iam\_role\_name](#output\_fleet\_autoscaling\_iam\_role\_name) | Autoscaling IAM role name |
62 | | [fleet\_autoscaling\_iam\_role\_unique\_id](#output\_fleet\_autoscaling\_iam\_role\_unique\_id) | Stable and unique string identifying the autoscaling IAM role |
63 | | [fleet\_cluster\_arn](#output\_fleet\_cluster\_arn) | The ARN of the cluster |
64 | | [fleet\_cluster\_core\_instance\_group\_id](#output\_fleet\_cluster\_core\_instance\_group\_id) | Core node type Instance Group ID, if using Instance Group for this node type |
65 | | [fleet\_cluster\_id](#output\_fleet\_cluster\_id) | The ID of the cluster |
66 | | [fleet\_cluster\_master\_instance\_group\_id](#output\_fleet\_cluster\_master\_instance\_group\_id) | Master node type Instance Group ID, if using Instance Group for this node type |
67 | | [fleet\_cluster\_master\_public\_dns](#output\_fleet\_cluster\_master\_public\_dns) | The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name |
68 | | [fleet\_iam\_instance\_profile\_arn](#output\_fleet\_iam\_instance\_profile\_arn) | ARN assigned by AWS to the instance profile |
69 | | [fleet\_iam\_instance\_profile\_iam\_role\_arn](#output\_fleet\_iam\_instance\_profile\_iam\_role\_arn) | Instance profile IAM role ARN |
70 | | [fleet\_iam\_instance\_profile\_iam\_role\_name](#output\_fleet\_iam\_instance\_profile\_iam\_role\_name) | Instance profile IAM role name |
71 | | [fleet\_iam\_instance\_profile\_iam\_role\_unique\_id](#output\_fleet\_iam\_instance\_profile\_iam\_role\_unique\_id) | Stable and unique string identifying the instance profile IAM role |
72 | | [fleet\_iam\_instance\_profile\_id](#output\_fleet\_iam\_instance\_profile\_id) | Instance profile's ID |
73 | | [fleet\_iam\_instance\_profile\_unique](#output\_fleet\_iam\_instance\_profile\_unique) | Stable and unique string identifying the IAM instance profile |
74 | | [fleet\_managed\_master\_security\_group\_arn](#output\_fleet\_managed\_master\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed master security group |
75 | | [fleet\_managed\_master\_security\_group\_id](#output\_fleet\_managed\_master\_security\_group\_id) | ID of the managed master security group |
76 | | [fleet\_managed\_service\_access\_security\_group\_arn](#output\_fleet\_managed\_service\_access\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed service access security group |
77 | | [fleet\_managed\_service\_access\_security\_group\_id](#output\_fleet\_managed\_service\_access\_security\_group\_id) | ID of the managed service access security group |
78 | | [fleet\_managed\_slave\_security\_group\_arn](#output\_fleet\_managed\_slave\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed slave security group |
79 | | [fleet\_managed\_slave\_security\_group\_id](#output\_fleet\_managed\_slave\_security\_group\_id) | ID of the managed slave security group |
80 | | [fleet\_security\_configuration\_id](#output\_fleet\_security\_configuration\_id) | The ID of the security configuration |
81 | | [fleet\_security\_configuration\_name](#output\_fleet\_security\_configuration\_name) | The name of the security configuration |
82 | | [fleet\_service\_iam\_role\_arn](#output\_fleet\_service\_iam\_role\_arn) | Service IAM role ARN |
83 | | [fleet\_service\_iam\_role\_name](#output\_fleet\_service\_iam\_role\_name) | Service IAM role name |
84 | | [fleet\_service\_iam\_role\_unique\_id](#output\_fleet\_service\_iam\_role\_unique\_id) | Stable and unique string identifying the service IAM role |
85 | | [group\_autoscaling\_iam\_role\_arn](#output\_group\_autoscaling\_iam\_role\_arn) | Autoscaling IAM role ARN |
86 | | [group\_autoscaling\_iam\_role\_name](#output\_group\_autoscaling\_iam\_role\_name) | Autoscaling IAM role name |
87 | | [group\_autoscaling\_iam\_role\_unique\_id](#output\_group\_autoscaling\_iam\_role\_unique\_id) | Stable and unique string identifying the autoscaling IAM role |
88 | | [group\_cluster\_arn](#output\_group\_cluster\_arn) | The ARN of the cluster |
89 | | [group\_cluster\_core\_instance\_group\_id](#output\_group\_cluster\_core\_instance\_group\_id) | Core node type Instance Group ID, if using Instance Group for this node type |
90 | | [group\_cluster\_id](#output\_group\_cluster\_id) | The ID of the cluster |
91 | | [group\_cluster\_master\_instance\_group\_id](#output\_group\_cluster\_master\_instance\_group\_id) | Master node type Instance Group ID, if using Instance Group for this node type |
92 | | [group\_cluster\_master\_public\_dns](#output\_group\_cluster\_master\_public\_dns) | The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name |
93 | | [group\_iam\_instance\_profile\_arn](#output\_group\_iam\_instance\_profile\_arn) | ARN assigned by AWS to the instance profile |
94 | | [group\_iam\_instance\_profile\_iam\_role\_arn](#output\_group\_iam\_instance\_profile\_iam\_role\_arn) | Instance profile IAM role ARN |
95 | | [group\_iam\_instance\_profile\_iam\_role\_name](#output\_group\_iam\_instance\_profile\_iam\_role\_name) | Instance profile IAM role name |
96 | | [group\_iam\_instance\_profile\_iam\_role\_unique\_id](#output\_group\_iam\_instance\_profile\_iam\_role\_unique\_id) | Stable and unique string identifying the instance profile IAM role |
97 | | [group\_iam\_instance\_profile\_id](#output\_group\_iam\_instance\_profile\_id) | Instance profile's ID |
98 | | [group\_iam\_instance\_profile\_unique](#output\_group\_iam\_instance\_profile\_unique) | Stable and unique string identifying the IAM instance profile |
99 | | [group\_managed\_master\_security\_group\_arn](#output\_group\_managed\_master\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed master security group |
100 | | [group\_managed\_master\_security\_group\_id](#output\_group\_managed\_master\_security\_group\_id) | ID of the managed master security group |
101 | | [group\_managed\_service\_access\_security\_group\_arn](#output\_group\_managed\_service\_access\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed service access security group |
102 | | [group\_managed\_service\_access\_security\_group\_id](#output\_group\_managed\_service\_access\_security\_group\_id) | ID of the managed service access security group |
103 | | [group\_managed\_slave\_security\_group\_arn](#output\_group\_managed\_slave\_security\_group\_arn) | Amazon Resource Name (ARN) of the managed slave security group |
104 | | [group\_managed\_slave\_security\_group\_id](#output\_group\_managed\_slave\_security\_group\_id) | ID of the managed slave security group |
105 | | [group\_security\_configuration\_id](#output\_group\_security\_configuration\_id) | The ID of the security configuration |
106 | | [group\_security\_configuration\_name](#output\_group\_security\_configuration\_name) | The name of the security configuration |
107 | | [group\_service\_iam\_role\_arn](#output\_group\_service\_iam\_role\_arn) | Service IAM role ARN |
108 | | [group\_service\_iam\_role\_name](#output\_group\_service\_iam\_role\_name) | Service IAM role name |
109 | | [group\_service\_iam\_role\_unique\_id](#output\_group\_service\_iam\_role\_unique\_id) | Stable and unique string identifying the service IAM role |
110 |
111 |
112 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
113 |
--------------------------------------------------------------------------------
/examples/public-cluster/main.tf:
--------------------------------------------------------------------------------
1 | provider "aws" {
2 | region = local.region # "eu-west-1", set in this file's locals block
3 | }
4 |
5 | data "aws_availability_zones" "available" {} # unfiltered lookup feeding local.azs; NOTE(review): may include opted-in Local Zones - consider an opt-in-status filter
6 |
7 | locals {
8 | region = "eu-west-1"
9 | name = replace(basename(path.cwd), "-cluster", "") # working-directory name with "-cluster" removed
10 |
11 | azs = slice(data.aws_availability_zones.available.names, 0, 3) # first three AZ names returned by the lookup
12 | vpc_cidr = "10.0.0.0/16"
13 |
14 | tags = {
15 | GithubOrg = "terraform-aws-modules"
16 | GithubRepo = "terraform-aws-emr"
17 | Example = local.name
18 | }
19 | }
20 |
21 | ################################################################################
22 | # EMR Module
23 | ################################################################################
24 |
25 | module "emr_instance_fleet" {
26 | source = "../.." # the module at the repository root
27 |
28 | name = "${local.name}-instance-fleet"
29 |
30 | applications = ["spark", "trino"]
31 | release_label_filters = {
32 | emr6 = {
33 | prefix = "emr-6"
34 | }
35 | }
36 | auto_termination_policy = {
37 | idle_timeout = 3600
38 | }
39 |
40 | bootstrap_action = {
41 | example = {
42 | name = "Just an example"
43 | path = "file:/bin/echo"
44 | args = ["Hello World!"]
45 | }
46 | }
47 |
48 | configurations_json = jsonencode([
49 | {
50 | "Classification" : "spark-env",
51 | "Configurations" : [
52 | {
53 | "Classification" : "export",
54 | "Properties" : {
55 | "JAVA_HOME" : "/usr/lib/jvm/java-1.8.0"
56 | }
57 | }
58 | ],
59 | "Properties" : {}
60 | }
61 | ])
62 |
63 | master_instance_fleet = {
64 | name = "master-fleet"
65 | instance_type_configs = [
66 | {
67 | instance_type = "m5.xlarge"
68 | }
69 | ]
70 | target_on_demand_capacity = 1
71 | }
72 |
73 | core_instance_fleet = {
74 | name = "core-fleet"
75 | target_spot_capacity = 2
76 | target_on_demand_capacity = 2
77 | launch_specifications = {
78 | spot_specification = {
79 | timeout_action = "SWITCH_TO_ON_DEMAND"
80 | timeout_duration_minutes = 5
81 | allocation_strategy = "capacity-optimized"
82 | block_duration_minutes = 0
83 | }
84 | }
85 | instance_type_configs = [
86 | {
87 | weighted_capacity = 1
88 | instance_type = "c4.large"
89 | },
90 | {
91 | instance_type = "c5.xlarge"
92 | weighted_capacity = 2
93 | bid_price_as_percentage_of_on_demand_price = 100
94 | ebs_config = [{
95 | volumes_per_instance = 1
96 | type = "gp3"
97 | size = 256
98 | }]
99 | },
100 | {
101 | instance_type = "c6i.xlarge"
102 | weighted_capacity = 2
103 | bid_price_as_percentage_of_on_demand_price = 100
104 | }
105 | ]
106 | }
107 |
108 | task_instance_fleet = {
109 | name = "task-fleet"
110 | target_spot_capacity = 2
111 | target_on_demand_capacity = 0
112 | launch_specifications = {
113 | spot_specification = {
114 | timeout_action = "SWITCH_TO_ON_DEMAND"
115 | timeout_duration_minutes = 5
116 | allocation_strategy = "capacity-optimized"
117 | block_duration_minutes = 0
118 | }
119 | }
120 | instance_type_configs = [
121 | {
122 | weighted_capacity = 1
123 | instance_type = "c4.large"
124 | },
125 | {
126 | instance_type = "c5.xlarge"
127 | weighted_capacity = 2
128 | bid_price_as_percentage_of_on_demand_price = 100
129 | ebs_config = [{
130 | volumes_per_instance = 1
131 | type = "gp3"
132 | size = 256
133 | }]
134 | }
135 | ]
136 | }
137 |
138 | vpc_id = module.vpc.vpc_id
139 | ec2_attributes = {
140 | subnet_ids = module.vpc.public_subnets
141 | }
142 | ebs_root_volume_size = 64
143 | # A public cluster requires the private-cluster flag to be off
144 | is_private_cluster = false
145 |
146 | log_uri = "s3://${module.s3_bucket.s3_bucket_id}/"
147 | list_steps_states = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]
148 | keep_job_flow_alive_when_no_steps = true
149 |
150 | visible_to_all_users = true
151 | termination_protection = false
152 | step_concurrency_level = 3
153 | scale_down_behavior = "TERMINATE_AT_TASK_COMPLETION"
154 |
155 | tags = local.tags
156 | }
157 |
158 |
159 | module "emr_instance_group" {
160 | source = "../.." # the module at the repository root
161 |
162 | name = "${local.name}-instance-group"
163 |
164 | applications = ["spark", "trino"]
165 | release_label_filters = {
166 | emr6 = {
167 | prefix = "emr-6"
168 | }
169 | }
170 | auto_termination_policy = {
171 | idle_timeout = 3600
172 | }
173 |
174 | bootstrap_action = {
175 | example = {
176 | name = "Just an example"
177 | path = "file:/bin/echo"
178 | args = ["Hello World!"]
179 | }
180 | }
181 |
182 | configurations_json = jsonencode([
183 | {
184 | "Classification" : "spark-env",
185 | "Configurations" : [
186 | {
187 | "Classification" : "export",
188 | "Properties" : {
189 | "JAVA_HOME" : "/usr/lib/jvm/java-1.8.0"
190 | }
191 | }
192 | ],
193 | "Properties" : {}
194 | }
195 | ])
196 |
197 | master_instance_group = {
198 | instance_type = "m5.xlarge"
199 | instance_count = 1
200 | name = "master-group"
201 | }
202 |
203 | core_instance_group = {
204 | instance_type = "c4.large"
205 | instance_count = 2
206 | name = "core-group"
207 | }
208 |
209 | task_instance_group = {
210 | instance_type = "c5.xlarge"
211 | instance_count = 2
212 | name = "task-group"
213 | bid_price = "0.1"
214 |
215 | ebs_optimized = true
216 | ebs_config = [{
217 | volumes_per_instance = 1
218 | type = "gp3"
219 | size = 256
220 | }]
221 | }
222 |
223 | vpc_id = module.vpc.vpc_id
224 | ec2_attributes = {
225 | # Instance groups accept a single subnet (one AZ), so only the first public subnet is passed
226 | subnet_id = element(module.vpc.public_subnets, 0)
227 | }
228 | ebs_root_volume_size = 64
229 | # A public cluster requires the private-cluster flag to be off
230 | is_private_cluster = false
231 |
232 | log_uri = "s3://${module.s3_bucket.s3_bucket_id}/"
233 | list_steps_states = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]
234 | keep_job_flow_alive_when_no_steps = true
235 |
236 | visible_to_all_users = true
237 | termination_protection = false
238 | step_concurrency_level = 3
239 | scale_down_behavior = "TERMINATE_AT_TASK_COMPLETION"
240 |
241 | tags = local.tags
242 | }
243 |
244 | ################################################################################
245 | # Supporting Resources
246 | ################################################################################
247 |
248 | module "vpc" {
249 | version = "~> 5.0"
250 | source = "terraform-aws-modules/vpc/aws"
251 |
252 | cidr = local.vpc_cidr
253 | name = local.name
254 |
255 | azs = local.azs
256 | public_subnets = [for i, az in local.azs : cidrsubnet(local.vpc_cidr, 8, i)] # one subnet per AZ, 8 bits longer than the VPC prefix
257 |
258 | enable_nat_gateway = false
259 |
260 | # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-iam-policies.html#manually-tagged-resources
261 | # Uncomment to tag the VPC when EMR should create the security groups for you
262 | # vpc_tags = { "for-use-with-amazon-emr-managed-policies" = true }
263 | # Public subnets are used by this example, so they carry the tag
264 | public_subnet_tags = { "for-use-with-amazon-emr-managed-policies" = true }
265 | # Uncomment to tag private subnets when those are used instead
266 | # private_subnet_tags = { "for-use-with-amazon-emr-managed-policies" = true }
267 |
268 | tags = local.tags
269 | }
270 |
271 | module "s3_bucket" {
272 | version = "~> 4.0"
273 | source = "terraform-aws-modules/s3-bucket/aws"
274 |
275 | bucket_prefix = "${local.name}-"
276 |
277 | server_side_encryption_configuration = {
278 | rule = {
279 | apply_server_side_encryption_by_default = {
280 | sse_algorithm = "AES256"
281 | }
282 | }
283 | }
284 |
285 | restrict_public_buckets = true
286 | ignore_public_acls = true
287 | block_public_policy = true
288 | block_public_acls = true
289 |
290 | attach_require_latest_tls_policy = true
291 | attach_deny_insecure_transport_policy = true
292 |
293 | # Lets the bucket be destroyed even when it still holds objects
294 | # For example purposes only - not recommended for production
295 | force_destroy = true
296 |
297 | tags = local.tags
298 | }
299 |
--------------------------------------------------------------------------------
/examples/public-cluster/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Instance Fleet - values re-exported from module.emr_instance_fleet
3 | ################################################################################
4 |
5 | output "fleet_cluster_arn" {
6 | value = module.emr_instance_fleet.cluster_arn
7 | description = "The ARN of the cluster"
8 | }
9 |
10 | output "fleet_cluster_id" {
11 | value = module.emr_instance_fleet.cluster_id
12 | description = "The ID of the cluster"
13 | }
14 |
15 | output "fleet_cluster_core_instance_group_id" {
16 | value = module.emr_instance_fleet.cluster_core_instance_group_id
17 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
18 | }
19 |
20 | output "fleet_cluster_master_instance_group_id" {
21 | value = module.emr_instance_fleet.cluster_master_instance_group_id
22 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
23 | }
24 |
25 | output "fleet_cluster_master_public_dns" {
26 | value = module.emr_instance_fleet.cluster_master_public_dns
27 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
28 | }
29 |
30 | output "fleet_security_configuration_id" {
31 | value = module.emr_instance_fleet.security_configuration_id
32 | description = "The ID of the security configuration"
33 | }
34 |
35 | output "fleet_security_configuration_name" {
36 | value = module.emr_instance_fleet.security_configuration_name
37 | description = "The name of the security configuration"
38 | }
39 |
40 | output "fleet_service_iam_role_name" {
41 | value = module.emr_instance_fleet.service_iam_role_name
42 | description = "Service IAM role name"
43 | }
44 |
45 | output "fleet_service_iam_role_arn" {
46 | value = module.emr_instance_fleet.service_iam_role_arn
47 | description = "Service IAM role ARN"
48 | }
49 |
50 | output "fleet_service_iam_role_unique_id" {
51 | value = module.emr_instance_fleet.service_iam_role_unique_id
52 | description = "Stable and unique string identifying the service IAM role"
53 | }
54 |
55 | output "fleet_autoscaling_iam_role_name" {
56 | value = module.emr_instance_fleet.autoscaling_iam_role_name
57 | description = "Autoscaling IAM role name"
58 | }
59 |
60 | output "fleet_autoscaling_iam_role_arn" {
61 | value = module.emr_instance_fleet.autoscaling_iam_role_arn
62 | description = "Autoscaling IAM role ARN"
63 | }
64 |
65 | output "fleet_autoscaling_iam_role_unique_id" {
66 | value = module.emr_instance_fleet.autoscaling_iam_role_unique_id
67 | description = "Stable and unique string identifying the autoscaling IAM role"
68 | }
69 |
70 | output "fleet_iam_instance_profile_iam_role_name" {
71 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_name
72 | description = "Instance profile IAM role name"
73 | }
74 |
75 | output "fleet_iam_instance_profile_iam_role_arn" {
76 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_arn
77 | description = "Instance profile IAM role ARN"
78 | }
79 |
80 | output "fleet_iam_instance_profile_iam_role_unique_id" {
81 | value = module.emr_instance_fleet.iam_instance_profile_iam_role_unique_id
82 | description = "Stable and unique string identifying the instance profile IAM role"
83 | }
84 |
85 | output "fleet_iam_instance_profile_arn" {
86 | value = module.emr_instance_fleet.iam_instance_profile_arn
87 | description = "ARN assigned by AWS to the instance profile"
88 | }
89 |
90 | output "fleet_iam_instance_profile_id" {
91 | value = module.emr_instance_fleet.iam_instance_profile_id
92 | description = "Instance profile's ID"
93 | }
94 |
95 | output "fleet_iam_instance_profile_unique" {
96 | value = module.emr_instance_fleet.iam_instance_profile_unique
97 | description = "Stable and unique string identifying the IAM instance profile"
98 | }
99 |
100 | output "fleet_managed_master_security_group_arn" {
101 | value = module.emr_instance_fleet.managed_master_security_group_arn
102 | description = "Amazon Resource Name (ARN) of the managed master security group"
103 | }
104 |
105 | output "fleet_managed_master_security_group_id" {
106 | value = module.emr_instance_fleet.managed_master_security_group_id
107 | description = "ID of the managed master security group"
108 | }
109 |
110 | output "fleet_managed_slave_security_group_arn" {
111 | value = module.emr_instance_fleet.managed_slave_security_group_arn
112 | description = "Amazon Resource Name (ARN) of the managed slave security group"
113 | }
114 |
115 | output "fleet_managed_slave_security_group_id" {
116 | value = module.emr_instance_fleet.managed_slave_security_group_id
117 | description = "ID of the managed slave security group"
118 | }
119 |
120 | output "fleet_managed_service_access_security_group_arn" {
121 | value = module.emr_instance_fleet.managed_service_access_security_group_arn
122 | description = "Amazon Resource Name (ARN) of the managed service access security group"
123 | }
124 |
125 | output "fleet_managed_service_access_security_group_id" {
126 | value = module.emr_instance_fleet.managed_service_access_security_group_id
127 | description = "ID of the managed service access security group"
128 | }
129 |
130 | ################################################################################
131 | # Instance Group - values re-exported from module.emr_instance_group
132 | ################################################################################
133 |
134 | output "group_cluster_arn" {
135 | value = module.emr_instance_group.cluster_arn
136 | description = "The ARN of the cluster"
137 | }
138 |
139 | output "group_cluster_id" {
140 | value = module.emr_instance_group.cluster_id
141 | description = "The ID of the cluster"
142 | }
143 |
144 | output "group_cluster_core_instance_group_id" {
145 | value = module.emr_instance_group.cluster_core_instance_group_id
146 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
147 | }
148 |
149 | output "group_cluster_master_instance_group_id" {
150 | value = module.emr_instance_group.cluster_master_instance_group_id
151 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
152 | }
153 |
154 | output "group_cluster_master_public_dns" {
155 | value = module.emr_instance_group.cluster_master_public_dns
156 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
157 | }
158 |
159 | output "group_security_configuration_id" {
160 | value = module.emr_instance_group.security_configuration_id
161 | description = "The ID of the security configuration"
162 | }
163 |
164 | output "group_security_configuration_name" {
165 | value = module.emr_instance_group.security_configuration_name
166 | description = "The name of the security configuration"
167 | }
168 |
169 | output "group_service_iam_role_name" {
170 | value = module.emr_instance_group.service_iam_role_name
171 | description = "Service IAM role name"
172 | }
173 |
174 | output "group_service_iam_role_arn" {
175 | value = module.emr_instance_group.service_iam_role_arn
176 | description = "Service IAM role ARN"
177 | }
178 |
179 | output "group_service_iam_role_unique_id" {
180 | value = module.emr_instance_group.service_iam_role_unique_id
181 | description = "Stable and unique string identifying the service IAM role"
182 | }
183 |
184 | output "group_autoscaling_iam_role_name" {
185 | value = module.emr_instance_group.autoscaling_iam_role_name
186 | description = "Autoscaling IAM role name"
187 | }
188 |
189 | output "group_autoscaling_iam_role_arn" {
190 | value = module.emr_instance_group.autoscaling_iam_role_arn
191 | description = "Autoscaling IAM role ARN"
192 | }
193 |
194 | output "group_autoscaling_iam_role_unique_id" {
195 | value = module.emr_instance_group.autoscaling_iam_role_unique_id
196 | description = "Stable and unique string identifying the autoscaling IAM role"
197 | }
198 |
199 | output "group_iam_instance_profile_iam_role_name" {
200 | value = module.emr_instance_group.iam_instance_profile_iam_role_name
201 | description = "Instance profile IAM role name"
202 | }
203 |
204 | output "group_iam_instance_profile_iam_role_arn" {
205 | value = module.emr_instance_group.iam_instance_profile_iam_role_arn
206 | description = "Instance profile IAM role ARN"
207 | }
208 |
209 | output "group_iam_instance_profile_iam_role_unique_id" {
210 | value = module.emr_instance_group.iam_instance_profile_iam_role_unique_id
211 | description = "Stable and unique string identifying the instance profile IAM role"
212 | }
213 |
214 | output "group_iam_instance_profile_arn" {
215 | value = module.emr_instance_group.iam_instance_profile_arn
216 | description = "ARN assigned by AWS to the instance profile"
217 | }
218 |
219 | output "group_iam_instance_profile_id" {
220 | value = module.emr_instance_group.iam_instance_profile_id
221 | description = "Instance profile's ID"
222 | }
223 |
224 | output "group_iam_instance_profile_unique" {
225 | value = module.emr_instance_group.iam_instance_profile_unique
226 | description = "Stable and unique string identifying the IAM instance profile"
227 | }
228 |
229 | output "group_managed_master_security_group_arn" {
230 | value = module.emr_instance_group.managed_master_security_group_arn
231 | description = "Amazon Resource Name (ARN) of the managed master security group"
232 | }
233 |
234 | output "group_managed_master_security_group_id" {
235 | value = module.emr_instance_group.managed_master_security_group_id
236 | description = "ID of the managed master security group"
237 | }
238 |
239 | output "group_managed_slave_security_group_arn" {
240 | value = module.emr_instance_group.managed_slave_security_group_arn
241 | description = "Amazon Resource Name (ARN) of the managed slave security group"
242 | }
243 |
244 | output "group_managed_slave_security_group_id" {
245 | value = module.emr_instance_group.managed_slave_security_group_id
246 | description = "ID of the managed slave security group"
247 | }
248 |
249 | output "group_managed_service_access_security_group_arn" {
250 | value = module.emr_instance_group.managed_service_access_security_group_arn
251 | description = "Amazon Resource Name (ARN) of the managed service access security group"
252 | }
253 |
254 | output "group_managed_service_access_security_group_id" {
255 | value = module.emr_instance_group.managed_service_access_security_group_id
256 | description = "ID of the managed service access security group"
257 | }
258 |
--------------------------------------------------------------------------------
/examples/public-cluster/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/examples/public-cluster/variables.tf
--------------------------------------------------------------------------------
/examples/public-cluster/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0" # lowest Terraform CLI version this example accepts
3 |
4 | required_providers {
5 | aws = {
6 | version = ">= 5.83" # lowest AWS provider version this example accepts
7 | source = "hashicorp/aws"
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/examples/serverless-cluster/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Serverless Cluster Example
2 |
3 | Configuration in this directory creates:
4 |
5 | - EMR serverless cluster running Spark provisioned in private subnets with a custom security group
6 | - EMR serverless cluster running Hive
7 | - Disabled EMR serverless cluster
8 |
9 | Note: When EMR managed policies are used, the subnets the cluster runs in (private subnets in this example) will need to be tagged with `{ "for-use-with-amazon-emr-managed-policies" = true }` ([Reference](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-managed-iam-policies.html#manually-tagged-resources)).
10 |
11 | ## Usage
12 |
13 | To run this example you need to execute:
14 |
15 | ```bash
16 | $ terraform init
17 | $ terraform plan
18 | $ terraform apply
19 | ```
20 |
21 | Note that this example may create resources which will incur monetary charges on your AWS bill. Run `terraform destroy` when you no longer need these resources.
22 |
23 |
24 | ## Requirements
25 |
26 | | Name | Version |
27 | |------|---------|
28 | | [terraform](#requirement\_terraform) | >= 1.0 |
29 | | [aws](#requirement\_aws) | >= 5.83 |
30 |
31 | ## Providers
32 |
33 | | Name | Version |
34 | |------|---------|
35 | | [aws](#provider\_aws) | >= 5.83 |
36 |
37 | ## Modules
38 |
39 | | Name | Source | Version |
40 | |------|--------|---------|
41 | | [emr\_serverless\_disabled](#module\_emr\_serverless\_disabled) | ../../modules/serverless | n/a |
42 | | [emr\_serverless\_hive](#module\_emr\_serverless\_hive) | ../../modules/serverless | n/a |
43 | | [emr\_serverless\_spark](#module\_emr\_serverless\_spark) | ../../modules/serverless | n/a |
44 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
45 |
46 | ## Resources
47 |
48 | | Name | Type |
49 | |------|------|
50 | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
51 |
52 | ## Inputs
53 |
54 | No inputs.
55 |
56 | ## Outputs
57 |
58 | | Name | Description |
59 | |------|-------------|
60 | | [disabled\_arn](#output\_disabled\_arn) | Amazon Resource Name (ARN) of the application |
61 | | [disabled\_id](#output\_disabled\_id) | ID of the application |
62 | | [disabled\_security\_group\_arn](#output\_disabled\_security\_group\_arn) | Amazon Resource Name (ARN) of the security group |
63 | | [disabled\_security\_group\_id](#output\_disabled\_security\_group\_id) | ID of the security group |
64 | | [hive\_arn](#output\_hive\_arn) | Amazon Resource Name (ARN) of the application |
65 | | [hive\_id](#output\_hive\_id) | ID of the application |
66 | | [hive\_security\_group\_arn](#output\_hive\_security\_group\_arn) | Amazon Resource Name (ARN) of the security group |
67 | | [hive\_security\_group\_id](#output\_hive\_security\_group\_id) | ID of the security group |
68 | | [spark\_arn](#output\_spark\_arn) | Amazon Resource Name (ARN) of the application |
69 | | [spark\_id](#output\_spark\_id) | ID of the application |
70 | | [spark\_security\_group\_arn](#output\_spark\_security\_group\_arn) | Amazon Resource Name (ARN) of the security group |
71 | | [spark\_security\_group\_id](#output\_spark\_security\_group\_id) | ID of the security group |
72 |
73 |
74 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
75 |
--------------------------------------------------------------------------------
/examples/serverless-cluster/main.tf:
--------------------------------------------------------------------------------
1 | provider "aws" {
2 | region = local.region # "eu-west-1", set in this file's locals block
3 | }
4 |
5 | data "aws_availability_zones" "available" {} # unfiltered lookup feeding local.azs; NOTE(review): may include opted-in Local Zones - consider an opt-in-status filter
6 |
7 | locals {
8 | region = "eu-west-1"
9 | name = replace(basename(path.cwd), "-cluster", "") # working-directory name with "-cluster" removed
10 |
11 | azs = slice(data.aws_availability_zones.available.names, 0, 3) # first three AZ names returned by the lookup
12 | vpc_cidr = "10.0.0.0/16"
13 |
14 | tags = {
15 | GithubOrg = "terraform-aws-modules"
16 | GithubRepo = "terraform-aws-emr"
17 | Example = local.name
18 | }
19 | }
20 |
21 | ################################################################################
22 | # Cluster
23 | ################################################################################
24 |
25 | module "emr_serverless_spark" {
26 | source = "../../modules/serverless"
27 |
28 | name = "${local.name}-spark"
29 |
30 | release_label_prefix = "emr-6"
31 |
32 | initial_capacity = {
33 | driver = {
34 | initial_capacity_type = "Driver"
35 |
36 | initial_capacity_config = {
37 | worker_configuration = {
38 | memory = "12 GB"
39 | cpu = "4 vCPU"
40 | }
41 | worker_count = 2
42 | }
43 | }
44 |
45 | executor = {
46 | initial_capacity_type = "Executor"
47 |
48 | initial_capacity_config = {
49 | worker_configuration = {
50 | memory = "24 GB"
51 | disk = "64 GB"
52 | cpu = "8 vCPU"
53 | }
54 | worker_count = 2
55 | }
56 | }
57 | }
58 |
59 | maximum_capacity = {
60 | memory = "144 GB" # twice the initial_capacity total (2 x 12 GB + 2 x 24 GB)
61 | cpu = "48 vCPU" # twice the initial_capacity total (2 x 4 vCPU + 2 x 8 vCPU)
62 | }
63 |
64 | network_configuration = {
65 | subnet_ids = module.vpc.private_subnets
66 | }
67 |
68 | security_group_rules = {
69 | egress_all = {
70 | cidr_blocks = ["0.0.0.0/0"]
71 | protocol = "-1"
72 | from_port = 0
73 | to_port = 0
74 | }
75 | }
76 |
77 | interactive_configuration = {
78 | studio_enabled = true
79 | livy_endpoint_enabled = true
80 | }
81 |
82 | tags = local.tags
83 | }
84 |
85 | module "emr_serverless_hive" { # EMR Serverless application of type "hive" with HiveDriver/TezTask workers
86 | source = "../../modules/serverless"
87 |
88 | name = "${local.name}-hive"
89 |
90 | release_label_prefix = "emr-6"
91 | type = "hive" # set explicitly here; the Spark application above leaves `type` unset
92 |
93 | initial_capacity = { # pre-initialized workers, one entry per worker type
94 | driver = {
95 | initial_capacity_type = "HiveDriver"
96 |
97 | initial_capacity_config = {
98 | worker_count = 2
99 | worker_configuration = {
100 | cpu = "2 vCPU"
101 | memory = "6 GB"
102 | }
103 | }
104 | }
105 |
106 | task = {
107 | initial_capacity_type = "TezTask"
108 |
109 | initial_capacity_config = {
110 | worker_count = 2
111 | worker_configuration = {
112 | cpu = "4 vCPU"
113 | disk = "32 GB"
114 | memory = "12 GB"
115 | }
116 | }
117 | }
118 | }
119 |
120 | maximum_capacity = { # the initial capacity above adds up to 12 vCPU / 36 GB, i.e. half of this ceiling
121 | cpu = "24 vCPU"
122 | memory = "72 GB"
123 | }
124 |
125 | tags = local.tags # note: no network_configuration or security_group_rules are passed for this application
126 | }
127 |
128 | module "emr_serverless_disabled" { # instantiates the sub-module with creation switched off
129 | source = "../../modules/serverless"
130 |
131 | create = false # NOTE(review): presumably no resources are created and outputs are empty - confirm in modules/serverless
132 | }
133 |
134 | ################################################################################
135 | # Supporting Resources
136 | ################################################################################
137 |
138 | module "vpc" { # VPC for the example: one public and one private /24 subnet per entry in local.azs
139 | source = "terraform-aws-modules/vpc/aws"
140 | version = "~> 5.0"
141 |
142 | cidr = local.vpc_cidr
143 | name = local.name
144 |
145 | azs = local.azs
146 | private_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx + 10)] # subnet numbers start at 10
147 | public_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx)] # subnet numbers start at 0
148 |
149 | single_nat_gateway = true
150 | enable_nat_gateway = true
151 |
152 | tags = local.tags
153 | }
154 |
--------------------------------------------------------------------------------
/examples/serverless-cluster/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Spark - values re-exported from module.emr_serverless_spark
3 | ################################################################################
4 |
5 | output "spark_arn" {
6 | description = "Amazon Resource Name (ARN) of the application"
7 | value = module.emr_serverless_spark.arn
8 | }
9 |
10 | output "spark_id" {
11 | description = "ID of the application"
12 | value = module.emr_serverless_spark.id
13 | }
14 |
15 | output "spark_security_group_arn" {
16 | description = "Amazon Resource Name (ARN) of the security group"
17 | value = module.emr_serverless_spark.security_group_arn
18 | }
19 |
20 | output "spark_security_group_id" {
21 | description = "ID of the security group"
22 | value = module.emr_serverless_spark.security_group_id
23 | }
24 |
25 | ################################################################################
26 | # Hive - values re-exported from module.emr_serverless_hive
27 | ################################################################################
28 |
29 | output "hive_arn" {
30 | description = "Amazon Resource Name (ARN) of the application"
31 | value = module.emr_serverless_hive.arn
32 | }
33 |
34 | output "hive_id" {
35 | description = "ID of the application"
36 | value = module.emr_serverless_hive.id
37 | }
38 |
39 | output "hive_security_group_arn" {
40 | description = "Amazon Resource Name (ARN) of the security group"
41 | value = module.emr_serverless_hive.security_group_arn
42 | }
43 |
44 | output "hive_security_group_id" {
45 | description = "ID of the security group"
46 | value = module.emr_serverless_hive.security_group_id
47 | }
48 |
49 | ################################################################################
50 | # Disabled - values re-exported from module.emr_serverless_disabled (declared with create = false in main.tf)
51 | ################################################################################
52 |
53 | output "disabled_arn" {
54 | description = "Amazon Resource Name (ARN) of the application"
55 | value = module.emr_serverless_disabled.arn
56 | }
57 |
58 | output "disabled_id" {
59 | description = "ID of the application"
60 | value = module.emr_serverless_disabled.id
61 | }
62 |
63 | output "disabled_security_group_arn" {
64 | description = "Amazon Resource Name (ARN) of the security group"
65 | value = module.emr_serverless_disabled.security_group_arn
66 | }
67 |
68 | output "disabled_security_group_id" {
69 | description = "ID of the security group"
70 | value = module.emr_serverless_disabled.security_group_id
71 | }
72 |
--------------------------------------------------------------------------------
/examples/serverless-cluster/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/examples/serverless-cluster/variables.tf
--------------------------------------------------------------------------------
/examples/serverless-cluster/versions.tf:
--------------------------------------------------------------------------------
1 | terraform { # version constraints for this example
2 | required_version = ">= 1.0" # minimum Terraform version
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83" # minimum AWS provider version
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/examples/studio/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Studio Example
2 |
3 | Configuration in this directory creates:
4 |
5 | - EMR Studio demonstrating the majority of the available configurations
6 | - EMR Studio that utilizes IAM Identity Center (SSO) authentication mode
7 | - EMR Studio that utilizes IAM authentication mode
8 |
9 | ## Usage
10 |
11 | To run this example you need to execute:
12 |
13 | ```bash
14 | $ terraform init
15 | $ terraform plan
16 | $ terraform apply
17 | ```
18 |
19 |
20 | ## Requirements
21 |
22 | | Name | Version |
23 | |------|---------|
24 | | [terraform](#requirement\_terraform) | >= 1.0 |
25 | | [aws](#requirement\_aws) | >= 5.83 |
26 |
27 | ## Providers
28 |
29 | | Name | Version |
30 | |------|---------|
31 | | [aws](#provider\_aws) | >= 5.83 |
32 |
33 | ## Modules
34 |
35 | | Name | Source | Version |
36 | |------|--------|---------|
37 | | [emr\_studio\_complete](#module\_emr\_studio\_complete) | ../../modules/studio | n/a |
38 | | [emr\_studio\_disabled](#module\_emr\_studio\_disabled) | ../../modules/studio | n/a |
39 | | [emr\_studio\_iam](#module\_emr\_studio\_iam) | ../../modules/studio | n/a |
40 | | [emr\_studio\_sso](#module\_emr\_studio\_sso) | ../../modules/studio | n/a |
41 | | [kms](#module\_kms) | terraform-aws-modules/kms/aws | ~> 2.0 |
42 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 4.0 |
43 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
44 |
45 | ## Resources
46 |
47 | | Name | Type |
48 | |------|------|
49 | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
50 | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
51 | | [aws_identitystore_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/identitystore_group) | data source |
52 | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
53 | | [aws_ssoadmin_instances.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssoadmin_instances) | data source |
54 |
55 | ## Inputs
56 |
57 | No inputs.
58 |
59 | ## Outputs
60 |
61 | | Name | Description |
62 | |------|-------------|
63 | | [complete\_arn](#output\_complete\_arn) | ARN of the studio |
64 | | [complete\_engine\_security\_group\_arn](#output\_complete\_engine\_security\_group\_arn) | Amazon Resource Name (ARN) of the engine security group |
65 | | [complete\_engine\_security\_group\_id](#output\_complete\_engine\_security\_group\_id) | ID of the engine security group |
66 | | [complete\_service\_iam\_role\_arn](#output\_complete\_service\_iam\_role\_arn) | Service IAM role ARN |
67 | | [complete\_service\_iam\_role\_name](#output\_complete\_service\_iam\_role\_name) | Service IAM role name |
68 | | [complete\_service\_iam\_role\_policy\_arn](#output\_complete\_service\_iam\_role\_policy\_arn) | Service IAM role policy ARN |
69 | | [complete\_service\_iam\_role\_policy\_id](#output\_complete\_service\_iam\_role\_policy\_id) | Service IAM role policy ID |
70 | | [complete\_service\_iam\_role\_policy\_name](#output\_complete\_service\_iam\_role\_policy\_name) | The name of the service role policy |
71 | | [complete\_service\_iam\_role\_unique\_id](#output\_complete\_service\_iam\_role\_unique\_id) | Stable and unique string identifying the service IAM role |
72 | | [complete\_url](#output\_complete\_url) | The unique access URL of the Amazon EMR Studio |
73 | | [complete\_user\_iam\_role\_arn](#output\_complete\_user\_iam\_role\_arn) | User IAM role ARN |
74 | | [complete\_user\_iam\_role\_name](#output\_complete\_user\_iam\_role\_name) | User IAM role name |
75 | | [complete\_user\_iam\_role\_policy\_arn](#output\_complete\_user\_iam\_role\_policy\_arn) | User IAM role policy ARN |
76 | | [complete\_user\_iam\_role\_policy\_id](#output\_complete\_user\_iam\_role\_policy\_id) | User IAM role policy ID |
77 | | [complete\_user\_iam\_role\_policy\_name](#output\_complete\_user\_iam\_role\_policy\_name) | The name of the user role policy |
78 | | [complete\_user\_iam\_role\_unique\_id](#output\_complete\_user\_iam\_role\_unique\_id) | Stable and unique string identifying the user IAM role |
79 | | [complete\_workspace\_security\_group\_arn](#output\_complete\_workspace\_security\_group\_arn) | Amazon Resource Name (ARN) of the workspace security group |
80 | | [complete\_workspace\_security\_group\_id](#output\_complete\_workspace\_security\_group\_id) | ID of the workspace security group |
81 | | [iam\_arn](#output\_iam\_arn) | ARN of the studio |
82 | | [iam\_engine\_security\_group\_arn](#output\_iam\_engine\_security\_group\_arn) | Amazon Resource Name (ARN) of the engine security group |
83 | | [iam\_engine\_security\_group\_id](#output\_iam\_engine\_security\_group\_id) | ID of the engine security group |
84 | | [iam\_service\_iam\_role\_arn](#output\_iam\_service\_iam\_role\_arn) | Service IAM role ARN |
85 | | [iam\_service\_iam\_role\_name](#output\_iam\_service\_iam\_role\_name) | Service IAM role name |
86 | | [iam\_service\_iam\_role\_policy\_arn](#output\_iam\_service\_iam\_role\_policy\_arn) | Service IAM role policy ARN |
87 | | [iam\_service\_iam\_role\_policy\_id](#output\_iam\_service\_iam\_role\_policy\_id) | Service IAM role policy ID |
88 | | [iam\_service\_iam\_role\_policy\_name](#output\_iam\_service\_iam\_role\_policy\_name) | The name of the service role policy |
89 | | [iam\_service\_iam\_role\_unique\_id](#output\_iam\_service\_iam\_role\_unique\_id) | Stable and unique string identifying the service IAM role |
90 | | [iam\_url](#output\_iam\_url) | The unique access URL of the Amazon EMR Studio |
91 | | [iam\_user\_iam\_role\_arn](#output\_iam\_user\_iam\_role\_arn) | User IAM role ARN |
92 | | [iam\_user\_iam\_role\_name](#output\_iam\_user\_iam\_role\_name) | User IAM role name |
93 | | [iam\_user\_iam\_role\_policy\_arn](#output\_iam\_user\_iam\_role\_policy\_arn) | User IAM role policy ARN |
94 | | [iam\_user\_iam\_role\_policy\_id](#output\_iam\_user\_iam\_role\_policy\_id) | User IAM role policy ID |
95 | | [iam\_user\_iam\_role\_policy\_name](#output\_iam\_user\_iam\_role\_policy\_name) | The name of the user role policy |
96 | | [iam\_user\_iam\_role\_unique\_id](#output\_iam\_user\_iam\_role\_unique\_id) | Stable and unique string identifying the user IAM role |
97 | | [iam\_workspace\_security\_group\_arn](#output\_iam\_workspace\_security\_group\_arn) | Amazon Resource Name (ARN) of the workspace security group |
98 | | [iam\_workspace\_security\_group\_id](#output\_iam\_workspace\_security\_group\_id) | ID of the workspace security group |
99 | | [sso\_arn](#output\_sso\_arn) | ARN of the studio |
100 | | [sso\_engine\_security\_group\_arn](#output\_sso\_engine\_security\_group\_arn) | Amazon Resource Name (ARN) of the engine security group |
101 | | [sso\_engine\_security\_group\_id](#output\_sso\_engine\_security\_group\_id) | ID of the engine security group |
102 | | [sso\_service\_iam\_role\_arn](#output\_sso\_service\_iam\_role\_arn) | Service IAM role ARN |
103 | | [sso\_service\_iam\_role\_name](#output\_sso\_service\_iam\_role\_name) | Service IAM role name |
104 | | [sso\_service\_iam\_role\_policy\_arn](#output\_sso\_service\_iam\_role\_policy\_arn) | Service IAM role policy ARN |
105 | | [sso\_service\_iam\_role\_policy\_id](#output\_sso\_service\_iam\_role\_policy\_id) | Service IAM role policy ID |
106 | | [sso\_service\_iam\_role\_policy\_name](#output\_sso\_service\_iam\_role\_policy\_name) | The name of the service role policy |
107 | | [sso\_service\_iam\_role\_unique\_id](#output\_sso\_service\_iam\_role\_unique\_id) | Stable and unique string identifying the service IAM role |
108 | | [sso\_url](#output\_sso\_url) | The unique access URL of the Amazon EMR Studio |
109 | | [sso\_user\_iam\_role\_arn](#output\_sso\_user\_iam\_role\_arn) | User IAM role ARN |
110 | | [sso\_user\_iam\_role\_name](#output\_sso\_user\_iam\_role\_name) | User IAM role name |
111 | | [sso\_user\_iam\_role\_policy\_arn](#output\_sso\_user\_iam\_role\_policy\_arn) | User IAM role policy ARN |
112 | | [sso\_user\_iam\_role\_policy\_id](#output\_sso\_user\_iam\_role\_policy\_id) | User IAM role policy ID |
113 | | [sso\_user\_iam\_role\_policy\_name](#output\_sso\_user\_iam\_role\_policy\_name) | The name of the user role policy |
114 | | [sso\_user\_iam\_role\_unique\_id](#output\_sso\_user\_iam\_role\_unique\_id) | Stable and unique string identifying the user IAM role |
115 | | [sso\_workspace\_security\_group\_arn](#output\_sso\_workspace\_security\_group\_arn) | Amazon Resource Name (ARN) of the workspace security group |
116 | | [sso\_workspace\_security\_group\_id](#output\_sso\_workspace\_security\_group\_id) | ID of the workspace security group |
117 |
118 |
119 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
120 |
--------------------------------------------------------------------------------
/examples/studio/main.tf:
--------------------------------------------------------------------------------
1 | provider "aws" { # AWS provider configuration for this example
2 | region = local.region # value is set in the locals block below
3 | }
4 |
5 | data "aws_availability_zones" "available" {} # AZ lookup; its names feed local.azs
6 |
7 | data "aws_caller_identity" "current" {} # account_id is used in the kms:CallerAccount condition of module.kms
8 |
9 | data "aws_region" "current" {} # region name builds the kms:ViaService value in module.kms
10 |
11 | locals {
12 | region = "eu-west-1" # consumed by the aws provider block
13 | name = replace(basename(path.cwd), "-cluster", "") # working-directory name with any "-cluster" removed
14 |
15 | azs = slice(data.aws_availability_zones.available.names, 0, 3) # first three AZ names
16 | vpc_cidr = "10.0.0.0/16"
17 |
18 | tags = { # shared tag map passed as `tags` to the modules below
19 | GithubOrg = "terraform-aws-modules"
20 | GithubRepo = "terraform-aws-emr"
21 | Example = local.name
22 | }
23 | }
24 |
25 | ################################################################################
26 | # EMR Studio
27 | ################################################################################
28 |
29 | data "aws_ssoadmin_instances" "this" {} # supplies identity_store_ids for the group lookup below
30 |
31 | data "aws_identitystore_group" "this" { # looks up the identity-store group whose DisplayName is "AWSControlTowerAdmins"; its group_id feeds session_mappings
32 | identity_store_id = one(data.aws_ssoadmin_instances.this.identity_store_ids) # one() requires the collection to hold at most one element
33 |
34 | alternate_identifier {
35 | unique_attribute {
36 | attribute_path = "DisplayName"
37 | attribute_value = "AWSControlTowerAdmins" # NOTE(review): this group must already exist in the account's identity store - confirm before applying
38 | }
39 | }
40 | }
41 |
42 | module "emr_studio_complete" { # EMR Studio (SSO auth) that overrides the service role, user role and security group inputs of the studio sub-module
43 | source = "../../modules/studio"
44 |
45 | name = "${local.name}-complete"
46 | description = "EMR Studio using SSO authentication"
47 | auth_mode = "SSO"
48 | default_s3_location = "s3://${module.s3_bucket.s3_bucket_id}/complete"
49 |
50 | vpc_id = module.vpc.vpc_id
51 | subnet_ids = module.vpc.private_subnets
52 |
53 | # SSO mapping
54 | session_mappings = {
55 | admin_group = { # maps the group found by data.aws_identitystore_group.this to this studio
56 | identity_type = "GROUP"
57 | identity_id = data.aws_identitystore_group.this.group_id
58 | }
59 | }
60 |
61 | # Service role
62 | service_role_name = "${local.name}-complete-service"
63 | service_role_path = "/complete/"
64 | service_role_description = "EMR Studio complete service role"
65 | service_role_tags = { service = true }
66 | service_role_s3_bucket_arns = [ # bucket ARN plus the object ARN pattern for the prefix used by default_s3_location
67 | module.s3_bucket.s3_bucket_arn,
68 | "${module.s3_bucket.s3_bucket_arn}/complete/*" # objects under complete/ (a stray trailing "}" previously made this pattern match nothing useful)
69 | ]
70 |
71 | # User role
72 | user_role_name = "${local.name}-complete-user"
73 | user_role_path = "/complete/"
74 | user_role_description = "EMR Studio complete user role"
75 | user_role_tags = { user = true }
76 | user_role_s3_bucket_arns = [ # same bucket and object ARN pair as the service role
77 | module.s3_bucket.s3_bucket_arn,
78 | "${module.s3_bucket.s3_bucket_arn}/complete/*" # objects under complete/
79 | ]
80 |
81 | # Security groups
82 | security_group_name = "${local.name}-complete"
83 | security_group_tags = { complete = true }
84 |
85 | # Engine security group
86 | engine_security_group_description = "EMR Studio complete engine security group"
87 | engine_security_group_rules = {
88 | example = { # HTTPS egress limited to the VPC CIDR
89 | description = "Example egress to VPC network"
90 | type = "egress"
91 | from_port = 443
92 | to_port = 443
93 | protocol = "tcp"
94 | cidr_blocks = [module.vpc.vpc_cidr_block]
95 | }
96 | }
97 |
98 | # Workspace security group
99 | workspace_security_group_description = "EMR Studio complete workspace security group"
100 | workspace_security_group_rules = {
101 | example = { # HTTPS egress to any IPv4 address
102 | description = "Example egress to internet"
103 | type = "egress"
104 | from_port = 443
105 | to_port = 443
106 | protocol = "tcp"
107 | cidr_blocks = ["0.0.0.0/0"] # matches the rule description; the VPC CIDR used before duplicated the engine rule
108 | }
109 | }
110 |
111 | tags = local.tags
112 | }
113 |
114 | module "emr_studio_sso" { # EMR Studio in SSO auth mode passing only the core inputs (no role or security-group overrides)
115 | source = "../../modules/studio"
116 |
117 | name = "${local.name}-sso"
118 | description = "EMR Studio using SSO authentication"
119 | auth_mode = "SSO"
120 | default_s3_location = "s3://${module.s3_bucket.s3_bucket_id}/example" # same bucket and prefix as the IAM studio below
121 |
122 | vpc_id = module.vpc.vpc_id
123 | subnet_ids = module.vpc.private_subnets
124 |
125 | # SSO Mapping
126 | session_mappings = {
127 | admin_group = { # maps the group found by data.aws_identitystore_group.this to this studio
128 | identity_type = "GROUP"
129 | identity_id = data.aws_identitystore_group.this.group_id
130 | }
131 | }
132 |
133 | tags = local.tags
134 | }
135 |
136 | module "emr_studio_iam" { # EMR Studio in IAM auth mode with a KMS encryption key; no session_mappings are passed
137 | source = "../../modules/studio"
138 |
139 | name = "${local.name}-iam"
140 | auth_mode = "IAM"
141 | default_s3_location = "s3://${module.s3_bucket.s3_bucket_id}/example"
142 |
143 | vpc_id = module.vpc.vpc_id
144 | subnet_ids = module.vpc.private_subnets
145 |
146 | encryption_key_arn = module.kms.key_arn # key declared in module.kms, whose key statement names this studio's service role as principal
147 |
148 | service_role_statements = [ # extra service-role permissions: the same five KMS actions that module.kms allows in its key statement
149 | {
150 | effect = "Allow"
151 | actions = [
152 | "kms:Decrypt",
153 | "kms:GenerateDataKey",
154 | "kms:ReEncryptFrom",
155 | "kms:ReEncryptTo",
156 | "kms:DescribeKey"
157 | ]
158 | resources = [module.kms.key_arn] # scoped to the single example key
159 | }
160 | ]
161 |
162 | tags = local.tags
163 | }
164 |
165 | module "emr_studio_disabled" { # instantiates the sub-module with creation switched off
166 | source = "../../modules/studio"
167 |
168 | create = false # NOTE(review): presumably no resources are created - confirm in modules/studio
169 | }
170 |
171 | ################################################################################
172 | # Supporting Resources
173 | ################################################################################
174 |
175 | module "vpc" { # VPC for the example: one public and one private /24 subnet per entry in local.azs
176 | source = "terraform-aws-modules/vpc/aws"
177 | version = "~> 5.0"
178 |
179 | cidr = local.vpc_cidr
180 | name = local.name
181 |
182 | azs = local.azs
183 | private_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx + 10)] # subnet numbers start at 10
184 | public_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx)] # subnet numbers start at 0
185 |
186 | enable_dns_hostnames = true
187 | single_nat_gateway = true
188 | enable_nat_gateway = true
189 |
190 | tags = local.tags
191 | }
192 |
193 | module "s3_bucket" { # bucket referenced by default_s3_location of the three studios above
194 | source = "terraform-aws-modules/s3-bucket/aws"
195 | version = "~> 4.0"
196 |
197 | bucket_prefix = "${local.name}-"
198 |
199 | # Allow deletion of non-empty bucket
200 | # Example usage only - not recommended for production
201 | force_destroy = true
202 |
203 | attach_deny_insecure_transport_policy = true
204 | attach_require_latest_tls_policy = true
205 |
206 | block_public_acls = true # all four public-access-block flags are enabled
207 | block_public_policy = true
208 | ignore_public_acls = true
209 | restrict_public_buckets = true
210 |
211 | server_side_encryption_configuration = {
212 | rule = {
213 | apply_server_side_encryption_by_default = {
214 | sse_algorithm = "AES256" # NOTE(review): bucket default encryption is AES256, not the key from module.kms - confirm this is intended
215 | }
216 | }
217 | }
218 |
219 | tags = local.tags
220 | }
221 |
222 | module "kms" { # KMS key passed to module.emr_studio_iam as encryption_key_arn
223 | source = "terraform-aws-modules/kms/aws"
224 | version = "~> 2.0"
225 |
226 | deletion_window_in_days = 7
227 | description = "KMS key for ${local.name}."
228 | enable_key_rotation = true
229 | is_enabled = true
230 | key_usage = "ENCRYPT_DECRYPT"
231 | enable_default_policy = true
232 | key_statements = [ # one extra key-policy statement for the IAM studio's service role
233 | {
234 | sid = "EMRStudio"
235 | actions = [
236 | "kms:Decrypt",
237 | "kms:GenerateDataKey",
238 | "kms:ReEncryptFrom",
239 | "kms:ReEncryptTo",
240 | "kms:DescribeKey"
241 | ]
242 | resources = ["*"]
243 |
244 | principals = [
245 | {
246 | type = "AWS"
247 | identifiers = [module.emr_studio_iam.service_iam_role_arn] # service role output of the IAM-mode studio
248 | }
249 | ]
250 |
251 | conditions = [ # three StringEquals conditions: caller account, S3 encryption context, and calling service
252 | {
253 | test = "StringEquals"
254 | variable = "kms:CallerAccount"
255 | values = [data.aws_caller_identity.current.account_id] # the account running this example
256 | },
257 | {
258 | test = "StringEquals"
259 | variable = "kms:EncryptionContext:aws:s3:arn"
260 | values = [module.s3_bucket.s3_bucket_arn] # ARN of the example bucket
261 | },
262 | {
263 | test = "StringEquals"
264 | variable = "kms:ViaService"
265 | values = ["s3.${data.aws_region.current.name}.amazonaws.com"] # S3 endpoint name built from the current region
266 | }
267 | ]
268 | }
269 | ]
270 |
271 | aliases = [local.name]
272 |
273 | tags = local.tags
274 | }
275 |
--------------------------------------------------------------------------------
/examples/studio/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Complete - values re-exported from module.emr_studio_complete
3 | ################################################################################
4 |
5 | output "complete_arn" {
6 | description = "ARN of the studio"
7 | value = module.emr_studio_complete.arn
8 | }
9 |
10 | output "complete_url" {
11 | description = "The unique access URL of the Amazon EMR Studio"
12 | value = module.emr_studio_complete.url
13 | }
14 |
15 | output "complete_service_iam_role_name" {
16 | description = "Service IAM role name"
17 | value = module.emr_studio_complete.service_iam_role_name
18 | }
19 |
20 | output "complete_service_iam_role_arn" {
21 | description = "Service IAM role ARN"
22 | value = module.emr_studio_complete.service_iam_role_arn
23 | }
24 |
25 | output "complete_service_iam_role_unique_id" {
26 | description = "Stable and unique string identifying the service IAM role"
27 | value = module.emr_studio_complete.service_iam_role_unique_id
28 | }
29 |
30 | output "complete_service_iam_role_policy_arn" {
31 | description = "Service IAM role policy ARN"
32 | value = module.emr_studio_complete.service_iam_role_policy_arn
33 | }
34 |
35 | output "complete_service_iam_role_policy_id" {
36 | description = "Service IAM role policy ID"
37 | value = module.emr_studio_complete.service_iam_role_policy_id
38 | }
39 |
40 | output "complete_service_iam_role_policy_name" {
41 | description = "The name of the service role policy"
42 | value = module.emr_studio_complete.service_iam_role_policy_name
43 | }
44 |
45 | output "complete_user_iam_role_name" {
46 | description = "User IAM role name"
47 | value = module.emr_studio_complete.user_iam_role_name
48 | }
49 |
50 | output "complete_user_iam_role_arn" {
51 | description = "User IAM role ARN"
52 | value = module.emr_studio_complete.user_iam_role_arn
53 | }
54 |
55 | output "complete_user_iam_role_unique_id" {
56 | description = "Stable and unique string identifying the user IAM role"
57 | value = module.emr_studio_complete.user_iam_role_unique_id
58 | }
59 |
60 | output "complete_user_iam_role_policy_arn" {
61 | description = "User IAM role policy ARN"
62 | value = module.emr_studio_complete.user_iam_role_policy_arn
63 | }
64 |
65 | output "complete_user_iam_role_policy_id" {
66 | description = "User IAM role policy ID"
67 | value = module.emr_studio_complete.user_iam_role_policy_id
68 | }
69 |
70 | output "complete_user_iam_role_policy_name" {
71 | description = "The name of the user role policy"
72 | value = module.emr_studio_complete.user_iam_role_policy_name
73 | }
74 |
75 | output "complete_engine_security_group_arn" {
76 | description = "Amazon Resource Name (ARN) of the engine security group"
77 | value = module.emr_studio_complete.engine_security_group_arn
78 | }
79 |
80 | output "complete_engine_security_group_id" {
81 | description = "ID of the engine security group"
82 | value = module.emr_studio_complete.engine_security_group_id
83 | }
84 |
85 | output "complete_workspace_security_group_arn" {
86 | description = "Amazon Resource Name (ARN) of the workspace security group"
87 | value = module.emr_studio_complete.workspace_security_group_arn
88 | }
89 |
90 | output "complete_workspace_security_group_id" {
91 | description = "ID of the workspace security group"
92 | value = module.emr_studio_complete.workspace_security_group_id
93 | }
94 |
95 | ################################################################################
96 | # SSO - values re-exported from module.emr_studio_sso
97 | ################################################################################
98 |
99 | output "sso_arn" {
100 | description = "ARN of the studio"
101 | value = module.emr_studio_sso.arn
102 | }
103 |
104 | output "sso_url" {
105 | description = "The unique access URL of the Amazon EMR Studio"
106 | value = module.emr_studio_sso.url
107 | }
108 |
109 | output "sso_service_iam_role_name" {
110 | description = "Service IAM role name"
111 | value = module.emr_studio_sso.service_iam_role_name
112 | }
113 |
114 | output "sso_service_iam_role_arn" {
115 | description = "Service IAM role ARN"
116 | value = module.emr_studio_sso.service_iam_role_arn
117 | }
118 |
119 | output "sso_service_iam_role_unique_id" {
120 | description = "Stable and unique string identifying the service IAM role"
121 | value = module.emr_studio_sso.service_iam_role_unique_id
122 | }
123 |
124 | output "sso_service_iam_role_policy_arn" {
125 | description = "Service IAM role policy ARN"
126 | value = module.emr_studio_sso.service_iam_role_policy_arn
127 | }
128 |
129 | output "sso_service_iam_role_policy_id" {
130 | description = "Service IAM role policy ID"
131 | value = module.emr_studio_sso.service_iam_role_policy_id
132 | }
133 |
134 | output "sso_service_iam_role_policy_name" {
135 | description = "The name of the service role policy"
136 | value = module.emr_studio_sso.service_iam_role_policy_name
137 | }
138 |
139 | output "sso_user_iam_role_name" {
140 | description = "User IAM role name"
141 | value = module.emr_studio_sso.user_iam_role_name
142 | }
143 |
144 | output "sso_user_iam_role_arn" {
145 | description = "User IAM role ARN"
146 | value = module.emr_studio_sso.user_iam_role_arn
147 | }
148 |
149 | output "sso_user_iam_role_unique_id" {
150 | description = "Stable and unique string identifying the user IAM role"
151 | value = module.emr_studio_sso.user_iam_role_unique_id
152 | }
153 |
154 | output "sso_user_iam_role_policy_arn" {
155 | description = "User IAM role policy ARN"
156 | value = module.emr_studio_sso.user_iam_role_policy_arn
157 | }
158 |
159 | output "sso_user_iam_role_policy_id" {
160 | description = "User IAM role policy ID"
161 | value = module.emr_studio_sso.user_iam_role_policy_id
162 | }
163 |
164 | output "sso_user_iam_role_policy_name" {
165 | description = "The name of the user role policy"
166 | value = module.emr_studio_sso.user_iam_role_policy_name
167 | }
168 |
169 | output "sso_engine_security_group_arn" {
170 | description = "Amazon Resource Name (ARN) of the engine security group"
171 | value = module.emr_studio_sso.engine_security_group_arn
172 | }
173 |
174 | output "sso_engine_security_group_id" {
175 | description = "ID of the engine security group"
176 | value = module.emr_studio_sso.engine_security_group_id
177 | }
178 |
179 | output "sso_workspace_security_group_arn" {
180 | description = "Amazon Resource Name (ARN) of the workspace security group"
181 | value = module.emr_studio_sso.workspace_security_group_arn
182 | }
183 |
184 | output "sso_workspace_security_group_id" {
185 | description = "ID of the workspace security group"
186 | value = module.emr_studio_sso.workspace_security_group_id
187 | }
188 |
189 | ################################################################################
190 | # IAM - values re-exported from module.emr_studio_iam
191 | ################################################################################
192 |
193 | output "iam_arn" {
194 | description = "ARN of the studio"
195 | value = module.emr_studio_iam.arn
196 | }
197 |
198 | output "iam_url" {
199 | description = "The unique access URL of the Amazon EMR Studio"
200 | value = module.emr_studio_iam.url
201 | }
202 |
203 | output "iam_service_iam_role_name" {
204 | description = "Service IAM role name"
205 | value = module.emr_studio_iam.service_iam_role_name
206 | }
207 |
208 | output "iam_service_iam_role_arn" {
209 | description = "Service IAM role ARN"
210 | value = module.emr_studio_iam.service_iam_role_arn
211 | }
212 |
213 | output "iam_service_iam_role_unique_id" {
214 | description = "Stable and unique string identifying the service IAM role"
215 | value = module.emr_studio_iam.service_iam_role_unique_id
216 | }
217 |
218 | output "iam_service_iam_role_policy_arn" {
219 | description = "Service IAM role policy ARN"
220 | value = module.emr_studio_iam.service_iam_role_policy_arn
221 | }
222 |
223 | output "iam_service_iam_role_policy_id" {
224 | description = "Service IAM role policy ID"
225 | value = module.emr_studio_iam.service_iam_role_policy_id
226 | }
227 |
228 | output "iam_service_iam_role_policy_name" {
229 | description = "The name of the service role policy"
230 | value = module.emr_studio_iam.service_iam_role_policy_name
231 | }
232 |
233 | output "iam_user_iam_role_name" {
234 | description = "User IAM role name"
235 | value = module.emr_studio_iam.user_iam_role_name
236 | }
237 |
238 | output "iam_user_iam_role_arn" {
239 | description = "User IAM role ARN"
240 | value = module.emr_studio_iam.user_iam_role_arn
241 | }
242 |
243 | output "iam_user_iam_role_unique_id" {
244 | description = "Stable and unique string identifying the user IAM role"
245 | value = module.emr_studio_iam.user_iam_role_unique_id
246 | }
247 |
248 | output "iam_user_iam_role_policy_arn" {
249 | description = "User IAM role policy ARN"
250 | value = module.emr_studio_iam.user_iam_role_policy_arn
251 | }
252 |
253 | output "iam_user_iam_role_policy_id" {
254 | description = "User IAM role policy ID"
255 | value = module.emr_studio_iam.user_iam_role_policy_id
256 | }
257 |
258 | output "iam_user_iam_role_policy_name" {
259 | description = "The name of the user role policy"
260 | value = module.emr_studio_iam.user_iam_role_policy_name
261 | }
262 |
263 | output "iam_engine_security_group_arn" {
264 | description = "Amazon Resource Name (ARN) of the engine security group"
265 | value = module.emr_studio_iam.engine_security_group_arn
266 | }
267 |
268 | output "iam_engine_security_group_id" {
269 | description = "ID of the engine security group"
270 | value = module.emr_studio_iam.engine_security_group_id
271 | }
272 |
273 | output "iam_workspace_security_group_arn" {
274 | description = "Amazon Resource Name (ARN) of the workspace security group"
275 | value = module.emr_studio_iam.workspace_security_group_arn
276 | }
277 |
278 | output "iam_workspace_security_group_id" {
279 | description = "ID of the workspace security group"
280 | value = module.emr_studio_iam.workspace_security_group_id
281 | }
282 |
--------------------------------------------------------------------------------
/examples/studio/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/examples/studio/variables.tf
--------------------------------------------------------------------------------
/examples/studio/versions.tf:
--------------------------------------------------------------------------------
# Version constraints for the studio example.
terraform {
  # Minimum Terraform CLI version accepted by this configuration.
  required_version = ">= 1.0"

  required_providers {
    # AWS provider from the `hashicorp` registry namespace.
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.83"
    }
  }
}
11 |
--------------------------------------------------------------------------------
/examples/virtual-cluster/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Virtual Cluster Example
2 |
3 | This example shows how to provision a serverless cluster (serverless data plane) using Fargate Profiles to support EMR on EKS virtual clusters.
4 |
5 | There are two Fargate profiles created:
6 | 1. `kube-system` to support core Kubernetes components such as CoreDNS
7 | 2. `emr-wildcard` which supports any namespaces that begin with `emr-*`; this allows for creating multiple virtual clusters without having to create additional Fargate profiles for each new cluster.
8 |
9 | The resources created by the `virtual-cluster` module include:
10 | - Kubernetes namespace, role, and role binding; existing or externally created namespace and role can be utilized as well
11 | - IAM role for service account (IRSA) used for job execution. Users can scope access to the appropriate S3 bucket and path via `s3_bucket_arns`, which is used both for accessing job data and for writing out results. The bare minimum permissions have been provided for the job execution role; users can provide additional permissions by passing in additional policies to attach to the role via `iam_role_additional_policies`
12 | - CloudWatch log group for task execution logs. Log streams are created by the job itself and not via Terraform
13 | - EMR managed security group for the virtual cluster
14 | - EMR virtual cluster scoped to the namespace created/provided
15 |
16 | ## Usage
17 |
18 | To run this example you need to execute:
19 |
20 | ```bash
21 | $ terraform init
22 | $ terraform plan
23 | $ terraform apply
24 | ```
25 |
26 | Note that this example may create resources which will incur monetary charges on your AWS bill. Run `terraform destroy` when you no longer need these resources.
27 |
28 | ## Destroy
29 |
30 | If the EMR virtual cluster fails to delete and the following error is shown:
31 |
32 | > Error: waiting for EMR Containers Virtual Cluster (xwbc22787q6g1wscfawttzzgb) delete: unexpected state 'ARRESTED', wanted target ''. last error: %!s()
33 |
34 | You can clean up any of the clusters in the `ARRESTED` state with the following:
35 |
36 | ```sh
37 | aws emr-containers list-virtual-clusters --region eu-west-1 --states ARRESTED \
38 | --query 'virtualClusters[0].id' --output text | xargs -I{} aws emr-containers delete-virtual-cluster \
39 | --region eu-west-1 --id {}
40 | ```
41 |
42 |
43 | ## Requirements
44 |
45 | | Name | Version |
46 | |------|---------|
47 | | [terraform](#requirement\_terraform) | >= 1.0 |
48 | | [aws](#requirement\_aws) | >= 5.83 |
49 | | [kubernetes](#requirement\_kubernetes) | >= 2.17 |
50 | | [null](#requirement\_null) | >= 3.0 |
51 | | [time](#requirement\_time) | >= 0.7 |
52 |
53 | ## Providers
54 |
55 | | Name | Version |
56 | |------|---------|
57 | | [aws](#provider\_aws) | >= 5.83 |
58 | | [null](#provider\_null) | >= 3.0 |
59 | | [time](#provider\_time) | >= 0.7 |
60 |
61 | ## Modules
62 |
63 | | Name | Source | Version |
64 | |------|--------|---------|
65 | | [complete](#module\_complete) | ../../modules/virtual-cluster | n/a |
66 | | [default](#module\_default) | ../../modules/virtual-cluster | n/a |
67 | | [disabled](#module\_disabled) | ../../modules/virtual-cluster | n/a |
68 | | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.13 |
69 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 4.0 |
70 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
71 | | [vpc\_endpoints](#module\_vpc\_endpoints) | terraform-aws-modules/vpc/aws//modules/vpc-endpoints | ~> 5.0 |
72 | | [vpc\_endpoints\_sg](#module\_vpc\_endpoints\_sg) | terraform-aws-modules/security-group/aws | ~> 5.0 |
73 |
74 | ## Resources
75 |
76 | | Name | Type |
77 | |------|------|
78 | | [null_resource.s3_sync](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
79 | | [null_resource.start_job_run](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
80 | | [time_sleep.coredns](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource |
81 | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
82 | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
83 |
84 | ## Inputs
85 |
86 | No inputs.
87 |
88 | ## Outputs
89 |
90 | | Name | Description |
91 | |------|-------------|
92 | | [complete\_cloudwatch\_log\_group\_arn](#output\_complete\_cloudwatch\_log\_group\_arn) | Arn of cloudwatch log group created |
93 | | [complete\_cloudwatch\_log\_group\_name](#output\_complete\_cloudwatch\_log\_group\_name) | Name of cloudwatch log group created |
94 | | [complete\_job\_execution\_role\_arn](#output\_complete\_job\_execution\_role\_arn) | IAM role ARN of the job execution role |
95 | | [complete\_job\_execution\_role\_name](#output\_complete\_job\_execution\_role\_name) | IAM role name of the job execution role |
96 | | [complete\_job\_execution\_role\_unique\_id](#output\_complete\_job\_execution\_role\_unique\_id) | Stable and unique string identifying the job execution IAM role |
97 | | [complete\_virtual\_cluster\_arn](#output\_complete\_virtual\_cluster\_arn) | ARN of the EMR virtual cluster |
98 | | [complete\_virtual\_cluster\_id](#output\_complete\_virtual\_cluster\_id) | ID of the EMR virtual cluster |
99 |
100 |
101 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
102 |
--------------------------------------------------------------------------------
/examples/virtual-cluster/main.tf:
--------------------------------------------------------------------------------
# AWS provider pinned to the region chosen in `locals`.
provider "aws" {
  region = local.region
}
4 |
# Kubernetes provider pointed at the EKS cluster created by `module.eks`.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  # Credentials come from running `aws eks get-token` for the cluster.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed
    args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
  }
}
16 |
# Account ID is used to build the EMR containers service-linked role ARN.
data "aws_caller_identity" "current" {}

# Availability zone names; `local.azs` takes the first three.
data "aws_availability_zones" "available" {}
19 |
# Shared values for the example.
locals {
  # Working-directory name with the "-cluster" part removed.
  name   = replace(basename(path.cwd), "-cluster", "")
  region = "eu-west-1"

  vpc_cidr = "10.0.0.0/16"
  # First three availability zones returned by the data source.
  azs = slice(data.aws_availability_zones.available.names, 0, 3)

  # Tags applied to the modules in this example via `tags = local.tags`.
  tags = {
    Example    = local.name
    GithubRepo = "terraform-aws-emr"
    GithubOrg  = "terraform-aws-modules"
  }
}
33 |
34 | ################################################################################
35 | # Cluster
36 | ################################################################################
37 |
# Virtual cluster with the namespace and IAM role options set explicitly.
module "complete" {
  source = "../../modules/virtual-cluster"

  name             = "emr-custom"
  create_namespace = true
  namespace        = "emr-custom"

  # Job execution role settings.
  create_iam_role               = true
  role_name                     = "emr-custom-role"
  iam_role_use_name_prefix      = false
  iam_role_path                 = "/"
  iam_role_description          = "EMR custom Role"
  iam_role_permissions_boundary = null
  iam_role_additional_policies  = []

  # Both the bucket ARN and the `/*` object ARN are passed in.
  s3_bucket_arns = [
    module.s3_bucket.s3_bucket_arn,
    "${module.s3_bucket.s3_bucket_arn}/*"
  ]

  tags = local.tags
}
59 |
# Virtual cluster that sets only the namespace and tags.
module "default" {
  source = "../../modules/virtual-cluster"

  namespace = "emr-default"
  tags      = local.tags
}
67 |
# Module instance with `create = false`.
module "disabled" {
  source = "../../modules/virtual-cluster"

  create = false
}
73 |
74 | ################################################################################
75 | # Sample Spark Job
76 | ################################################################################
77 |
# Copies the sample workshop scripts into the example bucket with the local
# AWS CLI (`aws s3 sync`), run through `/bin/sh -c`.
resource "null_resource" "s3_sync" {
  provisioner "local-exec" {
    interpreter = ["/bin/sh", "-c"]

    # The CLI call runs in the same region as the rest of the example.
    environment = {
      AWS_DEFAULT_REGION = local.region
    }

    # Sync to a bucket that we can provide access to (see `s3_bucket_arns` above)
    command = <<-EOT
      aws s3 sync s3://aws-data-analytics-workshops/emr-eks-workshop/scripts/ s3://${module.s3_bucket.s3_bucket_id}/emr-eks-workshop/scripts/
    EOT
  }
}
92 |
# Waits 60 seconds, keyed on the CoreDNS addon ID.
resource "time_sleep" "coredns" {
  create_duration = "60s"

  # In practice, this generally won't be necessary since the cluster will be provisioned long before jobs are scheduled on the cluster
  # However, for this example, it's necessary to ensure CoreDNS is ready before we schedule the example job
  triggers = {
    coredns = module.eks.cluster_addons["coredns"].id
  }
}
102 |
# Submits a sample Spark job (`pi.py`) to the `complete` virtual cluster using
# the local AWS CLI (`aws emr-containers start-job-run`).
resource "null_resource" "start_job_run" {
  provisioner "local-exec" {
    interpreter = ["/bin/sh", "-c"]

    # The CLI call runs in the same region as the rest of the example.
    environment = {
      AWS_DEFAULT_REGION = local.region
    }

    # The entry point is read from the example bucket; logs go to the log group
    # created by the `complete` module.
    command = <<-EOT
      aws emr-containers start-job-run \
        --virtual-cluster-id ${module.complete.virtual_cluster_id} \
        --name ${local.name}-example \
        --execution-role-arn ${module.complete.job_execution_role_arn} \
        --release-label emr-6.8.0-latest \
        --job-driver '{
          "sparkSubmitJobDriver": {
            "entryPoint": "s3://${module.s3_bucket.s3_bucket_id}/emr-eks-workshop/scripts/pi.py",
            "sparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.executor.cores=2 --conf spark.driver.cores=1"
          }
        }' \
        --configuration-overrides '{
          "applicationConfiguration": [
            {
              "classification": "spark-defaults",
              "properties": {
                "spark.driver.memory":"2G"
              }
            }
          ],
          "monitoringConfiguration": {
            "cloudWatchMonitoringConfiguration": {
              "logGroupName": "${module.complete.cloudwatch_log_group_name}",
              "logStreamNamePrefix": "eks-blueprints"
            }
          }
        }'
    EOT
  }

  depends_on = [
    # The job's entry point script is copied into the bucket by the sync step,
    # so the sync must finish before the job is submitted.
    null_resource.s3_sync,
    # CoreDNS must be ready before the job is scheduled.
    time_sleep.coredns,
  ]
}
146 |
147 | ################################################################################
148 | # Supporting Resources
149 | ################################################################################
150 |
# EKS cluster with Fargate profiles, used as the compute for the EMR virtual
# clusters in this example.
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 19.13"

  cluster_name                   = local.name
  cluster_version                = "1.27"
  cluster_endpoint_public_access = true

  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  cluster_addons = {
    coredns = {
      configuration_values = jsonencode({
        computeType = "Fargate"
        # Ensure that we fully utilize the minimum amount of resources that are supplied by
        # Fargate https://docs.aws.amazon.com/eks/latest/userguide/fargate-pod-configuration.html
        # Fargate adds 256 MB to each pod's memory reservation for the required Kubernetes
        # components (kubelet, kube-proxy, and containerd). Fargate rounds up to the following
        # compute configuration that most closely matches the sum of vCPU and memory requests in
        # order to ensure pods always have the resources that they need to run.
        resources = {
          limits = {
            cpu = "0.25"
            # We are targeting the smallest Task size of 512Mb, so we subtract 256Mb from the
            # request/limit to ensure we can fit within that task
            memory = "256M"
          }
          requests = {
            cpu = "0.25"
            # We are targeting the smallest Task size of 512Mb, so we subtract 256Mb from the
            # request/limit to ensure we can fit within that task
            memory = "256M"
          }
        }
      })
    }
    kube-proxy = {}
    vpc-cni    = {}
  }

  # Fargate profiles use the cluster primary security group so these are not utilized
  create_cluster_security_group = false
  create_node_security_group    = false

  manage_aws_auth_configmap = true
  aws_auth_roles = [
    {
      # Required for EMR on EKS virtual cluster
      rolearn  = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers"
      username = "emr-containers"
    },
  ]

  fargate_profiles = {
    # Matches every namespace whose name starts with "emr-".
    emr_wildcard = {
      selectors = [
        { namespace = "emr-*" }
      ]
    }
    # Matches the `kube-system` namespace.
    kube_system = {
      name = "kube-system"
      selectors = [
        { namespace = "kube-system" }
      ]
    }
  }

  tags = local.tags
}
221 |
# VPC with one public and one private /24 subnet per availability zone and a
# single shared NAT gateway.
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "~> 5.0"

  name = local.name
  cidr = local.vpc_cidr
  azs  = local.azs

  # Public subnets use network numbers 0..n-1; private subnets start at 10.
  public_subnets  = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx)]
  private_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 8, idx + 10)]

  enable_nat_gateway = true
  single_nat_gateway = true

  # Load balancer role tags for each subnet tier.
  public_subnet_tags = {
    "kubernetes.io/role/elb" = 1
  }

  private_subnet_tags = {
    "kubernetes.io/role/internal-elb" = 1
  }

  tags = local.tags
}
246 |
# VPC endpoints: an S3 gateway endpoint plus one endpoint per service in the
# list below, placed in the private subnets.
module "vpc_endpoints" {
  source  = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"
  version = "~> 5.0"

  vpc_id             = module.vpc.vpc_id
  security_group_ids = [module.vpc_endpoints_sg.security_group_id]

  endpoints = merge(
    {
      # Gateway endpoint attached to the private route tables.
      s3 = {
        service         = "s3"
        service_type    = "Gateway"
        route_table_ids = module.vpc.private_route_table_ids
        tags = {
          Name = "${local.name}-s3"
        }
      }
    },
    {
      # Map keys replace "." with "_" (e.g. "ecr.api" becomes "ecr_api").
      for svc in toset(["emr-containers", "ecr.api", "ecr.dkr", "sts", "logs"]) :
      replace(svc, ".", "_") => {
        service             = svc
        subnet_ids          = module.vpc.private_subnets
        private_dns_enabled = true
        tags                = { Name = "${local.name}-${svc}" }
      }
    }
  )

  tags = local.tags
}
276 |
# Security group for the VPC endpoints; allows HTTPS from the private subnet
# CIDR blocks.
module "vpc_endpoints_sg" {
  source  = "terraform-aws-modules/security-group/aws"
  version = "~> 5.0"

  name        = "${local.name}-vpc-endpoints"
  description = "Security group for VPC endpoint access"
  vpc_id      = module.vpc.vpc_id

  ingress_with_cidr_blocks = [
    {
      rule        = "https-443-tcp"
      description = "VPC CIDR HTTPS"
      # The CIDR blocks are passed as one comma-separated string.
      cidr_blocks = join(",", module.vpc.private_subnets_cidr_blocks)
    },
  ]

  tags = local.tags
}
295 |
# Bucket that holds the sample job scripts; public access is blocked and
# objects are encrypted with AES256.
module "s3_bucket" {
  source  = "terraform-aws-modules/s3-bucket/aws"
  version = "~> 4.0"

  bucket_prefix = "${local.name}-"

  # Allow deletion of non-empty bucket
  # Example usage only - not recommended for production
  force_destroy = true

  # Bucket policies for transport security.
  attach_deny_insecure_transport_policy = true
  attach_require_latest_tls_policy      = true

  # Public access block settings (all enabled).
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  server_side_encryption_configuration = {
    rule = {
      apply_server_side_encryption_by_default = {
        sse_algorithm = "AES256"
      }
    }
  }

  tags = local.tags
}
324 |
--------------------------------------------------------------------------------
/examples/virtual-cluster/outputs.tf:
--------------------------------------------------------------------------------
1 |
2 | ################################################################################
3 | # Complete
4 | ################################################################################
5 |
# Job execution role of the `complete` module instance.
output "complete_job_execution_role_name" {
  description = "IAM role name of the job execution role"
  value       = module.complete.job_execution_role_name
}

output "complete_job_execution_role_arn" {
  description = "IAM role ARN of the job execution role"
  value       = module.complete.job_execution_role_arn
}

output "complete_job_execution_role_unique_id" {
  description = "Stable and unique string identifying the job execution IAM role"
  value       = module.complete.job_execution_role_unique_id
}

# EMR virtual cluster of the `complete` module instance.
output "complete_virtual_cluster_arn" {
  description = "ARN of the EMR virtual cluster"
  value       = module.complete.virtual_cluster_arn
}

output "complete_virtual_cluster_id" {
  description = "ID of the EMR virtual cluster"
  value       = module.complete.virtual_cluster_id
}

# CloudWatch log group of the `complete` module instance.
output "complete_cloudwatch_log_group_name" {
  description = "Name of cloudwatch log group created"
  value       = module.complete.cloudwatch_log_group_name
}

output "complete_cloudwatch_log_group_arn" {
  description = "Arn of cloudwatch log group created"
  value       = module.complete.cloudwatch_log_group_arn
}
40 |
--------------------------------------------------------------------------------
/examples/virtual-cluster/variables.tf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraform-aws-modules/terraform-aws-emr/0ddd4c67d45ef1ff3c1d80f5ec2975e900b95561/examples/virtual-cluster/variables.tf
--------------------------------------------------------------------------------
/examples/virtual-cluster/versions.tf:
--------------------------------------------------------------------------------
# Version constraints for the virtual-cluster example.
terraform {
  # Minimum Terraform CLI version accepted by this configuration.
  required_version = ">= 1.0"

  # All providers come from the `hashicorp` registry namespace.
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.83"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = ">= 2.17"
    }
    # Used by the `null_resource` provisioners in main.tf.
    null = {
      source  = "hashicorp/null"
      version = ">= 3.0"
    }
    # Used by `time_sleep.coredns` in main.tf.
    time = {
      source  = "hashicorp/time"
      version = ">= 0.7"
    }
  }
}
23 |
--------------------------------------------------------------------------------
/modules/serverless/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Serverless Terraform module
2 |
3 | Terraform module which creates AWS EMR Serverless resources.
4 |
5 | ## Usage
6 |
7 | See [`examples`](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples) directory for working examples to reference:
8 |
9 | ### Spark Cluster
10 |
11 | ```hcl
12 | module "emr_serverless" {
13 | source = "terraform-aws-modules/emr/aws//modules/serverless"
14 |
15 | name = "example-spark"
16 |
17 | release_label_prefix = "emr-6"
18 |
19 | initial_capacity = {
20 | driver = {
21 | initial_capacity_type = "Driver"
22 |
23 | initial_capacity_config = {
24 | worker_count = 2
25 | worker_configuration = {
26 | cpu = "4 vCPU"
27 | memory = "12 GB"
28 | }
29 | }
30 | }
31 |
32 | executor = {
33 | initial_capacity_type = "Executor"
34 |
35 | initial_capacity_config = {
36 | worker_count = 2
37 | worker_configuration = {
38 | cpu = "8 vCPU"
39 | disk = "64 GB"
40 | memory = "24 GB"
41 | }
42 | }
43 | }
44 | }
45 |
46 | maximum_capacity = {
47 | cpu = "48 vCPU"
48 | memory = "144 GB"
49 | }
50 |
51 | network_configuration = {
52 | subnet_ids = ["subnet-abcde012", "subnet-bcde012a", "subnet-fghi345a"]
53 | }
54 |
55 | security_group_rules = {
56 | egress_all = {
57 | from_port = 0
58 | to_port = 0
59 | protocol = "-1"
60 | cidr_blocks = ["0.0.0.0/0"]
61 | }
62 | }
63 |
64 | tags = {
65 | Terraform = "true"
66 | Environment = "dev"
67 | }
68 | }
69 | ```
70 |
71 | ### Hive Cluster
72 |
73 | ```hcl
74 | module "emr_serverless" {
75 | source = "terraform-aws-modules/emr/aws//modules/serverless"
76 |
77 | name = "example-hive"
78 |
79 | release_label_prefix = "emr-6"
80 | type = "hive"
81 |
82 | initial_capacity = {
83 | driver = {
84 | initial_capacity_type = "HiveDriver"
85 |
86 | initial_capacity_config = {
87 | worker_count = 2
88 | worker_configuration = {
89 | cpu = "2 vCPU"
90 | memory = "6 GB"
91 | }
92 | }
93 | }
94 |
95 | task = {
96 | initial_capacity_type = "TezTask"
97 |
98 | initial_capacity_config = {
99 | worker_count = 2
100 | worker_configuration = {
101 | cpu = "4 vCPU"
102 | disk = "32 GB"
103 | memory = "12 GB"
104 | }
105 | }
106 | }
107 | }
108 |
109 | maximum_capacity = {
110 | cpu = "24 vCPU"
111 | memory = "72 GB"
112 | }
113 |
114 | tags = {
115 | Terraform = "true"
116 | Environment = "dev"
117 | }
118 | }
119 | ```
120 |
121 | ## Examples
122 |
123 | Examples codified under the [`examples`](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples) are intended to give users references for how to use the module(s) as well as testing/validating changes to the source code of the module. If contributing to the project, please be sure to make any appropriate updates to the relevant examples to allow maintainers to test your changes and to keep the examples up to date for users. Thank you!
124 |
125 | - [Private clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/private-cluster) using instance fleet or instance group
126 | - [Public clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/public-cluster) using instance fleet or instance group
127 | - [Serverless clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/serverless-cluster) running Spark or Hive
128 | - [Studios](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/studio) with either IAM or SSO authentication
129 | - [Virtual cluster](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/virtual-cluster) running on Amazon EKS
130 |
131 |
132 | ## Requirements
133 |
134 | | Name | Version |
135 | |------|---------|
136 | | [terraform](#requirement\_terraform) | >= 1.0 |
137 | | [aws](#requirement\_aws) | >= 5.83 |
138 |
139 | ## Providers
140 |
141 | | Name | Version |
142 | |------|---------|
143 | | [aws](#provider\_aws) | >= 5.83 |
144 |
145 | ## Modules
146 |
147 | No modules.
148 |
149 | ## Resources
150 |
151 | | Name | Type |
152 | |------|------|
153 | | [aws_emrserverless_application.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrserverless_application) | resource |
154 | | [aws_security_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource |
155 | | [aws_security_group_rule.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource |
156 | | [aws_emr_release_labels.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/emr_release_labels) | data source |
157 | | [aws_subnet.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnet) | data source |
158 |
159 | ## Inputs
160 |
161 | | Name | Description | Type | Default | Required |
162 | |------|-------------|------|---------|:--------:|
163 | | [architecture](#input\_architecture) | The CPU architecture of an application. Valid values are `ARM64` or `X86_64`. Default value is `X86_64` | `string` | `null` | no |
164 | | [auto\_start\_configuration](#input\_auto\_start\_configuration) | The configuration for an application to automatically start on job submission | `any` | `{}` | no |
165 | | [auto\_stop\_configuration](#input\_auto\_stop\_configuration) | The configuration for an application to automatically stop after a certain amount of time being idle | `any` | `{}` | no |
166 | | [create](#input\_create) | Controls if resources should be created (affects nearly all resources) | `bool` | `true` | no |
167 | | [create\_security\_group](#input\_create\_security\_group) | Determines whether the security group is created | `bool` | `true` | no |
168 | | [image\_configuration](#input\_image\_configuration) | The image configuration applied to all worker types | `any` | `{}` | no |
169 | | [initial\_capacity](#input\_initial\_capacity) | The capacity to initialize when the application is created | `any` | `{}` | no |
170 | | [interactive\_configuration](#input\_interactive\_configuration) | Enables the interactive use cases to use when running an application | `any` | `{}` | no |
171 | | [maximum\_capacity](#input\_maximum\_capacity) | The maximum capacity to allocate when the application is created. This is cumulative across all workers at any given point in time, not just when an application is created. No new resources will be created once any one of the defined limits is hit | `any` | `{}` | no |
172 | | [name](#input\_name) | The name of the application | `string` | `""` | no |
173 | | [network\_configuration](#input\_network\_configuration) | The network configuration for customer VPC connectivity | `any` | `{}` | no |
174 | | [release\_label](#input\_release\_label) | Release label for the Amazon EMR release | `string` | `null` | no |
175 | | [release\_label\_prefix](#input\_release\_label\_prefix) | Release label prefix used to lookup a release label | `string` | `"emr-6"` | no |
176 | | [security\_group\_description](#input\_security\_group\_description) | Description of the security group created | `string` | `null` | no |
177 | | [security\_group\_name](#input\_security\_group\_name) | Name to use on security group created | `string` | `null` | no |
178 | | [security\_group\_rules](#input\_security\_group\_rules) | Security group rules to add to the security group created | `any` | `{}` | no |
179 | | [security\_group\_tags](#input\_security\_group\_tags) | A map of additional tags to add to the security group created | `map(string)` | `{}` | no |
180 | | [security\_group\_use\_name\_prefix](#input\_security\_group\_use\_name\_prefix) | Determines whether the security group name (`security_group_name`) is used as a prefix | `bool` | `true` | no |
181 | | [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no |
182 | | [type](#input\_type) | The type of application you want to start, such as `spark` or `hive`. Defaults to `spark` | `string` | `"spark"` | no |
183 |
184 | ## Outputs
185 |
186 | | Name | Description |
187 | |------|-------------|
188 | | [arn](#output\_arn) | Amazon Resource Name (ARN) of the application |
189 | | [id](#output\_id) | ID of the application |
190 | | [security\_group\_arn](#output\_security\_group\_arn) | Amazon Resource Name (ARN) of the security group |
191 | | [security\_group\_id](#output\_security\_group\_id) | ID of the security group |
192 |
193 |
194 | ## License
195 |
196 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
197 |
--------------------------------------------------------------------------------
/modules/serverless/main.tf:
--------------------------------------------------------------------------------
# Looks up EMR release labels that match `var.release_label_prefix`.
# Skipped entirely when `var.create` is false.
data "aws_emr_release_labels" "this" {
  count = var.create ? 1 : 0

  filters {
    prefix = var.release_label_prefix
  }
}
8 |
locals {
  # User-supplied tags plus a fixed `terraform-aws-modules = "emr"` tag.
  tags = merge(
    var.tags,
    { terraform-aws-modules = "emr" },
  )
}
12 |
13 | ################################################################################
14 | # Application
15 | ################################################################################
16 |
# EMR Serverless application. The auto start/stop blocks are always rendered
# from their inputs. The interactive, maximum capacity, network and image
# blocks are rendered only when the corresponding input is non-empty.
resource "aws_emrserverless_application" "this" {
  count = var.create ? 1 : 0

  name         = var.name
  type         = var.type
  architecture = var.architecture

  # An explicit `var.release_label` wins; otherwise the first label from the
  # prefix lookup is used. Falls back to "" when neither can be resolved.
  release_label = try(coalesce(var.release_label, element(data.aws_emr_release_labels.this[0].release_labels, 0)), "")

  dynamic "auto_start_configuration" {
    for_each = [var.auto_start_configuration]

    content {
      enabled = try(auto_start_configuration.value.enabled, null)
    }
  }

  dynamic "auto_stop_configuration" {
    for_each = [var.auto_stop_configuration]

    content {
      enabled              = try(auto_stop_configuration.value.enabled, null)
      idle_timeout_minutes = try(auto_stop_configuration.value.idle_timeout_minutes, null)
    }
  }

  # One `initial_capacity` block per entry in `var.initial_capacity`.
  dynamic "initial_capacity" {
    for_each = var.initial_capacity

    content {
      initial_capacity_type = initial_capacity.value.initial_capacity_type

      dynamic "initial_capacity_config" {
        for_each = try([initial_capacity.value.initial_capacity_config], [])

        content {
          # Worker count defaults to 1 when not provided.
          worker_count = try(initial_capacity_config.value.worker_count, 1)

          dynamic "worker_configuration" {
            for_each = try([initial_capacity_config.value.worker_configuration], [])

            content {
              cpu    = worker_configuration.value.cpu
              memory = worker_configuration.value.memory
              disk   = try(worker_configuration.value.disk, null)
            }
          }
        }
      }
    }
  }

  dynamic "interactive_configuration" {
    for_each = length(var.interactive_configuration) > 0 ? [var.interactive_configuration] : []

    content {
      livy_endpoint_enabled = try(interactive_configuration.value.livy_endpoint_enabled, null)
      studio_enabled        = try(interactive_configuration.value.studio_enabled, null)
    }
  }

  dynamic "maximum_capacity" {
    for_each = length(var.maximum_capacity) > 0 ? [var.maximum_capacity] : []

    content {
      cpu    = maximum_capacity.value.cpu
      memory = maximum_capacity.value.memory
      disk   = try(maximum_capacity.value.disk, null)
    }
  }

  dynamic "network_configuration" {
    for_each = length(var.network_configuration) > 0 ? [var.network_configuration] : []

    content {
      subnet_ids = try(network_configuration.value.subnet_ids, null)
      # The module-created security group (if any) comes first, followed by
      # caller-supplied IDs; `compact` drops the "" placeholder when no group
      # was created.
      security_group_ids = compact(concat([try(aws_security_group.this[0].id, "")], try(network_configuration.value.security_group_ids, [])))
    }
  }

  dynamic "image_configuration" {
    for_each = length(var.image_configuration) > 0 ? [var.image_configuration] : []

    content {
      image_uri = image_configuration.value.image_uri
    }
  }

  tags = local.tags
}
107 |
108 | ################################################################################
109 | # Security Group
110 | ################################################################################
111 |
locals {
  # A security group is created only when the module and the group are both
  # enabled and at least one subnet ID was supplied in the network configuration.
  create_security_group = var.create && var.create_security_group && length(lookup(var.network_configuration, "subnet_ids", [])) > 0

  # Falls back to the application name, then to "" when neither is set.
  security_group_name = try(coalesce(var.security_group_name, var.name), "")
}
116 |
117 | data "aws_subnet" "this" { # Looks up one subnet so its VPC ID can be reused by the security group
118 | count = local.create_security_group ? 1 : 0
119 |
120 | id = element(var.network_configuration.subnet_ids, 0) # only the first subnet supplied is inspected
121 | }
122 |
123 | resource "aws_security_group" "this" { # Security group that is added to the application's network configuration
124 | count = local.create_security_group ? 1 : 0
125 |
126 | vpc_id = data.aws_subnet.this[0].vpc_id # VPC is taken from the looked-up subnet
127 | description = var.security_group_description
128 | name = !var.security_group_use_name_prefix ? local.security_group_name : null # exactly one of name / name_prefix is non-null
129 | name_prefix = !var.security_group_use_name_prefix ? null : "${local.security_group_name}-"
130 |
131 | tags = merge(
132 | local.tags,
133 | var.security_group_tags,
134 | { Name = local.security_group_name }, # merged last, so this Name tag overrides any earlier one
135 | )
136 |
137 | lifecycle {
138 | create_before_destroy = true # a replacement group is created before the old one is destroyed
139 | }
140 | }
141 |
142 | resource "aws_security_group_rule" "this" {
143 | for_each = { for rule_key, rule in var.security_group_rules : rule_key => rule if local.create_security_group }
144 |
145 | # Always set: target group, direction, protocol and port range
146 | security_group_id = aws_security_group.this[0].id
147 | type = try(each.value.type, "egress")
148 | protocol = try(each.value.protocol, "tcp")
149 | from_port = each.value.from_port
150 | to_port = each.value.to_port
151 |
152 | # Peers and metadata: null unless the rule definition provides them
153 | cidr_blocks = try(each.value.cidr_blocks, null)
154 | ipv6_cidr_blocks = try(each.value.ipv6_cidr_blocks, null)
155 | prefix_list_ids = try(each.value.prefix_list_ids, null)
156 | source_security_group_id = try(each.value.source_security_group_id, null)
157 | self = try(each.value.self, null)
158 | description = try(each.value.description, null)
159 | }
160 |
--------------------------------------------------------------------------------
/modules/serverless/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Application
3 | ################################################################################
4 |
5 | output "arn" {
6 | description = "Amazon Resource Name (ARN) of the application"
7 | value = try(aws_emrserverless_application.this[0].arn, null) # try() yields null when instance [0] cannot be resolved
8 | }
9 |
10 | output "id" {
11 | description = "ID of the application"
12 | value = try(aws_emrserverless_application.this[0].id, null) # try() yields null when instance [0] cannot be resolved
13 | }
14 |
15 | ################################################################################
16 | # Security Group
17 | ################################################################################
18 |
19 | output "security_group_arn" {
20 | description = "Amazon Resource Name (ARN) of the security group"
21 | value = try(aws_security_group.this[0].arn, null) # null when the counted security group has no instance [0]
22 | }
23 |
24 | output "security_group_id" {
25 | description = "ID of the security group"
26 | value = try(aws_security_group.this[0].id, null) # null when the counted security group has no instance [0]
27 | }
28 |
--------------------------------------------------------------------------------
/modules/serverless/variables.tf:
--------------------------------------------------------------------------------
1 | variable "create" {
2 | description = "Controls if resources should be created (affects nearly all resources)"
3 | type = bool
4 | default = true # creation is on by default; this flag also gates the security group
5 | }
6 |
7 | variable "tags" {
8 | description = "A map of tags to add to all resources"
9 | type = map(string)
10 | default = {} # no tags unless the caller supplies them
11 | }
12 |
13 | ################################################################################
14 | # Application
15 | ################################################################################
16 |
17 | variable "architecture" {
18 | description = "The CPU architecture of an application. Valid values are `ARM64` or `X86_64`. Default value is `X86_64`"
19 | type = string
20 | default = null # the module itself sets no value; the `X86_64` default named in the description is not applied here
21 | }
22 |
23 | variable "auto_start_configuration" {
24 | description = "The configuration for an application to automatically start on job submission"
25 | type = any # untyped; accepted keys are not enforced by this declaration
26 | default = {}
27 | }
28 |
29 | variable "auto_stop_configuration" {
30 | description = "The configuration for an application to automatically stop after a certain amount of time being idle"
31 | type = any # untyped; accepted keys are not enforced by this declaration
32 | default = {}
33 | }
34 |
35 | variable "image_configuration" {
36 | description = "The image configuration applied to all worker types"
37 | type = any # object; `image_uri` is read whenever the value is non-empty
38 | default = {} # empty map omits the image_configuration block
39 | }
40 |
41 | variable "initial_capacity" {
42 | description = "The capacity to initialize when the application is created"
43 | type = any # entries supply `initial_capacity_type` and a capacity config whose `worker_count` falls back to 1
44 | default = {}
45 | }
46 |
47 | variable "interactive_configuration" {
48 | description = "Enables the interactive use cases to use when running an application"
49 | type = any # object with optional `livy_endpoint_enabled` and `studio_enabled`
50 | default = {} # empty map omits the interactive_configuration block
51 | }
52 |
53 | variable "maximum_capacity" {
54 | description = "The maximum capacity to allocate when the application is created. This is cumulative across all workers at any given point in time, not just when an application is created. No new resources will be created once any one of the defined limits is hit"
55 | type = any # object; `cpu` and `memory` are read directly, `disk` is optional
56 | default = {} # empty map omits the maximum_capacity block
57 | }
58 |
59 | variable "name" {
60 | description = "The name of the application"
61 | type = string
62 | default = "" # also the fallback security group name when `security_group_name` is null
63 | }
64 |
65 | variable "network_configuration" {
66 | description = "The network configuration for customer VPC connectivity"
67 | type = any # object with `subnet_ids` and optional `security_group_ids`; the first subnet determines the security group's VPC
68 | default = {} # empty map omits the network_configuration block and disables security group creation
69 | }
70 |
71 | variable "release_label" {
72 | description = "Release label for the Amazon EMR release"
73 | type = string
74 | default = null # when null, the first label returned by the aws_emr_release_labels lookup is used
75 | }
76 |
77 | variable "release_label_prefix" {
78 | description = "Release label prefix used to lookup a release label"
79 | type = string
80 | default = "emr-6" # presumably filters the release label lookup used when `release_label` is null — confirm against the data source
81 | }
82 |
83 | variable "type" {
84 | description = "The type of application you want to start, such as `spark` or `hive`. Defaults to `spark`"
85 | type = string
86 | default = "spark" # passed unchanged to the application's `type` argument
87 | }
88 |
89 | ################################################################################
90 | # Security Group
91 | ################################################################################
92 |
93 | variable "create_security_group" {
94 | description = "Determines whether the security group is created"
95 | type = bool
96 | default = true # still requires `create` to be true and `network_configuration.subnet_ids` to be non-empty
97 | }
98 |
99 | variable "security_group_name" {
100 | description = "Name to use on security group created"
101 | type = string
102 | default = null # falls back to `var.name` when null
103 | }
104 |
105 | variable "security_group_use_name_prefix" {
106 | description = "Determines whether the security group name (`security_group_name`) is used as a prefix"
107 | type = bool
108 | default = true # when true, the name plus a trailing "-" is sent as `name_prefix` and `name` is left null
109 | }
110 |
111 | variable "security_group_description" {
112 | description = "Description of the security group created"
113 | type = string
114 | default = null # passed unchanged to the security group's `description` argument
115 | }
116 |
117 | variable "security_group_tags" {
118 | description = "A map of additional tags to add to the security group created"
119 | type = map(string)
120 | default = {} # merged over the module-wide tags; a `Name` tag is merged in after these
121 | }
122 |
123 | variable "security_group_rules" {
124 | description = "Security group rules to add to the security group created"
125 | type = any # map of rule objects; `from_port` and `to_port` are required, `protocol` defaults to "tcp" and `type` to "egress"
126 | default = {} # no rules are added by default
127 | }
128 |
--------------------------------------------------------------------------------
/modules/serverless/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0" # minimum Terraform CLI version accepted by this module
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83" # minimum AWS provider version; no upper bound is set
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/modules/studio/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Studio
3 | ################################################################################
4 |
5 | output "arn" {
6 | description = "ARN of the studio"
7 | value = try(aws_emr_studio.this[0].arn, null) # try() yields null when instance [0] cannot be resolved
8 | }
9 |
10 | output "url" {
11 | description = "The unique access URL of the Amazon EMR Studio"
12 | value = try(aws_emr_studio.this[0].url, null) # try() yields null when instance [0] cannot be resolved
13 | }
14 |
15 | ################################################################################
16 | # Service IAM Role
17 | ################################################################################
18 |
19 | output "service_iam_role_name" {
20 | description = "Service IAM role name"
21 | value = try(aws_iam_role.service[0].name, null) # null when the service role has no instance [0]
22 | }
23 |
24 | output "service_iam_role_arn" {
25 | description = "Service IAM role ARN"
26 | value = try(aws_iam_role.service[0].arn, null) # null when the service role has no instance [0]
27 | }
28 |
29 | output "service_iam_role_unique_id" {
30 | description = "Stable and unique string identifying the service IAM role"
31 | value = try(aws_iam_role.service[0].unique_id, null) # null when the service role has no instance [0]
32 | }
33 |
34 | ################################################################################
35 | # Service IAM Role Policy
36 | ################################################################################
37 |
38 | output "service_iam_role_policy_arn" {
39 | description = "Service IAM role policy ARN"
40 | value = try(aws_iam_policy.service[0].arn, null) # null when the service policy has no instance [0]
41 | }
42 |
43 | output "service_iam_role_policy_id" {
44 | description = "Service IAM role policy ID"
45 | value = try(aws_iam_policy.service[0].id, null) # null when the service policy has no instance [0]
46 | }
47 |
48 | output "service_iam_role_policy_name" {
49 | description = "The name of the service role policy"
50 | value = try(aws_iam_policy.service[0].name, null) # null when the service policy has no instance [0]
51 | }
52 |
53 | ################################################################################
54 | # User IAM Role
55 | ################################################################################
56 |
57 | output "user_iam_role_name" {
58 | description = "User IAM role name"
59 | value = try(aws_iam_role.user[0].name, null) # null when the user role has no instance [0]
60 | }
61 |
62 | output "user_iam_role_arn" {
63 | description = "User IAM role ARN"
64 | value = try(aws_iam_role.user[0].arn, null) # null when the user role has no instance [0]
65 | }
66 |
67 | output "user_iam_role_unique_id" {
68 | description = "Stable and unique string identifying the user IAM role"
69 | value = try(aws_iam_role.user[0].unique_id, null) # null when the user role has no instance [0]
70 | }
71 |
72 | ################################################################################
73 | # User IAM Role Policy
74 | ################################################################################
75 |
76 | output "user_iam_role_policy_arn" {
77 | description = "User IAM role policy ARN"
78 | value = try(aws_iam_policy.user[0].arn, null) # null when the user policy has no instance [0]
79 | }
80 |
81 | output "user_iam_role_policy_id" {
82 | description = "User IAM role policy ID"
83 | value = try(aws_iam_policy.user[0].id, null) # null when the user policy has no instance [0]
84 | }
85 |
86 | output "user_iam_role_policy_name" {
87 | description = "The name of the user role policy"
88 | value = try(aws_iam_policy.user[0].name, null) # null when the user policy has no instance [0]
89 | }
90 |
91 | ################################################################################
92 | # Engine Security Group
93 | ################################################################################
94 |
95 | output "engine_security_group_arn" {
96 | description = "Amazon Resource Name (ARN) of the engine security group"
97 | value = try(aws_security_group.engine[0].arn, null) # null when the engine group has no instance [0]
98 | }
99 |
100 | output "engine_security_group_id" {
101 | description = "ID of the engine security group"
102 | value = try(aws_security_group.engine[0].id, null) # null when the engine group has no instance [0]
103 | }
104 |
105 | ################################################################################
106 | # Workspace Security Group
107 | ################################################################################
108 |
109 | output "workspace_security_group_arn" {
110 | description = "Amazon Resource Name (ARN) of the workspace security group"
111 | value = try(aws_security_group.workspace[0].arn, null) # null when the workspace group has no instance [0]
112 | }
113 |
114 | output "workspace_security_group_id" {
115 | description = "ID of the workspace security group"
116 | value = try(aws_security_group.workspace[0].id, null) # null when the workspace group has no instance [0]
117 | }
118 |
--------------------------------------------------------------------------------
/modules/studio/variables.tf:
--------------------------------------------------------------------------------
1 | variable "create" {
2 | description = "Controls if resources should be created (affects nearly all resources)"
3 | type = bool
4 | default = true # creation is on unless explicitly disabled
5 | }
6 |
7 | variable "tags" {
8 | description = "A map of tags to add to all resources"
9 | type = map(string)
10 | default = {} # no tags unless the caller supplies them
11 | }
12 |
13 | ################################################################################
14 | # Studio
15 | ################################################################################
16 |
17 | variable "auth_mode" {
18 | description = "Specifies whether the Studio authenticates users using IAM or Amazon Web Services SSO. Valid values are `SSO` or `IAM`"
19 | type = string
20 | default = "IAM" # IAM authentication unless overridden; the listed values are not validated by this declaration
21 | }
22 |
23 | variable "default_s3_location" {
24 | description = "The Amazon S3 location to back up Amazon EMR Studio Workspaces and notebook files"
25 | type = string
26 | default = "" # empty string by default; NOTE(review): callers presumably must set a real location — confirm
27 | }
28 |
29 | variable "description" {
30 | description = "A detailed description of the Amazon EMR Studio"
31 | type = string
32 | default = null # optional; no description unless supplied
33 | }
34 |
35 | variable "encryption_key_arn" {
36 | description = "The AWS KMS key identifier (ARN) used to encrypt Amazon EMR Studio workspace and notebook files when backed up to Amazon S3"
37 | type = string
38 | default = null # optional; no KMS key is specified by default
39 | }
40 |
41 | variable "idp_auth_url" {
42 | description = "The authentication endpoint of your identity provider (IdP). Specify this value when you use IAM authentication and want to let federated users log in to a Studio with the Studio URL and credentials from your IdP"
43 | type = string
44 | default = null # optional; unset by default
45 | }
46 |
47 | variable "idp_relay_state_parameter_name" {
48 | description = "The name that your identity provider (IdP) uses for its RelayState parameter. For example, RelayState or TargetSource"
49 | type = string
50 | default = null # optional; unset by default
51 | }
52 |
53 | variable "name" {
54 | description = "A descriptive name for the Amazon EMR Studio"
55 | type = string
56 | default = "" # empty string by default
57 | }
58 |
59 | variable "subnet_ids" {
60 | description = "A list of subnet IDs to associate with the Amazon EMR Studio. A Studio can have a maximum of 5 subnets. The subnets must belong to the VPC specified by `vpc_id`"
61 | type = list(string)
62 | default = [] # empty by default; the 5-subnet limit in the description is not enforced by this declaration
63 | }
64 |
65 | variable "vpc_id" {
66 | description = "The ID of the Amazon Virtual Private Cloud (Amazon VPC) to associate with the Studio"
67 | type = string
68 | default = "" # empty string by default
69 | }
70 |
71 | ################################################################################
72 | # Studio Session Mapping
73 | ################################################################################
74 |
75 | variable "session_mappings" {
76 | description = "A map of session mapping definitions to apply to the Studio"
77 | type = any # untyped; the shape of each mapping is not enforced by this declaration
78 | default = {} # no session mappings by default
79 | }
80 |
81 | ################################################################################
82 | # Service IAM Role
83 | ################################################################################
84 |
85 | variable "create_service_role" {
86 | description = "Determines whether the service IAM role should be created"
87 | type = bool
88 | default = true # the service role is created unless disabled
89 | }
90 |
91 | variable "service_role_arn" {
92 | description = "The ARN of an existing IAM role to use for the service"
93 | type = string
94 | default = null # unset by default; presumably only consulted when `create_service_role` is false — confirm in main.tf
95 | }
96 |
97 | variable "service_role_name" {
98 | description = "Name to use on IAM role created"
99 | type = string
100 | default = null # no explicit name by default
101 | }
102 |
103 | variable "service_role_use_name_prefix" {
104 | description = "Determines whether the IAM role name is used as a prefix"
105 | type = bool
106 | default = true # prefix naming is the default
107 | }
108 |
109 | variable "service_role_description" {
110 | description = "Description of the role"
111 | type = string
112 | default = null # optional; no description unless supplied
113 | }
114 | variable "service_role_path" {
115 | description = "IAM role path"
116 | type = string
117 | default = null # optional; no path is specified by default
118 | }
119 |
120 | variable "service_role_permissions_boundary" {
121 | description = "ARN of the policy that is used to set the permissions boundary for the IAM role"
122 | type = string
123 | default = null # no permissions boundary by default
124 | }
125 |
126 | variable "service_role_policies" {
127 | description = "Map of IAM policies to attach to the service role"
128 | type = map(string) # presumably arbitrary key => policy ARN — confirm against the attachment resource
129 | default = {} # no extra policies by default
130 | }
131 |
132 | variable "service_role_tags" {
133 | description = "A map of additional tags to add to the IAM role created"
134 | type = map(string)
135 | default = {} # no additional role tags by default
136 | }
137 |
138 | ################################################################################
139 | # Service IAM Role Policy
140 | ################################################################################
141 |
142 | variable "create_service_role_policy" {
143 | description = "Determines whether the service IAM role policy should be created"
144 | type = bool
145 | default = true # the service role policy is created unless disabled
146 | }
147 |
148 | variable "service_role_secrets_manager_arns" {
149 | description = "A list of Amazon Web Services Secrets Manager secret ARNs to allow use of Git credentials stored in AWS Secrets Manager to link Git repositories to a Workspace"
150 | type = list(string)
151 | default = [] # no secrets are referenced by default
152 | }
153 |
154 | variable "service_role_s3_bucket_arns" {
155 | description = "A list of Amazon S3 bucket ARNs to allow permission to read/write from the Amazon EMR Studio"
156 | type = list(string)
157 | default = [] # no buckets are referenced by default
158 | }
159 |
160 | variable "service_role_statements" {
161 | description = "A map of IAM policy [statements](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document#statement) for custom permission usage"
162 | type = any # untyped; statement shape follows the linked data source and is not enforced by this declaration
163 | default = {} # no custom statements by default
164 | }
165 |
166 | ################################################################################
167 | # User IAM Role
168 | ################################################################################
169 |
170 | variable "create_user_role" {
171 | description = "Determines whether the user IAM role should be created"
172 | type = bool
173 | default = true # the user role is created unless disabled
174 | }
175 |
176 | variable "user_role_arn" {
177 | description = "The ARN of an existing IAM role to use for the user"
178 | type = string
179 | default = null # unset by default; presumably only consulted when `create_user_role` is false — confirm in main.tf
180 | }
181 |
182 | variable "user_role_name" {
183 | description = "Name to use on IAM role created"
184 | type = string
185 | default = null # no explicit name by default
186 | }
187 |
188 | variable "user_role_use_name_prefix" {
189 | description = "Determines whether the IAM role name is used as a prefix"
190 | type = bool
191 | default = true # prefix naming is the default
192 | }
193 |
194 | variable "user_role_description" {
195 | description = "Description of the role"
196 | type = string
197 | default = null # optional; no description unless supplied
198 | }
199 | variable "user_role_path" {
200 | description = "IAM role path"
201 | type = string
202 | default = null # optional; no path is specified by default
203 | }
204 |
205 | variable "user_role_permissions_boundary" {
206 | description = "ARN of the policy that is used to set the permissions boundary for the IAM role"
207 | type = string
208 | default = null # no permissions boundary by default
209 | }
210 |
211 | variable "user_role_policies" {
212 | description = "Map of IAM policies to attach to the user role"
213 | type = map(string) # presumably arbitrary key => policy ARN — confirm against the attachment resource
214 | default = {} # no extra policies by default
215 | }
216 |
217 | variable "user_role_tags" {
218 | description = "A map of additional tags to add to the IAM role created"
219 | type = map(string)
220 | default = {} # no additional role tags by default
221 | }
222 |
223 | ################################################################################
224 | # User IAM Role Policy
225 | ################################################################################
226 |
227 | variable "create_user_role_policy" {
228 | description = "Determines whether the user IAM role policy should be created"
229 | type = bool
230 | default = true # the user role policy is created unless disabled
231 | }
232 |
233 | variable "user_role_s3_bucket_arns" {
234 | description = "A list of Amazon S3 bucket ARNs to allow permission to read/write from the Amazon EMR Studio user role"
235 | type = list(string)
236 | default = [] # no buckets are referenced by default
237 | }
238 |
239 | variable "user_role_statements" {
240 | description = "A map of IAM policy [statements](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document#statement) for custom permission usage"
241 | type = any # untyped; statement shape follows the linked data source and is not enforced by this declaration
242 | default = {} # no custom statements by default
243 | }
244 |
245 | ################################################################################
246 | # Security Group
247 | ################################################################################
248 |
249 | variable "create_security_groups" {
250 | description = "Determines whether security groups for the EMR Studio engine and workspace are created"
251 | type = bool
252 | default = true # one flag for both the engine and workspace groups
253 | }
254 |
255 | variable "security_group_name" {
256 | description = "Name to use on security group created. Note - `-engine` and `-workspace` will be appended to this name to distinguish"
257 | type = string
258 | default = null # no explicit base name by default
259 | }
260 |
261 | variable "security_group_use_name_prefix" {
262 | description = "Determines whether the security group name (`security_group_name`) is used as a prefix"
263 | type = bool
264 | default = true # prefix naming is the default
265 | }
266 |
267 | variable "security_group_tags" {
268 | description = "A map of additional tags to add to the security group created"
269 | type = map(string)
270 | default = {} # no additional security group tags by default
271 | }
272 |
273 | ################################################################################
274 | # Engine Security Group
275 | ################################################################################
276 |
277 | variable "engine_security_group_id" {
278 | description = "The ID of the Amazon EMR Studio Engine security group. The Engine security group allows inbound network traffic from the Workspace security group, and it must be in the same VPC specified by `vpc_id`"
279 | type = string
280 | default = null # unset by default; presumably for supplying an existing group instead of the created one — confirm in main.tf
281 | }
282 |
283 | variable "engine_security_group_description" {
284 | description = "Description of the security group created"
285 | type = string
286 | default = "EMR Studio engine security group" # fixed default text, unlike the null-default descriptions elsewhere in this file
287 | }
288 |
289 | variable "engine_security_group_rules" {
290 | description = "Security group rules to add to the security group created"
291 | type = any # untyped; the rule shape is not enforced by this declaration
292 | default = {} # no rules are added by default
293 | }
294 |
295 | ################################################################################
296 | # Workspace Security Group
297 | ################################################################################
298 |
299 | variable "workspace_security_group_id" {
300 | description = "The ID of the Amazon EMR Studio Workspace security group. The Workspace security group allows outbound network traffic to resources in the Engine security group, and it must be in the same VPC specified by `vpc_id`"
301 | type = string
302 | default = null # unset by default; presumably for supplying an existing group instead of the created one — confirm in main.tf
303 | }
304 |
305 | variable "workspace_security_group_description" {
306 | description = "Description of the security group created"
307 | type = string
308 | default = "EMR Studio workspace security group" # fixed default text, unlike the null-default descriptions elsewhere in this file
309 | }
310 |
311 | variable "workspace_security_group_rules" {
312 | description = "Security group rules to add to the security group created. Note - only egress rules are permitted"
313 | type = any # untyped; the egress-only restriction in the description is not enforced by this declaration
314 | default = {} # no rules are added by default
315 | }
316 |
--------------------------------------------------------------------------------
/modules/studio/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0" # minimum Terraform CLI version accepted by this module
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83" # minimum AWS provider version; no upper bound is set
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/modules/virtual-cluster/README.md:
--------------------------------------------------------------------------------
1 | # AWS EMR Virtual Cluster Terraform module
2 |
3 | Terraform module which creates AWS EMR Virtual Cluster resources.
4 |
5 | Note: you will need to add the `AWSServiceRoleForAmazonEMRContainers` role to the cluster's `aws-auth` ConfigMap under the username `emr-containers`. See below for reference:
6 |
7 | ```hcl
8 | data "aws_caller_identity" "current" {}
9 |
10 | ...
11 | aws_auth_roles = [
12 | {
13 | # Required for EMR on EKS virtual cluster
14 | rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers"
15 | username = "emr-containers"
16 | },
17 | ]
18 | ...
19 | ```
20 |
21 | ## Usage
22 |
23 | See the [`examples`](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples) directory for working examples to reference:
24 |
25 | ### Custom
26 |
27 | ```hcl
28 | module "emr_virtual_cluster" {
29 | source = "terraform-aws-modules/emr/aws//modules/virtual-cluster"
30 |
31 | name = "emr-custom"
32 | create_namespace = true
33 | namespace = "emr-custom"
34 |
35 | create_iam_role = true
36 | s3_bucket_arns = [
37 | "arn:aws:s3:::my-elasticmapreduce-bucket",
38 | "arn:aws:s3:::my-elasticmapreduce-bucket/*",
39 | ]
40 | role_name = "emr-custom-role"
41 | iam_role_use_name_prefix = false
42 | iam_role_path = "/"
43 | iam_role_description = "EMR custom Role"
44 | iam_role_permissions_boundary = null
45 | iam_role_additional_policies = {}
46 |
47 | tags = {
48 | Terraform = "true"
49 | Environment = "dev"
50 | }
51 | }
52 | ```
53 |
54 | ### Default
55 |
56 | ```hcl
57 | module "emr_virtual_cluster" {
58 | source = "terraform-aws-modules/emr/aws//modules/virtual-cluster"
59 |
60 | namespace = "emr-default"
61 |
62 | tags = {
63 | Terraform = "true"
64 | Environment = "dev"
65 | }
66 | }
67 | ```
68 |
69 | ## Examples
70 |
71 | Examples codified under the [`examples`](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples) directory are intended to give users references for how to use the module(s), as well as for testing/validating changes to the source code of the module. If contributing to the project, please be sure to make any appropriate updates to the relevant examples to allow maintainers to test your changes and to keep the examples up to date for users. Thank you!
72 |
73 | - [Private clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/private-cluster) using instance fleet or instance group
74 | - [Public clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/public-cluster) using instance fleet or instance group
75 | - [Serverless clusters](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/serverless-cluster) running Spark or Hive
76 | - [Studios](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/studio) with either IAM or SSO authentication
77 | - [Virtual cluster](https://github.com/terraform-aws-modules/terraform-aws-emr/tree/master/examples/virtual-cluster) running on Amazon EKS
78 |
79 |
80 | ## Requirements
81 |
82 | | Name | Version |
83 | |------|---------|
84 | | [terraform](#requirement\_terraform) | >= 1.0 |
85 | | [aws](#requirement\_aws) | >= 5.83 |
86 | | [kubernetes](#requirement\_kubernetes) | >= 2.10 |
87 |
88 | ## Providers
89 |
90 | | Name | Version |
91 | |------|---------|
92 | | [aws](#provider\_aws) | >= 5.83 |
93 | | [kubernetes](#provider\_kubernetes) | >= 2.10 |
94 |
95 | ## Modules
96 |
97 | No modules.
98 |
99 | ## Resources
100 |
101 | | Name | Type |
102 | |------|------|
103 | | [aws_cloudwatch_log_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource |
104 | | [aws_emrcontainers_virtual_cluster.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrcontainers_virtual_cluster) | resource |
105 | | [aws_iam_policy.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
106 | | [aws_iam_role.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
107 | | [aws_iam_role_policy_attachment.additional](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
108 | | [aws_iam_role_policy_attachment.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
109 | | [kubernetes_namespace_v1.this](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource |
110 | | [kubernetes_role_binding_v1.this](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role_binding_v1) | resource |
111 | | [kubernetes_role_v1.this](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role_v1) | resource |
112 | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
113 | | [aws_iam_policy_document.assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
114 | | [aws_iam_policy_document.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
115 |
116 | ## Inputs
117 |
118 | | Name | Description | Type | Default | Required |
119 | |------|-------------|------|---------|:--------:|
120 | | [annotations](#input\_annotations) | A map of annotations to add to all Kubernetes resources | `map(string)` | `{}` | no |
121 | | [cloudwatch\_log\_group\_arn](#input\_cloudwatch\_log\_group\_arn) | ARN of the log group to use for the cluster logs | `string` | `"arn:aws:logs:*:*:*"` | no |
122 | | [cloudwatch\_log\_group\_kms\_key\_id](#input\_cloudwatch\_log\_group\_kms\_key\_id) | If a KMS Key ARN is set, this key will be used to encrypt the corresponding log group. Please be sure that the KMS Key has an appropriate key policy (https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html) | `string` | `null` | no |
123 | | [cloudwatch\_log\_group\_name](#input\_cloudwatch\_log\_group\_name) | The name of the log group. If a name is not provided, the default name format used is: `/emr-on-eks-logs/emr-workload/` | `string` | `null` | no |
124 | | [cloudwatch\_log\_group\_retention\_in\_days](#input\_cloudwatch\_log\_group\_retention\_in\_days) | Number of days to retain log events. Default retention - 7 days | `number` | `7` | no |
125 | | [cloudwatch\_log\_group\_skip\_destroy](#input\_cloudwatch\_log\_group\_skip\_destroy) | Set to 'true' if you do not wish the log group (and any logs it may contain) to be deleted at destroy time, and instead just remove the log group from the Terraform state | `bool` | `null` | no |
126 | | [cloudwatch\_log\_group\_use\_name\_prefix](#input\_cloudwatch\_log\_group\_use\_name\_prefix) | Determines whether the log group name (`cloudwatch_log_group_name`) is used as a prefix | `bool` | `false` | no |
127 | | [create](#input\_create) | Controls if resources should be created (affects nearly all resources) | `bool` | `true` | no |
128 | | [create\_cloudwatch\_log\_group](#input\_create\_cloudwatch\_log\_group) | Determines whether a log group is created by this module for the cluster logs. If not, AWS will automatically create one if logging is enabled | `bool` | `true` | no |
129 | | [create\_iam\_role](#input\_create\_iam\_role) | Determines whether an IAM role is created for EMR on EKS job execution role | `bool` | `true` | no |
130 | | [create\_kubernetes\_role](#input\_create\_kubernetes\_role) | Determines whether a Kubernetes role is created for EMR on EKS | `bool` | `true` | no |
131 | | [create\_namespace](#input\_create\_namespace) | Determines whether a Kubernetes namespace is created for EMR on EKS | `bool` | `true` | no |
132 | | [eks\_cluster\_id](#input\_eks\_cluster\_id) | EKS cluster ID | `string` | `""` | no |
133 | | [iam\_role\_additional\_policies](#input\_iam\_role\_additional\_policies) | Additional policies to be added to the job execution IAM role | `any` | `{}` | no |
134 | | [iam\_role\_description](#input\_iam\_role\_description) | Description of the job execution role | `string` | `null` | no |
135 | | [iam\_role\_path](#input\_iam\_role\_path) | Job execution IAM role path | `string` | `null` | no |
136 | | [iam\_role\_permissions\_boundary](#input\_iam\_role\_permissions\_boundary) | ARN of the policy that is used to set the permissions boundary for the job execution IAM role | `string` | `null` | no |
137 | | [iam\_role\_use\_name\_prefix](#input\_iam\_role\_use\_name\_prefix) | Determines whether the IAM job execution role name (`role_name`) is used as a prefix | `bool` | `true` | no |
138 | | [labels](#input\_labels) | A map of labels to add to all Kubernetes resources | `map(string)` | `{}` | no |
139 | | [name](#input\_name) | Name of the EMR on EKS virtual cluster | `string` | `""` | no |
140 | | [namespace](#input\_namespace) | Kubernetes namespace for EMR on EKS | `string` | `"emr-containers"` | no |
141 | | [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | OIDC provider ARN for the EKS cluster | `string` | `""` | no |
142 | | [role\_name](#input\_role\_name) | Name to use on IAM role created for EMR on EKS job execution role as well as Kubernetes RBAC role | `string` | `null` | no |
143 | | [s3\_bucket\_arns](#input\_s3\_bucket\_arns) | S3 bucket ARNs for EMR on EKS job execution role to list, get, put, and delete objects | `list(string)` | `[]` | no |
144 | | [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no |
145 |
146 | ## Outputs
147 |
148 | | Name | Description |
149 | |------|-------------|
150 | | [cloudwatch\_log\_group\_arn](#output\_cloudwatch\_log\_group\_arn) | Arn of cloudwatch log group created |
151 | | [cloudwatch\_log\_group\_name](#output\_cloudwatch\_log\_group\_name) | Name of cloudwatch log group created |
152 | | [job\_execution\_role\_arn](#output\_job\_execution\_role\_arn) | IAM role ARN of the job execution role |
153 | | [job\_execution\_role\_name](#output\_job\_execution\_role\_name) | IAM role name of the job execution role |
154 | | [job\_execution\_role\_unique\_id](#output\_job\_execution\_role\_unique\_id) | Stable and unique string identifying the job execution IAM role |
155 | | [virtual\_cluster\_arn](#output\_virtual\_cluster\_arn) | ARN of the EMR virtual cluster |
156 | | [virtual\_cluster\_id](#output\_virtual\_cluster\_id) | ID of the EMR virtual cluster |
157 |
158 |
159 | ## License
160 |
161 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-emr/blob/master/LICENSE).
162 |
--------------------------------------------------------------------------------
/modules/virtual-cluster/main.tf:
--------------------------------------------------------------------------------
1 | data "aws_caller_identity" "current" {}
2 |
3 | locals {
4 | account_id = data.aws_caller_identity.current.account_id
5 |
6 | internal_role_name = try(coalesce(var.role_name, var.name), "")
7 | # Gate on var.create too: the Kubernetes resources have count 0 when create = false, so indexing [0] would fail
8 | role_name = var.create && var.create_kubernetes_role ? kubernetes_role_v1.this[0].metadata[0].name : local.internal_role_name
9 | namespace = var.create && var.create_namespace ? kubernetes_namespace_v1.this[0].metadata[0].name : var.namespace
10 | cloudwatch_log_group_name = coalesce(var.cloudwatch_log_group_name, "/emr-on-eks-logs/emr-workload/${local.namespace}")
11 |
12 | tags = merge(var.tags, { terraform-aws-modules = "emr" })
13 | }
14 |
15 | ################################################################################
16 | # EMR Virtual Cluster
17 | ################################################################################
18 |
19 | resource "aws_emrcontainers_virtual_cluster" "this" {
20 | count = var.create ? 1 : 0
21 |
22 | name = var.name
23 |
24 | container_provider {
25 | id = var.eks_cluster_id
26 | type = "EKS"
27 |
28 | info {
29 | eks_info {
30 | namespace = local.namespace
31 | }
32 | }
33 | }
34 |
35 | tags = local.tags
36 | }
37 |
38 | ################################################################################
39 | # Kubernetes Namespace + Role/Role Binding
40 | # https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-cluster-access.html#setting-up-cluster-access-manual
41 | ################################################################################
42 |
43 | resource "kubernetes_namespace_v1" "this" {
44 | count = var.create && var.create_namespace ? 1 : 0
45 |
46 | metadata {
47 | name = var.namespace
48 | labels = var.labels
49 | annotations = var.annotations
50 | }
51 | }
52 |
53 | resource "kubernetes_role_v1" "this" {
54 | count = var.create && var.create_kubernetes_role ? 1 : 0
55 |
56 | metadata {
57 | name = local.internal_role_name
58 | namespace = local.namespace
59 | labels = var.labels
60 | annotations = var.annotations
61 | }
62 |
63 | rule {
64 | api_groups = [""]
65 | resources = ["namespaces"]
66 | verbs = ["get"]
67 | }
68 |
69 | rule {
70 | api_groups = [""]
71 | resources = ["serviceaccounts", "services", "configmaps", "events", "pods", "pods/log"]
72 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
73 | }
74 |
75 | rule {
76 | api_groups = [""]
77 | resources = ["secrets"]
78 | verbs = ["create", "patch", "delete", "watch"]
79 | }
80 |
81 | rule {
82 | api_groups = ["apps"]
83 | resources = ["statefulsets", "deployments"]
84 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
85 | }
86 |
87 | rule {
88 | api_groups = ["batch"]
89 | resources = ["jobs"]
90 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
91 | }
92 |
93 | rule {
94 | api_groups = ["extensions", "networking.k8s.io"]
95 | resources = ["ingresses"]
96 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
97 | }
98 |
99 | rule {
100 | api_groups = ["rbac.authorization.k8s.io"]
101 | resources = ["roles", "rolebindings"]
102 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
103 | }
104 |
105 | rule {
106 | api_groups = [""]
107 | resources = ["persistentvolumeclaims"]
108 | verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label", "deletecollection"]
109 | }
110 | }
111 |
112 | resource "kubernetes_role_binding_v1" "this" {
113 | count = var.create && var.create_kubernetes_role ? 1 : 0
114 |
115 | metadata {
116 | name = local.role_name
117 | namespace = local.namespace
118 | labels = var.labels
119 | annotations = var.annotations
120 | }
121 |
122 | subject {
123 | kind = "User"
124 | name = "emr-containers" # this must stay static and is not configurable
125 | api_group = "rbac.authorization.k8s.io"
126 | }
127 |
128 | role_ref {
129 | kind = "Role"
130 | name = local.role_name
131 | api_group = "rbac.authorization.k8s.io"
132 | }
133 | }
134 |
135 | ################################################################################
136 | # Job Execution Role
137 | # https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/creating-job-execution-role.html
138 | # https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/iam-execution-role.html
139 | ################################################################################
140 |
141 | locals {
142 | create_iam_role = var.create && var.create_iam_role
143 | }
144 |
145 | data "aws_iam_policy_document" "assume" {
146 | count = local.create_iam_role ? 1 : 0
147 |
148 | statement {
149 | sid = "IRSA"
150 | effect = "Allow"
151 | actions = ["sts:AssumeRoleWithWebIdentity"]
152 |
153 | principals {
154 | type = "Federated"
155 | identifiers = [var.oidc_provider_arn]
156 | }
157 |
158 | condition {
159 | test = "StringLike"
160 | variable = "${replace(var.oidc_provider_arn, "/^(.*provider/)/", "")}:sub"
161 | # Terraform lacks support for a base32 function and role names with prefixes are unknown so a wildcard is used
162 | values = ["system:serviceaccount:${local.namespace}:emr-containers-sa-*-*-${local.account_id}-*"]
163 | }
164 |
165 | # https://aws.amazon.com/premiumsupport/knowledge-center/eks-troubleshoot-oidc-and-irsa/?nc1=h_ls
166 | condition {
167 | test = "StringEquals"
168 | variable = "${replace(var.oidc_provider_arn, "/^(.*provider/)/", "")}:aud"
169 | values = ["sts.amazonaws.com"]
170 | }
171 | }
172 | }
173 |
174 | data "aws_iam_policy_document" "this" {
175 | count = local.create_iam_role ? 1 : 0
176 |
177 | # Only render the S3 statement when bucket ARNs are supplied; IAM rejects a statement that has no resources
178 | dynamic "statement" {
179 | for_each = length(var.s3_bucket_arns) > 0 ? [1] : []
180 |
181 | content {
182 | sid = "S3Objects"
183 | effect = "Allow"
184 | actions = ["s3:DeleteObject", "s3:GetObject", "s3:ListBucket", "s3:PutObject"]
185 | resources = var.s3_bucket_arns
186 | }
187 | }
188 |
189 | statement {
190 | sid = "CloudWatchLogs"
191 | effect = "Allow"
192 | actions = [
193 | "logs:PutLogEvents",
194 | "logs:CreateLogStream",
195 | "logs:DescribeLogGroups",
196 | "logs:DescribeLogStreams",
197 | ]
198 | resources = var.create_cloudwatch_log_group ? ["${aws_cloudwatch_log_group.this[0].arn}:log-stream:*"] : ["${var.cloudwatch_log_group_arn}:log-stream:*"]
199 | }
200 |
201 | statement {
202 | sid = "CloudWatchLogsReadOnly"
203 | effect = "Allow"
204 | actions = [
205 | "logs:DescribeLogGroups",
206 | "logs:DescribeLogStreams",
207 | ]
208 | resources = ["*"]
209 | }
210 | }
211 |
212 | resource "aws_iam_role" "this" {
213 | count = local.create_iam_role ? 1 : 0
214 |
215 | name = var.iam_role_use_name_prefix ? null : local.internal_role_name
216 | name_prefix = var.iam_role_use_name_prefix ? "${local.internal_role_name}-" : null
217 | path = var.iam_role_path
218 | description = coalesce(var.iam_role_description, "Job execution role for EMR on EKS ${var.name} virtual cluster")
219 |
220 | assume_role_policy = data.aws_iam_policy_document.assume[0].json
221 | permissions_boundary = var.iam_role_permissions_boundary
222 | force_detach_policies = true
223 |
224 | tags = local.tags
225 | }
226 |
227 | resource "aws_iam_policy" "this" {
228 | count = local.create_iam_role ? 1 : 0
229 |
230 | name = var.iam_role_use_name_prefix ? null : local.internal_role_name
231 | name_prefix = var.iam_role_use_name_prefix ? "${local.internal_role_name}-" : null
232 | path = var.iam_role_path
233 | description = coalesce(var.iam_role_description, "Job execution role policy for EMR on EKS ${var.name} virtual cluster")
234 |
235 | policy = data.aws_iam_policy_document.this[0].json
236 |
237 | tags = local.tags
238 | }
239 |
240 | resource "aws_iam_role_policy_attachment" "this" {
241 | count = local.create_iam_role ? 1 : 0
242 |
243 | policy_arn = aws_iam_policy.this[0].arn
244 | role = aws_iam_role.this[0].name
245 | }
246 |
247 | resource "aws_iam_role_policy_attachment" "additional" {
248 | for_each = { for k, v in var.iam_role_additional_policies : k => v if local.create_iam_role }
249 |
250 | policy_arn = each.value
251 | role = aws_iam_role.this[0].name
252 | }
253 |
254 | ################################################################################
255 | # Cloudwatch Log Group
256 | ################################################################################
257 |
258 | resource "aws_cloudwatch_log_group" "this" {
259 | count = var.create && var.create_cloudwatch_log_group ? 1 : 0
260 |
261 | name = var.cloudwatch_log_group_use_name_prefix ? null : local.cloudwatch_log_group_name
262 | name_prefix = var.cloudwatch_log_group_use_name_prefix ? "${local.cloudwatch_log_group_name}-" : null
263 | retention_in_days = var.cloudwatch_log_group_retention_in_days
264 | kms_key_id = var.cloudwatch_log_group_kms_key_id
265 | skip_destroy = var.cloudwatch_log_group_skip_destroy
266 |
267 | tags = local.tags
268 | }
269 |
--------------------------------------------------------------------------------
/modules/virtual-cluster/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # EMR Virtual Cluster
3 | ################################################################################
4 |
5 | output "virtual_cluster_arn" {
6 | description = "ARN of the EMR virtual cluster"
7 | value = try(aws_emrcontainers_virtual_cluster.this[0].arn, null)
8 | }
9 |
10 | output "virtual_cluster_id" {
11 | description = "ID of the EMR virtual cluster"
12 | value = try(aws_emrcontainers_virtual_cluster.this[0].id, null)
13 | }
14 |
15 | ################################################################################
16 | # Job Execution Role
17 | ################################################################################
18 |
19 | output "job_execution_role_name" {
20 | description = "IAM role name of the job execution role"
21 | value = try(aws_iam_role.this[0].name, null)
22 | }
23 |
24 | output "job_execution_role_arn" {
25 | description = "IAM role ARN of the job execution role"
26 | value = try(aws_iam_role.this[0].arn, null)
27 | }
28 |
29 | output "job_execution_role_unique_id" {
30 | description = "Stable and unique string identifying the job execution IAM role"
31 | value = try(aws_iam_role.this[0].unique_id, null)
32 | }
33 |
34 | ################################################################################
35 | # CloudWatch Log Group
36 | ################################################################################
37 |
38 | output "cloudwatch_log_group_name" {
39 | description = "Name of cloudwatch log group created"
40 | value = try(aws_cloudwatch_log_group.this[0].name, null)
41 | }
42 |
43 | output "cloudwatch_log_group_arn" {
44 | description = "Arn of cloudwatch log group created"
45 | value = try(aws_cloudwatch_log_group.this[0].arn, null)
46 | }
47 |
--------------------------------------------------------------------------------
/modules/virtual-cluster/variables.tf:
--------------------------------------------------------------------------------
1 | variable "create" {
2 | description = "Controls if resources should be created (affects nearly all resources)"
3 | type = bool
4 | default = true
5 | }
6 |
7 | variable "tags" {
8 | description = "A map of tags to add to all resources"
9 | type = map(string)
10 | default = {}
11 | }
12 |
13 | variable "labels" {
14 | description = "A map of labels to add to all Kubernetes resources"
15 | type = map(string)
16 | default = {}
17 | }
18 |
19 | variable "annotations" {
20 | description = "A map of annotations to add to all Kubernetes resources"
21 | type = map(string)
22 | default = {}
23 | }
24 |
25 | ################################################################################
26 | # EMR Virtual Cluster
27 | ################################################################################
28 |
29 | variable "name" {
30 | description = "Name of the EMR on EKS virtual cluster"
31 | type = string
32 | default = ""
33 | }
34 |
35 | variable "eks_cluster_id" {
36 | description = "EKS cluster ID"
37 | type = string
38 | default = ""
39 | }
40 |
41 | ################################################################################
42 | # Kubernetes Namespace + Role/Role Binding
43 | ################################################################################
44 |
45 | variable "create_namespace" {
46 | description = "Determines whether a Kubernetes namespace is created for EMR on EKS"
47 | type = bool
48 | default = true
49 | }
50 |
51 | variable "namespace" {
52 | description = "Kubernetes namespace for EMR on EKS"
53 | type = string
54 | default = "emr-containers"
55 | }
56 |
57 | variable "create_kubernetes_role" {
58 | description = "Determines whether a Kubernetes role is created for EMR on EKS"
59 | type = bool
60 | default = true
61 | }
62 |
63 | ################################################################################
64 | # Job Execution Role
65 | ################################################################################
66 |
67 | variable "create_iam_role" {
68 | description = "Determines whether an IAM role is created for EMR on EKS job execution role"
69 | type = bool
70 | default = true
71 | }
72 |
73 | variable "oidc_provider_arn" {
74 | description = "OIDC provider ARN for the EKS cluster"
75 | type = string
76 | default = ""
77 | }
78 |
79 | variable "s3_bucket_arns" {
80 | description = "S3 bucket ARNs for EMR on EKS job execution role to list, get, put, and delete objects"
81 | type = list(string)
82 | default = []
83 | }
84 |
85 | variable "role_name" {
86 | description = "Name to use on IAM role created for EMR on EKS job execution role as well as Kubernetes RBAC role"
87 | type = string
88 | default = null
89 | }
90 |
91 | variable "iam_role_use_name_prefix" {
92 | description = "Determines whether the IAM job execution role name (`role_name`) is used as a prefix"
93 | type = bool
94 | default = true
95 | }
96 |
97 | variable "iam_role_path" {
98 | description = "Job execution IAM role path"
99 | type = string
100 | default = null
101 | }
102 |
103 | variable "iam_role_description" {
104 | description = "Description of the job execution role"
105 | type = string
106 | default = null
107 | }
108 |
109 | variable "iam_role_permissions_boundary" {
110 | description = "ARN of the policy that is used to set the permissions boundary for the job execution IAM role"
111 | type = string
112 | default = null
113 | }
114 |
115 | variable "iam_role_additional_policies" {
116 | description = "Additional policies to be added to the job execution IAM role"
117 | type = any
118 | default = {}
119 | }
120 |
121 | ################################################################################
122 | # CloudWatch Log Group
123 | ################################################################################
124 |
125 | variable "create_cloudwatch_log_group" {
126 | description = "Determines whether a log group is created by this module for the cluster logs. If not, AWS will automatically create one if logging is enabled"
127 | type = bool
128 | default = true
129 | }
130 |
131 | variable "cloudwatch_log_group_arn" {
132 | description = "ARN of the log group to use for the cluster logs"
133 | type = string
134 | default = "arn:aws:logs:*:*:*"
135 | }
136 |
137 | variable "cloudwatch_log_group_retention_in_days" {
138 | description = "Number of days to retain log events. Default retention - 7 days"
139 | type = number
140 | default = 7
141 | }
142 |
143 | variable "cloudwatch_log_group_kms_key_id" {
144 | description = "If a KMS Key ARN is set, this key will be used to encrypt the corresponding log group. Please be sure that the KMS Key has an appropriate key policy (https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html)"
145 | type = string
146 | default = null
147 | }
148 |
149 | variable "cloudwatch_log_group_name" {
150 | description = "The name of the log group. If a name is not provided, the default name format used is: `/emr-on-eks-logs/emr-workload/<NAMESPACE>`"
151 | type = string
152 | default = null
153 | }
154 |
155 | variable "cloudwatch_log_group_use_name_prefix" {
156 | description = "Determines whether the log group name (`cloudwatch_log_group_name`) is used as a prefix"
157 | type = bool
158 | default = false
159 | }
160 |
161 | variable "cloudwatch_log_group_skip_destroy" {
162 | description = "Set to 'true' if you do not wish the log group (and any logs it may contain) to be deleted at destroy time, and instead just remove the log group from the Terraform state"
163 | type = bool
164 | default = null
165 | }
166 |
--------------------------------------------------------------------------------
/modules/virtual-cluster/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0"
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83"
8 | }
9 | kubernetes = {
10 | source = "hashicorp/kubernetes"
11 | version = ">= 2.10"
12 | }
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/outputs.tf:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Cluster
3 | ################################################################################
4 |
5 | output "cluster_arn" {
6 | description = "The ARN of the cluster"
7 | value = try(aws_emr_cluster.this[0].arn, null)
8 | }
9 |
10 | output "cluster_id" {
11 | description = "The ID of the cluster"
12 | value = try(aws_emr_cluster.this[0].id, null)
13 | }
14 |
15 | output "cluster_core_instance_group_id" {
16 | description = "Core node type Instance Group ID, if using Instance Group for this node type"
17 | value = try(aws_emr_cluster.this[0].core_instance_group[0].id, null)
18 | }
19 |
20 | output "cluster_master_instance_group_id" {
21 | description = "Master node type Instance Group ID, if using Instance Group for this node type"
22 | value = try(aws_emr_cluster.this[0].master_instance_group[0].id, null)
23 | }
24 |
25 | output "cluster_master_public_dns" {
26 | description = "The DNS name of the master node. If the cluster is on a private subnet, this is the private DNS name. On a public subnet, this is the public DNS name"
27 | value = try(aws_emr_cluster.this[0].master_public_dns, null)
28 | }
29 |
30 | ################################################################################
31 | # Security Configuration
32 | ################################################################################
33 |
34 | output "security_configuration_id" {
35 | description = "The ID of the security configuration"
36 | value = try(aws_emr_security_configuration.this[0].id, null)
37 | }
38 |
39 | output "security_configuration_name" {
40 | description = "The name of the security configuration"
41 | value = try(aws_emr_security_configuration.this[0].name, null)
42 | }
43 |
44 | ################################################################################
45 | # Service IAM Role
46 | ################################################################################
47 |
48 | output "service_iam_role_name" {
49 | description = "Service IAM role name"
50 | value = try(aws_iam_role.service[0].name, null)
51 | }
52 |
53 | output "service_iam_role_arn" {
54 | description = "Service IAM role ARN"
55 | value = try(aws_iam_role.service[0].arn, var.service_iam_role_arn)
56 | }
57 |
58 | output "service_iam_role_unique_id" {
59 | description = "Stable and unique string identifying the service IAM role"
60 | value = try(aws_iam_role.service[0].unique_id, null)
61 | }
62 |
63 | ################################################################################
64 | # Autoscaling IAM Role
65 | ################################################################################
66 |
67 | output "autoscaling_iam_role_name" {
68 | description = "Autoscaling IAM role name"
69 | value = try(aws_iam_role.autoscaling[0].name, null)
70 | }
71 |
72 | output "autoscaling_iam_role_arn" {
73 | description = "Autoscaling IAM role ARN"
74 | value = try(aws_iam_role.autoscaling[0].arn, var.autoscaling_iam_role_arn)
75 | }
76 |
77 | output "autoscaling_iam_role_unique_id" {
78 | description = "Stable and unique string identifying the autoscaling IAM role"
79 | value = try(aws_iam_role.autoscaling[0].unique_id, null)
80 | }
81 |
82 | ################################################################################
83 | # Instance Profile
84 | ################################################################################
85 |
86 | output "iam_instance_profile_iam_role_name" {
87 | description = "Instance profile IAM role name"
88 | value = try(aws_iam_role.instance_profile[0].name, null)
89 | }
90 |
91 | output "iam_instance_profile_iam_role_arn" {
92 | description = "Instance profile IAM role ARN"
93 | value = try(aws_iam_role.instance_profile[0].arn, null)
94 | }
95 |
96 | output "iam_instance_profile_iam_role_unique_id" {
97 | description = "Stable and unique string identifying the instance profile IAM role"
98 | value = try(aws_iam_role.instance_profile[0].unique_id, null)
99 | }
100 |
101 | output "iam_instance_profile_arn" {
102 | description = "ARN assigned by AWS to the instance profile"
103 | value = try(aws_iam_instance_profile.this[0].arn, null)
104 | }
105 |
106 | output "iam_instance_profile_id" {
107 | description = "Instance profile's ID"
108 | value = try(aws_iam_instance_profile.this[0].id, null)
109 | }
110 |
111 | output "iam_instance_profile_unique" {
112 | description = "Stable and unique string identifying the IAM instance profile"
113 | value = try(aws_iam_instance_profile.this[0].unique_id, null)
114 | }
115 |
116 | ################################################################################
117 | # Managed Master Security Group
118 | ################################################################################
119 |
120 | output "managed_master_security_group_arn" {
121 | description = "Amazon Resource Name (ARN) of the managed master security group"
122 | value = try(aws_security_group.master[0].arn, null)
123 | }
124 |
125 | output "managed_master_security_group_id" {
126 | description = "ID of the managed master security group"
127 | value = try(aws_security_group.master[0].id, null)
128 | }
129 |
130 | ################################################################################
131 | # Managed Slave Security Group
132 | ################################################################################
133 |
134 | output "managed_slave_security_group_arn" {
135 | description = "Amazon Resource Name (ARN) of the managed slave security group"
136 | value = try(aws_security_group.slave[0].arn, null)
137 | }
138 |
139 | output "managed_slave_security_group_id" {
140 | description = "ID of the managed slave security group"
141 | value = try(aws_security_group.slave[0].id, null)
142 | }
143 |
144 | ################################################################################
145 | # Managed Service Access Security Group
146 | ################################################################################
147 |
148 | output "managed_service_access_security_group_arn" {
149 | description = "Amazon Resource Name (ARN) of the managed service access security group"
150 | value = try(aws_security_group.service[0].arn, null)
151 | }
152 |
153 | output "managed_service_access_security_group_id" {
154 | description = "ID of the managed service access security group"
155 | value = try(aws_security_group.service[0].id, null)
156 | }
157 |
--------------------------------------------------------------------------------
/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.0"
3 |
4 | required_providers {
5 | aws = {
6 | source = "hashicorp/aws"
7 | version = ">= 5.83"
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------