├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── config.yml │ ├── feature_request.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── docbuild.yml │ ├── linkcheck.json │ ├── markdown-link-check.yaml │ ├── plan-examples.py │ ├── plan-examples.yml │ ├── pre-commit.yaml │ └── stale_issue_pr.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .tflint.hcl ├── ADOPTERS.md ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── NOTICE.txt ├── README.md ├── docs ├── adothealth │ └── index.md ├── concepts.md ├── container-insights │ └── eks.md ├── contributors.md ├── ecs │ └── ecs-monitoring-on-ec2.md ├── eks │ ├── destroy.md │ ├── eks-apiserver.md │ ├── gpu-monitoring.md │ ├── index.md │ ├── istio.md │ ├── java.md │ ├── logs.md │ ├── multiaccount.md │ ├── multicluster.md │ ├── nginx.md │ ├── tracing.md │ └── troubleshooting.md ├── helpers │ ├── ecs-cluster-with-vpc.md │ ├── managed-grafana.md │ └── new-eks-cluster.md ├── iam │ └── min-iam-policy.json ├── images │ ├── aws-favicon.png │ ├── aws-logo.png │ ├── dark-o11y-accelerator-amp-xray.drawio │ ├── dark-o11y-accelerator-amp-xray.png │ ├── light-o11y-accelerator-amp-xray.drawio │ └── light-o11y-accelerator-amp-xray.png ├── index.md ├── overrides │ └── main.html ├── support.md └── workloads │ └── managed-prometheus.md ├── examples ├── ecs-cluster-with-vpc │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── eks-cluster-with-vpc │ ├── README.md │ ├── main.tf │ ├── min-iam-policy.json │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── eks-container-insights │ ├── README.md │ ├── data.tf │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── eks-cross-account-with-central-amp │ ├── README.md │ ├── data.tf │ ├── iam.tf │ ├── main.tf │ ├── outputs.tf │ ├── providers.tf │ ├── variables.tf │ └── versions.tf ├── eks-istio │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── 
versions.tf ├── eks-multicluster │ ├── README.md │ ├── data.tf │ ├── main.tf │ ├── outputs.tf │ ├── providers.tf │ ├── variables.tf │ └── versions.tf ├── existing-cluster-java │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── existing-cluster-nginx │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── sample_traffic │ │ └── nginix-traffic-sample.yaml │ ├── variables.tf │ └── versions.tf ├── existing-cluster-with-base-and-infra │ ├── README.md │ ├── cleanup.sh │ ├── install.sh │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── managed-grafana-workspace │ ├── main.tf │ ├── outputs.tf │ ├── readme.md │ ├── variables.tf │ └── versions.tf └── managed-prometheus-monitoring │ ├── README.md │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── mkdocs.yml ├── modules ├── ecs-monitoring │ ├── README.md │ ├── configs │ │ └── config.yaml │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── task-definitions │ │ └── otel_collector.json │ ├── variables.tf │ └── versions.tf ├── eks-container-insights │ ├── README.md │ ├── data.tf │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── eks-monitoring │ ├── README.md │ ├── add-ons │ │ ├── adot-operator │ │ │ ├── README.md │ │ │ ├── locals.tf │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── versions.tf │ │ ├── aws-for-fluentbit │ │ │ ├── README.md │ │ │ ├── data.tf │ │ │ ├── locals.tf │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── values.yaml │ │ │ ├── variables.tf │ │ │ └── versions.tf │ │ └── external-secrets │ │ │ ├── README.md │ │ │ ├── locals.tf │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── versions.tf │ ├── alerts.tf │ ├── dashboards.tf │ ├── locals.tf │ ├── main.tf │ ├── otel-config │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── clusterrole.yaml │ │ │ ├── clusterrolebinding.yaml │ │ │ └── opentelemetrycollector.yaml │ │ └── values.yaml │ ├── outputs.tf │ ├── patterns │ │ 
├── istio │ │ │ ├── README.md │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── versions.tf │ │ ├── java │ │ │ ├── README.md │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── versions.tf │ │ └── nginx │ │ │ ├── README.md │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ ├── variables.tf │ │ │ └── versions.tf │ ├── rules.tf │ ├── variables.tf │ └── versions.tf └── managed-prometheus-monitoring │ ├── README.md │ ├── alarms.tf │ ├── billing │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── variables.tf │ └── versions.tf ├── test └── examples_basic_test.go └── tfsec.yaml /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug Report 2 | title: "[Bug]: " 3 | description: Create a report to help us improve 4 | labels: ["bug", "triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | ### How to write a good bug report? 10 | 11 | - Respect the issue template as much as possible. 12 | - The title should be short and descriptive. 13 | - Explain the conditions which led you to report this issue and the context. 14 | - The context should lead to something, an idea or a problem that you’re facing. 15 | - Remain clear and concise. 16 | - Format your messages to help the reader focus on what matters and understand the structure of your message, use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) 17 | 18 | - type: checkboxes 19 | id: terms 20 | attributes: 21 | label: Welcome to the AWS Observability Accelerator 22 | options: 23 | - label: Yes, I've searched similar issues on [GitHub](https://github.com/aws-observability/terraform-aws-observability-accelerator/issues) and didn't find any. 
24 | required: true 25 | 26 | - type: input 27 | attributes: 28 | label: AWS Observability Accelerator Release version 29 | description: | 30 | `latest` is not considered as a valid version. 31 | Enter release number! 32 | placeholder: Your version here. 33 | validations: 34 | required: true 35 | 36 | - type: textarea 37 | attributes: 38 | label: What is your environment, configuration and the example used? 39 | description: | 40 | Terraform version, link to example used or your main.tf content etc. 41 | 42 | Use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) if needed. 43 | placeholder: Add information here. 44 | validations: 45 | required: true 46 | 47 | - type: textarea 48 | attributes: 49 | label: What did you do and What did you see instead? 50 | description: | 51 | Provide error details and the expected details. 52 | 53 | Use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) if needed. 54 | placeholder: Add information here. 55 | validations: 56 | required: true 57 | 58 | - type: textarea 59 | attributes: 60 | label: Additional Information 61 | description: Use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown) if needed. 62 | placeholder: Add information here. 
63 | render: shell 64 | validations: 65 | required: false 66 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE] <title>' 5 | labels: 'feature-request' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### Is your feature request related to a problem? Please describe 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | 14 | #### Describe the solution you'd like 15 | A clear and concise description of what you want to happen. 16 | 17 | 18 | #### Describe alternatives you've considered 19 | A clear and concise description of any alternative solutions or features you've considered. 20 | 21 | 22 | #### Additional context 23 | Add any other context or screenshots about the feature request here. 
24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: I have a Question 4 | title: '[QUESTION] <title>' 5 | labels: 'question' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### Please describe your question here 11 | <!-- Provide as much information as possible to explain your question --> 12 | 13 | 14 | #### Provide link to the example related to the question 15 | <!-- Please provide the link to the example related to this question from this repo --> 16 | 17 | 18 | #### Additional context 19 | <!-- Add any other context or screenshots about the question here --> 20 | 21 | 22 | #### More 23 | 24 | - [ ] Yes, I have checked the repo for existing issues before raising this question 25 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | ### What does this PR do? 3 | 4 | <!-- A brief description of the change being made with this pull request. --> 5 | 6 | 🛑 Please open an issue first to discuss any significant work and flesh out details/direction - we would hate for your time to be wasted. Consult the CONTRIBUTING guide for submitting pull-requests. 7 | 8 | 9 | ### Motivation 10 | 11 | <!-- What inspired you to submit this pull request? 
--> 12 | 13 | 14 | ### More 15 | 16 | - [ ] Yes, I have tested the PR using my local account setup (Provide any test evidence report under Additional Notes) 17 | - [ ] Yes, I ran `pre-commit run -a` with this PR 18 | - [ ] Yes, I have added a new example under [examples](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/examples) to support my PR (when applicable) 19 | - [ ] Yes, I have updated the [Pages](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/docs) for this feature 20 | 21 | **Note**: Not all the PRs required examples and docs. 22 | 23 | ### For Moderators 24 | - [ ] E2E Test successfully complete before merge? 25 | 26 | ### Additional Notes 27 | 28 | <!-- Anything else we should know when reviewing? --> 29 | -------------------------------------------------------------------------------- /.github/workflows/docbuild.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.x 17 | - run: pip install mkdocs-material 18 | - run: mkdocs gh-deploy --force 19 | -------------------------------------------------------------------------------- /.github/workflows/linkcheck.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeout": "5s", 3 | "retryOn429": true, 4 | "retryCount": 5, 5 | "fallbackRetryDelay": "30s", 6 | "aliveStatusCodes": [200, 206], 7 | "httpHeaders": [ 8 | { 9 | "urls": ["https://help.github.com/"], 10 | "headers": { 11 | "Accept-Encoding": "zstd, br, gzip, deflate" 12 | } 13 | } 14 | ], 15 | "ignorePatterns": [ 16 | { 17 | "pattern": [ 18 | "localhost" 19 | ] 20 | }, 21 | { 22 | "pattern": [ 23 | "127.0.0.1" 24 | ] 25 | } 26 | ] 
27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/markdown-link-check.yaml: -------------------------------------------------------------------------------- 1 | name: Check Markdown links 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "**/*.md" 9 | 10 | pull_request: 11 | branches: 12 | - main 13 | paths: 14 | - "**/*.md" 15 | 16 | jobs: 17 | markdown-link-check: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-node@v3 22 | with: 23 | node-version: '20.x' 24 | - name: install markdown-link-check 25 | run: npm install -g markdown-link-check@3.12.2 26 | - name: markdown-link-check version 27 | run: npm list -g markdown-link-check 28 | - name: Run markdown-link-check on MD files 29 | run: find docs -name "*.md" | xargs -n 1 markdown-link-check -q -c .github/workflows/linkcheck.json 30 | -------------------------------------------------------------------------------- /.github/workflows/plan-examples.py: -------------------------------------------------------------------------------- 1 | import json 2 | import glob 3 | import re 4 | 5 | 6 | def get_examples(): 7 | """ 8 | Get all Terraform example root directories using their respective `versions.tf`; 9 | returning a string formatted json array of the example directories minus those that are excluded 10 | """ 11 | exclude = { 12 | 'examples/eks-cross-account-with-central-amp', # Add examples here to exclude from terraform plan 13 | } 14 | 15 | projects = { 16 | x.replace('/versions.tf', '') 17 | for x in glob.glob('examples/**/versions.tf', recursive=True) 18 | if not re.match(r'^.+/_', x) 19 | } 20 | 21 | print(json.dumps(list(projects.difference(exclude)))) 22 | 23 | 24 | if __name__ == '__main__': 25 | get_examples() 26 | -------------------------------------------------------------------------------- /.github/workflows/stale_issue_pr.yaml: 
-------------------------------------------------------------------------------- 1 | name: 'Stale issue & PR handler' 2 | on: 3 | workflow_dispatch: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | stale: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | steps: 14 | - uses: actions/stale@main 15 | id: stale 16 | with: 17 | ascending: true 18 | close-issue-message: 'Issue closed due to inactivity.' 19 | close-pr-message: 'Pull request closed due to inactivity.' 20 | days-before-close: 10 21 | days-before-stale: 60 22 | stale-issue-label: stale 23 | stale-pr-label: stale 24 | # Issues and PRs carrying any of these labels are never marked stale 25 | exempt-issue-labels: 'bug,enhancement,"feature request"' 26 | exempt-pr-labels: 'bug,enhancement' 27 | operations-per-run: 100 28 | stale-issue-message: | 29 | This issue has been automatically marked as stale because it has been open 60 days 30 | with no activity. Remove stale label or comment or this issue will be closed in 10 days 31 | stale-pr-message: | 32 | This PR has been automatically marked as stale because it has been open 60 days 33 | with no activity. Remove stale label or comment or this PR will be closed in 10 days 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | .build 4 | 5 | # Local .terraform directories 6 | **/.terraform/* 7 | 8 | # Terraform lockfile 9 | .terraform.lock.hcl 10 | 11 | # .tfstate files 12 | *.tfstate 13 | *.tfstate.* 14 | *.tfplan 15 | 16 | # Crash log files 17 | crash.log 18 | 19 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 20 | # password, private keys, and other secrets. These should not be part of version 21 | # control as they are data points which are potentially sensitive and subject 22 | # to change depending on the environment. 
23 | *.tfvars 24 | 25 | # Ignore override files as they are usually used to override resources locally and so 26 | # are not checked in 27 | override.tf 28 | override.tf.json 29 | *_override.tf 30 | *_override.tf.json 31 | 32 | # Ignore CLI configuration files 33 | .terraformrc 34 | terraform.rc 35 | 36 | # Locals 37 | kubeconfig* 38 | kube-config* 39 | local_tf_state/ 40 | .vscode 41 | .gitallowed 42 | site 43 | .env* 44 | 45 | # Checks 46 | .tfsec 47 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: f71fa2c1f9cf5cb705f73dffe4b21f7c61470ba9 # frozen: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | args: ['--markdown-linebreak-ext=md'] 7 | - id: end-of-file-fixer 8 | - id: check-merge-conflict 9 | - id: detect-private-key 10 | - id: detect-aws-credentials 11 | args: ['--allow-missing-credentials'] 12 | - repo: https://github.com/antonbabenko/pre-commit-terraform 13 | rev: 1d54ea2b9950097568c6a7a2e2bcb6d4b4ebfb61 # frozen: v1.77.0 14 | hooks: 15 | - id: terraform_fmt 16 | - id: terraform_docs 17 | args: 18 | - '--args=--lockfile=false' 19 | - id: terraform_validate 20 | exclude: deploy 21 | - id: terraform_tflint 22 | args: 23 | - '--args=--disable-rule=terraform_unused_declarations' 24 | - '--args=--config=__GIT_WORKING_DIR__/.tflint.hcl' 25 | - id: terraform_tfsec 26 | files: ^examples/ # only scan `examples/*` which are the implementation 27 | args: 28 | - --args=--config-file=__GIT_WORKING_DIR__/tfsec.yaml 29 | - --args=--concise-output 30 | -------------------------------------------------------------------------------- /.tflint.hcl: -------------------------------------------------------------------------------- 1 | # https://github.com/terraform-linters/tflint/blob/master/docs/user-guide/module-inspection.md 2 | # borrowed & modified 
indefinitely from https://github.com/ksatirli/building-infrastructure-you-can-mostly-trust/blob/main/.tflint.hcl 3 | 4 | plugin "aws" { 5 | enabled = true 6 | version = "0.21.1" 7 | source = "github.com/terraform-linters/tflint-ruleset-aws" 8 | } 9 | 10 | config { 11 | module = true 12 | force = false 13 | } 14 | 15 | rule "terraform_required_providers" { 16 | enabled = true 17 | } 18 | 19 | rule "terraform_required_version" { 20 | enabled = true 21 | } 22 | 23 | rule "terraform_naming_convention" { 24 | enabled = true 25 | format = "snake_case" 26 | } 27 | 28 | rule "terraform_typed_variables" { 29 | enabled = true 30 | } 31 | 32 | rule "terraform_unused_declarations" { 33 | enabled = true 34 | } 35 | 36 | rule "terraform_comment_syntax" { 37 | enabled = true 38 | } 39 | 40 | rule "terraform_deprecated_index" { 41 | enabled = true 42 | } 43 | 44 | rule "terraform_deprecated_interpolation" { 45 | enabled = true 46 | } 47 | 48 | rule "terraform_documented_outputs" { 49 | enabled = true 50 | } 51 | 52 | rule "terraform_documented_variables" { 53 | enabled = true 54 | } 55 | 56 | rule "terraform_module_pinned_source" { 57 | enabled = true 58 | } 59 | 60 | rule "terraform_standard_module_structure" { 61 | enabled = true 62 | } 63 | 64 | rule "terraform_workspace_remote" { 65 | enabled = true 66 | } 67 | -------------------------------------------------------------------------------- /ADOPTERS.md: -------------------------------------------------------------------------------- 1 | # Who is using AWS Observability Accelerator for Terraform? 2 | 3 | AWS Observability Accelerator for Terraform has a variety of users and use cases to configure and manage Observability on EKS/ECS clusters. 4 | Many customers want to learn from others who have already implemented AWS Observability Accelerator in their environments. 5 | 6 | The following is a self-reported list of users to help identify adoption and points of contact. 
7 | 8 | ## Add yourself 9 | 10 | If you are using AWS Observability Accelerator please consider adding yourself as a user by opening a pull request to this file. 11 | 12 | ## Adopters (Alphabetical) 13 | 14 | | Organization | Description | Contacts | Link | 15 | | --- | --- | --- | --- | 16 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Require approvals from someone in the owner team before merging 2 | # More information here: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners 3 | 4 | * @aws-observability/aws-observability-accelerator 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. 
A copy of the License is located at 4 | 5 | http://aws.amazon.com/apache2.0/ 6 | 7 | or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /docs/adothealth/index.md: -------------------------------------------------------------------------------- 1 | # Monitoring ADOT collector health 2 | 3 | The OpenTelemetry collector produces metrics to monitor the entire pipeline. In the [EKS monitoring module](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/), we have enabled those metrics by default with the AWS Distro for OpenTelemetry (ADOT) collector. You get a Grafana dashboard named `OpenTelemetry Health Collector`. This dashboard shows useful telemetry information about the ADOT collector itself which can be helpful when you want to troubleshoot any issues with the collector or understand how much resources the collector is consuming. 4 | 5 | !!!note 6 | The dashboard and metrics used are not specific to Amazon EKS, but applicable to any environment running an OpenTelemetry collector. 7 | 8 | Below diagram shows an example data flow and the components in an ADOT collector: 9 | 10 | ![ADOTCollectorComponents](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/71a4a53d-f9fd-45b0-81cb-e060d2b3915b) 11 | 12 | 13 | In this dashboard, there are five sections. 
Each section has [metrics](https://aws-observability.github.io/observability-best-practices/guides/operational/adot-at-scale/operating-adot-collector/#collecting-health-metrics-from-the-collector) relevant to the various [components](https://opentelemetry.io/docs/demo/collector-data-flow-dashboard/#data-flow-overview) of the AWS Distro for OpenTelemetry (ADOT) collector : 14 | 15 | ### Receivers 16 | Shows the receiver’s accepted and refused rate/count of spans and metric points that are pushed into the telemetry pipeline. 17 | 18 | ### Processors 19 | Shows the accepted and refused rate/count of spans and metric points pushed into next component in the pipeline. The batch metrics can help to understand how often metrics are sent to exporter and the batch size. 20 | 21 | ![receivers_processors](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/34bfb881-1004-480f-8e0e-4ded10463d31) 22 | 23 | 24 | ### Exporters 25 | Shows the exporter’s accepted and refused rate/count of spans and metric points that are pushed to any of the destinations. It also shows the size and capacity of the retry queue. These metrics can be used to understand if the collector is having issues in sending trace or metric data to the destination configured. 26 | 27 | ![exporters](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/0bceaa32-a52c-4e23-9b6f-8b208e337f4f) 28 | 29 | 30 | ### Collectors 31 | Shows the collector’s operational metrics (Memory, CPU, uptime). This can be used to understand how much resources the collector is consuming. 32 | 33 | ![collectors](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/fc68d2f4-d6a1-4d34-ac05-78e57310c28e) 34 | 35 | ### Data Flow 36 | Shows the metrics and spans data flow through the collector’s components. 
37 | 38 | ![dataflow](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/ffcdafca-5718-4d19-889d-b9503a295679) 39 | 40 | !!!note 41 | To read more about the metrics and the dashboard used, visit the upstream documentation [here](https://opentelemetry.io/docs/demo/collector-data-flow-dashboard/). 42 | 43 | ## Deploy instructions 44 | 45 | As this is enabled by default in the EKS monitoring module, visit [this example’s instructions](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/#prerequisites) which will provide the ADOT collector health dashboard after deployment 46 | 47 | ## Disable ADOT health monitoring 48 | 49 | You can disable ADOT collector health metrics by setting the [variable](https://github.com/aws-observability/terraform-aws-observability-accelerator/blob/main/modules/eks-monitoring/variables.tf) enable_adotcollector_metrics to false. 50 | 51 | ``` 52 | variable "enable_adotcollector_metrics" { 53 | description = "Enables collection of ADOT collector metrics" 54 | type = bool 55 | default = true 56 | } 57 | ``` 58 | -------------------------------------------------------------------------------- /docs/contributors.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | The content on this site is maintained by the Solutions Architects from the 4 | AWS observability team with support from the AWS service teams and other 5 | volunteers from across the organization. 6 | 7 | Our goal is to make it easier to use AWS Open Source Observability Services. 
8 | 9 | The core team includes the following people: 10 | 11 | * Abhi Khanna 12 | * Elamaran Shanmugam 13 | * Imaya Kumar Jagannathan 14 | * Jerome DECQ 15 | * Kevin Lewin 16 | * Michael Hausenblas 17 | * Rodrigue Koffi 18 | * Toshal Dudhwala 19 | * Vikram Venkataraman 20 | 21 | We welcome the wider open source community and thank [those who contribute](https://github.com/aws-observability/terraform-aws-observability-accelerator/graphs/contributors) 22 | to this project. 23 | 24 | Note that all information published on this site is available via the 25 | Apache 2.0 license. 26 | -------------------------------------------------------------------------------- /docs/ecs/ecs-monitoring-on-ec2.md: -------------------------------------------------------------------------------- 1 | # Amazon ECS on EC2 cluster monitoring 2 | 3 | This example demonstrates how to monitor your Amazon Elastic Container Service on EC2 4 | (Amazon ECS) cluster with the Observability Accelerator's ECS monitoring module. 5 | 6 | The module collects Prometheus metrics from tasks running on ECS and sends them to Prometheus using AWS Distro for OpenTelemetry Collector (ADOT). 7 | 8 | You can either run the collector as a sidecar or deploy the collector as its own ECS service for the entire cluster. 9 | ECS tasks with Prometheus endpoints are discovered using extension 10 | [ecsobserver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/observer/ecsobserver/README.md). 11 | (Unlike EKS, there is no built-in discovery for ECS inside Prometheus.) 12 | 13 | Additionally, you can optionally collect custom Prometheus metrics from your applications running 14 | on your ECS cluster. 15 | 16 | ## Prerequisites 17 | 18 | !!! note 19 | Make sure to complete the [prerequisites section](https://aws-observability.github.io/terraform-aws-observability-accelerator/concepts/#prerequisites) before proceeding. 
20 | 21 | ## Available Samples for various Workloads 22 | Make sure to update your existing Application Task Definitions based on the workload type: 23 | 24 | #### 1. [Java/JMX workload for ECS Clusters](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus-Sample-Workloads-ECS-javajmx.html) 25 | #### 2. [NGINX workload for Amazon ECS clusters](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus-Setup-nginx-ecs.html) 26 | #### 3. [App Mesh workload](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus-Sample-Workloads-ECS-appmesh.html) 27 | 28 | ## Setup 29 | 30 | #### 1. Add the ECS Monitoring Module to your existing ECS Cluster 31 | 32 | ``` 33 | module "ecs_monitoring" { 34 | source = "../../modules/ecs-monitoring" 35 | aws_ecs_cluster_name = module.ecs_cluster.cluster_name 36 | task_role_arn = module.ecs_cluster.task_exec_iam_role_arn 37 | execution_role_arn = module.ecs_cluster.task_exec_iam_role_arn 38 | 39 | depends_on = [ 40 | module.ecs_cluster 41 | ] 42 | } 43 | ``` 44 | 45 | ## Deploy 46 | 47 | Simply run this command to deploy the example: 48 | 49 | ```bash 50 | terraform apply 51 | ``` 52 | 53 | ## Visualization 54 | ![image](https://github.com/ruchimo/terraform-aws-observability-accelerator/assets/106240341/006c387e-92e8-45c8-ae2e-825900990741) 55 | 56 | 57 | ## Cleanup 58 | 59 | To clean up your environment, destroy the Terraform example by running: 60 | 61 | ```sh 62 | terraform destroy 63 | ``` 64 | -------------------------------------------------------------------------------- /docs/eks/destroy.md: -------------------------------------------------------------------------------- 1 | # Destroy resources 2 | 3 | If you leave this stack running, you will continue to incur charges. 
To remove all resources 4 | created by Terraform, [refresh your Grafana API key](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/#6-grafana-api-key) and run the command below. 5 | 6 | !!! warning 7 | Be careful, this command will remove everything created by Terraform. If you wish 8 | to keep your Amazon Managed Grafana or Amazon Managed Service for Prometheus workspaces, remove them 9 | from your Terraform state before running the destroy command. 10 | 11 | ```bash 12 | terraform destroy 13 | ``` 14 | 15 | To remove resources from your Terraform state, run 16 | 17 | ```bash 18 | # prometheus workspace 19 | terraform state rm "module.eks_observability_accelerator.aws_prometheus_workspace.this[0]" 20 | ``` 21 | 22 | !!! note 23 | To view all the features proposed by this module, 24 | visit the [module documentation](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/modules/eks-monitoring). 25 | -------------------------------------------------------------------------------- /docs/eks/eks-apiserver.md: -------------------------------------------------------------------------------- 1 | # Monitoring Amazon EKS API server 2 | 3 | AWS Distro for OpenTelemetry (ADOT) enables Amazon EKS API server monitoring by default and provides three Grafana dashboards: 4 | 5 | ## Kube-apiserver (basic) 6 | 7 | The basic dashboard shows metrics recommended in [EKS Best Practices Guides - Monitor Control Plane Metrics](https://aws.github.io/aws-eks-best-practices/reliability/docs/controlplane/#monitor-control-plane-metrics) and provides request rate and latency for API server, latency for ETCD server and overall workqueue service time and latency. It allows a drill-down per API server. 
8 | 9 | ![API server basic dashboard](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/d4ba74c4-7530-4037-b373-fa68986cabfc) 10 | 11 | 12 | ## Kube-apiserver (advanced) 13 | 14 | The advanced dashboard is derived from kube-prometheus-stack `Kubernetes / API server` dashboard and provides a detailed metrics drill-down for example per READ and WRITE operations per component (like deployments, configmaps etc.). 15 | 16 | ![API server advanced dashboard](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/8d614a6d-38c5-47bc-acfc-6cea4bc1f070) 17 | 18 | 19 | ## Kube-apiserver (troubleshooting) 20 | 21 | This dashboard can be used to troubleshoot API server problems like latency, errors etc. 22 | 23 | A detailed description for usage and background information regarding the dashboard can be found in AWS Containers blog post [Troubleshooting Amazon EKS API servers with Prometheus](https://aws.amazon.com/blogs/containers/troubleshooting-amazon-eks-api-servers-with-prometheus/). 24 | 25 | ![API server troubleshooting dashboard](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/687b5fac-8ae4-4a49-924c-6b3d708b9569) 26 | -------------------------------------------------------------------------------- /docs/eks/gpu-monitoring.md: -------------------------------------------------------------------------------- 1 | # Monitoring NVIDIA GPU Workloads 2 | 3 | GPUs play an integral part in data intensive workloads. The eks-monitoring module of the Observability Accelerator provides the ability to deploy the NVIDIA DCGM Exporter Dashboard. 4 | The dashboard utilizes metrics scraped from the `/metrics` endpoint that are exposed when running the nvidia gpu operator with the [DCGM exporter](https://developer.nvidia.com/blog/monitoring-gpus-in-kubernetes-with-dcgm/) and NVSMI binary. 
5 | 6 | !!!note 7 | In order to make use of this dashboard, you will need to have a GPU backed EKS cluster and deploy the [GPU operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/amazon-eks.html) 8 | The recommended way of deploying the GPU operator is the [Data on EKS Blueprint](https://github.com/aws-ia/terraform-aws-eks-data-addons/blob/main/nvidia-gpu-operator.tf) 9 | 10 | ## Deployment 11 | 12 | This is enabled by default in the [eks-monitoring module](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/). 13 | 14 | ## Dashboards 15 | 16 | In order to start producing diagnostic metrics you must first deploy the nvidia SMI binary. nvidia-smi (also NVSMI) provides monitoring and management capabilities for each of NVIDIA’s devices from Fermi and higher architecture families. We can now deploy the nvidia-smi binary, which shows diagnostic information about all GPUs visible to the container: 17 | 18 | ``` 19 | cat << EOF | kubectl apply -f - 20 | apiVersion: v1 21 | kind: Pod 22 | metadata: 23 | name: nvidia-smi 24 | spec: 25 | restartPolicy: OnFailure 26 | containers: 27 | - name: nvidia-smi 28 | image: "nvidia/cuda:11.0.3-base-ubuntu20.04" 29 | args: 30 | - "nvidia-smi" 31 | resources: 32 | limits: 33 | nvidia.com/gpu: 1 34 | EOF 35 | ``` 36 | After producing the metrics they should populate the DCGM exporter dashboard: 37 | 38 | ![image](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/97046295/66e8ae83-3a78-48b8-a9fc-4460a5a4d173) 39 | -------------------------------------------------------------------------------- /docs/eks/logs.md: -------------------------------------------------------------------------------- 1 | # Viewing Logs 2 | 3 | By default, we deploy a FluentBit daemon set in the cluster to collect worker 4 | logs for all namespaces. Logs collection can be disabled with 5 | `enable_logs = false`. 
Logs are collected and exported to Amazon CloudWatch Logs, 6 | which enables you to centralize the logs from all of your systems, applications, 7 | and AWS services that you use, in a single, highly scalable service. 8 | 9 | Further configuration options are available in the [module documentation](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/modules/eks-monitoring#inputs). 10 | This guide shows how you can leverage CloudWatch Logs in Amazon Managed Grafana 11 | for your cluster and application logs. 12 | 13 | ## Using CloudWatch Logs as data source in Grafana 14 | 15 | Follow [the documentation](https://docs.aws.amazon.com/grafana/latest/userguide/using-amazon-cloudwatch-in-AMG.html) 16 | to enable Amazon CloudWatch as a data source. Make sure to provide permissions. 17 | 18 | !!! tip 19 | If you created your workspace with our [provided example](https://aws-observability.github.io/terraform-aws-observability-accelerator/helpers/managed-grafana/), 20 | Amazon CloudWatch data source has already been setup for you. 21 | 22 | All logs are delivered in the following CloudWatch Log groups naming pattern: 23 | `/aws/eks/observability-accelerator/{cluster-name}/{namespace}`. Log streams 24 | follow `{container-name}.{pod-name}`. In Grafana, querying and analyzing logs 25 | is done with [CloudWatch Logs Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AnalyzingLogData.html) 26 | 27 | ### Example - ADOT collector logs 28 | 29 | Select one or many log groups and run the following query. 
The example below 30 | queries AWS Distro for OpenTelemetry (ADOT) logs 31 | 32 | ```console 33 | fields @timestamp, log 34 | | order @timestamp desc 35 | | limit 100 36 | ``` 37 | 38 | <img width="1987" alt="Screenshot 2023-03-27 at 19 08 35" src="https://user-images.githubusercontent.com/10175027/228037030-95005f47-ff46-4f7a-af74-d31809c52fcd.png"> 39 | 40 | 41 | ### Example - Using time series visualizations 42 | 43 | [CloudWatch Logs syntax](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax.html) 44 | provides powerful functions to extract data from your logs. The `stats()` 45 | function allows you to calculate aggregate statistics with log field values. 46 | This is useful for visualizing non-metric data from your applications. 47 | 48 | In the example below, we use the following query to graph the number of metrics 49 | collected by the ADOT collector 50 | 51 | ```console 52 | fields @timestamp, log 53 | | parse log /"#metrics": (?<metrics_count>\d+)}/ 54 | | stats avg(metrics_count) by bin(5m) 55 | | limit 100 56 | ``` 57 | 58 | !!! tip 59 | You can add logs in your dashboards with logs panel types or time series 60 | depending on your query results type. 61 | 62 | <img width="2056" alt="image" src="https://user-images.githubusercontent.com/10175027/228037186-12691590-0bfe-465b-a83b-5c4f583ebf96.png"> 63 | 64 | !!! warning 65 | Querying CloudWatch logs will incur costs per GB scanned. Use small time 66 | windows and limits in your queries. Check out the CloudWatch 67 | [pricing page](https://aws.amazon.com/cloudwatch/pricing/) for more information.
68 | -------------------------------------------------------------------------------- /docs/eks/multicluster.md: -------------------------------------------------------------------------------- 1 | # AWS EKS Multicluster Observability (single AWS Account) 2 | 3 | This example shows how to use the [AWS Observability Accelerator](https://github.com/aws-observability/terraform-aws-observability-accelerator), 4 | with more than one EKS cluster in a single account and visualize the collected 5 | metrics from all the clusters in the dashboards of a common 6 | `Amazon Managed Grafana` workspace. 7 | 8 | ## Prerequisites 9 | 10 | #### 1. EKS clusters 11 | 12 | Using the example [eks-cluster-with-vpc](https://aws-observability.github.io/terraform-aws-observability-accelerator/helpers/new-eks-cluster/), create two EKS clusters with the names: 13 | 14 | 1. `eks-cluster-1` 15 | 2. `eks-cluster-2` 16 | 17 | #### 2. Amazon Managed Service for Prometheus (AMP) workspace 18 | 19 | We recommend that you create a new AMP workspace. To do that you can run the following command. 20 | 21 | Ensure you have the following necessary IAM permissions 22 | 23 | * `aps.CreateWorkspace` 24 | 25 | ```sh 26 | export TF_VAR_managed_prometheus_workspace_id=$(aws amp create-workspace --alias observability-accelerator --query='workspaceId' --output text) 27 | ``` 28 | 29 | #### 3. Amazon Managed Grafana (AMG) workspace 30 | 31 | To run this example you need an AMG workspace. If you have 32 | an existing workspace, create an environment variable as described below. 33 | To create a new workspace, visit our supporting example for managed Grafana. 34 | 35 | !!! note 36 | For the URL `https://g-xyz.grafana-workspace.eu-central-1.amazonaws.com`, the workspace ID would be `g-xyz` 37 | 38 | ```sh 39 | export TF_VAR_managed_grafana_workspace_id=g-xxx 40 | ``` 41 | 42 | #### 4. Grafana API Key 43 | 44 | AMG provides a control plane API for generating Grafana API keys. 
45 | As a security best practice, we will provide Terraform with a short-lived API key to 46 | run the `apply` or `destroy` command. 47 | 48 | Ensure you have the following necessary IAM permissions 49 | 50 | * `grafana:CreateWorkspaceApiKey` 51 | * `grafana:DeleteWorkspaceApiKey` 52 | 53 | ```sh 54 | export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 7200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` 55 | ``` 56 | 57 | ## Setup 58 | 59 | #### 1. Download sources and initialize Terraform 60 | 61 | ```sh 62 | git clone https://github.com/aws-observability/terraform-aws-observability-accelerator.git 63 | cd terraform-aws-observability-accelerator/examples/eks-multicluster 64 | terraform init 65 | ``` 66 | 67 | #### 2. Deploy 68 | 69 | Verify by looking at the file `variables.tf` that there are two EKS clusters targeted for deployment by the names/ids: 70 | 71 | 1. `eks-cluster-1` 72 | 2. `eks-cluster-2` 73 | 74 | The difference in deployment between these clusters is that Terraform, when setting up the EKS cluster behind variable `eks_cluster_1_id` for observability, also sets up: 75 | 76 | * Dashboard folder and files in Amazon Managed Grafana 77 | * Prometheus and Java, alerting and recording rules in Amazon Managed Service for Prometheus 78 | 79 | !!! warning 80 | To override the defaults, create a `terraform.tfvars` and change the default values of the variables. 81 | 82 | Run the following command to deploy 83 | 84 | ```sh 85 | terraform apply --auto-approve 86 | ``` 87 | 88 | ## Verifying Multicluster Observability 89 | 90 | Once you have successfully run the above setup, you should be able to see dashboards similar to the images shown below in the `Amazon Managed Grafana` workspace. 91 | 92 | Note how you are able to use the `cluster` dropdown to filter the dashboards to metrics collected from a specific EKS cluster.
93 | 94 | <img width="2557" alt="eks-multicluster-1" src="https://user-images.githubusercontent.com/4762573/233949110-ce275d06-7ad8-494c-b527-d9c2a0fb6645.png"> 95 | 96 | <img width="2560" alt="eks-multicluster-2" src="https://user-images.githubusercontent.com/4762573/233949227-f401f81e-e0d6-4242-96ad-0bcd39ad4e2d.png"> 97 | 98 | ## Cleanup 99 | 100 | To clean up entirely, run the following command: 101 | 102 | ```sh 103 | terraform destroy --auto-approve 104 | ``` 105 | -------------------------------------------------------------------------------- /docs/helpers/ecs-cluster-with-vpc.md: -------------------------------------------------------------------------------- 1 | # Example Amazon ECS Cluster with VPC 2 | This example deploys an AWS ECS Cluster with VPC and also adds the ECS Monitoring module 3 | 4 | ## Prerequisites 5 | 6 | !!! note 7 | Make sure to complete the [prerequisites section](https://aws-observability.github.io/terraform-aws-observability-accelerator/concepts/#prerequisites) before proceeding. 8 | 9 | ## Setup 10 | #### 1. Download sources and initialize Terraform 11 | 12 | ``` 13 | git clone https://github.com/aws-observability/terraform-aws-observability-accelerator.git 14 | cd terraform-aws-observability-accelerator/examples/ecs-cluster-with-vpc 15 | terraform init 16 | ``` 17 | 18 | #### 2. AWS Region 19 | Specify the AWS Region where the resources will be deployed: 20 | 21 | ``` 22 | export TF_VAR_aws_region=xxx 23 | ``` 24 | 25 | #### 3.
Terraform Plan to validate the changes/updates 26 | 27 | ``` 28 | terraform plan 29 | ``` 30 | 31 | ## Deploy 32 | 33 | Simply run this command to deploy the example 34 | 35 | ```bash 36 | terraform apply 37 | ``` 38 | 39 | ## Cleanup 40 | 41 | To clean up your environment, destroy the Terraform example by running 42 | 43 | ```sh 44 | terraform destroy 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/helpers/managed-grafana.md: -------------------------------------------------------------------------------- 1 | # Creating a new Amazon Managed Grafana Workspace 2 | 3 | This example creates an Amazon Managed Grafana Workspace with 4 | Amazon CloudWatch, AWS X-Ray and Amazon Managed Service for Prometheus 5 | datasources. 6 | 7 | The authentication method chosen for this example is with IAM Identity 8 | Center (former SSO). You can extend this example to add SAML. 9 | 10 | ## Prerequisites 11 | 12 | !!! note 13 | Make sure to complete the [prerequisites section](https://aws-observability.github.io/terraform-aws-observability-accelerator/concepts/#prerequisites) before proceeding. 14 | 15 | ## Setup 16 | 17 | ### 1. Download sources and initialize Terraform 18 | 19 | ``` 20 | git clone https://github.com/aws-observability/terraform-aws-observability-accelerator.git 21 | cd terraform-aws-observability-accelerator/examples/managed-grafana-workspace 22 | terraform init 23 | ``` 24 | 25 | ### 2. 
AWS Region 26 | 27 | Specify the AWS Region where the resources will be deployed: 28 | 29 | ```bash 30 | export TF_VAR_aws_region=xxx 31 | ``` 32 | 33 | ## Deploy 34 | 35 | Simply run this command to deploy the example 36 | 37 | ```bash 38 | terraform apply 39 | ``` 40 | 41 | ## Authentication 42 | 43 | After apply, Terraform will output the workspace's URL, but you need to: 44 | 45 | - [Set up user(s)](https://docs.aws.amazon.com/singlesignon/latest/userguide/getting-started.html) in the IAM Identity Center (formerly SSO) 46 | - [Assign the user(s) to the workspace](https://docs.aws.amazon.com/grafana/latest/userguide/AMG-manage-users-and-groups-AMG.html) with proper permissions 47 | 48 | <img width="1936" alt="Screenshot 2023-03-19 at 12 04 45" src="https://user-images.githubusercontent.com/10175027/226172947-f8588ed3-3751-47c1-a3ed-fb4c2d4d847e.png"> 49 | 50 | 51 | ## Cleanup 52 | 53 | To clean up your environment, destroy the Terraform example by running 54 | 55 | ```sh 56 | terraform destroy 57 | ``` 58 | -------------------------------------------------------------------------------- /docs/helpers/new-eks-cluster.md: -------------------------------------------------------------------------------- 1 | # Creating a new Amazon EKS cluster with VPC 2 | 3 | This example deploys the following: 4 | 5 | - New sample VPC, 3 Private Subnets and 3 Public Subnets 6 | - Internet gateway for Public Subnets and NAT Gateway for Private Subnets 7 | - EKS Cluster Control plane with one managed node group 8 | 9 | ## Prerequisites 10 | 11 | !!! note 12 | Make sure to complete the [prerequisites section](https://aws-observability.github.io/terraform-aws-observability-accelerator/concepts/#prerequisites) before proceeding. 13 | 14 | 15 | ## Setup 16 | 17 | ### 1.
Download sources and initialize Terraform 18 | 19 | ``` 20 | git clone https://github.com/aws-observability/terraform-aws-observability-accelerator.git 21 | cd terraform-aws-observability-accelerator/examples/eks-cluster-with-vpc/ 22 | terraform init 23 | ``` 24 | 25 | ### 2. AWS Region 26 | 27 | Specify the AWS Region where the resources will be deployed: 28 | 29 | ```bash 30 | export TF_VAR_aws_region=xxx 31 | ``` 32 | 33 | ### 3. Cluster Name 34 | 35 | Specify the name of your EKS cluster: 36 | 37 | ```bash 38 | export TF_VAR_cluster_name=xxx 39 | ``` 40 | 41 | ## Deploy 42 | 43 | Simply run this command to deploy the example 44 | 45 | ```bash 46 | terraform apply 47 | ``` 48 | 49 | ## Additional configuration (optional) 50 | 51 | 52 | ### 1. Instance Type 53 | 54 | Depending on your region or limitations in your account, you might need to change to a different instance type. 55 | To do this, you can define the instance type to use: 56 | ```bash 57 | export TF_VAR_managed_node_instance_type=xxx 58 | ``` 59 | 60 | ### 2. Amazon Elastic Kubernetes Service (Amazon EKS) Version 61 | 62 | You can also override the version of the cluster: 63 | ```bash 64 | export TF_VAR_eks_version=xxx 65 | ``` 66 | 67 | ## Login to your cluster 68 | 69 | EKS cluster details, such as the cluster name, can be obtained from the Terraform output or from the AWS Console. 70 | Use the following commands on the local machine from which you want to interact with your EKS cluster. 71 | 72 | ### 1. Run `update-kubeconfig` command 73 | 74 | The `~/.kube/config` file gets updated with the cluster details and certificate by the command below 75 | 76 | aws eks --region <enter-your-region> update-kubeconfig --name <cluster-name> 77 | 78 | ### 2. List all the worker nodes by running the command below 79 | 80 | kubectl get nodes 81 | 82 | ### 3. List all the pods running in `kube-system` namespace 83 | 84 | kubectl get pods -n kube-system 85 | 86 | ## Cleanup 87 | 88 | To clean up your environment, destroy the Terraform modules in reverse order.
89 | 90 | Destroy the Kubernetes Add-ons, EKS cluster with Node groups and VPC 91 | 92 | ```sh 93 | terraform destroy -target="module.eks_blueprints_kubernetes_addons" -auto-approve 94 | terraform destroy -target="module.eks_blueprints" -auto-approve 95 | terraform destroy -target="module.vpc" -auto-approve 96 | ``` 97 | 98 | Finally, destroy any additional resources that are not in the above modules 99 | 100 | ```sh 101 | terraform destroy -auto-approve 102 | ``` 103 | -------------------------------------------------------------------------------- /docs/images/aws-favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/docs/images/aws-favicon.png -------------------------------------------------------------------------------- /docs/images/aws-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/docs/images/aws-logo.png -------------------------------------------------------------------------------- /docs/images/dark-o11y-accelerator-amp-xray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/docs/images/dark-o11y-accelerator-amp-xray.png -------------------------------------------------------------------------------- /docs/images/light-o11y-accelerator-amp-xray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/docs/images/light-o11y-accelerator-amp-xray.png 
-------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # AWS Observability Accelerator for Terraform 2 | 3 | Welcome to the AWS Observability Accelerator for Terraform! 4 | 5 | The AWS Observability Accelerator for Terraform is a set of opinionated modules to 6 | help you set up observability for your AWS environments with 7 | AWS-managed observability services such as Amazon Managed Service for Prometheus, 8 | Amazon Managed Grafana, AWS Distro for OpenTelemetry (ADOT) and Amazon CloudWatch. 9 | 10 | We provide curated metrics, logs, traces collection, alerting rules and Grafana 11 | dashboards for your AWS infrastructure and custom applications. 12 | 13 | ![image](https://github.com/aws-observability/terraform-aws-observability-accelerator/assets/10175027/e83f8709-f754-4192-90f2-e3de96d2e26c) 14 | 15 | ## Getting started 16 | 17 | This project provides a set of Terraform modules to enable metrics, logs and 18 | traces collection, dashboards and alerts for monitoring: 19 | 20 | - [EKS Monitoring with AWS-Managed Open Source](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/) - Get Prometheus metrics, CloudWatch logs collection, and X-Ray traces (with OTLP support) for your EKS cluster. Visualize key metrics and logs with provided Grafana dashboards and get pre-built alerting rules. 21 | 22 | - [EKS Monitoring with Enhanced CloudWatch Container Insights](https://aws-observability.github.io/terraform-aws-observability-accelerator/container-insights/eks/) - Get deep visibility into EKS using Amazon CloudWatch for metrics collection, aggregation, and insights summaries. Includes support for [CloudWatch Application Signals (preview)](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Application-Monitoring-Sections.html). 
23 | 24 | - [ECS Monitoring on EC2 with AWS-Managed Open Source](https://aws-observability.github.io/terraform-aws-observability-accelerator/ecs/ecs-monitoring-on-ec2/) - Collect metrics, traces, and logs for ECS on EC2 and send them to a Managed Prometheus workspace, X-Ray, and CloudWatch Logs. Includes pre-built Grafana dashboards for key metrics and logs. 25 | 26 | - [Managed Prometheus Monitoring](https://aws-observability.github.io/terraform-aws-observability-accelerator/workloads/managed-prometheus/) - This module sets up automated observability for Amazon Managed Service for Prometheus workspaces, including a Grafana dashboard, CloudWatch monitoring, and service quota alarms. 27 | 28 | These modules can be directly configured in your existing Terraform 29 | configurations or deployed as-is using our packaged 30 | [examples](https://github.com/aws-observability/terraform-aws-observability-accelerator/tree/main/examples) 31 | 32 | !!! tip 33 | We have supporting examples for quick setup such as: 34 | 35 | - Creating a new Amazon EKS cluster and a VPC 36 | - Creating and configuring an Amazon Managed Grafana workspace with SSO 37 | 38 | ## Motivation 39 | 40 | To gain deep visibility into your workloads and environments, AWS proposes a 41 | set of secure, scalable, highly available, production-grade managed open 42 | source services such as Amazon Managed Service for Prometheus, Amazon Managed 43 | Grafana and Amazon OpenSearch. 44 | 45 | AWS customers have asked for best practices and guidance to collect metrics, logs 46 | and traces from their containerized applications and microservices with ease of 47 | deployment. Customers can use the AWS Observability Accelerator to configure their 48 | metrics and traces collection, leveraging [AWS Distro for OpenTelemetry](https://aws-otel.github.io/), 49 | to have opinionated dashboards and alerts available in only minutes.
50 | 51 | 52 | ## Support & Feedback 53 | 54 | AWS Observability Accelerator for Terraform is maintained by AWS Solution Architects. 55 | It is not part of an AWS service and support is provided best-effort by the 56 | AWS Observability Accelerator community. 57 | 58 | To post feedback, submit feature ideas, or report bugs, please use the [issues](https://github.com/aws-observability/terraform-aws-observability-accelerator/issues) section of this GitHub repo. 59 | 60 | If you are interested in contributing, see the [contribution guide](https://github.com/aws-observability/terraform-aws-observability-accelerator/blob/main/CONTRIBUTING.md). 61 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block extrahead %} 4 | 5 | <script> 6 | (function(n,i,v,r,s,c,x,z){x=window.AwsRumClient={q:[],n:n,i:i,v:v,r:r,c:c};window[n]=function(c,p){x.q.push({c:c,p:p});};z=document.createElement('script');z.async=true;z.src=s;document.head.insertBefore(z,document.head.getElementsByTagName('script')[0]);})( 7 | 'cwr', 8 | '1244d427-de32-4423-b7fe-3d6903e95178', 9 | '1.0.0', 10 | 'us-east-2', 11 | 'https://client.rum.us-east-1.amazonaws.com/1.12.0/cwr.js', 12 | { 13 | sessionSampleRate: 1, 14 | guestRoleArn: "arn:aws:iam::147084596884:role/RUM-Monitor-us-east-2-147084596884-2189797490761-Unauth", 15 | identityPoolId: "us-east-2:4aa2665a-6b7f-4202-856c-333e1f4bdec6", 16 | endpoint: "https://dataplane.rum.us-east-2.amazonaws.com", 17 | telemetries: ["performance","errors","http"], 18 | allowCookies: true, 19 | enableXRay: false 20 | } 21 | ); 22 | </script> 23 | 24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /docs/support.md: -------------------------------------------------------------------------------- 1 | # Support & Feedback 2 | 3 | AWS 
Observability Accelerator for Terraform is maintained by AWS Solution Architects. 4 | It is not part of an AWS service and support is provided best-effort by the 5 | AWS Observability Accelerator community. 6 | 7 | To post feedback, submit feature ideas, or report bugs, please use the [issues](https://github.com/aws-observability/terraform-aws-observability-accelerator/issues) section of this GitHub repo. 8 | 9 | If you are interested in contributing, see the [contribution guide](https://github.com/aws-observability/terraform-aws-observability-accelerator/blob/main/CONTRIBUTING.md). 10 | -------------------------------------------------------------------------------- /examples/ecs-cluster-with-vpc/README.md: -------------------------------------------------------------------------------- 1 | # ECS Cluster w/ EC2 Autoscaling 2 | 3 | Configuration in this directory creates: 4 | 5 | - ECS cluster using EC2 autoscaling groups 6 | - Autoscaling groups with IAM instance profile to be used by ECS cluster 7 | - Example ECS service that utilizes 8 | - Mounts a host volume into the container definition 9 | - Load balancer target group attachment 10 | - Security group for access to the example service 11 | 12 | ## Usage 13 | 14 | To run this example you need to execute: 15 | 16 | ```bash 17 | $ terraform init 18 | $ terraform plan 19 | $ terraform apply 20 | ``` 21 | 22 | Note that this example may create resources which will incur monetary charges on your AWS bill. Run `terraform destroy` when you no longer need these resources. 
23 | 24 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 25 | ## Requirements 26 | 27 | | Name | Version | 28 | |------|---------| 29 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0 | 30 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.55 | 31 | 32 | ## Providers 33 | 34 | | Name | Version | 35 | |------|---------| 36 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.55 | 37 | 38 | ## Modules 39 | 40 | | Name | Source | Version | 41 | |------|--------|---------| 42 | | <a name="module_alb_sg"></a> [alb\_sg](#module\_alb\_sg) | terraform-aws-modules/security-group/aws | ~> 5.0 | 43 | | <a name="module_autoscaling"></a> [autoscaling](#module\_autoscaling) | terraform-aws-modules/autoscaling/aws | ~> 6.5 | 44 | | <a name="module_autoscaling_sg"></a> [autoscaling\_sg](#module\_autoscaling\_sg) | terraform-aws-modules/security-group/aws | ~> 5.0 | 45 | | <a name="module_ecs_cluster"></a> [ecs\_cluster](#module\_ecs\_cluster) | terraform-aws-modules/ecs/aws | 5.2.2 | 46 | | <a name="module_ecs_monitoring"></a> [ecs\_monitoring](#module\_ecs\_monitoring) | ../../modules/ecs-monitoring | n/a | 47 | | <a name="module_vpc"></a> [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 | 48 | 49 | ## Resources 50 | 51 | | Name | Type | 52 | |------|------| 53 | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | 54 | | [aws_ssm_parameter.ecs_optimized_ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameter) | data source | 55 | 56 | ## Inputs 57 | 58 | No inputs. 59 | 60 | ## Outputs 61 | 62 | No outputs. 63 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 64 | 65 | ## License 66 | 67 | Apache-2.0 Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-aws-ecs/blob/master/LICENSE). 
68 | -------------------------------------------------------------------------------- /examples/ecs-cluster-with-vpc/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/examples/ecs-cluster-with-vpc/outputs.tf -------------------------------------------------------------------------------- /examples/ecs-cluster-with-vpc/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/examples/ecs-cluster-with-vpc/variables.tf -------------------------------------------------------------------------------- /examples/ecs-cluster-with-vpc/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.55" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/README.md: -------------------------------------------------------------------------------- 1 | # EKS Cluster Deployment with new VPC 2 | 3 | Note: This example is a subset from [this EKS Blueprint example](https://github.com/aws-ia/terraform-aws-eks-blueprints/tree/v4.13.1/examples/eks-cluster-with-new-vpc) 4 | 5 | This example deploys the following Basic EKS Cluster with VPC 6 | 7 | - Creates a new sample VPC, 3 Private Subnets and 3 Public Subnets 8 | - Creates Internet gateway for Public Subnets and NAT Gateway for Private Subnets 9 | - Creates EKS Cluster Control plane with one managed node group 10 | 11 | You can view the full documentation for this example 
[here](https://aws-observability.github.io/terraform-aws-observability-accelerator/helpers/new-eks-cluster/) 12 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | provider "kubernetes" { 6 | host = module.eks_blueprints.eks_cluster_endpoint 7 | cluster_ca_certificate = base64decode(module.eks_blueprints.eks_cluster_certificate_authority_data) 8 | token = data.aws_eks_cluster_auth.this.token 9 | } 10 | 11 | provider "helm" { 12 | kubernetes { 13 | host = module.eks_blueprints.eks_cluster_endpoint 14 | cluster_ca_certificate = base64decode(module.eks_blueprints.eks_cluster_certificate_authority_data) 15 | token = data.aws_eks_cluster_auth.this.token 16 | } 17 | } 18 | 19 | data "aws_eks_cluster_auth" "this" { 20 | name = module.eks_blueprints.eks_cluster_id 21 | } 22 | 23 | data "aws_availability_zones" "available" {} 24 | 25 | locals { 26 | name = basename(path.cwd) 27 | cluster_name = coalesce(var.cluster_name, local.name) 28 | region = var.aws_region 29 | 30 | vpc_cidr = "10.0.0.0/16" 31 | azs = slice(data.aws_availability_zones.available.names, 0, 3) 32 | 33 | tags = { 34 | Blueprint = local.name 35 | GithubRepo = "github.com/aws-observability/terraform-aws-observability-accelerator" 36 | } 37 | } 38 | 39 | #--------------------------------------------------------------- 40 | # EKS Blueprints 41 | #--------------------------------------------------------------- 42 | 43 | module "eks_blueprints" { 44 | source = "github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1" 45 | 46 | cluster_name = local.cluster_name 47 | cluster_version = var.eks_version 48 | 49 | vpc_id = module.vpc.vpc_id 50 | private_subnet_ids = module.vpc.private_subnets 51 | 52 | managed_node_groups = { 53 | mg_5 = { 54 | node_group_name = "managed-ondemand" 55 | instance_types = 
[var.managed_node_instance_type] 56 | min_size = var.managed_node_min_size 57 | subnet_ids = module.vpc.private_subnets 58 | } 59 | } 60 | 61 | tags = local.tags 62 | } 63 | 64 | module "eks_blueprints_kubernetes_addons" { 65 | source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1" 66 | 67 | eks_cluster_id = module.eks_blueprints.eks_cluster_id 68 | eks_cluster_endpoint = module.eks_blueprints.eks_cluster_endpoint 69 | eks_oidc_provider = module.eks_blueprints.oidc_provider 70 | eks_cluster_version = module.eks_blueprints.eks_cluster_version 71 | 72 | # EKS Managed Add-ons 73 | enable_amazon_eks_vpc_cni = true 74 | enable_amazon_eks_coredns = true 75 | enable_amazon_eks_kube_proxy = true 76 | enable_amazon_eks_aws_ebs_csi_driver = true 77 | 78 | tags = local.tags 79 | } 80 | 81 | #--------------------------------------------------------------- 82 | # Supporting Resources 83 | #--------------------------------------------------------------- 84 | 85 | module "vpc" { 86 | source = "terraform-aws-modules/vpc/aws" 87 | version = "~> 5.0" 88 | 89 | name = local.name 90 | cidr = local.vpc_cidr 91 | 92 | azs = local.azs 93 | public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k)] 94 | private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 10)] 95 | 96 | enable_nat_gateway = true 97 | single_nat_gateway = true 98 | enable_dns_hostnames = true 99 | 100 | # Manage so we can name 101 | manage_default_network_acl = true 102 | default_network_acl_tags = { Name = "${local.name}-default" } 103 | manage_default_route_table = true 104 | default_route_table_tags = { Name = "${local.name}-default" } 105 | manage_default_security_group = true 106 | default_security_group_tags = { Name = "${local.name}-default" } 107 | 108 | public_subnet_tags = { 109 | "kubernetes.io/cluster/${local.cluster_name}" = "shared" 110 | "kubernetes.io/role/elb" = 1 111 | } 112 | 113 | private_subnet_tags = { 114 | 
"kubernetes.io/cluster/${local.cluster_name}" = "shared" 115 | "kubernetes.io/role/internal-elb" = 1 116 | } 117 | 118 | tags = local.tags 119 | } 120 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/min-iam-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "ec2:AllocateAddress", 8 | "ec2:AssociateRouteTable", 9 | "ec2:AttachInternetGateway", 10 | "ec2:AuthorizeSecurityGroupEgress", 11 | "ec2:AuthorizeSecurityGroupIngress", 12 | "ec2:CreateInternetGateway", 13 | "ec2:CreateNatGateway", 14 | "ec2:CreateNetworkAclEntry", 15 | "ec2:CreateRoute", 16 | "ec2:CreateRouteTable", 17 | "ec2:CreateSecurityGroup", 18 | "ec2:CreateSubnet", 19 | "ec2:CreateTags", 20 | "ec2:CreateVpc", 21 | "ec2:DeleteInternetGateway", 22 | "ec2:DeleteNatGateway", 23 | "ec2:DeleteNetworkAclEntry", 24 | "ec2:DeleteRoute", 25 | "ec2:DeleteRouteTable", 26 | "ec2:DeleteSecurityGroup", 27 | "ec2:DeleteSubnet", 28 | "ec2:DeleteTags", 29 | "ec2:DeleteVpc", 30 | "ec2:DescribeAccountAttributes", 31 | "ec2:DescribeAddresses", 32 | "ec2:DescribeAvailabilityZones", 33 | "ec2:DescribeInternetGateways", 34 | "ec2:DescribeNatGateways", 35 | "ec2:DescribeNetworkAcls", 36 | "ec2:DescribeNetworkInterfaces", 37 | "ec2:DescribeRouteTables", 38 | "ec2:DescribeSecurityGroups", 39 | "ec2:DescribeSubnets", 40 | "ec2:DescribeTags", 41 | "ec2:DescribeVpcAttribute", 42 | "ec2:DescribeVpcClassicLink", 43 | "ec2:DescribeVpcClassicLinkDnsSupport", 44 | "ec2:DescribeVpcs", 45 | "ec2:DetachInternetGateway", 46 | "ec2:DisassociateRouteTable", 47 | "ec2:ModifySubnetAttribute", 48 | "ec2:ModifyVpcAttribute", 49 | "ec2:ReleaseAddress", 50 | "ec2:RevokeSecurityGroupEgress", 51 | "ec2:RevokeSecurityGroupIngress", 52 | "eks:CreateAddon", 53 | "eks:CreateCluster", 54 | "eks:CreateNodegroup", 55 | "eks:DeleteAddon", 56 | 
"eks:DeleteCluster", 57 | "eks:DeleteNodegroup", 58 | "eks:DescribeAddon", 59 | "eks:DescribeAddonVersions", 60 | "eks:DescribeCluster", 61 | "eks:DescribeNodegroup", 62 | "iam:AddRoleToInstanceProfile", 63 | "iam:AttachRolePolicy", 64 | "iam:CreateInstanceProfile", 65 | "iam:CreateOpenIDConnectProvider", 66 | "iam:CreatePolicy", 67 | "iam:CreateRole", 68 | "iam:CreateServiceLinkedRole", 69 | "iam:DeleteInstanceProfile", 70 | "iam:DeleteOpenIDConnectProvider", 71 | "iam:DeletePolicy", 72 | "iam:DeleteRole", 73 | "iam:DetachRolePolicy", 74 | "iam:GetInstanceProfile", 75 | "iam:GetOpenIDConnectProvider", 76 | "iam:GetPolicy", 77 | "iam:GetPolicyVersion", 78 | "iam:GetRole", 79 | "iam:ListAttachedRolePolicies", 80 | "iam:ListInstanceProfilesForRole", 81 | "iam:ListPolicyVersions", 82 | "iam:ListRolePolicies", 83 | "iam:PassRole", 84 | "iam:RemoveRoleFromInstanceProfile", 85 | "iam:TagInstanceProfile", 86 | "kms:CreateAlias", 87 | "kms:CreateKey", 88 | "kms:DeleteAlias", 89 | "kms:DescribeKey", 90 | "kms:EnableKeyRotation", 91 | "kms:GetKeyPolicy", 92 | "kms:GetKeyRotationStatus", 93 | "kms:ListAliases", 94 | "kms:ListResourceTags", 95 | "kms:PutKeyPolicy", 96 | "kms:ScheduleKeyDeletion", 97 | "kms:TagResource", 98 | "s3:GetObject", 99 | "s3:ListBucket", 100 | "s3:PutObject" 101 | ], 102 | "Resource": "*" 103 | } 104 | ] 105 | } 106 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_private_subnet_cidr" { 2 | description = "VPC private subnet CIDR" 3 | value = module.vpc.private_subnets_cidr_blocks 4 | } 5 | 6 | output "vpc_public_subnet_cidr" { 7 | description = "VPC public subnet CIDR" 8 | value = module.vpc.public_subnets_cidr_blocks 9 | } 10 | 11 | output "vpc_cidr" { 12 | description = "VPC CIDR" 13 | value = module.vpc.vpc_cidr_block 14 | } 15 | 16 | output "eks_cluster_id" { 17 | description = 
"EKS cluster ID" 18 | value = module.eks_blueprints.eks_cluster_id 19 | } 20 | 21 | output "eks_managed_nodegroups" { 22 | description = "EKS managed node groups" 23 | value = module.eks_blueprints.managed_node_groups 24 | } 25 | 26 | output "eks_managed_nodegroup_ids" { 27 | description = "EKS managed node group ids" 28 | value = module.eks_blueprints.managed_node_groups_id 29 | } 30 | 31 | output "eks_managed_nodegroup_arns" { 32 | description = "EKS managed node group arns" 33 | value = module.eks_blueprints.managed_node_group_arn 34 | } 35 | 36 | output "eks_managed_nodegroup_role_name" { 37 | description = "EKS managed node group role name" 38 | value = module.eks_blueprints.managed_node_group_iam_role_names 39 | } 40 | 41 | output "eks_managed_nodegroup_status" { 42 | description = "EKS managed node group status" 43 | value = module.eks_blueprints.managed_node_groups_status 44 | } 45 | 46 | output "configure_kubectl" { 47 | description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig" 48 | value = module.eks_blueprints.configure_kubectl 49 | } 50 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/variables.tf: -------------------------------------------------------------------------------- 1 | variable "cluster_name" { 2 | description = "Name of cluster - used by Terratest for e2e test automation" 3 | type = string 4 | default = "eks-cluster-with-vpc" 5 | 6 | validation { 7 | # cluster name is used as prefix on eks_blueprint module and cannot be >25 characters 8 | condition = can(regex("^[a-zA-Z][-a-zA-Z0-9]{3,24}$", var.cluster_name)) 9 | error_message = "Cluster name is used as a prefix-name for other resources. Max size is 25 chars and must satisfy regular expression pattern: '[a-zA-Z][-a-zA-Z0-9]{3,19}'." 
10 | } 11 | } 12 | variable "aws_region" { 13 | description = "AWS Region" 14 | type = string 15 | } 16 | variable "managed_node_instance_type" { 17 | description = "Instance type for the cluster managed node groups" 18 | type = string 19 | default = "t3.xlarge" 20 | } 21 | variable "managed_node_min_size" { 22 | description = "Minimum number of instances in the node group" 23 | type = number 24 | default = 2 25 | } 26 | variable "eks_version" { 27 | type = string 28 | description = "EKS Cluster version" 29 | default = "1.28" 30 | } 31 | -------------------------------------------------------------------------------- /examples/eks-cluster-with-vpc/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.20" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.10.0" 20 | } 21 | } 22 | 23 | # ## Used for end-to-end testing on project; update to suit your needs 24 | # backend "s3" { 25 | # bucket = "aws-observability-accelerator-terraform-states" 26 | # region = "us-west-2" 27 | # key = "e2e/eks-cluster-with-vpc/terraform.tfstate" 28 | # } 29 | } 30 | -------------------------------------------------------------------------------- /examples/eks-container-insights/README.md: -------------------------------------------------------------------------------- 1 | # Enable Container Insights for EKS cluster 2 | 3 | This example enables enhanced CloudWatch Container Insights for EKS and CloudWatch Application Signals (preview) through our CloudWatch EKS add-ons, providing comprehensive metrics, logs, and insights for cluster and application monitoring.
4 | 5 | Step-by-step instructions available on our [docs site](https://aws-observability.github.io/terraform-aws-observability-accelerator/container-insights/eks/) 6 | 7 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 8 | ## Requirements 9 | 10 | | Name | Version | 11 | |------|---------| 12 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 13 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 5.0.0 | 14 | 15 | ## Providers 16 | 17 | | Name | Version | 18 | |------|---------| 19 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 5.0.0 | 20 | 21 | ## Modules 22 | 23 | | Name | Source | Version | 24 | |------|--------|---------| 25 | | <a name="module_eks_container_insights"></a> [eks\_container\_insights](#module\_eks\_container\_insights) | ../../modules/eks-container-insights | n/a | 26 | 27 | ## Resources 28 | 29 | | Name | Type | 30 | |------|------| 31 | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | 32 | | [aws_eks_cluster.eks_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source | 33 | | [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source | 34 | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | 35 | 36 | ## Inputs 37 | 38 | | Name | Description | Type | Default | Required | 39 | |------|-------------|------|---------|:--------:| 40 | | <a name="input_eks_cluster_domain"></a> [eks\_cluster\_domain](#input\_eks\_cluster\_domain) | The domain for the EKS cluster | `string` | `""` | no | 41 | | <a name="input_eks_cluster_id"></a> [eks\_cluster\_id](#input\_eks\_cluster\_id) | EKS cluster name | `string` | n/a | yes | 42 | | <a name="input_eks_cluster_version"></a> 
[eks\_cluster\_version](#input\_eks\_cluster\_version) | The Kubernetes version for the cluster | `string` | `"1.28"` | no | 43 | | <a name="input_eks_oidc_provider"></a> [eks\_oidc\_provider](#input\_eks\_oidc\_provider) | The OpenID Connect identity provider (issuer URL without leading `https://`) | `string` | `null` | no | 44 | | <a name="input_eks_oidc_provider_arn"></a> [eks\_oidc\_provider\_arn](#input\_eks\_oidc\_provider\_arn) | The OpenID Connect identity provider ARN | `string` | `null` | no | 45 | | <a name="input_tags"></a> [tags](#input\_tags) | Additional tags (e.g. `map('BusinessUnit`,`XYZ`) | `map(string)` | `{}` | no | 46 | 47 | ## Outputs 48 | 49 | No outputs. 50 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 51 | -------------------------------------------------------------------------------- /examples/eks-container-insights/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_partition" "current" {} 2 | data "aws_caller_identity" "current" {} 3 | data "aws_region" "current" {} 4 | 5 | data "aws_eks_cluster" "eks_cluster" { 6 | name = var.eks_cluster_id 7 | } 8 | -------------------------------------------------------------------------------- /examples/eks-container-insights/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | name = "amazon-cloudwatch-observability" 3 | eks_oidc_issuer_url = replace(data.aws_eks_cluster.eks_cluster.identity[0].oidc[0].issuer, "https://", "") 4 | 5 | addon_context = { 6 | aws_caller_identity_account_id = data.aws_caller_identity.current.account_id 7 | aws_caller_identity_arn = data.aws_caller_identity.current.arn 8 | aws_partition_id = data.aws_partition.current.partition 9 | aws_region_name = data.aws_region.current.name 10 | eks_oidc_provider_arn = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.eks_oidc_issuer_url}" 11 | 
eks_cluster_id = data.aws_eks_cluster.eks_cluster.id 12 | tags = var.tags 13 | } 14 | 15 | addon_config = { 16 | kubernetes_version = var.eks_cluster_version 17 | most_recent = true 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/eks-container-insights/main.tf: -------------------------------------------------------------------------------- 1 | module "eks_container_insights" { 2 | source = "../../modules/eks-container-insights" 3 | eks_cluster_id = var.eks_cluster_id 4 | enable_amazon_eks_cw_observability = true 5 | create_cloudwatch_observability_irsa_role = true 6 | eks_oidc_provider_arn = local.addon_context.eks_oidc_provider_arn 7 | create_cloudwatch_application_signals_role = true 8 | } 9 | -------------------------------------------------------------------------------- /examples/eks-container-insights/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/examples/eks-container-insights/outputs.tf -------------------------------------------------------------------------------- /examples/eks-container-insights/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "EKS cluster name" 3 | type = string 4 | } 5 | 6 | variable "eks_cluster_domain" { 7 | description = "The domain for the EKS cluster" 8 | type = string 9 | default = "" 10 | } 11 | 12 | variable "eks_oidc_provider" { 13 | description = "The OpenID Connect identity provider (issuer URL without leading `https://`)" 14 | type = string 15 | default = null 16 | } 17 | 18 | variable "eks_oidc_provider_arn" { 19 | description = "The OpenID Connect identity provider ARN" 20 | type = string 21 | default = null 22 | } 23 | 24 | variable "eks_cluster_version" { 25 | description = "The 
Kubernetes version for the cluster" 26 | type = string 27 | default = "1.28" 28 | } 29 | 30 | variable "tags" { 31 | description = "Additional tags (e.g. `map('BusinessUnit`,`XYZ`)" 32 | type = map(string) 33 | default = {} 34 | } 35 | -------------------------------------------------------------------------------- /examples/eks-container-insights/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_eks_cluster_auth" "eks_one" { 2 | name = var.cluster_one.name 3 | provider = aws.eks_cluster_one 4 | } 5 | 6 | data "aws_eks_cluster_auth" "eks_two" { 7 | name = var.cluster_two.name 8 | provider = aws.eks_cluster_two 9 | } 10 | 11 | data "aws_eks_cluster" "eks_one" { 12 | name = var.cluster_one.name 13 | provider = aws.eks_cluster_one 14 | } 15 | 16 | data "aws_eks_cluster" "eks_two" { 17 | name = var.cluster_two.name 18 | provider = aws.eks_cluster_two 19 | } 20 | 21 | data "aws_grafana_workspace" "this" { 22 | workspace_id = var.monitoring.managed_grafana_id 23 | provider = aws.eks_cluster_one 24 | } 25 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/iam.tf: -------------------------------------------------------------------------------- 1 | data "aws_caller_identity" "monitoring" { 2 | provider = aws.central_monitoring 3 | } 4 | 5 | resource "aws_iam_policy" "irsa_assume_role_policy_one" { 6 | provider = aws.eks_cluster_one 7 | name = "${var.cluster_one.name}-irsa_assume_role_policy" 8 | path = "/" 9 | description = "This role allows the IRSA role to 
assume the cross-account role for AMP access" 10 | 11 | policy = jsonencode({ 12 | Version = "2012-10-17" 13 | Statement = [ 14 | { 15 | Action = [ 16 | "sts:AssumeRole", 17 | ] 18 | Effect = "Allow" 19 | Resource = "arn:aws:iam::${data.aws_caller_identity.monitoring.account_id}:role/${local.amp_workspace_alias}-role-for-cross-account" 20 | }, 21 | ] 22 | }) 23 | } 24 | 25 | resource "aws_iam_policy" "irsa_assume_role_policy_two" { 26 | provider = aws.eks_cluster_two 27 | name = "${var.cluster_two.name}-irsa_assume_role_policy" 28 | path = "/" 29 | description = "This role allows the IRSA role to assume the cross-account role for AMP access" 30 | 31 | policy = jsonencode({ 32 | Version = "2012-10-17" 33 | Statement = [ 34 | { 35 | Action = [ 36 | "sts:AssumeRole", 37 | ] 38 | Effect = "Allow" 39 | Resource = "arn:aws:iam::${data.aws_caller_identity.monitoring.account_id}:role/${local.amp_workspace_alias}-role-for-cross-account" 40 | }, 41 | ] 42 | }) 43 | } 44 | 45 | resource "aws_iam_role" "cross_account_amp_role" { 46 | provider = aws.central_monitoring 47 | name = "${local.amp_workspace_alias}-role-for-cross-account" 48 | 49 | assume_role_policy = <<EOF 50 | { 51 | "Version": "2012-10-17", 52 | "Statement": [ 53 | { 54 | "Effect": "Allow", 55 | "Principal": { 56 | "AWS": [ 57 | "${module.eks_monitoring_one.adot_irsa_arn}", 58 | "${module.eks_monitoring_two.adot_irsa_arn}" 59 | ] 60 | }, 61 | "Action": "sts:AssumeRole", 62 | "Condition": {} 63 | } 64 | ] 65 | } 66 | EOF 67 | } 68 | 69 | resource "aws_iam_role_policy_attachment" "role_attach" { 70 | provider = aws.central_monitoring 71 | role = aws_iam_role.cross_account_amp_role.name 72 | policy_arn = "arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess" 73 | } 74 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/outputs.tf: -------------------------------------------------------------------------------- 1 | output 
"amp_workspace_id" { 2 | description = "Identifier of the AMP workspace" 3 | value = module.managed_service_prometheus.workspace_id 4 | } 5 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/providers.tf: -------------------------------------------------------------------------------- 1 | ###### AWS Providers ###### 2 | 3 | provider "aws" { 4 | region = var.cluster_one.region 5 | alias = "eks_cluster_one" 6 | assume_role { 7 | role_arn = var.cluster_one.tf_role 8 | } 9 | } 10 | 11 | provider "aws" { 12 | region = var.cluster_two.region 13 | alias = "eks_cluster_two" 14 | assume_role { 15 | role_arn = var.cluster_two.tf_role 16 | } 17 | } 18 | 19 | provider "aws" { 20 | region = var.monitoring.region 21 | alias = "central_monitoring" 22 | assume_role { 23 | role_arn = var.monitoring.tf_role 24 | } 25 | } 26 | 27 | ###### Helm Providers ###### 28 | 29 | provider "helm" { 30 | alias = "eks_cluster_one" 31 | kubernetes { 32 | host = data.aws_eks_cluster.eks_one.endpoint 33 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_one.certificate_authority[0].data) 34 | exec { 35 | api_version = "client.authentication.k8s.io/v1beta1" 36 | args = ["eks", "get-token", "--role-arn", var.cluster_one.tf_role, "--cluster-name", var.cluster_one.name] 37 | command = "aws" 38 | } 39 | } 40 | } 41 | 42 | provider "helm" { 43 | alias = "eks_cluster_two" 44 | kubernetes { 45 | host = data.aws_eks_cluster.eks_two.endpoint 46 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_two.certificate_authority[0].data) 47 | exec { 48 | api_version = "client.authentication.k8s.io/v1beta1" 49 | args = ["eks", "get-token", "--role-arn", var.cluster_two.tf_role, "--cluster-name", var.cluster_two.name] 50 | command = "aws" 51 | } 52 | } 53 | } 54 | 55 | ###### Kubernetes Providers ###### 56 | 57 | provider "kubernetes" { 58 | alias = "eks_cluster_one" 59 | host = data.aws_eks_cluster.eks_one.endpoint 
60 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_one.certificate_authority[0].data) 61 | exec { 62 | api_version = "client.authentication.k8s.io/v1beta1" 63 | args = ["eks", "get-token", "--role-arn", var.cluster_one.tf_role, "--cluster-name", var.cluster_one.name] 64 | command = "aws" 65 | } 66 | } 67 | 68 | provider "kubernetes" { 69 | alias = "eks_cluster_two" 70 | host = data.aws_eks_cluster.eks_two.endpoint 71 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_two.certificate_authority[0].data) 72 | exec { 73 | api_version = "client.authentication.k8s.io/v1beta1" 74 | args = ["eks", "get-token", "--role-arn", var.cluster_two.tf_role, "--cluster-name", var.cluster_two.name] 75 | command = "aws" 76 | } 77 | } 78 | 79 | provider "kubectl" { 80 | alias = "eks_cluster_one" 81 | apply_retry_count = 30 82 | host = data.aws_eks_cluster.eks_one.endpoint 83 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_one.certificate_authority[0].data) 84 | load_config_file = false 85 | token = data.aws_eks_cluster_auth.eks_one.token 86 | } 87 | 88 | provider "kubectl" { 89 | alias = "eks_cluster_two" 90 | apply_retry_count = 30 91 | host = data.aws_eks_cluster.eks_two.endpoint 92 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_two.certificate_authority[0].data) 93 | load_config_file = false 94 | token = data.aws_eks_cluster_auth.eks_two.token 95 | } 96 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/variables.tf: -------------------------------------------------------------------------------- 1 | variable "cluster_one" { 2 | description = "Input for your first EKS Cluster" 3 | type = object({ 4 | name = string 5 | region = string 6 | tf_role = string 7 | }) 8 | default = { 9 | name = "eks-cluster-1" 10 | region = "us-east-1" 11 | tf_role = "<iam-role-in-eks-cluster-1-account>" 12 | } 13 | } 14 | 15 | variable "cluster_two" { 16 | 
description = "Input for your second EKS Cluster" 17 | type = object({ 18 | name = string 19 | region = string 20 | tf_role = string 21 | }) 22 | default = { 23 | name = "eks-cluster-2" 24 | region = "us-east-1" 25 | tf_role = "<iam-role-in-eks-cluster-2-account>" 26 | } 27 | } 28 | 29 | variable "monitoring" { 30 | description = "Input for your AMP and AMG workspaces" 31 | type = object({ 32 | managed_grafana_id = string 33 | amp_workspace_alias = string 34 | region = string 35 | tf_role = string 36 | }) 37 | default = { 38 | managed_grafana_id = "<grafana-ws-id>" 39 | amp_workspace_alias = "aws-observability-accelerator" 40 | region = "<grafana-ws-region>" 41 | tf_role = "<iam-role-in-grafana-ws-account>" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /examples/eks-cross-account-with-central-amp/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3.9" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.55.0" 8 | configuration_aliases = [aws.eks_cluster_one, aws.eks_cluster_two, aws.central_monitoring] 9 | } 10 | kubernetes = { 11 | source = "hashicorp/kubernetes" 12 | version = ">= 2.18.0" 13 | configuration_aliases = [kubernetes.eks_cluster_one, kubernetes.eks_cluster_two] 14 | } 15 | helm = { 16 | source = "hashicorp/helm" 17 | version = ">= 2.9.0" 18 | configuration_aliases = [helm.eks_cluster_one, helm.eks_cluster_two] 19 | } 20 | kubectl = { 21 | source = "alekc/kubectl" 22 | version = ">= 2.0.3" 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/eks-istio/README.md: -------------------------------------------------------------------------------- 1 | # Existing Cluster with Tetrate Istio Add-on and Istio monitoring 2 | 3 | View the full documentation for this example 
[here](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/istio) 4 | 5 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 6 | ## Requirements 7 | 8 | | Name | Version | 9 | |------|---------| 10 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 11 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 12 | | <a name="requirement_helm"></a> [helm](#requirement\_helm) | >= 2.4.1 | 13 | | <a name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 14 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 15 | 16 | ## Providers 17 | 18 | | Name | Version | 19 | |------|---------| 20 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 21 | 22 | ## Modules 23 | 24 | | Name | Source | Version | 25 | |------|--------|---------| 26 | | <a name="module_eks_blueprints_kubernetes_addons"></a> [eks\_blueprints\_kubernetes\_addons](#module\_eks\_blueprints\_kubernetes\_addons) | github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons | v4.32.1 | 27 | | <a name="module_eks_monitoring"></a> [eks\_monitoring](#module\_eks\_monitoring) | ../../modules/eks-monitoring | n/a | 28 | 29 | ## Resources 30 | 31 | | Name | Type | 32 | |------|------| 33 | | [aws_eks_cluster.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source | 34 | | [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | 35 | | [aws_grafana_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/grafana_workspace) | data source | 36 | 37 | ## Inputs 38 | 39 | | Name | Description | Type | Default | Required | 40 | |------|-------------|------|---------|:--------:| 41 | | <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS Region | `string` | 
n/a | yes | 42 | | <a name="input_eks_cluster_id"></a> [eks\_cluster\_id](#input\_eks\_cluster\_id) | Name of the EKS cluster | `string` | `"eks-cluster-with-vpc"` | no | 43 | | <a name="input_enable_dashboards"></a> [enable\_dashboards](#input\_enable\_dashboards) | Enables or disables curated dashboards. Dashboards are managed by the Grafana Operator | `bool` | `true` | no | 44 | | <a name="input_grafana_api_key"></a> [grafana\_api\_key](#input\_grafana\_api\_key) | API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana | `string` | n/a | yes | 45 | | <a name="input_managed_grafana_workspace_id"></a> [managed\_grafana\_workspace\_id](#input\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | `string` | n/a | yes | 46 | | <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Service for Prometheus Workspace ID | `string` | `""` | no | 47 | 48 | ## Outputs 49 | 50 | | Name | Description | 51 | |------|-------------| 52 | | <a name="output_eks_cluster_id"></a> [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | 53 | | <a name="output_eks_cluster_version"></a> [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | 54 | | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | 55 | | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | 56 | | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region | 57 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 58 | 
-------------------------------------------------------------------------------- /examples/eks-istio/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | data "aws_eks_cluster_auth" "this" { 6 | name = var.eks_cluster_id 7 | } 8 | 9 | data "aws_eks_cluster" "this" { 10 | name = var.eks_cluster_id 11 | } 12 | 13 | data "aws_grafana_workspace" "this" { 14 | workspace_id = var.managed_grafana_workspace_id 15 | } 16 | 17 | provider "kubernetes" { 18 | host = local.eks_cluster_endpoint 19 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 20 | token = data.aws_eks_cluster_auth.this.token 21 | } 22 | 23 | provider "helm" { 24 | kubernetes { 25 | host = local.eks_cluster_endpoint 26 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 27 | token = data.aws_eks_cluster_auth.this.token 28 | } 29 | } 30 | 31 | locals { 32 | region = var.aws_region 33 | eks_cluster_endpoint = data.aws_eks_cluster.this.endpoint 34 | create_new_workspace = var.managed_prometheus_workspace_id == "" ? 
true : false 35 | tags = { 36 | Source = "github.com/aws-observability/terraform-aws-observability-accelerator" 37 | } 38 | } 39 | 40 | module "eks_blueprints_kubernetes_addons" { 41 | source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1" 42 | 43 | eks_cluster_id = var.eks_cluster_id 44 | #eks_cluster_endpoint = module.eks_blueprints.eks_cluster_endpoint 45 | #eks_oidc_provider = module.eks_blueprints.oidc_provider 46 | #eks_cluster_version = module.eks_blueprints.eks_cluster_version 47 | 48 | # EKS Managed Add-ons 49 | #enable_amazon_eks_vpc_cni = true 50 | #enable_amazon_eks_coredns = true 51 | #enable_amazon_eks_kube_proxy = true 52 | 53 | # Add-ons 54 | enable_metrics_server = true 55 | enable_cluster_autoscaler = true 56 | 57 | # Tetrate Istio Add-on 58 | enable_tetrate_istio = true 59 | 60 | tags = local.tags 61 | } 62 | 63 | module "eks_monitoring" { 64 | source = "../../modules/eks-monitoring" 65 | # source = "github.com/aws-observability/terraform-aws-observability-accelerator//modules/eks-monitoring?ref=v2.0.0" 66 | enable_istio = true 67 | eks_cluster_id = var.eks_cluster_id 68 | 69 | # deploys AWS Distro for OpenTelemetry operator into the cluster 70 | enable_amazon_eks_adot = true 71 | 72 | # reusing existing certificate manager? 
defaults to true 73 | enable_cert_manager = true 74 | 75 | # deploys external-secrets in to the cluster 76 | enable_external_secrets = true 77 | grafana_api_key = var.grafana_api_key 78 | target_secret_name = "grafana-admin-credentials" 79 | target_secret_namespace = "grafana-operator" 80 | grafana_url = "https://${data.aws_grafana_workspace.this.endpoint}" 81 | 82 | # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' 83 | enable_dashboards = var.enable_dashboards 84 | 85 | enable_managed_prometheus = local.create_new_workspace 86 | managed_prometheus_workspace_id = var.managed_prometheus_workspace_id 87 | 88 | # optional, defaults to 60s interval and 15s timeout 89 | prometheus_config = { 90 | global_scrape_interval = "60s" 91 | global_scrape_timeout = "15s" 92 | } 93 | 94 | enable_logs = true 95 | 96 | tags = local.tags 97 | } 98 | -------------------------------------------------------------------------------- /examples/eks-istio/outputs.tf: -------------------------------------------------------------------------------- 1 | output "managed_prometheus_workspace_region" { 2 | description = "AWS Region" 3 | value = module.eks_monitoring.managed_prometheus_workspace_region 4 | } 5 | 6 | output "managed_prometheus_workspace_endpoint" { 7 | description = "Amazon Managed Prometheus workspace endpoint" 8 | value = module.eks_monitoring.managed_prometheus_workspace_endpoint 9 | } 10 | 11 | output "managed_prometheus_workspace_id" { 12 | description = "Amazon Managed Prometheus workspace ID" 13 | value = module.eks_monitoring.managed_prometheus_workspace_id 14 | } 15 | 16 | output "eks_cluster_version" { 17 | description = "EKS Cluster version" 18 | value = module.eks_monitoring.eks_cluster_version 19 | } 20 | 21 | output "eks_cluster_id" { 22 | description = "EKS Cluster Id" 23 | value = module.eks_monitoring.eks_cluster_id 24 | } 25 | 
-------------------------------------------------------------------------------- /examples/eks-istio/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "Name of the EKS cluster" 3 | type = string 4 | default = "eks-cluster-with-vpc" 5 | } 6 | 7 | variable "aws_region" { 8 | description = "AWS Region" 9 | type = string 10 | } 11 | 12 | variable "managed_prometheus_workspace_id" { 13 | description = "Amazon Managed Service for Prometheus Workspace ID" 14 | type = string 15 | default = "" 16 | } 17 | 18 | variable "managed_grafana_workspace_id" { 19 | description = "Amazon Managed Grafana Workspace ID" 20 | type = string 21 | } 22 | 23 | variable "grafana_api_key" { 24 | description = "API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana" 25 | type = string 26 | sensitive = true 27 | } 28 | 29 | variable "enable_dashboards" { 30 | description = "Enables or disables curated dashboards. 
Dashboards are managed by the Grafana Operator" 31 | type = bool 32 | default = true 33 | } 34 | -------------------------------------------------------------------------------- /examples/eks-istio/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | 23 | # ## Used for end-to-end testing on project; update to suit your needs 24 | # backend "s3" { 25 | # bucket = "aws-observability-accelerator-terraform-states" 26 | # region = "us-west-2" 27 | # key = "e2e/eks-istio/terraform.tfstate" 28 | # } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /examples/eks-multicluster/README.md: -------------------------------------------------------------------------------- 1 | # AWS EKS Multicluster Observability 2 | 3 | This example shows how to use the [AWS Observability Accelerator](https://github.com/aws-observability/terraform-aws-observability-accelerator), with more than one EKS cluster and verify the collected metrics from all the clusters in the dashboards of a common `Amazon Managed Grafana` workspace. 4 | 5 | ## Prerequisites 6 | 7 | #### 1. EKS clusters 8 | 9 | Using the example [eks-cluster-with-vpc](../../examples/eks-cluster-with-vpc/), create two EKS clusters with the names: 10 | 1. `eks-cluster-1` 11 | 2. `eks-cluster-2` 12 | 13 | #### 2. Amazon Managed Service for Prometheus (AMP) workspace 14 | 15 | We recommend that you create a new AMP workspace. To do that you can run the following command.
16 | 17 | Ensure you have the following necessary IAM permissions 18 | * `aps:CreateWorkspace` 19 | 20 | ```sh 21 | export TF_VAR_managed_prometheus_workspace_id=$(aws amp create-workspace --alias observability-accelerator --query='workspaceId' --output text) 22 | ``` 23 | 24 | #### 3. Amazon Managed Grafana (AMG) workspace 25 | 26 | To run this example you need an AMG workspace. If you have 27 | an existing workspace, create an environment variable as described below. 28 | To create a new workspace, visit our supporting example for managed Grafana. 29 | 30 | !!! note 31 | For the URL `https://g-xyz.grafana-workspace.eu-central-1.amazonaws.com`, the workspace ID would be `g-xyz` 32 | 33 | ```sh 34 | export TF_VAR_managed_grafana_workspace_id=g-xxx 35 | ``` 36 | 37 | #### 4. Grafana API Key 38 | 39 | AMG provides a control plane API for generating Grafana API keys. 40 | As a security best practice, we will provide to Terraform a short-lived API key to 41 | run the `apply` or `destroy` command. 42 | 43 | Ensure you have the following necessary IAM permissions 44 | * `grafana:CreateWorkspaceApiKey` 45 | * `grafana:DeleteWorkspaceApiKey` 46 | 47 | ```sh 48 | export TF_VAR_grafana_api_key=`aws grafana create-workspace-api-key --key-name "observability-accelerator-$(date +%s)" --key-role ADMIN --seconds-to-live 1200 --workspace-id $TF_VAR_managed_grafana_workspace_id --query key --output text` 49 | ``` 50 | 51 | ## Setup 52 | 53 | #### 1. Download sources and initialize Terraform 54 | 55 | ```sh 56 | git clone https://github.com/aws-observability/terraform-aws-observability-accelerator.git 57 | cd terraform-aws-observability-accelerator/examples/eks-multicluster 58 | terraform init 59 | ``` 60 | 61 | #### 2. Deploy 62 | 63 | Verify by looking at the file `variables.tf` that there are two EKS clusters targeted for deployment by the names/ids: 64 | 1. `eks-cluster-1` 65 | 2.
`eks-cluster-2` 66 | 67 | The difference in deployment between these clusters is that Terraform, when setting up the EKS cluster behind variable `eks_cluster_1_id` for observability, also sets up: 68 | * Dashboard folder and files in `AMG` 69 | * Prometheus and Java, alerting and recording rules in `AMP` 70 | 71 | !!! warning 72 | To override the defaults, create a `terraform.tfvars` and change the default values of the variables. 73 | 74 | Run the following command to deploy 75 | 76 | ```sh 77 | terraform apply --auto-approve 78 | ``` 79 | 80 | ## Verifying Multicluster Observability 81 | 82 | Once you have successfully run the above setup, you should be able to see dashboards similar to the images shown below in the `Amazon Managed Grafana` workspace. 83 | 84 | Note how you are able to use the `cluster` dropdown to filter the dashboards to metrics collected from a specific EKS cluster. 85 | 86 | <img width="2557" alt="eks-multicluster-1" src="https://user-images.githubusercontent.com/4762573/233949110-ce275d06-7ad8-494c-b527-d9c2a0fb6645.png"> 87 | 88 | <img width="2560" alt="eks-multicluster-2" src="https://user-images.githubusercontent.com/4762573/233949227-f401f81e-e0d6-4242-96ad-0bcd39ad4e2d.png"> 89 | 90 | ## Cleanup 91 | 92 | To clean up entirely, run the following command: 93 | 94 | ```sh 95 | terraform destroy --auto-approve 96 | ``` 97 | -------------------------------------------------------------------------------- /examples/eks-multicluster/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_eks_cluster_auth" "eks_cluster_1" { 2 | name = var.eks_cluster_1_id 3 | provider = aws.eks_cluster_1 4 | } 5 | 6 | data "aws_eks_cluster_auth" "eks_cluster_2" { 7 | name = var.eks_cluster_2_id 8 | provider = aws.eks_cluster_2 9 | } 10 | 11 | data "aws_eks_cluster" "eks_cluster_1" { 12 | name = var.eks_cluster_1_id 13 | provider = aws.eks_cluster_1 14 | } 15 | 16 | data "aws_eks_cluster" "eks_cluster_2" { 17 | name =
var.eks_cluster_2_id 18 | provider = aws.eks_cluster_2 19 | } 20 | 21 | data "aws_grafana_workspace" "this" { 22 | workspace_id = var.managed_grafana_workspace_id 23 | provider = aws.eks_cluster_1 24 | } 25 | 26 | data "aws_prometheus_workspace" "this" { 27 | workspace_id = local.managed_prometheus_workspace_id 28 | provider = aws.eks_cluster_1 29 | } 30 | -------------------------------------------------------------------------------- /examples/eks-multicluster/main.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | create_new_workspace = var.managed_prometheus_workspace_id == "" ? true : false 3 | managed_prometheus_workspace_id = local.create_new_workspace ? module.managed_service_prometheus[0].workspace_id : var.managed_prometheus_workspace_id 4 | } 5 | module "eks_cluster_1_monitoring" { 6 | source = "../..//modules/eks-monitoring" 7 | eks_cluster_id = var.eks_cluster_1_id 8 | enable_amazon_eks_adot = true 9 | enable_cert_manager = true 10 | enable_java = true 11 | 12 | # The settings below act on the shared AMG and AMP workspaces, so they only need to be applied once. 13 | # Hence, this is performed in conjunction with the setup of the eks_cluster_1 EKS cluster 14 | enable_dashboards = true 15 | enable_external_secrets = true 16 | enable_fluxcd = true 17 | enable_alerting_rules = true 18 | enable_recording_rules = true 19 | 20 | # Additional dashboards 21 | enable_apiserver_monitoring = true 22 | enable_adotcollector_metrics = true 23 | 24 | grafana_api_key = var.grafana_api_key 25 | grafana_url = "https://${data.aws_grafana_workspace.this.endpoint}" 26 | 27 | # prevents the module from creating its own workspace; the one resolved in `locals` is passed in below 28 | enable_managed_prometheus = false 29 | 30 | managed_prometheus_workspace_id = local.managed_prometheus_workspace_id 31 | managed_prometheus_workspace_endpoint = data.aws_prometheus_workspace.this.prometheus_endpoint 32 | managed_prometheus_workspace_region = var.eks_cluster_1_region 33 | 34 |
prometheus_config = { 35 | global_scrape_interval = "60s" 36 | global_scrape_timeout = "15s" 37 | scrape_sample_limit = 2000 38 | } 39 | 40 | providers = { 41 | aws = aws.eks_cluster_1 42 | kubernetes = kubernetes.eks_cluster_1 43 | helm = helm.eks_cluster_1 44 | } 45 | } 46 | 47 | module "eks_cluster_2_monitoring" { 48 | source = "../..//modules/eks-monitoring" 49 | eks_cluster_id = var.eks_cluster_2_id 50 | enable_amazon_eks_adot = true 51 | enable_cert_manager = true 52 | enable_java = true 53 | 54 | # Since the following were enabled in conjunction with the set up of the 55 | # eks_cluster_1 EKS cluster, we will skip them with the eks_cluster_2 EKS cluster 56 | enable_dashboards = false 57 | enable_external_secrets = false 58 | enable_fluxcd = false 59 | enable_alerting_rules = false 60 | enable_recording_rules = false 61 | 62 | # Disable additional dashboards 63 | enable_apiserver_monitoring = false 64 | enable_adotcollector_metrics = false 65 | 66 | # prevents the module from creating a workspace; use the same resolved (possibly newly created) workspace ID as eks_cluster_1 67 | enable_managed_prometheus = false 68 | 69 | managed_prometheus_workspace_id = local.managed_prometheus_workspace_id 70 | managed_prometheus_workspace_endpoint = data.aws_prometheus_workspace.this.prometheus_endpoint 71 | managed_prometheus_workspace_region = var.eks_cluster_1_region 72 | 73 | prometheus_config = { 74 | global_scrape_interval = "60s" 75 | global_scrape_timeout = "15s" 76 | scrape_sample_limit = 2000 77 | } 78 | 79 | providers = { 80 | aws = aws.eks_cluster_2 81 | kubernetes = kubernetes.eks_cluster_2 82 | helm = helm.eks_cluster_2 83 | } 84 | } 85 | 86 | module "managed_service_prometheus" { 87 | count = local.create_new_workspace ?
1 : 0 88 | source = "terraform-aws-modules/managed-service-prometheus/aws" 89 | version = "~> 2.2.2" 90 | providers = { 91 | aws = aws.eks_cluster_1 92 | } 93 | 94 | workspace_alias = "aws-observability-accelerator-multicluster" 95 | } 96 | -------------------------------------------------------------------------------- /examples/eks-multicluster/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/examples/eks-multicluster/outputs.tf -------------------------------------------------------------------------------- /examples/eks-multicluster/providers.tf: -------------------------------------------------------------------------------- 1 | provider "kubernetes" { 2 | host = data.aws_eks_cluster.eks_cluster_1.endpoint 3 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_cluster_1.certificate_authority[0].data) 4 | token = data.aws_eks_cluster_auth.eks_cluster_1.token 5 | alias = "eks_cluster_1" 6 | } 7 | 8 | provider "kubernetes" { 9 | host = data.aws_eks_cluster.eks_cluster_2.endpoint 10 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_cluster_2.certificate_authority[0].data) 11 | token = data.aws_eks_cluster_auth.eks_cluster_2.token 12 | alias = "eks_cluster_2" 13 | } 14 | 15 | provider "helm" { 16 | kubernetes { 17 | host = data.aws_eks_cluster.eks_cluster_1.endpoint 18 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_cluster_1.certificate_authority[0].data) 19 | token = data.aws_eks_cluster_auth.eks_cluster_1.token 20 | } 21 | alias = "eks_cluster_1" 22 | } 23 | 24 | provider "helm" { 25 | kubernetes { 26 | host = data.aws_eks_cluster.eks_cluster_2.endpoint 27 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.eks_cluster_2.certificate_authority[0].data) 28 | token = data.aws_eks_cluster_auth.eks_cluster_2.token 29 | } 30 | alias = 
"eks_cluster_2" 31 | } 32 | 33 | provider "aws" { 34 | region = var.eks_cluster_1_region 35 | alias = "eks_cluster_1" 36 | } 37 | 38 | provider "aws" { 39 | region = var.eks_cluster_2_region 40 | alias = "eks_cluster_2" 41 | } 42 | -------------------------------------------------------------------------------- /examples/eks-multicluster/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_1_id" { 2 | description = "Name or ID of the EKS cluster 1" 3 | type = string 4 | default = "eks-cluster-1" 5 | nullable = false 6 | } 7 | 8 | variable "eks_cluster_1_region" { 9 | description = "AWS region of the EKS cluster 1" 10 | type = string 11 | default = "us-west-2" 12 | nullable = false 13 | } 14 | 15 | variable "eks_cluster_2_id" { 16 | description = "Name or ID of the EKS cluster 2" 17 | type = string 18 | default = "eks-cluster-2" 19 | nullable = true 20 | } 21 | 22 | variable "eks_cluster_2_region" { 23 | description = "AWS region of the EKS cluster 2" 24 | type = string 25 | default = "us-west-2" 26 | nullable = true 27 | } 28 | 29 | variable "managed_prometheus_workspace_id" { 30 | description = "Amazon Managed Service for Prometheus Workspace ID" 31 | type = string 32 | default = "" 33 | } 34 | 35 | variable "managed_grafana_workspace_id" { 36 | description = "Amazon Managed Grafana Workspace ID" 37 | type = string 38 | default = "" 39 | } 40 | 41 | variable "grafana_api_key" { 42 | description = "API key for external-secrets to create secrets for grafana-operator" 43 | type = string 44 | default = "" 45 | sensitive = true 46 | } 47 | -------------------------------------------------------------------------------- /examples/eks-multicluster/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3.9" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.55.0" 8 | 
configuration_aliases = [aws.eks_cluster_1, aws.eks_cluster_2] 9 | } 10 | kubernetes = { 11 | source = "hashicorp/kubernetes" 12 | version = ">= 2.18.0" 13 | configuration_aliases = [kubernetes.eks_cluster_1, kubernetes.eks_cluster_2] 14 | } 15 | helm = { 16 | source = "hashicorp/helm" 17 | version = ">= 2.9.0" 18 | configuration_aliases = [helm.eks_cluster_1, helm.eks_cluster_2] 19 | } 20 | kubectl = { 21 | source = "alekc/kubectl" 22 | version = ">= 2.0.3" 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/existing-cluster-java/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | data "aws_eks_cluster_auth" "this" { 6 | name = var.eks_cluster_id 7 | } 8 | 9 | data "aws_eks_cluster" "this" { 10 | name = var.eks_cluster_id 11 | } 12 | 13 | data "aws_grafana_workspace" "this" { 14 | workspace_id = var.managed_grafana_workspace_id 15 | } 16 | 17 | provider "kubernetes" { 18 | host = local.eks_cluster_endpoint 19 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 20 | token = data.aws_eks_cluster_auth.this.token 21 | } 22 | 23 | provider "helm" { 24 | kubernetes { 25 | host = local.eks_cluster_endpoint 26 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 27 | token = data.aws_eks_cluster_auth.this.token 28 | } 29 | } 30 | 31 | locals { 32 | region = var.aws_region 33 | eks_cluster_endpoint = data.aws_eks_cluster.this.endpoint 34 | create_new_workspace = var.managed_prometheus_workspace_id == "" ? 
true : false 35 | tags = { 36 | Source = "github.com/aws-observability/terraform-aws-observability-accelerator" 37 | } 38 | } 39 | 40 | module "eks_monitoring" { 41 | source = "../../modules/eks-monitoring" 42 | # source = "github.com/aws-observability/terraform-aws-observability-accelerator//modules/eks-monitoring?ref=v2.0.0" 43 | 44 | # enable java metrics collection, dashboards and alerts rules creation 45 | enable_java = true 46 | 47 | # deploys external-secrets in to the cluster 48 | enable_external_secrets = true 49 | grafana_api_key = var.grafana_api_key 50 | target_secret_name = "grafana-admin-credentials" 51 | target_secret_namespace = "grafana-operator" 52 | grafana_url = "https://${data.aws_grafana_workspace.this.endpoint}" 53 | 54 | eks_cluster_id = var.eks_cluster_id 55 | 56 | # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' 57 | enable_dashboards = var.enable_dashboards 58 | 59 | enable_managed_prometheus = local.create_new_workspace 60 | managed_prometheus_workspace_id = var.managed_prometheus_workspace_id 61 | 62 | # optional, defaults to 60s interval and 15s timeout 63 | prometheus_config = { 64 | global_scrape_interval = "60s" 65 | global_scrape_timeout = "15s" 66 | scrape_sample_limit = 2000 67 | } 68 | 69 | enable_logs = true 70 | 71 | tags = local.tags 72 | } 73 | -------------------------------------------------------------------------------- /examples/existing-cluster-java/outputs.tf: -------------------------------------------------------------------------------- 1 | output "managed_prometheus_workspace_region" { 2 | description = "AWS Region" 3 | value = module.eks_monitoring.managed_prometheus_workspace_region 4 | } 5 | 6 | output "managed_prometheus_workspace_endpoint" { 7 | description = "Amazon Managed Prometheus workspace endpoint" 8 | value = module.eks_monitoring.managed_prometheus_workspace_endpoint 9 | } 10 | 11 | output "managed_prometheus_workspace_id" 
{ 12 | description = "Amazon Managed Prometheus workspace ID" 13 | value = module.eks_monitoring.managed_prometheus_workspace_id 14 | } 15 | 16 | output "eks_cluster_version" { 17 | description = "EKS Cluster version" 18 | value = module.eks_monitoring.eks_cluster_version 19 | } 20 | 21 | output "eks_cluster_id" { 22 | description = "EKS Cluster Id" 23 | value = module.eks_monitoring.eks_cluster_id 24 | } 25 | -------------------------------------------------------------------------------- /examples/existing-cluster-java/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "Name of the EKS cluster" 3 | type = string 4 | } 5 | 6 | variable "aws_region" { 7 | description = "AWS Region" 8 | type = string 9 | } 10 | 11 | variable "managed_prometheus_workspace_id" { 12 | description = "Amazon Managed Service for Prometheus Workspace ID" 13 | type = string 14 | default = "" 15 | } 16 | 17 | variable "managed_grafana_workspace_id" { 18 | description = "Amazon Managed Grafana Workspace ID" 19 | type = string 20 | } 21 | 22 | variable "grafana_api_key" { 23 | description = "API key for external-secrets to create secrets for grafana-operator" 24 | type = string 25 | sensitive = true 26 | } 27 | 28 | variable "enable_dashboards" { 29 | description = "Enables or disables curated dashboards" 30 | type = bool 31 | default = true 32 | } 33 | -------------------------------------------------------------------------------- /examples/existing-cluster-java/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = 
"hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | 23 | # ## Used for end-to-end testing on project; update to suit your needs 24 | # backend "s3" { 25 | # bucket = "aws-observability-accelerator-terraform-states" 26 | # region = "us-west-2" 27 | # key = "e2e/existing-cluster-java/terraform.tfstate" 28 | # } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /examples/existing-cluster-nginx/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | data "aws_eks_cluster_auth" "this" { 6 | name = var.eks_cluster_id 7 | } 8 | 9 | data "aws_eks_cluster" "this" { 10 | name = var.eks_cluster_id 11 | } 12 | 13 | data "aws_grafana_workspace" "this" { 14 | workspace_id = var.managed_grafana_workspace_id 15 | } 16 | 17 | provider "kubernetes" { 18 | host = local.eks_cluster_endpoint 19 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 20 | token = data.aws_eks_cluster_auth.this.token 21 | } 22 | 23 | provider "helm" { 24 | kubernetes { 25 | host = local.eks_cluster_endpoint 26 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 27 | token = data.aws_eks_cluster_auth.this.token 28 | } 29 | } 30 | 31 | locals { 32 | region = var.aws_region 33 | eks_cluster_endpoint = data.aws_eks_cluster.this.endpoint 34 | create_new_workspace = var.managed_prometheus_workspace_id == "" ? 
true : false 35 | 36 | tags = { 37 | Source = "github.com/aws-observability/terraform-aws-observability-accelerator" 38 | } 39 | } 40 | 41 | module "eks_monitoring" { 42 | source = "../../modules/eks-monitoring" 43 | # source = "github.com/aws-observability/terraform-aws-observability-accelerator//modules/eks-monitoring?ref=v2.0.0" 44 | 45 | # enable NGINX metrics collection, dashboards and alerts rules creation 46 | enable_nginx = true 47 | 48 | eks_cluster_id = var.eks_cluster_id 49 | 50 | # deploys external-secrets in to the cluster 51 | enable_external_secrets = true 52 | grafana_api_key = var.grafana_api_key 53 | target_secret_name = "grafana-admin-credentials" 54 | target_secret_namespace = "grafana-operator" 55 | grafana_url = "https://${data.aws_grafana_workspace.this.endpoint}" 56 | 57 | # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' 58 | enable_dashboards = var.enable_dashboards 59 | 60 | enable_managed_prometheus = local.create_new_workspace 61 | managed_prometheus_workspace_id = var.managed_prometheus_workspace_id 62 | 63 | enable_logs = true 64 | 65 | tags = local.tags 66 | } 67 | -------------------------------------------------------------------------------- /examples/existing-cluster-nginx/outputs.tf: -------------------------------------------------------------------------------- 1 | output "managed_prometheus_workspace_region" { 2 | description = "AWS Region" 3 | value = module.eks_monitoring.managed_prometheus_workspace_region 4 | } 5 | 6 | output "managed_prometheus_workspace_endpoint" { 7 | description = "Amazon Managed Prometheus workspace endpoint" 8 | value = module.eks_monitoring.managed_prometheus_workspace_endpoint 9 | } 10 | 11 | output "managed_prometheus_workspace_id" { 12 | description = "Amazon Managed Prometheus workspace ID" 13 | value = module.eks_monitoring.managed_prometheus_workspace_id 14 | } 15 | 16 | output "eks_cluster_version" { 17 | 
description = "EKS Cluster version" 18 | value = module.eks_monitoring.eks_cluster_version 19 | } 20 | 21 | output "eks_cluster_id" { 22 | description = "EKS Cluster Id" 23 | value = module.eks_monitoring.eks_cluster_id 24 | } 25 | -------------------------------------------------------------------------------- /examples/existing-cluster-nginx/sample_traffic/nginix-traffic-sample.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: {{namespace}} 5 | labels: 6 | name: {{namespace}} 7 | 8 | --- 9 | 10 | kind: Pod 11 | apiVersion: v1 12 | metadata: 13 | name: banana-app 14 | namespace: {{namespace}} 15 | labels: 16 | app: banana 17 | spec: 18 | containers: 19 | - name: banana-app 20 | image: hashicorp/http-echo 21 | args: 22 | - "-text=banana" 23 | resources: 24 | limits: 25 | cpu: 100m 26 | memory: 100Mi 27 | requests: 28 | cpu: 50m 29 | memory: 50Mi 30 | --- 31 | 32 | kind: Service 33 | apiVersion: v1 34 | metadata: 35 | name: banana-service 36 | namespace: {{namespace}} 37 | spec: 38 | selector: 39 | app: banana 40 | ports: 41 | - port: 5678 # Default port for image 42 | 43 | --- 44 | 45 | kind: Pod 46 | apiVersion: v1 47 | metadata: 48 | name: apple-app 49 | namespace: {{namespace}} 50 | labels: 51 | app: apple 52 | spec: 53 | containers: 54 | - name: apple-app 55 | image: hashicorp/http-echo 56 | args: 57 | - "-text=apple" 58 | resources: 59 | limits: 60 | cpu: 100m 61 | memory: 100Mi 62 | requests: 63 | cpu: 50m 64 | memory: 50Mi 65 | --- 66 | 67 | kind: Service 68 | apiVersion: v1 69 | metadata: 70 | name: apple-service 71 | namespace: {{namespace}} 72 | spec: 73 | selector: 74 | app: apple 75 | ports: 76 | - port: 5678 # Default port for image 77 | 78 | --- 79 | 80 | apiVersion: networking.k8s.io/v1 81 | kind: Ingress 82 | metadata: 83 | name: ingress-nginx-demo 84 | namespace: {{namespace}} 85 | spec: 86 | rules: 87 | - host: {{external_ip}} 88 | http: 89 | paths: 
90 | - path: /apple 91 | pathType: Prefix 92 | backend: 93 | service: 94 | name: apple-service 95 | port: 96 | number: 5678 97 | - path: /banana 98 | pathType: Prefix 99 | backend: 100 | service: 101 | name: banana-service 102 | port: 103 | number: 5678 104 | 105 | --- 106 | 107 | apiVersion: v1 108 | kind: Pod 109 | metadata: 110 | name: traffic-generator 111 | namespace: {{namespace}} 112 | spec: 113 | containers: 114 | - name: traffic-generator 115 | image: ellerbrock/alpine-bash-curl-ssl 116 | command: ["/bin/bash"] 117 | args: ["-c", "while :; do curl http://{{external_ip}}/apple > /dev/null 2>&1; curl http://{{external_ip}}/banana > /dev/null 2>&1; sleep 1; done"] 118 | resources: 119 | limits: 120 | cpu: 100m 121 | memory: 100Mi 122 | requests: 123 | cpu: 50m 124 | memory: 50Mi 125 | -------------------------------------------------------------------------------- /examples/existing-cluster-nginx/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "EKS Cluster Id" 3 | type = string 4 | } 5 | 6 | variable "aws_region" { 7 | description = "AWS Region" 8 | type = string 9 | } 10 | 11 | variable "managed_prometheus_workspace_id" { 12 | description = "Amazon Managed Service for Prometheus (AMP) workspace ID" 13 | type = string 14 | default = "" 15 | } 16 | 17 | variable "managed_grafana_workspace_id" { 18 | description = "Amazon Managed Grafana (AMG) workspace ID" 19 | type = string 20 | } 21 | 22 | variable "grafana_api_key" { 23 | description = "API key for external-secrets to create secrets for grafana-operator" 24 | type = string 25 | sensitive = true 26 | } 27 | 28 | variable "enable_dashboards" { 29 | description = "Enables or disables curated dashboards" 30 | type = bool 31 | default = true 32 | } 33 | -------------------------------------------------------------------------------- /examples/existing-cluster-nginx/versions.tf: 
-------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | 23 | # ## Used for end-to-end testing on project; update to suit your needs 24 | # backend "s3" { 25 | # bucket = "aws-observability-accelerator-terraform-states" 26 | # region = "us-west-2" 27 | # key = "e2e/existing-cluster-nginx/terraform.tfstate" 28 | # } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/README.md: -------------------------------------------------------------------------------- 1 | # Existing Cluster with the AWS Observability Accelerator EKS Infrastructure monitoring 2 | 3 | This example demonstrates how to use the AWS Observability Accelerator Terraform 4 | modules with Infrastructure monitoring enabled. 5 | The current example deploys the [AWS Distro for OpenTelemetry Operator](https://docs.aws.amazon.com/eks/latest/userguide/opentelemetry.html) 6 | for Amazon EKS with its requirements and makes use of an existing Amazon Managed Grafana workspace. 7 | It creates a new Amazon Managed Service for Prometheus workspace unless provided with an existing one to reuse. 8 | 9 | It uses the `EKS monitoring` [module](../../modules/eks-monitoring/) 10 | to provide an existing EKS cluster with an OpenTelemetry collector, 11 | curated Grafana dashboards, Prometheus alerting and recording rules with multiple 12 | configuration options on the cluster infrastructure.
13 | 14 | View the full documentation for this example [here](https://aws-observability.github.io/terraform-aws-observability-accelerator/eks/) 15 | 16 | 17 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 18 | ## Requirements 19 | 20 | | Name | Version | 21 | |------|---------| 22 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 23 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 24 | | <a name="requirement_helm"></a> [helm](#requirement\_helm) | >= 2.4.1 | 25 | | <a name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 26 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 27 | 28 | ## Providers 29 | 30 | | Name | Version | 31 | |------|---------| 32 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 33 | 34 | ## Modules 35 | 36 | | Name | Source | Version | 37 | |------|--------|---------| 38 | | <a name="module_eks_monitoring"></a> [eks\_monitoring](#module\_eks\_monitoring) | ../../modules/eks-monitoring | n/a | 39 | 40 | ## Resources 41 | 42 | | Name | Type | 43 | |------|------| 44 | | [aws_eks_cluster.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster) | data source | 45 | | [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | 46 | | [aws_grafana_workspace.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/grafana_workspace) | data source | 47 | 48 | ## Inputs 49 | 50 | | Name | Description | Type | Default | Required | 51 | |------|-------------|------|---------|:--------:| 52 | | <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS Region | `string` | n/a | yes | 53 | | <a name="input_eks_cluster_id"></a> [eks\_cluster\_id](#input\_eks\_cluster\_id) | Name of the EKS cluster | `string` | `"eks-cluster-with-vpc"` | no | 
54 | | <a name="input_enable_dashboards"></a> [enable\_dashboards](#input\_enable\_dashboards) | Enables or disables curated dashboards. Dashboards are managed by the Grafana Operator | `bool` | `true` | no | 55 | | <a name="input_grafana_api_key"></a> [grafana\_api\_key](#input\_grafana\_api\_key) | API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana | `string` | n/a | yes | 56 | | <a name="input_managed_grafana_workspace_id"></a> [managed\_grafana\_workspace\_id](#input\_managed\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | `string` | n/a | yes | 57 | | <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Service for Prometheus Workspace ID | `string` | `""` | no | 58 | 59 | ## Outputs 60 | 61 | | Name | Description | 62 | |------|-------------| 63 | | <a name="output_eks_cluster_id"></a> [eks\_cluster\_id](#output\_eks\_cluster\_id) | EKS Cluster Id | 64 | | <a name="output_eks_cluster_version"></a> [eks\_cluster\_version](#output\_eks\_cluster\_version) | EKS Cluster version | 65 | | <a name="output_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#output\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus workspace endpoint | 66 | | <a name="output_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#output\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus workspace ID | 67 | | <a name="output_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#output\_managed\_prometheus\_workspace\_region) | AWS Region | 68 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 69 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/cleanup.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | read -p "Enter the region: " region 6 | export AWS_DEFAULT_REGION=$region 7 | 8 | targets=( 9 | "module.eks_monitoring" 10 | ) 11 | 12 | for target in "${targets[@]}" 13 | do 14 | terraform destroy -target="$target" -auto-approve 15 | destroy_output=$(terraform destroy -target="$target" -auto-approve 2>&1) 16 | if [[ $? -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then 17 | echo "SUCCESS: Terraform destroy of $target completed successfully" 18 | else 19 | echo "FAILED: Terraform destroy of $target failed" 20 | exit 1 21 | fi 22 | done 23 | 24 | terraform destroy -auto-approve 25 | destroy_output=$(terraform destroy -auto-approve 2>&1) 26 | if [[ $? -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then 27 | echo "SUCCESS: Terraform destroy of all targets completed successfully" 28 | else 29 | echo "FAILED: Terraform destroy of all targets failed" 30 | exit 1 31 | fi 32 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Initializing ..." 4 | terraform init || echo "\"terraform init\" failed" 5 | 6 | # List of Terraform modules to apply in sequence 7 | targets=( 8 | "module.eks_monitoring" 9 | ) 10 | 11 | # Apply modules in sequence 12 | for target in "${targets[@]}" 13 | do 14 | echo "Applying module $target..." 15 | apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty) 16 | if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then 17 | echo "SUCCESS: Terraform apply of $target completed successfully" 18 | else 19 | echo "FAILED: Terraform apply of $target failed" 20 | exit 1 21 | fi 22 | done 23 | 24 | # Final apply to catch any remaining resources 25 | echo "Applying remaining resources..." 
26 | apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty) 27 | if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then 28 | echo "SUCCESS: Terraform apply of all modules completed successfully" 29 | else 30 | echo "FAILED: Terraform apply of all modules failed" 31 | exit 1 32 | fi 33 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | data "aws_eks_cluster_auth" "this" { 6 | name = var.eks_cluster_id 7 | } 8 | 9 | data "aws_eks_cluster" "this" { 10 | name = var.eks_cluster_id 11 | } 12 | 13 | data "aws_grafana_workspace" "this" { 14 | workspace_id = var.managed_grafana_workspace_id 15 | } 16 | 17 | provider "kubernetes" { 18 | host = local.eks_cluster_endpoint 19 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 20 | token = data.aws_eks_cluster_auth.this.token 21 | } 22 | 23 | provider "helm" { 24 | kubernetes { 25 | host = local.eks_cluster_endpoint 26 | cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority[0].data) 27 | token = data.aws_eks_cluster_auth.this.token 28 | } 29 | } 30 | 31 | locals { 32 | region = var.aws_region 33 | eks_cluster_endpoint = data.aws_eks_cluster.this.endpoint 34 | create_new_workspace = var.managed_prometheus_workspace_id == "" ? 
true : false 35 | tags = { 36 | Source = "github.com/aws-observability/terraform-aws-observability-accelerator" 37 | } 38 | } 39 | 40 | module "eks_monitoring" { 41 | source = "../../modules/eks-monitoring" 42 | # source = "github.com/aws-observability/terraform-aws-observability-accelerator//modules/eks-monitoring?ref=v2.0.0" 43 | 44 | eks_cluster_id = var.eks_cluster_id 45 | 46 | # deploys AWS Distro for OpenTelemetry operator into the cluster 47 | enable_amazon_eks_adot = true 48 | 49 | # reusing existing certificate manager? defaults to true 50 | enable_cert_manager = true 51 | 52 | # enable EKS API server monitoring 53 | enable_apiserver_monitoring = true 54 | 55 | # deploys external-secrets in to the cluster 56 | enable_external_secrets = true 57 | grafana_api_key = var.grafana_api_key 58 | target_secret_name = "grafana-admin-credentials" 59 | target_secret_namespace = "grafana-operator" 60 | grafana_url = "https://${data.aws_grafana_workspace.this.endpoint}" 61 | 62 | # control the publishing of dashboards by specifying the boolean value for the variable 'enable_dashboards', default is 'true' 63 | enable_dashboards = var.enable_dashboards 64 | 65 | # creates a new Amazon Managed Prometheus workspace, defaults to true 66 | enable_managed_prometheus = local.create_new_workspace 67 | managed_prometheus_workspace_id = var.managed_prometheus_workspace_id 68 | 69 | # sets up the Amazon Managed Prometheus alert manager at the workspace level 70 | enable_alertmanager = true 71 | 72 | # optional, defaults to 60s interval and 15s timeout 73 | prometheus_config = { 74 | global_scrape_interval = "60s" 75 | global_scrape_timeout = "15s" 76 | } 77 | 78 | enable_logs = true 79 | 80 | tags = local.tags 81 | } 82 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/outputs.tf: -------------------------------------------------------------------------------- 1 | output 
"managed_prometheus_workspace_region" { 2 | description = "AWS Region" 3 | value = module.eks_monitoring.managed_prometheus_workspace_region 4 | } 5 | 6 | output "managed_prometheus_workspace_endpoint" { 7 | description = "Amazon Managed Prometheus workspace endpoint" 8 | value = module.eks_monitoring.managed_prometheus_workspace_endpoint 9 | } 10 | 11 | output "managed_prometheus_workspace_id" { 12 | description = "Amazon Managed Prometheus workspace ID" 13 | value = module.eks_monitoring.managed_prometheus_workspace_id 14 | } 15 | 16 | output "eks_cluster_version" { 17 | description = "EKS Cluster version" 18 | value = module.eks_monitoring.eks_cluster_version 19 | } 20 | 21 | output "eks_cluster_id" { 22 | description = "EKS Cluster Id" 23 | value = module.eks_monitoring.eks_cluster_id 24 | } 25 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "Name of the EKS cluster" 3 | type = string 4 | default = "eks-cluster-with-vpc" 5 | } 6 | 7 | variable "aws_region" { 8 | description = "AWS Region" 9 | type = string 10 | } 11 | 12 | variable "managed_prometheus_workspace_id" { 13 | description = "Amazon Managed Service for Prometheus Workspace ID" 14 | type = string 15 | default = "" 16 | } 17 | 18 | variable "managed_grafana_workspace_id" { 19 | description = "Amazon Managed Grafana Workspace ID" 20 | type = string 21 | } 22 | 23 | variable "grafana_api_key" { 24 | description = "API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana" 25 | type = string 26 | sensitive = true 27 | } 28 | 29 | variable "enable_dashboards" { 30 | description = "Enables or disables curated dashboards. 
Dashboards are managed by the Grafana Operator" 31 | type = bool 32 | default = true 33 | } 34 | -------------------------------------------------------------------------------- /examples/existing-cluster-with-base-and-infra/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | 23 | # ## Used for end-to-end testing on project; update to suit your needs 24 | # backend "s3" { 25 | # bucket = "aws-observability-accelerator-terraform-states" 26 | # region = "us-west-2" 27 | # key = "e2e/existing-cluster-with-base-and-infra/terraform.tfstate" 28 | # } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /examples/managed-grafana-workspace/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.aws_region 3 | } 4 | 5 | locals { 6 | name = "aws-observability-accelerator" 7 | description = "Amazon Managed Grafana workspace for ${local.name}" 8 | 9 | tags = { 10 | GithubRepo = "terraform-aws-observability-accelerator" 11 | GithubOrg = "aws-observability" 12 | } 13 | } 14 | 15 | module "managed_grafana" { 16 | source = "terraform-aws-modules/managed-service-grafana/aws" 17 | version = "1.10.0" 18 | 19 | name = local.name 20 | associate_license = false 21 | description = local.description 22 | account_access_type = "CURRENT_ACCOUNT" 23 | authentication_providers = ["AWS_SSO"] 24 | permission_type = "SERVICE_MANAGED" 25 | data_sources = ["CLOUDWATCH", "PROMETHEUS", "XRAY"] 26 | notification_destinations = ["SNS"] 27 | 
stack_set_name = local.name 28 | 29 | configuration = jsonencode({ 30 | unifiedAlerting = { 31 | enabled = true 32 | } 33 | }) 34 | 35 | grafana_version = "9.4" 36 | 37 | 38 | # Workspace IAM role 39 | create_iam_role = true 40 | iam_role_name = local.name 41 | use_iam_role_name_prefix = true 42 | iam_role_description = local.description 43 | iam_role_path = "/grafana/" 44 | iam_role_force_detach_policies = true 45 | iam_role_max_session_duration = 7200 46 | iam_role_tags = local.tags 47 | 48 | tags = local.tags 49 | } 50 | -------------------------------------------------------------------------------- /examples/managed-grafana-workspace/outputs.tf: -------------------------------------------------------------------------------- 1 | output "grafana_workspace_endpoint" { 2 | description = "Amazon Managed Grafana Workspace endpoint" 3 | value = "https://${module.managed_grafana.workspace_endpoint}" 4 | } 5 | 6 | output "grafana_workspace_id" { 7 | description = "Amazon Managed Grafana Workspace ID" 8 | value = module.managed_grafana.workspace_id 9 | } 10 | 11 | output "grafana_workspace_iam_role_arn" { 12 | description = "Amazon Managed Grafana Workspace's IAM Role ARN" 13 | value = module.managed_grafana.workspace_iam_role_arn 14 | } 15 | -------------------------------------------------------------------------------- /examples/managed-grafana-workspace/readme.md: -------------------------------------------------------------------------------- 1 | # Amazon Managed Grafana Workspace Setup 2 | 3 | This example creates an Amazon Managed Grafana Workspace with 4 | Amazon CloudWatch, AWS X-Ray and Amazon Managed Service for Prometheus 5 | datasources 6 | 7 | The authentication method chosen for this example is with IAM Identity 8 | Center (former SSO). You can extend this example to add SAML. 
9 | 10 | Step-by-step instructions available on our [docs site](https://aws-observability.github.io/terraform-aws-observability-accelerator/) 11 | under **Supporting Examples** 12 | 13 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 14 | ## Requirements 15 | 16 | | Name | Version | 17 | |------|---------| 18 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 | 19 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 5.0.0 | 20 | 21 | ## Providers 22 | 23 | No providers. 24 | 25 | ## Modules 26 | 27 | | Name | Source | Version | 28 | |------|--------|---------| 29 | | <a name="module_managed_grafana"></a> [managed\_grafana](#module\_managed\_grafana) | terraform-aws-modules/managed-service-grafana/aws | 1.10.0 | 30 | 31 | ## Resources 32 | 33 | No resources. 34 | 35 | ## Inputs 36 | 37 | | Name | Description | Type | Default | Required | 38 | |------|-------------|------|---------|:--------:| 39 | | <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS Region | `string` | n/a | yes | 40 | 41 | ## Outputs 42 | 43 | | Name | Description | 44 | |------|-------------| 45 | | <a name="output_grafana_workspace_endpoint"></a> [grafana\_workspace\_endpoint](#output\_grafana\_workspace\_endpoint) | Amazon Managed Grafana Workspace endpoint | 46 | | <a name="output_grafana_workspace_iam_role_arn"></a> [grafana\_workspace\_iam\_role\_arn](#output\_grafana\_workspace\_iam\_role\_arn) | Amazon Managed Grafana Workspace's IAM Role ARN | 47 | | <a name="output_grafana_workspace_id"></a> [grafana\_workspace\_id](#output\_grafana\_workspace\_id) | Amazon Managed Grafana Workspace ID | 48 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 49 | -------------------------------------------------------------------------------- /examples/managed-grafana-workspace/variables.tf: -------------------------------------------------------------------------------- 1 | variable "aws_region" { 2 | description = "AWS Region" 3 
| type = string 4 | } 5 | -------------------------------------------------------------------------------- /examples/managed-grafana-workspace/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | } 10 | 11 | # ## Used for end-to-end testing on project; update to suit your needs 12 | # backend "s3" { 13 | # bucket = "aws-observability-accelerator-terraform-states" 14 | # region = "us-west-2" 15 | # key = "e2e/managed-grafana-workspace/terraform.tfstate" 16 | # } 17 | } 18 | -------------------------------------------------------------------------------- /examples/managed-prometheus-monitoring/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = local.region 3 | } 4 | 5 | provider "grafana" { 6 | url = local.amg_ws_endpoint 7 | auth = var.grafana_api_key 8 | } 9 | 10 | data "aws_grafana_workspace" "this" { 11 | workspace_id = var.managed_grafana_workspace_id 12 | } 13 | 14 | locals { 15 | region = var.aws_region 16 | amg_ws_endpoint = "https://${data.aws_grafana_workspace.this.endpoint}" 17 | } 18 | 19 | resource "grafana_folder" "this" { 20 | title = "Amazon Managed Prometheus monitoring dashboards" 21 | } 22 | 23 | module "managed_prometheus_monitoring" { 24 | source = "../../modules/managed-prometheus-monitoring" 25 | dashboards_folder_id = resource.grafana_folder.this.id 26 | aws_region = local.region 27 | managed_prometheus_workspace_ids = var.managed_prometheus_workspace_ids 28 | } 29 | -------------------------------------------------------------------------------- /examples/managed-prometheus-monitoring/outputs.tf: -------------------------------------------------------------------------------- 1 | output "grafana_dashboard_urls" { 2 | description = "URLs for dashboards created" 3 | value = 
module.managed_prometheus_monitoring.grafana_dashboard_urls 4 | } 5 | -------------------------------------------------------------------------------- /examples/managed-prometheus-monitoring/variables.tf: -------------------------------------------------------------------------------- 1 | variable "grafana_api_key" { 2 | description = "API key for authorizing the Grafana provider to make changes to Amazon Managed Grafana" 3 | type = string 4 | sensitive = true 5 | } 6 | 7 | variable "aws_region" { 8 | description = "AWS Region" 9 | type = string 10 | } 11 | 12 | variable "managed_prometheus_workspace_ids" { 13 | description = "Amazon Managed Service for Prometheus Workspace IDs to create Alarms for" 14 | type = string 15 | } 16 | 17 | variable "managed_grafana_workspace_id" { 18 | description = "Amazon Managed Grafana workspace ID" 19 | type = string 20 | } 21 | -------------------------------------------------------------------------------- /examples/managed-prometheus-monitoring/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | grafana = { 10 | source = "grafana/grafana" 11 | version = ">= 1.25.0" 12 | } 13 | } 14 | 15 | # ## Used for end-to-end testing on project; update to suit your needs 16 | # backend "s3" { 17 | # bucket = "aws-observability-accelerator-terraform-states" 18 | # region = "us-west-2" 19 | # key = "e2e/managed-prometheus-monitoring/terraform.tfstate" 20 | # } 21 | } 22 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: AWS Observability Accelerator for Terraform 2 | docs_dir: "docs" 3 | copyright: "Copyright © Amazon 2022" 4 | site_author: "AWS" 5 | site_url: 
"https://aws-observability.github.io/terraform-aws-observability-accelerator/" 6 | repo_name: "aws-observability/terraform-aws-observability-accelerator" 7 | repo_url: "https://github.com/aws-observability/terraform-aws-observability-accelerator" 8 | 9 | theme: 10 | logo: images/aws-logo.png 11 | favicon: images/aws-favicon.png 12 | name: material 13 | font: 14 | text: ember 15 | custom_dir: docs/overrides 16 | icon: 17 | repo: fontawesome/brands/github 18 | features: 19 | - navigation.tabs.sticky 20 | 21 | palette: 22 | primary: indigo 23 | accent: grey 24 | 25 | nav: 26 | - Home: index.md 27 | - Concepts: concepts.md 28 | - Amazon EKS: 29 | - Infrastructure: eks/index.md 30 | - EKS API server: eks/eks-apiserver.md 31 | - EKS GPU monitoring: eks/gpu-monitoring.md 32 | - Multicluster: 33 | - Single AWS account: eks/multicluster.md 34 | - Cross AWS account: eks/multiaccount.md 35 | - Viewing logs: eks/logs.md 36 | - Tracing: eks/tracing.md 37 | - Patterns: 38 | - Java/JMX: eks/java.md 39 | - Nginx: eks/nginx.md 40 | - Istio: eks/istio.md 41 | - Troubleshooting: eks/troubleshooting.md 42 | - Teardown: eks/destroy.md 43 | - AWS Distro for OpenTelemetry (ADOT): 44 | - Monitoring ADOT collector health: adothealth/index.md 45 | - CloudWatch Container Insights & CloudWatch Application Signals: 46 | - Amazon EKS: container-insights/eks.md 47 | - Monitoring Managed Service for Prometheus Workspaces: workloads/managed-prometheus.md 48 | - Amazon ECS: 49 | - Cluster Monitoring: ecs/ecs-monitoring-on-ec2.md 50 | - Supporting Examples: 51 | - EKS Cluster with VPC: helpers/new-eks-cluster.md 52 | - Amazon Managed Grafana setup: helpers/managed-grafana.md 53 | - ECS Cluster with VPC: helpers/ecs-cluster-with-vpc.md 54 | - Support & Feedback: support.md 55 | - Contributors: contributors.md 56 | 57 | markdown_extensions: 58 | - toc: 59 | permalink: true 60 | - admonition 61 | - codehilite 62 | - footnotes 63 | - pymdownx.critic 64 | - pymdownx.tabbed: 65 | alternate_style: true 66 
| - pymdownx.superfences: 67 | custom_fences: 68 | - name: mermaid 69 | class: mermaid 70 | format: !!python/name:pymdownx.superfences.fence_code_format 71 | 72 | plugins: 73 | - search 74 | -------------------------------------------------------------------------------- /modules/ecs-monitoring/configs/config.yaml: -------------------------------------------------------------------------------- 1 | extensions: 2 | sigv4auth: 3 | region: "${aws_region}" 4 | service: "aps" 5 | ecs_observer: # extension type is ecs_observer 6 | cluster_name: "${cluster_name}" # cluster name need to configured manually 7 | cluster_region: "${cluster_region}" # region can be configured directly or use AWS_REGION env var 8 | result_file: "/etc/ecs_sd_targets.yaml" # the directory for file must already exists 9 | refresh_interval: ${refresh_interval} 10 | job_label_name: prometheus_job 11 | # JMX 12 | docker_labels: 13 | - port_label: "ECS_PROMETHEUS_EXPORTER_PORT" 14 | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: ${otlp_grpc_endpoint} 20 | http: 21 | endpoint: ${otlp_http_endpoint} 22 | prometheus: 23 | config: 24 | scrape_configs: 25 | - job_name: "ecssd" 26 | file_sd_configs: 27 | - files: 28 | - "/etc/ecs_sd_targets.yaml" 29 | relabel_configs: 30 | - source_labels: [__meta_ecs_cluster_name] 31 | action: replace 32 | target_label: ClusterName 33 | - source_labels: [__meta_ecs_service_name] 34 | action: replace 35 | target_label: ServiceName 36 | - source_labels: [__meta_ecs_task_definition_family] 37 | action: replace 38 | target_label: TaskDefinitionFamily 39 | - source_labels: [__meta_ecs_task_launch_type] 40 | action: replace 41 | target_label: LaunchType 42 | - source_labels: [__meta_ecs_container_name] 43 | action: replace 44 | target_label: container_name 45 | - action: labelmap 46 | regex: ^__meta_ecs_container_labels_(.+)$ 47 | replacement: "$$1" 48 | awsecscontainermetrics: 49 | collection_interval: ${ecs_metrics_collection_interval} 50 | 51 | 
processors: 52 | resource: 53 | attributes: 54 | - key: receiver 55 | value: "prometheus" 56 | action: insert 57 | filter: 58 | metrics: 59 | include: 60 | match_type: strict 61 | metric_names: 62 | - ecs.task.memory.utilized 63 | - ecs.task.memory.reserved 64 | - ecs.task.memory.usage 65 | - ecs.task.cpu.utilized 66 | - ecs.task.cpu.reserved 67 | - ecs.task.cpu.usage.vcpu 68 | - ecs.task.network.rate.rx 69 | - ecs.task.network.rate.tx 70 | - ecs.task.storage.read_bytes 71 | - ecs.task.storage.write_bytes 72 | metricstransform: 73 | transforms: 74 | - include: ".*" 75 | match_type: regexp 76 | action: update 77 | operations: 78 | - label: prometheus_job 79 | new_label: job 80 | action: update_label 81 | - include: ecs.task.memory.utilized 82 | action: update 83 | new_name: MemoryUtilized 84 | - include: ecs.task.memory.reserved 85 | action: update 86 | new_name: MemoryReserved 87 | - include: ecs.task.memory.usage 88 | action: update 89 | new_name: MemoryUsage 90 | - include: ecs.task.cpu.utilized 91 | action: update 92 | new_name: CpuUtilized 93 | - include: ecs.task.cpu.reserved 94 | action: update 95 | new_name: CpuReserved 96 | - include: ecs.task.cpu.usage.vcpu 97 | action: update 98 | new_name: CpuUsage 99 | - include: ecs.task.network.rate.rx 100 | action: update 101 | new_name: NetworkRxBytes 102 | - include: ecs.task.network.rate.tx 103 | action: update 104 | new_name: NetworkTxBytes 105 | - include: ecs.task.storage.read_bytes 106 | action: update 107 | new_name: StorageReadBytes 108 | - include: ecs.task.storage.write_bytes 109 | action: update 110 | new_name: StorageWriteBytes 111 | 112 | exporters: 113 | prometheusremotewrite: 114 | endpoint: "${amp_remote_write_ep}" 115 | auth: 116 | authenticator: sigv4auth 117 | logging: 118 | loglevel: debug 119 | 120 | service: 121 | extensions: [ecs_observer, sigv4auth] 122 | pipelines: 123 | metrics: 124 | receivers: [prometheus] 125 | processors: [resource, metricstransform] 126 | exporters: 
[prometheusremotewrite] 127 | metrics/ecs: 128 | receivers: [awsecscontainermetrics] 129 | processors: [filter] 130 | exporters: [logging, prometheusremotewrite] 131 | -------------------------------------------------------------------------------- /modules/ecs-monitoring/locals.tf: -------------------------------------------------------------------------------- 1 | data "aws_region" "current" {} 2 | 3 | locals { 4 | region = data.aws_region.current.name 5 | name = "amg-ex-${replace(basename(path.cwd), "_", "-")}" 6 | description = "AWS Managed Grafana service for ${local.name}" 7 | prometheus_ws_endpoint = module.managed_prometheus_default[0].workspace_prometheus_endpoint 8 | 9 | default_otel_values = { 10 | aws_region = data.aws_region.current.name 11 | cluster_name = var.aws_ecs_cluster_name 12 | cluster_region = data.aws_region.current.name 13 | refresh_interval = var.refresh_interval 14 | ecs_metrics_collection_interval = var.ecs_metrics_collection_interval 15 | amp_remote_write_ep = "${local.prometheus_ws_endpoint}api/v1/remote_write" 16 | otlp_grpc_endpoint = var.otlp_grpc_endpoint 17 | otlp_http_endpoint = var.otlp_http_endpoint 18 | } 19 | 20 | ssm_param_value = yamlencode( 21 | templatefile("${path.module}/configs/config.yaml", local.default_otel_values) 22 | ) 23 | 24 | container_def_default_values = { 25 | container_name = var.container_name 26 | otel_image_ver = var.otel_image_ver 27 | aws_region = data.aws_region.current.name 28 | } 29 | 30 | container_definitions = templatefile("${path.module}/task-definitions/otel_collector.json", local.container_def_default_values) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /modules/ecs-monitoring/main.tf: -------------------------------------------------------------------------------- 1 | # SSM Parameter for storing and distributing the ADOT config 2 | resource "aws_ssm_parameter" "adot_config" { 3 | name = "/terraform-aws-observability/otel_collector_config" 
4 | description = "SSM parameter for aws-observability-accelerator/otel-collector-config" 5 | type = "String" 6 | value = local.ssm_param_value 7 | tier = "Intelligent-Tiering" 8 | } 9 | 10 | ############################################ 11 | # Managed Grafana and Prometheus Module 12 | ############################################ 13 | 14 | module "managed_grafana_default" { 15 | count = var.create_managed_grafana_ws ? 1 : 0 16 | 17 | source = "terraform-aws-modules/managed-service-grafana/aws" 18 | version = "2.1.0" 19 | name = "${local.name}-default" 20 | associate_license = false 21 | } 22 | 23 | module "managed_prometheus_default" { 24 | count = var.create_managed_prometheus_ws ? 1 : 0 25 | 26 | source = "terraform-aws-modules/managed-service-prometheus/aws" 27 | version = "2.2.2" 28 | workspace_alias = "${local.name}-default" 29 | } 30 | 31 | ########################################### 32 | # Task Definition for ADOT ECS Prometheus 33 | ########################################### 34 | resource "aws_ecs_task_definition" "adot_ecs_prometheus" { 35 | family = "adot_prometheus_td" 36 | task_role_arn = var.task_role_arn 37 | execution_role_arn = var.execution_role_arn 38 | network_mode = "bridge" 39 | requires_compatibilities = ["EC2"] 40 | cpu = var.ecs_adot_cpu 41 | memory = var.ecs_adot_mem 42 | container_definitions = local.container_definitions 43 | } 44 | 45 | ############################################ 46 | # ECS Service 47 | ############################################ 48 | resource "aws_ecs_service" "adot_ecs_prometheus" { 49 | name = "adot_prometheus_svc" 50 | cluster = var.aws_ecs_cluster_name 51 | task_definition = aws_ecs_task_definition.adot_ecs_prometheus.arn 52 | desired_count = 1 53 | } 54 | -------------------------------------------------------------------------------- /modules/ecs-monitoring/outputs.tf: -------------------------------------------------------------------------------- 1 | output "grafana_workspace_id" { 2 | description = "The ID 
of the Grafana workspace"
3 |   value       = try(module.managed_grafana_default[0].workspace_id, "")
4 | }
5 |
6 | output "grafana_workspace_endpoint" {
7 |   description = "The endpoint of the Grafana workspace"
8 |   value       = try(module.managed_grafana_default[0].workspace_endpoint, "")
9 | }
10 |
11 | # The Prometheus module's outputs carry a `workspace_` prefix (locals.tf reads
12 | # `workspace_prometheus_endpoint` from the same module). Referencing unprefixed
13 | # names makes try() fall back to "" even when the workspace was created.
14 | output "prometheus_workspace_id" {
15 |   description = "Identifier of the workspace"
16 |   value       = try(module.managed_prometheus_default[0].workspace_id, "")
17 | }
18 |
19 | output "prometheus_workspace_prometheus_endpoint" {
20 |   description = "Prometheus endpoint available for this workspace"
21 |   value       = try(module.managed_prometheus_default[0].workspace_prometheus_endpoint, "")
22 | }
23 |
--------------------------------------------------------------------------------
/modules/ecs-monitoring/task-definitions/otel_collector.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "name": "${container_name}",
4 |     "image": "amazon/aws-otel-collector:${otel_image_ver}",
5 |     "secrets": [
6 |       {
7 |         "name": "AOT_CONFIG_CONTENT",
8 |         "valueFrom": "/terraform-aws-observability/otel_collector_config"
9 |       }
10 |     ],
11 |     "logConfiguration": {
12 |       "logDriver": "awslogs",
13 |       "options": {
14 |         "awslogs-create-group": "True",
15 |         "awslogs-group": "/adot/collector",
16 |         "awslogs-region": "${aws_region}",
17 |         "awslogs-stream-prefix": "ecs-prometheus"
18 |       }
19 |     }
20 |   }
21 | ]
22 |
--------------------------------------------------------------------------------
/modules/ecs-monitoring/variables.tf:
--------------------------------------------------------------------------------
1 | variable "aws_ecs_cluster_name" {
2 |   description = "Name of your ECS cluster"
3 |   type        = string
4 | }
5 |
6 | variable "task_role_arn" {
7 |   description = "ARN of the IAM Task Role"
8 |   type        = string
9 | }
10 |
11 | variable "execution_role_arn" {
12 |   description = "ARN of the IAM Execution Role"
13 |   type        = string
14 | }
15 |
16 | variable "ecs_adot_cpu" {
17 |   description =
"CPU to be allocated for the ADOT ECS TASK" 18 | type = string 19 | default = "256" 20 | } 21 | 22 | variable "ecs_adot_mem" { 23 | description = "Memory to be allocated for the ADOT ECS TASK" 24 | type = string 25 | default = "512" 26 | } 27 | 28 | variable "create_managed_grafana_ws" { 29 | description = "Creates a Workspace for Amazon Managed Grafana" 30 | type = bool 31 | default = true 32 | } 33 | 34 | variable "create_managed_prometheus_ws" { 35 | description = "Creates a Workspace for Amazon Managed Prometheus" 36 | type = bool 37 | default = true 38 | } 39 | 40 | variable "refresh_interval" { 41 | description = "Refresh interval for ecs_observer" 42 | type = string 43 | default = "60s" 44 | } 45 | 46 | variable "ecs_metrics_collection_interval" { 47 | description = "Collection interval for ecs metrics" 48 | type = string 49 | default = "15s" 50 | } 51 | 52 | variable "otlp_grpc_endpoint" { 53 | description = "otlpGrpcEndpoint" 54 | type = string 55 | default = "0.0.0.0:4317" 56 | } 57 | 58 | 59 | variable "otlp_http_endpoint" { 60 | description = "otlpHttpEndpoint" 61 | type = string 62 | default = "0.0.0.0:4318" 63 | } 64 | 65 | variable "container_name" { 66 | description = "Container Name for Adot" 67 | type = string 68 | default = "adot_new" 69 | } 70 | 71 | variable "otel_image_ver" { 72 | description = "Otel Docker Image version" 73 | type = string 74 | default = "v0.31.0" 75 | } 76 | -------------------------------------------------------------------------------- /modules/ecs-monitoring/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /modules/eks-container-insights/data.tf: -------------------------------------------------------------------------------- 1 | 
data "aws_partition" "current" {} 2 | data "aws_caller_identity" "current" {} 3 | data "aws_region" "current" {} 4 | 5 | data "aws_eks_cluster" "eks_cluster" { 6 | name = var.eks_cluster_id 7 | } 8 | -------------------------------------------------------------------------------- /modules/eks-container-insights/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | kubernetes_version = var.kubernetes_version 3 | eks_oidc_issuer_url = replace(data.aws_eks_cluster.eks_cluster.identity[0].oidc[0].issuer, "https://", "") 4 | 5 | addon_context = { 6 | aws_caller_identity_account_id = data.aws_caller_identity.current.account_id 7 | aws_caller_identity_arn = data.aws_caller_identity.current.arn 8 | aws_partition_id = data.aws_partition.current.partition 9 | aws_region_name = data.aws_region.current.name 10 | eks_oidc_provider_arn = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${local.eks_oidc_issuer_url}" 11 | eks_cluster_id = data.aws_eks_cluster.eks_cluster.id 12 | tags = var.tags 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /modules/eks-container-insights/main.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | name = "amazon-cloudwatch-observability" 3 | } 4 | 5 | module "cloudwatch_observability_irsa_role" { 6 | count = var.create_cloudwatch_observability_irsa_role ? 
1 : 0 7 | 8 | source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" 9 | version = "v5.33.0" 10 | role_name = "cloudwatch-observability" 11 | attach_cloudwatch_observability_policy = true 12 | 13 | oidc_providers = { 14 | ex = { 15 | provider_arn = var.eks_oidc_provider_arn 16 | namespace_service_accounts = ["amazon-cloudwatch:cloudwatch-agent"] 17 | } 18 | } 19 | } 20 | 21 | data "aws_eks_addon_version" "eks_addon_version" { 22 | addon_name = local.name 23 | kubernetes_version = try(var.addon_config.kubernetes_version, var.kubernetes_version) 24 | most_recent = try(var.addon_config.most_recent, true) 25 | } 26 | # Installs the Amazon CloudWatch Observability EKS add-on; version, conflict handling, preserve, configuration values and tags can be overridden through var.addon_config. 27 | resource "aws_eks_addon" "amazon_cloudwatch_observability" { 28 | count = var.enable_amazon_eks_cw_observability ? 1 : 0 29 | 30 | cluster_name = var.eks_cluster_id 31 | addon_name = local.name 32 | addon_version = try(var.addon_config.addon_version, data.aws_eks_addon_version.eks_addon_version.version) 33 | resolve_conflicts_on_create = try(var.addon_config.resolve_conflicts_on_create, "OVERWRITE") 34 | service_account_role_arn = try(module.cloudwatch_observability_irsa_role[0].iam_role_arn, null) 35 | preserve = try(var.addon_config.preserve, true) 36 | configuration_values = try(var.addon_config.configuration_values, null) 37 | 38 | tags = merge( 39 | var.tags, # module-level tags; on key conflicts addon_config.tags wins because it is merged last 40 | try(var.addon_config.tags, {}) 41 | ) 42 | } 43 | 44 | resource "aws_iam_service_linked_role" "application_signals_cw" { 45 | count = var.create_cloudwatch_application_signals_role ? 
1 : 0 46 | aws_service_name = "application-signals.cloudwatch.amazonaws.com" 47 | } 48 | -------------------------------------------------------------------------------- /modules/eks-container-insights/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-container-insights/outputs.tf -------------------------------------------------------------------------------- /modules/eks-container-insights/variables.tf: -------------------------------------------------------------------------------- 1 | variable "eks_cluster_id" { 2 | description = "Name of the EKS cluster" 3 | default = "eks-cw" 4 | type = string 5 | } 6 | 7 | variable "enable_amazon_eks_cw_observability" { 8 | description = "Enable Amazon EKS CloudWatch Observability add-on" 9 | type = bool 10 | default = true 11 | } 12 | 13 | variable "addon_config" { 14 | description = "Amazon EKS Managed CloudWatch Observability Add-on config" 15 | type = any 16 | default = {} 17 | } 18 | 19 | variable "kubernetes_version" { 20 | description = "Kubernetes version" 21 | type = string 22 | default = "1.28" 23 | } 24 | 25 | variable "most_recent" { 26 | description = "Determines if the most recent or default version of the addon should be returned." 
27 | type = bool 28 | default = false 29 | } 30 | 31 | variable "eks_oidc_provider_arn" { 32 | description = "The OIDC Provider ARN of AWS EKS cluster" 33 | type = string 34 | default = "" 35 | } 36 | 37 | variable "create_cloudwatch_observability_irsa_role" { 38 | type = bool 39 | default = true 40 | description = "Create a Cloudwatch Observability IRSA" 41 | } 42 | 43 | variable "create_cloudwatch_application_signals_role" { 44 | type = bool 45 | default = true 46 | description = "Create a Cloudwatch Application Signals service-linked role" 47 | } 48 | 49 | variable "tags" { 50 | description = "Additional tags (e.g. `{ BusinessUnit = \"XYZ\" }`)" 51 | type = map(string) 52 | default = {} 53 | } 54 | -------------------------------------------------------------------------------- /modules/eks-container-insights/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/adot-operator/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | name = "adot" 3 | eks_addon_role_name = "eks:addon-manager" 4 | eks_addon_clusterrole_name = "eks:addon-manager-otel" 5 | addon_namespace = "opentelemetry-operator-system" 6 | } 7 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/adot-operator/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-monitoring/add-ons/adot-operator/outputs.tf -------------------------------------------------------------------------------- 
/modules/eks-monitoring/add-ons/adot-operator/variables.tf: -------------------------------------------------------------------------------- 1 | variable "helm_config" { 2 | description = "Helm provider config for cert-manager" 3 | type = any 4 | default = { version = "v1.8.2" } 5 | } 6 | 7 | variable "addon_context" { 8 | description = "Input configuration for the addon" 9 | type = object({ 10 | aws_caller_identity_account_id = string 11 | aws_caller_identity_arn = string 12 | aws_eks_cluster_endpoint = string 13 | aws_partition_id = string 14 | aws_region_name = string 15 | eks_cluster_id = string 16 | eks_oidc_issuer_url = string 17 | eks_oidc_provider_arn = string 18 | irsa_iam_role_path = string 19 | irsa_iam_permissions_boundary = string 20 | tags = map(string) 21 | }) 22 | } 23 | 24 | variable "enable_cert_manager" { 25 | description = "Enable cert-manager, a requirement for ADOT Operator" 26 | type = bool 27 | default = true 28 | } 29 | 30 | variable "kubernetes_version" { 31 | description = "EKS Cluster version" 32 | type = string 33 | } 34 | 35 | variable "addon_config" { 36 | description = "Amazon EKS Managed ADOT Add-on config" 37 | type = any 38 | default = {} 39 | } 40 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/adot-operator/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 3.72" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/README.md: -------------------------------------------------------------------------------- 1 | # AWS for Fluent Bit 2 | 3 | Fluent Bit is an open source Log Processor and Forwarder 
which allows you to collect any data like metrics and logs from different sources, enrich them with filters and send them to multiple destinations. 4 | AWS provides a Fluent Bit image with plugins for CloudWatch Logs, Kinesis Data Firehose, Kinesis Data Streams and Amazon OpenSearch Service. 5 | 6 | This add-on is configured to stream the worker node logs to CloudWatch Logs by default. It can be configured to stream the logs to additional destinations like Kinesis Data Firehose, Kinesis Data Streams and Amazon OpenSearch Service by passing a custom `values.yaml`. 7 | See this [Helm Chart](https://github.com/aws/eks-charts/tree/master/stable/aws-for-fluent-bit) for more details. 8 | 9 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 10 | ## Requirements 11 | 12 | | Name | Version | 13 | |------|---------| 14 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0.0 | 15 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 3.72 | 16 | 17 | ## Providers 18 | 19 | | Name | Version | 20 | |------|---------| 21 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 3.72 | 22 | 23 | ## Modules 24 | 25 | | Name | Source | Version | 26 | |------|--------|---------| 27 | | <a name="module_helm_addon"></a> [helm\_addon](#module\_helm\_addon) | github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons/helm-addon | v4.32.1 | 28 | 29 | ## Resources 30 | 31 | | Name | Type | 32 | |------|------| 33 | | [aws_iam_policy.aws_for_fluent_bit](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | 34 | | [aws_iam_policy_document.irsa](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 35 | 36 | ## Inputs 37 | 38 | | Name | Description | Type | Default | Required | 39 | |------|-------------|------|---------|:--------:| 40 | | <a name="input_addon_context"></a> [addon\_context](#input\_addon\_context) | Input 
configuration for the addon | <pre>object({<br> aws_caller_identity_account_id = string<br> aws_caller_identity_arn = string<br> aws_eks_cluster_endpoint = string<br> aws_partition_id = string<br> aws_region_name = string<br> eks_cluster_id = string<br> eks_oidc_issuer_url = string<br> eks_oidc_provider_arn = string<br> tags = map(string)<br> irsa_iam_role_path = string<br> irsa_iam_permissions_boundary = string<br> })</pre> | n/a | yes | 41 | | <a name="input_cw_log_retention_days"></a> [cw\_log\_retention\_days](#input\_cw\_log\_retention\_days) | FluentBit CloudWatch Log group retention period | `number` | `90` | no | 42 | | <a name="input_helm_config"></a> [helm\_config](#input\_helm\_config) | Helm provider config aws\_for\_fluent\_bit. | `any` | `{}` | no | 43 | | <a name="input_irsa_policies"></a> [irsa\_policies](#input\_irsa\_policies) | Additional IAM policies for a IAM role for service accounts | `list(string)` | `[]` | no | 44 | | <a name="input_manage_via_gitops"></a> [manage\_via\_gitops](#input\_manage\_via\_gitops) | Determines if the add-on should be managed via GitOps. 
| `bool` | `false` | no | 45 | | <a name="input_refresh_interval"></a> [refresh\_interval](#input\_refresh\_interval) | FluentBit input refresh interval | `number` | `60` | no | 46 | 47 | ## Outputs 48 | 49 | | Name | Description | 50 | |------|-------------| 51 | | <a name="output_irsa_arn"></a> [irsa\_arn](#output\_irsa\_arn) | IAM role ARN for the service account | 52 | | <a name="output_irsa_name"></a> [irsa\_name](#output\_irsa\_name) | IAM role name for the service account | 53 | | <a name="output_release_metadata"></a> [release\_metadata](#output\_release\_metadata) | Map of attributes of the Helm release metadata | 54 | | <a name="output_service_account"></a> [service\_account](#output\_service\_account) | Name of Kubernetes service account | 55 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 56 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "irsa" { 2 | statement { 3 | sid = "PutLogEvents" 4 | effect = "Allow" 5 | resources = ["arn:${var.addon_context.aws_partition_id}:logs:${var.addon_context.aws_region_name}:${var.addon_context.aws_caller_identity_account_id}:log-group:*:log-stream:*"] 6 | actions = ["logs:PutLogEvents"] 7 | } 8 | 9 | statement { 10 | sid = "CreateCWLogs" 11 | effect = "Allow" 12 | resources = ["arn:${var.addon_context.aws_partition_id}:logs:${var.addon_context.aws_region_name}:${var.addon_context.aws_caller_identity_account_id}:log-group:*"] 13 | 14 | actions = [ 15 | "logs:CreateLogGroup", 16 | "logs:CreateLogStream", 17 | "logs:DescribeLogGroups", 18 | "logs:DescribeLogStreams", 19 | "logs:PutRetentionPolicy", 20 | ] 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/locals.tf: 
-------------------------------------------------------------------------------- 1 | locals { 2 | name = "aws-for-fluent-bit" 3 | service_account = try(var.helm_config.service_account, "${local.name}-sa") 4 | 5 | set_values = [ 6 | { 7 | name = "serviceAccount.name" 8 | value = local.service_account 9 | }, 10 | { 11 | name = "serviceAccount.create" 12 | value = false 13 | } 14 | ] 15 | 16 | # https://github.com/aws/eks-charts/blob/master/stable/aws-for-fluent-bit/Chart.yaml 17 | default_helm_config = { 18 | name = local.name 19 | chart = local.name 20 | repository = "https://aws.github.io/eks-charts" 21 | version = "0.1.27" 22 | namespace = local.name 23 | values = local.default_helm_values 24 | description = "aws-for-fluentbit Helm Chart deployment configuration" 25 | } 26 | 27 | helm_config = merge( 28 | local.default_helm_config, 29 | var.helm_config 30 | ) 31 | 32 | default_helm_values = [templatefile("${path.module}/values.yaml", { 33 | aws_region = var.addon_context.aws_region_name 34 | cluster_name = var.addon_context.eks_cluster_id 35 | log_retention_days = var.cw_log_retention_days 36 | refresh_interval = var.refresh_interval 37 | service_account = local.service_account 38 | })] 39 | 40 | irsa_config = { 41 | kubernetes_namespace = local.helm_config["namespace"] 42 | kubernetes_service_account = local.service_account 43 | create_kubernetes_namespace = try(local.helm_config["create_namespace"], true) 44 | create_kubernetes_service_account = true 45 | create_service_account_secret_token = try(local.helm_config["create_service_account_secret_token"], false) 46 | irsa_iam_policies = concat([aws_iam_policy.aws_for_fluent_bit.arn], var.irsa_policies) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/main.tf: -------------------------------------------------------------------------------- 1 | module "helm_addon" { 2 | source = 
"github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons/helm-addon?ref=v4.32.1" 3 | manage_via_gitops = var.manage_via_gitops 4 | set_values = local.set_values 5 | helm_config = local.helm_config 6 | irsa_config = local.irsa_config 7 | addon_context = var.addon_context 8 | } 9 | 10 | resource "aws_iam_policy" "aws_for_fluent_bit" { 11 | name = "${var.addon_context.eks_cluster_id}-fluentbit" 12 | description = "IAM Policy for AWS for FluentBit" 13 | policy = data.aws_iam_policy_document.irsa.json 14 | tags = var.addon_context.tags 15 | } 16 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/outputs.tf: -------------------------------------------------------------------------------- 1 | output "release_metadata" { 2 | description = "Map of attributes of the Helm release metadata" 3 | value = module.helm_addon.release_metadata 4 | } 5 | 6 | output "irsa_arn" { 7 | description = "IAM role ARN for the service account" 8 | value = module.helm_addon.irsa_arn 9 | } 10 | 11 | output "irsa_name" { 12 | description = "IAM role name for the service account" 13 | value = module.helm_addon.irsa_name 14 | } 15 | 16 | output "service_account" { 17 | description = "Name of Kubernetes service account" 18 | value = module.helm_addon.service_account 19 | } 20 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/values.yaml: -------------------------------------------------------------------------------- 1 | serviceAccount: 2 | create: false 3 | name: ${service_account} 4 | 5 | cloudWatch: 6 | enabled: false 7 | 8 | cloudWatchLogs: 9 | enabled: true 10 | region: ${aws_region} 11 | # logGroupName is a fallback to failed parsing 12 | logGroupName: /aws/eks/observability-accelerator/workloads 13 | logGroupTemplate: /aws/eks/observability-accelerator/${cluster_name}/$kubernetes['namespace_name'] 14 | 
logStreamTemplate: $kubernetes['container_name'].$kubernetes['pod_name'] 15 | logKey: log 16 | logRetentionDays: ${log_retention_days} 17 | 18 | input: 19 | enabled: false 20 | 21 | additionalInputs: | 22 | [INPUT] 23 | Name tail 24 | Tag kube.* 25 | Path /var/log/containers/*.log 26 | DB /var/log/flb_kube.db 27 | Mem_Buf_Limit 5MB 28 | Skip_Long_Lines On 29 | Refresh_Interval ${refresh_interval} 30 | multiline.parser cri, docker, go, java, python 31 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/variables.tf: -------------------------------------------------------------------------------- 1 | variable "helm_config" { 2 | description = "Helm provider config aws_for_fluent_bit." 3 | type = any 4 | default = {} 5 | } 6 | 7 | variable "cw_log_retention_days" { 8 | description = "FluentBit CloudWatch Log group retention period" 9 | type = number 10 | default = 90 11 | } 12 | 13 | variable "refresh_interval" { 14 | description = "FluentBit input refresh interval" 15 | type = number 16 | default = 60 17 | } 18 | 19 | 20 | variable "manage_via_gitops" { 21 | type = bool 22 | description = "Determines if the add-on should be managed via GitOps." 
23 | default = false 24 | } 25 | 26 | variable "irsa_policies" { 27 | description = "Additional IAM policies for a IAM role for service accounts" 28 | type = list(string) 29 | default = [] 30 | } 31 | 32 | variable "addon_context" { 33 | description = "Input configuration for the addon" 34 | type = object({ 35 | aws_caller_identity_account_id = string 36 | aws_caller_identity_arn = string 37 | aws_eks_cluster_endpoint = string 38 | aws_partition_id = string 39 | aws_region_name = string 40 | eks_cluster_id = string 41 | eks_oidc_issuer_url = string 42 | eks_oidc_provider_arn = string 43 | tags = map(string) 44 | irsa_iam_role_path = string 45 | irsa_iam_permissions_boundary = string 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/aws-for-fluentbit/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 3.72" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/README.md: -------------------------------------------------------------------------------- 1 | # External Secrets Operator Kubernetes addon 2 | 3 | This add-on deploys the External Secrets Operator to an EKS cluster. The cluster is populated with a ClusterSecretStore and an ExternalSecret that expose the Grafana API key stored as a secret in AWS SSM Parameter Store. A ClusterSecretStore is created for AWS SSM Parameter Store. The store uses IRSA (IAM Roles for Service Accounts) to retrieve the secret values from AWS. 
4 | 5 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 6 | ## Requirements 7 | 8 | | Name | Version | 9 | |------|---------| 10 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0.0 | 11 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 3.72 | 12 | | <a name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 13 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 14 | | <a name="requirement_random"></a> [random](#requirement\_random) | >= 3.6.1 | 15 | 16 | ## Providers 17 | 18 | | Name | Version | 19 | |------|---------| 20 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 3.72 | 21 | | <a name="provider_kubectl"></a> [kubectl](#provider\_kubectl) | >= 2.0.3 | 22 | | <a name="provider_random"></a> [random](#provider\_random) | >= 3.6.1 | 23 | 24 | ## Modules 25 | 26 | | Name | Source | Version | 27 | |------|--------|---------| 28 | | <a name="module_cluster_secretstore_role"></a> [cluster\_secretstore\_role](#module\_cluster\_secretstore\_role) | github.com/aws-ia/terraform-aws-eks-blueprints//modules/irsa | v4.32.1 | 29 | | <a name="module_external_secrets"></a> [external\_secrets](#module\_external\_secrets) | github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons/external-secrets | v4.32.1 | 30 | 31 | ## Resources 32 | 33 | | Name | Type | 34 | |------|------| 35 | | [aws_iam_policy.cluster_secretstore](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | 36 | | [aws_kms_key.secrets](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | resource | 37 | | [aws_ssm_parameter.secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ssm_parameter) | resource | 38 | | [kubectl_manifest.cluster_secretstore](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource | 39 | | 
[kubectl_manifest.secret](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource | 40 | | [random_uuid.grafana_key_suffix](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/uuid) | resource | 41 | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | 42 | 43 | ## Inputs 44 | 45 | | Name | Description | Type | Default | Required | 46 | |------|-------------|------|---------|:--------:| 47 | | <a name="input_addon_context"></a> [addon\_context](#input\_addon\_context) | Input configuration for the addon | <pre>object({<br> aws_caller_identity_account_id = string<br> aws_caller_identity_arn = string<br> aws_eks_cluster_endpoint = string<br> aws_partition_id = string<br> aws_region_name = string<br> eks_cluster_id = string<br> eks_oidc_issuer_url = string<br> eks_oidc_provider_arn = string<br> irsa_iam_role_path = string<br> irsa_iam_permissions_boundary = string<br> tags = map(string)<br> })</pre> | n/a | yes | 48 | | <a name="input_enable_external_secrets"></a> [enable\_external\_secrets](#input\_enable\_external\_secrets) | Enable external-secrets | `bool` | `true` | no | 49 | | <a name="input_grafana_api_key"></a> [grafana\_api\_key](#input\_grafana\_api\_key) | Grafana API key for the Amazon Managed Grafana workspace | `string` | n/a | yes | 50 | | <a name="input_helm_config"></a> [helm\_config](#input\_helm\_config) | Helm provider config for external secrets | `any` | `{}` | no | 51 | | <a name="input_target_secret_name"></a> [target\_secret\_name](#input\_target\_secret\_name) | Name to store the secret for Grafana API Key | `string` | n/a | yes | 52 | | <a name="input_target_secret_namespace"></a> [target\_secret\_namespace](#input\_target\_secret\_namespace) | Namespace to store the secret for Grafana API Key | `string` | n/a | yes | 53 | 54 | ## Outputs 55 | 56 | No outputs. 
57 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 58 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/locals.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | name = "external-secrets" 3 | namespace = "external-secrets" 4 | cluster_secretstore_name = "cluster-secretstore-sm" 5 | cluster_secretstore_sa = "cluster-secretstore-sa" 6 | } 7 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/main.tf: -------------------------------------------------------------------------------- 1 | module "external_secrets" { 2 | source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons/external-secrets?ref=v4.32.1" 3 | count = var.enable_external_secrets ? 1 : 0 4 | 5 | helm_config = var.helm_config 6 | addon_context = var.addon_context 7 | } 8 | 9 | data "aws_region" "current" {} 10 | 11 | #--------------------------------------------------------------- 12 | # External Secrets Operator - Secret 13 | #--------------------------------------------------------------- 14 | 15 | resource "aws_kms_key" "secrets" { 16 | enable_key_rotation = true 17 | } 18 | 19 | module "cluster_secretstore_role" { 20 | source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/irsa?ref=v4.32.1" 21 | kubernetes_namespace = local.namespace 22 | create_kubernetes_namespace = false 23 | kubernetes_service_account = local.cluster_secretstore_sa 24 | irsa_iam_policies = [aws_iam_policy.cluster_secretstore.arn] 25 | eks_cluster_id = var.addon_context.eks_cluster_id 26 | eks_oidc_provider_arn = var.addon_context.eks_oidc_provider_arn 27 | depends_on = [module.external_secrets] 28 | } 29 | 30 | resource "aws_iam_policy" "cluster_secretstore" { 31 | name_prefix = local.cluster_secretstore_sa 32 | policy = <<POLICY 33 | { 34 | "Version": "2012-10-17", 35 | "Statement": [ 
36 | { 37 | "Effect": "Allow", 38 | "Action": [ 39 | "ssm:DescribeParameters", 40 | "ssm:GetParameter", 41 | "ssm:GetParameters", 42 | "ssm:GetParametersByPath", 43 | "ssm:GetParameterHistory" 44 | ], 45 | "Resource": "${aws_ssm_parameter.secret.arn}" 46 | }, 47 | { 48 | "Effect": "Allow", 49 | "Action": [ 50 | "kms:Decrypt" 51 | ], 52 | "Resource": "${aws_kms_key.secrets.arn}" 53 | } 54 | ] 55 | } 56 | POLICY 57 | } 58 | 59 | resource "kubectl_manifest" "cluster_secretstore" { 60 | yaml_body = <<YAML 61 | apiVersion: external-secrets.io/v1beta1 62 | kind: ClusterSecretStore 63 | metadata: 64 | name: ${local.cluster_secretstore_name} 65 | spec: 66 | provider: 67 | aws: 68 | service: ParameterStore 69 | region: ${data.aws_region.current.name} 70 | auth: 71 | jwt: 72 | serviceAccountRef: 73 | name: ${local.cluster_secretstore_sa} 74 | namespace: ${local.namespace} 75 | YAML 76 | depends_on = [module.external_secrets] 77 | } 78 | 79 | resource "random_uuid" "grafana_key_suffix" { 80 | } 81 | 82 | resource "aws_ssm_parameter" "secret" { 83 | name = "/terraform-accelerator/grafana-api-key/${random_uuid.grafana_key_suffix.result}" 84 | description = "SSM Secret to store grafana API Key" 85 | type = "SecureString" 86 | value = jsonencode({ 87 | GF_SECURITY_ADMIN_APIKEY = var.grafana_api_key 88 | }) 89 | key_id = aws_kms_key.secrets.id 90 | } 91 | 92 | resource "kubectl_manifest" "secret" { 93 | yaml_body = <<YAML 94 | apiVersion: external-secrets.io/v1beta1 95 | kind: ExternalSecret 96 | metadata: 97 | name: ${local.name}-sm 98 | namespace: ${var.target_secret_namespace} 99 | spec: 100 | refreshInterval: 1h 101 | secretStoreRef: 102 | name: ${local.cluster_secretstore_name} 103 | kind: ClusterSecretStore 104 | target: 105 | name: ${var.target_secret_name} 106 | dataFrom: 107 | - extract: 108 | key: ${aws_ssm_parameter.secret.name} 109 | YAML 110 | depends_on = [module.external_secrets] 111 | } 112 | 
-------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-monitoring/add-ons/external-secrets/outputs.tf -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/variables.tf: -------------------------------------------------------------------------------- 1 | variable "helm_config" { 2 | description = "Helm provider config for external secrets" 3 | type = any 4 | default = {} 5 | } 6 | 7 | variable "addon_context" { 8 | description = "Input configuration for the addon" 9 | type = object({ 10 | aws_caller_identity_account_id = string 11 | aws_caller_identity_arn = string 12 | aws_eks_cluster_endpoint = string 13 | aws_partition_id = string 14 | aws_region_name = string 15 | eks_cluster_id = string 16 | eks_oidc_issuer_url = string 17 | eks_oidc_provider_arn = string 18 | irsa_iam_role_path = string 19 | irsa_iam_permissions_boundary = string 20 | tags = map(string) 21 | }) 22 | } 23 | 24 | variable "enable_external_secrets" { 25 | description = "Enable external-secrets" 26 | type = bool 27 | default = true 28 | } 29 | 30 | variable "grafana_api_key" { 31 | description = "Grafana API key for the Amazon Managed Grafana workspace" 32 | type = string 33 | sensitive = true # the key is a credential; keep it out of plan/apply output 34 | } 35 | 36 | variable "target_secret_namespace" { 37 | description = "Namespace to store the secret for Grafana API Key" 38 | type = string 39 | } 40 | 41 | variable "target_secret_name" { 42 | description = "Name to store the secret for Grafana API Key" 43 | type = string 44 | } 45 | -------------------------------------------------------------------------------- /modules/eks-monitoring/add-ons/external-secrets/versions.tf: 
-------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.0.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 3.72" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | random = { 18 | source = "hashicorp/random" 19 | version = ">= 3.6.1" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /modules/eks-monitoring/otel-config/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: opentelemetry 3 | description: A Helm chart to install otel operator 4 | type: application 5 | version: 0.8.0 6 | appVersion: 0.8.0 7 | -------------------------------------------------------------------------------- /modules/eks-monitoring/otel-config/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: otel-prometheus-role 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - nodes 10 | - nodes/proxy 11 | - services 12 | - endpoints 13 | - pods 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - extensions 20 | - networking.k8s.io # Ingress is served from this group since Kubernetes 1.19; extensions/v1beta1 was removed in 1.22 21 | resources: 22 | - ingresses 23 | verbs: 24 | - get 25 | - list 26 | - watch 27 | - apiGroups: 28 | - metrics.eks.amazonaws.com 29 | resources: 30 | - kcm/metrics 31 | - ksh/metrics 32 | verbs: 33 | - get 34 | - nonResourceURLs: 35 | - /metrics 36 | verbs: 37 | - get 38 | -------------------------------------------------------------------------------- /modules/eks-monitoring/otel-config/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: 
ClusterRoleBinding 3 | metadata: 4 | name: otel-prometheus-role-binding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: otel-prometheus-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: {{ default "adot-collector-kubeprometheus" .Values.serviceAccount }} 12 | namespace: {{ default "adot-collector-kubeprometheus" .Values.namespace }} 13 | -------------------------------------------------------------------------------- /modules/eks-monitoring/otel-config/values.yaml: -------------------------------------------------------------------------------- 1 | ampurl: ${amp_url} 2 | region: ${region} 3 | ekscluster: ${eks_cluster} 4 | accountId: ${account_id} 5 | 6 | assumeRoleArn: ${managed_prometheus_cross_account_role} 7 | 8 | globalScrapeTimeout: ${global_scrape_timeout} 9 | globalScrapeSampleLimit: ${global_scrape_sample_limit} 10 | 11 | enableAPIserver: ${enable_apiserver_monitoring} 12 | 13 | enableTracing: ${enable_tracing} 14 | otlpGrpcEndpoint: ${otlp_grpc_endpoint} 15 | otlpHttpEndpoint: ${otlp_http_endpoint} 16 | tracingTimeout: ${tracing_timeout} 17 | tracingSendBatchSize: ${tracing_send_batch_size} 18 | 19 | enableCustomMetrics: ${enable_custom_metrics} 20 | customMetrics: ${custom_metrics} 21 | 22 | enableJava: ${enable_java} 23 | javaScrapeSampleLimit: ${java_scrape_sample_limit} 24 | javaPrometheusMetricsEndpoint: ${java_prometheus_metrics_endpoint} 25 | 26 | enableNginx: ${enable_nginx} 27 | nginxScrapeSampleLimit: ${nginx_scrape_sample_limit} 28 | nginxPrometheusMetricsEndpoint: ${nginx_prometheus_metrics_endpoint} 29 | 30 | enableIstio: ${enable_istio} 31 | istioScrapeSampleLimit: ${istio_scrape_sample_limit} 32 | istioPrometheusMetricsEndpoint: ${istio_prometheus_metrics_endpoint} 33 | 34 | adotLoglevel: ${adot_loglevel} 35 | adotServiceTelemetryLoglevel: ${adot_service_telemetry_loglevel} 36 | 37 | enableAdotcollectorMetrics: ${enable_adotcollector_metrics} 38 | 39 | serviceAccount: ${service_account} 40 | 
namespace: ${namespace} 41 | -------------------------------------------------------------------------------- /modules/eks-monitoring/outputs.tf: -------------------------------------------------------------------------------- 1 | output "eks_cluster_version" { 2 | description = "EKS Cluster version" 3 | value = data.aws_eks_cluster.eks_cluster.version 4 | } 5 | 6 | output "eks_cluster_id" { 7 | description = "EKS Cluster Id" 8 | value = var.eks_cluster_id 9 | } 10 | 11 | output "adot_irsa_arn" { 12 | description = "IRSA Arn for ADOT" 13 | value = module.helm_addon.irsa_arn 14 | } 15 | 16 | output "managed_prometheus_workspace_endpoint" { 17 | description = "Amazon Managed Prometheus workspace endpoint" 18 | value = local.managed_prometheus_workspace_endpoint 19 | } 20 | 21 | output "managed_prometheus_workspace_id" { 22 | description = "Amazon Managed Prometheus workspace ID" 23 | value = local.managed_prometheus_workspace_id 24 | } 25 | 26 | output "managed_prometheus_workspace_region" { 27 | description = "Amazon Managed Prometheus workspace region" 28 | value = local.managed_prometheus_workspace_region 29 | } 30 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/istio/README.md: -------------------------------------------------------------------------------- 1 | # Istio patterns module 2 | 3 | Provides monitoring for Istio based workloads with the following resources: 4 | 5 | - AWS Managed Grafana Dashboard and data source 6 | - Alerts and recording rules with AWS Managed Service for Prometheus 7 | 8 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 9 | ## Requirements 10 | 11 | | Name | Version | 12 | |------|---------| 13 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 14 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 15 | | <a name="requirement_helm"></a> [helm](#requirement\_helm) | >= 2.4.1 | 16 | | <a 
name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 17 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 18 | 19 | ## Providers 20 | 21 | | Name | Version | 22 | |------|---------| 23 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 24 | | <a name="provider_kubectl"></a> [kubectl](#provider\_kubectl) | >= 2.0.3 | 25 | 26 | ## Modules 27 | 28 | No modules. 29 | 30 | ## Resources 31 | 32 | | Name | Type | 33 | |------|------| 34 | | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | 35 | | [aws_prometheus_rule_group_namespace.recording_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | 36 | | [kubectl_manifest.flux_kustomization](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource | 37 | 38 | ## Inputs 39 | 40 | | Name | Description | Type | Default | Required | 41 | |------|-------------|------|---------|:--------:| 42 | | <a name="input_pattern_config"></a> [pattern\_config](#input\_pattern\_config) | Configuration object for ISTIO monitoring | <pre>object({<br> enable_alerting_rules = bool<br> enable_recording_rules = bool<br> enable_dashboards = bool<br> scrape_sample_limit = number<br><br> flux_gitrepository_name = string<br> flux_gitrepository_url = string<br> flux_gitrepository_branch = string<br> flux_kustomization_name = string<br> flux_kustomization_path = string<br><br> managed_prometheus_workspace_id = string<br> prometheus_metrics_endpoint = string<br><br> dashboards = object({<br> cp = string<br> mesh = string<br> performance = string<br> service = string<br> })<br> })</pre> | n/a | yes | 43 | 44 | ## Outputs 45 | 46 | No outputs. 
47 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 48 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/istio/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-monitoring/patterns/istio/outputs.tf -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/istio/variables.tf: -------------------------------------------------------------------------------- 1 | variable "pattern_config" { 2 | description = "Configuration object for ISTIO monitoring" 3 | type = object({ 4 | enable_alerting_rules = bool 5 | enable_recording_rules = bool 6 | enable_dashboards = bool 7 | scrape_sample_limit = number 8 | 9 | flux_gitrepository_name = string 10 | flux_gitrepository_url = string 11 | flux_gitrepository_branch = string 12 | flux_kustomization_name = string 13 | flux_kustomization_path = string 14 | 15 | managed_prometheus_workspace_id = string 16 | prometheus_metrics_endpoint = string 17 | 18 | dashboards = object({ 19 | cp = string 20 | mesh = string 21 | performance = string 22 | service = string 23 | }) 24 | }) 25 | nullable = false 26 | } 27 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/istio/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | } 23 | 
-------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/java/README.md: -------------------------------------------------------------------------------- 1 | # Java patterns module 2 | 3 | Provides monitoring for Java based workloads with the following resources: 4 | 5 | - AWS Managed Grafana Dashboard and data source 6 | - Alerts and recording rules with AWS Managed Service for Prometheus 7 | 8 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 9 | ## Requirements 10 | 11 | | Name | Version | 12 | |------|---------| 13 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 14 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 15 | | <a name="requirement_helm"></a> [helm](#requirement\_helm) | >= 2.4.1 | 16 | | <a name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 17 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 18 | 19 | ## Providers 20 | 21 | | Name | Version | 22 | |------|---------| 23 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 24 | | <a name="provider_kubectl"></a> [kubectl](#provider\_kubectl) | >= 2.0.3 | 25 | 26 | ## Modules 27 | 28 | No modules. 
29 | 30 | ## Resources 31 | 32 | | Name | Type | 33 | |------|------| 34 | | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | 35 | | [aws_prometheus_rule_group_namespace.recording_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | 36 | | [kubectl_manifest.flux_kustomization](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource | 37 | 38 | ## Inputs 39 | 40 | | Name | Description | Type | Default | Required | 41 | |------|-------------|------|---------|:--------:| 42 | | <a name="input_pattern_config"></a> [pattern\_config](#input\_pattern\_config) | Configuration object for Java/JMX monitoring | <pre>object({<br> enable_alerting_rules = bool<br> enable_recording_rules = bool<br> scrape_sample_limit = number<br><br> enable_dashboards = bool<br><br> flux_gitrepository_name = string<br> flux_gitrepository_url = string<br> flux_gitrepository_branch = string<br> flux_kustomization_name = string<br> flux_kustomization_path = string<br><br> managed_prometheus_workspace_id = string<br> prometheus_metrics_endpoint = string<br><br> grafana_dashboard_url = string<br> })</pre> | n/a | yes | 43 | 44 | ## Outputs 45 | 46 | No outputs. 47 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 48 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/java/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_prometheus_rule_group_namespace" "recording_rules" { 2 | count = var.pattern_config.enable_recording_rules ? 
1 : 0 3 | 4 | name = "accelerator-java-rules" 5 | workspace_id = var.pattern_config.managed_prometheus_workspace_id 6 | data = <<EOF 7 | groups: 8 | - name: default-metric 9 | rules: 10 | - record: metric:recording_rule 11 | expr: avg(rate(container_cpu_usage_seconds_total[5m])) 12 | EOF 13 | } 14 | 15 | resource "aws_prometheus_rule_group_namespace" "alerting_rules" { 16 | count = var.pattern_config.enable_alerting_rules ? 1 : 0 17 | 18 | name = "accelerator-java-alerting" 19 | workspace_id = var.pattern_config.managed_prometheus_workspace_id 20 | data = <<EOF 21 | groups: 22 | - name: default-alert 23 | rules: 24 | - alert: metric:alerting_rule 25 | expr: jvm_memory_bytes_used{job="java", area="heap"} / jvm_memory_bytes_max * 100 > 80 26 | for: 1m 27 | labels: 28 | severity: warning 29 | annotations: 30 | summary: "JVM heap warning" 31 | description: "JVM heap of instance `{{$labels.instance}}` from application `{{$labels.application}}` is above 80% for one minute. (current=`{{$value}}%`)" 32 | EOF 33 | } 34 | 35 | resource "kubectl_manifest" "flux_kustomization" { 36 | count = var.pattern_config.enable_dashboards ? 
1 : 0 37 | 38 | yaml_body = <<YAML 39 | apiVersion: kustomize.toolkit.fluxcd.io/v1 40 | kind: Kustomization 41 | metadata: 42 | name: ${var.pattern_config.flux_kustomization_name} 43 | namespace: flux-system 44 | spec: 45 | interval: 1m0s 46 | path: ${var.pattern_config.flux_kustomization_path} 47 | prune: true 48 | sourceRef: 49 | kind: GitRepository 50 | name: ${var.pattern_config.flux_gitrepository_name} 51 | postBuild: 52 | substitute: 53 | GRAFANA_JAVA_JMX_DASH_URL: ${var.pattern_config.grafana_dashboard_url} 54 | YAML 55 | } 56 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/java/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-monitoring/patterns/java/outputs.tf -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/java/variables.tf: -------------------------------------------------------------------------------- 1 | variable "pattern_config" { 2 | description = "Configuration object for Java/JMX monitoring" 3 | type = object({ 4 | enable_alerting_rules = bool 5 | enable_recording_rules = bool 6 | scrape_sample_limit = number 7 | 8 | enable_dashboards = bool 9 | 10 | flux_gitrepository_name = string 11 | flux_gitrepository_url = string 12 | flux_gitrepository_branch = string 13 | flux_kustomization_name = string 14 | flux_kustomization_path = string 15 | 16 | managed_prometheus_workspace_id = string 17 | prometheus_metrics_endpoint = string 18 | 19 | grafana_dashboard_url = string 20 | }) 21 | nullable = false 22 | } 23 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/java/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 
2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/nginx/README.md: -------------------------------------------------------------------------------- 1 | # Observability Pattern for Nginx 2 | 3 | This module provides an automated experience around Observability for Nginx workloads. 4 | It provides the following resources: 5 | 6 | - AWS Distro For OpenTelemetry Operator and Collector 7 | - AWS Managed Grafana Dashboard and data source 8 | - Alerts and recording rules with AWS Managed Service for Prometheus 9 | 10 | <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 11 | ## Requirements 12 | 13 | | Name | Version | 14 | |------|---------| 15 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0 | 16 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 17 | | <a name="requirement_kubectl"></a> [kubectl](#requirement\_kubectl) | >= 2.0.3 | 18 | | <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | >= 2.10 | 19 | 20 | ## Providers 21 | 22 | | Name | Version | 23 | |------|---------| 24 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 25 | | <a name="provider_kubectl"></a> [kubectl](#provider\_kubectl) | >= 2.0.3 | 26 | 27 | ## Modules 28 | 29 | No modules. 
30 | 31 | ## Resources 32 | 33 | | Name | Type | 34 | |------|------| 35 | | [aws_prometheus_rule_group_namespace.alerting_rules](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/prometheus_rule_group_namespace) | resource | 36 | | [kubectl_manifest.flux_kustomization](https://registry.terraform.io/providers/alekc/kubectl/latest/docs/resources/manifest) | resource | 37 | 38 | ## Inputs 39 | 40 | | Name | Description | Type | Default | Required | 41 | |------|-------------|------|---------|:--------:| 42 | | <a name="input_pattern_config"></a> [pattern\_config](#input\_pattern\_config) | Configuration object for Java/JMX monitoring | <pre>object({<br> enable_alerting_rules = bool<br> enable_recording_rules = bool<br> scrape_sample_limit = number<br><br> enable_dashboards = bool<br><br> flux_gitrepository_name = string<br> flux_gitrepository_url = string<br> flux_gitrepository_branch = string<br> flux_kustomization_name = string<br> flux_kustomization_path = string<br><br> managed_prometheus_workspace_id = string<br> prometheus_metrics_endpoint = string<br><br> grafana_dashboard_url = string<br> })</pre> | n/a | yes | 43 | 44 | ## Outputs 45 | 46 | No outputs. 47 | <!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK --> 48 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/nginx/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_prometheus_rule_group_namespace" "alerting_rules" { 2 | count = var.pattern_config.enable_alerting_rules ? 
1 : 0 3 | 4 | name = "accelerator-nginx-alerting" 5 | workspace_id = var.pattern_config.managed_prometheus_workspace_id 6 | data = <<EOF 7 | groups: 8 | - name: Nginx-HTTP-4xx-error-rate 9 | rules: 10 | - alert: metric:alerting_rule 11 | expr: sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5 12 | for: 1m 13 | labels: 14 | severity: critical 15 | annotations: 16 | summary: Nginx high HTTP 4xx error rate (instance {{ $labels.instance }}) 17 | description: "Too many HTTP requests with status 4xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 18 | - name: Nginx-HTTP-5xx-error-rate 19 | rules: 20 | - alert: metric:alerting_rule 21 | expr: sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5 22 | for: 1m 23 | labels: 24 | severity: critical 25 | annotations: 26 | summary: Nginx high HTTP 5xx error rate (instance {{ $labels.instance }}) 27 | description: "Too many HTTP requests with status 5xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 28 | - name: Nginx-high-latency 29 | rules: 30 | - alert: metric:alerting_rule 31 | expr: histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket[2m])) by (host, node)) > 3 32 | for: 2m 33 | labels: 34 | severity: warning 35 | annotations: 36 | summary: Nginx latency high (instance {{ $labels.instance }}) 37 | description: "Nginx p99 latency is higher than 3 seconds\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 38 | EOF 39 | } 40 | 41 | resource "kubectl_manifest" "flux_kustomization" { 42 | count = var.pattern_config.enable_dashboards ? 
1 : 0 43 | 44 | yaml_body = <<YAML 45 | apiVersion: kustomize.toolkit.fluxcd.io/v1 46 | kind: Kustomization 47 | metadata: 48 | name: ${var.pattern_config.flux_kustomization_name} 49 | namespace: flux-system 50 | spec: 51 | interval: 1m0s 52 | path: ${var.pattern_config.flux_kustomization_path} 53 | prune: true 54 | sourceRef: 55 | kind: GitRepository 56 | name: ${var.pattern_config.flux_gitrepository_name} 57 | postBuild: 58 | substitute: 59 | GRAFANA_NGINX_DASH_URL: ${var.pattern_config.grafana_dashboard_url} 60 | YAML 61 | } 62 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/nginx/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/eks-monitoring/patterns/nginx/outputs.tf -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/nginx/variables.tf: -------------------------------------------------------------------------------- 1 | variable "pattern_config" { 2 | description = "Configuration object for Nginx monitoring" 3 | type = object({ 4 | enable_alerting_rules = bool 5 | enable_recording_rules = bool 6 | scrape_sample_limit = number 7 | 8 | enable_dashboards = bool 9 | 10 | flux_gitrepository_name = string 11 | flux_gitrepository_url = string 12 | flux_gitrepository_branch = string 13 | flux_kustomization_name = string 14 | flux_kustomization_path = string 15 | 16 | managed_prometheus_workspace_id = string 17 | prometheus_metrics_endpoint = string 18 | 19 | grafana_dashboard_url = string 20 | }) 21 | nullable = false 22 | } 23 | -------------------------------------------------------------------------------- /modules/eks-monitoring/patterns/nginx/versions.tf: -------------------------------------------------------------------------------- 1 | terraform {
2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /modules/eks-monitoring/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.0.0" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.10" 12 | } 13 | kubectl = { 14 | source = "alekc/kubectl" 15 | version = ">= 2.0.3" 16 | } 17 | helm = { 18 | source = "hashicorp/helm" 19 | version = ">= 2.4.1" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/README.md: -------------------------------------------------------------------------------- 1 | # Observability Pattern for Amazon Managed Prometheus 2 | 3 | This module provides an automated experience around Observability for AMP (Amazon Managed Prometheus) workspaces. 4 | It provides the following resources: 5 | 6 | - AWS Managed Grafana Dashboard 7 | - Cloudwatch data source to monitor AMP usage and alert metrics. 8 | 9 | Note: The Billing widget of the dashboard requires [CloudWatch Billing Alerts](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/monitor_estimated_charges_with_cloudwatch.html) to be enabled. 10 | 11 | - CloudWatch alarms for AMP service quotas. 
12 | 13 | <!-- BEGIN_TF_DOCS --> 14 | ## Requirements 15 | 16 | | Name | Version | 17 | |------|---------| 18 | | <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.1.0, < 1.3.0 | 19 | | <a name="requirement_aws"></a> [aws](#requirement\_aws) | >= 4.0.0 | 20 | | <a name="requirement_grafana"></a> [grafana](#requirement\_grafana) | >= 1.25.0 | 21 | 22 | ## Providers 23 | 24 | | Name | Version | 25 | |------|---------| 26 | | <a name="provider_aws"></a> [aws](#provider\_aws) | >= 4.0.0 | 27 | | <a name="provider_grafana"></a> [grafana](#provider\_grafana) | >= 1.25.0 | 28 | 29 | ## Modules 30 | 31 | No modules. 32 | 33 | ## Resources 34 | 35 | | Name | Type | 36 | |------|------| 37 | | [aws_cloudwatch_metric_alarm.active-series-metrics](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource | 38 | | [aws_cloudwatch_metric_alarm.ingestion_rate](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource | 39 | | [grafana_dashboard.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/dashboard) | resource | 40 | | [grafana_data_source.cloudwatch](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/data_source) | resource | 41 | 42 | ## Inputs 43 | 44 | | Name | Description | Type | Default | Required | 45 | |------|-------------|------|---------|:--------:| 46 | | <a name="input_active_series_threshold"></a> [active\_series\_threshold](#input\_active\_series\_threshold) | Threshold for active series metric alarm | `number` | `1000000` | no | 47 | | <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS Region | `string` | n/a | yes | 48 | | <a name="input_dashboards_folder_id"></a> [dashboards\_folder\_id](#input\_dashboards\_folder\_id) | Grafana folder ID for automatic dashboards | `string` | n/a | yes | 49 | | <a name="input_ingestion_rate_threshold"></a> 
[ingestion\_rate\_threshold](#input\_ingestion\_rate\_threshold) | Threshold for active series metric alarm | `number` | `70000` | no | 50 | | <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Service for Prometheus Workspace ID to create Alarms for | `string` | n/a | yes | 51 | 52 | ## Outputs 53 | 54 | No outputs. 55 | <!-- END_TF_DOCS --> 56 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/alarms.tf: -------------------------------------------------------------------------------- 1 | #CloudWatch Alerts on AMP Usage 2 | resource "aws_cloudwatch_metric_alarm" "active_series_metrics" { 3 | for_each = local.amp_list 4 | alarm_name = "active-series-metrics-${each.key}" 5 | comparison_operator = "GreaterThanOrEqualToThreshold" 6 | evaluation_periods = "2" 7 | threshold = var.active_series_threshold 8 | alarm_description = "This metric monitors AMP active series metrics" 9 | insufficient_data_actions = [] 10 | metric_query { 11 | id = "m1" 12 | return_data = true 13 | metric { 14 | metric_name = "ResourceCount" 15 | namespace = "AWS/Usage" 16 | period = "120" 17 | stat = "Average" 18 | unit = "None" 19 | 20 | dimensions = { 21 | Type = "Resource" 22 | ResourceId = each.key 23 | Resource = "ActiveSeries" 24 | Service = "Prometheus" 25 | Class = "None" 26 | } 27 | } 28 | } 29 | } 30 | 31 | resource "aws_cloudwatch_metric_alarm" "ingestion_rate" { 32 | for_each = local.amp_list 33 | alarm_name = "ingestion_rate-${each.key}" 34 | comparison_operator = "GreaterThanOrEqualToThreshold" 35 | evaluation_periods = "2" 36 | threshold = var.ingestion_rate_threshold 37 | alarm_description = "This metric monitors AMP ingestion rate" 38 | insufficient_data_actions = [] 39 | metric_query { 40 | id = "m1" 41 | return_data = true 42 | 43 | metric { 44 | metric_name = "ResourceCount" 45 | namespace = "AWS/Usage" 46 | period = "120" 47 |
stat = "Average" 48 | unit = "None" 49 | 50 | dimensions = { 51 | Type = "Resource" 52 | ResourceId = each.key 53 | Resource = "IngestionRate" 54 | Service = "Prometheus" 55 | Class = "None" 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/billing/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_metric_alarm" "amp_billing_anomaly_detection" { 2 | alarm_name = "amp_billing_anomaly" 3 | comparison_operator = "GreaterThanUpperThreshold" 4 | evaluation_periods = "2" 5 | threshold_metric_id = "e1" 6 | alarm_description = "This monitors AMP charges and alarms on anomaly detection" 7 | insufficient_data_actions = [] 8 | 9 | metric_query { 10 | id = "e1" 11 | expression = "ANOMALY_DETECTION_BAND(m1)" 12 | label = "Expected AMP Charges" 13 | return_data = "true" 14 | } 15 | 16 | metric_query { 17 | id = "m1" 18 | return_data = "true" 19 | metric { 20 | metric_name = "EstimatedCharges" 21 | namespace = "AWS/Billing" 22 | period = "21600" 23 | stat = "Maximum" 24 | unit = "Count" 25 | 26 | dimensions = { 27 | ServiceName = "Prometheus" 28 | Currency = "USD" 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/billing/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/managed-prometheus-monitoring/billing/outputs.tf -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/billing/variables.tf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/managed-prometheus-monitoring/billing/variables.tf -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/billing/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | grafana = { 10 | source = "grafana/grafana" 11 | version = ">= 1.25.0" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/locals.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/terraform-aws-observability-accelerator/c432af44ee1df1b4ccd654e401922b99cea2ada5/modules/managed-prometheus-monitoring/locals.tf -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "us-east-1" 3 | alias = "billing_region" 4 | } 5 | 6 | locals { 7 | name = "aws-observability-accelerator-cloudwatch" 8 | amp_list = toset(compact([for id in split(",", var.managed_prometheus_workspace_ids) : trimspace(id)])) # trim whitespace and drop empty entries so "a, b" or "" never yields a bogus ResourceId 9 | } 10 | 11 | resource "grafana_data_source" "cloudwatch" { 12 | type = "cloudwatch" 13 | name = local.name 14 | 15 | # Giving priority to Managed Prometheus datasources 16 | is_default = false 17 | json_data_encoded = jsonencode({ 18 | default_region = var.aws_region 19 | sigv4_auth = true 20 | sigv4_auth_type = "workspace-iam-role" 21 | sigv4_region = var.aws_region 22 | }) 23 | } 24 | 25 | data "http" "dashboard" { 26 | url =
"https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/a72787328e493c4628680487e3c885fc395d1c56/artifacts/grafana-dashboards/amp/amp-dashboard.json" 27 | 28 | request_headers = { 29 | Accept = "application/json" 30 | } 31 | } 32 | 33 | resource "grafana_dashboard" "this" { 34 | folder = var.dashboards_folder_id 35 | config_json = data.http.dashboard.response_body 36 | } 37 | 38 | module "billing" { 39 | source = "./billing" 40 | providers = { 41 | aws = aws.billing_region 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/outputs.tf: -------------------------------------------------------------------------------- 1 | output "grafana_dashboard_urls" { 2 | value = [grafana_dashboard.this.url] 3 | description = "URLs for dashboards created" 4 | } 5 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/variables.tf: -------------------------------------------------------------------------------- 1 | variable "aws_region" { 2 | description = "AWS Region" 3 | type = string 4 | } 5 | 6 | variable "managed_prometheus_workspace_ids" { 7 | description = "Comma-separated list of Amazon Managed Service for Prometheus workspace IDs to create alarms for" 8 | type = string 9 | } 10 | 11 | variable "active_series_threshold" { 12 | description = "Threshold for active series metric alarm" 13 | type = number 14 | default = 8000000 15 | } 16 | 17 | variable "ingestion_rate_threshold" { 18 | description = "Threshold for ingestion rate metric alarm" 19 | type = number 20 | default = 136000 21 | } 22 | 23 | variable "dashboards_folder_id" { 24 | description = "Grafana folder ID for automatic dashboards" 25 | default = "0" 26 | type = string 27 | } 28 | -------------------------------------------------------------------------------- /modules/managed-prometheus-monitoring/versions.tf:
-------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.1.0" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 4.0.0" 8 | } 9 | helm = { 10 | source = "hashicorp/helm" 11 | version = ">= 2.4.1" 12 | } 13 | grafana = { 14 | source = "grafana/grafana" 15 | version = ">= 1.25.0" 16 | } 17 | http = { 18 | source = "hashicorp/http" 19 | version = ">= 3.3.0" 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /test/examples_basic_test.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/gruntwork-io/terratest/modules/terraform" 7 | ) 8 | 9 | func TestExamplesBasic(t *testing.T) { 10 | 11 | terraformOptions := &terraform.Options{ 12 | TerraformDir: "../examples/basic", // NOTE(review): no examples/basic directory appears in the repository tree - confirm this path 13 | // Vars: map[string]interface{}{ 14 | // "myvar": "test", 15 | // "mylistvar": []string{"list_item_1"}, 16 | // }, 17 | } 18 | 19 | defer terraform.Destroy(t, terraformOptions) 20 | terraform.InitAndApply(t, terraformOptions) 21 | } 22 | -------------------------------------------------------------------------------- /tfsec.yaml: -------------------------------------------------------------------------------- 1 | exclude: 2 | - aws-iam-no-policy-wildcards # Wildcards required in addon IAM policies 3 | - aws-vpc-no-excessive-port-access # VPC settings left up to user implementation for recommended practices 4 | - aws-vpc-no-public-ingress-acl # VPC settings left up to user implementation for recommended practices 5 | - aws-eks-no-public-cluster-access-to-cidr # Public access enabled for better example usability, users are recommended to disable if possible 6 | - aws-eks-no-public-cluster-access # Public access enabled for better example usability, users are recommended to disable if possible 7 | - aws-eks-encrypt-secrets # Module 
defaults to encrypting secrets with CMK, but this is not hardcoded and therefore a spurious error 8 | - aws-vpc-no-public-egress-sgr # Added in v1.22 9 | --------------------------------------------------------------------------------