Advance your Observability
47 |Choose your Infrastructure-as-code tool:
62 |
├── .github └── workflows │ ├── linkcheck.json │ ├── markdown-link-check.yaml │ └── pre-commit.yaml ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── artifacts ├── argocd-apps │ ├── grafana-operator-app │ │ ├── Chart.yaml │ │ ├── templates │ │ │ └── grafana-operator-app.yaml │ │ └── values.yaml │ ├── grafana-operator-chart │ │ ├── .helmignore │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── amg_grafana-amp-datasource.yaml │ │ │ ├── amg_grafana-cw-datasource.yaml │ │ │ ├── amg_grafana-dashboard.yaml │ │ │ ├── amg_grafana-identity.yaml │ │ │ └── amg_grafana-xray-datasource.yaml │ │ └── values.yaml │ ├── sample-apps │ │ └── envs │ │ │ └── prod │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ └── team-geordie.yaml │ │ │ └── values.yaml │ └── teams │ │ └── team-geordie │ │ └── prod │ │ ├── Chart.yaml │ │ ├── ho11y │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── ho11y.yaml │ │ │ └── ingress.yaml │ │ └── values.yaml │ │ ├── templates │ │ ├── ho11y-app.yaml │ │ └── yelb-app.yaml │ │ ├── values.yaml │ │ └── yelb │ │ ├── Chart.yaml │ │ ├── templates │ │ ├── deployment.yaml │ │ └── ingress.yaml │ │ └── values.yaml ├── grafana-dashboards │ ├── adot │ │ └── adothealth.json │ ├── amp │ │ └── amp-dashboard.json │ ├── eks-fargate │ │ └── infrastructure │ │ │ ├── cluster.json │ │ │ └── kubelet.json │ └── eks │ │ ├── apiserver │ │ ├── apiserver-advanced.json │ │ ├── apiserver-basic.json │ │ └── apiserver-troubleshooting.json │ │ ├── infrastructure │ │ ├── cluster.json │ │ ├── kcm.json │ │ ├── ksh.json │ │ ├── kubelet.json │ │ ├── namespace-workloads.json │ │ ├── nodeexporter-nodes.json │ │ ├── nodes.json │ │ └── workloads.json │ │ ├── istio │ │ ├── istio-control-plane-dashboard.json │ │ ├── istio-mesh-dashboard.json │ │ ├── istio-performance-dashboard.json │ │ └── istio-service-dashboard.json │ │ ├── java │ │ └── default.json │ │ ├── kube-proxy │ │ └── kube-proxy.json │ │ ├── neuron │ │ └── neuron-monitor.json │ │ └── nginx │ 
│ └── nginx.json ├── grafana-operator-manifests │ ├── amp │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ └── eks │ │ ├── adot │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── apiserver │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── gpu │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── infrastructure │ │ ├── amg_grafana-amp-datasource.yaml │ │ ├── amg_grafana-cw-datasource.yaml │ │ ├── amg_grafana-dashboards.yaml │ │ ├── amg_grafana-identity.yaml │ │ ├── amg_grafana-xray-datasource.yaml │ │ └── kustomization.yaml │ │ ├── istio │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── java │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── kube-proxy │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ ├── neuron │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml │ │ └── nginx │ │ ├── amg_grafana-dashboards.yaml │ │ └── kustomization.yaml └── k8s-deployment-manifest-templates │ ├── neuron │ └── pytorch-inference-resnet50.yml │ └── nginx │ └── nginx-traffic-sample.yaml └── docs ├── img ├── cloud-sun-solid-orange.svg ├── dashboard.png └── logo_svg.svg └── index.html /.github/workflows/linkcheck.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeout": "5s", 3 | "retryOn429": true, 4 | "retryCount": 5, 5 | "fallbackRetryDelay": "30s", 6 | "aliveStatusCodes": [200, 206], 7 | "httpHeaders": [ 8 | { 9 | "urls": ["https://help.github.com/"], 10 | "headers": { 11 | "Accept-Encoding": "zstd, br, gzip, deflate" 12 | } 13 | } 14 | ], 15 | "ignorePatterns": [ 16 | { 17 | "pattern": [ 18 | "localhost" 19 | ] 20 | }, 21 | { 22 | "pattern": [ 23 | "127.0.0.1" 24 | ] 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/markdown-link-check.yaml: 
-------------------------------------------------------------------------------- 1 | name: Check Markdown links 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "**/*.md" 9 | 10 | pull_request: 11 | branches: 12 | - main 13 | paths: 14 | - "**/*.md" 15 | 16 | jobs: 17 | markdown-link-check: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-node@v3 22 | with: 23 | node-version: '16.x' 24 | - name: install markdown-link-check 25 | run: npm install -g markdown-link-check@3.10.2 26 | - name: markdown-link-check version 27 | run: npm list -g markdown-link-check 28 | - name: Run markdown-link-check on MD files 29 | run: find docs -name "*.md" | xargs -n 1 markdown-link-check -q -c .github/workflows/linkcheck.json 30 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | # This workflow runs the repository's pre-commit hooks on pushes and pull requests to main 2 | # For more information see: https://github.com/pre-commit/action 3 | 4 | name: pre-commit 5 | 6 | on: 7 | push: 8 | branches: ["main"] 9 | pull_request: 10 | branches: ["main"] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: pre-commit/action@v3.0.0 20 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: f71fa2c1f9cf5cb705f73dffe4b21f7c61470ba9 # frozen: v4.4.0 4 | hooks: 5 | - id: trailing-whitespace 6 | args: ['--markdown-linebreak-ext=md'] 7 | - id: end-of-file-fixer 8 | - id: check-merge-conflict 9 | - id: detect-private-key 10 | - id: detect-aws-credentials 11 | args: ['--allow-missing-credentials'] 12 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 
55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AWS Observability Accelerator 2 | 3 | [](https://github.com/aws-observability/aws-observability-accelerator/actions/workflows/pre-commit.yaml) 4 | 5 | Welcome to the AWS Observability Accelerator! 6 | 7 | The AWS Observability Accelerator helps you set up observability for your AWS environments with AWS-managed observability services such as Amazon Managed Service for Prometheus, Amazon Managed Grafana, AWS Distro for OpenTelemetry (ADOT) and Amazon CloudWatch. 8 | 9 | This repository provides shared artifacts (documentation, dashboards, alerting rules) for the [Terraform](https://github.com/aws-observability/terraform-aws-observability-accelerator) and [CDK](https://github.com/aws-observability/cdk-aws-observability-accelerator) projects. This repository also serves as a GitOps source repository. 10 | 11 | Check out the project documentation for the [Terraform](https://aws-observability.github.io/terraform-aws-observability-accelerator/) and [CDK](https://aws-observability.github.io/cdk-aws-observability-accelerator/) projects for more information. 12 | 13 | 14 | ## Security 15 | 16 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 17 | 18 | ## Related projects 19 | 20 | Some of the dashboards have been inspired by the [Prometheus Monitoring Mixin for Kubernetes](https://github.com/kubernetes-monitoring/kubernetes-mixin), 21 | enhanced to work with AWS Observability services. 22 | 23 | ## License 24 | 25 | This project is licensed under the Apache-2.0 License. 
26 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-app/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | name: grafana-operator-application 4 | description: App of apps chart for the Grafana Operator. 5 | version: 1.0.0 6 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-app/templates/grafana-operator-app.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Application 3 | metadata: 4 | name: grafana-operator-app 5 | namespace: {{ .Values.argoNamespace | default "argocd" }} 6 | finalizers: 7 | - resources-finalizer.argocd.argoproj.io 8 | spec: 9 | project: {{ .Values.argoProject | default "default" }} 10 | destination: 11 | namespace: grafana-operator 12 | server: {{ .Values.spec.destination.server }} 13 | source: 14 | repoURL: {{ .Values.spec.source.repoURL }} 15 | targetRevision: {{ .Values.spec.source.targetRevision }} 16 | path: artifacts/argocd-apps/grafana-operator-chart 17 | helm: 18 | values: | 19 | {{- toYaml .Values | nindent 8 }} 20 | syncPolicy: 21 | automated: 22 | prune: true 23 | syncOptions: 24 | - CreateNamespace=true 25 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-app/values.yaml: -------------------------------------------------------------------------------- 1 | argoNamespace: '' 2 | argoProject: '' 3 | 4 | spec: 5 | destination: 6 | server: https://kubernetes.default.svc 7 | source: 8 | repoURL: https://github.com/aws-observability/aws-observability-accelerator.git 9 | targetRevision: main 10 | 11 | AMP_ASSUME_ROLE_ARN: 'UPDATE_ME_WITH_AMP_ASSUME_ROLE_ARN' 12 | AMP_AWS_REGION: 'UPDATE_ME_WITH_AMP_AWS_REGION' 13 | AMP_ENDPOINT_URL: 
'UPDATE_ME_WITH_AMP_ENDPOINT_URL' 14 | 15 | CW_ASSUME_ROLE_ARN: 'UPDATE_ME_WITH_CW_ASSUME_ROLE_ARN' 16 | CW_AWS_REGION: 'UPDATE_ME_WITH_CW_AWS_REGION' 17 | 18 | AMG_ENDPOINT_URL: 'UPDATE_ME_WITH_AMG_ENDPOINT_URL_STARTING_WITH_HTTPS' 19 | 20 | GRAFANA_CLUSTER_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/cluster.json" 21 | GRAFANA_KUBELET_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kubelet.json" 22 | GRAFANA_NSWRKLDS_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/namespace-workloads.json" 23 | GRAFANA_NODEEXP_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodeexporter-nodes.json" 24 | GRAFANA_NODES_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/nodes.json" 25 | GRAFANA_WORKLOADS_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/workloads.json" 26 | GRAFANA_KSH_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/ksh.json" 27 | GRAFANA_KCM_DASH_URL: "https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/main/artifacts/grafana-dashboards/eks/infrastructure/kcm.json" -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 
2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: grafana-operator-chart 3 | description: A Helm chart for Kubernetes 4 | # A chart can be either an 'application' or a 'library' chart. 5 | # 6 | # Application charts are a collection of templates that can be packaged into versioned archives 7 | # to be deployed. 8 | # 9 | # Library charts provide useful utilities or functions for the chart developer. They're included as 10 | # a dependency of application charts to inject those utilities and functions into the rendering 11 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 12 | type: application 13 | # This is the chart version. This version number should be incremented each time you make changes 14 | # to the chart and its templates, including the app version. 15 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 16 | version: 1.0.0 17 | # This is the version number of the application being deployed. This version number should be 18 | # incremented each time you make changes to the application. Versions are not expected to 19 | # follow Semantic Versioning. They should reflect the version the application is using. 20 | # It is recommended to use it with quotes. 
21 | appVersion: "1.0.0" 22 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/templates/amg_grafana-amp-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: grafanadatasource-amp 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 11 | name: grafana-operator-amp-datasource 12 | type: prometheus 13 | access: proxy 14 | url: {{ .Values.AMP_ENDPOINT_URL }} 15 | isDefault: true 16 | jsonData: 17 | 'tlsSkipVerify': false 18 | 'timeInterval': "5s" 19 | 'sigV4Auth': true 20 | 'sigV4AuthType': "ec2_iam_role" 21 | 'sigV4AssumeRoleArn': {{ .Values.AMP_ASSUME_ROLE_ARN }} 22 | 'sigV4Region': {{ .Values.AMP_AWS_REGION }} 23 | 'defaultRegion': {{ .Values.AMP_AWS_REGION }} 24 | editable: true 25 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/templates/amg_grafana-cw-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: grafanadatasource-cw 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 11 | name: grafana-operator-cloudwatch-datasource 12 | type: cloudwatch 13 | access: server 14 | isDefault: false 15 | jsonData: 16 | 'tlsSkipVerify': false 17 | 'authType': "ec2_iam_role" 18 | 'assumeRoleArn': {{ .Values.CW_ASSUME_ROLE_ARN }} 19 | 'defaultRegion': {{ .Values.CW_AWS_REGION }} 20 | 'customMetricsNamespaces': "ContainerInsights/Prometheus" 21 | editable: true 22 | -------------------------------------------------------------------------------- 
/artifacts/argocd-apps/grafana-operator-chart/templates/amg_grafana-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: node-exporter-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: {{ .Values.GRAFANA_NODEEXP_DASH_URL }} 12 | --- 13 | apiVersion: grafana.integreatly.org/v1beta1 14 | kind: GrafanaDashboard 15 | metadata: 16 | name: cluster-grafanadashboard 17 | namespace: grafana-operator 18 | spec: 19 | folder: "Observability Accelerator Dashboards" 20 | instanceSelector: 21 | matchLabels: 22 | dashboards: "external-grafana" 23 | url: {{ .Values.GRAFANA_CLUSTER_DASH_URL }} 24 | --- 25 | apiVersion: grafana.integreatly.org/v1beta1 26 | kind: GrafanaDashboard 27 | metadata: 28 | name: kubelet-grafanadashboard 29 | namespace: grafana-operator 30 | spec: 31 | folder: "Observability Accelerator Dashboards" 32 | instanceSelector: 33 | matchLabels: 34 | dashboards: "external-grafana" 35 | url: {{ .Values.GRAFANA_KUBELET_DASH_URL }} 36 | --- 37 | apiVersion: grafana.integreatly.org/v1beta1 38 | kind: GrafanaDashboard 39 | metadata: 40 | name: namespace-workloads-grafanadashboard 41 | namespace: grafana-operator 42 | spec: 43 | folder: "Observability Accelerator Dashboards" 44 | instanceSelector: 45 | matchLabels: 46 | dashboards: "external-grafana" 47 | url: {{ .Values.GRAFANA_NSWRKLDS_DASH_URL }} 48 | --- 49 | apiVersion: grafana.integreatly.org/v1beta1 50 | kind: GrafanaDashboard 51 | metadata: 52 | name: nodes-grafanadashboard 53 | namespace: grafana-operator 54 | spec: 55 | folder: "Observability Accelerator Dashboards" 56 | instanceSelector: 57 | matchLabels: 58 | dashboards: "external-grafana" 59 | url: {{ .Values.GRAFANA_NODES_DASH_URL }} 60 | --- 61 | apiVersion: 
grafana.integreatly.org/v1beta1 62 | kind: GrafanaDashboard 63 | metadata: 64 | name: workloads-grafanadashboard 65 | namespace: grafana-operator 66 | spec: 67 | folder: "Observability Accelerator Dashboards" 68 | instanceSelector: 69 | matchLabels: 70 | dashboards: "external-grafana" 71 | url: {{ .Values.GRAFANA_WORKLOADS_DASH_URL }} 72 | --- 73 | apiVersion: grafana.integreatly.org/v1beta1 74 | kind: GrafanaDashboard 75 | metadata: 76 | name: ksh-grafanadashboard 77 | namespace: grafana-operator 78 | spec: 79 | folder: "Observability Accelerator Dashboards" 80 | instanceSelector: 81 | matchLabels: 82 | dashboards: "external-grafana" 83 | url: {{ .Values.GRAFANA_KSH_DASH_URL }} 84 | --- 85 | apiVersion: grafana.integreatly.org/v1beta1 86 | kind: GrafanaDashboard 87 | metadata: 88 | name: kcm-grafanadashboard 89 | namespace: grafana-operator 90 | spec: 91 | folder: "Observability Accelerator Dashboards" 92 | instanceSelector: 93 | matchLabels: 94 | dashboards: "external-grafana" 95 | url: {{ .Values.GRAFANA_KCM_DASH_URL }} 96 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/templates/amg_grafana-identity.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: Grafana 3 | metadata: 4 | name: external-grafana 5 | namespace: grafana-operator 6 | labels: 7 | dashboards: "external-grafana" 8 | spec: 9 | external: 10 | url: {{ .Values.AMG_ENDPOINT_URL }} 11 | apiKey: 12 | name: grafana-admin-credentials 13 | key: GF_SECURITY_ADMIN_APIKEY 14 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/templates/amg_grafana-xray-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: 
grafanadatasource-xray 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 11 | name: grafana-operator-xray-datasource 12 | type: grafana-x-ray-datasource 13 | access: server 14 | isDefault: false 15 | jsonData: 16 | 'tlsSkipVerify': false 17 | 'authType': "ec2_iam_role" 18 | 'assumeRoleArn': {{ .Values.CW_ASSUME_ROLE_ARN }} 19 | 'defaultRegion': {{ .Values.CW_AWS_REGION }} 20 | editable: true 21 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/grafana-operator-chart/values.yaml: -------------------------------------------------------------------------------- 1 | kubernetesClusterDomain: cluster.local 2 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/sample-apps/envs/prod/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | name: prod-applications 4 | description: App of apps chart for the prod EKS environment. 
5 | version: 1.0.0 6 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/sample-apps/envs/prod/templates/team-geordie.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Application 3 | metadata: 4 | name: team-geordie 5 | namespace: {{ .Values.argoNamespace | default "argocd" }} 6 | labels: 7 | {{- toYaml .Values.labels | nindent 4 }} 8 | finalizers: 9 | - resources-finalizer.argocd.argoproj.io 10 | spec: 11 | project: {{ .Values.argoProject | default "default" }} 12 | destination: 13 | namespace: {{ .Values.argoNamespace | default "argocd" }} 14 | server: {{ .Values.destinationServer | default .Values.spec.destination.server }} 15 | source: 16 | repoURL: {{ .Values.spec.source.repoURL }} 17 | targetRevision: {{ .Values.spec.source.targetRevision }} 18 | path: artifacts/argocd-apps/teams/team-geordie/prod 19 | helm: 20 | values: | 21 | {{- toYaml .Values | nindent 8 }} 22 | syncPolicy: 23 | automated: 24 | prune: true 25 | syncOptions: 26 | - CreateNamespace=true 27 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/sample-apps/envs/prod/values.yaml: -------------------------------------------------------------------------------- 1 | destinationServer: '' 2 | argoNamespace: '' 3 | argoProject: '' 4 | 5 | labels: 6 | env: prod 7 | spec: 8 | destination: 9 | server: https://kubernetes.default.svc 10 | source: 11 | repoURL: https://github.com/aws-observability/aws-observability-accelerator.git 12 | targetRevision: main 13 | 14 | # These default values can be overridden by Infrastructure as Code 15 | env: prod 16 | clusterName: coa-eks-blueprint-cluster # the name of the EKS cluster 17 | blueprint: 'terraform' # cdk | terraform 18 | karpenterInstanceProfile: 19 | ingress: 20 | type: 'alb' # nginx | alb 21 | host: # empty or your domain like dev.example.com 22 | 
-------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | name: monapps 4 | description: Charts of Monitoring Apps for the prod EKS environment. 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/ho11y/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | name: ho11y 4 | description: Charts of Monitoring Apps for the prod EKS environment. 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/ho11y/templates/ho11y.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ho11y 5 | --- 6 | apiVersion: apps/v1 7 | kind: Deployment 8 | metadata: 9 | name: frontend 10 | namespace: geordie 11 | spec: 12 | selector: 13 | matchLabels: 14 | app: frontend 15 | replicas: 1 16 | template: 17 | metadata: 18 | labels: 19 | app: frontend 20 | spec: 21 | containers: 22 | - name: ho11y 23 | image: public.ecr.aws/z0a4o2j5/ho11y:latest 24 | ports: 25 | - containerPort: 8765 26 | env: 27 | - name: DISABLE_OM 28 | value: "on" 29 | - name: HO11Y_LOG_DEST 30 | value: "stdout" 31 | - name: OTEL_RESOURCE_ATTRIB 32 | value: "frontend" 33 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 34 | value: "otel-collector-xray-collector.default.svc.cluster.local:4317" 35 | - name: HO11Y_INJECT_FAILURE 36 | value: "enabled" 37 | - name: DOWNSTREAM0 38 | value: "http://downstream0" 39 | - name: DOWNSTREAM1 40 | value: "http://downstream1" 41 | imagePullPolicy: Always 42 | --- 43 | apiVersion: apps/v1 44 | kind: Deployment 45 | metadata: 46 | 
name: downstream0 47 | namespace: geordie 48 | spec: 49 | selector: 50 | matchLabels: 51 | app: downstream0 52 | replicas: 1 53 | template: 54 | metadata: 55 | labels: 56 | app: downstream0 57 | spec: 58 | containers: 59 | - name: ho11y 60 | image: public.ecr.aws/mhausenblas/ho11y:stable 61 | ports: 62 | - containerPort: 8765 63 | env: 64 | - name: DISABLE_OM 65 | value: "on" 66 | - name: HO11Y_LOG_DEST 67 | value: "stdout" 68 | - name: OTEL_RESOURCE_ATTRIB 69 | value: "downstream0" 70 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 71 | value: "otel-collector-xray-collector.default.svc.cluster.local:4317" 72 | - name: DOWNSTREAM0 73 | value: "https://mhausenblas.info/" 74 | imagePullPolicy: Always 75 | --- 76 | apiVersion: apps/v1 77 | kind: Deployment 78 | metadata: 79 | name: downstream1 80 | namespace: geordie 81 | spec: 82 | selector: 83 | matchLabels: 84 | app: downstream1 85 | replicas: 1 86 | template: 87 | metadata: 88 | labels: 89 | app: downstream1 90 | spec: 91 | containers: 92 | - name: ho11y 93 | image: public.ecr.aws/mhausenblas/ho11y:stable 94 | ports: 95 | - containerPort: 8765 96 | env: 97 | - name: DISABLE_OM 98 | value: "on" 99 | - name: HO11Y_LOG_DEST 100 | value: "stdout" 101 | - name: OTEL_RESOURCE_ATTRIB 102 | value: "downstream1" 103 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 104 | value: "otel-collector-xray-collector.default.svc.cluster.local:4317" 105 | - name: DOWNSTREAM0 106 | value: "https://o11y.news/2021-03-01/" 107 | - name: DOWNSTREAM1 108 | value: "DUMMY:187kB:42ms" 109 | - name: DOWNSTREAM2 110 | value: "DUMMY:13kB:2ms" 111 | imagePullPolicy: Always 112 | --- 113 | apiVersion: v1 114 | kind: Service 115 | metadata: 116 | name: frontend 117 | namespace: geordie 118 | annotations: 119 | scrape: "true" 120 | spec: 121 | type: LoadBalancer 122 | ports: 123 | - port: 80 124 | targetPort: 8765 125 | selector: 126 | app: frontend 127 | --- 128 | apiVersion: v1 129 | kind: Service 130 | metadata: 131 | name: downstream0 132 | namespace: geordie 133 | 
annotations: 134 | scrape: "true" 135 | spec: 136 | ports: 137 | - port: 80 138 | targetPort: 8765 139 | selector: 140 | app: downstream0 141 | --- 142 | apiVersion: v1 143 | kind: Service 144 | metadata: 145 | name: downstream1 146 | namespace: geordie 147 | annotations: 148 | scrape: "true" 149 | spec: 150 | ports: 151 | - port: 80 152 | targetPort: 8765 153 | selector: 154 | app: downstream1 155 | --- 156 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/ho11y/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | namespace: geordie 5 | name: ho11y-ingress 6 | annotations: 7 | {{ if eq .Values.spec.ingress.type "alb" }} 8 | #rewrite does not exist yet in ALB 9 | #https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/1571 10 | #so commenting grouping feature 11 | #alb.ingress.kubernetes.io/group.name: geordie 12 | alb.ingress.kubernetes.io/scheme: internet-facing 13 | alb.ingress.kubernetes.io/target-type: ip 14 | {{ if .Values.spec.ingress.host }} 15 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP":80,"HTTPS": 443}]' 16 | alb.ingress.kubernetes.io/ssl-redirect: '443' 17 | {{ else }} 18 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]' 19 | {{ end }} 20 | alb.ingress.kubernetes.io/tags: Environment={{ .Values.labels.env }},Team=Geordie 21 | {{ else }} 22 | kubernetes.io/ingress.class: "nginx" 23 | {{ end }} 24 | {{ if .Values.spec.ingress.host }} 25 | external-dns.alpha.kubernetes.io/set-identifier: {{ .Values.spec.clusterName }} 26 | external-dns.alpha.kubernetes.io/aws-weight: '{{ .Values.spec.ingress.route53_weight }}' 27 | external-dns.alpha.kubernetes.io/ttl: "10" 28 | {{ end }} 29 | spec: 30 | {{ if eq .Values.spec.ingress.type "nginx" }} 31 | ingressClassName: nginx 32 | {{ else }} 33 | ingressClassName: alb 34 | {{ end }} 35 | 
rules: 36 | {{ if .Values.spec.ingress.host }} 37 | - host: ho11y.{{ .Values.spec.ingress.host }} 38 | {{ else }} 39 | - host: 40 | {{ end }} 41 | http: 42 | paths: 43 | - path: / 44 | pathType: Prefix 45 | backend: 46 | service: 47 | name: frontend 48 | port: 49 | number: 80 50 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/ho11y/values.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | env: prod 3 | team: geordie 4 | spec: 5 | destination: 6 | server: https://kubernetes.default.svc 7 | source: 8 | repoURL: https://github.com/aws-samples/eks-blueprints-workloads 9 | targetRevision: main 10 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/templates/ho11y-app.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Application 3 | metadata: 4 | name: ho11y 5 | # namespace is specified as namespace destination in env/{dev,test,prod}/team-geordie.yaml 6 | finalizers: 7 | - resources-finalizer.argocd.argoproj.io 8 | spec: 9 | project: {{ .Values.argoProject | default "default" }} 10 | destination: 11 | namespace: geordie 12 | server: {{ .Values.spec.destination.server }} 13 | source: 14 | repoURL: {{ .Values.spec.source.repoURL }} 15 | targetRevision: {{ .Values.spec.source.targetRevision }} 16 | path: artifacts/argocd-apps/teams/team-geordie/prod/ho11y 17 | helm: 18 | values: | 19 | {{- toYaml .Values | nindent 8 }} 20 | syncPolicy: 21 | automated: 22 | prune: true 23 | syncOptions: 24 | - CreateNamespace=true 25 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/templates/yelb-app.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 
2 | kind: Application 3 | metadata: 4 | name: yelb 5 | # namespace is specified as namespace destination in env/{dev,test,prod}/team-geordie.yaml 6 | finalizers: 7 | - resources-finalizer.argocd.argoproj.io 8 | spec: 9 | project: {{ .Values.argoProject | default "default" }} 10 | destination: 11 | namespace: geordie 12 | server: {{ .Values.spec.destination.server }} 13 | source: 14 | repoURL: {{ .Values.spec.source.repoURL }} 15 | targetRevision: {{ .Values.spec.source.targetRevision }} 16 | path: artifacts/argocd-apps/teams/team-geordie/prod/yelb 17 | helm: 18 | values: | 19 | {{- toYaml .Values | nindent 8 }} 20 | syncPolicy: 21 | automated: 22 | prune: true 23 | syncOptions: 24 | - CreateNamespace=true 25 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/values.yaml: -------------------------------------------------------------------------------- 1 | argoProject: '' 2 | 3 | labels: 4 | env: prod 5 | team: geordie 6 | spec: 7 | destination: 8 | server: https://kubernetes.default.svc 9 | source: 10 | repoURL: https://github.com/aws-samples/eks-blueprints-workloads 11 | targetRevision: main 12 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/yelb/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: "1.0" 3 | description: Yelb! 
The only hub for healthy food recommendations 4 | name: yelb 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/yelb/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | namespace: geordie 5 | name: redis-server 6 | labels: 7 | app: redis-server 8 | tier: cache 9 | spec: 10 | type: ClusterIP 11 | ports: 12 | - port: 6379 13 | selector: 14 | app: redis-server 15 | tier: cache 16 | --- 17 | apiVersion: v1 18 | kind: Service 19 | metadata: 20 | namespace: geordie 21 | name: yelb-db 22 | labels: 23 | app: yelb-db 24 | tier: backenddb 25 | spec: 26 | type: ClusterIP 27 | ports: 28 | - port: 5432 29 | selector: 30 | app: yelb-db 31 | tier: backenddb 32 | --- 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | namespace: geordie 37 | name: yelb-appserver 38 | labels: 39 | app: yelb-appserver 40 | tier: middletier 41 | spec: 42 | type: ClusterIP 43 | ports: 44 | - port: 4567 45 | selector: 46 | app: yelb-appserver 47 | tier: middletier 48 | --- 49 | apiVersion: v1 50 | kind: Service 51 | metadata: 52 | namespace: geordie 53 | name: yelb-ui 54 | labels: 55 | app: yelb-ui 56 | tier: frontend 57 | spec: 58 | type: LoadBalancer 59 | ports: 60 | - port: 80 61 | protocol: TCP 62 | targetPort: 80 63 | selector: 64 | app: yelb-ui 65 | tier: frontend 66 | --- 67 | apiVersion: apps/v1 68 | kind: Deployment 69 | metadata: 70 | namespace: geordie 71 | name: yelb-ui 72 | spec: 73 | replicas: 1 74 | selector: 75 | matchLabels: 76 | app: yelb-ui 77 | tier: frontend 78 | template: 79 | metadata: 80 | labels: 81 | app: yelb-ui 82 | tier: frontend 83 | spec: 84 | containers: 85 | - name: yelb-ui 86 | image: mreferre/yelb-ui:0.7 87 | ports: 88 | - containerPort: 80 89 | --- 90 | apiVersion: apps/v1 91 | kind: Deployment 92 | metadata: 93 | namespace: geordie 94 | name: 
redis-server 95 | spec: 96 | selector: 97 | matchLabels: 98 | app: redis-server 99 | tier: cache 100 | replicas: 1 101 | template: 102 | metadata: 103 | labels: 104 | app: redis-server 105 | tier: cache 106 | spec: 107 | containers: 108 | - name: redis-server 109 | image: redis:4.0.2 110 | ports: 111 | - containerPort: 6379 112 | --- 113 | apiVersion: apps/v1 114 | kind: Deployment 115 | metadata: 116 | namespace: geordie 117 | name: yelb-db 118 | spec: 119 | replicas: 1 120 | selector: 121 | matchLabels: 122 | app: yelb-db 123 | tier: backenddb 124 | template: 125 | metadata: 126 | labels: 127 | app: yelb-db 128 | tier: backenddb 129 | spec: 130 | containers: 131 | - name: yelb-db 132 | image: mreferre/yelb-db:0.5 133 | ports: 134 | - containerPort: 5432 135 | --- 136 | apiVersion: apps/v1 137 | kind: Deployment 138 | metadata: 139 | namespace: geordie 140 | name: yelb-appserver 141 | spec: 142 | replicas: 1 143 | selector: 144 | matchLabels: 145 | app: yelb-appserver 146 | tier: middletier 147 | template: 148 | metadata: 149 | labels: 150 | app: yelb-appserver 151 | tier: middletier 152 | spec: 153 | containers: 154 | - name: yelb-appserver 155 | image: mreferre/yelb-appserver:0.5 156 | ports: 157 | - containerPort: 4567 158 | --- 159 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/yelb/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | namespace: geordie 5 | name: yelb-ingress 6 | annotations: 7 | {{ if eq .Values.spec.ingress.type "alb" }} 8 | #rewrite does not exist yet in ALB 9 | #https://github.com/kubernetes-sigs/aws-load-balancer-controller/issues/1571 10 | #so commenting grouping feature 11 | #alb.ingress.kubernetes.io/group.name: geordie 12 | alb.ingress.kubernetes.io/scheme: internet-facing 13 | alb.ingress.kubernetes.io/target-type: ip 14 | {{ if 
.Values.spec.ingress.host }} 15 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP":80,"HTTPS": 443}]' 16 | alb.ingress.kubernetes.io/ssl-redirect: '443' 17 | {{ else }} 18 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]' 19 | {{ end }} 20 | alb.ingress.kubernetes.io/tags: Environment={{ .Values.labels.env }},Team=Geordie 21 | {{ else }} 22 | kubernetes.io/ingress.class: "nginx" 23 | {{ end }} 24 | {{ if .Values.spec.ingress.host }} 25 | external-dns.alpha.kubernetes.io/set-identifier: {{ .Values.spec.clusterName }} 26 | external-dns.alpha.kubernetes.io/aws-weight: '{{ .Values.spec.ingress.route53_weight }}' 27 | external-dns.alpha.kubernetes.io/ttl: "10" 28 | {{ end }} 29 | labels: 30 | app: yelb-ui 31 | spec: 32 | {{ if eq .Values.spec.ingress.type "nginx" }} 33 | ingressClassName: nginx 34 | {{ else }} 35 | ingressClassName: alb 36 | {{ end }} 37 | rules: 38 | {{ if .Values.spec.ingress.host }} 39 | - host: yelb.{{ .Values.spec.ingress.host }} 40 | {{ else }} 41 | - host: 42 | {{ end }} 43 | http: 44 | paths: 45 | - path: / 46 | pathType: Prefix 47 | backend: 48 | service: 49 | name: yelb-ui 50 | port: 51 | number: 80 52 | -------------------------------------------------------------------------------- /artifacts/argocd-apps/teams/team-geordie/prod/yelb/values.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | env: prod 3 | team: geordie 4 | spec: 5 | destination: 6 | server: https://kubernetes.default.svc 7 | source: 8 | repoURL: https://github.com/aws-samples/eks-blueprints-workloads 9 | targetRevision: main 10 | -------------------------------------------------------------------------------- /artifacts/grafana-dashboards/amp/amp-dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 
1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "description": "Dashboard for Amazon Managed Prometheus", 22 | "editable": true, 23 | "fiscalYearStartMonth": 0, 24 | "graphTooltip": 0, 25 | "id": 51, 26 | "iteration": 1666292684202, 27 | "links": [], 28 | "liveNow": false, 29 | "panels": [ 30 | { 31 | "gridPos": { 32 | "h": 7, 33 | "w": 5, 34 | "x": 0, 35 | "y": 0 36 | }, 37 | "id": 16, 38 | "options": { 39 | "content": "# Ingestion Usage Metrics\n\nMetrics relating to ingestion usage of the AMP service", 40 | "mode": "markdown" 41 | }, 42 | "pluginVersion": "8.4.7", 43 | "title": "Usage", 44 | "type": "text" 45 | }, 46 | { 47 | "datasource": { 48 | "type": "cloudwatch", 49 | "uid": "$datasource" 50 | }, 51 | "fieldConfig": { 52 | "defaults": { 53 | "color": { 54 | "mode": "palette-classic" 55 | }, 56 | "custom": { 57 | "axisLabel": "", 58 | "axisPlacement": "auto", 59 | "barAlignment": 0, 60 | "drawStyle": "line", 61 | "fillOpacity": 0, 62 | "gradientMode": "none", 63 | "hideFrom": { 64 | "legend": false, 65 | "tooltip": false, 66 | "viz": false 67 | }, 68 | "lineInterpolation": "linear", 69 | "lineWidth": 1, 70 | "pointSize": 5, 71 | "scaleDistribution": { 72 | "type": "linear" 73 | }, 74 | "showPoints": "auto", 75 | "spanNulls": false, 76 | "stacking": { 77 | "group": "A", 78 | "mode": "none" 79 | }, 80 | "thresholdsStyle": { 81 | "mode": "off" 82 | } 83 | }, 84 | "mappings": [], 85 | "thresholds": { 86 | "mode": "absolute", 87 | "steps": [ 88 | { 89 | "color": "green", 90 | "value": null 91 | }, 92 | { 93 | "color": "red", 94 | "value": 80 95 | } 96 | ] 97 | } 98 | }, 99 | "overrides": [] 100 | }, 101 | "gridPos": { 102 | "h": 7, 103 | "w": 9, 104 | "x": 5, 105 | "y": 0 106 | }, 107 | "id": 6, 108 | "options": { 109 | "legend": { 110 | "calcs": [], 111 | "displayMode": "list", 112 | "placement": "bottom" 113 
| }, 114 | "tooltip": { 115 | "mode": "single", 116 | "sort": "none" 117 | } 118 | }, 119 | "targets": [ 120 | { 121 | "alias": "", 122 | "datasource": { 123 | "type": "cloudwatch", 124 | "uid": "$datasource" 125 | }, 126 | "dimensions": {}, 127 | "expression": "SELECT SUM(ResourceCount) FROM SCHEMA(\"AWS/Usage\", Class,Resource,ResourceId,Service,Type) WHERE Type = 'Resource' AND ResourceId = '$WorkspaceID' AND Resource = 'ActiveSeries' AND Service = 'Prometheus' AND Class = 'None'", 128 | "id": "", 129 | "matchExact": true, 130 | "metricEditorMode": 1, 131 | "metricName": "", 132 | "metricQueryType": 0, 133 | "namespace": "", 134 | "period": "", 135 | "queryMode": "Metrics", 136 | "refId": "A", 137 | "region": "default", 138 | "sqlExpression": "", 139 | "statistic": "Average" 140 | } 141 | ], 142 | "title": "Active Series Metrics", 143 | "type": "timeseries" 144 | }, 145 | { 146 | "datasource": { 147 | "type": "cloudwatch", 148 | "uid": "$datasource" 149 | }, 150 | "fieldConfig": { 151 | "defaults": { 152 | "color": { 153 | "mode": "palette-classic" 154 | }, 155 | "custom": { 156 | "axisLabel": "", 157 | "axisPlacement": "auto", 158 | "barAlignment": 0, 159 | "drawStyle": "line", 160 | "fillOpacity": 0, 161 | "gradientMode": "none", 162 | "hideFrom": { 163 | "legend": false, 164 | "tooltip": false, 165 | "viz": false 166 | }, 167 | "lineInterpolation": "linear", 168 | "lineWidth": 1, 169 | "pointSize": 5, 170 | "scaleDistribution": { 171 | "type": "linear" 172 | }, 173 | "showPoints": "auto", 174 | "spanNulls": false, 175 | "stacking": { 176 | "group": "A", 177 | "mode": "none" 178 | }, 179 | "thresholdsStyle": { 180 | "mode": "off" 181 | } 182 | }, 183 | "mappings": [], 184 | "thresholds": { 185 | "mode": "absolute", 186 | "steps": [ 187 | { 188 | "color": "green", 189 | "value": null 190 | }, 191 | { 192 | "color": "red", 193 | "value": 80 194 | } 195 | ] 196 | } 197 | }, 198 | "overrides": [] 199 | }, 200 | "gridPos": { 201 | "h": 7, 202 | "w": 9, 203 | "x": 
14, 204 | "y": 0 205 | }, 206 | "id": 2, 207 | "options": { 208 | "legend": { 209 | "calcs": [], 210 | "displayMode": "list", 211 | "placement": "bottom" 212 | }, 213 | "tooltip": { 214 | "mode": "single", 215 | "sort": "none" 216 | } 217 | }, 218 | "targets": [ 219 | { 220 | "alias": "", 221 | "datasource": { 222 | "type": "cloudwatch", 223 | "uid": "$datasource" 224 | }, 225 | "dimensions": {}, 226 | "expression": "SELECT AVG(ResourceCount) FROM SCHEMA(\"AWS/Usage\", Class,Resource,ResourceId,Service,Type) WHERE Type = 'Resource' AND ResourceId = '$WorkspaceID' AND Resource = 'IngestionRate' AND Service = 'Prometheus' AND Class = 'None'", 227 | "id": "", 228 | "matchExact": true, 229 | "metricEditorMode": 1, 230 | "metricName": "", 231 | "metricQueryType": 0, 232 | "namespace": "", 233 | "period": "", 234 | "queryMode": "Metrics", 235 | "refId": "A", 236 | "region": "default", 237 | "sqlExpression": "", 238 | "statistic": "Average" 239 | } 240 | ], 241 | "title": "Workspace Ingestion Rate", 242 | "type": "timeseries" 243 | }, 244 | { 245 | "gridPos": { 246 | "h": 8, 247 | "w": 5, 248 | "x": 0, 249 | "y": 7 250 | }, 251 | "id": 22, 252 | "options": { 253 | "content": "# Billing\n\nContains information relating to the cost of AMP\n\n", 254 | "mode": "markdown" 255 | }, 256 | "pluginVersion": "8.4.7", 257 | "title": "Billing", 258 | "type": "text" 259 | }, 260 | { 261 | "datasource": { 262 | "type": "cloudwatch", 263 | "uid": "$datasource" 264 | }, 265 | "fieldConfig": { 266 | "defaults": { 267 | "color": { 268 | "mode": "palette-classic" 269 | }, 270 | "custom": { 271 | "axisLabel": "", 272 | "axisPlacement": "auto", 273 | "barAlignment": 0, 274 | "drawStyle": "line", 275 | "fillOpacity": 0, 276 | "gradientMode": "none", 277 | "hideFrom": { 278 | "legend": false, 279 | "tooltip": false, 280 | "viz": false 281 | }, 282 | "lineInterpolation": "linear", 283 | "lineWidth": 1, 284 | "pointSize": 5, 285 | "scaleDistribution": { 286 | "type": "linear" 287 | }, 288 | 
"showPoints": "auto", 289 | "spanNulls": false, 290 | "stacking": { 291 | "group": "A", 292 | "mode": "none" 293 | }, 294 | "thresholdsStyle": { 295 | "mode": "off" 296 | } 297 | }, 298 | "mappings": [], 299 | "thresholds": { 300 | "mode": "absolute", 301 | "steps": [ 302 | { 303 | "color": "green", 304 | "value": null 305 | }, 306 | { 307 | "color": "red", 308 | "value": 80 309 | } 310 | ] 311 | } 312 | }, 313 | "overrides": [] 314 | }, 315 | "gridPos": { 316 | "h": 8, 317 | "w": 18, 318 | "x": 5, 319 | "y": 7 320 | }, 321 | "id": 24, 322 | "options": { 323 | "legend": { 324 | "calcs": [], 325 | "displayMode": "list", 326 | "placement": "bottom" 327 | }, 328 | "tooltip": { 329 | "mode": "single", 330 | "sort": "none" 331 | } 332 | }, 333 | "targets": [ 334 | { 335 | "alias": "", 336 | "datasource": { 337 | "type": "cloudwatch", 338 | "uid": "$datasource" 339 | }, 340 | "dimensions": {}, 341 | "expression": "SELECT SUM(EstimatedCharges) FROM SCHEMA(\"AWS/Billing\", Currency,ServiceName) WHERE ServiceName = 'AmazonPrometheus'", 342 | "id": "", 343 | "matchExact": true, 344 | "metricEditorMode": 1, 345 | "metricName": "", 346 | "metricQueryType": 0, 347 | "namespace": "", 348 | "period": "", 349 | "queryMode": "Metrics", 350 | "refId": "A", 351 | "region": "default", 352 | "sqlExpression": "", 353 | "statistic": "Average" 354 | } 355 | ], 356 | "title": "Sum of Estimated AMP Charges (total)", 357 | "type": "timeseries" 358 | }, 359 | { 360 | "gridPos": { 361 | "h": 9, 362 | "w": 5, 363 | "x": 0, 364 | "y": 15 365 | }, 366 | "id": 14, 367 | "options": { 368 | "content": "# Alert Usage Metrics\n\nMetrics associated with Alertmanager Alert Usage", 369 | "mode": "markdown" 370 | }, 371 | "pluginVersion": "8.4.7", 372 | "title": "Alerts", 373 | "type": "text" 374 | }, 375 | { 376 | "datasource": { 377 | "type": "cloudwatch", 378 | "uid": "$datasource" 379 | }, 380 | "fieldConfig": { 381 | "defaults": { 382 | "color": { 383 | "mode": "thresholds" 384 | }, 385 | "mappings": 
[], 386 | "thresholds": { 387 | "mode": "absolute", 388 | "steps": [ 389 | { 390 | "color": "green", 391 | "value": null 392 | }, 393 | { 394 | "color": "red", 395 | "value": 1 396 | } 397 | ] 398 | } 399 | }, 400 | "overrides": [] 401 | }, 402 | "gridPos": { 403 | "h": 9, 404 | "w": 4, 405 | "x": 5, 406 | "y": 15 407 | }, 408 | "id": 4, 409 | "options": { 410 | "colorMode": "value", 411 | "graphMode": "area", 412 | "justifyMode": "auto", 413 | "orientation": "auto", 414 | "reduceOptions": { 415 | "calcs": [ 416 | "lastNotNull" 417 | ], 418 | "fields": "", 419 | "values": false 420 | }, 421 | "textMode": "auto" 422 | }, 423 | "pluginVersion": "8.4.7", 424 | "targets": [ 425 | { 426 | "alias": "", 427 | "datasource": { 428 | "type": "cloudwatch", 429 | "uid": "$datasource" 430 | }, 431 | "dimensions": {}, 432 | "expression": "SELECT AVG(ResourceCount) FROM SCHEMA(\"AWS/Usage\", Class,Resource,ResourceId,Service,Type) WHERE Type = 'Resource' AND ResourceId = '$WorkspaceID' AND Resource = 'ActiveAlerts' AND Service = 'Prometheus' AND Class = 'None'", 433 | "id": "", 434 | "matchExact": true, 435 | "metricEditorMode": 1, 436 | "metricName": "", 437 | "metricQueryType": 0, 438 | "namespace": "", 439 | "period": "", 440 | "queryMode": "Metrics", 441 | "refId": "A", 442 | "region": "default", 443 | "sqlExpression": "", 444 | "statistic": "Average" 445 | } 446 | ], 447 | "title": "Active Alerts", 448 | "type": "stat" 449 | }, 450 | { 451 | "datasource": { 452 | "type": "cloudwatch", 453 | "uid": "$datasource" 454 | }, 455 | "fieldConfig": { 456 | "defaults": { 457 | "color": { 458 | "mode": "thresholds" 459 | }, 460 | "mappings": [], 461 | "thresholds": { 462 | "mode": "absolute", 463 | "steps": [ 464 | { 465 | "color": "green", 466 | "value": null 467 | }, 468 | { 469 | "color": "red", 470 | "value": 1 471 | } 472 | ] 473 | } 474 | }, 475 | "overrides": [] 476 | }, 477 | "gridPos": { 478 | "h": 9, 479 | "w": 5, 480 | "x": 9, 481 | "y": 15 482 | }, 483 | "id": 12, 484 | 
"options": { 485 | "colorMode": "value", 486 | "graphMode": "area", 487 | "justifyMode": "auto", 488 | "orientation": "auto", 489 | "reduceOptions": { 490 | "calcs": [ 491 | "lastNotNull" 492 | ], 493 | "fields": "", 494 | "values": false 495 | }, 496 | "textMode": "auto" 497 | }, 498 | "pluginVersion": "8.4.7", 499 | "targets": [ 500 | { 501 | "alias": "", 502 | "datasource": { 503 | "type": "cloudwatch", 504 | "uid": "$datasource" 505 | }, 506 | "dimensions": {}, 507 | "expression": "SELECT AVG(AlertManagerNotificationsFailed) FROM SCHEMA(\"AWS/Prometheus\", Workspace) WHERE Workspace = '$WorkspaceID'", 508 | "id": "", 509 | "matchExact": true, 510 | "metricEditorMode": 1, 511 | "metricName": "", 512 | "metricQueryType": 0, 513 | "namespace": "", 514 | "period": "", 515 | "queryMode": "Metrics", 516 | "refId": "A", 517 | "region": "default", 518 | "sqlExpression": "", 519 | "statistic": "Average" 520 | } 521 | ], 522 | "title": "Alert Manager Notifications Failed", 523 | "type": "stat" 524 | }, 525 | { 526 | "datasource": { 527 | "type": "cloudwatch", 528 | "uid": "$datasource" 529 | }, 530 | "fieldConfig": { 531 | "defaults": { 532 | "color": { 533 | "mode": "thresholds" 534 | }, 535 | "mappings": [], 536 | "thresholds": { 537 | "mode": "absolute", 538 | "steps": [ 539 | { 540 | "color": "green", 541 | "value": null 542 | } 543 | ] 544 | } 545 | }, 546 | "overrides": [] 547 | }, 548 | "gridPos": { 549 | "h": 9, 550 | "w": 4, 551 | "x": 14, 552 | "y": 15 553 | }, 554 | "id": 10, 555 | "options": { 556 | "colorMode": "value", 557 | "graphMode": "area", 558 | "justifyMode": "auto", 559 | "orientation": "auto", 560 | "reduceOptions": { 561 | "calcs": [ 562 | "lastNotNull" 563 | ], 564 | "fields": "", 565 | "values": false 566 | }, 567 | "textMode": "auto" 568 | }, 569 | "pluginVersion": "8.4.7", 570 | "targets": [ 571 | { 572 | "alias": "", 573 | "datasource": { 574 | "type": "cloudwatch", 575 | "uid": "$datasource" 576 | }, 577 | "dimensions": {}, 578 | 
"expression": "SELECT AVG(AlertManagerAlertsReceived) FROM SCHEMA(\"AWS/Prometheus\", Workspace) WHERE Workspace = '$WorkspaceID'", 579 | "id": "", 580 | "matchExact": true, 581 | "metricEditorMode": 1, 582 | "metricName": "", 583 | "metricQueryType": 0, 584 | "namespace": "", 585 | "period": "", 586 | "queryMode": "Metrics", 587 | "refId": "A", 588 | "region": "default", 589 | "sqlExpression": "", 590 | "statistic": "Average" 591 | } 592 | ], 593 | "title": "Alert Manager Alerts Received", 594 | "type": "stat" 595 | }, 596 | { 597 | "datasource": { 598 | "type": "cloudwatch", 599 | "uid": "$datasource" 600 | }, 601 | "fieldConfig": { 602 | "defaults": { 603 | "color": { 604 | "mode": "thresholds" 605 | }, 606 | "mappings": [], 607 | "thresholds": { 608 | "mode": "absolute", 609 | "steps": [ 610 | { 611 | "color": "green", 612 | "value": null 613 | }, 614 | { 615 | "color": "red", 616 | "value": 80 617 | } 618 | ] 619 | } 620 | }, 621 | "overrides": [] 622 | }, 623 | "gridPos": { 624 | "h": 9, 625 | "w": 5, 626 | "x": 18, 627 | "y": 15 628 | }, 629 | "id": 8, 630 | "options": { 631 | "colorMode": "value", 632 | "graphMode": "area", 633 | "justifyMode": "auto", 634 | "orientation": "auto", 635 | "reduceOptions": { 636 | "calcs": [ 637 | "lastNotNull" 638 | ], 639 | "fields": "", 640 | "values": false 641 | }, 642 | "textMode": "auto" 643 | }, 644 | "pluginVersion": "8.4.7", 645 | "targets": [ 646 | { 647 | "alias": "", 648 | "datasource": { 649 | "type": "cloudwatch", 650 | "uid": "$datasource" 651 | }, 652 | "dimensions": {}, 653 | "expression": "SELECT AVG(ResourceCount) FROM SCHEMA(\"AWS/Usage\", Class,Resource,ResourceId,Service,Type) WHERE Type = 'Resource' AND ResourceId = '$WorkspaceID' AND Resource = 'SizeOfAlerts' AND Service = 'Prometheus' AND Class = 'None'", 654 | "id": "", 655 | "matchExact": true, 656 | "metricEditorMode": 1, 657 | "metricName": "", 658 | "metricQueryType": 0, 659 | "namespace": "", 660 | "period": "", 661 | "queryMode": "Metrics", 662 
| "refId": "A", 663 | "region": "default", 664 | "sqlExpression": "", 665 | "statistic": "Average" 666 | } 667 | ], 668 | "title": "Size of Alerts", 669 | "type": "stat" 670 | }, 671 | { 672 | "gridPos": { 673 | "h": 7, 674 | "w": 5, 675 | "x": 0, 676 | "y": 24 677 | }, 678 | "id": 20, 679 | "options": { 680 | "content": "# AMP Vended Logs\n\nLast 25 log events from AMP Vended Logs for alert and rule evaluation", 681 | "mode": "markdown" 682 | }, 683 | "pluginVersion": "8.4.7", 684 | "title": "AMP Logs", 685 | "type": "text" 686 | }, 687 | { 688 | "datasource": { 689 | "type": "cloudwatch", 690 | "uid": "$datasource" 691 | }, 692 | "gridPos": { 693 | "h": 7, 694 | "w": 18, 695 | "x": 5, 696 | "y": 24 697 | }, 698 | "id": 18, 699 | "options": { 700 | "dedupStrategy": "none", 701 | "enableLogDetails": true, 702 | "prettifyLogMessage": false, 703 | "showCommonLabels": false, 704 | "showLabels": false, 705 | "showTime": false, 706 | "sortOrder": "Descending", 707 | "wrapLogMessage": false 708 | }, 709 | "targets": [ 710 | { 711 | "datasource": { 712 | "type": "cloudwatch", 713 | "uid": "$datasource" 714 | }, 715 | "expression": "fields @timestamp, @message\n| sort @timestamp desc\n| limit 25", 716 | "id": "", 717 | "logGroupNames": [ 718 | "/aws/vendedlogs/amp" 719 | ], 720 | "namespace": "", 721 | "queryMode": "Logs", 722 | "refId": "A", 723 | "region": "default", 724 | "statsGroups": [] 725 | } 726 | ], 727 | "timeFrom": "6h", 728 | "timeShift": "6h", 729 | "title": "AMP Vended Logs", 730 | "type": "logs" 731 | } 732 | ], 733 | "refresh": "", 734 | "schemaVersion": 35, 735 | "style": "dark", 736 | "tags": [], 737 | "templating": { 738 | "list": [ 739 | { 740 | "current": { 741 | "selected": true, 742 | "text": [ 743 | "ws-e8b003eb-0528-4208-b31c-edf4598d5f66" 744 | ], 745 | "value": [ 746 | "ws-e8b003eb-0528-4208-b31c-edf4598d5f66" 747 | ] 748 | }, 749 | "datasource": { 750 | "type": "cloudwatch", 751 | "uid": "$datasource" 752 | }, 753 | "definition": 
"dimension_values(default,AWS/Prometheus,RuleEvaluations,Workspace)", 754 | "hide": 0, 755 | "includeAll": false, 756 | "multi": true, 757 | "name": "WorkspaceID", 758 | "options": [], 759 | "query": "dimension_values(default,AWS/Prometheus,RuleEvaluations,Workspace)", 760 | "refresh": 1, 761 | "regex": "", 762 | "skipUrlSync": false, 763 | "sort": 0, 764 | "type": "query" 765 | }, 766 | { 767 | "current": { 768 | "selected": false, 769 | "text": "Amazon CloudWatch us-west-2", 770 | "value": "Amazon CloudWatch us-west-2" 771 | }, 772 | "hide": 0, 773 | "includeAll": false, 774 | "multi": false, 775 | "name": "datasource", 776 | "options": [], 777 | "query": "cloudwatch", 778 | "refresh": 1, 779 | "regex": "", 780 | "skipUrlSync": false, 781 | "type": "datasource" 782 | } 783 | ] 784 | }, 785 | "time": { 786 | "from": "now-6h", 787 | "to": "now" 788 | }, 789 | "timepicker": {}, 790 | "timezone": "", 791 | "title": "AWS Observability Accelerator - AMP Dashboard", 792 | "uid": "", 793 | "version": 1, 794 | "weekStart": "" 795 | } 796 | -------------------------------------------------------------------------------- /artifacts/grafana-dashboards/eks/apiserver/apiserver-basic.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "description": "Latency and cache hit graphs for Kubernetes apiserver. 
Useful for observing performance of Amazon EKS clusters.", 25 | "editable": true, 26 | "fiscalYearStartMonth": 0, 27 | "graphTooltip": 0, 28 | "id": null, 29 | "links": [], 30 | "liveNow": false, 31 | "panels": [ 32 | { 33 | "collapsed": false, 34 | "datasource": { 35 | "type": "prometheus", 36 | "uid": "$datasource" 37 | }, 38 | "fieldConfig": { 39 | "defaults": { 40 | "color": { 41 | "mode": "palette-classic" 42 | }, 43 | "custom": { 44 | "axisCenteredZero": false, 45 | "axisColorMode": "text", 46 | "axisLabel": "req/s", 47 | "axisPlacement": "auto", 48 | "barAlignment": 0, 49 | "drawStyle": "line", 50 | "fillOpacity": 10, 51 | "gradientMode": "none", 52 | "hideFrom": { 53 | "legend": false, 54 | "tooltip": false, 55 | "viz": false 56 | }, 57 | "lineInterpolation": "linear", 58 | "lineWidth": 1, 59 | "pointSize": 5, 60 | "scaleDistribution": { 61 | "type": "linear" 62 | }, 63 | "showPoints": "never", 64 | "spanNulls": false, 65 | "stacking": { 66 | "group": "A", 67 | "mode": "normal" 68 | }, 69 | "thresholdsStyle": { 70 | "mode": "off" 71 | } 72 | }, 73 | "decimals": 2, 74 | "links": [], 75 | "mappings": [], 76 | "min": 0, 77 | "thresholds": { 78 | "mode": "absolute", 79 | "steps": [ 80 | { 81 | "color": "green" 82 | }, 83 | { 84 | "color": "red", 85 | "value": 80 86 | } 87 | ] 88 | }, 89 | "unit": "short" 90 | }, 91 | "overrides": [] 92 | }, 93 | "gridPos": { 94 | "h": 8, 95 | "w": 12, 96 | "x": 0, 97 | "y": 0 98 | }, 99 | "id": 8, 100 | "options": { 101 | "legend": { 102 | "calcs": [ 103 | "lastNotNull", 104 | "max" 105 | ], 106 | "displayMode": "table", 107 | "placement": "right", 108 | "showLegend": true, 109 | "sortBy": "Max", 110 | "sortDesc": true 111 | }, 112 | "tooltip": { 113 | "mode": "multi", 114 | "sort": "none" 115 | } 116 | }, 117 | "pluginVersion": "9.4.7", 118 | "targets": [ 119 | { 120 | "datasource": { 121 | "uid": "prometheus", 122 | "type": "prometheus" 123 | }, 124 | "editorMode": "code", 125 | "exemplar": false, 126 | "expr": 
"sum(rate(apiserver_request_total{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,verb)", 127 | "legendFormat": "{{instance}}: {{verb}} ", 128 | "range": true, 129 | "refId": "A" 130 | } 131 | ], 132 | "title": "apiserver request rate", 133 | "type": "timeseries" 134 | }, 135 | { 136 | "datasource": { 137 | "type": "prometheus", 138 | "uid": "$datasource" 139 | }, 140 | "fieldConfig": { 141 | "defaults": { 142 | "color": { 143 | "mode": "palette-classic" 144 | }, 145 | "custom": { 146 | "axisCenteredZero": false, 147 | "axisColorMode": "text", 148 | "axisLabel": "service time", 149 | "axisPlacement": "auto", 150 | "barAlignment": 0, 151 | "drawStyle": "line", 152 | "fillOpacity": 10, 153 | "gradientMode": "none", 154 | "hideFrom": { 155 | "legend": false, 156 | "tooltip": false, 157 | "viz": false 158 | }, 159 | "lineInterpolation": "linear", 160 | "lineWidth": 1, 161 | "pointSize": 5, 162 | "scaleDistribution": { 163 | "type": "linear" 164 | }, 165 | "showPoints": "never", 166 | "spanNulls": false, 167 | "stacking": { 168 | "group": "A", 169 | "mode": "none" 170 | }, 171 | "thresholdsStyle": { 172 | "mode": "off" 173 | } 174 | }, 175 | "links": [], 176 | "mappings": [], 177 | "thresholds": { 178 | "mode": "absolute", 179 | "steps": [ 180 | { 181 | "color": "green", 182 | "value": null 183 | }, 184 | { 185 | "color": "red", 186 | "value": 80 187 | } 188 | ] 189 | }, 190 | "unit": "s" 191 | }, 192 | "overrides": [] 193 | }, 194 | "gridPos": { 195 | "h": 8, 196 | "w": 12, 197 | "x": 12, 198 | "y": 0 199 | }, 200 | "id": 10, 201 | "options": { 202 | "legend": { 203 | "calcs": [], 204 | "displayMode": "table", 205 | "placement": "right", 206 | "showLegend": true 207 | }, 208 | "tooltip": { 209 | "mode": "multi", 210 | "sort": "none" 211 | } 212 | }, 213 | "pluginVersion": "9.4.7", 214 | "targets": [ 215 | { 216 | "datasource": { 217 | "uid": "$datasource" 218 | }, 219 | "editorMode": "code", 220 | "expr": "histogram_quantile(0.95, 
sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 221 | "legendFormat": "{{instance}}: p95", 222 | "range": true, 223 | "refId": "A" 224 | }, 225 | { 226 | "datasource": { 227 | "uid": "$datasource" 228 | }, 229 | "editorMode": "code", 230 | "expr": "histogram_quantile(0.90, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 231 | "legendFormat": "{{instance}}: p90", 232 | "range": true, 233 | "refId": "B" 234 | }, 235 | { 236 | "datasource": { 237 | "uid": "$datasource" 238 | }, 239 | "editorMode": "code", 240 | "expr": "histogram_quantile(0.5, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 241 | "legendFormat": "{{instance}}: p50", 242 | "range": true, 243 | "refId": "C" 244 | } 245 | ], 246 | "title": "workqueue service time", 247 | "type": "timeseries" 248 | }, 249 | { 250 | "datasource": { 251 | "type": "prometheus", 252 | "uid": "$datasource" 253 | }, 254 | "description": "95th percentile request latency for Kubernetes apiserver component", 255 | "fieldConfig": { 256 | "defaults": { 257 | "color": { 258 | "mode": "palette-classic" 259 | }, 260 | "custom": { 261 | "axisCenteredZero": false, 262 | "axisColorMode": "text", 263 | "axisLabel": "latency", 264 | "axisPlacement": "auto", 265 | "barAlignment": 0, 266 | "drawStyle": "line", 267 | "fillOpacity": 10, 268 | "gradientMode": "none", 269 | "hideFrom": { 270 | "legend": false, 271 | "tooltip": false, 272 | "viz": false 273 | }, 274 | "lineInterpolation": "linear", 275 | "lineWidth": 1, 276 | "pointSize": 5, 277 | "scaleDistribution": { 278 | "type": "linear" 279 | }, 280 | "showPoints": "never", 281 | "spanNulls": false, 282 | "stacking": { 283 | "group": "A", 284 | "mode": "none" 285 | }, 286 | "thresholdsStyle": { 287 | "mode": "off" 288 | } 289 | }, 290 
| "links": [], 291 | "mappings": [], 292 | "thresholds": { 293 | "mode": "absolute", 294 | "steps": [ 295 | { 296 | "color": "green", 297 | "value": null 298 | }, 299 | { 300 | "color": "red", 301 | "value": 80 302 | } 303 | ] 304 | }, 305 | "unit": "s" 306 | }, 307 | "overrides": [] 308 | }, 309 | "gridPos": { 310 | "h": 9, 311 | "w": 12, 312 | "x": 0, 313 | "y": 8 314 | }, 315 | "id": 2, 316 | "options": { 317 | "legend": { 318 | "calcs": [], 319 | "displayMode": "table", 320 | "placement": "right", 321 | "showLegend": true 322 | }, 323 | "tooltip": { 324 | "mode": "multi", 325 | "sort": "none" 326 | } 327 | }, 328 | "pluginVersion": "9.4.7", 329 | "targets": [ 330 | { 331 | "datasource": { 332 | "uid": "$datasource" 333 | }, 334 | "editorMode": "code", 335 | "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"CONNECT|WATCH\",cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 336 | "legendFormat": "{{instance}}: p95", 337 | "range": true, 338 | "refId": "A" 339 | }, 340 | { 341 | "datasource": { 342 | "uid": "$datasource" 343 | }, 344 | "editorMode": "code", 345 | "expr": "histogram_quantile(0.90, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"CONNECT|WATCH\",cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 346 | "legendFormat": "{{instance}}: p90", 347 | "range": true, 348 | "refId": "B" 349 | }, 350 | { 351 | "datasource": { 352 | "uid": "$datasource" 353 | }, 354 | "editorMode": "code", 355 | "expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{verb!~\"CONNECT|WATCH\",cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 356 | "legendFormat": "{{instance}}: p50 ", 357 | "range": true, 358 | "refId": "C" 359 | } 360 | ], 361 | "title": "apiserver request latency", 362 | "type": "timeseries" 363 | }, 364 | { 365 | "datasource": { 366 | "type": "prometheus", 367 | "uid": 
"$datasource" 368 | }, 369 | "fieldConfig": { 370 | "defaults": { 371 | "color": { 372 | "mode": "palette-classic" 373 | }, 374 | "custom": { 375 | "axisCenteredZero": false, 376 | "axisColorMode": "text", 377 | "axisLabel": "processing time", 378 | "axisPlacement": "auto", 379 | "barAlignment": 0, 380 | "drawStyle": "line", 381 | "fillOpacity": 10, 382 | "gradientMode": "none", 383 | "hideFrom": { 384 | "legend": false, 385 | "tooltip": false, 386 | "viz": false 387 | }, 388 | "lineInterpolation": "linear", 389 | "lineWidth": 1, 390 | "pointSize": 5, 391 | "scaleDistribution": { 392 | "type": "linear" 393 | }, 394 | "showPoints": "never", 395 | "spanNulls": false, 396 | "stacking": { 397 | "group": "A", 398 | "mode": "none" 399 | }, 400 | "thresholdsStyle": { 401 | "mode": "off" 402 | } 403 | }, 404 | "links": [], 405 | "mappings": [], 406 | "thresholds": { 407 | "mode": "absolute", 408 | "steps": [ 409 | { 410 | "color": "green", 411 | "value": null 412 | }, 413 | { 414 | "color": "red", 415 | "value": 80 416 | } 417 | ] 418 | }, 419 | "unit": "s" 420 | }, 421 | "overrides": [] 422 | }, 423 | "gridPos": { 424 | "h": 8, 425 | "w": 12, 426 | "x": 12, 427 | "y": 8 428 | }, 429 | "id": 12, 430 | "options": { 431 | "legend": { 432 | "calcs": [], 433 | "displayMode": "table", 434 | "placement": "right", 435 | "showLegend": true 436 | }, 437 | "tooltip": { 438 | "mode": "multi", 439 | "sort": "none" 440 | } 441 | }, 442 | "pluginVersion": "9.4.7", 443 | "targets": [ 444 | { 445 | "datasource": { 446 | "uid": "$datasource" 447 | }, 448 | "editorMode": "code", 449 | "expr": "histogram_quantile(0.95, sum(rate(workqueue_work_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 450 | "legendFormat": "{{instance}}: p95", 451 | "range": true, 452 | "refId": "A" 453 | }, 454 | { 455 | "datasource": { 456 | "uid": "$datasource" 457 | }, 458 | "editorMode": "code", 459 | "expr": "histogram_quantile(0.9, 
sum(rate(workqueue_work_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 460 | "legendFormat": "{{instance}}: p90", 461 | "range": true, 462 | "refId": "B" 463 | }, 464 | { 465 | "datasource": { 466 | "uid": "$datasource" 467 | }, 468 | "editorMode": "code", 469 | "expr": "histogram_quantile(0.5, sum(rate(workqueue_work_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 470 | "legendFormat": "{{instance}}: p50", 471 | "range": true, 472 | "refId": "C" 473 | } 474 | ], 475 | "title": "workqueue processing time", 476 | "type": "timeseries" 477 | }, 478 | { 479 | "datasource": { 480 | "type": "prometheus", 481 | "uid": "$datasource" 482 | }, 483 | "description": "etcd request latency (p95)", 484 | "fieldConfig": { 485 | "defaults": { 486 | "color": { 487 | "mode": "palette-classic" 488 | }, 489 | "custom": { 490 | "axisCenteredZero": false, 491 | "axisColorMode": "text", 492 | "axisLabel": "latency", 493 | "axisPlacement": "auto", 494 | "barAlignment": 0, 495 | "drawStyle": "line", 496 | "fillOpacity": 10, 497 | "gradientMode": "none", 498 | "hideFrom": { 499 | "legend": false, 500 | "tooltip": false, 501 | "viz": false 502 | }, 503 | "lineInterpolation": "linear", 504 | "lineWidth": 1, 505 | "pointSize": 5, 506 | "scaleDistribution": { 507 | "type": "linear" 508 | }, 509 | "showPoints": "never", 510 | "spanNulls": false, 511 | "stacking": { 512 | "group": "A", 513 | "mode": "none" 514 | }, 515 | "thresholdsStyle": { 516 | "mode": "off" 517 | } 518 | }, 519 | "links": [], 520 | "mappings": [], 521 | "thresholds": { 522 | "mode": "absolute", 523 | "steps": [ 524 | { 525 | "color": "green", 526 | "value": null 527 | }, 528 | { 529 | "color": "red", 530 | "value": 80 531 | } 532 | ] 533 | }, 534 | "unit": "s" 535 | }, 536 | "overrides": [] 537 | }, 538 | "gridPos": { 539 | "h": 8, 540 | "w": 12, 541 | "x": 0, 542 | "y": 17 543 | }, 544 | "id": 4, 545 | 
"options": { 546 | "legend": { 547 | "calcs": [], 548 | "displayMode": "table", 549 | "placement": "right", 550 | "showLegend": true 551 | }, 552 | "tooltip": { 553 | "mode": "multi", 554 | "sort": "none" 555 | } 556 | }, 557 | "pluginVersion": "9.4.7", 558 | "targets": [ 559 | { 560 | "datasource": { 561 | "uid": "$datasource" 562 | }, 563 | "editorMode": "code", 564 | "expr": "histogram_quantile(0.95, sum(rate(etcd_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 565 | "legendFormat": "{{instance}}: p95", 566 | "range": true, 567 | "refId": "A" 568 | }, 569 | { 570 | "datasource": { 571 | "uid": "$datasource" 572 | }, 573 | "editorMode": "code", 574 | "expr": "histogram_quantile(0.90, sum(rate(etcd_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 575 | "legendFormat": "{{instance}}: p90", 576 | "range": true, 577 | "refId": "B" 578 | }, 579 | { 580 | "datasource": { 581 | "uid": "$datasource" 582 | }, 583 | "editorMode": "code", 584 | "expr": "histogram_quantile(0.5, sum(rate(etcd_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"$job\", instance=~\"$instance\"}[5m])) by (instance,le))", 585 | "legendFormat": "{{instance}}: p50", 586 | "range": true, 587 | "refId": "C" 588 | } 589 | ], 590 | "title": "etcd request latency", 591 | "type": "timeseries" 592 | } 593 | ], 594 | "refresh": "10s", 595 | "revision": 1, 596 | "schemaVersion": 38, 597 | "style": "dark", 598 | "tags": [ 599 | "Infrastructure" 600 | ], 601 | "templating": { 602 | "list": [ 603 | { 604 | "current": { 605 | "selected": false, 606 | "text": "default", 607 | "value": "default" 608 | }, 609 | "hide": 0, 610 | "includeAll": false, 611 | "label": "Data source", 612 | "multi": false, 613 | "name": "datasource", 614 | "options": [], 615 | "query": "prometheus", 616 | "queryValue": "", 617 | "refresh": 1, 618 | "regex": "", 619 | "skipUrlSync": false, 620 | 
"type": "datasource" 621 | }, 622 | { 623 | "definition": "label_values(up{job=\"$job\"}, cluster)", 624 | "hide": 0, 625 | "includeAll": false, 626 | "label": "cluster", 627 | "multi": false, 628 | "name": "cluster", 629 | "options": [], 630 | "query": { 631 | "query": "label_values(up{job=\"$job\"}, cluster)", 632 | "refId": "StandardVariableQuery" 633 | }, 634 | "refresh": 2, 635 | "regex": "", 636 | "skipUrlSync": false, 637 | "sort": 1, 638 | "type": "query" 639 | }, 640 | { 641 | "definition": "label_values(apiserver_request_total, job)", 642 | "hide": 2, 643 | "includeAll": false, 644 | "label": "job", 645 | "multi": false, 646 | "name": "job", 647 | "options": [], 648 | "query": { 649 | "query": "label_values(apiserver_request_total, job)", 650 | "refId": "StandardVariableQuery" 651 | }, 652 | "refresh": 1, 653 | "regex": "", 654 | "skipUrlSync": false, 655 | "sort": 0, 656 | "type": "query" 657 | }, 658 | { 659 | "allValue": ".*", 660 | "current": { 661 | "selected": true, 662 | "text": [ 663 | "All" 664 | ], 665 | "value": [ 666 | "$__all" 667 | ] 668 | }, 669 | "definition": "label_values(up{job=\"$job\", cluster=\"$cluster\"}, instance)", 670 | "description": "EKS api server instances", 671 | "hide": 0, 672 | "includeAll": true, 673 | "label": "instance", 674 | "multi": true, 675 | "name": "instance", 676 | "options": [], 677 | "query": { 678 | "query": "label_values(up{job=\"$job\", cluster=\"$cluster\"}, instance)", 679 | "refId": "StandardVariableQuery" 680 | }, 681 | "refresh": 1, 682 | "regex": "", 683 | "skipUrlSync": false, 684 | "sort": 0, 685 | "type": "query" 686 | } 687 | ] 688 | }, 689 | "time": { 690 | "from": "now-1h", 691 | "to": "now" 692 | }, 693 | "timepicker": { 694 | "refresh_intervals": [ 695 | "5s", 696 | "10s", 697 | "30s", 698 | "1m", 699 | "5m", 700 | "15m", 701 | "30m", 702 | "1h", 703 | "2h", 704 | "1d" 705 | ], 706 | "time_options": [ 707 | "5m", 708 | "15m", 709 | "1h", 710 | "6h", 711 | "12h", 712 | "24h", 713 | "2d", 714 | 
"7d", 715 | "30d" 716 | ] 717 | }, 718 | "timezone": "utc", 719 | "title": "Kubernetes / Kube-apiserver (basic)", 720 | "uid": "R6abPf9Zz", 721 | "version": 5, 722 | "weekStart": "" 723 | } 724 | -------------------------------------------------------------------------------- /artifacts/grafana-dashboards/eks/infrastructure/kcm.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [], 3 | "__elements": {}, 4 | "__requires": [ 5 | { 6 | "type": "panel", 7 | "id": "barchart", 8 | "name": "Bar chart", 9 | "version": "" 10 | }, 11 | { 12 | "type": "grafana", 13 | "id": "grafana", 14 | "name": "Grafana", 15 | "version": "10.4.1" 16 | }, 17 | { 18 | "type": "datasource", 19 | "id": "prometheus", 20 | "name": "Prometheus", 21 | "version": "1.0.0" 22 | }, 23 | { 24 | "type": "panel", 25 | "id": "timeseries", 26 | "name": "Time series", 27 | "version": "" 28 | } 29 | ], 30 | "annotations": { 31 | "list": [ 32 | { 33 | "builtIn": 1, 34 | "datasource": { 35 | "type": "grafana", 36 | "uid": "-- Grafana --" 37 | }, 38 | "enable": true, 39 | "hide": true, 40 | "iconColor": "rgba(0, 211, 255, 1)", 41 | "name": "Annotations & Alerts", 42 | "type": "dashboard" 43 | } 44 | ] 45 | }, 46 | "editable": true, 47 | "fiscalYearStartMonth": 0, 48 | "graphTooltip": 0, 49 | "id": null, 50 | "links": [ 51 | { 52 | "asDropdown": true, 53 | "includeVars": true, 54 | "keepTime": true, 55 | "tags": [ 56 | "kubernetes-mixin" 57 | ], 58 | "targetBlank": false, 59 | "title": "Kubernetes", 60 | "type": "dashboards" 61 | } 62 | ], 63 | "panels": [ 64 | { 65 | "collapsed": false, 66 | "gridPos": { 67 | "h": 1, 68 | "w": 24, 69 | "x": 0, 70 | "y": 0 71 | }, 72 | "id": 14, 73 | "panels": [], 74 | "title": "Top 5 View", 75 | "type": "row" 76 | }, 77 | { 78 | "datasource": { 79 | "type": "prometheus", 80 | "uid": "${datasource}" 81 | }, 82 | "fieldConfig": { 83 | "defaults": { 84 | "color": { 85 | "mode": "palette-classic" 86 | }, 87 | "custom": { 88 | 
"axisBorderShow": true, 89 | "axisCenteredZero": false, 90 | "axisColorMode": "text", 91 | "axisLabel": "", 92 | "axisPlacement": "auto", 93 | "fillOpacity": 100, 94 | "gradientMode": "none", 95 | "hideFrom": { 96 | "legend": false, 97 | "tooltip": false, 98 | "viz": false 99 | }, 100 | "lineWidth": 1, 101 | "scaleDistribution": { 102 | "type": "linear" 103 | }, 104 | "thresholdsStyle": { 105 | "mode": "off" 106 | } 107 | }, 108 | "mappings": [], 109 | "thresholds": { 110 | "mode": "absolute", 111 | "steps": [ 112 | { 113 | "color": "green", 114 | "value": null 115 | }, 116 | { 117 | "color": "red", 118 | "value": 80 119 | } 120 | ] 121 | }, 122 | "unit": "s" 123 | }, 124 | "overrides": [] 125 | }, 126 | "gridPos": { 127 | "h": 9, 128 | "w": 24, 129 | "x": 0, 130 | "y": 1 131 | }, 132 | "id": 11, 133 | "options": { 134 | "barRadius": 0, 135 | "barWidth": 1, 136 | "fullHighlight": true, 137 | "groupWidth": 1, 138 | "legend": { 139 | "calcs": [ 140 | "last", 141 | "max" 142 | ], 143 | "displayMode": "table", 144 | "placement": "right", 145 | "showLegend": true, 146 | "sortBy": "Max", 147 | "sortDesc": true 148 | }, 149 | "orientation": "auto", 150 | "showValue": "always", 151 | "stacking": "none", 152 | "tooltip": { 153 | "mode": "single", 154 | "sort": "none" 155 | }, 156 | "xTickLabelRotation": 0, 157 | "xTickLabelSpacing": -100 158 | }, 159 | "targets": [ 160 | { 161 | "datasource": { 162 | "type": "prometheus", 163 | "uid": "$datasource" 164 | }, 165 | "editorMode": "code", 166 | "expr": "topk(5, rate(workqueue_queue_duration_seconds_sum{job=\"$job\"}[$__rate_interval]) / rate(workqueue_queue_duration_seconds_count{job=\"$job\"}[$__rate_interval]))", 167 | "instant": false, 168 | "interval": "1m", 169 | "legendFormat": "{{name}}", 170 | "range": true, 171 | "refId": "A" 172 | } 173 | ], 174 | "title": "Length of Time in Queue (Top 5)", 175 | "transparent": true, 176 | "type": "barchart" 177 | }, 178 | { 179 | "datasource": { 180 | "type": "prometheus", 181 | 
"uid": "$datasource" 182 | }, 183 | "fieldConfig": { 184 | "defaults": { 185 | "color": { 186 | "mode": "palette-classic" 187 | }, 188 | "custom": { 189 | "axisBorderShow": true, 190 | "axisCenteredZero": false, 191 | "axisColorMode": "text", 192 | "axisLabel": "", 193 | "axisPlacement": "auto", 194 | "fillOpacity": 100, 195 | "gradientMode": "none", 196 | "hideFrom": { 197 | "legend": false, 198 | "tooltip": false, 199 | "viz": false 200 | }, 201 | "lineWidth": 1, 202 | "scaleDistribution": { 203 | "type": "linear" 204 | }, 205 | "thresholdsStyle": { 206 | "mode": "off" 207 | } 208 | }, 209 | "fieldMinMax": false, 210 | "mappings": [], 211 | "thresholds": { 212 | "mode": "absolute", 213 | "steps": [ 214 | { 215 | "color": "green", 216 | "value": null 217 | }, 218 | { 219 | "color": "red", 220 | "value": 80 221 | } 222 | ] 223 | }, 224 | "unit": "s" 225 | }, 226 | "overrides": [] 227 | }, 228 | "gridPos": { 229 | "h": 9, 230 | "w": 24, 231 | "x": 0, 232 | "y": 10 233 | }, 234 | "id": 15, 235 | "options": { 236 | "barRadius": 0, 237 | "barWidth": 1, 238 | "fullHighlight": true, 239 | "groupWidth": 1, 240 | "legend": { 241 | "calcs": [ 242 | "last", 243 | "max" 244 | ], 245 | "displayMode": "table", 246 | "placement": "right", 247 | "showLegend": true, 248 | "sortBy": "Max", 249 | "sortDesc": true 250 | }, 251 | "orientation": "auto", 252 | "showValue": "always", 253 | "stacking": "none", 254 | "tooltip": { 255 | "mode": "single", 256 | "sort": "none" 257 | }, 258 | "xTickLabelRotation": 0, 259 | "xTickLabelSpacing": 100 260 | }, 261 | "targets": [ 262 | { 263 | "datasource": { 264 | "type": "prometheus", 265 | "uid": "$datasource" 266 | }, 267 | "editorMode": "code", 268 | "expr": "topk(5, rate(workqueue_work_duration_seconds_sum{job=\"$job\"}[$__rate_interval]) / rate(workqueue_work_duration_seconds_count{job=\"$job\"}[$__rate_interval])) > 0", 269 | "hide": false, 270 | "instant": false, 271 | "interval": "1m", 272 | "legendFormat": "{{name}}", 273 | "range": 
true, 274 | "refId": "B" 275 | } 276 | ], 277 | "title": "How Long Requests are Taking to Execute (Top 5)", 278 | "transparent": true, 279 | "type": "barchart" 280 | }, 281 | { 282 | "datasource": { 283 | "type": "prometheus", 284 | "uid": "$datasource" 285 | }, 286 | "fieldConfig": { 287 | "defaults": { 288 | "color": { 289 | "mode": "palette-classic" 290 | }, 291 | "custom": { 292 | "axisBorderShow": true, 293 | "axisCenteredZero": false, 294 | "axisColorMode": "text", 295 | "axisLabel": "", 296 | "axisPlacement": "auto", 297 | "fillOpacity": 100, 298 | "gradientMode": "none", 299 | "hideFrom": { 300 | "legend": false, 301 | "tooltip": false, 302 | "viz": false 303 | }, 304 | "lineWidth": 0, 305 | "scaleDistribution": { 306 | "type": "linear" 307 | }, 308 | "thresholdsStyle": { 309 | "mode": "off" 310 | } 311 | }, 312 | "mappings": [], 313 | "thresholds": { 314 | "mode": "absolute", 315 | "steps": [ 316 | { 317 | "color": "green", 318 | "value": null 319 | }, 320 | { 321 | "color": "red", 322 | "value": 80 323 | } 324 | ] 325 | }, 326 | "unit": "short" 327 | }, 328 | "overrides": [] 329 | }, 330 | "gridPos": { 331 | "h": 11, 332 | "w": 24, 333 | "x": 0, 334 | "y": 19 335 | }, 336 | "id": 3, 337 | "interval": "1m", 338 | "options": { 339 | "barRadius": 0, 340 | "barWidth": 0.9, 341 | "fullHighlight": true, 342 | "groupWidth": 0.99, 343 | "legend": { 344 | "calcs": [ 345 | "last", 346 | "max" 347 | ], 348 | "displayMode": "table", 349 | "placement": "right", 350 | "showLegend": true 351 | }, 352 | "orientation": "auto", 353 | "showValue": "auto", 354 | "stacking": "none", 355 | "tooltip": { 356 | "mode": "single", 357 | "sort": "none" 358 | }, 359 | "xTickLabelRotation": 0, 360 | "xTickLabelSpacing": 0 361 | }, 362 | "pluginVersion": "v11.1.0", 363 | "targets": [ 364 | { 365 | "datasource": { 366 | "type": "prometheus", 367 | "uid": "$datasource" 368 | }, 369 | "editorMode": "code", 370 | "expr": "topk(5, 
sum_over_time(workqueue_depth{job=\"$job\"}[$__rate_interval])) > 0", 371 | "instant": false, 372 | "interval": "1m", 373 | "legendFormat": "{{name}}", 374 | "range": true, 375 | "refId": "A" 376 | } 377 | ], 378 | "title": "Number of Objects Waiting to be Processed (Top 5)", 379 | "transparent": true, 380 | "type": "barchart" 381 | }, 382 | { 383 | "collapsed": false, 384 | "gridPos": { 385 | "h": 1, 386 | "w": 24, 387 | "x": 0, 388 | "y": 30 389 | }, 390 | "id": 13, 391 | "panels": [], 392 | "title": "Full View", 393 | "type": "row" 394 | }, 395 | { 396 | "datasource": { 397 | "type": "prometheus", 398 | "uid": "$datasource" 399 | }, 400 | "description": "Length of Time in Queue", 401 | "fieldConfig": { 402 | "defaults": { 403 | "color": { 404 | "mode": "palette-classic" 405 | }, 406 | "custom": { 407 | "axisBorderShow": false, 408 | "axisCenteredZero": false, 409 | "axisColorMode": "text", 410 | "axisGridShow": false, 411 | "axisLabel": "", 412 | "axisPlacement": "auto", 413 | "barAlignment": 0, 414 | "drawStyle": "line", 415 | "fillOpacity": 89, 416 | "gradientMode": "none", 417 | "hideFrom": { 418 | "legend": false, 419 | "tooltip": false, 420 | "viz": false 421 | }, 422 | "insertNulls": false, 423 | "lineInterpolation": "smooth", 424 | "lineWidth": 5, 425 | "pointSize": 5, 426 | "scaleDistribution": { 427 | "type": "linear" 428 | }, 429 | "showPoints": "auto", 430 | "spanNulls": false, 431 | "stacking": { 432 | "group": "A", 433 | "mode": "none" 434 | }, 435 | "thresholdsStyle": { 436 | "mode": "off" 437 | } 438 | }, 439 | "mappings": [], 440 | "thresholds": { 441 | "mode": "absolute", 442 | "steps": [ 443 | { 444 | "color": "green", 445 | "value": null 446 | }, 447 | { 448 | "color": "red", 449 | "value": 80 450 | } 451 | ] 452 | }, 453 | "unit": "s" 454 | }, 455 | "overrides": [] 456 | }, 457 | "gridPos": { 458 | "h": 9, 459 | "w": 24, 460 | "x": 0, 461 | "y": 31 462 | }, 463 | "id": 12, 464 | "options": { 465 | "legend": { 466 | "calcs": [ 467 | "last" 468 
| ], 469 | "displayMode": "table", 470 | "placement": "right", 471 | "showLegend": true, 472 | "sortBy": "Last", 473 | "sortDesc": true 474 | }, 475 | "tooltip": { 476 | "mode": "single", 477 | "sort": "none" 478 | } 479 | }, 480 | "targets": [ 481 | { 482 | "datasource": { 483 | "type": "prometheus", 484 | "uid": "$datasource" 485 | }, 486 | "editorMode": "code", 487 | "expr": "rate(workqueue_queue_duration_seconds_sum{job=\"$job\"}[$__rate_interval]) / rate(workqueue_queue_duration_seconds_count{job=\"$job\"}[$__rate_interval]) > 0", 488 | "instant": false, 489 | "interval": "1m", 490 | "legendFormat": "{{name}}", 491 | "range": true, 492 | "refId": "A" 493 | } 494 | ], 495 | "title": "Length of Time in Queue", 496 | "transparent": true, 497 | "type": "timeseries" 498 | }, 499 | { 500 | "datasource": { 501 | "type": "prometheus", 502 | "uid": "$datasource" 503 | }, 504 | "fieldConfig": { 505 | "defaults": { 506 | "color": { 507 | "mode": "palette-classic" 508 | }, 509 | "custom": { 510 | "axisBorderShow": false, 511 | "axisCenteredZero": false, 512 | "axisColorMode": "text", 513 | "axisGridShow": false, 514 | "axisLabel": "", 515 | "axisPlacement": "auto", 516 | "barAlignment": 0, 517 | "drawStyle": "line", 518 | "fillOpacity": 85, 519 | "gradientMode": "none", 520 | "hideFrom": { 521 | "legend": false, 522 | "tooltip": false, 523 | "viz": false 524 | }, 525 | "insertNulls": false, 526 | "lineInterpolation": "linear", 527 | "lineWidth": 6, 528 | "pointSize": 5, 529 | "scaleDistribution": { 530 | "type": "linear" 531 | }, 532 | "showPoints": "never", 533 | "spanNulls": true, 534 | "stacking": { 535 | "group": "A", 536 | "mode": "none" 537 | }, 538 | "thresholdsStyle": { 539 | "mode": "off" 540 | } 541 | }, 542 | "fieldMinMax": true, 543 | "mappings": [], 544 | "thresholds": { 545 | "mode": "absolute", 546 | "steps": [ 547 | { 548 | "color": "green", 549 | "value": null 550 | }, 551 | { 552 | "color": "red", 553 | "value": 80 554 | } 555 | ] 556 | }, 557 | "unit": 
"short" 558 | }, 559 | "overrides": [] 560 | }, 561 | "gridPos": { 562 | "h": 7, 563 | "w": 24, 564 | "x": 0, 565 | "y": 40 566 | }, 567 | "id": 4, 568 | "interval": "1m", 569 | "options": { 570 | "legend": { 571 | "asTable": true, 572 | "calcs": [ 573 | "last", 574 | "max" 575 | ], 576 | "displayMode": "table", 577 | "placement": "right", 578 | "showLegend": true 579 | }, 580 | "tooltip": { 581 | "mode": "single", 582 | "sort": "none" 583 | } 584 | }, 585 | "pluginVersion": "v11.1.0", 586 | "targets": [ 587 | { 588 | "datasource": { 589 | "type": "prometheus", 590 | "uid": "$datasource" 591 | }, 592 | "editorMode": "code", 593 | "expr": "sum_over_time(workqueue_depth{job=\"$job\"}[$__rate_interval]) >= 1 ", 594 | "instant": false, 595 | "interval": "1m", 596 | "legendFormat": "{{name}}", 597 | "range": true, 598 | "refId": "A" 599 | } 600 | ], 601 | "title": "Work Queue Depth (all)", 602 | "transparent": true, 603 | "type": "timeseries" 604 | }, 605 | { 606 | "datasource": { 607 | "type": "prometheus", 608 | "uid": "$datasource" 609 | }, 610 | "fieldConfig": { 611 | "defaults": { 612 | "color": { 613 | "mode": "palette-classic" 614 | }, 615 | "custom": { 616 | "axisBorderShow": false, 617 | "axisCenteredZero": false, 618 | "axisColorMode": "text", 619 | "axisGridShow": false, 620 | "axisLabel": "", 621 | "axisPlacement": "auto", 622 | "barAlignment": 0, 623 | "drawStyle": "line", 624 | "fillOpacity": 88, 625 | "gradientMode": "none", 626 | "hideFrom": { 627 | "legend": false, 628 | "tooltip": false, 629 | "viz": false 630 | }, 631 | "insertNulls": false, 632 | "lineInterpolation": "linear", 633 | "lineWidth": 6, 634 | "pointSize": 5, 635 | "scaleDistribution": { 636 | "type": "linear" 637 | }, 638 | "showPoints": "never", 639 | "spanNulls": true, 640 | "stacking": { 641 | "group": "A", 642 | "mode": "none" 643 | }, 644 | "thresholdsStyle": { 645 | "mode": "off" 646 | } 647 | }, 648 | "mappings": [], 649 | "thresholds": { 650 | "mode": "absolute", 651 | "steps": [ 
652 | { 653 | "color": "green", 654 | "value": null 655 | }, 656 | { 657 | "color": "red", 658 | "value": 80 659 | } 660 | ] 661 | }, 662 | "unit": "short" 663 | }, 664 | "overrides": [] 665 | }, 666 | "gridPos": { 667 | "h": 8, 668 | "w": 24, 669 | "x": 0, 670 | "y": 47 671 | }, 672 | "id": 2, 673 | "interval": "1m", 674 | "options": { 675 | "legend": { 676 | "asTable": true, 677 | "calcs": [ 678 | "last", 679 | "max" 680 | ], 681 | "displayMode": "table", 682 | "placement": "right", 683 | "showLegend": true 684 | }, 685 | "tooltip": { 686 | "mode": "single", 687 | "sort": "none" 688 | } 689 | }, 690 | "pluginVersion": "v11.1.0", 691 | "targets": [ 692 | { 693 | "datasource": { 694 | "type": "prometheus", 695 | "uid": "$datasource" 696 | }, 697 | "editorMode": "code", 698 | "expr": "sum(increase(workqueue_adds_total{job=\"$job\"}[$__rate_interval]) > 1) by (name)", 699 | "instant": false, 700 | "interval": "$__rate_interval", 701 | "legendFormat": "__auto", 702 | "range": true, 703 | "refId": "A" 704 | } 705 | ], 706 | "title": "Work Queue Add Rate", 707 | "transparent": true, 708 | "type": "timeseries" 709 | } 710 | ], 711 | "refresh": "5m", 712 | "schemaVersion": 39, 713 | "tags": [ 714 | "Infrastructure" 715 | ], 716 | "templating": { 717 | "list": [ 718 | { 719 | "current": { 720 | "selected": false, 721 | "text": "default", 722 | "value": "default" 723 | }, 724 | "hide": 0, 725 | "includeAll": false, 726 | "label": "Data source", 727 | "multi": false, 728 | "name": "datasource", 729 | "options": [], 730 | "query": "prometheus", 731 | "queryValue": "", 732 | "refresh": 1, 733 | "regex": "", 734 | "skipUrlSync": false, 735 | "type": "datasource" 736 | }, 737 | { 738 | "current": {}, 739 | "datasource": { 740 | "type": "prometheus", 741 | "uid": "$datasource" 742 | }, 743 | "definition": "label_values(job)", 744 | "hide": 0, 745 | "includeAll": false, 746 | "label": "Job", 747 | "multi": false, 748 | "name": "job", 749 | "options": [], 750 | "query": { 751 | 
"qryType": 1, 752 | "query": "label_values(job)", 753 | "refId": "PrometheusVariableQueryEditor-VariableQuery" 754 | }, 755 | "refresh": 1, 756 | "regex": "", 757 | "skipUrlSync": false, 758 | "sort": 0, 759 | "type": "query" 760 | } 761 | ] 762 | }, 763 | "time": { 764 | "from": "now-1h", 765 | "to": "now" 766 | }, 767 | "timepicker": { 768 | "refresh_intervals": [ 769 | "5m" 770 | ] 771 | }, 772 | "timezone": "utc", 773 | "title": "EKS Controller Manager", 774 | "uid": "86753092", 775 | "version": 3, 776 | "weekStart": "" 777 | } -------------------------------------------------------------------------------- /artifacts/grafana-dashboards/eks/infrastructure/ksh.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [], 3 | "__elements": {}, 4 | "__requires": [ 5 | { 6 | "type": "grafana", 7 | "id": "grafana", 8 | "name": "Grafana", 9 | "version": "10.4.1" 10 | }, 11 | { 12 | "type": "datasource", 13 | "id": "prometheus", 14 | "name": "Prometheus", 15 | "version": "1.0.0" 16 | }, 17 | { 18 | "type": "panel", 19 | "id": "timeseries", 20 | "name": "Time series", 21 | "version": "" 22 | } 23 | ], 24 | "annotations": { 25 | "list": [ 26 | { 27 | "builtIn": 1, 28 | "datasource": { 29 | "type": "datasource", 30 | "uid": "grafana" 31 | }, 32 | "enable": true, 33 | "hide": true, 34 | "iconColor": "rgba(0, 211, 255, 1)", 35 | "name": "Annotations & Alerts", 36 | "target": { 37 | "limit": 100, 38 | "matchAny": false, 39 | "tags": [], 40 | "type": "dashboard" 41 | }, 42 | "type": "dashboard" 43 | } 44 | ] 45 | }, 46 | "description": "", 47 | "editable": true, 48 | "fiscalYearStartMonth": 0, 49 | "graphTooltip": 0, 50 | "id": null, 51 | "links": [], 52 | "liveNow": false, 53 | "panels": [ 54 | { 55 | "datasource": { 56 | "type": "prometheus", 57 | "uid": "$datasource" 58 | }, 59 | "fieldConfig": { 60 | "defaults": { 61 | "color": { 62 | "mode": "palette-classic" 63 | }, 64 | "custom": { 65 | "axisBorderShow": true, 66 | 
"axisCenteredZero": false, 67 | "axisColorMode": "text", 68 | "axisGridShow": false, 69 | "axisLabel": "", 70 | "axisPlacement": "auto", 71 | "barAlignment": 0, 72 | "drawStyle": "line", 73 | "fillOpacity": 76, 74 | "gradientMode": "none", 75 | "hideFrom": { 76 | "legend": false, 77 | "tooltip": false, 78 | "viz": false 79 | }, 80 | "insertNulls": false, 81 | "lineInterpolation": "linear", 82 | "lineWidth": 8, 83 | "pointSize": 5, 84 | "scaleDistribution": { 85 | "type": "linear" 86 | }, 87 | "showPoints": "auto", 88 | "spanNulls": false, 89 | "stacking": { 90 | "group": "A", 91 | "mode": "none" 92 | }, 93 | "thresholdsStyle": { 94 | "mode": "off" 95 | } 96 | }, 97 | "mappings": [], 98 | "thresholds": { 99 | "mode": "absolute", 100 | "steps": [ 101 | { 102 | "color": "green", 103 | "value": null 104 | }, 105 | { 106 | "color": "red", 107 | "value": 80 108 | } 109 | ] 110 | } 111 | }, 112 | "overrides": [] 113 | }, 114 | "gridPos": { 115 | "h": 12, 116 | "w": 24, 117 | "x": 0, 118 | "y": 0 119 | }, 120 | "id": 9, 121 | "options": { 122 | "legend": { 123 | "calcs": [ 124 | "last", 125 | "max" 126 | ], 127 | "displayMode": "table", 128 | "placement": "bottom", 129 | "showLegend": true 130 | }, 131 | "tooltip": { 132 | "mode": "single", 133 | "sort": "none" 134 | } 135 | }, 136 | "targets": [ 137 | { 138 | "datasource": { 139 | "type": "prometheus", 140 | "uid": "$datasource" 141 | }, 142 | "editorMode": "code", 143 | "expr": "sum(sum_over_time(scheduler_pending_pods[1m]))by (queue)", 144 | "instant": false, 145 | "interval": "1m", 146 | "legendFormat": "__auto", 147 | "range": true, 148 | "refId": "A" 149 | } 150 | ], 151 | "title": "Pending Pods", 152 | "transparent": true, 153 | "type": "timeseries" 154 | }, 155 | { 156 | "datasource": { 157 | "type": "prometheus", 158 | "uid": "$datasource" 159 | }, 160 | "fieldConfig": { 161 | "defaults": { 162 | "color": { 163 | "fixedColor": "blue", 164 | "mode": "thresholds" 165 | }, 166 | "custom": { 167 | "axisBorderShow": 
true, 168 | "axisCenteredZero": false, 169 | "axisColorMode": "text", 170 | "axisGridShow": false, 171 | "axisLabel": "", 172 | "axisPlacement": "auto", 173 | "barAlignment": 0, 174 | "drawStyle": "line", 175 | "fillOpacity": 75, 176 | "gradientMode": "none", 177 | "hideFrom": { 178 | "legend": false, 179 | "tooltip": false, 180 | "viz": false 181 | }, 182 | "insertNulls": false, 183 | "lineInterpolation": "linear", 184 | "lineWidth": 4, 185 | "pointSize": 5, 186 | "scaleDistribution": { 187 | "type": "linear" 188 | }, 189 | "showPoints": "auto", 190 | "spanNulls": false, 191 | "stacking": { 192 | "group": "A", 193 | "mode": "none" 194 | }, 195 | "thresholdsStyle": { 196 | "mode": "off" 197 | } 198 | }, 199 | "fieldMinMax": false, 200 | "mappings": [], 201 | "thresholds": { 202 | "mode": "absolute", 203 | "steps": [ 204 | { 205 | "color": "blue", 206 | "value": null 207 | }, 208 | { 209 | "color": "red", 210 | "value": 80 211 | } 212 | ] 213 | } 214 | }, 215 | "overrides": [] 216 | }, 217 | "gridPos": { 218 | "h": 12, 219 | "w": 24, 220 | "x": 0, 221 | "y": 12 222 | }, 223 | "id": 11, 224 | "options": { 225 | "legend": { 226 | "calcs": [], 227 | "displayMode": "table", 228 | "placement": "bottom", 229 | "showLegend": true 230 | }, 231 | "tooltip": { 232 | "mode": "single", 233 | "sort": "none" 234 | } 235 | }, 236 | "pluginVersion": "11.2.2+security-01", 237 | "targets": [ 238 | { 239 | "datasource": { 240 | "type": "prometheus", 241 | "uid": "$datasource" 242 | }, 243 | "editorMode": "code", 244 | "expr": "increase(scheduler_pod_scheduling_attempts_sum[$__rate_interval]) / increase(scheduler_pod_scheduling_attempts_count[$__rate_interval])", 245 | "hide": false, 246 | "instant": false, 247 | "interval": "1m", 248 | "legendFormat": "{{instance}}", 249 | "range": true, 250 | "refId": "A" 251 | } 252 | ], 253 | "title": "Scheduling Attempts", 254 | "transparent": true, 255 | "type": "timeseries" 256 | }, 257 | { 258 | "datasource": { 259 | "type": "prometheus", 260 | 
"uid": "$datasource" 261 | }, 262 | "fieldConfig": { 263 | "defaults": { 264 | "color": { 265 | "fixedColor": "purple", 266 | "mode": "thresholds" 267 | }, 268 | "custom": { 269 | "axisBorderShow": true, 270 | "axisCenteredZero": false, 271 | "axisColorMode": "text", 272 | "axisGridShow": false, 273 | "axisLabel": "", 274 | "axisPlacement": "auto", 275 | "barAlignment": 0, 276 | "drawStyle": "line", 277 | "fillOpacity": 76, 278 | "gradientMode": "none", 279 | "hideFrom": { 280 | "legend": false, 281 | "tooltip": false, 282 | "viz": false 283 | }, 284 | "insertNulls": false, 285 | "lineInterpolation": "linear", 286 | "lineWidth": 8, 287 | "pointSize": 5, 288 | "scaleDistribution": { 289 | "type": "linear" 290 | }, 291 | "showPoints": "auto", 292 | "spanNulls": false, 293 | "stacking": { 294 | "group": "A", 295 | "mode": "none" 296 | }, 297 | "thresholdsStyle": { 298 | "mode": "off" 299 | } 300 | }, 301 | "mappings": [], 302 | "thresholds": { 303 | "mode": "absolute", 304 | "steps": [ 305 | { 306 | "color": "purple", 307 | "value": null 308 | }, 309 | { 310 | "color": "red", 311 | "value": 80 312 | } 313 | ] 314 | }, 315 | "unit": "s" 316 | }, 317 | "overrides": [] 318 | }, 319 | "gridPos": { 320 | "h": 12, 321 | "w": 24, 322 | "x": 0, 323 | "y": 24 324 | }, 325 | "id": 12, 326 | "options": { 327 | "legend": { 328 | "calcs": [ 329 | "last", 330 | "max" 331 | ], 332 | "displayMode": "table", 333 | "placement": "bottom", 334 | "showLegend": true 335 | }, 336 | "tooltip": { 337 | "mode": "single", 338 | "sort": "none" 339 | } 340 | }, 341 | "targets": [ 342 | { 343 | "datasource": { 344 | "type": "prometheus", 345 | "uid": "$datasource" 346 | }, 347 | "editorMode": "code", 348 | "expr": "rate(scheduler_scheduling_attempt_duration_seconds_sum{}[$__rate_interval]) / rate(scheduler_scheduling_attempt_duration_seconds_count[$__rate_interval])", 349 | "instant": false, 350 | "interval": "1m", 351 | "legendFormat": "{{profile}} - {{instance}}", 352 | "range": true, 353 | 
"refId": "A" 354 | } 355 | ], 356 | "title": "Scheduling Attempts Duration", 357 | "transparent": true, 358 | "type": "timeseries" 359 | }, 360 | { 361 | "datasource": { 362 | "type": "prometheus", 363 | "uid": "$datasource" 364 | }, 365 | "fieldConfig": { 366 | "defaults": { 367 | "color": { 368 | "fixedColor": "purple", 369 | "mode": "thresholds" 370 | }, 371 | "custom": { 372 | "axisBorderShow": true, 373 | "axisCenteredZero": false, 374 | "axisColorMode": "text", 375 | "axisGridShow": false, 376 | "axisLabel": "", 377 | "axisPlacement": "auto", 378 | "barAlignment": 0, 379 | "drawStyle": "line", 380 | "fillOpacity": 76, 381 | "gradientMode": "none", 382 | "hideFrom": { 383 | "legend": false, 384 | "tooltip": false, 385 | "viz": false 386 | }, 387 | "insertNulls": false, 388 | "lineInterpolation": "linear", 389 | "lineWidth": 8, 390 | "pointSize": 5, 391 | "scaleDistribution": { 392 | "type": "linear" 393 | }, 394 | "showPoints": "auto", 395 | "spanNulls": false, 396 | "stacking": { 397 | "group": "A", 398 | "mode": "none" 399 | }, 400 | "thresholdsStyle": { 401 | "mode": "off" 402 | } 403 | }, 404 | "mappings": [], 405 | "thresholds": { 406 | "mode": "absolute", 407 | "steps": [ 408 | { 409 | "color": "purple", 410 | "value": null 411 | }, 412 | { 413 | "color": "red", 414 | "value": 80 415 | } 416 | ] 417 | } 418 | }, 419 | "overrides": [] 420 | }, 421 | "gridPos": { 422 | "h": 12, 423 | "w": 24, 424 | "x": 0, 425 | "y": 36 426 | }, 427 | "id": 13, 428 | "options": { 429 | "legend": { 430 | "calcs": [ 431 | "last", 432 | "max" 433 | ], 434 | "displayMode": "table", 435 | "placement": "bottom", 436 | "showLegend": true 437 | }, 438 | "tooltip": { 439 | "mode": "single", 440 | "sort": "none" 441 | } 442 | }, 443 | "targets": [ 444 | { 445 | "datasource": { 446 | "type": "prometheus", 447 | "uid": "$datasource" 448 | }, 449 | "editorMode": "code", 450 | "expr": "rate(scheduler_preemption_attempts_total[$__rate_interval])", 451 | "instant": false, 452 | 
"interval": "1m", 453 | "legendFormat": "{{profile}} - {{instance}}", 454 | "range": true, 455 | "refId": "A" 456 | } 457 | ], 458 | "title": "Preemption Attempts", 459 | "transparent": true, 460 | "type": "timeseries" 461 | } 462 | ], 463 | "refresh": "5m", 464 | "schemaVersion": 39, 465 | "tags": [ 466 | "Infrastructure" 467 | ], 468 | "templating": { 469 | "list": [ 470 | { 471 | "current": { 472 | "selected": false, 473 | "text": "default", 474 | "value": "default" 475 | }, 476 | "hide": 2, 477 | "includeAll": false, 478 | "multi": false, 479 | "name": "datasource", 480 | "options": [], 481 | "query": "prometheus", 482 | "refresh": 1, 483 | "regex": "", 484 | "skipUrlSync": false, 485 | "type": "datasource" 486 | }, 487 | { 488 | "current": {}, 489 | "datasource": { 490 | "type": "prometheus", 491 | "uid": "$datasource" 492 | }, 493 | "definition": "label_values(scheduler_pending_pods,job)", 494 | "hide": 0, 495 | "includeAll": false, 496 | "label": "job", 497 | "multi": false, 498 | "name": "job", 499 | "options": [], 500 | "query": { 501 | "qryType": 1, 502 | "query": "label_values(scheduler_pending_pods,job)", 503 | "refId": "PrometheusVariableQueryEditor-VariableQuery" 504 | }, 505 | "refresh": 1, 506 | "regex": "", 507 | "skipUrlSync": false, 508 | "sort": 0, 509 | "type": "query" 510 | } 511 | ] 512 | }, 513 | "time": { 514 | "from": "now-15m", 515 | "to": "now" 516 | }, 517 | "timepicker": { 518 | "nowDelay": "4m", 519 | "refresh_intervals": [ 520 | "5m" 521 | ] 522 | }, 523 | "timezone": "", 524 | "title": "EKS Scheduler", 525 | "uid": "CqT0Jg-nz2", 526 | "version": 5, 527 | "weekStart": "" 528 | } -------------------------------------------------------------------------------- /artifacts/grafana-dashboards/eks/kube-proxy/kube-proxy.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 
| }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 0, 27 | "id": null, 28 | "iteration": 1666949805962, 29 | "links": [], 30 | "liveNow": false, 31 | "panels": [ 32 | { 33 | "datasource": { 34 | "type": "prometheus", 35 | "uid": "$datasource" 36 | }, 37 | "fieldConfig": { 38 | "defaults": { 39 | "color": { 40 | "mode": "thresholds" 41 | }, 42 | "mappings": [ 43 | { 44 | "options": { 45 | "match": "null", 46 | "result": { 47 | "text": "N/A" 48 | } 49 | }, 50 | "type": "special" 51 | } 52 | ], 53 | "thresholds": { 54 | "mode": "absolute", 55 | "steps": [ 56 | { 57 | "color": "green", 58 | "value": null 59 | }, 60 | { 61 | "color": "red", 62 | "value": 80 63 | } 64 | ] 65 | }, 66 | "unit": "none" 67 | }, 68 | "overrides": [] 69 | }, 70 | "gridPos": { 71 | "h": 7, 72 | "w": 4, 73 | "x": 0, 74 | "y": 0 75 | }, 76 | "id": 2, 77 | "interval": "1m", 78 | "links": [], 79 | "maxDataPoints": 100, 80 | "options": { 81 | "colorMode": "none", 82 | "graphMode": "none", 83 | "justifyMode": "auto", 84 | "orientation": "horizontal", 85 | "reduceOptions": { 86 | "calcs": [ 87 | "min" 88 | ], 89 | "fields": "", 90 | "values": false 91 | }, 92 | "textMode": "auto" 93 | }, 94 | "pluginVersion": "9.4.7", 95 | "targets": [ 96 | { 97 | "datasource": { 98 | "type": "prometheus", 99 | "uid": "$datasource" 100 | }, 101 | "expr": "sum(up{cluster=\"$cluster\", job=\"$job\"})", 102 | "format": "time_series", 103 | "intervalFactor": 2, 104 | "legendFormat": "", 105 | "refId": "A" 106 | } 107 | ], 108 | "title": "Up", 109 | "type": "stat" 110 | }, 111 | { 112 | "aliasColors": {}, 113 | "bars": false, 114 | "dashLength": 10, 115 | "dashes": false, 116 | "datasource": { 117 | "type": "prometheus", 
118 | "uid": "$datasource" 119 | }, 120 | "fill": 1, 121 | "fillGradient": 0, 122 | "gridPos": { 123 | "h": 7, 124 | "w": 10, 125 | "x": 4, 126 | "y": 0 127 | }, 128 | "hiddenSeries": false, 129 | "id": 3, 130 | "interval": "1m", 131 | "legend": { 132 | "alignAsTable": true, 133 | "avg": false, 134 | "current": false, 135 | "max": false, 136 | "min": false, 137 | "rightSide": true, 138 | "show": true, 139 | "total": false, 140 | "values": false 141 | }, 142 | "lines": true, 143 | "linewidth": 1, 144 | "links": [], 145 | "nullPointMode": "null", 146 | "options": { 147 | "alertThreshold": true 148 | }, 149 | "percentage": false, 150 | "pluginVersion": "9.4.7", 151 | "pointradius": 5, 152 | "points": false, 153 | "renderer": "flot", 154 | "seriesOverrides": [], 155 | "spaceLength": 10, 156 | "stack": false, 157 | "steppedLine": false, 158 | "targets": [ 159 | { 160 | "datasource": { 161 | "type": "prometheus", 162 | "uid": "$datasource" 163 | }, 164 | "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\"}[$__rate_interval]))", 165 | "format": "time_series", 166 | "intervalFactor": 2, 167 | "legendFormat": "rate", 168 | "refId": "A" 169 | } 170 | ], 171 | "thresholds": [], 172 | "timeRegions": [], 173 | "title": "Rules Sync Rate", 174 | "tooltip": { 175 | "shared": false, 176 | "sort": 0, 177 | "value_type": "individual" 178 | }, 179 | "type": "graph", 180 | "xaxis": { 181 | "mode": "time", 182 | "show": true, 183 | "values": [] 184 | }, 185 | "yaxes": [ 186 | { 187 | "format": "ops", 188 | "logBase": 1, 189 | "min": 0, 190 | "show": true 191 | }, 192 | { 193 | "format": "ops", 194 | "logBase": 1, 195 | "min": 0, 196 | "show": true 197 | } 198 | ], 199 | "yaxis": { 200 | "align": false 201 | } 202 | }, 203 | { 204 | "aliasColors": {}, 205 | "bars": false, 206 | "dashLength": 10, 207 | "dashes": false, 208 | "datasource": { 209 | "type": "prometheus", 210 | "uid": "$datasource" 211 | }, 212 | 
"fill": 1, 213 | "fillGradient": 0, 214 | "gridPos": { 215 | "h": 7, 216 | "w": 10, 217 | "x": 14, 218 | "y": 0 219 | }, 220 | "hiddenSeries": false, 221 | "id": 4, 222 | "interval": "1m", 223 | "legend": { 224 | "alignAsTable": true, 225 | "avg": false, 226 | "current": true, 227 | "max": false, 228 | "min": false, 229 | "rightSide": true, 230 | "show": true, 231 | "total": false, 232 | "values": true 233 | }, 234 | "lines": true, 235 | "linewidth": 1, 236 | "links": [], 237 | "nullPointMode": "null", 238 | "options": { 239 | "alertThreshold": true 240 | }, 241 | "percentage": false, 242 | "pluginVersion": "9.4.7", 243 | "pointradius": 5, 244 | "points": false, 245 | "renderer": "flot", 246 | "seriesOverrides": [], 247 | "spaceLength": 10, 248 | "stack": false, 249 | "steppedLine": false, 250 | "targets": [ 251 | { 252 | "datasource": { 253 | "type": "prometheus", 254 | "uid": "$datasource" 255 | }, 256 | "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\"}[$__rate_interval]))", 257 | "format": "time_series", 258 | "intervalFactor": 2, 259 | "legendFormat": "{{instance}}", 260 | "refId": "A" 261 | } 262 | ], 263 | "thresholds": [], 264 | "timeRegions": [], 265 | "title": "Rule Sync Latency 99th Quantile", 266 | "tooltip": { 267 | "shared": false, 268 | "sort": 0, 269 | "value_type": "individual" 270 | }, 271 | "type": "graph", 272 | "xaxis": { 273 | "mode": "time", 274 | "show": true, 275 | "values": [] 276 | }, 277 | "yaxes": [ 278 | { 279 | "format": "s", 280 | "logBase": 1, 281 | "min": 0, 282 | "show": true 283 | }, 284 | { 285 | "format": "s", 286 | "logBase": 1, 287 | "min": 0, 288 | "show": true 289 | } 290 | ], 291 | "yaxis": { 292 | "align": false 293 | } 294 | }, 295 | { 296 | "aliasColors": {}, 297 | "bars": false, 298 | "dashLength": 10, 299 | "dashes": false, 300 | "datasource": { 301 | "type": "prometheus", 302 | "uid": "$datasource" 303 | }, 304 | 
"fill": 1, 305 | "fillGradient": 0, 306 | "gridPos": { 307 | "h": 7, 308 | "w": 12, 309 | "x": 0, 310 | "y": 7 311 | }, 312 | "hiddenSeries": false, 313 | "id": 5, 314 | "interval": "1m", 315 | "legend": { 316 | "alignAsTable": true, 317 | "avg": false, 318 | "current": false, 319 | "max": false, 320 | "min": false, 321 | "rightSide": true, 322 | "show": true, 323 | "total": false, 324 | "values": false 325 | }, 326 | "lines": true, 327 | "linewidth": 1, 328 | "links": [], 329 | "nullPointMode": "null", 330 | "options": { 331 | "alertThreshold": true 332 | }, 333 | "percentage": false, 334 | "pluginVersion": "9.4.7", 335 | "pointradius": 5, 336 | "points": false, 337 | "renderer": "flot", 338 | "seriesOverrides": [], 339 | "spaceLength": 10, 340 | "stack": false, 341 | "steppedLine": false, 342 | "targets": [ 343 | { 344 | "datasource": { 345 | "type": "prometheus", 346 | "uid": "$datasource" 347 | }, 348 | "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\"}[$__rate_interval]))", 349 | "format": "time_series", 350 | "intervalFactor": 2, 351 | "legendFormat": "rate", 352 | "refId": "A" 353 | } 354 | ], 355 | "thresholds": [], 356 | "timeRegions": [], 357 | "title": "Network Programming Rate", 358 | "tooltip": { 359 | "shared": false, 360 | "sort": 0, 361 | "value_type": "individual" 362 | }, 363 | "type": "graph", 364 | "xaxis": { 365 | "mode": "time", 366 | "show": true, 367 | "values": [] 368 | }, 369 | "yaxes": [ 370 | { 371 | "format": "ops", 372 | "logBase": 1, 373 | "min": 0, 374 | "show": true 375 | }, 376 | { 377 | "format": "ops", 378 | "logBase": 1, 379 | "min": 0, 380 | "show": true 381 | } 382 | ], 383 | "yaxis": { 384 | "align": false 385 | } 386 | }, 387 | { 388 | "aliasColors": {}, 389 | "bars": false, 390 | "dashLength": 10, 391 | "dashes": false, 392 | "datasource": { 393 | "type": "prometheus", 394 | "uid": "$datasource" 395 | }, 396 | "fill": 1, 397 | 
"fillGradient": 0, 398 | "gridPos": { 399 | "h": 7, 400 | "w": 12, 401 | "x": 12, 402 | "y": 7 403 | }, 404 | "hiddenSeries": false, 405 | "id": 6, 406 | "interval": "1m", 407 | "legend": { 408 | "alignAsTable": true, 409 | "avg": false, 410 | "current": true, 411 | "max": false, 412 | "min": false, 413 | "rightSide": true, 414 | "show": true, 415 | "total": false, 416 | "values": true 417 | }, 418 | "lines": true, 419 | "linewidth": 1, 420 | "links": [], 421 | "nullPointMode": "null", 422 | "options": { 423 | "alertThreshold": true 424 | }, 425 | "percentage": false, 426 | "pluginVersion": "9.4.7", 427 | "pointradius": 5, 428 | "points": false, 429 | "renderer": "flot", 430 | "seriesOverrides": [], 431 | "spaceLength": 10, 432 | "stack": false, 433 | "steppedLine": false, 434 | "targets": [ 435 | { 436 | "datasource": { 437 | "type": "prometheus", 438 | "uid": "$datasource" 439 | }, 440 | "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\"}[$__rate_interval])) by (instance, le))", 441 | "format": "time_series", 442 | "intervalFactor": 2, 443 | "legendFormat": "{{instance}}", 444 | "refId": "A" 445 | } 446 | ], 447 | "thresholds": [], 448 | "timeRegions": [], 449 | "title": "Network Programming Latency 99th Quantile", 450 | "tooltip": { 451 | "shared": false, 452 | "sort": 0, 453 | "value_type": "individual" 454 | }, 455 | "type": "graph", 456 | "xaxis": { 457 | "mode": "time", 458 | "show": true, 459 | "values": [] 460 | }, 461 | "yaxes": [ 462 | { 463 | "format": "s", 464 | "logBase": 1, 465 | "min": 0, 466 | "show": true 467 | }, 468 | { 469 | "format": "s", 470 | "logBase": 1, 471 | "min": 0, 472 | "show": true 473 | } 474 | ], 475 | "yaxis": { 476 | "align": false 477 | } 478 | }, 479 | { 480 | "aliasColors": {}, 481 | "bars": false, 482 | "dashLength": 10, 483 | "dashes": false, 484 | "datasource": { 485 | "type": "prometheus", 486 | "uid": 
"$datasource" 487 | }, 488 | "fill": 1, 489 | "fillGradient": 0, 490 | "gridPos": { 491 | "h": 7, 492 | "w": 8, 493 | "x": 0, 494 | "y": 14 495 | }, 496 | "hiddenSeries": false, 497 | "id": 7, 498 | "interval": "1m", 499 | "legend": { 500 | "alignAsTable": true, 501 | "avg": false, 502 | "current": false, 503 | "max": false, 504 | "min": false, 505 | "rightSide": true, 506 | "show": true, 507 | "total": false, 508 | "values": false 509 | }, 510 | "lines": true, 511 | "linewidth": 1, 512 | "links": [], 513 | "nullPointMode": "null", 514 | "options": { 515 | "alertThreshold": true 516 | }, 517 | "percentage": false, 518 | "pluginVersion": "9.4.7", 519 | "pointradius": 5, 520 | "points": false, 521 | "renderer": "flot", 522 | "seriesOverrides": [], 523 | "spaceLength": 10, 524 | "stack": false, 525 | "steppedLine": false, 526 | "targets": [ 527 | { 528 | "datasource": { 529 | "type": "prometheus", 530 | "uid": "$datasource" 531 | }, 532 | "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", 533 | "format": "time_series", 534 | "intervalFactor": 2, 535 | "legendFormat": "2xx", 536 | "refId": "A" 537 | }, 538 | { 539 | "datasource": { 540 | "type": "prometheus", 541 | "uid": "$datasource" 542 | }, 543 | "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", 544 | "format": "time_series", 545 | "intervalFactor": 2, 546 | "legendFormat": "3xx", 547 | "refId": "B" 548 | }, 549 | { 550 | "datasource": { 551 | "type": "prometheus", 552 | "uid": "$datasource" 553 | }, 554 | "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", 555 | "format": "time_series", 556 | "intervalFactor": 2, 557 | "legendFormat": "4xx", 558 | "refId": "C" 559 | }, 560 | { 561 | "datasource": { 562 | "uid": "$datasource" 563 | 
}, 564 | "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", 565 | "format": "time_series", 566 | "intervalFactor": 2, 567 | "legendFormat": "5xx", 568 | "refId": "D" 569 | } 570 | ], 571 | "thresholds": [], 572 | "timeRegions": [], 573 | "title": "Kube API Request Rate", 574 | "tooltip": { 575 | "shared": false, 576 | "sort": 0, 577 | "value_type": "individual" 578 | }, 579 | "type": "graph", 580 | "xaxis": { 581 | "mode": "time", 582 | "show": true, 583 | "values": [] 584 | }, 585 | "yaxes": [ 586 | { 587 | "format": "ops", 588 | "logBase": 1, 589 | "show": true 590 | }, 591 | { 592 | "format": "ops", 593 | "logBase": 1, 594 | "show": true 595 | } 596 | ], 597 | "yaxis": { 598 | "align": false 599 | } 600 | }, 601 | { 602 | "aliasColors": {}, 603 | "bars": false, 604 | "dashLength": 10, 605 | "dashes": false, 606 | "datasource": { 607 | "type": "prometheus", 608 | "uid": "$datasource" 609 | }, 610 | "fill": 1, 611 | "fillGradient": 0, 612 | "gridPos": { 613 | "h": 7, 614 | "w": 16, 615 | "x": 8, 616 | "y": 14 617 | }, 618 | "hiddenSeries": false, 619 | "id": 8, 620 | "interval": "1m", 621 | "legend": { 622 | "alignAsTable": true, 623 | "avg": false, 624 | "current": false, 625 | "max": false, 626 | "min": false, 627 | "rightSide": true, 628 | "show": true, 629 | "total": false, 630 | "values": false 631 | }, 632 | "lines": true, 633 | "linewidth": 1, 634 | "links": [], 635 | "nullPointMode": "null", 636 | "options": { 637 | "alertThreshold": true 638 | }, 639 | "percentage": false, 640 | "pluginVersion": "9.4.7", 641 | "pointradius": 5, 642 | "points": false, 643 | "renderer": "flot", 644 | "seriesOverrides": [], 645 | "spaceLength": 10, 646 | "stack": false, 647 | "steppedLine": false, 648 | "targets": [ 649 | { 650 | "datasource": { 651 | "type": "prometheus", 652 | "uid": "$datasource" 653 | }, 654 | "expr": "histogram_quantile(0.99, 
sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"$job\",k8s_node_name=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", 655 | "format": "time_series", 656 | "intervalFactor": 2, 657 | "legendFormat": "{{verb}} {{url}}", 658 | "refId": "A" 659 | } 660 | ], 661 | "thresholds": [], 662 | "timeRegions": [], 663 | "title": "Post Request Latency 99th Quantile", 664 | "tooltip": { 665 | "shared": false, 666 | "sort": 0, 667 | "value_type": "individual" 668 | }, 669 | "type": "graph", 670 | "xaxis": { 671 | "mode": "time", 672 | "show": true, 673 | "values": [] 674 | }, 675 | "yaxes": [ 676 | { 677 | "format": "s", 678 | "logBase": 1, 679 | "min": 0, 680 | "show": true 681 | }, 682 | { 683 | "format": "s", 684 | "logBase": 1, 685 | "min": 0, 686 | "show": true 687 | } 688 | ], 689 | "yaxis": { 690 | "align": false 691 | } 692 | }, 693 | { 694 | "aliasColors": {}, 695 | "bars": false, 696 | "dashLength": 10, 697 | "dashes": false, 698 | "datasource": { 699 | "type": "prometheus", 700 | "uid": "$datasource" 701 | }, 702 | "fill": 1, 703 | "fillGradient": 0, 704 | "gridPos": { 705 | "h": 7, 706 | "w": 24, 707 | "x": 0, 708 | "y": 21 709 | }, 710 | "hiddenSeries": false, 711 | "id": 9, 712 | "interval": "1m", 713 | "legend": { 714 | "alignAsTable": true, 715 | "avg": false, 716 | "current": true, 717 | "max": false, 718 | "min": false, 719 | "rightSide": true, 720 | "show": true, 721 | "total": false, 722 | "values": true 723 | }, 724 | "lines": true, 725 | "linewidth": 1, 726 | "links": [], 727 | "nullPointMode": "null", 728 | "options": { 729 | "alertThreshold": true 730 | }, 731 | "percentage": false, 732 | "pluginVersion": "9.4.7", 733 | "pointradius": 5, 734 | "points": false, 735 | "renderer": "flot", 736 | "seriesOverrides": [], 737 | "spaceLength": 10, 738 | "stack": false, 739 | "steppedLine": false, 740 | "targets": [ 741 | { 742 | "datasource": { 743 | "type": "prometheus", 744 | "uid": "$datasource" 745 | }, 746 
| "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"$job\", k8s_node_name=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", 747 | "format": "time_series", 748 | "intervalFactor": 2, 749 | "legendFormat": "{{verb}} {{url}}", 750 | "refId": "A" 751 | } 752 | ], 753 | "thresholds": [], 754 | "timeRegions": [], 755 | "title": "Get Request Latency 99th Quantile", 756 | "tooltip": { 757 | "shared": false, 758 | "sort": 0, 759 | "value_type": "individual" 760 | }, 761 | "type": "graph", 762 | "xaxis": { 763 | "mode": "time", 764 | "show": true, 765 | "values": [] 766 | }, 767 | "yaxes": [ 768 | { 769 | "format": "s", 770 | "logBase": 1, 771 | "min": 0, 772 | "show": true 773 | }, 774 | { 775 | "format": "s", 776 | "logBase": 1, 777 | "min": 0, 778 | "show": true 779 | } 780 | ], 781 | "yaxis": { 782 | "align": false 783 | } 784 | }, 785 | { 786 | "aliasColors": {}, 787 | "bars": false, 788 | "dashLength": 10, 789 | "dashes": false, 790 | "datasource": { 791 | "type": "prometheus", 792 | "uid": "$datasource" 793 | }, 794 | "fill": 1, 795 | "fillGradient": 0, 796 | "gridPos": { 797 | "h": 7, 798 | "w": 8, 799 | "x": 0, 800 | "y": 28 801 | }, 802 | "hiddenSeries": false, 803 | "id": 10, 804 | "interval": "1m", 805 | "legend": { 806 | "alignAsTable": true, 807 | "avg": false, 808 | "current": false, 809 | "max": false, 810 | "min": false, 811 | "rightSide": true, 812 | "show": true, 813 | "total": false, 814 | "values": false 815 | }, 816 | "lines": true, 817 | "linewidth": 1, 818 | "links": [], 819 | "nullPointMode": "null", 820 | "options": { 821 | "alertThreshold": true 822 | }, 823 | "percentage": false, 824 | "pluginVersion": "9.4.7", 825 | "pointradius": 5, 826 | "points": false, 827 | "renderer": "flot", 828 | "seriesOverrides": [], 829 | "spaceLength": 10, 830 | "stack": false, 831 | "steppedLine": false, 832 | "targets": [ 833 | { 834 | "datasource": { 835 | "type": "prometheus", 
836 | "uid": "$datasource" 837 | }, 838 | "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"$job\",k8s_node_name=~\"$instance\"}", 839 | "format": "time_series", 840 | "intervalFactor": 2, 841 | "legendFormat": "{{instance}}", 842 | "refId": "A" 843 | } 844 | ], 845 | "thresholds": [], 846 | "timeRegions": [], 847 | "title": "Memory", 848 | "tooltip": { 849 | "shared": false, 850 | "sort": 0, 851 | "value_type": "individual" 852 | }, 853 | "type": "graph", 854 | "xaxis": { 855 | "mode": "time", 856 | "show": true, 857 | "values": [] 858 | }, 859 | "yaxes": [ 860 | { 861 | "format": "bytes", 862 | "logBase": 1, 863 | "show": true 864 | }, 865 | { 866 | "format": "bytes", 867 | "logBase": 1, 868 | "show": true 869 | } 870 | ], 871 | "yaxis": { 872 | "align": false 873 | } 874 | }, 875 | { 876 | "aliasColors": {}, 877 | "bars": false, 878 | "dashLength": 10, 879 | "dashes": false, 880 | "datasource": { 881 | "type": "prometheus", 882 | "uid": "$datasource" 883 | }, 884 | "fill": 1, 885 | "fillGradient": 0, 886 | "gridPos": { 887 | "h": 7, 888 | "w": 8, 889 | "x": 8, 890 | "y": 28 891 | }, 892 | "hiddenSeries": false, 893 | "id": 11, 894 | "interval": "1m", 895 | "legend": { 896 | "alignAsTable": true, 897 | "avg": false, 898 | "current": false, 899 | "max": false, 900 | "min": false, 901 | "rightSide": true, 902 | "show": true, 903 | "total": false, 904 | "values": false 905 | }, 906 | "lines": true, 907 | "linewidth": 1, 908 | "links": [], 909 | "nullPointMode": "null", 910 | "options": { 911 | "alertThreshold": true 912 | }, 913 | "percentage": false, 914 | "pluginVersion": "9.4.7", 915 | "pointradius": 5, 916 | "points": false, 917 | "renderer": "flot", 918 | "seriesOverrides": [], 919 | "spaceLength": 10, 920 | "stack": false, 921 | "steppedLine": false, 922 | "targets": [ 923 | { 924 | "datasource": { 925 | "type": "prometheus", 926 | "uid": "$datasource" 927 | }, 928 | "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", 
job=\"$job\",k8s_node_name=~\"$instance\"}[$__rate_interval])", 929 | "format": "time_series", 930 | "intervalFactor": 2, 931 | "legendFormat": "{{instance}}", 932 | "refId": "A" 933 | } 934 | ], 935 | "thresholds": [], 936 | "timeRegions": [], 937 | "title": "CPU usage", 938 | "tooltip": { 939 | "shared": false, 940 | "sort": 0, 941 | "value_type": "individual" 942 | }, 943 | "type": "graph", 944 | "xaxis": { 945 | "mode": "time", 946 | "show": true, 947 | "values": [] 948 | }, 949 | "yaxes": [ 950 | { 951 | "format": "short", 952 | "logBase": 1, 953 | "min": 0, 954 | "show": true 955 | }, 956 | { 957 | "format": "short", 958 | "logBase": 1, 959 | "min": 0, 960 | "show": true 961 | } 962 | ], 963 | "yaxis": { 964 | "align": false 965 | } 966 | }, 967 | { 968 | "aliasColors": {}, 969 | "bars": false, 970 | "dashLength": 10, 971 | "dashes": false, 972 | "datasource": { 973 | "type": "prometheus", 974 | "uid": "$datasource" 975 | }, 976 | "fill": 1, 977 | "fillGradient": 0, 978 | "gridPos": { 979 | "h": 7, 980 | "w": 8, 981 | "x": 16, 982 | "y": 28 983 | }, 984 | "hiddenSeries": false, 985 | "id": 12, 986 | "interval": "1m", 987 | "legend": { 988 | "alignAsTable": true, 989 | "avg": false, 990 | "current": false, 991 | "max": false, 992 | "min": false, 993 | "rightSide": true, 994 | "show": true, 995 | "total": false, 996 | "values": false 997 | }, 998 | "lines": true, 999 | "linewidth": 1, 1000 | "links": [], 1001 | "nullPointMode": "null", 1002 | "options": { 1003 | "alertThreshold": true 1004 | }, 1005 | "percentage": false, 1006 | "pluginVersion": "9.4.7", 1007 | "pointradius": 5, 1008 | "points": false, 1009 | "renderer": "flot", 1010 | "seriesOverrides": [], 1011 | "spaceLength": 10, 1012 | "stack": false, 1013 | "steppedLine": false, 1014 | "targets": [ 1015 | { 1016 | "datasource": { 1017 | "type": "prometheus", 1018 | "uid": "$datasource" 1019 | }, 1020 | "expr": "go_goroutines{cluster=\"$cluster\", job=\"$job\",k8s_node_name=~\"$instance\"}", 1021 | 
"format": "time_series", 1022 | "intervalFactor": 2, 1023 | "legendFormat": "{{instance}}", 1024 | "refId": "A" 1025 | } 1026 | ], 1027 | "thresholds": [], 1028 | "timeRegions": [], 1029 | "title": "Goroutines", 1030 | "tooltip": { 1031 | "shared": false, 1032 | "sort": 0, 1033 | "value_type": "individual" 1034 | }, 1035 | "type": "graph", 1036 | "xaxis": { 1037 | "mode": "time", 1038 | "show": true, 1039 | "values": [] 1040 | }, 1041 | "yaxes": [ 1042 | { 1043 | "format": "short", 1044 | "logBase": 1, 1045 | "show": true 1046 | }, 1047 | { 1048 | "format": "short", 1049 | "logBase": 1, 1050 | "show": true 1051 | } 1052 | ], 1053 | "yaxis": { 1054 | "align": false 1055 | } 1056 | } 1057 | ], 1058 | "refresh": "10s", 1059 | "revision": 1, 1060 | "schemaVersion": 38, 1061 | "style": "dark", 1062 | "tags": [ 1063 | "Infrastructure" 1064 | ], 1065 | "templating": { 1066 | "list": [ 1067 | { 1068 | "current": { 1069 | "selected": false, 1070 | "text": "default", 1071 | "value": "default" 1072 | }, 1073 | "hide": 0, 1074 | "includeAll": false, 1075 | "label": "Data Source", 1076 | "multi": false, 1077 | "name": "datasource", 1078 | "options": [], 1079 | "query": "prometheus", 1080 | "queryValue": "", 1081 | "refresh": 1, 1082 | "regex": "", 1083 | "skipUrlSync": false, 1084 | "type": "datasource" 1085 | }, 1086 | { 1087 | "current": {}, 1088 | "datasource": { 1089 | "type": "prometheus", 1090 | "uid": "$datasource" 1091 | }, 1092 | "definition": "label_values(up{job=\"$job\"}, cluster)", 1093 | "hide": 0, 1094 | "includeAll": false, 1095 | "label": "cluster", 1096 | "multi": false, 1097 | "name": "cluster", 1098 | "options": [], 1099 | "query": { 1100 | "query": "label_values(up{job=\"$job\"}, cluster)", 1101 | "refId": "StandardVariableQuery" 1102 | }, 1103 | "refresh": 2, 1104 | "regex": "", 1105 | "skipUrlSync": false, 1106 | "sort": 1, 1107 | "tagValuesQuery": "", 1108 | "tagsQuery": "", 1109 | "type": "query", 1110 | "useTags": false 1111 | }, 1112 | { 1113 | 
"current": {}, 1114 | "datasource": { 1115 | "type": "prometheus", 1116 | "uid": "$datasource" 1117 | }, 1118 | "definition": "label_values(up{job=\"$job\", cluster=\"$cluster\"}, k8s_node_name)", 1119 | "hide": 0, 1120 | "includeAll": true, 1121 | "label": "instance", 1122 | "multi": false, 1123 | "name": "instance", 1124 | "options": [], 1125 | "query": { 1126 | "query": "label_values(up{job=\"$job\", cluster=\"$cluster\"}, k8s_node_name)", 1127 | "refId": "StandardVariableQuery" 1128 | }, 1129 | "refresh": 2, 1130 | "regex": "", 1131 | "skipUrlSync": false, 1132 | "sort": 1, 1133 | "tagValuesQuery": "", 1134 | "tagsQuery": "", 1135 | "type": "query", 1136 | "useTags": false 1137 | }, 1138 | { 1139 | "current": {}, 1140 | "datasource": { 1141 | "type": "prometheus", 1142 | "uid": "$datasource" 1143 | }, 1144 | "definition": "label_values(kubeproxy_sync_proxy_rules_duration_seconds_count, job)", 1145 | "description": "", 1146 | "hide": 2, 1147 | "includeAll": false, 1148 | "label": "job", 1149 | "multi": false, 1150 | "name": "job", 1151 | "options": [], 1152 | "query": { 1153 | "query": "label_values(kubeproxy_sync_proxy_rules_duration_seconds_count, job)", 1154 | "refId": "StandardVariableQuery" 1155 | }, 1156 | "refresh": 2, 1157 | "regex": "", 1158 | "skipUrlSync": false, 1159 | "sort": 0, 1160 | "type": "query" 1161 | } 1162 | ] 1163 | }, 1164 | "time": { 1165 | "from": "now-1h", 1166 | "to": "now" 1167 | }, 1168 | "timepicker": { 1169 | "refresh_intervals": [ 1170 | "5s", 1171 | "10s", 1172 | "30s", 1173 | "1m", 1174 | "5m", 1175 | "15m", 1176 | "30m", 1177 | "1h", 1178 | "2h", 1179 | "1d" 1180 | ], 1181 | "time_options": [ 1182 | "5m", 1183 | "15m", 1184 | "1h", 1185 | "6h", 1186 | "12h", 1187 | "24h", 1188 | "2d", 1189 | "7d", 1190 | "30d" 1191 | ] 1192 | }, 1193 | "timezone": "utc", 1194 | "title": "Kubernetes / Kube-proxy", 1195 | "uid": "632e265de029684c40b21cb76bca4f94", 1196 | "version": 1, 1197 | "weekStart": "" 1198 | } 1199 | 
-------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/amp/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: amp-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_AMP_MON_DASH_URL} 12 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/amp/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/adot/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: grafana-dashboards-adothealth 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_ADOTHEALTH_DASH_URL} 12 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/adot/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- 
/artifacts/grafana-operator-manifests/eks/apiserver/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: apiserver-basic-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_APISERVER_BASIC_DASH_URL} 12 | --- 13 | apiVersion: grafana.integreatly.org/v1beta1 14 | kind: GrafanaDashboard 15 | metadata: 16 | name: apiserver-advanced-grafanadashboard 17 | namespace: grafana-operator 18 | spec: 19 | folder: "Observability Accelerator Dashboards" 20 | instanceSelector: 21 | matchLabels: 22 | dashboards: "external-grafana" 23 | url: ${GRAFANA_APISERVER_ADVANCED_DASH_URL} 24 | --- 25 | apiVersion: grafana.integreatly.org/v1beta1 26 | kind: GrafanaDashboard 27 | metadata: 28 | name: apiserver-troubleshooting-grafanadashboard 29 | namespace: grafana-operator 30 | spec: 31 | folder: "Observability Accelerator Dashboards" 32 | instanceSelector: 33 | matchLabels: 34 | dashboards: "external-grafana" 35 | url: ${GRAFANA_APISERVER_TROUBLESHOOTING_DASH_URL} 36 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/apiserver/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/gpu/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: gpu-nvidia-dcgm-exporter-dashboard 5 
| namespace: grafana-operator 6 | spec: 7 | datasources: 8 | - inputName: "DS_PROMETHEUS" 9 | datasourceName: "aws-observability-accelerator" 10 | folder: "Observability Accelerator Dashboards" 11 | instanceSelector: 12 | matchLabels: 13 | dashboards: "external-grafana" 14 | grafanaCom: 15 | id: 12239 16 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/gpu/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-amp-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: grafanadatasource-amp 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 11 | name: aws-observability-accelerator 12 | type: prometheus 13 | access: proxy 14 | url: ${AMP_ENDPOINT_URL} 15 | isDefault: true 16 | jsonData: 17 | 'tlsSkipVerify': false 18 | 'timeInterval': "5s" 19 | 'sigV4Auth': true 20 | 'sigV4AuthType': "ec2_iam_role" 21 | 'sigV4Region': ${AMG_AWS_REGION} 22 | editable: true 23 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-cw-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: grafanadatasource-cw 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 
11 | name: aws-observability-accelerator-cloudwatch 12 | type: cloudwatch 13 | access: server 14 | isDefault: false 15 | url: "" 16 | jsonData: 17 | 'tlsSkipVerify': false 18 | 'timeInterval': "5s" 19 | "sigV4Auth": true 20 | "sigV4AuthType": "ec2_iam_role" 21 | "sigV4Region": ${AMG_AWS_REGION} 22 | "customMetricsNamespaces": "ContainerInsights/Prometheus" 23 | "defaultRegion": ${AMG_AWS_REGION} 24 | "provisionedBy": "aws-datasource-provisioner-app" 25 | editable: true 26 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: cluster-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_CLUSTER_DASH_URL} 12 | --- 13 | apiVersion: grafana.integreatly.org/v1beta1 14 | kind: GrafanaDashboard 15 | metadata: 16 | name: kubelet-grafanadashboard 17 | namespace: grafana-operator 18 | spec: 19 | folder: "Observability Accelerator Dashboards" 20 | instanceSelector: 21 | matchLabels: 22 | dashboards: "external-grafana" 23 | url: ${GRAFANA_KUBELET_DASH_URL} 24 | --- 25 | apiVersion: grafana.integreatly.org/v1beta1 26 | kind: GrafanaDashboard 27 | metadata: 28 | name: namespace-workloads-grafanadashboard 29 | namespace: grafana-operator 30 | spec: 31 | folder: "Observability Accelerator Dashboards" 32 | instanceSelector: 33 | matchLabels: 34 | dashboards: "external-grafana" 35 | url: ${GRAFANA_NSWRKLDS_DASH_URL} 36 | --- 37 | apiVersion: grafana.integreatly.org/v1beta1 38 | kind: GrafanaDashboard 39 | metadata: 40 | name: node-exporter-grafanadashboard 41 | namespace: grafana-operator 42 | spec: 43 | folder: "Observability Accelerator
Dashboards" 44 | instanceSelector: 45 | matchLabels: 46 | dashboards: "external-grafana" 47 | url: ${GRAFANA_NODEEXP_DASH_URL} 48 | --- 49 | apiVersion: grafana.integreatly.org/v1beta1 50 | kind: GrafanaDashboard 51 | metadata: 52 | name: nodes-grafanadashboard 53 | namespace: grafana-operator 54 | spec: 55 | folder: "Observability Accelerator Dashboards" 56 | instanceSelector: 57 | matchLabels: 58 | dashboards: "external-grafana" 59 | url: ${GRAFANA_NODES_DASH_URL} 60 | --- 61 | apiVersion: grafana.integreatly.org/v1beta1 62 | kind: GrafanaDashboard 63 | metadata: 64 | name: workloads-grafanadashboard 65 | namespace: grafana-operator 66 | spec: 67 | folder: "Observability Accelerator Dashboards" 68 | instanceSelector: 69 | matchLabels: 70 | dashboards: "external-grafana" 71 | url: ${GRAFANA_WORKLOADS_DASH_URL} 72 | 73 | --- 74 | apiVersion: grafana.integreatly.org/v1beta1 75 | kind: GrafanaDashboard 76 | metadata: 77 | name: ksh-grafanadashboard 78 | namespace: grafana-operator 79 | spec: 80 | folder: "Observability Accelerator Dashboards" 81 | instanceSelector: 82 | matchLabels: 83 | dashboards: "external-grafana" 84 | url: ${GRAFANA_KSH_DASH_URL} 85 | --- 86 | apiVersion: grafana.integreatly.org/v1beta1 87 | kind: GrafanaDashboard 88 | metadata: 89 | name: kcm-grafanadashboard 90 | namespace: grafana-operator 91 | spec: 92 | folder: "Observability Accelerator Dashboards" 93 | instanceSelector: 94 | matchLabels: 95 | dashboards: "external-grafana" 96 | url: ${GRAFANA_KCM_DASH_URL} 97 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-identity.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: Grafana 3 | metadata: 4 | name: external-grafana 5 | namespace: grafana-operator 6 | labels: 7 | dashboards: "external-grafana" 8 | spec: 9 | external: 10 | url: ${AMG_ENDPOINT_URL} 
11 | apiKey: 12 | name: grafana-admin-credentials 13 | key: GF_SECURITY_ADMIN_APIKEY 14 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/amg_grafana-xray-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: grafanadatasource-xray 5 | namespace: grafana-operator 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "external-grafana" 10 | datasource: 11 | name: aws-observability-accelerator-xray 12 | type: grafana-x-ray-datasource 13 | access: server 14 | url: "" 15 | isDefault: false 16 | jsonData: 17 | "authType": "ec2_iam_role" 18 | "defaultRegion": ${AMG_AWS_REGION} 19 | "provisionedBy": "aws-datasource-provisioner-app" 20 | editable: true 21 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/infrastructure/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-identity.yaml 5 | - amg_grafana-amp-datasource.yaml 6 | - amg_grafana-cw-datasource.yaml 7 | - amg_grafana-xray-datasource.yaml 8 | - amg_grafana-dashboards.yaml 9 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/istio/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | # Istio dashboards 2 | apiVersion: grafana.integreatly.org/v1beta1 3 | kind: GrafanaDashboard 4 | metadata: 5 | name: istio-controlplane-grafanadashboard 6 | namespace: grafana-operator 7 | spec: 8 | folder: "Observability Accelerator Dashboards" 9 | instanceSelector: 10 | matchLabels: 11 | dashboards: "external-grafana" 12 | url: 
${GRAFANA_ISTIO_CP_DASH_URL} 13 | --- 14 | apiVersion: grafana.integreatly.org/v1beta1 15 | kind: GrafanaDashboard 16 | metadata: 17 | name: istio-mesh-grafanadashboard 18 | namespace: grafana-operator 19 | spec: 20 | folder: "Observability Accelerator Dashboards" 21 | instanceSelector: 22 | matchLabels: 23 | dashboards: "external-grafana" 24 | url: ${GRAFANA_ISTIO_MESH_DASH_URL} 25 | --- 26 | apiVersion: grafana.integreatly.org/v1beta1 27 | kind: GrafanaDashboard 28 | metadata: 29 | name: istio-performance-grafanadashboard 30 | namespace: grafana-operator 31 | spec: 32 | folder: "Observability Accelerator Dashboards" 33 | instanceSelector: 34 | matchLabels: 35 | dashboards: "external-grafana" 36 | url: ${GRAFANA_ISTIO_PERF_DASH_URL} 37 | --- 38 | apiVersion: grafana.integreatly.org/v1beta1 39 | kind: GrafanaDashboard 40 | metadata: 41 | name: istio-service-grafanadashboard 42 | namespace: grafana-operator 43 | spec: 44 | folder: "Observability Accelerator Dashboards" 45 | instanceSelector: 46 | matchLabels: 47 | dashboards: "external-grafana" 48 | url: ${GRAFANA_ISTIO_SERVICE_DASH_URL} 49 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/istio/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/java/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: java-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | 
url: ${GRAFANA_JAVA_JMX_DASH_URL} 12 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/java/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/kube-proxy/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: grafana-dashboards-kubeproxy 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_KUBEPROXY_DASH_URL} 12 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/kube-proxy/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/neuron/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: neuron-monitor-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_NEURON_DASH_URL} 12 | -------------------------------------------------------------------------------- 
/artifacts/grafana-operator-manifests/eks/neuron/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/nginx/amg_grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: nginx-grafanadashboard 5 | namespace: grafana-operator 6 | spec: 7 | folder: "Observability Accelerator Dashboards" 8 | instanceSelector: 9 | matchLabels: 10 | dashboards: "external-grafana" 11 | url: ${GRAFANA_NGINX_DASH_URL} 12 | -------------------------------------------------------------------------------- /artifacts/grafana-operator-manifests/eks/nginx/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - amg_grafana-dashboards.yaml 5 | -------------------------------------------------------------------------------- /artifacts/k8s-deployment-manifest-templates/neuron/pytorch-inference-resnet50.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: pytorch-resnet50-script 5 | data: 6 | pytorch-resnet50.sh: | 7 | #!/usr/bin/env python 8 | import torch 9 | import numpy as np 10 | import os 11 | import torch_neuron 12 | from torchvision import models, transforms, datasets 13 | import time 14 | from urllib import request 15 | import json 16 | 17 | image = torch.zeros([1, 3, 224, 224], dtype=torch.float32) 18 | 19 | ## Load a pretrained ResNet50 model 20 | model = models.resnet50(pretrained=True) 21 | 22 | ## Tell the model we are 
using it for evaluation (not training) 23 | model.eval() 24 | model_neuron = torch.neuron.trace(model, example_inputs=[image]) 25 | 26 | ## Create an image directory containing a small kitten 27 | os.makedirs("./torch_neuron_test/images", exist_ok=True) 28 | request.urlretrieve("https://raw.githubusercontent.com/awslabs/mxnet-model-server/master/docs/images/kitten_small.jpg", 29 | "./torch_neuron_test/images/kitten_small.jpg") 30 | 31 | ## Fetch labels to output the top classifications 32 | request.urlretrieve("https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json","imagenet_class_index.json") 33 | idx2label = [] 34 | 35 | with open("imagenet_class_index.json", "r") as read_file: 36 | class_idx = json.load(read_file) 37 | idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] 38 | 39 | ## Import a sample image and normalize it into a tensor 40 | normalize = transforms.Normalize( 41 | mean=[0.485, 0.456, 0.406], 42 | std=[0.229, 0.224, 0.225]) 43 | 44 | eval_dataset = datasets.ImageFolder( 45 | os.path.dirname("./torch_neuron_test/"), 46 | transforms.Compose([ 47 | transforms.Resize([224, 224]), 48 | transforms.ToTensor(), 49 | normalize, 50 | ]) 51 | ) 52 | 53 | image, _ = eval_dataset[0] 54 | image = torch.tensor(image.numpy()[np.newaxis, ...]) 55 | 56 | ## Predict 57 | while True: 58 | results = model_neuron( image ) 59 | time.sleep(0.1) 60 | --- 61 | apiVersion: v1 62 | kind: Pod 63 | metadata: 64 | name: pytorch-inference-resnet50 65 | spec: 66 | tolerations: 67 | - key: node-role.kubernetes.io/control-plane 68 | operator: Exists 69 | effect: NoSchedule 70 | - key: node-role.kubernetes.io/master 71 | operator: Exists 72 | effect: NoSchedule 73 | containers: 74 | - name: pytorch 75 | image: 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuron:1.13.1-neuron-py310-sdk2.13.2-ubuntu20.04 76 | command: ["/script/pytorch-resnet50.sh"] 77 | volumeMounts: 78 | - name: script 79 | mountPath: "/script" 80 | 
resources: 81 | limits: 82 | cpu: 4 83 | memory: 4Gi 84 | aws.amazon.com/neuron: 1 85 | requests: 86 | cpu: "1" 87 | memory: 1Gi 88 | securityContext: 89 | capabilities: 90 | add: 91 | - IPC_LOCK 92 | volumes: 93 | - name: script 94 | configMap: 95 | name: pytorch-resnet50-script 96 | defaultMode: 0777 97 | -------------------------------------------------------------------------------- /artifacts/k8s-deployment-manifest-templates/nginx/nginx-traffic-sample.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: {{namespace}} 5 | labels: 6 | name: {{namespace}} 7 | 8 | --- 9 | 10 | kind: Pod 11 | apiVersion: v1 12 | metadata: 13 | name: banana-app 14 | namespace: {{namespace}} 15 | labels: 16 | app: banana 17 | spec: 18 | containers: 19 | - name: banana-app 20 | image: hashicorp/http-echo 21 | args: 22 | - "-text=banana" 23 | resources: 24 | limits: 25 | cpu: 100m 26 | memory: 100Mi 27 | requests: 28 | cpu: 50m 29 | memory: 50Mi 30 | --- 31 | 32 | kind: Service 33 | apiVersion: v1 34 | metadata: 35 | name: banana-service 36 | namespace: {{namespace}} 37 | spec: 38 | selector: 39 | app: banana 40 | ports: 41 | - port: 5678 # Default port for image 42 | 43 | --- 44 | 45 | kind: Pod 46 | apiVersion: v1 47 | metadata: 48 | name: apple-app 49 | namespace: {{namespace}} 50 | labels: 51 | app: apple 52 | spec: 53 | containers: 54 | - name: apple-app 55 | image: hashicorp/http-echo 56 | args: 57 | - "-text=apple" 58 | resources: 59 | limits: 60 | cpu: 100m 61 | memory: 100Mi 62 | requests: 63 | cpu: 50m 64 | memory: 50Mi 65 | --- 66 | 67 | kind: Service 68 | apiVersion: v1 69 | metadata: 70 | name: apple-service 71 | namespace: {{namespace}} 72 | spec: 73 | selector: 74 | app: apple 75 | ports: 76 | - port: 5678 # Default port for image 77 | 78 | --- 79 | 80 | apiVersion: networking.k8s.io/v1 81 | kind: Ingress 82 | metadata: 83 | name: ingress-nginx-demo 84 | namespace: 
{{namespace}} 85 | spec: 86 | rules: 87 | - host: {{external_ip}} 88 | http: 89 | paths: 90 | - path: /apple 91 | pathType: Prefix 92 | backend: 93 | service: 94 | name: apple-service 95 | port: 96 | number: 5678 97 | - path: /banana 98 | pathType: Prefix 99 | backend: 100 | service: 101 | name: banana-service 102 | port: 103 | number: 5678 104 | ingressClassName: nginx 105 | 106 | --- 107 | 108 | apiVersion: v1 109 | kind: Pod 110 | metadata: 111 | name: traffic-generator 112 | namespace: {{namespace}} 113 | spec: 114 | containers: 115 | - name: traffic-generator 116 | image: ellerbrock/alpine-bash-curl-ssl 117 | command: ["/bin/bash"] 118 | args: ["-c", "while :; do curl http://{{external_ip}}/apple > /dev/null 2>&1; curl http://{{external_ip}}/banana > /dev/null 2>&1; sleep 0.05; done"] 119 | resources: 120 | limits: 121 | cpu: 100m 122 | memory: 100Mi 123 | requests: 124 | cpu: 50m 125 | memory: 50Mi 126 | -------------------------------------------------------------------------------- /docs/img/cloud-sun-solid-orange.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/img/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-observability/aws-observability-accelerator/03015950068f5c9db31c93e776875fc9453f4fd1/docs/img/dashboard.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 |