├── .dockerignore ├── .github └── workflows │ ├── deploy.yml │ └── validate.yml ├── .gitignore ├── .golangci.json ├── .snyk ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── PULL_REQUEST_TEMPLATE.md ├── README.md ├── charts └── metrics-agent │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ ├── clusterrole.yaml │ ├── clusterrolebinding.yaml │ ├── deployment.yaml │ ├── extra-objects.yaml │ ├── role.yaml │ ├── rolebinding.yaml │ ├── secret.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── client ├── client.go ├── client_test.go ├── export_test.go └── testdata │ ├── random-test-data.txt │ └── test-cluster-1510159016.tgz ├── cmd ├── kubernetesCmd.go └── root.go ├── deploy ├── docker │ └── Dockerfile └── kubernetes │ └── cloudability-metrics-agent.yaml ├── go.mod ├── go.sum ├── kubernetes ├── endpoint.go ├── endpoint_test.go ├── heapster.go ├── heapster_test.go ├── kubernetes.go ├── kubernetes_test.go ├── kubernetes_unit_test.go ├── nodecollection.go ├── nodecollection_test.go └── testdata │ ├── baseline-container-proxyNode │ ├── baseline-summary-proxyNode │ └── mockToken ├── main.go ├── measurement ├── measurement.go └── measurement_test.go ├── retrieval ├── k8s │ └── k8s_stats.go └── raw │ ├── models.go │ ├── raw_endpoint.go │ └── raw_endpoint_test.go ├── test └── random.go ├── testdata ├── deployments.jsonl ├── e2e │ ├── e2e.sh │ ├── e2e_helper.go │ └── e2e_test.go ├── heapster-metric-export.json ├── namespaces.jsonl ├── pods.json ├── pods.jsonl └── services.jsonl ├── tools └── tools.go ├── util ├── testdata │ └── test-cluster-metrics-sample │ │ └── sample-1510159016 │ │ ├── agent-measurement.jsonl │ │ ├── heapster-metrics-export.json │ │ └── k8s.json ├── util.go └── util_test.go └── version └── version.go /.dockerignore: -------------------------------------------------------------------------------- 1 | .* 2 | vendor 3 | Makefile 4 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: 
-------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'version/version.go' 7 | jobs: 8 | deploy: 9 | name: Build and Deploy 10 | # only build/deploy for beta and master branches 11 | if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta' 12 | runs-on: ubuntu-22.04 13 | steps: 14 | - name: Login to DockerHub 15 | uses: docker/login-action@v1 16 | with: 17 | username: ${{ secrets.DOCKER_USER }} 18 | password: ${{ secrets.DOCKER_PASSWORD }} 19 | - uses: actions/setup-go@v2 20 | with: 21 | go-version: '1.23' 22 | - uses: actions/checkout@v2 23 | - name: Setup Docker Buildx 24 | uses: docker/setup-buildx-action@v1 25 | - name: Docker Build Master and Push 26 | # If pushing to the master branch, we want to correctly tag the builds 27 | # in docker-hub with the new master makefile command 28 | if: github.ref == 'refs/heads/master' 29 | run: make container-build-master 30 | - name: Docker Build Beta and Push 31 | # If pushing to the beta branch, we want to correctly tag the builds 32 | # in docker-hub with the new beta makefile command 33 | if: github.ref == 'refs/heads/beta' 34 | run: make container-build-beta 35 | 36 | helmRelease: 37 | name: Cut Helm Release when Updated 38 | needs: deploy 39 | runs-on: ubuntu-latest 40 | # Only cut release if pushing to master 41 | if: github.ref == 'refs/heads/master' 42 | steps: 43 | - name: Checkout 44 | uses: actions/checkout@v2 45 | with: 46 | fetch-depth: 0 47 | 48 | - name: Configure Git 49 | run: | 50 | git config user.name "$GITHUB_ACTOR" 51 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com" 52 | - name: Run chart-releaser 53 | uses: helm/chart-releaser-action@v1.1.0 54 | env: 55 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 56 | 57 | slackFinish: 58 | name: Notify Finish 59 | needs: helmRelease 60 | runs-on: ubuntu-22.04 61 | steps: 62 | - uses: technote-space/workflow-conclusion-action@v2 63 | - uses: 
8398a7/action-slack@v3 64 | with: 65 | fields: repo,message,commit,author,action,eventName,ref,workflow 66 | status: ${{ env.WORKFLOW_CONCLUSION }} 67 | env: 68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 69 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }} 70 | 71 | 72 | -------------------------------------------------------------------------------- /.github/workflows/validate.yml: -------------------------------------------------------------------------------- 1 | name: Metrics-Agent 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | name: Build 8 | runs-on: ubuntu-22.04 9 | steps: 10 | - uses: actions/setup-go@v2 11 | with: 12 | go-version: '1.23' 13 | - uses: actions/checkout@v2 14 | - name: Install tools 15 | run: | 16 | make install-tools 17 | GO111MODULE=on go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.60.0 18 | - name: Lint 19 | run: make lint 20 | - name: Run Tests 21 | run: make test 22 | 23 | test_e2e: 24 | name: Test E2E AMD 25 | needs: build 26 | runs-on: ubuntu-22.04 27 | steps: 28 | - uses: actions/setup-go@v2 29 | with: 30 | go-version: '1.23' 31 | - uses: actions/checkout@v2 32 | - name: run e2e AMD tests 33 | run: make test-e2e-all 34 | 35 | 36 | slackFinish: 37 | name: Notify Finish 38 | needs: test_e2e 39 | runs-on: ubuntu-22.04 40 | if: always() 41 | steps: 42 | - uses: technote-space/workflow-conclusion-action@v2 43 | - uses: 8398a7/action-slack@v3 44 | with: 45 | fields: repo,message,commit,author,action,eventName,ref,workflow 46 | status: ${{ env.WORKFLOW_CONCLUSION }} 47 | env: 48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 49 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }} 50 | 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/** 2 | **/.DS_Store 3 | /metrics-agent 4 | /cmd/debug.test 5 | **/debug.test 6 | .vscode 7 | .idea 8 | /testdata/e2e 
-------------------------------------------------------------------------------- /.golangci.json: -------------------------------------------------------------------------------- 1 | { 2 | "run": { 3 | "timeout": "5m" 4 | }, 5 | "linters": { 6 | "enable": [ 7 | "gocyclo", 8 | "revive", 9 | "goconst", 10 | "misspell", 11 | "ineffassign", 12 | "lll", 13 | "govet", 14 | "gosec", 15 | "dupl", 16 | "unconvert", 17 | "errcheck", 18 | "staticcheck", 19 | "gofmt" 20 | ], 21 | "settings": { 22 | "dupl": { 23 | "threshold": 100 24 | }, 25 | "lll": { 26 | "line-length": 120 27 | }, 28 | "gocyclo": { 29 | "min-complexity": 12 30 | } 31 | }, 32 | "exclusions": { 33 | "rules": [ 34 | { 35 | "path": "_test.go", 36 | "linters": [ 37 | "gocyclo", 38 | "errcheck", 39 | "dupl", 40 | "gosec" 41 | ] 42 | } 43 | ] 44 | } 45 | }, 46 | "version": "2" 47 | } 48 | -------------------------------------------------------------------------------- /.snyk: -------------------------------------------------------------------------------- 1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities. 2 | version: v1.25.1 3 | # ignores vulnerabilities until expiry date; change duration by modifying expiry date 4 | ignore: 5 | 'SNYK:LIC:GOLANG:GITHUB.COM:LEONKLINGELE:GROUPER:(AGPL-3.0_OR_AGPL-3.0-ONLY)': 6 | - '*': 7 | reason: >- 8 | We are ignoring this Dual license: AGPL-3.0-only vulnerability as our 9 | metrics-agent repository is open-source. 10 | expires: 2100-01-01T00:00:00.000Z 11 | created: 2023-08-07T19:11:55.892Z 12 | CWE-295: 13 | - '*': 14 | reason: >- 15 | The Improper Certificate Validation does not apply to the 16 | metrics-agents default configuration. The reason we have this 17 | vulnerability is we allow the agent to run in an insecure mode (not 18 | recommended) which is explicitly designed to avoid cert validation. 
19 | This vulnerability does not apply as long as CLOUDABILITY_INSECURE is 20 | false (default behavior) 21 | expires: 2100-01-01T00:00:00.000Z 22 | created: 2023-08-07T19:22:08.171Z 23 | SNYK-GOLANG-GITHUBCOMCYPHARFILEPATHSECUREJOIN-5889602: 24 | - '*': 25 | reason: Vulnerability is only exploitable on Windows OS 26 | expires: 2100-01-01T00:00:00.000Z 27 | created: 2023-09-13T19:06:05.786Z 28 | SNYK-GOLANG-GITHUBCOMOPENCONTAINERSRUNCLIBCONTAINER-6672882: 29 | - '*': 30 | reason: No fix available yet 31 | expires: 2024-08-06T00:00:00.000Z 32 | created: 2024-05-06T00:00:00.275Z 33 | SNYK-GOLANG-GOLANGORGXIMAGETIFF-7268348: 34 | - '*': 35 | reason: No fix available yet 36 | expires: 2025-03-01T00:00:00.000Z 37 | created: 2024-07-11T23:07:11.176Z 38 | SNYK-GOLANG-K8SIOCLIENTGOTRANSPORT-7538822: 39 | - '*': 40 | reason: >- 41 | When bumping this dependency to the recommended version several 42 | critical vulnerabilities are introduced. Ignoring for 90 days. 43 | expires: 2024-10-22T00:00:00.000Z 44 | created: 2024-07-23T00:41:35.050Z 45 | SNYK-GOLANG-K8SIOCLIENTGOUTILJSONPATH-7540854: 46 | - '*': 47 | reason: >- 48 | When bumping this dependency to the recommended version several 49 | critical vulnerabilities are introduced. Ignoring for 90 days. 50 | expires: 2024-10-22T00:00:00.000Z 51 | created: 2024-07-23T00:42:14.222Z 52 | SNYK-GOLANG-K8SIOAPISERVERPLUGINPKGAUTHENTICATORTOKENOIDC-7459774: 53 | - '*': 54 | reason: >- 55 | When bumping this dependency to the recommended version several 56 | critical vulnerabilities are introduced. Ignoring for 90 days. 57 | expires: 2024-10-22T00:00:00.000Z 58 | created: 2024-07-23T00:42:33.848Z 59 | SNYK-CC-K8S-8: 60 | - '*': 61 | reason: >- 62 | Permanently ignore as this configuration is supported via the Helm 63 | Chart. 
Users can set readOnlyRootFilesystem to true and add the proper 64 | volume/volumeMount to their deployment 65 | expires: 2200-01-01T00:00:00.000Z 66 | created: 2024-07-23T00:43:23.074Z 67 | SNYK-ALPINE320-OPENSSL-8235201: 68 | - '*': 69 | reason: >- 70 | Alpine 3.20.3 is the newest version of alpine available, ignoring 71 | temporarily until patch version is ready 72 | expires: 2024-11-30T00:00:00.000Z 73 | created: 2024-10-30T18:01:47.217Z 74 | SNYK-GOLANG-K8SIOAPIMACHINERYPKGUTILRUNTIME-8367153: 75 | - '*': 76 | reason: >- 77 | Ignoring vulnerability for 30 days as newer versions of k8s.io 78 | contain several critical vulnerabilities and are still in alpha 79 | expires: 2025-03-01T00:00:00.000Z 80 | created: 2024-11-13T23:30:00.999Z 81 | patch: {} 82 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @egs5053 2 | * @housejester 3 | * @mollylogue 4 | * @mnorbury 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Metrics Agent 2 | 3 | Thank you for contributing! 4 | 5 | This document provides some basic guidelines for contributing to this repository. 6 | To propose improvements, feel free to submit a PR. 7 | 8 | ## Submitting issues 9 | 10 | * If you have found an issue, please open a GitHub issue. 11 | 12 | ## Pull Requests 13 | 14 | Here are some items to check when submitting your PR: 15 | 16 | * have a [proper commit history](#commits) (Please rebase if needed). 17 | * write tests for the code you wrote. 18 | * preferably make sure that all tests pass locally `make test` & `make test-e2e-all`. 19 | * summarize your PR with an explanatory title and a message describing your 20 | changes, cross-referencing any related bugs/PRs. 
21 | * open your PR against the `master` branch. 22 | * Agree to the Contributor License Agreement (CLA) (you will be prompted if you have not done so when you open the PR) 23 | 24 | Pull request must pass all CI tests before we will merge it. 25 | 26 | ### Keep it focused 27 | 28 | Avoid changing too many things at once. 29 | 30 | ### Commit Messages 31 | 32 | Please take a moment to write meaningful commit messages. 33 | 34 | The commit message should describe the reason for the change and give extra details 35 | that will allow someone later on to understand in 5 seconds the thing you've been 36 | working on for a day. 37 | 38 | ### Squash your commits 39 | 40 | Please rebase your changes on `master` and squash your commits whenever possible, 41 | it keeps history cleaner and it's easier to revert things. 42 | 43 | ### Increment the agent version number 44 | 45 | Currently the agent version is managed manually in [this file](version/version.go) please use [Semantic Versioning 2.0.0](https://semver.org/) as your guideline. 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Cloudability Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ARCH?=amd64 2 | EXECUTABLES = go 3 | EXEC_CHECK := $(foreach exec,$(EXECUTABLES), \ 4 | $(if $(shell which $(exec)),some string,$(error "No $(exec) in PATH."))) 5 | 6 | GOLANG_VERSION?=1.23 7 | REPO_DIR:=$(shell pwd) 8 | PREFIX=cloudability 9 | CLDY_API_KEY=${CLOUDABILITY_API_KEY} 10 | PLATFORM?=linux/amd64 11 | PLATFORM_TAG?=amd64 12 | 13 | 14 | # $(call TEST_KUBERNETES, image_tag, prefix, git_commit) 15 | define TEST_KUBERNETES 16 | KUBERNETES_VERSION=$(1) IMAGE=$(2)/metrics-agent:$(3) TEMP_DIR=$(TEMP_DIR) $(REPO_DIR)/testdata/e2e/e2e.sh; \ 17 | if [ $$? != 0 ]; then \ 18 | exit 1; \ 19 | fi; 20 | endef 21 | 22 | ifndef TEMP_DIR 23 | TEMP_DIR:=$(shell mktemp -d /tmp/metrics-agent.XXXXXX) 24 | endif 25 | 26 | # This repo's root import path (under GOPATH). 
27 | PKG := github.com/cloudability/metrics-agent 28 | 29 | # Application name 30 | APPLICATION := metrics-agent 31 | 32 | # This version-strategy uses git tags to set the version string 33 | VERSION := $(shell git describe --tags --always --dirty) 34 | 35 | RELEASE-VERSION := $(shell sed -nE 's/^var[[:space:]]VERSION[[:space:]]=[[:space:]]"([^"]+)".*/\1/p' version/version.go) 36 | 37 | # If this session isn't interactive, then we don't want to allocate a 38 | # TTY, which would fail, but if it is interactive, we do want to attach 39 | # so that the user can send e.g. ^C through. 40 | INTERACTIVE := $(shell [ -t 0 ] && echo 1 || echo 0) 41 | TTY= 42 | ifeq ($(INTERACTIVE), 1) 43 | TTY=-t 44 | endif 45 | 46 | default: 47 | @echo Specify a goal 48 | 49 | build: 50 | GOARCH=$(ARCH) CGO_ENABLED=0 go build -o metrics-agent main.go 51 | 52 | # Build a container image and push to DockerHub master with correct version tags 53 | container-build-master: 54 | docker buildx build --platform linux/arm/v7,linux/arm64/v8,linux/amd64 \ 55 | --build-arg golang_version=$(GOLANG_VERSION) \ 56 | --build-arg package=$(PKG) \ 57 | --build-arg application=$(APPLICATION) \ 58 | -t $(PREFIX)/metrics-agent:$(RELEASE-VERSION) \ 59 | -t $(PREFIX)/metrics-agent:latest -f deploy/docker/Dockerfile . --push 60 | 61 | # Build a container image and push to DockerHub beta with correct version tags 62 | container-build-beta: 63 | docker buildx build --platform linux/arm/v7,linux/arm64/v8,linux/amd64 \ 64 | --build-arg golang_version=$(GOLANG_VERSION) \ 65 | --build-arg package=$(PKG) \ 66 | --build-arg application=$(APPLICATION) \ 67 | -t $(PREFIX)/metrics-agent:$(RELEASE-VERSION)-beta \ 68 | -t $(PREFIX)/metrics-agent:beta-latest -f deploy/docker/Dockerfile . 
--push 69 | 70 | # Build a local container image with the linux AMD architecture 71 | container-build-single-platform: 72 | docker build --platform $(PLATFORM) \ 73 | --build-arg golang_version=$(GOLANG_VERSION) \ 74 | --build-arg package=$(PKG) \ 75 | --build-arg application=$(APPLICATION) \ 76 | -t $(PREFIX)/metrics-agent:$(VERSION)-$(PLATFORM_TAG) -f deploy/docker/Dockerfile . 77 | 78 | # Specify the repository you would like to send the single-architecture image to after building 79 | container-build-single-repository: 80 | @read -p "Enter the repository name you want to send this image to: " REPOSITORY; \ 81 | docker buildx build --platform $(PLATFORM) \ 82 | --build-arg golang_version=$(GOLANG_VERSION) \ 83 | --build-arg package=$(PKG) \ 84 | --build-arg application=$(APPLICATION) \ 85 | -t $$REPOSITORY/metrics-agent:$(VERSION) -f deploy/docker/Dockerfile . --push 86 | 87 | # Specify the repository you would like to send the single-architecture image to after building 88 | container-build-single-repository-podman: 89 | @read -p "Enter the repository name you want to send this image to: " REPOSITORY; \ 90 | podman buildx build --platform $(PLATFORM) \ 91 | --build-arg golang_version=$(GOLANG_VERSION) \ 92 | --build-arg package=$(PKG) \ 93 | --build-arg application=$(APPLICATION) \ 94 | -t $$REPOSITORY/metrics-agent:$(VERSION) -f deploy/docker/Dockerfile .; \ 95 | podman image push $$REPOSITORY/metrics-agent:$(VERSION) 96 | 97 | # Specify the repository you would like to send the multi-architectural image to after building. 98 | container-build-repository: 99 | @read -p "Enter the repository name you want to send this image to: " REPOSITORY; \ 100 | docker buildx build --platform linux/arm/v7,linux/arm64/v8,linux/amd64 \ 101 | --build-arg golang_version=$(GOLANG_VERSION) \ 102 | --build-arg package=$(PKG) \ 103 | --build-arg application=$(APPLICATION) \ 104 | -t $$REPOSITORY/metrics-agent:$(VERSION) -f deploy/docker/Dockerfile . 
--push 105 | 106 | helm-package: 107 | helm package deploy/charts/metrics-agent 108 | 109 | deploy-local: container-build-single-platform 110 | kubectl config use-context docker-desktop 111 | cat ./deploy/kubernetes/cloudability-metrics-agent.yaml | \ 112 | sed "s/latest/$(VERSION)/g; s/XXXXXXXXX/$(CLDY_API_KEY)/g; s/Always/Never/g; s/NNNNNNNNN/local-dev-$(shell hostname)/g" \ 113 | ./deploy/kubernetes/cloudability-metrics-agent.yaml |kubectl apply -f - 114 | 115 | download-deps: 116 | @echo Download go.mod dependencies 117 | @go mod download 118 | 119 | install-tools: download-deps 120 | @echo Installing tools from tools/tools.go 121 | @cat ./tools/tools.go | grep _ | awk -F'"' '{print $$2}' | xargs -tI % go install % 122 | 123 | fmt: 124 | gofmt -w . 125 | 126 | lint: 127 | golangci-lint run 128 | 129 | install: 130 | go install ./... 131 | 132 | test: 133 | go test ./... 134 | 135 | check: fmt lint test 136 | 137 | version: 138 | @echo $(VERSION) 139 | 140 | release-version: 141 | @echo $(RELEASE-VERSION) 142 | 143 | test-e2e-1.32: container-build-single-platform install-tools 144 | $(call TEST_KUBERNETES,v1.32.0,$(PREFIX),$(VERSION)-$(PLATFORM_TAG)) 145 | 146 | test-e2e-1.31: container-build-single-platform install-tools 147 | $(call TEST_KUBERNETES,v1.31.0,$(PREFIX),$(VERSION)-$(PLATFORM_TAG)) 148 | 149 | test-e2e-1.30: container-build-single-platform install-tools 150 | $(call TEST_KUBERNETES,v1.30.0,$(PREFIX),$(VERSION)-$(PLATFORM_TAG)) 151 | 152 | test-e2e-1.29: container-build-single-platform install-tools 153 | $(call TEST_KUBERNETES,v1.29.0,$(PREFIX),$(VERSION)-$(PLATFORM_TAG)) 154 | 155 | test-e2e-all: test-e2e-1.32 test-e2e-1.31 test-e2e-1.30 test-e2e-1.29 156 | 157 | .PHONY: test version 158 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### What does this PR do? 
2 | 3 | #### Where should the reviewer start? 4 | 5 | #### How should this be manually tested? 6 | 7 | #### Any background context you want to provide? 8 | 9 | #### What picture best describes this PR (optional but encouraged)? 10 | 11 | #### What are the relevant Github Issues? 12 | 13 | #### Developer Done List 14 | 15 | - [ ] Tests Added/Updated 16 | - [ ] Updated README.md 17 | - [ ] Verified backward compatible 18 | - [ ] Verified database migrations will not be catastrophic 19 | - [ ] Considered Security, Availability and Confidentiality 20 | 21 | #### For the Reviewer: 22 | 23 | #### By approving this PR, the reviewer acknowledges that they have checked all items in this done list. 24 | 25 | #### Reviewer/Approval Done List 26 | 27 | - [ ] Tests Pass Locally 28 | - [ ] CI Build Passes 29 | - [ ] Verified README.md is updated 30 | - [ ] Verified changes are backward compatible 31 | - [ ] Reviewed impact to Security, Availability and Confidentiality (if issue found, add comments and request changes) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # metrics-agent 2 | 3 | The metrics-agent collects allocation metrics from a Kubernetes cluster system and sends the metrics to cloudability to help you gain visibility, reduce costs, and increase efficiency across your infrastructure. The agent is designed to run as a container in each cluster inside your orchestration system. 
4 | 5 | [![Actions Status](https://github.com/cloudability/metrics-agent/workflows/Master/badge.svg)](https://github.com/cloudability/metrics-agent/actions) 6 | [![Actions Status](https://github.com/cloudability/metrics-agent/workflows/Metrics-Agent/badge.svg)](https://github.com/cloudability/metrics-agent/actions) 7 | [![Go Report Card](https://goreportcard.com/badge/github.com/cloudability/metrics-agent)](https://goreportcard.com/report/github.com/cloudability/metrics-agent) 8 | 9 | ## Kubernetes 10 | 11 | By default, the agent runs in a namespace named "cloudability" (see options below). Once deployed, the agent will pull metrics from the Kubernetes API and directly from each node in the cluster it is running in. An example kubernetes deployment can be found [here](deploy/kubernetes/cloudability-metrics-agent.yaml). 12 | 13 | Every 10 minutes the metrics agent creates a tarball of the gathered metrics and uploads to an Amazon Web Service S3 bucket. This process requires outbound connections to https://metrics-collector.cloudability.com/, to obtain a pre-signed URL, and https://apptio*.s3.amazonaws.com/ to upload the data. If the metrics agent is deployed behind a firewall, these addresses should be added to the outbound allow list. 14 | 15 | ## Supported Versions 16 | 17 | ### Kubernetes Versions 18 | 19 | Kubernetes versions 1.32 and below are supported by the metrics agent on AWS cloud service (EKS), Google Cloud Platform (GKE), Azure cloud services (AKS), and Oracle Cloud (OKE). 20 | 21 | ### OpenShift Versions 22 | 23 | OpenShift versions 4.17 to 4.12 are supported by the metrics agent on ROSA. 24 | 25 | #### Architectures 26 | 27 | On AWS, both AMD64 and ARM architectures are supported. 28 | 29 | ### Deploying with Helm 30 | 31 | Instructions for deploying the metrics-agent using Helm can be found [here](https://cloudability.github.io/metrics-agent/). 
For helm versioning this repository follows the [simple 1-1 versioning](https://codefresh.io/docs/docs/new-helm/helm-best-practices/#simple-1-1-versioning) strategy where the chart version is in sync with the actual application. 32 | 33 | ### Unsupported Configurations 34 | 35 | Cloudability Metrics Agent currently does not support Rancher or On Prem clusters. 36 | 37 | ### Configuration Options 38 | 39 | | Environment Variable | Description | 40 | |------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| 41 | | CLOUDABILITY_API_KEY | Cloudability api key. Not recommended to store as environment variable, instead use CLOUDABILITY_API_KEY_FILEPATH | 42 | | CLOUDABILITY_API_KEY_FILEPATH | Path to the file where the api key is stored, ex: /etc/secrets/CLOUDABILITY_API_KEY | 43 | | CLOUDABILITY_CLUSTER_NAME | Required: The cluster name to be used for the cluster the agent is running in. Cannot be exclusively whitespace. | 44 | | CLOUDABILITY_POLL_INTERVAL | Optional: The interval (Seconds) to poll metrics. Default: 180 | 45 | | CLOUDABILITY_OUTBOUND_PROXY | Optional: The URL of an outbound HTTP/HTTPS proxy for the agent to use (eg: http://x.x.x.x:8080). The URL must contain the scheme prefix (http:// or https://) | 46 | | CLOUDABILITY_OUTBOUND_PROXY_AUTH | Optional: Basic Authentication credentials to be used with the defined outbound proxy. If your outbound proxy requires basic authentication credentials can be defined in the form username:password | 47 | | CLOUDABILITY_OUTBOUND_PROXY_INSECURE | Optional: When true, does not verify TLS certificates when using the outbound proxy. Default: False | 48 | | CLOUDABILITY_INSECURE | Optional: When true, does not verify certificates when making TLS connections. 
Default: False | 49 | | CLOUDABILITY_FORCE_KUBE_PROXY | Optional: When true, forces agent to use the proxy to connect to nodes rather than attempting a direct connection. Default: False | 50 | | CLOUDABILITY_COLLECTION_RETRY_LIMIT | Optional: Number of times agent should attempt to gather metrics from each source upon a failure Default: 1 | 51 | | CLOUDABILITY_NAMESPACE | Optional: Override the namespace that the agent runs in. It is not recommended to change this as it may negatively affect the agents ability to collect data. Default: `cloudability` | 52 | | CLOUDABILITY_LOG_FORMAT | Optional: Format for log output (JSON,PLAIN) Default: PLAIN | 53 | | CLOUDABILITY_LOG_LEVEL | Optional: Log level to run the agent at (INFO,WARN,DEBUG,TRACE). Default: `INFO` | 54 | | CLOUDABILITY_SCRATCH_DIR | Optional: Temporary directory that metrics will be written to. If set, must assure that the directory exists and that the user agent UID 10000 has read/write access to the folder. Default: `/tmp` | 55 | | CLOUDABILITY_NUMBER_OF_CONCURRENT_NODE_POLLERS | Optional: Number of goroutines that are created to poll node metrics in parallel. Default: `100` | 56 | | CLOUDABILITY_INFORMER_RESYNC_INTERVAL | Optional: Period of time (in hours) that the informers will fully resync the list of running resources. Default: 24 hours. Can be set to 0 to never resync | 57 | | CLOUDABILITY_PARSE_METRIC_DATA | Optional: When true, core files will be parsed and non-relevant data will be removed prior to upload. Default: `false` | 58 | | CLOUDABILITY_HTTPS_CLIENT_TIMEOUT | Optional: Amount (in seconds) of time the http client has before timing out requests. Might need to be increased to clusters with large payloads. Default: `60` | 59 | | CLOUDABILITY_UPLOAD_REGION | Optional: The region the metrics-agent will upload data to. Default `us-west-2`. 
Supported values: `us-west-2`, `eu-central-1`, `ap-southeast-2`, `me-central-1`, `us-gov-west-1` | 60 | 61 | ```sh 62 | 63 | metrics-agent kubernetes --help 64 | Command to collect Kubernetes Metrics 65 | 66 | Usage: 67 | metrics-agent kubernetes [flags] 68 | 69 | Flags: 70 | --api_key string Cloudability api key. Not recommended to store as environment variable, instead use CLOUDABILITY_API_KEY_FILEPATH 71 | --api_key_filepath Path to the file where the api key is stored, ex: /etc/secrets/CLOUDABILITY_API_KEY 72 | --certificate_file string The path to a certificate file. - Optional 73 | --cluster_name string Kubernetes Cluster Name - required this must be unique to every cluster. 74 | --collection_retry_limit uint Number of times agent should attempt to gather metrics from each source upon a failure (default 1) 75 | -h, --help help for kubernetes 76 | --insecure When true, does not verify certificates when making TLS connections. Default: False 77 | --key_file string The path to a key file. - Optional 78 | --outbound_proxy string Outbound HTTP/HTTPS proxy eg: http://x.x.x.x:8080. Must have a scheme prefix (http:// or https://) - Optional 79 | --outbound_proxy_auth string Outbound proxy basic authentication credentials. Must defined in the form username:password - Optional 80 | --outbound_proxy_insecure When true, does not verify TLS certificates when using the outbound proxy. Default: False 81 | 82 | --force_kube_proxy When true, forces agent to use the proxy to connect to nodes rather than attempting a direct connection. Default: False 83 | --poll_interval int Time, in seconds, to poll the services infrastructure. Default: 180 (default 180) 84 | --namespace string The namespace which the agent runs in. Changing this is not recommended. (default `cloudability`) 85 | --informer_resync_interval int The amount of time, in hours, between informer resyncs. 
(default 24) 86 | --number_of_concurrent_node_pollers int The number of goroutines that are created to poll node metrics in parallel. (default `100`) 87 | --parse_metric_data bool When true, core files will be parsed and non-relevant data will be removed prior to upload. (default `false`) 88 | --https_client_timeout int Amount (in seconds) of time the https client has before timing out requests. (default `60`) 89 | --upload_region The region the metrics-agent will upload data to. (default `us-west-2`) 90 | Global Flags: 91 | --log_format string Format for log output (JSON,PLAIN) (default "PLAIN") 92 | --log_level string Log level to run the agent at (INFO,WARN,DEBUG) (default "INFO") 93 | ``` 94 | 95 | ## Deployment of Metrics-Agent 96 | 97 | There are two ways to deploy Metrics-agent: 98 | 99 | ### Deployment using yaml 100 | 101 | Cloudability customers can download the deployment yaml directly from Cloudability UI. The downloaded yaml contains default settings including the API key needed to enable the metrics-agent to upload metrics to Cloudability. The customer should change the default settings in the yaml according to their clusters' configuration and security requirements. 102 | 103 | The API key is currently supported as an environment variable in the pod as plain text. However, it is highly recommended to update the agent version to >2.13.0 and to pull from a mounted volume using CLOUDABILITY_API_KEY_FILEPATH. Customers can also use various CSP's secret manager such as AWS secret manager, GCP secret manager, etc to then mount on the agent. Please refer to Kubernetes and CSP document for such integration. 104 | 105 | Note: If the metrics-agent was deployed using the older template, ensure you provision the new YAML from either Helm >2.13.0 or the UI that creates a Kubernetes secret and mounts the correct volume before updating to use CLOUDABILITY_API_KEY_FILEPATH. 
106 | 107 | ### Deployment using helm 108 | 109 | Cloudability customers can use helm to deploy the metrics-agent. Please refer to the [Helm Installation Guide](https://cloudability.github.io/metrics-agent/). 110 | 111 | ## Computing Resources for Metrics Agent 112 | 113 | The following recommendation is based on number of nodes in the cluster. It's for references only. The actual required resources depends on a number of factors such as number of nodes, pods, workload, etc. Please adjust the resources depending on your actual usage. By default, the helm installation and manifest file configures the first row (nodes < 100) from the reference table. 114 | 115 | | Number of Nodes | CPU Request | CPU Limit | Mem Request | Mem Limit | 116 | | --- | --- | --- | --- | --- | 117 | | < 100 | 500m | 1000m | 2GBi | 4GBi | 118 | | 100-200 | 1000m | 1500m | 4GBi | 8GBi | 119 | | 200-500 | 1500m | 2000m | 8GBi | 16GBi | 120 | | 500-1000 | 2000m | 3000m | 16GBi | 24GBi | 121 | | 1000+ | 3000m | | 24GBi | | 122 | 123 | ## Networking Requirement for Metrics Agent 124 | The container that hosts the metrics agent should allow HTTPS requests to following endpoints: 125 | - metrics-collector.cloudability.com port 443 126 | 127 | The container that hosts the metrics agent should have write access to following Apptio S3 buckets: 128 | - apptio* (bucket prefixed with apptio) 129 | 130 | ## Development 131 | 132 | ### Dependency management 133 | 134 | We're using [go modules](https://github.com/golang/go/wiki/Modules) for Go dependencies. 135 | 136 | ### Source Code Analysis 137 | 138 | We're using [golangci-lint](https://github.com/golangci/golangci-lint) for static source code analysis. 139 | 140 | ## Contributing code 141 | 142 | You'll find information and help on how to contribute code in 143 | [the CONTRIBUTING document](CONTRIBUTING.md) in this repo. 144 | 145 | 146 | ### To Run Locally 147 | 148 | You must obtain a valid API Key and export it locally as an environment variable. 
149 | 150 | ```sh 151 | export CLOUDABILITY_API_KEY={your_api_key} 152 | make deploy-local 153 | ``` 154 | 155 | ## Local Development 156 | 157 | The makefile target _deploy-local_ assumes that you have [docker](https://www.docker.com/community-edition) and kubernetes (with a context: docker-for-desktop) running locally. The target does the following: 158 | 159 | - Builds a container with the local project codebase 160 | - Locally creates a deployment / pod with the local metrics agent container 161 | 162 | ### Testing 163 | In addition to running all go tests via the make step `make test`, `make test-e2e-all` runs end to end tests by spinning up a [kind](https://github.com/kubernetes-sigs/kind) cluster, building the metrics agent, deploying it to the reference clusters, then testing the collected data. The use of kind requires a local docker daemon to be running. 164 | -------------------------------------------------------------------------------- /charts/metrics-agent/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: metrics-agent 3 | description: A Helm chart for Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | version: 2.13.0 18 | 19 | # This is the version number of the application being deployed. 
This version number should be 20 | # incremented each time you make changes to the application. 21 | appVersion: 2.13.0 -------------------------------------------------------------------------------- /charts/metrics-agent/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "metrics-agent.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "metrics-agent.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "metrics-agent.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "metrics-agent.labels" -}} 38 | helm.sh/chart: {{ include "metrics-agent.chart" . }} 39 | {{ include "metrics-agent.selectorLabels" . 
}} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- if .Values.additionalLabels }} 45 | {{ toYaml .Values.additionalLabels }} 46 | {{- end }} 47 | {{- end -}} 48 | 49 | {{/* 50 | Selector labels 51 | */}} 52 | {{- define "metrics-agent.selectorLabels" -}} 53 | app.kubernetes.io/name: {{ include "metrics-agent.name" . }} 54 | app.kubernetes.io/instance: {{ .Release.Name }} 55 | {{- end -}} 56 | 57 | {{/* 58 | Create the name of the service account to use 59 | */}} 60 | {{- define "metrics-agent.serviceAccountName" -}} 61 | {{- if .Values.serviceAccount.create -}} 62 | {{ default (include "metrics-agent.fullname" .) .Values.serviceAccount.name }} 63 | {{- else -}} 64 | {{ default "default" .Values.serviceAccount.name }} 65 | {{- end -}} 66 | {{- end -}} 67 | -------------------------------------------------------------------------------- /charts/metrics-agent/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.serviceAccount.create .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: {{ include "metrics-agent.fullname" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . 
| nindent 4 }} 8 | rules: 9 | - apiGroups: 10 | - "" 11 | - "extensions" 12 | - "apps" 13 | - "batch" 14 | resources: 15 | - "namespaces" 16 | - "replicationcontrollers" 17 | - "services" 18 | - "nodes" 19 | - "nodes/spec" 20 | - "pods" 21 | - "jobs" 22 | - "cronjobs" 23 | - "persistentvolumes" 24 | - "persistentvolumeclaims" 25 | - "deployments" 26 | - "replicasets" 27 | - "daemonsets" 28 | verbs: 29 | - "get" 30 | - "watch" 31 | - "list" 32 | - apiGroups: [""] 33 | resources: 34 | - "services/proxy" 35 | - "pods/proxy" 36 | - "nodes/proxy" 37 | - "nodes/stats" 38 | verbs: 39 | - "get" 40 | - "list" 41 | {{- end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.serviceAccount.create .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: {{ include "metrics-agent.fullname" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . | nindent 4 }} 8 | roleRef: 9 | kind: ClusterRole 10 | name: {{ include "metrics-agent.fullname" . }} 11 | apiGroup: rbac.authorization.k8s.io 12 | subjects: 13 | - kind: ServiceAccount 14 | name: {{ include "metrics-agent.serviceAccountName" . }} 15 | namespace: {{ .Release.Namespace }} 16 | {{- end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "metrics-agent.fullname" . }} 5 | labels: 6 | {{- include "metrics-agent.labels" . | nindent 4 }} 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | {{- include "metrics-agent.selectorLabels" . 
| nindent 6 }} 12 | template: 13 | metadata: 14 | labels: 15 | {{- include "metrics-agent.labels" . | nindent 8 }} 16 | {{- if .Values.podLabels }}{{ toYaml .Values.podLabels | nindent 8 }}{{- end }} 17 | annotations: 18 | {{- if .Values.podAnnotations }}{{ toYaml .Values.podAnnotations | nindent 8 }}{{- end }} 19 | spec: 20 | {{- with .Values.imagePullSecrets }} 21 | imagePullSecrets: 22 | {{- toYaml . | nindent 8 }} 23 | {{- end }} 24 | serviceAccountName: {{ include "metrics-agent.serviceAccountName" . }} 25 | securityContext: {{- if not .Values.openShift }} {{- toYaml .Values.securityContext | nindent 8 }} {{- end }} 26 | containers: 27 | - name: {{ .Chart.Name }} 28 | image: "{{ .Values.image.name }}:{{ .Values.image.tag }}" 29 | securityContext: 30 | runAsUser: {{- if not .Values.openShift }} {{ .Values.securityContext.runAsUser }} {{- end }} 31 | runAsNonRoot: true 32 | allowPrivilegeEscalation: {{ .Values.allowPrivilegeEscalation }} 33 | seccompProfile: 34 | type: {{ .Values.seccompProfile.type }} 35 | capabilities: 36 | drop: 37 | {{ .Values.drop }} 38 | readOnlyRootFilesystem: {{ .Values.readOnlyRootFilesystem }} 39 | imagePullPolicy: {{ .Values.image.pullPolicy }} 40 | args: 41 | - 'kubernetes' 42 | env: 43 | - name: CLOUDABILITY_CLUSTER_NAME 44 | value: {{ .Values.clusterName }} 45 | - name: CLOUDABILITY_POLL_INTERVAL 46 | value: {{ .Values.pollInterval | quote }} 47 | - name: CLOUDABILITY_UPLOAD_REGION 48 | value: {{ .Values.uploadRegion }} 49 | - name: CLOUDABILITY_API_KEY_FILEPATH 50 | value: {{ .Values.pathToApiKey -}}/{{ .Values.fileNameWithApiKey }} 51 | {{- if .Values.extraEnv }} 52 | {{- toYaml .Values.extraEnv | nindent 12 }} 53 | {{- end }} 54 | resources: 55 | {{- toYaml .Values.resources | nindent 12 }} 56 | livenessProbe: 57 | {{- toYaml .Values.livenessProbe | nindent 12 }} 58 | volumeMounts: 59 | - name: api-key-volume 60 | mountPath: {{ .Values.pathToApiKey }} 61 | readOnly: true 62 | {{- if .Values.volumeMounts }} 63 | {{- toYaml 
.Values.volumeMounts | nindent 12 }} 64 | {{- end }} 65 | volumes: 66 | - name: api-key-volume 67 | secret: 68 | secretName: {{ include "metrics-agent.fullname" . }} 69 | {{- if .Values.volumes }} 70 | {{- toYaml .Values.volumes | nindent 8 }} 71 | {{- end }} 72 | {{- with .Values.nodeSelector }} 73 | nodeSelector: 74 | {{- toYaml . | nindent 8 }} 75 | {{- end }} 76 | {{- with .Values.affinity }} 77 | affinity: 78 | {{- toYaml . | nindent 8 }} 79 | {{- end }} 80 | {{- with .Values.tolerations }} 81 | tolerations: 82 | {{- toYaml . | nindent 8 }} 83 | {{- end }} 84 | -------------------------------------------------------------------------------- /charts/metrics-agent/templates/extra-objects.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraObjects }} 2 | --- 3 | {{ tpl (toYaml .) $ }} 4 | {{ end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/role.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.serviceAccount.create .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: {{ include "metrics-agent.fullname" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . | nindent 4 }} 8 | rules: 9 | - apiGroups: ["*"] 10 | resources: 11 | - "pods" 12 | - "pods/log" 13 | verbs: 14 | - "get" 15 | - "list" 16 | {{- end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if and .Values.serviceAccount.create .Values.rbac.create }} 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: RoleBinding 4 | metadata: 5 | name: {{ include "metrics-agent.fullname" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . 
| nindent 4 }} 8 | roleRef: 9 | kind: Role 10 | name: {{ include "metrics-agent.fullname" . }} 11 | apiGroup: rbac.authorization.k8s.io 12 | subjects: 13 | - kind: ServiceAccount 14 | name: {{ include "metrics-agent.serviceAccountName" . }} 15 | namespace: {{ .Release.Namespace }} 16 | {{- end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if not .Values.secretName }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | name: {{ include "metrics-agent.fullname" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . | nindent 4 }} 8 | type: Opaque 9 | data: 10 | CLOUDABILITY_API_KEY: {{ .Values.apiKey | b64enc | quote }} 11 | {{- end }} -------------------------------------------------------------------------------- /charts/metrics-agent/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "metrics-agent.serviceAccountName" . }} 6 | labels: 7 | {{- include "metrics-agent.labels" . | nindent 4 }} 8 | {{- end -}} 9 | -------------------------------------------------------------------------------- /charts/metrics-agent/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Default values for metrics-agent. 3 | # This is a YAML-formatted file. 4 | # Declare variables to be passed into your templates. 5 | 6 | nameOverride: "" 7 | fullnameOverride: "" 8 | 9 | # apiKey or secretName is REQUIRED. 10 | # You must generate a template to get your apiKey. 11 | # In the Cloudability app, go to Insights -> Containers, then click 12 | # the provisioning cluster button that will take you through our provisioning workflow. 
13 | apiKey: "" 14 | # configurable path to the location where the CLOUDABILITY_API_KEY is stored on the container 15 | pathToApiKey: "/etc/secrets" 16 | # configurable fileName to the file where the api key is stored 17 | fileNameWithApiKey: "CLOUDABILITY_API_KEY" 18 | 19 | # You may also store the apikey in a secret and pull the apikey from there as well. 20 | # name of the secret already stored in k8s containing CLOUDABILITY_API_KEY. 21 | secretName: "" 22 | 23 | # clusterName is REQUIRED. 24 | # The cluster name to be used for the cluster the agent runs in. 25 | clusterName: "" 26 | # The interval in seconds to poll metrics. 27 | pollInterval: 180 28 | # The region to upload metrics data to. See README for accepted values 29 | uploadRegion: "us-west-2" 30 | 31 | image: 32 | name: cloudability/metrics-agent 33 | tag: 2.13.0 34 | pullPolicy: Always 35 | 36 | imagePullSecrets: [] 37 | 38 | resources: 39 | requests: 40 | cpu: ".5" 41 | memory: "2Gi" 42 | limits: 43 | cpu: "1.0" 44 | memory: "4Gi" 45 | 46 | livenessProbe: 47 | exec: 48 | command: 49 | - touch 50 | - tmp/healthy 51 | initialDelaySeconds: 120 52 | periodSeconds: 600 53 | timeoutSeconds: 5 54 | 55 | # serviceAccount.create: true is required 56 | serviceAccount: 57 | create: true 58 | 59 | # rbac.create: true is required 60 | rbac: 61 | create: true 62 | 63 | # For agent configuration options, see https://github.com/cloudability/metrics-agent/blob/master/README.md 64 | extraEnv: [] 65 | 66 | securityContext: 67 | runAsUser: 10000 68 | 69 | nodeSelector: {} 70 | 71 | tolerations: [] 72 | 73 | affinity: {} 74 | 75 | allowPrivilegeEscalation: false 76 | 77 | openShift: false 78 | 79 | seccompProfile: 80 | type: RuntimeDefault 81 | 82 | volumes: [] 83 | # - name: foo 84 | # secret: 85 | # secretName: mysecret 86 | # optional: false 87 | 88 | volumeMounts: [] 89 | # - name: foo 90 | # mountPath: "/etc/foo" 91 | # readOnly: true 92 | 93 | # Extra labels to add to the pod only. 
94 | podLabels: {} 95 | 96 | # Extra labels to add to all resources, including pods. 97 | additionalLabels: {} 98 | 99 | # Extra annotations to add to the pod only. 100 | podAnnotations: {} 101 | 102 | # when setting readOnlyRootFilesystem to true, the proper volume/volumeMount must be 103 | # configured correctly otherwise the agent will crash 104 | # the following volumes: and volumeMounts: sections below 105 | # will allow the agent to run successfully with readOnlyRootFilesystem set to true 106 | #volumeMounts: 107 | # - mountPath: /tmp 108 | # name: tmp 109 | # 110 | #volumes: 111 | # - emptyDir: {} 112 | # name: tmp 113 | readOnlyRootFilesystem: false 114 | 115 | drop: 116 | - ALL 117 | 118 | # Extra K8s manifests to deploy, 119 | # NOTE: not all various extraObject deployment configurations are tested/supported. When adding extra resources 120 | # to the metrics-agent deployment, Cloudability may not be able to assist in deployment troubleshooting 121 | extraObjects: [] 122 | # Example extra manifest 123 | # - apiVersion: external-secrets.io/v1beta1 124 | # kind: SecretStore 125 | # metadata: 126 | # name: aws-store-xxxxx 127 | # annotations: 128 | # argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true 129 | # argocd.argoproj.io/sync-wave: "99" 130 | # spec: 131 | # provider: 132 | # aws: 133 | # service: SecretsManager 134 | # region: us-west-2 135 | -------------------------------------------------------------------------------- /client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "bytes" 5 | "crypto/tls" 6 | "encoding/base64" 7 | "encoding/json" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "math" 12 | "net" 13 | "net/http" 14 | "net/http/httputil" 15 | "net/url" 16 | "os" 17 | "regexp" 18 | "strconv" 19 | "strings" 20 | "time" 21 | 22 | log "github.com/sirupsen/logrus" 23 | 24 | "crypto/md5" //nolint gosec 25 | 26 | 
"github.com/cloudability/metrics-agent/measurement" 27 | "github.com/cloudability/metrics-agent/util" 28 | "github.com/cloudability/metrics-agent/version" 29 | ) 30 | 31 | //nolint gosec 32 | 33 | const DefaultBaseURL string = "https://metrics-collector.cloudability.com/metricsample" 34 | const EUBaseURL string = "https://metrics-collector-eu.cloudability.com/metricsample" 35 | const AUBaseURL string = "https://metrics-collector-au.cloudability.com/metricsample" 36 | const MEBaseURL string = "https://metrics-collector-me.cloudability.com/metricsample" 37 | const GovBaseURL string = "https://metrics-collector-production-gov.cloudability.com/metricsample" 38 | const StagingBaseURL string = "https://metrics-collector-staging.cloudability.com/metricsample" 39 | const defaultTimeout = 1 * time.Minute 40 | const defaultMaxRetries = 5 41 | 42 | const authHeader = "token" 43 | const apiKeyHeader = "x-api-key" 44 | const clusterUIDHeader = "x-cluster-uid" 45 | const agentVersionHeader = "x-agent-version" 46 | const contentTypeHeader = "Content-Type" 47 | const userAgentHeader = "User-Agent" 48 | const uploadFileHash = "x-upload-file" 49 | const contentMD5 = "Content-MD5" 50 | const proxyAuthHeader = "Proxy-Authorization" 51 | 52 | var /* const */ validToken = regexp.MustCompile(`^\w+$`) 53 | 54 | // Configuration represents configurable values for the Cloudability Client 55 | type Configuration struct { 56 | Timeout time.Duration 57 | Token string 58 | MaxRetries int 59 | BaseURL string 60 | ProxyURL url.URL 61 | ProxyAuth string 62 | ProxyInsecure bool 63 | Verbose bool 64 | Region string 65 | } 66 | 67 | // NewHTTPMetricClient will configure a new instance of a Cloudability client. 68 | func NewHTTPMetricClient(cfg Configuration) (MetricClient, error) { 69 | 70 | if !validToken.MatchString(cfg.Token) { 71 | return nil, errors.New("token format is invalid (only alphanumeric are allowed). Please check you " + 72 | "are using your Containers Insights API Key (not Frontdoor). 
This can be found in the YAML after " + 73 | "provisioning in the 'Insights -> Containers' UI under the CLOUDABILITY_API_KEY environment variable") 74 | } 75 | 76 | // Use defaults 77 | if cfg.Timeout.Seconds() <= 0 { 78 | if cfg.Verbose { 79 | log.Infof("Using default timeout of %v", defaultTimeout) 80 | } 81 | cfg.Timeout = defaultTimeout 82 | } 83 | if len(strings.TrimSpace(cfg.BaseURL)) == 0 { 84 | if cfg.Verbose { 85 | log.Infof("Using default baseURL of %v", DefaultBaseURL) 86 | } 87 | cfg.BaseURL = GetUploadURLByRegion(cfg.Region) 88 | } 89 | if cfg.MaxRetries <= 0 { 90 | if cfg.Verbose { 91 | log.Infof("Using default retries %v", defaultMaxRetries) 92 | } 93 | cfg.MaxRetries = defaultMaxRetries 94 | } 95 | 96 | netTransport := &http.Transport{ 97 | Dial: (&net.Dialer{Timeout: cfg.Timeout}).Dial, 98 | TLSHandshakeTimeout: cfg.Timeout, 99 | } 100 | 101 | // configure outbound proxy 102 | if len(cfg.ProxyURL.Host) > 0 { 103 | ConnectHeader := http.Header{} 104 | 105 | if cfg.ProxyAuth != "" { 106 | basicAuth := "Basic " + base64.StdEncoding.EncodeToString([]byte(cfg.ProxyAuth)) 107 | ConnectHeader.Add(proxyAuthHeader, basicAuth) 108 | } 109 | 110 | netTransport = &http.Transport{ 111 | Dial: (&net.Dialer{Timeout: cfg.Timeout}).Dial, 112 | Proxy: http.ProxyURL(&cfg.ProxyURL), 113 | ProxyConnectHeader: ConnectHeader, 114 | TLSHandshakeTimeout: cfg.Timeout, 115 | TLSClientConfig: &tls.Config{ 116 | //nolint gas 117 | InsecureSkipVerify: cfg.ProxyInsecure, 118 | }, 119 | } 120 | } 121 | 122 | httpClient := http.Client{ 123 | Timeout: cfg.Timeout, 124 | Transport: netTransport, 125 | } 126 | 127 | userAgent := fmt.Sprintf("cldy-client/%v", version.VERSION) 128 | 129 | return httpMetricClient{ 130 | httpClient: httpClient, 131 | userAgent: userAgent, 132 | baseURL: cfg.BaseURL, 133 | token: cfg.Token, 134 | verbose: cfg.Verbose, 135 | maxRetries: cfg.MaxRetries, 136 | }, nil 137 | 138 | } 139 | 140 | // MetricClient represents a interface to send a cloudability 
measurement or metrics sample to an endpoint. 141 | type MetricClient interface { 142 | SendMetricSample(*os.File, string, string) error 143 | GetUploadURL(*os.File, string, string, string, int) (string, string, error) 144 | } 145 | 146 | type httpMetricClient struct { 147 | httpClient http.Client 148 | userAgent string 149 | baseURL string 150 | token string 151 | verbose bool 152 | maxRetries int 153 | } 154 | 155 | // MetricSampleResponse represents the response from the uploadmetrics endpoint 156 | type MetricSampleResponse struct { 157 | Location string `json:"location"` 158 | } 159 | 160 | // SendMetricSample uploads a file at a given path to the metrics endpoint. 161 | func (c httpMetricClient) SendMetricSample(metricSampleFile *os.File, agentVersion string, UID string) (rerr error) { 162 | metricSampleURL := c.baseURL 163 | 164 | resp, err := c.retryWithBackoff(metricSampleURL, metricSampleFile, agentVersion, UID) 165 | if err != nil { 166 | return err 167 | } 168 | if resp == nil { 169 | return err 170 | } 171 | 172 | defer util.SafeClose(resp.Body.Close, &rerr) 173 | 174 | if resp.StatusCode != http.StatusOK { 175 | return fmt.Errorf("Request received %v response", resp.StatusCode) 176 | } 177 | 178 | return nil 179 | } 180 | 181 | func toJSONLines(measurements []measurement.Measurement) ([]byte, error) { 182 | output := []byte{} 183 | newline := "\n" 184 | for _, m := range measurements { 185 | b, err := json.Marshal(m) 186 | if err != nil { 187 | return nil, err 188 | } 189 | output = append(output, b...) 190 | output = append(output, newline...) 
191 | } 192 | return output, nil 193 | } 194 | 195 | // nolint gocyclo 196 | func (c httpMetricClient) retryWithBackoff( 197 | metricSampleURL string, 198 | metricFile *os.File, 199 | agentVersion, 200 | UID string, 201 | ) (resp *http.Response, err error) { 202 | 203 | for i := 0; i < c.maxRetries; i++ { 204 | 205 | var uploadURL, hash string 206 | uploadURL, hash, err = c.GetUploadURL(metricFile, metricSampleURL, agentVersion, UID, i) 207 | if err != nil { 208 | log.Debugf("Client proxy or deployment YAML may be misconfigured. Please check your client settings.") 209 | log.Errorf("error encountered while retrieving upload location: %v", err) 210 | continue 211 | } 212 | 213 | var awsRequestID, statusMessage string 214 | var responseDump, requestDump []byte 215 | var dumpErr error 216 | resp, requestDump, err = c.buildAndDoRequest(metricFile, uploadURL, agentVersion, UID, hash) 217 | if resp != nil { 218 | awsRequestID = resp.Header.Get("X-Amz-Request-Id") 219 | statusMessage = resp.Status 220 | responseDump, dumpErr = httputil.DumpResponse(resp, true) 221 | if dumpErr != nil { 222 | log.Errorln(dumpErr) 223 | } 224 | } 225 | 226 | if err != nil && strings.Contains(err.Error(), "Client.Timeout exceeded") { 227 | time.Sleep(getSleepDuration(i)) 228 | log.Errorf("Put S3 Retry %d: Failed to put data to S3 due to request timeout, "+ 229 | "Status: %s X-Amzn-Requestid: %s", i, statusMessage, awsRequestID) 230 | log.Debugln(string(requestDump)) 231 | if resp != nil { 232 | log.Debugln(string(responseDump)) 233 | } 234 | continue 235 | } 236 | 237 | if resp == nil { 238 | log.Errorf("Put S3 Retry %d: Failed to put data to S3. 
Response is empty", i) 239 | log.Debugln(string(requestDump)) 240 | continue 241 | } 242 | 243 | buf := new(bytes.Buffer) 244 | _, err = buf.ReadFrom(resp.Body) 245 | if err != nil { 246 | continue 247 | } 248 | 249 | s := buf.String() 250 | 251 | if strings.Contains(s, "Incompatible agent version please upgrade") { 252 | panic("Incompatible agent version please upgrade") 253 | } 254 | if resp.StatusCode == http.StatusInternalServerError || resp.StatusCode == http.StatusForbidden { 255 | time.Sleep(getSleepDuration(i)) 256 | log.Errorf("Put S3 Retry %d: Failed to put data to S3, Status: %s X-Amzn-Requestid: %s", i, 257 | statusMessage, awsRequestID) 258 | log.Debugln(string(requestDump)) 259 | log.Debugln(string(responseDump)) 260 | continue 261 | } 262 | log.Infof("Put S3 Retry %d: Successfully put data to S3, X-Amzn-Requestid: %s", i, awsRequestID) 263 | break 264 | } 265 | 266 | return resp, err 267 | } 268 | 269 | // nolint:revive 270 | func (c httpMetricClient) buildAndDoRequest( 271 | metricFile *os.File, 272 | metricSampleURL, 273 | agentVersion, 274 | UID string, 275 | hash string, 276 | ) (resp *http.Response, requestDump []byte, err error) { 277 | 278 | var ( 279 | req *http.Request 280 | dumpErr error 281 | ) 282 | 283 | metricFile, err = os.Open(metricFile.Name()) 284 | if err != nil { 285 | log.Fatalf("Failed to open metric sample: %v", err) 286 | return nil, nil, err 287 | } 288 | 289 | fi, err := metricFile.Stat() 290 | if err != nil { 291 | return nil, nil, err 292 | } 293 | 294 | size := fi.Size() 295 | 296 | req, err = http.NewRequest(http.MethodPut, metricSampleURL, metricFile) 297 | if err != nil { 298 | return nil, nil, err 299 | } 300 | 301 | req.Header.Set(contentTypeHeader, "multipart/form-data") 302 | req.Header.Set(contentMD5, hash) 303 | req.ContentLength = size 304 | 305 | requestDump, dumpErr = httputil.DumpRequest(req, false) 306 | if dumpErr != nil { 307 | log.Errorln(dumpErr) 308 | } 309 | 310 | resp, respErr := c.httpClient.Do(req) 
311 | 312 | return resp, requestDump, respErr 313 | } 314 | 315 | func getSleepDuration(tries int) time.Duration { 316 | seconds := int((0.5) * (math.Pow(2, float64(tries)) - 1)) 317 | return time.Duration(seconds) * time.Second 318 | } 319 | 320 | func (c httpMetricClient) GetUploadURL( 321 | metricFile *os.File, 322 | metricSampleURL, 323 | agentVersion, 324 | UID string, 325 | attempt int, 326 | ) (string, string, error) { 327 | var rerr error 328 | hash, err := GetB64MD5Hash(metricFile.Name()) 329 | if err != nil { 330 | log.Errorf("error encountered generating upload check sum: %v", err) 331 | return "", "", err 332 | } 333 | 334 | d := MetricSampleResponse{} 335 | 336 | req, err := http.NewRequest(http.MethodPost, metricSampleURL, nil) 337 | if err != nil { 338 | return "", "", err 339 | } 340 | 341 | req.Header.Set(contentTypeHeader, "application/json") 342 | req.Header.Set(authHeader, c.token) 343 | req.Header.Set(apiKeyHeader, c.token) 344 | req.Header.Set(userAgentHeader, c.userAgent) 345 | req.Header.Set(agentVersionHeader, agentVersion) 346 | req.Header.Set(clusterUIDHeader, UID) 347 | req.Header.Set(uploadFileHash, hash) 348 | 349 | if c.verbose { 350 | requestDump, requestErr := httputil.DumpRequest(req, true) 351 | if requestErr != nil { 352 | log.Errorln(requestErr) 353 | } 354 | log.Infoln(string(requestDump)) 355 | } 356 | 357 | var awsRequestID, statusMessage string 358 | var responseDump []byte 359 | var dumpErr error 360 | 361 | resp, err := c.httpClient.Do(req) 362 | if resp != nil { 363 | awsRequestID = resp.Header.Get("X-Amzn-Requestid") 364 | statusMessage = resp.Status 365 | responseDump, dumpErr = httputil.DumpResponse(resp, true) 366 | if dumpErr != nil { 367 | log.Errorln(dumpErr) 368 | } 369 | } 370 | if err != nil { 371 | log.Errorf("GetURL Retry %d: Failed to acquire s3 url, Status: %s X-Amzn-Requestid: %s", attempt, 372 | statusMessage, awsRequestID) 373 | if resp != nil { 374 | log.Debugln(string(responseDump)) 375 | } 376 | return 
"", "", fmt.Errorf("Unable to retrieve upload URI: %v", err) 377 | } 378 | 379 | defer util.SafeClose(resp.Body.Close, &rerr) 380 | 381 | if resp.StatusCode != 200 { 382 | log.Errorf("GetURL Retry %d: Failed to acquire s3 url, Status: %s X-Amzn-Requestid: %s", attempt, 383 | statusMessage, awsRequestID) 384 | log.Debugln(string(responseDump)) 385 | return "", d.Location, errors.New("Error retrieving upload URI: " + strconv.Itoa(resp.StatusCode)) 386 | } 387 | 388 | data, err := io.ReadAll(resp.Body) 389 | if err == nil && data != nil { 390 | err = json.Unmarshal(data, &d) 391 | } 392 | 393 | log.Infof("GetURL Retry %d: Successfully acquired s3 url, X-Amzn-Requestid: %s", attempt, awsRequestID) 394 | return d.Location, hash, err 395 | } 396 | 397 | // GetB64MD5Hash returns base64 encoded MD5 Hash 398 | func GetB64MD5Hash(name string) (b64Hash string, rerr error) { 399 | //nolint gosec 400 | f, err := os.Open(name) 401 | if err != nil { 402 | log.Fatal(err) 403 | } 404 | 405 | defer util.SafeClose(f.Close, &rerr) 406 | 407 | //nolint gas 408 | h := md5.New() 409 | 410 | if _, err := io.Copy(h, f); err != nil { 411 | log.Fatal(err) 412 | } 413 | 414 | return base64.StdEncoding.EncodeToString(h.Sum(nil)), err 415 | } 416 | 417 | // GetUploadURLByRegion returns the correct base url depending on the env variable CLOUDABILITY_UPLOAD_REGION. 418 | // If value is not supported, default to us-west-2 (original) URL 419 | func GetUploadURLByRegion(region string) string { 420 | switch region { 421 | case "eu-central-1": 422 | return EUBaseURL 423 | case "ap-southeast-2": 424 | return AUBaseURL 425 | case "me-central-1": 426 | return MEBaseURL 427 | case "us-west-2": 428 | return DefaultBaseURL 429 | case "us-gov-west-1": 430 | return GovBaseURL 431 | case "us-west-2-staging": 432 | return StagingBaseURL 433 | default: 434 | log.Warnf("Region %s is not supported. 
Defaulting to us-west-2 region.", region) 435 | return DefaultBaseURL 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /client/export_test.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | var AuthHeader = authHeader 4 | var APIKeyHeader = apiKeyHeader 5 | var ClusterUIDHeader = clusterUIDHeader 6 | var AgentVersionHeader = agentVersionHeader 7 | var ContentTypeHeader = contentTypeHeader 8 | var UploadFileHash = uploadFileHash 9 | var ContentMD5 = contentMD5 10 | 11 | var ToJSONLines = toJSONLines 12 | -------------------------------------------------------------------------------- /client/testdata/random-test-data.txt: -------------------------------------------------------------------------------- 1 | l0YGtLJQzVuusys6wYd5qbTsttCCzSSjCUAENbBwpiiTAvaTAyFa1ZTa1L5SfdECKhuL8k1QOow3bFsbWBNlIgjpFbStNiAvi!mqcHOuHXS!C2rX8IMiNO6EwCQN9nOsSC96jFN4AIxc15DueAzKnEEeptvkSKaz9gbOft!ZN84BnX!nLHON9WMYvQ7wTTF1Gtv4N2fgeOXPq0CHeu1XFTF8QWGlKQK3PaozfYZRd07uDpMRvv3yjfOq!lEtJhEcup7lNRFGzHsj!CL26vrah!Z!88CNyETLhfjD2DuEFnsN5LNaQFMfVbkaVJ2zQt7vSdD9mtFFgUenTKkFMLPgWrQJT3YtwRpFoknLqIjfl1W2Z6Hy0QVYGL64DZDx5sj8XDtkat8fCXtoGTkPdTisOJl32b2PHHgnpDXlMzWd8aoZsxD1V5uWsymjY29Qg1Swp9UjsQU4lVjYoDGW!Gkq86Cdf6m9Jbun13jMzx7er5JeWD4V27cZn8qIouA7iZlsPHpWsXpejedi52cx04vif67802qbfoOMhCtv258HphXKKCGsfIMjtLuoZEVTnUawvkXYToznFNHMQbXqoA9i895NONzrgP2XzXTDdAuSlpSVKGqw8YHuBAwi4yQrH8xiKb4KzuproQHRXcncMrSXZPw7AhcKgxMZSbGyayDn0XjXqrDkJT69dljMOVeLdfsf0US1ZKYgpBQidQtm1gmrL0rjDZmNSMX8jUVEIxoUHTrhDjdgIjDV6LdzG5JoUjYvIRxMX1l2nXqgkEHAk1I8U!by6aQQYL3FhuvzaNTCapukLOoyU8A2CmmnuEUUzOc7OMcjvAMR6D3RnNpYDg9m7pbB1b6hEnBFeqECpYPBNHe1O5WRwN8VkfD!1IgxKX5sLzkE0ngz8Z7SH5ywcKG92jRK5fIp2reQBW6ogYx21gPIjK0OX8H!SGntlhkr1worzoIOGFHJb5aGdzAMSjidI!y9xS2CJlbXw88zrfAgK 2 | -------------------------------------------------------------------------------- /client/testdata/test-cluster-1510159016.tgz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudability/metrics-agent/884572f14f0ba93748659865ec2c2c78946c09d9/client/testdata/test-cluster-1510159016.tgz -------------------------------------------------------------------------------- /cmd/kubernetesCmd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/cloudability/metrics-agent/kubernetes" 5 | "github.com/cloudability/metrics-agent/util" 6 | 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/viper" 9 | ) 10 | 11 | // nolint:revive 12 | var ( 13 | config kubernetes.KubeAgentConfig 14 | requiredArgs = []string{ 15 | "cluster_name", 16 | } 17 | kubernetesCmd = &cobra.Command{ 18 | Use: "kubernetes", 19 | Short: "Collect Kubernetes Metrics", 20 | Long: "Command to collect Kubernetes Metrics", 21 | PreRunE: func(cmd *cobra.Command, args []string) error { 22 | return util.CheckRequiredSettings(requiredArgs) 23 | }, 24 | Run: func(cmd *cobra.Command, args []string) { 25 | kubernetes.CollectKubeMetrics(config) 26 | }, 27 | } 28 | ) 29 | 30 | func init() { 31 | 32 | // add cobra and viper ENVs and flags 33 | kubernetesCmd.PersistentFlags().StringVar( 34 | &config.APIKey, 35 | "api_key", 36 | "", 37 | "Cloudability API Key - required if api key is not stored in volume mount", 38 | ) 39 | kubernetesCmd.PersistentFlags().StringVar( 40 | &config.ClusterName, 41 | "cluster_name", 42 | "", 43 | "Kubernetes Cluster Name - required this must be unique to every cluster.", 44 | ) 45 | kubernetesCmd.PersistentFlags().IntVar( 46 | &config.PollInterval, 47 | "poll_interval", 48 | 180, 49 | "Time, in seconds, to poll the services infrastructure.", 50 | ) 51 | kubernetesCmd.PersistentFlags().UintVar( 52 | &config.CollectionRetryLimit, 53 | "collection_retry_limit", 54 | kubernetes.DefaultCollectionRetry, 55 | "Number of times agent should attempt to gather metrics from each source 
upon a failure", 56 | ) 57 | kubernetesCmd.PersistentFlags().StringVar( 58 | &config.Cert, 59 | "certificate_file", 60 | "", 61 | "The path to a certificate file. - Optional", 62 | ) 63 | kubernetesCmd.PersistentFlags().StringVar( 64 | &config.Key, 65 | "key_file", 66 | "", 67 | "The path to a key file. - Optional", 68 | ) 69 | kubernetesCmd.PersistentFlags().StringVar( 70 | &config.OutboundProxy, 71 | "outbound_proxy", 72 | "", 73 | "Outbound HTTP/HTTPS proxy eg: http://x.x.x.x:8080. Must have a scheme prefix (http:// or https://) - Optional", 74 | ) 75 | kubernetesCmd.PersistentFlags().StringVar( 76 | &config.OutboundProxyAuth, 77 | "outbound_proxy_auth", 78 | "", 79 | "Outbound proxy basic authentication credentials. Must defined in the form username:password - Optional", 80 | ) 81 | kubernetesCmd.PersistentFlags().BoolVar( 82 | &config.OutboundProxyInsecure, 83 | "outbound_proxy_insecure", 84 | false, 85 | "When true, does not verify TLS certificates when using the outbound proxy. Default: False", 86 | ) 87 | kubernetesCmd.PersistentFlags().BoolVar( 88 | &config.Insecure, 89 | "insecure", 90 | false, 91 | "When true, does not verify certificates when making TLS connections. 
Default: False", 92 | ) 93 | kubernetesCmd.PersistentFlags().BoolVar( 94 | &config.ForceKubeProxy, 95 | "force_kube_proxy", 96 | false, 97 | "When true, disables direct node connection and forces proxy use.", 98 | ) 99 | kubernetesCmd.PersistentFlags().StringVar( 100 | &config.Namespace, 101 | "namespace", 102 | "cloudability", 103 | "Kubernetes Namespace that the Agent is Running In", 104 | ) 105 | kubernetesCmd.PersistentFlags().StringVar( 106 | &config.ScratchDir, 107 | "scratch_dir", 108 | "/tmp", 109 | "Directory metrics will be written to", 110 | ) 111 | kubernetesCmd.PersistentFlags().IntVar( 112 | &config.InformerResyncInterval, 113 | "informer_resync_interval", 114 | 24, 115 | "Time (in hours) between informer resync", 116 | ) 117 | kubernetesCmd.PersistentFlags().IntVar( 118 | &config.ConcurrentPollers, 119 | "number_of_concurrent_node_pollers", 120 | 100, 121 | "Number of concurrent goroutines created when polling node data. Default 100", 122 | ) 123 | kubernetesCmd.PersistentFlags().BoolVar( 124 | &config.ParseMetricData, 125 | "parse_metric_data", 126 | false, 127 | "When true, core files will be parsed and non-relevant data will be removed prior to upload. Default: False", 128 | ) 129 | kubernetesCmd.PersistentFlags().IntVar( 130 | &config.HTTPSTimeout, 131 | "https_client_timeout", 132 | 60, 133 | "Amount (in seconds) of time the https client has before timing out requests. Default 60", 134 | ) 135 | kubernetesCmd.PersistentFlags().StringVar( 136 | &config.UploadRegion, 137 | "upload_region", 138 | "us-west-2", 139 | "The region the metrics-agent will upload data to. Default us-west-2", 140 | ) 141 | kubernetesCmd.PersistentFlags().StringVar( 142 | &config.CustomS3UploadBucket, 143 | "custom_s3_bucket", 144 | "", 145 | "The S3 bucket the metrics-agent will upload data to. 
Default is an empty string which will not upload "+ 146 | "to custom s3 location", 147 | ) 148 | kubernetesCmd.PersistentFlags().StringVar( 149 | &config.CustomS3Region, 150 | "custom_s3_region", 151 | "", 152 | "The AWS region that the custom s3 bucket is in", 153 | ) 154 | kubernetesCmd.PersistentFlags().StringVar( 155 | &config.APIKeyFilepath, 156 | "api_key_filepath", 157 | "", 158 | "Recommended - The file path where the api key is stored", 159 | ) 160 | 161 | //nolint gas 162 | _ = viper.BindPFlag("api_key", kubernetesCmd.PersistentFlags().Lookup("api_key")) 163 | _ = viper.BindPFlag("cluster_name", kubernetesCmd.PersistentFlags().Lookup("cluster_name")) 164 | _ = viper.BindPFlag("heapster_override_url", kubernetesCmd.PersistentFlags().Lookup("heapster_override_url")) 165 | _ = viper.BindPFlag("poll_interval", kubernetesCmd.PersistentFlags().Lookup("poll_interval")) 166 | _ = viper.BindPFlag("collection_retry_limit", kubernetesCmd.PersistentFlags().Lookup("collection_retry_limit")) 167 | _ = viper.BindPFlag("certificate_file", kubernetesCmd.PersistentFlags().Lookup("certificate_file")) 168 | _ = viper.BindPFlag("key_file", kubernetesCmd.PersistentFlags().Lookup("key_file")) 169 | _ = viper.BindPFlag("outbound_proxy", kubernetesCmd.PersistentFlags().Lookup("outbound_proxy")) 170 | _ = viper.BindPFlag("outbound_proxy_auth", kubernetesCmd.PersistentFlags().Lookup("outbound_proxy_auth")) 171 | _ = viper.BindPFlag("outbound_proxy_insecure", kubernetesCmd.PersistentFlags().Lookup("outbound_proxy_insecure")) 172 | _ = viper.BindPFlag("insecure", kubernetesCmd.PersistentFlags().Lookup("insecure")) 173 | _ = viper.BindPFlag("retrieve_node_summaries", kubernetesCmd.PersistentFlags().Lookup("retrieve_node_summaries")) 174 | _ = viper.BindPFlag("get_all_container_stats", kubernetesCmd.PersistentFlags().Lookup("get_all_container_stats")) 175 | _ = viper.BindPFlag("force_kube_proxy", kubernetesCmd.PersistentFlags().Lookup("force_kube_proxy")) 176 | _ = 
viper.BindPFlag("namespace", kubernetesCmd.PersistentFlags().Lookup("namespace")) 177 | _ = viper.BindPFlag("collect_heapster_export", kubernetesCmd.PersistentFlags().Lookup("collect_heapster_export")) 178 | _ = viper.BindPFlag("scratch_dir", kubernetesCmd.PersistentFlags().Lookup("scratch_dir")) 179 | _ = viper.BindPFlag("informer_resync_interval", kubernetesCmd.PersistentFlags().Lookup("informer_resync_interval")) 180 | _ = viper.BindPFlag("number_of_concurrent_node_pollers", 181 | kubernetesCmd.PersistentFlags().Lookup("number_of_concurrent_node_pollers")) 182 | _ = viper.BindPFlag("parse_metric_data", kubernetesCmd.PersistentFlags().Lookup("parse_metric_data")) 183 | _ = viper.BindPFlag("https_client_timeout", kubernetesCmd.PersistentFlags().Lookup("https_client_timeout")) 184 | _ = viper.BindPFlag("upload_region", kubernetesCmd.PersistentFlags().Lookup("upload_region")) 185 | _ = viper.BindPFlag("custom_s3_bucket", kubernetesCmd.PersistentFlags().Lookup("custom_s3_bucket")) 186 | _ = viper.BindPFlag("custom_s3_region", kubernetesCmd.PersistentFlags().Lookup("custom_s3_region")) 187 | _ = viper.BindPFlag("api_key_filepath", kubernetesCmd.PersistentFlags().Lookup("api_key_filepath")) 188 | viper.SetEnvPrefix("cloudability") 189 | viper.AutomaticEnv() 190 | 191 | RootCmd.AddCommand(kubernetesCmd) 192 | 193 | config = kubernetes.KubeAgentConfig{ 194 | APIKey: viper.GetString("api_key"), 195 | ClusterName: viper.GetString("cluster_name"), 196 | PollInterval: viper.GetInt("poll_interval"), 197 | CollectionRetryLimit: viper.GetUint("collection_retry_limit"), 198 | OutboundProxy: viper.GetString("outbound_proxy"), 199 | OutboundProxyAuth: viper.GetString("outbound_proxy_auth"), 200 | OutboundProxyInsecure: viper.GetBool("outbound_proxy_insecure"), 201 | Insecure: viper.GetBool("insecure"), 202 | Cert: viper.GetString("certificate_file"), 203 | Key: viper.GetString("key_file"), 204 | ConcurrentPollers: viper.GetInt("number_of_concurrent_node_pollers"), 205 | 
ForceKubeProxy: viper.GetBool("force_kube_proxy"), 206 | Namespace: viper.GetString("namespace"), 207 | ScratchDir: viper.GetString("scratch_dir"), 208 | InformerResyncInterval: viper.GetInt("informer_resync_interval"), 209 | ParseMetricData: viper.GetBool("parse_metric_data"), 210 | HTTPSTimeout: viper.GetInt("https_client_timeout"), 211 | UploadRegion: viper.GetString("upload_region"), 212 | CustomS3UploadBucket: viper.GetString("custom_s3_bucket"), 213 | CustomS3Region: viper.GetString("custom_s3_region"), 214 | APIKeyFilepath: viper.GetString("api_key_filepath"), 215 | } 216 | 217 | } 218 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | util "github.com/cloudability/metrics-agent/util" 5 | cldyVersion "github.com/cloudability/metrics-agent/version" 6 | log "github.com/sirupsen/logrus" 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/viper" 9 | ) 10 | 11 | // RootCmd is the cobra root command to be executed 12 | // nolint:revive 13 | var RootCmd = &cobra.Command{ 14 | Use: "metrics-agent [command] [flags]", 15 | Short: "Starts the Cloudability Metrics Agent", 16 | Long: `Starts the Cloudability Metrics Agent for the configured metrics collectors and polling interval.`, 17 | Args: cobra.MinimumNArgs(1), 18 | TraverseChildren: true, 19 | PersistentPreRunE: func(cmd *cobra.Command, args []string) error { 20 | return util.SetupLogger() 21 | }, 22 | Run: func(cmd *cobra.Command, args []string) { 23 | }, 24 | } 25 | 26 | func init() { 27 | 28 | RootCmd.PersistentFlags().String( 29 | "log_level", 30 | "INFO", 31 | "Log level to run the agent at (INFO,WARN,DEBUG)", 32 | ) 33 | 34 | RootCmd.PersistentFlags().String( 35 | "log_format", 36 | "PLAIN", 37 | "Format for log output (JSON,TXT)", 38 | ) 39 | 40 | // set version flag 41 | RootCmd.Version = cldyVersion.VERSION 42 | 43 | //nolint gosec 44 | _ = 
viper.BindPFlag("log_level", RootCmd.PersistentFlags().Lookup("log_level")) 45 | _ = viper.BindPFlag("log_format", RootCmd.PersistentFlags().Lookup("log_format")) 46 | 47 | } 48 | 49 | // Execute metrics-agent with arguments 50 | func Execute() { 51 | err := RootCmd.Execute() 52 | if err != nil { 53 | log.Fatalln("Unable to execute :", err) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /deploy/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG golang_version 2 | 3 | FROM golang:${golang_version} as build-env 4 | ARG package 5 | ARG application 6 | 7 | 8 | WORKDIR /go/src/${package}/ 9 | 10 | # Build source code 11 | ENV CGO_ENABLED=0 12 | ENV GOOS=linux 13 | COPY . /go/src/${package} 14 | RUN go build 15 | 16 | FROM alpine:3 17 | ARG package 18 | ARG application 19 | 20 | 21 | # Allow delve to run on Alpine based containers. 22 | RUN apk --update upgrade && apk add ca-certificates && apk add curl 23 | 24 | # Remove unnecessary netcat tool 25 | RUN apk del netcat-openbsd && apk del netcat-openbsd-doc && rm /var/cache/apk/* && rm /usr/bin/nc 26 | 27 | RUN addgroup -g 10000 agent && \ 28 | adduser agent -S -u 10000 -s /bin/nologin -g metrics-agent -H -G agent 29 | 30 | WORKDIR / 31 | 32 | COPY --from=build-env /go/src/${package}/${application} /${application} 33 | 34 | USER 10000 35 | 36 | ENTRYPOINT ["/metrics-agent"] 37 | -------------------------------------------------------------------------------- /deploy/kubernetes/cloudability-metrics-agent.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: cloudability 5 | --- 6 | apiVersion: v1 7 | kind: ServiceAccount 8 | metadata: 9 | name: cloudability 10 | namespace: cloudability 11 | --- 12 | apiVersion: rbac.authorization.k8s.io/v1 13 | kind: ClusterRole 14 | metadata: 15 | name: metrics-agents 16 | namespace: 
kube-system 17 | rules: 18 | - apiGroups: 19 | - "" 20 | - "extensions" 21 | - "apps" 22 | - "batch" 23 | resources: 24 | - "namespaces" 25 | - "replicationcontrollers" 26 | - "services" 27 | - "nodes" 28 | - "nodes/spec" 29 | - "pods" 30 | - "jobs" 31 | - "cronjobs" 32 | - "persistentvolumes" 33 | - "persistentvolumeclaims" 34 | - "deployments" 35 | - "replicasets" 36 | - "daemonsets" 37 | verbs: 38 | - "get" 39 | - "watch" 40 | - "list" 41 | - apiGroups: [""] 42 | resources: 43 | - "services/proxy" 44 | - "pods/proxy" 45 | - "nodes/proxy" 46 | - "nodes/stats" 47 | - "nodes/metrics" 48 | verbs: 49 | - "get" 50 | - "list" 51 | --- 52 | apiVersion: rbac.authorization.k8s.io/v1 53 | kind: ClusterRoleBinding 54 | metadata: 55 | name: metrics-agents 56 | namespace: kube-system 57 | subjects: 58 | - kind: ServiceAccount 59 | name: cloudability 60 | namespace: cloudability 61 | roleRef: 62 | kind: ClusterRole 63 | name: metrics-agents 64 | apiGroup: rbac.authorization.k8s.io 65 | --- 66 | apiVersion: rbac.authorization.k8s.io/v1 67 | kind: Role 68 | metadata: 69 | name: metrics-agents 70 | namespace: cloudability 71 | rules: 72 | - apiGroups: ["*"] 73 | resources: 74 | - "pods" 75 | - "pods/log" 76 | verbs: 77 | - "get" 78 | - "list" 79 | --- 80 | apiVersion: rbac.authorization.k8s.io/v1 81 | kind: RoleBinding 82 | metadata: 83 | name: metrics-agents 84 | namespace: cloudability 85 | subjects: 86 | - kind: ServiceAccount 87 | name: cloudability 88 | namespace: cloudability 89 | roleRef: 90 | kind: Role 91 | name: metrics-agents 92 | apiGroup: rbac.authorization.k8s.io 93 | --- 94 | apiVersion: apps/v1 95 | kind: Deployment 96 | metadata: 97 | labels: 98 | name: "metrics-agent" 99 | name: "metrics-agent" 100 | namespace: cloudability 101 | spec: 102 | selector: 103 | matchLabels: 104 | app: metrics-agent 105 | replicas: 1 106 | template: 107 | metadata: 108 | labels: 109 | app: metrics-agent 110 | spec: 111 | serviceAccount: "cloudability" 112 | containers: 113 | - image: 
cloudability/metrics-agent:latest 114 | imagePullPolicy: Always 115 | securityContext: 116 | runAsUser: 10000 117 | runAsNonRoot: true 118 | capabilities: 119 | drop: 120 | - all 121 | allowPrivilegeEscalation: false 122 | resources: 123 | requests: 124 | memory: "2Gi" 125 | cpu: ".5" 126 | limits: 127 | memory: "4Gi" 128 | cpu: "1" 129 | livenessProbe: 130 | exec: 131 | command: 132 | - touch 133 | - tmp/healthy 134 | initialDelaySeconds: 120 135 | periodSeconds: 600 136 | timeoutSeconds: 5 137 | name: "metrics-agent" 138 | args: 139 | - 'kubernetes' 140 | env: 141 | - name: CLOUDABILITY_API_KEY 142 | value: "XXXXXXXXX" 143 | - name: CLOUDABILITY_CLUSTER_NAME 144 | value: "NNNNNNNNN" 145 | - name: CLOUDABILITY_POLL_INTERVAL 146 | value: "180" 147 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/cloudability/metrics-agent 2 | 3 | go 1.23.0 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go v1.40.27 7 | github.com/google/cadvisor v0.48.1 8 | github.com/googleapis/gnostic v0.5.5 9 | github.com/onsi/ginkgo v1.16.5 10 | github.com/onsi/gomega v1.27.4 11 | github.com/prometheus/common v0.38.0 12 | github.com/prometheus/prom2json v1.3.0 13 | github.com/sirupsen/logrus v1.9.3 14 | github.com/spf13/cobra v1.6.0 15 | github.com/spf13/viper v1.13.0 16 | k8s.io/api v0.27.4 17 | k8s.io/apimachinery v0.27.4 18 | k8s.io/client-go v0.27.4 19 | k8s.io/kubelet v0.27.4 20 | sigs.k8s.io/kind v0.17.0 21 | ) 22 | 23 | require ( 24 | github.com/BurntSushi/toml v1.2.1 // indirect 25 | github.com/alessio/shellescape v1.4.1 // indirect 26 | github.com/davecgh/go-spew v1.1.1 // indirect 27 | github.com/emicklei/go-restful/v3 v3.10.0 // indirect 28 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 29 | github.com/evanphx/json-patch/v5 v5.6.0 // indirect 30 | github.com/fsnotify/fsnotify v1.5.4 // indirect 31 | github.com/go-logr/logr 
v1.2.3 // indirect 32 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 33 | github.com/go-openapi/jsonreference v0.20.1 // indirect 34 | github.com/go-openapi/swag v0.22.3 // indirect 35 | github.com/gogo/protobuf v1.3.2 // indirect 36 | github.com/golang/protobuf v1.5.3 // indirect 37 | github.com/google/gnostic v0.6.9 // indirect 38 | github.com/google/go-cmp v0.6.0 // indirect 39 | github.com/google/gofuzz v1.2.0 // indirect 40 | github.com/google/safetext v0.0.0-20220905092116-b49f7bc46da2 // indirect 41 | github.com/google/uuid v1.3.0 // indirect 42 | github.com/hashicorp/hcl v1.0.0 // indirect 43 | github.com/imdario/mergo v0.3.13 // indirect 44 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 45 | github.com/jmespath/go-jmespath v0.4.0 // indirect 46 | github.com/josharian/intern v1.0.0 // indirect 47 | github.com/json-iterator/go v1.1.12 // indirect 48 | github.com/magiconair/properties v1.8.6 // indirect 49 | github.com/mailru/easyjson v0.7.7 // indirect 50 | github.com/mattn/go-isatty v0.0.16 // indirect 51 | github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect 52 | github.com/mitchellh/mapstructure v1.5.0 // indirect 53 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 54 | github.com/modern-go/reflect2 v1.0.2 // indirect 55 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 56 | github.com/nxadm/tail v1.4.8 // indirect 57 | github.com/pelletier/go-toml v1.9.5 // indirect 58 | github.com/pelletier/go-toml/v2 v2.0.5 // indirect 59 | github.com/pkg/errors v0.9.1 // indirect 60 | github.com/prometheus/client_model v0.3.0 // indirect 61 | github.com/spf13/afero v1.9.2 // indirect 62 | github.com/spf13/cast v1.5.0 // indirect 63 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 64 | github.com/spf13/pflag v1.0.5 // indirect 65 | github.com/subosito/gotenv v1.4.1 // indirect 66 | golang.org/x/net v0.25.0 // indirect 67 | golang.org/x/oauth2 v0.7.0 // indirect 68 | 
golang.org/x/sys v0.30.0 // indirect 69 | golang.org/x/term v0.29.0 // indirect 70 | golang.org/x/text v0.22.0 // indirect 71 | golang.org/x/time v0.3.0 // indirect 72 | google.golang.org/appengine v1.6.7 // indirect 73 | google.golang.org/protobuf v1.33.0 // indirect 74 | gopkg.in/inf.v0 v0.9.1 // indirect 75 | gopkg.in/ini.v1 v1.67.0 // indirect 76 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect 77 | gopkg.in/yaml.v2 v2.4.0 // indirect 78 | gopkg.in/yaml.v3 v3.0.1 // indirect 79 | k8s.io/klog/v2 v2.100.1 // indirect 80 | k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect 81 | k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 // indirect 82 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 83 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 84 | sigs.k8s.io/yaml v1.3.0 // indirect 85 | ) 86 | 87 | //Some of our dependencies have not updated their dependency imports 88 | replace ( 89 | github.com/docker/distribution => github.com/docker/distribution v2.8.3+incompatible 90 | github.com/docker/docker => github.com/docker/docker v25.0.5+incompatible 91 | github.com/hashicorp/consul/api => github.com/hashicorp/consul/api v1.30.0 92 | 93 | github.com/mattn/go-sqlite3 => github.com/mattn/go-sqlite3 v1.14.18 94 | github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.14 95 | golang.org/x/crypto => golang.org/x/crypto v0.31.0 96 | golang.org/x/image => golang.org/x/image v0.10.0 97 | golang.org/x/net => golang.org/x/net v0.36.0 98 | google.golang.org/grpc => google.golang.org/grpc v1.56.3 99 | google.golang.org/protobuf => google.golang.org/protobuf v1.33.0 100 | ) 101 | -------------------------------------------------------------------------------- /kubernetes/endpoint.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/cloudability/metrics-agent/retrieval/raw" 7 | ) 8 | 9 | // Connection is a 
bitmask that describes the manner(s) in which 10 | // the agent can connect to an endpoint 11 | type Connection uint8 12 | 13 | const ( 14 | // By bitshifting each constant with iota we can use Connection as a bitmask 15 | Direct Connection = 1 << iota // 0001 = 1 16 | Proxy // 0010 = 2 17 | // Unreachable defined at end to avoid affecting iota, 18 | // as it should always be set to 0 19 | Unreachable Connection = 0 20 | ) 21 | 22 | func (c Connection) hasMethod(method Connection) bool { return c&method != 0 } 23 | 24 | // AddMethod adds the provided nonzero method to the bitmask (use SetUnreachable for Unreachable) 25 | func (c *Connection) AddMethod(method Connection) { *c |= method } 26 | 27 | // ClearMethod removes the method from the bitmask 28 | func (c *Connection) ClearMethod(method Connection) { *c &= ^method } 29 | 30 | // SetUnreachable sets the connection as unreachable 31 | func (c *Connection) SetUnreachable() { *c = 0 } 32 | 33 | func (c Connection) String() string { 34 | if c == Unreachable { 35 | return unreachable 36 | } 37 | var options []string 38 | if c.hasMethod(Proxy) { 39 | options = append(options, proxy) 40 | } 41 | if c.hasMethod(Direct) { 42 | options = append(options, direct) 43 | } 44 | return strings.Join(options, ",") 45 | } 46 | 47 | type ConnectionMethod struct { 48 | ConnType Connection 49 | API nodeAPI 50 | client raw.Client 51 | FriendlyName string 52 | } 53 | 54 | // Endpoint represents the various metrics endpoints we hit 55 | type Endpoint string 56 | 57 | const ( 58 | // NodeStatsSummaryEndpoint the /stats/summary endpoint 59 | NodeStatsSummaryEndpoint Endpoint = "/stats/summary" 60 | ) 61 | 62 | // EndpointMask a map representing the currently active endpoints. 63 | // The keys of the map are the currently active endpoints. 
64 | type EndpointMask map[Endpoint]Connection 65 | 66 | // SetAvailability sets an endpoint availability state according to the supplied boolean 67 | func (m EndpointMask) SetAvailability(endpoint Endpoint, method Connection, available bool) { 68 | e := m[endpoint] 69 | if available { 70 | e.AddMethod(method) 71 | } else { 72 | e.ClearMethod(method) 73 | } 74 | m[endpoint] = e 75 | } 76 | 77 | func (m EndpointMask) SetUnreachable(endpoint Endpoint) { 78 | e := m[endpoint] 79 | e.SetUnreachable() 80 | m[endpoint] = e 81 | } 82 | 83 | // Available gets the availability of an endpoint for the specified connection method 84 | func (m EndpointMask) Available(endpoint Endpoint, method Connection) bool { 85 | return m[endpoint].hasMethod(method) 86 | } 87 | 88 | func (m EndpointMask) Unreachable(endpoint Endpoint) bool { 89 | return m[endpoint] == Unreachable 90 | } 91 | 92 | func (m EndpointMask) DirectAllowed(endpoint Endpoint) bool { 93 | return m[endpoint].hasMethod(Direct) 94 | } 95 | 96 | func (m EndpointMask) ProxyAllowed(endpoint Endpoint) bool { 97 | return m[endpoint].hasMethod(Proxy) 98 | } 99 | 100 | func (m EndpointMask) Options(endpoint Endpoint) string { 101 | return m[endpoint].String() 102 | } 103 | -------------------------------------------------------------------------------- /kubernetes/endpoint_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestConnection(t *testing.T) { 8 | t.Run("Connection bitmask handles Unreachable correctly", func(t *testing.T) { 9 | exampleConnection := Connection(0) 10 | if exampleConnection != Unreachable { 11 | t.Errorf("zero-value Connection should be unreachable, got %s", exampleConnection) 12 | } 13 | // make sure no weird shenanigans happen with double Unreachable 14 | exampleConnection.ClearMethod(Unreachable) 15 | if exampleConnection != Unreachable { 16 | t.Errorf("zero-value Connection should be 
unreachable, got %s", exampleConnection) 17 | } 18 | // note: Don't do this, use SetUnreachable 19 | exampleConnection.AddMethod(Unreachable) 20 | if exampleConnection != Unreachable { 21 | t.Errorf("zero-value Connection should be unreachable, got %s", exampleConnection) 22 | } 23 | 24 | exampleConnection.SetUnreachable() 25 | if exampleConnection != Unreachable { 26 | t.Errorf("zero-value Connection should be unreachable, got %s", exampleConnection) 27 | } 28 | }) 29 | t.Run("Connection bitmask handles Proxy correctly", func(t *testing.T) { 30 | exampleConnection := Connection(0) 31 | exampleConnection.AddMethod(Proxy) 32 | 33 | if !exampleConnection.hasMethod(Proxy) { 34 | t.Errorf("expected Proxy, got %s", exampleConnection) 35 | } 36 | exampleConnection.ClearMethod(Proxy) 37 | if exampleConnection != Unreachable { 38 | t.Errorf("cleared connection should be unreachable, got %s", exampleConnection) 39 | } 40 | }) 41 | t.Run("Connection bitmask handles Direct correctly", func(t *testing.T) { 42 | exampleConnection := Connection(0) 43 | exampleConnection.AddMethod(Direct) 44 | 45 | if !exampleConnection.hasMethod(Direct) { 46 | t.Errorf("expected Direct, got %s", exampleConnection) 47 | } 48 | exampleConnection.ClearMethod(Direct) 49 | if exampleConnection != Unreachable { 50 | t.Errorf("cleared connection should be unreachable, got %s", exampleConnection) 51 | } 52 | }) 53 | 54 | t.Run("Connection bitmask handles multiple flags correctly", func(t *testing.T) { 55 | exampleConnection := Connection(0) 56 | exampleConnection.AddMethod(Direct) 57 | exampleConnection.AddMethod(Proxy) 58 | 59 | // should have both methods set 60 | if !exampleConnection.hasMethod(Direct) { 61 | t.Errorf("expected Direct, got %s", exampleConnection) 62 | } 63 | if !exampleConnection.hasMethod(Proxy) { 64 | t.Errorf("expected Proxy, got %s", exampleConnection) 65 | } 66 | // should not be Unreachable 67 | if exampleConnection.hasMethod(Unreachable) { 68 | t.Errorf("connection should not 
be unreachable, got %s", exampleConnection) 69 | } 70 | 71 | if !exampleConnection.hasMethod(Direct | Proxy) { 72 | t.Errorf("expected to be able to check multiple flags at once") 73 | } 74 | 75 | exampleConnection.ClearMethod(Direct) 76 | 77 | // should have lost direct but not proxy 78 | if exampleConnection.hasMethod(Direct) { 79 | t.Errorf("should not have Direct, got %s", exampleConnection) 80 | } 81 | if !exampleConnection.hasMethod(Proxy) { 82 | t.Errorf("expected Proxy, got %s", exampleConnection) 83 | } 84 | 85 | exampleConnection.SetUnreachable() 86 | 87 | if exampleConnection.hasMethod(Direct) { 88 | t.Errorf("expected Unreachable, got %s", exampleConnection) 89 | } 90 | if exampleConnection.hasMethod(Proxy) { 91 | t.Errorf("expectedUnreachable, got %s", exampleConnection) 92 | } 93 | 94 | }) 95 | } 96 | 97 | func TestEndpointMask(t *testing.T) { 98 | t.Run("endpoint should report Unreachable correctly", func(t *testing.T) { 99 | mask := EndpointMask{} 100 | if !mask.Unreachable(NodeStatsSummaryEndpoint) { 101 | t.Error("empty mask should return all endpoints as unreachable") 102 | } 103 | 104 | // Don't do this weird stuff, use mask.SetUnreachable 105 | mask.SetAvailability(NodeStatsSummaryEndpoint, Unreachable, false) 106 | if !mask.Unreachable(NodeStatsSummaryEndpoint) { 107 | t.Errorf("endpoint should have remained unreachable") 108 | } 109 | 110 | mask.SetAvailability(NodeStatsSummaryEndpoint, Proxy, true) 111 | if !mask.ProxyAllowed(NodeStatsSummaryEndpoint) { 112 | t.Errorf("should have proxy method set, instead got: %s", mask.Options(NodeStatsSummaryEndpoint)) 113 | } 114 | 115 | mask.SetUnreachable(NodeStatsSummaryEndpoint) 116 | if !mask.Unreachable(NodeStatsSummaryEndpoint) { 117 | t.Errorf("expected unreachable, got %s", mask.Options(NodeStatsSummaryEndpoint)) 118 | } 119 | }) 120 | t.Run("endpoint should set availability correctly", func(t *testing.T) { 121 | mask := EndpointMask{} 122 | // set as available 123 | 
mask.SetAvailability(NodeStatsSummaryEndpoint, Direct, true) 124 | if !mask.DirectAllowed(NodeStatsSummaryEndpoint) { 125 | t.Errorf("expected direct connection allowed, but got %s", mask.Options(NodeStatsSummaryEndpoint)) 126 | } 127 | if mask.ProxyAllowed(NodeStatsSummaryEndpoint) { 128 | t.Errorf("expected direct connection allowed, but got %s", mask.Options(NodeStatsSummaryEndpoint)) 129 | } 130 | 131 | // set unavailable 132 | mask.SetAvailability(NodeStatsSummaryEndpoint, Direct, false) 133 | if mask.DirectAllowed(NodeStatsSummaryEndpoint) { 134 | t.Errorf("expected direct connection unavailable, but got %s", mask.Options(NodeStatsSummaryEndpoint)) 135 | } 136 | // an endpoint with no methods available should be unreachable 137 | if !mask.Unreachable(NodeStatsSummaryEndpoint) { 138 | t.Errorf("expected unreachable, got %s", mask.Options(NodeStatsSummaryEndpoint)) 139 | } 140 | }) 141 | t.Run("should be able to set multiple connection methods per endpoint", func(t *testing.T) { 142 | mask := EndpointMask{} 143 | if mask.ProxyAllowed(NodeStatsSummaryEndpoint) { 144 | t.Errorf("expected the availability of an endpoint to default to false") 145 | } 146 | mask.SetAvailability(NodeStatsSummaryEndpoint, Proxy, true) 147 | if !mask.ProxyAllowed(NodeStatsSummaryEndpoint) { 148 | t.Errorf("expected the availability of an endpoint to be true after being set as available") 149 | } 150 | // validate that setting another method doesn't unset the first 151 | mask.SetAvailability(NodeStatsSummaryEndpoint, Direct, true) 152 | if !mask.ProxyAllowed(NodeStatsSummaryEndpoint) { 153 | t.Errorf("expected the availability of an endpoint to be true after being set as available") 154 | } 155 | // validate that setting the same method twice does not unset 156 | mask.SetAvailability(NodeStatsSummaryEndpoint, Direct, true) 157 | if !mask.DirectAllowed(NodeStatsSummaryEndpoint) { 158 | t.Errorf("expected the availability of an endpoint to be true after being set as available") 159 | } 
160 | if mask.Unreachable(NodeStatsSummaryEndpoint) { 161 | t.Errorf("endpoint should be available, instead got: %s", 162 | mask.Options(NodeStatsSummaryEndpoint)) 163 | } 164 | 165 | }) 166 | } 167 | -------------------------------------------------------------------------------- /kubernetes/heapster.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "net/url" 9 | "os" 10 | "path/filepath" 11 | "strconv" 12 | "strings" 13 | "time" 14 | 15 | log "github.com/sirupsen/logrus" 16 | 17 | "github.com/cloudability/metrics-agent/util" 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/client-go/kubernetes" 20 | "k8s.io/client-go/rest" 21 | ) 22 | 23 | type heapsterMetricExport []struct { 24 | Metrics struct { 25 | CPUUsage []struct { 26 | Start time.Time `json:"start,omitempty"` 27 | End time.Time `json:"end,omitempty"` 28 | Value int `json:"value,omitempty"` 29 | } `json:"cpu/usage,omitempty"` 30 | MemoryCache []struct { 31 | Start time.Time `json:"start,omitempty"` 32 | End time.Time `json:"end,omitempty"` 33 | Value int `json:"value,omitempty"` 34 | } `json:"memory/cache,omitempty"` 35 | MemoryMajorPageFaults []struct { 36 | Start time.Time `json:"start,omitempty"` 37 | End time.Time `json:"end,omitempty"` 38 | Value int `json:"value,omitempty"` 39 | } `json:"memory/major_page_faults,omitempty"` 40 | MemoryPageFaults []struct { 41 | Start time.Time `json:"start,omitempty"` 42 | End time.Time `json:"end,omitempty"` 43 | Value int `json:"value,omitempty"` 44 | } `json:"memory/page_faults,omitempty"` 45 | MemoryRss []struct { 46 | Start time.Time `json:"start,omitempty"` 47 | End time.Time `json:"end,omitempty"` 48 | Value int `json:"value,omitempty"` 49 | } `json:"memory/rss,omitempty"` 50 | MemoryUsage []struct { 51 | Start time.Time `json:"start,omitempty"` 52 | End time.Time `json:"end,omitempty"` 53 | Value int 
`json:"value,omitempty"` 54 | } `json:"memory/usage,omitempty"` 55 | MemoryWorkingSet []struct { 56 | Start time.Time `json:"start,omitempty"` 57 | End time.Time `json:"end,omitempty"` 58 | Value int `json:"value,omitempty"` 59 | } `json:"memory/working_set,omitempty"` 60 | Uptime []struct { 61 | Start time.Time `json:"start,omitempty"` 62 | End time.Time `json:"end,omitempty"` 63 | Value int `json:"value,omitempty"` 64 | } `json:"uptime,omitempty"` 65 | } `json:"metrics,omitempty"` 66 | Labels struct { 67 | ContainerName string `json:"container_name,omitempty"` 68 | HostID string `json:"host_id,omitempty"` 69 | Hostname string `json:"hostname,omitempty"` 70 | Nodename string `json:"nodename,omitempty"` 71 | } `json:"labels,omitempty"` 72 | } 73 | 74 | // returns the proxy url of heapster in the cluster (returns last found based on match) 75 | func getHeapsterURL(ctx context.Context, clientset kubernetes.Interface, clusterHostURL string) ( 76 | URL url.URL, err error) { 77 | pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) 78 | 79 | if err != nil { 80 | log.Fatalf("cloudability metric agent is unable to get a list of pods: %v", err) 81 | } 82 | 83 | services, err := clientset.CoreV1().Services("").List(ctx, metav1.ListOptions{}) 84 | if err != nil { 85 | log.Fatalf("cloudability metric agent is unable to get a list of services: %v", err) 86 | } 87 | 88 | for _, pod := range pods.Items { 89 | if strings.Contains(pod.Name, "heapster") { 90 | URL.Host = clusterHostURL 91 | //nolint staticcheck 92 | URL.Path = pod.SelfLink + ":8082/proxy/api/v1/metric-export" 93 | } 94 | } 95 | 96 | // prefer accessing via service if present 97 | // nolint dupl 98 | for _, service := range services.Items { 99 | if service.Name == "heapster" && service.Namespace == "cloudability" { 100 | URL.Host = "http://heapster.cloudability:8082" 101 | URL.Path = "/api/v1/metric-export" 102 | return URL, nil 103 | } else if service.Name == "heapster" { 104 | URL.Host = 
clusterHostURL 105 | if len(service.Spec.Ports) > 0 { 106 | URL.Path = service.SelfLink + ":" + strconv.Itoa( 107 | int(service.Spec.Ports[0].Port)) + "/proxy/api/v1/metric-export" 108 | } else { 109 | URL.Path = service.SelfLink + "/proxy/api/v1/metric-export" 110 | } 111 | } 112 | } 113 | 114 | return URL, err 115 | 116 | } 117 | 118 | func validateHeapster(config KubeAgentConfig, client rest.HTTPClient) error { 119 | outerTest, body, err := util.TestHTTPConnection( 120 | client, config.HeapsterURL, http.MethodGet, config.BearerToken, retryCount, true) 121 | if err != nil { 122 | return err 123 | } 124 | if !outerTest { 125 | return fmt.Errorf("no heapster found") 126 | } 127 | var me heapsterMetricExport 128 | if err := json.Unmarshal(*body, &me); err != nil { 129 | return fmt.Errorf("malformed response from heapster running at: %v", config.HeapsterURL) 130 | } 131 | if len(me) < 10 { 132 | return fmt.Errorf("received empty or malformed response from heapster running at: %v", 133 | config.HeapsterURL) 134 | } 135 | log.Debugf("Connected to heapster at: %v", config.HeapsterURL) 136 | return err 137 | } 138 | 139 | func handleBaselineHeapsterMetrics(msExportDirectory, msd, baselineMetricSample, heapsterMetricExport string) error { 140 | // copy into the current sample directory the most recent baseline metric export 141 | err := util.CopyFileContents(msd+"/"+filepath.Base(baselineMetricSample), baselineMetricSample) 142 | if err != nil { 143 | log.Warn("Warning previous baseline not found or incomplete") 144 | } 145 | 146 | // remove the baseline metric if it is not json 147 | if baselineMetricSample != "" && filepath.Base(baselineMetricSample) != "baseline-metrics-export.json" { 148 | if err = os.Remove(baselineMetricSample); err != nil { 149 | return fmt.Errorf("error cleaning up invalid baseline metric export: %s", err) 150 | } 151 | } 152 | // update the baseline metric export with the most recent sample from this collection 153 | err = util.CopyFileContents( 
154 | filepath.Dir(msExportDirectory)+"/"+"baseline-metrics-export"+filepath.Ext( 155 | heapsterMetricExport), heapsterMetricExport) 156 | if err != nil { 157 | return fmt.Errorf("error updating baseline metric export: %s", err) 158 | } 159 | 160 | return nil 161 | } 162 | -------------------------------------------------------------------------------- /kubernetes/heapster_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "net/http" 7 | "net/http/httptest" 8 | "os" 9 | "strconv" 10 | "testing" 11 | "time" 12 | 13 | v1 "k8s.io/api/core/v1" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | "k8s.io/client-go/kubernetes/fake" 16 | ) 17 | 18 | // nolint: goconst 19 | func TestGetHeapsterURL(t *testing.T) { 20 | cs := fake.NewSimpleClientset() 21 | t.Run("Ensure that heapster pod is found in the kube-system namespace", func(t *testing.T) { 22 | 23 | pod := &v1.Pod{ 24 | ObjectMeta: metav1.ObjectMeta{ 25 | Name: "heapster", 26 | Namespace: "kube-system", 27 | SelfLink: "/api/v1/namespaces/kube-system/pods/heapster-6d9d49d496-5scrb", 28 | }, 29 | } 30 | _, _ = cs.CoreV1().Pods("kube-system").Create(context.TODO(), pod, metav1.CreateOptions{}) 31 | 32 | clusterHostURL := "http://locahost" 33 | 34 | url, err := getHeapsterURL(context.TODO(), cs, clusterHostURL) 35 | if err != nil { 36 | t.Error(err) 37 | } 38 | if url.Host != "http://localhost" && 39 | url.Path != "/api/v1/namespaces/kube-system/pods/heapster-6d9d49d496-5scrb:8082/proxy/api/v1/metric-export" { 40 | t.Errorf("Error getting heapster pod url: %v", err) 41 | } 42 | }) 43 | t.Run("Ensure that heapster service is found in the kube-system namespace", func(t *testing.T) { 44 | 45 | service := &v1.Service{ 46 | ObjectMeta: metav1.ObjectMeta{ 47 | Name: "heapster", 48 | Namespace: "kube-system", 49 | SelfLink: "/api/v1/namespaces/kube-system/services/heapster", 50 | }, 51 | } 52 | _, _ = 
cs.CoreV1().Services("kube-system").Create(context.TODO(), service, metav1.CreateOptions{}) 53 | 54 | clusterHostURL := "http://locahost" 55 | 56 | url, err := getHeapsterURL(context.TODO(), cs, clusterHostURL) 57 | if err != nil { 58 | t.Error(err) 59 | } 60 | if url.Host != "http://localhost" && 61 | url.Path != "/api/v1/namespaces/kube-system/services/heapster/proxy/api/v1/metric-export" { 62 | t.Errorf("Error getting heapster service url: %v", err) 63 | } 64 | }) 65 | } 66 | 67 | // nolint:revive, errcheck 68 | func TestValidateHeapster(t *testing.T) { 69 | 70 | t.Run("Ensure that a valid heapster service is found and responds with data", func(t *testing.T) { 71 | 72 | testData := "../util/testdata/test-cluster-metrics-sample/sample-1510159016/heapster-metrics-export.json" 73 | body, _ := os.ReadFile(testData) 74 | 75 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 76 | w.WriteHeader(200) 77 | w.Header().Set("Content-Type", "application/json") 78 | w.Write(body) 79 | 80 | })) 81 | defer ts.Close() 82 | 83 | cs := fake.NewSimpleClientset() 84 | 85 | kac := KubeAgentConfig{ 86 | HTTPClient: http.Client{}, 87 | UseInClusterConfig: true, 88 | ClusterHostURL: ts.URL, 89 | Clientset: cs, 90 | HeapsterURL: ts.URL, 91 | Insecure: true, 92 | BearerToken: "", 93 | } 94 | 95 | err := validateHeapster(kac, &kac.HTTPClient) 96 | 97 | if err != nil { 98 | t.Error(err) 99 | } 100 | }) 101 | 102 | } 103 | 104 | // nolint: gosec 105 | func TestHandleBaselineHeapsterMetrics(t *testing.T) { 106 | 107 | msExportDirectory := os.TempDir() + "/cldy-metrics" + strconv.FormatInt( 108 | time.Now().Unix(), 10) + "/" + "21260ee7-4e52-11e8-87d9-025000000001_20180803183652" 109 | msd := msExportDirectory + "/20180803184251/1533321771/" 110 | baselineMetricSample := msExportDirectory + "/" + "baseline-metrics-export.json" 111 | heapsterMetricExport := msd + "heapster-metrics-export.json" 112 | 113 | _ = os.MkdirAll(msd, 0777) 114 | _ = 
os.WriteFile(baselineMetricSample, []byte("baseline"), 0777) 115 | _ = os.WriteFile(heapsterMetricExport, []byte("export"), 0777) 116 | 117 | t.Run("Ensure that heapster baseline is copied into metric sample directory ", func(t *testing.T) { 118 | bme1, _ := os.ReadFile(baselineMetricSample) 119 | err := handleBaselineHeapsterMetrics(msExportDirectory, msd, baselineMetricSample, heapsterMetricExport) 120 | bme2, _ := os.ReadFile(msd + "/baseline-metrics-export.json") 121 | 122 | if !bytes.Equal(bme1, bme2) || err != nil { 123 | t.Errorf("Heapster baseline was not correcly copied into metric sample directory: %v", err) 124 | } 125 | 126 | }) 127 | 128 | t.Run("Ensure that the baseline metric export is updated with the most recent sample from the collection", 129 | func(t *testing.T) { 130 | 131 | _ = handleBaselineHeapsterMetrics(msExportDirectory, msd, baselineMetricSample, heapsterMetricExport) 132 | 133 | bme1, _ := os.ReadFile(baselineMetricSample) 134 | bme2, _ := os.ReadFile(heapsterMetricExport) 135 | 136 | if bytes.Equal(bme1, bme2) { 137 | t.Error("Heapster baseline was not correcly updated with the most recent sample from the collection") 138 | } 139 | 140 | }) 141 | 142 | t.Run("Ensure that a baseline without a json extension is removed", func(t *testing.T) { 143 | 144 | _ = os.Remove(baselineMetricSample) 145 | baselineMetricSample := msExportDirectory + "/" + "baseline-metrics-export" 146 | _ = os.WriteFile(baselineMetricSample, []byte("baseline"), 0777) 147 | _ = handleBaselineHeapsterMetrics(msExportDirectory, msd, baselineMetricSample, heapsterMetricExport) 148 | if _, err := os.Stat(baselineMetricSample); err == nil { 149 | t.Errorf("Heapster baseline without a json extension was not removed: %v", err) 150 | } 151 | 152 | }) 153 | 154 | // cleanup 155 | os.RemoveAll(msExportDirectory) 156 | 157 | } 158 | -------------------------------------------------------------------------------- /kubernetes/kubernetes_unit_test.go: 
-------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | // nolint:revive 4 | import ( 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/cloudability/metrics-agent/client" 9 | . "github.com/onsi/ginkgo" 10 | . "github.com/onsi/gomega" 11 | ) 12 | 13 | func TestKubernetes(t *testing.T) { 14 | RegisterFailHandler(Fail) 15 | RunSpecs(t, "Kubernetes Unit Tests") 16 | } 17 | 18 | var _ = Describe("Kubernetes", func() { 19 | 20 | Describe("error validation", func() { 21 | It("should return an error if metrics-agent receives a 500 error getting upload URI", func() { 22 | errorStr := handleError(fmt.Errorf("Error retrieving upload URI: 500"), "us-west-2") 23 | Expect(errorStr).To(Equal(fmt.Sprintf(transportError, client.DefaultBaseURL))) 24 | }) 25 | // nolint: staticcheck 26 | It("should return an error if metrics-agent receives a 403 error getting upload URI", func() { 27 | errorStr := handleError(fmt.Errorf(forbiddenError), "us-west-2") 28 | Expect(errorStr).To(Equal(fmt.Sprintf(apiKeyError, kbProvisionURL))) 29 | }) 30 | 31 | It("should not return an error if the metrics-agent receives any other error", func() { 32 | errorStr := handleError(fmt.Errorf("test error"), "us-west-2") 33 | Expect(errorStr).To(Equal("")) 34 | }) 35 | 36 | It("Node source error handler should return an error if we need to verify RBAC roles", func() { 37 | errorStr := handleNodeSourceError(fmt.Errorf("Please verify RBAC roles")) 38 | Expect(errorStr).To(ContainSubstring("RBAC role in the Cloudability namespace may need to be updated.")) 39 | }) 40 | }) 41 | }) 42 | -------------------------------------------------------------------------------- /kubernetes/testdata/baseline-container-proxyNode: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudability/metrics-agent/884572f14f0ba93748659865ec2c2c78946c09d9/kubernetes/testdata/baseline-container-proxyNode 
-------------------------------------------------------------------------------- /kubernetes/testdata/baseline-summary-proxyNode: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudability/metrics-agent/884572f14f0ba93748659865ec2c2c78946c09d9/kubernetes/testdata/baseline-summary-proxyNode -------------------------------------------------------------------------------- /kubernetes/testdata/mockToken: -------------------------------------------------------------------------------- 1 | mymocktoken1234 -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/cloudability/metrics-agent/cmd" 8 | ) 9 | 10 | func main() { 11 | if err := cmd.RootCmd.Execute(); err != nil { 12 | fmt.Println(err) 13 | os.Exit(1) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /measurement/measurement.go: -------------------------------------------------------------------------------- 1 | package measurement 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // Measurement represents a single set of data 8 | type Measurement struct { 9 | Name string `json:"name,omitempty"` 10 | Metrics map[string]uint64 `json:"metrics,omitempty"` 11 | Tags map[string]string `json:"tags,omitempty"` 12 | Timestamp int64 `json:"ts,omitempty"` 13 | Value float64 `json:"value,omitempty"` 14 | Values map[string]string `json:"values,omitempty"` 15 | Errors []ErrorDetail `json:"errors,omitempty"` 16 | } 17 | 18 | // ErrorDetail represents a detailed error message 19 | type ErrorDetail struct { 20 | Name string `json:"name,omitempty"` 21 | Message string `json:"message,omitempty"` 22 | Type string `json:"type,omitempty"` 23 | } 24 | 25 | func (m Measurement) String() string { 26 | return fmt.Sprintf("%v:%.2f [%v] [%v] [%v] 
[%+v] @ %v ", 27 | m.Name, m.Value, m.Tags, m.Metrics, m.Values, m.Errors, m.Timestamp) 28 | } 29 | -------------------------------------------------------------------------------- /measurement/measurement_test.go: -------------------------------------------------------------------------------- 1 | package measurement_test 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math/rand" 7 | "reflect" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | "testing/quick" 12 | "time" 13 | 14 | "github.com/cloudability/metrics-agent/measurement" 15 | "github.com/cloudability/metrics-agent/test" 16 | ) 17 | 18 | func TestSanity(t *testing.T) { 19 | t.Parallel() 20 | 21 | tags := make(map[string]string) 22 | tags["host"] = "macbookpro.Local.abc123" 23 | tags["cat"] = "dog" 24 | 25 | measure := measurement.Measurement{ 26 | Name: "ametric", 27 | Value: 1243.00, 28 | Timestamp: time.Now().Unix(), 29 | Tags: tags, 30 | } 31 | 32 | strValue := measure.String() 33 | 34 | if !strings.Contains(strValue, "ametric") { 35 | t.Error("String() should contain the metric name") 36 | } 37 | 38 | if !strings.Contains(strValue, "1243.00") { 39 | t.Error("String() should contain the metric value with correct precision") 40 | } 41 | } 42 | 43 | // nolint:gosec 44 | func TestMarshalJSON(t *testing.T) { 45 | t.Parallel() 46 | 47 | tags := make(map[string]string) 48 | tags["host"] = "macbookpro.Local.abc123" 49 | metrics := make(map[string]uint64) 50 | metrics["uptime"] = 585525897 51 | randKey := "zz" + test.SecureRandomAlphaString(62) //ensure ordering 52 | randValue := test.SecureRandomAlphaString(64) 53 | tags[randKey] = randValue 54 | 55 | t.Run("Measurement with all fields marshals correctly", func(t *testing.T) { 56 | t.Parallel() 57 | 58 | name := test.SecureRandomAlphaString(64) 59 | value := rand.Float64() 60 | ts := time.Now().Unix() 61 | 62 | measure := measurement.Measurement{ 63 | Name: name, 64 | Tags: tags, 65 | Metrics: metrics, 66 | Timestamp: ts, 67 | Value: value, 68 | } 69 | 70 | 
res, err := json.Marshal(measure) 71 | jsonString := string(res) 72 | 73 | if err != nil { 74 | t.Errorf("Encountered error %v", err) 75 | } 76 | 77 | expected := fmt.Sprintf( 78 | //nolint lll 79 | "{\"name\":\"%s\",\"metrics\":{\"uptime\":585525897},\"tags\":{\"host\":\"macbookpro.Local.abc123\",\"%v\":\"%v\"},\"ts\":%d,\"value\":%g}", 80 | name, randKey, randValue, ts, value) 81 | if expected != jsonString { 82 | t.Errorf("expected json does not match actual. expected: %+v actual: %+v", expected, jsonString) 83 | } 84 | }) 85 | 86 | t.Run("Measurement missing value omits it in JSON", func(t *testing.T) { 87 | t.Parallel() 88 | 89 | name := test.SecureRandomAlphaString(64) 90 | ts := time.Now().Unix() 91 | 92 | measure := measurement.Measurement{ 93 | Name: name, 94 | Tags: tags, 95 | Timestamp: ts, 96 | } 97 | 98 | res, err := json.Marshal(measure) 99 | jsonString := string(res) 100 | 101 | if err != nil { 102 | t.Errorf("Encountered error %v", err) 103 | } 104 | 105 | if strings.Contains(jsonString, "value") { 106 | t.Errorf("Encountered unset field where it should have been omitted in output %s", jsonString) 107 | } 108 | }) 109 | 110 | t.Run("Measurement missing tags omits it in JSON", func(t *testing.T) { 111 | t.Parallel() 112 | 113 | name := test.SecureRandomAlphaString(64) 114 | value := rand.Float64() 115 | ts := time.Now().Unix() 116 | 117 | measure := measurement.Measurement{ 118 | Name: name, 119 | Value: value, 120 | Timestamp: ts, 121 | } 122 | 123 | res, err := json.Marshal(measure) 124 | jsonString := string(res) 125 | 126 | if err != nil { 127 | t.Errorf("Encountered error %v", err) 128 | } 129 | 130 | if strings.Contains(jsonString, "tags") { 131 | t.Errorf("Encountered unset field where it should have been omitted in output %s", jsonString) 132 | } 133 | }) 134 | } 135 | 136 | type testMeasurement measurement.Measurement 137 | 138 | func (t testMeasurement) Generate(rand *rand.Rand, size int) reflect.Value { 139 | tm := testMeasurement{ 140 | 
Name: test.SecureRandomAlphaString(size), 141 | Timestamp: rand.Int63(), 142 | Value: rand.Float64(), 143 | } 144 | return reflect.ValueOf(tm) 145 | } 146 | 147 | func TestMarshalJSON_Blackbox(t *testing.T) { 148 | t.Parallel() 149 | 150 | assertion := func(m testMeasurement) bool { 151 | res, err := json.Marshal(m) 152 | jsonString := string(res) 153 | 154 | if err != nil { 155 | t.Errorf("Encountered error %v", err) 156 | } 157 | 158 | return strings.Contains(jsonString, m.Name) && 159 | strings.Contains(jsonString, fmt.Sprintf("%g", m.Value)) && 160 | strings.Contains(jsonString, strconv.FormatInt(m.Timestamp, 10)) 161 | } 162 | 163 | if err := quick.Check(assertion, nil); err != nil { 164 | t.Error(err) 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /retrieval/k8s/k8s_stats.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "errors" 7 | "os" 8 | "time" 9 | 10 | v1apps "k8s.io/api/apps/v1" 11 | v1batch "k8s.io/api/batch/v1" 12 | corev1 "k8s.io/api/core/v1" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/client-go/informers" 15 | "k8s.io/client-go/kubernetes" 16 | "k8s.io/client-go/tools/cache" 17 | ) 18 | 19 | const ( 20 | KubernetesLastAppliedConfig = "kubectl.kubernetes.io/last-applied-configuration" 21 | ) 22 | 23 | func StartUpInformers(clientset kubernetes.Interface, clusterVersion float64, 24 | resyncInterval int, parseMetricsData bool, stopCh chan struct{}) (map[string]*cache.SharedIndexInformer, error) { 25 | factory := informers.NewSharedInformerFactory(clientset, time.Duration(resyncInterval)*time.Hour) 26 | 27 | // v1Sources 28 | replicationControllerInformer := factory.Core().V1().ReplicationControllers().Informer() 29 | servicesInformer := factory.Core().V1().Services().Informer() 30 | nodesInformer := factory.Core().V1().Nodes().Informer() 31 | podsInformer := 
factory.Core().V1().Pods().Informer() 32 | persistentVolumesInformer := factory.Core().V1().PersistentVolumes().Informer() 33 | persistentVolumeClaimsInformer := factory.Core().V1().PersistentVolumeClaims().Informer() 34 | namespacesInformer := factory.Core().V1().Namespaces().Informer() 35 | // AppSources 36 | replicasetsInformer := factory.Apps().V1().ReplicaSets().Informer() 37 | daemonsetsInformer := factory.Apps().V1().DaemonSets().Informer() 38 | deploymentsInformer := factory.Apps().V1().Deployments().Informer() 39 | // Jobs 40 | jobsInformer := factory.Batch().V1().Jobs().Informer() 41 | // Cronjobs were introduced in k8s 1.21 so for older versions do not attempt to create an informer 42 | var cronJobsInformer cache.SharedIndexInformer 43 | if clusterVersion > 1.20 { 44 | cronJobsInformer = factory.Batch().V1().CronJobs().Informer() 45 | } 46 | 47 | var clusterInformers = map[string]*cache.SharedIndexInformer{ 48 | "replicationcontrollers": &replicationControllerInformer, 49 | "services": &servicesInformer, 50 | "nodes": &nodesInformer, 51 | "pods": &podsInformer, 52 | "persistentvolumes": &persistentVolumesInformer, 53 | "persistentvolumeclaims": &persistentVolumeClaimsInformer, 54 | "replicasets": &replicasetsInformer, 55 | "daemonsets": &daemonsetsInformer, 56 | "deployments": &deploymentsInformer, 57 | "namespaces": &namespacesInformer, 58 | "jobs": &jobsInformer, 59 | "cronjobs": &cronJobsInformer, 60 | } 61 | 62 | for _, informer := range clusterInformers { 63 | transform := GetTransformFunc(parseMetricsData) 64 | err := (*informer).SetTransform(transform) 65 | if err != nil { 66 | return nil, err 67 | } 68 | } 69 | 70 | // runs in background, starts all informers that are a part of the factory 71 | factory.Start(stopCh) 72 | // wait until all informers have successfully synced 73 | factory.WaitForCacheSync(stopCh) 74 | 75 | return clusterInformers, nil 76 | } 77 | 78 | // GetK8sMetricsFromInformer loops through all k8s resource informers in 
kubeAgentConfig writing each to the WSD 79 | func GetK8sMetricsFromInformer(informers map[string]*cache.SharedIndexInformer, 80 | workDir *os.File) error { 81 | for resourceName, informer := range informers { 82 | // Cronjob informer will be nil if k8s version is less than 1.21, if so skip getting the list of cronjobs 83 | if *informer == nil { 84 | continue 85 | } 86 | resourceList := (*informer).GetIndexer().List() 87 | err := writeK8sResourceFile(workDir, resourceName, resourceList) 88 | 89 | if err != nil { 90 | return err 91 | } 92 | } 93 | return nil 94 | } 95 | 96 | // writeK8sResourceFile creates a new file in the upload sample directory for the resourceName passed in and writes data 97 | func writeK8sResourceFile(workDir *os.File, resourceName string, 98 | resourceList []interface{}) (rerr error) { 99 | 100 | file, err := os.OpenFile(workDir.Name()+"/"+resourceName+".jsonl", 101 | os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) 102 | if err != nil { 103 | return errors.New("error: unable to create kubernetes metric file") 104 | } 105 | datawriter := bufio.NewWriter(file) 106 | 107 | for _, k8Resource := range resourceList { 108 | if shouldSkipResource(k8Resource) { 109 | continue 110 | } 111 | 112 | data, err := json.Marshal(k8Resource) 113 | if err != nil { 114 | return errors.New("error: unable to marshal resource: " + resourceName) 115 | } 116 | _, err = datawriter.WriteString(string(data) + "\n") 117 | if err != nil { 118 | return errors.New("error: unable to write resource to file: " + resourceName) 119 | } 120 | } 121 | 122 | err = datawriter.Flush() 123 | if err != nil { 124 | return err 125 | } 126 | err = file.Close() 127 | if err != nil { 128 | return err 129 | } 130 | 131 | return err 132 | } 133 | 134 | func shouldSkipResource(k8Resource interface{}) bool { 135 | // safe buffer to allow for longer lived resources to be ingested correctly 136 | previousHour := time.Now().UTC().Add(-1 * time.Hour) 137 | switch resource := k8Resource.(type) { 138 | 
case *v1batch.Job: 139 | return shouldSkipJob(previousHour, resource) 140 | case *corev1.Pod: 141 | return shouldSkipPod(previousHour, resource) 142 | case *v1apps.ReplicaSet: 143 | return resource.Status.Replicas == 0 && previousHour.After(resource.CreationTimestamp.Time) 144 | } 145 | return false 146 | } 147 | 148 | func shouldSkipJob(previousHour time.Time, resource *v1batch.Job) bool { 149 | if resource.Status.CompletionTime != nil && 150 | previousHour.After(resource.Status.CompletionTime.Time) { 151 | return true 152 | } 153 | if resource.Status.Failed > 0 { 154 | for _, condition := range resource.Status.Conditions { 155 | if condition.Type == v1batch.JobFailed { 156 | if previousHour.After(condition.LastTransitionTime.Time) { 157 | return true 158 | } 159 | } 160 | } 161 | } 162 | return false 163 | } 164 | 165 | func shouldSkipPod(previousHour time.Time, resource *corev1.Pod) bool { 166 | if resource.Status.Phase == corev1.PodSucceeded || resource.Status.Phase == corev1.PodFailed { 167 | canSkip := true 168 | for _, v := range resource.Status.ContainerStatuses { 169 | if v.State.Terminated != nil && v.State.Terminated.FinishedAt.After(previousHour) { 170 | canSkip = false 171 | } 172 | } 173 | return canSkip 174 | } 175 | return false 176 | } 177 | 178 | // sanitizeData removes information from kubernetes resources for customer security purposes 179 | // nolint:gocyclo, gosimple 180 | func sanitizeData(to interface{}) interface{} { 181 | switch to.(type) { 182 | case *corev1.Pod: 183 | return sanitizePod(to) 184 | case *v1apps.DaemonSet: 185 | cast := to.(*v1apps.DaemonSet) 186 | cast.Spec.Template = corev1.PodTemplateSpec{} 187 | cast.Spec.RevisionHistoryLimit = nil 188 | cast.Spec.UpdateStrategy = v1apps.DaemonSetUpdateStrategy{} 189 | cast.Spec.MinReadySeconds = 0 190 | cast.Spec.RevisionHistoryLimit = nil 191 | sanitizeMeta(&cast.ObjectMeta) 192 | return cast 193 | case *v1apps.ReplicaSet: 194 | cast := to.(*v1apps.ReplicaSet) 195 | cast.Spec.Replicas 
= nil 196 | cast.Spec.Template = corev1.PodTemplateSpec{} 197 | cast.Spec.MinReadySeconds = 0 198 | sanitizeMeta(&cast.ObjectMeta) 199 | return cast 200 | case *v1apps.Deployment: 201 | cast := to.(*v1apps.Deployment) 202 | cast.Spec.Template = corev1.PodTemplateSpec{} 203 | cast.Spec.Replicas = nil 204 | cast.Spec.Strategy = v1apps.DeploymentStrategy{} 205 | cast.Spec.MinReadySeconds = 0 206 | cast.Spec.RevisionHistoryLimit = nil 207 | cast.Spec.ProgressDeadlineSeconds = nil 208 | sanitizeMeta(&cast.ObjectMeta) 209 | return cast 210 | case *v1batch.Job: 211 | cast := to.(*v1batch.Job) 212 | cast.Spec.Template = corev1.PodTemplateSpec{} 213 | cast.Spec.Parallelism = nil 214 | cast.Spec.Completions = nil 215 | cast.Spec.ActiveDeadlineSeconds = nil 216 | cast.Spec.BackoffLimit = nil 217 | cast.Spec.ManualSelector = nil 218 | cast.Spec.TTLSecondsAfterFinished = nil 219 | cast.Spec.CompletionMode = nil 220 | cast.Spec.Suspend = nil 221 | sanitizeMeta(&cast.ObjectMeta) 222 | return cast 223 | case *v1batch.CronJob: 224 | cast := to.(*v1batch.CronJob) 225 | // cronjobs have no Selector 226 | cast.Spec = v1batch.CronJobSpec{} 227 | sanitizeMeta(&cast.ObjectMeta) 228 | return cast 229 | case *corev1.Service: 230 | cast := to.(*corev1.Service) 231 | cast.Spec.Ports = nil 232 | cast.Spec.ClusterIP = "" 233 | cast.Spec.ClusterIPs = nil 234 | cast.Spec.Type = "" 235 | cast.Spec.ExternalIPs = nil 236 | cast.Spec.SessionAffinity = "" 237 | cast.Spec.LoadBalancerIP = "" 238 | cast.Spec.LoadBalancerSourceRanges = nil 239 | cast.Spec.ExternalName = "" 240 | cast.Spec.ExternalTrafficPolicy = "" 241 | cast.Spec.HealthCheckNodePort = 0 242 | cast.Spec.SessionAffinityConfig = nil 243 | cast.Spec.IPFamilies = nil 244 | cast.Spec.IPFamilyPolicy = nil 245 | cast.Spec.AllocateLoadBalancerNodePorts = nil 246 | cast.Spec.LoadBalancerClass = nil 247 | cast.Spec.InternalTrafficPolicy = nil 248 | sanitizeMeta(&cast.ObjectMeta) 249 | return cast 250 | case *corev1.ReplicationController: 251 | 
cast := to.(*corev1.ReplicationController) 252 | cast.Spec.Replicas = nil 253 | cast.Spec.Template = nil 254 | cast.Spec.MinReadySeconds = 0 255 | sanitizeMeta(&cast.ObjectMeta) 256 | return cast 257 | case *corev1.PersistentVolume: 258 | cast := to.(*corev1.PersistentVolume) 259 | sanitizeMeta(&cast.ObjectMeta) 260 | return cast 261 | case *corev1.PersistentVolumeClaim: 262 | cast := to.(*corev1.PersistentVolumeClaim) 263 | sanitizeMeta(&cast.ObjectMeta) 264 | return cast 265 | case *corev1.Node: 266 | cast := to.(*corev1.Node) 267 | sanitizeMeta(&cast.ObjectMeta) 268 | return cast 269 | } 270 | return to 271 | } 272 | 273 | // trimData removes unneeded kubernetes resource fields 274 | // nolint gocyclo, gosimple 275 | func trimData(to interface{}) interface{} { 276 | switch to.(type) { 277 | case *corev1.Pod: 278 | return trimPod(to) 279 | case *v1apps.DaemonSet: 280 | cast := to.(*v1apps.DaemonSet) 281 | trimMeta(&cast.ObjectMeta) 282 | return cast 283 | case *v1apps.ReplicaSet: 284 | cast := to.(*v1apps.ReplicaSet) 285 | trimMeta(&cast.ObjectMeta) 286 | return cast 287 | case *v1apps.Deployment: 288 | cast := to.(*v1apps.Deployment) 289 | trimMeta(&cast.ObjectMeta) 290 | return cast 291 | case *v1batch.Job: 292 | cast := to.(*v1batch.Job) 293 | trimMeta(&cast.ObjectMeta) 294 | return cast 295 | case *v1batch.CronJob: 296 | cast := to.(*v1batch.CronJob) 297 | trimMeta(&cast.ObjectMeta) 298 | return cast 299 | case *corev1.Service: 300 | cast := to.(*corev1.Service) 301 | trimMeta(&cast.ObjectMeta) 302 | return cast 303 | case *corev1.ReplicationController: 304 | cast := to.(*corev1.ReplicationController) 305 | trimMeta(&cast.ObjectMeta) 306 | return cast 307 | case *corev1.Namespace: 308 | return trimNamespace(to) 309 | case *corev1.PersistentVolume: 310 | cast := to.(*corev1.PersistentVolume) 311 | trimMeta(&cast.ObjectMeta) 312 | return cast 313 | case *corev1.PersistentVolumeClaim: 314 | cast := to.(*corev1.PersistentVolumeClaim) 315 | 
trimMeta(&cast.ObjectMeta) 316 | return cast 317 | case *corev1.Node: 318 | cast := to.(*corev1.Node) 319 | trimMeta(&cast.ObjectMeta) 320 | return cast 321 | } 322 | return to 323 | } 324 | 325 | func sanitizeMeta(objectMeta *metav1.ObjectMeta) { 326 | objectMeta.Finalizers = nil 327 | } 328 | 329 | func trimMeta(objectMeta *metav1.ObjectMeta) { 330 | objectMeta.ManagedFields = nil 331 | delete(objectMeta.Annotations, KubernetesLastAppliedConfig) 332 | } 333 | 334 | func sanitizePod(to interface{}) interface{} { 335 | cast := to.(*corev1.Pod) 336 | for j, container := range (*cast).Spec.Containers { 337 | (*cast).Spec.Containers[j] = sanitizeContainer(container) 338 | } 339 | for j, container := range (*cast).Spec.InitContainers { 340 | (*cast).Spec.InitContainers[j] = sanitizeContainer(container) 341 | } 342 | return cast 343 | } 344 | 345 | func trimPod(to interface{}) interface{} { 346 | cast := to.(*corev1.Pod) 347 | // removing env var and related data from the object 348 | (*cast).ObjectMeta.ManagedFields = nil 349 | delete((*cast).ObjectMeta.Annotations, KubernetesLastAppliedConfig) 350 | 351 | for j, container := range (*cast).Spec.Containers { 352 | (*cast).Spec.Containers[j] = trimContainer(container) 353 | } 354 | for j, container := range (*cast).Spec.InitContainers { 355 | (*cast).Spec.InitContainers[j] = trimContainer(container) 356 | } 357 | return cast 358 | } 359 | 360 | func sanitizeContainer(container corev1.Container) corev1.Container { 361 | container.Command = nil 362 | container.Args = nil 363 | container.ImagePullPolicy = "" 364 | container.LivenessProbe = nil 365 | container.StartupProbe = nil 366 | container.ReadinessProbe = nil 367 | container.TerminationMessagePath = "" 368 | container.TerminationMessagePolicy = "" 369 | container.SecurityContext = nil 370 | return container 371 | } 372 | 373 | func trimContainer(container corev1.Container) corev1.Container { 374 | container.Env = nil 375 | return container 376 | } 377 | 378 | func 
trimNamespace(to interface{}) interface{} { 379 | cast := to.(*corev1.Namespace) 380 | (*cast).ObjectMeta.ManagedFields = nil 381 | return cast 382 | } 383 | 384 | func GetTransformFunc(parseMetricsData bool) func(resource interface{}) (interface{}, error) { 385 | return func(resource interface{}) (interface{}, error) { 386 | if parseMetricsData { 387 | resource = sanitizeData(resource) 388 | } 389 | resource = trimData(resource) 390 | return resource, nil 391 | } 392 | } 393 | -------------------------------------------------------------------------------- /retrieval/raw/models.go: -------------------------------------------------------------------------------- 1 | package raw 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6 | ) 7 | 8 | const ( 9 | AgentMeasurement = "agent-measurement" 10 | Namespaces = "namespaces" 11 | Pods = "pods" 12 | Deployments = "deployments" 13 | ReplicaSets = "replicasets" 14 | ReplicationControllers = "replicationcontrollers" 15 | DaemonSets = "daemonsets" 16 | Services = "services" 17 | Jobs = "jobs" 18 | Nodes = "nodes" 19 | PersistentVolumes = "persistentvolumes" 20 | PersistentVolumeClaims = "persistentvolumeclaims" 21 | ) 22 | 23 | // ParsableFileSet contains file names that can be minimized via de/re serialization 24 | var ParsableFileSet = map[string]struct{}{ 25 | AgentMeasurement: {}, 26 | Namespaces: {}, 27 | Pods: {}, 28 | Deployments: {}, 29 | ReplicaSets: {}, 30 | ReplicationControllers: {}, 31 | DaemonSets: {}, 32 | Services: {}, 33 | Jobs: {}, 34 | Nodes: {}, 35 | PersistentVolumes: {}, 36 | PersistentVolumeClaims: {}, 37 | } 38 | 39 | // ListResponse is a base object for unmarshaling k8s objects from the JSON files containing them. It captures 40 | // the general fields present on all the responses. 
41 | type ListResponse struct { 42 | APIVersion string `json:"apiVersion"` 43 | Kind string `json:"kind"` 44 | Metadata map[string]string `json:"metadata"` 45 | Code int `json:"code"` 46 | Details map[string]string `json:"details"` 47 | Message string `json:"message"` 48 | Reason string `json:"reason"` 49 | Status string `json:"status"` 50 | } 51 | 52 | // LabelSelectorMatchedResource is a k8s resource that "points" to a pod by a label selector. This struct 53 | // gathers the minimal necessary fields for adding the relevant labels to the heapster metric. 54 | type LabelSelectorMatchedResource struct { 55 | metav1.ObjectMeta `json:"metadata,omitempty"` 56 | Spec struct { 57 | LabelSelector metav1.LabelSelector `json:"selector,omitempty"` 58 | } `json:"spec,omitempty"` 59 | } 60 | 61 | // LabelSelectorMatchedResourceList is a slice of LabelSelectorMatchedResource, one for each entry in the json. 62 | type LabelSelectorMatchedResourceList struct { 63 | ListResponse 64 | Items []LabelSelectorMatchedResource `json:"items"` 65 | } 66 | 67 | // LabelMapMatchedResource is a k8s resource that "points" to a pod by a label map. This struct 68 | // gathers the minimal necessary fields for adding the relevant labels to the heapster metric. 69 | type LabelMapMatchedResource struct { 70 | metav1.ObjectMeta `json:"metadata,omitempty"` 71 | Spec struct { 72 | LabelSelector map[string]string `json:"selector,omitempty"` 73 | } `json:"spec,omitempty"` 74 | Status struct { 75 | LoadBalancer LoadBalancer `json:"loadBalancer"` 76 | } 77 | } 78 | 79 | // LoadBalancer represents ingress for ELB resources 80 | type LoadBalancer struct { 81 | Ingress []struct { 82 | Hostname string `json:"hostname"` 83 | } `json:"ingress"` 84 | } 85 | 86 | // LabelMapMatchedResourceList is a slice of LabelMapMatchedResource, one for each entry in the json. 
87 | type LabelMapMatchedResourceList struct { 88 | ListResponse 89 | Items []LabelMapMatchedResource `json:"items"` 90 | } 91 | 92 | // NamespaceList represents the list of namespaces unmarshalled from the namespaces api. 93 | type NamespaceList struct { 94 | ListResponse 95 | Items []corev1.Namespace `json:"items"` 96 | } 97 | 98 | // PodList represents the list of pods unmarshalled from the pods api. 99 | type PodList struct { 100 | ListResponse 101 | Items []corev1.Pod `json:"items"` 102 | } 103 | 104 | // NodeList represents the list of nodes unmarshalled from the nodes api. 105 | type NodeList struct { 106 | ListResponse 107 | Items []corev1.Node `json:"items"` 108 | } 109 | 110 | // PersistentVolumeList represents the list of persistent volumes unmarshalled from the persistent volumes api. 111 | type PersistentVolumeList struct { 112 | ListResponse 113 | Items []corev1.PersistentVolume `json:"items"` 114 | } 115 | 116 | // PersistentVolumeClaimList represents the list of persistent volume claims unmarshalled from the persistent 117 | // volume claims api. 118 | type PersistentVolumeClaimList struct { 119 | ListResponse 120 | Items []corev1.PersistentVolumeClaim `json:"items"` 121 | } 122 | 123 | // CldyAgent has information from the agent JSON file. 
124 | type CldyAgent struct { 125 | Name string `json:"name,omitempty"` 126 | Metrics map[string]uint64 `json:"metrics,omitempty"` 127 | Tags map[string]string `json:"tags,omitempty"` 128 | Timestamp int64 `json:"ts,omitempty"` 129 | Value float64 `json:"value,omitempty"` 130 | Values map[string]string `json:"values,omitempty"` 131 | } 132 | -------------------------------------------------------------------------------- /retrieval/raw/raw_endpoint.go: -------------------------------------------------------------------------------- 1 | package raw 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io" 9 | v1 "k8s.io/api/core/v1" 10 | "math" 11 | "net/http" 12 | "os" 13 | "reflect" 14 | "strconv" 15 | "strings" 16 | "time" 17 | 18 | "github.com/cloudability/metrics-agent/util" 19 | log "github.com/sirupsen/logrus" 20 | ) 21 | 22 | const ( 23 | KubernetesLastAppliedConfig = "kubectl.kubernetes.io/last-applied-configuration" 24 | ) 25 | 26 | // Client defines an HTTP Client 27 | type Client struct { 28 | HTTPClient *http.Client 29 | insecure bool 30 | BearerToken string 31 | BearerTokenPath string 32 | retries uint 33 | parseMetricData bool 34 | } 35 | 36 | // NewClient creates a new raw.Client 37 | func NewClient(HTTPClient http.Client, insecure bool, bearerToken, bearerTokenPath string, retries uint, 38 | parseMetricData bool) Client { 39 | return Client{ 40 | HTTPClient: &HTTPClient, 41 | insecure: insecure, 42 | BearerToken: bearerToken, 43 | BearerTokenPath: bearerTokenPath, 44 | retries: retries, 45 | parseMetricData: parseMetricData, 46 | } 47 | } 48 | 49 | // createRequest creates a HTTP request using a given client 50 | func (c *Client) createRequest(method, url string, body io.Reader) (*http.Request, error) { 51 | 52 | request, err := http.NewRequest(method, url, body) 53 | if err != nil { 54 | return nil, err 55 | } 56 | 57 | if c.BearerToken != "" { 58 | request.Header.Add("Authorization", "bearer "+c.BearerToken) 59 | } 60 | 61 | 
return request, err 62 | } 63 | 64 | // GetRawEndPoint retrives the body of HTTP response from a given method , 65 | // sourcename, working directory, URL, and request body 66 | func (c *Client) GetRawEndPoint(method, sourceName string, 67 | workDir *os.File, URL string, body []byte, verbose bool) (filename string, err error) { 68 | 69 | attempts := c.retries + 1 70 | b := bytes.NewBuffer(body) 71 | 72 | for i := uint(0); i < attempts; i++ { 73 | if i > 0 { 74 | time.Sleep(time.Duration(int64(math.Pow(2, float64(i)))) * time.Second) 75 | } 76 | filename, err = downloadToFile(c, method, sourceName, workDir, URL, b) 77 | if err == nil { 78 | return filename, nil 79 | } 80 | if verbose { 81 | log.Warnf("%v URL: %s -- retrying: %v", err, URL, i+1) 82 | } 83 | } 84 | return filename, err 85 | } 86 | 87 | func downloadToFile(c *Client, method, sourceName string, workDir *os.File, URL string, 88 | body io.Reader) (filename string, rerr error) { 89 | 90 | var fileExt string 91 | 92 | req, err := c.createRequest(method, URL, body) 93 | if err != nil { 94 | return filename, fmt.Errorf("unable to create raw request for %s", sourceName) 95 | } 96 | 97 | if method == http.MethodPost { 98 | req.Header.Set("Content-Type", "application/json") 99 | } 100 | 101 | resp, err := c.HTTPClient.Do(req) 102 | if err != nil { 103 | return filename, errors.New("unable to connect") 104 | } 105 | 106 | defer util.SafeClose(resp.Body.Close, &rerr) 107 | 108 | if !(resp.StatusCode >= 200 && resp.StatusCode <= 299) { 109 | return filename, fmt.Errorf("invalid response %s", strconv.Itoa(resp.StatusCode)) 110 | } 111 | 112 | ct := resp.Header.Get("Content-Type") 113 | 114 | if strings.Contains(ct, "application/json") { 115 | fileExt = ".json" 116 | } else if strings.Contains(ct, "text/plain") { 117 | fileExt = ".txt" 118 | } else { 119 | fileExt = "" 120 | } 121 | 122 | rawRespFile, err := os.Create(workDir.Name() + "/" + sourceName + fileExt) 123 | if err != nil { 124 | return filename, 
errors.New("unable to create raw metric file") 125 | } 126 | defer util.SafeClose(rawRespFile.Close, &rerr) 127 | filename = rawRespFile.Name() 128 | 129 | if _, ok := ParsableFileSet[sourceName]; c.parseMetricData && ok { 130 | err = parseAndWriteData(sourceName, resp.Body, rawRespFile) 131 | return filename, err 132 | } 133 | 134 | _, err = io.Copy(rawRespFile, resp.Body) 135 | if err != nil { 136 | return filename, fmt.Errorf("error writing file: %s", rawRespFile.Name()) 137 | } 138 | 139 | return filename, rerr 140 | } 141 | 142 | // TODO: investigate streamed json reading / writing 143 | func parseAndWriteData(filename string, reader io.Reader, writer io.Writer) error { 144 | var to = getType(filename) 145 | out := reflect.New(reflect.TypeOf(to)) 146 | err := json.NewDecoder(reader).Decode(out.Interface()) 147 | 148 | if err != nil { 149 | return fmt.Errorf("unable to decode data for file: %s", filename) 150 | } 151 | to = sanitizeData(out.Elem().Interface()) 152 | 153 | data, err := json.Marshal(to) 154 | if err != nil { 155 | return fmt.Errorf("unable to marshal data for file: %s", filename) 156 | } 157 | _, err = io.Copy(writer, bytes.NewReader(data)) 158 | if err != nil { 159 | return fmt.Errorf("error writing file: %s", filename) 160 | } 161 | return nil 162 | } 163 | 164 | func getType(filename string) interface{} { 165 | var to interface{} 166 | switch filename { 167 | case Nodes: 168 | to = NodeList{} 169 | case Namespaces: 170 | to = NamespaceList{} 171 | case Pods: 172 | to = PodList{} 173 | case PersistentVolumes: 174 | to = PersistentVolumeList{} 175 | case PersistentVolumeClaims: 176 | to = PersistentVolumeClaimList{} 177 | case AgentMeasurement: 178 | to = CldyAgent{} 179 | case Services, ReplicationControllers: 180 | to = LabelMapMatchedResourceList{} 181 | case Deployments, ReplicaSets, Jobs, DaemonSets: 182 | to = LabelSelectorMatchedResourceList{} 183 | } 184 | return to 185 | } 186 | 187 | func sanitizeData(to interface{}) interface{} { 188 
| switch to.(type) { 189 | case LabelSelectorMatchedResourceList: 190 | return sanitizeSelectorMatchedResourceList(to) 191 | case PodList: 192 | return sanitizePodList(to) 193 | case LabelMapMatchedResourceList: 194 | return sanitizeMapMatchedResourceList(to) 195 | case NamespaceList: 196 | return sanitizeNamespaceData(to) 197 | } 198 | return to 199 | } 200 | 201 | func sanitizeNamespaceData(to interface{}) interface{} { 202 | cast := to.(NamespaceList) 203 | for i := range cast.Items { 204 | cast.Items[i].ObjectMeta.ManagedFields = nil 205 | } 206 | return cast 207 | } 208 | 209 | // nolint:gosimple 210 | func sanitizeSelectorMatchedResourceList(to interface{}) interface{} { 211 | cast := to.(LabelSelectorMatchedResourceList) 212 | for i := range cast.Items { 213 | 214 | // stripping env var and related data from the object 215 | cast.Items[i].ObjectMeta.ManagedFields = nil 216 | if _, ok := cast.Items[i].ObjectMeta.Annotations[KubernetesLastAppliedConfig]; ok { 217 | delete(cast.Items[i].ObjectMeta.Annotations, KubernetesLastAppliedConfig) 218 | } 219 | } 220 | return cast 221 | } 222 | 223 | // nolint: gosimple 224 | func sanitizePodList(to interface{}) interface{} { 225 | cast := to.(PodList) 226 | for i := range cast.Items { 227 | 228 | // stripping env var and related data from the object 229 | cast.Items[i].ObjectMeta.ManagedFields = nil 230 | if _, ok := cast.Items[i].ObjectMeta.Annotations[KubernetesLastAppliedConfig]; ok { 231 | delete(cast.Items[i].ObjectMeta.Annotations, KubernetesLastAppliedConfig) 232 | } 233 | for j, container := range cast.Items[i].Spec.Containers { 234 | cast.Items[i].Spec.Containers[j] = sanitizeContainer(container) 235 | } 236 | for j, container := range cast.Items[i].Spec.InitContainers { 237 | cast.Items[i].Spec.InitContainers[j] = sanitizeContainer(container) 238 | } 239 | } 240 | return cast 241 | } 242 | 243 | func sanitizeContainer(container v1.Container) v1.Container { 244 | container.Env = nil 245 | container.Command = 
nil 246 | container.Args = nil 247 | container.ImagePullPolicy = "" 248 | container.LivenessProbe = nil 249 | container.StartupProbe = nil 250 | container.ReadinessProbe = nil 251 | container.TerminationMessagePath = "" 252 | container.TerminationMessagePolicy = "" 253 | container.SecurityContext = nil 254 | return container 255 | } 256 | 257 | // nolint: gosimple 258 | func sanitizeMapMatchedResourceList(to interface{}) interface{} { 259 | cast := to.(LabelMapMatchedResourceList) 260 | for i := range cast.Items { 261 | 262 | // stripping env var and related data from the object 263 | cast.Items[i].ObjectMeta.ManagedFields = nil 264 | if _, ok := cast.Items[i].ObjectMeta.Annotations[KubernetesLastAppliedConfig]; ok { 265 | delete(cast.Items[i].ObjectMeta.Annotations, KubernetesLastAppliedConfig) 266 | } 267 | cast.Items[i].Finalizers = nil 268 | } 269 | return cast 270 | } 271 | -------------------------------------------------------------------------------- /retrieval/raw/raw_endpoint_test.go: -------------------------------------------------------------------------------- 1 | package raw 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "os" 7 | "strconv" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func rawEndpointTests(t testing.TB) { 13 | var scenarios = []func(t testing.TB){ 14 | ensureThatFileCreatedForHeapsterData, 15 | ensureThatErrorsAreHandled, 16 | ensureNetworkErrorsAreHandled, 17 | ensureThatFileParsedAndCreatedForPodsData, 18 | ensureThatFileCreatedForPodsData, 19 | } 20 | for _, v := range scenarios { 21 | v(t) 22 | } 23 | } 24 | 25 | func TestRawEndpoint(t *testing.T) { 26 | rawEndpointTests(t) 27 | } 28 | 29 | func BenchmarkMetricFileCreation(b *testing.B) { 30 | for n := 0; n < b.N; n++ { 31 | ensureThatFileCreatedForHeapsterData(b) 32 | } 33 | } 34 | 35 | func BenchmarkPodsFile(b *testing.B) { 36 | for n := 0; n < b.N; n++ { 37 | ensureThatFileCreatedForPodsData(b) 38 | } 39 | } 40 | 41 | func BenchmarkParsedPodsFile(b *testing.B) { 42 
| for n := 0; n < b.N; n++ { 43 | ensureThatFileParsedAndCreatedForPodsData(b) 44 | } 45 | } 46 | 47 | // nolint:revive 48 | func ensureThatErrorsAreHandled(t testing.TB) { 49 | httpClient := http.DefaultClient 50 | client := NewClient( 51 | *httpClient, 52 | true, 53 | "", 54 | "", 55 | 2, 56 | false, 57 | ) 58 | 59 | wd, _ := os.MkdirTemp("", "raw_endpoint_test") 60 | workingDir, _ := os.Open(wd) 61 | 62 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 63 | w.WriteHeader(404) 64 | })) 65 | defer ts.Close() 66 | 67 | _, err := client.GetRawEndPoint(http.MethodGet, "heapster", workingDir, ts.URL, nil, true) 68 | if err == nil { 69 | t.Error("Server returned invalid response code but function did not raise error") 70 | } 71 | } 72 | 73 | func ensureThatFileCreatedForHeapsterData(t testing.TB) { 74 | ensureThatFileCreated(t, "../../testdata/heapster-metric-export.json", "heapster", true, false) 75 | } 76 | 77 | func ensureThatFileParsedAndCreatedForPodsData(t testing.TB) { 78 | ensureThatFileCreated(t, "../../testdata/pods.json", "pods", true, true) 79 | } 80 | 81 | func ensureThatFileCreatedForPodsData(t testing.TB) { 82 | ensureThatFileCreated(t, "../../testdata/pods.json", "pods", false, false) 83 | } 84 | 85 | // nolint:revive, errcheck 86 | func ensureThatFileCreated(t testing.TB, testData string, source string, parseData bool, checkForSecrets bool) { 87 | httpClient := http.DefaultClient 88 | client := NewClient( 89 | *httpClient, 90 | true, 91 | "", 92 | "", 93 | 2, 94 | parseData, 95 | ) 96 | 97 | wd, _ := os.MkdirTemp("", "raw_endpoint_test") 98 | workingDir, _ := os.Open(wd) 99 | 100 | body, _ := os.ReadFile(testData) 101 | 102 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 103 | w.Write(body) 104 | })) 105 | defer ts.Close() 106 | 107 | testFileName, err := client.GetRawEndPoint(http.MethodGet, source, workingDir, ts.URL, nil, true) 108 | if err != nil { 109 | 
t.Error(err) 110 | } 111 | sourceFile, _ := os.Open(testData) 112 | testFile, _ := os.Open(testFileName) 113 | 114 | defer sourceFile.Close() 115 | defer testFile.Close() 116 | 117 | sF, _ := sourceFile.Stat() 118 | tF, _ := testFile.Stat() 119 | 120 | if checkForSecrets { 121 | in, _ := os.ReadFile(testData) 122 | if !strings.Contains(string(in), "superSecret") { 123 | t.Error("Source file should have contained secret, but did not") 124 | } 125 | 126 | out, _ := os.ReadFile(testFileName) 127 | if strings.Contains(string(out), "superSecret") { 128 | t.Error("Dest file should not have contained secret, but did") 129 | } 130 | } 131 | 132 | _, fileShouldBeParsed := ParsableFileSet[source] 133 | if fileShouldBeParsed && parseData { 134 | tFs := tF.Size() 135 | sFs := sF.Size() 136 | if sFs == tFs { 137 | t.Error("Source file matches output, but should have been parsed") 138 | } 139 | percent := ((sFs - tFs) * 100) / sFs 140 | if percent > 51 || percent < 49 { 141 | t.Error("Output file should be roughly 50% the size of the input file but was " + strconv.Itoa(int(percent))) 142 | } 143 | } else { 144 | if sF.Size() != tF.Size() { 145 | t.Error("Source file size does not match output") 146 | } 147 | } 148 | 149 | } 150 | 151 | func ensureNetworkErrorsAreHandled(t testing.TB) { 152 | httpClient := http.DefaultClient 153 | client := NewClient( 154 | *httpClient, 155 | true, 156 | "", 157 | "", 158 | 2, 159 | false, 160 | ) 161 | 162 | wd, _ := os.MkdirTemp("", "raw_endpoint_test") 163 | workingDir, _ := os.Open(wd) 164 | 165 | _, err := client.GetRawEndPoint(http.MethodGet, "heapster", workingDir, "http://localhost:1234", nil, true) 166 | if err == nil { 167 | t.Error("Unable to to connect to server but function did not raise error") 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /test/random.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | 
# cleanup tears down the KinD cluster and removes the temp working directory.
# NOTE(review): the first line was garbled in this dump; reconstructed from the
# visible "kind ... > /dev/null || true" residue — confirm against git history.
cleanup() {
  kind delete cluster --name=e2e-${KUBERNETES_VERSION} > /dev/null || true
  if [ -d "$WORKINGDIR" ]; then
    echo "Cleaning up Temp directory : ${WORKINGDIR}"
    rm -rf "$WORKINGDIR"
  fi
}

# setup_kind creates a fresh KinD cluster and loads the agent image into it,
# retrying the load up to 5 times.
setup_kind() {

  export PATH=$(go env GOPATH)/bin:$PATH

  cleanup

  if ! (kind create cluster --name=e2e-${KUBERNETES_VERSION} --image=kindest/node:${KUBERNETES_VERSION}) ; then
    echo "Could not create KinD cluster"
    exit 1
  fi

  sleep 2
  kubectl version

  i=0
  until [ $i -ge 5 ]
  do
    kind load docker-image ${IMAGE} --name e2e-${KUBERNETES_VERSION} && echo "${IMAGE} image added to cluster" && break
    # BUG FIX: the counter was previously assigned to an unused variable
    # (n=$[$i+1]), so $i never advanced and the loop could never terminate
    i=$((i+1))
    sleep 15
  done
}

# deploy applies the metrics-agent manifest, patches in the locally built image
# and e2e env vars, and starts a stress workload to generate metrics.
deploy(){
  mkdir -p -m 0777 ${WORKINGDIR}

  if [ ! -d "$WORKINGDIR" ]; then
    >&2 echo "Failed to create temp directory ${WORKINGDIR}"
    exit 1
  fi

  export CONTAINER="\"name\": \"metrics-agent\", \"image\": \"${IMAGE}\",\"imagePullPolicy\": \"Never\""
  export ENVS="\"env\": [{\"name\": \"CLOUDABILITY_CLUSTER_NAME\", \"value\": \"e2e\"}, {\"name\": \"CLOUDABILITY_POLL_INTERVAL\", \"value\": \"20\"} ]"

  if [ "${CI}" = "true" ]; then
    docker cp ~/.kube/config e2e-${KUBERNETES_VERSION}-control-plane:/root/.kube/config
    ${CI_KUBECTL} apply -f - < deploy/kubernetes/cloudability-metrics-agent.yaml
    ${CI_KUBECTL} -n cloudability patch deployment metrics-agent --patch "{\"spec\": {\"template\": {\"spec\": {\"containers\": [{${CONTAINER}, ${ENVS} }]}}}}"
    sleep 10
    ${CI_KUBECTL} create ns stress
    ${CI_KUBECTL} -n stress run stress --labels=app=stress --image=jfusterm/stress -- --cpu 50 --vm 1 --vm-bytes 127m
  else
    kubectl apply -f deploy/kubernetes/cloudability-metrics-agent.yaml
    kubectl -n cloudability patch deployment metrics-agent --patch \
    "{\"spec\": {\"template\": {\"spec\": {\"containers\": [{${CONTAINER}, ${ENVS} }]}}}}"
    sleep 10
    kubectl create ns stress
    kubectl -n stress run stress --labels=app=stress --image=jfusterm/stress -- --cpu 50 --vm 1 --vm-bytes 127m
  fi
}

# wait_for_metrics blocks until the metrics-agent pod reports Ready.
wait_for_metrics() {
  if [ "${CI}" = "true" ]; then
    while [[ $(${CI_KUBECTL} get pods -n cloudability -l app=metrics-agent -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; do
      echo "waiting for pod ready" && sleep 5;
    done
  else
    while [[ $(kubectl get pods -n cloudability -l app=metrics-agent -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; do
      echo "waiting for pod ready" && sleep 5;
    done
  fi
}

# get_sample_data copies the agent's collected sample out of the pod into the
# working directory for the Go verification tests.
get_sample_data(){
  echo "Waiting for agent data collection check: docker cp e2e-${KUBERNETES_VERSION}-control-plane:/tmp ${WORKINGDIR}"
  sleep 30
  if [ "${CI}" = "true" ]; then
    POD=$(${CI_KUBECTL} get pod -n cloudability -l app=metrics-agent -o jsonpath="{.items[0].metadata.name}")
    echo "pod is $POD"
    ${CI_KUBECTL} cp cloudability/${POD}:/tmp /root/export
    sleep 10
    docker cp e2e-${KUBERNETES_VERSION}-control-plane:/root/export ${WORKINGDIR}
  else
    POD=$(kubectl get pod -n cloudability -l app=metrics-agent -o jsonpath="{.items[0].metadata.name}")
    kubectl cp cloudability/$POD:/tmp ${WORKINGDIR}
  fi
}

# run_tests executes the Go e2e assertions against the copied sample.
run_tests() {
  echo "running tests: WORKING_DIR=${WORKINGDIR} KUBERNETES_VERSION=${KUBERNETES_VERSION} go test ./testdata/e2e/... -v"
  WORKING_DIR=${WORKINGDIR} KUBERNETES_VERSION=${KUBERNETES_VERSION} go test ./testdata/e2e/... -v
}

trap cleanup EXIT
setup_kind
deploy
wait_for_metrics
get_sample_data
run_tests
has expected files for cluster version", func(t *testing.T) { 33 | seen := make(map[string]bool, len(knownFileTypes)) 34 | 35 | err := filepath.Walk(wd, func(path string, info os.FileInfo, e error) error { 36 | if e != nil { 37 | return e 38 | } 39 | 40 | // check if it is a regular file (not dir) 41 | if info.Mode().IsRegular() { 42 | n := info.Name() 43 | ft := toAgentFileType(n) 44 | 45 | // for all json/jsonl files mark both as true once we see one 46 | if strings.Contains(ft, "json") { 47 | if strings.Contains(ft, "jsonl") { 48 | // mark json seen 49 | seen[strings.TrimSuffix(ft, "l")] = true 50 | } else { 51 | // mark jsonl seen 52 | seen[ft+"l"] = true 53 | } 54 | } 55 | seen[ft] = true 56 | 57 | if unmarshalFn, ok := knownFileTypes[ft]; ok { 58 | t.Logf("Processing: %v", n) 59 | f, err := os.ReadFile(path) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | if err := unmarshalFn(path, f, parsedK8sLists); err != nil { 65 | return err 66 | } 67 | 68 | } 69 | } 70 | return nil 71 | }) 72 | if err != nil { 73 | t.Fatalf("Failed: %v", err) 74 | } 75 | err = checkForRequiredFiles(seen, minorVersion) 76 | if err != nil { 77 | t.Fatalf("Failed: %v", err) 78 | } 79 | }) 80 | 81 | t.Run("ensure that a metrics sample contains the cloudability namespace", func(t *testing.T) { 82 | for _, ns := range parsedK8sLists.Namespaces.Items { 83 | if ns.Name == "cloudability" { 84 | return 85 | } 86 | } 87 | t.Error("Namespace cloudability not found in metric sample") 88 | }) 89 | 90 | t.Run("ensure that a metrics sample has expected pod data", func(t *testing.T) { 91 | for _, po := range parsedK8sLists.Pods.Items { 92 | if strings.HasPrefix(po.Name, stress) && po.Status.QOSClass == v1.PodQOSBestEffort { 93 | return 94 | } 95 | } 96 | t.Error("pod stress not found in metric sample") 97 | }) 98 | 99 | t.Run("ensure that a metrics sample has expected containers summary data", func(t *testing.T) { 100 | for _, ns := range parsedK8sLists.NodeSummaries { 101 | for _, pf := range 
ns.Pods { 102 | if strings.HasPrefix(pf.PodRef.Name, stress) && pf.PodRef.Namespace == stress && pf.CPU.UsageNanoCores != nil { 103 | return 104 | } 105 | } 106 | } 107 | t.Error("pod summary data not found in metric sample") 108 | }) 109 | 110 | t.Run("ensure that a metrics sample has accurate pod label data for stress", func(t *testing.T) { 111 | for _, po := range parsedK8sLists.Pods.Items { 112 | if strings.HasPrefix(po.Name, stress) { 113 | if po.ObjectMeta.Labels["app"] == "stress" { 114 | return 115 | } 116 | } 117 | } 118 | t.Error("pod stress has incorrect labels in metric sample") 119 | }) 120 | 121 | t.Run("ensure that a metrics sample has accurate pod label data for metrics-agent", func(t *testing.T) { 122 | for _, po := range parsedK8sLists.Pods.Items { 123 | if strings.HasPrefix(po.Name, metricsAgent) { 124 | if po.ObjectMeta.Labels["app"] == "metrics-agent" { 125 | return 126 | } 127 | } 128 | } 129 | t.Error("pod metrics-agent has incorrect labels in metric sample") 130 | }) 131 | } 132 | -------------------------------------------------------------------------------- /testdata/heapster-metric-export.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "metrics": { 4 | "cpu/usage": [ 5 | { 6 | "start": "2017-11-01T20:44:10.366120463Z", 7 | "end": "2017-11-02T19:56:00Z", 8 | "value": 0 9 | } 10 | ], 11 | "memory/cache": [ 12 | { 13 | "start": "2017-11-02T19:56:00Z", 14 | "end": "2017-11-02T19:56:00Z", 15 | "value": 0 16 | } 17 | ], 18 | "memory/major_page_faults": [ 19 | { 20 | "start": "2017-11-01T20:44:10.366120463Z", 21 | "end": "2017-11-02T19:56:00Z", 22 | "value": 0 23 | } 24 | ], 25 | "memory/page_faults": [ 26 | { 27 | "start": "2017-11-01T20:44:10.366120463Z", 28 | "end": "2017-11-02T19:56:00Z", 29 | "value": 0 30 | } 31 | ], 32 | "memory/rss": [ 33 | { 34 | "start": "2017-11-02T19:56:00Z", 35 | "end": "2017-11-02T19:56:00Z", 36 | "value": 0 37 | } 38 | ], 39 | "memory/usage": [ 40 | { 41 | 
"start": "2017-11-02T19:56:00Z", 42 | "end": "2017-11-02T19:56:00Z", 43 | "value": 0 44 | } 45 | ], 46 | "memory/working_set": [ 47 | { 48 | "start": "2017-11-02T19:56:00Z", 49 | "end": "2017-11-02T19:56:00Z", 50 | "value": 0 51 | } 52 | ], 53 | "uptime": [ 54 | { 55 | "start": "2017-11-01T20:44:10.366120463Z", 56 | "end": "2017-11-02T19:56:00Z", 57 | "value": 83515311 58 | } 59 | ] 60 | }, 61 | "labels": { 62 | "container_name": "system.slice/audit-rules.service", 63 | "host_id": "172.17.4.201", 64 | "hostname": "172.17.4.201", 65 | "nodename": "172.17.4.201" 66 | } 67 | }, 68 | { 69 | "metrics": { 70 | "cpu/limit": [ 71 | { 72 | "start": "2017-11-02T19:56:00Z", 73 | "end": "2017-11-02T19:56:00Z", 74 | "value": 0 75 | } 76 | ], 77 | "cpu/usage": [ 78 | { 79 | "start": "2017-11-01T20:44:25.960287295Z", 80 | "end": "2017-11-02T19:56:00Z", 81 | "value": 17524061 82 | } 83 | ], 84 | "memory/cache": [ 85 | { 86 | "start": "2017-11-02T19:56:00Z", 87 | "end": "2017-11-02T19:56:00Z", 88 | "value": 34263040 89 | } 90 | ], 91 | "memory/limit": [ 92 | { 93 | "start": "2017-11-02T19:56:00Z", 94 | "end": "2017-11-02T19:56:00Z", 95 | "value": 230686720 96 | } 97 | ], 98 | "memory/major_page_faults": [ 99 | { 100 | "start": "2017-11-01T20:44:25.960287295Z", 101 | "end": "2017-11-02T19:56:00Z", 102 | "value": 0 103 | } 104 | ], 105 | "memory/page_faults": [ 106 | { 107 | "start": "2017-11-01T20:44:25.960287295Z", 108 | "end": "2017-11-02T19:56:00Z", 109 | "value": 416 110 | } 111 | ], 112 | "memory/rss": [ 113 | { 114 | "start": "2017-11-02T19:56:00Z", 115 | "end": "2017-11-02T19:56:00Z", 116 | "value": 19959808 117 | } 118 | ], 119 | "memory/usage": [ 120 | { 121 | "start": "2017-11-02T19:56:00Z", 122 | "end": "2017-11-02T19:56:00Z", 123 | "value": 56078336 124 | } 125 | ], 126 | "memory/working_set": [ 127 | { 128 | "start": "2017-11-02T19:56:00Z", 129 | "end": "2017-11-02T19:56:00Z", 130 | "value": 22556672 131 | } 132 | ], 133 | "network/rx": [ 134 | { 135 | "start": 
"2017-11-01T20:44:25.960287295Z", 136 | "end": "2017-11-02T19:56:00Z", 137 | "value": 12346462 138 | } 139 | ], 140 | "network/rx_errors": [ 141 | { 142 | "start": "2017-11-01T20:44:25.960287295Z", 143 | "end": "2017-11-02T19:56:00Z", 144 | "value": 0 145 | } 146 | ], 147 | "network/tx": [ 148 | { 149 | "start": "2017-11-01T20:44:25.960287295Z", 150 | "end": "2017-11-02T19:56:00Z", 151 | "value": 6206035 152 | } 153 | ], 154 | "network/tx_errors": [ 155 | { 156 | "start": "2017-11-01T20:44:25.960287295Z", 157 | "end": "2017-11-02T19:56:00Z", 158 | "value": 0 159 | } 160 | ], 161 | "uptime": [ 162 | { 163 | "start": "2017-11-01T20:44:25.960287295Z", 164 | "end": "2017-11-02T19:56:00Z", 165 | "value": 83499717 166 | } 167 | ] 168 | }, 169 | "labels": { 170 | "container_name": "/pod", 171 | "host_id": "172.17.4.201", 172 | "hostname": "172.17.4.201", 173 | "labels": "k8s-app:kube-dns,pod-template-hash:782804071", 174 | "namespace_id": "5399ba7d-b5c4-11e7-b3bd-08002751ec84", 175 | "nodename": "172.17.4.201", 176 | "pod_id": "5521b349-b5c4-11e7-b3bd-08002751ec84", 177 | "pod_name": "kube-dns-782804071-qtcwv" 178 | } 179 | } 180 | ] -------------------------------------------------------------------------------- /testdata/namespaces.jsonl: -------------------------------------------------------------------------------- 1 | 
{"metadata":{"name":"mySpecialNamespace","uid":"0f61dc67-45da-a0c5-915ac617d4a5","resourceVersion":"555","creationTimestamp":"2022-02-07T15:37:28Z","labels":{"kubernetes.io/metadata.name":"mySpecialNamespace"},"annotations":{"kubectl.kubernetes.io/last-applied-configuration":"{\"apiVersion\":\"v1\",\"kind\":\"Namespace\",\"metadata\":{\"annotations\":{},\"name\":\"mySpecialNamespace\"}}\n"},"managedFields":[{"manager":"ManageFieldToBeDeleted","operation":"Update","apiVersion":"v1","time":"2022-02-07T15:37:28Z","fieldsType":"FieldsV1","fieldsV1":{"f:status":{"f:phase":{}}}},{"manager":"kubectl-client-side-apply","operation":"Update","apiVersion":"v1","time":"2022-02-07T17:10:24Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:annotations":{".":{},"f:kubectl.kubernetes.io/last-applied-configuration":{}}}}}]},"spec":{"finalizers":["kubernetes"]},"status":{"phase":"Active"}} 2 | -------------------------------------------------------------------------------- /testdata/pods.jsonl: -------------------------------------------------------------------------------- 1 | 
{"metadata":{"name":"cloudability-metrics-agent-6f978f966-64pn2","generateName":"cloudability-metrics-agent-6f978f966-","namespace":"cloudability","uid":"63edfc10-8ca7-462b-bbb7-1ba9c6929da2","resourceVersion":"6494382","creationTimestamp":"2022-08-04T19:01:15Z","labels":{"app":"cloudability-metrics-agent","pod-template-hash":"6f978f966"},"annotations":{"kubernetes.io/psp":"eks.privileged"},"ownerReferences":[{"apiVersion":"apps/v1","kind":"ReplicaSet","name":"cloudability-metrics-agent-6f978f966","uid":"161e5990-fa30-4b8c-93c2-5efb8ab8a062","controller":true,"blockOwnerDeletion":true}],"managedFields":[{"manager":"kube-controller-manager","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:01:15Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{},"f:labels":{".":{},"f:app":{},"f:pod-template-hash":{}},"f:ownerReferences":{".":{},"k:{\"uid\":\"161e5990-fa30-4b8c-93c2-5efb8ab8a062\"}":{}}},"f:spec":{"f:containers":{"k:{\"name\":\"cloudability-metrics-agent\"}":{".":{},"f:args":{},"f:env":{".":{},"k:{\"name\":\"CLOUDABILITY_API_KEY\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_CLUSTER_NAME\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_POLL_INTERVAL\"}":{".":{},"f:name":{},"f:value":{}}},"f:image":{},"f:imagePullPolicy":{},"f:livenessProbe":{".":{},"f:exec":{".":{},"f:command":{}},"f:failureThreshold":{},"f:initialDelaySeconds":{},"f:periodSeconds":{},"f:successThreshold":{},"f:timeoutSeconds":{}},"f:name":{},"f:resources":{".":{},"f:limits":{".":{},"f:cpu":{},"f:memory":{}},"f:requests":{".":{},"f:cpu":{},"f:memory":{}}},"f:securityContext":{".":{},"f:allowPrivilegeEscalation":{},"f:capabilities":{".":{},"f:drop":{}},"f:runAsNonRoot":{},"f:runAsUser":{}},"f:terminationMessagePath":{},"f:terminationMessagePolicy":{}}},"f:dnsPolicy":{},"f:enableServiceLinks":{},"f:imagePullSecrets":{".":{},"k:{\"name\":\"cldy-docker-auth\"}":{}},"f:restartPolicy":{},"f:schedulerName":{},"f:securityContext":{},"f:servic
eAccount":{},"f:serviceAccountName":{},"f:terminationGracePeriodSeconds":{}}}},{"manager":"kubelet","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:02:01Z","fieldsType":"FieldsV1","fieldsV1":{"f:status":{"f:conditions":{"k:{\"type\":\"ContainersReady\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Initialized\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Ready\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}}},"f:containerStatuses":{},"f:hostIP":{},"f:phase":{},"f:podIP":{},"f:podIPs":{".":{},"k:{\"ip\":\"172.26.94.113\"}":{".":{},"f:ip":{}}},"f:startTime":{}}},"subresource":"status"}]},"spec":{"volumes":[{"name":"kube-api-access-sx2nm","projected":{"sources":[{"serviceAccountToken":{"expirationSeconds":3607,"path":"token"}},{"configMap":{"name":"kube-root-ca.crt","items":[{"key":"ca.crt","path":"ca.crt"}]}},{"downwardAPI":{"items":[{"path":"namespace","fieldRef":{"apiVersion":"v1","fieldPath":"metadata.namespace"}}]}}],"defaultMode":420}}],"containers":[{"name":"cloudability-metrics-agent","image":"AMetricsAgentImage","args":["kubernetes"],"env":[{"name":"CLOUDABILITY_API_KEY","value":"ReallySecretStuff"},{"name":"CLOUDABILITY_CLUSTER_NAME","value":"testCluster"},{"name":"CLOUDABILITY_POLL_INTERVAL","value":"180"}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}},"volumeMounts":[{"name":"kube-api-access-sx2nm","readOnly":true,"mountPath":"/var/run/secrets/kubernetes.io/serviceaccount"}],"livenessProbe":{"exec":{"command":["touch","tmp/healthy"]},"initialDelaySeconds":120,"timeoutSeconds":1,"periodSeconds":600,"successThreshold":1,"failureThreshold":3},"terminationMessagePath":"/dev/termination-log","terminationMessagePolicy":"File","imagePullPolicy":"Always","securityContext":{"capabilities":{"drop":["all"]},"runAsUser":10000,"runAsNonRoot":true,"allowPrivilegeEsca
lation":false}}],"restartPolicy":"Always","terminationGracePeriodSeconds":30,"dnsPolicy":"ClusterFirst","serviceAccountName":"cloudability","serviceAccount":"cloudability","nodeName":"ip-172-26-95-214.ec2.internal","securityContext":{},"imagePullSecrets":[{"name":"cldy-docker-auth"}],"schedulerName":"default-scheduler","tolerations":[{"key":"node.kubernetes.io/not-ready","operator":"Exists","effect":"NoExecute","tolerationSeconds":300},{"key":"node.kubernetes.io/unreachable","operator":"Exists","effect":"NoExecute","tolerationSeconds":300}],"priority":0,"enableServiceLinks":true,"preemptionPolicy":"PreemptLowerPriority"},"status":{"phase":"Running","conditions":[{"type":"Initialized","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"},{"type":"Ready","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"ContainersReady","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"PodScheduled","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"}],"hostIP":"172.26.95.214","podIP":"172.26.94.113","podIPs":[{"ip":"172.26.94.113"}],"startTime":"2022-08-04T19:01:15Z","containerStatuses":[{"name":"cloudability-metrics-agent","state":{"running":{"startedAt":"2022-08-04T19:02:00Z"}},"lastState":{},"ready":true,"restartCount":0,"image":"AMetricsagentimage","imageID":"38a6c80a3b283116803cccf0a64894f69d","containerID":"docker://ed1a1750c138ad51cce50f5aba6fef453ae332ec70bc277c716cd3449e5fecac","started":true}],"qosClass":"Burstable"}} 2 | 
{"metadata":{"name":"cloudability-metrics-agent-6f978f966-64pn3","generateName":"cloudability-metrics-agent-6f978f966-","namespace":"cloudability","uid":"63edfc10-8ca7-462b-bbb7-1ba9c6929da2","resourceVersion":"6494382","creationTimestamp":"2022-08-04T19:01:15Z","labels":{"app":"cloudability-metrics-agent","pod-template-hash":"6f978f966"},"annotations":{"kubernetes.io/psp":"eks.privileged"},"ownerReferences":[{"apiVersion":"apps/v1","kind":"ReplicaSet","name":"cloudability-metrics-agent-6f978f966","uid":"161e5990-fa30-4b8c-93c2-5efb8ab8a062","controller":true,"blockOwnerDeletion":true}],"managedFields":[{"manager":"kube-controller-manager","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:01:15Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{},"f:labels":{".":{},"f:app":{},"f:pod-template-hash":{}},"f:ownerReferences":{".":{},"k:{\"uid\":\"161e5990-fa30-4b8c-93c2-5efb8ab8a062\"}":{}}},"f:spec":{"f:containers":{"k:{\"name\":\"cloudability-metrics-agent\"}":{".":{},"f:args":{},"f:env":{".":{},"k:{\"name\":\"CLOUDABILITY_API_KEY\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_CLUSTER_NAME\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_POLL_INTERVAL\"}":{".":{},"f:name":{},"f:value":{}}},"f:image":{},"f:imagePullPolicy":{},"f:livenessProbe":{".":{},"f:exec":{".":{},"f:command":{}},"f:failureThreshold":{},"f:initialDelaySeconds":{},"f:periodSeconds":{},"f:successThreshold":{},"f:timeoutSeconds":{}},"f:name":{},"f:resources":{".":{},"f:limits":{".":{},"f:cpu":{},"f:memory":{}},"f:requests":{".":{},"f:cpu":{},"f:memory":{}}},"f:securityContext":{".":{},"f:allowPrivilegeEscalation":{},"f:capabilities":{".":{},"f:drop":{}},"f:runAsNonRoot":{},"f:runAsUser":{}},"f:terminationMessagePath":{},"f:terminationMessagePolicy":{}}},"f:dnsPolicy":{},"f:enableServiceLinks":{},"f:imagePullSecrets":{".":{},"k:{\"name\":\"cldy-docker-auth\"}":{}},"f:restartPolicy":{},"f:schedulerName":{},"f:securityContext":{},"f:servic
eAccount":{},"f:serviceAccountName":{},"f:terminationGracePeriodSeconds":{}}}},{"manager":"kubelet","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:02:01Z","fieldsType":"FieldsV1","fieldsV1":{"f:status":{"f:conditions":{"k:{\"type\":\"ContainersReady\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Initialized\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Ready\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}}},"f:containerStatuses":{},"f:hostIP":{},"f:phase":{},"f:podIP":{},"f:podIPs":{".":{},"k:{\"ip\":\"172.26.94.113\"}":{".":{},"f:ip":{}}},"f:startTime":{}}},"subresource":"status"}]},"spec":{"volumes":[{"name":"kube-api-access-sx2nm","projected":{"sources":[{"serviceAccountToken":{"expirationSeconds":3607,"path":"token"}},{"configMap":{"name":"kube-root-ca.crt","items":[{"key":"ca.crt","path":"ca.crt"}]}},{"downwardAPI":{"items":[{"path":"namespace","fieldRef":{"apiVersion":"v1","fieldPath":"metadata.namespace"}}]}}],"defaultMode":420}}],"containers":[{"name":"cloudability-metrics-agent","image":"AMetricsAgentImage","args":["kubernetes"],"env":[{"name":"CLOUDABILITY_API_KEY","value":"ReallySecretStuff"},{"name":"CLOUDABILITY_CLUSTER_NAME","value":"testCluster"},{"name":"CLOUDABILITY_POLL_INTERVAL","value":"180"}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}},"volumeMounts":[{"name":"kube-api-access-sx2nm","readOnly":true,"mountPath":"/var/run/secrets/kubernetes.io/serviceaccount"}],"livenessProbe":{"exec":{"command":["touch","tmp/healthy"]},"initialDelaySeconds":120,"timeoutSeconds":1,"periodSeconds":600,"successThreshold":1,"failureThreshold":3},"terminationMessagePath":"/dev/termination-log","terminationMessagePolicy":"File","imagePullPolicy":"Always","securityContext":{"capabilities":{"drop":["all"]},"runAsUser":10000,"runAsNonRoot":true,"allowPrivilegeEsca
lation":false}}],"restartPolicy":"Always","terminationGracePeriodSeconds":30,"dnsPolicy":"ClusterFirst","serviceAccountName":"cloudability","serviceAccount":"cloudability","nodeName":"ip-172-26-95-214.ec2.internal","securityContext":{},"imagePullSecrets":[{"name":"cldy-docker-auth"}],"schedulerName":"default-scheduler","tolerations":[{"key":"node.kubernetes.io/not-ready","operator":"Exists","effect":"NoExecute","tolerationSeconds":300},{"key":"node.kubernetes.io/unreachable","operator":"Exists","effect":"NoExecute","tolerationSeconds":300}],"priority":0,"enableServiceLinks":true,"preemptionPolicy":"PreemptLowerPriority"},"status":{"phase":"Succeeded","conditions":[{"type":"Initialized","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"},{"type":"Ready","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"ContainersReady","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"PodScheduled","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"}],"hostIP":"172.26.95.214","podIP":"172.26.94.113","podIPs":[{"ip":"172.26.94.113"}],"startTime":"2022-08-04T19:01:15Z","containerStatuses":[{"name":"cloudability-metrics-agent","state":{"running":{"startedAt":"2022-08-04T19:02:00Z"}},"lastState":{},"ready":true,"restartCount":0,"image":"AMetricsagentimage","imageID":"38a6c80a3b283116803cccf0a64894f69d","containerID":"docker://ed1a1750c138ad51cce50f5aba6fef453ae332ec70bc277c716cd3449e5fecac","started":true}],"qosClass":"Burstable"}} 3 | 
{"metadata":{"name":"cloudability-metrics-agent-6f978f966-64pn4","generateName":"cloudability-metrics-agent-6f978f966-","namespace":"cloudability","uid":"63edfc10-8ca7-462b-bbb7-1ba9c6929da2","resourceVersion":"6494382","creationTimestamp":"2022-08-04T19:01:15Z","labels":{"app":"cloudability-metrics-agent","pod-template-hash":"6f978f966"},"annotations":{"kubernetes.io/psp":"eks.privileged"},"ownerReferences":[{"apiVersion":"apps/v1","kind":"ReplicaSet","name":"cloudability-metrics-agent-6f978f966","uid":"161e5990-fa30-4b8c-93c2-5efb8ab8a062","controller":true,"blockOwnerDeletion":true}],"managedFields":[{"manager":"kube-controller-manager","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:01:15Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:generateName":{},"f:labels":{".":{},"f:app":{},"f:pod-template-hash":{}},"f:ownerReferences":{".":{},"k:{\"uid\":\"161e5990-fa30-4b8c-93c2-5efb8ab8a062\"}":{}}},"f:spec":{"f:containers":{"k:{\"name\":\"cloudability-metrics-agent\"}":{".":{},"f:args":{},"f:env":{".":{},"k:{\"name\":\"CLOUDABILITY_API_KEY\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_CLUSTER_NAME\"}":{".":{},"f:name":{},"f:value":{}},"k:{\"name\":\"CLOUDABILITY_POLL_INTERVAL\"}":{".":{},"f:name":{},"f:value":{}}},"f:image":{},"f:imagePullPolicy":{},"f:livenessProbe":{".":{},"f:exec":{".":{},"f:command":{}},"f:failureThreshold":{},"f:initialDelaySeconds":{},"f:periodSeconds":{},"f:successThreshold":{},"f:timeoutSeconds":{}},"f:name":{},"f:resources":{".":{},"f:limits":{".":{},"f:cpu":{},"f:memory":{}},"f:requests":{".":{},"f:cpu":{},"f:memory":{}}},"f:securityContext":{".":{},"f:allowPrivilegeEscalation":{},"f:capabilities":{".":{},"f:drop":{}},"f:runAsNonRoot":{},"f:runAsUser":{}},"f:terminationMessagePath":{},"f:terminationMessagePolicy":{}}},"f:dnsPolicy":{},"f:enableServiceLinks":{},"f:imagePullSecrets":{".":{},"k:{\"name\":\"cldy-docker-auth\"}":{}},"f:restartPolicy":{},"f:schedulerName":{},"f:securityContext":{},"f:servic
eAccount":{},"f:serviceAccountName":{},"f:terminationGracePeriodSeconds":{}}}},{"manager":"kubelet","operation":"Update","apiVersion":"v1","time":"2022-08-04T19:02:01Z","fieldsType":"FieldsV1","fieldsV1":{"f:status":{"f:conditions":{"k:{\"type\":\"ContainersReady\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Initialized\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}},"k:{\"type\":\"Ready\"}":{".":{},"f:lastProbeTime":{},"f:lastTransitionTime":{},"f:status":{},"f:type":{}}},"f:containerStatuses":{},"f:hostIP":{},"f:phase":{},"f:podIP":{},"f:podIPs":{".":{},"k:{\"ip\":\"172.26.94.113\"}":{".":{},"f:ip":{}}},"f:startTime":{}}},"subresource":"status"}]},"spec":{"volumes":[{"name":"kube-api-access-sx2nm","projected":{"sources":[{"serviceAccountToken":{"expirationSeconds":3607,"path":"token"}},{"configMap":{"name":"kube-root-ca.crt","items":[{"key":"ca.crt","path":"ca.crt"}]}},{"downwardAPI":{"items":[{"path":"namespace","fieldRef":{"apiVersion":"v1","fieldPath":"metadata.namespace"}}]}}],"defaultMode":420}}],"containers":[{"name":"cloudability-metrics-agent","image":"AMetricsAgentImage","args":["kubernetes"],"env":[{"name":"CLOUDABILITY_API_KEY","value":"ReallySecretStuff"},{"name":"CLOUDABILITY_CLUSTER_NAME","value":"testCluster"},{"name":"CLOUDABILITY_POLL_INTERVAL","value":"180"}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}},"volumeMounts":[{"name":"kube-api-access-sx2nm","readOnly":true,"mountPath":"/var/run/secrets/kubernetes.io/serviceaccount"}],"livenessProbe":{"exec":{"command":["touch","tmp/healthy"]},"initialDelaySeconds":120,"timeoutSeconds":1,"periodSeconds":600,"successThreshold":1,"failureThreshold":3},"terminationMessagePath":"/dev/termination-log","terminationMessagePolicy":"File","imagePullPolicy":"Always","securityContext":{"capabilities":{"drop":["all"]},"runAsUser":10000,"runAsNonRoot":true,"allowPrivilegeEsca
lation":false}}],"restartPolicy":"Always","terminationGracePeriodSeconds":30,"dnsPolicy":"ClusterFirst","serviceAccountName":"cloudability","serviceAccount":"cloudability","nodeName":"ip-172-26-95-214.ec2.internal","securityContext":{},"imagePullSecrets":[{"name":"cldy-docker-auth"}],"schedulerName":"default-scheduler","tolerations":[{"key":"node.kubernetes.io/not-ready","operator":"Exists","effect":"NoExecute","tolerationSeconds":300},{"key":"node.kubernetes.io/unreachable","operator":"Exists","effect":"NoExecute","tolerationSeconds":300}],"priority":0,"enableServiceLinks":true,"preemptionPolicy":"PreemptLowerPriority"},"status":{"phase":"Failed","conditions":[{"type":"Initialized","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"},{"type":"Ready","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"ContainersReady","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:02:01Z"},{"type":"PodScheduled","status":"True","lastProbeTime":null,"lastTransitionTime":"2022-08-04T19:01:15Z"}],"hostIP":"172.26.95.214","podIP":"172.26.94.113","podIPs":[{"ip":"172.26.94.113"}],"startTime":"2022-08-04T19:01:15Z","containerStatuses":[{"name":"cloudability-metrics-agent","state":{"running":{"startedAt":"2022-08-04T19:02:00Z"}},"lastState":{},"ready":true,"restartCount":0,"image":"AMetricsagentimage","imageID":"38a6c80a3b283116803cccf0a64894f69d","containerID":"docker://ed1a1750c138ad51cce50f5aba6fef453ae332ec70bc277c716cd3449e5fecac","started":true}],"qosClass":"Burstable"}} 4 | -------------------------------------------------------------------------------- /testdata/services.jsonl: -------------------------------------------------------------------------------- 1 | 
{"metadata":{"name":"kubernetes","namespace":"default","uid":"6155d8a6-ff7f-4c38-a34d-4490999d16f5","resourceVersion":"228","creationTimestamp":"2022-07-07T19:40:33Z","labels":{"component":"apiserver","provider":"kubernetes"},"managedFields":[{"manager":"kube-apiserver","operation":"Update","apiVersion":"v1","time":"2022-07-07T19:40:33Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:labels":{".":{},"f:component":{},"f:provider":{}}},"f:spec":{"f:clusterIP":{},"f:internalTrafficPolicy":{},"f:ipFamilyPolicy":{},"f:ports":{".":{},"k:{\"port\":443,\"protocol\":\"TCP\"}":{".":{},"f:name":{},"f:port":{},"f:protocol":{},"f:targetPort":{}}},"f:sessionAffinity":{},"f:type":{}}}}]},"spec":{"ports":[{"name":"https","protocol":"TCP","port":443,"targetPort":443}],"clusterIP":"10.100.0.1","clusterIPs":["10.100.0.1"],"type":"ClusterIP","sessionAffinity":"None","ipFamilies":["IPv4"],"ipFamilyPolicy":"SingleStack","internalTrafficPolicy":"Cluster"},"status":{"loadBalancer":{}}} 2 | {"metadata":{"name":"kube-dns","namespace":"kube-system","uid":"79a0d234-2ea5-44f9-bb18-16d1b7573db2","resourceVersion":"4186","creationTimestamp":"2022-07-07T19:40:35Z","labels":{"eks.amazonaws.com/component":"kube-dns","k8s-app":"kube-dns","kubernetes.io/cluster-service":"true","kubernetes.io/name":"CoreDNS"},"annotations":{"kubectl.kubernetes.io/last-applied-configuration":"{\"apiVersion\":\"v1\",\"kind\":\"Service\",\"metadata\":{\"annotations\":{\"prometheus.io/port\":\"9153\",\"prometheus.io/scrape\":\"true\"},\"labels\":{\"eks.amazonaws.com/component\":\"kube-dns\",\"k8s-app\":\"kube-dns\",\"kubernetes.io/cluster-service\":\"true\",\"kubernetes.io/name\":\"CoreDNS\"},\"name\":\"kube-dns\",\"namespace\":\"kube-system\"},\"spec\":{\"clusterIP\":\"10.100.0.10\",\"ports\":[{\"name\":\"dns\",\"port\":53,\"protocol\":\"UDP\"},{\"name\":\"dns-tcp\",\"port\":53,\"protocol\":\"TCP\"}],\"selector\":{\"k8s-app\":\"kube-dns\"}}}\n","prometheus.io/port":"9153","prometheus.io/scrape":"true"},"managedField
s":[{"manager":"eks","operation":"Apply","apiVersion":"v1","time":"2022-07-07T20:03:50Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:annotations":{"f:prometheus.io/port":{},"f:prometheus.io/scrape":{}},"f:labels":{"f:eks.amazonaws.com/component":{},"f:k8s-app":{},"f:kubernetes.io/cluster-service":{},"f:kubernetes.io/name":{}}},"f:spec":{"f:clusterIP":{},"f:ports":{"k:{\"port\":53,\"protocol\":\"TCP\"}":{".":{},"f:name":{},"f:port":{},"f:protocol":{}},"k:{\"port\":53,\"protocol\":\"UDP\"}":{".":{},"f:name":{},"f:port":{},"f:protocol":{}}},"f:selector":{}}}},{"manager":"kubectl-client-side-apply","operation":"Update","apiVersion":"v1","time":"2022-07-07T19:40:35Z","fieldsType":"FieldsV1","fieldsV1":{"f:metadata":{"f:annotations":{".":{},"f:kubectl.kubernetes.io/last-applied-configuration":{},"f:prometheus.io/port":{},"f:prometheus.io/scrape":{}},"f:labels":{".":{},"f:eks.amazonaws.com/component":{},"f:k8s-app":{},"f:kubernetes.io/cluster-service":{},"f:kubernetes.io/name":{}}},"f:spec":{"f:clusterIP":{},"f:internalTrafficPolicy":{},"f:ports":{".":{},"k:{\"port\":53,\"protocol\":\"TCP\"}":{".":{},"f:name":{},"f:port":{},"f:protocol":{},"f:targetPort":{}},"k:{\"port\":53,\"protocol\":\"UDP\"}":{".":{},"f:name":{},"f:port":{},"f:protocol":{},"f:targetPort":{}}},"f:selector":{},"f:sessionAffinity":{},"f:type":{}}}}]},"spec":{"ports":[{"name":"dns","protocol":"UDP","port":53,"targetPort":53},{"name":"dns-tcp","protocol":"TCP","port":53,"targetPort":53}],"selector":{"k8s-app":"kube-dns"},"clusterIP":"10.100.0.10","clusterIPs":["10.100.0.10"],"type":"ClusterIP","sessionAffinity":"None","ipFamilies":["IPv4"],"ipFamilyPolicy":"SingleStack","internalTrafficPolicy":"Cluster"},"status":{"loadBalancer":{}}} 3 | -------------------------------------------------------------------------------- /tools/tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | package tools 5 | 6 | import ( 7 | 
// e2e test deps pinned here so `go mod tidy` can be run without removing these dependencies 8 | // used in `make test-e2e-X.XX` and `make test-e2e-all` cmds 9 | _ "github.com/google/cadvisor/info/v1" 10 | _ "github.com/googleapis/gnostic" 11 | _ "github.com/prometheus/common/expfmt" 12 | _ "github.com/prometheus/prom2json" 13 | _ "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 14 | _ "sigs.k8s.io/kind" 15 | ) 16 | -------------------------------------------------------------------------------- /util/testdata/test-cluster-metrics-sample/sample-1510159016/agent-measurement.jsonl: -------------------------------------------------------------------------------- 1 | {"name":"cldy_agent_status","tags":{"agent_version":"v0.5.0","heapster_override_url":"","heapster_url":"https://172.17.4.101:443/api/v1/namespaces/kube-system/services/heapster:80/proxy/api/v1/metric-export","incluster_config":"false","insecure":"true","kube_state_metrics_override_url":"","poll_interval":"5","uptime":"5.256551679s"},"ts":1510159016} 2 | -------------------------------------------------------------------------------- /util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "archive/tar" 5 | "compress/gzip" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "math" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | "path/filepath" 14 | "strings" 15 | "time" 16 | 17 | log "github.com/sirupsen/logrus" 18 | 19 | "github.com/spf13/viper" 20 | "k8s.io/client-go/rest" 21 | ) 22 | 23 | // ErrEmptyDataDir error to indicate the data directory is empty 24 | var ErrEmptyDataDir = errors.New("empty data directory") 25 | 26 | // IsValidURL returns true if string is a valid URL 27 | func IsValidURL(toTest string) bool { 28 | _, err := url.ParseRequestURI(toTest) 29 | return err == nil 30 | } 31 | 32 | // TestHTTPConnection takes 33 | // a given client / URL(string) / bearerToken(string)/ retries count (int) 34 | // and returns true if response code is 2xx. 
35 | func TestHTTPConnection(testClient rest.HTTPClient, 36 | URL, method, bearerToken string, retries uint, verbose bool) (successful bool, body *[]byte, err error) { 37 | IsValidURL(URL) 38 | attempts := retries + 1 39 | 40 | req, err := http.NewRequest(method, URL, nil) 41 | if err != nil { 42 | log.Fatalf("Unable to make new request: %v", err) 43 | } 44 | 45 | if bearerToken != "" { 46 | req.Header.Add("Authorization", "Bearer "+bearerToken) 47 | } 48 | for i := uint(0); i < attempts; i++ { 49 | resp, err := testClient.Do(req) 50 | if err != nil { 51 | if verbose { 52 | log.Warnf("Unable to connect to URL: %s retrying: %v", URL, i+1) 53 | } 54 | time.Sleep(time.Duration(int64(math.Pow(2, float64(i)))) * time.Second) 55 | continue 56 | } 57 | defer SafeClose(resp.Body.Close, &err) 58 | body, rerr := io.ReadAll(resp.Body) 59 | if rerr != nil { 60 | err = fmt.Errorf("Unable to read response from: %s", URL) 61 | } 62 | 63 | return resp.StatusCode <= 200, &body, err 64 | 65 | } 66 | 67 | return false, &[]byte{}, err 68 | 69 | } 70 | 71 | // CheckRequiredSettings checks for required min values / flags / environment variables 72 | func CheckRequiredSettings(requiredArgs []string) error { 73 | 74 | for _, a := range requiredArgs { 75 | if viper.GetString(a) != "" { 76 | continue 77 | } 78 | return fmt.Errorf("Required flag: %v or environment variable: CLOUDABILITY_"+strings.ToUpper( 79 | a)+" has not been set", a) 80 | 81 | } 82 | 83 | if strings.TrimSpace(viper.GetString("cluster_name")) == "" { 84 | return fmt.Errorf("Cluster name cannot only contain whitespace") 85 | } 86 | 87 | if viper.IsSet("poll_interval") && viper.GetInt("poll_interval") < 5 { 88 | return fmt.Errorf( 89 | "Polling interval must be 5 seconds or greater") 90 | } 91 | 92 | return nil 93 | } 94 | 95 | // CreateMetricSample creates a metric sample from a given directory removing the source directory if cleanup is true 96 | func CreateMetricSample(exportDirectory os.File, uid string, cleanUp bool, 
scratchDir string) (*os.File, error) { 97 | 98 | ed, err := exportDirectory.Stat() 99 | if err != nil || !ed.IsDir() { 100 | log.Errorf("Unable to stat sample directory: %v", err) 101 | return nil, err 102 | } 103 | 104 | if err := CheckIfDirEmpty(exportDirectory.Name()); err != nil { 105 | return nil, err 106 | } 107 | 108 | sampleFilename := getExportFilename(uid) 109 | destFile, err := os.Create(scratchDir + "/" + sampleFilename + ".tgz") 110 | 111 | if err != nil { 112 | log.Errorf("Unable to create metric sample file: %v", err) 113 | return nil, err 114 | } 115 | 116 | err = createTGZ(exportDirectory, destFile) 117 | 118 | if err != nil { 119 | log.Errorf("Unable to tar metric sample directory: %v", err) 120 | return nil, err 121 | } 122 | 123 | // cleanup directory after creating the sample 124 | if cleanUp { 125 | err = removeDirectoryContents(exportDirectory.Name() + "/") 126 | } 127 | 128 | if err != nil { 129 | log.Errorf("Unable to cleanup metric sample directory: %v", err) 130 | return nil, err 131 | } 132 | 133 | return destFile, err 134 | } 135 | 136 | // createTGZ takes a source and variable writers and walks 'source' writing each file 137 | // found to the tar writer; the purpose for accepting multiple writers is to allow 138 | // for multiple outputs 139 | func createTGZ(src os.File, writers ...io.Writer) (rerr error) { 140 | 141 | // ensure the src actually exists before trying to tar it 142 | if _, err := os.Stat(src.Name()); err != nil { 143 | return fmt.Errorf("Unable to tar files - %v", err.Error()) 144 | } 145 | 146 | mw := io.MultiWriter(writers...) 
147 | 148 | //nolint gas 149 | gzw, _ := gzip.NewWriterLevel(mw, 9) 150 | 151 | defer SafeClose(gzw.Close, &rerr) 152 | 153 | tw := tar.NewWriter(gzw) 154 | 155 | defer func() { 156 | err := tw.Close() 157 | if err != nil { 158 | log.Fatal(err) 159 | } 160 | }() 161 | 162 | // walk path 163 | return filepath.Walk(src.Name(), func(file string, fileInfo os.FileInfo, err error) (rerr error) { 164 | 165 | // return on any error 166 | if err != nil { 167 | return err 168 | } 169 | 170 | // create a new dir/file header 171 | header, err := tar.FileInfoHeader(fileInfo, fileInfo.Name()) 172 | if err != nil { 173 | return err 174 | } 175 | 176 | // return on directories since there will be no content to tar 177 | if fileInfo.Mode().IsDir() { 178 | return nil 179 | } 180 | 181 | // if not a directory update the name to correctly reflect the desired destination when untaring 182 | if !fileInfo.Mode().IsDir() { 183 | header.Name = filepath.Join(filepath.Base(src.Name()), strings.TrimPrefix(file, src.Name())) 184 | } 185 | // write the header 186 | if err := tw.WriteHeader(header); err != nil { 187 | return err 188 | } 189 | 190 | // open files for taring 191 | //nolint gosec 192 | f, err := os.Open(file) 193 | if err != nil { 194 | return err 195 | } 196 | 197 | defer SafeClose(f.Close, &rerr) 198 | 199 | // copy file data into tar writer 200 | if _, err := io.Copy(tw, f); err != nil { 201 | return err 202 | } 203 | 204 | return err 205 | }) 206 | } 207 | 208 | func getExportFilename(uid string) string { 209 | t := time.Now().UTC() 210 | return uid + "_" + t.Format("20060102150405") 211 | } 212 | 213 | // CreateMSWorkingDirectory takes a given prefix and returns a metric sample working directory 214 | func CreateMSWorkingDirectory(uid string, scratchDir string) (*os.File, error) { 215 | // create metric sample directory 216 | td, err := os.MkdirTemp(scratchDir, "cldy-metrics") 217 | if err != nil { 218 | log.Errorf("Unable to create temporary directory: %v", err) 219 | return 
nil, err 220 | } 221 | 222 | t := time.Now().UTC() 223 | 224 | ed := td + "/" + uid + "_" + t.Format("20060102150405") 225 | 226 | err = os.MkdirAll(ed, os.ModePerm) 227 | if err != nil { 228 | log.Errorf("Error creating metric sample export directory : %v", err) 229 | } 230 | //nolint gosec 231 | exportDir, err := os.Open(ed) 232 | if err != nil { 233 | log.Fatalln("Unable to open metric sample export directory") 234 | } 235 | 236 | return exportDir, err 237 | } 238 | 239 | func removeDirectoryContents(dir string) (err error) { 240 | //nolint gosec 241 | d, err := os.Open(dir) 242 | if err != nil { 243 | return err 244 | } 245 | 246 | defer SafeClose(d.Close, &err) 247 | 248 | names, err := d.Readdirnames(-1) 249 | if err != nil { 250 | return err 251 | } 252 | for _, name := range names { 253 | err = os.RemoveAll(filepath.Join(dir, name)) 254 | if err != nil { 255 | return err 256 | } 257 | } 258 | return nil 259 | } 260 | 261 | // CopyFileContents copies the contents of the file named src to the file named 262 | // by dst. The file will be created if it does not already exist. If the 263 | // destination file exists, all it's contents will be replaced by the contents 264 | // of the source file. 265 | func CopyFileContents(dst, src string) (rerr error) { 266 | //nolint gosec 267 | in, err := os.Open(src) 268 | if err != nil { 269 | return err 270 | } 271 | 272 | defer SafeClose(in.Close, &rerr) 273 | 274 | out, err := os.Create(dst) 275 | if err != nil { 276 | return err 277 | } 278 | defer SafeClose(out.Close, &rerr) 279 | 280 | if _, err = io.Copy(out, in); err != nil { 281 | return err 282 | } 283 | return out.Sync() 284 | } 285 | 286 | // SafeClose will close the given closer function, setting the err ONLY if it is currently nil. This 287 | // allows for cleaner handling of always-closing, but retaining the original error (ie from a previous 288 | // Write). 
289 | func SafeClose(closer func() error, err *error) { 290 | if closeErr := closer(); closeErr != nil && *err == nil { 291 | (*err) = closeErr 292 | } 293 | } 294 | 295 | // MatchOneFile returns the name of one file based on a given directory and pattern 296 | // returning an error if more or less than one match is found. The syntax of patterns is the same 297 | // as in filepath.Glob & Match. 298 | func MatchOneFile(directory string, pattern string) (fileName string, err error) { 299 | results, err := filepath.Glob(directory + pattern) 300 | if err != nil { 301 | return "", fmt.Errorf("Error encountered reading directory: %v", err) 302 | } 303 | 304 | if len(results) == 1 { 305 | return results[0], nil 306 | } else if len(results) > 1 { 307 | return "", fmt.Errorf("More than one file matched the pattern: %+v", results) 308 | } 309 | 310 | return "", fmt.Errorf("No matches found") 311 | } 312 | 313 | // SetupLogger sets configuration for the default logger 314 | func SetupLogger() (err error) { 315 | 316 | var ( 317 | ll = viper.GetString("log_level") 318 | lf = strings.ToLower(viper.GetString("log_format")) 319 | ) 320 | 321 | // Set log level 322 | l, err := log.ParseLevel(ll) 323 | if err != nil { 324 | return fmt.Errorf("Invalid log level: %v", ll) 325 | } 326 | log.SetLevel(l) 327 | log.Debugf("Log level set to: %v", l.String()) 328 | 329 | // Set log format 330 | switch lf { 331 | case "json": 332 | log.SetFormatter(&log.JSONFormatter{}) 333 | default: 334 | log.SetFormatter(&log.TextFormatter{ 335 | DisableLevelTruncation: true, 336 | PadLevelText: true, 337 | }) 338 | } 339 | return nil 340 | } 341 | 342 | // ValidateScratchDir validates whether or not the scratch directory exists or not 343 | func ValidateScratchDir(scratchDir string) error { 344 | if _, err := os.Stat(scratchDir); os.IsNotExist(err) { 345 | return fmt.Errorf("There was a problem validating provided scratch directory: %v", err) 346 | } 347 | 348 | return nil 349 | } 350 | 351 | // 
CheckIfDirEmpty checks if a directory is empty, returning an ErrEmptyDataDir error if it is 352 | func CheckIfDirEmpty(dirname string) (rerr error) { 353 | dir, err := os.Open(dirname) 354 | if err != nil { 355 | return err 356 | } 357 | 358 | defer SafeClose(dir.Close, &rerr) 359 | 360 | _, err = dir.Readdir(1) 361 | if err != nil { 362 | switch err { 363 | case io.EOF: 364 | return ErrEmptyDataDir 365 | default: 366 | return err 367 | } 368 | } 369 | 370 | return nil 371 | } 372 | -------------------------------------------------------------------------------- /util/util_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/http" 7 | "net/http/httptest" 8 | "os" 9 | "path/filepath" 10 | "strconv" 11 | _ "strconv" 12 | "testing" 13 | "time" 14 | 15 | "github.com/spf13/cobra" 16 | "github.com/spf13/viper" 17 | ) 18 | 19 | type testConfig struct { 20 | APIKey string 21 | HeapsterURL string 22 | KubeStateMetricsURL string 23 | PollInterval int 24 | UseInClusterConfig bool 25 | ClusterName string 26 | } 27 | 28 | func TestIsValidURL(t *testing.T) { 29 | 30 | t.Parallel() 31 | 32 | t.Run("ensure that an invalid URL returns false ", func(t *testing.T) { 33 | URL := "sbn//bad-url" 34 | URLTest := IsValidURL(URL) 35 | if URLTest { 36 | t.Errorf("Invaild URL not detected: %v", URL) 37 | } 38 | }) 39 | 40 | t.Run("ensure that an valid URL returns true ", func(t *testing.T) { 41 | URL := "https://verynicesite.com/index.html?option=1" 42 | URLTest := IsValidURL(URL) 43 | if !URLTest { 44 | t.Errorf("Vaild URL not detected: %v", URL) 45 | } 46 | }) 47 | 48 | } 49 | 50 | func TestTestHTTPConnection(t *testing.T) { 51 | 52 | testClient := &http.Client{} 53 | 54 | t.Run("ensure that a 200 HTTP response returns true", func(t *testing.T) { 55 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 56 | if r.Method != http.MethodGet { 57 | 
t.Error("Expected to be a GET") 58 | } 59 | w.WriteHeader(200) 60 | })) 61 | defer ts.Close() 62 | 63 | b, _, _ := TestHTTPConnection(testClient, ts.URL, http.MethodGet, "", 10, true) 64 | log.Print(strconv.FormatBool(b)) 65 | if !b { 66 | t.Error("invalid connection") 67 | } 68 | }) 69 | 70 | t.Run("ensure that a non 200 HTTP response returns false", func(t *testing.T) { 71 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 72 | if r.Method != http.MethodGet { 73 | t.Error("Expected to be a GET") 74 | } 75 | w.WriteHeader(500) 76 | })) 77 | defer ts.Close() 78 | 79 | b, _, err := TestHTTPConnection(testClient, ts.URL, http.MethodGet, "", 10, true) 80 | log.Print(strconv.FormatBool(b)) 81 | if b { 82 | t.Errorf("Non 200 should return false : %v", err) 83 | } 84 | }) 85 | 86 | } 87 | 88 | // nolint:revive 89 | func TestCheckRequiredSettings(t *testing.T) { 90 | 91 | t.Parallel() 92 | 93 | var config testConfig 94 | var kubernetesCmd = &cobra.Command{ 95 | Use: "kubernetes", 96 | Short: "Collect Kubernetes Metrics", 97 | Long: `Command to collect Kubernetes Metrics`, 98 | PreRunE: func(cmd *cobra.Command, args []string) error { 99 | return CheckRequiredSettings([]string{"api_key"}) 100 | }, 101 | Run: func(cmd *cobra.Command, args []string) {}, 102 | } 103 | 104 | // add cobra and viper ENVs and flags 105 | kubernetesCmd.PersistentFlags().StringVar( 106 | &config.APIKey, 107 | "api_key", 108 | "", 109 | "Cloudability API Key", 110 | ) 111 | kubernetesCmd.PersistentFlags().IntVar( 112 | &config.PollInterval, 113 | "poll_interval", 114 | 600, 115 | "Time, in seconds to poll the services infrastructure. 
Default: 600", 116 | ) 117 | kubernetesCmd.PersistentFlags().StringVar( 118 | &config.ClusterName, 119 | "cluster_name", 120 | "default-test", 121 | "Kubernetes Cluster Name - required this must be unique to every cluster.", 122 | ) 123 | 124 | _ = viper.BindPFlag("api_key", kubernetesCmd.PersistentFlags().Lookup("api_key")) 125 | _ = viper.BindPFlag("poll_interval", kubernetesCmd.PersistentFlags().Lookup("poll_interval")) 126 | _ = viper.BindPFlag("cluster_name", kubernetesCmd.PersistentFlags().Lookup("cluster_name")) 127 | 128 | // nolint dupl 129 | t.Run("ensure that required settings are set as cmd flags", func(t *testing.T) { 130 | 131 | args := []string{"kubernetes", "--poll_interval", "5", "--api_key", "8675309-9035768", "--cluster_name", "specificTest"} 132 | kubernetesCmd.SetArgs(args) 133 | 134 | if err := kubernetesCmd.Execute(); err != nil { 135 | t.Errorf("required settings set via cmd flag but not detected: %v", err) 136 | } 137 | }) 138 | 139 | // nolint dupl 140 | t.Run("ensure that missing required cmd flags is detected", func(t *testing.T) { 141 | 142 | args := []string{"kubernetes", "--poll_interval", "5", "--api_key", "8675309-9035768", "--cluster_name", "specificTest"} 143 | kubernetesCmd.SetArgs(args) 144 | 145 | if err := kubernetesCmd.Execute(); err != nil { 146 | t.Errorf("required setting set via cmd flag is missing but not detected: %v", err) 147 | } 148 | }) 149 | 150 | t.Run("ensure that required settings are set as environment variables", func(t *testing.T) { 151 | 152 | viper.SetEnvPrefix("cloudability") 153 | viper.AutomaticEnv() 154 | 155 | _ = os.Setenv("CLOUDABILITY_API_KEY", "8675309-9035768") 156 | _ = os.Setenv("CLOUDABILITY_POLL_INTERVAL", "5") 157 | _ = os.Setenv("CLOUDABILITY_CLUSTER_NAME", "test") 158 | 159 | if err := kubernetesCmd.Execute(); err != nil { 160 | t.Errorf("required settings set via environment variables but not detected: %v", err) 161 | } 162 | }) 163 | 164 | // nolint dupl 165 | t.Run("ensure that missing 
required environment variable is detected", func(t *testing.T) { 166 | 167 | viper.SetEnvPrefix("cloudability") 168 | viper.AutomaticEnv() 169 | 170 | envArgs := []string{"kubernetes"} 171 | kubernetesCmd.SetArgs(envArgs) 172 | 173 | _ = os.Setenv("CLOUDABILITY_API_KEY", "8675309-9035768") 174 | _ = os.Setenv("CLOUDABILITY_POLL_INTERVAL", "5") 175 | _ = os.Setenv("CLOUDABILITY_CLUSTER_NAME", "test") 176 | 177 | if err := kubernetesCmd.Execute(); err != nil { 178 | t.Errorf("incorrect settings via environment variables but condition not detected: %v", err) 179 | } 180 | }) 181 | 182 | // nolint dupl 183 | t.Run("ensure that invalid min value is detected", func(t *testing.T) { 184 | 185 | viper.SetEnvPrefix("cloudability") 186 | viper.AutomaticEnv() 187 | 188 | envArgs := []string{"kubernetes"} 189 | kubernetesCmd.SetArgs(envArgs) 190 | _ = os.Setenv("CLOUDABILITY_API_KEY", "8675309-9035768") 191 | _ = os.Setenv("CLOUDABILITY_POLL_INTERVAL", "4") 192 | _ = os.Setenv("CLOUDABILITY_CLUSTER_NAME", "test") 193 | 194 | if err := kubernetesCmd.Execute(); err != nil { 195 | t.Errorf("incorrect poll interval set via environment variables but not detected: %v", err) 196 | } 197 | }) 198 | 199 | t.Run("ensure that invalid string value is detected", func(t *testing.T) { 200 | 201 | viper.SetEnvPrefix("cloudability") 202 | viper.AutomaticEnv() 203 | 204 | envArgs := []string{"kubernetes"} 205 | kubernetesCmd.SetArgs(envArgs) 206 | _ = os.Setenv("CLOUDABILITY_API_KEY", "8675309-9035768") 207 | _ = os.Setenv("CLOUDABILITY_POLL_INTERVAL", "5") 208 | _ = os.Setenv("CLOUDABILITY_CLUSTER_NAME", " ") 209 | 210 | if err := kubernetesCmd.Execute(); err != nil { 211 | t.Errorf("incorrect cluster name set via environment variables but condition not detected: %v", err) 212 | } 213 | }) 214 | } 215 | 216 | func TestCreateMetricSample(t *testing.T) { 217 | var err error 218 | var tgz *os.File 219 | var sampleDirectory *os.File 220 | 221 | testDataDirectory := 
"testdata/test-cluster-metrics-sample" 222 | 223 | t.Run("Ensure that a metric sample is created", func(t *testing.T) { 224 | 225 | if _, err = os.Stat(testDataDirectory); err == nil { 226 | sampleDirectory, err = os.Open(testDataDirectory) 227 | ms, err := CreateMetricSample(*sampleDirectory, "cluster-id", false, os.TempDir()) 228 | if err != nil { 229 | t.Errorf("Error creating agent Status Metric: %v", err) 230 | } 231 | 232 | tgz, err = os.Open(ms.Name()) 233 | if err != nil { 234 | t.Error("unable to open gzip'ed file. ") 235 | } 236 | defer tgz.Close() 237 | 238 | // clean up 239 | _ = os.Remove("/tmp/" + filepath.Base(testDataDirectory) + ".tgz") 240 | 241 | } else { 242 | t.Error("Unable find data directory") 243 | } 244 | 245 | }) 246 | 247 | t.Run("Only create metric sample if data directory contains files", func(t *testing.T) { 248 | emptySampleDirectory, err := os.MkdirTemp(os.TempDir(), "empty_sample_directory") 249 | if err != nil { 250 | t.Errorf("error creating temporary sample directory: %v", err) 251 | } 252 | defer func() { 253 | _ = os.Remove(emptySampleDirectory) 254 | }() 255 | 256 | sampleDirectory, err := os.Open(emptySampleDirectory) 257 | if err != nil { 258 | t.Errorf("error opening temporary sample directory as file: %v", err) 259 | } 260 | 261 | // First we expect no data 262 | _, err = CreateMetricSample(*sampleDirectory, "cluster-id", false, os.TempDir()) 263 | if err != ErrEmptyDataDir { 264 | t.Errorf("expected an ErrEmptyDataDir error but got: %v", err) 265 | } 266 | 267 | // Add a file 268 | fp, err := os.Create(fmt.Sprintf("%s/sample_file.txt", sampleDirectory.Name())) 269 | if err != nil { 270 | t.Errorf("unable to create file: %v", err) 271 | } 272 | _, _ = fp.WriteString("test") 273 | _ = fp.Close() 274 | 275 | // Then we expect data 276 | _, err = CreateMetricSample(*sampleDirectory, "cluster-id", false, os.TempDir()) 277 | if err != nil { 278 | t.Errorf("unexpected error but got: %v", err) 279 | } 280 | }) 281 | } 282 | 283 
| // nolint: gosec 284 | func TestMatchOneFile(t *testing.T) { 285 | dir := os.TempDir() + "/cldy-test" + strconv.FormatInt( 286 | time.Now().Unix(), 10) 287 | _ = os.MkdirAll(dir, 0777) 288 | _ = os.WriteFile(dir+"/shouldBeHere.file", []byte(nil), 0644) 289 | 290 | t.Run("Ensure that one file is matched", func(t *testing.T) { 291 | 292 | pattern := "/shouldBeHere.file*" 293 | file, err := MatchOneFile(dir, pattern) 294 | if err != nil || filepath.Base(file) != "shouldBeHere.file" { 295 | t.Errorf("Did not match pattern when looking in the directory: %s for the pattern: %s error: %v", 296 | dir, pattern, err) 297 | } 298 | 299 | }) 300 | 301 | t.Run("Ensure that more than one file returns an error", func(t *testing.T) { 302 | 303 | _ = os.WriteFile(dir+"/shouldBeHere.file2", []byte(nil), 0644) 304 | pattern := "/shouldBeHere.file*" 305 | file, err := MatchOneFile(dir, pattern) 306 | if err == nil || file != "" { 307 | t.Errorf("Should have raised an error when looking in the directory: %s for pattern: %s error: %v", 308 | dir, pattern, err) 309 | } 310 | 311 | }) 312 | 313 | t.Run("Ensure that zero matches return an error", func(t *testing.T) { 314 | pattern := "/shouldNOtBeHere" + strconv.Itoa(time.Now().Nanosecond()) + "*" 315 | file, err := MatchOneFile(dir, pattern) 316 | if err == nil || file != "" { 317 | t.Errorf("Should have raised an error when looking in the directory: %s for a non-matching pattern: %s error: %v", 318 | dir, pattern, err) 319 | } 320 | 321 | }) 322 | 323 | // clean up 324 | _ = os.RemoveAll(dir) 325 | 326 | } 327 | 328 | func TestValidateScratchDir(t *testing.T) { 329 | t.Run("Ensure that an error is returned when directory doesn't exist", func(t *testing.T) { 330 | fakeDir := "/fake_dir" 331 | err := ValidateScratchDir(fakeDir) 332 | 333 | if err == nil { 334 | t.Errorf("Should have raised an error when validating scratch directory that does not exist, error: %v", err) 335 | } 336 | }) 337 | 338 | t.Run("Ensure that no error is returned 
when it is directory that does exist", func(t *testing.T) { 339 | scratchDir := "/tmp" 340 | err := ValidateScratchDir(scratchDir) 341 | 342 | if err != nil { 343 | t.Errorf("Should not have raised an error when validating scratch directory that does exist, error: %v", err) 344 | } 345 | }) 346 | } 347 | -------------------------------------------------------------------------------- /version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | // VERSION is the current version of the agent 4 | var VERSION = "2.13.0" 5 | --------------------------------------------------------------------------------