├── .gitignore ├── .goreleaser.yml ├── .tekton ├── aws-resource-exporter-master-pull-request.yaml └── aws-resource-exporter-master-push.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── OWNERS ├── README.md ├── aws-resource-exporter-config.yaml ├── build_deploy.sh ├── build_tag.sh ├── dashboards └── aws-resource-exporter.yaml ├── go.mod ├── go.sum ├── main.go ├── openshift ├── acceptance-job.yaml └── aws-resource-exporter.yaml ├── pkg ├── awsclient │ ├── awsclient.go │ ├── exporter.go │ └── mock │ │ ├── mock_iam.go │ │ └── zz_generated.mock_client.go ├── cache.go ├── cache_test.go ├── config.go ├── constats.go ├── ec2.go ├── ec2_test.go ├── elasticache.go ├── elasticache_test.go ├── iam.go ├── iam_test.go ├── msk.go ├── msk_test.go ├── proxy.go ├── proxy_test.go ├── rds.go ├── rds_test.go ├── route53.go ├── route53_test.go ├── util.go ├── util_test.go └── vpc.go ├── pr_check.sh └── renovate.json /.gitignore: -------------------------------------------------------------------------------- 1 | /aws-resource-exporter 2 | /config.yml 3 | /.build 4 | /.release 5 | /.tarballs 6 | .deps 7 | *tar.gz 8 | dist/ 9 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | builds: 2 | - env: 3 | - CGO_ENABLED=0 4 | archives: 5 | - replacements: 6 | darwin: Darwin 7 | linux: Linux 8 | windows: Windows 9 | 386: i386 10 | amd64: x86_64 11 | snapshot: 12 | name_template: "{{ .Tag }}-next" 13 | changelog: 14 | sort: asc 15 | filters: 16 | exclude: 17 | - '^docs:' 18 | - '^test:' 19 | -------------------------------------------------------------------------------- /.tekton/aws-resource-exporter-master-pull-request.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: tekton.dev/v1 2 | kind: PipelineRun 3 | metadata: 4 | annotations: 5 | build.appstudio.openshift.io/repo: 
https://github.com/app-sre/aws-resource-exporter?rev={{revision}} 6 | build.appstudio.redhat.com/commit_sha: '{{revision}}' 7 | build.appstudio.redhat.com/pull_request_number: '{{pull_request_number}}' 8 | build.appstudio.redhat.com/target_branch: '{{target_branch}}' 9 | pipelinesascode.tekton.dev/max-keep-runs: "10" 10 | pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch 11 | == "master" 12 | creationTimestamp: null 13 | labels: 14 | appstudio.openshift.io/application: aws-resource-exporter-master 15 | appstudio.openshift.io/component: aws-resource-exporter-master 16 | pipelines.appstudio.openshift.io/type: build 17 | name: aws-resource-exporter-master-on-pull-request 18 | namespace: app-sre-tenant 19 | spec: 20 | params: 21 | - name: git-url 22 | value: '{{source_url}}' 23 | - name: revision 24 | value: '{{revision}}' 25 | - name: output-image 26 | value: quay.io/redhat-user-workloads/app-sre-tenant/aws-resource-exporter-master/aws-resource-exporter-master:on-pr-{{revision}} 27 | - name: image-expires-after 28 | value: 5d 29 | - name: dockerfile 30 | value: Dockerfile 31 | - name: path-context 32 | value: . 
33 | - name: target-stage 34 | value: test 35 | pipelineRef: 36 | params: 37 | - name: url 38 | value: https://github.com/app-sre/shared-pipelines 39 | - name: revision 40 | value: main 41 | - name: pathInRepo 42 | value: pipelines/multi-arch-build-pipeline.yaml 43 | resolver: git 44 | taskRunTemplate: 45 | serviceAccountName: build-pipeline-aws-resource-exporter-master 46 | workspaces: 47 | - name: workspace 48 | volumeClaimTemplate: 49 | metadata: 50 | creationTimestamp: null 51 | spec: 52 | accessModes: 53 | - ReadWriteOnce 54 | resources: 55 | requests: 56 | storage: 1Gi 57 | status: {} 58 | - name: git-auth 59 | secret: 60 | secretName: '{{ git_auth_secret }}' 61 | status: {} 62 | -------------------------------------------------------------------------------- /.tekton/aws-resource-exporter-master-push.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: tekton.dev/v1 2 | kind: PipelineRun 3 | metadata: 4 | annotations: 5 | build.appstudio.openshift.io/repo: https://github.com/app-sre/aws-resource-exporter?rev={{revision}} 6 | build.appstudio.redhat.com/commit_sha: '{{revision}}' 7 | build.appstudio.redhat.com/target_branch: '{{target_branch}}' 8 | pipelinesascode.tekton.dev/max-keep-runs: "25" 9 | pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch 10 | == "master" 11 | creationTimestamp: null 12 | labels: 13 | appstudio.openshift.io/application: aws-resource-exporter-master 14 | appstudio.openshift.io/component: aws-resource-exporter-master 15 | pipelines.appstudio.openshift.io/type: build 16 | name: aws-resource-exporter-master-on-push 17 | namespace: app-sre-tenant 18 | spec: 19 | params: 20 | - name: git-url 21 | value: '{{source_url}}' 22 | - name: revision 23 | value: '{{revision}}' 24 | - name: output-image 25 | value: quay.io/redhat-user-workloads/app-sre-tenant/aws-resource-exporter-master/aws-resource-exporter-master:{{revision}} 26 | - name: dockerfile 27 | value: 
Dockerfile 28 | - name: path-context 29 | value: . 30 | pipelineRef: 31 | params: 32 | - name: url 33 | value: https://github.com/app-sre/shared-pipelines 34 | - name: revision 35 | value: main 36 | - name: pathInRepo 37 | value: pipelines/multi-arch-build-pipeline.yaml 38 | resolver: git 39 | taskRunTemplate: 40 | serviceAccountName: build-pipeline-aws-resource-exporter-master 41 | workspaces: 42 | - name: workspace 43 | volumeClaimTemplate: 44 | metadata: 45 | creationTimestamp: null 46 | spec: 47 | accessModes: 48 | - ReadWriteOnce 49 | resources: 50 | requests: 51 | storage: 1Gi 52 | status: {} 53 | - name: git-auth 54 | secret: 55 | secretName: '{{ git_auth_secret }}' 56 | status: {} 57 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM registry.access.redhat.com/ubi9/go-toolset:1.23.6-1747333074@sha256:e0ad156b08e0b50ad509d79513e13e8a31f2812c66e9c48c98cea53420ec2bca as builder 2 | COPY LICENSE /licenses/LICENSE 3 | WORKDIR /build 4 | RUN git config --global --add safe.directory /build 5 | COPY . . 6 | RUN make build 7 | 8 | FROM builder as test 9 | RUN make test 10 | 11 | FROM registry.access.redhat.com/ubi9-minimal@sha256:92b1d5747a93608b6adb64dfd54515c3c5a360802db4706765ff3d8470df6290 12 | COPY --from=builder /build/aws-resource-exporter /bin/aws-resource-exporter 13 | 14 | EXPOSE 9115 15 | ENTRYPOINT [ "/bin/aws-resource-exporter" ] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | NAME := aws-resource-exporter 2 | REPO := quay.io/app-sre/$(NAME) 3 | TAG := $(shell git rev-parse --short HEAD) 4 | 5 | PKGS := $(shell go list ./... | grep -v -E '/vendor/|/test') 6 | FIRST_GOPATH := $(firstword $(subst :, ,$(shell go env GOPATH))) 7 | CONTAINER_ENGINE ?= $(shell which podman >/dev/null 2>&1 && echo podman || echo docker) 8 | IMAGE_TEST := aws-resource-exporter 9 | 10 | ifneq (,$(wildcard $(CURDIR)/.docker)) 11 | DOCKER_CONF := $(CURDIR)/.docker 12 | else 13 | DOCKER_CONF := $(HOME)/.docker 14 | endif 15 | 16 | .PHONY: all 17 | all: test image 18 | 19 | .PHONY: clean 20 | clean: 21 | # Remove all files and directories ignored by git. 22 | git clean -Xfd . 23 | 24 | ############ 25 | # Building # 26 | ############ 27 | 28 | .PHONY: build 29 | build: 30 | go build -o $(NAME) . 31 | 32 | .PHONY: image 33 | image: 34 | ifeq ($(CONTAINER_ENGINE), podman) 35 | @DOCKER_BUILDKIT=1 $(CONTAINER_ENGINE) build --no-cache -t $(REPO):latest . 
--progress=plain 36 | else 37 | @DOCKER_BUILDKIT=1 $(CONTAINER_ENGINE) --config=$(DOCKER_CONF) build --no-cache -t $(REPO):latest . --progress=plain 38 | endif 39 | @$(CONTAINER_ENGINE) tag $(REPO):latest $(REPO):$(TAG) 40 | 41 | .PHONY: image-push 42 | image-push: 43 | $(CONTAINER_ENGINE) --config=$(DOCKER_CONF) push $(REPO):$(TAG) 44 | $(CONTAINER_ENGINE) --config=$(DOCKER_CONF) push $(REPO):latest 45 | 46 | ############## 47 | # Formatting # 48 | ############## 49 | 50 | .PHONY: format 51 | format: go-fmt 52 | 53 | .PHONY: go-fmt 54 | go-fmt: 55 | go fmt $(PKGS) 56 | 57 | ########### 58 | # Testing # 59 | ########### 60 | 61 | .PHONY: vet 62 | vet: 63 | go vet ./... 64 | 65 | .PHONY: test 66 | test: vet test-unit 67 | 68 | .PHONY: test-unit 69 | test-unit: 70 | go test -race -short $(PKGS) -count=1 71 | 72 | .PHONY: container-test 73 | container-test: 74 | $(CONTAINER_ENGINE) build --target test -t $(IMAGE_TEST) -f Dockerfile . 75 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - geoberle 3 | - janboll 4 | - jfchevrette 5 | - lechuk47 6 | - patjlm 7 | - skryzhny 8 | - steveteahan 9 | - suzana-nesic 10 | - maorfr 11 | - AlexVulaj 12 | - dkeohane 13 | - bergmannf 14 | - iamkirkbater 15 | - mrWinston 16 | - macgregor 17 | - dastergon 18 | reviewers: 19 | - geoberle 20 | - janboll 21 | - jfchevrette 22 | - lechuk47 23 | - patjlm 24 | - skryzhny 25 | - steveteahan 26 | - suzana-nesic 27 | - maorfr 28 | - AlexVulaj 29 | - dkeohane 30 | - bergmannf 31 | - iamkirkbater 32 | - mrWinston 33 | - macgregor 34 | - dastergon 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aws-resource-exporter 2 | 3 | Prometheus exporter for AWS resources 4 | 5 | This was made as a complement to [CloudWatch 
Exporter](https://github.com/prometheus/cloudwatch_exporter) to get resource information that is useful to keep around as metrics in Prometheus but is out of scope for CloudWatch Exporter.
| quay.io/app-sre/aws-resource-exporter:latest 46 | 47 | ## Building the software 48 | 49 | ### Local Build 50 | 51 | make build 52 | 53 | ### Building container image 54 | 55 | make image image-push 56 | 57 | ## Configuration 58 | 59 | AWS credentials can be passed as environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. 60 | 61 | Additional configuration can be supplied in a configuration file and might differ between collectors. 62 | 63 | An example file can look like this: 64 | 65 | ```yaml 66 | rds: 67 | enabled: true 68 | regions: 69 | - "us-east-1" 70 | vpc: 71 | enabled: true 72 | regions: 73 | - "us-east-1" 74 | - "eu-central-1" 75 | timeout: 30s 76 | interval: 300s 77 | cache_ttl: 500s 78 | ec2: 79 | enabled: true 80 | regions: 81 | - "us-east-1" 82 | - "eu-central-1" 83 | - "us-west-1" 84 | timeout: 30s 85 | route53: 86 | enabled: true 87 | region: "us-east-1" 88 | timeout: 60s 89 | interval: 90s 90 | ``` 91 | 92 | Some exporters might expose different configuration values, see the example files for possible keys. 93 | 94 | The config file location can be specified using the environment variable `AWS_RESOURCE_EXPORTER_CONFIG_FILE`. 95 | 96 | RDS Logs metrics are requested in parallel to improve the scrappping time. Also, metrics are cached to prevent AWS api rate limits. Parameters to 97 | tweak this behavior. 98 | 99 | - `LOGS_METRICS_WORKERS`: Number of workers to request log metrics in parallel (default=10) 100 | - `LOGS_METRICS_TTL`: Cache TTL for rds logs related metrics (default=300) 101 | 102 | 103 | Defaults: 104 | - interval: 15 seconds 105 | - cache_ttl: 35 seconds 106 | - timeout: 10 seconds 107 | 108 | 109 | To view all available command-line flags, run `./aws-resource-exporter -h`. 110 | 111 | ## License 112 | 113 | Apache License 2.0, see [LICENSE](LICENSE). 
114 | -------------------------------------------------------------------------------- /aws-resource-exporter-config.yaml: -------------------------------------------------------------------------------- 1 | rds: 2 | enabled: true 3 | regions: 4 | - "us-east-1" 5 | vpc: 6 | enabled: true 7 | regions: 8 | - "us-east-1" 9 | - "eu-central-1" 10 | - "eu-west-1" 11 | route53: 12 | enabled: true 13 | regions: 14 | - "us-east-1" 15 | ec2: 16 | enabled: true 17 | regions: 18 | - "us-east-1" 19 | - "eu-central-1" 20 | - "us-west-1" 21 | iam: 22 | enabled: true 23 | region: "us-east-1" 24 | -------------------------------------------------------------------------------- /build_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | DOCKER_CONF="$PWD/.docker" 5 | mkdir -p "$DOCKER_CONF" 6 | docker --config="$DOCKER_CONF" login -u="$QUAY_USER" -p="$QUAY_TOKEN" quay.io 7 | 8 | # build images 9 | make image image-push 10 | -------------------------------------------------------------------------------- /build_tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | curl -sL https://git.io/goreleaser | bash 5 | -------------------------------------------------------------------------------- /dashboards/aws-resource-exporter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: grafana-dashboard-aws-resource-exporter 5 | labels: 6 | grafana_dashboard: "true" 7 | annotations: 8 | grafana-folder: /grafana-dashboard-definitions/AppSRE 9 | data: 10 | aws-resource-exporter.json: |- 11 | { 12 | "annotations": { 13 | "list": [ 14 | { 15 | "builtIn": 1, 16 | "datasource": { 17 | "type": "grafana", 18 | "uid": "-- Grafana --" 19 | }, 20 | "enable": true, 21 | "hide": true, 22 | "iconColor": "rgba(0, 211, 255, 1)", 23 | "name": "Annotations & Alerts", 24 | "target": { 25 | 
"limit": 100, 26 | "matchAny": false, 27 | "tags": [], 28 | "type": "dashboard" 29 | }, 30 | "type": "dashboard" 31 | } 32 | ] 33 | }, 34 | "editable": true, 35 | "fiscalYearStartMonth": 0, 36 | "graphTooltip": 0, 37 | "links": [], 38 | "liveNow": false, 39 | "panels": [ 40 | { 41 | "collapsed": false, 42 | "gridPos": { 43 | "h": 1, 44 | "w": 24, 45 | "x": 0, 46 | "y": 0 47 | }, 48 | "id": 8, 49 | "panels": [], 50 | "title": "RDS", 51 | "type": "row" 52 | }, 53 | { 54 | "datasource": { 55 | "type": "prometheus", 56 | "uid": "${datasource}" 57 | }, 58 | "description": "Shows the number of RDS Instances categorized by their End-of-Life (EOL) status.", 59 | "fieldConfig": { 60 | "defaults": { 61 | "color": { 62 | "mode": "palette-classic" 63 | }, 64 | "mappings": [], 65 | "thresholds": { 66 | "mode": "absolute", 67 | "steps": [ 68 | { 69 | "color": "green", 70 | "value": null 71 | } 72 | ] 73 | } 74 | }, 75 | "overrides": [ 76 | { 77 | "matcher": { 78 | "id": "byName", 79 | "options": "green" 80 | }, 81 | "properties": [ 82 | { 83 | "id": "displayName", 84 | "value": "Green: >180 days to EOL" 85 | }, 86 | { 87 | "id": "color", 88 | "value": { 89 | "fixedColor": "green", 90 | "mode": "fixed" 91 | } 92 | } 93 | ] 94 | }, 95 | { 96 | "matcher": { 97 | "id": "byName", 98 | "options": "red" 99 | }, 100 | "properties": [ 101 | { 102 | "id": "displayName", 103 | "value": "Red: <90 Days to EOL" 104 | }, 105 | { 106 | "id": "color", 107 | "value": { 108 | "fixedColor": "red", 109 | "mode": "fixed" 110 | } 111 | } 112 | ] 113 | }, 114 | { 115 | "matcher": { 116 | "id": "byName", 117 | "options": "yellow" 118 | }, 119 | "properties": [ 120 | { 121 | "id": "displayName", 122 | "value": "Yellow: <180 Days to EOL" 123 | }, 124 | { 125 | "id": "color", 126 | "value": { 127 | "fixedColor": "yellow", 128 | "mode": "fixed" 129 | } 130 | } 131 | ] 132 | } 133 | ] 134 | }, 135 | "gridPos": { 136 | "h": 7, 137 | "w": 5, 138 | "x": 0, 139 | "y": 1 140 | }, 141 | "id": 6, 142 | "options": { 
143 | "displayMode": "gradient", 144 | "minVizHeight": 10, 145 | "minVizWidth": 0, 146 | "orientation": "horizontal", 147 | "reduceOptions": { 148 | "calcs": [ 149 | "lastNotNull" 150 | ], 151 | "fields": "", 152 | "values": false 153 | }, 154 | "showUnfilled": true 155 | }, 156 | "pluginVersion": "9.3.8", 157 | "targets": [ 158 | { 159 | "datasource": { 160 | "type": "prometheus", 161 | "uid": "${datasource}" 162 | }, 163 | "editorMode": "code", 164 | "expr": "count(aws_resources_exporter_rds_eol_info) by (eol_status)", 165 | "legendFormat": "__auto", 166 | "range": true, 167 | "refId": "A" 168 | } 169 | ], 170 | "title": "EOL Status Distribution of RDS Instances", 171 | "transformations": [ 172 | { 173 | "id": "joinByField", 174 | "options": { 175 | "byField": "Time", 176 | "mode": "outer" 177 | } 178 | }, 179 | { 180 | "id": "organize", 181 | "options": { 182 | "excludeByName": {}, 183 | "indexByName": { 184 | "Time": 3, 185 | "green": 2, 186 | "red": 0, 187 | "yellow": 1 188 | }, 189 | "renameByName": {} 190 | } 191 | } 192 | ], 193 | "type": "bargauge" 194 | }, 195 | { 196 | "datasource": { 197 | "type": "prometheus", 198 | "uid": "${datasource}" 199 | }, 200 | "description": "", 201 | "fieldConfig": { 202 | "defaults": { 203 | "color": { 204 | "fixedColor": "text", 205 | "mode": "fixed" 206 | }, 207 | "custom": { 208 | "align": "auto", 209 | "displayMode": "color-text", 210 | "filterable": false, 211 | "inspect": false 212 | }, 213 | "mappings": [ 214 | { 215 | "options": { 216 | "green": { 217 | "color": "green", 218 | "index": 2 219 | }, 220 | "red": { 221 | "color": "red", 222 | "index": 1 223 | }, 224 | "yellow": { 225 | "color": "yellow", 226 | "index": 0 227 | } 228 | }, 229 | "type": "value" 230 | } 231 | ], 232 | "thresholds": { 233 | "mode": "absolute", 234 | "steps": [ 235 | { 236 | "color": "text", 237 | "value": null 238 | } 239 | ] 240 | } 241 | }, 242 | "overrides": [] 243 | }, 244 | "gridPos": { 245 | "h": 7, 246 | "w": 19, 247 | "x": 5, 248 | 
"y": 1 249 | }, 250 | "id": 10, 251 | "options": { 252 | "footer": { 253 | "enablePagination": false, 254 | "fields": "", 255 | "reducer": [ 256 | "sum" 257 | ], 258 | "show": false 259 | }, 260 | "showHeader": true, 261 | "sortBy": [ 262 | { 263 | "desc": false, 264 | "displayName": "dbinstance_identifier" 265 | } 266 | ] 267 | }, 268 | "pluginVersion": "9.3.8", 269 | "targets": [ 270 | { 271 | "datasource": { 272 | "type": "prometheus", 273 | "uid": "${datasource}" 274 | }, 275 | "editorMode": "code", 276 | "exemplar": false, 277 | "expr": "aws_resources_exporter_rds_eol_info", 278 | "format": "table", 279 | "hide": false, 280 | "instant": false, 281 | "legendFormat": "__auto", 282 | "range": true, 283 | "refId": "A" 284 | } 285 | ], 286 | "title": "RDS Version and EOL Status", 287 | "transformations": [ 288 | { 289 | "id": "groupBy", 290 | "options": { 291 | "fields": { 292 | "dbinstance_identifier": { 293 | "aggregations": [], 294 | "operation": "groupby" 295 | }, 296 | "engine": { 297 | "aggregations": [], 298 | "operation": "groupby" 299 | }, 300 | "engine_version": { 301 | "aggregations": [], 302 | "operation": "groupby" 303 | }, 304 | "eol_date": { 305 | "aggregations": [], 306 | "operation": "groupby" 307 | }, 308 | "eol_status": { 309 | "aggregations": [], 310 | "operation": "groupby" 311 | } 312 | } 313 | } 314 | }, 315 | { 316 | "id": "organize", 317 | "options": { 318 | "excludeByName": {}, 319 | "indexByName": {}, 320 | "renameByName": { 321 | "dbinstance_identifier": "Instance", 322 | "engine": "Engine", 323 | "engine_version": "Engine Version", 324 | "eol_date": "EOL Date", 325 | "eol_status": "EOL Status" 326 | } 327 | } 328 | } 329 | ], 330 | "type": "table" 331 | }, 332 | { 333 | "datasource": { 334 | "type": "prometheus", 335 | "uid": "${datasource}" 336 | }, 337 | "description": "List of all RDS EOL alerts currently firing", 338 | "gridPos": { 339 | "h": 8, 340 | "w": 24, 341 | "x": 0, 342 | "y": 8 343 | }, 344 | "id": 12, 345 | "options": { 
346 | "alertInstanceLabelFilter": "", 347 | "alertName": "RDSEOL", 348 | "dashboardAlerts": false, 349 | "groupBy": [], 350 | "groupMode": "default", 351 | "maxItems": 100, 352 | "sortOrder": 1, 353 | "stateFilter": { 354 | "error": false, 355 | "firing": true, 356 | "noData": false, 357 | "normal": false, 358 | "pending": false 359 | }, 360 | "viewMode": "list" 361 | }, 362 | "title": "RDS EOL Alerts", 363 | "type": "alertlist" 364 | }, 365 | { 366 | "collapsed": false, 367 | "gridPos": { 368 | "h": 1, 369 | "w": 24, 370 | "x": 0, 371 | "y": 16 372 | }, 373 | "id": 13, 374 | "panels": [], 375 | "title": "MSK", 376 | "type": "row" 377 | }, 378 | { 379 | "datasource": { 380 | "type": "prometheus", 381 | "uid": "${datasource}" 382 | }, 383 | "description": "Shows the number of MSK clusters categorized by their End-of-Life (EOL) status.", 384 | "fieldConfig": { 385 | "defaults": { 386 | "color": { 387 | "mode": "palette-classic" 388 | }, 389 | "mappings": [], 390 | "thresholds": { 391 | "mode": "absolute", 392 | "steps": [ 393 | { 394 | "color": "green", 395 | "value": null 396 | } 397 | ] 398 | } 399 | }, 400 | "overrides": [ 401 | { 402 | "matcher": { 403 | "id": "byName", 404 | "options": "green" 405 | }, 406 | "properties": [ 407 | { 408 | "id": "displayName", 409 | "value": "Green: >180 days to EOL" 410 | }, 411 | { 412 | "id": "color", 413 | "value": { 414 | "fixedColor": "green", 415 | "mode": "fixed" 416 | } 417 | } 418 | ] 419 | }, 420 | { 421 | "matcher": { 422 | "id": "byName", 423 | "options": "red" 424 | }, 425 | "properties": [ 426 | { 427 | "id": "displayName", 428 | "value": "Red: <90 Days to EOL" 429 | }, 430 | { 431 | "id": "color", 432 | "value": { 433 | "fixedColor": "red", 434 | "mode": "fixed" 435 | } 436 | } 437 | ] 438 | }, 439 | { 440 | "matcher": { 441 | "id": "byName", 442 | "options": "yellow" 443 | }, 444 | "properties": [ 445 | { 446 | "id": "displayName", 447 | "value": "Yellow: <180 Days to EOL" 448 | }, 449 | { 450 | "id": "color", 451 
| "value": { 452 | "fixedColor": "yellow", 453 | "mode": "fixed" 454 | } 455 | } 456 | ] 457 | }, 458 | { 459 | "matcher": { 460 | "id": "byName", 461 | "options": "unknown" 462 | }, 463 | "properties": [ 464 | { 465 | "id": "displayName", 466 | "value": "Unknown: no EOL for version" 467 | }, 468 | { 469 | "id": "color", 470 | "value": { 471 | "fixedColor": "blue", 472 | "mode": "fixed" 473 | } 474 | } 475 | ] 476 | } 477 | ] 478 | }, 479 | "gridPos": { 480 | "h": 7, 481 | "w": 5, 482 | "x": 0, 483 | "y": 17 484 | }, 485 | "id": 14, 486 | "options": { 487 | "displayMode": "gradient", 488 | "maxVizHeight": 300, 489 | "minVizHeight": 10, 490 | "minVizWidth": 0, 491 | "namePlacement": "auto", 492 | "orientation": "horizontal", 493 | "reduceOptions": { 494 | "calcs": [ 495 | "lastNotNull" 496 | ], 497 | "fields": "", 498 | "values": false 499 | }, 500 | "showUnfilled": true, 501 | "sizing": "auto", 502 | "valueMode": "color" 503 | }, 504 | "pluginVersion": "10.4.1", 505 | "targets": [ 506 | { 507 | "datasource": { 508 | "type": "prometheus", 509 | "uid": "${datasource}" 510 | }, 511 | "editorMode": "code", 512 | "expr": "count(aws_resources_exporter_msk_eol_info) by (eol_status)", 513 | "legendFormat": "__auto", 514 | "range": true, 515 | "refId": "A" 516 | } 517 | ], 518 | "title": "EOL Status Distribution of MSK Clusters", 519 | "transformations": [ 520 | { 521 | "id": "joinByField", 522 | "options": { 523 | "byField": "Time", 524 | "mode": "outer" 525 | } 526 | }, 527 | { 528 | "id": "organize", 529 | "options": { 530 | "excludeByName": {}, 531 | "indexByName": { 532 | "Time": 3, 533 | "green": 2, 534 | "red": 0, 535 | "yellow": 1 536 | }, 537 | "renameByName": {} 538 | } 539 | } 540 | ], 541 | "type": "bargauge" 542 | }, 543 | { 544 | "datasource": { 545 | "type": "prometheus", 546 | "uid": "${datasource}" 547 | }, 548 | "description": "", 549 | "fieldConfig": { 550 | "defaults": { 551 | "color": { 552 | "fixedColor": "text", 553 | "mode": "fixed" 554 | }, 555 | 
"custom": { 556 | "align": "auto", 557 | "cellOptions": { 558 | "type": "color-text" 559 | }, 560 | "filterable": false, 561 | "inspect": false 562 | }, 563 | "mappings": [ 564 | { 565 | "options": { 566 | "green": { 567 | "color": "green", 568 | "index": 2 569 | }, 570 | "red": { 571 | "color": "red", 572 | "index": 1 573 | }, 574 | "unknown": { 575 | "color": "blue", 576 | "index": 3 577 | }, 578 | "yellow": { 579 | "color": "yellow", 580 | "index": 0 581 | } 582 | }, 583 | "type": "value" 584 | } 585 | ], 586 | "thresholds": { 587 | "mode": "absolute", 588 | "steps": [ 589 | { 590 | "color": "text", 591 | "value": null 592 | } 593 | ] 594 | } 595 | }, 596 | "overrides": [] 597 | }, 598 | "gridPos": { 599 | "h": 7, 600 | "w": 19, 601 | "x": 5, 602 | "y": 17 603 | }, 604 | "id": 15, 605 | "options": { 606 | "cellHeight": "sm", 607 | "footer": { 608 | "countRows": false, 609 | "enablePagination": false, 610 | "fields": "", 611 | "reducer": [ 612 | "sum" 613 | ], 614 | "show": false 615 | }, 616 | "showHeader": true, 617 | "sortBy": [ 618 | { 619 | "desc": false, 620 | "displayName": "dbinstance_identifier" 621 | } 622 | ] 623 | }, 624 | "pluginVersion": "10.4.1", 625 | "targets": [ 626 | { 627 | "datasource": { 628 | "type": "prometheus", 629 | "uid": "${datasource}" 630 | }, 631 | "editorMode": "code", 632 | "exemplar": false, 633 | "expr": "aws_resources_exporter_msk_eol_info", 634 | "format": "table", 635 | "hide": false, 636 | "instant": false, 637 | "legendFormat": "__auto", 638 | "range": true, 639 | "refId": "A" 640 | } 641 | ], 642 | "title": "MSK Version and EOL Status", 643 | "transformations": [ 644 | { 645 | "id": "groupBy", 646 | "options": { 647 | "fields": { 648 | "cluster_name": { 649 | "aggregations": [], 650 | "operation": "groupby" 651 | }, 652 | "dbinstance_identifier": { 653 | "aggregations": [], 654 | "operation": "groupby" 655 | }, 656 | "engine": { 657 | "aggregations": [], 658 | "operation": "groupby" 659 | }, 660 | "engine_version": { 661 
| "aggregations": [], 662 | "operation": "groupby" 663 | }, 664 | "eol_date": { 665 | "aggregations": [], 666 | "operation": "groupby" 667 | }, 668 | "eol_status": { 669 | "aggregations": [], 670 | "operation": "groupby" 671 | }, 672 | "msk_version": { 673 | "aggregations": [], 674 | "operation": "groupby" 675 | } 676 | } 677 | } 678 | }, 679 | { 680 | "id": "organize", 681 | "options": { 682 | "excludeByName": {}, 683 | "includeByName": {}, 684 | "indexByName": { 685 | "cluster_name": 0, 686 | "eol_date": 2, 687 | "eol_status": 3, 688 | "msk_version": 1 689 | }, 690 | "renameByName": { 691 | "cluster_name": "Cluster", 692 | "dbinstance_identifier": "Instance", 693 | "engine": "Engine", 694 | "engine_version": "Engine Version", 695 | "eol_date": "EOL Date", 696 | "eol_status": "EOL Status", 697 | "msk_version": "Version" 698 | } 699 | } 700 | } 701 | ], 702 | "type": "table" 703 | }, 704 | { 705 | "datasource": { 706 | "type": "prometheus", 707 | "uid": "${datasource}" 708 | }, 709 | "description": "List of all MSK EOL alerts currently firing", 710 | "gridPos": { 711 | "h": 8, 712 | "w": 24, 713 | "x": 0, 714 | "y": 24 715 | }, 716 | "id": 16, 717 | "options": { 718 | "alertInstanceLabelFilter": "", 719 | "alertName": "MSKEOL", 720 | "dashboardAlerts": false, 721 | "groupBy": [], 722 | "groupMode": "default", 723 | "maxItems": 100, 724 | "sortOrder": 1, 725 | "stateFilter": { 726 | "error": false, 727 | "firing": true, 728 | "noData": false, 729 | "normal": false, 730 | "pending": false 731 | }, 732 | "viewMode": "list" 733 | }, 734 | "title": "MSK EOL Alerts", 735 | "type": "alertlist" 736 | }, 737 | { 738 | "gridPos": { 739 | "h": 1, 740 | "w": 24, 741 | "x": 0, 742 | "y": 32 743 | }, 744 | "id": 4, 745 | "title": "Route53", 746 | "type": "row" 747 | }, 748 | { 749 | "datasource": { 750 | "type": "prometheus", 751 | "uid": "${datasource}" 752 | }, 753 | "fieldConfig": { 754 | "defaults": { 755 | "color": { 756 | "mode": "palette-classic" 757 | }, 758 | "custom": 
{ 759 | "axisCenteredZero": false, 760 | "axisColorMode": "text", 761 | "axisLabel": "", 762 | "axisPlacement": "auto", 763 | "barAlignment": 0, 764 | "drawStyle": "line", 765 | "fillOpacity": 0, 766 | "gradientMode": "none", 767 | "hideFrom": { 768 | "legend": false, 769 | "tooltip": false, 770 | "viz": false 771 | }, 772 | "lineInterpolation": "linear", 773 | "lineWidth": 1, 774 | "pointSize": 5, 775 | "scaleDistribution": { 776 | "type": "linear" 777 | }, 778 | "showPoints": "auto", 779 | "spanNulls": false, 780 | "stacking": { 781 | "group": "A", 782 | "mode": "none" 783 | }, 784 | "thresholdsStyle": { 785 | "mode": "off" 786 | } 787 | }, 788 | "mappings": [], 789 | "thresholds": { 790 | "mode": "absolute", 791 | "steps": [ 792 | { 793 | "color": "green", 794 | "value": null 795 | }, 796 | { 797 | "color": "red", 798 | "value": 80 799 | } 800 | ] 801 | } 802 | }, 803 | "overrides": [] 804 | }, 805 | "gridPos": { 806 | "h": 8, 807 | "w": 24, 808 | "x": 0, 809 | "y": 33 810 | }, 811 | "id": 2, 812 | "options": { 813 | "legend": { 814 | "calcs": [], 815 | "displayMode": "list", 816 | "placement": "bottom", 817 | "showLegend": true 818 | }, 819 | "tooltip": { 820 | "mode": "single", 821 | "sort": "none" 822 | } 823 | }, 824 | "targets": [ 825 | { 826 | "datasource": { 827 | "type": "prometheus", 828 | "uid": "${datasource}" 829 | }, 830 | "editorMode": "builder", 831 | "expr": "aws_resources_exporter_route53_recordsperhostedzone_quota", 832 | "legendFormat": "{{hostedzonename}}-quota", 833 | "range": true, 834 | "refId": "A" 835 | }, 836 | { 837 | "datasource": { 838 | "type": "prometheus", 839 | "uid": "${datasource}" 840 | }, 841 | "editorMode": "builder", 842 | "expr": "aws_resources_exporter_route53_recordsperhostedzone_total", 843 | "hide": false, 844 | "legendFormat": "{{hostedzonename}}-total", 845 | "range": true, 846 | "refId": "B" 847 | } 848 | ], 849 | "title": "Route53", 850 | "type": "timeseries" 851 | } 852 | ], 853 | "schemaVersion": 37, 854 | 
"style": "dark", 855 | "tags": [], 856 | "templating": { 857 | "list": [ 858 | { 859 | "allValue": "All", 860 | "current": { 861 | "selected": false, 862 | "text": "appsrep11ue1-prometheus", 863 | "value": "appsrep11ue1-prometheus" 864 | }, 865 | "hide": 0, 866 | "includeAll": false, 867 | "multi": false, 868 | "name": "datasource", 869 | "options": [], 870 | "query": "prometheus", 871 | "queryValue": "", 872 | "refresh": 1, 873 | "regex": "appsrep11ue1-prometheus|appsres11ue1-prometheus", 874 | "skipUrlSync": false, 875 | "type": "datasource" 876 | } 877 | ] 878 | }, 879 | "time": { 880 | "from": "now-6h", 881 | "to": "now" 882 | }, 883 | "timepicker": {}, 884 | "timezone": "", 885 | "title": "AWS Resource Exporter", 886 | "uid": "4EIL66R4k", 887 | "version": 7, 888 | "weekStart": "" 889 | } 890 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/app-sre/aws-resource-exporter 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/alecthomas/kingpin/v2 v2.4.0 7 | github.com/aws/aws-sdk-go v1.55.5 8 | github.com/go-kit/log v0.2.1 9 | github.com/golang/mock v1.6.0 10 | github.com/prometheus/client_golang v1.20.5 11 | github.com/prometheus/client_model v0.6.1 12 | github.com/prometheus/common v0.60.0 13 | github.com/stretchr/testify v1.9.0 14 | gopkg.in/yaml.v3 v3.0.1 15 | ) 16 | 17 | require ( 18 | github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect 19 | github.com/beorn7/perks v1.0.1 // indirect 20 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 21 | github.com/davecgh/go-spew v1.1.1 // indirect 22 | github.com/go-logfmt/logfmt v0.6.0 // indirect 23 | github.com/jmespath/go-jmespath v0.4.0 // indirect 24 | github.com/klauspost/compress v1.17.11 // indirect 25 | github.com/kr/text v0.2.0 // indirect 26 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 27 | 
github.com/pmezard/go-difflib v1.0.0 // indirect 28 | github.com/prometheus/procfs v0.15.1 // indirect 29 | github.com/xhit/go-str2duration/v2 v2.1.0 // indirect 30 | golang.org/x/sys v0.26.0 // indirect 31 | google.golang.org/protobuf v1.35.1 // indirect 32 | ) 33 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= 2 | github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= 3 | github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= 4 | github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= 5 | github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= 6 | github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= 7 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 8 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 9 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 10 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 11 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 12 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 14 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= 16 | github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= 17 | github.com/go-logfmt/logfmt v0.6.0 
h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= 18 | github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= 19 | github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= 20 | github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= 21 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 22 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 23 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 24 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 25 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 26 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 27 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 28 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 29 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 30 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 31 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 32 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 33 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 34 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 35 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 36 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 37 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 38 | github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 39 | github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= 40 | github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= 41 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= 42 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 43 | github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= 44 | github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= 45 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 46 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= 47 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 48 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 49 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 50 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 51 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 52 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 53 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 54 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 55 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 56 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 57 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 58 | github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= 59 | github.com/xhit/go-str2duration/v2 
v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= 60 | github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= 61 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 62 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 63 | golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 64 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 65 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 66 | golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= 67 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 68 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 69 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 70 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 71 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 72 | golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 73 | golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 74 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 75 | golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 76 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 77 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 78 | golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 79 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 80 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 81 | golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= 82 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 83 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 84 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 85 | google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= 86 | google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 87 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 88 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 89 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 90 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 91 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 92 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 93 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 94 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 95 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 96 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | "os" 6 | "os/signal" 7 | 
"strings" 8 | "syscall" 9 | "time" 10 | 11 | "github.com/alecthomas/kingpin/v2" 12 | "github.com/app-sre/aws-resource-exporter/pkg" 13 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 14 | "github.com/aws/aws-sdk-go/aws" 15 | "github.com/aws/aws-sdk-go/aws/session" 16 | "github.com/aws/aws-sdk-go/service/sts" 17 | "github.com/go-kit/log" 18 | "github.com/go-kit/log/level" 19 | "github.com/prometheus/client_golang/prometheus" 20 | "github.com/prometheus/client_golang/prometheus/promhttp" 21 | "github.com/prometheus/common/promlog" 22 | "github.com/prometheus/common/promlog/flag" 23 | "github.com/prometheus/common/version" 24 | ) 25 | 26 | const ( 27 | namespace = "aws_resources_exporter" 28 | DEFAULT_TIMEOUT time.Duration = 30 * time.Second 29 | CONFIG_FILE_PATH = "./aws-resource-exporter-config.yaml" 30 | ) 31 | 32 | var ( 33 | listenAddress = kingpin.Flag("web.listen-address", "The address to listen on for HTTP requests.").Default(":9115").String() 34 | metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").String() 35 | ) 36 | 37 | func main() { 38 | os.Exit(run()) 39 | } 40 | 41 | func getAwsAccountNumber(logger log.Logger, sess *session.Session) (string, error) { 42 | stsClient := sts.New(sess) 43 | identityOutput, err := stsClient.GetCallerIdentity(&sts.GetCallerIdentityInput{}) 44 | if err != nil { 45 | level.Error(logger).Log("msg", "Could not retrieve caller identity of the aws account", "err", err) 46 | return "", err 47 | } 48 | return *identityOutput.Account, nil 49 | } 50 | 51 | func setupCollectors(logger log.Logger, configFile string) ([]prometheus.Collector, error) { 52 | var collectors []prometheus.Collector 53 | config, err := pkg.LoadExporterConfiguration(logger, configFile) 54 | if err != nil { 55 | return nil, err 56 | } 57 | level.Info(logger).Log("msg", "Configuring vpc with regions", "regions", strings.Join(config.VpcConfig.Regions, ",")) 58 | level.Info(logger).Log("msg", 
"Configuring rds with regions", "regions", strings.Join(config.RdsConfig.Regions, ",")) 59 | level.Info(logger).Log("msg", "Configuring ec2 with regions", "regions", strings.Join(config.EC2Config.Regions, ",")) 60 | level.Info(logger).Log("msg", "Configuring route53 with region", "region", config.Route53Config.Region) 61 | level.Info(logger).Log("msg", "Configuring elasticache with regions", "regions", strings.Join(config.ElastiCacheConfig.Regions, ",")) 62 | level.Info(logger).Log("msg", "Configuring msk with regions", "regions", strings.Join(config.MskConfig.Regions, ",")) 63 | level.Info(logger).Log("msg", "Will VPC metrics be gathered?", "vpc-enabled", config.VpcConfig.Enabled) 64 | level.Info(logger).Log("msg", "Will IAM metrics be gathered?", "iam-enabled", config.IamConfig.Enabled) 65 | 66 | sessionRegion := "us-east-1" 67 | if sr := os.Getenv("AWS_REGION"); sr != "" { 68 | sessionRegion = sr 69 | } 70 | 71 | // Create a single session here, because we need the accountid, before we create the other configs 72 | awsConfig := aws.NewConfig().WithRegion(sessionRegion) 73 | sess := session.Must(session.NewSession(awsConfig)) 74 | awsAccountId, err := getAwsAccountNumber(logger, sess) 75 | if err != nil { 76 | return collectors, err 77 | } 78 | var vpcSessions []*session.Session 79 | if config.VpcConfig.Enabled { 80 | for _, region := range config.VpcConfig.Regions { 81 | config := aws.NewConfig().WithRegion(region) 82 | sess := session.Must(session.NewSession(config)) 83 | vpcSessions = append(vpcSessions, sess) 84 | } 85 | vpcExporter := pkg.NewVPCExporter(vpcSessions, logger, config.VpcConfig, awsAccountId) 86 | collectors = append(collectors, vpcExporter) 87 | go vpcExporter.CollectLoop() 88 | } 89 | level.Info(logger).Log("msg", "Will RDS metrics be gathered?", "rds-enabled", config.RdsConfig.Enabled) 90 | var rdsSessions []*session.Session 91 | if config.RdsConfig.Enabled { 92 | for _, region := range config.RdsConfig.Regions { 93 | config := 
aws.NewConfig().WithRegion(region) 94 | sess := session.Must(session.NewSession(config)) 95 | rdsSessions = append(rdsSessions, sess) 96 | } 97 | rdsExporter := pkg.NewRDSExporter(rdsSessions, logger, config.RdsConfig, awsAccountId) 98 | collectors = append(collectors, rdsExporter) 99 | go rdsExporter.CollectLoop() 100 | } 101 | level.Info(logger).Log("msg", "Will EC2 metrics be gathered?", "ec2-enabled", config.EC2Config.Enabled) 102 | var ec2Sessions []*session.Session 103 | if config.EC2Config.Enabled { 104 | for _, region := range config.EC2Config.Regions { 105 | config := aws.NewConfig().WithRegion(region) 106 | sess := session.Must(session.NewSession(config)) 107 | ec2Sessions = append(ec2Sessions, sess) 108 | } 109 | ec2Exporter := pkg.NewEC2Exporter(ec2Sessions, logger, config.EC2Config, awsAccountId) 110 | collectors = append(collectors, ec2Exporter) 111 | go ec2Exporter.CollectLoop() 112 | } 113 | level.Info(logger).Log("msg", "Will Route53 metrics be gathered?", "route53-enabled", config.Route53Config.Enabled) 114 | if config.Route53Config.Enabled { 115 | awsConfig := aws.NewConfig().WithRegion(config.Route53Config.Region) 116 | sess := session.Must(session.NewSession(awsConfig)) 117 | r53Exporter := pkg.NewRoute53Exporter(sess, logger, config.Route53Config, awsAccountId) 118 | collectors = append(collectors, r53Exporter) 119 | go r53Exporter.CollectLoop() 120 | } 121 | level.Info(logger).Log("msg", "Will ElastiCache metrics be gathered?", "elasticache-enabled", config.ElastiCacheConfig.Enabled) 122 | var elasticacheSessions []*session.Session 123 | if config.ElastiCacheConfig.Enabled { 124 | for _, region := range config.ElastiCacheConfig.Regions { 125 | config := aws.NewConfig().WithRegion(region) 126 | sess := session.Must(session.NewSession(config)) 127 | elasticacheSessions = append(elasticacheSessions, sess) 128 | } 129 | elasticacheExporter := pkg.NewElastiCacheExporter(elasticacheSessions, logger, config.ElastiCacheConfig, awsAccountId) 130 | 
collectors = append(collectors, elasticacheExporter) 131 | go elasticacheExporter.CollectLoop() 132 | } 133 | level.Info(logger).Log("msg", "Will MSK metrics be gathered?", "msk-enabled", config.MskConfig.Enabled) 134 | var mskSessions []*session.Session 135 | if config.MskConfig.Enabled { 136 | for _, region := range config.MskConfig.Regions { 137 | config := aws.NewConfig().WithRegion(region) 138 | sess := session.Must(session.NewSession(config)) 139 | mskSessions = append(mskSessions, sess) 140 | } 141 | mskExporter := pkg.NewMSKExporter(mskSessions, logger, config.MskConfig, awsAccountId) 142 | collectors = append(collectors, mskExporter) 143 | go mskExporter.CollectLoop() 144 | } 145 | level.Info(logger).Log("msg", "Will IAM metrics be gathered?", "iam-enabled", config.IamConfig.Enabled) 146 | if config.IamConfig.Enabled { 147 | awsConfig := aws.NewConfig().WithRegion(config.IamConfig.Region) // IAM is global, this region just for AWS SDK initialization 148 | sess := session.Must(session.NewSession(awsConfig)) 149 | iamExporter := pkg.NewIAMExporter(sess, logger, config.IamConfig, awsAccountId) 150 | collectors = append(collectors, iamExporter) 151 | go iamExporter.CollectLoop() 152 | } 153 | 154 | return collectors, nil 155 | } 156 | 157 | func run() int { 158 | promlogConfig := &promlog.Config{} 159 | flag.AddFlags(kingpin.CommandLine, promlogConfig) 160 | kingpin.Version(version.Print(namespace)) 161 | kingpin.HelpFlag.Short('h') 162 | kingpin.Parse() 163 | logger := promlog.New(promlogConfig) 164 | 165 | level.Info(logger).Log("msg", "Starting"+namespace, "version", version.Info()) 166 | level.Info(logger).Log("msg", "Build context", version.BuildContext()) 167 | 168 | awsclient.AwsExporterMetrics = awsclient.NewExporterMetrics(namespace) 169 | 170 | var configFile string 171 | if path := os.Getenv("AWS_RESOURCE_EXPORTER_CONFIG_FILE"); path != "" { 172 | configFile = path 173 | } else { 174 | configFile = CONFIG_FILE_PATH 175 | } 176 | cs, err := 
setupCollectors(logger, configFile) 177 | if err != nil { 178 | level.Error(logger).Log("msg", "Could not load configuration file", "err", err) 179 | return 1 180 | } 181 | collectors := append(cs, awsclient.AwsExporterMetrics) 182 | prometheus.MustRegister( 183 | collectors..., 184 | ) 185 | 186 | http.Handle(*metricsPath, promhttp.Handler()) 187 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 188 | w.Write([]byte(` 189 | AWS Resources Exporter 190 | 191 |

AWS Resources Exporter

192 |

Metrics

193 | 194 | `)) 195 | }) 196 | 197 | srv := http.Server{Addr: *listenAddress} 198 | srvc := make(chan struct{}) 199 | term := make(chan os.Signal, 1) 200 | signal.Notify(term, os.Interrupt, syscall.SIGTERM) 201 | 202 | go func() { 203 | level.Info(logger).Log("msg", "Starting HTTP server", "address", *listenAddress) 204 | if err := srv.ListenAndServe(); err != http.ErrServerClosed { 205 | level.Error(logger).Log("msg", "Error starting HTTP server", "err", err) 206 | close(srvc) 207 | } 208 | }() 209 | 210 | for { 211 | select { 212 | case <-term: 213 | level.Info(logger).Log("msg", "Received SIGTERM, exiting gracefully...") 214 | return 0 215 | case <-srvc: 216 | return 1 217 | } 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /openshift/acceptance-job.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Template 4 | metadata: 5 | name: ${EXPORTER_NAME}-acceptance-tests 6 | objects: 7 | - apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | name: ${EXPORTER_NAME}-acceptance-tests 11 | - apiVersion: batch/v1 12 | kind: Job 13 | metadata: 14 | annotations: 15 | ignore-check.kube-linter.io/unset-cpu-requirements: "no cpu limits" 16 | name: ${EXPORTER_NAME}-${IMAGE_TAG}-tests 17 | spec: 18 | backoffLimit: 5 19 | template: 20 | spec: 21 | restartPolicy: Never 22 | serviceAccountName: ${EXPORTER_NAME}-acceptance-tests 23 | containers: 24 | - image: ${TEST_IMAGE}:${TEST_IMAGE_TAG} 25 | imagePullPolicy: Always 26 | name: acceptance-tests 27 | env: 28 | - name: METRICS_URL 29 | value: ${METRICS_URL} 30 | - name: METRICS_NAMES 31 | value: '${METRICS_NAMES}' 32 | - name: METRICS_TIMEOUT 33 | value: '${METRICS_TIMEOUT}' 34 | - name: EXPORTER_NAME 35 | value: ${EXPORTER_NAME} 36 | - name: PUSHGATEWAY_URL 37 | valueFrom: 38 | secretKeyRef: 39 | name: ${PUSHGATEWAY_SECRET} 40 | key: ${PUSHGATEWAY_SECRET_URL_KEY} 41 | - name: PUSHGATEWAY_USERNAME 42 | 
valueFrom: 43 | secretKeyRef: 44 | name: ${PUSHGATEWAY_SECRET} 45 | key: ${PUSHGATEWAY_SECRET_USERNAME_KEY} 46 | - name: PUSHGATEWAY_PASSWORD 47 | valueFrom: 48 | secretKeyRef: 49 | name: ${PUSHGATEWAY_SECRET} 50 | key: ${PUSHGATEWAY_SECRET_PASSWORD_KEY} 51 | resources: 52 | requests: 53 | memory: ${MEMORY_REQUESTS} 54 | cpu: ${CPU_REQUESTS} 55 | limits: 56 | memory: ${MEMORY_LIMIT} 57 | parameters: 58 | - name: TEST_IMAGE 59 | value: quay.io/redhat-services-prod/app-sre-tenant/prometheus-exporter-acceptance-tests-main/prometheus-exporter-acceptance-tests-main 60 | - name: TEST_IMAGE_TAG 61 | value: latest 62 | - name: IMAGE_TAG 63 | description: tag of the exporter image to test 64 | required: true 65 | - name: METRICS_URL 66 | required: true 67 | - name: METRICS_NAMES 68 | required: true 69 | - name: METRICS_TIMEOUT 70 | value: "30" 71 | - name: EXPORTER_NAME 72 | required: true 73 | - name: PUSHGATEWAY_SECRET 74 | required: true 75 | - name: PUSHGATEWAY_SECRET_URL_KEY 76 | value: server 77 | - name: PUSHGATEWAY_SECRET_USERNAME_KEY 78 | value: username 79 | - name: PUSHGATEWAY_SECRET_PASSWORD_KEY 80 | value: password 81 | - name: MEMORY_REQUESTS 82 | value: 128Mi 83 | - name: MEMORY_LIMIT 84 | value: 512Mi 85 | - name: CPU_REQUESTS 86 | value: 100m 87 | -------------------------------------------------------------------------------- /openshift/aws-resource-exporter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Template 3 | metadata: 4 | name: aws-resource-exporter 5 | objects: 6 | - apiVersion: v1 7 | kind: ServiceAccount 8 | metadata: 9 | name: ${NAME} 10 | - apiVersion: apps/v1 11 | kind: Deployment 12 | metadata: 13 | annotations: 14 | ignore-check.kube-linter.io/minimum-three-replicas: "multiple replicas is multiple cost" 15 | ignore-check.kube-linter.io/unset-cpu-requirements: "no cpu limits" 16 | labels: 17 | app: ${NAME} 18 | name: ${NAME} 19 | spec: 20 | replicas: ${{REPLICAS}} 21 | 
selector: 22 | matchLabels: 23 | app: ${NAME} 24 | template: 25 | metadata: 26 | labels: 27 | app: ${NAME} 28 | spec: 29 | serviceAccountName: ${NAME} 30 | volumes: 31 | - name: exporter-configuration 32 | configMap: 33 | name: ${CONFIGMAP_NAME} 34 | containers: 35 | - name: aws-resource-exporter 36 | image: ${IMAGE}:${IMAGE_TAG} 37 | ports: 38 | - containerPort: 9115 39 | name: http 40 | protocol: TCP 41 | env: 42 | - name: AWS_ACCESS_KEY_ID 43 | valueFrom: 44 | secretKeyRef: 45 | name: ${SECRET_NAME} 46 | key: aws_access_key_id 47 | - name: AWS_SECRET_ACCESS_KEY 48 | valueFrom: 49 | secretKeyRef: 50 | name: ${SECRET_NAME} 51 | key: aws_secret_access_key 52 | - name: AWS_REGION 53 | value: ${AWS_REGION} 54 | - name: AWS_RESOURCE_EXPORTER_CONFIG_FILE 55 | value: /etc/aws-resource-exporter/aws-resource-exporter-config.yaml 56 | volumeMounts: 57 | - name: exporter-configuration 58 | mountPath: /etc/aws-resource-exporter/ 59 | resources: 60 | limits: 61 | memory: ${MEMORY_LIMITS} 62 | requests: 63 | cpu: ${CPU_REQUESTS} 64 | memory: ${MEMORY_REQUESTS} 65 | readinessProbe: 66 | failureThreshold: 3 67 | periodSeconds: 10 68 | successThreshold: 1 69 | httpGet: 70 | path: / 71 | port: http 72 | scheme: HTTP 73 | timeoutSeconds: 1 74 | initialDelaySeconds: 10 75 | livenessProbe: 76 | failureThreshold: 3 77 | periodSeconds: 10 78 | successThreshold: 1 79 | httpGet: 80 | path: / 81 | port: http 82 | scheme: HTTP 83 | timeoutSeconds: 1 84 | initialDelaySeconds: 10 85 | - apiVersion: v1 86 | kind: Service 87 | metadata: 88 | labels: 89 | app: ${NAME} 90 | name: ${NAME} 91 | spec: 92 | selector: 93 | app: ${NAME} 94 | ports: 95 | - name: http 96 | protocol: TCP 97 | port: 9115 98 | targetPort: 9115 99 | type: ClusterIP 100 | - apiVersion: v1 101 | kind: ConfigMap 102 | metadata: 103 | annotations: 104 | qontract.recycle: "true" 105 | labels: 106 | app: ${NAME} 107 | name: ${CONFIGMAP_NAME} 108 | data: 109 | aws-resource-exporter-config.yaml: 
${AWS_RESOURCE_EXPORTER_CONFIGURATION} 110 | parameters: 111 | - name: NAME 112 | value: aws-resource-exporter 113 | - name: IMAGE 114 | value: quay.io/app-sre/aws-resource-exporter 115 | - name: IMAGE_TAG 116 | value: latest 117 | - name: REPLICAS 118 | value: "1" 119 | - name: SECRET_NAME 120 | value: aws-resource-exporter 121 | - name: AWS_REGION 122 | value: us-east-1 123 | description: if in GovCloud, use us-gov-east-1/us-gov-west-1 124 | - name: CONFIGMAP_NAME 125 | value: aws-resource-exporter-config 126 | - name: CPU_REQUESTS 127 | value: 50m 128 | - name: MEMORY_LIMITS 129 | value: 150Mi 130 | - name: MEMORY_REQUESTS 131 | value: 150Mi 132 | - name: AWS_RESOURCE_EXPORTER_CONFIGURATION 133 | value: | 134 | default: 135 | regions: "us-east-1" 136 | timeout: 10s 137 | rds: {} 138 | vpc: 139 | regions: "" 140 | timeout: 30s 141 | route53: 142 | regions: "" 143 | timeout: 60s 144 | -------------------------------------------------------------------------------- /pkg/awsclient/awsclient.go: -------------------------------------------------------------------------------- 1 | // inspired by https://github.com/openshift/aws-account-operator/blob/master/pkg/awsclient/client.go 2 | 3 | package awsclient 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/aws/aws-sdk-go/aws/request" 9 | "github.com/aws/aws-sdk-go/service/rds" 10 | "github.com/aws/aws-sdk-go/service/route53" 11 | "github.com/aws/aws-sdk-go/service/route53/route53iface" 12 | "github.com/aws/aws-sdk-go/service/servicequotas" 13 | "github.com/aws/aws-sdk-go/service/servicequotas/servicequotasiface" 14 | 15 | "github.com/aws/aws-sdk-go/aws" 16 | "github.com/aws/aws-sdk-go/aws/session" 17 | "github.com/aws/aws-sdk-go/service/ec2" 18 | "github.com/aws/aws-sdk-go/service/ec2/ec2iface" 19 | "github.com/aws/aws-sdk-go/service/elasticache" 20 | "github.com/aws/aws-sdk-go/service/kafka" 21 | ) 22 | 23 | //go:generate mockgen -source=./awsclient.go -destination=./mock/zz_generated.mock_client.go -package=mock 24 | 
// Client is a wrapper object for actual AWS SDK clients to allow for easier testing.
// It exposes both raw paginated SDK calls (the *PagesWithContext methods) and
// convenience helpers (the *All methods) that walk every page and aggregate results.
type Client interface {
	// EC2
	DescribeTransitGatewaysWithContext(ctx aws.Context, input *ec2.DescribeTransitGatewaysInput, opts ...request.Option) (*ec2.DescribeTransitGatewaysOutput, error)

	// RDS
	DescribeDBInstancesPagesWithContext(ctx aws.Context, input *rds.DescribeDBInstancesInput, fn func(*rds.DescribeDBInstancesOutput, bool) bool, opts ...request.Option) error
	DescribeDBLogFilesPagesWithContext(ctx aws.Context, input *rds.DescribeDBLogFilesInput, fn func(*rds.DescribeDBLogFilesOutput, bool) bool, opts ...request.Option) error
	DescribePendingMaintenanceActionsPagesWithContext(ctx aws.Context, input *rds.DescribePendingMaintenanceActionsInput, fn func(*rds.DescribePendingMaintenanceActionsOutput, bool) bool, opts ...request.Option) error
	DescribeDBLogFilesAll(ctx context.Context, instanceId string) ([]*rds.DescribeDBLogFilesOutput, error)
	DescribePendingMaintenanceActionsAll(ctx context.Context) ([]*rds.ResourcePendingMaintenanceActions, error)
	DescribeDBInstancesAll(ctx context.Context) ([]*rds.DBInstance, error)

	// Service Quota
	GetServiceQuotaWithContext(ctx aws.Context, input *servicequotas.GetServiceQuotaInput, opts ...request.Option) (*servicequotas.GetServiceQuotaOutput, error)

	// Route53
	ListHostedZonesWithContext(ctx context.Context, input *route53.ListHostedZonesInput, opts ...request.Option) (*route53.ListHostedZonesOutput, error)
	GetHostedZoneLimitWithContext(ctx context.Context, input *route53.GetHostedZoneLimitInput, opts ...request.Option) (*route53.GetHostedZoneLimitOutput, error)

	// ElastiCache
	DescribeCacheClustersAll(ctx context.Context) ([]*elasticache.CacheCluster, error)

	// MSK
	ListClustersAll(ctx context.Context) ([]*kafka.ClusterInfo, error)
}
rds.RDS 55 | serviceQuotasClient servicequotasiface.ServiceQuotasAPI 56 | route53Client route53iface.Route53API 57 | elasticacheClient elasticache.ElastiCache 58 | mskClient kafka.Kafka 59 | } 60 | 61 | func (c *awsClient) DescribeTransitGatewaysWithContext(ctx aws.Context, input *ec2.DescribeTransitGatewaysInput, opts ...request.Option) (*ec2.DescribeTransitGatewaysOutput, error) { 62 | return c.ec2Client.DescribeTransitGatewaysWithContext(ctx, input, opts...) 63 | } 64 | 65 | func (c *awsClient) DescribeDBLogFilesPagesWithContext(ctx aws.Context, input *rds.DescribeDBLogFilesInput, fn func(*rds.DescribeDBLogFilesOutput, bool) bool, opts ...request.Option) error { 66 | return c.rdsClient.DescribeDBLogFilesPagesWithContext(ctx, input, fn, opts...) 67 | } 68 | 69 | func (c *awsClient) DescribeDBInstancesPagesWithContext(ctx aws.Context, input *rds.DescribeDBInstancesInput, fn func(*rds.DescribeDBInstancesOutput, bool) bool, opts ...request.Option) error { 70 | return c.rdsClient.DescribeDBInstancesPagesWithContext(ctx, input, fn, opts...) 71 | } 72 | 73 | func (c *awsClient) DescribePendingMaintenanceActionsPagesWithContext(ctx aws.Context, input *rds.DescribePendingMaintenanceActionsInput, fn func(*rds.DescribePendingMaintenanceActionsOutput, bool) bool, opts ...request.Option) error { 74 | return c.rdsClient.DescribePendingMaintenanceActionsPagesWithContext(ctx, input, fn, opts...) 75 | } 76 | 77 | func (c *awsClient) GetServiceQuotaWithContext(ctx aws.Context, input *servicequotas.GetServiceQuotaInput, opts ...request.Option) (*servicequotas.GetServiceQuotaOutput, error) { 78 | return c.serviceQuotasClient.GetServiceQuotaWithContext(ctx, input, opts...) 
79 | } 80 | 81 | func (c *awsClient) DescribeDBLogFilesAll(ctx context.Context, instanceId string) ([]*rds.DescribeDBLogFilesOutput, error) { 82 | input := &rds.DescribeDBLogFilesInput{ 83 | DBInstanceIdentifier: &instanceId, 84 | } 85 | 86 | var logOutPuts []*rds.DescribeDBLogFilesOutput 87 | err := c.DescribeDBLogFilesPagesWithContext(ctx, input, func(ddlo *rds.DescribeDBLogFilesOutput, b bool) bool { 88 | AwsExporterMetrics.IncrementRequests() 89 | logOutPuts = append(logOutPuts, ddlo) 90 | return true 91 | }) 92 | 93 | if err != nil { 94 | AwsExporterMetrics.IncrementErrors() 95 | return nil, err 96 | } 97 | 98 | return logOutPuts, nil 99 | } 100 | 101 | func (c *awsClient) DescribePendingMaintenanceActionsAll(ctx context.Context) ([]*rds.ResourcePendingMaintenanceActions, error) { 102 | describePendingMaintInput := &rds.DescribePendingMaintenanceActionsInput{} 103 | 104 | var instancesPendMaintActionsData []*rds.ResourcePendingMaintenanceActions 105 | err := c.DescribePendingMaintenanceActionsPagesWithContext(ctx, describePendingMaintInput, func(dpm *rds.DescribePendingMaintenanceActionsOutput, b bool) bool { 106 | AwsExporterMetrics.IncrementRequests() 107 | instancesPendMaintActionsData = append(instancesPendMaintActionsData, dpm.PendingMaintenanceActions...) 108 | return true 109 | }) 110 | 111 | if err != nil { 112 | AwsExporterMetrics.IncrementErrors() 113 | return nil, err 114 | } 115 | 116 | return instancesPendMaintActionsData, nil 117 | } 118 | 119 | func (c *awsClient) DescribeDBInstancesAll(ctx context.Context) ([]*rds.DBInstance, error) { 120 | input := &rds.DescribeDBInstancesInput{} 121 | 122 | var instances []*rds.DBInstance 123 | err := c.DescribeDBInstancesPagesWithContext(ctx, input, func(ddo *rds.DescribeDBInstancesOutput, b bool) bool { 124 | AwsExporterMetrics.IncrementRequests() 125 | instances = append(instances, ddo.DBInstances...) 
126 | return true 127 | }) 128 | if err != nil { 129 | AwsExporterMetrics.IncrementErrors() 130 | return nil, err 131 | } 132 | return instances, nil 133 | } 134 | 135 | func (c *awsClient) ListHostedZonesWithContext(ctx context.Context, input *route53.ListHostedZonesInput, opts ...request.Option) (*route53.ListHostedZonesOutput, error) { 136 | return c.route53Client.ListHostedZonesWithContext(ctx, input, opts...) 137 | } 138 | 139 | func (c *awsClient) GetHostedZoneLimitWithContext(ctx context.Context, input *route53.GetHostedZoneLimitInput, opts ...request.Option) (*route53.GetHostedZoneLimitOutput, error) { 140 | return c.route53Client.GetHostedZoneLimitWithContext(ctx, input, opts...) 141 | } 142 | 143 | func (c *awsClient) DescribeCacheClustersAll(ctx context.Context) ([]*elasticache.CacheCluster, error) { 144 | input := &elasticache.DescribeCacheClustersInput{} 145 | 146 | var clusters []*elasticache.CacheCluster 147 | err := c.DescribeCacheClustersPagesWithContext(ctx, input, func(dco *elasticache.DescribeCacheClustersOutput, more bool) bool { 148 | AwsExporterMetrics.IncrementRequests() 149 | clusters = append(clusters, dco.CacheClusters...) 150 | return more 151 | }) 152 | if err != nil { 153 | AwsExporterMetrics.IncrementErrors() 154 | return nil, err 155 | } 156 | return clusters, nil 157 | } 158 | 159 | func (c *awsClient) DescribeCacheClustersPagesWithContext(ctx aws.Context, input *elasticache.DescribeCacheClustersInput, fn func(*elasticache.DescribeCacheClustersOutput, bool) bool, opts ...request.Option) error { 160 | return c.elasticacheClient.DescribeCacheClustersPagesWithContext(ctx, input, fn, opts...) 161 | } 162 | 163 | func (c *awsClient) ListClustersPagesWithContext(ctx context.Context, input *kafka.ListClustersInput, fn func(*kafka.ListClustersOutput, bool) bool, opts ...request.Option) error { 164 | return c.mskClient.ListClustersPagesWithContext(ctx, input, fn, opts...) 
165 | } 166 | 167 | func (c *awsClient) ListClustersAll(ctx context.Context) ([]*kafka.ClusterInfo, error) { 168 | input := &kafka.ListClustersInput{} 169 | 170 | var clusters []*kafka.ClusterInfo 171 | err := c.mskClient.ListClustersPagesWithContext(ctx, input, func(lco *kafka.ListClustersOutput, lastPage bool) bool { 172 | AwsExporterMetrics.IncrementRequests() 173 | clusters = append(clusters, lco.ClusterInfoList...) 174 | return true 175 | }) 176 | 177 | if err != nil { 178 | AwsExporterMetrics.IncrementErrors() 179 | return nil, err 180 | } 181 | 182 | return clusters, nil 183 | } 184 | 185 | func NewClientFromSession(sess *session.Session) Client { 186 | return &awsClient{ 187 | ec2Client: ec2.New(sess), 188 | serviceQuotasClient: servicequotas.New(sess), 189 | rdsClient: *rds.New(sess), 190 | route53Client: route53.New(sess), 191 | elasticacheClient: *elasticache.New(sess), 192 | mskClient: *kafka.New(sess), 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /pkg/awsclient/exporter.go: -------------------------------------------------------------------------------- 1 | package awsclient 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/aws/aws-sdk-go/aws/session" 7 | "github.com/prometheus/client_golang/prometheus" 8 | ) 9 | 10 | var AwsExporterMetrics *ExporterMetrics 11 | 12 | // ExporterMetrics defines an instance of the exporter metrics 13 | type ExporterMetrics struct { 14 | sess *session.Session 15 | 16 | APIRequestsCount float64 17 | APIErrorsCount float64 18 | 19 | APIRequests *prometheus.Desc 20 | APIErrors *prometheus.Desc 21 | 22 | mutex *sync.Mutex 23 | } 24 | 25 | // NewExporterMetrics creates a new exporter metrics instance 26 | func NewExporterMetrics(namespace string) *ExporterMetrics { 27 | return &ExporterMetrics{ 28 | APIRequests: prometheus.NewDesc( 29 | prometheus.BuildFQName(namespace, "", "apirequests"), 30 | "API requests made by the exporter.", 31 | []string{}, 32 | nil, 33 | ), 34 | 
APIErrors: prometheus.NewDesc( 35 | prometheus.BuildFQName(namespace, "", "apierrors"), 36 | "API errors encountered by the exporter.", 37 | []string{}, 38 | nil, 39 | ), 40 | mutex: &sync.Mutex{}, 41 | } 42 | } 43 | 44 | // Describe is used by the Prometheus client to return a description of the metrics 45 | func (e *ExporterMetrics) Describe(ch chan<- *prometheus.Desc) { 46 | ch <- e.APIRequests 47 | ch <- e.APIErrors 48 | } 49 | 50 | // Collect is used by the Prometheus client to collect and return the metrics values 51 | func (e *ExporterMetrics) Collect(ch chan<- prometheus.Metric) { 52 | ch <- prometheus.MustNewConstMetric(e.APIRequests, prometheus.CounterValue, e.APIRequestsCount) 53 | ch <- prometheus.MustNewConstMetric(e.APIErrors, prometheus.CounterValue, e.APIErrorsCount) 54 | } 55 | 56 | // IncrementRequests increments the API requests counter 57 | func (e *ExporterMetrics) IncrementRequests() { 58 | e.mutex.Lock() 59 | e.APIRequestsCount++ 60 | e.mutex.Unlock() 61 | } 62 | 63 | // IncrementErrors increments the API requests error counter 64 | func (e *ExporterMetrics) IncrementErrors() { 65 | e.mutex.Lock() 66 | e.APIErrorsCount++ 67 | e.mutex.Unlock() 68 | } 69 | -------------------------------------------------------------------------------- /pkg/awsclient/mock/mock_iam.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: pkg/iam.go 3 | 4 | // Package mock is a generated GoMock package. 5 | package mock 6 | 7 | import ( 8 | reflect "reflect" 9 | 10 | aws "github.com/aws/aws-sdk-go/aws" 11 | request "github.com/aws/aws-sdk-go/aws/request" 12 | iam "github.com/aws/aws-sdk-go/service/iam" 13 | gomock "github.com/golang/mock/gomock" 14 | ) 15 | 16 | // MockIAMClient is a mock of IAMClient interface. 
17 | type MockIAMClient struct { 18 | ctrl *gomock.Controller 19 | recorder *MockIAMClientMockRecorder 20 | } 21 | 22 | // MockIAMClientMockRecorder is the mock recorder for MockIAMClient. 23 | type MockIAMClientMockRecorder struct { 24 | mock *MockIAMClient 25 | } 26 | 27 | // NewMockIAMClient creates a new mock instance. 28 | func NewMockIAMClient(ctrl *gomock.Controller) *MockIAMClient { 29 | mock := &MockIAMClient{ctrl: ctrl} 30 | mock.recorder = &MockIAMClientMockRecorder{mock} 31 | return mock 32 | } 33 | 34 | // EXPECT returns an object that allows the caller to indicate expected use. 35 | func (m *MockIAMClient) EXPECT() *MockIAMClientMockRecorder { 36 | return m.recorder 37 | } 38 | 39 | // ListRolesPagesWithContext mocks base method. 40 | func (m *MockIAMClient) ListRolesPagesWithContext(ctx aws.Context, input *iam.ListRolesInput, fn func(*iam.ListRolesOutput, bool) bool, opts ...request.Option) error { 41 | m.ctrl.T.Helper() 42 | varargs := []interface{}{ctx, input, fn} 43 | for _, a := range opts { 44 | varargs = append(varargs, a) 45 | } 46 | ret := m.ctrl.Call(m, "ListRolesPagesWithContext", varargs...) 47 | ret0, _ := ret[0].(error) 48 | return ret0 49 | } 50 | 51 | // ListRolesPagesWithContext indicates an expected call of ListRolesPagesWithContext. 52 | func (mr *MockIAMClientMockRecorder) ListRolesPagesWithContext(ctx, input, fn interface{}, opts ...interface{}) *gomock.Call { 53 | mr.mock.ctrl.T.Helper() 54 | varargs := append([]interface{}{ctx, input, fn}, opts...) 55 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListRolesPagesWithContext", reflect.TypeOf((*MockIAMClient)(nil).ListRolesPagesWithContext), varargs...) 56 | } 57 | -------------------------------------------------------------------------------- /pkg/awsclient/mock/zz_generated.mock_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 
2 | // Source: pkg/awsclient/awsclient.go 3 | 4 | // Package mock is a generated GoMock package. 5 | package mock 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | aws "github.com/aws/aws-sdk-go/aws" 12 | request "github.com/aws/aws-sdk-go/aws/request" 13 | ec2 "github.com/aws/aws-sdk-go/service/ec2" 14 | elasticache "github.com/aws/aws-sdk-go/service/elasticache" 15 | kafka "github.com/aws/aws-sdk-go/service/kafka" 16 | rds "github.com/aws/aws-sdk-go/service/rds" 17 | route53 "github.com/aws/aws-sdk-go/service/route53" 18 | servicequotas "github.com/aws/aws-sdk-go/service/servicequotas" 19 | gomock "github.com/golang/mock/gomock" 20 | ) 21 | 22 | // MockClient is a mock of Client interface. 23 | type MockClient struct { 24 | ctrl *gomock.Controller 25 | recorder *MockClientMockRecorder 26 | } 27 | 28 | // MockClientMockRecorder is the mock recorder for MockClient. 29 | type MockClientMockRecorder struct { 30 | mock *MockClient 31 | } 32 | 33 | // NewMockClient creates a new mock instance. 34 | func NewMockClient(ctrl *gomock.Controller) *MockClient { 35 | mock := &MockClient{ctrl: ctrl} 36 | mock.recorder = &MockClientMockRecorder{mock} 37 | return mock 38 | } 39 | 40 | // EXPECT returns an object that allows the caller to indicate expected use. 41 | func (m *MockClient) EXPECT() *MockClientMockRecorder { 42 | return m.recorder 43 | } 44 | 45 | // DescribeCacheClustersAll mocks base method. 46 | func (m *MockClient) DescribeCacheClustersAll(ctx context.Context) ([]*elasticache.CacheCluster, error) { 47 | m.ctrl.T.Helper() 48 | ret := m.ctrl.Call(m, "DescribeCacheClustersAll", ctx) 49 | ret0, _ := ret[0].([]*elasticache.CacheCluster) 50 | ret1, _ := ret[1].(error) 51 | return ret0, ret1 52 | } 53 | 54 | // DescribeCacheClustersAll indicates an expected call of DescribeCacheClustersAll. 
55 | func (mr *MockClientMockRecorder) DescribeCacheClustersAll(ctx interface{}) *gomock.Call { 56 | mr.mock.ctrl.T.Helper() 57 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeCacheClustersAll", reflect.TypeOf((*MockClient)(nil).DescribeCacheClustersAll), ctx) 58 | } 59 | 60 | // DescribeDBInstancesAll mocks base method. 61 | func (m *MockClient) DescribeDBInstancesAll(ctx context.Context) ([]*rds.DBInstance, error) { 62 | m.ctrl.T.Helper() 63 | ret := m.ctrl.Call(m, "DescribeDBInstancesAll", ctx) 64 | ret0, _ := ret[0].([]*rds.DBInstance) 65 | ret1, _ := ret[1].(error) 66 | return ret0, ret1 67 | } 68 | 69 | // DescribeDBInstancesAll indicates an expected call of DescribeDBInstancesAll. 70 | func (mr *MockClientMockRecorder) DescribeDBInstancesAll(ctx interface{}) *gomock.Call { 71 | mr.mock.ctrl.T.Helper() 72 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBInstancesAll", reflect.TypeOf((*MockClient)(nil).DescribeDBInstancesAll), ctx) 73 | } 74 | 75 | // DescribeDBInstancesPagesWithContext mocks base method. 76 | func (m *MockClient) DescribeDBInstancesPagesWithContext(ctx aws.Context, input *rds.DescribeDBInstancesInput, fn func(*rds.DescribeDBInstancesOutput, bool) bool, opts ...request.Option) error { 77 | m.ctrl.T.Helper() 78 | varargs := []interface{}{ctx, input, fn} 79 | for _, a := range opts { 80 | varargs = append(varargs, a) 81 | } 82 | ret := m.ctrl.Call(m, "DescribeDBInstancesPagesWithContext", varargs...) 83 | ret0, _ := ret[0].(error) 84 | return ret0 85 | } 86 | 87 | // DescribeDBInstancesPagesWithContext indicates an expected call of DescribeDBInstancesPagesWithContext. 88 | func (mr *MockClientMockRecorder) DescribeDBInstancesPagesWithContext(ctx, input, fn interface{}, opts ...interface{}) *gomock.Call { 89 | mr.mock.ctrl.T.Helper() 90 | varargs := append([]interface{}{ctx, input, fn}, opts...) 
91 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBInstancesPagesWithContext", reflect.TypeOf((*MockClient)(nil).DescribeDBInstancesPagesWithContext), varargs...) 92 | } 93 | 94 | // DescribeDBLogFilesAll mocks base method. 95 | func (m *MockClient) DescribeDBLogFilesAll(ctx context.Context, instanceId string) ([]*rds.DescribeDBLogFilesOutput, error) { 96 | m.ctrl.T.Helper() 97 | ret := m.ctrl.Call(m, "DescribeDBLogFilesAll", ctx, instanceId) 98 | ret0, _ := ret[0].([]*rds.DescribeDBLogFilesOutput) 99 | ret1, _ := ret[1].(error) 100 | return ret0, ret1 101 | } 102 | 103 | // DescribeDBLogFilesAll indicates an expected call of DescribeDBLogFilesAll. 104 | func (mr *MockClientMockRecorder) DescribeDBLogFilesAll(ctx, instanceId interface{}) *gomock.Call { 105 | mr.mock.ctrl.T.Helper() 106 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBLogFilesAll", reflect.TypeOf((*MockClient)(nil).DescribeDBLogFilesAll), ctx, instanceId) 107 | } 108 | 109 | // DescribeDBLogFilesPagesWithContext mocks base method. 110 | func (m *MockClient) DescribeDBLogFilesPagesWithContext(ctx aws.Context, input *rds.DescribeDBLogFilesInput, fn func(*rds.DescribeDBLogFilesOutput, bool) bool, opts ...request.Option) error { 111 | m.ctrl.T.Helper() 112 | varargs := []interface{}{ctx, input, fn} 113 | for _, a := range opts { 114 | varargs = append(varargs, a) 115 | } 116 | ret := m.ctrl.Call(m, "DescribeDBLogFilesPagesWithContext", varargs...) 117 | ret0, _ := ret[0].(error) 118 | return ret0 119 | } 120 | 121 | // DescribeDBLogFilesPagesWithContext indicates an expected call of DescribeDBLogFilesPagesWithContext. 122 | func (mr *MockClientMockRecorder) DescribeDBLogFilesPagesWithContext(ctx, input, fn interface{}, opts ...interface{}) *gomock.Call { 123 | mr.mock.ctrl.T.Helper() 124 | varargs := append([]interface{}{ctx, input, fn}, opts...) 
125 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBLogFilesPagesWithContext", reflect.TypeOf((*MockClient)(nil).DescribeDBLogFilesPagesWithContext), varargs...) 126 | } 127 | 128 | // DescribePendingMaintenanceActionsAll mocks base method. 129 | func (m *MockClient) DescribePendingMaintenanceActionsAll(ctx context.Context) ([]*rds.ResourcePendingMaintenanceActions, error) { 130 | m.ctrl.T.Helper() 131 | ret := m.ctrl.Call(m, "DescribePendingMaintenanceActionsAll", ctx) 132 | ret0, _ := ret[0].([]*rds.ResourcePendingMaintenanceActions) 133 | ret1, _ := ret[1].(error) 134 | return ret0, ret1 135 | } 136 | 137 | // DescribePendingMaintenanceActionsAll indicates an expected call of DescribePendingMaintenanceActionsAll. 138 | func (mr *MockClientMockRecorder) DescribePendingMaintenanceActionsAll(ctx interface{}) *gomock.Call { 139 | mr.mock.ctrl.T.Helper() 140 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribePendingMaintenanceActionsAll", reflect.TypeOf((*MockClient)(nil).DescribePendingMaintenanceActionsAll), ctx) 141 | } 142 | 143 | // DescribePendingMaintenanceActionsPagesWithContext mocks base method. 144 | func (m *MockClient) DescribePendingMaintenanceActionsPagesWithContext(ctx aws.Context, input *rds.DescribePendingMaintenanceActionsInput, fn func(*rds.DescribePendingMaintenanceActionsOutput, bool) bool, opts ...request.Option) error { 145 | m.ctrl.T.Helper() 146 | varargs := []interface{}{ctx, input, fn} 147 | for _, a := range opts { 148 | varargs = append(varargs, a) 149 | } 150 | ret := m.ctrl.Call(m, "DescribePendingMaintenanceActionsPagesWithContext", varargs...) 151 | ret0, _ := ret[0].(error) 152 | return ret0 153 | } 154 | 155 | // DescribePendingMaintenanceActionsPagesWithContext indicates an expected call of DescribePendingMaintenanceActionsPagesWithContext. 
156 | func (mr *MockClientMockRecorder) DescribePendingMaintenanceActionsPagesWithContext(ctx, input, fn interface{}, opts ...interface{}) *gomock.Call { 157 | mr.mock.ctrl.T.Helper() 158 | varargs := append([]interface{}{ctx, input, fn}, opts...) 159 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribePendingMaintenanceActionsPagesWithContext", reflect.TypeOf((*MockClient)(nil).DescribePendingMaintenanceActionsPagesWithContext), varargs...) 160 | } 161 | 162 | // DescribeTransitGatewaysWithContext mocks base method. 163 | func (m *MockClient) DescribeTransitGatewaysWithContext(ctx aws.Context, input *ec2.DescribeTransitGatewaysInput, opts ...request.Option) (*ec2.DescribeTransitGatewaysOutput, error) { 164 | m.ctrl.T.Helper() 165 | varargs := []interface{}{ctx, input} 166 | for _, a := range opts { 167 | varargs = append(varargs, a) 168 | } 169 | ret := m.ctrl.Call(m, "DescribeTransitGatewaysWithContext", varargs...) 170 | ret0, _ := ret[0].(*ec2.DescribeTransitGatewaysOutput) 171 | ret1, _ := ret[1].(error) 172 | return ret0, ret1 173 | } 174 | 175 | // DescribeTransitGatewaysWithContext indicates an expected call of DescribeTransitGatewaysWithContext. 176 | func (mr *MockClientMockRecorder) DescribeTransitGatewaysWithContext(ctx, input interface{}, opts ...interface{}) *gomock.Call { 177 | mr.mock.ctrl.T.Helper() 178 | varargs := append([]interface{}{ctx, input}, opts...) 179 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeTransitGatewaysWithContext", reflect.TypeOf((*MockClient)(nil).DescribeTransitGatewaysWithContext), varargs...) 180 | } 181 | 182 | // GetHostedZoneLimitWithContext mocks base method. 
183 | func (m *MockClient) GetHostedZoneLimitWithContext(ctx context.Context, input *route53.GetHostedZoneLimitInput, opts ...request.Option) (*route53.GetHostedZoneLimitOutput, error) { 184 | m.ctrl.T.Helper() 185 | varargs := []interface{}{ctx, input} 186 | for _, a := range opts { 187 | varargs = append(varargs, a) 188 | } 189 | ret := m.ctrl.Call(m, "GetHostedZoneLimitWithContext", varargs...) 190 | ret0, _ := ret[0].(*route53.GetHostedZoneLimitOutput) 191 | ret1, _ := ret[1].(error) 192 | return ret0, ret1 193 | } 194 | 195 | // GetHostedZoneLimitWithContext indicates an expected call of GetHostedZoneLimitWithContext. 196 | func (mr *MockClientMockRecorder) GetHostedZoneLimitWithContext(ctx, input interface{}, opts ...interface{}) *gomock.Call { 197 | mr.mock.ctrl.T.Helper() 198 | varargs := append([]interface{}{ctx, input}, opts...) 199 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetHostedZoneLimitWithContext", reflect.TypeOf((*MockClient)(nil).GetHostedZoneLimitWithContext), varargs...) 200 | } 201 | 202 | // GetServiceQuotaWithContext mocks base method. 203 | func (m *MockClient) GetServiceQuotaWithContext(ctx aws.Context, input *servicequotas.GetServiceQuotaInput, opts ...request.Option) (*servicequotas.GetServiceQuotaOutput, error) { 204 | m.ctrl.T.Helper() 205 | varargs := []interface{}{ctx, input} 206 | for _, a := range opts { 207 | varargs = append(varargs, a) 208 | } 209 | ret := m.ctrl.Call(m, "GetServiceQuotaWithContext", varargs...) 210 | ret0, _ := ret[0].(*servicequotas.GetServiceQuotaOutput) 211 | ret1, _ := ret[1].(error) 212 | return ret0, ret1 213 | } 214 | 215 | // GetServiceQuotaWithContext indicates an expected call of GetServiceQuotaWithContext. 216 | func (mr *MockClientMockRecorder) GetServiceQuotaWithContext(ctx, input interface{}, opts ...interface{}) *gomock.Call { 217 | mr.mock.ctrl.T.Helper() 218 | varargs := append([]interface{}{ctx, input}, opts...) 
219 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetServiceQuotaWithContext", reflect.TypeOf((*MockClient)(nil).GetServiceQuotaWithContext), varargs...) 220 | } 221 | 222 | // ListClustersAll mocks base method. 223 | func (m *MockClient) ListClustersAll(ctx context.Context) ([]*kafka.ClusterInfo, error) { 224 | m.ctrl.T.Helper() 225 | ret := m.ctrl.Call(m, "ListClustersAll", ctx) 226 | ret0, _ := ret[0].([]*kafka.ClusterInfo) 227 | ret1, _ := ret[1].(error) 228 | return ret0, ret1 229 | } 230 | 231 | // ListClustersAll indicates an expected call of ListClustersAll. 232 | func (mr *MockClientMockRecorder) ListClustersAll(ctx interface{}) *gomock.Call { 233 | mr.mock.ctrl.T.Helper() 234 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListClustersAll", reflect.TypeOf((*MockClient)(nil).ListClustersAll), ctx) 235 | } 236 | 237 | // ListHostedZonesWithContext mocks base method. 238 | func (m *MockClient) ListHostedZonesWithContext(ctx context.Context, input *route53.ListHostedZonesInput, opts ...request.Option) (*route53.ListHostedZonesOutput, error) { 239 | m.ctrl.T.Helper() 240 | varargs := []interface{}{ctx, input} 241 | for _, a := range opts { 242 | varargs = append(varargs, a) 243 | } 244 | ret := m.ctrl.Call(m, "ListHostedZonesWithContext", varargs...) 245 | ret0, _ := ret[0].(*route53.ListHostedZonesOutput) 246 | ret1, _ := ret[1].(error) 247 | return ret0, ret1 248 | } 249 | 250 | // ListHostedZonesWithContext indicates an expected call of ListHostedZonesWithContext. 251 | func (mr *MockClientMockRecorder) ListHostedZonesWithContext(ctx, input interface{}, opts ...interface{}) *gomock.Call { 252 | mr.mock.ctrl.T.Helper() 253 | varargs := append([]interface{}{ctx, input}, opts...) 254 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListHostedZonesWithContext", reflect.TypeOf((*MockClient)(nil).ListHostedZonesWithContext), varargs...) 
255 | } 256 | -------------------------------------------------------------------------------- /pkg/cache.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "crypto/sha256" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | dto "github.com/prometheus/client_model/go" 11 | ) 12 | 13 | type MetricsCache struct { 14 | cacheMutex *sync.Mutex 15 | entries map[string]cacheEntry 16 | ttl time.Duration 17 | } 18 | 19 | func NewMetricsCache(ttl time.Duration) *MetricsCache { 20 | return &MetricsCache{ 21 | cacheMutex: &sync.Mutex{}, 22 | entries: map[string]cacheEntry{}, 23 | ttl: ttl, 24 | } 25 | } 26 | 27 | func getMetricHash(metric prometheus.Metric) string { 28 | var dto dto.Metric 29 | metric.Write(&dto) 30 | labelString := metric.Desc().String() 31 | 32 | for _, labelPair := range dto.GetLabel() { 33 | labelString = fmt.Sprintf("%s,%s,%s", labelString, labelPair.GetName(), labelPair.GetValue()) 34 | } 35 | 36 | checksum := sha256.Sum256([]byte(labelString)) 37 | return fmt.Sprintf("%x", checksum[:]) 38 | } 39 | 40 | // AddMetric adds a metric to the cache 41 | func (mc *MetricsCache) AddMetric(metric prometheus.Metric) { 42 | mc.cacheMutex.Lock() 43 | mc.entries[getMetricHash(metric)] = cacheEntry{ 44 | creation: time.Now(), 45 | metric: metric, 46 | } 47 | mc.cacheMutex.Unlock() 48 | } 49 | 50 | // GetAllMetrics Iterates over all cached metrics and discards expired ones. 
51 | func (mc *MetricsCache) GetAllMetrics() []prometheus.Metric { 52 | mc.cacheMutex.Lock() 53 | returnArr := make([]prometheus.Metric, 0) 54 | for k, v := range mc.entries { 55 | if time.Since(v.creation).Seconds() > mc.ttl.Seconds() { 56 | delete(mc.entries, k) 57 | } else { 58 | returnArr = append(returnArr, v.metric) 59 | } 60 | } 61 | mc.cacheMutex.Unlock() 62 | return returnArr 63 | } 64 | 65 | type cacheEntry struct { 66 | creation time.Time 67 | metric prometheus.Metric 68 | } 69 | -------------------------------------------------------------------------------- /pkg/cache_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/prometheus/client_golang/prometheus" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func createTestMetric(fqdn string, value float64) prometheus.Metric { 12 | desc := prometheus.NewDesc(fqdn, "help", []string{"labels"}, nil) 13 | return prometheus.MustNewConstMetric(desc, prometheus.CounterValue, 1, "test") 14 | } 15 | 16 | func TestGetMetricHash(t *testing.T) { 17 | assert.Equal(t, "5e5435705ad2e07a1f989a92f230e6437dec1a12ae4f43fd26f74bcf8fa029cf", getMetricHash(createTestMetric("foo_bar", 1))) 18 | assert.Equal(t, "5e5435705ad2e07a1f989a92f230e6437dec1a12ae4f43fd26f74bcf8fa029cf", getMetricHash(createTestMetric("foo_bar", 10))) 19 | assert.NotEqual(t, "5e5435705ad2e07a1f989a92f230e6437dec1a12ae4f43fd26f74bcf8fa029cf", getMetricHash(createTestMetric("other", 1))) 20 | } 21 | 22 | func TestSameMetricWithDifferentLabelsDontOverwrite(t *testing.T) { 23 | cache := NewMetricsCache(1 * time.Second) 24 | desc := prometheus.NewDesc("test", "multimetric", []string{"aws_region"}, nil) 25 | 26 | metricEast1 := prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 1, "us-east-1") 27 | metricWest1 := prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 1, "us-west-1") 28 | metricEast2 := 
prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 2, "us-east-1") 29 | cache.AddMetric(metricEast1) 30 | cache.AddMetric(metricWest1) // should *not* overwrite metricEast1 31 | cache.AddMetric(metricEast2) // should overwrite metricEast1 32 | 33 | assert.Len(t, cache.GetAllMetrics(), 2) 34 | assert.NotContains(t, cache.GetAllMetrics(), metricEast1) 35 | assert.Contains(t, cache.GetAllMetrics(), metricWest1) 36 | assert.Contains(t, cache.GetAllMetrics(), metricEast2) 37 | } 38 | 39 | func TestMetricCacheGetAllWithTTL(t *testing.T) { 40 | cache := NewMetricsCache(1 * time.Second) 41 | 42 | testMetric := createTestMetric("testing", 1) 43 | cache.AddMetric(testMetric) 44 | assert.Len(t, cache.entries, 1) 45 | 46 | assert.Equal(t, []prometheus.Metric{testMetric}, cache.GetAllMetrics()) 47 | time.Sleep(2 * time.Second) 48 | assert.Len(t, cache.GetAllMetrics(), 0) 49 | } 50 | -------------------------------------------------------------------------------- /pkg/config.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "errors" 5 | "io/ioutil" 6 | "time" 7 | 8 | "github.com/go-kit/log" 9 | "github.com/go-kit/log/level" 10 | "gopkg.in/yaml.v3" 11 | ) 12 | 13 | type BaseConfig struct { 14 | Enabled bool `yaml:"enabled"` 15 | Interval *time.Duration `yaml:"interval"` 16 | Timeout *time.Duration `yaml:"timeout"` 17 | CacheTTL *time.Duration `yaml:"cache_ttl"` 18 | } 19 | 20 | type RDSConfig struct { 21 | BaseConfig `yaml:"base,inline"` 22 | Regions []string `yaml:"regions"` 23 | EOLInfos []EOLInfo `yaml:"eol_info"` 24 | Thresholds []Threshold `yaml:"thresholds"` 25 | } 26 | type Threshold struct { 27 | Name string `yaml:"name"` 28 | Days int `yaml:"days"` 29 | } 30 | 31 | type EOLInfo struct { 32 | Engine string `yaml:"engine"` 33 | EOL string `yaml:"eol"` 34 | Version string `yaml:"version"` 35 | } 36 | 37 | type EOLKey struct { 38 | Engine string 39 | Version string 40 | } 41 | 42 | type VPCConfig 
struct { 43 | BaseConfig `yaml:"base,inline"` 44 | Regions []string `yaml:"regions"` 45 | } 46 | 47 | type Route53Config struct { 48 | BaseConfig `yaml:"base,inline"` 49 | Region string `yaml:"region"` // Use only a single Region for now, as the current metric is global 50 | } 51 | 52 | type EC2Config struct { 53 | BaseConfig `yaml:"base,inline"` 54 | Regions []string `yaml:"regions"` 55 | } 56 | 57 | type ElastiCacheConfig struct { 58 | BaseConfig `yaml:"base,inline"` 59 | Regions []string `yaml:"regions"` 60 | } 61 | type MSKConfig struct { 62 | BaseConfig `yaml:"base,inline"` 63 | Regions []string `yaml:"regions"` 64 | MSKInfos []MSKInfo `yaml:"msk_info"` 65 | Thresholds []Threshold `yaml:"thresholds"` 66 | } 67 | 68 | type MSKInfo struct { 69 | EOL string `yaml:"eol"` 70 | Version string `yaml:"version"` 71 | } 72 | 73 | type IAMConfig struct { 74 | BaseConfig `yaml:"base,inline"` 75 | Region string `yaml:"region"` 76 | } 77 | 78 | type Config struct { 79 | RdsConfig RDSConfig `yaml:"rds"` 80 | VpcConfig VPCConfig `yaml:"vpc"` 81 | Route53Config Route53Config `yaml:"route53"` 82 | EC2Config EC2Config `yaml:"ec2"` 83 | ElastiCacheConfig ElastiCacheConfig `yaml:"elasticache"` 84 | MskConfig MSKConfig `yaml:"msk"` 85 | IamConfig IAMConfig `yaml:"iam"` 86 | } 87 | 88 | func LoadExporterConfiguration(logger log.Logger, configFile string) (*Config, error) { 89 | var config Config 90 | file, err := ioutil.ReadFile(configFile) 91 | if err != nil { 92 | level.Error(logger).Log("Could not load configuration file") 93 | return nil, errors.New("Could not load configuration file: " + configFile) 94 | } 95 | yaml.Unmarshal(file, &config) 96 | 97 | if config.RdsConfig.CacheTTL == nil { 98 | config.RdsConfig.CacheTTL = durationPtr(35 * time.Second) 99 | } 100 | if config.VpcConfig.CacheTTL == nil { 101 | config.VpcConfig.CacheTTL = durationPtr(35 * time.Second) 102 | } 103 | if config.Route53Config.CacheTTL == nil { 104 | config.Route53Config.CacheTTL = durationPtr(35 * 
time.Second) 105 | } 106 | if config.EC2Config.CacheTTL == nil { 107 | config.EC2Config.CacheTTL = durationPtr(35 * time.Second) 108 | } 109 | if config.ElastiCacheConfig.CacheTTL == nil { 110 | config.ElastiCacheConfig.CacheTTL = durationPtr(35 * time.Second) 111 | } 112 | if config.MskConfig.CacheTTL == nil { 113 | config.MskConfig.CacheTTL = durationPtr(35 * time.Second) 114 | } 115 | if config.IamConfig.CacheTTL == nil { 116 | config.IamConfig.CacheTTL = durationPtr(35 * time.Second) 117 | } 118 | 119 | if config.RdsConfig.Interval == nil { 120 | config.RdsConfig.Interval = durationPtr(15 * time.Second) 121 | } 122 | if config.VpcConfig.Interval == nil { 123 | config.VpcConfig.Interval = durationPtr(15 * time.Second) 124 | } 125 | if config.Route53Config.Interval == nil { 126 | config.Route53Config.Interval = durationPtr(15 * time.Second) 127 | } 128 | if config.EC2Config.Interval == nil { 129 | config.EC2Config.Interval = durationPtr(15 * time.Second) 130 | } 131 | if config.ElastiCacheConfig.Interval == nil { 132 | config.ElastiCacheConfig.Interval = durationPtr(15 * time.Second) 133 | } 134 | if config.MskConfig.Interval == nil { 135 | config.MskConfig.Interval = durationPtr(15 * time.Second) 136 | } 137 | if config.IamConfig.Interval == nil { 138 | config.IamConfig.Interval = durationPtr(15 * time.Second) 139 | } 140 | 141 | if config.RdsConfig.Timeout == nil { 142 | config.RdsConfig.Timeout = durationPtr(10 * time.Second) 143 | } 144 | if config.VpcConfig.Timeout == nil { 145 | config.VpcConfig.Timeout = durationPtr(10 * time.Second) 146 | } 147 | if config.Route53Config.Timeout == nil { 148 | config.Route53Config.Timeout = durationPtr(10 * time.Second) 149 | } 150 | if config.EC2Config.Timeout == nil { 151 | config.EC2Config.Timeout = durationPtr(10 * time.Second) 152 | } 153 | if config.ElastiCacheConfig.Timeout == nil { 154 | config.ElastiCacheConfig.Timeout = durationPtr(10 * time.Second) 155 | } 156 | if config.MskConfig.Timeout == nil { 157 | 
config.MskConfig.Timeout = durationPtr(10 * time.Second) 158 | } 159 | if config.IamConfig.Timeout == nil { 160 | config.IamConfig.Timeout = durationPtr(10 * time.Second) 161 | } 162 | 163 | // Setting defaults when threshold is not defined to ease the transition from hardcoded thresholds 164 | if len(config.RdsConfig.Thresholds) == 0 { 165 | config.RdsConfig.Thresholds = []Threshold{ 166 | {Name: "red", Days: 90}, 167 | {Name: "yellow", Days: 180}, 168 | {Name: "green", Days: 365}, 169 | } 170 | } 171 | 172 | return &config, nil 173 | } 174 | -------------------------------------------------------------------------------- /pkg/constats.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | const ( 4 | namespace = "aws_resources_exporter" 5 | SERVICE_CODE_KEY = "service_code" 6 | QUOTA_CODE_KEY = "quota_code" 7 | ) 8 | -------------------------------------------------------------------------------- /pkg/ec2.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/session" 12 | "github.com/aws/aws-sdk-go/service/ec2" 13 | "github.com/aws/aws-sdk-go/service/servicequotas" 14 | "github.com/go-kit/log" 15 | "github.com/go-kit/log/level" 16 | "github.com/prometheus/client_golang/prometheus" 17 | ) 18 | 19 | const ( 20 | transitGatewayPerAccountQuotaCode string = "L-A2478D36" 21 | ec2ServiceCode string = "ec2" 22 | ) 23 | 24 | var TransitGatewaysQuota *prometheus.Desc 25 | var TransitGatewaysUsage *prometheus.Desc 26 | 27 | type EC2Exporter struct { 28 | sessions []*session.Session 29 | cache MetricsCache 30 | 31 | logger log.Logger 32 | timeout time.Duration 33 | interval time.Duration 34 | } 35 | 36 | func NewEC2Exporter(sessions []*session.Session, logger log.Logger, config 
EC2Config, awsAccountId string) *EC2Exporter { 37 | 38 | level.Info(logger).Log("msg", "Initializing EC2 exporter") 39 | constLabels := map[string]string{"aws_account_id": awsAccountId, QUOTA_CODE_KEY: transitGatewayPerAccountQuotaCode, SERVICE_CODE_KEY: ec2ServiceCode} 40 | 41 | TransitGatewaysQuota = prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "ec2_transitgatewaysperregion_quota"), "Quota for maximum number of Transitgateways in this account", []string{"aws_region"}, constLabels) 42 | TransitGatewaysUsage = prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "ec2_transitgatewaysperregion_usage"), "Number of Tranitgatewyas in the AWS Account", []string{"aws_region"}, constLabels) 43 | 44 | return &EC2Exporter{ 45 | sessions: sessions, 46 | cache: *NewMetricsCache(*config.CacheTTL), 47 | 48 | logger: logger, 49 | timeout: *config.Timeout, 50 | interval: *config.Interval, 51 | } 52 | } 53 | 54 | func (e *EC2Exporter) Collect(ch chan<- prometheus.Metric) { 55 | for _, m := range e.cache.GetAllMetrics() { 56 | ch <- m 57 | } 58 | } 59 | 60 | func (e *EC2Exporter) CollectLoop() { 61 | for { 62 | ctx, ctxCancel := context.WithTimeout(context.Background(), e.timeout) 63 | defer ctxCancel() 64 | wg := &sync.WaitGroup{} 65 | wg.Add(len(e.sessions)) 66 | 67 | for _, sess := range e.sessions { 68 | go e.collectInRegion(sess, e.logger, wg, ctx) 69 | } 70 | wg.Wait() 71 | 72 | level.Info(e.logger).Log("msg", "EC2 metrics Updated") 73 | 74 | time.Sleep(e.interval) 75 | } 76 | } 77 | 78 | func (e *EC2Exporter) collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGroup, ctx context.Context) { 79 | defer wg.Done() 80 | 81 | aws := awsclient.NewClientFromSession(sess) 82 | 83 | quota, err := getQuotaValueWithContext(aws, ec2ServiceCode, transitGatewayPerAccountQuotaCode, ctx) 84 | if err != nil { 85 | level.Error(logger).Log("msg", "Could not retrieve Transit Gateway quota", "error", err.Error()) 86 | 
awsclient.AwsExporterMetrics.IncrementErrors() 87 | return 88 | } 89 | 90 | gateways, err := getAllTransitGatewaysWithContext(aws, ctx) 91 | if err != nil { 92 | level.Error(logger).Log("msg", "Could not retrieve Transit Gateway quota", "error", err.Error()) 93 | awsclient.AwsExporterMetrics.IncrementErrors() 94 | return 95 | } 96 | 97 | e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysUsage, prometheus.GaugeValue, float64(len(gateways)), *sess.Config.Region)) 98 | e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysQuota, prometheus.GaugeValue, quota, *sess.Config.Region)) 99 | } 100 | 101 | func (e *EC2Exporter) Describe(ch chan<- *prometheus.Desc) { 102 | ch <- TransitGatewaysQuota 103 | ch <- TransitGatewaysUsage 104 | } 105 | 106 | func createDescribeTransitGatewayInput() *ec2.DescribeTransitGatewaysInput { 107 | return &ec2.DescribeTransitGatewaysInput{ 108 | DryRun: aws.Bool(false), 109 | MaxResults: aws.Int64(1000), 110 | } 111 | } 112 | 113 | func createGetServiceQuotaInput(serviceCode, quotaCode string) *servicequotas.GetServiceQuotaInput { 114 | return &servicequotas.GetServiceQuotaInput{ 115 | ServiceCode: aws.String(serviceCode), 116 | QuotaCode: aws.String(quotaCode), 117 | } 118 | } 119 | 120 | func getAllTransitGatewaysWithContext(client awsclient.Client, ctx context.Context) ([]*ec2.TransitGateway, error) { 121 | results := []*ec2.TransitGateway{} 122 | describeGatewaysInput := createDescribeTransitGatewayInput() 123 | describeGatewaysOutput, err := client.DescribeTransitGatewaysWithContext(ctx, describeGatewaysInput) 124 | 125 | if err != nil { 126 | return nil, err 127 | } 128 | results = append(results, describeGatewaysOutput.TransitGateways...) 
129 | // TODO: replace with aws-go-sdk pagination method 130 | for describeGatewaysOutput.NextToken != nil { 131 | describeGatewaysInput.SetNextToken(*describeGatewaysOutput.NextToken) 132 | describeGatewaysOutput, err := client.DescribeTransitGatewaysWithContext(ctx, describeGatewaysInput) 133 | if err != nil { 134 | return nil, err 135 | } 136 | results = append(results, describeGatewaysOutput.TransitGateways...) 137 | } 138 | 139 | return results, nil 140 | } 141 | 142 | func getQuotaValueWithContext(client awsclient.Client, serviceCode string, quotaCode string, ctx context.Context) (float64, error) { 143 | sqOutput, err := client.GetServiceQuotaWithContext(ctx, createGetServiceQuotaInput(serviceCode, quotaCode)) 144 | 145 | if err != nil { 146 | return 0, err 147 | } 148 | 149 | if sqOutput.Quota == nil || sqOutput.Quota.Value == nil { 150 | return 0, fmt.Errorf("quota value not found for servicecode %s and quotacode %s", serviceCode, quotaCode) 151 | } 152 | 153 | return *sqOutput.Quota.Value, nil 154 | } 155 | -------------------------------------------------------------------------------- /pkg/ec2_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient/mock" 8 | "github.com/aws/aws-sdk-go/aws" 9 | "github.com/aws/aws-sdk-go/service/ec2" 10 | "github.com/aws/aws-sdk-go/service/servicequotas" 11 | "github.com/golang/mock/gomock" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestGetAllTransitGatewaysWithContext(t *testing.T) { 16 | ctx := context.TODO() 17 | ctrl := gomock.NewController(t) 18 | defer ctrl.Finish() 19 | 20 | mockClient := mock.NewMockClient(ctrl) 21 | 22 | mockClient.EXPECT().DescribeTransitGatewaysWithContext(ctx, createDescribeTransitGatewayInput()). 
23 | Return(&ec2.DescribeTransitGatewaysOutput{ 24 | TransitGateways: []*ec2.TransitGateway{&ec2.TransitGateway{}}, 25 | }, nil) 26 | 27 | gateways, err := getAllTransitGatewaysWithContext(mockClient, ctx) 28 | assert.Nil(t, err) 29 | assert.Len(t, gateways, 1) 30 | } 31 | 32 | func TestGetQuotaValueWithContext(t *testing.T) { 33 | ctx := context.TODO() 34 | ctrl := gomock.NewController(t) 35 | defer ctrl.Finish() 36 | 37 | mockClient := mock.NewMockClient(ctrl) 38 | 39 | mockClient.EXPECT().GetServiceQuotaWithContext(ctx, 40 | createGetServiceQuotaInput(ec2ServiceCode, transitGatewayPerAccountQuotaCode)).Return( 41 | &servicequotas.GetServiceQuotaOutput{Quota: &servicequotas.ServiceQuota{Value: aws.Float64(123.0)}}, nil, 42 | ) 43 | 44 | quotaValue, err := getQuotaValueWithContext(mockClient, ec2ServiceCode, transitGatewayPerAccountQuotaCode, ctx) 45 | assert.Nil(t, err) 46 | assert.Equal(t, quotaValue, 123.0) 47 | } 48 | 49 | func TestGetQuotaValueWithContextError(t *testing.T) { 50 | ctx := context.TODO() 51 | ctrl := gomock.NewController(t) 52 | defer ctrl.Finish() 53 | 54 | mockClient := mock.NewMockClient(ctrl) 55 | 56 | mockClient.EXPECT().GetServiceQuotaWithContext(ctx, 57 | createGetServiceQuotaInput(ec2ServiceCode, transitGatewayPerAccountQuotaCode)).Return( 58 | &servicequotas.GetServiceQuotaOutput{Quota: &servicequotas.ServiceQuota{Value: nil}}, nil, 59 | ) 60 | 61 | quotaValue, err := getQuotaValueWithContext(mockClient, ec2ServiceCode, transitGatewayPerAccountQuotaCode, ctx) 62 | assert.NotNil(t, err) 63 | assert.Equal(t, quotaValue, 0.0) 64 | } 65 | -------------------------------------------------------------------------------- /pkg/elasticache.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/session" 11 | 
"github.com/aws/aws-sdk-go/service/elasticache" 12 | "github.com/go-kit/log" 13 | "github.com/go-kit/log/level" 14 | "github.com/prometheus/client_golang/prometheus" 15 | ) 16 | 17 | var RedisVersion *prometheus.Desc = prometheus.NewDesc( 18 | prometheus.BuildFQName(namespace, "", "elasticache_redisversion"), 19 | "The ElastiCache engine type and version.", 20 | []string{"aws_region", "replication_group_id", "engine", "engine_version", "aws_account_id"}, 21 | nil, 22 | ) 23 | 24 | type ElastiCacheExporter struct { 25 | sessions []*session.Session 26 | svcs []awsclient.Client 27 | cache MetricsCache 28 | awsAccountId string 29 | 30 | logger log.Logger 31 | timeout time.Duration 32 | interval time.Duration 33 | } 34 | 35 | // NewElastiCacheExporter creates a new ElastiCacheExporter instance 36 | func NewElastiCacheExporter(sessions []*session.Session, logger log.Logger, config ElastiCacheConfig, awsAccountId string) *ElastiCacheExporter { 37 | level.Info(logger).Log("msg", "Initializing ElastiCache exporter") 38 | 39 | var elasticaches []awsclient.Client 40 | for _, session := range sessions { 41 | elasticaches = append(elasticaches, awsclient.NewClientFromSession(session)) 42 | } 43 | 44 | return &ElastiCacheExporter{ 45 | sessions: sessions, 46 | svcs: elasticaches, 47 | cache: *NewMetricsCache(*config.CacheTTL), 48 | logger: logger, 49 | timeout: *config.Timeout, 50 | interval: *config.Interval, 51 | awsAccountId: awsAccountId, 52 | } 53 | } 54 | 55 | func (e *ElastiCacheExporter) getRegion(sessionIndex int) string { 56 | return *e.sessions[sessionIndex].Config.Region 57 | } 58 | 59 | // Adds ElastiCache info to metrics cache 60 | func (e *ElastiCacheExporter) addMetricFromElastiCacheInfo(sessionIndex int, clusters []*elasticache.CacheCluster) { 61 | region := e.getRegion(sessionIndex) 62 | 63 | for _, cluster := range clusters { 64 | replicationGroupId := aws.StringValue(cluster.ReplicationGroupId) 65 | engine := aws.StringValue(cluster.Engine) 66 | engineVersion 
:= aws.StringValue(cluster.EngineVersion) 67 | 68 | e.cache.AddMetric(prometheus.MustNewConstMetric(RedisVersion, prometheus.GaugeValue, 1, region, replicationGroupId, engine, engineVersion, e.awsAccountId)) 69 | } 70 | } 71 | 72 | func (e *ElastiCacheExporter) Describe(ch chan<- *prometheus.Desc) { 73 | ch <- RedisVersion 74 | } 75 | 76 | func (e *ElastiCacheExporter) Collect(ch chan<- prometheus.Metric) { 77 | for _, m := range e.cache.GetAllMetrics() { 78 | ch <- m 79 | } 80 | } 81 | 82 | func (e *ElastiCacheExporter) CollectLoop() { 83 | for { 84 | ctx, cancel := context.WithTimeout(context.Background(), e.timeout) 85 | for i, client := range e.svcs { 86 | clusters, err := client.DescribeCacheClustersAll(ctx) 87 | if err != nil { 88 | level.Error(e.logger).Log("msg", "Call to DescribeCacheClustersAll failed", "region", *e.sessions[i].Config.Region, "err", err) 89 | continue 90 | } 91 | e.addMetricFromElastiCacheInfo(i, clusters) 92 | } 93 | level.Info(e.logger).Log("msg", "ElastiCache metrics updated") 94 | 95 | cancel() 96 | time.Sleep(e.interval) 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /pkg/elasticache_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/aws/aws-sdk-go/aws" 8 | "github.com/aws/aws-sdk-go/aws/session" 9 | "github.com/aws/aws-sdk-go/service/elasticache" 10 | "github.com/go-kit/log" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func createTestCacheClusters() []*elasticache.CacheCluster { 15 | return []*elasticache.CacheCluster{ 16 | { 17 | CacheClusterId: aws.String("test-cluster"), 18 | Engine: aws.String("redis"), 19 | EngineVersion: aws.String("123"), 20 | }, 21 | } 22 | } 23 | 24 | func TestAddMetricFromElastiCacheInfo(t *testing.T) { 25 | x := ElastiCacheExporter{ 26 | sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})}, 27 | 
package pkg

import (
	"context"
	"time"

	"github.com/app-sre/aws-resource-exporter/pkg/awsclient"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/iam"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"
)

// IAMClient is the narrow slice of the AWS IAM API this exporter uses.
// Keeping it to a single method lets tests substitute a generated mock.
type IAMClient interface {
	ListRolesPagesWithContext(ctx aws.Context, input *iam.ListRolesInput, fn func(*iam.ListRolesOutput, bool) bool, opts ...request.Option) error
}

var (
	// IamRolesUsed reports how many IAM roles currently exist in the account.
	IamRolesUsed = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "iam", "roles_used"),
		"Number of IAM roles used in the account.",
		[]string{"aws_account_id"}, nil,
	)
	// IamRolesQuota reports the account-level IAM role quota.
	IamRolesQuota = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, "iam", "roles_quota"),
		"IAM role quota for the account.",
		[]string{"aws_account_id"}, nil,
	)
)

// IAMExporter collects IAM role usage and quota metrics in a background
// loop and serves them to Prometheus from an in-memory cache, so scrapes
// never hit the AWS API directly.
type IAMExporter struct {
	session      *session.Session
	iamClient    IAMClient        // lists roles; mockable in tests
	sqClient     awsclient.Client // queries Service Quotas for the role limit
	logger       log.Logger
	timeout      time.Duration // per-iteration budget for the AWS calls
	interval     time.Duration // sleep between CollectLoop iterations
	awsAccountId string        // label value attached to both metrics
	cache        MetricsCache  // TTL cache read by Collect
}

// NewIAMExporter creates a new IAMExporter
func NewIAMExporter(sess *session.Session, logger log.Logger, config IAMConfig, awsAccountId string) *IAMExporter {

	level.Info(logger).Log("msg", "Initializing IAM exporter")

	return &IAMExporter{
		session:   sess,
		iamClient: iam.New(sess),
		sqClient:  awsclient.NewClientFromSession(sess),
		logger:    logger,
		// NOTE(review): Timeout/Interval/CacheTTL are dereferenced without nil
		// checks — presumably config validation upstream guarantees them; confirm.
		timeout:      *config.Timeout,
		interval:     *config.Interval,
		awsAccountId: awsAccountId,
		cache:        *NewMetricsCache(*config.CacheTTL),
	}
}

// Describe implements prometheus.Collector.
func (e *IAMExporter) Describe(ch chan<- *prometheus.Desc) {
	ch <- IamRolesUsed
	ch <- IamRolesQuota
}

// Collect implements prometheus.Collector by draining the metric cache;
// no AWS calls happen on the scrape path.
func (e *IAMExporter) Collect(ch chan<- prometheus.Metric) {
	for _, m := range e.cache.GetAllMetrics() {
		ch <- m
	}
}

// CollectLoop periodically refreshes the cached role-count and quota
// metrics. It never returns; on any AWS error it logs, cancels the
// iteration context, sleeps for the configured interval and retries.
func (e *IAMExporter) CollectLoop() {
	for {
		ctx, cancel := context.WithTimeout(context.Background(), e.timeout)

		roleCount, err := getIAMRoleCount(ctx, e.iamClient)
		if err != nil {
			level.Error(e.logger).Log("msg", "Failed to get IAM role count", "err", err)
			cancel()
			time.Sleep(e.interval)
			continue
		}

		// "L-FE177D64" is a Service Quotas quota code for the "iam" service —
		// presumably the roles-per-account quota; verify against AWS docs.
		quota, err := getQuotaValueWithContext(e.sqClient, "iam", "L-FE177D64", ctx)
		if err != nil {
			level.Error(e.logger).Log("msg", "Failed to get IAM role quota", "err", err)
			cancel()
			time.Sleep(e.interval)
			continue
		}

		e.cache.AddMetric(prometheus.MustNewConstMetric(IamRolesUsed, prometheus.GaugeValue, float64(roleCount), e.awsAccountId))
		e.cache.AddMetric(prometheus.MustNewConstMetric(IamRolesQuota, prometheus.GaugeValue, quota, e.awsAccountId))

		level.Info(e.logger).Log("msg", "IAM metrics updated", "used", roleCount, "quota", quota)
		cancel()
		time.Sleep(e.interval)
	}
}

// getIAMRoleCount returns number of IAM roles using IAMClient
func getIAMRoleCount(ctx context.Context, client IAMClient) (int, error) {
	var count int
	err := client.ListRolesPagesWithContext(ctx, &iam.ListRolesInput{
		MaxItems: aws.Int64(1000), // page size; the callback sums across all pages
	}, func(output *iam.ListRolesOutput, _ bool) bool {
		count += len(output.Roles)
		return true // keep paginating
	})
	return count, err
}
42 | Return(assert.AnError) 43 | 44 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 45 | defer cancel() 46 | 47 | count, err := getIAMRoleCount(ctx, mockIAM) 48 | assert.Error(t, err) 49 | assert.Equal(t, 0, count) 50 | assert.Contains(t, err.Error(), "assert.AnError") 51 | } 52 | -------------------------------------------------------------------------------- /pkg/msk.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/session" 11 | "github.com/aws/aws-sdk-go/service/kafka" 12 | "github.com/go-kit/log" 13 | "github.com/go-kit/log/level" 14 | "github.com/prometheus/client_golang/prometheus" 15 | ) 16 | 17 | var MSKInfos *prometheus.Desc = prometheus.NewDesc( 18 | prometheus.BuildFQName(namespace, "", "msk_eol_info"), 19 | "The MSK eol date and status for the version.", 20 | []string{"aws_region", "cluster_name", "msk_version", "eol_date", "eol_status"}, 21 | nil, 22 | ) 23 | 24 | type MSKExporter struct { 25 | sessions []*session.Session 26 | svcs []awsclient.Client 27 | mskInfos []MSKInfo 28 | thresholds []Threshold 29 | cache MetricsCache 30 | awsAccountId string 31 | 32 | logger log.Logger 33 | timeout time.Duration 34 | interval time.Duration 35 | } 36 | 37 | // NewMSKExporter creates a new MSKExporter instance 38 | func NewMSKExporter(sessions []*session.Session, logger log.Logger, config MSKConfig, awsAccountId string) *MSKExporter { 39 | level.Info(logger).Log("msg", "Initializing MSK exporter") 40 | 41 | var msks []awsclient.Client 42 | for _, session := range sessions { 43 | msks = append(msks, awsclient.NewClientFromSession(session)) 44 | } 45 | 46 | return &MSKExporter{ 47 | sessions: sessions, 48 | svcs: msks, 49 | cache: *NewMetricsCache(*config.CacheTTL), 50 | logger: logger, 51 | timeout: 
*config.Timeout, 52 | interval: *config.Interval, 53 | mskInfos: config.MSKInfos, 54 | thresholds: config.Thresholds, 55 | } 56 | } 57 | 58 | func (e *MSKExporter) getRegion(sessionIndex int) string { 59 | return *e.sessions[sessionIndex].Config.Region 60 | } 61 | 62 | func (e *MSKExporter) addMetricFromMSKInfo(sessionIndex int, clusters []*kafka.ClusterInfo, mskInfos []MSKInfo) { 63 | region := e.getRegion(sessionIndex) 64 | 65 | eolMap := make(map[string]string) 66 | for _, eolinfo := range mskInfos { 67 | eolMap[eolinfo.Version] = eolinfo.EOL 68 | } 69 | 70 | for _, cluster := range clusters { 71 | clusterName := aws.StringValue(cluster.ClusterName) 72 | mskVersion := aws.StringValue(cluster.CurrentBrokerSoftwareInfo.KafkaVersion) 73 | 74 | if eolDate, found := eolMap[mskVersion]; found { 75 | eolStatus, err := GetEOLStatus(eolDate, e.thresholds) 76 | if err != nil { 77 | level.Error(e.logger).Log("msg", "Error determining MSK EOL status", "version", mskVersion, "error", err) 78 | } 79 | e.cache.AddMetric(prometheus.MustNewConstMetric(MSKInfos, prometheus.GaugeValue, 1, region, clusterName, mskVersion, eolDate, eolStatus)) 80 | } else { 81 | level.Info(e.logger).Log("msg", "EOL information not found for MSK version %s, setting status to 'unknown'", mskVersion) 82 | e.cache.AddMetric(prometheus.MustNewConstMetric(MSKInfos, prometheus.GaugeValue, 1, region, clusterName, mskVersion, "no-eol-date", "unknown")) 83 | } 84 | } 85 | } 86 | 87 | func (e *MSKExporter) Describe(ch chan<- *prometheus.Desc) { 88 | ch <- MSKInfos 89 | } 90 | 91 | func (e *MSKExporter) Collect(ch chan<- prometheus.Metric) { 92 | for _, m := range e.cache.GetAllMetrics() { 93 | ch <- m 94 | } 95 | } 96 | 97 | func (e *MSKExporter) CollectLoop() { 98 | for { 99 | ctx, cancel := context.WithTimeout(context.Background(), e.timeout) 100 | for i, svc := range e.svcs { 101 | clusters, err := svc.ListClustersAll(ctx) 102 | if err != nil { 103 | level.Error(e.logger).Log("msg", "Call to ListClustersAll 
failed", "region", *e.sessions[i].Config.Region, "err", err) 104 | continue 105 | } 106 | e.addMetricFromMSKInfo(i, clusters, e.mskInfos) 107 | } 108 | level.Info(e.logger).Log("msg", "MSK metrics updated") 109 | 110 | cancel() 111 | time.Sleep(e.interval) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /pkg/msk_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/aws/aws-sdk-go/aws" 9 | "github.com/aws/aws-sdk-go/aws/session" 10 | "github.com/aws/aws-sdk-go/service/kafka" 11 | "github.com/go-kit/log" 12 | "github.com/prometheus/client_golang/prometheus" 13 | dto "github.com/prometheus/client_model/go" 14 | ) 15 | 16 | func createTestClusters() []*kafka.ClusterInfo { 17 | return []*kafka.ClusterInfo{ 18 | { 19 | ClusterName: aws.String("test-cluster-1"), 20 | CurrentBrokerSoftwareInfo: &kafka.BrokerSoftwareInfo{ 21 | KafkaVersion: aws.String("1000"), 22 | }, 23 | }, 24 | } 25 | } 26 | 27 | func TestAddAllMSKMetricsWithEOLMatch(t *testing.T) { 28 | thresholds := []Threshold{ 29 | {Name: "red", Days: 90}, 30 | {Name: "yellow", Days: 180}, 31 | {Name: "green", Days: 365}, 32 | } 33 | 34 | e := MSKExporter{ 35 | sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})}, 36 | cache: *NewMetricsCache(10 * time.Second), 37 | logger: log.NewNopLogger(), 38 | thresholds: thresholds, 39 | } 40 | 41 | mskInfos := []MSKInfo{ 42 | {Version: "1000", EOL: "2000-12-01"}, 43 | } 44 | 45 | e.addMetricFromMSKInfo(0, createTestClusters(), mskInfos) 46 | 47 | labels, err := getMSKMetricLabels(&e, MSKInfos, "eol_date", "eol_status") 48 | if err != nil { 49 | t.Errorf("Error retrieving EOL labels: %v", err) 50 | } 51 | 52 | expectedEOLDate := "2000-12-01" 53 | expectedEOLStatus := "red" 54 | 55 | if eolDate, ok := labels["eol_date"]; !ok || eolDate != expectedEOLDate { 56 | 
t.Errorf("EOLDate metric has an unexpected value. Expected: %s, Actual: %s", expectedEOLDate, eolDate) 57 | } 58 | 59 | if eolStatus, ok := labels["eol_status"]; !ok || eolStatus != expectedEOLStatus { 60 | t.Errorf("EOLStatus metric has an unexpected value. Expected: %s, Actual: %s", expectedEOLStatus, eolStatus) 61 | } 62 | } 63 | 64 | func TestAddAllMSKMetricsWithoutEOLMatch(t *testing.T) { 65 | thresholds := []Threshold{ 66 | {Name: "red", Days: 90}, 67 | {Name: "yellow", Days: 180}, 68 | {Name: "green", Days: 365}, 69 | } 70 | 71 | e := MSKExporter{ 72 | sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})}, 73 | cache: *NewMetricsCache(10 * time.Second), 74 | logger: log.NewNopLogger(), 75 | thresholds: thresholds, 76 | } 77 | 78 | mskInfos := []MSKInfo{ 79 | {Version: "2000", EOL: "2000-12-01"}, 80 | } 81 | 82 | e.addMetricFromMSKInfo(0, createTestClusters(), mskInfos) 83 | 84 | labels, err := getMSKMetricLabels(&e, MSKInfos, "eol_date", "eol_status") 85 | if err != nil { 86 | t.Errorf("Error retrieving EOL labels: %v", err) 87 | } 88 | 89 | expectedEOLDate := "no-eol-date" 90 | expectedEOLStatus := "unknown" 91 | 92 | if eolDate, ok := labels["eol_date"]; !ok || eolDate != expectedEOLDate { 93 | t.Errorf("EOLDate metric has an unexpected value. Expected: %s, Actual: %s", expectedEOLDate, eolDate) 94 | } 95 | 96 | if eolStatus, ok := labels["eol_status"]; !ok || eolStatus != expectedEOLStatus { 97 | t.Errorf("EOLStatus metric has an unexpected value. 
Expected: %s, Actual: %s", expectedEOLStatus, eolStatus) 98 | } 99 | } 100 | 101 | func getMSKMetricLabels(x *MSKExporter, metricDesc *prometheus.Desc, labelNames ...string) (map[string]string, error) { 102 | metricDescription := metricDesc.String() 103 | metrics := x.cache.GetAllMetrics() 104 | 105 | for _, metric := range metrics { 106 | if metric.Desc().String() == metricDescription { 107 | dtoMetric := &dto.Metric{} 108 | if err := metric.Write(dtoMetric); err != nil { 109 | return nil, err 110 | } 111 | 112 | labelValues := make(map[string]string) 113 | for _, label := range dtoMetric.GetLabel() { 114 | for _, labelName := range labelNames { 115 | if label.GetName() == labelName { 116 | labelValues[labelName] = label.GetValue() 117 | } 118 | } 119 | } 120 | 121 | if len(labelValues) != len(labelNames) { 122 | return nil, fmt.Errorf("not all requested labels found in metric") 123 | } 124 | 125 | return labelValues, nil 126 | } 127 | } 128 | return nil, fmt.Errorf("metric not found") 129 | } 130 | -------------------------------------------------------------------------------- /pkg/proxy.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | "time" 7 | ) 8 | 9 | type MetricProxyItem struct { 10 | value interface{} 11 | ttl int 12 | creationTime time.Time 13 | } 14 | 15 | type MetricProxy struct { 16 | metrics map[string]*MetricProxyItem 17 | mutex sync.RWMutex 18 | } 19 | 20 | func NewMetricProxy() *MetricProxy { 21 | mp := &MetricProxy{} 22 | mp.metrics = make(map[string]*MetricProxyItem) 23 | return mp 24 | } 25 | 26 | func (mp *MetricProxy) GetMetricById(id string) (*MetricProxyItem, error) { 27 | mp.mutex.RLock() 28 | defer mp.mutex.RUnlock() 29 | if m, ok := mp.metrics[id]; ok { 30 | if time.Since(m.creationTime).Seconds() > float64(m.ttl) { 31 | return nil, errors.New("metric ttl has expired") 32 | } 33 | return m, nil 34 | } else { 35 | return nil, 
errors.New("metric not found") 36 | } 37 | } 38 | 39 | func (mp *MetricProxy) StoreMetricById(id string, value interface{}, ttl int) { 40 | mp.mutex.Lock() 41 | mp.metrics[id] = &MetricProxyItem{ 42 | value: value, 43 | creationTime: time.Now(), 44 | ttl: ttl, 45 | } 46 | mp.mutex.Unlock() 47 | } 48 | -------------------------------------------------------------------------------- /pkg/proxy_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "math" 5 | "reflect" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestGetMetricById(t *testing.T) { 11 | type args struct { 12 | mp *MetricProxy 13 | key string 14 | } 15 | now := time.Now() 16 | tests := []struct { 17 | name string 18 | args args 19 | want *MetricProxyItem 20 | }{ 21 | { 22 | name: "Attempt retrieving value by providing valid id (key exists)", 23 | args: args{ 24 | mp: &MetricProxy{ 25 | metrics: map[string]*MetricProxyItem{ 26 | "valid": &MetricProxyItem{ 27 | value: "value", 28 | ttl: math.MaxInt32, 29 | creationTime: now, 30 | }, 31 | }, 32 | }, 33 | key: "valid", 34 | }, 35 | want: &MetricProxyItem{ 36 | value: "value", 37 | ttl: math.MaxInt32, 38 | creationTime: now, 39 | }, 40 | }, 41 | { 42 | name: "Attempt retrieving value by providing valid id but ttl expired", 43 | args: args{ 44 | mp: &MetricProxy{ 45 | metrics: map[string]*MetricProxyItem{ 46 | "expired": &MetricProxyItem{ 47 | value: 100, 48 | ttl: 1, 49 | creationTime: now, 50 | }, 51 | }, 52 | }, 53 | key: "expired", 54 | }, 55 | want: nil, 56 | }, 57 | } 58 | 59 | time.Sleep(2 * time.Second) //ensure ttl for second test expires 60 | 61 | for _, tt := range tests { 62 | t.Run(tt.name, func(t *testing.T) { 63 | if got, _ := tt.args.mp.GetMetricById(tt.args.key); !reflect.DeepEqual(got, tt.want) { 64 | t.Errorf("GetMetricById() = %v, want %v", got, tt.want) 65 | } 66 | }) 67 | } 68 | } 69 | 70 | func TestStoreMetricById(t *testing.T) { 71 | type args struct { 72 | mp 
package pkg

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/app-sre/aws-resource-exporter/pkg/awsclient"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/rds"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"
)

// Default TTL value for RDS logs related metrics.
// To get the log metrics an api call for each instance is needed.
// Since this causes rate limit problems to the AWS api, these metrics
// are cached for this amount of time before requesting them again.
var RDS_LOGS_METRICS_TTL = "LOGS_METRICS_TTL" // env var name that overrides the TTL (seconds)
var RDS_LOGS_METRICS_TTL_DEFAULT = 300        // fallback TTL in seconds

// RDS log metrics are requested in parallel with a workerPool.
// this variable sets the number of workers.
var RDS_LOGS_METRICS_WORKERS = "LOGS_METRICS_WORKERS" // env var name that overrides the worker count
var RDS_LOGS_METRICS_WORKERS_DEFAULT = 10             // fallback number of workers

// Struct to store RDS Instances log files data:
// number of log files and their combined size in bytes.
// This struct is used to store the data in the MetricsProxy.
type RDSLogsMetrics struct {
	logs         int
	totalLogSize int64
}

// MetricsProxy: package-level TTL cache shared by all RDS exporter
// instances for the expensive per-instance log-file lookups.
var metricsProxy = NewMetricProxy()
// DBMaxConnections is a hardcoded map of instance types and DB Parameter Group names.
// This is a dump workaround created because by default the DB Parameter Group `max_connections` is a function
// that is hard to parse and process in code and it contains a variable whose value is unknown to us (DBInstanceClassMemory).
// AWS has no means to return the actual `max_connections` value.
//
// Non Aurora: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_Limits.html#RDS_Limits.MaxConnections
// DBInstanceClassMemory in bytes: Memory (in GiB) * 1024 * 1024 * 1024
// Attention: DBInstanceClassMemory is the real memory available for the DB process and not all the instance memory!
// * postgres: LEAST({DBInstanceClassMemory_in_Bytes / 9531392},5000)
// * mysql: {DBInstanceClassMemory/12582880} - 50 and round down to the nearest hundreds.
//
// For MYSQL Aurora see: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/AuroraMySQL.Managing.Performance.html
//
// Attention: use "default" for all Postgres versions (non-aurora)!
var DBMaxConnections = map[string]map[string]int64{
	//
	// Tx
	//
	"db.t3.micro": map[string]int64{
		// Memory: 1 GiB
		"default":          112,
		"default.mysql5.7": 45,
		"default.mysql8.0": 45,
	},
	"db.t3.small": map[string]int64{
		// Memory: 2 GiB
		"default":          225,
		"default.mysql5.7": 130,
		"default.mysql8.0": 130,
	},
	"db.t3.medium": map[string]int64{
		// Memory: 4 GiB
		"default":          450,
		"default.mysql5.7": 300,
		"default.mysql8.0": 300,
	},
	"db.t4g.micro": map[string]int64{
		// Memory: 1 GiB
		"default":          112,
		"default.mysql5.7": 45,
		"default.mysql8.0": 45,
	},
	"db.t4g.small": map[string]int64{
		// Memory: 2 GiB
		"default":          225,
		"default.mysql5.7": 130,
		"default.mysql8.0": 130,
	},
	"db.t4g.medium": map[string]int64{
		// Memory: 4 GiB
		"default":          450,
		"default.mysql5.7": 300,
		"default.mysql8.0": 300,
	},
	"db.t4g.large": map[string]int64{
		// Memory: 8 GiB
		"default":          900,
		"default.mysql5.7": 600,
		"default.mysql8.0": 600,
	},
	"db.t4g.xlarge": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.t4g.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},

	//
	// M5
	//
	"db.m5.large": map[string]int64{
		// Memory: 8 GiB
		"default":          900,
		"default.mysql5.7": 600,
		"default.mysql8.0": 600,
	},
	"db.m5.xlarge": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.m5.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},
	"db.m5.4xlarge": map[string]int64{
		// Memory: 64 GiB
		"default":          5000,
		"default.mysql5.7": 5300,
		"default.mysql8.0": 5300,
	},
	"db.m5.8xlarge": map[string]int64{
		// Memory: 128 GiB
		"default":          5000,
		"default.mysql5.7": 10700,
		"default.mysql8.0": 10700,
	},
	"db.m5.16xlarge": map[string]int64{
		// Memory: 256 GiB
		"default":                 5000,
		"default.aurora-mysql5.7": 6000,
		"default.aurora-mysql5.8": 6000,
		"default.aurora-mysql8.0": 6000,
		"default.mysql5.7":        21600,
		"default.mysql8.0":        21600,
	},

	//
	// M6g
	//
	"db.m6g.large": map[string]int64{
		// Memory: 8 GiB
		"default":          900,
		"default.mysql5.7": 600,
		"default.mysql8.0": 600,
	},
	"db.m6g.xlarge": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.m6g.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},
	"db.m6g.4xlarge": map[string]int64{
		// Memory: 64 GiB
		"default":          5000,
		"default.mysql5.7": 5300,
		"default.mysql8.0": 5300,
	},
	"db.m6g.8xlarge": map[string]int64{
		// Memory: 128 GiB
		"default":          5000,
		"default.mysql5.7": 10700,
		"default.mysql8.0": 10700,
	},
	"db.m6g.12xlarge": map[string]int64{
		// Memory: 192 GiB
		"default":          5000,
		"default.mysql5.7": 16200,
		"default.mysql8.0": 16200,
	},

	//
	// M6gd
	//
	"db.m6gd.xlarge": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.m6gd.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},

	//
	// M6i
	//
	"db.m6i.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},

	//
	// M7g
	//
	"db.m7g.large": map[string]int64{
		// Memory: 8 GiB
		"default":          900,
		"default.mysql5.7": 600,
		"default.mysql8.0": 600,
	},
	"db.m7g.xlarge": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.m7g.2xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},
	"db.m7g.4xlarge": map[string]int64{
		// Memory: 64 GiB
		"default":          5000,
		"default.mysql5.7": 5300,
		"default.mysql8.0": 5300,
	},
	"db.m7g.8xlarge": map[string]int64{
		// Memory: 128 GiB
		"default":          5000,
		"default.mysql5.7": 10700,
		"default.mysql8.0": 10700,
	},
	"db.m7g.12xlarge": map[string]int64{
		// Memory: 192 GiB
		"default":          5000,
		"default.mysql5.7": 16200,
		"default.mysql8.0": 16200,
	},

	//
	// R5
	//
	"db.r5.large": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.r5.xlarge": map[string]int64{
		// Memory: 32 GiB
		"default":          3600,
		"default.mysql5.7": 2600,
		"default.mysql8.0": 2600,
	},
	"db.r5.2xlarge": map[string]int64{
		// Memory: 64 GiB
		"default":                 5000,
		"default.mysql5.7":        5300,
		"default.mysql8.0":        5300,
		"default.aurora-mysql5.7": 3000,
		"default.aurora-mysql5.8": 3000,
		"default.aurora-mysql8.0": 3000,
	},
	"db.r5.4xlarge": map[string]int64{
		// Memory: 128 GiB
		"default":          5000,
		"default.mysql5.7": 10700,
		"default.mysql8.0": 10700,
	},
	"db.r5.8xlarge": map[string]int64{
		// Memory: 256 GiB
		"default":          5000,
		"default.mysql5.7": 21600,
		"default.mysql8.0": 21600,
	},
	"db.r5.12xlarge": map[string]int64{
		// Memory: 384 GiB
		"default":          5000,
		"default.mysql5.7": 32768,
		"default.mysql8.0": 32768,
	},
	"db.r5.16xlarge": map[string]int64{
		// Memory: 512 GiB
		"default":          5000,
		"default.mysql5.7": 43400,
		"default.mysql8.0": 43400,
	},
	"db.r5.24xlarge": map[string]int64{
		// Memory: 768 GiB
		"default":          5000,
		"default.mysql5.7": 65400,
		"default.mysql8.0": 65400,
	},

	//
	// R6
	//
	"db.r6g.12xlarge": map[string]int64{
		// Memory: 384 GiB
		"default":          5000,
		"default.mysql5.7": 32768,
		"default.mysql8.0": 32768,
	},
	"db.r6i.large": map[string]int64{
		// Memory: 16 GiB
		"default":          1800,
		"default.mysql5.7": 1300,
		"default.mysql8.0": 1300,
	},
	"db.r6i.16xlarge": map[string]int64{
		// Memory: 512 GiB
		"default":          5000,
		"default.mysql5.7": 43400,
		"default.mysql8.0": 43400,
	},
	"db.r6g.8xlarge": map[string]int64{
		// Memory: 256 GiB
		"default":          5000,
		"default.mysql5.7": 21600,
		"default.mysql8.0": 21600,
	},
}

// AllocatedStorage reports the instance's allocated storage in bytes.
var AllocatedStorage *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_allocatedstorage"),
	"The amount of allocated storage in bytes.",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// DBInstanceClass reports the instance type as a label on a constant 1.
var DBInstanceClass *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_dbinstanceclass"),
	"The DB instance class (type).",
	[]string{"aws_region", "dbinstance_identifier", "instance_class"},
	nil,
)
// DBInstanceStatus reports the instance status as a label on a constant 1.
var DBInstanceStatus *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_dbinstancestatus"),
	"The instance status.",
	[]string{"aws_region", "dbinstance_identifier", "instance_status"},
	nil,
)

// EngineVersion reports engine and version as labels on a constant 1.
var EngineVersion *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_engineversion"),
	"The DB engine type and version.",
	[]string{"aws_region", "dbinstance_identifier", "engine", "engine_version", "aws_account_id"},
	nil,
)

// LatestRestorableTime reports the point-in-time-recovery horizon as a
// Unix timestamp.
var LatestRestorableTime *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_latestrestorabletime"),
	"Latest restorable time (UTC date timestamp).",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// MaxConnections reports the max_connections value looked up from the
// hardcoded DBMaxConnections table (0 when no mapping exists).
var MaxConnections *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_maxconnections"),
	"The DB's max_connections value",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// MaxConnectionsMappingError is 1 when DBMaxConnections has no entry for
// the instance class / parameter group combination, else 0.
var MaxConnectionsMappingError *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_maxconnections_error"),
	"Indicates no mapping found for instance/parameter group.",
	[]string{"aws_region", "dbinstance_identifier", "instance_class"},
	nil,
)

// PendingMaintenanceActions enumerates pending maintenance per instance.
var PendingMaintenanceActions *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_pendingmaintenanceactions"),
	"Pending maintenance actions for a RDS instance. 0 indicates no available maintenance and a separate metric with a value of 1 will be published for every separate action.",
	[]string{"aws_region", "dbinstance_identifier", "action", "auto_apply_after", "current_apply_date", "description"},
	nil,
)

// PubliclyAccessible is 1 when the instance is publicly reachable.
var PubliclyAccessible *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_publiclyaccessible"),
	"Indicates if the DB is publicly accessible",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// StorageEncrypted is 1 when storage-at-rest encryption is enabled.
var StorageEncrypted *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_storageencrypted"),
	"Indicates if the DB storage is encrypted",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// LogsStorageSize reports the combined size of an instance's log files.
var LogsStorageSize *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_logsstorage_size_bytes"),
	"The amount of storage consumed by log files (in bytes)",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// LogsAmount reports the number of log files an instance currently has.
var LogsAmount *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_logs_amount"),
	"The amount of existent log files",
	[]string{"aws_region", "dbinstance_identifier"},
	nil,
)

// EOLInfos reports the configured EOL date and derived status for the
// instance's engine/version as labels on a constant 1.
var EOLInfos *prometheus.Desc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "rds_eol_info"),
	"The EOL date and status for the DB engine type and version.",
	[]string{"aws_region", "dbinstance_identifier", "engine", "engine_version", "eol_date", "eol_status"},
	nil,
)

// RDSExporter defines an instance of the RDS Exporter
type RDSExporter struct {
	sessions     []*session.Session
	svcs         []awsclient.Client
	eolInfos     []EOLInfo   // configured engine/version -> EOL-date table
	thresholds   []Threshold // day thresholds mapping dates to status names
	awsAccountId string

	workers        int // parallelism for per-instance log-file lookups
	logsMetricsTTL int // seconds to cache log metrics in the MetricsProxy

	logger   log.Logger
	cache    MetricsCache
	interval time.Duration // sleep between CollectLoop iterations
	timeout  time.Duration // per-iteration budget for AWS calls
}
439 | timeout time.Duration 440 | } 441 | 442 | // NewRDSExporter creates a new RDSExporter instance 443 | func NewRDSExporter(sessions []*session.Session, logger log.Logger, config RDSConfig, awsAccountId string) *RDSExporter { 444 | level.Info(logger).Log("msg", "Initializing RDS exporter") 445 | 446 | workers, _ := GetEnvIntValue(RDS_LOGS_METRICS_WORKERS) 447 | if workers == nil { 448 | workers = &RDS_LOGS_METRICS_WORKERS_DEFAULT 449 | level.Info(logger).Log("msg", fmt.Sprintf("Using default value for number Workers: %d", RDS_LOGS_METRICS_WORKERS_DEFAULT)) 450 | } else { 451 | level.Info(logger).Log("msg", fmt.Sprintf("Using Env value for number of Workers: %d", *workers)) 452 | } 453 | 454 | logMetricsTTL, _ := GetEnvIntValue(RDS_LOGS_METRICS_TTL) 455 | if logMetricsTTL == nil { 456 | logMetricsTTL = &RDS_LOGS_METRICS_TTL_DEFAULT 457 | level.Info(logger).Log("msg", fmt.Sprintf("Using default value for logs metrics TTL: %d", RDS_LOGS_METRICS_TTL_DEFAULT)) 458 | } else { 459 | level.Info(logger).Log("msg", fmt.Sprintf("Using Env value for logs metrics TTL: %d", *logMetricsTTL)) 460 | } 461 | var rdses []awsclient.Client 462 | for _, session := range sessions { 463 | rdses = append(rdses, awsclient.NewClientFromSession(session)) 464 | } 465 | 466 | return &RDSExporter{ 467 | sessions: sessions, 468 | svcs: rdses, 469 | workers: *workers, 470 | logsMetricsTTL: *logMetricsTTL, 471 | logger: logger, 472 | cache: *NewMetricsCache(*config.CacheTTL), 473 | interval: *config.Interval, 474 | timeout: *config.Timeout, 475 | eolInfos: config.EOLInfos, 476 | thresholds: config.Thresholds, 477 | awsAccountId: awsAccountId, 478 | } 479 | 480 | } 481 | 482 | func (e *RDSExporter) getRegion(sessionIndex int) string { 483 | return *e.sessions[sessionIndex].Config.Region 484 | } 485 | 486 | func (e *RDSExporter) requestRDSLogMetrics(ctx context.Context, sessionIndex int, instanceId string) (*RDSLogsMetrics, error) { 487 | var logMetrics = &RDSLogsMetrics{ 488 | logs: 0, 489 | 
totalLogSize: 0, 490 | } 491 | 492 | logOutPuts, err := e.svcs[sessionIndex].DescribeDBLogFilesAll(ctx, instanceId) 493 | if err != nil { 494 | level.Error(e.logger).Log("msg", "Call to DescribeDBLogFiles failed", "region", e.getRegion(sessionIndex), "instance", &instanceId, "err", err) 495 | return nil, err 496 | } 497 | 498 | for _, outputs := range logOutPuts { 499 | for _, log := range outputs.DescribeDBLogFiles { 500 | logMetrics.logs++ 501 | logMetrics.totalLogSize += *log.Size 502 | } 503 | 504 | } 505 | 506 | return logMetrics, nil 507 | } 508 | 509 | func (e *RDSExporter) addRDSLogMetrics(ctx context.Context, sessionIndex int, instanceId string) error { 510 | instaceLogFilesId := instanceId + "-" + "logfiles" 511 | var logMetrics *RDSLogsMetrics 512 | cachedItem, err := metricsProxy.GetMetricById(instaceLogFilesId) 513 | if err != nil { 514 | logMetrics, err = e.requestRDSLogMetrics(ctx, sessionIndex, instanceId) 515 | if err != nil { 516 | return err 517 | } 518 | metricsProxy.StoreMetricById(instaceLogFilesId, logMetrics, e.logsMetricsTTL) 519 | } else { 520 | logMetrics = cachedItem.value.(*RDSLogsMetrics) 521 | } 522 | e.cache.AddMetric(prometheus.MustNewConstMetric(LogsAmount, prometheus.GaugeValue, float64(logMetrics.logs), e.getRegion(sessionIndex), instanceId)) 523 | e.cache.AddMetric(prometheus.MustNewConstMetric(LogsStorageSize, prometheus.GaugeValue, float64(logMetrics.totalLogSize), e.getRegion(sessionIndex), instanceId)) 524 | return nil 525 | } 526 | 527 | func (e *RDSExporter) addAllLogMetrics(ctx context.Context, sessionIndex int, instances []*rds.DBInstance) { 528 | wg := &sync.WaitGroup{} 529 | wg.Add(len(instances)) 530 | 531 | // this channel is used to limit the number of concurrency 532 | sem := make(chan int, e.workers) 533 | 534 | defer close(sem) 535 | for _, instance := range instances { 536 | sem <- 1 537 | go func(instanceName string) { 538 | defer func() { 539 | <-sem 540 | wg.Done() 541 | }() 542 | e.addRDSLogMetrics(ctx, 
sessionIndex, instanceName) 543 | }(*instance.DBInstanceIdentifier) 544 | } 545 | wg.Wait() 546 | } 547 | 548 | func (e *RDSExporter) addAllInstanceMetrics(sessionIndex int, instances []*rds.DBInstance, eolInfos []EOLInfo) { 549 | var eolMap = make(map[EOLKey]EOLInfo) 550 | 551 | // Fill eolMap with EOLInfo indexed by engine and version 552 | for _, eolinfo := range eolInfos { 553 | eolMap[EOLKey{Engine: eolinfo.Engine, Version: eolinfo.Version}] = eolinfo 554 | } 555 | 556 | for _, instance := range instances { 557 | var maxConnections int64 558 | if valmap, ok := DBMaxConnections[*instance.DBInstanceClass]; ok { 559 | var maxconn int64 560 | var found bool 561 | if val, ok := valmap[*instance.DBParameterGroups[0].DBParameterGroupName]; ok { 562 | maxconn = val 563 | found = true 564 | } else if val, ok := valmap["default"]; ok { 565 | maxconn = val 566 | found = true 567 | } 568 | if found { 569 | level.Debug(e.logger).Log("msg", "Found mapping for instance", 570 | "type", *instance.DBInstanceClass, 571 | "group", *instance.DBParameterGroups[0].DBParameterGroupName, 572 | "value", maxconn) 573 | maxConnections = maxconn 574 | e.cache.AddMetric(prometheus.MustNewConstMetric(MaxConnectionsMappingError, prometheus.GaugeValue, 0, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.DBInstanceClass)) 575 | } else { 576 | level.Error(e.logger).Log("msg", "No DB max_connections mapping exists for instance", 577 | "type", *instance.DBInstanceClass, 578 | "group", *instance.DBParameterGroups[0].DBParameterGroupName) 579 | e.cache.AddMetric(prometheus.MustNewConstMetric(MaxConnectionsMappingError, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.DBInstanceClass)) 580 | } 581 | } else { 582 | level.Error(e.logger).Log("msg", "No DB max_connections mapping exists for instance", 583 | "type", *instance.DBInstanceClass) 584 | e.cache.AddMetric(prometheus.MustNewConstMetric(MaxConnectionsMappingError, 
prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.DBInstanceClass)) 585 | } 586 | 587 | //Gets EOL for engine and version 588 | if eolInfo, ok := eolMap[EOLKey{Engine: *instance.Engine, Version: *instance.EngineVersion}]; ok { 589 | eolStatus, err := GetEOLStatus(eolInfo.EOL, e.thresholds) 590 | if err != nil { 591 | level.Error(e.logger).Log("msg", fmt.Sprintf("Could not get days to RDS EOL for Engine %s, Version %s: %s\n", *instance.Engine, *instance.EngineVersion, err.Error())) 592 | } else { 593 | e.cache.AddMetric(prometheus.MustNewConstMetric(EOLInfos, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.Engine, *instance.EngineVersion, eolInfo.EOL, eolStatus)) 594 | } 595 | } else { 596 | level.Info(e.logger).Log("msg", fmt.Sprintf("RDS EOL not found for Engine %s, Version %s\n", *instance.Engine, *instance.EngineVersion)) 597 | } 598 | 599 | var public = 0.0 600 | if *instance.PubliclyAccessible { 601 | public = 1.0 602 | } 603 | e.cache.AddMetric(prometheus.MustNewConstMetric(PubliclyAccessible, prometheus.GaugeValue, public, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier)) 604 | 605 | var encrypted = 0.0 606 | if *instance.StorageEncrypted { 607 | encrypted = 1.0 608 | } 609 | e.cache.AddMetric(prometheus.MustNewConstMetric(StorageEncrypted, prometheus.GaugeValue, encrypted, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier)) 610 | 611 | var restoreTime = 0.0 612 | if instance.LatestRestorableTime != nil { 613 | restoreTime = float64(instance.LatestRestorableTime.Unix()) 614 | } 615 | e.cache.AddMetric(prometheus.MustNewConstMetric(LatestRestorableTime, prometheus.CounterValue, restoreTime, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier)) 616 | 617 | e.cache.AddMetric(prometheus.MustNewConstMetric(MaxConnections, prometheus.GaugeValue, float64(maxConnections), e.getRegion(sessionIndex), *instance.DBInstanceIdentifier)) 618 | 
e.cache.AddMetric(prometheus.MustNewConstMetric(AllocatedStorage, prometheus.GaugeValue, float64(*instance.AllocatedStorage*1024*1024*1024), e.getRegion(sessionIndex), *instance.DBInstanceIdentifier)) 619 | e.cache.AddMetric(prometheus.MustNewConstMetric(DBInstanceStatus, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.DBInstanceStatus)) 620 | e.cache.AddMetric(prometheus.MustNewConstMetric(EngineVersion, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.Engine, *instance.EngineVersion, e.awsAccountId)) 621 | e.cache.AddMetric(prometheus.MustNewConstMetric(DBInstanceClass, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, *instance.DBInstanceClass)) 622 | } 623 | } 624 | 625 | func (e *RDSExporter) addAllPendingMaintenancesMetrics(ctx context.Context, sessionIndex int, instances []*rds.DBInstance) { 626 | // Get pending maintenance data because this isn't provided in DescribeDBInstances 627 | instancesWithPendingMaint := make(map[string]bool) 628 | 629 | instancesPendMaintActionsData, err := e.svcs[sessionIndex].DescribePendingMaintenanceActionsAll(ctx) 630 | 631 | if err != nil { 632 | level.Error(e.logger).Log("msg", "Call to DescribePendingMaintenanceActions failed", "region", e.getRegion(sessionIndex), "err", err) 633 | return 634 | } 635 | 636 | // Create the metrics for all instances that have pending maintenance actions 637 | for _, instance := range instancesPendMaintActionsData { 638 | for _, action := range instance.PendingMaintenanceActionDetails { 639 | // DescribePendingMaintenanceActions only returns ARNs, so this gets the identifier. 
640 | dbIdentifier := strings.Split(*instance.ResourceIdentifier, ":")[6] 641 | instancesWithPendingMaint[dbIdentifier] = true 642 | 643 | var autoApplyDate string 644 | if action.AutoAppliedAfterDate != nil { 645 | autoApplyDate = action.AutoAppliedAfterDate.String() 646 | } 647 | 648 | var currentApplyDate string 649 | if action.CurrentApplyDate != nil { 650 | currentApplyDate = action.CurrentApplyDate.String() 651 | } 652 | 653 | e.cache.AddMetric(prometheus.MustNewConstMetric(PendingMaintenanceActions, prometheus.GaugeValue, 1, e.getRegion(sessionIndex), dbIdentifier, *action.Action, autoApplyDate, currentApplyDate, *action.Description)) 654 | } 655 | } 656 | 657 | // DescribePendingMaintenanceActions only returns data about database with pending maintenance, so for any of the 658 | // other databases returned from DescribeDBInstances, publish a value of "0" indicating that maintenance isn't 659 | // available. 660 | for _, instance := range instances { 661 | if !instancesWithPendingMaint[*instance.DBInstanceIdentifier] { 662 | e.cache.AddMetric(prometheus.MustNewConstMetric(PendingMaintenanceActions, prometheus.GaugeValue, 0, e.getRegion(sessionIndex), *instance.DBInstanceIdentifier, "", "", "", "")) 663 | } 664 | } 665 | 666 | } 667 | 668 | // Describe is used by the Prometheus client to return a description of the metrics 669 | func (e *RDSExporter) Describe(ch chan<- *prometheus.Desc) { 670 | ch <- AllocatedStorage 671 | ch <- DBInstanceClass 672 | ch <- DBInstanceStatus 673 | ch <- EngineVersion 674 | ch <- LatestRestorableTime 675 | ch <- MaxConnections 676 | ch <- MaxConnectionsMappingError 677 | ch <- PendingMaintenanceActions 678 | ch <- PubliclyAccessible 679 | ch <- StorageEncrypted 680 | ch <- EOLInfos 681 | 682 | } 683 | 684 | func (e *RDSExporter) CollectLoop() { 685 | for { 686 | ctx, cancel := context.WithTimeout(context.Background(), e.timeout) 687 | for i, _ := range e.sessions { 688 | 689 | instances, err := 
e.svcs[i].DescribeDBInstancesAll(ctx) 690 | if err != nil { 691 | level.Error(e.logger).Log("msg", "Call to DescribeDBInstances failed", "region", *e.sessions[i].Config.Region, "err", err) 692 | } 693 | 694 | wg := sync.WaitGroup{} 695 | wg.Add(3) 696 | 697 | go func() { 698 | e.addAllInstanceMetrics(i, instances, e.eolInfos) 699 | wg.Done() 700 | }() 701 | go func() { 702 | e.addAllLogMetrics(ctx, i, instances) 703 | wg.Done() 704 | }() 705 | go func() { 706 | e.addAllPendingMaintenancesMetrics(ctx, i, instances) 707 | wg.Done() 708 | }() 709 | wg.Wait() 710 | } 711 | 712 | level.Info(e.logger).Log("msg", "RDS metrics Updated") 713 | 714 | cancel() 715 | time.Sleep(e.interval) 716 | } 717 | } 718 | 719 | // Collect is used by the Prometheus client to collect and return the metrics values 720 | func (e *RDSExporter) Collect(ch chan<- prometheus.Metric) { 721 | for _, m := range e.cache.GetAllMetrics() { 722 | ch <- m 723 | } 724 | } 725 | -------------------------------------------------------------------------------- /pkg/rds_test.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 10 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient/mock" 11 | "github.com/aws/aws-sdk-go/aws" 12 | "github.com/aws/aws-sdk-go/aws/session" 13 | "github.com/aws/aws-sdk-go/service/rds" 14 | "github.com/go-kit/log" 15 | "github.com/golang/mock/gomock" 16 | "github.com/prometheus/client_golang/prometheus" 17 | dto "github.com/prometheus/client_model/go" 18 | "github.com/stretchr/testify/assert" 19 | ) 20 | 21 | func createTestDBInstances() []*rds.DBInstance { 22 | return []*rds.DBInstance{ 23 | { 24 | DBInstanceIdentifier: aws.String("footest"), 25 | DBInstanceClass: aws.String("db.m5.xlarge"), 26 | DBParameterGroups: []*rds.DBParameterGroupStatus{{DBParameterGroupName: 
// TestRequestRDSLogMetrics verifies that log counts and sizes are summed
// across all pages returned by DescribeDBLogFilesAll.
func TestRequestRDSLogMetrics(t *testing.T) {
	ctx := context.TODO()
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockClient := mock.NewMockClient(ctrl)
	// Two pages with three log files total: 123 + 123 + 1 = 247 bytes.
	mockClient.EXPECT().DescribeDBLogFilesAll(ctx, "footest").Return([]*rds.DescribeDBLogFilesOutput{
		{DescribeDBLogFiles: []*rds.DescribeDBLogFilesDetails{{Size: aws.Int64(123)}, {Size: aws.Int64(123)}}},
		{DescribeDBLogFiles: []*rds.DescribeDBLogFilesDetails{{Size: aws.Int64(1)}}},
	}, nil)

	x := RDSExporter{
		svcs: []awsclient.Client{mockClient},
	}

	metrics, err := x.requestRDSLogMetrics(ctx, 0, "footest")
	assert.Equal(t, int64(247), metrics.totalLogSize)
	assert.Equal(t, 3, metrics.logs)
	assert.Nil(t, err)
}

// TestAddRDSLogMetrics verifies that addRDSLogMetrics publishes exactly two
// metrics (log count and total log size) into the exporter cache.
func TestAddRDSLogMetrics(t *testing.T) {
	ctx := context.TODO()
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockClient := mock.NewMockClient(ctrl)
	mockClient.EXPECT().DescribeDBLogFilesAll(ctx, "footest").Return([]*rds.DescribeDBLogFilesOutput{
		{DescribeDBLogFiles: []*rds.DescribeDBLogFilesDetails{{Size: aws.Int64(123)}, {Size: aws.Int64(123)}}},
		{DescribeDBLogFiles: []*rds.DescribeDBLogFilesDetails{{Size: aws.Int64(1)}}},
	}, nil)

	x := RDSExporter{
		svcs:     []awsclient.Client{mockClient},
		sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})},
		cache:    *NewMetricsCache(10 * time.Second),
	}

	err := x.addRDSLogMetrics(ctx, 0, "footest")
	// One metric for the log count, one for the total log size.
	assert.Len(t, x.cache.GetAllMetrics(), 2)
	assert.Nil(t, err)
}
// TestAddAllInstanceMetricsWithEOLMatch verifies that when an instance's
// engine/version matches an EOLInfo entry, the EOLInfos metric carries the
// expected eol_date and eol_status labels (EOL in the past => "red").
func TestAddAllInstanceMetricsWithEOLMatch(t *testing.T) {
	thresholds := []Threshold{
		{Name: "red", Days: 90},
		{Name: "yellow", Days: 180},
		{Name: "green", Days: 365},
	}

	x := RDSExporter{
		sessions:   []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})},
		cache:      *NewMetricsCache(10 * time.Second),
		logger:     log.NewNopLogger(),
		thresholds: thresholds,
	}

	// Matches the test instance's Engine "SQL" / EngineVersion "1000".
	eolInfos := []EOLInfo{
		{Engine: "SQL", Version: "1000", EOL: "2000-12-01"},
	}

	x.addAllInstanceMetrics(0, createTestDBInstances(), eolInfos)

	labels, err := getMetricLabels(&x, EOLInfos, "eol_date", "eol_status")
	if err != nil {
		t.Errorf("Error retrieving EOL labels: %v", err)
	}

	expectedEOLDate := "2000-12-01"
	expectedEOLStatus := "red"

	if eolDate, ok := labels["eol_date"]; !ok || eolDate != expectedEOLDate {
		t.Errorf("EOLDate metric has an unexpected value. Expected: %s, Actual: %s", expectedEOLDate, eolDate)
	}

	if eolStatus, ok := labels["eol_status"]; !ok || eolStatus != expectedEOLStatus {
		t.Errorf("EOLStatus metric has an unexpected value. Expected: %s, Actual: %s", expectedEOLStatus, eolStatus)
	}
}
// TestAddAllInstanceMetricsWithGetEOLStatusError verifies that an unparseable
// EOL date suppresses the EOLInfos metric entirely (no labels retrievable).
func TestAddAllInstanceMetricsWithGetEOLStatusError(t *testing.T) {
	x := RDSExporter{
		sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})},
		cache:    *NewMetricsCache(10 * time.Second),
		logger:   log.NewNopLogger(),
	}

	// "invalid-date" cannot be parsed as 2006-01-02, so GetEOLStatus fails.
	eolInfos := []EOLInfo{
		{Engine: "SQL", Version: "1000", EOL: "invalid-date"},
	}

	x.addAllInstanceMetrics(0, createTestDBInstances(), eolInfos)

	labels, err := getMetricLabels(&x, EOLInfos, "eol_date", "eol_status")

	if err == nil {
		t.Errorf("Expected an error from getMetricLabels but got none")
	}
	if len(labels) > 0 {
		t.Errorf("Expected no labels to be returned, got: %v", labels)
	}
}
// TestGetEOLStatus exercises threshold classification of an EOL date:
// each case picks the first threshold whose day count is not exceeded,
// dates beyond the largest threshold fall back to the last (most lenient)
// name, and an empty threshold list is an error.
func TestGetEOLStatus(t *testing.T) {
	thresholds := []Threshold{
		{Name: "red", Days: 90},
		{Name: "yellow", Days: 180},
		{Name: "green", Days: 365},
	}

	// EOL date is within 90 days
	eol := time.Now().Add(2 * 24 * time.Hour).Format("2006-01-02")
	expectedStatus := "red"
	status, err := GetEOLStatus(eol, thresholds)
	if err != nil {
		t.Errorf("Expected no error, but got an error: %v", err)
	}
	if status != expectedStatus {
		t.Errorf("Expected status '%s', but got '%s'", expectedStatus, status)
	}

	// EOL date is within 180 days
	eol = time.Now().Add(120 * 24 * time.Hour).Format("2006-01-02")
	expectedStatus = "yellow"
	status, err = GetEOLStatus(eol, thresholds)
	if err != nil {
		t.Errorf("Expected no error, but got an error: %v", err)
	}
	if status != expectedStatus {
		t.Errorf("Expected status '%s', but got '%s'", expectedStatus, status)
	}

	// EOL date is more than 180 days
	eol = time.Now().Add(200 * 24 * time.Hour).Format("2006-01-02")
	expectedStatus = "green"
	status, err = GetEOLStatus(eol, thresholds)
	if err != nil {
		t.Errorf("Expected no error, but got an error: %v", err)
	}
	if status != expectedStatus {
		t.Errorf("Expected status '%s', but got '%s'", expectedStatus, status)
	}

	//EOL date exceeds highest threshold
	eol = time.Now().Add(400 * 24 * time.Hour).Format("2006-01-02")
	expectedStatus = "green"
	status, err = GetEOLStatus(eol, thresholds)
	if err != nil {
		t.Errorf("Expected no error, but got an error: %v", err)
	}
	if status != expectedStatus {
		t.Errorf("Expected status '%s', but got '%s'", expectedStatus, status)
	}

	//Thresholds is empty
	eol = time.Now().Add(30 * 24 * time.Hour).Format("2006-01-02")
	emptyThresholds := []Threshold{}
	status, err = GetEOLStatus(eol, emptyThresholds)
	if err == nil {
		t.Errorf("Expected an error for empty thresholds, but got none")
	}
	if status != "" {
		t.Errorf("Expected no status for empty thresholds, but got '%s'", status)
	}
}
// Helper function to retrieve metric values from the cache
//
// getMetricLabels finds the first cached metric whose descriptor matches
// metricDesc, writes it into a dto.Metric, and returns the requested label
// name->value pairs. Errors if the metric is absent or if any requested
// label is missing from the matched metric.
func getMetricLabels(x *RDSExporter, metricDesc *prometheus.Desc, labelNames ...string) (map[string]string, error) {
	// Descriptors are compared by their String() form.
	metricDescription := metricDesc.String()
	metrics := x.cache.GetAllMetrics()

	for _, metric := range metrics {
		if metric.Desc().String() == metricDescription {
			dtoMetric := &dto.Metric{}
			if err := metric.Write(dtoMetric); err != nil {
				return nil, err
			}

			labelValues := make(map[string]string)
			for _, label := range dtoMetric.GetLabel() {
				for _, labelName := range labelNames {
					if label.GetName() == labelName {
						labelValues[labelName] = label.GetValue()
					}
				}
			}

			// All requested labels must be present on the matched metric.
			if len(labelValues) != len(labelNames) {
				return nil, fmt.Errorf("not all requested labels found in metric")
			}

			return labelValues, nil
		}
	}
	return nil, fmt.Errorf("metric not found")
}
metrics := x.cache.GetAllMetrics() 300 | assert.Len(t, metrics, 1) 301 | 302 | var dto dto.Metric 303 | metrics[0].Write(&dto) 304 | 305 | // Expecting a maintenance, thus value 1 306 | assert.Equal(t, float64(1), *dto.Gauge.Value) 307 | 308 | } 309 | 310 | func TestAddAllPendingMaintenancesNoMetrics(t *testing.T) { 311 | ctx := context.TODO() 312 | ctrl := gomock.NewController(t) 313 | defer ctrl.Finish() 314 | 315 | mockClient := mock.NewMockClient(ctrl) 316 | mockClient.EXPECT().DescribePendingMaintenanceActionsAll(ctx).Return([]*rds.ResourcePendingMaintenanceActions{}, nil) 317 | 318 | x := RDSExporter{ 319 | svcs: []awsclient.Client{mockClient}, 320 | sessions: []*session.Session{session.New(&aws.Config{Region: aws.String("foo")})}, 321 | cache: *NewMetricsCache(10 * time.Second), 322 | logger: log.NewNopLogger(), 323 | } 324 | 325 | x.addAllPendingMaintenancesMetrics(ctx, 0, createTestDBInstances()) 326 | metrics := x.cache.GetAllMetrics() 327 | assert.Len(t, metrics, 1) 328 | 329 | var dto dto.Metric 330 | metrics[0].Write(&dto) 331 | 332 | // Expecting no maintenance, thus 0 value 333 | assert.Equal(t, float64(0), *dto.Gauge.Value) 334 | } 335 | -------------------------------------------------------------------------------- /pkg/route53.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math" 7 | "sync" 8 | "time" 9 | 10 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 11 | "github.com/aws/aws-sdk-go/aws" 12 | "github.com/aws/aws-sdk-go/aws/awserr" 13 | "github.com/aws/aws-sdk-go/aws/session" 14 | "github.com/aws/aws-sdk-go/service/route53" 15 | "github.com/go-kit/log" 16 | "github.com/go-kit/log/level" 17 | "github.com/prometheus/client_golang/prometheus" 18 | ) 19 | 20 | const ( 21 | maxRetries = 10 22 | route53MaxConcurrency = 5 23 | route53ServiceCode = "route53" 24 | hostedZonesQuotaCode = "L-4EA4796A" 25 | recordsPerHostedZoneQuotaCode = 
"L-E209CC9F" 26 | errorCodeThrottling = "Throttling" 27 | ) 28 | 29 | type Route53Exporter struct { 30 | sess *session.Session 31 | RecordsPerHostedZoneQuota *prometheus.Desc 32 | RecordsPerHostedZoneUsage *prometheus.Desc 33 | HostedZonesPerAccountQuota *prometheus.Desc 34 | HostedZonesPerAccountUsage *prometheus.Desc 35 | LastUpdateTime *prometheus.Desc 36 | Cancel context.CancelFunc 37 | 38 | cache MetricsCache 39 | logger log.Logger 40 | interval time.Duration 41 | timeout time.Duration 42 | } 43 | 44 | func NewRoute53Exporter(sess *session.Session, logger log.Logger, config Route53Config, awsAccountId string) *Route53Exporter { 45 | 46 | level.Info(logger).Log("msg", "Initializing Route53 exporter") 47 | constLabels := map[string]string{"aws_account_id": awsAccountId, SERVICE_CODE_KEY: route53ServiceCode} 48 | 49 | exporter := &Route53Exporter{ 50 | sess: sess, 51 | RecordsPerHostedZoneQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "route53_recordsperhostedzone_quota"), "Quota for maximum number of records in a Route53 hosted zone", []string{"hostedzoneid", "hostedzonename"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, recordsPerHostedZoneQuotaCode)), 52 | RecordsPerHostedZoneUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "route53_recordsperhostedzone_total"), "Number of Resource records", []string{"hostedzoneid", "hostedzonename"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, recordsPerHostedZoneQuotaCode)), 53 | HostedZonesPerAccountQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "route53_hostedzonesperaccount_quota"), "Quota for maximum number of Route53 hosted zones in an account", []string{}, WithKeyValue(constLabels, QUOTA_CODE_KEY, hostedZonesQuotaCode)), 54 | HostedZonesPerAccountUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "route53_hostedzonesperaccount_total"), "Number of Resource records", []string{}, WithKeyValue(constLabels, QUOTA_CODE_KEY, hostedZonesQuotaCode)), 55 | LastUpdateTime: 
prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "route53_last_updated_timestamp_seconds"), "Last time, the route53 metrics were sucessfully updated", []string{}, constLabels), 56 | cache: *NewMetricsCache(*config.CacheTTL), 57 | logger: logger, 58 | interval: *config.Interval, 59 | timeout: *config.Timeout, 60 | } 61 | return exporter 62 | } 63 | 64 | func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client awsclient.Client, hostedZones []*route53.HostedZone, ctx context.Context) []error { 65 | errChan := make(chan error, len(hostedZones)) 66 | errs := []error{} 67 | 68 | wg := &sync.WaitGroup{} 69 | wg.Add(len(hostedZones)) 70 | sem := make(chan int, route53MaxConcurrency) 71 | defer close(sem) 72 | for i, hostedZone := range hostedZones { 73 | 74 | sem <- 1 75 | go func(i int, hostedZone *route53.HostedZone) { 76 | defer func() { 77 | <-sem 78 | wg.Done() 79 | }() 80 | hostedZoneLimitOut, err := GetHostedZoneLimitWithBackoff(client, ctx, hostedZone.Id, maxRetries, e.logger) 81 | 82 | if err != nil { 83 | errChan <- fmt.Errorf("Could not get Limits for hosted zone with ID '%s' and name '%s'. 
// getRecordsPerHostedZoneMetrics fetches the per-zone record quota and usage
// for every hosted zone, with at most route53MaxConcurrency concurrent API
// calls, and caches one quota and one usage metric per zone. All collection
// errors are gathered and returned as a slice.
func (e *Route53Exporter) getRecordsPerHostedZoneMetrics(client awsclient.Client, hostedZones []*route53.HostedZone, ctx context.Context) []error {
	// Buffered so every goroutine can report an error without blocking.
	errChan := make(chan error, len(hostedZones))
	errs := []error{}

	wg := &sync.WaitGroup{}
	wg.Add(len(hostedZones))
	// Semaphore bounding the number of in-flight GetHostedZoneLimit calls.
	sem := make(chan int, route53MaxConcurrency)
	defer close(sem)
	for i, hostedZone := range hostedZones {

		sem <- 1
		go func(i int, hostedZone *route53.HostedZone) {
			defer func() {
				<-sem
				wg.Done()
			}()
			hostedZoneLimitOut, err := GetHostedZoneLimitWithBackoff(client, ctx, hostedZone.Id, maxRetries, e.logger)

			if err != nil {
				errChan <- fmt.Errorf("Could not get Limits for hosted zone with ID '%s' and name '%s'. Error was: %s", *hostedZone.Id, *hostedZone.Name, err.Error())
				awsclient.AwsExporterMetrics.IncrementErrors()
				return
			}
			level.Info(e.logger).Log("msg", fmt.Sprintf("Currently at hosted zone: %d / %d", i, len(hostedZones)))
			e.cache.AddMetric(prometheus.MustNewConstMetric(e.RecordsPerHostedZoneQuota, prometheus.GaugeValue, float64(*hostedZoneLimitOut.Limit.Value), *hostedZone.Id, *hostedZone.Name))
			e.cache.AddMetric(prometheus.MustNewConstMetric(e.RecordsPerHostedZoneUsage, prometheus.GaugeValue, float64(*hostedZoneLimitOut.Count), *hostedZone.Id, *hostedZone.Name))

		}(i, hostedZone)
	}
	wg.Wait()
	// All senders are done after Wait; closing lets the drain loop finish.
	close(errChan)

	for err := range errChan {
		errs = append(errs, err)
	}

	return errs
}

// getHostedZonesPerAccountMetrics caches the account-level hosted-zone quota
// (from Service Quotas) and the current usage (the number of zones passed in).
func (e *Route53Exporter) getHostedZonesPerAccountMetrics(client awsclient.Client, hostedZones []*route53.HostedZone, ctx context.Context) error {
	quota, err := getQuotaValueWithContext(client, route53ServiceCode, hostedZonesQuotaCode, ctx)
	if err != nil {
		return err
	}

	e.cache.AddMetric(prometheus.MustNewConstMetric(e.HostedZonesPerAccountQuota, prometheus.GaugeValue, quota))
	e.cache.AddMetric(prometheus.MustNewConstMetric(e.HostedZonesPerAccountUsage, prometheus.GaugeValue, float64(len(hostedZones))))
	return nil
}
115 | func (e *Route53Exporter) CollectLoop() { 116 | client := awsclient.NewClientFromSession(e.sess) 117 | 118 | for { 119 | ctx, ctxCancelFunc := context.WithTimeout(context.Background(), e.timeout) 120 | e.Cancel = ctxCancelFunc 121 | level.Info(e.logger).Log("msg", "Updating Route53 metrics...") 122 | 123 | hostedZones, err := getAllHostedZones(client, ctx, e.logger) 124 | 125 | level.Info(e.logger).Log("msg", "Got all zones") 126 | if err != nil { 127 | level.Error(e.logger).Log("msg", "Could not retrieve the list of hosted zones", "error", err.Error()) 128 | awsclient.AwsExporterMetrics.IncrementErrors() 129 | } 130 | 131 | err = e.getHostedZonesPerAccountMetrics(client, hostedZones, ctx) 132 | if err != nil { 133 | level.Error(e.logger).Log("msg", "Could not get limits for hosted zone", "error", err.Error()) 134 | awsclient.AwsExporterMetrics.IncrementErrors() 135 | } 136 | 137 | errs := e.getRecordsPerHostedZoneMetrics(client, hostedZones, ctx) 138 | for _, err = range errs { 139 | level.Error(e.logger).Log("msg", "Could not get limits for hosted zone", "error", err.Error()) 140 | awsclient.AwsExporterMetrics.IncrementErrors() 141 | } 142 | 143 | level.Info(e.logger).Log("msg", "Route53 metrics Updated") 144 | 145 | ctxCancelFunc() // should never do anything as we don't run stuff in the background 146 | 147 | time.Sleep(e.interval) 148 | } 149 | } 150 | 151 | func (e *Route53Exporter) Collect(ch chan<- prometheus.Metric) { 152 | for _, m := range e.cache.GetAllMetrics() { 153 | ch <- m 154 | } 155 | } 156 | 157 | func (e *Route53Exporter) Describe(ch chan<- *prometheus.Desc) { 158 | ch <- e.RecordsPerHostedZoneQuota 159 | ch <- e.RecordsPerHostedZoneUsage 160 | ch <- e.LastUpdateTime 161 | } 162 | 163 | func getAllHostedZones(client awsclient.Client, ctx context.Context, logger log.Logger) ([]*route53.HostedZone, error) { 164 | result := []*route53.HostedZone{} 165 | 166 | listZonesInput := route53.ListHostedZonesInput{} 167 | 168 | listZonesOut, err := 
ListHostedZonesWithBackoff(client, ctx, &listZonesInput, maxRetries, logger) 169 | if err != nil { 170 | return nil, err 171 | } 172 | result = append(result, listZonesOut.HostedZones...) 173 | 174 | for *listZonesOut.IsTruncated { 175 | listZonesInput.Marker = listZonesOut.NextMarker 176 | listZonesOut, err = ListHostedZonesWithBackoff(client, ctx, &listZonesInput, maxRetries, logger) 177 | if err != nil { 178 | return nil, err 179 | } 180 | result = append(result, listZonesOut.HostedZones...) 181 | } 182 | 183 | return result, nil 184 | } 185 | 186 | func ListHostedZonesWithBackoff(client awsclient.Client, ctx context.Context, input *route53.ListHostedZonesInput, maxTries int, logger log.Logger) (*route53.ListHostedZonesOutput, error) { 187 | var listHostedZonesOut *route53.ListHostedZonesOutput 188 | var err error 189 | 190 | for i := 0; i < maxTries; i++ { 191 | listHostedZonesOut, err = client.ListHostedZonesWithContext(ctx, input) 192 | if err == nil { 193 | return listHostedZonesOut, err 194 | } 195 | if !isThrottlingError(err) { 196 | return nil, err 197 | } 198 | level.Debug(logger).Log("msg", "Retrying throttling api call", "tries", i+1, "endpoint", "ListHostedZones") 199 | backOffSeconds := math.Pow(2, float64(i-1)) 200 | time.Sleep(time.Duration(backOffSeconds) * time.Second) 201 | } 202 | return nil, err 203 | } 204 | 205 | func GetHostedZoneLimitWithBackoff(client awsclient.Client, ctx context.Context, hostedZoneId *string, maxTries int, logger log.Logger) (*route53.GetHostedZoneLimitOutput, error) { 206 | hostedZoneLimitInput := &route53.GetHostedZoneLimitInput{ 207 | HostedZoneId: hostedZoneId, 208 | Type: aws.String(route53.HostedZoneLimitTypeMaxRrsetsByZone), 209 | } 210 | var hostedZoneLimitOut *route53.GetHostedZoneLimitOutput 211 | var err error 212 | 213 | for i := 0; i < maxTries; i++ { 214 | hostedZoneLimitOut, err = client.GetHostedZoneLimitWithContext(ctx, hostedZoneLimitInput) 215 | if err == nil { 216 | return hostedZoneLimitOut, err 217 
| } 218 | 219 | if !isThrottlingError(err) { 220 | return nil, err 221 | } 222 | level.Debug(logger).Log("msg", "Retrying throttling api call", "tries", i+1, "endpoint", "GetHostedZoneLimit", "hostedZoneID", hostedZoneId) 223 | backOffSeconds := math.Pow(2, float64(i-1)) 224 | time.Sleep(time.Duration(backOffSeconds) * time.Second) 225 | 226 | } 227 | return nil, err 228 | } 229 | 230 | func createGetHostedZoneLimitInput(hostedZoneId, limitType string) *route53.GetHostedZoneLimitInput { 231 | return &route53.GetHostedZoneLimitInput{ 232 | HostedZoneId: aws.String(hostedZoneId), 233 | Type: aws.String(limitType), 234 | } 235 | } 236 | 237 | func createListHostedZonesWithContext(maxItems string) *route53.ListHostedZonesInput { 238 | return &route53.ListHostedZonesInput{ 239 | MaxItems: aws.String(maxItems), 240 | } 241 | } 242 | 243 | func createGetHostedZoneLimitWithContext(hostedZoneId, limitType string) *route53.GetHostedZoneLimitInput { 244 | return &route53.GetHostedZoneLimitInput{ 245 | HostedZoneId: aws.String(hostedZoneId), 246 | Type: aws.String(limitType), 247 | } 248 | } 249 | 250 | func getHostedZoneValueWithContext(client awsclient.Client, hostedZoneId string, limitType string, ctx context.Context) (int64, error) { 251 | sqOutput, err := client.GetHostedZoneLimitWithContext(ctx, createGetHostedZoneLimitInput(hostedZoneId, limitType)) 252 | 253 | if err != nil { 254 | return 0, err 255 | } 256 | 257 | return *sqOutput.Limit.Value, nil 258 | } 259 | 260 | // isThrottlingError returns true if the error given is an instance of awserr.Error and the error code matches the constant errorCodeThrottling. It's not compared against route53.ErrCodeThrottlingException as this does not match what the api is returning. 
// TestGetHostedZoneLimitWithContext verifies that getHostedZoneValueWithContext
// returns the quota's Limit.Value (10), not the usage Count (12).
// Note: the test reuses route53ServiceCode/hostedZonesQuotaCode as stand-in
// zone id / limit type strings; the mock only matches on equality.
func TestGetHostedZoneLimitWithContext(t *testing.T) {
	ctx := context.TODO()
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	mockClient := mock.NewMockClient(ctrl)
	mockClient.EXPECT().GetHostedZoneLimitWithContext(ctx,
		createGetHostedZoneLimitInput(route53ServiceCode, hostedZonesQuotaCode)).Return(

		&route53.GetHostedZoneLimitOutput{
			Count: aws.Int64(12),
			Limit: &route53.HostedZoneLimit{
				Type:  aws.String("route53"),
				Value: aws.Int64(10)}}, nil)

	value, err := getHostedZoneValueWithContext(mockClient, route53ServiceCode, hostedZonesQuotaCode, ctx)
	assert.Nil(t, err)
	assert.Equal(t, value, int64(10))
}
// TestGetHostedZoneLimitWithBackoff verifies the happy path: the first call
// succeeds, so no retry (and no logging) happens — which is why passing a
// nil log.Logger here is safe.
func TestGetHostedZoneLimitWithBackoff(t *testing.T) {
	ctx := context.TODO()
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	mockClient := mock.NewMockClient(ctrl)
	// nil logger: only used on the retry path, which this test never hits.
	var logger log.Logger

	mockClient.EXPECT().GetHostedZoneLimitWithContext(ctx, createGetHostedZoneLimitWithContext(route53ServiceCode, route53.HostedZoneLimitTypeMaxRrsetsByZone)).Return(
		&route53.GetHostedZoneLimitOutput{
			Limit: &route53.HostedZoneLimit{
				Type:  aws.String("route53"),
				Value: aws.Int64(10),
			},
		}, nil)

	hostedZoneLimitInput := &route53.GetHostedZoneLimitInput{
		HostedZoneId: aws.String("route53"),
		Type:         aws.String(route53.HostedZoneLimitTypeMaxRrsetsByZone),
	}

	actualResult, actualErr := GetHostedZoneLimitWithBackoff(mockClient, ctx, hostedZoneLimitInput.HostedZoneId, maxRetries, logger)
	assert.Nil(t, actualErr)
	assert.Equal(t, "route53", *actualResult.Limit.Type)

}
ctx, &input, maxRetries, logger) 99 | assert.Nil(t, actualErr) 100 | assert.Equal(t, "10", *actualResult.MaxItems) 101 | } 102 | -------------------------------------------------------------------------------- /pkg/util.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | "sort" 7 | "strconv" 8 | "time" 9 | ) 10 | 11 | func GetEnvIntValue(envname string) (*int, error) { 12 | if value, ok := os.LookupEnv(envname); ok { 13 | int64val, err := strconv.ParseInt(value, 10, 0) 14 | if err != nil { 15 | return nil, err 16 | } else { 17 | intval := int(int64val) 18 | return &intval, nil 19 | } 20 | } else { 21 | return nil, nil 22 | } 23 | } 24 | 25 | func durationPtr(duration time.Duration) *time.Duration { 26 | return &duration 27 | } 28 | 29 | // Add a new key to the map and return the new map 30 | func WithKeyValue(m map[string]string, key string, value string) map[string]string { 31 | newMap := make(map[string]string) 32 | for k, v := range m { 33 | newMap[k] = v 34 | } 35 | newMap[key] = value 36 | return newMap 37 | } 38 | 39 | // Determines status from the number of days until EOL 40 | func GetEOLStatus(eol string, thresholds []Threshold) (string, error) { 41 | eolDate, err := time.Parse("2006-01-02", eol) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | if len(thresholds) == 0 { 47 | return "", errors.New("thresholds slice is empty") 48 | } 49 | 50 | currentDate := time.Now() 51 | daysToEOL := int(eolDate.Sub(currentDate).Hours() / 24) 52 | 53 | sort.Slice(thresholds, func(i, j int) bool { 54 | return thresholds[i].Days < thresholds[j].Days 55 | }) 56 | 57 | for _, threshold := range thresholds { 58 | if daysToEOL <= threshold.Days { 59 | return threshold.Name, nil 60 | } 61 | } 62 | return thresholds[len(thresholds)-1].Name, nil 63 | } 64 | -------------------------------------------------------------------------------- /pkg/util_test.go: 
-------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestWithKeyValue(t *testing.T) { 9 | type args struct { 10 | m map[string]string 11 | key string 12 | value string 13 | } 14 | tests := []struct { 15 | name string 16 | args args 17 | want map[string]string 18 | }{ 19 | { 20 | name: "Adding a key-value-pair to empty map returns a map with one key-value-pair", 21 | args: args{ 22 | m: map[string]string{}, 23 | key: "new", 24 | value: "new", 25 | }, 26 | want: map[string]string{"new": "new"}, 27 | }, 28 | { 29 | name: "Adding a key-value-pair to existing map returns a new map with an additional key-value-pair", 30 | args: args{ 31 | m: map[string]string{"old": "old"}, 32 | key: "new", 33 | value: "new", 34 | }, 35 | want: map[string]string{"old": "old", "new": "new"}, 36 | }, 37 | } 38 | for _, tt := range tests { 39 | t.Run(tt.name, func(t *testing.T) { 40 | if got := WithKeyValue(tt.args.m, tt.args.key, tt.args.value); !reflect.DeepEqual(got, tt.want) { 41 | t.Errorf("WithKeyValue() = %v, want %v", got, tt.want) 42 | } 43 | }) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pkg/vpc.go: -------------------------------------------------------------------------------- 1 | package pkg 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "sync" 7 | "time" 8 | 9 | "github.com/app-sre/aws-resource-exporter/pkg/awsclient" 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/session" 12 | "github.com/aws/aws-sdk-go/service/ec2" 13 | "github.com/aws/aws-sdk-go/service/servicequotas" 14 | "github.com/go-kit/log" 15 | "github.com/go-kit/log/level" 16 | "github.com/prometheus/client_golang/prometheus" 17 | ) 18 | 19 | const ( 20 | QUOTA_VPCS_PER_REGION string = "L-F678F1CE" 21 | QUOTA_SUBNETS_PER_VPC string = "L-407747CB" 22 | QUOTA_ROUTES_PER_ROUTE_TABLE string = "L-93826ACB" 23 | 
QUOTA_INTERFACE_VPC_ENDPOINTS_PER_VPC string = "L-29B6F2EB" 24 | QUOTA_ROUTE_TABLES_PER_VPC string = "L-589F43AA" 25 | QUOTA_IPV4_BLOCKS_PER_VPC string = "L-83CA0A9D" 26 | SERVICE_CODE_VPC string = "vpc" 27 | ) 28 | 29 | type VPCExporter struct { 30 | awsAccountId string 31 | sessions []*session.Session 32 | VpcsPerRegionQuota *prometheus.Desc 33 | VpcsPerRegionUsage *prometheus.Desc 34 | SubnetsPerVpcQuota *prometheus.Desc 35 | SubnetsPerVpcUsage *prometheus.Desc 36 | RoutesPerRouteTableQuota *prometheus.Desc 37 | RoutesPerRouteTableUsage *prometheus.Desc 38 | InterfaceVpcEndpointsPerVpcQuota *prometheus.Desc 39 | InterfaceVpcEndpointsPerVpcUsage *prometheus.Desc 40 | RouteTablesPerVpcQuota *prometheus.Desc 41 | RouteTablesPerVpcUsage *prometheus.Desc 42 | IPv4BlocksPerVpcQuota *prometheus.Desc 43 | IPv4BlocksPerVpcUsage *prometheus.Desc 44 | 45 | logger log.Logger 46 | timeout time.Duration 47 | cache MetricsCache 48 | interval time.Duration 49 | } 50 | 51 | type VPCCollector struct { 52 | e *VPCExporter 53 | ec2 *ec2.EC2 54 | serviceQuotas *servicequotas.ServiceQuotas 55 | region *string 56 | wg *sync.WaitGroup 57 | } 58 | 59 | func NewVPCExporter(sess []*session.Session, logger log.Logger, config VPCConfig, awsAccountId string) *VPCExporter { 60 | level.Info(logger).Log("msg", "Initializing VPC exporter") 61 | constLabels := map[string]string{"aws_account_id": awsAccountId, SERVICE_CODE_KEY: SERVICE_CODE_VPC} 62 | return &VPCExporter{ 63 | awsAccountId: awsAccountId, 64 | sessions: sess, 65 | VpcsPerRegionQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_vpcsperregion_quota"), "The quota of VPCs per region", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_VPCS_PER_REGION)), 66 | VpcsPerRegionUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_vpcsperregion_usage"), "The usage of VPCs per region", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_VPCS_PER_REGION)), 67 | 
SubnetsPerVpcQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_subnetspervpc_quota"), "The quota of subnets per VPC", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_SUBNETS_PER_VPC)), 68 | SubnetsPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_subnetspervpc_usage"), "The usage of subnets per VPC", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_SUBNETS_PER_VPC)), 69 | RoutesPerRouteTableQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_routesperroutetable_quota"), "The quota of routes per routetable", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_ROUTES_PER_ROUTE_TABLE)), 70 | RoutesPerRouteTableUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_routesperroutetable_usage"), "The usage of routes per routetable", []string{"aws_region", "vpcid", "routetableid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_ROUTES_PER_ROUTE_TABLE)), 71 | InterfaceVpcEndpointsPerVpcQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_interfacevpcendpointspervpc_quota"), "The quota of interface vpc endpoints per vpc", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_INTERFACE_VPC_ENDPOINTS_PER_VPC)), 72 | InterfaceVpcEndpointsPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_interfacevpcendpointspervpc_usage"), "The usage of interface vpc endpoints per vpc", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_INTERFACE_VPC_ENDPOINTS_PER_VPC)), 73 | RouteTablesPerVpcQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_routetablespervpc_quota"), "The quota of route tables per vpc", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_ROUTE_TABLES_PER_VPC)), 74 | RouteTablesPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_routetablespervpc_usage"), "The usage of route 
tables per vpc", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_ROUTE_TABLES_PER_VPC)), 75 | IPv4BlocksPerVpcQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4blockspervpc_quota"), "The quota of ipv4 blocks per vpc", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_IPV4_BLOCKS_PER_VPC)), 76 | IPv4BlocksPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4blockspervpc_usage"), "The usage of ipv4 blocks per vpc", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_IPV4_BLOCKS_PER_VPC)), 77 | logger: logger, 78 | timeout: *config.Timeout, 79 | cache: *NewMetricsCache(*config.CacheTTL), 80 | interval: *config.Interval, 81 | } 82 | } 83 | 84 | func (e *VPCExporter) CollectInRegion(session *session.Session, region *string, wg *sync.WaitGroup) { 85 | defer wg.Done() 86 | 87 | ec2Svc := ec2.New(session) 88 | quotaSvc := servicequotas.New(session) 89 | 90 | e.collectVpcsPerRegionQuota(quotaSvc, *region) 91 | e.collectVpcsPerRegionUsage(ec2Svc, *region) 92 | e.collectRoutesTablesPerVpcQuota(quotaSvc, *region) 93 | e.collectInterfaceVpcEndpointsPerVpcQuota(quotaSvc, *region) 94 | e.collectSubnetsPerVpcQuota(quotaSvc, *region) 95 | e.collectIPv4BlocksPerVpcQuota(quotaSvc, *region) 96 | vpcCtx, vpcCancel := context.WithTimeout(context.Background(), e.timeout) 97 | defer vpcCancel() 98 | allVpcs, err := ec2Svc.DescribeVpcsWithContext(vpcCtx, &ec2.DescribeVpcsInput{}) 99 | if err != nil { 100 | level.Error(e.logger).Log("msg", "Call to DescribeVpcs failed", "region", region, "err", err) 101 | } else { 102 | for i, _ := range allVpcs.Vpcs { 103 | e.collectSubnetsPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, *region) 104 | e.collectInterfaceVpcEndpointsPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, *region) 105 | e.collectRoutesTablesPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, *region) 106 | e.collectIPv4BlocksPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, *region) 107 | } 108 | } 109 
| e.collectRoutesPerRouteTableQuota(quotaSvc, *region) 110 | routesCtx, routesCancel := context.WithTimeout(context.Background(), e.timeout) 111 | defer routesCancel() 112 | allRouteTables, err := ec2Svc.DescribeRouteTablesWithContext(routesCtx, &ec2.DescribeRouteTablesInput{}) 113 | if err != nil { 114 | level.Error(e.logger).Log("msg", "Call to DescribeRouteTables failed", "region", region, "err", err) 115 | } else { 116 | for i, _ := range allRouteTables.RouteTables { 117 | e.collectRoutesPerRouteTableUsage(allRouteTables.RouteTables[i], ec2Svc, *region) 118 | } 119 | } 120 | } 121 | 122 | func (e *VPCExporter) CollectLoop() { 123 | for { 124 | 125 | wg := &sync.WaitGroup{} 126 | wg.Add(len(e.sessions)) 127 | for i, _ := range e.sessions { 128 | session := e.sessions[i] 129 | region := session.Config.Region 130 | go e.CollectInRegion(session, region, wg) 131 | } 132 | wg.Wait() 133 | 134 | level.Info(e.logger).Log("msg", "VPC metrics Updated") 135 | 136 | time.Sleep(e.interval) 137 | } 138 | } 139 | 140 | func (e *VPCExporter) Collect(ch chan<- prometheus.Metric) { 141 | for _, m := range e.cache.GetAllMetrics() { 142 | ch <- m 143 | } 144 | } 145 | 146 | func (e *VPCExporter) GetQuotaValue(client *servicequotas.ServiceQuotas, serviceCode string, quotaCode string) (float64, error) { 147 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 148 | defer cancelFunc() 149 | sqOutput, err := client.GetServiceQuotaWithContext(ctx, &servicequotas.GetServiceQuotaInput{ 150 | QuotaCode: aws.String(quotaCode), 151 | ServiceCode: aws.String(serviceCode), 152 | }) 153 | 154 | if err != nil { 155 | return 0, err 156 | } 157 | // It seems sometimes the returned Quota contains a nil value - probably because the Value is "Required: No" 158 | // https://docs.aws.amazon.com/servicequotas/2019-06-24/apireference/API_ServiceQuota.html#servicequotas-Type-ServiceQuota-Value 159 | if sqOutput.Quota == nil || sqOutput.Quota.Value == nil { 160 | 
level.Error(e.logger).Log("msg", "VPC Quota was nil", "quota-code", quotaCode) 161 | return 0, errors.New("VPC Quota was nil") 162 | } 163 | return *sqOutput.Quota.Value, nil 164 | } 165 | 166 | func (e *VPCExporter) collectVpcsPerRegionQuota(client *servicequotas.ServiceQuotas, region string) { 167 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_VPCS_PER_REGION) 168 | if err != nil { 169 | level.Error(e.logger).Log("msg", "Call to VpcsPerRegion ServiceQuota failed", "region", region, "err", err) 170 | awsclient.AwsExporterMetrics.IncrementErrors() 171 | return 172 | } 173 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.VpcsPerRegionQuota, prometheus.GaugeValue, quota, region)) 174 | } 175 | 176 | func (e *VPCExporter) collectVpcsPerRegionUsage(ec2Svc *ec2.EC2, region string) { 177 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 178 | defer cancelFunc() 179 | describeVpcsOutput, err := ec2Svc.DescribeVpcsWithContext(ctx, &ec2.DescribeVpcsInput{}) 180 | if err != nil { 181 | level.Error(e.logger).Log("msg", "Call to DescribeVpcs failed", "region", region, "err", err) 182 | awsclient.AwsExporterMetrics.IncrementErrors() 183 | return 184 | } 185 | usage := len(describeVpcsOutput.Vpcs) 186 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.VpcsPerRegionUsage, prometheus.GaugeValue, float64(usage), region)) 187 | } 188 | 189 | func (e *VPCExporter) collectSubnetsPerVpcQuota(client *servicequotas.ServiceQuotas, region string) { 190 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_SUBNETS_PER_VPC) 191 | if err != nil { 192 | level.Error(e.logger).Log("msg", "Call to SubnetsPerVpc ServiceQuota failed", "region", region, "err", err) 193 | awsclient.AwsExporterMetrics.IncrementErrors() 194 | return 195 | } 196 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.SubnetsPerVpcQuota, prometheus.GaugeValue, quota, region)) 197 | } 198 | 199 | func (e *VPCExporter) collectSubnetsPerVpcUsage(vpc *ec2.Vpc, ec2Svc 
*ec2.EC2, region string) { 200 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 201 | defer cancelFunc() 202 | describeSubnetsOutput, err := ec2Svc.DescribeSubnetsWithContext(ctx, &ec2.DescribeSubnetsInput{ 203 | Filters: []*ec2.Filter{&ec2.Filter{ 204 | Name: aws.String("vpc-id"), 205 | Values: []*string{vpc.VpcId}, 206 | }}, 207 | }) 208 | if err != nil { 209 | level.Error(e.logger).Log("msg", "Call to DescribeSubnets failed", "region", region, "err", err) 210 | awsclient.AwsExporterMetrics.IncrementErrors() 211 | return 212 | } 213 | usage := len(describeSubnetsOutput.Subnets) 214 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.SubnetsPerVpcUsage, prometheus.GaugeValue, float64(usage), region, *vpc.VpcId)) 215 | } 216 | 217 | func (e *VPCExporter) collectRoutesPerRouteTableQuota(client *servicequotas.ServiceQuotas, region string) { 218 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_ROUTES_PER_ROUTE_TABLE) 219 | if err != nil { 220 | level.Error(e.logger).Log("msg", "Call to RoutesPerRouteTable ServiceQuota failed", "region", region, "err", err) 221 | awsclient.AwsExporterMetrics.IncrementErrors() 222 | return 223 | } 224 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.RoutesPerRouteTableQuota, prometheus.GaugeValue, quota, region)) 225 | } 226 | 227 | func (e *VPCExporter) collectRoutesPerRouteTableUsage(rtb *ec2.RouteTable, ec2Svc *ec2.EC2, region string) { 228 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 229 | defer cancelFunc() 230 | descRouteTableOutput, err := ec2Svc.DescribeRouteTablesWithContext(ctx, &ec2.DescribeRouteTablesInput{ 231 | RouteTableIds: []*string{rtb.RouteTableId}, 232 | }) 233 | if err != nil { 234 | level.Error(e.logger).Log("msg", "Call to DescribeRouteTables failed", "region", region, "err", err) 235 | awsclient.AwsExporterMetrics.IncrementErrors() 236 | return 237 | } 238 | quota := len(descRouteTableOutput.RouteTables) 239 | 
e.cache.AddMetric(prometheus.MustNewConstMetric(e.RoutesPerRouteTableUsage, prometheus.GaugeValue, float64(quota), region, *rtb.VpcId, *rtb.RouteTableId)) 240 | } 241 | 242 | func (e *VPCExporter) collectInterfaceVpcEndpointsPerVpcQuota(client *servicequotas.ServiceQuotas, region string) { 243 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_INTERFACE_VPC_ENDPOINTS_PER_VPC) 244 | if err != nil { 245 | level.Error(e.logger).Log("msg", "Call to InterfaceVpcEndpointsPerVpc ServiceQuota failed", "region", region, "err", err) 246 | awsclient.AwsExporterMetrics.IncrementErrors() 247 | return 248 | } 249 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.InterfaceVpcEndpointsPerVpcQuota, prometheus.GaugeValue, quota, region)) 250 | } 251 | 252 | func (e *VPCExporter) collectInterfaceVpcEndpointsPerVpcUsage(vpc *ec2.Vpc, ec2Svc *ec2.EC2, region string) { 253 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 254 | defer cancelFunc() 255 | descVpcEndpoints, err := ec2Svc.DescribeVpcEndpointsWithContext(ctx, &ec2.DescribeVpcEndpointsInput{ 256 | Filters: []*ec2.Filter{{ 257 | Name: aws.String("vpc-id"), 258 | Values: []*string{vpc.VpcId}, 259 | }}, 260 | }) 261 | if err != nil { 262 | level.Error(e.logger).Log("msg", "Call to DescribeVpcEndpoints failed", "region", region, "err", err) 263 | awsclient.AwsExporterMetrics.IncrementErrors() 264 | return 265 | } 266 | quota := len(descVpcEndpoints.VpcEndpoints) 267 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.InterfaceVpcEndpointsPerVpcUsage, prometheus.GaugeValue, float64(quota), region, *vpc.VpcId)) 268 | } 269 | 270 | func (e *VPCExporter) collectRoutesTablesPerVpcQuota(client *servicequotas.ServiceQuotas, region string) { 271 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_ROUTE_TABLES_PER_VPC) 272 | if err != nil { 273 | level.Error(e.logger).Log("msg", "Call to RoutesTablesPerVpc ServiceQuota failed", "region", region, "err", err) 274 | 
awsclient.AwsExporterMetrics.IncrementErrors() 275 | return 276 | } 277 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.RouteTablesPerVpcQuota, prometheus.GaugeValue, quota, region)) 278 | } 279 | 280 | func (e *VPCExporter) collectRoutesTablesPerVpcUsage(vpc *ec2.Vpc, ec2Svc *ec2.EC2, region string) { 281 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 282 | defer cancelFunc() 283 | descRouteTables, err := ec2Svc.DescribeRouteTablesWithContext(ctx, &ec2.DescribeRouteTablesInput{ 284 | Filters: []*ec2.Filter{{ 285 | Name: aws.String("vpc-id"), 286 | Values: []*string{vpc.VpcId}, 287 | }}, 288 | }) 289 | if err != nil { 290 | level.Error(e.logger).Log("msg", "Call to DescribeRouteTables failed", "region", region, "err", err) 291 | awsclient.AwsExporterMetrics.IncrementErrors() 292 | return 293 | } 294 | quota := len(descRouteTables.RouteTables) 295 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.RouteTablesPerVpcUsage, prometheus.GaugeValue, float64(quota), region, *vpc.VpcId)) 296 | } 297 | 298 | func (e *VPCExporter) collectIPv4BlocksPerVpcQuota(client *servicequotas.ServiceQuotas, region string) { 299 | quota, err := e.GetQuotaValue(client, SERVICE_CODE_VPC, QUOTA_IPV4_BLOCKS_PER_VPC) 300 | if err != nil { 301 | level.Error(e.logger).Log("msg", "Call to IPv4BlocksPerVpc ServiceQuota failed", "region", region, "err", err) 302 | awsclient.AwsExporterMetrics.IncrementErrors() 303 | return 304 | } 305 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.IPv4BlocksPerVpcQuota, prometheus.GaugeValue, quota, region)) 306 | } 307 | 308 | func (e *VPCExporter) collectIPv4BlocksPerVpcUsage(vpc *ec2.Vpc, ec2Svc *ec2.EC2, region string) { 309 | ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) 310 | defer cancelFunc() 311 | descVpcs, err := ec2Svc.DescribeVpcsWithContext(ctx, &ec2.DescribeVpcsInput{ 312 | VpcIds: []*string{vpc.VpcId}, 313 | }) 314 | if err != nil { 315 | level.Error(e.logger).Log("msg", "Call to 
DescribeVpcs failed", "region", region, "err", err) 316 | awsclient.AwsExporterMetrics.IncrementErrors() 317 | return 318 | } 319 | if len(descVpcs.Vpcs) != 1 { 320 | level.Error(e.logger).Log("msg", "Unexpected numbers of VPCs (!= 1) returned", "region", region, "vpcId", vpc.VpcId) 321 | } 322 | quota := len(descVpcs.Vpcs[0].CidrBlockAssociationSet) 323 | e.cache.AddMetric(prometheus.MustNewConstMetric(e.IPv4BlocksPerVpcUsage, prometheus.GaugeValue, float64(quota), region, *vpc.VpcId)) 324 | } 325 | 326 | func (e *VPCExporter) Describe(ch chan<- *prometheus.Desc) { 327 | ch <- e.VpcsPerRegionQuota 328 | ch <- e.VpcsPerRegionUsage 329 | ch <- e.SubnetsPerVpcQuota 330 | ch <- e.SubnetsPerVpcUsage 331 | ch <- e.RoutesPerRouteTableQuota 332 | ch <- e.RoutesPerRouteTableUsage 333 | ch <- e.IPv4BlocksPerVpcQuota 334 | ch <- e.IPv4BlocksPerVpcUsage 335 | ch <- e.InterfaceVpcEndpointsPerVpcQuota 336 | ch <- e.InterfaceVpcEndpointsPerVpcUsage 337 | ch <- e.RouteTablesPerVpcQuota 338 | ch <- e.RoutesPerRouteTableUsage 339 | } 340 | -------------------------------------------------------------------------------- /pr_check.sh: -------------------------------------------------------------------------------- 1 | # Set the `GOBIN` environment variable so that dependencies will be installed 2 | # always in the same place, regardless of the value of `GOPATH`: 3 | export GOBIN="${PWD}/.gobin" 4 | export PATH="${GOBIN}:${PATH}" 5 | 6 | make container-test build 7 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "github>app-sre/shared-pipelines//renovate/default.json" 5 | ], 6 | "gomod": { 7 | "enabled": true 8 | }, 9 | "postUpdateOptions": ["gomodTidy"] 10 | } 11 | --------------------------------------------------------------------------------