├── doc ├── rds-health.png └── health-rules.md ├── SECURITY.md ├── .gitignore ├── main.go ├── .zappr.yaml ├── .github └── workflows │ ├── check-code.yml │ ├── check-test.yml │ └── build.yml ├── MAINTAINERS.md ├── internal ├── cache │ └── cache.go ├── insight │ ├── types.go │ ├── insight_test.go │ └── insight.go ├── cluster │ ├── cluster_test.go │ └── cluster.go ├── mocks │ ├── insight.go │ ├── cluster.go │ ├── database.go │ └── instance.go ├── show │ ├── compact │ │ └── compact.go │ ├── show.go │ ├── verbose │ │ └── verbose.go │ └── minimal │ │ └── minimal.go ├── instance │ ├── instance.go │ └── instance_test.go ├── discovery │ ├── discovery.go │ └── discovery_test.go ├── types │ ├── stats.go │ ├── types_test.go │ ├── types.go │ └── status.go ├── database │ ├── database_test.go │ └── database.go ├── rules │ ├── rules.go │ ├── estimator.go │ ├── calculator.go │ └── metrics.go └── service │ └── service.go ├── .goreleaser.yml ├── LICENSE.md ├── go.mod ├── cmd ├── list.go ├── show.go ├── progress.go ├── check.go └── root.go ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── go.sum └── README.md /doc/rds-health.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zalando/rds-health/HEAD/doc/rds-health.png -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | We acknowledge that every line of code that we write may potentially contain security issues. We are trying to deal with it responsibly and provide patches as quickly as possible. 
2 | 3 | If you would like to report a vulnerability and get rewarded for it, please ask to join our program by filling this form: 4 | 5 | https://corporate.zalando.com/en/about-us/report-vulnerability 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | #Go Releaser output 24 | dist/ 25 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package main 10 | 11 | import ( 12 | "fmt" 13 | 14 | "github.com/zalando/rds-health/cmd" 15 | ) 16 | 17 | var ( 18 | // See https://goreleaser.com/cookbooks/using-main.version/ 19 | version = "dev" 20 | commit = "unknown" 21 | date = "unknown" 22 | ) 23 | 24 | func main() { 25 | cmd.Execute(fmt.Sprintf("rds-health/%s (%s), %s", version, commit[:7], date)) 26 | } 27 | -------------------------------------------------------------------------------- /.zappr.yaml: -------------------------------------------------------------------------------- 1 | approvals: 2 | groups: 3 | zalando: 4 | minimum: 2 5 | from: 6 | orgs: 7 | - zalando 8 | 9 | maintainers: 10 | minimum: 2 11 | from: 12 | users: 13 | - fogfish 14 | - pioneerit 15 | - larry-dalmeida 16 | - tcondeixa 17 | - remychantenay 18 | 19 | # mandatory pull request labels 20 | pull-request: 21 | labels: 22 | additional: true 23 | oneOf: 24 | - architectural 25 | - major 26 | - minor 27 | - bugfix 28 | - documentation 29 | - dependencies 30 | 31 | # one of [code, doc, config, tools, secrets] 32 | X-Zalando-Type: code 33 | X-Zalando-Team: "wardrobefi" 34 | -------------------------------------------------------------------------------- /.github/workflows/check-code.yml: -------------------------------------------------------------------------------- 1 | ## 2 | ## Check quality of source code 3 | ## 4 | name: check 5 | on: 6 | pull_request: 7 | types: 8 | - opened 9 | - synchronize 10 | 11 | jobs: 12 | code: 13 | runs-on: ubuntu-latest 14 | steps: 15 | 16 | ## actions/setup-go@v5 17 | - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 18 | with: 19 | go-version: "1.21" 20 | 21 | ## actions/checkout@v4.1.1 22 | - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 23 | 24 | ## dominikh/staticcheck-action@v1.3.1 25 | - uses: dominikh/staticcheck-action@fe1dd0c3658873b46f8c9bb3291096a617310ca6 26 | with: 27 | install-go: false 28 | 
-------------------------------------------------------------------------------- /.github/workflows/check-test.yml: -------------------------------------------------------------------------------- 1 | ## 2 | ## Unit Tests & Coverage 3 | ## 4 | name: test 5 | on: 6 | pull_request: 7 | types: 8 | - opened 9 | - synchronize 10 | 11 | jobs: 12 | 13 | unit: 14 | runs-on: ubuntu-latest 15 | steps: 16 | 17 | ## actions/setup-go@v5 18 | - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 19 | with: 20 | go-version: "1.21" 21 | 22 | ## actions/checkout@v4.1.1 23 | - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 24 | 25 | - name: go build 26 | run: | 27 | go build ./... 28 | 29 | - name: go test 30 | run: | 31 | go test -v $(go list ./... | grep -v /examples/) 32 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This document contains a list of maintainers for the `rds-health` project. If you're interested in contributing to the project, see [CONTRIBUTING.md](./CONTRIBUTING.md). 
// Getter resolves a key to its value. It is the slow path a Cache falls back
// to when the key has not been seen before.
type Getter[K comparable, V any] interface {
	Lookup(context.Context, K) (V, error)
}

// Cache memoizes successful Getter lookups in an in-memory map.
//
// The map is accessed without any locking, so a Cache must not be shared
// between goroutines without external synchronization. Create instances with
// New; the zero value has a nil map and cannot store entries.
type Cache[K comparable, V any] struct {
	keyval map[K]V
	getter Getter[K, V]
}

// New returns an empty Cache that delegates misses to getter.
func New[K comparable, V any](getter Getter[K, V]) *Cache[K, V] {
	return &Cache[K, V]{
		keyval: make(map[K]V),
		getter: getter,
	}
}

// Lookup returns the value for key, consulting the underlying Getter only on
// a cache miss.
//
// A successful result is stored and served from memory on later calls. When
// the Getter fails, its error is returned together with the zero value of V
// and nothing is cached, so the next call for the same key retries the Getter.
func (c Cache[K, V]) Lookup(ctx context.Context, key K) (V, error) {
	if v, has := c.keyval[key]; has {
		return v, nil
	}

	v, err := c.getter.Lookup(ctx, key)
	if err != nil {
		// Propagate the failure: returning a nil error here would make the
		// zero value indistinguishable from a genuine result.
		var zero V
		return zero, err
	}

	c.keyval[key] = v

	return v, nil
}
// Sample is a single observation of a time series: a timestamp and the value
// measured at that moment. It hides the AWS data point type from consumers.
type Sample interface {
	T() time.Time
	X() float64
}

// Samples is an ordered sequence of observations.
type Samples []Sample

// ToSeq extracts the measured values, in order, dropping the timestamps.
// The result always has exactly one element per sample and is never nil.
func (s Samples) ToSeq() []float64 {
	values := make([]float64, 0, len(s))
	for idx := range s {
		values = append(values, s[idx].X())
	}
	return values
}
issues and optimization within AWS RDS. 49 | # license: MIT 50 | # test: | 51 | # system "#{bin}/rds-health --version" 52 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Zalando SE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | ## 2 | ## Build the main branch 3 | ## 4 | name: build 5 | on: 6 | push: 7 | branches: 8 | - main 9 | - /refs/heads/main 10 | 11 | jobs: 12 | 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | 17 | ## actions/setup-go@v5 18 | - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 19 | with: 20 | go-version: "1.21" 21 | 22 | ## actions/checkout@v4.1.1 23 | - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: go build 28 | run: | 29 | go build ./... 30 | 31 | - name: go test 32 | run: | 33 | go test -v ./... 34 | 35 | ## reecetech/version-increment@2023.10.2 36 | - uses: reecetech/version-increment@71036b212bbdc100b48aae069870f10953433346 37 | id: version 38 | with: 39 | scheme: semver 40 | increment: patch 41 | 42 | - name: tag version 43 | run: | 44 | git config user.name "GitHub Actions" 45 | git config user.email "github-actions@users.noreply.github.com" 46 | git tag ${{ steps.version.outputs.v-version }} 47 | git push origin -u ${{ steps.version.outputs.v-version }} 48 | 49 | ## goreleaser/goreleaser-action@v5 50 | - uses: goreleaser/goreleaser-action@7ec5c2b0c6cdda6e8bbb49444bc797dd33d74dd8 51 | with: 52 | distribution: goreleaser 53 | version: latest 54 | args: release 55 | env: 56 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 57 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/zalando/rds-health 2 | 3 | go 1.22 4 | 5 | toolchain go1.22.0 6 | 7 | require ( 8 | github.com/aws/aws-sdk-go-v2 v1.32.2 9 | github.com/aws/aws-sdk-go-v2/config v1.27.37 10 | github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0 11 | github.com/aws/aws-sdk-go-v2/service/pi 
v1.28.1 12 | github.com/aws/aws-sdk-go-v2/service/rds v1.85.0 13 | github.com/lynn9388/supsub v0.0.0-20210304091550-458423b0e16a 14 | github.com/montanaflynn/stats v0.7.1 15 | github.com/schollz/progressbar/v3 v3.16.0 16 | github.com/spf13/cobra v1.8.1 17 | go.uber.org/mock v0.4.0 18 | ) 19 | 20 | require ( 21 | github.com/aws/aws-sdk-go-v2/credentials v1.17.35 // indirect 22 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14 // indirect 23 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.21 // indirect 24 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.21 // indirect 25 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect 26 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5 // indirect 27 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20 // indirect 28 | github.com/aws/aws-sdk-go-v2/service/sso v1.23.1 // indirect 29 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1 // indirect 30 | github.com/aws/aws-sdk-go-v2/service/sts v1.31.1 // indirect 31 | github.com/aws/smithy-go v1.22.0 // indirect 32 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 33 | github.com/jmespath/go-jmespath v0.4.0 // indirect 34 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect 35 | github.com/rivo/uniseg v0.4.7 // indirect 36 | github.com/spf13/pflag v1.0.5 // indirect 37 | golang.org/x/sys v0.25.0 // indirect 38 | golang.org/x/term v0.24.0 // indirect 39 | ) 40 | -------------------------------------------------------------------------------- /cmd/list.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cmd 10 | 11 | import ( 12 | "fmt" 13 | 14 | "github.com/spf13/cobra" 15 | "github.com/zalando/rds-health/internal/show" 16 | "github.com/zalando/rds-health/internal/show/minimal" 17 | "github.com/zalando/rds-health/internal/show/verbose" 18 | "github.com/zalando/rds-health/internal/types" 19 | ) 20 | 21 | func init() { 22 | rootCmd.AddCommand(listCmd) 23 | listCmd.InheritedFlags().SetAnnotation("database", cobra.BashCompOneRequiredFlag, []string{"false"}) 24 | listCmd.InheritedFlags().SetAnnotation("interval", cobra.BashCompOneRequiredFlag, []string{"false"}) 25 | } 26 | 27 | var listCmd = &cobra.Command{ 28 | Use: "list", 29 | Short: "list all database instances and clusters in AWS account", 30 | Example: ` 31 | rds-health list 32 | `, 33 | SilenceUsage: true, 34 | RunE: WithService(list), 35 | PostRun: listPost, 36 | } 37 | 38 | func list(cmd *cobra.Command, args []string, api Service) error { 39 | out := minimal.ShowConfigRegion 40 | switch { 41 | case outVerbose: 42 | out = verbose.ShowConfigRegion 43 | case outSilent: 44 | out = show.None[types.Region]() 45 | case outJsonify: 46 | out = show.JSON[types.Region]() 47 | } 48 | 49 | region, err := api.ShowRegion(cmd.Context()) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | if len(region.Clusters)+len(region.Nodes) == 0 { 55 | return fmt.Errorf("no instances are found") 56 | } 57 | 58 | return stdout(out.Show(*region)) 59 | } 60 | 61 | func listPost(cmd *cobra.Command, args []string) { 62 | if !outJsonify { 63 | stderr("\n(use \"rds-health check\" to check health status of instances)\n") 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /cmd/show.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. 
See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cmd 10 | 11 | import ( 12 | "fmt" 13 | "time" 14 | 15 | "github.com/spf13/cobra" 16 | "github.com/zalando/rds-health/internal/show" 17 | "github.com/zalando/rds-health/internal/show/minimal" 18 | "github.com/zalando/rds-health/internal/show/verbose" 19 | "github.com/zalando/rds-health/internal/types" 20 | ) 21 | 22 | var ( 23 | showDuration time.Duration 24 | ) 25 | 26 | func init() { 27 | rootCmd.AddCommand(showCmd) 28 | } 29 | 30 | var showCmd = &cobra.Command{ 31 | Use: "show", 32 | Short: "show resource utilization", 33 | Long: "show system resource utilization of RDS instance using AWS Performance Insights", 34 | Example: ` 35 | rds-health show -n name-of-rds-instance -t 7d 36 | rds-health show -n name-of-rds-instance -t 7d -a max 37 | `, 38 | SilenceUsage: true, 39 | PreRunE: usageOpts, 40 | RunE: WithService(showNode), 41 | } 42 | 43 | func usageOpts(cmd *cobra.Command, args []string) (err error) { 44 | showDuration, err = parseInterval() 45 | if err != nil { 46 | return err 47 | } 48 | 49 | if rootDatabase == "" { 50 | return fmt.Errorf("undefined database name") 51 | } 52 | 53 | return nil 54 | } 55 | 56 | func showNode(cmd *cobra.Command, args []string, api Service) error { 57 | var out show.Printer[types.StatusNode] = minimal.ShowValueNode 58 | switch { 59 | case outVerbose: 60 | out = verbose.ShowValueNode 61 | case outSilent: 62 | out = show.None[types.StatusNode]() 63 | case outJsonify: 64 | out = show.JSON[types.StatusNode]() 65 | } 66 | 67 | usage, err := api.ShowNode(cmd.Context(), rootDatabase, showDuration) 68 | if err != nil { 69 | return err 70 | } 71 | 72 | return stdout(out.Show(*usage)) 73 | } 74 | -------------------------------------------------------------------------------- /internal/insight/insight_test.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 
4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package insight_test 10 | 11 | import ( 12 | "context" 13 | "testing" 14 | "time" 15 | 16 | "github.com/aws/aws-sdk-go-v2/aws" 17 | "github.com/aws/aws-sdk-go-v2/service/pi" 18 | "github.com/aws/aws-sdk-go-v2/service/pi/types" 19 | "github.com/zalando/rds-health/internal/insight" 20 | "github.com/zalando/rds-health/internal/mocks" 21 | "go.uber.org/mock/gomock" 22 | ) 23 | 24 | func TestFetch(t *testing.T) { 25 | ctrl := gomock.NewController(t) 26 | defer ctrl.Finish() 27 | 28 | fixKey := "db.cpu.avg" 29 | fixRet := &pi.GetResourceMetricsOutput{ 30 | MetricList: []types.MetricKeyDataPoints{ 31 | { 32 | Key: &types.ResponseResourceMetricKey{Metric: aws.String(fixKey)}, 33 | DataPoints: []types.DataPoint{ 34 | {Timestamp: aws.Time(time.Now()), Value: aws.Float64(100.0)}, 35 | }, 36 | }, 37 | }, 38 | } 39 | 40 | mock := mocks.NewInsight(ctrl) 41 | mock.EXPECT().GetResourceMetrics(gomock.Any(), gomock.Any()).Return(fixRet, nil) 42 | 43 | sut := insight.New(mock) 44 | 45 | samples, err := sut.Fetch(context.TODO(), "db-XXXXXXXXXXXXXXXXXXXXXXXXXX", 60*time.Minute, fixKey) 46 | switch { 47 | case err != nil: 48 | t.Errorf("should not fail with error %s", err) 49 | case len(samples) != 1: 50 | t.Errorf("should not return multiple metrics %v", samples) 51 | case samples[fixKey] == nil: 52 | t.Errorf("should %v contain samples for %v", samples, fixKey) 53 | case len(samples[fixKey]) != 1: 54 | t.Errorf("should return %v samples of length 1", samples[fixKey]) 55 | case samples[fixKey][0].X() != *fixRet.MetricList[0].DataPoints[0].Value: 56 | t.Errorf("should return %v samples with first value %v", samples[fixKey], *fixRet.MetricList[0].DataPoints[0].Value) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /internal/cluster/cluster_test.go: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cluster_test 10 | 11 | import ( 12 | "context" 13 | "testing" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/rds" 17 | rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" 18 | "github.com/zalando/rds-health/internal/cluster" 19 | "github.com/zalando/rds-health/internal/mocks" 20 | "go.uber.org/mock/gomock" 21 | ) 22 | 23 | func TestLookup(t *testing.T) { 24 | ctrl := gomock.NewController(t) 25 | defer ctrl.Finish() 26 | 27 | fix := &rds.DescribeDBClustersOutput{ 28 | DBClusters: []rdstypes.DBCluster{ 29 | { 30 | DBClusterIdentifier: aws.String("test-db"), 31 | Engine: aws.String("postgres"), 32 | EngineVersion: aws.String("13.14"), 33 | DBClusterMembers: []rdstypes.DBClusterMember{ 34 | { 35 | DBInstanceIdentifier: aws.String("test-1"), 36 | IsClusterWriter: aws.Bool(true), 37 | }, 38 | { 39 | DBInstanceIdentifier: aws.String("test-2"), 40 | IsClusterWriter: aws.Bool(false), 41 | }, 42 | }, 43 | }, 44 | }, 45 | } 46 | 47 | mock := mocks.NewCluster(ctrl) 48 | mock.EXPECT().DescribeDBClusters(gomock.Any(), gomock.Any()).Return(fix, nil) 49 | 50 | sut := cluster.New(mock) 51 | 52 | seq, err := sut.Lookup(context.TODO()) 53 | switch { 54 | case err != nil: 55 | t.Errorf("should not failed with error %s", err) 56 | case len(seq) == 0: 57 | t.Errorf("should return db instances") 58 | case seq[0].ID != "test-db": 59 | t.Errorf("should not return unexpected value |%s|", seq[0]) 60 | case len(seq[0].Writer) == 0: 61 | t.Errorf("should have writer nodes") 62 | case len(seq[0].Reader) == 0: 63 | t.Errorf("should have reader nodes") 64 | } 65 | } 66 | 
-------------------------------------------------------------------------------- /internal/mocks/insight.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: github.com/zalando/rds-health/internal/insight (interfaces: Provider) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | pi "github.com/aws/aws-sdk-go-v2/service/pi" 12 | gomock "go.uber.org/mock/gomock" 13 | ) 14 | 15 | // Insight is a mock of Provider interface. 16 | type Insight struct { 17 | ctrl *gomock.Controller 18 | recorder *InsightMockRecorder 19 | } 20 | 21 | // InsightMockRecorder is the mock recorder for Insight. 22 | type InsightMockRecorder struct { 23 | mock *Insight 24 | } 25 | 26 | // NewInsight creates a new mock instance. 27 | func NewInsight(ctrl *gomock.Controller) *Insight { 28 | mock := &Insight{ctrl: ctrl} 29 | mock.recorder = &InsightMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *Insight) EXPECT() *InsightMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // GetResourceMetrics mocks base method. 39 | func (m *Insight) GetResourceMetrics(arg0 context.Context, arg1 *pi.GetResourceMetricsInput, arg2 ...func(*pi.Options)) (*pi.GetResourceMetricsOutput, error) { 40 | m.ctrl.T.Helper() 41 | varargs := []interface{}{arg0, arg1} 42 | for _, a := range arg2 { 43 | varargs = append(varargs, a) 44 | } 45 | ret := m.ctrl.Call(m, "GetResourceMetrics", varargs...) 46 | ret0, _ := ret[0].(*pi.GetResourceMetricsOutput) 47 | ret1, _ := ret[1].(error) 48 | return ret0, ret1 49 | } 50 | 51 | // GetResourceMetrics indicates an expected call of GetResourceMetrics. 
52 | func (mr *InsightMockRecorder) GetResourceMetrics(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { 53 | mr.mock.ctrl.T.Helper() 54 | varargs := append([]interface{}{arg0, arg1}, arg2...) 55 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetResourceMetrics", reflect.TypeOf((*Insight)(nil).GetResourceMetrics), varargs...) 56 | } 57 | -------------------------------------------------------------------------------- /internal/mocks/cluster.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: github.com/zalando/rds-health/internal/cluster (interfaces: Provider) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | rds "github.com/aws/aws-sdk-go-v2/service/rds" 12 | gomock "go.uber.org/mock/gomock" 13 | ) 14 | 15 | // Cluster is a mock of Provider interface. 16 | type Cluster struct { 17 | ctrl *gomock.Controller 18 | recorder *ClusterMockRecorder 19 | } 20 | 21 | // ClusterMockRecorder is the mock recorder for Cluster. 22 | type ClusterMockRecorder struct { 23 | mock *Cluster 24 | } 25 | 26 | // NewCluster creates a new mock instance. 27 | func NewCluster(ctrl *gomock.Controller) *Cluster { 28 | mock := &Cluster{ctrl: ctrl} 29 | mock.recorder = &ClusterMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *Cluster) EXPECT() *ClusterMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // DescribeDBClusters mocks base method. 
39 | func (m *Cluster) DescribeDBClusters(arg0 context.Context, arg1 *rds.DescribeDBClustersInput, arg2 ...func(*rds.Options)) (*rds.DescribeDBClustersOutput, error) { 40 | m.ctrl.T.Helper() 41 | varargs := []interface{}{arg0, arg1} 42 | for _, a := range arg2 { 43 | varargs = append(varargs, a) 44 | } 45 | ret := m.ctrl.Call(m, "DescribeDBClusters", varargs...) 46 | ret0, _ := ret[0].(*rds.DescribeDBClustersOutput) 47 | ret1, _ := ret[1].(error) 48 | return ret0, ret1 49 | } 50 | 51 | // DescribeDBClusters indicates an expected call of DescribeDBClusters. 52 | func (mr *ClusterMockRecorder) DescribeDBClusters(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { 53 | mr.mock.ctrl.T.Helper() 54 | varargs := append([]interface{}{arg0, arg1}, arg2...) 55 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBClusters", reflect.TypeOf((*Cluster)(nil).DescribeDBClusters), varargs...) 56 | } 57 | -------------------------------------------------------------------------------- /internal/mocks/database.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: github.com/zalando/rds-health/internal/database (interfaces: Provider) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | rds "github.com/aws/aws-sdk-go-v2/service/rds" 12 | gomock "go.uber.org/mock/gomock" 13 | ) 14 | 15 | // Database is a mock of Provider interface. 16 | type Database struct { 17 | ctrl *gomock.Controller 18 | recorder *DatabaseMockRecorder 19 | } 20 | 21 | // DatabaseMockRecorder is the mock recorder for Database. 22 | type DatabaseMockRecorder struct { 23 | mock *Database 24 | } 25 | 26 | // NewDatabase creates a new mock instance. 
27 | func NewDatabase(ctrl *gomock.Controller) *Database { 28 | mock := &Database{ctrl: ctrl} 29 | mock.recorder = &DatabaseMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *Database) EXPECT() *DatabaseMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // DescribeDBInstances mocks base method. 39 | func (m *Database) DescribeDBInstances(arg0 context.Context, arg1 *rds.DescribeDBInstancesInput, arg2 ...func(*rds.Options)) (*rds.DescribeDBInstancesOutput, error) { 40 | m.ctrl.T.Helper() 41 | varargs := []interface{}{arg0, arg1} 42 | for _, a := range arg2 { 43 | varargs = append(varargs, a) 44 | } 45 | ret := m.ctrl.Call(m, "DescribeDBInstances", varargs...) 46 | ret0, _ := ret[0].(*rds.DescribeDBInstancesOutput) 47 | ret1, _ := ret[1].(error) 48 | return ret0, ret1 49 | } 50 | 51 | // DescribeDBInstances indicates an expected call of DescribeDBInstances. 52 | func (mr *DatabaseMockRecorder) DescribeDBInstances(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { 53 | mr.mock.ctrl.T.Helper() 54 | varargs := append([]interface{}{arg0, arg1}, arg2...) 55 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeDBInstances", reflect.TypeOf((*Database)(nil).DescribeDBInstances), varargs...) 56 | } 57 | -------------------------------------------------------------------------------- /internal/mocks/instance.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: github.com/zalando/rds-health/internal/instance (interfaces: Provider) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | ec2 "github.com/aws/aws-sdk-go-v2/service/ec2" 12 | gomock "go.uber.org/mock/gomock" 13 | ) 14 | 15 | // Instance is a mock of Provider interface. 
16 | type Instance struct { 17 | ctrl *gomock.Controller 18 | recorder *InstanceMockRecorder 19 | } 20 | 21 | // InstanceMockRecorder is the mock recorder for Instance. 22 | type InstanceMockRecorder struct { 23 | mock *Instance 24 | } 25 | 26 | // NewInstance creates a new mock instance. 27 | func NewInstance(ctrl *gomock.Controller) *Instance { 28 | mock := &Instance{ctrl: ctrl} 29 | mock.recorder = &InstanceMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *Instance) EXPECT() *InstanceMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // DescribeInstanceTypes mocks base method. 39 | func (m *Instance) DescribeInstanceTypes(arg0 context.Context, arg1 *ec2.DescribeInstanceTypesInput, arg2 ...func(*ec2.Options)) (*ec2.DescribeInstanceTypesOutput, error) { 40 | m.ctrl.T.Helper() 41 | varargs := []interface{}{arg0, arg1} 42 | for _, a := range arg2 { 43 | varargs = append(varargs, a) 44 | } 45 | ret := m.ctrl.Call(m, "DescribeInstanceTypes", varargs...) 46 | ret0, _ := ret[0].(*ec2.DescribeInstanceTypesOutput) 47 | ret1, _ := ret[1].(error) 48 | return ret0, ret1 49 | } 50 | 51 | // DescribeInstanceTypes indicates an expected call of DescribeInstanceTypes. 52 | func (mr *InstanceMockRecorder) DescribeInstanceTypes(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { 53 | mr.mock.ctrl.T.Helper() 54 | varargs := append([]interface{}{arg0, arg1}, arg2...) 55 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeInstanceTypes", reflect.TypeOf((*Instance)(nil).DescribeInstanceTypes), varargs...) 56 | } 57 | -------------------------------------------------------------------------------- /internal/show/compact/compact.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. 
See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package compact 10 | 11 | import ( 12 | "bytes" 13 | "fmt" 14 | 15 | "github.com/zalando/rds-health/internal/show" 16 | "github.com/zalando/rds-health/internal/types" 17 | ) 18 | 19 | // 20 | // Compact rendering, 2 lines per entity. 21 | // 22 | 23 | var ( 24 | showMinMax = show.FromShow[types.MinMax]( 25 | func(mm types.MinMax) ([]byte, error) { 26 | text := fmt.Sprintf("min: %4.2f\tavg: %4.2f\tmax: %4.2f", mm.Min, mm.Avg, mm.Max) 27 | return []byte(text), nil 28 | }, 29 | ) 30 | 31 | showHealthRule = show.FromShow[types.Status]( 32 | func(status types.Status) ([]byte, error) { 33 | if status.Code > types.STATUS_CODE_SUCCESS { 34 | rate := 100.0 - *status.SuccessRate 35 | soft, _ := showMinMax.Show(*status.SoftMM) 36 | 37 | b := &bytes.Buffer{} 38 | b.WriteString(fmt.Sprintf("%s %6.2f%% ¦ %s: %s\n", status.Code, rate, status.Rule.ID, status.Rule.About)) 39 | b.WriteString(fmt.Sprintf("\t%6s ¦ %s\n\n", status.Rule.Unit, string(soft))) 40 | return b.Bytes(), nil 41 | } 42 | 43 | return nil, nil 44 | }, 45 | ) 46 | 47 | showHealthNode = show.FromShow[types.StatusNode]( 48 | func(node types.StatusNode) ([]byte, error) { 49 | status := show.StatusText(node.Status) 50 | text := fmt.Sprintf("%s %s (%s, %s)\n\n", status, node.Node.Name, node.Node.Engine, node.Node.Type) 51 | return []byte(text), nil 52 | }, 53 | ) 54 | 55 | // FAIL example-database-a 56 | // FAILED 99.9% ¦ C01: cpu utilization 57 | // % ¦ min: 17.5 avg: 25.0 max: 80.0 58 | // 59 | ShowHealthNode = show.Printer2[types.StatusNode, types.StatusNode, []types.Status]{ 60 | A: showHealthNode, 61 | B: show.Seq[types.Status]{T: showHealthRule}, 62 | UnApply2: func(sn types.StatusNode) (types.StatusNode, []types.Status) { 63 | return sn, sn.Checks 64 | }, 65 | } 66 | ) 67 | -------------------------------------------------------------------------------- /internal/instance/instance.go: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package instance 10 | 11 | import ( 12 | "context" 13 | "strings" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/ec2" 17 | ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" 18 | "github.com/zalando/rds-health/internal/types" 19 | ) 20 | 21 | //go:generate mockgen -destination=../mocks/instance.go -package=mocks -mock_names Provider=Instance . Provider 22 | type Provider interface { 23 | DescribeInstanceTypes( 24 | context.Context, 25 | *ec2.DescribeInstanceTypesInput, 26 | ...func(*ec2.Options), 27 | ) (*ec2.DescribeInstanceTypesOutput, error) 28 | } 29 | 30 | type Instance struct { 31 | provider Provider 32 | } 33 | 34 | func New(provider Provider) *Instance { 35 | return &Instance{provider: provider} 36 | } 37 | 38 | // Lookup metadata about the database 39 | func (in *Instance) Lookup(ctx context.Context, dbInstanceType string) (*types.Compute, error) { 40 | dbInstanceType = strings.TrimPrefix(dbInstanceType, "db.") 41 | 42 | spec, err := in.provider.DescribeInstanceTypes(ctx, 43 | &ec2.DescribeInstanceTypesInput{ 44 | InstanceTypes: []ec2types.InstanceType{ 45 | ec2types.InstanceType(dbInstanceType), 46 | }, 47 | }, 48 | ) 49 | if err != nil { 50 | return nil, err 51 | } 52 | 53 | if len(spec.InstanceTypes) != 1 { 54 | return nil, nil 55 | } 56 | 57 | instance := spec.InstanceTypes[0] 58 | 59 | compute := types.Compute{} 60 | 61 | if mem := instance.MemoryInfo; mem != nil { 62 | compute.Memory = &types.Storage{ 63 | Type: "memory", 64 | Size: types.BiB(aws.ToInt64(mem.SizeInMiB)) * types.MiB, 65 | } 66 | } 67 | 68 | if cpu := instance.VCpuInfo; cpu != nil { 69 | compute.CPU = &types.CPU{ 70 | Cores: 
int(aws.ToInt32(cpu.DefaultVCpus)), 71 | } 72 | } 73 | 74 | if proc := instance.ProcessorInfo; proc != nil && compute.CPU != nil { 75 | compute.CPU.Clock = types.GHz(aws.ToFloat64(proc.SustainedClockSpeedInGhz)) 76 | } 77 | 78 | return &compute, nil 79 | } 80 | -------------------------------------------------------------------------------- /internal/instance/instance_test.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package instance_test 10 | 11 | import ( 12 | "context" 13 | "testing" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/ec2" 17 | ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" 18 | "github.com/zalando/rds-health/internal/instance" 19 | "github.com/zalando/rds-health/internal/mocks" 20 | "go.uber.org/mock/gomock" 21 | ) 22 | 23 | func TestLookupSuccess(t *testing.T) { 24 | ctrl := gomock.NewController(t) 25 | defer ctrl.Finish() 26 | 27 | fix := &ec2.DescribeInstanceTypesOutput{ 28 | InstanceTypes: []ec2types.InstanceTypeInfo{ 29 | { 30 | MemoryInfo: &ec2types.MemoryInfo{SizeInMiB: aws.Int64(4 * 1024)}, 31 | VCpuInfo: &ec2types.VCpuInfo{DefaultVCpus: aws.Int32(2)}, 32 | ProcessorInfo: &ec2types.ProcessorInfo{SustainedClockSpeedInGhz: aws.Float64(2.20)}, 33 | }, 34 | }, 35 | } 36 | 37 | mock := mocks.NewInstance(ctrl) 38 | mock.EXPECT().DescribeInstanceTypes(gomock.Any(), gomock.Any()).Return(fix, nil) 39 | 40 | sut := instance.New(mock) 41 | 42 | compute, err := sut.Lookup(context.TODO(), "db.t2.small") 43 | switch { 44 | case err != nil: 45 | t.Errorf("should not failed with error %s", err) 46 | case compute == nil: 47 | t.Errorf("should not return nil") 48 | case compute.String() != "2 vcpu 2.20 GHz, mem 4 GiB": 49 | 
t.Errorf("should not return unexpected value |%s|", compute) 50 | } 51 | } 52 | 53 | func TestLookupNotFound(t *testing.T) { 54 | ctrl := gomock.NewController(t) 55 | defer ctrl.Finish() 56 | 57 | fix := &ec2.DescribeInstanceTypesOutput{ 58 | InstanceTypes: []ec2types.InstanceTypeInfo{}, 59 | } 60 | 61 | mock := mocks.NewInstance(ctrl) 62 | mock.EXPECT().DescribeInstanceTypes(gomock.Any(), gomock.Any()).Return(fix, nil) 63 | 64 | sut := instance.New(mock) 65 | 66 | compute, err := sut.Lookup(context.TODO(), "db.t2.small") 67 | switch { 68 | case err != nil: 69 | t.Errorf("should not failed with error %s", err) 70 | case compute != nil: 71 | t.Errorf("should return nil") 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /internal/cluster/cluster.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cluster 10 | 11 | import ( 12 | "context" 13 | 14 | "github.com/aws/aws-sdk-go-v2/aws" 15 | "github.com/aws/aws-sdk-go-v2/service/rds" 16 | rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" 17 | "github.com/zalando/rds-health/internal/types" 18 | ) 19 | 20 | //go:generate mockgen -destination=../mocks/cluster.go -package=mocks -mock_names Provider=Cluster . 
Provider 21 | type Provider interface { 22 | DescribeDBClusters( 23 | context.Context, 24 | *rds.DescribeDBClustersInput, 25 | ...func(*rds.Options), 26 | ) (*rds.DescribeDBClustersOutput, error) 27 | } 28 | 29 | type Cluster struct { 30 | provider Provider 31 | } 32 | 33 | func New(provider Provider) *Cluster { 34 | return &Cluster{provider: provider} 35 | } 36 | 37 | func (api Cluster) Lookup(ctx context.Context) ([]types.Cluster, error) { 38 | clusters := make([]types.Cluster, 0) 39 | 40 | var cursor *string 41 | for do := true; do; do = cursor != nil { 42 | bag, err := api.provider.DescribeDBClusters(ctx, 43 | &rds.DescribeDBClustersInput{ 44 | Marker: cursor, 45 | }, 46 | ) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | for _, c := range bag.DBClusters { 52 | clusters = append(clusters, api.toCluster(c)) 53 | } 54 | cursor = bag.Marker 55 | } 56 | 57 | return clusters, nil 58 | } 59 | 60 | func (api Cluster) toCluster(c rdstypes.DBCluster) types.Cluster { 61 | cluster := types.Cluster{ 62 | ID: aws.ToString(c.DBClusterIdentifier), 63 | Reader: make([]types.Node, 0), 64 | Writer: make([]types.Node, 0), 65 | } 66 | 67 | if aws.ToString(c.Engine) != "" { 68 | cluster.Engine = &types.Engine{ 69 | ID: aws.ToString(c.Engine), 70 | Version: aws.ToString(c.EngineVersion), 71 | } 72 | } 73 | 74 | for _, member := range c.DBClusterMembers { 75 | node := types.Node{ 76 | Name: aws.ToString(member.DBInstanceIdentifier), 77 | } 78 | if aws.ToBool(member.IsClusterWriter) { 79 | cluster.Writer = append(cluster.Writer, node) 80 | } else { 81 | cluster.Reader = append(cluster.Reader, node) 82 | } 83 | } 84 | 85 | return cluster 86 | } 87 | -------------------------------------------------------------------------------- /internal/discovery/discovery.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT 
license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package discovery 10 | 11 | import ( 12 | "context" 13 | "sort" 14 | 15 | "github.com/zalando/rds-health/internal/cache" 16 | "github.com/zalando/rds-health/internal/cluster" 17 | "github.com/zalando/rds-health/internal/database" 18 | "github.com/zalando/rds-health/internal/instance" 19 | "github.com/zalando/rds-health/internal/types" 20 | ) 21 | 22 | // 23 | // Discovery of database instances in the cluster 24 | // 25 | 26 | type Discovery struct { 27 | cluster *cluster.Cluster 28 | database *database.Database 29 | instance *cache.Cache[string, *types.Compute] 30 | } 31 | 32 | func New( 33 | cp cluster.Provider, 34 | dp database.Provider, 35 | ip instance.Provider, 36 | ) *Discovery { 37 | return &Discovery{ 38 | cluster: cluster.New(cp), 39 | database: database.New(dp), 40 | instance: cache.New(instance.New(ip)), 41 | } 42 | } 43 | 44 | func (service *Discovery) LookupAll(ctx context.Context) ([]types.Cluster, []types.Node, error) { 45 | allNodes, err := service.database.LookupAll(ctx) 46 | if err != nil { 47 | return nil, nil, err 48 | } 49 | 50 | mapNodes := make(map[string]types.Node) 51 | for i := 0; i < len(allNodes); i++ { 52 | node := allNodes[i] 53 | node.Compute, _ = service.instance.Lookup(context.Background(), node.Type) 54 | mapNodes[node.Name] = node 55 | } 56 | 57 | clusters, err := service.cluster.Lookup(context.Background()) 58 | if err != nil { 59 | return nil, nil, err 60 | } 61 | 62 | for i := 0; i < len(clusters); i++ { 63 | writers := make([]types.Node, len(clusters[i].Writer)) 64 | for k, w := range clusters[i].Writer { 65 | if node, has := mapNodes[w.Name]; has { 66 | writers[k] = node 67 | delete(mapNodes, w.Name) 68 | } 69 | } 70 | clusters[i].Writer = writers 71 | 72 | readers := make([]types.Node, len(clusters[i].Reader)) 73 | for k, r := range clusters[i].Reader { 74 | if node, has := mapNodes[r.Name]; has { 75 | node.ReadOnly = true 
76 | readers[k] = node 77 | delete(mapNodes, r.Name) 78 | } 79 | } 80 | clusters[i].Reader = readers 81 | } 82 | 83 | nodes := make([]types.Node, 0, len(mapNodes)) 84 | for _, node := range mapNodes { 85 | nodes = append(nodes, node) 86 | } 87 | 88 | sort.SliceStable(clusters, func(i, j int) bool { return clusters[i].ID < clusters[j].ID }) 89 | sort.SliceStable(nodes, func(i, j int) bool { return nodes[i].Name < nodes[j].Name }) 90 | return clusters, nodes, nil 91 | } 92 | -------------------------------------------------------------------------------- /internal/types/stats.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package types 10 | 11 | import ( 12 | "encoding/json" 13 | "fmt" 14 | "math" 15 | 16 | "github.com/montanaflynn/stats" 17 | ) 18 | 19 | type Percentile struct { 20 | P50 float64 `json:"p50"` 21 | P95 float64 `json:"p95"` 22 | P99 float64 `json:"p99"` 23 | P999 float64 `json:"p999"` 24 | } 25 | 26 | func (x Percentile) String() string { 27 | return fmt.Sprintf("p50: %-8.2f p95: %-8.2f p99: %-8.2f p999: %-8.2f", 28 | x.P50, x.P95, x.P99, x.P999) 29 | } 30 | 31 | func (x Percentile) MarshalJSON() ([]byte, error) { 32 | return json.Marshal(map[string]any{ 33 | "p50": encodeVal(x.P50), 34 | "p95": encodeVal(x.P95), 35 | "p99": encodeVal(x.P99), 36 | "p999": encodeVal(x.P999), 37 | }) 38 | } 39 | 40 | func NewPercentile(seq []float64) Percentile { 41 | return Percentile{ 42 | P50: maybeNaN(stats.Percentile(seq, 50.0)), 43 | P95: maybeNaN(stats.Percentile(seq, 95.0)), 44 | P99: maybeNaN(stats.Percentile(seq, 99.0)), 45 | P999: maybeNaN(stats.Percentile(seq, 99.9)), 46 | } 47 | } 48 | 49 | type MinMax struct { 50 | Min, Avg, Max float64 51 | } 52 | 53 | func (x MinMax) String() string { 
// encodeVal converts a float64 into a value that encoding/json can marshal.
//
// encoding/json rejects NaN and both infinities, so these are replaced by the
// strings "NaN", "+Inf" and "-Inf". Finite values are returned unchanged as
// float64.
func encodeVal(x float64) any {
	switch {
	case math.IsNaN(x):
		return "NaN"
	case math.IsInf(x, 1):
		return "+Inf"
	case math.IsInf(x, -1):
		return "-Inf"
	}

	return x
}
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package database_test 10 | 11 | import ( 12 | "context" 13 | "testing" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/rds" 17 | rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" 18 | "github.com/zalando/rds-health/internal/database" 19 | "github.com/zalando/rds-health/internal/mocks" 20 | "go.uber.org/mock/gomock" 21 | ) 22 | 23 | func TestLookupAllSuccess(t *testing.T) { 24 | ctrl := gomock.NewController(t) 25 | defer ctrl.Finish() 26 | 27 | fix := &rds.DescribeDBInstancesOutput{ 28 | DBInstances: []rdstypes.DBInstance{ 29 | { 30 | DBInstanceIdentifier: aws.String("test-db"), 31 | DBInstanceClass: aws.String("db.t2.small"), 32 | Engine: aws.String("postgres"), 33 | EngineVersion: aws.String("13.14"), 34 | StorageType: aws.String(""), 35 | AllocatedStorage: aws.Int32(100), 36 | AvailabilityZone: aws.String("eu-central-1a"), 37 | SecondaryAvailabilityZone: nil, 38 | }, 39 | }, 40 | } 41 | 42 | mock := mocks.NewDatabase(ctrl) 43 | mock.EXPECT().DescribeDBInstances(gomock.Any(), gomock.Any()).Return(fix, nil) 44 | 45 | sut := database.New(mock) 46 | 47 | seq, err := sut.LookupAll(context.TODO()) 48 | switch { 49 | case err != nil: 50 | t.Errorf("should not failed with error %s", err) 51 | case len(seq) == 0: 52 | t.Errorf("should return db instances") 53 | case seq[0].String() != "db.t2.small postgres v13.14 (storage 100 GiB)": 54 | t.Errorf("should not return unexpected value |%s|", seq[0]) 55 | } 56 | } 57 | 58 | func TestLookupSuccess(t *testing.T) { 59 | ctrl := gomock.NewController(t) 60 | defer ctrl.Finish() 61 | 62 | fix := &rds.DescribeDBInstancesOutput{ 63 | DBInstances: []rdstypes.DBInstance{ 64 | { 65 | DBInstanceIdentifier: aws.String("test-db"), 66 | DBInstanceClass: aws.String("db.t2.small"), 67 | Engine: aws.String("postgres"), 68 | EngineVersion: aws.String("13.14"), 69 | StorageType: aws.String(""), 70 | AllocatedStorage: aws.Int32(100), 71 | 
AvailabilityZone: aws.String("eu-central-1a"), 72 | SecondaryAvailabilityZone: nil, 73 | }, 74 | }, 75 | } 76 | 77 | mock := mocks.NewDatabase(ctrl) 78 | mock.EXPECT().DescribeDBInstances(gomock.Any(), gomock.Any()).Return(fix, nil) 79 | 80 | sut := database.New(mock) 81 | 82 | db, err := sut.Lookup(context.TODO(), "test-db") 83 | switch { 84 | case err != nil: 85 | t.Errorf("should not failed with error %s", err) 86 | case db == nil: 87 | t.Errorf("should return db instances") 88 | case db.String() != "db.t2.small postgres v13.14 (storage 100 GiB)": 89 | t.Errorf("should not return unexpected value |%s|", db) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /cmd/progress.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cmd 10 | 11 | import ( 12 | "context" 13 | "os" 14 | "time" 15 | 16 | "github.com/aws/aws-sdk-go-v2/aws" 17 | "github.com/schollz/progressbar/v3" 18 | "github.com/zalando/rds-health/internal/service" 19 | "github.com/zalando/rds-health/internal/types" 20 | ) 21 | 22 | // 23 | // 24 | 25 | type silentbar int 26 | 27 | func (silentbar) Describe(string) {} 28 | 29 | func spinner[T any](bar *progressbar.ProgressBar, f func() (T, error)) (T, error) { 30 | ch := make(chan bool) 31 | 32 | go func() { 33 | for { 34 | select { 35 | case <-ch: 36 | return 37 | default: 38 | bar.Add(1) 39 | time.Sleep(40 * time.Millisecond) 40 | } 41 | } 42 | }() 43 | 44 | val, err := f() 45 | 46 | ch <- false 47 | bar.Finish() 48 | 49 | return val, err 50 | } 51 | 52 | // 53 | // 54 | 55 | type Service interface { 56 | CheckHealthRegion(ctx context.Context, interval time.Duration) (*types.StatusRegion, error) 57 | CheckHealthNode(ctx context.Context, 
name string, interval time.Duration) (*types.StatusNode, error) 58 | ShowRegion(ctx context.Context) (*types.Region, error) 59 | ShowNode(ctx context.Context, name string, interval time.Duration) (*types.StatusNode, error) 60 | } 61 | 62 | type serviceWithSpinner struct { 63 | Service 64 | bar *progressbar.ProgressBar 65 | } 66 | 67 | func newServiceWithSpinner(conf aws.Config) Service { 68 | bar := progressbar.NewOptions(-1, 69 | progressbar.OptionShowBytes(false), 70 | progressbar.OptionClearOnFinish(), 71 | progressbar.OptionSetWriter(os.Stderr), 72 | progressbar.OptionShowDescriptionAtLineEnd(), 73 | progressbar.OptionSpinnerType(11), 74 | ) 75 | 76 | return serviceWithSpinner{ 77 | Service: service.New(conf, bar), 78 | bar: bar, 79 | } 80 | } 81 | 82 | func (s serviceWithSpinner) CheckHealthRegion(ctx context.Context, interval time.Duration) (*types.StatusRegion, error) { 83 | return spinner(s.bar, func() (*types.StatusRegion, error) { 84 | return s.Service.CheckHealthRegion(ctx, interval) 85 | }) 86 | } 87 | 88 | func (s serviceWithSpinner) CheckHealthNode(ctx context.Context, name string, interval time.Duration) (*types.StatusNode, error) { 89 | return spinner(s.bar, func() (*types.StatusNode, error) { 90 | return s.Service.CheckHealthNode(ctx, name, interval) 91 | }) 92 | 93 | } 94 | 95 | func (s serviceWithSpinner) ShowRegion(ctx context.Context) (*types.Region, error) { 96 | return spinner(s.bar, func() (*types.Region, error) { 97 | return s.Service.ShowRegion(ctx) 98 | }) 99 | } 100 | 101 | func (s serviceWithSpinner) ShowNode(ctx context.Context, name string, interval time.Duration) (*types.StatusNode, error) { 102 | return spinner(s.bar, func() (*types.StatusNode, error) { 103 | return s.Service.ShowNode(ctx, name, interval) 104 | }) 105 | } 106 | -------------------------------------------------------------------------------- /internal/database/database.go: -------------------------------------------------------------------------------- 1 | // 2 | // 
Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package database 10 | 11 | import ( 12 | "context" 13 | "fmt" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/rds" 17 | rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" 18 | "github.com/zalando/rds-health/internal/types" 19 | ) 20 | 21 | //go:generate mockgen -destination=../mocks/database.go -package=mocks -mock_names Provider=Database . Provider 22 | type Provider interface { 23 | DescribeDBInstances( 24 | context.Context, 25 | *rds.DescribeDBInstancesInput, 26 | ...func(*rds.Options), 27 | ) (*rds.DescribeDBInstancesOutput, error) 28 | } 29 | 30 | type Database struct { 31 | provider Provider 32 | } 33 | 34 | func New(provider Provider) *Database { 35 | return &Database{provider: provider} 36 | } 37 | 38 | func (db *Database) LookupAll(ctx context.Context) ([]types.Node, error) { 39 | clusters := make([]types.Node, 0) 40 | 41 | var cursor *string 42 | for do := true; do; do = cursor != nil { 43 | bag, err := db.provider.DescribeDBInstances(ctx, 44 | &rds.DescribeDBInstancesInput{ 45 | Marker: cursor, 46 | }, 47 | ) 48 | if err != nil { 49 | return nil, err 50 | } 51 | 52 | for _, c := range bag.DBInstances { 53 | clusters = append(clusters, db.toNode(c)) 54 | } 55 | cursor = bag.Marker 56 | } 57 | 58 | return clusters, nil 59 | } 60 | 61 | // Lookup database ID using human-friendly name 62 | func (db *Database) Lookup(ctx context.Context, name string) (*types.Node, error) { 63 | val, err := db.provider.DescribeDBInstances(ctx, 64 | &rds.DescribeDBInstancesInput{DBInstanceIdentifier: &name}, 65 | ) 66 | if err != nil { 67 | return nil, err 68 | } 69 | 70 | if len(val.DBInstances) == 0 { 71 | return nil, fmt.Errorf("not found: rds %s", name) 72 | } 73 | 74 | node := db.toNode(val.DBInstances[0]) 
75 | return &node, nil 76 | } 77 | 78 | func (db *Database) toNode(instance rdstypes.DBInstance) types.Node { 79 | engine := types.Engine{ 80 | ID: aws.ToString(instance.Engine), 81 | Version: aws.ToString(instance.EngineVersion), 82 | } 83 | 84 | storage := types.Storage{ 85 | Type: aws.ToString(instance.StorageType), 86 | Size: types.BiB(aws.ToInt32(instance.AllocatedStorage)) * types.GiB, 87 | } 88 | 89 | az := types.AvailabilityZones{} 90 | if instance.AvailabilityZone != nil { 91 | az = append(az, aws.ToString(instance.AvailabilityZone)) 92 | } 93 | 94 | if instance.SecondaryAvailabilityZone != nil { 95 | az = append(az, aws.ToString(instance.SecondaryAvailabilityZone)) 96 | } 97 | 98 | // instance.DbiResourceId 99 | 100 | node := types.Node{ 101 | ID: aws.ToString(instance.DbiResourceId), 102 | Name: aws.ToString(instance.DBInstanceIdentifier), 103 | Type: aws.ToString(instance.DBInstanceClass), 104 | Zones: az, 105 | Engine: &engine, 106 | Storage: &storage, 107 | } 108 | 109 | return node 110 | } 111 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at opensource@zalando.de. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ -------------------------------------------------------------------------------- /internal/types/types_test.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package types_test 10 | 11 | import ( 12 | "fmt" 13 | "testing" 14 | 15 | "github.com/zalando/rds-health/internal/types" 16 | ) 17 | 18 | func TestBiB(t *testing.T) { 19 | for value, expected := range map[types.BiB]string{ 20 | types.BiB(1): "1 bytes", 21 | types.BiB(5): "5 bytes", 22 | types.KiB: "1 KiB", 23 | 5 * types.KiB: "5 KiB", 24 | types.MiB: "1 MiB", 25 | 5 * types.MiB: "5 MiB", 26 | types.GiB: "1 GiB", 27 | 5 * types.GiB: "5 GiB", 28 | types.TiB: "1 TiB", 29 | 5 * types.TiB: "5 TiB", 30 | } { 31 | check(t, value, expected) 32 | } 33 | } 34 | 35 | func TestGHz(t *testing.T) { 36 | for value, expected := range map[types.GHz]string{ 37 | 1.0: "1.00 GHz", 38 | 1.1: "1.10 GHz", 39 | 1.123: "1.12 GHz", 40 | 1.127: "1.13 GHz", 41 | } { 42 | check(t, value, expected) 43 | } 44 | } 45 | 46 | func TestStorage(t *testing.T) { 47 | for value, expected := range map[types.Storage]string{ 48 | {"memory", 4 * types.GiB}: "mem 4 GiB", 49 | } { 50 | check(t, value, expected) 51 | } 52 | } 53 | 54 | func TestCPU(t *testing.T) { 55 | for value, expected := range map[types.CPU]string{ 56 | {4, types.GHz(2.2)}: "4 vcpu 2.20 GHz", 57 | } { 58 | check(t, value, expected) 59 | } 60 | } 61 | 62 | func TestCompute(t *testing.T) { 63 | for value, expected := range map[types.Compute]string{ 64 | {}: "", 65 | { 66 | CPU: &types.CPU{4, 1.2}, 67 | }: "4 vcpu 1.20 GHz", 68 | { 69 | CPU: &types.CPU{4, 1.2}, 70 | Memory: &types.Storage{"memory", 16 * types.GiB}, 71 | }: "4 vcpu 1.20 GHz, mem 16 GiB", 72 | } { 73 | check(t, value, expected) 74 | } 75 | } 76 | 77 | func TestEngine(t *testing.T) { 78 | for value, expected := range map[types.Engine]string{ 79 | {"aurora", "3.4.5"}: "aurora v3.4.5", 80 | } { 81 | check(t, value, expected) 82 | } 83 | } 84 | 85 | func TestNode(t *testing.T) { 86 | for value, expected := range map[*types.Node]string{ 87 | { 88 | ID: "x", 89 | }: "", 90 | { 91 | Type: "db.m5d.large", 92 | }: 
"db.m5d.large", 93 | { 94 | Type: "db.m5d.large", 95 | Engine: &types.Engine{"aurora", "3.4.5"}, 96 | }: "db.m5d.large aurora v3.4.5", 97 | { 98 | Type: "db.m5d.large", 99 | Engine: &types.Engine{"aurora", "3.4.5"}, 100 | Compute: &types.Compute{CPU: &types.CPU{4, 1.2}, Memory: &types.Storage{"memory", 16 * types.GiB}}, 101 | }: "db.m5d.large aurora v3.4.5 (4 vcpu 1.20 GHz, mem 16 GiB)", 102 | { 103 | Type: "db.m5d.large", 104 | Engine: &types.Engine{"aurora", "3.4.5"}, 105 | Compute: &types.Compute{CPU: &types.CPU{4, 1.2}, Memory: &types.Storage{"memory", 16 * types.GiB}}, 106 | Storage: &types.Storage{"io1", 100 * types.GiB}, 107 | }: "db.m5d.large aurora v3.4.5 (4 vcpu 1.20 GHz, mem 16 GiB, storage io1 100 GiB)", 108 | } { 109 | check(t, value, expected) 110 | } 111 | 112 | } 113 | 114 | // 115 | // Helper 116 | // 117 | 118 | func check[T interface{ String() string }](t *testing.T, value T, expected string) { 119 | t.Helper() 120 | 121 | s := fmt.Sprintf("%s", value) 122 | if s != expected { 123 | t.Errorf("%s != %s", value, expected) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /cmd/check.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cmd 10 | 11 | import ( 12 | "os" 13 | "time" 14 | 15 | "github.com/spf13/cobra" 16 | "github.com/zalando/rds-health/internal/show" 17 | "github.com/zalando/rds-health/internal/show/minimal" 18 | "github.com/zalando/rds-health/internal/show/verbose" 19 | "github.com/zalando/rds-health/internal/types" 20 | ) 21 | 22 | var ( 23 | // checkIgnore string 24 | checkDuration time.Duration 25 | checkStatus types.StatusCode 26 | ) 27 | 28 | func init() { 29 | rootCmd.AddCommand(checkCmd) 30 | // checkCmd.Flags().StringVar(&checkIgnore, "ignore", "", "comma separated list of rules to ignore") 31 | } 32 | 33 | var checkCmd = &cobra.Command{ 34 | Use: "check", 35 | Short: "check health status of database instance using AWS Performance Insights service", 36 | Example: ` 37 | rds-health check -n myrds -t 7d 38 | `, 39 | SilenceUsage: true, 40 | PreRunE: checkOpts, 41 | RunE: WithService(check), 42 | PostRunE: checkPost, 43 | } 44 | 45 | func checkOpts(cmd *cobra.Command, args []string) (err error) { 46 | checkDuration, err = parseInterval() 47 | if err != nil { 48 | return err 49 | } 50 | 51 | return nil 52 | } 53 | 54 | func checkPost(cmd *cobra.Command, args []string) error { 55 | if (rootDatabase == "") && !outVerbose && !outJsonify { 56 | stderr("\n(use \"rds-health check -v\" to see details)\n") 57 | } 58 | 59 | if (rootDatabase == "") && outVerbose && !outJsonify { 60 | stderr("\n(use \"rds-health check -n NAME\" for the status of the instance)\n") 61 | } 62 | 63 | if rootDatabase != "" && !outVerbose && !outJsonify { 64 | stderr("\n(use \"rds-health check -v -n " + rootDatabase + "\" to see full report)\n") 65 | } 66 | 67 | if checkStatus > types.STATUS_CODE_SUCCESS { 68 | os.Exit(128) 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func check(cmd *cobra.Command, args []string, api Service) error { 75 | if rootDatabase == "" { 76 | var out show.Printer[types.StatusRegion] = minimal.ShowHealthRegion 77 | 
switch { 78 | case outVerbose: 79 | out = minimal.ShowHealthRegionWithRules 80 | case outSilent: 81 | out = show.None[types.StatusRegion]() 82 | case outJsonify: 83 | out = show.JSON[types.StatusRegion]() 84 | } 85 | 86 | return checkRegion(cmd, args, api, out) 87 | } 88 | 89 | var out show.Printer[types.StatusNode] = minimal.ShowHealthNode 90 | switch { 91 | case outVerbose: 92 | out = verbose.ShowHealthNode 93 | case outSilent: 94 | out = show.None[types.StatusNode]() 95 | case outJsonify: 96 | out = show.JSON[types.StatusNode]() 97 | } 98 | 99 | return checkNode(cmd, args, api, out) 100 | } 101 | 102 | func checkRegion(cmd *cobra.Command, _ []string, api Service, show show.Printer[types.StatusRegion]) error { 103 | status, err := api.CheckHealthRegion(cmd.Context(), checkDuration) 104 | if err != nil { 105 | return err 106 | } 107 | 108 | checkStatus = status.Status 109 | return stdout(show.Show(*status)) 110 | } 111 | 112 | func checkNode(cmd *cobra.Command, _ []string, api Service, show show.Printer[types.StatusNode]) error { 113 | status, err := api.CheckHealthNode(cmd.Context(), rootDatabase, checkDuration) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | checkStatus = status.Status 119 | return stdout(show.Show(*status)) 120 | } 121 | -------------------------------------------------------------------------------- /internal/rules/rules.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package rules 10 | 11 | import ( 12 | "context" 13 | "fmt" 14 | "time" 15 | 16 | "github.com/zalando/rds-health/internal/insight" 17 | "github.com/zalando/rds-health/internal/types" 18 | ) 19 | 20 | type Metric string 21 | 22 | func (m Metric) ToMin() []Metric { return []Metric{m + ".min"} } 23 | func (m Metric) ToAvg() []Metric { return []Metric{m + ".avg"} } 24 | func (m Metric) ToMax() []Metric { return []Metric{m + ".max"} } 25 | func (m Metric) ToSum() []Metric { return []Metric{m + ".sum"} } 26 | 27 | func (m Metric) ToAgg(agg Aggregator) []Metric { 28 | return []Metric{m + "." + Metric(agg)} 29 | } 30 | 31 | func (m Metric) ToMinMax() []Metric { 32 | return append(append(m.ToMin(), m.ToAvg()...), m.ToMax()...) 33 | } 34 | 35 | // Aggregator function used by telemetry system 36 | // 37 | // The following statistic aggregators are supported for the metrics: 38 | // 39 | // `.avg` - The average value for the metric over a period of time. 40 | // 41 | // `.min` - The minimum value for the metric over a period of time. 42 | // 43 | // `.max` - The maximum value for the metric over a period of time. 44 | // 45 | // `.sum` - The sum of the metric values over a period of time. 46 | // 47 | // `.sample_count` - The number of times the metric was collected over a period of time. Append to the metric name. 
48 | // 49 | // See https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PerfInsights.API.html 50 | type Aggregator string 51 | 52 | const ( 53 | STATS_SUM = Aggregator("sum") 54 | STATS_AVG = Aggregator("avg") 55 | STATS_MIN = Aggregator("min") 56 | STATS_MAX = Aggregator("max") 57 | ) 58 | 59 | type Eval func(...insight.Samples) types.Status 60 | type Rule func() ([]Metric, Eval) 61 | 62 | type Source interface { 63 | Fetch(context.Context, string, time.Duration, ...string) (map[string]insight.Samples, error) 64 | } 65 | 66 | type Check struct { 67 | source Source 68 | metrics []Metric 69 | related map[Metric][]Metric 70 | should map[Metric]Eval 71 | index []Metric 72 | } 73 | 74 | func New(source Source) *Check { 75 | return &Check{ 76 | source: source, 77 | metrics: []Metric{}, 78 | related: map[Metric][]Metric{}, 79 | should: map[Metric]Eval{}, 80 | index: []Metric{}, 81 | } 82 | } 83 | 84 | func (check *Check) Should(metrics []Metric, eval Eval) *Check { 85 | root := metrics[0] 86 | check.related[root] = metrics 87 | check.metrics = append(check.metrics, metrics...) 88 | check.should[root] = eval 89 | check.index = append(check.index, root) 90 | return check 91 | } 92 | 93 | func (check *Check) Run(ctx context.Context, dbiResourceId string, dur time.Duration) ([]types.Status, error) { 94 | seqToFetch := make([]string, len(check.metrics)) 95 | for i, metric := range check.metrics { 96 | seqToFetch[i] = string(metric) 97 | } 98 | 99 | samples, err := check.source.Fetch(ctx, dbiResourceId, dur, seqToFetch...) 
100 | if err != nil { 101 | return nil, fmt.Errorf("%w: failed to fetch samples", err) 102 | } 103 | 104 | status := make([]types.Status, 0) 105 | for _, root := range check.index { 106 | eval := check.should[root] 107 | seqOfSamples := make([]insight.Samples, 0) 108 | for _, related := range check.related[root] { 109 | seqOfSamples = append(seqOfSamples, samples[string(related)]) 110 | } 111 | 112 | status = append(status, eval(seqOfSamples...)) 113 | } 114 | 115 | return status, nil 116 | } 117 | -------------------------------------------------------------------------------- /internal/insight/insight.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package insight 10 | 11 | import ( 12 | "context" 13 | "sync" 14 | "time" 15 | 16 | "github.com/aws/aws-sdk-go-v2/aws" 17 | "github.com/aws/aws-sdk-go-v2/service/pi" 18 | "github.com/aws/aws-sdk-go-v2/service/pi/types" 19 | ) 20 | 21 | //go:generate mockgen -destination=../mocks/insight.go -mock_names Provider=Insight -package=mocks . 
Provider 22 | type Provider interface { 23 | GetResourceMetrics( 24 | context.Context, 25 | *pi.GetResourceMetricsInput, 26 | ...func(*pi.Options), 27 | ) (*pi.GetResourceMetricsOutput, error) 28 | } 29 | 30 | type Insight struct { 31 | api Provider 32 | service *string 33 | } 34 | 35 | func New(provider Provider) *Insight { 36 | return &Insight{ 37 | api: provider, 38 | service: aws.String("RDS"), 39 | } 40 | } 41 | 42 | func (in *Insight) periodInSeconds(dur time.Duration) int32 { 43 | // Note: Valid values are: 1, 60, 300, 3600, 86400 44 | switch { 45 | case dur <= 10*time.Minute: 46 | return 1 47 | case dur <= 5*time.Hour: 48 | return 60 49 | case dur <= 24*time.Hour: 50 | return 300 51 | default: 52 | return 3600 53 | } 54 | } 55 | 56 | func (in *Insight) Fetch(ctx context.Context, dbiResourceId string, dur time.Duration, metrics ...string) (map[string]Samples, error) { 57 | var chunks [][]string 58 | chunkSize := 15 59 | for i := 0; i < len(metrics); i += chunkSize { 60 | end := i + chunkSize 61 | 62 | if end > len(metrics) { 63 | end = len(metrics) 64 | } 65 | 66 | chunks = append(chunks, metrics[i:end]) 67 | } 68 | 69 | childContext, cancel := context.WithCancel(ctx) 70 | defer cancel() 71 | 72 | samples := map[string]Samples{} 73 | var wg sync.WaitGroup 74 | var err error 75 | var mu sync.Mutex 76 | 77 | for _, chunk := range chunks { 78 | chunk := chunk 79 | wg.Add(1) 80 | 81 | go func() { 82 | defer wg.Done() 83 | 84 | set, e := in.fetch(childContext, dbiResourceId, dur, chunk...) 
85 | 86 | mu.Lock() 87 | defer mu.Unlock() 88 | 89 | if e != nil { 90 | if err == nil { 91 | cancel() 92 | err = e 93 | } 94 | return 95 | } 96 | 97 | for k, v := range set { 98 | samples[k] = v 99 | } 100 | }() 101 | } 102 | wg.Wait() 103 | if err != nil { 104 | return nil, err 105 | } 106 | 107 | return samples, nil 108 | } 109 | 110 | func (in *Insight) fetch(ctx context.Context, dbiResourceId string, dur time.Duration, metrics ...string) (map[string]Samples, error) { 111 | query := make([]types.MetricQuery, 0, len(metrics)) 112 | for _, metric := range metrics { 113 | query = append(query, 114 | types.MetricQuery{Metric: aws.String(metric)}, 115 | ) 116 | } 117 | 118 | period := in.periodInSeconds(dur) 119 | req := pi.GetResourceMetricsInput{ 120 | ServiceType: types.ServiceType(*in.service), 121 | Identifier: aws.String(dbiResourceId), 122 | StartTime: aws.Time(time.Now().Add(-dur)), 123 | EndTime: aws.Time(time.Now()), 124 | PeriodInSeconds: aws.Int32(period), 125 | MetricQueries: query, 126 | } 127 | 128 | ret, err := in.api.GetResourceMetrics(ctx, &req) 129 | if err != nil { 130 | return nil, err 131 | } 132 | 133 | series := make(map[string]Samples) 134 | for _, metric := range ret.MetricList { 135 | seq := make(Samples, len(metric.DataPoints)) 136 | for i, v := range metric.DataPoints { 137 | seq[i] = sample(v) 138 | } 139 | series[aws.ToString(metric.Key.Metric)] = seq 140 | } 141 | 142 | return series, nil 143 | } 144 | -------------------------------------------------------------------------------- /internal/types/types.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
// BiB is a size in bytes that is printed using binary (power of 1024) units.
type BiB uint

// Binary unit multipliers
const (
	KiB = BiB(1) << 10
	MiB = BiB(1) << 20
	GiB = BiB(1) << 30
	TiB = BiB(1) << 40
)

// String formats the size using the largest unit that does not exceed the
// value. The value is divided with integer division, the fraction of the
// unit is dropped. Sizes below 1 KiB are printed in bytes.
func (v BiB) String() string {
	units := []struct {
		size BiB
		name string
	}{
		{TiB, "TiB"},
		{GiB, "GiB"},
		{MiB, "MiB"},
		{KiB, "KiB"},
	}

	for _, unit := range units {
		if v >= unit.size {
			return fmt.Sprintf("%d %s", v/unit.size, unit.name)
		}
	}

	return fmt.Sprintf("%d bytes", v)
}
String() string { 99 | return strings.Join(v, ", ") 100 | } 101 | 102 | // Database engine specification 103 | type Engine struct { 104 | ID string `json:"id"` 105 | Version string `json:"version"` 106 | } 107 | 108 | func (v Engine) String() string { 109 | return fmt.Sprintf("%s v%s", v.ID, v.Version) 110 | } 111 | 112 | // Cluster Node 113 | type Node struct { 114 | ID string `json:"id"` 115 | Name string `json:"name"` 116 | Type string `json:"type"` 117 | Zones AvailabilityZones `json:"zones"` 118 | Engine *Engine `json:"engine,omitempty"` 119 | Storage *Storage `json:"storage,omitempty"` 120 | Compute *Compute `json:"compute,omitempty"` 121 | ReadOnly bool `json:"readonly"` 122 | } 123 | 124 | func (v Node) String() string { 125 | engine := "" 126 | if v.Engine != nil { 127 | engine = " " + v.Engine.String() 128 | } 129 | 130 | spec := []string{} 131 | if v.Compute != nil { 132 | spec = append(spec, v.Compute.String()) 133 | } 134 | 135 | if v.Storage != nil { 136 | spec = append(spec, v.Storage.String()) 137 | } 138 | 139 | conf := "" 140 | if len(spec) > 0 { 141 | conf = " (" + strings.Join(spec, ", ") + ")" 142 | } 143 | 144 | return v.Type + engine + conf 145 | } 146 | 147 | // DB cluster topology 148 | type Cluster struct { 149 | ID string `json:"id"` 150 | Engine *Engine `json:"engine,omitempty"` 151 | Reader []Node `json:"reader,omitempty"` 152 | Writer []Node `json:"writer,omitempty"` 153 | } 154 | 155 | // Region topology 156 | type Region struct { 157 | Clusters []Cluster 158 | Nodes []Node 159 | } 160 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `rds-health` 2 | 3 | **Thank you for your interest in `rds-health`. 
Your contributions are highly welcome.** 4 | 5 | There are multiple ways of getting involved: 6 | 7 | - [Report a bug](#report-a-bug) 8 | - [Suggest a feature](#suggest-a-feature) 9 | - [Contribute code](#contribute-code) 10 | 11 | Below are a few guidelines we would like you to follow. 12 | If you need help, please reach out to us by opening an issue. 13 | 14 | ## Report a bug 15 | Reporting bugs is one of the best ways to contribute. Before creating a bug report, please check that an [issue](../../issues) reporting the same problem does not already exist. If there is such an issue, you may add your information as a comment. 16 | 17 | To report a new bug you should open an issue that summarizes the bug and set the label to "bug". 18 | 19 | If you want to provide a fix along with your bug report: That is great! In this case please send us a pull request as described in section [Contribute Code](#contribute-code). 20 | 21 | ## Suggest a feature 22 | To request a new feature you should open an [issue](../../issues/new) and summarize the desired functionality and its use case. Set the issue label to "feature". 23 | 24 | ## Contribute code 25 | This is an outline of what the workflow for code contributions looks like: 26 | 27 | - Check the list of open [issues](../../issues). Either assign an existing issue to yourself, or 28 | create a new one that you would like to work on and discuss your ideas and use cases. 29 | 30 | It is always best to discuss your plans beforehand, to ensure that your contribution is in line with our goals. 31 | 32 | - Fork the repository on GitHub 33 | - Create a feature branch from where you want to base your work. This is usually the `main` branch. (`git checkout -b my-new-feature`) 34 | - Open a new pull request, label it `work in progress` and outline what you will be contributing 35 | - Make commits of logical units.
36 | - Make sure you sign-off on your commits `git commit -s -m "adding X to change Y"` 37 | - Write good commit messages (see below). 38 | - Push your changes to a topic branch in your fork of the repository. 39 | - As you push your changes, update the pull request with new information and tasks as you complete them 40 | - Project maintainers might comment on your work as you progress 41 | - When you are done, remove the `work in progress` label and ping the maintainers for a review 42 | - Your pull request must receive a :thumbsup: from two [maintainers](MAINTAINERS.md) 43 | 44 | Thanks for your contributions! 45 | 46 | ### Commit messages 47 | The commit message helps us to write good release notes and speeds up the review process. The message should address two questions: what changed and why. The subject line should feature the “what” and the body of the commit should describe the “why”. The project follows the template defined by chapter [Contributing to a Project](http://git-scm.com/book/ch5-2.html) of the Git book. 48 | 49 | When creating a pull request, its description should reference the corresponding issue id. 50 | 51 | ### Sign your work / Developer certificate of origin 52 | All contributions (including pull requests) must agree to the Developer Certificate of Origin (DCO) version 1.1. This is exactly the same one created and used by the Linux kernel developers and posted on http://developercertificate.org/. This is a developer's certification that he or she has the right to submit the patch for inclusion into the project.
Simply submitting a contribution implies this agreement, however, please include a "Signed-off-by" tag in every patch (this tag is a conventional way to confirm that you agree to the DCO) - you can automate this with a [Git hook](https://stackoverflow.com/questions/15015894/git-add-signed-off-by-line-using-format-signoff-not-working) 53 | 54 | ``` 55 | git commit -s -m "adding X to change Y" 56 | ``` 57 | 58 | ### Building and testing 59 | 60 | The build and testing process requires [Go](https://golang.org) version 1.21 or later. 61 | 62 | ```bash 63 | git clone https://github.com/zalando/rds-health 64 | cd rds-health 65 | go test ./... 66 | staticcheck ./... 67 | ``` 68 | 69 | **Have fun, and happy hacking!** -------------------------------------------------------------------------------- /internal/rules/estimator.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package rules 10 | 11 | import ( 12 | "time" 13 | 14 | "github.com/zalando/rds-health/internal/insight" 15 | "github.com/zalando/rds-health/internal/types" 16 | ) 17 | 18 | // Estimator is container for generic algorithms and utility functions 19 | // required to introduce a new rule. 20 | type estimator struct { 21 | id string // rule id 22 | name Metric // metric name (e.g. db.Transactions.xact_commit) 23 | unit string // metric measurement unit (e.g. 
iops) 24 | info string // short human readable description about metric 25 | desc string // long human readable description 26 | } 27 | 28 | func (est estimator) samplingInterval(samples insight.Samples) time.Duration { 29 | a := samples[0] 30 | b := samples[1] 31 | return b.T().Sub(a.T()) 32 | } 33 | 34 | // utility function to show metric values 35 | func (est estimator) ShowMinMax() ([]Metric, Eval) { 36 | return est.name.ToMinMax(), func(samples ...insight.Samples) types.Status { 37 | t := est.samplingInterval(samples[0]) 38 | min, avg, max := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 39 | minmax := types.NewMinMax(min, avg, max) 40 | softminmax := types.NewMinMaxSoft(min, avg, max) 41 | 42 | return types.Status{ 43 | Code: types.STATUS_CODE_UNKNOWN, 44 | Rule: types.Rule{Unit: est.unit, About: est.info}, 45 | Interval: t, 46 | HardMM: &minmax, 47 | SoftMM: &softminmax, 48 | } 49 | } 50 | } 51 | 52 | // utility function to show metric values 53 | func (est estimator) Show(stats Aggregator) ([]Metric, Eval) { 54 | return est.name.ToAgg(stats), func(samples ...insight.Samples) types.Status { 55 | t := est.samplingInterval(samples[0]) 56 | pps := types.NewPercentile(samples[0].ToSeq()) 57 | 58 | return types.Status{ 59 | Code: types.STATUS_CODE_UNKNOWN, 60 | Rule: types.Rule{Unit: est.unit, About: est.info}, 61 | Interval: t, 62 | Aggregator: (*string)(&stats), 63 | Percentile: &pps, 64 | } 65 | } 66 | } 67 | 68 | // utility function to estimate that statistic is below the threshold 69 | func (est estimator) Below(tAvg, tMax float64) ([]Metric, Eval) { 70 | return est.name.ToMinMax(), func(samples ...insight.Samples) types.Status { 71 | t := est.samplingInterval(samples[0]) 72 | min, avg, max := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 73 | minmax := types.NewMinMaxSoft(min, avg, max) 74 | val := types.PercentileOf(avg, tAvg) 75 | 76 | status := types.STATUS_CODE_SUCCESS 77 | if minmax.Avg > tAvg { 78 | status = 
types.STATUS_CODE_WARNING 79 | } 80 | if minmax.Avg > tAvg && minmax.Max > tMax { 81 | status = types.STATUS_CODE_FAILURE 82 | } 83 | 84 | return types.Status{ 85 | Code: status, 86 | Rule: types.Rule{ID: est.id, Unit: est.unit, About: est.info}, 87 | Interval: t, 88 | SuccessRate: &val, 89 | SoftMM: &minmax, 90 | } 91 | } 92 | } 93 | 94 | // utility function to estimate that statistic is above the threshold 95 | func (est estimator) Above(tMin, tAvg float64) ([]Metric, Eval) { 96 | return est.name.ToMinMax(), func(samples ...insight.Samples) types.Status { 97 | t := est.samplingInterval(samples[0]) 98 | min, avg, max := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 99 | minmax := types.NewMinMaxSoft(min, avg, max) 100 | val := 100.0 - types.PercentileOf(avg, tAvg) 101 | 102 | status := types.STATUS_CODE_SUCCESS 103 | if minmax.Avg < tAvg { 104 | status = types.STATUS_CODE_WARNING 105 | } 106 | if minmax.Avg < tAvg && minmax.Min < tMin { 107 | status = types.STATUS_CODE_FAILURE 108 | } 109 | 110 | return types.Status{ 111 | Code: status, 112 | Rule: types.Rule{ID: est.id, Unit: est.unit, About: est.info}, 113 | Interval: t, 114 | SuccessRate: &val, 115 | SoftMM: &minmax, 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /internal/rules/calculator.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package rules 10 | 11 | import ( 12 | "time" 13 | 14 | "github.com/zalando/rds-health/internal/insight" 15 | "github.com/zalando/rds-health/internal/types" 16 | ) 17 | 18 | // Calculator is container for generic algorithms and utility functions 19 | // required to introduce a new "calculate-able" rules. 
20 | type calculator struct { 21 | id string // rule id 22 | lhm Metric // left hand metric 23 | rhm Metric // right hand metric 24 | fop func(float64, float64) float64 25 | unit string // metric measurement unit (e.g. iops) 26 | info string // short human readable description about metric 27 | desc string // long human readable description 28 | } 29 | 30 | func (cal calculator) samplingInterval(samples insight.Samples) time.Duration { 31 | a := samples[0] 32 | b := samples[1] 33 | return b.T().Sub(a.T()) 34 | } 35 | 36 | func (cal calculator) apply(lhm, rhm []float64) []float64 { 37 | seq := make([]float64, len(lhm)) 38 | for i := 0; i < len(lhm); i++ { 39 | seq[i] = cal.fop(lhm[i], rhm[i]) 40 | } 41 | return seq 42 | } 43 | 44 | // utility function to show metric values 45 | func (cal calculator) ShowMinMax() ([]Metric, Eval) { 46 | return append(cal.lhm.ToMinMax(), cal.rhm.ToMinMax()...), func(samples ...insight.Samples) types.Status { 47 | t := cal.samplingInterval(samples[0]) 48 | 49 | lmin, lavg, lmax := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 50 | rmin, ravg, rmax := samples[3].ToSeq(), samples[4].ToSeq(), samples[5].ToSeq() 51 | 52 | min := cal.apply(lmin, rmin) 53 | avg := cal.apply(lavg, ravg) 54 | max := cal.apply(lmax, rmax) 55 | 56 | minmax := types.NewMinMax(min, avg, max) 57 | softminmax := types.NewMinMaxSoft(min, avg, max) 58 | 59 | return types.Status{ 60 | Code: types.STATUS_CODE_UNKNOWN, 61 | Rule: types.Rule{Unit: cal.unit, About: cal.info}, 62 | Interval: t, 63 | HardMM: &minmax, 64 | SoftMM: &softminmax, 65 | } 66 | } 67 | } 68 | 69 | // utility function to estimate that statistic is below the threshold 70 | func (cal calculator) Below(tAvg, tMax float64) ([]Metric, Eval) { 71 | return append(cal.lhm.ToMinMax(), cal.rhm.ToMinMax()...), func(samples ...insight.Samples) types.Status { 72 | t := cal.samplingInterval(samples[0]) 73 | lmin, lavg, lmax := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 74 | rmin, ravg, 
rmax := samples[3].ToSeq(), samples[4].ToSeq(), samples[5].ToSeq() 75 | 76 | min := cal.apply(lmin, rmin) 77 | avg := cal.apply(lavg, ravg) 78 | max := cal.apply(lmax, rmax) 79 | 80 | minmax := types.NewMinMaxSoft(min, avg, max) 81 | val := types.PercentileOf(avg, tAvg) 82 | 83 | status := types.STATUS_CODE_SUCCESS 84 | if minmax.Avg > tAvg { 85 | status = types.STATUS_CODE_WARNING 86 | } 87 | if minmax.Avg > tAvg && minmax.Max > tMax { 88 | status = types.STATUS_CODE_FAILURE 89 | } 90 | 91 | return types.Status{ 92 | Code: status, 93 | Rule: types.Rule{ID: cal.id, Unit: cal.unit, About: cal.info}, 94 | Interval: t, 95 | SuccessRate: &val, 96 | SoftMM: &minmax, 97 | } 98 | } 99 | } 100 | 101 | // utility function to estimate that statistic is above the threshold 102 | func (cal calculator) Above(tMin, tAvg float64) ([]Metric, Eval) { 103 | return append(cal.lhm.ToMinMax(), cal.rhm.ToMinMax()...), func(samples ...insight.Samples) types.Status { 104 | t := cal.samplingInterval(samples[0]) 105 | lmin, lavg, lmax := samples[0].ToSeq(), samples[1].ToSeq(), samples[2].ToSeq() 106 | rmin, ravg, rmax := samples[3].ToSeq(), samples[4].ToSeq(), samples[5].ToSeq() 107 | 108 | min := cal.apply(lmin, rmin) 109 | avg := cal.apply(lavg, ravg) 110 | max := cal.apply(lmax, rmax) 111 | 112 | minmax := types.NewMinMaxSoft(min, avg, max) 113 | val := 100.0 - types.PercentileOf(avg, tAvg) 114 | 115 | status := types.STATUS_CODE_SUCCESS 116 | if minmax.Avg < tAvg { 117 | status = types.STATUS_CODE_WARNING 118 | } 119 | if minmax.Avg < tAvg && minmax.Min < tMin { 120 | status = types.STATUS_CODE_FAILURE 121 | } 122 | 123 | return types.Status{ 124 | Code: status, 125 | Rule: types.Rule{ID: cal.id, Unit: cal.unit, About: cal.info}, 126 | Interval: t, 127 | SuccessRate: &val, 128 | SoftMM: &minmax, 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /internal/discovery/discovery_test.go: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package discovery_test 10 | 11 | import ( 12 | "context" 13 | "testing" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/ec2" 17 | ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" 18 | "github.com/aws/aws-sdk-go-v2/service/rds" 19 | rdstypes "github.com/aws/aws-sdk-go-v2/service/rds/types" 20 | "github.com/zalando/rds-health/internal/discovery" 21 | "github.com/zalando/rds-health/internal/mocks" 22 | "go.uber.org/mock/gomock" 23 | ) 24 | 25 | func TestLookupAll(t *testing.T) { 26 | ctrl := gomock.NewController(t) 27 | defer ctrl.Finish() 28 | 29 | // 30 | dbs := &rds.DescribeDBInstancesOutput{ 31 | DBInstances: []rdstypes.DBInstance{ 32 | database("a"), 33 | database("e"), 34 | database("b"), 35 | database("c"), 36 | database("d"), 37 | }, 38 | } 39 | databases := mocks.NewDatabase(ctrl) 40 | databases.EXPECT().DescribeDBInstances(gomock.Any(), gomock.Any()).Return(dbs, nil) 41 | 42 | // 43 | cls := &rds.DescribeDBClustersOutput{ 44 | DBClusters: []rdstypes.DBCluster{ 45 | cluster("B", "b", ""), 46 | cluster("A", "a", "d"), 47 | }, 48 | } 49 | clusters := mocks.NewCluster(ctrl) 50 | clusters.EXPECT().DescribeDBClusters(gomock.Any(), gomock.Any()).Return(cls, nil) 51 | 52 | // 53 | its := &ec2.DescribeInstanceTypesOutput{ 54 | InstanceTypes: []ec2types.InstanceTypeInfo{}, 55 | } 56 | instances := mocks.NewInstance(ctrl) 57 | instances.EXPECT().DescribeInstanceTypes(gomock.Any(), gomock.Any()).Return(its, nil) 58 | 59 | sut := discovery.New(clusters, databases, instances) 60 | 61 | c, n, err := sut.LookupAll(context.Background()) 62 | switch { 63 | case err != nil: 64 | t.Errorf("should not failed with error 
%s", err) 65 | case len(c) != 2: 66 | t.Errorf("should return clusters") 67 | 68 | case c[0].ID != "A": 69 | t.Errorf("should not return unexpected value of 1st cluster |%s|", c[0].ID) 70 | case len(c[0].Writer) != 1: 71 | t.Errorf("should have writer nodes at 1st cluster") 72 | case c[0].Writer[0].Name != "a": 73 | t.Errorf("unexpected writer node at 1st cluster |%s|", c[0].Writer[0].Name) 74 | case len(c[0].Reader) != 1: 75 | t.Errorf("should have reader nodes at 1st cluster") 76 | case c[0].Reader[0].Name != "d": 77 | t.Errorf("unexpected reader node at 1st cluster |%s|", c[0].Reader[0].Name) 78 | 79 | case c[1].ID != "B": 80 | t.Errorf("should not return unexpected value of 2nd cluster |%s|", c[1].ID) 81 | case len(c[1].Writer) != 1: 82 | t.Errorf("should have writer nodes at 2nd cluster") 83 | case c[1].Writer[0].Name != "b": 84 | t.Errorf("unexpected writer node at 2nd cluster |%s|", c[1].Writer[0].Name) 85 | case len(c[1].Reader) != 0: 86 | t.Errorf("should not have reader nodes at 2nd cluster") 87 | 88 | case len(n) != 2: 89 | t.Errorf("should return databases") 90 | case n[0].Name != "c": 91 | t.Errorf("should not return unexpected value of 1st database |%s|", n[0].Name) 92 | case n[1].Name != "e": 93 | t.Errorf("should not return unexpected value of 2nd database |%s|", n[1].Name) 94 | } 95 | } 96 | 97 | // 98 | // Helper 99 | // 100 | 101 | // mock database 102 | func database(name string) rdstypes.DBInstance { 103 | return rdstypes.DBInstance{ 104 | DBInstanceIdentifier: aws.String(name), 105 | DBInstanceClass: aws.String("db.t2.small"), 106 | Engine: aws.String("postgres"), 107 | EngineVersion: aws.String("13.14"), 108 | StorageType: aws.String("gp2"), 109 | AllocatedStorage: aws.Int32(100), 110 | AvailabilityZone: aws.String("eu-central-1a"), 111 | SecondaryAvailabilityZone: nil, 112 | } 113 | } 114 | 115 | // mock cluster 116 | func cluster(name string, writer string, reader string) rdstypes.DBCluster { 117 | members := []rdstypes.DBClusterMember{} 
118 | 119 | if writer != "" { 120 | members = append(members, rdstypes.DBClusterMember{ 121 | DBInstanceIdentifier: aws.String(writer), 122 | IsClusterWriter: aws.Bool(true), 123 | }) 124 | } 125 | 126 | if reader != "" { 127 | members = append(members, rdstypes.DBClusterMember{ 128 | DBInstanceIdentifier: aws.String(reader), 129 | IsClusterWriter: aws.Bool(false), 130 | }) 131 | } 132 | 133 | return rdstypes.DBCluster{ 134 | DBClusterIdentifier: aws.String(name), 135 | Engine: aws.String("postgres"), 136 | EngineVersion: aws.String("13.14"), 137 | DBClusterMembers: members, 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /internal/types/status.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package types 10 | 11 | import ( 12 | "encoding/json" 13 | "fmt" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | // 19 | // 20 | 21 | type Rule struct { 22 | ID string `json:"id,omitempty"` 23 | Unit string `json:"unit,omitempty"` 24 | About string `json:"about,omitempty"` 25 | } 26 | 27 | func (v Rule) String() string { 28 | if v.ID == "" { 29 | return fmt.Sprintf("%-37s", v.About) 30 | } 31 | 32 | return fmt.Sprintf("%s: %-37s", v.ID, v.About) 33 | } 34 | 35 | // 36 | // 37 | 38 | type StatusCode int 39 | 40 | func (v StatusCode) String() string { 41 | switch v { 42 | case STATUS_CODE_UNKNOWN: 43 | return "UNKNOWN" 44 | case STATUS_CODE_SUCCESS: 45 | return "PASSED" 46 | case STATUS_CODE_WARNING: 47 | return "WARNED" 48 | case STATUS_CODE_FAILURE: 49 | return "FAILED" 50 | default: 51 | panic(fmt.Errorf("status code %d unknown", v)) 52 | } 53 | } 54 | 55 | const ( 56 | STATUS_CODE_UNKNOWN StatusCode = iota 57 | STATUS_CODE_SUCCESS 58 | STATUS_CODE_WARNING 
59 | STATUS_CODE_FAILURE 60 | )

// sprintf wraps m into the ANSI color escape sequence that corresponds to the
// status code. Success, warning and failure additionally get a textual label
// in front of the message; codes outside the known set return m unchanged.
func (code StatusCode) sprintf(m string) string {
	switch code {
	case STATUS_CODE_UNKNOWN:
		return "\033[32m" + m + "\033[0m"
	case STATUS_CODE_SUCCESS:
		return "\033[32m PASSED: " + m + "\033[0m"
	case STATUS_CODE_WARNING:
		return "\033[33m WARNED: " + m + "\033[0m"
	case STATUS_CODE_FAILURE:
		return "\033[31m FAILED: " + m + "\033[0m"
	default:
		return m
	}
}

// MarshalJSON encodes the status code as one of the JSON strings "unknown",
// "passed", "warned" or "failed". Any other value is reported as an error.
func (code StatusCode) MarshalJSON() ([]byte, error) {
	switch code {
	case STATUS_CODE_UNKNOWN:
		return json.Marshal("unknown")
	case STATUS_CODE_SUCCESS:
		return json.Marshal("passed")
	case STATUS_CODE_WARNING:
		return json.Marshal("warned")
	case STATUS_CODE_FAILURE:
		return json.Marshal("failed")
	default:
		return nil, fmt.Errorf("status code %d unknown to JSON codec", code)
	}
}

//
//

// Status of rule evaluation.
//
// All pointer fields are optional: they are omitted from JSON when nil and
// String renders a placeholder instead of dereferencing them.
type Status struct {
	Code        StatusCode    `json:"status"`
	Rule        Rule          `json:"rule"`
	Interval    time.Duration `json:"-"`
	SuccessRate *float64      `json:"success_rate,omitempty"`
	HardMM      *MinMax       `json:"hard_minmax,omitempty"`
	SoftMM      *MinMax       `json:"soft_minmax,omitempty"`
	Aggregator  *string       `json:"aggregator,omitempty"`
	Percentile  *Percentile   `json:"distribution,omitempty"`
}

// String renders the status as a single colored line.
//
// When both Percentile and Aggregator are set the percentile layout is used
// (without coloring). Otherwise the optional success rate, the rule and the
// min/max values are joined with " | " and colored according to Code.
func (v Status) String() string {
	// Note: special formatting for percentiles
	if v.Percentile != nil && v.Aggregator != nil {
		return fmt.Sprintf("%-37s | %s %s %4s %s ", v.Rule.About, v.Interval, *v.Aggregator, v.Rule.Unit, v.Percentile)
	}

	seq := make([]string, 0, 3)

	if v.SuccessRate != nil {
		seq = append(seq, fmt.Sprintf("%7.3f", *v.SuccessRate))
	}

	seq = append(seq, v.Rule.String())

	// SoftMM is optional just like HardMM; render a placeholder instead of
	// dereferencing a nil pointer. The non-nil output is unchanged.
	soft := "-"
	if v.SoftMM != nil {
		soft = fmt.Sprintf("%s", *v.SoftMM)
	}

	if v.HardMM != nil {
		seq = append(seq, fmt.Sprintf("%4s minmax %-32s soft %s on %s", v.Rule.Unit, *v.HardMM, soft, v.Interval))
	} else {
		seq = append(seq, fmt.Sprintf("%4s soft %s on %s", v.Rule.Unit, soft, v.Interval))
	}

	return v.Code.sprintf(strings.Join(seq, " | "))
}

// MarshalJSON encodes the status with an additional "interval" field that
// holds Interval truncated to whole seconds (Interval itself is tagged "-").
func (v Status) MarshalJSON() ([]byte, error) {
	// Struct has the fields of Status but none of its methods, which prevents
	// infinite recursion into this MarshalJSON.
	type Struct Status

	return json.Marshal(struct {
		*Struct
		IntervalInSec int `json:"interval"`
	}{
		Struct:        (*Struct)(&v),
		IntervalInSec: int(v.Interval.Seconds()),
	})
}

//
//

// StatusNode is the outcome of all checks evaluated for a single node.
type StatusNode struct {
	Status StatusCode `json:"code,omitempty"`
	Node   *Node      `json:"node,omitempty"`
	Checks []Status   `json:"status,omitempty"`
}

// String renders a header line with the node followed by one line per check.
// It dereferences Node, so Node must be set.
func (v StatusNode) String() string {
	sb := strings.Builder{}

	sb.WriteString(fmt.Sprintf("\033[37m%s ⇒ %s\033[0m\n", v.Node.Name, v.Node))

	for _, s := range v.Checks {
		sb.WriteString(fmt.Sprintf("%s\n", s))
	}

	return sb.String()
}

// StatusCluster groups the node statuses of a cluster by role.
type StatusCluster struct {
	Status  StatusCode
	Cluster *Cluster
	Writer  []StatusNode
	Reader  []StatusNode
}

// StatusRegion groups the statuses of clusters and of the nodes listed
// separately from any cluster.
type StatusRegion struct {
	Status   StatusCode
	Clusters []StatusCluster
	Nodes    []StatusNode
}

// String renders one line per node. Cluster members are listed under the
// cluster ID with a "[w]" or "[r]" prefix. A node with at least one check
// above STATUS_CODE_SUCCESS is printed in red, followed by the IDs of the
// offending rules.
func (v StatusRegion) String() string {
	formatter := func(prefix string, status StatusNode) string {
		failed := make([]string, 0)
		for _, s := range status.Checks {
			if s.Code > STATUS_CODE_SUCCESS {
				failed = append(failed, s.Rule.ID)
			}
		}

		if len(failed) != 0 {
			return fmt.Sprintf("\033[31m%s %-36s | %-25s | %s | %s\033[0m\n", prefix, status.Node.Name, status.Node.Engine, status.Node.Zones, strings.Join(failed, " "))
		}

		return fmt.Sprintf("%s %-36s | %-25s | %s\n", prefix, status.Node.Name, status.Node.Engine, status.Node.Zones)
	}

	sb := strings.Builder{}
	for _, c := range v.Clusters {
		sb.WriteString(fmt.Sprintf("%s\n", c.Cluster.ID))
		for _, w := range c.Writer {
			sb.WriteString(formatter("[w]", w))
		}
		for _, r := range c.Reader {
			sb.WriteString(formatter("[r]", r))
		}
		sb.WriteString("\n")
	}

	for _, n := range v.Nodes {
		sb.WriteString(formatter("", n))
	}

	return sb.String()
}
-------------------------------------------------------------------------------- /doc/health-rules.md: -------------------------------------------------------------------------------- 1 | # Database health rules 2 | 3 | The command-line utility checks the health of AWS RDS. 4 | The utility uses the rules defined by the following checklist. 5 | 6 | 7 | ## C1: cpu utilization 8 | 9 | **Metric**: os.cpuUtilization.total (%) 10 | 11 | **Condition**: `max cpu util` < 60% and `avg cpu util` < 40% 12 | 13 | We should worry if the value is higher than 40%. Typical database workloads are bound to memory or storage; high CPU is an anomaly that requires further investigation. 14 | 15 | ## C2: cpu await for storage 16 | 17 | **Metric**: os.cpuUtilization.wait (%) 18 | 19 | **Condition**: `max cpu await` < 10% and `avg cpu await` < 8% 20 | 21 | Any value above 5% - 10% shows a suboptimal disk configuration. A high value indicates that the database instance is bound by the storage capacity. Highly likely the storage needs to be scaled. 22 | 23 | ## M1: swapped in from disk 24 | 25 | **Metric**: os.swap.in (KB/s) 26 | 27 | **Condition**: `max swap in` < 1KB/s and `avg swap in` < 1KB/s 28 | 29 | Any intensive activity indicates that the system is swapping. It is an indication of low memory. 30 | 31 | ## M2: swapped out to disk 32 | 33 | **Metric**: os.swap.out (KB/s) 34 | 35 | **Condition**: `max swap out` < 1KB/s and `avg swap out` < 1KB/s 36 | 37 | Any intensive activity indicates that the system is swapping. It is an indication of low memory. 
38 | 39 | 40 | ## D1: storage read i/o 41 | 42 | **Metric**: os.diskIO.rdsdev.readIOsPS (IOPS) 43 | 44 | **Condition**: `max storage read` < 300 IOPS and `avg storage read` < 100 IOPS 45 | 46 | The number shall be aligned with the storage architecture deployed for the database instance. Each instance has a limit of IOPS it can do. With the GP2 volume type, IOPS are provisioned by volume size, 3 IOPS per GB of storage with a minimum of 100 IOPS. IO volume types have an explicit value. 47 | 48 | A very low value shows that the entire dataset is served from memory. In this case, align the storage capacity with the overall database workload so that the storage capacity is enough to handle it. 49 | 50 | ## D2: storage write i/o 51 | 52 | **Metric**: os.diskIO.rdsdev.writeIOsPS (IOPS) 53 | 54 | **Condition**: `max storage write` < 300 IOPS and `avg storage write` < 100 IOPS 55 | 56 | The number shall be aligned with the storage architecture deployed for the database instance. Each instance has a limit of IOPS it can do. With the GP2 volume type, IOPS are provisioned by volume size, 3 IOPS per GB of storage with a minimum of 100 IOPS. IO volume types have an explicit value. 57 | 58 | A high number shows that the workload is write-mostly and potentially bound to the disk storage. 59 | 60 | ## D3: storage i/o latency 61 | 62 | **Metric**: os.diskIO.rdsdev.await (ms) 63 | 64 | **Condition**: `max storage latency` < 20 ms and `avg storage latency` < 10 ms 65 | 66 | The metric reflects the time used by the storage to fulfill the database queries. High latency on the storage implies a high latency of SQL queries. 67 | 68 | Please be aware that latency above 10ms requires improvement to the storage system. A typical disk latency should be less than 4 - 5 ms. Please validate that application SLOs are not impacted if the latency is above 5 ms. 
69 | 70 | 71 | ## P1: database cache hit ratio 72 | 73 | **Metric**: db.Cache.blks_hit / (db.Cache.blks_hit + db.IO.blk_read) 74 | 75 | **Condition**: `min db cache hit ratio` > 80 % and `avg db cache hit ratio` > 90 % 76 | 77 | The database reads and writes table data in blocks. The default page size of PostgreSQL is 8192 bytes. The default IO block size in Linux is 4096 bytes. The number of blocks read by the database from the physical storage has to be aligned with the storage capacity provisioned to the database instance. The database caches these blocks in memory to optimize the application performance. When clients request data, the database checks the cached memory and, if the relevant data is not there, it has to read it from disk, thus queries become slower. 78 | 79 | Any value below 80 % shows that the database has an insufficient amount of shared buffers or physical RAM. Data required for top-called queries doesn't fit into memory, and the database has to read it from disk. 80 | 81 | 82 | ## P2: database blocks read latency 83 | 84 | **Metric**: db.IO.blk_read_time (ms) 85 | 86 | **Condition**: `max db blocks read latency` < 20 ms and `avg db blocks read latency` < 10 ms 87 | 88 | The metric reflects the time used by the database to read blocks from the storage. High latency on the storage implies a high latency of SQL queries. 89 | 90 | Please be aware that latency above 10ms requires validation of the impact on application SLOs and improvement to the storage system. 91 | 92 | 93 | ## P3: database deadlocks 94 | 95 | **Metric**: db.Concurrency.deadlocks (tps) 96 | 97 | **Condition**: `max db deadlocks` == 0 and `avg db deadlocks` == 0 98 | 99 | Number of deadlocks detected in this database. Ideally, it shall be 0. The application schema and I/O logic require evaluation if the number is high. 
100 | 101 | 102 | ## P4: database transactions 103 | 104 | **Metric**: db.Transactions.xact_commit (tps) 105 | 106 | **Condition**: `min db tx` > 3 tps and `avg db tx` > 5 tps 107 | 108 | Number of transactions executed by the database. A low number indicates that the database instance is a standby. 109 | 110 | 111 | ## P5: SQL efficiency 112 | 113 | **Metric**: db.SQL.tup_fetched / db.SQL.tup_returned 114 | 115 | **Condition**: `min sql efficiency` > 10 % and `avg sql efficiency` > 20 % 116 | 117 | SQL efficiency shows the percentage of rows fetched by the client vs rows returned from the storage. The metric does not necessarily show any performance issue with databases, but a high ratio of returned vs fetched rows should trigger the question about optimization of SQL queries, schema or indexes. 118 | 119 | For example, if you do `select count(*) from million_row_table`, one million rows will be returned, but only one row will be fetched. 120 | -------------------------------------------------------------------------------- /internal/show/show.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 
6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package show 10 | 11 | import ( 12 | "bytes" 13 | "encoding/json" 14 | "fmt" 15 | 16 | "github.com/zalando/rds-health/internal/types" 17 | ) 18 | 19 | // 20 | // The package defines generic primitives to implement formatted output 21 | // 22 | 23 | // Generic printer that translates type T instance to sequence of bytes 24 | type Printer[T any] interface { 25 | Show(T) ([]byte, error) 26 | } 27 | 28 | // Lifts a printer function to Pinter interface 29 | type FromShow[T any] func(T) ([]byte, error) 30 | 31 | func (f FromShow[T]) Show(x T) ([]byte, error) { return f(x) } 32 | 33 | // Prepend prefix 34 | type Prefix[T any] string 35 | 36 | func (p Prefix[T]) FMap(f Printer[T]) Printer[T] { 37 | return FromShow[T](func(x T) ([]byte, error) { 38 | b := &bytes.Buffer{} 39 | 40 | v, err := f.Show(x) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | if len(v) != 0 { 46 | if _, err := b.Write([]byte(p)); err != nil { 47 | return nil, err 48 | } 49 | 50 | if _, err := b.Write(v); err != nil { 51 | return nil, err 52 | } 53 | } 54 | 55 | return b.Bytes(), nil 56 | }) 57 | } 58 | 59 | // Builds printer for type B from printer of type A and contramap B -> A 60 | type ContraMap[A, B any] struct{ T Printer[A] } 61 | 62 | func (c ContraMap[A, B]) FMap(f func(B) A) Printer[B] { 63 | return FromShow[B](func(a B) ([]byte, error) { 64 | return c.T.Show(f(a)) 65 | }) 66 | } 67 | 68 | // Build a printer for sequence 69 | type Seq[T any] struct{ T Printer[T] } 70 | 71 | func (seq Seq[T]) Show(x []T) ([]byte, error) { 72 | b := &bytes.Buffer{} 73 | 74 | for _, k := range x { 75 | v, err := seq.T.Show(k) 76 | if err != nil { 77 | return nil, err 78 | } 79 | 80 | if len(v) != 0 { 81 | if _, err := b.Write(v); err != nil { 82 | return nil, err 83 | } 84 | } 85 | } 86 | 87 | return b.Bytes(), nil 88 | } 89 | 90 | type UnApply2[T, A, B any] func(T) (A, B) 91 | 92 | // Build printer for product type 93 | type Printer2[T, A, B 
any] struct { 94 | A Printer[A] 95 | B Printer[B] 96 | UnApply2[T, A, B] 97 | } 98 | 99 | func (p Printer2[T, A, B]) Show(x T) ([]byte, error) { 100 | a, b := p.UnApply2(x) 101 | 102 | c := &bytes.Buffer{} 103 | 104 | va, err := p.A.Show(a) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | if len(va) != 0 { 110 | if _, err := c.Write(va); err != nil { 111 | return nil, err 112 | } 113 | } 114 | 115 | vb, err := p.B.Show(b) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | if len(vb) != 0 { 121 | if _, err := c.Write(vb); err != nil { 122 | return nil, err 123 | } 124 | } 125 | 126 | return c.Bytes(), nil 127 | } 128 | 129 | func Cluster[T, A any](t Printer[T], a Printer[A], f UnApply2[T, []A, []A]) Printer[T] { 130 | showNodes := Printer2[T, []A, []A]{ 131 | A: Seq[A]{T: a}, 132 | B: Seq[A]{T: a}, 133 | UnApply2: f, 134 | } 135 | 136 | return Printer2[T, T, T]{ 137 | A: t, 138 | B: showNodes, 139 | UnApply2: func(x T) (T, T) { return x, x }, 140 | } 141 | } 142 | 143 | func Region[T, A, B any](a Printer[A], b Printer[B], f UnApply2[T, []A, []B]) Printer[T] { 144 | return Printer2[T, []A, []B]{ 145 | A: Prefix[[]A]("").FMap(Seq[A]{T: a}), 146 | B: Prefix[[]B]("\n").FMap(Seq[B]{T: b}), 147 | UnApply2: f, 148 | } 149 | } 150 | 151 | // outputs json 152 | func JSON[T any]() Printer[T] { 153 | return FromShow[T](func(x T) ([]byte, error) { 154 | return json.MarshalIndent(x, "", " ") 155 | }) 156 | } 157 | 158 | // outputs nothing 159 | func None[T any]() Printer[T] { 160 | return FromShow[T](func(x T) ([]byte, error) { 161 | return nil, nil 162 | }) 163 | } 164 | 165 | type SchemaStatusCode struct { 166 | NONE string 167 | PASS string 168 | WARN string 169 | FAIL string 170 | } 171 | 172 | type Schema struct { 173 | StatusCodeIcon SchemaStatusCode 174 | StatusCodeText SchemaStatusCode 175 | Cluster string 176 | } 177 | 178 | func (s Schema) FmtForStatus(c types.StatusCode) string { 179 | switch c { 180 | case types.STATUS_CODE_UNKNOWN: 181 | 
return s.StatusCodeText.NONE 182 | case types.STATUS_CODE_SUCCESS: 183 | return s.StatusCodeText.PASS 184 | case types.STATUS_CODE_WARNING: 185 | return s.StatusCodeText.WARN 186 | case types.STATUS_CODE_FAILURE: 187 | return s.StatusCodeText.FAIL 188 | default: 189 | return "%s" 190 | } 191 | } 192 | 193 | func StatusText(x types.StatusCode) string { 194 | switch x { 195 | case types.STATUS_CODE_UNKNOWN: 196 | return fmt.Sprintf(SCHEMA.StatusCodeText.NONE, "NONE") 197 | case types.STATUS_CODE_SUCCESS: 198 | return fmt.Sprintf(SCHEMA.StatusCodeText.PASS, "PASS") 199 | case types.STATUS_CODE_WARNING: 200 | return fmt.Sprintf(SCHEMA.StatusCodeText.WARN, "WARN") 201 | case types.STATUS_CODE_FAILURE: 202 | return fmt.Sprintf(SCHEMA.StatusCodeText.FAIL, "FAIL") 203 | default: 204 | return "" 205 | } 206 | } 207 | 208 | func StatusIcon(x types.StatusCode) string { 209 | switch x { 210 | case types.STATUS_CODE_UNKNOWN: 211 | return SCHEMA.StatusCodeIcon.NONE 212 | case types.STATUS_CODE_SUCCESS: 213 | return SCHEMA.StatusCodeIcon.PASS 214 | case types.STATUS_CODE_WARNING: 215 | return SCHEMA.StatusCodeIcon.WARN 216 | case types.STATUS_CODE_FAILURE: 217 | return SCHEMA.StatusCodeIcon.FAIL 218 | default: 219 | return "" 220 | } 221 | } 222 | 223 | var ( 224 | SCHEMA_PLAIN = Schema{ 225 | StatusCodeIcon: SchemaStatusCode{ 226 | NONE: "", 227 | PASS: "", 228 | WARN: "", 229 | FAIL: "", 230 | }, 231 | StatusCodeText: SchemaStatusCode{ 232 | NONE: "%s", 233 | PASS: "%s", 234 | WARN: "%s", 235 | FAIL: "%s", 236 | }, 237 | 238 | Cluster: "%s", 239 | } 240 | 241 | SCHEMA_COLOR = Schema{ 242 | StatusCodeIcon: SchemaStatusCode{ 243 | NONE: "", 244 | PASS: "✅ ", 245 | WARN: "🟧 ", 246 | FAIL: "❌ ", 247 | }, 248 | StatusCodeText: SchemaStatusCode{ 249 | NONE: "%s", 250 | PASS: "\033[32m%s\033[0m", 251 | WARN: "\033[33m%s\033[0m", 252 | FAIL: "\033[31m%s\033[0m", 253 | }, 254 | Cluster: "\u001b[1m%s\u001b[0m", 255 | } 256 | 257 | SCHEMA = SCHEMA_PLAIN 258 | ) 259 | 
-------------------------------------------------------------------------------- /internal/service/service.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package service 10 | 11 | import ( 12 | "context" 13 | "time" 14 | 15 | "github.com/aws/aws-sdk-go-v2/aws" 16 | "github.com/aws/aws-sdk-go-v2/service/ec2" 17 | "github.com/aws/aws-sdk-go-v2/service/pi" 18 | "github.com/aws/aws-sdk-go-v2/service/rds" 19 | "github.com/zalando/rds-health/internal/database" 20 | "github.com/zalando/rds-health/internal/discovery" 21 | "github.com/zalando/rds-health/internal/insight" 22 | "github.com/zalando/rds-health/internal/instance" 23 | "github.com/zalando/rds-health/internal/rules" 24 | "github.com/zalando/rds-health/internal/types" 25 | ) 26 | 27 | type ProgressBar interface { 28 | Describe(string) 29 | } 30 | 31 | type Service struct { 32 | progress ProgressBar 33 | database *database.Database 34 | instance *instance.Instance 35 | insight *insight.Insight 36 | discovery *discovery.Discovery 37 | } 38 | 39 | func New(conf aws.Config, progress ProgressBar) *Service { 40 | rds := rds.NewFromConfig(conf) 41 | ec2 := ec2.NewFromConfig(conf) 42 | 43 | return &Service{ 44 | progress: progress, 45 | database: database.New(rds), 46 | instance: instance.New(ec2), 47 | insight: insight.New(pi.NewFromConfig(conf)), 48 | discovery: discovery.New(rds, rds, ec2), 49 | } 50 | } 51 | 52 | // 53 | // 54 | 55 | func (service *Service) CheckHealthRegion(ctx context.Context, interval time.Duration) (*types.StatusRegion, error) { 56 | service.progress.Describe("discovering") 57 | 58 | clusters, nodes, err := service.discovery.LookupAll(context.Background()) 59 | if err != nil { 60 | return nil, err 61 | } 62 | 63 | region := 
types.StatusRegion{ 64 | Status: types.STATUS_CODE_UNKNOWN, 65 | Clusters: make([]types.StatusCluster, len(clusters)), 66 | Nodes: make([]types.StatusNode, len(nodes)), 67 | } 68 | 69 | for c := 0; c < len(clusters); c++ { 70 | cluster := clusters[c] 71 | status := types.StatusCluster{ 72 | Status: types.STATUS_CODE_UNKNOWN, 73 | Cluster: &cluster, 74 | Writer: make([]types.StatusNode, len(cluster.Writer)), 75 | Reader: make([]types.StatusNode, len(cluster.Reader)), 76 | } 77 | 78 | for w := 0; w < len(cluster.Writer); w++ { 79 | v, err := service.checkHealthNode(ctx, cluster.Writer[w], interval) 80 | if err != nil { 81 | return nil, err 82 | } 83 | status.Writer[w] = *v 84 | if status.Status < v.Status { 85 | status.Status = v.Status 86 | } 87 | } 88 | for r := 0; r < len(cluster.Reader); r++ { 89 | v, err := service.checkHealthNode(ctx, cluster.Reader[r], interval) 90 | if err != nil { 91 | return nil, err 92 | } 93 | status.Reader[r] = *v 94 | if status.Status < v.Status { 95 | status.Status = v.Status 96 | } 97 | } 98 | 99 | region.Clusters[c] = status 100 | if region.Status < status.Status { 101 | region.Status = status.Status 102 | } 103 | } 104 | 105 | for n := 0; n < len(nodes); n++ { 106 | v, err := service.checkHealthNode(ctx, nodes[n], interval) 107 | if err != nil { 108 | return nil, err 109 | } 110 | region.Nodes[n] = *v 111 | if region.Status < v.Status { 112 | region.Status = v.Status 113 | } 114 | } 115 | 116 | return ®ion, nil 117 | } 118 | 119 | func (service *Service) CheckHealthNode(ctx context.Context, name string, interval time.Duration) (*types.StatusNode, error) { 120 | service.progress.Describe("discovering " + name) 121 | 122 | node, err := service.database.Lookup(ctx, name) 123 | if err != nil { 124 | return nil, err 125 | } 126 | 127 | node.Compute, _ = service.instance.Lookup(context.Background(), node.Type) 128 | 129 | return service.checkHealthNode(ctx, *node, interval) 130 | } 131 | 132 | func (service *Service) checkHealthNode(ctx 
context.Context, node types.Node, interval time.Duration) (*types.StatusNode, error) { 133 | service.progress.Describe("checking " + node.Name) 134 | 135 | check := rules.New(service.insight). 136 | Should(rules.OsCpuUtil.Below(40.0, 60.0)). 137 | Should(rules.OsCpuWait.Below(8.0, 10.0)). 138 | Should(rules.OsSwapIn.Below(1.0, 1.0)). 139 | Should(rules.OsSwapOut.Below(1.0, 1.0)). 140 | Should(rules.DbStorageReadIO.Below(100.0, 300.0)). 141 | Should(rules.DbStorageWriteIO.Below(100.0, 300.0)). 142 | Should(rules.DbStorageAwait.Below(10.0, 20.0)). 143 | Should(rules.DbDataBlockCacheHitRatio.Above(80, 90)). 144 | Should(rules.DbDataBlockReadTime.Below(10.0, 20.0)). 145 | Should(rules.DbDeadlocks.Below(0.001, 0.01)). 146 | Should(rules.DbXactCommit.Above(3.0, 5.0)). 147 | Should(rules.SqlEfficiency.Above(10.0, 20.0)) 148 | 149 | status, err := check.Run(ctx, node.ID, interval) 150 | if err != nil { 151 | return nil, err 152 | } 153 | 154 | code := types.STATUS_CODE_UNKNOWN 155 | for _, v := range status { 156 | if v.Code > code { 157 | code = v.Code 158 | } 159 | } 160 | 161 | return &types.StatusNode{ 162 | Status: code, 163 | Node: &node, 164 | Checks: status, 165 | }, nil 166 | } 167 | 168 | // 169 | // 170 | 171 | func (service *Service) ShowRegion(ctx context.Context) (*types.Region, error) { 172 | service.progress.Describe("discovering") 173 | 174 | clusters, nodes, err := service.discovery.LookupAll(context.Background()) 175 | if err != nil { 176 | return nil, err 177 | } 178 | 179 | return &types.Region{ 180 | Clusters: clusters, 181 | Nodes: nodes, 182 | }, nil 183 | } 184 | 185 | // 186 | // 187 | 188 | func (service *Service) ShowNode(ctx context.Context, name string, interval time.Duration) (*types.StatusNode, error) { 189 | service.progress.Describe("checking " + name) 190 | 191 | db, err := service.database.Lookup(context.Background(), name) 192 | if err != nil { 193 | return nil, err 194 | } 195 | 196 | db.Compute, _ = 
service.instance.Lookup(context.Background(), db.Type) 197 | 198 | node := types.StatusNode{Node: db} 199 | node.Checks, err = rules.New(service.insight). 200 | Should(rules.DbXactCommit.ShowMinMax()). 201 | Should(rules.SqlTuplesFetched.ShowMinMax()). 202 | Should(rules.SqlTuplesReturned.ShowMinMax()). 203 | Should(rules.SqlTuplesInserted.ShowMinMax()). 204 | Should(rules.SqlTuplesUpdated.ShowMinMax()). 205 | Should(rules.SqlTuplesDeleted.ShowMinMax()). 206 | Should(rules.OsCpuUtil.ShowMinMax()). 207 | Should(rules.OsCpuWait.ShowMinMax()). 208 | Should(rules.DbStorageReadIO.ShowMinMax()). 209 | Should(rules.DbStorageWriteIO.ShowMinMax()). 210 | Should(rules.DbDataBlockReadIO.ShowMinMax()). 211 | Should(rules.DbDataBlockCacheHit.ShowMinMax()). 212 | Should(rules.DbBuffersCheckpoints.ShowMinMax()). 213 | Should(rules.DbBuffersCheckpointsTime.ShowMinMax()). 214 | Should(rules.OsMemoryFree.ShowMinMax()). 215 | Should(rules.OsMemoryCached.ShowMinMax()). 216 | Should(rules.OsFileSysUsed.ShowMinMax()). 
217 | Run(context.Background(), db.ID, interval) 218 | 219 | if err != nil { 220 | return nil, err 221 | } 222 | 223 | return &node, nil 224 | } 225 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go-v2 v1.32.2 h1:AkNLZEyYMLnx/Q/mSKkcMqwNFXMAvFto9bNsHqcTduI= 2 | github.com/aws/aws-sdk-go-v2 v1.32.2/go.mod h1:2SK5n0a2karNTv5tbP1SjsX0uhttou00v/HpXKM1ZUo= 3 | github.com/aws/aws-sdk-go-v2/config v1.27.37 h1:xaoIwzHVuRWRHFI0jhgEdEGc8xE1l91KaeRDsWEIncU= 4 | github.com/aws/aws-sdk-go-v2/config v1.27.37/go.mod h1:S2e3ax9/8KnMSyRVNd3sWTKs+1clJ2f1U6nE0lpvQRg= 5 | github.com/aws/aws-sdk-go-v2/credentials v1.17.35 h1:7QknrZhYySEB1lEXJxGAmuD5sWwys5ZXNr4m5oEz0IE= 6 | github.com/aws/aws-sdk-go-v2/credentials v1.17.35/go.mod h1:8Vy4kk7at4aPSmibr7K+nLTzG6qUQAUO4tW49fzUV4E= 7 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14 h1:C/d03NAmh8C4BZXhuRNboF/DqhBkBCeDiJDcaqIT5pA= 8 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14/go.mod h1:7I0Ju7p9mCIdlrfS+JCgqcYD0VXz/N4yozsox+0o078= 9 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.21 h1:UAsR3xA31QGf79WzpG/ixT9FZvQlh5HY1NRqSHBNOCk= 10 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.21/go.mod h1:JNr43NFf5L9YaG3eKTm7HQzls9J+A9YYcGI5Quh1r2Y= 11 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.21 h1:6jZVETqmYCadGFvrYEQfC5fAQmlo80CeL5psbno6r0s= 12 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.21/go.mod h1:1SR0GbLlnN3QUmYaflZNiH1ql+1qrSiB2vwcJ+4UM60= 13 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= 14 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= 15 | github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0 h1:yCb6SUDqSodc2t8Jqdc35zq9V81a9pyV8SUTBluvA/Q= 16 | github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0/go.mod 
h1:W6sNzs5T4VpZn1Vy+FMKw8s24vt5k6zPJXcNOK0asBo= 17 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5 h1:QFASJGfT8wMXtuP3D5CRmMjARHv9ZmzFUMJznHDOY3w= 18 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5/go.mod h1:QdZ3OmoIjSX+8D1OPAzPxDfjXASbBMDsz9qvtyIhtik= 19 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20 h1:Xbwbmk44URTiHNx6PNo0ujDE6ERlsCKJD3u1zfnzAPg= 20 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20/go.mod h1:oAfOFzUB14ltPZj1rWwRc3d/6OgD76R8KlvU3EqM9Fg= 21 | github.com/aws/aws-sdk-go-v2/service/pi v1.28.1 h1:llnJtqfCCyYcvUJ+hN/1LiTvSEXR1NNkxWRk24TENKI= 22 | github.com/aws/aws-sdk-go-v2/service/pi v1.28.1/go.mod h1:uMwOUQk0LAcKX15wJYmzffJkho/zfSYzA2XSZ1lRfjc= 23 | github.com/aws/aws-sdk-go-v2/service/rds v1.85.0 h1:upDtFzeQmH2sk6RBInByUBYnGeR62FiwdnzrO0bAzOw= 24 | github.com/aws/aws-sdk-go-v2/service/rds v1.85.0/go.mod h1:lhiPj6RvoJHWG2STp+k5az55YqGgFLBzkKYdYHgUh9g= 25 | github.com/aws/aws-sdk-go-v2/service/sso v1.23.1 h1:2jrVsMHqdLD1+PA4BA6Nh1eZp0Gsy3mFSB5MxDvcJtU= 26 | github.com/aws/aws-sdk-go-v2/service/sso v1.23.1/go.mod h1:XRlMvmad0ZNL+75C5FYdMvbbLkd6qiqz6foR1nA1PXY= 27 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1 h1:0L7yGCg3Hb3YQqnSgBTZM5wepougtL1aEccdcdYhHME= 28 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1/go.mod h1:FnvDM4sfa+isJ3kDXIzAB9GAwVSzFzSy97uZ3IsHo4E= 29 | github.com/aws/aws-sdk-go-v2/service/sts v1.31.1 h1:8K0UNOkZiK9Uh3HIF6Bx0rcNCftqGCeKmOaR7Gp5BSo= 30 | github.com/aws/aws-sdk-go-v2/service/sts v1.31.1/go.mod h1:yMWe0F+XG0DkRZK5ODZhG7BEFYhLXi2dqGsv6tX0cgI= 31 | github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= 32 | github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= 33 | github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= 34 | github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= 35 | 
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 36 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 37 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 38 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 39 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 40 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 41 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 42 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 43 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 44 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 45 | github.com/lynn9388/supsub v0.0.0-20210304091550-458423b0e16a h1:LR5m8mfIAR1hp8GSkiWISYlxqcEa6eVWyWdqeC6OJic= 46 | github.com/lynn9388/supsub v0.0.0-20210304091550-458423b0e16a/go.mod h1:GNY2ynzkWq/wErpdsMxCjp9twbNGKOFcHnuduKsYD6k= 47 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 48 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 49 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= 50 | github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= 51 | github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE= 52 | github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= 53 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 54 | github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 55 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 56 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 57 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 58 | github.com/schollz/progressbar/v3 v3.16.0 h1:+MbBim/cE9DqDb8UXRfLJ6RZdyDkXG1BDy/sWc5s0Mc= 59 | github.com/schollz/progressbar/v3 v3.16.0/go.mod h1:lLiKjKJ9/yzc9Q8jk+sVLfxWxgXKsktvUf6TO+4Y2nw= 60 | github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= 61 | github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= 62 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 63 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 64 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 65 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 66 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 67 | go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= 68 | go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= 69 | golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= 70 | golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 71 | golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= 72 | golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= 73 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 74 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 75 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 76 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 77 | 
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 78 | -------------------------------------------------------------------------------- /internal/show/verbose/verbose.go: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package verbose 10 | 11 | import ( 12 | "bytes" 13 | "fmt" 14 | "strings" 15 | 16 | "github.com/zalando/rds-health/internal/show" 17 | "github.com/zalando/rds-health/internal/show/minimal" 18 | "github.com/zalando/rds-health/internal/types" 19 | ) 20 | 21 | // 22 | // Show config information about Nodes, Clusters, Regions 23 | // 24 | 25 | var ( 26 | // Show detailed information about node: 27 | // 28 | // example-database-a 29 | // Engine ¦ postgres v11.19 30 | // Instance ¦ db.m5.large 31 | // CPU ¦ 2x 3.10 GHz 32 | // Memory ¦ 8 GiB 33 | // Storage ¦ 100 GiB, gp2 34 | // Zones ¦ eu-central-1b 35 | showConfigNode = show.FromShow[types.Node]( 36 | func(node types.Node) ([]byte, error) { 37 | cpu := "-" 38 | mem := "-" 39 | if node.Compute != nil { 40 | cpu = node.Compute.CPU.String() 41 | mem = node.Compute.Memory.String() 42 | } 43 | 44 | ro := "" 45 | if node.ReadOnly { 46 | ro = " (read-only)" 47 | } 48 | 49 | b := &bytes.Buffer{} 50 | b.WriteString(fmt.Sprintf("\n%s%s\n", node.Name, ro)) 51 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Engine", node.Engine)) 52 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Instance", node.Type)) 53 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "CPU", cpu)) 54 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Memory", mem)) 55 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Storage", node.Storage)) 56 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Zones", strings.Join(node.Zones, ", "))) 57 | return b.Bytes(), nil 58 | }, 59 | ) 60 | 
61 | // Show cluster information as one liner 62 | // example-cluster 63 | // Engine ¦ postgres v11.19 64 | // Writers ¦ example-node-a, example-node-b 65 | // Readers ¦ example-node-c, example-node-d 66 | showConfigCluster = show.FromShow[types.Cluster]( 67 | func(c types.Cluster) ([]byte, error) { 68 | w := make([]string, len(c.Writer)) 69 | for i, x := range c.Writer { 70 | w[i] = x.Name 71 | } 72 | 73 | r := make([]string, len(c.Reader)) 74 | for i, x := range c.Reader { 75 | r[i] = x.Name 76 | } 77 | 78 | b := &bytes.Buffer{} 79 | b.WriteString(fmt.Sprintf("\n"+show.SCHEMA.Cluster+"\n", c.ID)) 80 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Engine", c.Engine)) 81 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Writers", strings.Join(w, ", "))) 82 | b.WriteString(fmt.Sprintf("\t%9s ¦ %s\n", "Readers", strings.Join(r, ", "))) 83 | return b.Bytes(), nil 84 | }, 85 | ) 86 | 87 | // Show cluster and its nodes config 88 | ShowConfigCluster = show.Cluster( 89 | showConfigCluster, 90 | showConfigNode, 91 | func(c types.Cluster) ([]types.Node, []types.Node) { return c.Writer, c.Reader }, 92 | ) 93 | 94 | ShowConfigRegion = show.Region[types.Region]( 95 | ShowConfigCluster, 96 | showConfigNode, 97 | func(sr types.Region) ([]types.Cluster, []types.Node) { return sr.Clusters, sr.Nodes }, 98 | ) 99 | ) 100 | 101 | // 102 | // Show health status about Nodes, Clusters, Regions 103 | // 104 | 105 | var ( 106 | // show MinMax measurement as one liner 107 | showMinMax = show.FromShow[types.MinMax]( 108 | func(mm types.MinMax) ([]byte, error) { 109 | text := fmt.Sprintf("min: %6.2f\tavg: %6.2f\tmax: %6.2f", mm.Min, mm.Avg, mm.Max) 110 | return []byte(text), nil 111 | }, 112 | ) 113 | 114 | // Show the status of single check 115 | // FAILED 5.55% 0.56 11.53 44.80 D3: storage i/o latency 116 | showHealthRule = show.FromShow[types.Status]( 117 | func(status types.Status) ([]byte, error) { 118 | b := &bytes.Buffer{} 119 | 120 | rate := *status.SuccessRate 121 | if status.Code > 
types.STATUS_CODE_SUCCESS { 122 | rate = 100.0 - *status.SuccessRate 123 | } 124 | 125 | ffs := show.SCHEMA.FmtForStatus(status.Code) 126 | b.WriteString(fmt.Sprintf(ffs+" "+ffs+" %4s %14.2f %14.2f %14.2f\t %s: %s\n", status.Code, fmt.Sprintf("%6.2f%%", rate), status.Rule.Unit, status.SoftMM.Min, status.SoftMM.Avg, status.SoftMM.Max, status.Rule.ID, status.Rule.About)) 127 | return b.Bytes(), nil 128 | }, 129 | ) 130 | 131 | // Show node health status as one line 132 | showHealthNode = show.FromShow[types.StatusNode]( 133 | func(node types.StatusNode) ([]byte, error) { 134 | status := show.StatusText(node.Status) 135 | 136 | cpu := "-" 137 | mem := "-" 138 | if node.Node.Compute != nil { 139 | cpu = node.Node.Compute.CPU.String() 140 | mem = node.Node.Compute.Memory.String() 141 | } 142 | 143 | ro := "" 144 | if node.Node.ReadOnly { 145 | ro = " (read-only)" 146 | } 147 | 148 | b := &bytes.Buffer{} 149 | b.WriteString(fmt.Sprintf("%s %s%s\n", status, node.Node.Name, ro)) 150 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n", "Engine", node.Node.Engine)) 151 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n", "Instance", node.Node.Type)) 152 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n", "CPU", cpu)) 153 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n", "Memory", mem)) 154 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n", "Storage", node.Node.Storage)) 155 | b.WriteString(fmt.Sprintf("%14s ¦ %s\n\n", "Zones", strings.Join(node.Node.Zones, ", "))) 156 | return b.Bytes(), nil 157 | }, 158 | ) 159 | 160 | showHealthNodeWithSymbol = show.Prefix[types.StatusNode]("\n").FMap( 161 | show.Printer2[types.StatusNode, types.StatusCode, types.StatusNode]{ 162 | A: minimal.ShowHealthSymbol, 163 | B: showHealthNode, 164 | UnApply2: func(sn types.StatusNode) (types.StatusCode, types.StatusNode) { 165 | return sn.Status, sn 166 | }, 167 | }, 168 | ) 169 | 170 | // FAIL example-database-a 171 | // FAILED 99.9% ¦ C01: cpu utilization 172 | // % ¦ min: 17.5 avg: 25.0 max: 80.0 173 | // 174 | ShowHealthNode = 
show.Prefix[types.StatusNode]( 175 | fmt.Sprintf("%6s %7s %4s %14s %14s %14s\t%3s %s\n", "STATUS", "%", "UNIT", "MIN", "AVG", "MAX", "ID", "CHECK"), 176 | ).FMap( 177 | show.Printer2[types.StatusNode, []types.Status, types.StatusNode]{ 178 | A: show.Seq[types.Status]{T: showHealthRule}, 179 | B: showHealthNodeWithSymbol, 180 | UnApply2: func(sn types.StatusNode) ([]types.Status, types.StatusNode) { 181 | return sn.Checks, sn 182 | }, 183 | }, 184 | ) 185 | ) 186 | 187 | // 188 | // Show Values of Rules 189 | // 190 | 191 | var ( 192 | // Show measured values for each rule 193 | showValueRule = show.FromShow[types.Status]( 194 | func(status types.Status) ([]byte, error) { 195 | b := &bytes.Buffer{} 196 | b.WriteString(fmt.Sprintf("\n%s (%s)\n", status.Rule.About, status.Rule.Unit)) 197 | 198 | if status.SoftMM != nil { 199 | soft, _ := showMinMax.Show(*status.SoftMM) 200 | b.WriteString(fmt.Sprintf("%s ¦ %s\n", "soft", string(soft))) 201 | } 202 | 203 | if status.HardMM != nil { 204 | hard, _ := showMinMax.Show(*status.HardMM) 205 | b.WriteString(fmt.Sprintf("%s ¦ %s\n", "hard", string(hard))) 206 | } 207 | 208 | return b.Bytes(), nil 209 | }, 210 | ) 211 | 212 | // Show short information about node 213 | showInfoNode = show.FromShow[types.StatusNode]( 214 | func(node types.StatusNode) ([]byte, error) { 215 | text := fmt.Sprintf("%s (%s, %s)\n", node.Node.Name, node.Node.Type, node.Node.Engine) 216 | return []byte(text), nil 217 | }, 218 | ) 219 | 220 | // Show stats about node 221 | ShowValueNode = show.Printer2[types.StatusNode, types.StatusNode, []types.Status]{ 222 | A: showInfoNode, 223 | B: show.Seq[types.Status]{T: showValueRule}, 224 | UnApply2: func(sn types.StatusNode) (types.StatusNode, []types.Status) { 225 | return sn, sn.Checks 226 | }, 227 | } 228 | ) 229 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | // 2 | // 
Copyright (c) 2024 Zalando SE 3 | // 4 | // This file may be modified and distributed under the terms 5 | // of the MIT license. See the LICENSE file for details. 6 | // https://github.com/zalando/rds-health 7 | // 8 | 9 | package cmd 10 | 11 | import ( 12 | "context" 13 | "fmt" 14 | "os" 15 | "strconv" 16 | "strings" 17 | "time" 18 | 19 | "github.com/aws/aws-sdk-go-v2/config" 20 | "github.com/spf13/cobra" 21 | "github.com/zalando/rds-health/internal/service" 22 | "github.com/zalando/rds-health/internal/show" 23 | ) 24 | 25 | // Execute is entry point for cobra cli application 26 | func Execute(vsn string) { 27 | rootCmd.Version = vsn 28 | 29 | if err := rootCmd.Execute(); err != nil { 30 | e := err.Error() 31 | fmt.Println(strings.ToUpper(e[:1]) + e[1:]) 32 | os.Exit(1) 33 | } 34 | } 35 | 36 | var ( 37 | outColored bool 38 | outVerbose bool 39 | outSilent bool 40 | outJsonify bool 41 | rootDatabase string 42 | rootInterval string 43 | ) 44 | 45 | func init() { 46 | rootCmd.PersistentFlags().BoolVarP(&outColored, "color", "C", false, "output colored") 47 | rootCmd.PersistentFlags().BoolVarP(&outVerbose, "verbose", "v", false, "output detailed information") 48 | rootCmd.PersistentFlags().BoolVar(&outSilent, "silent", false, "output nothing") 49 | rootCmd.PersistentFlags().BoolVar(&outJsonify, "json", false, "output raw json") 50 | // 51 | rootCmd.PersistentFlags().StringVarP(&rootDatabase, "database", "n", "", "AWS RDS database name") 52 | rootCmd.PersistentFlags().StringVarP(&rootInterval, "interval", "t", "24h", "time interval either in minutes (m), hours (h), days (d) or week (w)") 53 | 54 | } 55 | 56 | var rootCmd = &cobra.Command{ 57 | Use: "rds-health", 58 | Short: "command line interface to check health of AWS RDS", 59 | Long: ` 60 | The health utility is a command-line utility to check "health" of AWS RDS 61 | instances and clusters using 12 simple rules. 
The health utility conducts 62 | analysis of using time-series metrics collected by AWS Performance Insights. 63 | 64 | It is essential requirement to enable AWS Performance Insight for 65 | AWS RDS instances before using rds-health. 66 | 67 | This utility is the faster way to check the health status of AWS RDS instance. 68 | The health utility has defined 12 rules to be checked. For each rule, 69 | the utility reports the status (passed, failed), percent of time the rules is 70 | passed, and actual values. In order to reduce number of false positives, 71 | the utility applies softening on raw data to remove outliers. 72 | 73 | rds-health check -t 7d -n my-example-database 74 | 75 | STATUS % MIN AVG MAX ID CHECK 76 | FAILED 32.14% 0.03 13.33 250.61 D3: storage i/o latency 77 | WARNED 100.00% 4.10 4.34 4.69 P4: db transactions (xact_commit) 78 | FAILED 100.00% 1.04 1.06 1.61 P5: sql efficiency 79 | 80 | FAIL my-example-database 81 | 82 | (use "rds-health check -v -n my-example-database" to see full report) 83 | 84 | 85 | Health rules 86 | 87 | C1: CPU utilization (os.cpuUtilization.total) - Typical database workloads is 88 | bound to memory or storage, high CPU is anomaly that requires further 89 | investigation. 90 | 91 | C2: CPU await for storage (os.cpuUtilization.wait) - High value is the indicated 92 | of database instance to be bounded by the storage capacity. Highly likely the 93 | storage needs to be scaled. 94 | 95 | M1: swapped in from disk (os.swap.in) - Any intensive activities indicates that 96 | system is swapping. It is an indication about having low memory. 97 | 98 | M2: swapped out to disk (os.swap.out) - Any intensive activities indicates that 99 | system is swapping. It is an indication about having low memory. 100 | 101 | D1: storage read i/o (os.diskIO.rdsdev.readIOsPS) - A very low value shows that 102 | the entire dataset is served from memory. 
In this case, align the storage 103 | capacity with the overall database workload so that storage capacity is enough 104 | to handle peak traffic. The number shall be aligned with the storage 105 | architecture deployed for the database instance. 106 | 107 | D2: storage write i/o (os.diskIO.rdsdev.writeIOsPS) - High number shows that 108 | the workload is write-mostly and potentially bound to the disk storage. 109 | 110 | D3: storage i/o latency (os.diskIO.rdsdev.await) - The metric reflect a time 111 | used by the storage to fulfill the database queries. High latency on the storage 112 | implies a high latency of SQL queries. Please be aware that latency above 10ms 113 | requires improvement to the storage system. A typically disk latency should be 114 | less than 4 - 5 ms. Please validate that application SLOs are not impacted if 115 | application latency above 5 ms. 116 | 117 | P1: database cache hit ratio - Any values below 80 percent show that database 118 | have insufficient amount of shared buffers or physical RAM. Data required for 119 | top-called queries don't fit into memory, and database has to read it from disk. 120 | 121 | P2: database blocks read latency (db.IO.blk_read_time) - The metric reflect a 122 | time used by the database to read blocks from the storage. High latency on the 123 | storage implies a high latency of SQL queries. 124 | 125 | P3: database deadlocks (db.Concurrency.deadlocks) - Number of deadlocks detected 126 | in this database. Ideally, it shall be 0 shall be 0. The application schema and 127 | I/O logic requires evaluation if number is high. 128 | 129 | P4: database transactions (db.Transactions.xact_commit) - Number of transaction 130 | executed by database. The low number indicates that database instance is standby. 131 | 132 | P5: SQL efficiency - SQL efficiency shows the percentage of rows fetched by 133 | the client vs rows returned from the storage. 
The metric does not necessarily 134 | show any performance issue with databases but high ratio of returned vs 135 | fetched rows should trigger the question about optimization of SQL queries, 136 | schema or indexes. 137 | 138 | Usage: 139 | 140 | * checking the health status of individual instances or entire fleet 141 | * plan database capacity and its scalability 142 | * analysis of the database workloads 143 | * debug anomalies 144 | 145 | Examples: 146 | 147 | rds-health check -t 7d 148 | rds-health check -t 7d -n my-example-database 149 | rds-health show -t 7d -n my-example-database 150 | rds-health list 151 | 152 | `, 153 | Run: root, 154 | PersistentPreRun: setup, 155 | } 156 | 157 | func root(cmd *cobra.Command, args []string) { 158 | cmd.Help() 159 | } 160 | 161 | func setup(cmd *cobra.Command, args []string) { 162 | if outColored { 163 | show.SCHEMA = show.SCHEMA_COLOR 164 | } 165 | } 166 | 167 | // 168 | // utils for commands 169 | // 170 | 171 | // decodes human-readable time interval to time.Duration 172 | func parseInterval() (time.Duration, error) { 173 | v, err := strconv.Atoi(rootInterval[0 : len(rootInterval)-1]) 174 | if err != nil { 175 | return 0, err 176 | } 177 | 178 | switch rootInterval[len(rootInterval)-1] { 179 | case 'm': 180 | return time.Duration(v) * time.Minute, nil 181 | case 'h': 182 | return time.Duration(v) * time.Hour, nil 183 | case 'd': 184 | return time.Duration(v) * time.Hour * 24, nil 185 | case 'w': 186 | return time.Duration(v) * time.Hour * 24 * 7, nil 187 | default: 188 | return 0, fmt.Errorf("time scale %s is not supported", rootInterval) 189 | } 190 | } 191 | 192 | // outputs result of printer to stdout 193 | func stdout(data []byte, err error) error { 194 | if err != nil { 195 | return err 196 | } 197 | 198 | if _, err := os.Stdout.Write(data); err != nil { 199 | return err 200 | } 201 | 202 | return nil 203 | } 204 | 205 | // outputs to stderr 206 | func stderr(data string) { 207 | if !outSilent { 208 | 
os.Stderr.WriteString(data) 209 | } 210 | } 211 | 212 | func WithService( 213 | f func(cmd *cobra.Command, args []string, api Service) error, 214 | ) func(cmd *cobra.Command, args []string) error { 215 | return func(cmd *cobra.Command, args []string) error { 216 | conf, err := config.LoadDefaultConfig(context.Background()) 217 | if err != nil { 218 | return err 219 | } 220 | 221 | var api Service 222 | 223 | switch { 224 | case outSilent: 225 | api = service.New(conf, silentbar(0)) 226 | default: 227 | api = newServiceWithSpinner(conf) 228 | } 229 | 230 | return f(cmd, args, api) 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 |
discover anomalies, performance issues and optimization within AWS RDS
5 | 6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
28 |