├── .github ├── dependabot.yaml ├── release.yml └── workflows │ ├── manual_release.yaml │ ├── tagpr.yaml │ └── test.yaml ├── .gitignore ├── .goreleaser.yaml ├── .tagpr ├── CHANGELOG.md ├── CREDITS ├── LICENSE ├── README.md ├── alert.go ├── alert_based_sli.go ├── alert_based_sli_test.go ├── alert_test.go ├── app.go ├── app_test.go ├── cmd └── shimesaba │ └── main.go ├── config.go ├── config_test.go ├── definition.go ├── definition_test.go ├── destination.go ├── destination_metric_type.go ├── destination_metric_type_enumer.go ├── go.mod ├── go.sum ├── internal ├── logger │ └── logger.go └── timeutils │ ├── iterator.go │ ├── iterator_test.go │ ├── parse.go │ ├── parse_test.go │ ├── stirng_test.go │ └── string.go ├── mackerel.go ├── mackerel_test.go ├── mock_test.go ├── monitor.go ├── reliability.go ├── reliability_test.go ├── report.go ├── report_test.go └── testdata ├── app_disable_test.yaml ├── app_test.yaml ├── app_uptime_and_failuretime.yaml ├── v1.0.0_destination.yaml └── v1.0.0_simple.yaml /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 5 8 | labels: 9 | - "dependencies" 10 | 11 | - package-ecosystem: gomod 12 | directory: "/" 13 | schedule: 14 | interval: weekly 15 | time: "20:00" 16 | open-pull-requests-limit: 5 17 | reviewers: 18 | - "mashiike" 19 | ignore: 20 | - dependency-name: "aws*" 21 | update-types: ["version-update:semver-patch"] 22 | labels: 23 | - "dependencies" 24 | groups: 25 | aws-sdk-go-v2: 26 | patterns: 27 | - "github.com/aws/aws-sdk-go-v2/*" 28 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - tagpr 5 | 
-------------------------------------------------------------------------------- /.github/workflows/manual_release.yaml: -------------------------------------------------------------------------------- 1 | name: manual_release 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | tag: 6 | description: "release tag" 7 | required: true 8 | type: string 9 | 10 | jobs: 11 | goreleaser: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | with: 17 | ref: ${{ inputs.tag }} 18 | - name: Set up Go 19 | uses: actions/setup-go@v5 20 | with: 21 | go-version: 1.23 22 | - name: Run GoReleaser 23 | uses: goreleaser/goreleaser-action@v6 24 | with: 25 | version: latest 26 | args: release 27 | -------------------------------------------------------------------------------- /.github/workflows/tagpr.yaml: -------------------------------------------------------------------------------- 1 | # .github/workflows/tagpr.yml 2 | name: tagpr 3 | on: 4 | push: 5 | branches: ["main"] 6 | 7 | jobs: 8 | tagpr: 9 | runs-on: ubuntu-latest 10 | env: 11 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v4 14 | - id: tagpr 15 | uses: Songmu/tagpr@v1 16 | - name: Set up Go 17 | uses: actions/setup-go@v5 18 | with: 19 | go-version: 1.23 20 | - uses: mashiike/action-gocredits@v0 21 | with: 22 | github_token: ${{ secrets.GITHUB_TOKEN }} 23 | if: "steps.tagpr.outputs.tag == ''" 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | if: ${{ steps.tagpr.outputs.tag != '' }} 27 | with: 28 | ref: ${{ steps.tagpr.outputs.tag }} 29 | - name: Run GoReleaser 30 | uses: goreleaser/goreleaser-action@v6 31 | with: 32 | version: latest 33 | args: release 34 | if: ${{ steps.tagpr.outputs.tag != '' }} 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | push: 4 | branches: 5 
| - master 6 | pull_request: 7 | types: 8 | - opened 9 | - synchronize 10 | - reopened 11 | 12 | jobs: 13 | test: 14 | strategy: 15 | matrix: 16 | go: 17 | - 1.23 18 | name: Build 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Set up Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version: ${{ matrix.go }} 25 | id: go 26 | 27 | - name: Check out code into the Go module directory 28 | uses: actions/checkout@v4 29 | 30 | - name: Build & Test 31 | run: | 32 | go test -race ./... 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | .envrc 18 | dist/ 19 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | version: 1 2 | 3 | before: 4 | hooks: 5 | - go mod download 6 | 7 | builds: 8 | - env: 9 | - CGO_ENABLED=0 10 | main: ./cmd/shimesaba 11 | binary: shimesaba 12 | ldflags: 13 | - -s -w 14 | - -X main.Version={{.Version}} 15 | goos: 16 | - linux 17 | - windows 18 | - darwin 19 | goarch: 20 | - amd64 21 | - arm64 22 | 23 | release: 24 | prerelease: false 25 | 26 | archives: 27 | - files: 28 | - LICENSE 29 | - README.md 30 | - CHANGELOG.md 31 | - CREDITS 32 | 33 | snapshot: 34 | name_template: "{{ .Env.NIGHTLY_VERSION }}" 35 | 36 | changelog: 37 | sort: asc 38 | filters: 39 | exclude: 40 | - "^docs:" 41 | - "^test:" 42 | -------------------------------------------------------------------------------- /.tagpr: 
-------------------------------------------------------------------------------- 1 | # config file for the tagpr in git config format 2 | # The tagpr generates the initial configuration, which you can rewrite to suit your environment. 3 | # CONFIGURATIONS: 4 | # tagpr.releaseBranch 5 | # Generally, it is "main." It is the branch for releases. The tagpr tracks this branch, 6 | # creates or updates a pull request as a release candidate, or tags when they are merged. 7 | # 8 | # tagpr.versionFile 9 | # Versioning file containing the semantic version needed to be updated at release. 10 | # It will be synchronized with the "git tag". 11 | # Often this is a meta-information file such as gemspec, setup.cfg, package.json, etc. 12 | # Sometimes the source code file, such as version.go or Bar.pm, is used. 13 | # If you do not want to use versioning files but only git tags, specify the "-" string here. 14 | # You can specify multiple version files by comma separated strings. 15 | # 16 | # tagpr.vPrefix 17 | # Flag whether or not v-prefix is added to semver when git tagging. (e.g. v1.2.3 if true) 18 | # This is only a tagging convention, not how it is described in the version file. 19 | # 20 | # tagpr.changelog (Optional) 21 | # Flag whether or not changelog is added or changed during the release. 22 | # 23 | # tagpr.command (Optional) 24 | # Command to change files just before release. 
25 | # 26 | # tagpr.tmplate (Optional) 27 | # Pull request template in go template format 28 | [tagpr] 29 | vPrefix = true 30 | releaseBranch = main 31 | versionFile = - 32 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v1.4.4](https://github.com/mashiike/shimesaba/compare/v1.4.3...v1.4.4) - 2025-01-28 4 | - build(deps): bump github.com/urfave/cli/v2 from 2.27.1 to 2.27.2 by @dependabot in https://github.com/mashiike/shimesaba/pull/147 5 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.31.0 to 0.34.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/156 6 | - build(deps): bump goreleaser/goreleaser-action from 5 to 6 by @dependabot in https://github.com/mashiike/shimesaba/pull/154 7 | - build(deps): bump github.com/fatih/color from 1.16.0 to 1.17.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/149 8 | - build(deps): bump github.com/hashicorp/go-version from 1.6.0 to 1.7.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/151 9 | - update go 1.23 and handlename/ssmwrap/v2 by @mashiike in https://github.com/mashiike/shimesaba/pull/157 10 | - build(deps): bump github.com/aws/aws-lambda-go from 1.46.0 to 1.47.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/146 11 | 12 | ## [v1.4.3](https://github.com/mashiike/shimesaba/compare/v1.4.2...v1.4.3) - 2024-04-08 13 | - change config for dependabot by @mashiike in https://github.com/mashiike/shimesaba/pull/141 14 | - build(deps): bump github.com/stretchr/testify from 1.8.4 to 1.9.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/140 15 | - Add action-gocredits. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/145 16 | - build(deps): bump golang.org/x/sync from 0.6.0 to 0.7.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/143 17 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.30.0 to 0.31.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/144 18 | 19 | ## [v1.4.2](https://github.com/mashiike/shimesaba/compare/v1.4.1...v1.4.2) - 2024-04-01 20 | - build(deps): bump actions/checkout from 3 to 4 by @dependabot in https://github.com/mashiike/shimesaba/pull/128 21 | - Ignore if it is an unknown monitor type error. by @mashiike in https://github.com/mashiike/shimesaba/pull/138 22 | - build(deps): bump github.com/urfave/cli/v2 from 2.25.7 to 2.27.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/134 23 | - build(deps): bump actions/setup-go from 4 to 5 by @dependabot in https://github.com/mashiike/shimesaba/pull/129 24 | - build(deps): bump goreleaser/goreleaser-action from 1 to 5 by @dependabot in https://github.com/mashiike/shimesaba/pull/127 25 | - build(deps): bump github.com/fatih/color from 1.15.0 to 1.16.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/132 26 | - build(deps): bump github.com/shogo82148/go-retry from 1.1.1 to 1.2.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/133 27 | 28 | ## [v1.4.1](https://github.com/mashiike/shimesaba/compare/v1.4.0...v1.4.1) - 2024-02-02 29 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.23.0 to 0.24.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/91 30 | - build(deps): bump github.com/urfave/cli/v2 from 2.23.7 to 2.25.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/105 31 | - build(deps): bump actions/setup-go from 3 to 4 by @dependabot in https://github.com/mashiike/shimesaba/pull/103 32 | - build(deps): bump github.com/aws/aws-lambda-go from 1.36.1 to 1.39.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/104 33 | - 
build(deps): bump github.com/fatih/color from 1.13.0 to 1.15.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/102 34 | - build(deps): bump github.com/fujiwara/logutils from 1.1.0 to 1.1.2 by @dependabot in https://github.com/mashiike/shimesaba/pull/101 35 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.24.0 to 0.25.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/110 36 | - build(deps): bump github.com/aws/aws-lambda-go from 1.39.1 to 1.40.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/108 37 | - build(deps): bump golang.org/x/sync from 0.0.0-20220722155255-886fb9371eb4 to 0.2.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/112 38 | - build(deps): bump github.com/aws/aws-lambda-go from 1.40.0 to 1.41.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/113 39 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.25.0 to 0.26.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/114 40 | - build(deps): bump github.com/stretchr/testify from 1.8.1 to 1.8.3 by @dependabot in https://github.com/mashiike/shimesaba/pull/115 41 | - build(deps): bump github.com/urfave/cli/v2 from 2.25.1 to 2.25.7 by @dependabot in https://github.com/mashiike/shimesaba/pull/121 42 | - Fix error handling in CreateReports function by @mashiike in https://github.com/mashiike/shimesaba/pull/123 43 | - if check monitor alerts, no GetMonitor API Call. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/124 44 | - build(deps): bump golang.org/x/sync from 0.2.0 to 0.6.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/125 45 | - build(deps): bump github.com/stretchr/testify from 1.8.3 to 1.8.4 by @dependabot in https://github.com/mashiike/shimesaba/pull/118 46 | 47 | ## [v1.4.0](https://github.com/mashiike/shimesaba/compare/v1.3.0...v1.4.0) - 2023-01-04 48 | - reconfigure dependabot for github-actions by @mashiike in https://github.com/mashiike/shimesaba/pull/85 49 | - Feature/tagpr by @mashiike in https://github.com/mashiike/shimesaba/pull/87 50 | - build(deps): bump github.com/aws/aws-lambda-go from 1.32.1 to 1.36.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/84 51 | - build(deps): bump github.com/stretchr/testify from 1.8.0 to 1.8.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/74 52 | - no actions composite for dependabot by @mashiike in https://github.com/mashiike/shimesaba/pull/89 53 | - build(deps): bump github.com/urfave/cli/v2 from 2.11.0 to 2.23.7 by @dependabot in https://github.com/mashiike/shimesaba/pull/83 54 | - build(deps): bump github.com/handlename/ssmwrap from 1.2.0 to 1.2.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/75 55 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.21.1 to 0.23.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/79 56 | 57 | ## [v1.3.0](https://github.com/mashiike/shimesaba/compare/v1.2.1...v1.3.0) - 2022-08-15 58 | - SSMWRAP_NAMES to be able to retrieve the value of the specified SSM Parameter. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/64 59 | 60 | ## [v1.2.1](https://github.com/mashiike/shimesaba/compare/v1.2.0...v1.2.1) - 2022-07-29 61 | - lower case slo keyword by @mashiike in https://github.com/mashiike/shimesaba/pull/62 62 | 63 | ## [v1.2.0](https://github.com/mashiike/shimesaba/compare/v1.1.1...v1.2.0) - 2022-07-19 64 | - build(deps): bump github.com/stretchr/testify from 1.7.1 to 1.7.2 by @dependabot in https://github.com/mashiike/shimesaba/pull/47 65 | - build(deps): bump github.com/stretchr/testify from 1.7.2 to 1.7.3 by @dependabot in https://github.com/mashiike/shimesaba/pull/48 66 | - build(deps): bump github.com/aws/aws-lambda-go from 1.32.0 to 1.32.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/55 67 | - build(deps): bump github.com/hashicorp/go-version from 1.5.0 to 1.6.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/54 68 | - build(deps): bump github.com/urfave/cli/v2 from 2.8.1 to 2.10.3 by @dependabot in https://github.com/mashiike/shimesaba/pull/51 69 | - build(deps): bump github.com/mackerelio/mackerel-client-go from 0.21.0 to 0.21.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/50 70 | - update go modules by @mashiike in https://github.com/mashiike/shimesaba/pull/58 71 | - In v1.2.0 and later, the default for UpTime and FailureTime is Disabled. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/57 72 | - Ability to make graph annotations virtual alerts by @mashiike in https://github.com/mashiike/shimesaba/pull/59 73 | - build(deps): bump github.com/urfave/cli/v2 from 2.10.3 to 2.11.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/56 74 | - build(deps): bump github.com/stretchr/testify from 1.7.3 to 1.8.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/53 75 | 76 | ## [v1.1.1](https://github.com/mashiike/shimesaba/compare/v1.1.0...v1.1.1) - 2022-05-30 77 | - update yaml.v3.0.1 by @mashiike in https://github.com/mashiike/shimesaba/pull/46 78 | 79 | ## [v1.1.0](https://github.com/mashiike/shimesaba/compare/v1.0.0...v1.1.0) - 2022-05-24 80 | - Feature/v1.1.0 by @mashiike in https://github.com/mashiike/shimesaba/pull/45 81 | 82 | ## [v1.0.0](https://github.com/mashiike/shimesaba/compare/v0.7.2...v1.0.0) - 2022-04-19 83 | - build(deps): bump github.com/aws/aws-lambda-go from 1.28.0 to 1.29.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/35 84 | - v1.0.0 Release Candidate by @mashiike in https://github.com/mashiike/shimesaba/pull/36 85 | 86 | ## [v0.7.2](https://github.com/mashiike/shimesaba/compare/v0.7.1...v0.7.2) - 2022-02-02 87 | - fix: no default subcommand backfill by @mashiike in https://github.com/mashiike/shimesaba/pull/31 88 | 89 | ## [v0.7.1](https://github.com/mashiike/shimesaba/compare/v0.7.0...v0.7.1) - 2022-02-02 90 | - fix not working global backfill and dry-run by @mashiike in https://github.com/mashiike/shimesaba/pull/30 91 | 92 | ## [v0.7.0](https://github.com/mashiike/shimesaba/compare/v0.6.2...v0.7.0) - 2022-01-31 93 | - Feature/v0.7.0 by @mashiike in https://github.com/mashiike/shimesaba/pull/29 94 | 95 | ## [v0.6.2](https://github.com/mashiike/shimesaba/compare/v0.6.1...v0.6.2) - 2022-01-20 96 | - Fixed a bug that occurs when the time zone of the runtime is not UTC. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/28 97 | 98 | ## [v0.6.1](https://github.com/mashiike/shimesaba/compare/v0.6.0...v0.6.1) - 2022-01-19 99 | - Feature/v0.6.1 by @mashiike in https://github.com/mashiike/shimesaba/pull/27 100 | 101 | ## [v0.6.0](https://github.com/mashiike/shimesaba/compare/v0.5.0...v0.6.0) - 2022-01-17 102 | - Bump github.com/mashiike/evaluator from 0.3.0 to 0.4.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/16 103 | - Bump github.com/aws/aws-lambda-go from 1.27.0 to 1.27.1 by @dependabot in https://github.com/mashiike/shimesaba/pull/17 104 | - Bump github.com/mackerelio/mackerel-client-go from 0.20.0 to 0.21.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/18 105 | - Bump github.com/google/go-jsonnet from 0.17.0 to 0.18.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/21 106 | - Bump github.com/hashicorp/go-version from 1.3.0 to 1.4.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/23 107 | - Bump github.com/aws/aws-lambda-go from 1.27.1 to 1.28.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/22 108 | - Refactoring the Definition by @mashiike in https://github.com/mashiike/shimesaba/pull/24 109 | - Error budget calculation based on alerts. by @mashiike in https://github.com/mashiike/shimesaba/pull/25 110 | - Extend the way the configuration is written. 
by @mashiike in https://github.com/mashiike/shimesaba/pull/26 111 | 112 | ## [v0.5.0](https://github.com/mashiike/shimesaba/compare/v0.4.1...v0.5.0) - 2021-11-30 113 | - Bump github.com/urfave/cli/v2 from 2.2.0 to 2.3.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/13 114 | - Bump github.com/mackerelio/mackerel-client-go from 0.19.0 to 0.20.0 by @dependabot in https://github.com/mashiike/shimesaba/pull/14 115 | - Feature/v0.5.0 by @mashiike in https://github.com/mashiike/shimesaba/pull/15 116 | 117 | ## [v0.4.1](https://github.com/mashiike/shimesaba/compare/v0.4.0...v0.4.1) - 2021-11-15 118 | 119 | ## [v0.4.0](https://github.com/mashiike/shimesaba/compare/v0.3.1...v0.4.0) - 2021-11-15 120 | - Feature/v0.4.0 by @mashiike in https://github.com/mashiike/shimesaba/pull/12 121 | 122 | ## [v0.3.1](https://github.com/mashiike/shimesaba/compare/v0.3.0...v0.3.1) - 2021-11-15 123 | - fix ssmwrap_paths by @mashiike in https://github.com/mashiike/shimesaba/pull/11 124 | 125 | ## [v0.3.0](https://github.com/mashiike/shimesaba/compare/v0.2.3...v0.3.0) - 2021-11-14 126 | - Feature/dashboard command by @mashiike in https://github.com/mashiike/shimesaba/pull/10 127 | 128 | ## [v0.2.3](https://github.com/mashiike/shimesaba/compare/v0.2.2...v0.2.3) - 2021-11-11 129 | - use mashiike/evaluator by @mashiike in https://github.com/mashiike/shimesaba/pull/6 130 | - error_budget_consumption_percentage is not percentage by @mashiike in https://github.com/mashiike/shimesaba/pull/8 131 | 132 | ## [v0.2.2](https://github.com/mashiike/shimesaba/compare/v0.2.1...v0.2.2) - 2021-11-05 133 | - if set ssmwrap and failed, version option not work by @mashiike in https://github.com/mashiike/shimesaba/pull/4 134 | - fix Backfill does not work when multiple TimeFrames are mixed by @mashiike in https://github.com/mashiike/shimesaba/pull/5 135 | 136 | ## [v0.2.1](https://github.com/mashiike/shimesaba/compare/v0.2.0...v0.2.1) - 2021-11-01 137 | 138 | ## 
[v0.2.0](https://github.com/mashiike/shimesaba/compare/v0.1.0...v0.2.0) - 2021-11-01 139 | - add verison flag by @mashiike in https://github.com/mashiike/shimesaba/pull/3 140 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 ikeda-masashi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Latest GitHub release](https://img.shields.io/github/release/mashiike/shimesaba.svg) 2 | ![Github Actions test](https://github.com/mashiike/shimesaba/workflows/Test/badge.svg?branch=main) 3 | [![Go Report Card](https://goreportcard.com/badge/mashiike/shimesaba)](https://goreportcard.com/report/mashiike/shimesaba) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/mashiike/shimesaba/blob/master/LICENSE) 4 | # shimesaba 5 | 6 | For SRE to operate and monitor services using Mackerel. 7 | ## Description 8 | 9 | shimesaba is a tool for tracking SLO/ErrorBudget using Mackerel as an SLI measurement service. 10 | 11 | - shimesaba evaluates window-based SLOs with monitoring data on Mackerel. 12 | - Post the calculated values (error budget, failure time for SLO violation, uptime etc) by evaluating SLOs . as Mackerel service metric. 13 | 14 | 15 | ## Install 16 | 17 | ### binary packages 18 | 19 | [Releases](https://github.com/mashiike/shimesaba/releases). 20 | 21 | ### Homebrew tap 22 | 23 | ```console 24 | $ brew install mashiike/tap/shimesaba 25 | ``` 26 | 27 | ## Usage 28 | 29 | ### as CLI command 30 | 31 | ```console 32 | $ shimesaba -config config.yaml -mackerel-apikey 33 | ``` 34 | 35 | ```console 36 | NAME: 37 | shimesaba - A commandline tool for tracking SLO/ErrorBudget using Mackerel as an SLI measurement service. 38 | 39 | USAGE: 40 | shimesaba -config [command options] 41 | 42 | VERSION: 43 | v1.0.0 44 | 45 | COMMANDS: 46 | run run shimesaba. 
this is main feature (deprecated), use no subcommand 47 | help, h Shows a list of commands or help for one command 48 | 49 | GLOBAL OPTIONS: 50 | --backfill value generate report before n point (default: 3) [$BACKFILL, $SHIMESABA_BACKFILL] 51 | --config value, -c value config file path, can set multiple [$CONFIG, $SHIMESABA_CONFIG] 52 | --debug output debug log (default: false) [$SHIMESABA_DEBUG] 53 | --dry-run report output stdout and not put mackerel (default: false) [$SHIMESABA_DRY_RUN] 54 | --mackerel-apikey value, -k value for access mackerel API (default: *********) [$MACKEREL_APIKEY, $SHIMESABA_MACKEREL_APIKEY] 55 | --help, -h show help (default: false) 56 | --version, -v print the version (default: false) 57 | ``` 58 | 59 | ### as AWS Lambda function 60 | 61 | `shimesaba` binary also runs as AWS Lambda function. 62 | shimesaba implicitly behaves as a run command when run as a bootstrap with a Lambda Function 63 | 64 | CLI options can be specified from environment variables. For example, when `MACKEREL_APIKEY` environment variable is set, the value is set to `-mackerel-apikey` option. 65 | 66 | Example Lambda functions configuration with [github.com/fujiwara/lambroll](https://github.com/fujiwara/lambroll) 67 | 68 | ```json 69 | { 70 | "FunctionName": "shimesaba", 71 | "Environment": { 72 | "Variables": { 73 | "SHIMESABA_CONFIG": "config.yaml", 74 | "MACKEREL_APIKEY": "" 75 | } 76 | }, 77 | "Handler": "shimesaba", 78 | "MemorySize": 128, 79 | "Role": "arn:aws:iam::0123456789012:role/lambda-function", 80 | "Runtime": "provided.al2", 81 | "Timeout": 300 82 | } 83 | ``` 84 | 85 | ### Configuration file 86 | 87 | The following are the settings for the latest v0.7.0. 88 | 89 | YAML format. 90 | 91 | ```yaml 92 | required_version: ">=1.0.0" # which specifies which versions of shimesaba can be used with your configuration. 93 | 94 | # This is a common setting item for error budget calculation. 
95 | # It is possible to override the same settings in each SLO definition. 96 | destination: 97 | service_name: prod # - The name of the service to which you want to submit the service metric for error budgeting. 98 | metric_prefix: api # - Specifies the service metric prefix for error budgeting. 99 | rolling_period: 28d # - Specify the size of the rolling window to calculate the error budget. 100 | calculate_interval: 1h # - Settings related to the interval for calculating the error budget. 101 | error_budget_size: 0.1% # - This setting is related to the size of the error budget. 102 | # If % is used, it is a ratio to the size of the rolling window. 103 | # It is also possible to specify a time such as 1h or 40m. 104 | 105 | # Describes the settings for each SLO. SLOs are treated as monitoring rules. 106 | # The definition of each SLO is determined by ORing the monitoring rules that match the conditions specified in `objectives`. 107 | # That is, based on the alerts corresponding to the monitoring rules that match the conditions, the existence of any of the alerts will be judged as SLO violation. 108 | slo: 109 | # In the availability SLO, if an alert occurs for a monitoring rule name that starts with "SLO availability" 110 | # or an external monitoring rule that ends with "api.example.com", it is considered an SLO violation. 111 | - id: availability 112 | alert_based_sli: # This setting uses Mackerel alerts as SLI. 113 | - monitor_name_prefix: "SLO availability" 114 | - monitor_name_suffix: "api.example.com" 115 | monitor_type: "external" 116 | # In the latency SLO, we consider it an SLO violation if an alert occurs for a host metric monitoring rule with a name starting with "SLO availability". 
117 | - id: latency 118 | error_budget_size: 200m 119 | alert_based_sli: 120 | - monitor_name_prefix: "SLO latency" 121 | - monitor_type: "host" 122 | try_reassessment: true # This setting attempts to reevaluate an alert using the actual metric only if the type of monitor from which the alert originated is service or host. 123 | ``` 124 | 125 | `slo` takes a list of constituent SLI/SLO definitions. 126 | 6 Mackerel service metrics will be listed per definition. 127 | 128 | For example, if id is `latency` in the above configuration, the following service metric will be posted. 129 | - `api.error_budget.latency`: Current error budget remaining number (unit:minutes) 130 | - `api.error_budget_percentage.latency`: percentage of current error budget remaining. If it exceeds 100%, the error budget is used up. 131 | - `api.error_budget_consumption.latency`: Error budget newly consumed in this calculation window (unit:minutes) 132 | - `api.error_budget_consumption_percentage.latency`: Percentage of newly consumed error budget in this calculation window 133 | - `api.failure_time.latency`: Time of SLO violation within the rolling window time frame (unit:minutes) 134 | - `api.uptime.latency`: Time that can be treated as normal operation within the time frame of the rolling window (unit:minutes) 135 | 136 | ### Manual correction feature 137 | 138 | If you enter `downtime:3m` or similar in the reason for closing an alert, the alert will be calculated as if the SLO had been violated for 3 minutes from the time it was opened. 139 | 140 | The description "3m" can be any time like `1h`, `40m`, `1h50m`, etc. as well as other settings. 141 | When combined with other statements, half-width spaces are required before and after the above keywords. 142 | 143 | ### Environment variable `SSMWRAP_PATHS`, `SSMWRAP_NAMES` 144 | 145 | It incorporates [github.com/handlename/ssmwrap](https://github.com/handlename/ssmwrap) for parameter management. 
146 | If you specify the path of the Parameter Store of AWS Systems Manager separated by commas, it will be output to the environment variable. 147 | Useful when used as a Lambda function. 148 | 149 | For example, if you have a secrets named `prod/MACKEREL_APIKEY` in your secrets manager, it is useful to set the following environment variable. 150 | 151 | `SSMWRAP_NAMES=/aws/reference/secretsmanager/prod/MACKEREL_APIKEY` 152 | 153 | ## LICENSE 154 | 155 | MIT 156 | -------------------------------------------------------------------------------- /alert.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "strings" 7 | "sync" 8 | "time" 9 | 10 | "github.com/Songmu/flextime" 11 | "github.com/mashiike/shimesaba/internal/timeutils" 12 | ) 13 | 14 | type Alert struct { 15 | Monitor *Monitor 16 | HostID string 17 | OpenedAt time.Time 18 | ClosedAt *time.Time 19 | Reason string 20 | 21 | mu sync.Mutex 22 | cache Reliabilities 23 | } 24 | 25 | func NewAlert(monitor *Monitor, openedAt time.Time, closedAt *time.Time) *Alert { 26 | if closedAt != nil { 27 | tmp := closedAt.Truncate(time.Minute).UTC() 28 | closedAt = &tmp 29 | } 30 | return &Alert{ 31 | Monitor: monitor, 32 | OpenedAt: openedAt.Truncate(time.Minute).UTC(), 33 | ClosedAt: closedAt, 34 | } 35 | } 36 | 37 | func NewVirtualAlert(description string, openedAt time.Time, closedAt time.Time) *Alert { 38 | closedAt = closedAt.Truncate(time.Minute).UTC() 39 | return &Alert{ 40 | OpenedAt: openedAt.Truncate(time.Minute).UTC(), 41 | ClosedAt: &closedAt, 42 | Reason: description, 43 | } 44 | } 45 | 46 | func (alert *Alert) WithHostID(hostID string) *Alert { 47 | return &Alert{ 48 | Monitor: alert.Monitor, 49 | OpenedAt: alert.OpenedAt, 50 | ClosedAt: alert.ClosedAt, 51 | HostID: hostID, 52 | Reason: alert.Reason, 53 | } 54 | } 55 | 56 | func (alert *Alert) WithReason(reason string) *Alert { 57 | return &Alert{ 58 | Monitor: 
alert.Monitor, 59 | OpenedAt: alert.OpenedAt, 60 | ClosedAt: alert.ClosedAt, 61 | HostID: alert.HostID, 62 | Reason: reason, 63 | } 64 | } 65 | 66 | func (alert *Alert) String() string { 67 | monitor := "???" 68 | if alert.Monitor != nil { 69 | monitor = alert.Monitor.ID() + ":" + alert.Monitor.Name() 70 | } 71 | return fmt.Sprintf("alert[%s] %s ~ %s", 72 | monitor, 73 | alert.OpenedAt, 74 | alert.ClosedAt, 75 | ) 76 | } 77 | 78 | func (alert *Alert) IsVirtual() bool { 79 | return alert.Monitor == nil 80 | } 81 | 82 | func (alert *Alert) endAt() time.Time { 83 | if alert.ClosedAt != nil { 84 | return *alert.ClosedAt 85 | } 86 | return flextime.Now().Add(time.Minute) 87 | } 88 | 89 | func (alert *Alert) EvaluateReliabilities(timeFrame time.Duration, enableReassessment bool) (Reliabilities, error) { 90 | log.Printf("[debug] EvaluateReliabilities alert=%s", alert) 91 | alert.mu.Lock() 92 | defer alert.mu.Unlock() 93 | if alert.cache != nil { 94 | log.Printf("[debug] return cache alert=%s", alert) 95 | return alert.cache, nil 96 | } 97 | if enableReassessment { 98 | if reliabilities, ok := alert.Monitor.EvaluateReliabilities( 99 | alert.HostID, 100 | timeFrame, 101 | alert.OpenedAt.Add(-15*time.Minute), 102 | alert.endAt(), 103 | ); ok { 104 | log.Printf("[notice] applying SLO reassessment as an experimental feature for Monitor %s.", alert.Monitor.name) 105 | alert.cache = reliabilities 106 | return reliabilities, nil 107 | } 108 | } 109 | var startAt, endAt time.Time 110 | var isNoViolation IsNoViolationCollection 111 | if correctionTime, ok := alert.CorrectionTime(); ok { 112 | log.Printf("[notice] applying SLO Violation time %s, to %s", correctionTime, alert.Monitor.name) 113 | startAt = alert.OpenedAt 114 | endAt = alert.endAt() 115 | isNoViolation = make(IsNoViolationCollection, correctionTime/time.Minute) 116 | iter := timeutils.NewIterator(startAt, alert.OpenedAt.Add(correctionTime), time.Minute) 117 | for iter.HasNext() { 118 | t, _ := iter.Next() 119 | 
isNoViolation[t] = false 120 | } 121 | } else { 122 | isNoViolation, startAt, endAt = alert.newIsNoViolation() 123 | } 124 | 125 | reliabilities, err := isNoViolation.NewReliabilities(timeFrame, startAt, endAt) 126 | if err != nil { 127 | return nil, err 128 | } 129 | alert.cache = reliabilities 130 | return reliabilities, nil 131 | } 132 | 133 | const correctionKeyword = "downtime:" 134 | 135 | func (alert *Alert) CorrectionTime() (time.Duration, bool) { 136 | i := strings.Index(alert.Reason, correctionKeyword) 137 | if i < 0 { 138 | return 0, false 139 | } 140 | str := alert.Reason[i+len(correctionKeyword):] 141 | j := strings.IndexRune(str, ' ') 142 | if j >= 0 { 143 | str = str[:j] 144 | } 145 | d, err := timeutils.ParseDuration(str) 146 | if err != nil { 147 | log.Printf("[debug] try parse correction time failed:%s", err) 148 | return 0, false 149 | } 150 | return d, true 151 | } 152 | 153 | func (alert *Alert) newIsNoViolation() (isNoViolation IsNoViolationCollection, startAt, endAt time.Time) { 154 | startAt = alert.OpenedAt 155 | endAt = alert.endAt() 156 | 157 | isNoViolation = make(IsNoViolationCollection, endAt.Sub(startAt)/time.Minute) 158 | iter := timeutils.NewIterator(startAt, endAt, time.Minute) 159 | for iter.HasNext() { 160 | t, _ := iter.Next() 161 | isNoViolation[t] = false 162 | } 163 | return 164 | } 165 | 166 | type Alerts []*Alert 167 | 168 | func (alerts Alerts) StartAt() time.Time { 169 | startAt := flextime.Now() 170 | for _, alert := range alerts { 171 | if alert.OpenedAt.Before(startAt) { 172 | startAt = alert.OpenedAt 173 | } 174 | } 175 | return startAt 176 | } 177 | func (alerts Alerts) EndAt() time.Time { 178 | endAt := time.Unix(0, 0) 179 | for _, alert := range alerts { 180 | if alert.ClosedAt == nil { 181 | return flextime.Now() 182 | } 183 | if alert.ClosedAt.After(endAt) { 184 | endAt = *alert.ClosedAt 185 | } 186 | } 187 | return endAt 188 | } 189 | 
-------------------------------------------------------------------------------- /alert_based_sli.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "sync" 11 | "time" 12 | 13 | "github.com/mashiike/shimesaba/internal/timeutils" 14 | "golang.org/x/sync/errgroup" 15 | ) 16 | 17 | type AlertBasedSLI struct { 18 | cfg *AlertBasedSLIConfig 19 | } 20 | 21 | func NewAlertBasedSLI(cfg *AlertBasedSLIConfig) *AlertBasedSLI { 22 | return &AlertBasedSLI{cfg: cfg} 23 | } 24 | 25 | var evaluateReliabilitiesWorkerNum int = 10 26 | 27 | func init() { 28 | if str := os.Getenv("SHIMESABA_EVALUATE_RELIABILITIES_WORKER_NUM"); str != "" { 29 | i, err := strconv.ParseInt(str, 10, 32) 30 | if err != nil { 31 | panic(fmt.Errorf("SHIMESABA_EVALUATE_RELIABILITIES_WORKER_NUM can not parse as int: %w", err)) 32 | } 33 | evaluateReliabilitiesWorkerNum = int(i) 34 | if evaluateReliabilitiesWorkerNum <= 0 { 35 | evaluateReliabilitiesWorkerNum = 1 36 | } 37 | } 38 | } 39 | func (o AlertBasedSLI) EvaluateReliabilities(timeFrame time.Duration, alerts Alerts, startAt, endAt time.Time) (Reliabilities, error) { 40 | iter := timeutils.NewIterator(startAt, endAt, timeFrame) 41 | iter.SetEnableOverWindow(true) 42 | rc := make([]*Reliability, 0) 43 | for iter.HasNext() { 44 | cursorAt, _ := iter.Next() 45 | rc = append(rc, NewReliability(cursorAt, timeFrame, nil)) 46 | } 47 | reliabilities, err := NewReliabilities(rc) 48 | if err != nil { 49 | return nil, fmt.Errorf("failed to create Reliabilities: %w", err) 50 | } 51 | 52 | inputQueue := make(chan *Alert, len(alerts)) 53 | outputQueue := make(chan Reliabilities, evaluateReliabilitiesWorkerNum*2) 54 | quit := make(chan struct{}) 55 | cancelCtx, cancel := context.WithCancel(context.Background()) 56 | defer cancel() 57 | eg, egCtx := errgroup.WithContext(cancelCtx) 58 | for i := 0; i < 
evaluateReliabilitiesWorkerNum; i++ { 59 | //input workers 60 | workerID := i 61 | eg.Go(func() error { 62 | log.Printf("[debug] start EvaluateReliabilities input worker_id=%d", workerID) 63 | for { 64 | select { 65 | case <-egCtx.Done(): 66 | log.Printf("[debug] end EvaluateReliabilities input worker_id=%d: %v", workerID, egCtx.Err()) 67 | return egCtx.Err() 68 | case <-quit: 69 | log.Printf("[debug] end EvaluateReliabilities input worker_id=%d: quit", workerID) 70 | return nil 71 | case alert, ok := <-inputQueue: 72 | if !ok { 73 | log.Printf("[debug] end EvaluateReliabilities input worker_id=%d: success", workerID) 74 | return nil 75 | } 76 | log.Printf("[debug] worker_id=%d EvaluateReliabilities %s", workerID, alert.String()) 77 | tmp, err := alert.EvaluateReliabilities(timeFrame, o.cfg.TryReassessment) 78 | if err != nil { 79 | log.Printf("[debug] end EvaluateReliabilities input worker_id=%d: EvaluateReliabilities err: %v", workerID, err) 80 | return err 81 | } 82 | outputQueue <- tmp 83 | } 84 | } 85 | }) 86 | } 87 | var outputErr error 88 | var wg sync.WaitGroup 89 | wg.Add(1) 90 | go func() { 91 | // output worker 92 | log.Printf("[debug] start EvaluateReliabilities output worker") 93 | defer wg.Done() 94 | log.Printf("[debug] end EvaluateReliabilities output worker") 95 | for { 96 | select { 97 | case <-cancelCtx.Done(): 98 | log.Printf("[debug] end EvaluateReliabilities output worker: %v", cancelCtx.Err()) 99 | return 100 | case <-quit: 101 | log.Printf("[debug] end EvaluateReliabilities output worker: quit") 102 | return 103 | case tmp, ok := <-outputQueue: 104 | if !ok { 105 | // Completed evaluation of all alerts 106 | log.Printf("[debug] end EvaluateReliabilities output worker: success") 107 | return 108 | } 109 | reliabilities, outputErr = reliabilities.MergeInRange(tmp, startAt, endAt) 110 | if outputErr != nil { 111 | log.Printf("[debug] end EvaluateReliabilities output worker: MergeInRange err: %v", err) 112 | return 113 | } 114 | } 115 | } 116 | 
}() 117 | 118 | for _, alert := range alerts { 119 | if !o.matchAlert(alert) { 120 | continue 121 | } 122 | inputQueue <- alert 123 | } 124 | close(inputQueue) 125 | 126 | // wait input wokers done 127 | if err := eg.Wait(); err != nil { 128 | // send quit to output worker and wait output woker done. 129 | close(quit) 130 | wg.Wait() 131 | return nil, err 132 | } 133 | 134 | // Evaluation of all alerts was completed. Close queue of ouptut and wait for merge process. 135 | close(outputQueue) 136 | wg.Wait() 137 | return reliabilities, nil 138 | } 139 | 140 | func (o AlertBasedSLI) matchAlert(alert *Alert) bool { 141 | if alert.IsVirtual() { 142 | return true 143 | } 144 | log.Printf("[debug] try match %s vs %v", alert, o.cfg) 145 | if o.MatchMonitor(alert.Monitor) { 146 | log.Printf("[debug] match %s", alert) 147 | return true 148 | } 149 | return false 150 | } 151 | 152 | func (o AlertBasedSLI) MatchMonitor(monitor *Monitor) bool { 153 | if o.cfg.MonitorID != "" { 154 | if monitor.ID() != o.cfg.MonitorID { 155 | return false 156 | } 157 | } 158 | if o.cfg.MonitorName != "" { 159 | if monitor.Name() != o.cfg.MonitorName { 160 | return false 161 | } 162 | } 163 | if o.cfg.MonitorNamePrefix != "" { 164 | if !strings.HasPrefix(monitor.Name(), o.cfg.MonitorNamePrefix) { 165 | return false 166 | } 167 | } 168 | if o.cfg.MonitorNameSuffix != "" { 169 | if !strings.HasSuffix(monitor.Name(), o.cfg.MonitorNameSuffix) { 170 | return false 171 | } 172 | } 173 | if o.cfg.MonitorType != "" { 174 | if !strings.EqualFold(monitor.Type(), o.cfg.MonitorType) { 175 | return false 176 | } 177 | } 178 | return true 179 | } 180 | -------------------------------------------------------------------------------- /alert_based_sli_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/Songmu/flextime" 9 | "github.com/mashiike/shimesaba" 10 | 
"github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestAlertBasedSLI(t *testing.T) { 14 | restore := flextime.Fix(time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC)) 15 | defer restore() 16 | alerts := shimesaba.Alerts{ 17 | shimesaba.NewAlert( 18 | shimesaba.NewMonitor( 19 | "hogera", 20 | "SLO hoge", 21 | "expression", 22 | ), 23 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 24 | ptrTime(time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC)), 25 | ), 26 | shimesaba.NewAlert( 27 | shimesaba.NewMonitor( 28 | "fugara", 29 | "SLO fuga", 30 | "service", 31 | ), 32 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 33 | ptrTime(time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC)), 34 | ), 35 | shimesaba.NewAlert( 36 | shimesaba.NewMonitor( 37 | "fugara", 38 | "SLO fuga", 39 | "service", 40 | ), 41 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 42 | ptrTime(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC)), 43 | ), 44 | shimesaba.NewAlert( 45 | shimesaba.NewMonitor( 46 | "hogera", 47 | "SLO hoge", 48 | "expression", 49 | ), 50 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 51 | nil, 52 | ), 53 | } 54 | cases := []struct { 55 | cfg *shimesaba.AlertBasedSLIConfig 56 | expected map[time.Time]bool 57 | }{ 58 | { 59 | cfg: &shimesaba.AlertBasedSLIConfig{ 60 | MonitorID: "hogera", 61 | }, 62 | expected: map[time.Time]bool{ 63 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 64 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 65 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 66 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 67 | }, 68 | }, 69 | { 70 | cfg: &shimesaba.AlertBasedSLIConfig{ 71 | MonitorNameSuffix: "hoge", 72 | }, 73 | expected: map[time.Time]bool{ 74 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 75 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 76 | time.Date(2021, time.October, 1, 0, 2, 0, 0, 
time.UTC): false, 77 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 78 | }, 79 | }, 80 | { 81 | cfg: &shimesaba.AlertBasedSLIConfig{ 82 | MonitorID: "fugara", 83 | }, 84 | expected: map[time.Time]bool{ 85 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): true, 86 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): true, 87 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 88 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC): false, 89 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 90 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): true, 91 | }, 92 | }, 93 | { 94 | cfg: &shimesaba.AlertBasedSLIConfig{ 95 | MonitorNamePrefix: "SLO", 96 | }, 97 | expected: map[time.Time]bool{ 98 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 99 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 100 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 101 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC): false, 102 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 103 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 104 | }, 105 | }, 106 | { 107 | cfg: &shimesaba.AlertBasedSLIConfig{ 108 | MonitorNamePrefix: "SLO", 109 | MonitorType: "Expression", 110 | }, 111 | expected: map[time.Time]bool{ 112 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 113 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 114 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 115 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 116 | }, 117 | }, 118 | { 119 | cfg: &shimesaba.AlertBasedSLIConfig{ 120 | MonitorNameSuffix: "hoge", 121 | MonitorType: "service", 122 | }, 123 | expected: map[time.Time]bool{}, 124 | }, 125 | } 126 | for i, c := range cases { 127 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 128 | obj := shimesaba.NewAlertBasedSLI(c.cfg) 129 | actual, err := 
obj.EvaluateReliabilities( 130 | time.Minute, 131 | alerts, 132 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 133 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 134 | ) 135 | require.NoError(t, err) 136 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 137 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), time.Minute, c.expected), 138 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), time.Minute, c.expected), 139 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), time.Minute, c.expected), 140 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), time.Minute, c.expected), 141 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC), time.Minute, c.expected), 142 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), time.Minute, c.expected), 143 | }) 144 | require.EqualValues(t, expected, actual) 145 | }) 146 | } 147 | 148 | } 149 | 150 | func ptrTime(t time.Time) *time.Time { 151 | return &t 152 | } 153 | -------------------------------------------------------------------------------- /alert_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/Songmu/flextime" 9 | "github.com/mashiike/shimesaba" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestAlerts(t *testing.T) { 14 | restore := flextime.Fix(time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC)) 15 | defer restore() 16 | alerts := shimesaba.Alerts{ 17 | shimesaba.NewAlert( 18 | shimesaba.NewMonitor( 19 | "hogera", 20 | "hogera.example.com", 21 | "external", 22 | ), 23 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 24 | ptrTime(time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC)), 25 | ), 26 | shimesaba.NewAlert( 27 | 
shimesaba.NewMonitor( 28 | "fugara", 29 | "fugara.example.com", 30 | "external", 31 | ), 32 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 33 | ptrTime(time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC)), 34 | ), 35 | shimesaba.NewAlert( 36 | shimesaba.NewMonitor( 37 | "fugara", 38 | "fugara.example.com", 39 | "external", 40 | ), 41 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 42 | ptrTime(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC)), 43 | ), 44 | shimesaba.NewVirtualAlert( 45 | "slo:hoge", 46 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 47 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC), 48 | ), 49 | } 50 | require.EqualValues(t, time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), alerts.StartAt()) 51 | require.EqualValues(t, time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), alerts.EndAt()) 52 | alerts = append(alerts, shimesaba.NewAlert( 53 | shimesaba.NewMonitor( 54 | "hogera", 55 | "hogera.example.com", 56 | "external", 57 | ), 58 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 59 | nil, 60 | )) 61 | require.EqualValues(t, time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), alerts.StartAt()) 62 | require.EqualValues(t, time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC), alerts.EndAt()) 63 | } 64 | 65 | func TestAlertEvaluateReliabilities(t *testing.T) { 66 | restore := flextime.Fix(time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC)) 67 | defer restore() 68 | cases := []struct { 69 | alert *shimesaba.Alert 70 | timeFrame time.Duration 71 | expectedGenerator func() shimesaba.Reliabilities 72 | }{ 73 | { 74 | alert: shimesaba.NewAlert( 75 | shimesaba.NewMonitor( 76 | "fugara", 77 | "fugara.example.com", 78 | "external", 79 | ), 80 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 81 | ptrTime(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC)), 82 | ), 83 | timeFrame: 5 * time.Minute, 84 | expectedGenerator: func() shimesaba.Reliabilities { 85 | isNoViolation 
:= map[time.Time]bool{ 86 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC): false, 87 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 88 | } 89 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 90 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 91 | }) 92 | return expected 93 | }, 94 | }, 95 | { 96 | alert: shimesaba.NewAlert( 97 | shimesaba.NewMonitor( 98 | "fugara", 99 | "fugara.example.com", 100 | "external", 101 | ), 102 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 103 | ptrTime(time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC)), 104 | ), 105 | timeFrame: 5 * time.Minute, 106 | expectedGenerator: func() shimesaba.Reliabilities { 107 | isNoViolation := map[time.Time]bool{ 108 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC): false, 109 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 110 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 111 | time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC): false, 112 | time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC): false, 113 | } 114 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 115 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 116 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 117 | }) 118 | return expected 119 | }, 120 | }, 121 | { 122 | alert: shimesaba.NewAlert( 123 | shimesaba.NewMonitor( 124 | "fugara", 125 | "fugara.example.com", 126 | "external", 127 | ), 128 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 129 | nil, 130 | ), 131 | timeFrame: 2 * time.Minute, 132 | expectedGenerator: func() shimesaba.Reliabilities { 133 | isNoViolation := map[time.Time]bool{ 134 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): true, 135 | time.Date(2021, time.October, 1, 0, 3, 0, 
0, time.UTC): false, 136 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 137 | time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC): false, 138 | time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC): false, 139 | time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC): false, 140 | time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC): false, 141 | } 142 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 143 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 2*time.Minute, isNoViolation), 144 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC), 2*time.Minute, isNoViolation), 145 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 6, 0, 0, time.UTC), 2*time.Minute, isNoViolation), 146 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC), 2*time.Minute, isNoViolation), 147 | }) 148 | return expected 149 | }, 150 | }, 151 | { 152 | alert: shimesaba.NewAlert( 153 | shimesaba.NewMonitor( 154 | "fugara", 155 | "fugara.example.com", 156 | "external", 157 | ), 158 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 159 | ptrTime(time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC)), 160 | ).WithReason("downtime:2m"), 161 | timeFrame: 5 * time.Minute, 162 | expectedGenerator: func() shimesaba.Reliabilities { 163 | isNoViolation := map[time.Time]bool{ 164 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC): false, 165 | time.Date(2021, time.October, 1, 0, 4, 0, 0, time.UTC): false, 166 | } 167 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 168 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 169 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 170 | }) 171 | return expected 172 | }, 173 | }, 174 | { 175 | alert: shimesaba.NewAlert( 176 | shimesaba.NewMonitor( 177 | 
"fugara", 178 | "fugara.example.com", 179 | "external", 180 | ), 181 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 182 | ptrTime(time.Date(2021, time.October, 1, 0, 8, 0, 0, time.UTC)), 183 | ).WithReason("downtime:0m"), 184 | timeFrame: 5 * time.Minute, 185 | expectedGenerator: func() shimesaba.Reliabilities { 186 | isNoViolation := map[time.Time]bool{} 187 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 188 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 189 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 190 | }) 191 | return expected 192 | }, 193 | }, 194 | { 195 | alert: shimesaba.NewAlert( 196 | shimesaba.NewMonitor( 197 | "fugara", 198 | "fugara.example.com", 199 | "service", 200 | ).WithEvaluator(func(hostID string, timeFrame time.Duration, startAt, endAt time.Time) (shimesaba.Reliabilities, bool) { 201 | isNoViolation := map[time.Time]bool{ 202 | time.Date(2021, time.September, 30, 23, 58, 0, 0, time.UTC): false, 203 | time.Date(2021, time.September, 30, 23, 59, 0, 0, time.UTC): false, 204 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 205 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 206 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 207 | } 208 | reliabilities, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 209 | shimesaba.NewReliability(time.Date(2021, time.September, 30, 23, 55, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 210 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 211 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 212 | }) 213 | return reliabilities, true 214 | }), 215 | time.Date(2021, time.October, 1, 0, 3, 0, 0, time.UTC), 216 | ptrTime(time.Date(2021, time.October, 1, 0, 8, 0, 
0, time.UTC)), 217 | ), 218 | timeFrame: 5 * time.Minute, 219 | expectedGenerator: func() shimesaba.Reliabilities { 220 | isNoViolation := map[time.Time]bool{ 221 | time.Date(2021, time.September, 30, 23, 58, 0, 0, time.UTC): false, 222 | time.Date(2021, time.September, 30, 23, 59, 0, 0, time.UTC): false, 223 | time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC): false, 224 | time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC): false, 225 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC): false, 226 | } 227 | expected, _ := shimesaba.NewReliabilities([]*shimesaba.Reliability{ 228 | shimesaba.NewReliability(time.Date(2021, time.September, 30, 23, 55, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 229 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 230 | shimesaba.NewReliability(time.Date(2021, time.October, 1, 0, 5, 0, 0, time.UTC), 5*time.Minute, isNoViolation), 231 | }) 232 | return expected 233 | }, 234 | }, 235 | } 236 | for i, c := range cases { 237 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 238 | actual, err := c.alert.EvaluateReliabilities(c.timeFrame, true) 239 | require.NoError(t, err) 240 | require.EqualValues(t, c.expectedGenerator(), actual) 241 | }) 242 | } 243 | } 244 | 245 | func TestAlertCorrectionTime(t *testing.T) { 246 | cases := []struct { 247 | alert *shimesaba.Alert 248 | exceptedOk bool 249 | excepted time.Duration 250 | }{ 251 | { 252 | alert: shimesaba.NewAlert( 253 | shimesaba.NewMonitor("test", "test", "external"), 254 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 255 | ptrTime(time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC)), 256 | ).WithReason("Actual downtime:3m 5xx during this time, 5 cases."), 257 | exceptedOk: true, 258 | excepted: 3 * time.Minute, 259 | }, 260 | { 261 | alert: shimesaba.NewAlert( 262 | shimesaba.NewMonitor("test", "test", "external"), 263 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 264 | 
ptrTime(time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC)), 265 | ).WithReason("Actual downtime:3m, 5xx during this time, 5 cases."), 266 | exceptedOk: false, 267 | }, 268 | { 269 | alert: shimesaba.NewAlert( 270 | shimesaba.NewMonitor("test", "test", "external"), 271 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 272 | ptrTime(time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC)), 273 | ).WithReason("downtime:8m"), 274 | exceptedOk: true, 275 | excepted: 8 * time.Minute, 276 | }, 277 | { 278 | alert: shimesaba.NewAlert( 279 | shimesaba.NewMonitor("test", "test", "external"), 280 | time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 281 | ptrTime(time.Date(2021, time.October, 1, 0, 7, 0, 0, time.UTC)), 282 | ), 283 | exceptedOk: false, 284 | }, 285 | } 286 | for i, c := range cases { 287 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 288 | actual, ok := c.alert.CorrectionTime() 289 | require.EqualValues(t, c.exceptedOk, ok) 290 | require.EqualValues(t, c.excepted, actual) 291 | }) 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /app.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "log" 8 | "sort" 9 | 10 | "github.com/Songmu/flextime" 11 | mackerel "github.com/mackerelio/mackerel-client-go" 12 | ) 13 | 14 | //App manages life cycle 15 | type App struct { 16 | repo *Repository 17 | SLODefinitions []*Definition 18 | } 19 | 20 | //New creates an app 21 | func New(apikey string, cfg *Config) (*App, error) { 22 | client := mackerel.NewClient(apikey) 23 | return NewWithMackerelClient(client, cfg) 24 | } 25 | 26 | //NewWithMackerelClient is there to accept mock clients. 
27 | func NewWithMackerelClient(client MackerelClient, cfg *Config) (*App, error) { 28 | slo := make([]*Definition, 0, len(cfg.SLO)) 29 | for _, c := range cfg.SLO { 30 | d, err := NewDefinition(c) 31 | if err != nil { 32 | return nil, err 33 | } 34 | slo = append(slo, d) 35 | } 36 | app := &App{ 37 | repo: NewRepository(client), 38 | SLODefinitions: slo, 39 | } 40 | return app, nil 41 | } 42 | 43 | type Options struct { 44 | dryRun bool 45 | backfill int 46 | dumpReports bool 47 | } 48 | 49 | //DryRunOption is an option to output the calculated error budget as standard without posting it to Mackerel. 50 | func DryRunOption(dryRun bool) func(*Options) { 51 | return func(opt *Options) { 52 | opt.dryRun = dryRun 53 | } 54 | } 55 | 56 | //BackfillOption specifies how many points of data to calculate retroactively from the current time. 57 | func BackfillOption(count int) func(*Options) { 58 | return func(opt *Options) { 59 | opt.backfill = count 60 | } 61 | } 62 | 63 | func DumpReportsOption(dump bool) func(*Options) { 64 | return func(opt *Options) { 65 | opt.dumpReports = dump 66 | } 67 | } 68 | 69 | //Run performs the calculation of the error bar calculation 70 | func (app *App) Run(ctx context.Context, optFns ...func(*Options)) error { 71 | orgName, err := app.repo.GetOrgName(ctx) 72 | if err != nil { 73 | return err 74 | } 75 | log.Printf("[info] start run in the `%s` organization.", orgName) 76 | opts := &Options{ 77 | backfill: 3, 78 | dryRun: false, 79 | } 80 | for _, optFn := range optFns { 81 | optFn(opts) 82 | } 83 | 84 | repo := app.repo 85 | if opts.dryRun { 86 | log.Println("[notice] **with dry run**") 87 | repo = repo.WithDryRun() 88 | } 89 | 90 | if opts.backfill <= 0 { 91 | return errors.New("backfill must over 0") 92 | } 93 | now := flextime.Now() 94 | 95 | for _, d := range app.SLODefinitions { 96 | log.Printf("[info] service level objective[id=%s]: start create reports \n", d.ID()) 97 | reports, err := d.CreateReports(ctx, repo, now, opts.backfill) 
98 | if err != nil { 99 | return fmt.Errorf("service level objective[id=%s]: create report faileds: %w", d.ID(), err) 100 | } 101 | if len(reports) > opts.backfill { 102 | sort.Slice(reports, func(i, j int) bool { 103 | return reports[i].DataPoint.Before(reports[j].DataPoint) 104 | }) 105 | n := len(reports) - opts.backfill 106 | if n < 0 { 107 | n = 0 108 | } 109 | reports = reports[n:] 110 | } 111 | log.Printf("[info] service level objective[id=%s]: finish create reports \n", d.ID()) 112 | if opts.dumpReports { 113 | for _, report := range reports { 114 | log.Printf("[info] %s", report) 115 | } 116 | } 117 | log.Printf("[info] service level objective[id=%s]: start save reports \n", d.ID()) 118 | if err := repo.SaveReports(ctx, reports); err != nil { 119 | return fmt.Errorf("objective[%s] save report failed: %w", d.ID(), err) 120 | } 121 | log.Printf("[info] service level objective[id=%s]: finish save reports \n", d.ID()) 122 | } 123 | runTime := flextime.Now().Sub(now) 124 | log.Printf("[info] run successes. 
run time:%s\n", runTime) 125 | return nil 126 | } 127 | -------------------------------------------------------------------------------- /app_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "os" 8 | "testing" 9 | "time" 10 | 11 | "github.com/Songmu/flextime" 12 | "github.com/mashiike/shimesaba" 13 | "github.com/mashiike/shimesaba/internal/logger" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestAppWithMock(t *testing.T) { 18 | backfillCounts := []int{3, 4, 5} 19 | for _, backfill := range backfillCounts { 20 | t.Run(fmt.Sprintf("backfill=%d", backfill), func(t *testing.T) { 21 | cases := []struct { 22 | configFile string 23 | expected map[string]int 24 | }{ 25 | { 26 | configFile: "testdata/app_test.yaml", 27 | expected: map[string]int{ 28 | "shimesaba.error_budget.alerts": backfill, 29 | "shimesaba.error_budget_consumption.alerts": backfill, 30 | "shimesaba.error_budget_consumption_percentage.alerts": backfill, 31 | "shimesaba.error_budget_percentage.alerts": backfill, 32 | "shimesaba.error_budget_remaining_percentage.alerts": backfill, 33 | }, 34 | }, 35 | { 36 | configFile: "testdata/app_disable_test.yaml", 37 | expected: map[string]int{ 38 | "app_test.eb.availability": backfill, 39 | "app_test.ebr.availability": backfill, 40 | }, 41 | }, 42 | { 43 | configFile: "testdata/app_uptime_and_failuretime.yaml", 44 | expected: map[string]int{ 45 | "shimesaba.error_budget.alerts": backfill, 46 | "shimesaba.error_budget_consumption.alerts": backfill, 47 | "shimesaba.error_budget_consumption_percentage.alerts": backfill, 48 | "shimesaba.error_budget_percentage.alerts": backfill, 49 | "shimesaba.error_budget_remaining_percentage.alerts": backfill, 50 | "shimesaba.failure_time.alerts": backfill, 51 | "shimesaba.uptime.alerts": backfill, 52 | }, 53 | }, 54 | } 55 | for _, c := range cases { 56 | t.Run(c.configFile, func(t 
*testing.T) { 57 | var buf bytes.Buffer 58 | logger.Setup(&buf, "debug") 59 | defer func() { 60 | t.Log(buf.String()) 61 | logger.Setup(os.Stderr, "info") 62 | }() 63 | cfg := shimesaba.NewDefaultConfig() 64 | err := cfg.Load(c.configFile) 65 | require.NoError(t, err, "load cfg") 66 | client := newMockMackerelClient(t) 67 | app, err := shimesaba.NewWithMackerelClient(client, cfg) 68 | require.NoError(t, err, "create app") 69 | restore := flextime.Set(time.Date(2021, 10, 1, 0, 21, 0, 0, time.UTC)) 70 | defer restore() 71 | err = app.Run(context.Background(), shimesaba.BackfillOption(backfill)) 72 | require.NoError(t, err, "run app") 73 | 74 | actual := make(map[string]int) 75 | for _, v := range client.posted { 76 | if _, ok := actual[v.Name]; !ok { 77 | actual[v.Name] = 0 78 | } 79 | actual[v.Name]++ 80 | } 81 | require.EqualValues(t, c.expected, actual) 82 | }) 83 | } 84 | }) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /cmd/shimesaba/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "os/signal" 9 | "sort" 10 | "strings" 11 | 12 | "github.com/aws/aws-lambda-go/lambda" 13 | "github.com/handlename/ssmwrap/v2" 14 | "github.com/mashiike/shimesaba" 15 | "github.com/mashiike/shimesaba/internal/logger" 16 | cli "github.com/urfave/cli/v2" 17 | ) 18 | 19 | var ( 20 | Version = "current" 21 | ssmwrapPathsErr error 22 | ssmwrapNamesErr error 23 | globalDryRun bool 24 | globalDumpReports bool 25 | globalBackfill int 26 | ) 27 | 28 | func main() { 29 | ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) 30 | defer cancel() 31 | var ssmwrapExportRules []ssmwrap.ExportRule 32 | if ssmwrapPaths := os.Getenv("SSMWRAP_PATHS"); ssmwrapPaths != "" { 33 | for _, path := range strings.Split(ssmwrapPaths, ",") { 34 | path = strings.TrimSuffix(path, "/") 35 | ssmwrapExportRules = 
append(ssmwrapExportRules, ssmwrap.ExportRule{ 36 | Path: path + "/**/*", 37 | }) 38 | } 39 | } 40 | if ssmwarpNames := os.Getenv("SSMWRAP_NAMES"); ssmwarpNames != "" { 41 | for _, name := range strings.Split(ssmwarpNames, ",") { 42 | ssmwrapExportRules = append(ssmwrapExportRules, ssmwrap.ExportRule{ 43 | Path: name, 44 | }) 45 | } 46 | } 47 | if len(ssmwrapExportRules) > 0 { 48 | err := ssmwrap.Export(ctx, ssmwrapExportRules, ssmwrap.ExportOptions{ 49 | Retries: 3, 50 | }) 51 | if err != nil { 52 | logger.Setup(os.Stderr, "error") 53 | log.Fatalf("[error] failed to export SSM parameters: %s", err) 54 | } 55 | } 56 | 57 | cliApp := &cli.App{ 58 | Name: "shimesaba", 59 | Usage: "A commandline tool for tracking SLO/ErrorBudget using Mackerel as an SLI measurement service.", 60 | UsageText: "shimesaba -config [command options]", 61 | Flags: []cli.Flag{ 62 | &cli.StringSliceFlag{ 63 | Name: "config", 64 | Aliases: []string{"c"}, 65 | Usage: "config file path, can set multiple", 66 | EnvVars: []string{"CONFIG", "SHIMESABA_CONFIG"}, 67 | }, 68 | &cli.StringFlag{ 69 | Name: "mackerel-apikey", 70 | Aliases: []string{"k"}, 71 | Usage: "for access mackerel API", 72 | DefaultText: "*********", 73 | EnvVars: []string{"MACKEREL_APIKEY", "SHIMESABA_MACKEREL_APIKEY"}, 74 | }, 75 | &cli.BoolFlag{ 76 | Name: "debug", 77 | Usage: "output debug log", 78 | EnvVars: []string{"SHIMESABA_DEBUG"}, 79 | }, 80 | &cli.BoolFlag{ 81 | Name: "dry-run", 82 | Usage: "report output stdout and not put mackerel", 83 | EnvVars: []string{"SHIMESABA_DRY_RUN"}, 84 | Destination: &globalDryRun, 85 | }, 86 | &cli.BoolFlag{ 87 | Name: "dump-reports", 88 | Usage: "dump error budget report", 89 | EnvVars: []string{"SHIMESABA_DUMP_REPORTS"}, 90 | Destination: &globalDumpReports, 91 | }, 92 | &cli.IntFlag{ 93 | Name: "backfill", 94 | DefaultText: "3", 95 | Value: 3, 96 | Usage: "generate report before n point", 97 | EnvVars: []string{"BACKFILL", "SHIMESABA_BACKFILL"}, 98 | Destination: &globalBackfill, 99 | 
}, 100 | }, 101 | Action: run, 102 | Commands: []*cli.Command{ 103 | { 104 | Name: "run", 105 | Usage: "run shimesaba. this is main feature, use no subcommand", 106 | UsageText: "shimesaba -config run [command options]", 107 | Action: func(c *cli.Context) error { 108 | return run(c) 109 | }, 110 | Flags: []cli.Flag{ 111 | &cli.BoolFlag{ 112 | Name: "dry-run", 113 | Usage: "report output stdout and not put mackerel", 114 | }, 115 | &cli.BoolFlag{ 116 | Name: "dump-reports", 117 | Usage: "dump error budget report", 118 | }, 119 | &cli.IntFlag{ 120 | Name: "backfill", 121 | Usage: "generate report before n point", 122 | }, 123 | }, 124 | }, 125 | }, 126 | } 127 | sort.Sort(cli.FlagsByName(cliApp.Flags)) 128 | sort.Sort(cli.CommandsByName(cliApp.Commands)) 129 | cliApp.Version = Version 130 | cliApp.EnableBashCompletion = true 131 | cliApp.Before = func(c *cli.Context) error { 132 | minLevel := "info" 133 | if c.Bool("debug") { 134 | minLevel = "debug" 135 | } 136 | logger.Setup(os.Stderr, minLevel) 137 | return nil 138 | } 139 | 140 | if isLambda() { 141 | if len(os.Args) <= 1 { 142 | os.Args = append(os.Args, "run") 143 | } 144 | } 145 | if err := cliApp.RunContext(ctx, os.Args); err != nil { 146 | log.Fatalf("[error] %s", err) 147 | } 148 | } 149 | 150 | func isLambda() bool { 151 | return strings.HasPrefix(os.Getenv("AWS_EXECUTION_ENV"), "AWS_Lambda") || 152 | os.Getenv("AWS_LAMBDA_RUNTIME_API") != "" 153 | } 154 | 155 | func buildApp(c *cli.Context) (*shimesaba.App, error) { 156 | if ssmwrapPathsErr != nil { 157 | return nil, fmt.Errorf("ssmwrap.Export SSMWRAP_PATHS failed: %w", ssmwrapPathsErr) 158 | } 159 | if ssmwrapNamesErr != nil { 160 | return nil, fmt.Errorf("ssmwrap.Export SSMWRAP_NAMES failed: %w", ssmwrapNamesErr) 161 | } 162 | cfg := shimesaba.NewDefaultConfig() 163 | if err := cfg.Load(c.StringSlice("config")...); err != nil { 164 | return nil, err 165 | } 166 | if err := cfg.ValidateVersion(Version); err != nil { 167 | return nil, err 168 | } 169 | 
return shimesaba.New(c.String("mackerel-apikey"), cfg) 170 | } 171 | 172 | func run(c *cli.Context) error { 173 | app, err := buildApp(c) 174 | if err != nil { 175 | return err 176 | } 177 | backfill := globalBackfill 178 | if c.Int("backfill") > 0 { 179 | backfill = c.Int("backfill") 180 | } 181 | optFns := []func(*shimesaba.Options){ 182 | shimesaba.DryRunOption(c.Bool("dry-run") || globalDryRun), 183 | shimesaba.DumpReportsOption(c.Bool("dump-reports") || globalDumpReports), 184 | shimesaba.BackfillOption(backfill), 185 | } 186 | handler := func(ctx context.Context) error { 187 | return app.Run(ctx, optFns...) 188 | } 189 | if isLambda() { 190 | lambda.Start(handler) 191 | return nil 192 | } 193 | return handler(c.Context) 194 | } 195 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "log" 7 | "path/filepath" 8 | "strconv" 9 | "strings" 10 | "time" 11 | 12 | gv "github.com/hashicorp/go-version" 13 | gc "github.com/kayac/go-config" 14 | "github.com/mashiike/shimesaba/internal/timeutils" 15 | ) 16 | 17 | // Config for App 18 | type Config struct { 19 | RequiredVersion string `yaml:"required_version" json:"required_version"` 20 | 21 | SLOConfig `yaml:"-,inline" json:"-,inline"` 22 | SLO []*SLOConfig `yaml:"slo" json:"slo"` 23 | 24 | configFilePath string 25 | versionConstraints gv.Constraints 26 | } 27 | 28 | // SLOConfig is a setting related to SLI/SLO 29 | type SLOConfig struct { 30 | ID string `json:"id" yaml:"id"` 31 | RollingPeriod string `yaml:"rolling_period" json:"rolling_period"` 32 | Destination *DestinationConfig `yaml:"destination" json:"destination"` 33 | ErrorBudgetSize interface{} `yaml:"error_budget_size" json:"error_budget_size"` 34 | AlertBasedSLI []*AlertBasedSLIConfig `json:"alert_based_sli" yaml:"alert_based_sli"` 35 | CalculateInterval string 
`yaml:"calculate_interval" json:"calculate_interval"` 36 | 37 | rollingPeriod time.Duration 38 | errorBudgetSizePercentage float64 39 | calculateInterval time.Duration 40 | } 41 | 42 | // DestinationConfig is a configuration for submitting service metrics to Mackerel 43 | type DestinationConfig struct { 44 | ServiceName string `json:"service_name" yaml:"service_name"` 45 | MetricPrefix string `json:"metric_prefix" yaml:"metric_prefix"` 46 | MetricSuffix string `json:"metric_suffix" yaml:"metric_suffix"` 47 | Metrics map[string]*DestinationMetricConfig `json:"metrics" yaml:"metrics"` 48 | } 49 | 50 | type DestinationMetricConfig struct { 51 | MetricTypeName string `json:"metric_type_name,omitempty" yaml:"metric_type_name,omitempty"` 52 | Enabled *bool `json:"enabled,omitempty" yaml:"enabled,omitempty"` 53 | } 54 | 55 | type AlertBasedSLIConfig struct { 56 | MonitorID string `json:"monitor_id,omitempty" yaml:"monitor_id,omitempty"` 57 | MonitorName string `json:"monitor_name,omitempty" yaml:"monitor_name,omitempty"` 58 | MonitorNamePrefix string `json:"monitor_name_prefix,omitempty" yaml:"monitor_name_prefix,omitempty"` 59 | MonitorNameSuffix string `json:"monitor_name_suffix,omitempty" yaml:"monitor_name_suffix,omitempty"` 60 | MonitorType string `json:"monitor_type,omitempty" yaml:"monitor_type,omitempty"` 61 | TryReassessment bool `json:"try_reassessment,omitempty" yaml:"try_reassessment,omitempty"` 62 | } 63 | 64 | const ( 65 | defaultMetricPrefix = "shimesaba" 66 | ) 67 | 68 | // NewDefaultConfig creates a default configuration. 69 | func NewDefaultConfig() *Config { 70 | return &Config{ 71 | SLOConfig: SLOConfig{ 72 | RollingPeriod: "28d", 73 | Destination: &DestinationConfig{ 74 | MetricPrefix: defaultMetricPrefix, 75 | }, 76 | CalculateInterval: "1h", 77 | }, 78 | } 79 | } 80 | 81 | // Load loads configuration file from file paths. 
82 | func (c *Config) Load(paths ...string) error { 83 | if len(paths) == 0 { 84 | return errors.New("no config") 85 | } 86 | if err := gc.LoadWithEnv(c, paths...); err != nil { 87 | return err 88 | } 89 | c.configFilePath = filepath.Dir(paths[len(paths)-1]) 90 | return c.Restrict() 91 | } 92 | 93 | // Restrict restricts a configuration. 94 | func (c *Config) Restrict() error { 95 | if c.RequiredVersion != "" { 96 | constraints, err := gv.NewConstraint(c.RequiredVersion) 97 | if err != nil { 98 | return fmt.Errorf("required_version has invalid format: %w", err) 99 | } 100 | c.versionConstraints = constraints 101 | } 102 | if len(c.SLO) == 0 { 103 | return errors.New("slo definition not found") 104 | } 105 | 106 | sloIDs := make(map[string]struct{}, len(c.SLO)) 107 | 108 | for i, cfg := range c.SLO { 109 | mergedCfg := c.SLOConfig.Merge(cfg) 110 | if _, ok := sloIDs[mergedCfg.ID]; ok { 111 | return fmt.Errorf("slo id=%s is duplicated", mergedCfg.ID) 112 | } 113 | c.SLO[i] = mergedCfg 114 | if err := mergedCfg.Restrict(); err != nil { 115 | return fmt.Errorf("slo[%s] is invalid: %w", mergedCfg.ID, err) 116 | } 117 | } 118 | 119 | return nil 120 | } 121 | 122 | // Restrict restricts a definition configuration. 
123 | func (c *SLOConfig) Restrict() error { 124 | if c.ID == "" { 125 | return errors.New("id is required") 126 | } 127 | 128 | if c.RollingPeriod == "" { 129 | return errors.New("rolling_period is required") 130 | } 131 | var err error 132 | c.rollingPeriod, err = timeutils.ParseDuration(c.RollingPeriod) 133 | if err != nil { 134 | return fmt.Errorf("rolling_period is invalid format: %w", err) 135 | } 136 | if c.rollingPeriod < time.Minute { 137 | return fmt.Errorf("rolling_period must over or equal 1m") 138 | } 139 | 140 | if c.Destination == nil { 141 | return errors.New("destination is not configured") 142 | } 143 | if err := c.Destination.Restrict(c.ID); err != nil { 144 | return fmt.Errorf("destination %w", err) 145 | } 146 | 147 | if errorBudgetSizePercentage, ok := c.ErrorBudgetSize.(float64); ok { 148 | log.Printf("[warn] make sure to set it in m with units. example %f%%", errorBudgetSizePercentage*100.0) 149 | c.errorBudgetSizePercentage = errorBudgetSizePercentage 150 | } 151 | if errorBudgetSizeString, ok := c.ErrorBudgetSize.(string); ok { 152 | if strings.ContainsRune(errorBudgetSizeString, '%') { 153 | value, err := strconv.ParseFloat(strings.TrimRight(errorBudgetSizeString, `%`), 64) 154 | if err != nil { 155 | return fmt.Errorf("error_budget can not parse as percentage: %w", err) 156 | } 157 | c.errorBudgetSizePercentage = value / 100.0 158 | } else { 159 | errorBudgetSizeDuration, err := timeutils.ParseDuration(errorBudgetSizeString) 160 | if err != nil { 161 | return fmt.Errorf("error_budget can not parse as duration: %w", err) 162 | } 163 | if errorBudgetSizeDuration >= c.rollingPeriod || errorBudgetSizeDuration == 0 { 164 | return fmt.Errorf("error_budget must between %s and 0m", c.rollingPeriod) 165 | } 166 | c.errorBudgetSizePercentage = float64(errorBudgetSizeDuration) / float64(c.rollingPeriod) 167 | } 168 | } 169 | if c.errorBudgetSizePercentage >= 1.0 || c.errorBudgetSizePercentage <= 0.0 { 170 | return errors.New("error_budget must 
between 1.0 and 0.0") 171 | } 172 | 173 | for i, alertBasedSLI := range c.AlertBasedSLI { 174 | if err := alertBasedSLI.Restrict(); err != nil { 175 | return fmt.Errorf("alert_based_sli[%d] %w", i, err) 176 | } 177 | } 178 | 179 | if c.CalculateInterval == "" { 180 | return errors.New("calculate_interval is required") 181 | } 182 | c.calculateInterval, err = timeutils.ParseDuration(c.CalculateInterval) 183 | if err != nil { 184 | return fmt.Errorf("calculate_interval is invalid format: %w", err) 185 | } 186 | if c.calculateInterval < time.Minute { 187 | return fmt.Errorf("calculate_interval must over or equal 1m") 188 | } 189 | if c.calculateInterval >= 24*time.Hour { 190 | log.Printf("[warn] We do not recommend calculate_interval=`%s` setting. because can not post service metrics older than 24 hours to Mackerel.\n", c.CalculateInterval) 191 | } 192 | 193 | return nil 194 | } 195 | 196 | // Restrict restricts a definition configuration. 197 | func (c *DestinationConfig) Restrict(sloID string) error { 198 | if c.ServiceName == "" { 199 | return errors.New("service_name is required") 200 | } 201 | if c.MetricPrefix == "" { 202 | log.Printf("[debug] metric_prefix is empty, fallback %s", defaultMetricPrefix) 203 | c.MetricPrefix = defaultMetricPrefix 204 | } 205 | if c.MetricSuffix == "" { 206 | log.Printf("[debug] metric_suffix is empty, fallback %s", sloID) 207 | c.MetricSuffix = sloID 208 | } 209 | if c.Metrics == nil { 210 | c.Metrics = make(map[string]*DestinationMetricConfig) 211 | } 212 | keys := DestinationMetricTypeValues() 213 | for _, key := range keys { 214 | metricCfg, ok := c.Metrics[key.ID()] 215 | if !ok { 216 | metricCfg = &DestinationMetricConfig{} 217 | } 218 | if err := metricCfg.Restrict(key); err != nil { 219 | return fmt.Errorf("metrics `%s`: %w", key.ID(), err) 220 | } 221 | c.Metrics[key.ID()] = metricCfg 222 | } 223 | 224 | return nil 225 | } 226 | 227 | // Restrict restricts a definition configuration. 
228 | func (c *DestinationMetricConfig) Restrict(t DestinationMetricType) error { 229 | if c.MetricTypeName == "" { 230 | c.MetricTypeName = t.DefaultTypeName() 231 | } 232 | if c.Enabled == nil { 233 | enabled := t.DefaultEnabled() 234 | c.Enabled = &enabled 235 | } 236 | return nil 237 | } 238 | 239 | // Restrict restricts a configuration. 240 | func (c *AlertBasedSLIConfig) Restrict() error { 241 | if c.MonitorID != "" { 242 | return nil 243 | } 244 | if c.MonitorName != "" { 245 | return nil 246 | } 247 | if c.MonitorNamePrefix != "" { 248 | return nil 249 | } 250 | if c.MonitorNameSuffix != "" { 251 | return nil 252 | } 253 | if c.MonitorType != "" { 254 | return nil 255 | } 256 | 257 | return errors.New("either monitor_id, monitor_name, monitor_name_prefix, monitor_name_suffix or monitor_type is required") 258 | } 259 | 260 | // Merge merges SLOConfig together 261 | func (c *SLOConfig) Merge(o *SLOConfig) *SLOConfig { 262 | ret := &SLOConfig{ 263 | ID: coalesceString(o.ID, c.ID), 264 | RollingPeriod: coalesceString(o.RollingPeriod, c.RollingPeriod), 265 | Destination: c.Destination.Merge(o.Destination), 266 | ErrorBudgetSize: c.ErrorBudgetSize, 267 | CalculateInterval: coalesceString(o.CalculateInterval, c.CalculateInterval), 268 | } 269 | if o.ErrorBudgetSize != nil { 270 | ret.ErrorBudgetSize = o.ErrorBudgetSize 271 | } 272 | ret.AlertBasedSLI = append(ret.AlertBasedSLI, c.AlertBasedSLI...) 273 | ret.AlertBasedSLI = append(ret.AlertBasedSLI, o.AlertBasedSLI...) 
274 | 275 | return ret 276 | } 277 | 278 | // Merge merges DestinationConfig together 279 | func (c *DestinationConfig) Merge(o *DestinationConfig) *DestinationConfig { 280 | if o == nil { 281 | o = &DestinationConfig{} 282 | } 283 | ret := &DestinationConfig{ 284 | ServiceName: coalesceString(o.ServiceName, c.ServiceName), 285 | MetricPrefix: coalesceString(o.MetricPrefix, c.MetricPrefix), 286 | MetricSuffix: coalesceString(o.MetricSuffix, c.MetricSuffix), 287 | } 288 | keys := DestinationMetricTypeValues() 289 | metrics := make(map[string]*DestinationMetricConfig, len(keys)) 290 | base := c.Metrics 291 | if base == nil { 292 | base = make(map[string]*DestinationMetricConfig) 293 | } 294 | if o.Metrics != nil { 295 | for _, key := range keys { 296 | metricCfg, ok := base[key.ID()] 297 | if !ok { 298 | metricCfg = &DestinationMetricConfig{} 299 | } 300 | metrics[key.ID()] = metricCfg.Merge(o.Metrics[key.ID()]) 301 | } 302 | } else { 303 | metrics = base 304 | } 305 | ret.Metrics = metrics 306 | return ret 307 | } 308 | 309 | // Merge merges DestinationMetricConfig together 310 | func (c *DestinationMetricConfig) Merge(o *DestinationMetricConfig) *DestinationMetricConfig { 311 | if o == nil { 312 | o = &DestinationMetricConfig{} 313 | } 314 | ret := &DestinationMetricConfig{ 315 | MetricTypeName: coalesceString(o.MetricTypeName, c.MetricTypeName), 316 | Enabled: coalesce(o.Enabled, c.Enabled), 317 | } 318 | return ret 319 | } 320 | 321 | // ValidateVersion validates a version satisfies required_version. 322 | func (c *Config) ValidateVersion(version string) error { 323 | if c.versionConstraints == nil { 324 | log.Println("[warn] required_version is empty. Skip checking required_version.") 325 | return nil 326 | } 327 | versionParts := strings.SplitN(version, "-", 2) 328 | v, err := gv.NewVersion(versionParts[0]) 329 | if err != nil { 330 | log.Printf("[warn]: Invalid version format \"%s\". 
Skip checking required_version.", version) 331 | // invalid version string (e.g. "current") always allowed 332 | return nil 333 | } 334 | if !c.versionConstraints.Check(v) { 335 | return fmt.Errorf("version %s does not satisfy constraints required_version: %s", version, c.versionConstraints) 336 | } 337 | return nil 338 | } 339 | 340 | // DurationRollingPeriod converts RollingPeriod as time.Duration 341 | func (c *SLOConfig) DurationRollingPeriod() time.Duration { 342 | return c.rollingPeriod 343 | } 344 | 345 | // DurationCalculate converts CalculateInterval as time.Duration 346 | func (c *SLOConfig) DurationCalculate() time.Duration { 347 | return c.calculateInterval 348 | } 349 | 350 | func (c *SLOConfig) ErrorBudgetSizePercentage() float64 { 351 | return c.errorBudgetSizePercentage 352 | } 353 | 354 | func coalesceString(strs ...string) string { 355 | for _, str := range strs { 356 | if str != "" { 357 | return str 358 | } 359 | } 360 | return "" 361 | } 362 | 363 | func coalesce[T any](elements ...*T) *T { 364 | for _, element := range elements { 365 | if element != nil { 366 | ret := *element 367 | return &ret 368 | } 369 | } 370 | return nil 371 | } 372 | -------------------------------------------------------------------------------- /config_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "testing" 8 | 9 | "github.com/mashiike/shimesaba" 10 | "github.com/mashiike/shimesaba/internal/logger" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestConfigLoadNoError(t *testing.T) { 15 | os.Setenv("TARGET_ALB_NAME", "dummy-alb") 16 | os.Setenv("POST_METRIC_SERVICE", "dummy-service") 17 | cases := []struct { 18 | casename string 19 | paths []string 20 | }{ 21 | { 22 | casename: "v1.0.0 over simple config", 23 | paths: []string{"testdata/v1.0.0_simple.yaml"}, 24 | }, 25 | { 26 | casename: "v1.0.0 over check destination", 27 | paths: 
[]string{"testdata/v1.0.0_destination.yaml"}, 28 | }, 29 | } 30 | 31 | for _, c := range cases { 32 | t.Run(c.casename, func(t *testing.T) { 33 | var buf bytes.Buffer 34 | logger.Setup(&buf, "debug") 35 | defer func() { 36 | t.Log(buf.String()) 37 | logger.Setup(os.Stderr, "info") 38 | }() 39 | cfg := shimesaba.NewDefaultConfig() 40 | err := cfg.Load(c.paths...) 41 | require.NoError(t, err) 42 | err = cfg.Restrict() 43 | require.NoError(t, err) 44 | }) 45 | } 46 | } 47 | 48 | func TestSLOConfigErrorBudgetSize(t *testing.T) { 49 | cases := []struct { 50 | cfg *shimesaba.SLOConfig 51 | exceptedErr bool 52 | expected float64 53 | }{ 54 | { 55 | cfg: &shimesaba.SLOConfig{ 56 | ID: "test", 57 | RollingPeriod: "28d", 58 | Destination: &shimesaba.DestinationConfig{ 59 | ServiceName: "shimesaba", 60 | }, 61 | CalculateInterval: "1h", 62 | ErrorBudgetSize: 0.001, 63 | }, 64 | expected: 0.001, 65 | }, 66 | { 67 | cfg: &shimesaba.SLOConfig{ 68 | ID: "test", 69 | RollingPeriod: "28d", 70 | Destination: &shimesaba.DestinationConfig{ 71 | ServiceName: "shimesaba", 72 | }, 73 | CalculateInterval: "1d", 74 | ErrorBudgetSize: "40m", 75 | }, 76 | expected: 0.001, 77 | }, 78 | { 79 | cfg: &shimesaba.SLOConfig{ 80 | ID: "test", 81 | RollingPeriod: "28d", 82 | Destination: &shimesaba.DestinationConfig{ 83 | ServiceName: "shimesaba", 84 | }, 85 | CalculateInterval: "1d", 86 | ErrorBudgetSize: "0.1%", 87 | }, 88 | expected: 0.001, 89 | }, 90 | { 91 | cfg: &shimesaba.SLOConfig{ 92 | ID: "test", 93 | RollingPeriod: "28d", 94 | Destination: &shimesaba.DestinationConfig{ 95 | ServiceName: "shimesaba", 96 | }, 97 | CalculateInterval: "1d", 98 | ErrorBudgetSize: "5m0.001%", 99 | }, 100 | exceptedErr: true, 101 | }, 102 | { 103 | cfg: &shimesaba.SLOConfig{ 104 | ID: "test", 105 | RollingPeriod: "28d", 106 | Destination: &shimesaba.DestinationConfig{ 107 | ServiceName: "shimesaba", 108 | }, 109 | CalculateInterval: "1d", 110 | ErrorBudgetSize: "0.01", 111 | }, 112 | exceptedErr: true, 113 | }, 
114 | } 115 | 116 | for i, c := range cases { 117 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 118 | err := c.cfg.Restrict() 119 | if !c.exceptedErr { 120 | require.NoError(t, err) 121 | require.InEpsilon(t, c.expected, c.cfg.ErrorBudgetSizePercentage(), 0.01) 122 | } else { 123 | require.Error(t, err) 124 | } 125 | }) 126 | } 127 | } 128 | 129 | func TestSLOConfigMetricPrefixSuffix(t *testing.T) { 130 | cases := []struct { 131 | cfg *shimesaba.SLOConfig 132 | expectedPrefix string 133 | expectedSuffix string 134 | }{ 135 | { 136 | cfg: &shimesaba.SLOConfig{ 137 | ID: "test", 138 | RollingPeriod: "28d", 139 | Destination: &shimesaba.DestinationConfig{ 140 | ServiceName: "shimesaba", 141 | }, 142 | CalculateInterval: "1h", 143 | ErrorBudgetSize: 0.001, 144 | }, 145 | expectedPrefix: "shimesaba", 146 | expectedSuffix: "test", 147 | }, 148 | { 149 | cfg: &shimesaba.SLOConfig{ 150 | ID: "test", 151 | RollingPeriod: "28d", 152 | Destination: &shimesaba.DestinationConfig{ 153 | ServiceName: "shimesaba", 154 | MetricPrefix: "hoge", 155 | MetricSuffix: "fuga", 156 | }, 157 | CalculateInterval: "1h", 158 | ErrorBudgetSize: 0.001, 159 | }, 160 | expectedPrefix: "hoge", 161 | expectedSuffix: "fuga", 162 | }, 163 | } 164 | 165 | for i, c := range cases { 166 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 167 | err := c.cfg.Restrict() 168 | require.NoError(t, err) 169 | require.Equal(t, c.expectedPrefix, c.cfg.Destination.MetricPrefix) 170 | }) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /definition.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "sort" 8 | "time" 9 | ) 10 | 11 | // Definition is SLO Definition 12 | type Definition struct { 13 | id string 14 | destination *Destination 15 | rollingPeriod time.Duration 16 | calculate time.Duration 17 | errorBudgetSize float64 18 | 19 | 
alertBasedSLIs []*AlertBasedSLI 20 | } 21 | 22 | // NewDefinition creates Definition from SLOConfig 23 | func NewDefinition(cfg *SLOConfig) (*Definition, error) { 24 | AlertBasedSLIs := make([]*AlertBasedSLI, 0, len(cfg.AlertBasedSLI)) 25 | for _, cfg := range cfg.AlertBasedSLI { 26 | AlertBasedSLIs = append(AlertBasedSLIs, NewAlertBasedSLI(cfg)) 27 | } 28 | return &Definition{ 29 | id: cfg.ID, 30 | destination: NewDestination(cfg.Destination), 31 | rollingPeriod: cfg.DurationRollingPeriod(), 32 | calculate: cfg.DurationCalculate(), 33 | errorBudgetSize: cfg.ErrorBudgetSizePercentage(), 34 | alertBasedSLIs: AlertBasedSLIs, 35 | }, nil 36 | } 37 | 38 | // ID returns SLOConfig.id 39 | func (d *Definition) ID() string { 40 | return d.id 41 | } 42 | 43 | type DataProvider interface { 44 | FetchAlerts(ctx context.Context, startAt time.Time, endAt time.Time) (Alerts, error) 45 | FetchVirtualAlerts(ctx context.Context, serviceName string, sloID string, startAt time.Time, endAt time.Time) (Alerts, error) 46 | } 47 | 48 | // CreateReports returns Report with Metrics 49 | func (d *Definition) CreateReports(ctx context.Context, provider DataProvider, now time.Time, backfill int) ([]*Report, error) { 50 | startAt := d.StartAt(now, backfill) 51 | alerts, err := provider.FetchAlerts(ctx, startAt, now) 52 | if err != nil { 53 | return nil, fmt.Errorf("failed to fetch alerts: %w", err) 54 | } 55 | log.Printf("[debug] get %d alerts", len(alerts)) 56 | valerts, err := provider.FetchVirtualAlerts(ctx, d.destination.ServiceName, d.id, startAt, now) 57 | if err != nil { 58 | return nil, fmt.Errorf("failed to fetch virtual alerts: %w", err) 59 | } 60 | log.Printf("[debug] get %d virtual alerts", len(valerts)) 61 | alerts = append(alerts, valerts...) 
62 | reports, err := d.CreateReportsWithAlertsAndPeriod(ctx, alerts, startAt, now) 63 | if err != nil { 64 | return nil, fmt.Errorf("failed to create reports: %w", err) 65 | } 66 | return reports, nil 67 | } 68 | 69 | func (d *Definition) CreateReportsWithAlertsAndPeriod(ctx context.Context, alerts Alerts, startAt, endAt time.Time) ([]*Report, error) { 70 | log.Printf("[debug] original report range = %s ~ %s", startAt, endAt) 71 | startAt = startAt.Truncate(d.calculate) 72 | endAt = endAt.Add(+time.Nanosecond).Truncate(d.calculate).Add(-time.Nanosecond) 73 | log.Printf("[debug] truncate report range = %s ~ %s", startAt, endAt) 74 | log.Printf("[debug] timeFrame = %s, calculateInterval = %s", d.rollingPeriod, d.calculate) 75 | var Reliabilities Reliabilities 76 | log.Printf("[debug] alert based SLI count = %d", len(d.alertBasedSLIs)) 77 | for i, o := range d.alertBasedSLIs { 78 | rc, err := o.EvaluateReliabilities(d.calculate, alerts, startAt, endAt) 79 | if err != nil { 80 | return nil, fmt.Errorf("failed to evaluate reliabilities for alert_based_sli[%d]: %w", i, err) 81 | } 82 | Reliabilities, err = Reliabilities.Merge(rc) 83 | if err != nil { 84 | return nil, fmt.Errorf("failed to merge reliabilities for alert_based_sli[%d]: %w", i, err) 85 | } 86 | } 87 | for _, r := range Reliabilities { 88 | log.Printf("[debug] reliability[%s~%s] = (%s, %s)", r.TimeFrameStartAt(), r.TimeFrameEndAt(), r.UpTime(), r.FailureTime()) 89 | } 90 | reports := NewReports(d.id, d.destination, d.errorBudgetSize, d.rollingPeriod, Reliabilities) 91 | sort.Slice(reports, func(i, j int) bool { 92 | return reports[i].DataPoint.Before(reports[j].DataPoint) 93 | }) 94 | log.Printf("[debug] created %d reports", len(reports)) 95 | return reports, nil 96 | } 97 | 98 | func (d *Definition) AlertBasedSLIs(monitors []*Monitor) []*Monitor { 99 | matched := make(map[string]*Monitor) 100 | for _, m := range monitors { 101 | for _, obj := range d.alertBasedSLIs { 102 | if obj.MatchMonitor(m) { 103 | 
matched[m.ID()] = m 104 | } 105 | } 106 | } 107 | objectiveMonitors := make([]*Monitor, 0, len(matched)) 108 | for _, monitor := range matched { 109 | objectiveMonitors = append(objectiveMonitors, monitor) 110 | } 111 | return objectiveMonitors 112 | } 113 | 114 | func (d *Definition) StartAt(now time.Time, backfill int) time.Time { 115 | return now.Truncate(d.calculate).Add(-(time.Duration(backfill) * d.calculate) - d.rollingPeriod) 116 | } 117 | -------------------------------------------------------------------------------- /definition_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "os" 9 | "testing" 10 | "time" 11 | 12 | "github.com/Songmu/flextime" 13 | "github.com/mashiike/shimesaba" 14 | "github.com/mashiike/shimesaba/internal/logger" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | func TestDefinition(t *testing.T) { 19 | restore := flextime.Fix(time.Date(2021, 10, 01, 0, 22, 0, 0, time.UTC)) 20 | defer restore() 21 | alerts := shimesaba.Alerts{ 22 | shimesaba.NewAlert( 23 | shimesaba.NewMonitor( 24 | "hogera", 25 | "hogera.example.com", 26 | "external", 27 | ), 28 | time.Date(2021, 10, 1, 0, 3, 0, 0, time.UTC), 29 | ptrTime(time.Date(2021, 10, 1, 0, 9, 0, 0, time.UTC)), 30 | ), 31 | shimesaba.NewAlert( 32 | shimesaba.NewMonitor( 33 | "hogera", 34 | "hogera.example.com", 35 | "external", 36 | ), 37 | time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC), 38 | nil, 39 | ), 40 | } 41 | cases := []struct { 42 | defCfg *shimesaba.SLOConfig 43 | expected []*shimesaba.Report 44 | }{ 45 | { 46 | defCfg: &shimesaba.SLOConfig{ 47 | ID: "alert_and_metric_mixing", 48 | Destination: &shimesaba.DestinationConfig{ 49 | ServiceName: "test", 50 | }, 51 | RollingPeriod: "10m", 52 | CalculateInterval: "5m", 53 | ErrorBudgetSize: 0.3, 54 | AlertBasedSLI: []*shimesaba.AlertBasedSLIConfig{ 55 | { 56 | MonitorID: "hogera", 57 | }, 
58 | }, 59 | }, 60 | expected: []*shimesaba.Report{ 61 | { 62 | DefinitionID: "alert_and_metric_mixing", 63 | Destination: &shimesaba.Destination{ 64 | ServiceName: "test", 65 | MetricPrefix: "shimesaba", 66 | MetricSuffix: "alert_and_metric_mixing", 67 | }, 68 | DataPoint: time.Date(2021, 10, 01, 0, 10, 0, 0, time.UTC), 69 | TimeFrameStartAt: time.Date(2021, 10, 01, 0, 0, 0, 0, time.UTC), 70 | TimeFrameEndAt: time.Date(2021, 10, 01, 0, 9, 59, 999999999, time.UTC), 71 | UpTime: 4 * time.Minute, 72 | FailureTime: 6 * time.Minute, 73 | ErrorBudgetSize: 3 * time.Minute, 74 | ErrorBudget: -3 * time.Minute, 75 | ErrorBudgetConsumption: 4 * time.Minute, 76 | }, 77 | { 78 | DefinitionID: "alert_and_metric_mixing", 79 | Destination: &shimesaba.Destination{ 80 | ServiceName: "test", 81 | MetricPrefix: "shimesaba", 82 | MetricSuffix: "alert_and_metric_mixing", 83 | }, 84 | DataPoint: time.Date(2021, 10, 01, 0, 15, 0, 0, time.UTC), 85 | TimeFrameStartAt: time.Date(2021, 10, 01, 0, 5, 0, 0, time.UTC), 86 | TimeFrameEndAt: time.Date(2021, 10, 01, 0, 14, 59, 999999999, time.UTC), 87 | UpTime: 6 * time.Minute, 88 | FailureTime: 4 * time.Minute, 89 | ErrorBudgetSize: 3 * time.Minute, 90 | ErrorBudget: -1 * time.Minute, 91 | ErrorBudgetConsumption: 0 * time.Minute, 92 | }, 93 | { 94 | DefinitionID: "alert_and_metric_mixing", 95 | Destination: &shimesaba.Destination{ 96 | ServiceName: "test", 97 | MetricPrefix: "shimesaba", 98 | MetricSuffix: "alert_and_metric_mixing", 99 | }, 100 | DataPoint: time.Date(2021, 10, 01, 0, 20, 0, 0, time.UTC), 101 | TimeFrameStartAt: time.Date(2021, 10, 01, 0, 10, 0, 0, time.UTC), 102 | TimeFrameEndAt: time.Date(2021, 10, 01, 0, 19, 59, 999999999, time.UTC), 103 | UpTime: 5 * time.Minute, 104 | FailureTime: 5 * time.Minute, 105 | ErrorBudgetSize: 3 * time.Minute, 106 | ErrorBudget: -2 * time.Minute, 107 | ErrorBudgetConsumption: 5 * time.Minute, 108 | }, 109 | }, 110 | }, 111 | } 112 | for _, c := range cases { 113 | t.Run(c.defCfg.ID, func(t 
*testing.T) { 114 | var buf bytes.Buffer 115 | logger.Setup(&buf, "debug") 116 | defer func() { 117 | t.Log(buf.String()) 118 | logger.Setup(os.Stderr, "info") 119 | }() 120 | err := c.defCfg.Restrict() 121 | require.NoError(t, err) 122 | def, err := shimesaba.NewDefinition(c.defCfg) 123 | require.NoError(t, err) 124 | actual, err := def.CreateReportsWithAlertsAndPeriod(context.Background(), alerts, 125 | time.Date(2021, 10, 01, 0, 0, 0, 0, time.UTC), 126 | time.Date(2021, 10, 01, 0, 20, 0, 0, time.UTC), 127 | ) 128 | require.NoError(t, err) 129 | t.Log("actual:") 130 | for _, a := range actual { 131 | bs, _ := json.MarshalIndent(a, "", " ") 132 | t.Log(string(bs)) 133 | } 134 | t.Log("expected:") 135 | for _, e := range c.expected { 136 | bs, _ := json.MarshalIndent(e, "", " ") 137 | t.Log(string(bs)) 138 | if e.Destination.MetricTypeNames == nil { 139 | e.Destination.MetricTypeNames = make(map[shimesaba.DestinationMetricType]string) 140 | for _, metricType := range shimesaba.DestinationMetricTypeValues() { 141 | e.Destination.MetricTypeNames[metricType] = metricType.String() 142 | } 143 | } 144 | if e.Destination.MetricTypeEnabled == nil { 145 | e.Destination.MetricTypeEnabled = make(map[shimesaba.DestinationMetricType]bool) 146 | for _, metricType := range shimesaba.DestinationMetricTypeValues() { 147 | e.Destination.MetricTypeEnabled[metricType] = metricType.DefaultEnabled() 148 | } 149 | } 150 | 151 | } 152 | require.EqualValues(t, c.expected, actual) 153 | }) 154 | } 155 | 156 | } 157 | 158 | func TestSLODefinitionStartAt(t *testing.T) { 159 | cases := []struct { 160 | now time.Time 161 | backfill int 162 | cfg *shimesaba.SLOConfig 163 | expected time.Time 164 | }{ 165 | { 166 | now: time.Date(2022, 1, 14, 3, 13, 23, 999, time.UTC), 167 | backfill: 3, 168 | cfg: &shimesaba.SLOConfig{ 169 | ID: "test", 170 | RollingPeriod: "1d", 171 | Destination: &shimesaba.DestinationConfig{ 172 | ServiceName: "shimesaba", 173 | }, 174 | CalculateInterval: "1h", 175 | 
ErrorBudgetSize: 0.05, 176 | }, 177 | expected: time.Date(2022, 1, 13, 0, 0, 0, 0, time.UTC), 178 | }, 179 | { 180 | now: time.Date(2022, 1, 14, 3, 13, 23, 999, time.UTC), 181 | backfill: 3, 182 | cfg: &shimesaba.SLOConfig{ 183 | ID: "test", 184 | RollingPeriod: "365d", 185 | Destination: &shimesaba.DestinationConfig{ 186 | ServiceName: "shimesaba", 187 | }, 188 | CalculateInterval: "1d", 189 | ErrorBudgetSize: 0.05, 190 | }, 191 | expected: time.Date(2021, 1, 11, 0, 0, 0, 0, time.UTC), 192 | }, 193 | } 194 | 195 | for i, c := range cases { 196 | t.Run(fmt.Sprintf("case.%d", i), func(t *testing.T) { 197 | require.NoError(t, c.cfg.Restrict()) 198 | d, err := shimesaba.NewDefinition(c.cfg) 199 | require.NoError(t, err) 200 | actual := d.StartAt(c.now, c.backfill) 201 | require.EqualValues(t, c.expected, actual) 202 | }) 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /destination.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import "fmt" 4 | 5 | type Destination struct { 6 | ServiceName string 7 | MetricPrefix string 8 | MetricSuffix string 9 | MetricTypeNames map[DestinationMetricType]string 10 | MetricTypeEnabled map[DestinationMetricType]bool 11 | } 12 | 13 | func NewDestination(cfg *DestinationConfig) *Destination { 14 | ret := &Destination{ 15 | ServiceName: cfg.ServiceName, 16 | MetricPrefix: cfg.MetricPrefix, 17 | MetricSuffix: cfg.MetricSuffix, 18 | MetricTypeNames: make(map[DestinationMetricType]string), 19 | MetricTypeEnabled: make(map[DestinationMetricType]bool), 20 | } 21 | if cfg.Metrics == nil { 22 | return ret 23 | } 24 | for _, metricType := range DestinationMetricTypeValues() { 25 | if metricCfg, ok := cfg.Metrics[metricType.ID()]; ok { 26 | ret.MetricTypeNames[metricType] = metricCfg.MetricTypeName 27 | if metricCfg.Enabled == nil { 28 | ret.MetricTypeEnabled[metricType] = metricType.DefaultEnabled() 29 | } else { 30 | 
ret.MetricTypeEnabled[metricType] = *metricCfg.Enabled 31 | } 32 | } 33 | } 34 | return ret 35 | } 36 | 37 | func (d *Destination) MetricName(metricType DestinationMetricType) string { 38 | if d.MetricTypeNames == nil { 39 | return fmt.Sprintf("%s.%s.%s", d.MetricPrefix, metricType.DefaultTypeName(), d.MetricSuffix) 40 | } 41 | if name, ok := d.MetricTypeNames[metricType]; ok { 42 | return fmt.Sprintf("%s.%s.%s", d.MetricPrefix, name, d.MetricSuffix) 43 | } 44 | return fmt.Sprintf("%s.%s.%s", d.MetricPrefix, metricType.DefaultTypeName(), d.MetricSuffix) 45 | } 46 | 47 | func (d *Destination) MetricEnabled(metricType DestinationMetricType) bool { 48 | if d.MetricTypeEnabled == nil { 49 | return true 50 | } 51 | if enabled, ok := d.MetricTypeEnabled[metricType]; ok { 52 | return enabled 53 | } 54 | return false 55 | } 56 | -------------------------------------------------------------------------------- /destination_metric_type.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | type DestinationMetricType int 4 | 5 | //go:generate go install github.com/dmarkham/enumer 6 | //go:generate enumer -type=DestinationMetricType -yaml -linecomment -transform=snake -output destination_metric_type_enumer.go 7 | 8 | const ( 9 | ErrorBudget DestinationMetricType = iota 10 | ErrorBudgetRemainingPercentage 11 | ErrorBudgetPercentage 12 | ErrorBudgetConsumption 13 | ErrorBudgetConsumptionPercentage 14 | UpTime //uptime 15 | FailureTime 16 | ) 17 | 18 | func (t DestinationMetricType) ID() string { 19 | return t.String() 20 | } 21 | 22 | func (t DestinationMetricType) DefaultTypeName() string { 23 | return t.String() 24 | } 25 | 26 | func (t DestinationMetricType) DefaultEnabled() bool { 27 | switch t { 28 | case UpTime, FailureTime: 29 | return false 30 | default: 31 | return true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /destination_metric_type_enumer.go: 
-------------------------------------------------------------------------------- 1 | // Code generated by "enumer -type=DestinationMetricType -yaml -linecomment -transform=snake -output destination_metric_type_enumer.go"; DO NOT EDIT. 2 | 3 | package shimesaba 4 | 5 | import ( 6 | "fmt" 7 | "strings" 8 | ) 9 | 10 | const _DestinationMetricTypeName = "error_budgeterror_budget_remaining_percentageerror_budget_percentageerror_budget_consumptionerror_budget_consumption_percentageuptimefailure_time" 11 | 12 | var _DestinationMetricTypeIndex = [...]uint8{0, 12, 45, 68, 92, 127, 133, 145} 13 | 14 | const _DestinationMetricTypeLowerName = "error_budgeterror_budget_remaining_percentageerror_budget_percentageerror_budget_consumptionerror_budget_consumption_percentageuptimefailure_time" 15 | 16 | func (i DestinationMetricType) String() string { 17 | if i < 0 || i >= DestinationMetricType(len(_DestinationMetricTypeIndex)-1) { 18 | return fmt.Sprintf("DestinationMetricType(%d)", i) 19 | } 20 | return _DestinationMetricTypeName[_DestinationMetricTypeIndex[i]:_DestinationMetricTypeIndex[i+1]] 21 | } 22 | 23 | // An "invalid array index" compiler error signifies that the constant values have changed. 24 | // Re-run the stringer command to generate them again. 
25 | func _DestinationMetricTypeNoOp() { 26 | var x [1]struct{} 27 | _ = x[ErrorBudget-(0)] 28 | _ = x[ErrorBudgetRemainingPercentage-(1)] 29 | _ = x[ErrorBudgetPercentage-(2)] 30 | _ = x[ErrorBudgetConsumption-(3)] 31 | _ = x[ErrorBudgetConsumptionPercentage-(4)] 32 | _ = x[UpTime-(5)] 33 | _ = x[FailureTime-(6)] 34 | } 35 | 36 | var _DestinationMetricTypeValues = []DestinationMetricType{ErrorBudget, ErrorBudgetRemainingPercentage, ErrorBudgetPercentage, ErrorBudgetConsumption, ErrorBudgetConsumptionPercentage, UpTime, FailureTime} 37 | 38 | var _DestinationMetricTypeNameToValueMap = map[string]DestinationMetricType{ 39 | _DestinationMetricTypeName[0:12]: ErrorBudget, 40 | _DestinationMetricTypeLowerName[0:12]: ErrorBudget, 41 | _DestinationMetricTypeName[12:45]: ErrorBudgetRemainingPercentage, 42 | _DestinationMetricTypeLowerName[12:45]: ErrorBudgetRemainingPercentage, 43 | _DestinationMetricTypeName[45:68]: ErrorBudgetPercentage, 44 | _DestinationMetricTypeLowerName[45:68]: ErrorBudgetPercentage, 45 | _DestinationMetricTypeName[68:92]: ErrorBudgetConsumption, 46 | _DestinationMetricTypeLowerName[68:92]: ErrorBudgetConsumption, 47 | _DestinationMetricTypeName[92:127]: ErrorBudgetConsumptionPercentage, 48 | _DestinationMetricTypeLowerName[92:127]: ErrorBudgetConsumptionPercentage, 49 | _DestinationMetricTypeName[127:133]: UpTime, 50 | _DestinationMetricTypeLowerName[127:133]: UpTime, 51 | _DestinationMetricTypeName[133:145]: FailureTime, 52 | _DestinationMetricTypeLowerName[133:145]: FailureTime, 53 | } 54 | 55 | var _DestinationMetricTypeNames = []string{ 56 | _DestinationMetricTypeName[0:12], 57 | _DestinationMetricTypeName[12:45], 58 | _DestinationMetricTypeName[45:68], 59 | _DestinationMetricTypeName[68:92], 60 | _DestinationMetricTypeName[92:127], 61 | _DestinationMetricTypeName[127:133], 62 | _DestinationMetricTypeName[133:145], 63 | } 64 | 65 | // DestinationMetricTypeString retrieves an enum value from the enum constants string name. 
66 | // Throws an error if the param is not part of the enum. 67 | func DestinationMetricTypeString(s string) (DestinationMetricType, error) { 68 | if val, ok := _DestinationMetricTypeNameToValueMap[s]; ok { 69 | return val, nil 70 | } 71 | 72 | if val, ok := _DestinationMetricTypeNameToValueMap[strings.ToLower(s)]; ok { 73 | return val, nil 74 | } 75 | return 0, fmt.Errorf("%s does not belong to DestinationMetricType values", s) 76 | } 77 | 78 | // DestinationMetricTypeValues returns all values of the enum 79 | func DestinationMetricTypeValues() []DestinationMetricType { 80 | return _DestinationMetricTypeValues 81 | } 82 | 83 | // DestinationMetricTypeStrings returns a slice of all String values of the enum 84 | func DestinationMetricTypeStrings() []string { 85 | strs := make([]string, len(_DestinationMetricTypeNames)) 86 | copy(strs, _DestinationMetricTypeNames) 87 | return strs 88 | } 89 | 90 | // IsADestinationMetricType returns "true" if the value is listed in the enum definition. "false" otherwise 91 | func (i DestinationMetricType) IsADestinationMetricType() bool { 92 | for _, v := range _DestinationMetricTypeValues { 93 | if i == v { 94 | return true 95 | } 96 | } 97 | return false 98 | } 99 | 100 | // MarshalYAML implements a YAML Marshaler for DestinationMetricType 101 | func (i DestinationMetricType) MarshalYAML() (interface{}, error) { 102 | return i.String(), nil 103 | } 104 | 105 | // UnmarshalYAML implements a YAML Unmarshaler for DestinationMetricType 106 | func (i *DestinationMetricType) UnmarshalYAML(unmarshal func(interface{}) error) error { 107 | var s string 108 | if err := unmarshal(&s); err != nil { 109 | return err 110 | } 111 | 112 | var err error 113 | *i, err = DestinationMetricTypeString(s) 114 | return err 115 | } 116 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mashiike/shimesaba 2 | 3 | 
go 1.23 4 | 5 | require ( 6 | github.com/Songmu/flextime v0.1.0 7 | github.com/aws/aws-lambda-go v1.47.0 8 | github.com/fatih/color v1.17.0 9 | github.com/fujiwara/logutils v1.1.2 10 | github.com/handlename/ssmwrap/v2 v2.2.0 11 | github.com/hashicorp/go-version v1.7.0 12 | github.com/kayac/go-config v0.7.0 13 | github.com/mackerelio/mackerel-client-go v0.34.0 14 | github.com/shogo82148/go-retry v1.3.1 15 | github.com/stretchr/testify v1.10.0 16 | github.com/urfave/cli/v2 v2.27.6 17 | golang.org/x/sync v0.10.0 18 | ) 19 | 20 | require ( 21 | github.com/BurntSushi/toml v1.4.0 // indirect 22 | github.com/aws/aws-sdk-go-v2 v1.30.1 // indirect 23 | github.com/aws/aws-sdk-go-v2/config v1.27.23 // indirect 24 | github.com/aws/aws-sdk-go-v2/credentials v1.17.23 // indirect 25 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.9 // indirect 26 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.13 // indirect 27 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.13 // indirect 28 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect 29 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3 // indirect 30 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.15 // indirect 31 | github.com/aws/aws-sdk-go-v2/service/ssm v1.52.1 // indirect 32 | github.com/aws/aws-sdk-go-v2/service/sso v1.22.1 // indirect 33 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.1 // indirect 34 | github.com/aws/aws-sdk-go-v2/service/sts v1.30.1 // indirect 35 | github.com/aws/smithy-go v1.20.3 // indirect 36 | github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect 37 | github.com/davecgh/go-spew v1.1.1 // indirect 38 | github.com/jmespath/go-jmespath v0.4.0 // indirect 39 | github.com/lmittmann/tint v1.0.4 // indirect 40 | github.com/mattn/go-colorable v0.1.13 // indirect 41 | github.com/mattn/go-isatty v0.0.20 // indirect 42 | github.com/pmezard/go-difflib v1.0.0 // indirect 43 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 44 | 
github.com/samber/lo v1.44.0 // indirect 45 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect 46 | golang.org/x/sys v0.18.0 // indirect 47 | golang.org/x/text v0.16.0 // indirect 48 | gopkg.in/yaml.v2 v2.4.0 // indirect 49 | gopkg.in/yaml.v3 v3.0.1 // indirect 50 | ) 51 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= 2 | github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= 3 | github.com/Songmu/flextime v0.1.0 h1:sss5IALl84LbvU/cS5D1cKNd5ffT94N2BZwC+esgAJI= 4 | github.com/Songmu/flextime v0.1.0/go.mod h1:ofUSZ/qj7f1BfQQ6rEH4ovewJ0SZmLOjBF1xa8iE87Q= 5 | github.com/aws/aws-lambda-go v1.47.0 h1:0H8s0vumYx/YKs4sE7YM0ktwL2eWse+kfopsRI1sXVI= 6 | github.com/aws/aws-lambda-go v1.47.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A= 7 | github.com/aws/aws-sdk-go-v2 v1.30.1 h1:4y/5Dvfrhd1MxRDD77SrfsDaj8kUkkljU7XE83NPV+o= 8 | github.com/aws/aws-sdk-go-v2 v1.30.1/go.mod h1:nIQjQVp5sfpQcTc9mPSr1B0PaWK5ByX9MOoDadSN4lc= 9 | github.com/aws/aws-sdk-go-v2/config v1.27.23 h1:Cr/gJEa9NAS7CDAjbnB7tHYb3aLZI2gVggfmSAasDac= 10 | github.com/aws/aws-sdk-go-v2/config v1.27.23/go.mod h1:WMMYHqLCFu5LH05mFOF5tsq1PGEMfKbu083VKqLCd0o= 11 | github.com/aws/aws-sdk-go-v2/credentials v1.17.23 h1:G1CfmLVoO2TdQ8z9dW+JBc/r8+MqyPQhXCafNZcXVZo= 12 | github.com/aws/aws-sdk-go-v2/credentials v1.17.23/go.mod h1:V/DvSURn6kKgcuKEk4qwSwb/fZ2d++FFARtWSbXnLqY= 13 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.9 h1:Aznqksmd6Rfv2HQN9cpqIV/lQRMaIpJkLLaJ1ZI76no= 14 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.9/go.mod h1:WQr3MY7AxGNxaqAtsDWn+fBxmd4XvLkzeqQ8P1VM0/w= 15 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.13 h1:5SAoZ4jYpGH4721ZNoS1znQrhOfZinOhc4XuTXx/nVc= 16 | 
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.13/go.mod h1:+rdA6ZLpaSeM7tSg/B0IEDinCIBJGmW8rKDFkYpP04g= 17 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.13 h1:WIijqeaAO7TYFLbhsZmi2rgLEAtWOC1LhxCAVTJlSKw= 18 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.13/go.mod h1:i+kbfa76PQbWw/ULoWnp51EYVWH4ENln76fLQE3lXT8= 19 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= 20 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= 21 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3 h1:dT3MqvGhSoaIhRseqw2I0yH81l7wiR2vjs57O51EAm8= 22 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.3/go.mod h1:GlAeCkHwugxdHaueRr4nhPuY+WW+gR8UjlcqzPr1SPI= 23 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.15 h1:I9zMeF107l0rJrpnHpjEiiTSCKYAIw8mALiXcPsGBiA= 24 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.15/go.mod h1:9xWJ3Q/S6Ojusz1UIkfycgD1mGirJfLLKqq3LPT7WN8= 25 | github.com/aws/aws-sdk-go-v2/service/ssm v1.52.1 h1:zeWJA3f0Td70984ZoSocVAEwVtZBGQu+Q0p/pA7dNoE= 26 | github.com/aws/aws-sdk-go-v2/service/ssm v1.52.1/go.mod h1:xvWzNAXicm5A+1iOiH4sqMLwYHEbiQqpRSe6hvHdQrE= 27 | github.com/aws/aws-sdk-go-v2/service/sso v1.22.1 h1:p1GahKIjyMDZtiKoIn0/jAj/TkMzfzndDv5+zi2Mhgc= 28 | github.com/aws/aws-sdk-go-v2/service/sso v1.22.1/go.mod h1:/vWdhoIoYA5hYoPZ6fm7Sv4d8701PiG5VKe8/pPJL60= 29 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.1 h1:lCEv9f8f+zJ8kcFeAjRZsekLd/x5SAm96Cva+VbUdo8= 30 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.1/go.mod h1:xyFHA4zGxgYkdD73VeezHt3vSKEG9EmFnGwoKlP00u4= 31 | github.com/aws/aws-sdk-go-v2/service/sts v1.30.1 h1:+woJ607dllHJQtsnJLi52ycuqHMwlW+Wqm2Ppsfp4nQ= 32 | github.com/aws/aws-sdk-go-v2/service/sts v1.30.1/go.mod h1:jiNR3JqT15Dm+QWq2SRgh0x0bCNSRP2L25+CqPNpJlQ= 33 | github.com/aws/smithy-go v1.20.3 h1:ryHwveWzPV5BIof6fyDvor6V3iUL7nTfiTKXHiW05nE= 
34 | github.com/aws/smithy-go v1.20.3/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= 35 | github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= 36 | github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 37 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 38 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 39 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 40 | github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= 41 | github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= 42 | github.com/fujiwara/logutils v1.1.2 h1:nYVRyTj+5SyCvpZUrYIZU4kubqNycGTxFXMKJBKe0Sg= 43 | github.com/fujiwara/logutils v1.1.2/go.mod h1:pdb/Uk70rjQWEmFm/OvYH7OG8meZt1fEIqC0qZbvro4= 44 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 45 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 46 | github.com/handlename/ssmwrap/v2 v2.2.0 h1:0MRN4pDSATlNeL0k09aJfTkqbM0r7DRjQvKNT94Kg+8= 47 | github.com/handlename/ssmwrap/v2 v2.2.0/go.mod h1:f6wQjYC/8g0d+ONOzY6yd181bzdxgZprv/W6Lk+N+fE= 48 | github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= 49 | github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= 50 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 51 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 52 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 53 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 54 | github.com/kayac/go-config v0.7.0 h1:BeONaFFq/ILFiEzkCMpKarsjcc3YBgJ7QKg39hXU+nk= 55 | 
github.com/kayac/go-config v0.7.0/go.mod h1:Nfkw4LZOh/7HGepftBvD2lKEpPyl1Vp89yA7gDJS5r0= 56 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 57 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 58 | github.com/lmittmann/tint v1.0.4 h1:LeYihpJ9hyGvE0w+K2okPTGUdVLfng1+nDNVR4vWISc= 59 | github.com/lmittmann/tint v1.0.4/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= 60 | github.com/mackerelio/mackerel-client-go v0.34.0 h1:p7m2ceyqn3iihIb08URCb/pO5gs30IWCJKStQAI5ARc= 61 | github.com/mackerelio/mackerel-client-go v0.34.0/go.mod h1:YEWy40Ybr+Z75TvQw0t9/KYO0ZfPW2GrbCJhRp0UHpY= 62 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 63 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 64 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 65 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 66 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 67 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 68 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 69 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 70 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 71 | github.com/samber/lo v1.44.0 h1:5il56KxRE+GHsm1IR+sZ/6J42NODigFiqCWpSc2dybA= 72 | github.com/samber/lo v1.44.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= 73 | github.com/shogo82148/go-retry v1.3.1 h1:AFJHUWG7mLzLFN/21p3NdzdL55ttZgdapWaFgbtYf8g= 74 | github.com/shogo82148/go-retry v1.3.1/go.mod h1:wttfgfwCMQvNqv4kOpqIvDDJeSmwU+AEIpUyG+5Ca6M= 75 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 76 | 
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 77 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 78 | github.com/urfave/cli/v2 v2.27.6 h1:VdRdS98FNhKZ8/Az8B7MTyGQmpIr36O1EHybx/LaZ4g= 79 | github.com/urfave/cli/v2 v2.27.6/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= 80 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= 81 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 82 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= 83 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 84 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 85 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 86 | golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= 87 | golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 88 | golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= 89 | golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= 90 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 91 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 92 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 93 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 94 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 95 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 96 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 97 | -------------------------------------------------------------------------------- 
/internal/logger/logger.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "io" 5 | "log" 6 | "strings" 7 | 8 | "github.com/fatih/color" 9 | "github.com/fujiwara/logutils" 10 | ) 11 | 12 | //Setup logger 13 | func Setup(out io.Writer, minLevel string) { 14 | filter := &logutils.LevelFilter{ 15 | Levels: []logutils.LogLevel{"debug", "info", "notice", "warn", "error"}, 16 | ModifierFuncs: []logutils.ModifierFunc{ 17 | logutils.Color(color.FgHiBlack), 18 | nil, 19 | logutils.Color(color.FgHiBlue), 20 | logutils.Color(color.FgYellow), 21 | logutils.Color(color.FgRed, color.BgBlack), 22 | }, 23 | MinLevel: logutils.LogLevel(strings.ToLower(minLevel)), 24 | Writer: out, 25 | } 26 | log.SetOutput(filter) 27 | } 28 | -------------------------------------------------------------------------------- /internal/timeutils/iterator.go: -------------------------------------------------------------------------------- 1 | package timeutils 2 | 3 | import "time" 4 | 5 | // Iterator achieves a loop for a specified period of time 6 | type Iterator struct { 7 | startAt time.Time 8 | endAt time.Time 9 | tick time.Duration 10 | curAt time.Time 11 | enableOverWindow bool 12 | } 13 | 14 | //NewIterator create Iterator 15 | func NewIterator(startAt, endAt time.Time, tick time.Duration) *Iterator { 16 | return &Iterator{ 17 | startAt: startAt, 18 | endAt: endAt, 19 | curAt: startAt, 20 | tick: tick, 21 | enableOverWindow: false, 22 | } 23 | } 24 | 25 | func (iter *Iterator) remaining() time.Duration { 26 | return iter.endAt.Sub(iter.curAt) 27 | } 28 | 29 | func (iter *Iterator) nextTick() time.Duration { 30 | if remaining := iter.remaining(); !iter.enableOverWindow && remaining < iter.tick { 31 | return remaining 32 | } 33 | return iter.tick 34 | } 35 | 36 | // HasNext is a loop continuation condition 37 | func (iter *Iterator) HasNext() bool { 38 | remaining := iter.remaining() 39 | if remaining > 0 { 40 | return true 
41 | } 42 | return iter.enableOverWindow && remaining == 0 43 | } 44 | 45 | // Next returns the current rolling window and recommends Iterator to the next window 46 | func (iter *Iterator) Next() (time.Time, time.Time) { 47 | nextTick := iter.nextTick() 48 | curStartAt := iter.curAt 49 | curEndAt := iter.curAt.Add(nextTick) 50 | iter.curAt = curEndAt 51 | return curStartAt, curEndAt.Add(-time.Nanosecond) 52 | } 53 | 54 | //SetEnableOverWindow affects the Iterator's end condition and specifies whether to allow the end time of the rolling window to exceed the end time of the specified period. 55 | func (iter *Iterator) SetEnableOverWindow(flag bool) { 56 | iter.enableOverWindow = flag 57 | } 58 | -------------------------------------------------------------------------------- /internal/timeutils/iterator_test.go: -------------------------------------------------------------------------------- 1 | package timeutils_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/mashiike/shimesaba/internal/timeutils" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | type timeTuple struct { 13 | StartAt, EndAt time.Time 14 | } 15 | 16 | func (t timeTuple) GoString() string { 17 | return fmt.Sprintf("[%s ~ %s]", t.StartAt, t.EndAt) 18 | } 19 | 20 | func TestIterator(t *testing.T) { 21 | cases := []struct { 22 | startAt time.Time 23 | endAt time.Time 24 | tick time.Duration 25 | enableOverWindow bool 26 | expected []timeTuple 27 | }{ 28 | { 29 | startAt: time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 30 | endAt: time.Date(2021, time.October, 1, 5, 0, 0, 0, time.UTC), 31 | tick: time.Hour, 32 | expected: []timeTuple{ 33 | 34 | { 35 | StartAt: time.Date(2021, time.October, 1, 0, 0, 0, 0, time.UTC), 36 | EndAt: time.Date(2021, time.October, 1, 0, 59, 59, 999999999, time.UTC), 37 | }, 38 | { 39 | StartAt: time.Date(2021, time.October, 1, 1, 0, 0, 0, time.UTC), 40 | EndAt: time.Date(2021, time.October, 1, 1, 59, 59, 999999999, time.UTC), 41 | }, 
42 | { 43 | StartAt: time.Date(2021, time.October, 1, 2, 0, 0, 0, time.UTC), 44 | EndAt: time.Date(2021, time.October, 1, 2, 59, 59, 999999999, time.UTC), 45 | }, 46 | { 47 | StartAt: time.Date(2021, time.October, 1, 3, 0, 0, 0, time.UTC), 48 | EndAt: time.Date(2021, time.October, 1, 3, 59, 59, 999999999, time.UTC), 49 | }, 50 | { 51 | StartAt: time.Date(2021, time.October, 1, 4, 0, 0, 0, time.UTC), 52 | EndAt: time.Date(2021, time.October, 1, 4, 59, 59, 999999999, time.UTC), 53 | }, 54 | }, 55 | }, 56 | { 57 | startAt: time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), 58 | endAt: time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 59 | tick: 25 * time.Second, 60 | expected: []timeTuple{ 61 | 62 | { 63 | StartAt: time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), 64 | EndAt: time.Date(2021, time.October, 1, 0, 1, 24, 999999999, time.UTC), 65 | }, 66 | { 67 | StartAt: time.Date(2021, time.October, 1, 0, 1, 25, 0, time.UTC), 68 | EndAt: time.Date(2021, time.October, 1, 0, 1, 49, 999999999, time.UTC), 69 | }, 70 | { 71 | StartAt: time.Date(2021, time.October, 1, 0, 1, 50, 0, time.UTC), 72 | EndAt: time.Date(2021, time.October, 1, 0, 1, 59, 999999999, time.UTC), 73 | }, 74 | }, 75 | }, 76 | { 77 | startAt: time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), 78 | endAt: time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 79 | enableOverWindow: true, 80 | tick: 25 * time.Second, 81 | expected: []timeTuple{ 82 | 83 | { 84 | StartAt: time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), 85 | EndAt: time.Date(2021, time.October, 1, 0, 1, 24, 999999999, time.UTC), 86 | }, 87 | { 88 | StartAt: time.Date(2021, time.October, 1, 0, 1, 25, 0, time.UTC), 89 | EndAt: time.Date(2021, time.October, 1, 0, 1, 49, 999999999, time.UTC), 90 | }, 91 | { 92 | StartAt: time.Date(2021, time.October, 1, 0, 1, 50, 0, time.UTC), 93 | EndAt: time.Date(2021, time.October, 1, 0, 2, 14, 999999999, time.UTC), 94 | }, 95 | }, 96 | }, 97 | { 98 | startAt: time.Date(2021, 
time.October, 1, 0, 1, 0, 0, time.UTC), 99 | endAt: time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 100 | enableOverWindow: true, 101 | tick: 30 * time.Second, 102 | expected: []timeTuple{ 103 | 104 | { 105 | StartAt: time.Date(2021, time.October, 1, 0, 1, 0, 0, time.UTC), 106 | EndAt: time.Date(2021, time.October, 1, 0, 1, 29, 999999999, time.UTC), 107 | }, 108 | { 109 | StartAt: time.Date(2021, time.October, 1, 0, 1, 30, 0, time.UTC), 110 | EndAt: time.Date(2021, time.October, 1, 0, 1, 59, 999999999, time.UTC), 111 | }, 112 | { 113 | StartAt: time.Date(2021, time.October, 1, 0, 2, 0, 0, time.UTC), 114 | EndAt: time.Date(2021, time.October, 1, 0, 2, 29, 999999999, time.UTC), 115 | }, 116 | }, 117 | }, 118 | } 119 | for _, c := range cases { 120 | t.Run(fmt.Sprintf("%s~%s[tick=%s,over=%v]", c.startAt, c.endAt, c.tick, c.enableOverWindow), func(t *testing.T) { 121 | iter := timeutils.NewIterator( 122 | c.startAt, 123 | c.endAt, 124 | c.tick, 125 | ) 126 | iter.SetEnableOverWindow(c.enableOverWindow) 127 | actual := make([]timeTuple, 0) 128 | for iter.HasNext() { 129 | startAt, endAt := iter.Next() 130 | actual = append(actual, timeTuple{ 131 | StartAt: startAt, 132 | EndAt: endAt, 133 | }) 134 | } 135 | require.EqualValues(t, c.expected, actual) 136 | }) 137 | } 138 | 139 | } 140 | -------------------------------------------------------------------------------- /internal/timeutils/parse.go: -------------------------------------------------------------------------------- 1 | package timeutils 2 | 3 | import ( 4 | "errors" 5 | "log" 6 | "strconv" 7 | "time" 8 | ) 9 | 10 | func ParseDuration(str string) (time.Duration, error) { 11 | if d, err := strconv.ParseUint(str, 10, 64); err == nil { 12 | log.Printf("[warn] Setting an interval without a unit is deprecated. 
Please write `%s` as` %sm`", str, str) 13 | return time.Duration(d) * time.Minute, nil 14 | } 15 | 16 | days, parts := trimDay(str) 17 | if parts != "" { 18 | d, err := time.ParseDuration(parts) 19 | return days + d, err 20 | } 21 | if days == 0 { 22 | return 0, errors.New("invalid format") 23 | } 24 | return days, nil 25 | } 26 | 27 | func trimDay(str string) (time.Duration, string) { 28 | var val int64 29 | for i, c := range str { 30 | if '0' <= c && c <= '9' { 31 | v := int64(c - '0') 32 | val = val*10 + v 33 | continue 34 | } 35 | if c == 'd' { 36 | return time.Duration(val) * 24 * time.Hour, str[i+1:] 37 | } 38 | return 0, str 39 | } 40 | return 0, str 41 | } 42 | -------------------------------------------------------------------------------- /internal/timeutils/parse_test.go: -------------------------------------------------------------------------------- 1 | package timeutils_test 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/mashiike/shimesaba/internal/timeutils" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestParseDurationSuccess(t *testing.T) { 12 | cases := []struct { 13 | str string 14 | expected time.Duration 15 | }{ 16 | { 17 | str: "1", 18 | expected: time.Minute, 19 | }, 20 | { 21 | str: "1m", 22 | expected: time.Minute, 23 | }, 24 | { 25 | str: "1h1m", 26 | expected: time.Hour + time.Minute, 27 | }, 28 | { 29 | str: "1d", 30 | expected: 24 * time.Hour, 31 | }, 32 | { 33 | str: "1d1m3s", 34 | expected: 24*time.Hour + time.Minute + 3*time.Second, 35 | }, 36 | } 37 | 38 | for _, c := range cases { 39 | t.Run(c.str, func(t *testing.T) { 40 | actual, err := timeutils.ParseDuration(c.str) 41 | require.NoError(t, err) 42 | require.EqualValues(t, c.expected, actual) 43 | }) 44 | } 45 | } 46 | 47 | func TestParseDurationFailed(t *testing.T) { 48 | cases := []struct { 49 | str string 50 | }{ 51 | { 52 | str: "s", 53 | }, 54 | { 55 | str: "1mins", 56 | }, 57 | { 58 | str: "1h1m1d", 59 | }, 60 | { 61 | str: "1days", 62 | }, 
63 | { 64 | str: "11m1d3s", 65 | }, 66 | { 67 | str: "d", 68 | }, 69 | } 70 | 71 | for _, c := range cases { 72 | t.Run(c.str, func(t *testing.T) { 73 | _, err := timeutils.ParseDuration(c.str) 74 | require.Error(t, err) 75 | }) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /internal/timeutils/stirng_test.go: -------------------------------------------------------------------------------- 1 | package timeutils_test 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/mashiike/shimesaba/internal/timeutils" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestDurationString(t *testing.T) { 12 | cases := []struct { 13 | d time.Duration 14 | expected string 15 | }{ 16 | { 17 | expected: "1m", 18 | d: time.Minute, 19 | }, 20 | { 21 | expected: "1h1m", 22 | d: time.Hour + time.Minute, 23 | }, 24 | { 25 | expected: "1d", 26 | d: 24 * time.Hour, 27 | }, 28 | { 29 | expected: "1d1m3s", 30 | d: 24*time.Hour + time.Minute + 3*time.Second, 31 | }, 32 | } 33 | 34 | for _, c := range cases { 35 | t.Run(c.expected, func(t *testing.T) { 36 | actual := timeutils.DurationString(c.d) 37 | require.EqualValues(t, c.expected, actual) 38 | }) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /internal/timeutils/string.go: -------------------------------------------------------------------------------- 1 | package timeutils 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "time" 7 | ) 8 | 9 | func DurationString(d time.Duration) string { 10 | days := uint64(d.Truncate(24*time.Hour).Hours() / 24.0) 11 | remining := d - d.Truncate(24*time.Hour) 12 | hours := uint64(remining.Truncate(time.Hour).Hours()) 13 | remining = remining - remining.Truncate(time.Hour) 14 | minutes := uint64(remining.Truncate(time.Minute).Minutes()) 15 | remining = remining - remining.Truncate(time.Minute) 16 | seconds := uint64(remining.Truncate(time.Second).Seconds()) 17 | return 
durationString(days, hours, minutes, seconds) 18 | } 19 | 20 | func durationString(day, hours, minutes, seconds uint64) string { 21 | var builder strings.Builder 22 | if day > 0 { 23 | fmt.Fprintf(&builder, "%dd", day) 24 | } 25 | if hours > 0 { 26 | fmt.Fprintf(&builder, "%dh", hours) 27 | } 28 | if minutes > 0 { 29 | fmt.Fprintf(&builder, "%dm", minutes) 30 | } 31 | if seconds > 0 { 32 | fmt.Fprintf(&builder, "%ds", seconds) 33 | } 34 | return builder.String() 35 | } 36 | -------------------------------------------------------------------------------- /mackerel.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | "github.com/Songmu/flextime" 12 | mackerel "github.com/mackerelio/mackerel-client-go" 13 | retry "github.com/shogo82148/go-retry" 14 | ) 15 | 16 | // MackerelClient is an abstraction interface for mackerel-client-go.Client 17 | type MackerelClient interface { 18 | GetOrg() (*mackerel.Org, error) 19 | FindHosts(param *mackerel.FindHostsParam) ([]*mackerel.Host, error) 20 | FetchHostMetricValues(hostID string, metricName string, from int64, to int64) ([]mackerel.MetricValue, error) 21 | FetchServiceMetricValues(serviceName string, metricName string, from int64, to int64) ([]mackerel.MetricValue, error) 22 | PostServiceMetricValues(serviceName string, metricValues []*mackerel.MetricValue) error 23 | 24 | FindWithClosedAlerts() (*mackerel.AlertsResp, error) 25 | FindWithClosedAlertsByNextID(nextID string) (*mackerel.AlertsResp, error) 26 | GetMonitor(monitorID string) (mackerel.Monitor, error) 27 | FindMonitors() ([]mackerel.Monitor, error) 28 | 29 | FindGraphAnnotations(service string, from int64, to int64) ([]*mackerel.GraphAnnotation, error) 30 | } 31 | 32 | // Repository handles reading and writing data 33 | type Repository struct { 34 | client MackerelClient 35 | 36 | mu sync.Mutex 37 | monitorByID 
map[string]*Monitor 38 | 39 | alertMu sync.Mutex 40 | alertCache Alerts 41 | alertCurrentAt time.Time 42 | alertNextID string 43 | } 44 | 45 | // NewRepository creates Repository 46 | func NewRepository(client MackerelClient) *Repository { 47 | return &Repository{ 48 | client: client, 49 | monitorByID: make(map[string]*Monitor), 50 | alertCache: make(Alerts, 0, 100), 51 | } 52 | } 53 | 54 | func (repo *Repository) GetOrgName(ctx context.Context) (string, error) { 55 | org, err := repo.client.GetOrg() 56 | if err != nil { 57 | return "", err 58 | } 59 | return org.Name, nil 60 | } 61 | 62 | // SaveReports posts Reports to Mackerel 63 | func (repo *Repository) SaveReports(ctx context.Context, reports []*Report) error { 64 | services := make(map[string][]*mackerel.MetricValue) 65 | for _, report := range reports { 66 | values, ok := services[report.Destination.ServiceName] 67 | if !ok { 68 | values = make([]*mackerel.MetricValue, 0) 69 | } 70 | values = append(values, newMackerelMetricValuesFromReport(report)...) 
 71 | 		services[report.Destination.ServiceName] = values
 72 | 	}
 73 | 	for service, values := range services {
	// non-blocking cancellation check before each service's batch of posts
 74 | 		select {
 75 | 		case <-ctx.Done():
 76 | 			return ctx.Err()
 77 | 		default:
 78 | 		}
 79 | 		if err := repo.postServiceMetricValues(ctx, service, values); err != nil {
 80 | 			return fmt.Errorf("post service `%s` metric values: %w", service, err)
 81 | 		}
 82 | 	}
 83 | 	return nil
 84 | }
 85 | 
// batchSize caps how many metric values are sent in a single
// PostServiceMetricValues API call.
 86 | const batchSize = 100
 87 | 
// policy retries a failed post with backoff between 1s and 10s,
// up to 10 attempts (per the go-retry policy fields below).
 88 | var policy = retry.Policy{
 89 | 	MinDelay: time.Second,
 90 | 	MaxDelay: 10 * time.Second,
 91 | 	MaxCount: 10,
 92 | }
 93 | 
// postServiceMetricValues posts values to Mackerel for one service in
// batches of batchSize, retrying each batch under `policy`.
 94 | func (repo *Repository) postServiceMetricValues(ctx context.Context, service string, values []*mackerel.MetricValue) error {
 95 | 	size := len(values)
 96 | 	for i := 0; i < size; i += batchSize {
 97 | 		start, end := i, i+batchSize
 98 | 		if size < end {
 99 | 			end = size
100 | 		}
101 | 		log.Printf("[debug] PostServiceMetricValues to Mackerel %s values[%d:%d]\n", service, start, end)
102 | 		err := policy.Do(ctx, func() error {
103 | 			err := repo.client.PostServiceMetricValues(service, values[start:end])
104 | 			if err != nil {
105 | 				log.Printf("[warn] PostServiceMetricValues to Mackerel failed, retry because: %s\n", err)
106 | 			}
107 | 			return err
108 | 		})
// NOTE(review): a batch that still fails after all retries is only logged at
// warn and the function returns nil, so SaveReports reports success —
// presumably best-effort by design; confirm before relying on the error value.
109 | 		if err != nil {
110 | 			log.Printf("[warn] PostServiceMetricValues to Mackerel failed:%s %s\n", service, err)
111 | 		}
112 | 	}
113 | 	return nil
114 | }
115 | 
// newMackerelMetricValuesFromReport converts a Report into Mackerel metric
// values, emitting one value per enabled destination metric type.
116 | func newMackerelMetricValuesFromReport(report *Report) []*mackerel.MetricValue {
117 | 	metricTypes := DestinationMetricTypeValues()
118 | 	values := make([]*mackerel.MetricValue, 0, len(metricTypes))
119 | 	for _, metricType := range metricTypes {
120 | 		if report.Destination.MetricEnabled(metricType) {
121 | 			values = append(values, &mackerel.MetricValue{
122 | 				Name: report.Destination.MetricName(metricType),
123 | 				Time: report.DataPoint.Unix(),
124 | 				Value: report.GetDestinationMetricValue(metricType),
125 | 			})
126 | 		}
127 | 	}
128 | 	return values
129 | }
130 | 
131 | // FetchAlerts retrieves alerts for a specified period of time
132 | func (repo *Repository) FetchAlerts(ctx context.Context, startAt time.Time, endAt time.Time) (Alerts, error) { 133 | repo.alertMu.Lock() 134 | defer repo.alertMu.Unlock() 135 | 136 | if len(repo.alertCache) == 0 { 137 | if err := repo.fetchAlertsInitial(ctx); err != nil { 138 | return nil, err 139 | } 140 | } 141 | for startAt.Before(repo.alertCurrentAt) && repo.alertNextID != "" { 142 | if err := repo.fetchAlertsIncremental(ctx); err != nil { 143 | return nil, err 144 | } 145 | } 146 | alerts := make(Alerts, 0, 100) 147 | for _, alert := range repo.alertCache { 148 | if alert.OpenedAt.After(endAt) { 149 | continue 150 | } 151 | if alert.OpenedAt.Before(startAt) { 152 | break 153 | } 154 | alerts = append(alerts, alert) 155 | } 156 | return alerts, nil 157 | } 158 | 159 | const virtualAlertKeyword = "SLO:" 160 | 161 | // FetchVirtualAlerts retrieves graph annotations for a specified time period and returns them as virtual alerts. 162 | func (repo *Repository) FetchVirtualAlerts(ctx context.Context, serviceName string, sloID string, startAt time.Time, endAt time.Time) (Alerts, error) { 163 | log.Printf("[debug] call MackerelClient.FindGraphAnnotations(%s, %s, %s)", serviceName, startAt, endAt) 164 | annotations, err := repo.client.FindGraphAnnotations(serviceName, startAt.Unix(), endAt.Unix()) 165 | if err != nil { 166 | return nil, err 167 | } 168 | log.Printf("[debug] get %d graph annotations", len(annotations)) 169 | vAlerts := make(Alerts, 0) 170 | for _, annotation := range annotations { 171 | i := strings.Index(annotation.Description, virtualAlertKeyword) 172 | if i < 0 { 173 | i = strings.Index(annotation.Description, strings.ToLower(virtualAlertKeyword)) 174 | if i < 0 { 175 | continue 176 | } 177 | } 178 | str := annotation.Description[i+len(virtualAlertKeyword):] 179 | j := strings.IndexRune(str, ' ') 180 | if j >= 0 { 181 | str = str[:j] 182 | } 183 | if strings.EqualFold(strings.TrimSpace(str), "*") { 184 | vAlerts = append(vAlerts, 
NewVirtualAlert( 185 | annotation.Description, 186 | time.Unix(annotation.From, 0), 187 | time.Unix(annotation.To, 0), 188 | )) 189 | } 190 | slos := strings.Split(str, ",") 191 | for _, slo := range slos { 192 | if strings.HasPrefix(slo, sloID) { 193 | vAlerts = append(vAlerts, NewVirtualAlert( 194 | annotation.Description, 195 | time.Unix(annotation.From, 0), 196 | time.Unix(annotation.To, 0), 197 | )) 198 | } 199 | } 200 | } 201 | return vAlerts, nil 202 | } 203 | 204 | func (repo *Repository) fetchAlertsInitial(ctx context.Context) error { 205 | log.Printf("[debug] call MackerelClient.FindWithClosedAlerts()") 206 | resp, err := repo.client.FindWithClosedAlerts() 207 | if err != nil { 208 | return err 209 | } 210 | converted, err := repo.convertAlerts(resp) 211 | if err != nil { 212 | return err 213 | } 214 | repo.alertCache = append(repo.alertCache, converted...) 215 | currentAt := flextime.Now() 216 | if len(repo.alertCache) != 0 { 217 | currentAt = repo.alertCache[len(repo.alertCache)-1].OpenedAt 218 | } 219 | repo.alertCurrentAt = currentAt 220 | repo.alertNextID = resp.NextID 221 | return nil 222 | } 223 | 224 | func (repo *Repository) fetchAlertsIncremental(ctx context.Context) error { 225 | log.Printf("[debug] call MackerelClient.FindWithClosedAlertsByNextID(%s)", repo.alertNextID) 226 | resp, err := repo.client.FindWithClosedAlertsByNextID(repo.alertNextID) 227 | if err != nil { 228 | return err 229 | } 230 | converted, err := repo.convertAlerts(resp) 231 | if err != nil { 232 | return err 233 | } 234 | repo.alertCache = append(repo.alertCache, converted...) 
235 | 236 | if len(converted) != 0 { 237 | repo.alertCurrentAt = converted[len(converted)-1].OpenedAt 238 | repo.alertNextID = resp.NextID 239 | } 240 | return nil 241 | } 242 | 243 | func (repo *Repository) convertAlerts(resp *mackerel.AlertsResp) ([]*Alert, error) { 244 | alerts := make([]*Alert, 0, len(resp.Alerts)) 245 | for _, alert := range resp.Alerts { 246 | if alert.MonitorID == "" { 247 | continue 248 | } 249 | openedAt := time.Unix(alert.OpenedAt, 0) 250 | var closedAt *time.Time 251 | if alert.Status == "OK" { 252 | tmpClosedAt := time.Unix(alert.ClosedAt, 0) 253 | closedAt = &tmpClosedAt 254 | } 255 | var monitor *Monitor 256 | if alert.MonitorID == "" { 257 | log.Printf("[warn] alert[%s].MonitorID is empty", alert.ID) 258 | monitor = NewMonitor("unknown", "unknown", "unknown") 259 | } else { 260 | var err error 261 | monitor, err = repo.getMonitor(alert.MonitorID, alert.Type) 262 | if err != nil { 263 | if strings.Contains(err.Error(), "unknown monitor type:") { 264 | log.Printf("[warn] alert[%s].MonitorID=%s, unknown monitor type: %s", alert.ID, alert.MonitorID, alert.Type) 265 | monitor = NewMonitor(alert.MonitorID, alert.MonitorID, alert.Type) 266 | } else { 267 | return nil, fmt.Errorf("get monitor for alert `%s`: %w", alert.ID, err) 268 | } 269 | } 270 | } 271 | a := NewAlert( 272 | monitor, 273 | openedAt, 274 | closedAt, 275 | ) 276 | a = a.WithHostID(alert.HostID).WithReason(alert.Reason) 277 | log.Printf("[debug] %s", a) 278 | alerts = append(alerts, a) 279 | } 280 | return alerts, nil 281 | } 282 | 283 | func (repo *Repository) getMonitor(id string, monitorType string) (*Monitor, error) { 284 | repo.mu.Lock() 285 | defer repo.mu.Unlock() 286 | if monitor, ok := repo.monitorByID[id]; ok { 287 | return monitor, nil 288 | } 289 | switch monitorType { 290 | case "check": 291 | log.Printf("[debug] %s is check monitor, set dummy monitor", id) 292 | repo.monitorByID[id] = NewMonitor(id, fmt.Sprintf("check monitor %s", id), "check") 293 | return 
repo.monitorByID[id], nil 294 | default: 295 | log.Printf("[debug] call GetMonitor(%s)", id) 296 | monitor, err := repo.client.GetMonitor(id) 297 | if err != nil { 298 | return nil, err 299 | } 300 | log.Printf("[debug] catch monitor[%s] = %#v", id, monitor) 301 | repo.monitorByID[id] = repo.convertMonitor(monitor) 302 | return repo.monitorByID[id], nil 303 | } 304 | } 305 | 306 | func (repo *Repository) FindMonitors() ([]*Monitor, error) { 307 | repo.mu.Lock() 308 | defer repo.mu.Unlock() 309 | log.Printf("[debug] call FindMonitors()") 310 | monitors, err := repo.client.FindMonitors() 311 | if err != nil { 312 | return nil, err 313 | } 314 | ret := make([]*Monitor, 0, len(monitors)) 315 | for _, m := range monitors { 316 | monitor := repo.convertMonitor(m) 317 | repo.monitorByID[monitor.ID()] = monitor 318 | ret = append(ret, monitor) 319 | } 320 | return ret, nil 321 | } 322 | 323 | func (repo *Repository) convertMonitor(monitor mackerel.Monitor) *Monitor { 324 | m := NewMonitor( 325 | monitor.MonitorID(), 326 | monitor.MonitorName(), 327 | monitor.MonitorType(), 328 | ) 329 | switch monitor := monitor.(type) { 330 | case *mackerel.MonitorHostMetric: 331 | m = m.WithEvaluator(func(hostID string, timeFrame time.Duration, startAt, endAt time.Time) (Reliabilities, bool) { 332 | log.Printf("[debug] try evaluate host metric, host_id=`%s`, monitor=`%s` time=%s~%s", hostID, monitor.Name, startAt, endAt) 333 | metrics, err := repo.client.FetchHostMetricValues(hostID, monitor.Metric, startAt.Unix(), endAt.Unix()) 334 | if err != nil { 335 | log.Printf("[debug] FetchHostMetricValues failed: %s", err) 336 | log.Printf("[warn] monitor `%s`, can not get host metric = `%s`, reliability reassessment based on metric is not enabled.", monitor.Name, monitor.Metric) 337 | return nil, false 338 | } 339 | isNoViolation := make(IsNoViolationCollection, endAt.Sub(startAt)/time.Minute) 340 | for _, metric := range metrics { 341 | cursorAt := time.Unix(metric.Time, 0).UTC() 342 | value, 
ok := metric.Value.(float64)
343 | 				if !ok {
344 | 					continue
345 | 				}
346 | 				switch monitor.Operator {
347 | 				case ">":
348 | 					if monitor.Warning != nil {
349 | 						if value > *monitor.Warning {
350 | 							isNoViolation[cursorAt] = false
351 | 							log.Printf("[debug] monitor `%s`, SLO Violation, host_id=`%s`, time=`%s`, value[%f] > warning[%f]", monitor.Name, hostID, cursorAt, value, *monitor.Warning)
352 | 							continue
353 | 						}
354 | 					}
355 | 					if monitor.Critical != nil {
356 | 						if value > *monitor.Critical {
357 | 							isNoViolation[cursorAt] = false
358 | 							log.Printf("[debug] monitor `%s`, SLO Violation, hostId=`%s`, time=`%s`, value[%f] > critical[%f]", monitor.Name, hostID, cursorAt, value, *monitor.Critical)
359 | 							continue
360 | 						}
361 | 					}
362 | 				case "<":
363 | 					if monitor.Warning != nil {
364 | 						if value < *monitor.Warning {
365 | 							isNoViolation[cursorAt] = false
366 | 							log.Printf("[debug] monitor `%s`, SLO Violation, hostId=`%s`, time=`%s`, value[%f] < warning[%f]", monitor.Name, hostID, cursorAt, value, *monitor.Warning)
367 | 							continue
368 | 						}
369 | 					}
370 | 					if monitor.Critical != nil {
371 | 						if value < *monitor.Critical {
372 | 							isNoViolation[cursorAt] = false
373 | 							log.Printf("[debug] monitor `%s`, SLO Violation, hostId=`%s`, time=`%s`, value[%f] < critical[%f]", monitor.Name, hostID, cursorAt, value, *monitor.Critical) // fix: was *monitor.Warning — this branch compares against Critical
374 | 							continue
375 | 						}
376 | 					}
377 | 				default:
378 | 					log.Printf("[warn] monitor `%s`, unknown operator `%s`, reliability reassessment based on metric is not enabled.", monitor.Name, monitor.Operator)
379 | 					return nil, false
380 | 				}
381 | 			}
382 | 			reliabilities, err := isNoViolation.NewReliabilities(timeFrame, startAt, endAt)
383 | 			if err != nil {
384 | 				log.Printf("[debug] NewReliabilities failed: %s", err)
385 | 				log.Printf("[warn] monitor `%s`, reliability reassessment based on metric is not enabled.", monitor.Name)
386 | 				return nil, false
387 | 			}
388 | 			return reliabilities, true
389 | 		})
390 | 	case *mackerel.MonitorServiceMetric:
391 | 		m = m.WithEvaluator(func(_ string, timeFrame 
time.Duration, startAt, endAt time.Time) (Reliabilities, bool) { 392 | log.Printf("[debug] try evaluate service metric, service=%s monitor=`%s` time=%s~%s", monitor.Service, monitor.Name, startAt, endAt) 393 | metrics, err := repo.client.FetchServiceMetricValues(monitor.Service, monitor.Metric, startAt.Unix(), endAt.Unix()) 394 | if err != nil { 395 | log.Printf("[debug] FetchServiceMetricValues failed: %s", err) 396 | log.Printf("[warn] monitor `%s`, can not get service metric = `%s`, reliability reassessment based on metric is not enabled.", monitor.Name, monitor.Metric) 397 | return nil, false 398 | } 399 | isNoViolation := make(IsNoViolationCollection, endAt.Sub(startAt)/time.Minute) 400 | for _, metric := range metrics { 401 | cursorAt := time.Unix(metric.Time, 0).UTC() 402 | value, ok := metric.Value.(float64) 403 | if !ok { 404 | continue 405 | } 406 | switch monitor.Operator { 407 | case ">": 408 | if monitor.Warning != nil { 409 | if value > *monitor.Warning { 410 | isNoViolation[cursorAt] = false 411 | log.Printf("[debug] monitor `%s`, SLO Violation, service=`%s`, time=`%s`, value[%f] > warning[%f]", monitor.Name, monitor.Service, cursorAt, value, *monitor.Warning) 412 | continue 413 | } 414 | } 415 | if monitor.Critical != nil { 416 | if value > *monitor.Critical { 417 | isNoViolation[cursorAt] = false 418 | log.Printf("[debug] monitor `%s`, SLO Violation, service=`%s`, time=`%s`, value[%f] > critical[%f]", monitor.Name, monitor.Service, cursorAt, value, *monitor.Critical) 419 | continue 420 | } 421 | } 422 | case "<": 423 | if monitor.Warning != nil { 424 | if value < *monitor.Warning { 425 | isNoViolation[cursorAt] = false 426 | log.Printf("[debug] monitor `%s`, SLO Violation, service=`%s`, time=`%s`, value[%f] < warning[%f]", monitor.Name, monitor.Service, cursorAt, value, *monitor.Warning) 427 | continue 428 | } 429 | } 430 | if monitor.Critical != nil { 431 | if value < *monitor.Critical { 432 | isNoViolation[cursorAt] = false 433 | 
log.Printf("[debug] monitor `%s`, SLO Violation, service=`%s`, time=`%s`, value[%f] < critical[%f]", monitor.Name, monitor.Service, cursorAt, value, *monitor.Warning) 434 | continue 435 | } 436 | } 437 | default: 438 | log.Printf("[warn] monitor `%s`, unknown operator `%s`, reliability reassessment based on metric is not enabled.", monitor.Name, monitor.Operator) 439 | return nil, false 440 | } 441 | } 442 | reliabilities, err := isNoViolation.NewReliabilities(timeFrame, startAt, endAt) 443 | if err != nil { 444 | log.Printf("[debug] NewReliabilities failed: %s", err) 445 | log.Printf("[warn] monitor `%s`, reliability reassessment based on metric is not enabled.", monitor.Name) 446 | return nil, false 447 | } 448 | return reliabilities, true 449 | }) 450 | } 451 | return m 452 | } 453 | 454 | func (repo *Repository) WithDryRun() *Repository { 455 | return &Repository{ 456 | client: DryRunMackerelClient{ 457 | MackerelClient: repo.client, 458 | }, 459 | monitorByID: repo.monitorByID, 460 | } 461 | } 462 | 463 | type DryRunMackerelClient struct { 464 | MackerelClient 465 | } 466 | 467 | func (c DryRunMackerelClient) PostServiceMetricValues(serviceName string, metricValues []*mackerel.MetricValue) error { 468 | for _, value := range metricValues { 469 | log.Printf("[debug] **DRY RUN** action=PostServiceMetricValue, service=`%s`, metricName=`%s`, time=`%s`, value=`%f` ", serviceName, value.Name, time.Unix(value.Time, 0).UTC(), value.Value) 470 | } 471 | return nil 472 | } 473 | -------------------------------------------------------------------------------- /mackerel_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/mashiike/shimesaba" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestRepositoryFetchVirtualAlerts(t *testing.T) { 13 | client := newMockMackerelClient(t) 14 | repo := 
shimesaba.NewRepository(client) 15 | 16 | cases := []struct { 17 | name string 18 | serviceName string 19 | sloID string 20 | startAt time.Time 21 | endAt time.Time 22 | expected shimesaba.Alerts 23 | }{ 24 | { 25 | name: "SLO:*", 26 | serviceName: "shimesaba", 27 | sloID: "hoge", 28 | startAt: time.Date(2021, 10, 1, 0, 5, 0, 0, time.UTC), 29 | endAt: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC), 30 | expected: shimesaba.Alerts{ 31 | { 32 | Reason: "SLO:*", 33 | OpenedAt: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC), 34 | ClosedAt: ptrTime(time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC)), 35 | }, 36 | }, 37 | }, 38 | { 39 | name: "SLO:availability,quarity ", 40 | serviceName: "shimesaba", 41 | sloID: "availability", 42 | startAt: time.Date(2021, 10, 1, 0, 5, 0, 0, time.UTC), 43 | endAt: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC), 44 | expected: shimesaba.Alerts{ 45 | { 46 | Reason: "SLO:*", 47 | OpenedAt: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC), 48 | ClosedAt: ptrTime(time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC)), 49 | }, 50 | { 51 | Reason: "ALB Failures SLO:availability,quarity affected.", 52 | OpenedAt: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC), 53 | ClosedAt: ptrTime(time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC)), 54 | }, 55 | }, 56 | }, 57 | } 58 | for _, c := range cases { 59 | t.Run(c.name, func(t *testing.T) { 60 | vAlerts, err := repo.FetchVirtualAlerts(context.Background(), c.serviceName, c.sloID, c.startAt, c.endAt) 61 | require.NoError(t, err) 62 | require.EqualValues(t, c.expected, vAlerts) 63 | }) 64 | } 65 | } 66 | 67 | func TestRepositoryFetchAlerts(t *testing.T) { 68 | client := newMockMackerelClient(t) 69 | repo := shimesaba.NewRepository(client) 70 | 71 | cases := []struct { 72 | name string 73 | startAt time.Time 74 | endAt time.Time 75 | expected shimesaba.Alerts 76 | }{ 77 | { 78 | name: "Alerts service", 79 | startAt: time.Date(2021, 10, 1, 0, 5, 0, 0, time.UTC), 80 | endAt: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC), 81 | 
expected: shimesaba.Alerts{ 82 | { 83 | OpenedAt: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC), 84 | Monitor: shimesaba.NewMonitor("dummyMonitorID", "Dummy Service Metric Monitor", "service"), 85 | ClosedAt: ptrTime(time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC)), 86 | }, 87 | }, 88 | }, 89 | { 90 | name: "No alerts", 91 | startAt: time.Date(2022, 10, 1, 0, 5, 0, 0, time.UTC), 92 | endAt: time.Date(2022, 10, 1, 0, 15, 0, 0, time.UTC), 93 | expected: shimesaba.Alerts{}, 94 | }, 95 | { 96 | name: "check monitor", 97 | startAt: time.Date(2021, 10, 1, 0, 17, 0, 0, time.UTC), 98 | endAt: time.Date(2021, 10, 1, 0, 18, 0, 0, time.UTC), 99 | expected: shimesaba.Alerts{ 100 | { 101 | OpenedAt: time.Date(2021, 10, 1, 0, 17, 0, 0, time.UTC), 102 | Monitor: shimesaba.NewMonitor("dummyCheckMonitorID", "check monitor dummyCheckMonitorID", "check"), 103 | ClosedAt: nil, 104 | }, 105 | }, 106 | }, 107 | } 108 | for _, c := range cases { 109 | t.Run(c.name, func(t *testing.T) { 110 | alerts, err := repo.FetchAlerts(context.Background(), c.startAt, c.endAt) 111 | require.NoError(t, err) 112 | for _, a := range alerts { 113 | if a.Monitor != nil { 114 | a.Monitor = a.Monitor.WithEvaluator(nil) 115 | } 116 | } 117 | require.EqualValues(t, c.expected, alerts) 118 | }) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /mock_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | 8 | mackerel "github.com/mackerelio/mackerel-client-go" 9 | "github.com/mashiike/shimesaba" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | type mockMackerelClient struct { 14 | shimesaba.MackerelClient 15 | posted []*mackerel.MetricValue 16 | t *testing.T 17 | } 18 | 19 | func newMockMackerelClient(t *testing.T) *mockMackerelClient { 20 | t.Helper() 21 | return &mockMackerelClient{ 22 | t: t, 23 | } 24 | } 25 | 26 | func (m 
*mockMackerelClient) GetOrg() (*mackerel.Org, error) { 27 | return &mackerel.Org{ 28 | Name: "dummy", 29 | }, nil 30 | } 31 | 32 | func (m *mockMackerelClient) FindHosts(param *mackerel.FindHostsParam) ([]*mackerel.Host, error) { 33 | require.EqualValues( 34 | m.t, 35 | &mackerel.FindHostsParam{ 36 | Service: "shimesaba", 37 | Name: "dummy-alb", 38 | }, 39 | param, 40 | ) 41 | return []*mackerel.Host{ 42 | { 43 | ID: "dummyHostID", 44 | }, 45 | }, nil 46 | } 47 | 48 | func (m *mockMackerelClient) PostServiceMetricValues(serviceName string, metricValues []*mackerel.MetricValue) error { 49 | require.Equal(m.t, "shimesaba", serviceName) 50 | m.posted = append(m.posted, metricValues...) 51 | return nil 52 | } 53 | 54 | func (m *mockMackerelClient) FindWithClosedAlerts() (*mackerel.AlertsResp, error) { 55 | return &mackerel.AlertsResp{ 56 | Alerts: []*mackerel.Alert{ 57 | { 58 | ID: "dummyID20211001-001900", 59 | Status: "WARNING", 60 | MonitorID: "dummyMonitorID", 61 | OpenedAt: time.Date(2021, 10, 1, 0, 19, 0, 0, time.UTC).Unix(), 62 | Value: 0.01, 63 | Type: "service", 64 | }, 65 | { 66 | ID: "dummyID20211001-00200", 67 | Status: "WARNING", 68 | MonitorID: "dummyCheckMonitorID", 69 | OpenedAt: time.Date(2021, 10, 1, 0, 17, 0, 0, time.UTC).Unix(), 70 | Value: 0.01, 71 | Type: "check", 72 | }, 73 | }, 74 | NextID: "dummyNextID", 75 | }, nil 76 | } 77 | 78 | func (m *mockMackerelClient) FindWithClosedAlertsByNextID(nextID string) (*mackerel.AlertsResp, error) { 79 | require.Equal(m.t, "dummyNextID", nextID) 80 | return &mackerel.AlertsResp{ 81 | Alerts: []*mackerel.Alert{ 82 | { 83 | ID: "dummyID20211001-001000", 84 | Status: "OK", 85 | MonitorID: "dummyMonitorID", 86 | OpenedAt: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC).Unix(), 87 | ClosedAt: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC).Unix(), 88 | Value: 0.01, 89 | Type: "service", 90 | }, 91 | }, 92 | NextID: "", 93 | }, nil 94 | } 95 | 96 | func (m *mockMackerelClient) GetMonitor(monitorID string) 
(mackerel.Monitor, error) { 97 | switch monitorID { 98 | case "dummyMonitorID": 99 | return &mackerel.MonitorServiceMetric{ 100 | ID: monitorID, 101 | Name: "Dummy Service Metric Monitor", 102 | Type: "service", 103 | }, nil 104 | case "dummyCheckMonitorID": 105 | return nil, &mackerel.APIError{ 106 | StatusCode: 400, 107 | Message: "Cannot get a check monitor", 108 | } 109 | default: 110 | require.Equal(m.t, "dummyMonitorID", monitorID) 111 | return nil, errors.New("unexpected monitorID") 112 | } 113 | } 114 | 115 | var graphAnnotations = []*mackerel.GraphAnnotation{ 116 | { 117 | ID: "xxxxxxxxxxx", 118 | Title: "hogehogehoge", 119 | Description: "fugafugafuga", 120 | From: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC).Unix(), 121 | To: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC).Unix(), 122 | }, 123 | { 124 | ID: "yyyyyyyyyyy", 125 | Title: "hogehogehoge", 126 | Description: "SLO:*", 127 | From: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC).Unix(), 128 | To: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC).Unix(), 129 | }, 130 | { 131 | ID: "zzzzzzzzzzz", 132 | Title: "hogehogehoge", 133 | Description: "ALB Failures SLO:availability,quarity affected.", 134 | From: time.Date(2021, 10, 1, 0, 10, 0, 0, time.UTC).Unix(), 135 | To: time.Date(2021, 10, 1, 0, 15, 0, 0, time.UTC).Unix(), 136 | }, 137 | } 138 | 139 | func (m *mockMackerelClient) FindGraphAnnotations(service string, from int64, to int64) ([]*mackerel.GraphAnnotation, error) { 140 | require.Equal(m.t, "shimesaba", service) 141 | 142 | return graphAnnotations, nil 143 | } 144 | -------------------------------------------------------------------------------- /monitor.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | type Monitor struct { 9 | id string 10 | name string 11 | monitorType string 12 | evaluator func(hostID string, timeFrame time.Duration, startAt, endAt time.Time) (Reliabilities, bool) 13 | 
} 14 | 15 | func NewMonitor(id, name, monitorType string) *Monitor { 16 | return &Monitor{ 17 | id: id, 18 | name: name, 19 | monitorType: monitorType, 20 | } 21 | } 22 | 23 | func (m *Monitor) WithEvaluator(evaluator func(hostID string, timeFrame time.Duration, startAt, endAt time.Time) (Reliabilities, bool)) *Monitor { 24 | return &Monitor{ 25 | id: m.id, 26 | name: m.name, 27 | monitorType: m.monitorType, 28 | evaluator: evaluator, 29 | } 30 | } 31 | 32 | func (m *Monitor) ID() string { 33 | return m.id 34 | } 35 | 36 | func (m *Monitor) Name() string { 37 | return m.name 38 | } 39 | 40 | func (m *Monitor) Type() string { 41 | return m.monitorType 42 | } 43 | 44 | func (m *Monitor) String() string { 45 | return fmt.Sprintf("[%s]%s", m.monitorType, m.name) 46 | } 47 | 48 | func (m *Monitor) EvaluateReliabilities(hostID string, timeFrame time.Duration, startAt, endAt time.Time) (Reliabilities, bool) { 49 | if m.evaluator == nil { 50 | return nil, false 51 | } 52 | return m.evaluator(hostID, timeFrame, startAt, endAt) 53 | } 54 | -------------------------------------------------------------------------------- /reliability.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "errors" 5 | "log" 6 | "sort" 7 | "time" 8 | 9 | "github.com/mashiike/shimesaba/internal/timeutils" 10 | ) 11 | 12 | // Reliability represents a group of values related to reliability per tumbling window. 
13 | type Reliability struct { 14 | cursorAt time.Time 15 | timeFrame time.Duration 16 | isNoViolation IsNoViolationCollection 17 | upTime time.Duration 18 | failureTime time.Duration 19 | } 20 | 21 | type IsNoViolationCollection map[time.Time]bool 22 | 23 | func (c IsNoViolationCollection) IsUp(t time.Time) bool { 24 | if isUp, ok := c[t]; ok && !isUp { 25 | return false 26 | } 27 | return true 28 | } 29 | 30 | func (c IsNoViolationCollection) NewReliabilities(timeFrame time.Duration, startAt, endAt time.Time) (Reliabilities, error) { 31 | startAt = startAt.Truncate(timeFrame) 32 | iter := timeutils.NewIterator(startAt, endAt, timeFrame) 33 | reliabilitySlice := make([]*Reliability, 0) 34 | for iter.HasNext() { 35 | cursorAt, _ := iter.Next() 36 | reliabilitySlice = append(reliabilitySlice, NewReliability(cursorAt, timeFrame, c)) 37 | } 38 | return NewReliabilities(reliabilitySlice) 39 | } 40 | 41 | func NewReliability(cursorAt time.Time, timeFrame time.Duration, isNoViolation IsNoViolationCollection) *Reliability { 42 | cursorAt = cursorAt.Truncate(timeFrame).Add(timeFrame).UTC() 43 | r := &Reliability{ 44 | cursorAt: cursorAt, 45 | timeFrame: timeFrame, 46 | isNoViolation: isNoViolation, 47 | } 48 | r = r.Clone() 49 | r.calc() 50 | return r 51 | } 52 | 53 | func (r *Reliability) Clone() *Reliability { 54 | cloned := &Reliability{ 55 | cursorAt: r.cursorAt, 56 | timeFrame: r.timeFrame, 57 | upTime: r.upTime, 58 | failureTime: r.failureTime, 59 | } 60 | iter := timeutils.NewIterator(r.TimeFrameStartAt(), r.TimeFrameEndAt(), time.Minute) 61 | clonedIsNoViolation := make(IsNoViolationCollection, r.timeFrame/time.Minute) 62 | for iter.HasNext() { 63 | t, _ := iter.Next() 64 | clonedIsNoViolation[t] = r.isNoViolation.IsUp(t) 65 | } 66 | cloned.isNoViolation = clonedIsNoViolation 67 | return cloned 68 | } 69 | 70 | func (r *Reliability) calc() { 71 | iter := timeutils.NewIterator(r.TimeFrameStartAt(), r.TimeFrameEndAt(), time.Minute) 72 | var upTime, failureTime 
time.Duration 73 | for iter.HasNext() { 74 | t, _ := iter.Next() 75 | if r.isNoViolation.IsUp(t) { 76 | upTime += time.Minute 77 | } else { 78 | failureTime += time.Minute 79 | } 80 | } 81 | r.upTime = upTime 82 | r.failureTime = failureTime 83 | } 84 | 85 | //CursorAt is a representative value of the time shown by the tumbling window 86 | func (r *Reliability) CursorAt() time.Time { 87 | return r.cursorAt 88 | } 89 | 90 | //TimeFrame is the size of the tumbling window 91 | func (r *Reliability) TimeFrame() time.Duration { 92 | return r.timeFrame 93 | } 94 | 95 | //TimeFrameStartAt is the start time of the tumbling window 96 | func (r *Reliability) TimeFrameStartAt() time.Time { 97 | return r.cursorAt.Add(-r.timeFrame) 98 | } 99 | 100 | //TimeFrameEndAt is the end time of the tumbling window 101 | func (r *Reliability) TimeFrameEndAt() time.Time { 102 | return r.cursorAt.Add(-time.Nanosecond) 103 | } 104 | 105 | //UpTime is the uptime that can guarantee reliability. 106 | func (r *Reliability) UpTime() time.Duration { 107 | return r.upTime 108 | } 109 | 110 | //FailureTime is the time when reliability could not be ensured, i.e. 
SLO was violated 111 | func (r *Reliability) FailureTime() time.Duration { 112 | return r.failureTime 113 | } 114 | 115 | //Merge must be the same tumbling window 116 | func (r *Reliability) Merge(other *Reliability) (*Reliability, error) { 117 | if r.cursorAt != other.cursorAt { 118 | return r, errors.New("mismatch cursorAt") 119 | } 120 | if r.timeFrame != other.timeFrame { 121 | return r, errors.New("mismatch timeFrame") 122 | } 123 | cloned := r.Clone() 124 | for t, isUp2 := range other.isNoViolation { 125 | cloned.isNoViolation[t] = r.isNoViolation.IsUp(t) && isUp2 126 | } 127 | cloned.calc() 128 | return cloned, nil 129 | } 130 | 131 | // Reliabilities is sortable 132 | type Reliabilities []*Reliability 133 | 134 | func NewReliabilities(s []*Reliability) (Reliabilities, error) { 135 | c := Reliabilities(s) 136 | sort.Sort(c) 137 | if c.Len() == 0 { 138 | return c, nil 139 | } 140 | timeFrame := c[0].TimeFrame() 141 | cursorAt := time.Unix(0, 0) 142 | for _, r := range c { 143 | if r.CursorAt() == cursorAt { 144 | return nil, errors.New("duplicate cursorAt") 145 | } 146 | cursorAt = r.CursorAt() 147 | if r.TimeFrame() != timeFrame { 148 | return nil, errors.New("multiple timeFrame") 149 | } 150 | } 151 | return c, nil 152 | } 153 | 154 | func (c Reliabilities) Len() int { return len(c) } 155 | func (c Reliabilities) Less(i, j int) bool { return c[i].CursorAt().After(c[j].CursorAt()) } 156 | func (c Reliabilities) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 157 | 158 | func (c Reliabilities) Clone() Reliabilities { 159 | cloned := make(Reliabilities, 0, len(c)) 160 | for _, r := range c { 161 | cloned = append(cloned, r.Clone()) 162 | } 163 | sort.Sort(cloned) 164 | return cloned 165 | } 166 | 167 | func (c Reliabilities) CalcTime(cursor, n int) (upTime, failureTime, deltaFailureTime time.Duration) { 168 | deltaFailureTime = c[cursor].FailureTime() 169 | i := cursor 170 | for ; i < cursor+n && i < c.Len(); i++ { 171 | upTime += c[i].UpTime() 172 | failureTime += 
c[i].FailureTime() 173 | } 174 | log.Printf("[debug] CalcTime[%s~%s] = (%s, %s, %s)", 175 | c[cursor].TimeFrameStartAt(), 176 | c[i-1].TimeFrameEndAt(), 177 | upTime, 178 | failureTime, 179 | deltaFailureTime, 180 | ) 181 | return 182 | } 183 | 184 | //TimeFrame is the size of the tumbling window 185 | func (c Reliabilities) TimeFrame() time.Duration { 186 | if c.Len() == 0 { 187 | return 0 188 | } 189 | return c[0].TimeFrame() 190 | } 191 | 192 | //CursorAt is a representative value of the time shown by the tumbling window 193 | func (c Reliabilities) CursorAt(i int) time.Time { 194 | if c.Len() == 0 { 195 | return time.Unix(0, 0) 196 | } 197 | return c[i].cursorAt 198 | } 199 | 200 | //Merge two collection 201 | func (c Reliabilities) Merge(other Reliabilities) (Reliabilities, error) { 202 | return c.MergeInRange(other, time.Unix(0, 0).UTC(), time.Date(9999, 12, 31, 23, 59, 59, 0, time.UTC)) 203 | } 204 | 205 | func (c Reliabilities) MergeInRange(other Reliabilities, startAt, endAt time.Time) (Reliabilities, error) { 206 | if len(other) == 0 { 207 | return c.Clone(), nil 208 | } 209 | if len(c) == 0 { 210 | return other.Clone(), nil 211 | } 212 | reliabilityByCursorAt := make(map[time.Time]*Reliability, len(c)) 213 | for _, r := range c { 214 | if r.TimeFrameStartAt().Before(startAt) { 215 | continue 216 | } 217 | if r.TimeFrameStartAt().After(endAt) { 218 | continue 219 | } 220 | reliabilityByCursorAt[r.CursorAt()] = r.Clone() 221 | } 222 | for _, r := range other { 223 | if r.TimeFrameStartAt().Before(startAt) { 224 | continue 225 | } 226 | if r.TimeFrameStartAt().After(endAt) { 227 | continue 228 | } 229 | cursorAt := r.CursorAt() 230 | if base, ok := reliabilityByCursorAt[cursorAt]; ok { 231 | var err error 232 | reliabilityByCursorAt[cursorAt], err = base.Merge(r) 233 | if err != nil { 234 | return nil, err 235 | } 236 | } else { 237 | reliabilityByCursorAt[cursorAt] = r 238 | } 239 | } 240 | merged := make([]*Reliability, 0, len(reliabilityByCursorAt)) 241 
| for _, r := range reliabilityByCursorAt { 242 | merged = append(merged, r) 243 | } 244 | return NewReliabilities(merged) 245 | } 246 | -------------------------------------------------------------------------------- /reliability_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/mashiike/shimesaba" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestReliability(t *testing.T) { 12 | 13 | r := shimesaba.NewReliability( 14 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC), 15 | time.Hour, 16 | map[time.Time]bool{ 17 | 18 | time.Date(2022, 1, 6, 8, 28, 0, 0, time.UTC): true, 19 | time.Date(2022, 1, 6, 8, 29, 0, 0, time.UTC): false, 20 | time.Date(2022, 1, 6, 8, 30, 0, 0, time.UTC): true, 21 | 22 | time.Date(2022, 1, 6, 9, 28, 0, 0, time.UTC): true, 23 | time.Date(2022, 1, 6, 9, 29, 0, 0, time.UTC): false, 24 | time.Date(2022, 1, 6, 9, 30, 0, 0, time.UTC): true, 25 | 26 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): true, 27 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 28 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): true, 29 | 30 | time.Date(2022, 1, 6, 10, 38, 0, 0, time.UTC): true, 31 | time.Date(2022, 1, 6, 10, 39, 0, 0, time.UTC): false, 32 | time.Date(2022, 1, 6, 10, 40, 0, 0, time.UTC): true, 33 | }, 34 | ) 35 | require.EqualValues(t, time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), r.CursorAt(), "cursorAt 2022-1-6 10:00") 36 | require.EqualValues(t, time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), r.TimeFrameStartAt(), "timeFrameStartAt 2022-1-6 9:00") 37 | require.EqualValues(t, time.Date(2022, 1, 6, 9, 59, 59, 999999999, time.UTC), r.TimeFrameEndAt(), "timeFrameEndAt 2022-1-6 9:59:59.999999999") 38 | require.EqualValues(t, 58*time.Minute, r.UpTime(), "upTime 58m") 39 | require.EqualValues(t, 2*time.Minute, r.FailureTime(), "failureTime 2m") 40 | require.True(t, r.UpTime()+r.FailureTime() == r.TimeFrame(), "upTime + 
failureTime = timeFrame") 41 | } 42 | 43 | func TestReliabilityMerge(t *testing.T) { 44 | r := shimesaba.NewReliability( 45 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC), 46 | time.Hour, 47 | map[time.Time]bool{ 48 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): true, 49 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 50 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): false, 51 | time.Date(2022, 1, 6, 9, 41, 0, 0, time.UTC): true, 52 | }, 53 | ) 54 | other := shimesaba.NewReliability( 55 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC), 56 | time.Hour, 57 | map[time.Time]bool{ 58 | time.Date(2022, 1, 6, 9, 37, 0, 0, time.UTC): true, 59 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): false, 60 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 61 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): true, 62 | }, 63 | ) 64 | actual, err := r.Merge(other) 65 | require.NoError(t, err) 66 | require.EqualValues(t, time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), actual.CursorAt(), "cursorAt 2022-1-6 10:00") 67 | require.EqualValues(t, time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), actual.TimeFrameStartAt(), "timeFrameStartAt 2022-1-6 9:00") 68 | require.EqualValues(t, time.Date(2022, 1, 6, 9, 59, 59, 999999999, time.UTC), actual.TimeFrameEndAt(), "timeFrameEndAt 2022-1-6 9:59:59.999999999") 69 | require.EqualValues(t, 57*time.Minute, actual.UpTime(), "upTime 57m") 70 | require.EqualValues(t, 3*time.Minute, actual.FailureTime(), "failureTime 3m") 71 | require.True(t, actual.UpTime()+actual.FailureTime() == actual.TimeFrame(), "upTime + failureTime = timeFrame") 72 | } 73 | 74 | func TestReliabilities(t *testing.T) { 75 | allTimeIsNoViolation := map[time.Time]bool{ 76 | 77 | time.Date(2022, 1, 6, 8, 28, 0, 0, time.UTC): true, 78 | time.Date(2022, 1, 6, 8, 29, 0, 0, time.UTC): false, 79 | time.Date(2022, 1, 6, 8, 30, 0, 0, time.UTC): true, 80 | 81 | time.Date(2022, 1, 6, 9, 28, 0, 0, time.UTC): true, 82 | time.Date(2022, 1, 6, 9, 29, 0, 0, time.UTC): false, 83 | 
time.Date(2022, 1, 6, 9, 30, 0, 0, time.UTC): true, 84 | 85 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): true, 86 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 87 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): true, 88 | 89 | time.Date(2022, 1, 6, 10, 38, 0, 0, time.UTC): false, 90 | time.Date(2022, 1, 6, 10, 39, 0, 0, time.UTC): false, 91 | time.Date(2022, 1, 6, 10, 40, 0, 0, time.UTC): false, 92 | } 93 | tumblingWindowTimeFrame := time.Hour 94 | c, err := shimesaba.NewReliabilities( 95 | []*shimesaba.Reliability{ 96 | shimesaba.NewReliability( 97 | time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), 98 | tumblingWindowTimeFrame, 99 | allTimeIsNoViolation, 100 | ), 101 | shimesaba.NewReliability( 102 | time.Date(2022, 1, 6, 8, 0, 0, 0, time.UTC), 103 | tumblingWindowTimeFrame, 104 | allTimeIsNoViolation, 105 | ), 106 | shimesaba.NewReliability( 107 | time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), 108 | tumblingWindowTimeFrame, 109 | allTimeIsNoViolation, 110 | ), 111 | }, 112 | ) 113 | require.NoError(t, err) 114 | require.True(t, c[0].CursorAt().UnixNano() > c[1].CursorAt().UnixNano(), "is desc? c[0].CursorAt > c[1].CursorAt") 115 | require.True(t, c[1].CursorAt().UnixNano() > c[2].CursorAt().UnixNano(), "is desc? 
c[1].CursorAt > c[2].CursorAt") 116 | 117 | upTime, failureTime, deltaFailureTime := c.CalcTime(0, 2) 118 | require.EqualValues(t, time.Date(2022, 1, 6, 11, 0, 0, 0, time.UTC), c.CursorAt(0), "1st CursorAt") 119 | require.EqualValues(t, (57+58)*time.Minute, upTime, "1st upTime") 120 | require.EqualValues(t, (3+2)*time.Minute, failureTime, "1st failureTime") 121 | require.EqualValues(t, 3*time.Minute, deltaFailureTime, "1st deltaFailureTime") 122 | upTime, failureTime, deltaFailureTime = c.CalcTime(1, 2) 123 | require.EqualValues(t, time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), c.CursorAt(1), "2nd CursorAt") 124 | require.EqualValues(t, (58+59)*time.Minute, upTime, "2nd upTime") 125 | require.EqualValues(t, (2+1)*time.Minute, failureTime, "2nd failureTime") 126 | require.EqualValues(t, 2*time.Minute, deltaFailureTime, "2nd deltaFailureTime") 127 | } 128 | 129 | func TestReliabilitiesMerge(t *testing.T) { 130 | 131 | tumblingWindowTimeFrame := time.Hour 132 | baseAllTimeIsNoViolation := map[time.Time]bool{ 133 | 134 | time.Date(2022, 1, 6, 8, 28, 0, 0, time.UTC): true, 135 | time.Date(2022, 1, 6, 8, 29, 0, 0, time.UTC): false, 136 | time.Date(2022, 1, 6, 8, 30, 0, 0, time.UTC): true, 137 | 138 | time.Date(2022, 1, 6, 9, 28, 0, 0, time.UTC): true, 139 | time.Date(2022, 1, 6, 9, 29, 0, 0, time.UTC): false, 140 | time.Date(2022, 1, 6, 9, 30, 0, 0, time.UTC): true, 141 | 142 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): true, 143 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 144 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): true, 145 | 146 | time.Date(2022, 1, 6, 10, 38, 0, 0, time.UTC): false, 147 | time.Date(2022, 1, 6, 10, 39, 0, 0, time.UTC): false, 148 | time.Date(2022, 1, 6, 10, 40, 0, 0, time.UTC): false, 149 | } 150 | base, err := shimesaba.NewReliabilities( 151 | []*shimesaba.Reliability{ 152 | shimesaba.NewReliability( 153 | time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), 154 | tumblingWindowTimeFrame, 155 | baseAllTimeIsNoViolation, 156 | ), 157 | 
shimesaba.NewReliability( 158 | time.Date(2022, 1, 6, 8, 0, 0, 0, time.UTC), 159 | tumblingWindowTimeFrame, 160 | baseAllTimeIsNoViolation, 161 | ), 162 | shimesaba.NewReliability( 163 | time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), 164 | tumblingWindowTimeFrame, 165 | baseAllTimeIsNoViolation, 166 | ), 167 | }, 168 | ) 169 | require.NoError(t, err) 170 | otherAllTimeIsNoViolation := map[time.Time]bool{ 171 | 172 | time.Date(2022, 1, 6, 7, 1, 0, 0, time.UTC): true, 173 | time.Date(2022, 1, 6, 7, 2, 0, 0, time.UTC): false, 174 | time.Date(2022, 1, 6, 7, 3, 0, 0, time.UTC): true, 175 | 176 | time.Date(2022, 1, 6, 8, 1, 0, 0, time.UTC): true, 177 | time.Date(2022, 1, 6, 8, 2, 0, 0, time.UTC): false, 178 | time.Date(2022, 1, 6, 8, 3, 0, 0, time.UTC): true, 179 | 180 | time.Date(2022, 1, 6, 9, 1, 0, 0, time.UTC): true, 181 | time.Date(2022, 1, 6, 9, 2, 0, 0, time.UTC): false, 182 | time.Date(2022, 1, 6, 9, 3, 0, 0, time.UTC): true, 183 | 184 | time.Date(2022, 1, 6, 10, 1, 0, 0, time.UTC): false, 185 | time.Date(2022, 1, 6, 10, 2, 0, 0, time.UTC): false, 186 | time.Date(2022, 1, 6, 10, 3, 0, 0, time.UTC): false, 187 | } 188 | other, err := shimesaba.NewReliabilities( 189 | []*shimesaba.Reliability{ 190 | shimesaba.NewReliability( 191 | time.Date(2022, 1, 6, 7, 0, 0, 0, time.UTC), 192 | tumblingWindowTimeFrame, 193 | otherAllTimeIsNoViolation, 194 | ), 195 | shimesaba.NewReliability( 196 | time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), 197 | tumblingWindowTimeFrame, 198 | otherAllTimeIsNoViolation, 199 | ), 200 | shimesaba.NewReliability( 201 | time.Date(2022, 1, 6, 8, 0, 0, 0, time.UTC), 202 | tumblingWindowTimeFrame, 203 | otherAllTimeIsNoViolation, 204 | ), 205 | shimesaba.NewReliability( 206 | time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), 207 | tumblingWindowTimeFrame, 208 | otherAllTimeIsNoViolation, 209 | ), 210 | }, 211 | ) 212 | require.NoError(t, err) 213 | actual, err := base.Merge(other) 214 | require.NoError(t, err) 215 | require.Equal(t, 4, len(actual), "merged 
length 4") 216 | 217 | require.True(t, actual[0].CursorAt().UnixNano() > actual[1].CursorAt().UnixNano(), "is desc? c[0].CursorAt > c[1].CursorAt") 218 | require.True(t, actual[1].CursorAt().UnixNano() > actual[2].CursorAt().UnixNano(), "is desc? c[1].CursorAt > c[2].CursorAt") 219 | require.True(t, actual[2].CursorAt().UnixNano() > actual[3].CursorAt().UnixNano(), "is desc? c[2].CursorAt > c[3].CursorAt") 220 | 221 | upTime, failureTime, deltaFailureTime := actual.CalcTime(0, 3) 222 | require.EqualValues(t, time.Date(2022, 1, 6, 11, 0, 0, 0, time.UTC), actual.CursorAt(0), "1st CursorAt") 223 | require.EqualValues(t, (54+57+58)*time.Minute, upTime, "1st upTime") 224 | require.EqualValues(t, (6+3+2)*time.Minute, failureTime, "1st failureTime") 225 | require.EqualValues(t, 6*time.Minute, deltaFailureTime, "1st deltaFailureTime") 226 | 227 | upTime, failureTime, deltaFailureTime = actual.CalcTime(1, 3) 228 | require.EqualValues(t, time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), actual.CursorAt(1), "2nd CursorAt") 229 | require.EqualValues(t, (57+58+59)*time.Minute, upTime, "2nd upTime") 230 | require.EqualValues(t, (3+2+1)*time.Minute, failureTime, "2nd failureTime") 231 | require.EqualValues(t, 3*time.Minute, deltaFailureTime, "2nd deltaFailureTime") 232 | 233 | } 234 | -------------------------------------------------------------------------------- /report.go: -------------------------------------------------------------------------------- 1 | package shimesaba 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "time" 7 | ) 8 | 9 | // Report has SLI/SLO/ErrorBudget numbers in one rolling window 10 | type Report struct { 11 | DefinitionID string 12 | Destination *Destination 13 | DataPoint time.Time 14 | TimeFrameStartAt time.Time 15 | TimeFrameEndAt time.Time 16 | UpTime time.Duration 17 | FailureTime time.Duration 18 | ErrorBudgetSize time.Duration 19 | ErrorBudget time.Duration 20 | ErrorBudgetConsumption time.Duration 21 | } 22 | 23 | func NewReport(definitionID 
string, destination *Destination, cursorAt time.Time, timeFrame time.Duration, errorBudgetSize float64) *Report { 24 | report := &Report{ 25 | DefinitionID: definitionID, 26 | Destination: destination, 27 | DataPoint: cursorAt, 28 | TimeFrameStartAt: cursorAt.Add(-timeFrame), 29 | TimeFrameEndAt: cursorAt.Add(-time.Nanosecond), 30 | ErrorBudgetSize: time.Duration(errorBudgetSize * float64(timeFrame)).Truncate(time.Minute), 31 | } 32 | return report 33 | } 34 | 35 | func NewReports(definitionID string, destination *Destination, errorBudgetSize float64, timeFrame time.Duration, reliability Reliabilities) []*Report { 36 | if reliability.Len() == 0 { 37 | return make([]*Report, 0) 38 | } 39 | n := int(timeFrame / reliability.TimeFrame()) 40 | numReports := reliability.Len() - n + 1 41 | reports := make([]*Report, 0, numReports) 42 | 43 | for i := 0; i < numReports; i++ { 44 | report := NewReport( 45 | definitionID, 46 | destination, 47 | reliability.CursorAt(i), 48 | timeFrame, 49 | errorBudgetSize, 50 | ) 51 | report.SetTime(reliability.CalcTime(i, n)) 52 | reports = append(reports, report) 53 | } 54 | 55 | return reports 56 | } 57 | 58 | func (r *Report) SetTime(upTime time.Duration, failureTime time.Duration, deltaFailureTime time.Duration) { 59 | r.UpTime = upTime 60 | r.FailureTime = failureTime 61 | r.ErrorBudget = (r.ErrorBudgetSize - failureTime).Truncate(time.Minute) 62 | r.ErrorBudgetConsumption = deltaFailureTime.Truncate(time.Minute) 63 | } 64 | 65 | // String implements fmt.Stringer 66 | func (r *Report) String() string { 67 | return fmt.Sprintf( 68 | "error budget report[id=`%s`,data_point=`%s`]: size=%0.4f[min], remaining=%0.4f[min](%0.1f%%), consumption=%0.4f[min](%0.1f%%)", 69 | r.DefinitionID, r.DataPoint.Format(time.RFC3339), 70 | r.ErrorBudgetSize.Minutes(), 71 | r.ErrorBudget.Minutes(), r.ErrorBudgetUsageRate()*100.0, 72 | r.ErrorBudgetConsumption.Minutes(), r.ErrorBudgetConsumptionRate()*100.0, 73 | ) 74 | } 75 | 76 | // ErrorBudgetUsageRate 
returns (1.0 - ErrorBudget/ErrorBudgetSize) 77 | func (r *Report) ErrorBudgetUsageRate() float64 { 78 | if r.ErrorBudget >= 0 { 79 | return 1.0 - float64(r.ErrorBudget)/float64(r.ErrorBudgetSize) 80 | } 81 | return -float64(r.ErrorBudget-r.ErrorBudgetSize) / float64(r.ErrorBudgetSize) 82 | } 83 | 84 | // ErrorBudgetConsumptionRate returns ErrorBudgetConsumption/ErrorBudgetSize 85 | func (r *Report) ErrorBudgetConsumptionRate() float64 { 86 | return float64(r.ErrorBudgetConsumption) / float64(r.ErrorBudgetSize) 87 | } 88 | 89 | // MarshalJSON implements json.Marshaler 90 | func (r *Report) MarshalJSON() ([]byte, error) { 91 | d := struct { 92 | DefinitionID string `json:"definition_id" yaml:"definition_id"` 93 | DataPoint time.Time `json:"data_point" yaml:"data_point"` 94 | TimeFrameStartAt time.Time `json:"time_frame_start_at" yaml:"time_frame_start_at"` 95 | TimeFrameEndAt time.Time `json:"time_frame_end_at" yaml:"time_frame_end_at"` 96 | UpTime float64 `json:"up_time" yaml:"up_time"` 97 | FailureTime float64 `json:"failure_time" yaml:"failure_time"` 98 | ErrorBudgetSize float64 `json:"error_budget_size" yaml:"error_budget_size"` 99 | ErrorBudget float64 `json:"error_budget" yaml:"error_budget"` 100 | ErrorBudgetUsageRate float64 `json:"error_budget_usage_rate" yaml:"error_budget_usage_rate"` 101 | ErrorBudgetConsumption float64 `json:"error_budget_consumption" yaml:"error_budget_consumption"` 102 | ErrorBudgetConsumptionRate float64 `json:"error_budget_consumption_rate" yaml:"error_budget_consumption_rate"` 103 | }{ 104 | DefinitionID: r.DefinitionID, 105 | DataPoint: r.DataPoint, 106 | TimeFrameStartAt: r.TimeFrameStartAt, 107 | TimeFrameEndAt: r.TimeFrameEndAt, 108 | UpTime: r.UpTime.Minutes(), 109 | FailureTime: r.FailureTime.Minutes(), 110 | ErrorBudgetSize: r.ErrorBudgetSize.Minutes(), 111 | ErrorBudget: r.ErrorBudget.Minutes(), 112 | ErrorBudgetUsageRate: r.ErrorBudgetUsageRate(), 113 | ErrorBudgetConsumption: r.ErrorBudgetConsumption.Minutes(), 114 | 
ErrorBudgetConsumptionRate: r.ErrorBudgetConsumptionRate(), 115 | } 116 | return json.Marshal(d) 117 | } 118 | 119 | func (r *Report) GetDestinationMetricValue(metricType DestinationMetricType) float64 { 120 | switch metricType { 121 | case ErrorBudget: 122 | return r.ErrorBudget.Minutes() 123 | case ErrorBudgetRemainingPercentage: 124 | return (1.0 - r.ErrorBudgetUsageRate()) * 100.0 125 | case ErrorBudgetPercentage: 126 | return r.ErrorBudgetUsageRate() * 100.0 127 | case ErrorBudgetConsumption: 128 | return r.ErrorBudgetConsumption.Minutes() 129 | case ErrorBudgetConsumptionPercentage: 130 | return r.ErrorBudgetConsumptionRate() * 100.0 131 | case UpTime: 132 | return r.UpTime.Minutes() 133 | case FailureTime: 134 | return r.FailureTime.Minutes() 135 | } 136 | panic(fmt.Sprintf("unknown metric type %v", metricType)) 137 | } 138 | -------------------------------------------------------------------------------- /report_test.go: -------------------------------------------------------------------------------- 1 | package shimesaba_test 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | "time" 7 | 8 | "github.com/mashiike/shimesaba" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestReport(t *testing.T) { 13 | cases := []struct { 14 | casename string 15 | report *shimesaba.Report 16 | expectedErrorBudgetUsageRate float64 17 | expectedErrorBudgetConsumptionRate float64 18 | }{ 19 | { 20 | casename: "size=100min,budget=99min", 21 | report: &shimesaba.Report{ 22 | ErrorBudgetSize: 100 * time.Minute, 23 | ErrorBudget: 99 * time.Minute, 24 | ErrorBudgetConsumption: time.Minute, 25 | }, 26 | expectedErrorBudgetUsageRate: 0.01, 27 | expectedErrorBudgetConsumptionRate: 0.01, 28 | }, 29 | { 30 | casename: "size=100min,budget=-3min", 31 | report: &shimesaba.Report{ 32 | ErrorBudgetSize: 100 * time.Minute, 33 | ErrorBudget: -3 * time.Minute, 34 | ErrorBudgetConsumption: 99 * time.Minute, 35 | }, 36 | expectedErrorBudgetUsageRate: 1.03, 37 | 
expectedErrorBudgetConsumptionRate: 0.99, 38 | }, 39 | } 40 | epsilon := 0.00001 41 | for _, c := range cases { 42 | t.Run(c.casename, func(t *testing.T) { 43 | usageRate := c.report.ErrorBudgetUsageRate() 44 | t.Log(usageRate) 45 | require.InEpsilon( 46 | t, 47 | c.expectedErrorBudgetUsageRate, 48 | usageRate, 49 | epsilon, 50 | "usage rate", 51 | ) 52 | consumptionRate := c.report.ErrorBudgetConsumptionRate() 53 | t.Log(consumptionRate) 54 | require.InEpsilon( 55 | t, 56 | c.expectedErrorBudgetConsumptionRate, 57 | consumptionRate, 58 | epsilon, 59 | "consumption rate", 60 | ) 61 | }) 62 | } 63 | } 64 | 65 | func TestNewReports(t *testing.T) { 66 | dest := &shimesaba.Destination{ 67 | ServiceName: "test", 68 | MetricPrefix: "test", 69 | } 70 | allTimeIsNoViolation := map[time.Time]bool{ 71 | 72 | time.Date(2022, 1, 6, 8, 28, 0, 0, time.UTC): true, 73 | time.Date(2022, 1, 6, 8, 29, 0, 0, time.UTC): false, 74 | time.Date(2022, 1, 6, 8, 30, 0, 0, time.UTC): true, 75 | 76 | time.Date(2022, 1, 6, 9, 28, 0, 0, time.UTC): true, 77 | time.Date(2022, 1, 6, 9, 29, 0, 0, time.UTC): false, 78 | time.Date(2022, 1, 6, 9, 30, 0, 0, time.UTC): true, 79 | 80 | time.Date(2022, 1, 6, 9, 38, 0, 0, time.UTC): true, 81 | time.Date(2022, 1, 6, 9, 39, 0, 0, time.UTC): false, 82 | time.Date(2022, 1, 6, 9, 40, 0, 0, time.UTC): true, 83 | 84 | time.Date(2022, 1, 6, 10, 38, 0, 0, time.UTC): false, 85 | time.Date(2022, 1, 6, 10, 39, 0, 0, time.UTC): false, 86 | time.Date(2022, 1, 6, 10, 40, 0, 0, time.UTC): false, 87 | } 88 | tumblingWindowTimeFrame := time.Hour 89 | c, _ := shimesaba.NewReliabilities( 90 | []*shimesaba.Reliability{ 91 | shimesaba.NewReliability( 92 | time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), 93 | tumblingWindowTimeFrame, 94 | allTimeIsNoViolation, 95 | ), 96 | shimesaba.NewReliability( 97 | time.Date(2022, 1, 6, 8, 0, 0, 0, time.UTC), 98 | tumblingWindowTimeFrame, 99 | allTimeIsNoViolation, 100 | ), 101 | shimesaba.NewReliability( 102 | time.Date(2022, 1, 6, 10, 0, 0, 0, 
time.UTC), 103 | tumblingWindowTimeFrame, 104 | allTimeIsNoViolation, 105 | ), 106 | }, 107 | ) 108 | actual := shimesaba.NewReports("test", dest, 0.05, 2*time.Hour, c) 109 | expected := []*shimesaba.Report{ 110 | { 111 | DefinitionID: "test", 112 | Destination: dest, 113 | DataPoint: time.Date(2022, 1, 6, 11, 0, 0, 0, time.UTC), 114 | TimeFrameStartAt: time.Date(2022, 1, 6, 9, 0, 0, 0, time.UTC), 115 | TimeFrameEndAt: time.Date(2022, 1, 6, 11, 0, 0, 0, time.UTC).Add(-time.Nanosecond), 116 | ErrorBudgetSize: 6 * time.Minute, 117 | UpTime: (57 + 58) * time.Minute, 118 | FailureTime: (3 + 2) * time.Minute, 119 | ErrorBudget: 1 * time.Minute, 120 | ErrorBudgetConsumption: 3 * time.Minute, 121 | }, 122 | { 123 | DefinitionID: "test", 124 | Destination: dest, 125 | DataPoint: time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC), 126 | TimeFrameStartAt: time.Date(2022, 1, 6, 8, 0, 0, 0, time.UTC), 127 | TimeFrameEndAt: time.Date(2022, 1, 6, 10, 0, 0, 0, time.UTC).Add(-time.Nanosecond), 128 | ErrorBudgetSize: 6 * time.Minute, 129 | UpTime: (58 + 59) * time.Minute, 130 | FailureTime: (2 + 1) * time.Minute, 131 | ErrorBudget: 3 * time.Minute, 132 | ErrorBudgetConsumption: 2 * time.Minute, 133 | }, 134 | } 135 | for i, a := range actual { 136 | bs, _ := json.MarshalIndent(a, "", " ") 137 | t.Logf("actual[%d]:%s", i, string(bs)) 138 | } 139 | for i, e := range expected { 140 | bs, _ := json.MarshalIndent(e, "", " ") 141 | t.Logf("expected[%d]:%s", i, string(bs)) 142 | } 143 | require.EqualValues(t, expected, actual) 144 | } 145 | -------------------------------------------------------------------------------- /testdata/app_disable_test.yaml: -------------------------------------------------------------------------------- 1 | 2 | required_version: ">=1.1.0" 3 | 4 | destination: 5 | service_name: shimesaba 6 | metric_prefix: app_test 7 | metrics: 8 | error_budget: 9 | metric_type_name: eb 10 | enabled: true 11 | error_budget_remaining_percentage: 12 | metric_type_name: ebr 13 | 
enabled: true 14 | error_budget_percentage: 15 | enabled: false 16 | error_budget_consumption: 17 | enabled: false 18 | error_budget_consumption_percentage: 19 | enabled: false 20 | uptime: 21 | enabled: false 22 | failure_time: 23 | enabled: false 24 | 25 | slo: 26 | - id: availability 27 | rolling_period: 5m 28 | calculate_interval: 1m 29 | error_budget_size: 0.1 30 | alert_based_sli: 31 | - monitor_id: "dummyMonitorID" 32 | - monitor_name_prefix: "Dummy" 33 | -------------------------------------------------------------------------------- /testdata/app_test.yaml: -------------------------------------------------------------------------------- 1 | 2 | required_version: ">=0.6.0" 3 | 4 | slo: 5 | - id: alerts 6 | destination: 7 | service_name: shimesaba 8 | rolling_period: 5m 9 | calculate_interval: 1m 10 | error_budget_size: 0.1 11 | alert_based_sli: 12 | - monitor_id: "dummyMonitorID" 13 | - monitor_name_prefix: "Dummy" 14 | -------------------------------------------------------------------------------- /testdata/app_uptime_and_failuretime.yaml: -------------------------------------------------------------------------------- 1 | 2 | required_version: ">=1.2.0" 3 | 4 | destination: 5 | metrics: 6 | uptime: 7 | enabled: true 8 | failure_time: 9 | enabled: true 10 | 11 | slo: 12 | - id: alerts 13 | destination: 14 | service_name: shimesaba 15 | rolling_period: 5m 16 | calculate_interval: 1m 17 | error_budget_size: 0.1 18 | alert_based_sli: 19 | - monitor_id: "dummyMonitorID" 20 | - monitor_name_prefix: "Dummy" 21 | -------------------------------------------------------------------------------- /testdata/v1.0.0_destination.yaml: -------------------------------------------------------------------------------- 1 | required_version: ">=1.0.0" 2 | 3 | rolling_period: 28d 4 | calculate_interval: 1h 5 | error_budget_size: 0.01% 6 | 7 | slo: 8 | - id: external_api_availability 9 | destination: 10 | service_name: prod 11 | metric_prefix: external 12 | metric_suffix: 
availability 13 | alert_based_sli: 14 | - monitor_name: api.example.com 15 | monitor_type: external 16 | - id: internal_api_availability 17 | destination: 18 | service_name: prod 19 | metric_prefix: internal 20 | metric_suffix: availability 21 | alert_based_sli: 22 | - monitor_name: internal.api.example.com 23 | monitor_type: external 24 | 25 | 26 | -------------------------------------------------------------------------------- /testdata/v1.0.0_simple.yaml: -------------------------------------------------------------------------------- 1 | required_version: ">=1.0.0" 2 | 3 | rolling_period: 28d 4 | calculate_interval: 1h 5 | destination: 6 | service_name: prod 7 | error_budget_size: 40m 8 | 9 | slo: 10 | - id: availability 11 | alert_based_sli: 12 | - monitor_name_suffix: api.example.com 13 | monitor_type: external 14 | - id: latency 15 | alert_based_sli: 16 | - monitor_name: ALB target p99 over 1.0 sec 17 | monitor_type: host 18 | - id: quality 19 | alert_based_sli: 20 | - monitor_name_prefix: "quality service metric" 21 | monitor_type: service 22 | 23 | --------------------------------------------------------------------------------